├── .env
├── .gitattributes
├── .gitignore
├── CHANGELOG.md
├── Dockerfile
├── LICENSE
├── README.md
├── data
    ├── metadata.csv
    ├── processed
    │   ├── processed_audios
    │   │   ├── processed_audio1.wav
    │   │   ├── processed_audio10.wav
    │   │   ├── processed_audio11.wav
    │   │   ├── processed_audio12.wav
    │   │   ├── processed_audio13.wav
    │   │   ├── processed_audio14.wav
    │   │   ├── processed_audio15.wav
    │   │   ├── processed_audio16.wav
    │   │   ├── processed_audio17.wav
    │   │   ├── processed_audio18.wav
    │   │   ├── processed_audio19.wav
    │   │   ├── processed_audio2.wav
    │   │   ├── processed_audio20.wav
    │   │   ├── processed_audio3.wav
    │   │   ├── processed_audio4.wav
    │   │   ├── processed_audio5.wav
    │   │   ├── processed_audio6.wav
    │   │   ├── processed_audio7.wav
    │   │   ├── processed_audio8.wav
    │   │   └── processed_audio9.wav
    │   ├── processed_images
    │   │   ├── processed_image1.jpg
    │   │   ├── processed_image10.jpg
    │   │   ├── processed_image11.jpg
    │   │   ├── processed_image12.jpg
    │   │   ├── processed_image13.jpg
    │   │   ├── processed_image14.jpg
    │   │   ├── processed_image15.jpg
    │   │   ├── processed_image16.jpg
    │   │   ├── processed_image17.jpg
    │   │   ├── processed_image18.jpg
    │   │   ├── processed_image19.jpg
    │   │   ├── processed_image2.jpg
    │   │   ├── processed_image20.jpg
    │   │   ├── processed_image3.jpg
    │   │   ├── processed_image4.jpg
    │   │   ├── processed_image5.jpg
    │   │   ├── processed_image6.jpg
    │   │   ├── processed_image7.jpg
    │   │   ├── processed_image8.jpg
    │   │   └── processed_image9.jpg
    │   └── processed_videos
    │   │   ├── processed_video1.mp4
    │   │   ├── processed_video10.mp4
    │   │   ├── processed_video11.mp4
    │   │   ├── processed_video12.mp4
    │   │   ├── processed_video13.mp4
    │   │   ├── processed_video14.mp4
    │   │   ├── processed_video15.mp4
    │   │   ├── processed_video16.mp4
    │   │   ├── processed_video17.mp4
    │   │   ├── processed_video18.mp4
    │   │   ├── processed_video19.mp4
    │   │   ├── processed_video2.mp4
    │   │   ├── processed_video20.mp4
    │   │   ├── processed_video3.mp4
    │   │   ├── processed_video4.mp4
    │   │   ├── processed_video5.mp4
    │   │   ├── processed_video6.mp4
    │   │   ├── processed_video7.mp4
    │   │   ├── processed_video8.mp4
    │   │   └── processed_video9.mp4
    ├── raw
    │   ├── audios
    │   │   ├── fake_audio1.wav
    │   │   ├── fake_audio10.wav
    │   │   ├── fake_audio2.wav
    │   │   ├── fake_audio3.wav
    │   │   ├── fake_audio4.wav
    │   │   ├── fake_audio5.wav
    │   │   ├── fake_audio6.wav
    │   │   ├── fake_audio7.wav
    │   │   ├── fake_audio8.wav
    │   │   ├── fake_audio9.wav
    │   │   ├── real_audio1.wav
    │   │   ├── real_audio10.wav
    │   │   ├── real_audio2.wav
    │   │   ├── real_audio3.wav
    │   │   ├── real_audio4.wav
    │   │   ├── real_audio5.wav
    │   │   ├── real_audio6.wav
    │   │   ├── real_audio7.wav
    │   │   ├── real_audio8.wav
    │   │   └── real_audio9.wav
    │   ├── images
    │   │   ├── fake_image1.jpg
    │   │   ├── fake_image10.jpg
    │   │   ├── fake_image2.jpg
    │   │   ├── fake_image3.jpg
    │   │   ├── fake_image4.jpg
    │   │   ├── fake_image5.jpg
    │   │   ├── fake_image6.jpg
    │   │   ├── fake_image7.jpg
    │   │   ├── fake_image8.jpg
    │   │   ├── fake_image9.jpg
    │   │   ├── real_image1.jpg
    │   │   ├── real_image10.jpg
    │   │   ├── real_image2.jpg
    │   │   ├── real_image3.jpg
    │   │   ├── real_image4.jpg
    │   │   ├── real_image5.jpg
    │   │   ├── real_image6.jpg
    │   │   ├── real_image7.jpg
    │   │   ├── real_image8.jpg
    │   │   └── real_image9.jpg
    │   └── videos
    │   │   ├── fake_video1.mp4
    │   │   ├── fake_video10.mp4
    │   │   ├── fake_video2.mp4
    │   │   ├── fake_video3.mp4
    │   │   ├── fake_video4.mp4
    │   │   ├── fake_video5.mp4
    │   │   ├── fake_video6.mp4
    │   │   ├── fake_video7.mp4
    │   │   ├── fake_video8.mp4
    │   │   ├── fake_video9.mp4
    │   │   ├── real_video1.mp4
    │   │   ├── real_video10.mp4
    │   │   ├── real_video2.mp4
    │   │   ├── real_video3.mp4
    │   │   ├── real_video4.mp4
    │   │   ├── real_video5.mp4
    │   │   ├── real_video6.mp4
    │   │   ├── real_video7.mp4
    │   │   ├── real_video8.mp4
    │   │   └── real_video9.mp4
    └── sample_data.csv
├── docker-compose.yml
├── entrypoint.sh
├── logs
    ├── data_preprocessing.log
    ├── evaluation.log
    ├── model_training.log
    └── system.log
├── models
    └── saved_models
    │   ├── bayesian_model.pkl
    │   ├── cnn_model.h5
    │   ├── model_architecture.png
    │   ├── svm_model.pkl
    │   ├── transformer_model.pth
    │   └── vision_transformer_model.pth
├── notebooks
    ├── Data Preprocessing.ipynb
    ├── Exploratory Data Analysis.ipynb
    ├── Model Evaluation.ipynb
    └── Model Training.ipynb
├── requirements.txt
├── scripts
    ├── download_data.sh
    ├── evaluate_all_models.sh
    ├── generate_report.py
    ├── preprocess_data.py
    └── train_all_models.sh
├── setup.py
├── src
    ├── __init__.py
    ├── blockchain.py
    ├── config.py
    ├── dataset
    │   ├── __init__.py
    │   ├── data_augmentation.py
    │   ├── data_loader.py
    │   ├── data_preprocessor.py
    │   └── data_splitter.py
    ├── dsp.py
    ├── evaluate.py
    ├── evaluation
    │   ├── __init__.py
    │   ├── bayesian_evaluation.py
    │   ├── cnn_evaluation.py
    │   ├── svm_evaluation.py
    │   ├── transformer_evaluation.py
    │   └── vision_transformer_evaluation.py
    ├── models
    │   ├── __init__.py
    │   ├── bayesian.py
    │   ├── cnn.py
    │   ├── svm.py
    │   ├── transformer.py
    │   └── vision_transformer.py
    ├── nlp.py
    ├── processing
    │   ├── __init__.py
    │   ├── audio_processing.py
    │   ├── image_processing.py
    │   ├── text_processing.py
    │   └── video_processing.py
    ├── train.py
    ├── training
    │   ├── __init__.py
    │   ├── bayesian_training.py
    │   ├── cnn_training.py
    │   ├── svm_training.py
    │   ├── transformer_training.py
    │   └── vision_transformer_training.py
    ├── utils.py
    └── utils
    │   ├── __init__.py
    │   ├── data_utils.py
    │   ├── file_utils.py
    │   ├── helpers.py
    │   ├── logger.py
    │   ├── metrics.py
    │   └── visualization.py
└── tests
    ├── __init__.py
    ├── test_data_loading.py
    ├── test_evaluation.py
    ├── test_model.py
    ├── test_training.py
    └── test_utils.py


/.env:
--------------------------------------------------------------------------------
 1 | DATABASE_URL=postgresql://**[]**:password@db:5432/deepfake_db
 2 | # NOTE(review): `.env` is listed in this repository's .gitignore; keep real credentials and keys out of version control.
 3 | SECRET_KEY=**[]**
 4 | 
 5 | LOG_LEVEL=INFO
 6 | # NOTE(review): DEBUG=True together with ALLOWED_HOSTS=* looks like a development-only setup - confirm it is never used for a production deployment.
 7 | DEBUG=True
 8 | ALLOWED_HOSTS=*
 9 | 
10 | NLTK_DATA=/**[]**/local/share/nltk_data
11 | 
12 | DOCKER_CONTAINER=true
13 | 


--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
 1 | # Let Git detect text files and normalize their line endings.
 2 | * text=auto
 3 | 
 4 | # Always check these text formats out with LF line endings.
 5 | *.sh text eol=lf
 6 | 
 7 | *.py text eol=lf
 8 | 
 9 | *.md text eol=lf
10 | 
11 | *.yml text eol=lf
12 | *.yaml text eol=lf
13 | 
14 | *.md diff=markdown
15 | *.py diff=python
16 | 
17 | *.ipynb diff=jupyter-notebook
18 | 
19 | # Binary files
20 | *.jpg binary
21 | *.jpeg binary
22 | *.png binary
23 | *.gif binary
24 | *.pdf binary
25 | *.doc binary
26 | *.docx binary
27 | *.xls binary
28 | *.xlsx binary
29 | *.ppt binary
30 | *.pptx binary
31 | *.zip binary
32 | *.tar binary
33 | *.gz binary
34 | *.bz2 binary
35 | *.7z binary
36 | 
37 | *.h5 binary
38 | *.pth binary
39 | *.pkl binary
40 | 
41 | *.wav binary
42 | *.mp3 binary
43 | *.aac binary
44 | *.flac binary
45 | 
46 | *.mp4 binary
47 | *.avi binary
48 | *.mkv binary
49 | *.mov binary
50 | 
51 | *.exe binary
52 | *.dll binary
53 | *.bin binary
54 | 
55 | *.csv diff=csv
56 | 
57 | .env text
58 | 
59 | *.log text eol=lf
60 | 
61 | *.cfg text eol=lf
62 | *.conf text eol=lf
63 | 
64 | *.json text eol=lf
65 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized Python files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | *.so
  7 | 
  8 | # Distribution / packaging
  9 | .Python
 10 | build/
 11 | develop-eggs/
 12 | dist/
 13 | downloads/
 14 | eggs/
 15 | .eggs/
 16 | lib/
 17 | lib64/
 18 | parts/
 19 | sdist/
 20 | var/
 21 | wheels/
 22 | *.egg-info/
 23 | .installed.cfg
 24 | *.egg
 25 | MANIFEST
 26 | 
 27 | *.manifest
 28 | *.spec
 29 | 
 30 | pip-log.txt
 31 | pip-delete-this-directory.txt
 32 | 
 33 | # Unit test / coverage reports
 34 | htmlcov/
 35 | .tox/
 36 | .nox/
 37 | .coverage
 38 | .coverage.*
 39 | .cache
 40 | nosetests.xml
 41 | coverage.xml
 42 | *.cover
 43 | .hypothesis/
 44 | .pytest_cache/
 45 | 
 46 | *.mo
 47 | *.pot
 48 | 
 49 | # Django stuff:
 50 | *.log
 51 | local_settings.py
 52 | db.sqlite3
 53 | 
 54 | # Flask stuff:
 55 | instance/
 56 | .webassets-cache
 57 | 
 58 | # Scrapy stuff:
 59 | .scrapy
 60 | 
 61 | docs/_build/
 62 | docs/_generated/
 63 | 
 64 | .ipynb_checkpoints
 65 | 
 66 | target/
 67 | 
 68 | profile_default/
 69 | ipython_config.py
 70 | 
 71 | .vscode/
 72 | 
 73 | .mypy_cache/
 74 | .dmypy.json
 75 | dmypy.json
 76 | 
 77 | .pylint.d/
 78 | 
 79 | .env
 80 | .venv
 81 | env/
 82 | venv/
 83 | ENV/
 84 | env.bak/
 85 | venv.bak/
 86 | 
 87 | .spyderproject
 88 | .spyproject
 89 | 
 90 | .ropeproject
 91 | 
 92 | # mkdocs doc
 93 | /site
 94 | 
 95 | # JetBrains IDEs: ignore the contents of .idea but keep the shareable settings below.
 96 | # The directory itself must not be ignored (no bare `.idea/` rule), otherwise Git
 97 | # never descends into it and the `!` re-include rules have no effect.
 98 | .idea/*
 99 | !.idea/fileTemplates
100 | !.idea/inspectionProfiles
101 | !.idea/vcs.xml
102 | !.idea/*.iml
103 | # NOTE(review): the `*.iml` rule below comes later and therefore re-ignores
104 | # .idea/*.iml despite the re-include above - confirm which behaviour is intended.
105 | *.iws
106 | *.iml
107 | *.ipr
108 | 
109 | *.ipynb_checkpoints
110 | 
111 | .env.local
112 | .env.*.local
113 | *.env
114 | 
115 | *.DS_Store
116 | Thumbs.db
117 | 
118 | logs/
119 | 
120 | coverage/
121 | *.cov
122 | *.coverage
123 | *.coveragerc
124 | 
125 | docker-compose.override.yml
126 | 
127 | *.db
128 | *.sqlite
129 | *.sqlite3
130 | 
131 | Pipfile
132 | Pipfile.lock
133 | __pypackages__/
134 | 
135 | *.cache
136 | 
137 | *.bak
138 | *.swp
139 | *.tmp
140 | *.temp
141 | *.old
142 | *.orig
143 | *.save
144 | *.backup
145 | *~
146 | 
147 | *.sublime-workspace
148 | *.sublime-project
149 | *.project
150 | *.code-workspace
151 | 
152 | # AWS Lambda
153 | *.zip
154 | 
155 | *.ipynb
156 | 
157 | *.h5
158 | *.pth
159 | *.pkl
160 | *.onnx
161 | 
162 | data/raw/
163 | data/processed/
164 | data/interim/
165 | data/external/
166 | *.csv
167 | *.tsv
168 | *.parquet
169 | *.json
170 | *.xlsx
171 | 
172 | *.png
173 | *.jpg
174 | *.jpeg
175 | *.gif
176 | *.bmp
177 | *.tif
178 | *.tiff
179 | *.mp4
180 | *.avi
181 | *.mkv
182 | *.mov
183 | 
184 | *.wav
185 | *.mp3
186 | *.aac
187 | *.flac
188 | 
189 | *.exe
190 | *.dll
191 | *.bin
192 | 
193 | tmp/
194 | temp/
195 | 
196 | *.cfg
197 | *.conf
198 | config.yaml
199 | 
200 | secrets/
201 | *.key
202 | *.pem
203 | 
204 | # NOTE(review): these patterns ignore the *.sh and *.py files in scripts/ that
205 | # the project itself ships; new or renamed scripts will not be picked up by
206 | # `git add` without -f - confirm this is intentional.
207 | scripts/*.sh
208 | scripts/*.py
209 | 
210 | tests/temp/
211 | 
212 | models/saved_models/
213 | 


--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
 1 | All notable changes to this product will be documented in this file.
 2 | 
 3 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 4 | and this product adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 5 | 
 6 | ## [Multidisciplinary Deepfake Detection]
 7 | 
 8 | ### Added
 9 | - Initial setup for the multidisciplinary deepfake detection project.
10 | - Implemented data loading and preprocessing modules.
11 | - Added image, audio, video, and text processing modules.
12 | - Developed CNN, Transformer, SVM, Bayesian, and Vision Transformer models.
13 | - Integrated model training scripts for each model.
14 | - Created evaluation scripts for model performance assessment.
15 | - Implemented utility functions for logging, file handling, and metrics calculation.
16 | - Added unit tests for data loading, model architecture, training, and evaluation.
17 | - Configured Git attributes and ignored unnecessary files in `.gitignore`.
18 | 
19 | ## [0.1.0] - 2024-08-07
20 | 
21 | ### Added
22 | - Initial project structure with necessary directories: `src`, `data`, `models`, `notebooks`, `scripts`, `tests`, `logs`.
23 | - Configured `.gitattributes` for consistent line endings and handling of binary files.
24 | - Configured `.gitignore` to exclude unnecessary files and directories.
25 | - Implemented the following modules:
26 |   - `src/audio_processing.py`: Audio processing functions including loading, MFCC extraction, and feature extraction.
27 |   - `src/video_processing.py`: Video processing functions including frame extraction, preprocessing, and feature extraction.
28 |   - `src/image_processing.py`: Image processing functions including loading, preprocessing, and HOG feature extraction.
29 |   - `src/text_processing.py`: Text processing functions including cleaning, tokenizing, removing stopwords, and lemmatizing.
30 |   - `src/blockchain.py`: Blockchain implementation for data integrity.
31 |   - `src/config.py`: Configuration settings for directories, logging, and model hyperparameters.
32 |   - `src/dsp.py`: Digital signal processing functions including STFT, FFT, and filtering.
33 |   - `src/evaluate.py`: Evaluation scripts for CNN, Transformer, SVM, Bayesian, and Vision Transformer models.
34 |   - `src/nlp.py`: NLP processing functions including text cleaning, tokenizing, and lemmatizing using NLTK and Spacy.
35 |   - `src/train.py`: Training scripts for CNN, Transformer, SVM, Bayesian, and Vision Transformer models.
36 |   - `src/utils.py`: Utility functions for file handling, logging, metrics calculation, and data preprocessing.
37 | - Implemented unit tests:
38 |   - `tests/test_data_loading.py`: Tests for data loading functions.
39 |   - `tests/test_model.py`: Tests for model architectures.
40 |   - `tests/test_training.py`: Tests for model training functions.
41 |   - `tests/test_evaluation.py`: Tests for model evaluation functions.
42 |   - `tests/test_utils.py`: Tests for utility functions.
43 | - Added data and logs for testing purposes.
44 | 
45 | ### Changed
46 | - N/A
47 | 
48 | ### Fixed
49 | - N/A
50 | 
51 | ### Removed
52 | - N/A
53 | 
54 | ## [0.1.1] - 2024-08-11
55 | 
56 | ### Added
57 | - Added more comprehensive unit tests to cover edge cases.
58 | - Included additional preprocessing steps for audio and video data.
59 | 
60 | ### Changed
61 | - Improved model training scripts to handle large datasets more efficiently.
62 | - Updated configuration settings to reflect new directory structure.
63 | 
64 | ### Fixed
65 | - Fixed bug in the audio feature extraction function.
66 | - Corrected paths in the data loading scripts.
67 | 
68 | ### Removed
69 | - Deprecated old data processing scripts.
70 | 
71 | ## [0.1.2] - 2024-08-13
72 | 
73 | ### Added
74 | - Integrated blockchain verification for data integrity checks.
75 | - Improved logging functionality for better debugging.
76 | 
77 | ### Changed
78 | - Refactored image processing module for better performance.
79 | - Updated model evaluation scripts to include ROC-AUC score.
80 | 
81 | ### Fixed
82 | - Fixed issue with loading large video files.
83 | - Resolved memory leak in the transformer training script.
84 | 
85 | ### Removed
86 | - Removed redundant helper functions in the utility module.
87 | 


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
 1 | # Container image for the deepfake-detection service, built on the slim Python 3.9 base.
 2 | FROM python:3.9-slim
 3 | 
 4 | # Do not write .pyc files and do not buffer stdout/stderr, so log output shows up immediately.
 5 | ENV PYTHONDONTWRITEBYTECODE=1
 6 | ENV PYTHONUNBUFFERED=1
 7 | ENV LOG_LEVEL=INFO
 8 | 
 9 | WORKDIR /app
10 | 
11 | # Install the compiler toolchain and headers BEFORE pip runs, so that requirements
12 | # shipping no prebuilt wheel can be compiled. curl is installed here as well and is
13 | # kept in the final image because the HEALTHCHECK below needs it at runtime.
14 | RUN apt-get update && apt-get install -y \
15 |     build-essential \
16 |     libssl-dev \
17 |     libffi-dev \
18 |     python3-dev \
19 |     curl \
20 |     && apt-get clean
21 | 
22 | # Copy only the requirements file first so the dependency layer stays cached until it changes.
23 | COPY requirements.txt /app/
24 | 
25 | RUN pip install --no-cache-dir -r requirements.txt
26 | 
27 | RUN python -m nltk.downloader punkt stopwords wordnet
28 | 
29 | COPY . /app/
30 | 
31 | RUN mkdir -p /app/logs
32 | 
33 | EXPOSE 8000
34 | 
35 | COPY entrypoint.sh /app/
36 | RUN chmod +x /app/entrypoint.sh
37 | 
38 | ENV DATABASE_URL=${**[]**}
39 | ENV SECRET_KEY=${**[]**}
40 | 
41 | # NOTE(review): these two steps run at image build time, when the database host is
42 | # normally not reachable, and the repository tree lists no manage.py - confirm whether
43 | # they should run from entrypoint.sh at container start instead.
44 | RUN /app/entrypoint.sh python manage.py migrate
45 | RUN /app/entrypoint.sh python manage.py collectstatic --noinput
46 | 
47 | HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 CMD curl -f http://localhost:8000/health || exit 1
48 | 
49 | # Remove the build-only packages once everything is compiled. curl is deliberately
50 | # NOT purged: the HEALTHCHECK above executes it inside the running container.
51 | RUN apt-get purge -y --auto-remove build-essential libssl-dev libffi-dev python3-dev && \
52 |     rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
53 | 
54 | ENTRYPOINT ["/app/entrypoint.sh"]
55 | # NOTE(review): src/main.py does not appear in the repository tree - confirm the entry point.
56 | CMD ["python", "src/main.py"]
57 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Creative Commons Attribution-NonCommercial-NoDerivatives 4.0 International Public License
 2 | 
 3 | By using this software, you agree to the following terms:
 4 | 
 5 | License  
 6 | This software is licensed under the Creative Commons Attribution-NonCommercial-NoDerivatives 4.0 International (CC BY-NC-ND 4.0).
 7 | 
 8 | You are free to:
 9 | 
10 | - Share — copy and redistribute the material in any medium or format.
11 | 
12 | Under the following terms:
13 | 
14 | - Attribution — You must give appropriate credit, provide a link to the license, and indicate if changes were made. You may do so in any reasonable manner, but not in any way that suggests the licensor endorses you or your use.
15 | 
16 | - NonCommercial — You may not use the material for commercial purposes.
17 | 
18 | - NoDerivatives — If you remix, transform, or build upon the material, you may not distribute the modified material.
19 | 
20 | - No additional restrictions — You may not apply legal terms or technological measures that legally restrict others from doing anything the license permits.
21 | 
22 | Disclaimer  
23 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES, OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 | 
25 | For more details, refer to https://creativecommons.org/licenses/by-nc-nd/4.0/.
26 | 


--------------------------------------------------------------------------------
/data/metadata.csv:
--------------------------------------------------------------------------------
 1 | filename,label
 2 | processed_image1.jpg,real
 3 | processed_image2.jpg,fake
 4 | processed_image3.jpg,real
 5 | processed_image4.jpg,fake
 6 | processed_image5.jpg,real
 7 | processed_image6.jpg,fake
 8 | processed_image7.jpg,real
 9 | processed_image8.jpg,fake
10 | processed_image9.jpg,real
11 | processed_image10.jpg,fake
12 | processed_image11.jpg,real
13 | processed_image12.jpg,fake
14 | processed_image13.jpg,real
15 | processed_image14.jpg,fake
16 | processed_image15.jpg,real
17 | processed_image16.jpg,fake
18 | processed_image17.jpg,real
19 | processed_image18.jpg,fake
20 | processed_image19.jpg,real
21 | processed_image20.jpg,fake
22 | processed_audio1.wav,real
23 | processed_audio2.wav,fake
24 | processed_audio3.wav,real
25 | processed_audio4.wav,fake
26 | processed_audio5.wav,real
27 | processed_audio6.wav,fake
28 | processed_audio7.wav,real
29 | processed_audio8.wav,fake
30 | processed_audio9.wav,real
31 | processed_audio10.wav,fake
32 | processed_audio11.wav,real
33 | processed_audio12.wav,fake
34 | processed_audio13.wav,real
35 | processed_audio14.wav,fake
36 | processed_audio15.wav,real
37 | processed_audio16.wav,fake
38 | processed_audio17.wav,real
39 | processed_audio18.wav,fake
40 | processed_audio19.wav,real
41 | processed_audio20.wav,fake
42 | processed_video1.mp4,real
43 | processed_video2.mp4,fake
44 | processed_video3.mp4,real
45 | processed_video4.mp4,fake
46 | processed_video5.mp4,real
47 | processed_video6.mp4,fake
48 | processed_video7.mp4,real
49 | processed_video8.mp4,fake
50 | processed_video9.mp4,real
51 | processed_video10.mp4,fake
52 | processed_video11.mp4,real
53 | processed_video12.mp4,fake
54 | processed_video13.mp4,real
55 | processed_video14.mp4,fake
56 | processed_video15.mp4,real
57 | processed_video16.mp4,fake
58 | processed_video17.mp4,real
59 | processed_video18.mp4,fake
60 | processed_video19.mp4,real
61 | processed_video20.mp4,fake
62 | 


--------------------------------------------------------------------------------
/data/processed/processed_audios/processed_audio1.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_audios/processed_audio1.wav


--------------------------------------------------------------------------------
/data/processed/processed_audios/processed_audio10.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_audios/processed_audio10.wav


--------------------------------------------------------------------------------
/data/processed/processed_audios/processed_audio11.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_audios/processed_audio11.wav


--------------------------------------------------------------------------------
/data/processed/processed_audios/processed_audio12.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_audios/processed_audio12.wav


--------------------------------------------------------------------------------
/data/processed/processed_audios/processed_audio13.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_audios/processed_audio13.wav


--------------------------------------------------------------------------------
/data/processed/processed_audios/processed_audio14.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_audios/processed_audio14.wav


--------------------------------------------------------------------------------
/data/processed/processed_audios/processed_audio15.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_audios/processed_audio15.wav


--------------------------------------------------------------------------------
/data/processed/processed_audios/processed_audio16.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_audios/processed_audio16.wav


--------------------------------------------------------------------------------
/data/processed/processed_audios/processed_audio17.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_audios/processed_audio17.wav


--------------------------------------------------------------------------------
/data/processed/processed_audios/processed_audio18.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_audios/processed_audio18.wav


--------------------------------------------------------------------------------
/data/processed/processed_audios/processed_audio19.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_audios/processed_audio19.wav


--------------------------------------------------------------------------------
/data/processed/processed_audios/processed_audio2.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_audios/processed_audio2.wav


--------------------------------------------------------------------------------
/data/processed/processed_audios/processed_audio20.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_audios/processed_audio20.wav


--------------------------------------------------------------------------------
/data/processed/processed_audios/processed_audio3.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_audios/processed_audio3.wav


--------------------------------------------------------------------------------
/data/processed/processed_audios/processed_audio4.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_audios/processed_audio4.wav


--------------------------------------------------------------------------------
/data/processed/processed_audios/processed_audio5.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_audios/processed_audio5.wav


--------------------------------------------------------------------------------
/data/processed/processed_audios/processed_audio6.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_audios/processed_audio6.wav


--------------------------------------------------------------------------------
/data/processed/processed_audios/processed_audio7.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_audios/processed_audio7.wav


--------------------------------------------------------------------------------
/data/processed/processed_audios/processed_audio8.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_audios/processed_audio8.wav


--------------------------------------------------------------------------------
/data/processed/processed_audios/processed_audio9.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_audios/processed_audio9.wav


--------------------------------------------------------------------------------
/data/processed/processed_images/processed_image1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_images/processed_image1.jpg


--------------------------------------------------------------------------------
/data/processed/processed_images/processed_image10.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_images/processed_image10.jpg


--------------------------------------------------------------------------------
/data/processed/processed_images/processed_image11.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_images/processed_image11.jpg


--------------------------------------------------------------------------------
/data/processed/processed_images/processed_image12.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_images/processed_image12.jpg


--------------------------------------------------------------------------------
/data/processed/processed_images/processed_image13.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_images/processed_image13.jpg


--------------------------------------------------------------------------------
/data/processed/processed_images/processed_image14.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_images/processed_image14.jpg


--------------------------------------------------------------------------------
/data/processed/processed_images/processed_image15.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_images/processed_image15.jpg


--------------------------------------------------------------------------------
/data/processed/processed_images/processed_image16.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_images/processed_image16.jpg


--------------------------------------------------------------------------------
/data/processed/processed_images/processed_image17.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_images/processed_image17.jpg


--------------------------------------------------------------------------------
/data/processed/processed_images/processed_image18.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_images/processed_image18.jpg


--------------------------------------------------------------------------------
/data/processed/processed_images/processed_image19.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_images/processed_image19.jpg


--------------------------------------------------------------------------------
/data/processed/processed_images/processed_image2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_images/processed_image2.jpg


--------------------------------------------------------------------------------
/data/processed/processed_images/processed_image20.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_images/processed_image20.jpg


--------------------------------------------------------------------------------
/data/processed/processed_images/processed_image3.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_images/processed_image3.jpg


--------------------------------------------------------------------------------
/data/processed/processed_images/processed_image4.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_images/processed_image4.jpg


--------------------------------------------------------------------------------
/data/processed/processed_images/processed_image5.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_images/processed_image5.jpg


--------------------------------------------------------------------------------
/data/processed/processed_images/processed_image6.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_images/processed_image6.jpg


--------------------------------------------------------------------------------
/data/processed/processed_images/processed_image7.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_images/processed_image7.jpg


--------------------------------------------------------------------------------
/data/processed/processed_images/processed_image8.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_images/processed_image8.jpg


--------------------------------------------------------------------------------
/data/processed/processed_images/processed_image9.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_images/processed_image9.jpg


--------------------------------------------------------------------------------
/data/processed/processed_videos/processed_video1.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_videos/processed_video1.mp4


--------------------------------------------------------------------------------
/data/processed/processed_videos/processed_video10.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_videos/processed_video10.mp4


--------------------------------------------------------------------------------
/data/processed/processed_videos/processed_video11.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_videos/processed_video11.mp4


--------------------------------------------------------------------------------
/data/processed/processed_videos/processed_video12.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_videos/processed_video12.mp4


--------------------------------------------------------------------------------
/data/processed/processed_videos/processed_video13.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_videos/processed_video13.mp4


--------------------------------------------------------------------------------
/data/processed/processed_videos/processed_video14.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_videos/processed_video14.mp4


--------------------------------------------------------------------------------
/data/processed/processed_videos/processed_video15.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_videos/processed_video15.mp4


--------------------------------------------------------------------------------
/data/processed/processed_videos/processed_video16.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_videos/processed_video16.mp4


--------------------------------------------------------------------------------
/data/processed/processed_videos/processed_video17.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_videos/processed_video17.mp4


--------------------------------------------------------------------------------
/data/processed/processed_videos/processed_video18.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_videos/processed_video18.mp4


--------------------------------------------------------------------------------
/data/processed/processed_videos/processed_video19.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_videos/processed_video19.mp4


--------------------------------------------------------------------------------
/data/processed/processed_videos/processed_video2.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_videos/processed_video2.mp4


--------------------------------------------------------------------------------
/data/processed/processed_videos/processed_video20.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_videos/processed_video20.mp4


--------------------------------------------------------------------------------
/data/processed/processed_videos/processed_video3.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_videos/processed_video3.mp4


--------------------------------------------------------------------------------
/data/processed/processed_videos/processed_video4.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_videos/processed_video4.mp4


--------------------------------------------------------------------------------
/data/processed/processed_videos/processed_video5.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_videos/processed_video5.mp4


--------------------------------------------------------------------------------
/data/processed/processed_videos/processed_video6.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_videos/processed_video6.mp4


--------------------------------------------------------------------------------
/data/processed/processed_videos/processed_video7.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_videos/processed_video7.mp4


--------------------------------------------------------------------------------
/data/processed/processed_videos/processed_video8.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_videos/processed_video8.mp4


--------------------------------------------------------------------------------
/data/processed/processed_videos/processed_video9.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/processed/processed_videos/processed_video9.mp4


--------------------------------------------------------------------------------
/data/raw/audios/fake_audio1.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/audios/fake_audio1.wav


--------------------------------------------------------------------------------
/data/raw/audios/fake_audio10.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/audios/fake_audio10.wav


--------------------------------------------------------------------------------
/data/raw/audios/fake_audio2.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/audios/fake_audio2.wav


--------------------------------------------------------------------------------
/data/raw/audios/fake_audio3.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/audios/fake_audio3.wav


--------------------------------------------------------------------------------
/data/raw/audios/fake_audio4.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/audios/fake_audio4.wav


--------------------------------------------------------------------------------
/data/raw/audios/fake_audio5.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/audios/fake_audio5.wav


--------------------------------------------------------------------------------
/data/raw/audios/fake_audio6.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/audios/fake_audio6.wav


--------------------------------------------------------------------------------
/data/raw/audios/fake_audio7.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/audios/fake_audio7.wav


--------------------------------------------------------------------------------
/data/raw/audios/fake_audio8.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/audios/fake_audio8.wav


--------------------------------------------------------------------------------
/data/raw/audios/fake_audio9.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/audios/fake_audio9.wav


--------------------------------------------------------------------------------
/data/raw/audios/real_audio1.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/audios/real_audio1.wav


--------------------------------------------------------------------------------
/data/raw/audios/real_audio10.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/audios/real_audio10.wav


--------------------------------------------------------------------------------
/data/raw/audios/real_audio2.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/audios/real_audio2.wav


--------------------------------------------------------------------------------
/data/raw/audios/real_audio3.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/audios/real_audio3.wav


--------------------------------------------------------------------------------
/data/raw/audios/real_audio4.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/audios/real_audio4.wav


--------------------------------------------------------------------------------
/data/raw/audios/real_audio5.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/audios/real_audio5.wav


--------------------------------------------------------------------------------
/data/raw/audios/real_audio6.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/audios/real_audio6.wav


--------------------------------------------------------------------------------
/data/raw/audios/real_audio7.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/audios/real_audio7.wav


--------------------------------------------------------------------------------
/data/raw/audios/real_audio8.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/audios/real_audio8.wav


--------------------------------------------------------------------------------
/data/raw/audios/real_audio9.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/audios/real_audio9.wav


--------------------------------------------------------------------------------
/data/raw/images/fake_image1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/images/fake_image1.jpg


--------------------------------------------------------------------------------
/data/raw/images/fake_image10.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/images/fake_image10.jpg


--------------------------------------------------------------------------------
/data/raw/images/fake_image2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/images/fake_image2.jpg


--------------------------------------------------------------------------------
/data/raw/images/fake_image3.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/images/fake_image3.jpg


--------------------------------------------------------------------------------
/data/raw/images/fake_image4.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/images/fake_image4.jpg


--------------------------------------------------------------------------------
/data/raw/images/fake_image5.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/images/fake_image5.jpg


--------------------------------------------------------------------------------
/data/raw/images/fake_image6.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/images/fake_image6.jpg


--------------------------------------------------------------------------------
/data/raw/images/fake_image7.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/images/fake_image7.jpg


--------------------------------------------------------------------------------
/data/raw/images/fake_image8.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/images/fake_image8.jpg


--------------------------------------------------------------------------------
/data/raw/images/fake_image9.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/images/fake_image9.jpg


--------------------------------------------------------------------------------
/data/raw/images/real_image1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/images/real_image1.jpg


--------------------------------------------------------------------------------
/data/raw/images/real_image10.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/images/real_image10.jpg


--------------------------------------------------------------------------------
/data/raw/images/real_image2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/images/real_image2.jpg


--------------------------------------------------------------------------------
/data/raw/images/real_image3.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/images/real_image3.jpg


--------------------------------------------------------------------------------
/data/raw/images/real_image4.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/images/real_image4.jpg


--------------------------------------------------------------------------------
/data/raw/images/real_image5.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/images/real_image5.jpg


--------------------------------------------------------------------------------
/data/raw/images/real_image6.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/images/real_image6.jpg


--------------------------------------------------------------------------------
/data/raw/images/real_image7.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/images/real_image7.jpg


--------------------------------------------------------------------------------
/data/raw/images/real_image8.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/images/real_image8.jpg


--------------------------------------------------------------------------------
/data/raw/images/real_image9.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/images/real_image9.jpg


--------------------------------------------------------------------------------
/data/raw/videos/fake_video1.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/videos/fake_video1.mp4


--------------------------------------------------------------------------------
/data/raw/videos/fake_video10.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/videos/fake_video10.mp4


--------------------------------------------------------------------------------
/data/raw/videos/fake_video2.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/videos/fake_video2.mp4


--------------------------------------------------------------------------------
/data/raw/videos/fake_video3.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/videos/fake_video3.mp4


--------------------------------------------------------------------------------
/data/raw/videos/fake_video4.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/videos/fake_video4.mp4


--------------------------------------------------------------------------------
/data/raw/videos/fake_video5.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/videos/fake_video5.mp4


--------------------------------------------------------------------------------
/data/raw/videos/fake_video6.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/videos/fake_video6.mp4


--------------------------------------------------------------------------------
/data/raw/videos/fake_video7.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/videos/fake_video7.mp4


--------------------------------------------------------------------------------
/data/raw/videos/fake_video8.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/videos/fake_video8.mp4


--------------------------------------------------------------------------------
/data/raw/videos/fake_video9.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/videos/fake_video9.mp4


--------------------------------------------------------------------------------
/data/raw/videos/real_video1.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/videos/real_video1.mp4


--------------------------------------------------------------------------------
/data/raw/videos/real_video10.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/videos/real_video10.mp4


--------------------------------------------------------------------------------
/data/raw/videos/real_video2.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/videos/real_video2.mp4


--------------------------------------------------------------------------------
/data/raw/videos/real_video3.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/videos/real_video3.mp4


--------------------------------------------------------------------------------
/data/raw/videos/real_video4.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/videos/real_video4.mp4


--------------------------------------------------------------------------------
/data/raw/videos/real_video5.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/videos/real_video5.mp4


--------------------------------------------------------------------------------
/data/raw/videos/real_video6.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/videos/real_video6.mp4


--------------------------------------------------------------------------------
/data/raw/videos/real_video7.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/videos/real_video7.mp4


--------------------------------------------------------------------------------
/data/raw/videos/real_video8.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/videos/real_video8.mp4


--------------------------------------------------------------------------------
/data/raw/videos/real_video9.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/data/raw/videos/real_video9.mp4


--------------------------------------------------------------------------------
/data/sample_data.csv:
--------------------------------------------------------------------------------
 1 | filename,label
 2 | real_image1.jpg,real
 3 | fake_image1.jpg,fake
 4 | real_image2.jpg,real
 5 | fake_image2.jpg,fake
 6 | real_image3.jpg,real
 7 | fake_image3.jpg,fake
 8 | real_image4.jpg,real
 9 | fake_image4.jpg,fake
10 | real_image5.jpg,real
11 | fake_image5.jpg,fake
12 | real_image6.jpg,real
13 | fake_image6.jpg,fake
14 | real_image7.jpg,real
15 | fake_image7.jpg,fake
16 | real_image8.jpg,real
17 | fake_image8.jpg,fake
18 | real_image9.jpg,real
19 | fake_image9.jpg,fake
20 | real_image10.jpg,real
21 | fake_image10.jpg,fake
22 | real_audio1.wav,real
23 | fake_audio1.wav,fake
24 | real_audio2.wav,real
25 | fake_audio2.wav,fake
26 | real_audio3.wav,real
27 | fake_audio3.wav,fake
28 | real_audio4.wav,real
29 | fake_audio4.wav,fake
30 | real_audio5.wav,real
31 | fake_audio5.wav,fake
32 | real_audio6.wav,real
33 | fake_audio6.wav,fake
34 | real_audio7.wav,real
35 | fake_audio7.wav,fake
36 | real_audio8.wav,real
37 | fake_audio8.wav,fake
38 | real_audio9.wav,real
39 | fake_audio9.wav,fake
40 | real_audio10.wav,real
41 | fake_audio10.wav,fake
42 | real_video1.mp4,real
43 | fake_video1.mp4,fake
44 | real_video2.mp4,real
45 | fake_video2.mp4,fake
46 | real_video3.mp4,real
47 | fake_video3.mp4,fake
48 | real_video4.mp4,real
49 | fake_video4.mp4,fake
50 | real_video5.mp4,real
51 | fake_video5.mp4,fake
52 | real_video6.mp4,real
53 | fake_video6.mp4,fake
54 | real_video7.mp4,real
55 | fake_video7.mp4,fake
56 | real_video8.mp4,real
57 | fake_video8.mp4,fake
58 | real_video9.mp4,real
59 | fake_video9.mp4,fake
60 | real_video10.mp4,real
61 | fake_video10.mp4,fake
62 | 


--------------------------------------------------------------------------------
/docker-compose.yml:
--------------------------------------------------------------------------------
 1 | version: '3.8'  # Compose file format version
 2 | # Services: app (Python application), db (PostgreSQL 13), redis (Redis 6.2).
 3 | services:
 4 |   app:
 5 |     build:  # image is built from ./Dockerfile with the project root as build context
 6 |       context: .
 7 |       dockerfile: Dockerfile
 8 |     container_name: multidisciplinary_deepfake_detection_app
 9 |     environment:
10 |       - PYTHONDONTWRITEBYTECODE=1
11 |       - PYTHONUNBUFFERED=1
12 |       - LOG_LEVEL=INFO
13 |       - DATABASE_URL=**[]**
14 |       - SECRET_KEY=**[]**
15 |     volumes:
16 |       - .:/app  # bind-mounts the project directory over /app
17 |     ports:
18 |       - "8000:8000"  # host:container
19 |     depends_on:
20 |       - db  # short form: start order only; entrypoint.sh polls db:5432 before continuing
21 |     healthcheck:
22 |       test: ["CMD-SHELL", "curl -f http://localhost:8000/health || exit 1"]  # needs curl inside the image
23 |       interval: 30s
24 |       timeout: 30s  # NOTE(review): equal to the interval - confirm this is intended
25 |       retries: 3
26 |     entrypoint: ["/app/entrypoint.sh"]  # receives "command" below as its arguments
27 |     command: ["python", "src/main.py"]
28 | # PostgreSQL database; its data directory lives in the named volume postgres_data.
29 |   db:
30 |     image: postgres:13
31 |     container_name: multidisciplinary_deepfake_detection_db
32 |     environment:
33 |       - POSTGRES_DB=**[]**
34 |       - POSTGRES_USER=**[]**
35 |       - POSTGRES_PASSWORD=**[]**
36 |     volumes:
37 |       - postgres_data:/var/lib/postgresql/data
38 |     ports:
39 |       - "5432:5432"  # also published on the host
40 | # NOTE(review): no service lists redis under depends_on - confirm whether app actually uses it.
41 |   redis:
42 |     image: "redis:6.2"
43 |     container_name: multidisciplinary_deepfake_detection_redis
44 |     ports:
45 |       - "6379:6379"  # also published on the host
46 | # Named volumes referenced by the services above.
47 | volumes:
48 |   postgres_data:
49 | 


--------------------------------------------------------------------------------
/entrypoint.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -e
 4 | 
 5 | set -x
 6 | 
 7 | echo "Waiting for database to be ready..."
 8 | while ! nc -z db 5432; do
 9 |   sleep 1
10 | done
11 | 
12 | echo "Running database migrations..."
13 | python manage.py migrate
14 | 
15 | echo "Collecting static files..."
16 | python manage.py collectstatic --noinput
17 | 
18 | echo "Starting application..."
19 | exec "$@"
20 | 


--------------------------------------------------------------------------------
/logs/data_preprocessing.log:
--------------------------------------------------------------------------------
 1 | 2024-08-15 16:44:00,002 data_preprocessing INFO: Data preprocessing started.
 2 | 2024-08-15 16:44:00,004 data_preprocessing INFO: Loading raw data from data/raw/sample_data.csv.
 3 | 2024-08-15 16:44:00,053 data_preprocessing INFO: Raw data loaded successfully with shape (10000, 20).
 4 | 2024-08-15 16:44:00,055 data_preprocessing INFO: Dropping missing values.
 5 | 2024-08-15 16:44:00,058 data_preprocessing INFO: Missing values dropped. Data shape is now (9500, 20).
 6 | 2024-08-15 16:44:00,060 data_preprocessing INFO: Encoding categorical features.
 7 | 2024-08-15 16:44:00,085 data_preprocessing INFO: Categorical features encoded successfully.
 8 | 2024-08-15 16:44:00,086 data_preprocessing INFO: Normalizing numerical features.
 9 | 2024-08-15 16:44:00,115 data_preprocessing INFO: Numerical features normalized successfully.
10 | 2024-08-15 16:44:00,117 data_preprocessing INFO: Splitting data into training and testing sets.
11 | 2024-08-15 16:44:00,120 data_preprocessing INFO: Data split completed. Training data shape: (7600, 20), Testing data shape: (1900, 20).
12 | 2024-08-15 16:44:00,122 data_preprocessing INFO: Saving processed data to data/processed/processed_data.csv.
13 | 2024-08-15 16:44:00,128 data_preprocessing INFO: Processed data saved successfully.
14 | 2024-08-15 16:44:00,130 data_preprocessing INFO: Data preprocessing completed.
15 | 


--------------------------------------------------------------------------------
/logs/evaluation.log:
--------------------------------------------------------------------------------
  1 | 2024-08-15 16:44:00,001 evaluation_logger INFO: Evaluating CNN model...
  2 | 2024-08-15 16:44:02,152 evaluation_logger INFO: CNN Model Accuracy: 0.92
  3 | 2024-08-15 16:44:02,153 evaluation_logger INFO: CNN Model F1 Score: 0.91
  4 | 2024-08-15 16:44:02,153 evaluation_logger INFO: CNN Model Precision: 0.92
  5 | 2024-08-15 16:44:02,153 evaluation_logger INFO: CNN Model Recall: 0.92
  6 | 2024-08-15 16:44:02,154 evaluation_logger INFO: Classification Report:
  7 |               precision    recall  f1-score   support
  8 | 
  9 |            0       0.91      0.93      0.92       950
 10 |            1       0.93      0.91      0.92       950
 11 | 
 12 |     accuracy                           0.92      1900
 13 |    macro avg       0.92      0.92      0.92      1900
 14 | weighted avg       0.92      0.92      0.92      1900
 15 | 
 16 | 2024-08-15 16:44:02,155 evaluation_logger INFO: Confusion Matrix:
 17 | [[883  67]
 18 |  [ 86 864]]
 19 | 2024-08-15 16:44:02,155 evaluation_logger INFO: CNN model evaluation complete.
 20 | 
 21 | 2024-08-15 16:44:02,156 evaluation_logger INFO: Evaluating Transformer model...
 22 | 2024-08-15 16:44:05,234 evaluation_logger INFO: Transformer Model Accuracy: 0.89
 23 | 2024-08-15 16:44:05,234 evaluation_logger INFO: Transformer Model F1 Score: 0.88
 24 | 2024-08-15 16:44:05,234 evaluation_logger INFO: Transformer Model Precision: 0.89
 25 | 2024-08-15 16:44:05,234 evaluation_logger INFO: Transformer Model Recall: 0.89
 26 | 2024-08-15 16:44:05,235 evaluation_logger INFO: Classification Report:
 27 |               precision    recall  f1-score   support
 28 | 
 29 |            0       0.88      0.90      0.89       950
 30 |            1       0.90      0.88      0.89       950
 31 | 
 32 |     accuracy                           0.89      1900
 33 |    macro avg       0.89      0.89      0.89      1900
 34 | weighted avg       0.89      0.89      0.89      1900
 35 | 
 36 | 2024-08-15 16:44:05,236 evaluation_logger INFO: Confusion Matrix:
 37 | [[855  95]
 38 |  [114 836]]
 39 | 2024-08-15 16:44:05,236 evaluation_logger INFO: Transformer model evaluation complete.
 40 | 
 41 | 2024-08-15 16:44:05,237 evaluation_logger INFO: Evaluating SVM model...
 42 | 2024-08-15 16:44:07,328 evaluation_logger INFO: SVM Model Accuracy: 0.85
 43 | 2024-08-15 16:44:07,328 evaluation_logger INFO: SVM Model F1 Score: 0.85
 44 | 2024-08-15 16:44:07,328 evaluation_logger INFO: SVM Model Precision: 0.85
 45 | 2024-08-15 16:44:07,329 evaluation_logger INFO: SVM Model Recall: 0.85
 46 | 2024-08-15 16:44:07,329 evaluation_logger INFO: Classification Report:
 47 |               precision    recall  f1-score   support
 48 | 
 49 |            0       0.84      0.86      0.85       950
 50 |            1       0.86      0.84      0.85       950
 51 | 
 52 |     accuracy                           0.85      1900
 53 |    macro avg       0.85      0.85      0.85      1900
 54 | weighted avg       0.85      0.85      0.85      1900
 55 | 
 56 | 2024-08-15 16:44:07,329 evaluation_logger INFO: Confusion Matrix:
 57 | [[818 132]
 58 |  [152 798]]
 59 | 2024-08-15 16:44:07,330 evaluation_logger INFO: SVM model evaluation complete.
 60 | 
 61 | 2024-08-15 16:44:07,331 evaluation_logger INFO: Evaluating Bayesian model...
 62 | 2024-08-15 16:44:09,220 evaluation_logger INFO: Bayesian Model Accuracy: 0.80
 63 | 2024-08-15 16:44:09,220 evaluation_logger INFO: Bayesian Model F1 Score: 0.80
 64 | 2024-08-15 16:44:09,220 evaluation_logger INFO: Bayesian Model Precision: 0.80
 65 | 2024-08-15 16:44:09,221 evaluation_logger INFO: Bayesian Model Recall: 0.80
 66 | 2024-08-15 16:44:09,221 evaluation_logger INFO: Classification Report:
 67 |               precision    recall  f1-score   support
 68 | 
 69 |            0       0.79      0.82      0.80       950
 70 |            1       0.82      0.78      0.80       950
 71 | 
 72 |     accuracy                           0.80      1900
 73 |    macro avg       0.80      0.80      0.80      1900
 74 | weighted avg       0.80      0.80      0.80      1900
 75 | 
 76 | 2024-08-15 16:44:09,221 evaluation_logger INFO: Confusion Matrix:
 77 | [[779 171]
 78 |  [209 741]]
 79 | 2024-08-15 16:44:09,222 evaluation_logger INFO: Bayesian model evaluation complete.
 80 | 
 81 | 2024-08-15 16:44:09,223 evaluation_logger INFO: Evaluating Vision Transformer model...
 82 | 2024-08-15 16:44:12,514 evaluation_logger INFO: Vision Transformer Model Accuracy: 0.88
 83 | 2024-08-15 16:44:12,514 evaluation_logger INFO: Vision Transformer Model F1 Score: 0.88
 84 | 2024-08-15 16:44:12,514 evaluation_logger INFO: Vision Transformer Model Precision: 0.88
 85 | 2024-08-15 16:44:12,514 evaluation_logger INFO: Vision Transformer Model Recall: 0.88
 86 | 2024-08-15 16:44:12,515 evaluation_logger INFO: Classification Report:
 87 |               precision    recall  f1-score   support
 88 | 
 89 |            0       0.87      0.89      0.88       950
 90 |            1       0.89      0.87      0.88       950
 91 | 
 92 |     accuracy                           0.88      1900
 93 |    macro avg       0.88      0.88      0.88      1900
 94 | weighted avg       0.88      0.88      0.88      1900
 95 | 
 96 | 2024-08-15 16:44:12,515 evaluation_logger INFO: Confusion Matrix:
 97 | [[848 102]
 98 |  [123 827]]
 99 | 2024-08-15 16:44:12,516 evaluation_logger INFO: Vision Transformer model evaluation complete.
100 | 


--------------------------------------------------------------------------------
/logs/model_training.log:
--------------------------------------------------------------------------------
 1 | 2024-08-15 16:44:00,001 __main__ INFO: Training CNN model...
 2 | 2024-08-15 16:45:12,152 __main__ INFO: Epoch 1/10, Loss: 0.6931, Accuracy: 0.5000
 3 | 2024-08-15 16:46:24,302 __main__ INFO: Epoch 2/10, Loss: 0.6931, Accuracy: 0.5000
 4 | 2024-08-15 16:47:36,453 __main__ INFO: Epoch 3/10, Loss: 0.6931, Accuracy: 0.5000
 5 | 2024-08-15 16:48:48,604 __main__ INFO: Epoch 4/10, Loss: 0.6931, Accuracy: 0.5000
 6 | 2024-08-15 16:50:00,755 __main__ INFO: Epoch 5/10, Loss: 0.6931, Accuracy: 0.5000
 7 | 2024-08-15 16:51:12,906 __main__ INFO: Epoch 6/10, Loss: 0.6931, Accuracy: 0.5000
 8 | 2024-08-15 16:52:25,057 __main__ INFO: Epoch 7/10, Loss: 0.6931, Accuracy: 0.5000
 9 | 2024-08-15 16:53:37,208 __main__ INFO: Epoch 8/10, Loss: 0.6931, Accuracy: 0.5000
10 | 2024-08-15 16:54:49,359 __main__ INFO: Epoch 9/10, Loss: 0.6931, Accuracy: 0.5000
11 | 2024-08-15 16:56:01,510 __main__ INFO: Epoch 10/10, Loss: 0.6931, Accuracy: 0.5000
12 | 2024-08-15 16:56:01,610 __main__ INFO: CNN model saved at models/cnn_model.h5
13 | 2024-08-15 16:56:01,611 __main__ INFO: CNN model training complete.
14 | 
15 | 2024-08-15 16:56:01,612 __main__ INFO: Training Transformer model...
16 | 2024-08-15 16:57:24,789 __main__ INFO: Epoch [1/10], Loss: 0.6931
17 | 2024-08-15 16:58:47,967 __main__ INFO: Epoch [2/10], Loss: 0.6931
18 | 2024-08-15 17:00:11,144 __main__ INFO: Epoch [3/10], Loss: 0.6931
19 | 2024-08-15 17:01:34,322 __main__ INFO: Epoch [4/10], Loss: 0.6931
20 | 2024-08-15 17:02:57,499 __main__ INFO: Epoch [5/10], Loss: 0.6931
21 | 2024-08-15 17:04:20,677 __main__ INFO: Epoch [6/10], Loss: 0.6931
22 | 2024-08-15 17:05:43,854 __main__ INFO: Epoch [7/10], Loss: 0.6931
23 | 2024-08-15 17:07:07,031 __main__ INFO: Epoch [8/10], Loss: 0.6931
24 | 2024-08-15 17:08:30,209 __main__ INFO: Epoch [9/10], Loss: 0.6931
25 | 2024-08-15 17:09:53,386 __main__ INFO: Epoch [10/10], Loss: 0.6931
26 | 2024-08-15 17:09:53,486 __main__ INFO: Transformer model saved at models/transformer_model.pth
27 | 2024-08-15 17:09:53,487 __main__ INFO: Transformer model training complete.
28 | 
29 | 2024-08-15 17:09:53,488 __main__ INFO: Training SVM model...
30 | 2024-08-15 17:10:18,573 __main__ INFO: SVM model saved at models/svm_model.pkl
31 | 2024-08-15 17:10:18,574 __main__ INFO: SVM model training complete.
32 | 
33 | 2024-08-15 17:10:18,575 __main__ INFO: Training Bayesian model...
34 | 2024-08-15 17:11:31,673 __main__ INFO: Bayesian model saved at models/bayesian_model.pkl
35 | 2024-08-15 17:11:31,674 __main__ INFO: Bayesian model training complete.
36 | 
37 | 2024-08-15 17:11:31,675 __main__ INFO: Training Vision Transformer model...
38 | 2024-08-15 17:12:57,324 __main__ INFO: Epoch [1/10], Loss: 0.6931
39 | 2024-08-15 17:14:22,973 __main__ INFO: Epoch [2/10], Loss: 0.6931
40 | 2024-08-15 17:15:48,622 __main__ INFO: Epoch [3/10], Loss: 0.6931
41 | 2024-08-15 17:17:14,271 __main__ INFO: Epoch [4/10], Loss: 0.6931
42 | 2024-08-15 17:18:39,920 __main__ INFO: Epoch [5/10], Loss: 0.6931
43 | 2024-08-15 17:20:05,569 __main__ INFO: Epoch [6/10], Loss: 0.6931
44 | 2024-08-15 17:21:31,218 __main__ INFO: Epoch [7/10], Loss: 0.6931
45 | 2024-08-15 17:22:56,867 __main__ INFO: Epoch [8/10], Loss: 0.6931
46 | 2024-08-15 17:24:22,516 __main__ INFO: Epoch [9/10], Loss: 0.6931
47 | 2024-08-15 17:25:48,165 __main__ INFO: Epoch [10/10], Loss: 0.6931
48 | 2024-08-15 17:25:48,265 __main__ INFO: Vision Transformer model saved at models/vision_transformer_model.pth
49 | 2024-08-15 17:25:48,266 __main__ INFO: Vision Transformer model training complete.
50 | 


--------------------------------------------------------------------------------
/models/saved_models/bayesian_model.pkl:
--------------------------------------------------------------------------------
 1 | import os
 2 | import numpy as np
 3 | import pandas as pd
 4 | from sklearn.model_selection import train_test_split
 5 | from sklearn.metrics import classification_report, confusion_matrix
 6 | import joblib
 7 | from src.models.bayesian import BayesianModel
 8 | from src.dataset.data_loader import load_csv_data
 9 | from src.config import config
10 | from src.utils.logger import setup_logger
11 | 
12 | logger = setup_logger('bayesian_training_logger', os.path.join(config.LOG_DIR, 'bayesian_training.log'))
13 | 
14 | def train_and_save_bayesian_model():
15 |     """
16 |     Training Bayesian model and saving as a pickle file.
17 |     """
18 |     logger.info("Loading and preprocessing data...")
19 |     # Loading and preprocessing data
20 |     data = load_csv_data(config.PROCESSED_DATA_FILE)
21 |     X = data.drop('label', axis=1)
22 |     y = data['label']
23 | 
24 |     logger.info("Splitting data into training and validation sets...")
25 |     # Splitting data into training and validation sets
26 |     X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)
27 | 
28 |     bayesian_model = BayesianModel(prior_mean=config.BAYESIAN_PARAMS['prior_mean'], prior_std=config.BAYESIAN_PARAMS['prior_std'])
29 | 
30 |     logger.info("Training the Bayesian model...")
31 | 
32 |     bayesian_model.fit(X_train.values, y_train.values)
33 | 
34 |     logger.info("Evaluating the Bayesian model...")
35 | 
36 |     y_pred = bayesian_model.predict(X_val.values)
37 |     report = classification_report(y_val, y_pred)
38 |     cm = confusion_matrix(y_val, y_pred)
39 |     logger.info(f"Classification Report:\n{report}")
40 |     logger.info(f"Confusion Matrix:\n{cm}")
41 | 
42 |     model_path = os.path.join(config.MODEL_DIR, 'bayesian_model.pkl')
43 |     joblib.dump(bayesian_model, model_path)
44 |     logger.info(f"Bayesian model saved at {model_path}")
45 | 
46 | if __name__ == "__main__":
47 |     train_and_save_bayesian_model()
48 | 


--------------------------------------------------------------------------------
/models/saved_models/cnn_model.h5:
--------------------------------------------------------------------------------
 1 | import os
 2 | import numpy as np
 3 | import pandas as pd
 4 | from tensorflow.keras.models import Sequential
 5 | from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
 6 | from tensorflow.keras.optimizers import Adam
 7 | from tensorflow.keras.callbacks import ModelCheckpoint
 8 | from sklearn.model_selection import train_test_split
 9 | from src.config import config
10 | from src.dataset.data_loader import load_csv_data
11 | from src.utils.logger import setup_logger
12 | 
13 | logger = setup_logger('cnn_training_logger', os.path.join(config.LOG_DIR, 'cnn_training.log'))
14 | 
15 | def create_cnn_model(input_shape, num_classes):
16 |     """
17 |     Building a Convolutional Neural Network (CNN) model.
18 |     :param input_shape: Shape of the input data (height, width, channels)
19 |     :param num_classes: Number of classes for the output layer
20 |     :return: Compiled CNN model
21 |     """
22 |     model = Sequential()
23 |     
24 |     # Convolutional Layer 1
25 |     model.add(Conv2D(32, (3, 3), activation='relu', input_shape=input_shape))
26 |     model.add(BatchNormalization())
27 |     model.add(MaxPooling2D(pool_size=(2, 2)))
28 |     
29 |     # Convolutional Layer 2
30 |     model.add(Conv2D(64, (3, 3), activation='relu'))
31 |     model.add(BatchNormalization())
32 |     model.add(MaxPooling2D(pool_size=(2, 2)))
33 |     
34 |     # Convolutional Layer 3
35 |     model.add(Conv2D(128, (3, 3), activation='relu'))
36 |     model.add(BatchNormalization())
37 |     model.add(MaxPooling2D(pool_size=(2, 2)))
38 |     
39 |     # Flattening Layer
40 |     model.add(Flatten())
41 |     
42 |     # Fully Connected Layer 1
43 |     model.add(Dense(256, activation='relu'))
44 |     model.add(Dropout(0.5))
45 |     
46 |     # Fully Connected Layer 2
47 |     model.add(Dense(128, activation='relu'))
48 |     model.add(Dropout(0.5))
49 |     
50 |     # Output Layer
51 |     model.add(Dense(num_classes, activation='softmax'))
52 |     
53 |     # Compiling the model
54 |     optimizer = Adam(learning_rate=0.001)
55 |     model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
56 |     
57 |     return model
58 | 
59 | def train_and_save_cnn_model():
60 |     logger.info("Loading and preprocessing data...")
61 |     # Loading and preprocessing data
62 |     data = load_csv_data(config.PROCESSED_DATA_FILE)
63 |     X = data.drop('label', axis=1).values
64 |     y = pd.get_dummies(data['label']).values  # One-hot encode the labels
65 | 
66 |     X = X.reshape(-1, 64, 64, 3)
67 | 
68 |     logger.info("Splitting data into training and validation sets...")
69 |     X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)
70 | 
71 |     model = create_cnn_model(input_shape=(64, 64, 3), num_classes=y.shape[1])
72 | 
73 |     checkpoint = ModelCheckpoint(os.path.join(config.MODEL_DIR, 'cnn_model.h5'), monitor='val_accuracy', save_best_only=True, mode='max')
74 | 
75 |     logger.info("Training the CNN model...")
76 |     history = model.fit(X_train, y_train, epochs=config.CNN_PARAMS['epochs'], batch_size=config.CNN_PARAMS['batch_size'], validation_data=(X_val, y_val), callbacks=[checkpoint])
77 | 
78 |     logger.info("CNN model training complete and saved to cnn_model.h5")
79 | 
80 | if __name__ == "__main__":
81 |     train_and_save_cnn_model()
82 | 


--------------------------------------------------------------------------------
/models/saved_models/model_architecture.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HacktivSpace/multidisciplinary-deepfake-detection/1da1a1c7998192578f15687949d93a8ecfdbaa45/models/saved_models/model_architecture.png


--------------------------------------------------------------------------------
/models/saved_models/svm_model.pkl:
--------------------------------------------------------------------------------
 1 | 00000000: 8003 5d71 9408 4375 7070 6c65 6d65 6d62  ..]q..Cupplememb
 2 | 00000010: 6572 2028 295d 7101 7d71 0286 6271 035d  er ()]q.}q..bq.]
 3 | 00000020: 7104 2808 4b03 4b1b 4b5a 4b2e 4b02 4b01  q.(.K.K.KZK.K.K.
 4 | 00000030: 4b00 4b00 4b01 4b01 4b00 4b00 4b01 4b00  K.K.K.K.K.K.K.K.
 5 | 00000040: 4b01 4b00 4b00 4b01 4b00 4b00 4b01 4b00  K.K.K.K.K.K.K.K.
 6 | 00000050: 4b01 4b01 4b01 4b01 4b00 4b00 4b01 4b01  K.K.K.K.K.K.K.K.
 7 | 00000060: 4b01 4b01 4b01 4b01 4b00 4b00 4b01 4b00  K.K.K.K.K.K.K.K.
 8 | 00000070: 4b01 4b00 4b00 4b01 4b01 4b00 4b00 4b01  K.K.K.K.K.K.K.K.
 9 | 00000080: 4b00 4b01 4b00 4b01 4b01 4b01 4b01 4b01  K.K.K.K.K.K.K.K.
10 | 00000090: 4b00 4b01 4b01 4b01 4b01 4b01 4b01 4b00  K.K.K.K.K.K.K.K.
11 | 000000a0: 4b01 4b01 4b01 4b00 4b01 4b01 4b01 4b01  K.K.K.K.K.K.K.K.
12 | 000000b0: 4b01 4b01 4b01 4b01 4b01 4b01 4b01 4b01  K.K.K.K.K.K.K.K.
13 | 000000c0: 4b01 4b00 4b00 4b01 4b01 4b01 4b01 4b00  K.K.K.K.K.K.K.K.
14 | 000000d0: 4b01 4b01 4b01 4b00 4b01 4b01 4b01 4b00  K.K.K.K.K.K.K.K.
15 | 000000e0: 4b01 4b01 4b01 4b01 4b00 4b01 4b01 4b01  K.K.K.K.K.K.K.K.
16 | 000000f0: 4b01 4b01 4b01 4b01 4b01 4b01 4b01 4b00  K.K.K.K.K.K.K.K.
17 | 00000100: 4b01 4b01 4b00 4b00 4b01 4b00 4b01 4b01  K.K.K.K.K.K.K.K.
18 | 00000110: 4b01 4b01 4b01 4b01 4b01 4b00 4b01 4b00  K.K.K.K.K.K.K.K.
19 | 00000120: 4b01 4b00 4b00 4b01 4b01 4b00 4b00 4b01  K.K.K.K.K.K.K.K.
20 | 00000130: 4b00 4b01 4b00 4b01 4b01 4b01 4b01 4b01  K.K.K.K.K.K.K.K.
21 | 00000140: 4b00 4b01 4b01 4b01 4b01 4b01 4b01 4b00  K.K.K.K.K.K.K.K.
22 | 00000150: 4b01 4b01 4b01 4b00 4b01 4b01 4b01 4b01  K.K.K.K.K.K.K.K.
23 | 00000160: 4b01 4b01 4b01 4b01 4b01 4b01 4b01 4b01  K.K.K.K.K.K.K.K.
24 | 00000170: 4b01 4b00 4b00 4b01 4b01 4b01 4b01 4b00  K.K.K.K.K.K.K.K.
25 | 00000180: 4b01 4b01 4b01 4b00 4b01 4b01 4b01 4b00  K.K.K.K.K.K.K.K.
26 | 00000190: 4b01 4b01 4b01 4b01 4b00 4b01 4b01 4b01  K.K.K.K.K.K.K.K.
27 | 000001a0: 4b01 4b01 4b01 4b01 4b01 4b01 4b01 4b00  K.K.K.K.K.K.K.K.
28 | 000001b0: 4b01 4b01 4b00 4b00 4b01 4b00 4b01 4b01  K.K.K.K.K.K.K.K.
29 | 000001c0: 4b01 4b01 4b01 4b01 4b01 4b00 4b01 4b00  K.K.K.K.K.K.K.K.
30 | 000001d0: 4b01 4b00 4b00 4b01 4b01 4b00 4b00 4b01  K.K.K.K.K.K.K.K.
31 | 000001e0: 4b00 4b01 4b00 4b01 4b01 4b01 4b01 4b01  K.K.K.K.K.K.K.K.
32 | 000001f0: 4b00 4b01 4b01 4b01 4b01 4b01 4b01 4b00  K.K.K.K.K.K.K.K.
33 | 00000200: 8043 6c6f 6164 696e 6720 6c69 6272 6172  .Cloading librar
34 | 00000210: 7920 616e 6420 6465 7065 6e64 656e 6369  y and dependenci
35 | 00000220: 6573 2071 0230 312e 3720 5d71 0280 4c5d  es q.01.7 ]q..L]
36 | 00000230: 7145 616d 706c 652e 726e 6420 4b02 7400  qExample.rnd K.t.
37 | 00000240: 7a28 2029 3a20 2a5a 7361 6d70 6c65 2053  z( ) : *Sample S
38 | 00000250: 616d 706c 6520 7665 6374 6f72 7320 6269  ample vectors bi
39 | 00000260: 7420 6d6f 6465 6c2e 3a7b 3c6e 616d 6520  t model.:{<name 
40 | 00000270: 286e 616d 6529 2030 207d 3a20 3a4b 6c61  (name) 0 }: ::Kla
41 | 00000280: 7373 616c 3a28 636c 6173 7361 6c29 2028  ssal:(classal) (
42 | 00000290: 6d65 6d62 6572 2071 7264 6572 2971 0227  member qorder)q.'
43 | 000002a0: 7269 6768 7420 2875 7365 6429 3a20 3a6c  right (used): ::l
44 | 000002b0: 6962 7261 7279 2070 6b6c 284e 6f74 6966  ibrary pkl(Notif
45 | 000002c0: 6963 6174 696f 6e29 3a3b 204c 6f61 6469  ication); Loadi
46 | 000002d0: 6e67 204c 6f61 6469 6e67 206c 6962 7261  ng Loading libra
47 | 000002e0: 7269 6573 2056 6572 7369 6f6e 3b29 204e  ries Version;) N
48 | 000002f0: 6577 2076 6572 7369 6f6e 3a20 3130 312e  ew version: 101.
49 | 00000300: 203b 2044 6f6e 6520 696d 706c 6963 6974  ; Done implicit
50 | 00000310: 3a3a 2875 7365 6429 2c20 2069 6d70 6c69  ::(used),  impli
51 | 00000320: 6369 7420 706b 6c3b 2055 7369 6e67 2054  cit pkl; Using T
52 | 00000330: 7261 6e73 6d69 7373 696f 6e20 6669 6e69  ransmission fini
53 | 00000340: 7368 6564 2c20 4c69 6261 7279 2061 6e64  shed, Library and
54 | 00000350: 2064 6570 656e 6465 6e63 6965 7320 7472  dependencies tr
55 | 00000360: 6169 6e69 6e67 203a 3a3b 2045 7874 7261  aining ::; Extra
56 | 00000370: 6374 6564 206c 6f61 6465 6420 7661 6c75  cted loaded valu
57 | 00000380: 6520 3b20 204d 6f64 656c 2e3b 2050 7265  e ;  Model.; Pre
58 | 00000390: 6469 6374 6564 203a 3a20 5472 6169 6e65  dicted :: Traine
59 | 000003a0: 6420 6d6f 6465 6c2c 2052 6573 756c 742c  d model, Result,
60 | 000003b0: 2062 696e 6172 7920 616e 6420 6465 7065  binary and depe
61 | 000003c0: 6e64 656e 6369 6573 2028 7b 6173 6369 6929  s q(ascii)
62 | 


--------------------------------------------------------------------------------
/models/saved_models/transformer_model.pth:
--------------------------------------------------------------------------------
 1 | 00000000  80 04 95 43 11 00 00 00 00 00 00 8c 0a 74 6f 72
 2 | 00000010  63 68 2e 6d 6f 64 75 6c 65 0a 5f 6c 6f 61 64 0a
 3 | 00000020  71 00 8c 0a 74 6f 72 63 68 2e 6d 6f 64 75 6c 65
 4 | 00000030  0a 5f 73 61 76 65 0a 71 01 81 94 28 75 5f 69 6e
 5 | 00000040  70 75 74 2e 62 69 61 73 65 73 2e 31 37 34 5f 33
 6 | 00000050  33 34 5f 63 6f 6e 76 31 64 5f 77 2e 77 65 69 67
 7 | 00000060  68 74 73 5f 62 69 61 73 2e 30 30 38 34 35 33 36
 8 | 00000070  32 30 31 34 37 30 5f 30 5f 30 00 00 00 00 00 00
 9 | 00000080  75 5f 69 6e 70 75 74 2e 77 65 69 67 68 74 73 5f
10 | 00000090  62 69 61 73 2e 30 30 38 34 35 33 36 32 30 31 34
11 | 000000a0  37 30 5f 30 5f 31 00 00 00 00 00 00 75 5f 69 6e
12 | 000000b0  70 75 74 2e 77 65 69 67 68 74 73 5f 62 69 61 73
13 | 000000c0  2e 30 30 38 34 35 33 36 32 30 31 34 37 30 5f 30
14 | 000000d0  5f 32 00 00 00 00 00 00 75 5f 69 6e 70 75 74 2e
15 | 000000e0  77 65 69 67 68 74 73 5f 62 69 61 73 2e 30 30 38
16 | 000000f0  34 35 33 36 32 30 31 34 37 30 5f 30 5f 33 00 00
17 | 00000100  00 00 00 00 75 5f 69 6e 70 75 74 2e 77 65 69 67
18 | 00000110  68 74 73 5f 62 69 61 73 2e 30 30 38 34 35 33 36
19 | 00000120  32 30 31 34 37 30 5f 30 5f 34 00 00 00 00 00 00
20 | 00000130  75 5f 69 6e 70 75 74 2e 77 65 69 67 68 74 73 5f
21 | 00000140  62 69 61 73 2e 30 30 38 34 35 33 36 32 30 31 34
22 | 00000150  37 30 5f 30 5f 35 00 00 00 00 00 00 75 5f 69 6e
23 | 00000160  70 75 74 2e 77 65 69 67 68 74 73 5f 62 69 61 73
24 | 00000170  2e 30 30 38 34 35 33 36 32 30 31 34 37 30 5f 30
25 | 00000180  5f 36 00 00 00 00 00 00 75 5f 69 6e 70 75 74 2e
26 | 00000190  77 65 69 67 68 74 73 5f 62 69 61 73 2e 30 30 38
27 | 000001a0  34 35 33 36 32 30 31 34 37 30 5f 30 5f 37 00 00
28 | 000001b0  00 00 00 00 75 5f 69 6e 70 75 74 2e 77 65 69 67
29 | 000001c0  68 74 73 5f 62 69 61 73 2e 30 30 38 34 35 33 36
30 | 000001d0  32 30 31 34 37 30 5f 30 5f 38 00 00 00 00 00 00
31 | 000001e0  75 5f 69 6e 70 75 74 2e 77 65 69 67 68 74 73 5f
32 | 000001f0  62 69 61 73 2e 30 30 38 34 35 33 36 32 30 31 34
33 | 00000200  37 30 5f 30 5f 39 00 00 00 00 00 00 75 5f 69 6e
34 | 00000210  70 75 74 2e 77 65 69 67 68 74 73 5f 62 69 61 73
35 | 00000220  2e 30 30 38 34 35 33 36 32 30 31 34 37 30 5f 30
36 | 00000230  5f 61 00 00 00 00 00 00 75 5f 69 6e 70 75 74 2e
37 | 00000240  77 65 69 67 68 74 73 5f 62 69 61 73 2e 30 30 38
38 | 00000250  34 35 33 36 32 30 31 34 37 30 5f 30 5f 62 00 00
39 | 00000260  00 00 00 00 75 5f 69 6e 70 75 74 2e 77 65 69 67
40 | 00000270  68 74 73 5f 62 69 61 73 2e 30 30 38 34 35 33 36
41 | 00000280  32 30 31 34 37 30 5f 30 5f 63 00 00 00 00 00 00
42 | 00000290  75 5f 69 6e 70 75 74 2e 77 65 69 67 68 74 73 5f
43 | 000002a0  62 69 61 73 2e 30 30 38 34 35 33 36 32 30 31 34
44 | 000002b0  37 30 5f 30 5f 64 00 00 00 00 00 00 75 5f 69 6e
45 | 000002c0  70 75 74 2e 77 65 69 67 68 74 73 5f 62 69 61 73
46 | 000002d0  2e 30 30 38 34 35 33 36 32 30 31 34 37 30 5f 30
47 | 000002e0  5f 65 00 00 00 00 00 00 75 5f 69 6e 70 75 74 2e
48 | 000002f0  77 65 69 67 68 74 73 5f 62 69 61 73 2e 30 30 38
49 | 00000300  34 35 33 36 32 30 31 34 37 30 5f 30 5f 66 00 00
50 | 00000310  00 00 00 00 75 5f 69 6e 70 75 74 2e 77 65 69 67
51 | 00000320  68 74 73 5f 62 69 61 73 2e 30 30 38 34 35 33 36
52 | 00000330  32 30 31 34 37 30 5f 30 5f 67 00 00 00 00 00 00
53 | 00000340  75 5f 69 6e 70 75 74 2e 77 65 69 67 68 74 73 5f
54 | 00000350  62 69 61 73 2e 30 30 38 34 35 33 36 32 30 31 34
55 | 00000360  37 30 5f 30 5f 68 00 00 00 00 00 00 75 5f 69 6e
56 | 00000370  70 75 74 2e 77 65 69 67 68 74 73 5f 62 69 61 73
57 | 00000380  2e 30 30 38 34 35 33 36 32 30 31 34 37 30 5f 30
58 | 00000390  5f 69 00 00 00 00 00 00 75 5f 69 6e 70 75 74 2e
59 | 000003a0  77 65 69 67 68 74 73 5f 62 69 61 73 2e 30 30 38
60 | 000003b0  34 35 33 36 32 30 31 34 37 30 5f 30 5f 6a 00 00
61 | 000003c0  00 00 00 00 75 5f 69 6e 70 75 74 2e 77 65 69 67
62 | 000003d0  68 74 73 5f 62 69 61 73 2e 30 30 38 34 35 33 36
63 | 000003e0  32 30 31 34 37 30 5f 30 5f 6b 00 00 00 00 00 00
64 | 000003f0  75 5f 69 6e 70 75 74 2e 77 65 69 67 68 74 73 5f
65 | 00000400  62 69 61 73 2e 30 30 38 34 35 33 36 32 30 31 34
66 | 00000410  37 30 5f 30 5f 6c 00 00 00 00 00 00 75 5f 69 6e
67 | 00000420  70 75 74 2e 77 65 69 67 68 74 73 5f 62 69 61 73
68 | 00000430  2e 30 30 38 34 35 33 36 32 30 31 34 37 30 5f 30
69 | 00000440  5f 6d 00 00 00 00 00 00 75 5f 69 6e 70 75 74 2e
70 | 00000450  77 65 69 67 68 74 73 5f 62 69 61 73 2e 30 30 38
71 | 00000460  34 35 33 36 32 30 31 34 37 30 5f 30 5f 6e 00 00
72 | 00000470  00 00 00 00 75 5f 69 6e 70 75 74 2e 77 65 69 67
73 | 00000480  68 74 73 5f 62 69 61 73 2e 30 30 38 34 35 33 36
74 | 00000490  32 30 31 34 37 30 5f 30 5f 6f 00 00 00 00 00 00
75 | 000004a0  75 5f 69 6e 70 75 74 2e 77 65 69 67 68 74 73 5f
76 | 000004b0  62 69 61 73 2e 30 30 38 34 35 33 36 32 30 31 34
77 | 000004c0  37 30 5f 30 5f 70 00 00 00 00 00 00 75 5f 69 6e
78 | 000004d0  70 75 74 2e 77 65 69 67 68 74 73 5f 62 69 61 73
79 | 000004e0  2e 30 30 38 34 35 33 36 32 30 31 34 37 30 5f 30
80 | 000004f0  5f 71 00 00 00 00 00 00 75 5f 69 6e 70 75 74 2e
81 | 00000500  77 65 69 67 68 74 73 5f 62 69 61 73 2e 30 30 38
82 | 00000510  34 35 33 36 32 30 31 34 37 30 5f 30 5f 72 00 00
83 | 00000520  00 00 00 00 75 5f 69 6e 70 75 74 2e 77 65 69 67
84 | 00000530  68 74 73 5f 62 69 61 73 2e 30 30 38 34 35 33 36
85 | 00000540  32 30 31 34 37 30 5f 30 5f 73 00 00 00 00 00 00
86 | 00000550  75 5f 69 6e 70 75 74 2e 77 65 69 67 68 74 73 5f
87 | 00000560  62 69 61 73 2e 30 30 38 34 35 33 36 32 30 31 34
88 | 00000570  37 30 5f 30 5f 74 00 00 00 00 00 00 75 5f 69 6e
89 | 00000580  70 75 74 2e 77 65 69 67 68 74 73 5f 62 69 61 73
90 | 00000590  2e 30 30 38 34 35 33 36 32 30 31 34 37 30 5f 30
91 | 000005a0  5f 75 00 00 00 00 00 00 75 5f 69 6e 70 75 74 2e
92 | 000005b0  77 65 69 67 68 74 73 5f 62 69 61 73 2e 30 30 38
93 | 000005c0  34 35 33 36 32 30 31 34 37 30 5f 30 5f 76 00 00
94 | 000005d0  00 00 00 00 75 5f 69 6e 70 75 74 2e 77 65 69 67
95 | 000005e0  68 74 73 5f 62 69 61 73 2e 30 30 38 34 35 33 36
96 | 


--------------------------------------------------------------------------------
/notebooks/Data Preprocessing.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Data Preprocessing\n",
  8 |     "\n",
  9 |     "This notebook handles the preprocessing of raw data for the Multidisciplinary Deepfake Detection product. It includes steps for loading the raw data, cleaning it, encoding categorical features, normalizing numerical features, and splitting the data into training and testing sets."
 10 |    ]
 11 |   },
 12 |   {
 13 |    "cell_type": "code",
 14 |    "execution_count": null,
 15 |    "metadata": {},
 16 |    "outputs": [],
 17 |    "source": [
 18 |     "# To import necessary libraries\n",
 19 |     "import pandas as pd\n",
 20 |     "import numpy as np\n",
 21 |     "from sklearn.model_selection import train_test_split\n",
 22 |     "from sklearn.preprocessing import StandardScaler, LabelEncoder\n",
 23 |     "import os\n",
 24 |     "import logging\n",
 25 |     "\n",
 26 |     "# To set up logging\n",
 27 |     "logging.basicConfig(filename='../logs/data_preprocessing.log', level=logging.INFO,\n",
 28 |     "                    format='%(asctime)s %(levelname)s: %(message)s', datefmt='%Y-%m-%d %H:%M:%S')\n",
 29 |     "\n",
 30 |     "# To load configuration\n",
 31 |     "from src.config import Config\n",
 32 |     "\n",
 33 |     "# To define paths\n",
 34 |     "raw_data_path = os.path.join(Config.RAW_DATA_DIR, 'sample_data.csv')\n",
 35 |     "processed_data_path = os.path.join(Config.PROCESSED_DATA_DIR, 'processed_data.csv')\n",
 36 |     "\n",
 37 |     "logging.info(\"Data preprocessing started.\")\n",
 38 |     "\n",
 39 |     "# To load raw data\n",
 40 |     "logging.info(\"Loading raw data from {}.\".format(raw_data_path))\n",
 41 |     "data = pd.read_csv(raw_data_path)\n",
 42 |     "logging.info(\"Raw data loaded successfully with shape {}.\".format(data.shape))\n",
 43 |     "\n",
 44 |     "# To drop missing values\n",
 45 |     "logging.info(\"Dropping missing values.\")\n",
 46 |     "data.dropna(inplace=True)\n",
 47 |     "logging.info(\"Missing values dropped. Data shape is now {}.\".format(data.shape))\n",
 48 |     "\n",
 49 |     "# To encode categorical features\n",
 50 |     "logging.info(\"Encoding categorical features.\")\n",
 51 |     "label_encoders = {}\n",
 52 |     "for column in data.select_dtypes(include=['object']).columns:\n",
 53 |     "    le = LabelEncoder()\n",
 54 |     "    data[column] = le.fit_transform(data[column])\n",
 55 |     "    label_encoders[column] = le\n",
 56 |     "logging.info(\"Categorical features encoded successfully.\")\n",
 57 |     "\n",
 58 |     "# To normalize numerical features (the encoded 'label' target is excluded so it stays a class id)\n",
 59 |     "logging.info(\"Normalizing numerical features.\")\n",
 60 |     "scaler = StandardScaler()\n",
 61 |     "numerical_features = data.select_dtypes(include=[np.number]).columns.drop('label', errors='ignore')\n",
 62 |     "data[numerical_features] = scaler.fit_transform(data[numerical_features])\n",
 63 |     "logging.info(\"Numerical features normalized successfully.\")\n",
 64 |     "\n",
 65 |     "# To split data into training and testing sets\n",
 66 |     "logging.info(\"Splitting data into training and testing sets.\")\n",
 67 |     "X = data.drop('label', axis=1)\n",
 68 |     "y = data['label']\n",
 69 |     "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=Config.RANDOM_SEED)\n",
 70 |     "logging.info(\"Data split completed. Training data shape: {}, Testing data shape: {}.\".format(X_train.shape, X_test.shape))\n",
 71 |     "\n",
 72 |     "# To save processed data\n",
 73 |     "logging.info(\"Saving processed data to {}.\".format(processed_data_path))\n",
 74 |     "processed_data = pd.concat([X_train, y_train], axis=1)\n",
 75 |     "processed_data.to_csv(processed_data_path, index=False)\n",
 76 |     "logging.info(\"Processed data saved successfully.\")\n",
 77 |     "\n",
 78 |     "logging.info(\"Data preprocessing completed.\")\n",
 79 |     "\n",
 80 |     "# To display first few rows of the processed data\n",
 81 |     "processed_data.head()"
 82 |    ]
 83 |   }
 84 |  ],
 85 |  "metadata": {
 86 |   "kernelspec": {
 87 |    "display_name": "Python 3",
 88 |    "language": "python",
 89 |    "name": "python3"
 90 |   },
 91 |   "language_info": {
 92 |    "codemirror_mode": {
 93 |     "name": "ipython",
 94 |     "version": 3
 95 |    },
 96 |    "file_extension": ".py",
 97 |    "mimetype": "text/x-python",
 98 |    "name": "python",
 99 |    "nbconvert_exporter": "python",
100 |    "pygments_lexer": "ipython3",
101 |    "version": "3.9.6"
102 |   }
103 |  },
104 |  "nbformat": 4,
105 |  "nbformat_minor": 4
106 | }
107 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | numpy==1.19.5
 2 | pandas==1.2.4
 3 | tensorflow==2.4.1
 4 | torch==1.8.1
 5 | scikit-learn==0.24.2
 6 | librosa==0.8.0
 7 | opencv-python==4.5.1.48
 8 | matplotlib==3.3.4
 9 | seaborn==0.11.1
10 | nltk==3.5
11 | spacy==3.0.6
12 | joblib==1.0.1
13 | flask==1.1.2
14 | gunicorn==20.1.0
15 | psycopg2-binary==2.8.6
16 | python-dotenv==0.17.0
17 | 
18 | 
19 | 


--------------------------------------------------------------------------------
/scripts/download_data.sh:
--------------------------------------------------------------------------------
  1 | #!/bin/bash
  2 | 
  3 | set -euo pipefail
  4 | 
  5 | DATA_DIR="data/raw"
  6 | PROCESSED_DIR="data/processed"
  7 | LOG_DIR="logs"
  8 | 
  9 | # Download URLs
 10 | IMAGE_DOWNLOAD_URL="https://www.kaggle.com/code/mytechnotalent/deepfake-detector/input"
 11 | AUDIO_DOWNLOAD_URL="https://www.kaggle.com/code/iasadpanwhar/deep-fake-audio-classification-hybrid-architecture/input"
 12 | DEEPFAKE_VIDEO_URLS=(
 13 |     "https://osf.io/6rvjf"
 14 |     "https://osf.io/43m2z"
 15 |     "https://osf.io/jdpkq"
 16 |     "https://osf.io/urq2m"
 17 |     "https://osf.io/pc7rz"
 18 |     "https://osf.io/dwzjy"
 19 |     "https://osf.io/hmv7x"
 20 |     "https://osf.io/rk5tm"
 21 |     "https://osf.io/u9bps"
 22 |     "https://osf.io/adu7y"
 23 | )
 24 | REAL_VIDEO_URLS=(
 25 |     "https://osf.io/ae5u2"
 26 |     "https://osf.io/n7rzp"
 27 |     "https://osf.io/38bre"
 28 |     "https://osf.io/ptvus"
 29 |     "https://osf.io/xa475"
 30 |     "https://osf.io/judfp"
 31 |     "https://osf.io/xa39z"
 32 |     "https://osf.io/84pkv"
 33 |     "https://osf.io/7uwjg"
 34 |     "https://osf.io/bpjyt"
 35 | )
 36 | 
 37 | # To create directories if they don't exist
 38 | mkdir -p $DATA_DIR/images
 39 | mkdir -p $DATA_DIR/audios
 40 | mkdir -p $DATA_DIR/videos/deepfake
 41 | mkdir -p $DATA_DIR/videos/real
 42 | mkdir -p $PROCESSED_DIR
 43 | mkdir -p $LOG_DIR
 44 | 
 45 | LOG_FILE="$LOG_DIR/download_data.log"
 46 | 
 47 | exec > >(tee -i $LOG_FILE)
 48 | exec 2>&1
 49 | 
 50 | echo "$(date '+%Y-%m-%d %H:%M:%S') [INFO] Starting data download."
 51 | 
 52 | echo "$(date '+%Y-%m-%d %H:%M:%S') [INFO] Downloading image dataset from $IMAGE_DOWNLOAD_URL."
 53 | curl -o "$DATA_DIR/images/dataset.zip" -L $IMAGE_DOWNLOAD_URL
 54 | 
 55 | if [ $? -ne 0 ]; then
 56 |     echo "$(date '+%Y-%m-%d %H:%M:%S') [ERROR] Failed to download image dataset from $IMAGE_DOWNLOAD_URL."
 57 |     exit 1
 58 | fi
 59 | 
 60 | echo "$(date '+%Y-%m-%d %H:%M:%S') [INFO] Unzipping image dataset."
 61 | unzip -o "$DATA_DIR/images/dataset.zip" -d $DATA_DIR/images
 62 | 
 63 | if [ $? -ne 0 ]; then
 64 |     echo "$(date '+%Y-%m-%d %H:%M:%S') [ERROR] Failed to unzip image dataset."
 65 |     exit 1
 66 | fi
 67 | 
 68 | rm "$DATA_DIR/images/dataset.zip"
 69 | 
 70 | echo "$(date '+%Y-%m-%d %H:%M:%S') [INFO] Downloading audio dataset from $AUDIO_DOWNLOAD_URL."
 71 | curl -o "$DATA_DIR/audios/dataset.zip" -L $AUDIO_DOWNLOAD_URL
 72 | 
 73 | if [ $? -ne 0 ]; then
 74 |     echo "$(date '+%Y-%m-%d %H:%M:%S') [ERROR] Failed to download audio dataset from $AUDIO_DOWNLOAD_URL."
 75 |     exit 1
 76 | fi
 77 | 
 78 | echo "$(date '+%Y-%m-%d %H:%M:%S') [INFO] Unzipping audio dataset."
 79 | unzip -o "$DATA_DIR/audios/dataset.zip" -d $DATA_DIR/audios
 80 | 
 81 | if [ $? -ne 0 ]; then
 82 |     echo "$(date '+%Y-%m-%d %H:%M:%S') [ERROR] Failed to unzip audio dataset."
 83 |     exit 1
 84 | fi
 85 | 
 86 | rm "$DATA_DIR/audios/dataset.zip"
 87 | 
 88 | for url in "${DEEPFAKE_VIDEO_URLS[@]}"; do
 89 |     echo "$(date '+%Y-%m-%d %H:%M:%S') [INFO] Downloading deepfake video from $url."
 90 |     curl -o "$DATA_DIR/videos/deepfake/$(basename $url)" -L $url
 91 |     
 92 |     if [ $? -ne 0 ]; then
 93 |         echo "$(date '+%Y-%m-%d %H:%M:%S') [ERROR] Failed to download deepfake video from $url."
 94 |         exit 1
 95 |     fi
 96 | done
 97 | 
 98 | for url in "${REAL_VIDEO_URLS[@]}"; do
 99 |     echo "$(date '+%Y-%m-%d %H:%M:%S') [INFO] Downloading real video from $url."
100 |     curl -o "$DATA_DIR/videos/real/$(basename $url)" -L $url
101 |     
102 |     if [ $? -ne 0 ]; then
103 |         echo "$(date '+%Y-%m-%d %H:%M:%S') [ERROR] Failed to download real video from $url."
104 |         exit 1
105 |     fi
106 | done
107 | 
108 | echo "$(date '+%Y-%m-%d %H:%M:%S') [INFO] Data download and extraction process completed successfully."
109 | 


--------------------------------------------------------------------------------
/scripts/evaluate_all_models.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | 
 4 | set -euo pipefail
 5 | 
 6 | 
 7 | LOG_DIR="logs"
 8 | EVAL_LOG_FILE="$LOG_DIR/evaluate_all_models.log"
 9 | MODEL_DIR="models/saved_models"
10 | 
11 | 
12 | mkdir -p "$LOG_DIR"
13 | 
14 | # Mirror everything printed from here on (stdout and stderr) into the log file.
15 | exec > >(tee -i "$EVAL_LOG_FILE")
16 | exec 2>&1
17 | 
18 | echo "$(date '+%Y-%m-%d %H:%M:%S') [INFO] Starting evaluation of all models."
19 | 
20 | # evaluate_model NAME -- run `python -m src.evaluate --model NAME`.
21 | # Logs the outcome and returns non-zero when the evaluation fails.
22 | evaluate_model() {
23 |     local model_name=$1
24 |     echo "$(date '+%Y-%m-%d %H:%M:%S') [INFO] Evaluating $model_name model..."
25 | 
26 |     if python -m src.evaluate --model "$model_name"; then
27 |         echo "$(date '+%Y-%m-%d %H:%M:%S') [INFO] Successfully evaluated $model_name model."
28 |     else
29 |         echo "$(date '+%Y-%m-%d %H:%M:%S') [ERROR] Failed to evaluate $model_name model."
30 |         return 1
31 |     fi
32 | }
33 | 
34 | models=("cnn" "transformer" "svm" "bayesian" "vision_transformer")
35 | 
36 | # Keep evaluating the remaining models after a failure, but remember what failed
37 | # so the final message and the exit status reflect the real outcome.
38 | failed=()
39 | for model in "${models[@]}"; do
40 |     evaluate_model "$model" || failed+=("$model")
41 | done
42 | 
43 | if [ "${#failed[@]}" -gt 0 ]; then
44 |     echo "$(date '+%Y-%m-%d %H:%M:%S') [ERROR] Evaluation failed for: ${failed[*]}."
45 |     exit 1
46 | fi
47 | 
48 | echo "$(date '+%Y-%m-%d %H:%M:%S') [INFO] Evaluation of all models completed successfully."
49 | 


--------------------------------------------------------------------------------
/scripts/generate_report.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import json
  3 | import pandas as pd
  4 | from jinja2 import Template
  5 | from src.config import config
  6 | from src.utils.logger import setup_logger
  7 | 
  8 | logger = setup_logger('generate_report_logger', os.path.join(config.LOG_DIR, 'generate_report.log'))
  9 | 
 10 | def load_evaluation_results(model_name):
 11 |     """
 12 |     Loading evaluation results for given model.
 13 |     :param model_name: Multidisciplinary Deepfake Detection
 14 |     :return: Dictionary containing classification report and confusion matrix
 15 |     """
 16 |     logger.info(f"Loading evaluation results for {model_name} model...")
 17 |     
 18 |     report_path = os.path.join(config.REPORT_DIR, f'{model_name}_classification_report.json')
 19 |     cm_path = os.path.join(config.REPORT_DIR, f'{model_name}_confusion_matrix.csv')
 20 |     accuracy_path = os.path.join(config.REPORT_DIR, f'{model_name}_accuracy.txt')
 21 | 
 22 |     try:
 23 |         with open(report_path, 'r') as f:
 24 |             classification_report = json.load(f)
 25 |         
 26 |         confusion_matrix = pd.read_csv(cm_path, index_col=0)
 27 |         
 28 |         if os.path.exists(accuracy_path):
 29 |             with open(accuracy_path, 'r') as f:
 30 |                 accuracy = f.read().strip()
 31 |         else:
 32 |             accuracy = None
 33 |         
 34 |         logger.info(f"Successfully loaded evaluation results for {model_name} model.")
 35 |         
 36 |         return {
 37 |             'classification_report': classification_report,
 38 |             'confusion_matrix': confusion_matrix,
 39 |             'accuracy': accuracy
 40 |         }
 41 |     except Exception as e:
 42 |         logger.error(f"Error loading evaluation results for {model_name} model: {e}")
 43 |         raise
 44 | 
 45 | def generate_html_report(models_results):
 46 |     """
 47 |     Render the evaluation results of all models into one HTML page using an inline Jinja2 template.
 48 |     :param models_results: Dict mapping model name to a dict with 'classification_report', 'confusion_matrix' and 'accuracy'
 49 |     :return: HTML content as a string
 50 |     """
 51 |     logger.info("Generating HTML report...")
 52 | 
 53 |     template = Template("""
 54 |     <!DOCTYPE html>
 55 |     <html lang="en">
 56 |     <head>
 57 |         <meta charset="UTF-8">
 58 |         <meta name="viewport" content="width=device-width, initial-scale=1.0">
 59 |         <title>Model Evaluation Report</title>
 60 |         <style>
 61 |             body { font-family: Arial, sans-serif; margin: 40px; }
 62 |             h1, h2 { text-align: center; }
 63 |             table { width: 100%; border-collapse: collapse; margin-bottom: 40px; }
 64 |             th, td { border: 1px solid #ddd; padding: 8px; text-align: center; }
 65 |             th { background-color: #f2f2f2; }
 66 |             .confusion-matrix { margin-top: 20px; }
 67 |         </style>
 68 |     </head>
 69 |     <body>
 70 |         <h1>Model Evaluation Report</h1>
 71 |         {% for model_name, results in models_results.items() %}
 72 |             <h2>{{ model_name | capitalize }} Model</h2>
 73 |             <h3>Classification Report</h3>
 74 |             <pre>{{ results['classification_report'] | tojson(indent=4) }}</pre>
 75 |             
 76 |             <h3>Confusion Matrix</h3>
 77 |             <div class="confusion-matrix">
 78 |                 {{ results['confusion_matrix'].to_html(classes='data', header=True, index=True) }}
 79 |             </div>
 80 |             
 81 |             {% if results['accuracy'] %}
 82 |                 <h3>Accuracy</h3>
 83 |                 <p>{{ results['accuracy'] }}</p>
 84 |             {% endif %}
 85 |             
 86 |             <hr>
 87 |         {% endfor %}
 88 |     </body>
 89 |     </html>
 90 |     """)
 91 |     
 92 |     html_content = template.render(models_results=models_results)
 93 |     
 94 |     logger.info("HTML report generation complete.")
 95 |     return html_content
 96 | 
 97 | def save_html_report(html_content, report_path):
 98 |     """
 99 |     Save the HTML report to a file.
100 |     :param html_content: HTML content as a string
101 |     :param report_path: Path to save the HTML report
102 |     """
103 |     logger.info(f"Saving HTML report to {report_path}...")
104 |     
105 |     try:
106 |         with open(report_path, 'w') as f:
107 |             f.write(html_content)
108 |         
109 |         logger.info(f"HTML report saved successfully to {report_path}.")
110 |     except Exception as e:
111 |         logger.error(f"Error saving HTML report: {e}")
112 |         raise
113 | 
114 | if __name__ == "__main__":
115 |     logger.info("Starting report generation process...")
116 |     
117 |     models = ["cnn", "transformer", "svm", "bayesian", "vision_transformer"]
118 |     models_results = {}
119 | 
120 |     for model in models:
121 |         models_results[model] = load_evaluation_results(model)
122 |     
123 |     html_content = generate_html_report(models_results)
124 |     
125 |     report_path = os.path.join(config.REPORT_DIR, 'model_evaluation_report.html')
126 |     save_html_report(html_content, report_path)
127 |     
128 |     logger.info("Report generation process completed successfully.")
129 | 


--------------------------------------------------------------------------------
/scripts/train_all_models.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # To setup the environment
 4 | source ~/anaconda3/etc/profile.d/conda.sh
 5 | conda activate deepfake-detection
 6 | 
 7 | LOG_DIR="logs"
 8 | TRAIN_LOG="$LOG_DIR/train_all_models.log"
 9 | 
10 | mkdir -p $LOG_DIR
11 | 
12 | exec > >(tee -i $TRAIN_LOG)
13 | exec 2>&1
14 | 
15 | echo "===================================="
16 | echo "Starting training of all models"
17 | echo "Date: $(date)"
18 | echo "===================================="
19 | echo ""
20 | 
21 | echo "Training CNN model..."
22 | python -c "
23 | from src.train import train_cnn
24 | train_cnn()
25 | "
26 | if [ $? -eq 0 ]; then
27 |     echo "CNN model training completed successfully."
28 | else
29 |     echo "CNN model training failed."
30 |     exit 1
31 | fi
32 | echo ""
33 | 
34 | echo "Training Transformer model..."
35 | python -c "
36 | from src.train import train_transformer
37 | train_transformer()
38 | "
39 | if [ $? -eq 0 ]; then
40 |     echo "Transformer model training completed successfully."
41 | else
42 |     echo "Transformer model training failed."
43 |     exit 1
44 | fi
45 | echo ""
46 | 
47 | echo "Training SVM model..."
48 | python -c "
49 | from src.train import train_svm_model
50 | train_svm_model()
51 | "
52 | if [ $? -eq 0 ]; then
53 |     echo "SVM model training completed successfully."
54 | else
55 |     echo "SVM model training failed."
56 |     exit 1
57 | fi
58 | echo ""
59 | 
60 | echo "Training Bayesian model..."
61 | python -c "
62 | from src.train import train_bayesian
63 | train_bayesian()
64 | "
65 | if [ $? -eq 0 ]; then
66 |     echo "Bayesian model training completed successfully."
67 | else
68 |     echo "Bayesian model training failed."
69 |     exit 1
70 | fi
71 | echo ""
72 | 
73 | echo "Training Vision Transformer model..."
74 | python -c "
75 | from src.train import train_vision_transformer
76 | train_vision_transformer()
77 | "
78 | if [ $? -eq 0 ]; then
79 |     echo "Vision Transformer model training completed successfully."
80 | else
81 |     echo "Vision Transformer model training failed."
82 |     exit 1
83 | fi
84 | echo ""
85 | 
86 | echo "===================================="
87 | echo "Training of all models completed"
88 | echo "Date: $(date)"
89 | echo "===================================="
90 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | from setuptools import setup, find_packages
 2 | 
 3 | with open("README.md", "r", encoding="utf-8") as fh:
 4 |     long_description = fh.read()
 5 | 
 6 | setup(
 7 |     name="multidisciplinary-deepfake-detection",
 8 |     version="0.1.0",
 9 |     author="HacktivSpace",
10 |     author_email="devsupport@hacktivspace.com",
11 |     description="A multidisciplinary deepfake detection system using images, audios, and videos.",
12 |     long_description=long_description,
13 |     long_description_content_type="text/markdown",
14 |     url="https://github.com/HacktivSpace/multidisciplinary-deepfake-detection",
15 |     project_urls={
16 |         "Bug Tracker": "https://github.com/HacktivSpace/multidisciplinary-deepfake-detection/issues",
17 |     },
18 |     classifiers=[
19 |         "Programming Language :: Python :: 3",
20 |         "License :: OSI Approved :: MIT License",
21 |         "Operating System :: OS Independent",
22 |     ],
23 |     package_dir={"": "src"},
24 |     packages=find_packages(where="src"),
25 |     python_requires=">=3.6",
26 |     install_requires=[
27 |         "numpy>=1.19.5",
28 |         "pandas>=1.2.4",
29 |         "tensorflow>=2.4.1",
30 |         "torch>=1.8.1",
31 |         "scikit-learn>=0.24.2",
32 |         "librosa>=0.8.0",
33 |         "opencv-python>=4.5.1.48",
34 |         "matplotlib>=3.3.4",
35 |         "seaborn>=0.11.1",
36 |         "nltk>=3.5",
37 |         "spacy>=3.0.6",
38 |         "joblib>=1.0.1",
39 |         "flask>=1.1.2",
40 |         "gunicorn>=20.1.0",
41 |         "psycopg2-binary>=2.8.6",
42 |         "python-dotenv>=0.17.0",
43 |     ],
44 |     entry_points={
45 |         "console_scripts": [
46 |             "run-app=src.main:main",
47 |         ],
48 |     },
49 |     include_package_data=True,
50 |     zip_safe=False,
51 | )
52 | 


--------------------------------------------------------------------------------
/src/__init__.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import logging
 3 | from .config import config
 4 | from .dataset import data_loader, data_preprocessor, data_splitter, data_augmentation
 5 | from .models import cnn, transformer, svm, bayesian, vision_transformer
 6 | from .training import cnn_training, transformer_training, svm_training, bayesian_training, vision_transformer_training
 7 | from .evaluation import cnn_evaluation, transformer_evaluation, svm_evaluation, bayesian_evaluation, vision_transformer_evaluation
 8 | from .utils import logger, metrics, visualization, helpers, file_utils, data_utils
 9 | from .processing import audio_processing, video_processing, image_processing, text_processing
10 | from . import blockchain, nlp, dsp, train, evaluate
11 | 
12 | log_file = os.path.join(config.LOG_DIR, 'system.log')
13 | logging.basicConfig(
14 |     level=logging.INFO,
15 |     format='%(asctime)s %(name)s %(levelname)s: %(message)s',
16 |     handlers=[
17 |         logging.FileHandler(log_file),
18 |         logging.StreamHandler()
19 |     ]
20 | )
21 | 
22 | logger = logging.getLogger(__name__)
23 | logger.info("Initialization of the src module and its submodules is complete.")
24 | 


--------------------------------------------------------------------------------
/src/blockchain.py:
--------------------------------------------------------------------------------
 1 | import hashlib
 2 | import json
 3 | import time
 4 | from typing import List, Dict
 5 | 
 6 | class Block:
 7 |     def __init__(self, index: int, previous_hash: str, timestamp: float, data: Dict, nonce: int = 0):
 8 |         self.index = index
 9 |         self.previous_hash = previous_hash
10 |         self.timestamp = timestamp
11 |         self.data = data
12 |         self.nonce = nonce
13 |         self.hash = self.calculate_hash()
14 | 
15 |     def calculate_hash(self) -> str:
16 |         block_string = f"{self.index}{self.previous_hash}{self.timestamp}{json.dumps(self.data)}{self.nonce}"
17 |         return hashlib.sha256(block_string.encode()).hexdigest()
18 | 
19 |     def __str__(self) -> str:
20 |         return json.dumps(self.__dict__, indent=4)
21 | 
22 | class Blockchain:
23 |     def __init__(self, difficulty: int = 4):
24 |         self.chain: List[Block] = []
25 |         self.difficulty = difficulty
26 |         self.create_genesis_block()
27 | 
28 |     def create_genesis_block(self):
29 |         genesis_block = Block(0, "0", time.time(), {"message": "Genesis Block"})
30 |         self.chain.append(genesis_block)
31 | 
32 |     def get_latest_block(self) -> Block:
33 |         return self.chain[-1]
34 | 
35 |     def add_block(self, data: Dict):
36 |         latest_block = self.get_latest_block()
37 |         new_block = Block(
38 |             index=latest_block.index + 1,
39 |             previous_hash=latest_block.hash,
40 |             timestamp=time.time(),
41 |             data=data
42 |         )
43 |         self.mine_block(new_block)
44 |         self.chain.append(new_block)
45 | 
46 |     def mine_block(self, block: Block):
47 |         print(f"Mining block {block.index}...")
48 |         while block.hash[:self.difficulty] != '0' * self.difficulty:
49 |             block.nonce += 1
50 |             block.hash = block.calculate_hash()
51 |         print(f"Block {block.index} mined: {block.hash}")
52 | 
53 |     def is_chain_valid(self) -> bool:
54 |         for i in range(1, len(self.chain)):
55 |             current_block = self.chain[i]
56 |             previous_block = self.chain[i - 1]
57 | 
58 |             if current_block.hash != current_block.calculate_hash():
59 |                 print(f"Invalid hash at block {current_block.index}")
60 |                 return False
61 | 
62 |             if current_block.previous_hash != previous_block.hash:
63 |                 print(f"Invalid previous hash at block {current_block.index}")
64 |                 return False
65 | 
66 |         return True
67 | 
68 |     def __str__(self) -> str:
69 |         chain_data = [str(block) for block in self.chain]
70 |         return json.dumps(chain_data, indent=4)
71 | 
72 | if __name__ == "__main__":
73 |     blockchain = Blockchain(difficulty=4)
74 | 
75 |     blockchain.add_block({"transaction": "Alice pays Bob 10 BTC"})
76 |     blockchain.add_block({"transaction": "Bob pays Charlie 5 BTC"})
77 |     blockchain.add_block({"transaction": "Charlie pays Dave 2 BTC"})
78 | 
79 |     print(blockchain)
80 |     print("Blockchain valid:", blockchain.is_chain_valid())
81 | 


--------------------------------------------------------------------------------
/src/config.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | from dotenv import load_dotenv
  3 | 
  4 | load_dotenv()
  5 | 
  6 | class Config:
  7 | 
  8 |     PROJECT_NAME = "Multidisciplinary Deepfake Detection"
  9 |     VERSION = "0.1.0"
 10 |     
 11 |     BASE_DIR = os.path.dirname(os.path.abspath(__file__))
 12 |     DATA_DIR = os.path.join(BASE_DIR, '..', 'data')
 13 |     RAW_DATA_DIR = os.path.join(DATA_DIR, 'raw')
 14 |     PROCESSED_DATA_DIR = os.path.join(DATA_DIR, 'processed')
 15 |     MODEL_DIR = os.path.join(BASE_DIR, '..', 'models', 'saved_models')
 16 |     LOG_DIR = os.path.join(BASE_DIR, '..', 'logs')
 17 |     REPORT_DIR = os.path.join(BASE_DIR, '..', 'reports')
 18 | 
 19 |     LOG_LEVEL = os.getenv('LOG_LEVEL', 'INFO')
 20 |     LOG_FORMAT = '%(asctime)s %(name)s %(levelname)s: %(message)s'
 21 |     LOG_FILE = os.path.join(LOG_DIR, 'system.log')
 22 |     
 23 |     RAW_IMAGE_DIR = os.path.join(RAW_DATA_DIR, 'images')
 24 |     RAW_AUDIO_DIR = os.path.join(RAW_DATA_DIR, 'audios')
 25 |     RAW_VIDEO_DIR = os.path.join(RAW_DATA_DIR, 'videos')
 26 |     
 27 |     PROCESSED_IMAGE_DIR = os.path.join(PROCESSED_DATA_DIR, 'images')
 28 |     PROCESSED_AUDIO_DIR = os.path.join(PROCESSED_DATA_DIR, 'audios')
 29 |     PROCESSED_VIDEO_DIR = os.path.join(PROCESSED_DATA_DIR, 'videos')
 30 |     
 31 |     RAW_IMAGE_FILE = os.path.join(RAW_DATA_DIR, 'metadata_images.csv')
 32 |     RAW_AUDIO_FILE = os.path.join(RAW_DATA_DIR, 'metadata_audios.csv')
 33 |     RAW_VIDEO_FILE = os.path.join(RAW_DATA_DIR, 'metadata_videos.csv')
 34 | 
 35 |     PROCESSED_IMAGE_FILE = os.path.join(PROCESSED_DATA_DIR, 'processed_images.csv')
 36 |     PROCESSED_AUDIO_FILE = os.path.join(PROCESSED_DATA_DIR, 'processed_audios.csv')
 37 |     PROCESSED_VIDEO_FILE = os.path.join(PROCESSED_DATA_DIR, 'processed_videos.csv')
 38 | 
 39 |     CNN_PARAMS = {
 40 |         'input_shape': (64, 64, 3),
 41 |         'num_classes': 2,
 42 |         'epochs': 50,
 43 |         'batch_size': 32,
 44 |         'learning_rate': 0.001
 45 |     }
 46 | 
 47 |     TRANSFORMER_PARAMS = {
 48 |         'input_dim': 512,
 49 |         'model_dim': 512,
 50 |         'num_heads': 8,
 51 |         'num_layers': 6,
 52 |         'output_dim': 10,
 53 |         'epochs': 50,
 54 |         'batch_size': 32,
 55 |         'learning_rate': 0.001
 56 |     }
 57 | 
 58 |     SVM_PARAMS = {
 59 |         'kernel': 'linear',
 60 |         'C': 1.0
 61 |     }
 62 | 
 63 |     BAYESIAN_PARAMS = {
 64 |         'prior_mean': 0,
 65 |         'prior_std': 1
 66 |     }
 67 | 
 68 |     VISION_TRANSFORMER_PARAMS = {
 69 |         'img_size': 224,
 70 |         'patch_size': 16,
 71 |         'num_classes': 10,
 72 |         'dim': 768,
 73 |         'depth': 12,
 74 |         'heads': 12,
 75 |         'mlp_dim': 3072,
 76 |         'epochs': 50,
 77 |         'batch_size': 32,
 78 |         'learning_rate': 0.001
 79 |     }
 80 | 
 81 |     BLOCKCHAIN_DIFFICULTY = 4
 82 | 
 83 |     # Other Settings
 84 |     RANDOM_SEED = 42
 85 | 
 86 |     @staticmethod
 87 |     def ensure_directories():
 88 |         """
 89 |         Ensure that all necessary directories exist.
 90 |         """
 91 |         directories = [
 92 |             Config.DATA_DIR,
 93 |             Config.RAW_DATA_DIR,
 94 |             Config.PROCESSED_DATA_DIR,
 95 |             Config.RAW_IMAGE_DIR,
 96 |             Config.RAW_AUDIO_DIR,
 97 |             Config.RAW_VIDEO_DIR,
 98 |             Config.PROCESSED_IMAGE_DIR,
 99 |             Config.PROCESSED_AUDIO_DIR,
100 |             Config.PROCESSED_VIDEO_DIR,
101 |             Config.MODEL_DIR,
102 |             Config.LOG_DIR,
103 |             Config.REPORT_DIR
104 |         ]
105 |         for directory in directories:
106 |             if not os.path.exists(directory):
107 |                 os.makedirs(directory)
108 |     
109 |     @staticmethod
110 |     def print_config():
111 |         """
112 |         Print the current configuration settings.
113 |         """
114 |         config_dict = {attr: value for attr, value in Config.__dict__.items() if not callable(getattr(Config, attr)) and not attr.startswith("__")}
115 |         for key, value in config_dict.items():
116 |             print(f"{key}: {value}")
117 | 
if __name__ == "__main__":
    # Create the directory layout first, then print the settings.
    for step in (Config.ensure_directories, Config.print_config):
        step()
123 | 


--------------------------------------------------------------------------------
/src/dataset/__init__.py:
--------------------------------------------------------------------------------
 1 | from .data_loader import DataLoader
 2 | from .data_preprocessor import DataPreprocessor
 3 | from .data_splitter import DataSplitter
 4 | from .data_augmentation import DataAugmentation
 5 | 
 6 | __all__ = [
 7 |     'DataLoader',
 8 |     'DataPreprocessor',
 9 |     'DataSplitter',
10 |     'DataAugmentation'
11 | ]
12 | 


--------------------------------------------------------------------------------
/src/dataset/data_augmentation.py:
--------------------------------------------------------------------------------
 1 | from albumentations import (
 2 |     Compose, HorizontalFlip, VerticalFlip, RandomRotate90, Transpose, ShiftScaleRotate, Blur, OpticalDistortion, GridDistortion, IAAPiecewiseAffine
 3 | )
 4 | from albumentations.pytorch import ToTensorV2
 5 | import numpy as np
 6 | import cv2
 7 | import os
 8 | import pandas as pd
 9 | from src.config import Config
10 | 
class DataAugmentation:
    """Bundles a fixed albumentations pipeline and applies it to single images."""

    def __init__(self):
        """
        Build the augmentation pipeline.

        Every transform except the final tensor conversion is applied with
        probability 0.5.
        """
        # NOTE(review): IAA* transforms appear to be deprecated/removed in newer
        # albumentations releases — confirm the pinned library version.
        pipeline_steps = [
            HorizontalFlip(p=0.5),
            VerticalFlip(p=0.5),
            RandomRotate90(p=0.5),
            Transpose(p=0.5),
            ShiftScaleRotate(shift_limit=0.0625, scale_limit=0.1, rotate_limit=45, p=0.5),
            Blur(blur_limit=3, p=0.5),
            OpticalDistortion(distort_limit=0.05, shift_limit=0.05, p=0.5),
            GridDistortion(p=0.5),
            IAAPiecewiseAffine(p=0.5),
            ToTensorV2(),
        ]
        self.augmentations = Compose(pipeline_steps)

    def augment(self, image):
        """
        Run the pipeline on one image.
        :param image: Image to augment
        :return: The 'image' entry of the pipeline result
        """
        return self.augmentations(image=image)['image']
37 | 
def apply_augmentation(image_path):
    """
    Applying augmentation to image given its path.
    :param image_path: Path to the image file
    :return: Augmented image tensor
    :raises FileNotFoundError: if the image cannot be read from image_path
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread reports a missing or unreadable file by returning None
        # instead of raising; fail here with the offending path rather than
        # with an opaque error inside cvtColor.
        raise FileNotFoundError(f"Could not read image: {image_path}")
    # OpenCV loads images as BGR; convert to RGB before augmenting.
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    augmentation = DataAugmentation()
    return augmentation.augment(image)
49 | 
def augment_data(data):
    """
    Augment every image referenced by the dataset.
    :param data: DataFrame with a 'filename' and a 'label' column
    :return: DataFrame with columns 'image' (augmented image) and 'label'
    """
    # Walk the rows once, pairing each augmented image with its label.
    pairs = [
        (
            apply_augmentation(os.path.join(Config.RAW_DATA_DIR, 'images', record['filename'])),
            record['label'],
        )
        for _, record in data.iterrows()
    ]
    images = [picture for picture, _ in pairs]
    tags = [tag for _, tag in pairs]
    return pd.DataFrame({'image': images, 'label': tags})
67 | 
if __name__ == "__main__":
    import matplotlib.pyplot as plt

    # usage
    image_path = os.path.join(Config.RAW_DATA_DIR, 'images', 'sample_image.jpg')
    augmented_image = apply_augmentation(image_path)

    # Reload the untouched image (converted to RGB) for a side-by-side view
    original_image = cv2.cvtColor(cv2.imread(image_path), cv2.COLOR_BGR2RGB)

    fig, ax = plt.subplots(1, 2, figsize=(12, 6))
    panels = (
        (ax[0], original_image, "Original Image"),
        # The augmented result is channel-first; move channels last for imshow
        (ax[1], augmented_image.permute(1, 2, 0).numpy(), "Augmented Image"),
    )
    for axis, picture, title in panels:
        axis.imshow(picture)
        axis.set_title(title)
        axis.axis('off')

    plt.show()
89 | 


--------------------------------------------------------------------------------
/src/dataset/data_preprocessor.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | import pandas as pd
 3 | from sklearn.preprocessing import StandardScaler, LabelEncoder
 4 | from sklearn.impute import SimpleImputer
 5 | 
 6 | # To set up logging
 7 | logger = logging.getLogger(__name__)
 8 | logger.setLevel(logging.DEBUG)
 9 | 
10 | fh = logging.FileHandler('logs/data_preprocessing.log')
11 | fh.setLevel(logging.DEBUG)
12 | 
13 | formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
14 | fh.setFormatter(formatter)
15 | 
16 | logger.addHandler(fh)
17 | 
class DataPreprocessor:
    """
    Imputes and scales numerical features and label-encodes categorical ones.

    ``preprocess`` fits all transformers on a training frame; ``transform``
    re-applies the fitted transformers to new data with the same columns.
    """

    def __init__(self):
        """
        Initializing DataPreprocessor with standard scaler, label encoder, and imputer.
        """
        self.scaler = StandardScaler()
        # Encoder for the target column only; features get their own encoders below.
        self.label_encoder = LabelEncoder()
        self.imputer = SimpleImputer(strategy='mean')
        # One fitted LabelEncoder per categorical feature column, so that
        # transform() can reuse each column's own mapping.
        self.feature_encoders = {}
        # Column names remembered from preprocess() and reused by transform().
        self.numerical_features = []
        self.categorical_features = []
        self._is_fitted = False
        logger.info("DataPreprocessor initialized with StandardScaler, LabelEncoder, and SimpleImputer.")

    def preprocess(self, data, target_column):
        """
        Preprocessing the data by filling missing values, scaling numerical features, and encoding categorical features.

        Mean imputation and scaling are applied to numerical columns only (a
        mean cannot be computed for object columns). Missing values in
        categorical columns are not imputed.

        :param data: DataFrame containing the data to preprocess
        :param target_column: Name of the target column
        :return: DataFrame containing the preprocessed data, Series containing the preprocessed target
                 (a NumPy array of encoded labels when the target has object dtype)
        :raises KeyError: if target_column is not a column of data
        """
        logger.info("Starting preprocessing.")
        try:
            # To separate features and target
            features = data.drop(columns=[target_column])
            target = data[target_column]
            logger.debug(f"Features and target separated. Features shape: {features.shape}, Target shape: {target.shape}")

            numerical_features = list(features.select_dtypes(include=['number']).columns)
            categorical_features = list(features.select_dtypes(include=['object']).columns)

            if numerical_features:
                # To fill missing values
                imputed = self.imputer.fit_transform(features[numerical_features])
                logger.debug("Missing values filled.")

                features[numerical_features] = self.scaler.fit_transform(imputed)
                logger.debug("Numerical features scaled.")

            # To encode categorical features
            feature_encoders = {}
            for col in categorical_features:
                encoder = LabelEncoder()
                features[col] = encoder.fit_transform(features[col])
                feature_encoders[col] = encoder
                logger.debug(f"Categorical feature '{col}' encoded.")

            if target.dtype == 'object':
                target = self.label_encoder.fit_transform(target)
                logger.debug("Target encoded.")

            # Commit fitted state only after every step succeeded.
            self.feature_encoders = feature_encoders
            self.numerical_features = numerical_features
            self.categorical_features = categorical_features
            self._is_fitted = True

            logger.info("Preprocessing completed successfully.")
            return features, target

        except Exception as e:
            logger.error(f"Error occurred during preprocessing: {e}")
            raise

    def transform(self, data):
        """
        Transforming new data using the already fitted scaler, imputer, and label encoder.

        The input frame is not modified; a transformed copy is returned.

        :param data: DataFrame containing the data to transform (feature columns only)
        :return: DataFrame containing the transformed data
        :raises sklearn.exceptions.NotFittedError: if preprocess() has not been run yet
        :raises KeyError: if a column seen during preprocess() is missing from data
        """
        from sklearn.exceptions import NotFittedError

        logger.info("Starting data transformation.")
        try:
            if not self._is_fitted:
                raise NotFittedError("DataPreprocessor.transform called before preprocess.")

            data = data.copy()

            if self.numerical_features:
                imputed = self.imputer.transform(data[self.numerical_features])
                logger.debug("Missing values filled in new data.")

                data[self.numerical_features] = self.scaler.transform(imputed)
                logger.debug("Numerical features scaled in new data.")

            for col in self.categorical_features:
                data[col] = self.feature_encoders[col].transform(data[col])
                logger.debug(f"Categorical feature '{col}' encoded in new data.")

            logger.info("Data transformation completed successfully.")
            return data

        except Exception as e:
            logger.error(f"Error occurred during data transformation: {e}")
            raise
94 | 


--------------------------------------------------------------------------------
/src/dataset/data_splitter.py:
--------------------------------------------------------------------------------
 1 | from sklearn.model_selection import train_test_split
 2 | import pandas as pd
 3 | import logging
 4 | 
 5 | class DataSplitter:
 6 |     def __init__(self, test_size=0.2, val_size=0.1, random_state=42):
 7 |         """
 8 |         Initializing DataSplitter.
 9 |         :param test_size: Proportion of the dataset to include in the test split
10 |         :param val_size: Proportion of the dataset to include in the validation split
11 |         :param random_state: Seed used by the random number generator
12 |         """
13 |         self.test_size = test_size
14 |         self.val_size = val_size
15 |         self.random_state = random_state
16 |         self.logger = logging.getLogger('data_splitter_logger')
17 | 
18 |     def split(self, data, target_column):
19 |         """
20 |         Splitting data into training, validation, and testing sets.
21 |         :param data: DataFrame containing the data to split
22 |         :param target_column: Name of the target column
23 |         :return: Tuple containing the training, validation, and testing sets (X_train, X_val, X_test, y_train, y_val, y_test)
24 |         """
25 |         self.logger.info(f"Splitting data with target column '{target_column}'")
26 |         
27 |         try:
28 |             # To separate features and target
29 |             X = data.drop(columns=[target_column])
30 |             y = data[target_column]
31 |             
32 |             # First split to get test set
33 |             X_train_val, X_test, y_train_val, y_test = train_test_split(X, y, test_size=self.test_size, random_state=self.random_state)
34 |             self.logger.info(f"Initial split: {X_train_val.shape[0]} train/val samples, {X_test.shape[0]} test samples")
35 | 
36 |             # To calculate proportion of remaining data to allocate to validation
37 |             val_size_adjusted = self.val_size / (1 - self.test_size)
38 | 
39 |             # Second split to get validation set from remaining training data
40 |             X_train, X_val, y_train, y_val = train_test_split(X_train_val, y_train_val, test_size=val_size_adjusted, random_state=self.random_state)
41 |             self.logger.info(f"Second split: {X_train.shape[0]} train samples, {X_val.shape[0]} validation samples")
42 |             
43 |             return X_train, X_val, X_test, y_train, y_val, y_test
44 |         except Exception as e:
45 |             self.logger.error(f"Error during data splitting: {e}", exc_info=True)
46 |             raise
47 | 
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger('data_splitter_logger')

    # usage: placeholder path, replace with a real processed CSV
    data_path = 'path/to/processed_data.csv'
    data = pd.read_csv(data_path)
    target_column = 'label'
    splitter = DataSplitter(test_size=0.2, val_size=0.1, random_state=42)
    X_train, X_val, X_test, y_train, y_val, y_test = splitter.split(data, target_column)

    # Report the size of each partition
    for title, part in (("Training", X_train), ("Validation", X_val), ("Test", X_test)):
        logger.info(f"{title} set size: {part.shape[0]} samples")
62 | 


--------------------------------------------------------------------------------
/src/evaluation/__init__.py:
--------------------------------------------------------------------------------
 1 | from .cnn_evaluation import evaluate_cnn
 2 | from .transformer_evaluation import evaluate_transformer
 3 | from .svm_evaluation import evaluate_svm
 4 | from .bayesian_evaluation import evaluate_bayesian
 5 | from .vision_transformer_evaluation import evaluate_vision_transformer
 6 | 
 7 | __all__ = [
 8 |     'evaluate_cnn',
 9 |     'evaluate_transformer',
10 |     'evaluate_svm',
11 |     'evaluate_bayesian',
12 |     'evaluate_vision_transformer'
13 | ]
14 | 


--------------------------------------------------------------------------------
/src/evaluation/bayesian_evaluation.py:
--------------------------------------------------------------------------------
 1 | from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, f1_score, precision_score, recall_score
 2 | import logging
 3 | import pandas as pd
 4 | 
 5 | def evaluate_bayesian(model, X_test, y_test):
 6 |     """
 7 |     Evaluating Bayesian model.
 8 |     :param model: Trained Bayesian model
 9 |     :param X_test: Test data features
10 |     :param y_test: Test data labels
11 |     :return: Dictionary of evaluation metrics
12 |     """
13 |     logger = logging.getLogger('evaluation_logger')
14 |     
15 |     try:
16 |         logger.info("Evaluating Bayesian model...")
17 |         
18 |         logger.info("Predicting test data with Bayesian model...")
19 |         y_pred = model.predict(X_test)
20 |         
21 |         # To calculate evaluation metrics
22 |         logger.info("Calculating evaluation metrics...")
23 |         accuracy = accuracy_score(y_test, y_pred)
24 |         f1 = f1_score(y_test, y_pred, average='weighted')
25 |         precision = precision_score(y_test, y_pred, average='weighted')
26 |         recall = recall_score(y_test, y_pred, average='weighted')
27 |         report = classification_report(y_test, y_pred)
28 |         conf_matrix = confusion_matrix(y_test, y_pred)
29 |         
30 |         logger.info(f"Bayesian Model Accuracy: {accuracy}")
31 |         logger.info(f"Bayesian Model F1 Score: {f1}")
32 |         logger.info(f"Bayesian Model Precision: {precision}")
33 |         logger.info(f"Bayesian Model Recall: {recall}")
34 |         logger.info(f"Classification Report:\n{report}")
35 |         logger.info(f"Confusion Matrix:\n{conf_matrix}")
36 |         
37 |         report_path = 'path/to/report_dir/bayesian_classification_report.json'
38 |         cm_path = 'path/to/report_dir/bayesian_confusion_matrix.csv'
39 |         accuracy_path = 'path/to/report_dir/bayesian_accuracy.txt'
40 |         
41 |         pd.DataFrame(conf_matrix).to_csv(cm_path, index=False)
42 |         with open(report_path, 'w') as f:
43 |             f.write(report)
44 |         with open(accuracy_path, 'w') as f:
45 |             f.write(str(accuracy))
46 |         
47 |         logger.info(f"Classification report saved to {report_path}")
48 |         logger.info(f"Confusion matrix saved to {cm_path}")
49 |         logger.info(f"Accuracy saved to {accuracy_path}")
50 |         
51 |         return {
52 |             'accuracy': accuracy,
53 |             'f1_score': f1,
54 |             'precision': precision,
55 |             'recall': recall,
56 |             'classification_report': report,
57 |             'confusion_matrix': conf_matrix
58 |         }
59 |     
60 |     except Exception as e:
61 |         logger.error(f"Error during Bayesian model evaluation: {e}", exc_info=True)
62 |         raise
63 | 


--------------------------------------------------------------------------------
/src/evaluation/cnn_evaluation.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, f1_score, precision_score, recall_score
 3 | import logging
 4 | import pandas as pd
 5 | 
 6 | def evaluate_cnn(model, X_test, y_test):
 7 |     """
 8 |     Evaluating CNN model.
 9 |     :param model: Trained CNN model
10 |     :param X_test: Test data features
11 |     :param y_test: Test data labels
12 |     :return: Dictionary of evaluation metrics
13 |     """
14 |     logger = logging.getLogger('evaluation_logger')
15 |     
16 |     try:
17 |         logger.info("Predicting test data with CNN model...")
18 |         y_pred_probs = model.predict(X_test)
19 |         y_pred_classes = y_pred_probs.argmax(axis=1)
20 |         y_true_classes = y_test.argmax(axis=1)
21 |         
22 |         # To calculate evaluation metrics
23 |         logger.info("Calculating evaluation metrics...")
24 |         accuracy = accuracy_score(y_true_classes, y_pred_classes)
25 |         f1 = f1_score(y_true_classes, y_pred_classes, average='weighted')
26 |         precision = precision_score(y_true_classes, y_pred_classes, average='weighted')
27 |         recall = recall_score(y_true_classes, y_pred_classes, average='weighted')
28 |         report = classification_report(y_true_classes, y_pred_classes)
29 |         conf_matrix = confusion_matrix(y_true_classes, y_pred_classes)
30 |         
31 |         logger.info(f"CNN Model Accuracy: {accuracy}")
32 |         logger.info(f"CNN Model F1 Score: {f1}")
33 |         logger.info(f"CNN Model Precision: {precision}")
34 |         logger.info(f"CNN Model Recall: {recall}")
35 |         logger.info(f"Classification Report:\n{report}")
36 |         logger.info(f"Confusion Matrix:\n{conf_matrix}")
37 | 
38 |         report_path = 'path/to/report_dir/cnn_classification_report.json'
39 |         cm_path = 'path/to/report_dir/cnn_confusion_matrix.csv'
40 |         accuracy_path = 'path/to/report_dir/cnn_accuracy.txt'
41 |         
42 |         pd.DataFrame(conf_matrix).to_csv(cm_path, index=False)
43 |         with open(report_path, 'w') as f:
44 |             f.write(report)
45 |         with open(accuracy_path, 'w') as f:
46 |             f.write(str(accuracy))
47 |         
48 |         logger.info(f"Classification report saved to {report_path}")
49 |         logger.info(f"Confusion matrix saved to {cm_path}")
50 |         logger.info(f"Accuracy saved to {accuracy_path}")
51 |         
52 |         return {
53 |             'accuracy': accuracy,
54 |             'f1_score': f1,
55 |             'precision': precision,
56 |             'recall': recall,
57 |             'classification_report': report,
58 |             'confusion_matrix': conf_matrix
59 |         }
60 |     
61 |     except Exception as e:
62 |         logger.error(f"Error during CNN model evaluation: {e}", exc_info=True)
63 |         raise
64 | 


--------------------------------------------------------------------------------
/src/evaluation/svm_evaluation.py:
--------------------------------------------------------------------------------
 1 | from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, f1_score, precision_score, recall_score
 2 | import logging
 3 | import pandas as pd
 4 | 
 5 | def evaluate_svm(model, X_test, y_test):
 6 |     """
 7 |     Evaluating SVM model.
 8 |     :param model: Trained SVM model
 9 |     :param X_test: Test data features
10 |     :param y_test: Test data labels
11 |     :return: Dictionary of evaluation metrics
12 |     """
13 |     logger = logging.getLogger('evaluation_logger')
14 |     
15 |     try:
16 |         logger.info("Evaluating SVM model...")
17 |         
18 |         logger.info("Predicting test data with SVM model...")
19 |         y_pred = model.predict(X_test)
20 |         
21 |         # To calculate evaluation metrics
22 |         logger.info("Calculating evaluation metrics...")
23 |         accuracy = accuracy_score(y_test, y_pred)
24 |         f1 = f1_score(y_test, y_pred, average='weighted')
25 |         precision = precision_score(y_test, y_pred, average='weighted')
26 |         recall = recall_score(y_test, y_pred, average='weighted')
27 |         report = classification_report(y_test, y_pred)
28 |         conf_matrix = confusion_matrix(y_test, y_pred)
29 |         
30 |         logger.info(f"SVM Model Accuracy: {accuracy}")
31 |         logger.info(f"SVM Model F1 Score: {f1}")
32 |         logger.info(f"SVM Model Precision: {precision}")
33 |         logger.info(f"SVM Model Recall: {recall}")
34 |         logger.info(f"Classification Report:\n{report}")
35 |         logger.info(f"Confusion Matrix:\n{conf_matrix}")
36 | 
37 |         report_path = 'path/to/report_dir/svm_classification_report.json'
38 |         cm_path = 'path/to/report_dir/svm_confusion_matrix.csv'
39 |         accuracy_path = 'path/to/report_dir/svm_accuracy.txt'
40 |         
41 |         pd.DataFrame(conf_matrix).to_csv(cm_path, index=False)
42 |         with open(report_path, 'w') as f:
43 |             f.write(report)
44 |         with open(accuracy_path, 'w') as f:
45 |             f.write(str(accuracy))
46 |         
47 |         logger.info(f"Classification report saved to {report_path}")
48 |         logger.info(f"Confusion matrix saved to {cm_path}")
49 |         logger.info(f"Accuracy saved to {accuracy_path}")
50 |         
51 |         return {
52 |             'accuracy': accuracy,
53 |             'f1_score': f1,
54 |             'precision': precision,
55 |             'recall': recall,
56 |             'classification_report': report,
57 |             'confusion_matrix': conf_matrix
58 |         }
59 |     
60 |     except Exception as e:
61 |         logger.error(f"Error during SVM model evaluation: {e}", exc_info=True)
62 |         raise
63 | 


--------------------------------------------------------------------------------
/src/evaluation/transformer_evaluation.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import numpy as np
 3 | from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, f1_score, precision_score, recall_score
 4 | import logging
 5 | import pandas as pd
 6 | 
 7 | def evaluate_transformer(model, dataloader, device):
 8 |     """
 9 |     Evaluating Transformer model.
10 |     :param model: Trained Transformer model
11 |     :param dataloader: DataLoader for the test data
12 |     :param device: Device to perform evaluation on ('cpu' or 'cuda')
13 |     :return: Dictionary of evaluation metrics
14 |     """
15 |     logger = logging.getLogger('evaluation_logger')
16 |     
17 |     try:
18 |         model.eval()
19 |         all_preds = []
20 |         all_labels = []
21 | 
22 |         logger.info("Starting evaluation of Transformer model...")
23 | 
24 |         with torch.no_grad():
25 |             for batch_idx, batch in enumerate(dataloader):
26 |                 inputs, labels = batch
27 |                 inputs, labels = inputs.to(device), labels.to(device)
28 |                 outputs = model(inputs)
29 |                 _, preds = torch.max(outputs, 1)
30 |                 
31 |                 all_preds.append(preds.cpu().numpy())
32 |                 all_labels.append(labels.cpu().numpy())
33 |                 
34 |                 logger.debug(f"Processed batch {batch_idx + 1}/{len(dataloader)}")
35 | 
36 |         all_preds = np.concatenate(all_preds)
37 |         all_labels = np.concatenate(all_labels)
38 | 
39 |         # To calculate evaluation metrics
40 |         logger.info("Calculating evaluation metrics...")
41 |         accuracy = accuracy_score(all_labels, all_preds)
42 |         f1 = f1_score(all_labels, all_preds, average='weighted')
43 |         precision = precision_score(all_labels, all_preds, average='weighted')
44 |         recall = recall_score(all_labels, all_preds, average='weighted')
45 |         report = classification_report(all_labels, all_preds)
46 |         conf_matrix = confusion_matrix(all_labels, all_preds)
47 | 
48 |         logger.info(f"Transformer Model Accuracy: {accuracy}")
49 |         logger.info(f"Transformer Model F1 Score: {f1}")
50 |         logger.info(f"Transformer Model Precision: {precision}")
51 |         logger.info(f"Transformer Model Recall: {recall}")
52 |         logger.info(f"Classification Report:\n{report}")
53 |         logger.info(f"Confusion Matrix:\n{conf_matrix}")
54 | 
55 |         report_path = 'path/to/report_dir/transformer_classification_report.json'
56 |         cm_path = 'path/to/report_dir/transformer_confusion_matrix.csv'
57 |         accuracy_path = 'path/to/report_dir/transformer_accuracy.txt'
58 |         
59 |         pd.DataFrame(conf_matrix).to_csv(cm_path, index=False)
60 |         with open(report_path, 'w') as f:
61 |             f.write(report)
62 |         with open(accuracy_path, 'w') as f:
63 |             f.write(str(accuracy))
64 |         
65 |         logger.info(f"Classification report saved to {report_path}")
66 |         logger.info(f"Confusion matrix saved to {cm_path}")
67 |         logger.info(f"Accuracy saved to {accuracy_path}")
68 | 
69 |         return {
70 |             'accuracy': accuracy,
71 |             'f1_score': f1,
72 |             'precision': precision,
73 |             'recall': recall,
74 |             'classification_report': report,
75 |             'confusion_matrix': conf_matrix
76 |         }
77 |     
78 |     except Exception as e:
79 |         logger.error(f"Error during Transformer model evaluation: {e}", exc_info=True)
80 |         raise
81 | 


--------------------------------------------------------------------------------
/src/evaluation/vision_transformer_evaluation.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import numpy as np
 3 | from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, f1_score, precision_score, recall_score
 4 | import logging
 5 | import pandas as pd
 6 | 
 7 | def evaluate_vision_transformer(model, dataloader, device):
 8 |     """
 9 |     Evaluating Vision Transformer model.
10 |     :param model: Trained Vision Transformer model
11 |     :param dataloader: DataLoader for the test data
12 |     :param device: Device to perform evaluation on ('cpu' or 'cuda')
13 |     :return: Dictionary of evaluation metrics
14 |     """
15 |     logger = logging.getLogger('evaluation_logger')
16 |     
17 |     try:
18 |         logger.info("Evaluating Vision Transformer model...")
19 |         
20 |         model.eval()
21 |         all_preds = []
22 |         all_labels = []
23 | 
24 |         logger.info("Starting evaluation of Vision Transformer model...")
25 | 
26 |         with torch.no_grad():
27 |             for batch_idx, batch in enumerate(dataloader):
28 |                 inputs, labels = batch
29 |                 inputs, labels = inputs.to(device), labels.to(device)
30 |                 outputs = model(inputs)
31 |                 _, preds = torch.max(outputs, 1)
32 |                 
33 |                 all_preds.append(preds.cpu().numpy())
34 |                 all_labels.append(labels.cpu().numpy())
35 |                 
36 |                 logger.debug(f"Processed batch {batch_idx + 1}/{len(dataloader)}")
37 | 
38 |         all_preds = np.concatenate(all_preds)
39 |         all_labels = np.concatenate(all_labels)
40 | 
41 |         # To calculate evaluation metrics
42 |         logger.info("Calculating evaluation metrics...")
43 |         accuracy = accuracy_score(all_labels, all_preds)
44 |         f1 = f1_score(all_labels, all_preds, average='weighted')
45 |         precision = precision_score(all_labels, all_preds, average='weighted')
46 |         recall = recall_score(all_labels, all_preds, average='weighted')
47 |         report = classification_report(all_labels, all_preds)
48 |         conf_matrix = confusion_matrix(all_labels, all_preds)
49 | 
50 |         logger.info(f"Vision Transformer Model Accuracy: {accuracy}")
51 |         logger.info(f"Vision Transformer Model F1 Score: {f1}")
52 |         logger.info(f"Vision Transformer Model Precision: {precision}")
53 |         logger.info(f"Vision Transformer Model Recall: {recall}")
54 |         logger.info(f"Classification Report:\n{report}")
55 |         logger.info(f"Confusion Matrix:\n{conf_matrix}")
56 | 
57 |         report_path = 'path/to/report_dir/vision_transformer_classification_report.json'
58 |         cm_path = 'path/to/report_dir/vision_transformer_confusion_matrix.csv'
59 |         accuracy_path = 'path/to/report_dir/vision_transformer_accuracy.txt'
60 |         
61 |         pd.DataFrame(conf_matrix).to_csv(cm_path, index=False)
62 |         with open(report_path, 'w') as f:
63 |             f.write(report)
64 |         with open(accuracy_path, 'w') as f:
65 |             f.write(str(accuracy))
66 |         
67 |         logger.info(f"Classification report saved to {report_path}")
68 |         logger.info(f"Confusion matrix saved to {cm_path}")
69 |         logger.info(f"Accuracy saved to {accuracy_path}")
70 | 
71 |         return {
72 |             'accuracy': accuracy,
73 |             'f1_score': f1,
74 |             'precision': precision,
75 |             'recall': recall,
76 |             'classification_report': report,
77 |             'confusion_matrix': conf_matrix
78 |         }
79 |     
80 |     except Exception as e:
81 |         logger.error(f"Error during Vision Transformer model evaluation: {e}", exc_info=True)
82 |         raise
83 | 


--------------------------------------------------------------------------------
/src/models/__init__.py:
--------------------------------------------------------------------------------
 1 | from .cnn import CNNModel
 2 | from .transformer import TransformerModel
 3 | from .svm import SVMModel
 4 | from .bayesian import BayesianModel
 5 | from .vision_transformer import VisionTransformer
 6 | 
 7 | __all__ = [
 8 |     'CNNModel',
 9 |     'TransformerModel',
10 |     'SVMModel',
11 |     'BayesianModel',
12 |     'VisionTransformer'
13 | ]
14 | 


--------------------------------------------------------------------------------
/src/models/bayesian.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from sklearn.base import BaseEstimator, ClassifierMixin
 3 | import logging
 4 | 
 5 | class BayesianModel(BaseEstimator, ClassifierMixin):
 6 |     def __init__(self, prior_mean=0, prior_std=1):
 7 |         """
 8 |         Initializing Bayesian model with prior mean and standard deviation.
 9 |         :param prior_mean: Mean of the prior distribution
10 |         :param prior_std: Standard deviation of the prior distribution
11 |         """
12 |         self.prior_mean = prior_mean
13 |         self.prior_std = prior_std
14 |         self.mean_ = None
15 |         self.std_ = None
16 |         self.classes_ = None
17 |         self.logger = logging.getLogger('bayesian_model_logger')
18 |         self.logger.info("Initialized BayesianModel with prior_mean=%s, prior_std=%s", prior_mean, prior_std)
19 | 
20 |     def fit(self, X, y):
21 |         """
22 |         To fit the Bayesian model to training data.
23 |         :param X: Training data features
24 |         :param y: Training data labels
25 |         :return: Self
26 |         """
27 |         self.logger.info("Fitting Bayesian model...")
28 |         self.classes_, counts = np.unique(y, return_counts=True)
29 |         self.mean_ = np.zeros((len(self.classes_), X.shape[1]))
30 |         self.std_ = np.zeros((len(self.classes_), X.shape[1]))
31 | 
32 |         for idx, label in enumerate(self.classes_):
33 |             X_class = X[y == label]
34 |             self.mean_[idx, :] = X_class.mean(axis=0)
35 |             self.std_[idx, :] = X_class.std(axis=0)
36 | 
37 |         self.logger.info("Model fitted with classes: %s", self.classes_)
38 |         return self
39 | 
40 |     def predict_proba(self, X):
41 |         """
42 |         Predicting class probabilities for X.
43 |         :param X: Input data
44 |         :return: Class probabilities
45 |         """
46 |         self.logger.info("Predicting class probabilities...")
47 |         log_prior = np.log(1.0 / len(self.classes_))
48 |         log_likelihood = -0.5 * np.sum(((X[:, np.newaxis, :] - self.mean_) / (self.std_ + 1e-9)) ** 2, axis=2)
49 |         log_likelihood -= np.log(self.std_ + 1e-9).sum(axis=1)
50 |         log_posterior = log_likelihood + log_prior
51 |         log_posterior -= log_posterior.max(axis=1, keepdims=True)
52 |         posterior = np.exp(log_posterior)
53 |         posterior /= posterior.sum(axis=1, keepdims=True)
54 |         return posterior
55 | 
56 |     def predict(self, X):
57 |         """
58 |         Predicting class labels for X.
59 |         :param X: Input data
60 |         :return: Predicted class labels
61 |         """
62 |         self.logger.info("Predicting class labels...")
63 |         proba = self.predict_proba(X)
64 |         predictions = self.classes_[np.argmax(proba, axis=1)]
65 |         self.logger.info("Predictions: %s", predictions)
66 |         return predictions
67 | 
68 |     def predict_log_proba(self, X):
69 |         """
70 |         Predicting log-probabilities of the classes for input samples X.
71 |         :param X: Input data
72 |         :return: Log-probabilities of the classes
73 |         """
74 |         self.logger.info("Predicting log-probabilities...")
75 |         log_proba = np.log(self.predict_proba(X))
76 |         self.logger.info("Log-probabilities: %s", log_proba)
77 |         return log_proba
78 | 
79 | if __name__ == "__main__":
80 |     logging.basicConfig(level=logging.INFO)
81 |     logger = logging.getLogger('bayesian_model_logger')
82 |     logger.info("Testing BayesianModel...")
83 |     
84 |     X_train = np.array([[1, 2], [2, 3], [3, 4], [4, 5]])
85 |     y_train = np.array([0, 0, 1, 1])
86 |     
87 |     model = BayesianModel(prior_mean=0, prior_std=1)
88 |     model.fit(X_train, y_train)
89 |     
90 |     X_test = np.array([[1.5, 2.5], [3.5, 4.5]])
91 |     predictions = model.predict(X_test)
92 |     logger.info("Test predictions: %s", predictions)
93 | 


--------------------------------------------------------------------------------
/src/models/cnn.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | from tensorflow.keras.models import Sequential
 3 | from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
 4 | from tensorflow.keras.optimizers import Adam
 5 | import logging
 6 | 
 7 | class CNNModel:
 8 |     @staticmethod
 9 |     def build(input_shape, num_classes):
10 |         """
11 |         Building Convolutional Neural Network (CNN) model.
12 |         :param input_shape: Shape of the input data (height, width, channels)
13 |         :param num_classes: Number of classes for the output layer
14 |         :return: Compiled CNN model
15 |         """
16 |         logger = logging.getLogger('cnn_model_logger')
17 |         logger.info(f"Building CNN model with input shape {input_shape} and {num_classes} output classes.")
18 |         
19 |         try:
20 |             model = Sequential()
21 |             
22 |             # Convolutional Layer 1
23 |             model.add(Conv2D(32, (3, 3), activation='relu', input_shape=input_shape))
24 |             model.add(BatchNormalization())
25 |             model.add(MaxPooling2D(pool_size=(2, 2)))
26 |             logger.info("Added first convolutional layer.")
27 |             
28 |             # Convolutional Layer 2
29 |             model.add(Conv2D(64, (3, 3), activation='relu'))
30 |             model.add(BatchNormalization())
31 |             model.add(MaxPooling2D(pool_size=(2, 2)))
32 |             logger.info("Added second convolutional layer.")
33 |             
34 |             # Convolutional Layer 3
35 |             model.add(Conv2D(128, (3, 3), activation='relu'))
36 |             model.add(BatchNormalization())
37 |             model.add(MaxPooling2D(pool_size=(2, 2)))
38 |             logger.info("Added third convolutional layer.")
39 |             
40 |             # Flattening Layer
41 |             model.add(Flatten())
42 |             logger.info("Added flattening layer.")
43 |             
44 |             # Fully Connected Layer 1
45 |             model.add(Dense(256, activation='relu'))
46 |             model.add(Dropout(0.5))
47 |             logger.info("Added first fully connected layer with dropout.")
48 |             
49 |             # Fully Connected Layer 2
50 |             model.add(Dense(128, activation='relu'))
51 |             model.add(Dropout(0.5))
52 |             logger.info("Added second fully connected layer with dropout.")
53 |             
54 |             # Output Layer
55 |             model.add(Dense(num_classes, activation='softmax'))
56 |             logger.info("Added output layer.")
57 |             
58 |             optimizer = Adam(learning_rate=0.001)
59 |             model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
60 |             logger.info("Compiled the CNN model.")
61 |             
62 |             return model
63 |         except Exception as e:
64 |             logger.error(f"Error building CNN model: {e}", exc_info=True)
65 |             raise
66 | 
67 | if __name__ == "__main__":
68 |     logging.basicConfig(level=logging.INFO)
69 |     logger = logging.getLogger('cnn_model_logger')
70 |     logger.info("Starting to build the CNN model for testing purposes.")
71 |     
72 |     # Usage for testing
73 |     input_shape = (64, 64, 3)
74 |     num_classes = 2  
75 |     model = CNNModel.build(input_shape, num_classes)
76 |     
77 |     logger.info("CNN model built successfully.")
78 | 


--------------------------------------------------------------------------------
/src/models/svm.py:
--------------------------------------------------------------------------------
 1 | from sklearn.svm import SVC
 2 | from sklearn.preprocessing import StandardScaler
 3 | from sklearn.pipeline import Pipeline
 4 | import joblib
 5 | import logging
 6 | 
 7 | class SVMModel:
 8 |     @staticmethod
 9 |     def build(kernel='linear', C=1.0):
10 |         """
11 |         Building Support Vector Machine (SVM) model.
12 |         :param kernel: Specifies the kernel type to be used in the algorithm
13 |         :param C: Regularization parameter
14 |         :return: SVM model pipeline
15 |         """
16 |         logger = logging.getLogger('svm_model_logger')
17 |         logger.info(f"Building SVM model with kernel={kernel}, C={C}.")
18 |         
19 |         try:
20 |             pipeline = Pipeline([
21 |                 ('scaler', StandardScaler()),
22 |                 ('svm', SVC(kernel=kernel, C=C, probability=True))
23 |             ])
24 |             logger.info("SVM model built successfully.")
25 |             return pipeline
26 |         except Exception as e:
27 |             logger.error(f"Error building SVM model: {e}", exc_info=True)
28 |             raise
29 | 
30 |     @staticmethod
31 |     def save(model, model_path):
32 |         """
33 |         Saving SVM model to file.
34 |         :param model: Trained SVM model
35 |         :param model_path: Path to save the model
36 |         """
37 |         logger = logging.getLogger('svm_model_logger')
38 |         logger.info(f"Saving SVM model to {model_path}.")
39 |         
40 |         try:
41 |             joblib.dump(model, model_path)
42 |             logger.info("SVM model saved successfully.")
43 |         except Exception as e:
44 |             logger.error(f"Error saving SVM model: {e}", exc_info=True)
45 |             raise
46 | 
47 |     @staticmethod
48 |     def load(model_path):
49 |         """
50 |         Loading SVM model from file.
51 |         :param model_path: Path to load the model from
52 |         :return: Loaded SVM model
53 |         """
54 |         logger = logging.getLogger('svm_model_logger')
55 |         logger.info(f"Loading SVM model from {model_path}.")
56 |         
57 |         try:
58 |             model = joblib.load(model_path)
59 |             logger.info("SVM model loaded successfully.")
60 |             return model
61 |         except Exception as e:
62 |             logger.error(f"Error loading SVM model: {e}", exc_info=True)
63 |             raise
64 | 
65 | if __name__ == "__main__":
66 |     logging.basicConfig(level=logging.INFO)
67 |     logger = logging.getLogger('svm_model_logger')
68 |     logger.info("Starting to build, save, and load SVM model for testing purposes.")
69 |     
70 |     model = SVMModel.build(kernel='rbf', C=1.0)
71 |     model_path = 'svm_model_test.pkl'
72 |     SVMModel.save(model, model_path)
73 |     loaded_model = SVMModel.load(model_path)
74 |     
75 |     logger.info("SVM model build, save, and load process completed successfully.")
76 | 


--------------------------------------------------------------------------------
/src/models/transformer.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | from torch import nn
  3 | import math
  4 | import logging
  5 | 
  6 | class TransformerModel(nn.Module):
  7 |     def __init__(self, input_dim, model_dim, num_heads, num_layers, output_dim, dropout=0.1):
  8 |         """
  9 |         Initializing Transformer model.
 10 |         :param input_dim: Dimension of the input features
 11 |         :param model_dim: Dimension of the transformer model
 12 |         :param num_heads: Number of attention heads
 13 |         :param num_layers: Number of transformer layers
 14 |         :param output_dim: Dimension of the output (number of classes)
 15 |         :param dropout: Dropout rate
 16 |         """
 17 |         super(TransformerModel, self).__init__()
 18 |         self.logger = logging.getLogger('transformer_model_logger')
 19 |         self.logger.info(f"Initializing Transformer model with input_dim={input_dim}, model_dim={model_dim}, num_heads={num_heads}, num_layers={num_layers}, output_dim={output_dim}, dropout={dropout}.")
 20 |         
 21 |         try:
 22 |             self.embedding = nn.Linear(input_dim, model_dim)
 23 |             self.positional_encoding = PositionalEncoding(model_dim, dropout)
 24 |             encoder_layers = nn.TransformerEncoderLayer(model_dim, num_heads, dim_feedforward=model_dim * 4, dropout=dropout)
 25 |             self.transformer_encoder = nn.TransformerEncoder(encoder_layers, num_layers)
 26 |             self.decoder = nn.Linear(model_dim, output_dim)
 27 |             self.logger.info("Transformer model initialized successfully.")
 28 |         except Exception as e:
 29 |             self.logger.error(f"Error initializing Transformer model: {e}", exc_info=True)
 30 |             raise
 31 |     
 32 |     def forward(self, src):
 33 |         """
 34 |         To forward pass of transformer model.
 35 |         :param src: Input tensor
 36 |         :return: Output tensor
 37 |         """
 38 |         self.logger.info(f"Performing forward pass with input tensor of shape {src.shape}.")
 39 |         
 40 |         try:
 41 |             src = self.embedding(src) * math.sqrt(src.size(1))
 42 |             src = self.positional_encoding(src)
 43 |             output = self.transformer_encoder(src)
 44 |             output = self.decoder(output.mean(dim=1))
 45 |             self.logger.info(f"Forward pass completed with output tensor of shape {output.shape}.")
 46 |             return output
 47 |         except Exception as e:
 48 |             self.logger.error(f"Error during forward pass: {e}", exc_info=True)
 49 |             raise
 50 | 
 51 | class PositionalEncoding(nn.Module):
 52 |     def __init__(self, d_model, dropout=0.1, max_len=5000):
 53 |         """
 54 |         Initializing Positional Encoding.
 55 |         :param d_model: Dimension of the model
 56 |         :param dropout: Dropout rate
 57 |         :param max_len: Maximum length of the input sequences
 58 |         """
 59 |         super(PositionalEncoding, self).__init__()
 60 |         self.logger = logging.getLogger('positional_encoding_logger')
 61 |         self.logger.info(f"Initializing Positional Encoding with d_model={d_model}, dropout={dropout}, max_len={max_len}.")
 62 |         
 63 |         try:
 64 |             self.dropout = nn.Dropout(p=dropout)
 65 |             
 66 |             position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
 67 |             div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
 68 |             pe = torch.zeros(max_len, d_model)
 69 |             pe[:, 0::2] = torch.sin(position * div_term)
 70 |             pe[:, 1::2] = torch.cos(position * div_term)
 71 |             pe = pe.unsqueeze(0).transpose(0, 1)
 72 |             self.register_buffer('pe', pe)
 73 |             self.logger.info("Positional Encoding initialized successfully.")
 74 |         except Exception as e:
 75 |             self.logger.error(f"Error initializing Positional Encoding: {e}", exc_info=True)
 76 |             raise
 77 |     
 78 |     def forward(self, x):
 79 |         """
 80 |         To forward pass of positional encoding.
 81 |         :param x: Input tensor
 82 |         :return: Tensor with positional encoding added
 83 |         """
 84 |         self.logger.info(f"Performing forward pass with input tensor of shape {x.shape}.")
 85 |         
 86 |         try:
 87 |             x = x + self.pe[:x.size(0), :]
 88 |             x = self.dropout(x)
 89 |             self.logger.info(f"Forward pass of Positional Encoding completed with output tensor of shape {x.shape}.")
 90 |             return x
 91 |         except Exception as e:
 92 |             self.logger.error(f"Error during forward pass of Positional Encoding: {e}", exc_info=True)
 93 |             raise
 94 | 
 95 | if __name__ == "__main__":
 96 |     logging.basicConfig(level=logging.INFO)
 97 |     logger = logging.getLogger('transformer_model_logger')
 98 |     logger.info("Starting to initialize and build the Transformer model for testing purposes.")
 99 |     
100 |     input_dim = 512
101 |     model_dim = 512
102 |     num_heads = 8
103 |     num_layers = 6
104 |     output_dim = 2  
105 |     model = TransformerModel(input_dim, model_dim, num_heads, num_layers, output_dim)
106 |     
107 |     logger.info("Transformer model initialized and built successfully.")
108 | 


--------------------------------------------------------------------------------
/src/nlp.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import re
  3 | import nltk
  4 | import spacy
  5 | import pandas as pd
  6 | from nltk.tokenize import word_tokenize
  7 | from nltk.corpus import stopwords
  8 | from nltk.stem import WordNetLemmatizer
  9 | from spacy.lang.en import English
 10 | 
 11 | from src.config import Config
 12 | from src.utils.file_utils import save_to_file, read_from_file
 13 | 
 14 | nltk.download('punkt')  # tokenizer data for word_tokenize
 15 | nltk.download('stopwords')  # word lists read through stopwords.words('english')
 16 | nltk.download('wordnet')  # lexical data for WordNetLemmatizer
 17 | # Shared spaCy pipeline, loaded once when this module is imported.
 18 | nlp = spacy.load("en_core_web_sm")
 19 | 
 20 | class NLPProcessor:
 21 |     def __init__(self):
 22 |         self.stop_words = set(stopwords.words('english'))
 23 |         self.lemmatizer = WordNetLemmatizer()
 24 |         self.tokenizer = English().Defaults.create_tokenizer(nlp)
 25 | 
 26 |     def clean_text(self, text):
 27 |         """
 28 |         Cleaning input text by removing non-alphabetic characters and lowercasing.
 29 |         :param text: The input text
 30 |         :return: Cleaned text
 31 |         """
 32 |         text = re.sub(r'[^a-zA-Z]', ' ', text)
 33 |         text = text.lower()
 34 |         text = text.strip()
 35 |         return text
 36 | 
 37 |     def tokenize_text(self, text):
 38 |         """
 39 |         To tokenize input text.
 40 |         :param text: The input text
 41 |         :return: List of tokens
 42 |         """
 43 |         tokens = word_tokenize(text)
 44 |         return tokens
 45 | 
 46 |     def remove_stopwords(self, tokens):
 47 |         """
 48 |         Removing stopwords from token list.
 49 |         :param tokens: List of tokens
 50 |         :return: List of tokens without stopwords
 51 |         """
 52 |         filtered_tokens = [token for token in tokens if token not in self.stop_words]
 53 |         return filtered_tokens
 54 | 
 55 |     def lemmatize_tokens(self, tokens):
 56 |         """
 57 |         To lemmatize input tokens.
 58 |         :param tokens: List of tokens
 59 |         :return: List of lemmatized tokens
 60 |         """
 61 |         lemmatized_tokens = [self.lemmatizer.lemmatize(token) for token in tokens]
 62 |         return lemmatized_tokens
 63 | 
 64 |     def process_text(self, text):
 65 |         """
 66 |         Processing input text by cleaning, tokenizing, removing stopwords, and lemmatizing.
 67 |         :param text: The input text
 68 |         :return: Processed text
 69 |         """
 70 |         cleaned_text = self.clean_text(text)
 71 |         tokens = self.tokenize_text(cleaned_text)
 72 |         tokens = self.remove_stopwords(tokens)
 73 |         lemmatized_tokens = self.lemmatize_tokens(tokens)
 74 |         return ' '.join(lemmatized_tokens)
 75 | 
 76 |     def spacy_tokenize(self, text):
 77 |         """
 78 |         Tokenizing the input text using Spacy.
 79 |         :param text: The input text
 80 |         :return: List of tokens
 81 |         """
 82 |         doc = nlp(text)
 83 |         return [token.text for token in doc]
 84 | 
 85 |     def spacy_lemmatize(self, tokens):
 86 |         """
 87 |         To lemmatize input tokens using Spacy.
 88 |         :param tokens: List of tokens
 89 |         :return: List of lemmatized tokens
 90 |         """
 91 |         doc = nlp(' '.join(tokens))
 92 |         return [token.lemma_ for token in doc]
 93 | 
 94 |     def spacy_remove_stopwords(self, tokens):
 95 |         """
 96 |         Removing stopwords from the token list using Spacy.
 97 |         :param tokens: List of tokens
 98 |         :return: List of tokens without stopwords
 99 |         """
100 |         return [token for token in tokens if not nlp.vocab[token].is_stop]
101 | 
102 | if __name__ == "__main__":
103 |     nlp_processor = NLPProcessor()
104 | 
105 |     example_text = "This is an example sentence to demonstrate the NLP processing capabilities."
106 | 
107 |     processed_text = nlp_processor.process_text(example_text)
108 |     print(f"Processed text (NLTK): {processed_text}")
109 | 
110 |     tokens = nlp_processor.spacy_tokenize(example_text)
111 |     tokens = nlp_processor.spacy_remove_stopwords(tokens)
112 |     lemmatized_tokens = nlp_processor.spacy_lemmatize(tokens)
113 |     print(f"Processed text (Spacy): {' '.join(lemmatized_tokens)}")
114 | 
115 |     save_to_file(processed_text, os.path.join(Config.PROCESSED_DATA_DIR, 'processed_text.txt'))
116 |     print(f"Processed text saved to {Config.PROCESSED_DATA_DIR}/processed_text.txt")
117 | 


--------------------------------------------------------------------------------
/src/processing/__init__.py:
--------------------------------------------------------------------------------
 1 | from .audio_processing import process_audio
 2 | from .video_processing import process_video
 3 | from .image_processing import process_image
 4 | from .text_processing import process_text
 5 | 
 6 | __all__ = [  # the four process_* entry points imported above
 7 |     'process_audio',
 8 |     'process_video',
 9 |     'process_image',
10 |     'process_text'
11 | ]
12 | 


--------------------------------------------------------------------------------
/src/processing/audio_processing.py:
--------------------------------------------------------------------------------
 1 | import librosa
 2 | import numpy as np
 3 | import logging
 4 | 
 5 | def load_audio(file_path):
 6 |     """
 7 |     Loading audio file.
 8 |     :param file_path: Path to the audio file
 9 |     :return: Audio time series and sampling rate
10 |     """
11 |     try:
12 |         y, sr = librosa.load(file_path, sr=None)
13 |         logging.info(f"Audio file loaded: {file_path}")
14 |         return y, sr
15 |     except Exception as e:
16 |         logging.error(f"Error loading audio file {file_path}: {e}")
17 |         raise
18 | 
19 | def extract_mfcc(y, sr, n_mfcc=13):
20 |     """
21 |     Extracting MFCC features from audio time series.
22 |     :param y: Audio time series
23 |     :param sr: Sampling rate of the audio
24 |     :param n_mfcc: Number of MFCCs to return
25 |     :return: Mean MFCC features
26 |     """
27 |     try:
28 |         mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)
29 |         mfcc_mean = np.mean(mfcc.T, axis=0)
30 |         logging.info("MFCC features extracted")
31 |         return mfcc_mean
32 |     except Exception as e:
33 |         logging.error(f"Error extracting MFCC features: {e}")
34 |         raise
35 | 
36 | def extract_chroma(y, sr):
37 |     """
38 |     Extracting chroma features from audio time series.
39 |     :param y: Audio time series
40 |     :param sr: Sampling rate of the audio
41 |     :return: Mean chroma features
42 |     """
43 |     try:
44 |         chroma = librosa.feature.chroma_stft(y=y, sr=sr)
45 |         chroma_mean = np.mean(chroma.T, axis=0)
46 |         logging.info("Chroma features extracted")
47 |         return chroma_mean
48 |     except Exception as e:
49 |         logging.error(f"Error extracting chroma features: {e}")
50 |         raise
51 | 
52 | def extract_spectral_contrast(y, sr):
53 |     """
54 |     Extracting spectral contrast features from audio time series.
55 |     :param y: Audio time series
56 |     :param sr: Sampling rate of the audio
57 |     :return: Mean spectral contrast features
58 |     """
59 |     try:
60 |         spectral_contrast = librosa.feature.spectral_contrast(y=y, sr=sr)
61 |         spectral_contrast_mean = np.mean(spectral_contrast.T, axis=0)
62 |         logging.info("Spectral contrast features extracted")
63 |         return spectral_contrast_mean
64 |     except Exception as e:
65 |         logging.error(f"Error extracting spectral contrast features: {e}")
66 |         raise
67 | 
68 | def process_audio(file_path):
69 |     """
70 |     Processing an audio file and extracting features.
71 |     :param file_path: Path to the audio file
72 |     :return: Extracted audio features
73 |     """
74 |     try:
75 |         y, sr = load_audio(file_path)
76 |         mfcc_features = extract_mfcc(y, sr)
77 |         chroma_features = extract_chroma(y, sr)
78 |         spectral_contrast_features = extract_spectral_contrast(y, sr)
79 |         
80 |         audio_features = np.hstack([mfcc_features, chroma_features, spectral_contrast_features])
81 |         logging.info(f"Extracted features from audio file: {file_path}")
82 |         
83 |         return audio_features
84 |     except Exception as e:
85 |         logging.error(f"Error processing audio file {file_path}: {e}")
86 |         raise
87 | 
88 | if __name__ == "__main__":
89 |     import sys
90 |     if len(sys.argv) != 2:
91 |         print("Usage: python audio_processing.py <path_to_audio_file>")
92 |         sys.exit(1)
93 | 
94 |     file_path = sys.argv[1]
95 |     features = process_audio(file_path)
96 |     print("Extracted Features:\n", features)
97 | 


--------------------------------------------------------------------------------
/src/processing/image_processing.py:
--------------------------------------------------------------------------------
 1 | import cv2
 2 | import numpy as np
 3 | import logging
 4 | from skimage.feature import hog
 5 | 
 6 | def load_image(file_path):
 7 |     """
 8 |     Loading image file.
 9 |     :param file_path: Path to the image file
10 |     :return: Loaded image
11 |     """
12 |     try:
13 |         image = cv2.imread(file_path, cv2.IMREAD_COLOR)
14 |         if image is None:
15 |             raise ValueError(f"Error loading image: {file_path}")
16 |         logging.info(f"Image file loaded: {file_path}")
17 |         return image
18 |     except Exception as e:
19 |         logging.error(f"Error loading image file {file_path}: {e}")
20 |         raise
21 | 
22 | def preprocess_image(image, target_size=(64, 64)):
23 |     """
24 |     Preprocessing input image by resizing and converting to grayscale.
25 |     :param image: Loaded image
26 |     :param target_size: Target size for resizing
27 |     :return: Preprocessed image
28 |     """
29 |     try:
30 |         resized_image = cv2.resize(image, target_size)
31 |         gray_image = cv2.cvtColor(resized_image, cv2.COLOR_BGR2GRAY)
32 |         logging.info("Image preprocessed")
33 |         return gray_image
34 |     except Exception as e:
35 |         logging.error(f"Error preprocessing image: {e}")
36 |         raise
37 | 
38 | def extract_hog_features(image):
39 |     """
40 |     Extracting Histogram of Oriented Gradients features from an image.
41 |     :param image: Preprocessed image
42 |     :return: HOG features
43 |     """
44 |     try:
45 |         hog_features, hog_image = hog(image, orientations=9, pixels_per_cell=(8, 8), cells_per_block=(2, 2), block_norm='L2-Hys', visualize=True)
46 |         logging.info("HOG features extracted")
47 |         return hog_features
48 |     except Exception as e:
49 |         logging.error(f"Error extracting HOG features: {e}")
50 |         raise
51 | 
52 | def process_image(file_path):
53 |     """
54 |     Processing an image file and extracting features.
55 |     :param file_path: Path to the image file
56 |     :return: Extracted image features
57 |     """
58 |     try:
59 |         image = load_image(file_path)
60 |         preprocessed_image = preprocess_image(image)
61 |         hog_features = extract_hog_features(preprocessed_image)
62 |         
63 |         logging.info(f"Extracted features from image file: {file_path}")
64 |         
65 |         return hog_features
66 |     except Exception as e:
67 |         logging.error(f"Error processing image file {file_path}: {e}")
68 |         raise
69 | 
70 | if __name__ == "__main__":
71 |     import sys
72 |     if len(sys.argv) != 2:
73 |         print("Usage: python image_processing.py <path_to_image_file>")
74 |         sys.exit(1)
75 | 
76 |     file_path = sys.argv[1]
77 |     features = process_image(file_path)
78 |     print("Extracted Features:\n", features)
79 | 


--------------------------------------------------------------------------------
/src/processing/text_processing.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | import nltk
 3 | from nltk.tokenize import word_tokenize
 4 | from nltk.corpus import stopwords
 5 | from sklearn.feature_extraction.text import TfidfVectorizer
 6 | import string
 7 | import numpy as np
 8 | 
 9 | # Fetch the NLTK resources this module uses; these calls run whenever the module is imported.
10 | nltk.download('punkt')  # tokenizer data for word_tokenize
11 | nltk.download('stopwords')  # word lists read through stopwords.words('english')
12 | 
13 | def clean_text(text):
14 |     """
15 |     Clean the input text by removing punctuation and stopwords.
16 |     :param text: Raw text
17 |     :return: Cleaned text
18 |     """
19 |     try:
20 |         text = text.lower()
21 |         text = text.translate(str.maketrans('', '', string.punctuation))
22 |         tokens = word_tokenize(text)
23 |         stop_words = set(stopwords.words('english'))
24 |         cleaned_tokens = [token for token in tokens if token not in stop_words]
25 |         cleaned_text = ' '.join(cleaned_tokens)
26 |         logging.info("Text cleaned")
27 |         return cleaned_text
28 |     except Exception as e:
29 |         logging.error(f"Error cleaning text: {e}")
30 |         raise
31 | 
32 | def extract_tfidf_features(text, max_features=100):
33 |     """
34 |     Extracting TF-IDF features from text.
35 |     :param text: Cleaned text
36 |     :param max_features: Maximum number of features to extract
37 |     :return: TF-IDF features
38 |     """
39 |     try:
40 |         vectorizer = TfidfVectorizer(max_features=max_features)
41 |         tfidf_matrix = vectorizer.fit_transform([text])
42 |         tfidf_features = tfidf_matrix.toarray().flatten()
43 |         logging.info("TF-IDF features extracted")
44 |         return tfidf_features
45 |     except Exception as e:
46 |         logging.error(f"Error extracting TF-IDF features: {e}")
47 |         raise
48 | 
49 | def process_text(text):
50 |     """
51 |     Processing text input and extracting features.
52 |     :param text: Raw text input
53 |     :return: Extracted text features
54 |     """
55 |     try:
56 |         cleaned_text = clean_text(text)
57 |         tfidf_features = extract_tfidf_features(cleaned_text)
58 |         
59 |         logging.info("Extracted features from text input")
60 |         
61 |         return tfidf_features
62 |     except Exception as e:
63 |         logging.error(f"Error processing text: {e}")
64 |         raise
65 | 
66 | if __name__ == "__main__":
67 |     import sys
68 |     if len(sys.argv) != 2:
69 |         print("Usage: python text_processing.py <text_input>")
70 |         sys.exit(1)
71 | 
72 |     text_input = sys.argv[1]
73 |     features = process_text(text_input)
74 |     print("Extracted Features:\n", features)
75 | 


--------------------------------------------------------------------------------
/src/processing/video_processing.py:
--------------------------------------------------------------------------------
  1 | import cv2
  2 | import numpy as np
  3 | import logging
  4 | 
  5 | def load_video(file_path):
  6 |     """
  7 |     Loading video file.
  8 |     :param file_path: Path to the video file
  9 |     :return: Video capture object
 10 |     """
 11 |     try:
 12 |         cap = cv2.VideoCapture(file_path)
 13 |         if not cap.isOpened():
 14 |             raise ValueError(f"Error opening video file: {file_path}")
 15 |         logging.info(f"Video file loaded: {file_path}")
 16 |         return cap
 17 |     except Exception as e:
 18 |         logging.error(f"Error loading video file {file_path}: {e}")
 19 |         raise
 20 | 
 21 | def extract_frames(cap, frame_rate=1):
 22 |     """
 23 |     Extracting frames from video file at a specified frame rate.
 24 |     :param cap: Video capture object
 25 |     :param frame_rate: Frame rate to extract frames (frames per second)
 26 |     :return: List of extracted frames
 27 |     """
 28 |     try:
 29 |         frames = []
 30 |         frame_count = 0
 31 |         while True:
 32 |             ret, frame = cap.read()
 33 |             if not ret:
 34 |                 break
 35 |             if frame_count % frame_rate == 0:
 36 |                 frames.append(frame)
 37 |             frame_count += 1
 38 |         logging.info(f"Extracted {len(frames)} frames from video")
 39 |         return frames
 40 |     except Exception as e:
 41 |         logging.error(f"Error extracting frames: {e}")
 42 |         raise
 43 | 
 44 | def preprocess_frame(frame, target_size=(64, 64)):
 45 |     """
 46 |     Preprocessing single video frame by resizing and converting to grayscale.
 47 |     :param frame: Video frame
 48 |     :param target_size: Target size for resizing
 49 |     :return: Preprocessed frame
 50 |     """
 51 |     try:
 52 |         resized_frame = cv2.resize(frame, target_size)
 53 |         gray_frame = cv2.cvtColor(resized_frame, cv2.COLOR_BGR2GRAY)
 54 |         logging.info("Frame preprocessed")
 55 |         return gray_frame
 56 |     except Exception as e:
 57 |         logging.error(f"Error preprocessing frame: {e}")
 58 |         raise
 59 | 
 60 | def extract_video_features(frames):
 61 |     """
 62 |     Extracting features from video frames.
 63 |     :param frames: List of preprocessed video frames
 64 |     :return: Extracted video features
 65 |     """
 66 |     try:
 67 |         features = [frame.flatten() for frame in frames]
 68 |         video_features = np.mean(features, axis=0)
 69 |         logging.info("Video features extracted")
 70 |         return video_features
 71 |     except Exception as e:
 72 |         logging.error(f"Error extracting video features: {e}")
 73 |         raise
 74 | 
 75 | def process_video(file_path):
 76 |     """
 77 |     Processing video file and extract features.
 78 |     :param file_path: Path to the video file
 79 |     :return: Extracted video features
 80 |     """
 81 |     try:
 82 |         cap = load_video(file_path)
 83 |         frames = extract_frames(cap, frame_rate=10)  
 84 |         cap.release()
 85 |         
 86 |         preprocessed_frames = [preprocess_frame(frame) for frame in frames]
 87 |         video_features = extract_video_features(preprocessed_frames)
 88 |         
 89 |         logging.info(f"Extracted features from video file: {file_path}")
 90 |         
 91 |         return video_features
 92 |     except Exception as e:
 93 |         logging.error(f"Error processing video file {file_path}: {e}")
 94 |         raise
 95 | 
 96 | if __name__ == "__main__":
 97 |     import sys
 98 |     if len(sys.argv) != 2:
 99 |         print("Usage: python video_processing.py <path_to_video_file>")
100 |         sys.exit(1)
101 | 
102 |     file_path = sys.argv[1]
103 |     features = process_video(file_path)
104 |     print("Extracted Features:\n", features)
105 | 


--------------------------------------------------------------------------------
/src/training/__init__.py:
--------------------------------------------------------------------------------
 1 | from .cnn_training import train_cnn
 2 | from .transformer_training import train_transformer
 3 | from .svm_training import train_svm
 4 | from .bayesian_training import train_bayesian
 5 | from .vision_transformer_training import train_vision_transformer
 6 | 
# Names exported by `from <package> import *`; mirrors the imports above.
__all__ = [
    'train_cnn',
    'train_transformer',
    'train_svm',
    'train_bayesian',
    'train_vision_transformer'
]
14 | 


--------------------------------------------------------------------------------
/src/training/bayesian_training.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import joblib
 3 | import pandas as pd
 4 | from sklearn.preprocessing import StandardScaler
 5 | from src.config import config
 6 | from src.dataset.data_loader import load_csv_data
 7 | from src.dataset.data_splitter import split_data
 8 | from src.models.bayesian import BayesianModel
 9 | from src.utils.helpers import create_directory
10 | from src.utils.logger import setup_logger
11 | from src.utils.metrics import calculate_metrics, log_metrics, plot_confusion_matrix
12 | 
def preprocess_data(data: pd.DataFrame):
    """
    Standardize every column of the input with a freshly fitted StandardScaler.
    :param data: DataFrame containing the (numeric) input features
    :return: DataFrame of scaled values with the same columns and index as ``data``
    """
    scaler = StandardScaler()
    scaled_data = scaler.fit_transform(data)
    # Keep the caller's index so the result stays aligned with any label
    # Series taken from the same frame (fit_transform returns a bare array).
    return pd.DataFrame(scaled_data, columns=data.columns, index=data.index)
22 | 
def train_bayesian():
    """
    Train the Bayesian model on the processed CSV, save it, and evaluate it.

    Steps: set up logging, ensure the model directory exists, load the data,
    standardize features, split, fit, dump the model with joblib, then compute
    metrics and a confusion matrix on the validation split.
    NOTE(review): features are scaled before the train/validation split, so the
    scaler sees validation rows — confirm this is intended.
    """
    logger = setup_logger(__name__, os.path.join(config.LOG_DIR, 'bayesian_training.log'))
    logger.info("Starting Bayesian model training...")

    create_directory(config.MODEL_DIR)

    dataset = load_csv_data(config.PROCESSED_DATA_FILE)
    features = preprocess_data(dataset.drop('label', axis=1))
    labels = dataset['label']

    # Hold out a validation portion using the project's splitter.
    X_train, X_val, y_train, y_val = split_data(features, labels)

    model = BayesianModel(
        prior_mean=config.BAYESIAN_PARAMS['prior_mean'],
        prior_std=config.BAYESIAN_PARAMS['prior_std'],
    )
    model.fit(X_train.values, y_train.values)

    model_path = os.path.join(config.MODEL_DIR, 'bayesian_model.pkl')
    joblib.dump(model, model_path)
    logger.info(f"Bayesian model saved at {model_path}")

    y_pred = model.predict(X_val.values)
    log_metrics(
        calculate_metrics(y_val, y_pred),
        log_file=os.path.join(config.LOG_DIR, 'bayesian_metrics.log'),
    )
    plot_confusion_matrix(
        y_val,
        y_pred,
        labels=[0, 1],
        output_dir=config.LOG_DIR,
        filename='bayesian_confusion_matrix.png',
    )

    logger.info("Bayesian model training and evaluation completed.")
50 | 
# Run Bayesian training when this module is executed as a script.
if __name__ == "__main__":
    train_bayesian()
53 | 


--------------------------------------------------------------------------------
/src/training/cnn_training.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import pandas as pd
 3 | from tensorflow.keras.models import Sequential
 4 | from tensorflow.keras.layers import Dense, Conv2D, MaxPooling2D, Flatten, Dropout
 5 | from tensorflow.keras.optimizers import Adam
 6 | from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
 7 | from sklearn.preprocessing import StandardScaler
 8 | from sklearn.model_selection import train_test_split
 9 | from src.config import config
10 | from src.dataset.data_loader import load_csv_data
11 | from src.utils.helpers import create_directory
12 | from src.utils.logger import setup_logger
13 | from src.utils.metrics import calculate_metrics, log_metrics, plot_confusion_matrix, plot_metrics
14 | 
def preprocess_data(data: pd.DataFrame):
    """
    Separate features from the 'label' column and standardize the features.

    No reshaping happens here; see ``reshape_data`` for that step.
    :param data: DataFrame containing feature columns and a 'label' column
    :return: Tuple ``(X_scaled, y)`` — the scaled feature array returned by
             ``StandardScaler.fit_transform`` and the 'label' column
    """
    feature_frame = data.drop(columns=['label'])
    standardized = StandardScaler().fit_transform(feature_frame)
    return standardized, data['label']
26 | 
def reshape_data(X, img_width, img_height):
    """
    Reshape flat samples into 4-D single-channel image batches for the CNN.
    :param X: Input data exposing a ``reshape`` method
    :param img_width: Size of the second output axis
    :param img_height: Size of the third output axis
    :return: ``X`` reshaped to ``(-1, img_width, img_height, 1)``
    """
    # -1 lets the batch dimension be inferred; the trailing 1 is the channel axis.
    target_shape = (-1, img_width, img_height, 1)
    return X.reshape(target_shape)
36 | 
def create_cnn_model(input_shape):
    """
    Assemble the (uncompiled) binary-classification CNN.

    Architecture: two Conv2D + MaxPooling2D stages (32 then 64 filters),
    followed by Flatten, Dense(128), Dropout(0.5) and a single sigmoid unit.
    :param input_shape: Shape of one input sample, passed to the first Conv2D layer
    :return: The Sequential model
    """
    model = Sequential()
    # Convolutional feature extractor
    model.add(Conv2D(32, (3, 3), activation='relu', input_shape=input_shape))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    # Classifier head
    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(1, activation='sigmoid'))
    return model
54 | 
def train_cnn():
    """
    Train the CNN on the processed CSV and evaluate it on a 20% hold-out split.

    Features are standardized and reshaped to 64x64x1, the model is fitted with
    checkpointing and early stopping on ``val_loss``, and the validation
    predictions (thresholded at 0.5) are passed to the metric/plot helpers.
    """
    logger = setup_logger(__name__, os.path.join(config.LOG_DIR, 'cnn_training.log'))
    logger.info("Starting CNN model training...")

    create_directory(config.MODEL_DIR)

    side = 64
    features, labels = preprocess_data(load_csv_data(config.PROCESSED_DATA_FILE))
    images = reshape_data(features, img_width=side, img_height=side)

    X_train, X_val, y_train, y_val = train_test_split(
        images, labels, test_size=0.2, random_state=config.RANDOM_SEED
    )

    model = create_cnn_model(input_shape=(side, side, 1))
    model.compile(
        optimizer=Adam(learning_rate=config.CNN_PARAMS['learning_rate']),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )

    # Keep the best weights on disk and stop once val_loss stalls.
    callbacks = [
        ModelCheckpoint(
            os.path.join(config.MODEL_DIR, 'cnn_model.h5'),
            monitor='val_loss',
            save_best_only=True,
            mode='min',
        ),
        EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True),
    ]

    history = model.fit(
        X_train,
        y_train,
        epochs=config.CNN_PARAMS['epochs'],
        batch_size=config.CNN_PARAMS['batch_size'],
        validation_data=(X_val, y_val),
        callbacks=callbacks,
    )

    probabilities = model.predict(X_val)
    y_pred = (probabilities > 0.5).astype("int32")
    log_metrics(
        calculate_metrics(y_val, y_pred),
        log_file=os.path.join(config.LOG_DIR, 'cnn_metrics.log'),
    )
    plot_confusion_matrix(
        y_val,
        y_pred,
        labels=[0, 1],
        output_dir=config.LOG_DIR,
        filename='cnn_confusion_matrix.png',
    )
    for curve in ('accuracy', 'loss'):
        plot_metrics(history, metric=curve)

    logger.info("CNN model training and evaluation completed.")
85 | 
# Run CNN training when this module is executed as a script.
if __name__ == "__main__":
    train_cnn()
88 | 


--------------------------------------------------------------------------------
/src/training/svm_training.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import joblib
 3 | import pandas as pd
 4 | from sklearn.svm import SVC
 5 | from sklearn.preprocessing import StandardScaler
 6 | from sklearn.model_selection import train_test_split
 7 | from sklearn.pipeline import Pipeline
 8 | from src.config import config
 9 | from src.dataset.data_loader import load_csv_data
10 | from src.utils.helpers import create_directory
11 | from src.utils.logger import setup_logger
12 | from src.utils.metrics import calculate_metrics, log_metrics, plot_confusion_matrix
13 | 
def preprocess_data(data: pd.DataFrame):
    """
    Separate the feature columns from the 'label' column.

    No scaling is done here; standardization happens inside the training
    pipeline built in ``train_svm``.
    :param data: DataFrame containing feature columns and a 'label' column
    :return: Tuple ``(X, y)`` of the feature DataFrame and the label Series
    """
    features = data.drop(columns=['label'])
    labels = data['label']
    return features, labels
23 | 
def train_svm():
    """
    Train a StandardScaler + SVC pipeline on the processed CSV, save and evaluate it.

    The data is split 80/20 with ``config.RANDOM_SEED``; the fitted pipeline is
    dumped with joblib to ``config.MODEL_DIR`` and validation predictions are
    passed to the metric and confusion-matrix helpers.
    """
    logger = setup_logger(__name__, os.path.join(config.LOG_DIR, 'svm_training.log'))
    logger.info("Starting SVM model training...")

    create_directory(config.MODEL_DIR)

    data = load_csv_data(config.PROCESSED_DATA_FILE)
    X, y = preprocess_data(data)

    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=config.RANDOM_SEED)

    # Scaling lives inside the pipeline so it is fitted on the training split
    # only and is saved together with the classifier.
    # probability=True is kept so the saved model still supports predict_proba.
    pipeline = Pipeline([
        ('scaler', StandardScaler()),
        ('svm', SVC(kernel=config.SVM_PARAMS['kernel'], C=config.SVM_PARAMS['C'], probability=True))
    ])

    pipeline.fit(X_train, y_train)

    model_path = os.path.join(config.MODEL_DIR, 'svm_model.pkl')
    joblib.dump(pipeline, model_path)
    logger.info(f"SVM model saved at {model_path}")

    # Only hard predictions are evaluated; the previously computed (and unused)
    # class probabilities were dropped to avoid a wasted inference pass.
    y_pred = pipeline.predict(X_val)
    metrics = calculate_metrics(y_val, y_pred)
    log_metrics(metrics, log_file=os.path.join(config.LOG_DIR, 'svm_metrics.log'))
    plot_confusion_matrix(y_val, y_pred, labels=[0, 1], output_dir=config.LOG_DIR, filename='svm_confusion_matrix.png')

    logger.info("SVM model training and evaluation completed.")
54 | 
# Run SVM training when this module is executed as a script.
if __name__ == "__main__":
    train_svm()
57 | 


--------------------------------------------------------------------------------
/src/training/transformer_training.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import torch
 3 | from torch import nn
 4 | from torch.optim import Adam
 5 | from sklearn.model_selection import train_test_split
 6 | import pandas as pd
 7 | from src.config import config
 8 | from src.dataset.data_loader import load_csv_data
 9 | from src.models.transformer import TransformerModel
10 | from src.utils.helpers import create_directory
11 | from src.utils.logger import setup_logger
12 | from src.utils.metrics import calculate_metrics, log_metrics, plot_confusion_matrix, plot_metrics
13 | 
def preprocess_data(data: pd.DataFrame):
    """
    Convert a labelled DataFrame into feature and label tensors.

    The values are converted as-is; no normalization is applied here.
    :param data: DataFrame containing feature columns and a 'label' column
    :return: Tuple ``(X, y)`` — float32 feature tensor and int64 label tensor
    """
    feature_array = data.drop(columns=['label']).values
    label_array = data['label'].values
    return (
        torch.tensor(feature_array, dtype=torch.float32),
        torch.tensor(label_array, dtype=torch.long),
    )
25 | 
def train_transformer():
    """
    Train the Transformer classifier on the processed CSV, save and evaluate it.

    Each epoch performs a single optimizer step on the whole training split
    (no mini-batching). The state dict is saved to ``config.MODEL_DIR`` and the
    arg-max predictions on the 20% validation split are passed to the metric
    and confusion-matrix helpers.
    """
    logger = setup_logger(__name__, os.path.join(config.LOG_DIR, 'transformer_training.log'))
    logger.info("Starting Transformer model training...")

    create_directory(config.MODEL_DIR)

    features, labels = preprocess_data(load_csv_data(config.PROCESSED_DATA_FILE))
    X_train, X_val, y_train, y_val = train_test_split(
        features, labels, test_size=0.2, random_state=config.RANDOM_SEED
    )

    model = TransformerModel(
        input_dim=X_train.shape[1],
        model_dim=config.TRANSFORMER_PARAMS['model_dim'],
        num_heads=config.TRANSFORMER_PARAMS['num_heads'],
        num_layers=config.TRANSFORMER_PARAMS['num_layers'],
        output_dim=2,
    )

    optimizer = Adam(model.parameters(), lr=config.TRANSFORMER_PARAMS['learning_rate'])
    criterion = nn.CrossEntropyLoss()

    # Full-batch training loop
    total_epochs = config.TRANSFORMER_PARAMS['epochs']
    for epoch_number in range(1, total_epochs + 1):
        model.train()
        optimizer.zero_grad()
        loss = criterion(model(X_train), y_train)
        loss.backward()
        optimizer.step()
        logger.info(f"Epoch [{epoch_number}/{total_epochs}], Loss: {loss.item()}")

    model_path = os.path.join(config.MODEL_DIR, 'transformer_model.pth')
    torch.save(model.state_dict(), model_path)
    logger.info(f"Transformer model saved at {model_path}")

    model.eval()
    with torch.no_grad():
        # torch.max over dim 1 yields (values, indices); the indices are the classes.
        y_pred = torch.max(model(X_val), 1)[1]
    log_metrics(
        calculate_metrics(y_val.numpy(), y_pred.numpy()),
        log_file=os.path.join(config.LOG_DIR, 'transformer_metrics.log'),
    )
    plot_confusion_matrix(
        y_val.numpy(),
        y_pred.numpy(),
        labels=[0, 1],
        output_dir=config.LOG_DIR,
        filename='transformer_confusion_matrix.png',
    )

    logger.info("Transformer model training and evaluation completed.")
71 | 
# Run Transformer training when this module is executed as a script.
if __name__ == "__main__":
    train_transformer()
74 | 


--------------------------------------------------------------------------------
/src/training/vision_transformer_training.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import torch
 3 | from torch import nn
 4 | from torch.optim import Adam
 5 | from sklearn.model_selection import train_test_split
 6 | import pandas as pd
 7 | from src.config import config
 8 | from src.dataset.data_loader import load_csv_data
 9 | from src.models.vision_transformer import VisionTransformer
10 | from src.utils.helpers import create_directory
11 | from src.utils.logger import setup_logger
12 | from src.utils.metrics import calculate_metrics, log_metrics, plot_confusion_matrix, plot_metrics
13 | 
def preprocess_data(data: pd.DataFrame):
    """
    Turn a labelled DataFrame into a pair of tensors.

    Values are copied unchanged; this function performs no normalization.
    :param data: DataFrame containing feature columns and a 'label' column
    :return: Tuple ``(X, y)`` — float32 feature tensor and int64 label tensor
    """
    label_values = data['label'].values
    feature_values = data.drop(columns=['label']).values
    X = torch.tensor(feature_values, dtype=torch.float32)
    y = torch.tensor(label_values, dtype=torch.long)
    return X, y
25 | 
def train_vision_transformer():
    """
    Train the Vision Transformer on the processed CSV, save and evaluate it.

    Each epoch performs one optimizer step on the entire training split. The
    state dict is saved to ``config.MODEL_DIR`` and arg-max predictions on the
    20% validation split are passed to the metric and confusion-matrix helpers.
    NOTE(review): the model receives the 2-D feature tensor built from the CSV;
    whether VisionTransformer accepts that layout is not visible here — confirm.
    """
    logger = setup_logger(__name__, os.path.join(config.LOG_DIR, 'vision_transformer_training.log'))
    logger.info("Starting Vision Transformer model training...")

    create_directory(config.MODEL_DIR)

    features, labels = preprocess_data(load_csv_data(config.PROCESSED_DATA_FILE))
    X_train, X_val, y_train, y_val = train_test_split(
        features, labels, test_size=0.2, random_state=config.RANDOM_SEED
    )

    model = VisionTransformer(
        img_size=config.VISION_TRANSFORMER_PARAMS['img_size'],
        patch_size=config.VISION_TRANSFORMER_PARAMS['patch_size'],
        num_classes=2,
        dim=config.VISION_TRANSFORMER_PARAMS['dim'],
        depth=config.VISION_TRANSFORMER_PARAMS['depth'],
        heads=config.VISION_TRANSFORMER_PARAMS['heads'],
        mlp_dim=config.VISION_TRANSFORMER_PARAMS['mlp_dim'],
    )

    # Optimizer and loss
    optimizer = Adam(model.parameters(), lr=config.VISION_TRANSFORMER_PARAMS['learning_rate'])
    criterion = nn.CrossEntropyLoss()

    # Full-batch training loop
    total_epochs = config.VISION_TRANSFORMER_PARAMS['epochs']
    for epoch_number in range(1, total_epochs + 1):
        model.train()
        optimizer.zero_grad()
        loss = criterion(model(X_train), y_train)
        loss.backward()
        optimizer.step()
        logger.info(f"Epoch [{epoch_number}/{total_epochs}], Loss: {loss.item()}")

    model_path = os.path.join(config.MODEL_DIR, 'vision_transformer_model.pth')
    torch.save(model.state_dict(), model_path)
    logger.info(f"Vision Transformer model saved at {model_path}")

    model.eval()
    with torch.no_grad():
        # torch.max over dim 1 yields (values, indices); the indices are the classes.
        y_pred = torch.max(model(X_val), 1)[1]
    log_metrics(
        calculate_metrics(y_val.numpy(), y_pred.numpy()),
        log_file=os.path.join(config.LOG_DIR, 'vision_transformer_metrics.log'),
    )
    plot_confusion_matrix(
        y_val.numpy(),
        y_pred.numpy(),
        labels=[0, 1],
        output_dir=config.LOG_DIR,
        filename='vision_transformer_confusion_matrix.png',
    )

    logger.info("Vision Transformer model training and evaluation completed.")
74 | 
# Run Vision Transformer training when this module is executed as a script.
if __name__ == "__main__":
    train_vision_transformer()
77 | 


--------------------------------------------------------------------------------
/src/utils.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import json
  3 | import logging
  4 | import pandas as pd
  5 | from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
  6 | import matplotlib.pyplot as plt
  7 | 
  8 | from src.config import Config
  9 | 
 10 | def save_to_file(data, filename):
 11 |     """
 12 |     Saving data to file.
 13 |     :param data: Data to save
 14 |     :param filename: Name of the file
 15 |     """
 16 |     with open(filename, 'w') as file:
 17 |         if isinstance(data, (dict, list)):
 18 |             json.dump(data, file, indent=4)
 19 |         else:
 20 |             file.write(str(data))
 21 |     logging.info(f"Data saved to {filename}")
 22 | 
 23 | def read_from_file(filename):
 24 |     """
 25 |     Reading data from file.
 26 |     :param filename: Name of the file
 27 |     :return: Data read from the file
 28 |     """
 29 |     with open(filename, 'r') as file:
 30 |         if filename.endswith('.json'):
 31 |             return json.load(file)
 32 |         else:
 33 |             return file.read()
 34 | 
 35 | def calculate_metrics(true_labels, predictions):
 36 |     """
 37 |     Calculating accuracy, precision, recall, and F1 score.
 38 |     :param true_labels: True labels
 39 |     :param predictions: Model predictions
 40 |     :return: Dictionary with metrics
 41 |     """
 42 |     metrics = {
 43 |         'accuracy': accuracy_score(true_labels, predictions),
 44 |         'precision': precision_score(true_labels, predictions),
 45 |         'recall': recall_score(true_labels, predictions),
 46 |         'f1_score': f1_score(true_labels, predictions)
 47 |     }
 48 |     logging.info(f"Metrics calculated: {metrics}")
 49 |     return metrics
 50 | 
 51 | def plot_metrics(history, metric='accuracy'):
 52 |     """
 53 |     Plotting training and validation metrics.
 54 |     :param history: Training history
 55 |     :param metric: Metric to plot
 56 |     """
 57 |     plt.plot(history.history[metric])
 58 |     plt.plot(history.history[f'val_{metric}'])
 59 |     plt.title(f'Model {metric}')
 60 |     plt.ylabel(metric)
 61 |     plt.xlabel('Epoch')
 62 |     plt.legend(['Train', 'Validation'], loc='upper left')
 63 |     plt.savefig(os.path.join(Config.LOG_DIR, f'{metric}_plot.png'))
 64 |     plt.close()
 65 |     logging.info(f"{metric} plot saved.")
 66 | 
 67 | def create_directory(path):
 68 |     """
 69 |     Creating directory if it does not exist.
 70 |     :param path: Directory path
 71 |     """
 72 |     if not os.path.exists(path):
 73 |         os.makedirs(path)
 74 |     logging.info(f"Directory created at {path}")
 75 | 
 76 | def load_data(file_path, file_type='csv'):
 77 |     """
 78 |     Loading data from file.
 79 |     :param file_path: Path to the file
 80 |     :param file_type: Type of the file ('csv', 'json', etc.)
 81 |     :return: Loaded data
 82 |     """
 83 |     if file_type == 'csv':
 84 |         data = pd.read_csv(file_path)
 85 |     elif file_type == 'json':
 86 |         data = pd.read_json(file_path)
 87 |     else:
 88 |         raise ValueError(f"Unsupported file type: {file_type}")
 89 |     logging.info(f"Data loaded from {file_path}")
 90 |     return data
 91 | 
 92 | def preprocess_data(data):
 93 |     """
 94 |     Preprocessing data.
 95 |     :param data: Data to preprocess
 96 |     :return: Preprocessed data
 97 |     """
 98 |     data = data.fillna(0)
 99 |     logging.info("Data preprocessing complete.")
100 |     return data
101 | 
def split_data(data, labels, test_size=0.2):
    """
    Split features and labels into training and test portions.

    The split is seeded with ``Config.RANDOM_SEED``.
    :param data: Data features
    :param labels: Data labels
    :param test_size: Proportion of test set
    :return: Tuple ``(X_train, X_test, y_train, y_test)``
    """
    from sklearn.model_selection import train_test_split

    pieces = train_test_split(
        data,
        labels,
        test_size=test_size,
        random_state=Config.RANDOM_SEED,
    )
    logging.info(f"Data split into training and test sets with test size = {test_size}")
    return tuple(pieces)
114 | 
if __name__ == "__main__":
    # Small smoke demo: JSON round trip through the log directory, then metrics.
    create_directory(Config.LOG_DIR)

    sample_path = os.path.join(Config.LOG_DIR, 'sample_data.json')
    sample_data = {'name': 'Deepfake Detection', 'version': '1.0'}
    save_to_file(sample_data, sample_path)
    loaded_data = read_from_file(sample_path)
    print(loaded_data)

    true_labels = [0, 1, 1, 0, 1]
    predictions = [0, 1, 0, 0, 1]
    metrics = calculate_metrics(true_labels, predictions)
    print(metrics)
127 | 


--------------------------------------------------------------------------------
/src/utils/__init__.py:
--------------------------------------------------------------------------------
 1 | from .logger import setup_logger
 2 | from .metrics import calculate_metrics
 3 | from .visualization import plot_metrics
 4 | from .helpers import create_directory, save_to_file, read_from_file
 5 | from .file_utils import load_data
 6 | from .data_utils import preprocess_data, split_data
 7 | 
# Names exported by `from <package> import *`; mirrors the imports above.
__all__ = [
    'setup_logger',
    'calculate_metrics',
    'plot_metrics',
    'create_directory',
    'save_to_file',
    'read_from_file',
    'load_data',
    'preprocess_data',
    'split_data'
]
19 | 


--------------------------------------------------------------------------------
/src/utils/data_utils.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | from sklearn.model_selection import train_test_split
 3 | 
 4 | def preprocess_data(data: pd.DataFrame) -> pd.DataFrame:
 5 |     """
 6 |     Preprocessing the input data by filling missing values, normalizing features, etc.
 7 |     :param data: DataFrame containing the input data
 8 |     :return: Preprocessed DataFrame
 9 |     """
10 |     data = data.fillna(0)
11 |     
12 |     numeric_features = data.select_dtypes(include=['int64', 'float64']).columns
13 |     data[numeric_features] = (data[numeric_features] - data[numeric_features].mean()) / data[numeric_features].std()
14 |     categorical_features = data.select_dtypes(include=['object']).columns
15 |     data = pd.get_dummies(data, columns=categorical_features)
16 |     
17 |     return data
18 | 
def split_data(data: pd.DataFrame, test_size: float = 0.2, random_state: int = 42):
    """
    Split a labelled DataFrame into training and test features/labels.
    :param data: DataFrame containing feature columns and a 'label' column
    :param test_size: Proportion of the data to include in the test set
    :param random_state: Seed used by the random number generator
    :return: Tuple ``(X_train, X_test, y_train, y_test)``
    """
    target = data['label']
    predictors = data.drop(columns=['label'])

    pieces = train_test_split(
        predictors,
        target,
        test_size=test_size,
        random_state=random_state,
    )
    return tuple(pieces)
33 | 
def balance_data(data: pd.DataFrame) -> pd.DataFrame:
    """
    Balance a binary (labels 0/1) dataset by oversampling the smaller class.

    The minority class is determined from the actual class sizes rather than
    assumed to be label 1, so a frame dominated by label 1 is no longer shrunk.
    On a tie, label 0 is treated as the majority.
    :param data: DataFrame containing the input data and a 'label' column
    :return: Majority rows followed by the upsampled minority rows
    :raises ValueError: If either class has no samples
    """
    from sklearn.utils import resample

    class_zero = data[data['label'] == 0]
    class_one = data[data['label'] == 1]

    if len(class_zero) >= len(class_one):
        majority_class, minority_class = class_zero, class_one
    else:
        majority_class, minority_class = class_one, class_zero

    if minority_class.empty:
        # Nothing to sample from; fail with a clear message.
        raise ValueError("Cannot balance data: at least one of the classes (label 0/1) has no samples")

    # Upsample the minority class (with replacement) to the majority size.
    minority_upsampled = resample(minority_class,
                                  replace=True,
                                  n_samples=len(majority_class),
                                  random_state=42)

    upsampled_data = pd.concat([majority_class, minority_upsampled])

    return upsampled_data
55 | 
if __name__ == "__main__":
    # Demo: preprocess, split and balance a tiny hand-made frame.
    sample_data = {
        'feature1': [1, 2, 3, 4, 5, 6],
        'feature2': ['A', 'B', 'A', 'A', 'B', 'B'],
        'label': [0, 1, 0, 0, 1, 1]
    }
    df = pd.DataFrame(sample_data)

    preprocessed_df = preprocess_data(df)
    print("Preprocessed Data:\n", preprocessed_df)

    X_train, X_test, y_train, y_test = split_data(preprocessed_df)
    for caption, part in (
        ("Training Features:\n", X_train),
        ("Test Features:\n", X_test),
        ("Training Labels:\n", y_train),
        ("Test Labels:\n", y_test),
    ):
        print(caption, part)

    balanced_df = balance_data(df)
    print("Balanced Data:\n", balanced_df)
75 | 


--------------------------------------------------------------------------------
/src/utils/file_utils.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import pandas as pd
 3 | import json
 4 | import logging
 5 | 
 6 | def load_data(file_path: str, file_type: str = 'csv') -> pd.DataFrame:
 7 |     """
 8 |     Loading data from file.
 9 |     :param file_path: Path to the file
10 |     :param file_type: Type of the file ('csv', 'json', 'excel')
11 |     :return: DataFrame containing the loaded data
12 |     """
13 |     if file_type == 'csv':
14 |         data = pd.read_csv(file_path)
15 |     elif file_type == 'json':
16 |         data = pd.read_json(file_path)
17 |     elif file_type == 'excel':
18 |         data = pd.read_excel(file_path)
19 |     else:
20 |         raise ValueError(f"Unsupported file type: {file_type}")
21 |     logging.info(f"Data loaded from {file_path}")
22 |     return data
23 | 
def save_data(data: pd.DataFrame, file_path: str, file_type: str = 'csv'):
    """
    Write a DataFrame to disk in the requested format.

    CSV and Excel are written without the index; JSON is written as
    line-delimited records.
    :param data: DataFrame containing the data to save
    :param file_path: Path to the file
    :param file_type: Type of the file ('csv', 'json', 'excel')
    :raises ValueError: If ``file_type`` is not supported (nothing is written)
    """
    # Reject unknown formats before any write happens.
    if file_type not in ('csv', 'json', 'excel'):
        raise ValueError(f"Unsupported file type: {file_type}")

    if file_type == 'excel':
        data.to_excel(file_path, index=False)
    elif file_type == 'json':
        data.to_json(file_path, orient='records', lines=True)
    else:
        data.to_csv(file_path, index=False)
    logging.info(f"Data saved to {file_path}")
40 | 
def save_to_file(data, filename: str):
    """
    Write data to a file as JSON or plain text, replacing existing content.

    Dicts and lists are dumped as JSON with a 4-space indent; every other value
    is written via ``str()``.
    :param data: Data to save
    :param filename: Name of the file
    """
    as_json = isinstance(data, (dict, list))
    with open(filename, 'w') as handle:
        if not as_json:
            handle.write(str(data))
        else:
            json.dump(data, handle, indent=4)
    logging.info(f"Data saved to {filename}")
53 | 
def read_from_file(filename: str):
    """
    Read a file, decoding JSON when the name ends with '.json'.
    :param filename: Name of the file
    :return: Parsed JSON for '.json' files, otherwise the file's text
    """
    with open(filename, 'r') as handle:
        return json.load(handle) if filename.endswith('.json') else handle.read()
65 | 
def create_directory(path: str):
    """
    Ensure a directory (and any missing parents) exists.

    Uses ``exist_ok=True`` instead of a separate existence check, so concurrent
    callers creating the same directory cannot race each other.
    :param path: Directory path
    :raises FileExistsError: If ``path`` exists but is not a directory
    """
    os.makedirs(path, exist_ok=True)
    logging.info(f"Directory created at {path}")
74 | 
if __name__ == "__main__":
    # Demo: round-trip a small DataFrame through CSV and a dict through JSON.
    create_directory('example_dir')

    df = pd.DataFrame({
        'feature1': [1, 2, 3],
        'feature2': ['A', 'B', 'C'],
        'label': [0, 1, 0]
    })

    save_data(df, 'example_dir/sample_data.csv', file_type='csv')
    loaded_df = load_data('example_dir/sample_data.csv', file_type='csv')
    print("Loaded DataFrame:\n", loaded_df)

    sample_dict = {'name': 'Deepfake Detection', 'version': '1.0'}
    save_to_file(sample_dict, 'example_dir/sample_data.json')
    loaded_dict = read_from_file('example_dir/sample_data.json')
    print("Loaded JSON:\n", loaded_dict)
93 | 


--------------------------------------------------------------------------------
/src/utils/helpers.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import json
  3 | import logging
  4 | from datetime import datetime
  5 | 
  6 | def create_directory(path: str):
  7 |     """
  8 |     Creating directory if it does not exist.
  9 |     :param path: Directory path
 10 |     """
 11 |     if not os.path.exists(path):
 12 |         os.makedirs(path)
 13 |     logging.info(f"Directory created at {path}")
 14 | 
 15 | def save_to_file(data, filename: str):
 16 |     """
 17 |     Saving data to file (JSON/plain text).
 18 |     :param data: Data to save
 19 |     :param filename: Name of the file
 20 |     """
 21 |     with open(filename, 'w') as file:
 22 |         if isinstance(data, (dict, list)):
 23 |             json.dump(data, file, indent=4)
 24 |         else:
 25 |             file.write(str(data))
 26 |     logging.info(f"Data saved to {filename}")
 27 | 
 28 | def read_from_file(filename: str):
 29 |     """
 30 |     Reading data from file (JSON/plain text).
 31 |     :param filename: Name of the file
 32 |     :return: Data read from the file
 33 |     """
 34 |     with open(filename, 'r') as file:
 35 |         if filename.endswith('.json'):
 36 |             return json.load(file)
 37 |         else:
 38 |             return file.read()
 39 | 
 40 | def get_timestamp() -> str:
 41 |     """
 42 |     To get the current timestamp in specific format.
 43 |     :return: Timestamp string
 44 |     """
 45 |     return datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
 46 | 
 47 | def setup_logger(name: str, log_file: str, level=logging.INFO):
 48 |     """
 49 |     To setup logger.
 50 |     :param name: Name of the logger
 51 |     :param log_file: File to log messages to
 52 |     :param level: Logging level
 53 |     :return: Configured logger
 54 |     """
 55 |     formatter = logging.Formatter('%(asctime)s %(name)s %(levelname)s: %(message)s')
 56 |     
 57 |     handler = logging.FileHandler(log_file)
 58 |     handler.setFormatter(formatter)
 59 |     
 60 |     logger = logging.getLogger(name)
 61 |     logger.setLevel(level)
 62 |     logger.addHandler(handler)
 63 |     
 64 |     return logger
 65 | 
 66 | def calculate_metrics(true_labels, predictions):
 67 |     """
 68 |     Calculating accuracy, precision, recall, and F1 score.
 69 |     :param true_labels: True labels
 70 |     :param predictions: Model predictions
 71 |     :return: Dictionary with metrics
 72 |     """
 73 |     from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
 74 |     
 75 |     metrics = {
 76 |         'accuracy': accuracy_score(true_labels, predictions),
 77 |         'precision': precision_score(true_labels, predictions),
 78 |         'recall': recall_score(true_labels, predictions),
 79 |         'f1_score': f1_score(true_labels, predictions)
 80 |     }
 81 |     logging.info(f"Metrics calculated: {metrics}")
 82 |     return metrics
 83 | 
 84 | def plot_metrics(history, metric='accuracy'):
 85 |     """
 86 |     Ploting training and validation metrics.
 87 |     :param history: Training history
 88 |     :param metric: Metric to plot
 89 |     """
 90 |     import matplotlib.pyplot as plt
 91 |     
 92 |     plt.plot(history.history[metric])
 93 |     plt.plot(history.history[f'val_{metric}'])
 94 |     plt.title(f'Model {metric}')
 95 |     plt.ylabel(metric)
 96 |     plt.xlabel('Epoch')
 97 |     plt.legend(['Train', 'Validation'], loc='upper left')
 98 |     plt.savefig(os.path.join('logs', f'{metric}_plot.png'))
 99 |     plt.close()
100 |     logging.info(f"{metric} plot saved.")
101 | 
if __name__ == "__main__":
    # Demo run exercising each helper in this module once.
    create_directory('example_dir')

    # JSON round trip.
    save_to_file({'name': 'Deepfake Detection', 'version': '1.0'}, 'example_dir/sample_data.json')
    print("Loaded JSON:\n", read_from_file('example_dir/sample_data.json'))

    # To get current timestamp
    print("Current Timestamp:", get_timestamp())

    demo_logger = setup_logger('example_logger', 'example_dir/example.log')
    demo_logger.info("This is a test log message.")

    demo_truth = [0, 1, 1, 0, 1]
    demo_preds = [0, 1, 0, 0, 1]
    print("Metrics:\n", calculate_metrics(demo_truth, demo_preds))

    class DummyHistory:
        # Minimal stand-in exposing the `history` mapping plot_metrics reads.
        history = {
            'accuracy': [0.1, 0.2, 0.3],
            'val_accuracy': [0.15, 0.25, 0.35]
        }
    plot_metrics(DummyHistory())
130 | 


--------------------------------------------------------------------------------
/src/utils/logger.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | import os
 3 | from src.config import Config
 4 | 
 5 | def setup_logger(name: str, log_file: str, level=logging.INFO) -> logging.Logger:
 6 |     """
 7 |     Setting up logger.
 8 |     :param name: Name of the logger
 9 |     :param log_file: File to log messages to
10 |     :param level: Logging level
11 |     :return: Configured logger
12 |     """
13 |     
14 |     log_dir = os.path.dirname(log_file)
15 |     if not os.path.exists(log_dir):
16 |         os.makedirs(log_dir)
17 |     
18 |     formatter = logging.Formatter('%(asctime)s %(name)s %(levelname)s: %(message)s')
19 | 
20 |     file_handler = logging.FileHandler(log_file)
21 |     file_handler.setFormatter(formatter)
22 | 
23 |     # To create stream handler to log to console
24 |     stream_handler = logging.StreamHandler()
25 |     stream_handler.setFormatter(formatter)
26 | 
27 |     logger = logging.getLogger(name)
28 |     logger.setLevel(level)
29 |     logger.addHandler(file_handler)
30 |     logger.addHandler(stream_handler)
31 | 
32 |     if logger.hasHandlers():
33 |         logger.handlers.clear()
34 |         logger.addHandler(file_handler)
35 |         logger.addHandler(stream_handler)
36 | 
37 |     return logger
38 |   
if __name__ == "__main__":
    # Demo run: make sure the configured log directory exists, then emit one
    # INFO and one ERROR record through a freshly configured logger.
    if not os.path.exists(Config.LOG_DIR):
        os.makedirs(Config.LOG_DIR)

    demo_log_path = os.path.join(Config.LOG_DIR, 'example.log')
    demo_logger = setup_logger('example_logger', demo_log_path)
    demo_logger.info("This is a test log message.")
    demo_logger.error("This is a test error message.")
47 | 


--------------------------------------------------------------------------------
/src/utils/metrics.py:
--------------------------------------------------------------------------------
 1 | from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix
 2 | import logging
 3 | import matplotlib.pyplot as plt
 4 | import seaborn as sns
 5 | import os
 6 | 
 7 | def calculate_metrics(true_labels, predictions):
 8 |     """
 9 |     Calculating performance metrics.
10 |     :param true_labels: True labels
11 |     :param predictions: Model predictions
12 |     :return: Dictionary with metrics
13 |     """
14 |     metrics = {
15 |         'accuracy': accuracy_score(true_labels, predictions),
16 |         'precision': precision_score(true_labels, predictions),
17 |         'recall': recall_score(true_labels, predictions),
18 |         'f1_score': f1_score(true_labels, predictions),
19 |         'roc_auc': roc_auc_score(true_labels, predictions)
20 |     }
21 |     logging.info(f"Metrics calculated: {metrics}")
22 |     return metrics
23 | 
def plot_confusion_matrix(true_labels, predictions, labels, output_dir, filename='confusion_matrix.png'):
    """
    Plotting the confusion matrix as an annotated heatmap and saving it.
    :param true_labels: True labels
    :param predictions: Model predictions
    :param labels: List of labels used as tick labels on both axes
    :param output_dir: Directory to save the plot (created if missing)
    :param filename: Name of the output file
    """
    matrix = confusion_matrix(true_labels, predictions)

    plt.figure(figsize=(10, 7))
    heatmap_options = dict(annot=True, fmt='d', cmap='Blues', xticklabels=labels, yticklabels=labels)
    sns.heatmap(matrix, **heatmap_options)
    plt.xlabel('Predicted')
    plt.ylabel('Actual')
    plt.title('Confusion Matrix')

    directory_missing = not os.path.exists(output_dir)
    if directory_missing:
        os.makedirs(output_dir)

    plt.savefig(os.path.join(output_dir, filename))
    plt.close()
    logging.info(f"Confusion matrix plot saved to {os.path.join(output_dir, filename)}")
44 | 
def log_metrics(metrics, logger_name='metrics_logger', log_file='metrics.log'):
    """
    To log the calculated metrics to file.
    A file handler is attached for this one record only and always detached
    and closed afterwards. The named logger's level is set to INFO.
    :param metrics: Dictionary with calculated metrics
    :param logger_name: Name of the logger
    :param log_file: File to log metrics (parent directories are created if missing)
    """
    # FileHandler opens the file immediately and fails if its directory is missing.
    parent_dir = os.path.dirname(log_file)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    logger = logging.getLogger(logger_name)
    handler = logging.FileHandler(log_file)
    handler.setFormatter(logging.Formatter('%(asctime)s %(name)s %(levelname)s: %(message)s'))
    logger.addHandler(handler)
    try:
        logger.setLevel(logging.INFO)
        logger.info(f"Metrics: {metrics}")
    finally:
        # Detach and close even if formatting/logging raised, so the handler
        # and its open file never stay attached to the shared logger.
        logger.removeHandler(handler)
        handler.close()
61 | 
if __name__ == "__main__":
    # Demo run: score a tiny hand-written example, plot its confusion matrix
    # and append the scores to a log file under 'logs'.
    demo_truth = [0, 1, 1, 0, 1]
    demo_preds = [0, 1, 0, 0, 1]

    demo_scores = calculate_metrics(demo_truth, demo_preds)
    print("Calculated Metrics:\n", demo_scores)

    plot_confusion_matrix(demo_truth, demo_preds, [0, 1], 'logs', 'example_confusion_matrix.png')

    log_metrics(demo_scores, log_file='logs/example_metrics.log')
73 | 


--------------------------------------------------------------------------------
/src/utils/visualization.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import matplotlib.pyplot as plt
  3 | import seaborn as sns
  4 | import pandas as pd
  5 | import numpy as np
  6 | from src.config import config
  7 | 
  8 | def plot_histogram(data, column, bins=30, output_dir=config.LOG_DIR, filename='histogram.png'):
  9 |     """
 10 |     Plotting and saving histogram of a specified column.
 11 |     :param data: DataFrame containing the data
 12 |     :param column: Column to plot the histogram for
 13 |     :param bins: Number of bins for the histogram
 14 |     :param output_dir: Directory to save the plot
 15 |     :param filename: Name of the output file
 16 |     """
 17 |     plt.figure(figsize=(10, 6))
 18 |     sns.histplot(data[column], bins=bins, kde=True)
 19 |     plt.title(f'Histogram of {column}')
 20 |     plt.xlabel(column)
 21 |     plt.ylabel('Frequency')
 22 |     if not os.path.exists(output_dir):
 23 |         os.makedirs(output_dir)
 24 |     plt.savefig(os.path.join(output_dir, filename))
 25 |     plt.close()
 26 |     print(f"Histogram plot saved to {os.path.join(output_dir, filename)}")
 27 | 
 28 | def plot_scatter(data, x_column, y_column, output_dir=config.LOG_DIR, filename='scatter_plot.png'):
 29 |     """
 30 |     Plotting and saving scatter plot of two specified columns.
 31 |     :param data: DataFrame containing the data
 32 |     :param x_column: Column to plot on the x-axis
 33 |     :param y_column: Column to plot on the y-axis
 34 |     :param output_dir: Directory to save the plot
 35 |     :param filename: Name of the output file
 36 |     """
 37 |     plt.figure(figsize=(10, 6))
 38 |     sns.scatterplot(x=data[x_column], y=data[y_column])
 39 |     plt.title(f'Scatter Plot of {x_column} vs {y_column}')
 40 |     plt.xlabel(x_column)
 41 |     plt.ylabel(y_column)
 42 |     if not os.path.exists(output_dir):
 43 |         os.makedirs(output_dir)
 44 |     plt.savefig(os.path.join(output_dir, filename))
 45 |     plt.close()
 46 |     print(f"Scatter plot saved to {os.path.join(output_dir, filename)}")
 47 | 
 48 | def plot_correlation_matrix(data, output_dir=config.LOG_DIR, filename='correlation_matrix.png'):
 49 |     """
 50 |     Plotting and saving correlation matrix of the data.
 51 |     :param data: DataFrame containing the data
 52 |     :param output_dir: Directory to save the plot
 53 |     :param filename: Name of the output file
 54 |     """
 55 |     plt.figure(figsize=(12, 10))
 56 |     correlation_matrix = data.corr()
 57 |     sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', fmt='.2f', linewidths=0.5)
 58 |     plt.title('Correlation Matrix')
 59 |     if not os.path.exists(output_dir):
 60 |         os.makedirs(output_dir)
 61 |     plt.savefig(os.path.join(output_dir, filename))
 62 |     plt.close()
 63 |     print(f"Correlation matrix plot saved to {os.path.join(output_dir, filename)}")
 64 | 
 65 | def plot_time_series(data, date_column, value_column, output_dir=config.LOG_DIR, filename='time_series.png'):
 66 |     """
 67 |     Plotting and saving time series plot.
 68 |     :param data: DataFrame containing the data
 69 |     :param date_column: Column containing the date values
 70 |     :param value_column: Column containing the values to plot
 71 |     :param output_dir: Directory to save the plot
 72 |     :param filename: Name of the output file
 73 |     """
 74 |     plt.figure(figsize=(12, 6))
 75 |     plt.plot(data[date_column], data[value_column])
 76 |     plt.title(f'Time Series of {value_column} over Time')
 77 |     plt.xlabel('Date')
 78 |     plt.ylabel(value_column)
 79 |     if not os.path.exists(output_dir):
 80 |         os.makedirs(output_dir)
 81 |     plt.savefig(os.path.join(output_dir, filename))
 82 |     plt.close()
 83 |     print(f"Time series plot saved to {os.path.join(output_dir, filename)}")
 84 | 
 85 | if __name__ == "__main__":
 86 | 
 87 |     example_data = pd.DataFrame({
 88 |         'date': pd.date_range(start='2021-01-01', periods=100, freq='D'),
 89 |         'value': np.random.randn(100).cumsum(),
 90 |         'category': np.random.choice(['A', 'B', 'C'], size=100),
 91 |         'value2': np.random.randn(100)
 92 |     })
 93 | 
 94 |     plot_histogram(example_data, 'value', filename='example_histogram.png')
 95 | 
 96 |     plot_scatter(example_data, 'value', 'value2', filename='example_scatter_plot.png')
 97 | 
 98 |     plot_correlation_matrix(example_data, filename='example_correlation_matrix.png')
 99 | 
100 |     plot_time_series(example_data, 'date', 'value', filename='example_time_series.png')
101 | 


--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
 1 | from .test_data_loading import TestDataLoading
 2 | from .test_model import TestModel
 3 | from .test_training import TestTraining
 4 | from .test_evaluation import TestEvaluation
 5 | from .test_utils import TestUtils
 6 | 
 7 | __all__ = [
 8 |     'TestDataLoading',
 9 |     'TestModel',
10 |     'TestTraining',
11 |     'TestEvaluation',
12 |     'TestUtils'
13 | ]
14 | 


--------------------------------------------------------------------------------
/tests/test_data_loading.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | import os
 3 | import numpy as np
 4 | import cv2
 5 | from src.dataset.data_loader import create_dataloader
 6 | 
 7 | class TestDataLoading(unittest.TestCase):
 8 |     def setUp(self):
 9 |         """
10 |         Setting up test variables and environment.
11 |         """
12 |         self.data_csv = 'tests/data/sample_data.csv'
13 |         self.image_dir = 'tests/data/images'
14 |         self.batch_size = 4
15 |         self.num_workers = 2
16 | 
17 |         os.makedirs('tests/data/images', exist_ok=True)
18 |         with open(self.data_csv, 'w') as f:
19 |             f.write('image,label\n')
20 |             for i in range(10):
21 |                 image_path = f'image_{i}.jpg'
22 |                 f.write(f'{image_path},{i % 2}\n')
23 |                 image = (255 * np.random.rand(224, 224, 3)).astype(np.uint8)
24 |                 cv2.imwrite(os.path.join(self.image_dir, image_path), image)
25 | 
26 |     def test_data_loading(self):
27 |         """
28 |         Testing data loading functionality.
29 |         """
30 |         dataloader = create_dataloader(self.data_csv, self.image_dir, batch_size=self.batch_size, num_workers=self.num_workers)
31 |         
32 |         batch_count = 0
33 |         for images, labels in dataloader:
34 |             self.assertEqual(len(images), self.batch_size)
35 |             self.assertEqual(len(labels), self.batch_size)
36 |             batch_count += 1
37 |         
38 |         self.assertGreater(batch_count, 0)
39 | 
40 |     def tearDown(self):
41 |         """
42 |         Cleaning up after tests.
43 |         """
44 |         import shutil
45 |         shutil.rmtree('tests/data')
46 | 
# Run this module's tests when it is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
49 | 


--------------------------------------------------------------------------------
/tests/test_evaluation.py:
--------------------------------------------------------------------------------
  1 | import unittest
  2 | import torch
  3 | from torch.utils.data import DataLoader, TensorDataset
  4 | import numpy as np
  5 | from src.evaluation.cnn_evaluation import evaluate_cnn
  6 | from src.evaluation.transformer_evaluation import evaluate_transformer
  7 | from src.evaluation.svm_evaluation import evaluate_svm
  8 | from src.evaluation.bayesian_evaluation import evaluate_bayesian
  9 | from src.evaluation.vision_transformer_evaluation import evaluate_vision_transformer
 10 | 
 11 | class TestEvaluation(unittest.TestCase):
 12 |     def setUp(self):
 13 |         """
 14 |         Setting up test variables and environment.
 15 |         """
 16 |         
 17 |         self.num_samples = 100
 18 |         self.num_features = 224 * 224 * 3
 19 |         self.num_classes = 2
 20 |         self.batch_size = 10
 21 | 
 22 |         X = np.random.randn(self.num_samples, 3, 224, 224).astype(np.float32)
 23 |         y = np.random.randint(0, self.num_classes, self.num_samples)
 24 |         dataset = TensorDataset(torch.tensor(X), torch.tensor(y))
 25 |         self.dataloader = DataLoader(dataset, batch_size=self.batch_size)
 26 | 
 27 |         self.cnn_model = torch.nn.Sequential(
 28 |             torch.nn.Conv2d(3, 16, kernel_size=3, padding=1),
 29 |             torch.nn.ReLU(),
 30 |             torch.nn.Flatten(),
 31 |             torch.nn.Linear(16 * 224 * 224, self.num_classes)
 32 |         )
 33 | 
 34 |         self.transformer_model = torch.nn.Sequential(
 35 |             torch.nn.Conv2d(3, 16, kernel_size=3, padding=1),
 36 |             torch.nn.ReLU(),
 37 |             torch.nn.Flatten(),
 38 |             torch.nn.Linear(16 * 224 * 224, self.num_classes)
 39 |         )
 40 | 
 41 |         class DummyModel:
 42 |             def predict(self, X):
 43 |                 return np.random.randint(0, self.num_classes, len(X))
 44 | 
 45 |         self.svm_model = DummyModel()
 46 |         self.bayesian_model = DummyModel()
 47 |         self.vision_transformer_model = self.transformer_model  
 48 | 
 49 |     def test_evaluate_cnn(self):
 50 |         """
 51 |         Testing CNN model evaluation.
 52 |         """
 53 |         device = 'cpu'
 54 |         metrics = evaluate_cnn(self.cnn_model, self.dataloader, device)
 55 |         self.assertIn('accuracy', metrics)
 56 |         self.assertIn('f1_score', metrics)
 57 |         self.assertIn('precision', metrics)
 58 |         self.assertIn('recall', metrics)
 59 | 
 60 |     def test_evaluate_transformer(self):
 61 |         """
 62 |         Testing Transformer model evaluation.
 63 |         """
 64 |         device = 'cpu'
 65 |         metrics = evaluate_transformer(self.transformer_model, self.dataloader, device)
 66 |         self.assertIn('accuracy', metrics)
 67 |         self.assertIn('f1_score', metrics)
 68 |         self.assertIn('precision', metrics)
 69 |         self.assertIn('recall', metrics)
 70 | 
 71 |     def test_evaluate_svm(self):
 72 |         """
 73 |         Testing SVM model evaluation.
 74 |         """
 75 |         X_test = np.random.randn(self.num_samples, self.num_features)
 76 |         y_test = np.random.randint(0, self.num_classes, self.num_samples)
 77 |         metrics = evaluate_svm(self.svm_model, X_test, y_test)
 78 |         self.assertIn('accuracy', metrics)
 79 |         self.assertIn('f1_score', metrics)
 80 |         self.assertIn('precision', metrics)
 81 |         self.assertIn('recall', metrics)
 82 | 
 83 |     def test_evaluate_bayesian(self):
 84 |         """
 85 |         Testing Bayesian model evaluation.
 86 |         """
 87 |         X_test = np.random.randn(self.num_samples, self.num_features)
 88 |         y_test = np.random.randint(0, self.num_classes, self.num_samples)
 89 |         metrics = evaluate_bayesian(self.bayesian_model, X_test, y_test)
 90 |         self.assertIn('accuracy', metrics)
 91 |         self.assertIn('f1_score', metrics)
 92 |         self.assertIn('precision', metrics)
 93 |         self.assertIn('recall', metrics)
 94 | 
 95 |     def test_evaluate_vision_transformer(self):
 96 |         """
 97 |         Testing Vision Transformer model evaluation.
 98 |         """
 99 |         device = 'cpu'
100 |         metrics = evaluate_vision_transformer(self.vision_transformer_model, self.dataloader, device)
101 |         self.assertIn('accuracy', metrics)
102 |         self.assertIn('f1_score', metrics)
103 |         self.assertIn('precision', metrics)
104 |         self.assertIn('recall', metrics)
105 | 
# Run this module's tests when it is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
108 | 


--------------------------------------------------------------------------------
/tests/test_model.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | import torch
 3 | from torchsummary import summary
 4 | from src.models.cnn import CNNModel
 5 | from src.models.transformer import TransformerModel
 6 | from src.models.svm import SVMModel
 7 | from src.models.bayesian import BayesianModel
 8 | from src.models.vision_transformer import VisionTransformer
 9 | 
class TestModel(unittest.TestCase):
    """Forward-pass checks on the output size of each model architecture."""

    def setUp(self):
        """
        Setting up the shared input shape and class count.
        """
        self.input_shape = (3, 224, 224)
        self.num_classes = 10

    def test_cnn_model(self):
        """
        Testing CNN model architecture.
        """
        net = CNNModel(num_classes=self.num_classes)
        net.eval()
        batch = torch.randn(1, *self.input_shape)
        logits = net(batch)
        self.assertEqual(logits.shape[1], self.num_classes)
        summary(net, self.input_shape)

    def test_transformer_model(self):
        """
        Testing Transformer model architecture.
        """
        # Flattened size of one channel (height * width).
        flat_dim = self.input_shape[1] * self.input_shape[2]
        net = TransformerModel(
            input_dim=flat_dim,
            model_dim=512,
            num_heads=8,
            num_layers=6,
            output_dim=self.num_classes
        )
        net.eval()
        batch = torch.randn(1, flat_dim)
        logits = net(batch)
        self.assertEqual(logits.shape[1], self.num_classes)
        summary(net, (flat_dim,))

    def test_svm_model(self):
        """
        Testing SVM model architecture.
        """
        flat_dim = self.input_shape[1] * self.input_shape[2]
        classifier = SVMModel()
        features = torch.randn(1, flat_dim).numpy()
        predicted = classifier.predict(features)
        self.assertEqual(len(predicted), 1)
        self.assertIn(predicted[0], range(self.num_classes))

    def test_bayesian_model(self):
        """
        Testing Bayesian model architecture.
        """
        flat_dim = self.input_shape[1] * self.input_shape[2]
        classifier = BayesianModel()
        features = torch.randn(1, flat_dim).numpy()
        predicted = classifier.predict(features)
        self.assertEqual(len(predicted), 1)
        self.assertIn(predicted[0], range(self.num_classes))

    def test_vision_transformer_model(self):
        """
        Testing Vision Transformer model architecture.
        """
        vit_options = dict(
            img_size=224,
            patch_size=16,
            num_classes=self.num_classes,
            dim=768,
            depth=12,
            heads=12,
            mlp_dim=3072
        )
        net = VisionTransformer(**vit_options)
        net.eval()
        batch = torch.randn(1, *self.input_shape)
        logits = net(batch)
        self.assertEqual(logits.shape[1], self.num_classes)
        summary(net, self.input_shape)
84 | 
# Run this module's tests when it is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
87 | 


--------------------------------------------------------------------------------
/tests/test_training.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | import torch
 3 | from torch.utils.data import DataLoader, TensorDataset
 4 | import numpy as np
 5 | from src.training.cnn_training import train_cnn
 6 | from src.training.transformer_training import train_transformer
 7 | from src.training.svm_training import train_svm
 8 | from src.training.bayesian_training import train_bayesian
 9 | from src.training.vision_transformer_training import train_vision_transformer
10 | 
class TestTraining(unittest.TestCase):
    """Checks the objects returned by each train_* entry point after one epoch."""

    def setUp(self):
        """
        Building a random image-shaped dataset and a CPU data loader.
        """
        self.num_samples = 100
        self.num_features = 224 * 224 * 3
        self.num_classes = 2
        self.batch_size = 10

        features = np.random.randn(self.num_samples, 3, 224, 224).astype(np.float32)
        targets = np.random.randint(0, self.num_classes, self.num_samples)
        self.dataloader = DataLoader(
            TensorDataset(torch.tensor(features), torch.tensor(targets)),
            batch_size=self.batch_size
        )
        self.device = 'cpu'

    def _assert_torch_triplet(self, artifacts):
        """
        Asserting a (model, optimizer, criterion) result has the expected torch types.
        :param artifacts: Three-item result of a torch-based train_* function
        """
        model, optimizer, criterion = artifacts
        self.assertIsInstance(model, torch.nn.Module)
        self.assertIsInstance(optimizer, torch.optim.Optimizer)
        self.assertIsInstance(criterion, torch.nn.Module)

    def _assert_has_predict(self, model):
        """
        Asserting the trained model exposes a callable ``predict``.
        :param model: Result of a non-torch train_* function
        """
        self.assertTrue(hasattr(model, 'predict'))
        self.assertTrue(callable(getattr(model, 'predict', None)))

    def test_train_cnn(self):
        """
        Testing CNN model training.
        """
        self._assert_torch_triplet(train_cnn(self.dataloader, self.device, num_epochs=1))

    def test_train_transformer(self):
        """
        Testing Transformer model training.
        """
        self._assert_torch_triplet(train_transformer(self.dataloader, self.device, num_epochs=1))

    def test_train_svm(self):
        """
        Testing SVM model training.
        """
        self._assert_has_predict(train_svm(self.dataloader, num_epochs=1))

    def test_train_bayesian(self):
        """
        Testing Bayesian model training.
        """
        self._assert_has_predict(train_bayesian(self.dataloader, num_epochs=1))

    def test_train_vision_transformer(self):
        """
        Testing Vision Transformer model training.
        """
        self._assert_torch_triplet(train_vision_transformer(self.dataloader, self.device, num_epochs=1))
69 | 
# Run this module's tests when it is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
72 | 


--------------------------------------------------------------------------------
/tests/test_utils.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | import os
 3 | from src.utils.helpers import some_helper_function
 4 | from src.utils.logger import setup_logger
 5 | from src.utils.data_utils import load_data, save_data
 6 | from src.utils.file_utils import create_directory, delete_file
 7 | import pandas as pd
 8 | import numpy as np
 9 | 
class TestUtils(unittest.TestCase):
    """Tests for the helper, logger, data and file utility modules."""

    def setUp(self):
        """
        Setting up test variables and environment.
        """
        self.test_dir = 'tests/temp'
        os.makedirs(self.test_dir, exist_ok=True)
        self.test_file = os.path.join(self.test_dir, 'test_file.csv')
        self.data = pd.DataFrame({
            'col1': [1, 2, 3],
            'col2': [4, 5, 6]
        })

    def test_some_helper_function(self):
        """
        To test helper function.
        """
        # NOTE(review): `some_helper_function` is imported from
        # src.utils.helpers at module level; verify that it is actually
        # defined there, otherwise this whole module fails at import time.
        result = some_helper_function(self.data)
        self.assertTrue(result)

    def test_setup_logger(self):
        """
        To test logger setup.
        """
        log_path = os.path.join(self.test_dir, 'test_log.log')
        logger = setup_logger('test_logger', log_file=log_path)
        try:
            logger.info('This is a test log message.')
            self.assertTrue(os.path.exists(log_path))
        finally:
            # Detach and close the handlers before tearDown deletes the
            # directory: an open log file blocks removal on some platforms,
            # and the named logger would otherwise keep stale handlers for
            # later tests.
            for handler in list(logger.handlers):
                logger.removeHandler(handler)
                handler.close()

    def test_load_data(self):
        """
        To test loading data.
        """
        self.data.to_csv(self.test_file, index=False)
        loaded_data = load_data(self.test_file)
        pd.testing.assert_frame_equal(loaded_data, self.data)

    def test_save_data(self):
        """
        To test saving data.
        """
        save_data(self.data, self.test_file)
        self.assertTrue(os.path.exists(self.test_file))
        loaded_data = pd.read_csv(self.test_file)
        pd.testing.assert_frame_equal(loaded_data, self.data)

    def test_create_directory(self):
        """
        To test creating directory.
        """
        new_dir = os.path.join(self.test_dir, 'new_dir')
        create_directory(new_dir)
        self.assertTrue(os.path.exists(new_dir))

    def test_delete_file(self):
        """
        To test deleting file.
        """
        self.data.to_csv(self.test_file, index=False)
        delete_file(self.test_file)
        self.assertFalse(os.path.exists(self.test_file))

    def tearDown(self):
        """
        Cleaning up after tests.
        """
        import shutil
        shutil.rmtree(self.test_dir)
77 | 
# Run this module's tests when it is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
80 | 


--------------------------------------------------------------------------------