├── .dvc
│   ├── .gitignore
│   └── config
├── .dvcignore
├── .github
│   └── workflows
│       ├── .gitkeep
│       └── ci.yml
├── .gitignore
├── LICENSE
├── README.md
├── artifacts
│   ├── .gitignore
│   └── data_ingestion
│       └── .gitignore
├── configs
│   └── config.yaml
├── docs
│   └── images
│       ├── .gitkeep
│       └── Data Ingestion@2x (1).png
├── dvc.lock
├── dvc.yaml
├── init_setup.sh
├── params.yaml
├── prediction_service
│   ├── Dockerfile
│   ├── app.py
│   └── requirements.txt
├── pyproject.toml
├── requirements.txt
├── requirements_dev.txt
├── research
│   ├── mlflow_dir
│   │   ├── argv_ex.py
│   │   └── example.py
│   ├── st_01.ipynb
│   ├── st_02.ipynb
│   ├── st_03.ipynb
│   ├── st_04.ipynb
│   ├── st_05_trails.ipynb
│   ├── trials.ipynb
│   └── trials.ipynbexample.py
├── scores.json
├── setup.cfg
├── setup.py
├── src
│   └── deepClassifier
│       ├── __init__.py
│       ├── components
│       │   ├── __init__.py
│       │   ├── data_ingestion.py
│       │   ├── evaluation.py
│       │   ├── prepare_base_model.py
│       │   ├── prepare_callback.py
│       │   └── training.py
│       ├── config
│       │   ├── __init__.py
│       │   └── configuration.py
│       ├── constants
│       │   └── __init__.py
│       ├── entity
│       │   ├── __init__.py
│       │   └── config_entity.py
│       ├── pipeline
│       │   ├── __init__.py
│       │   ├── stage_01_data_ingestion.py
│       │   ├── stage_02_prepare_base_model.py
│       │   ├── stage_03_training.py
│       │   └── stage_04_evaluation.py
│       └── utils
│           ├── __init__.py
│           └── common.py
├── template.py
├── tests
│   ├── __init__.py
│   ├── data
│   │   ├── .gitkeep
│   │   ├── demo.yaml
│   │   ├── empty.yaml
│   │   └── sample_data.zip
│   ├── integration
│   │   ├── __init__.py
│   │   ├── test_data_ingestion.py
│   │   └── test_int.py
│   └── unit
│       ├── __init__.py
│       ├── test_data_ingestion.py
│       └── test_utils.py
└── tox.ini
--------------------------------------------------------------------------------
/.dvc/.gitignore:
--------------------------------------------------------------------------------
1 | /config.local
2 | /tmp
3 | /cache
--------------------------------------------------------------------------------
/.dvc/config:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/c17hawke/FSDS_NOV_deepCNNClassifier/e54341d242a39256c54a90acdead18bc53031677/.dvc/config
--------------------------------------------------------------------------------
/.dvcignore:
--------------------------------------------------------------------------------
1 | # Add patterns of files dvc should ignore, which could improve
2 | # the performance.
Learn more at 3 | # https://dvc.org/doc/user-guide/dvcignore 4 | -------------------------------------------------------------------------------- /.github/workflows/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c17hawke/FSDS_NOV_deepCNNClassifier/e54341d242a39256c54a90acdead18bc53031677/.github/workflows/.gitkeep -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CV application 2 | 3 | on: 4 | push: 5 | branches: [ "main" ] 6 | pull_request: 7 | branches: [ "main" ] 8 | 9 | permissions: 10 | contents: read 11 | 12 | jobs: 13 | build: 14 | 15 | runs-on: ${{ matrix.os }} 16 | strategy: 17 | matrix: 18 | os: [ubuntu-latest, windows-latest] 19 | python-version: ["3.8"] 20 | 21 | steps: 22 | - uses: actions/checkout@v3 23 | - name: Set up Python ${{ matrix.python-version }} 24 | uses: actions/setup-python@v3 25 | with: 26 | python-version: ${{ matrix.python-version }} 27 | - name: Install dependencies 28 | run: | 29 | python -m pip install --upgrade pip 30 | pip install flake8 pytest tox tox-gh-actions 31 | pip install -r requirements.txt 32 | - name: Test with tox 33 | run: tox 34 | 35 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow
95 | __pypackages__/
96 | 
97 | # Celery stuff
98 | celerybeat-schedule
99 | celerybeat.pid
100 | 
101 | # SageMath parsed files
102 | *.sage.py
103 | 
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 | 
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 | 
117 | # Rope project settings
118 | .ropeproject
119 | 
120 | # mkdocs documentation
121 | /site
122 | 
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 | 
128 | # Pyre type checker
129 | .pyre/
130 | 
131 | 
132 | # vscode
133 | .vscode/
134 | *.jpg
135 | *.zip
136 | artifacts/prepare_base_model/base_model.h5
137 | artifacts/prepare_base_model/base_model_updated.h5
138 | artifacts/prepare_callbacks/checkpoint_dir/model.h5
139 | artifacts/prepare_callbacks/tensorboard_log_dir/tb_logs_at_2022-09-18-12-21-34/train/events.out.tfevents.1663483897.C17HAWKE.5048.0.v2
140 | artifacts/prepare_callbacks/tensorboard_log_dir/tb_logs_at_2022-09-18-12-21-34/validation/events.out.tfevents.1663484259.C17HAWKE.5048.1.v2
141 | artifacts/training/model.h5
142 | artifacts/prepare_callbacks/tensorboard_log_dir/tb_logs_at_2022-09-18-12-59-28/train/events.out.tfevents.1663486170.C17HAWKE.28200.0.v2
143 | artifacts/prepare_callbacks/tensorboard_log_dir/tb_logs_at_2022-09-24-11-00-35/train/events.out.tfevents.1663997438.C17HAWKE.2228.0.v2
144 | artifacts/prepare_callbacks/tensorboard_log_dir/tb_logs_at_2022-09-24-11-00-35/validation/events.out.tfevents.1663997774.C17HAWKE.2228.1.v2
145 | mlruns/*
146 | research/mlflow_dir/mlruns/*
147 | prediction_service/model.h5
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2022 SUNNY BHAVEEN CHANDRA
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Deep Classifier project
2 | 
3 | ## Workflow
4 | 
5 | 1. Update config.yaml
6 | 2. Update secrets.yaml [Optional]
7 | 3. Update params.yaml
8 | 4. Update the entity
9 | 5. Update the configuration manager in src config
10 | 6. Update the components
11 | 7. Update the pipeline
12 | 8. Test-run the pipeline stage
13 | 9. Run `tox` to test your package
14 | 10. Update the dvc.yaml
15 | 11. Run `dvc repro` to run all the pipeline stages (example commands below)
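
A minimal sketch of steps 8-11 (illustrative commands; `stage_01_data_ingestion.py` is one of this repo's pipeline scripts, so substitute whichever stage you changed):

```bash
# test-run a single pipeline stage, e.g. data ingestion
python src/deepClassifier/pipeline/stage_01_data_ingestion.py

# run the package's test suite
tox

# re-run every stage tracked in dvc.yaml
dvc repro
```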
16 | 
17 | ![img](https://raw.githubusercontent.com/c17hawke/FSDS_NOV_deepCNNClassifier/main/docs/images/Data%20Ingestion%402x%20(1).png)
18 | 
19 | 
20 | STEP 1: Set the env variables | Get them from DagsHub -> remote tab -> mlflow tab
21 | 
22 | MLFLOW_TRACKING_URI=https://dagshub.com/c17hawke/FSDS_NOV_deepCNNClassifier.mlflow \
23 | MLFLOW_TRACKING_USERNAME=c17hawke \
24 | MLFLOW_TRACKING_PASSWORD=<> \
25 | 
26 | STEP 2: Install mlflow
27 | 
28 | STEP 3: Set the remote URI
29 | 
30 | STEP 4: Use mlflow's context manager to start a run, then log metrics, params, and the model
31 | 
32 | 
33 | ## Sample data for testing
34 | https://raw.githubusercontent.com/c17hawke/raw_data/main/sample_data.zip
--------------------------------------------------------------------------------
/artifacts/.gitignore:
--------------------------------------------------------------------------------
1 | /prepare_base_model
2 | 
--------------------------------------------------------------------------------
/artifacts/data_ingestion/.gitignore:
--------------------------------------------------------------------------------
1 | /PetImages
2 | 
--------------------------------------------------------------------------------
/configs/config.yaml:
--------------------------------------------------------------------------------
1 | artifacts_root: artifacts
2 | 
3 | 
4 | data_ingestion:
5 |   root_dir: artifacts/data_ingestion
6 |   source_URL: https://download.microsoft.com/download/3/E/1/3E1C3F21-ECDB-4869-8368-6DEBA77B919F/kagglecatsanddogs_5340.zip
7 |   local_data_file: artifacts/data_ingestion/data.zip
8 |   unzip_dir: artifacts/data_ingestion
9 | 
10 | prepare_base_model:
11 |   root_dir: artifacts/prepare_base_model
12 |   base_model_path: artifacts/prepare_base_model/base_model.h5
13 |   updated_base_model_path: artifacts/prepare_base_model/base_model_updated.h5
14 | 
15 | 
16 | prepare_callbacks:
17 |   root_dir: artifacts/prepare_callbacks
18 |   tensorboard_root_log_dir: artifacts/prepare_callbacks/tensorboard_log_dir
19 |   checkpoint_model_filepath: artifacts/prepare_callbacks/checkpoint_dir/model.h5
20 | 
21 | training:
22 |   root_dir: artifacts/training
23 |   trained_model_path: artifacts/training/model.h5
24 | 
--------------------------------------------------------------------------------
/docs/images/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/c17hawke/FSDS_NOV_deepCNNClassifier/e54341d242a39256c54a90acdead18bc53031677/docs/images/.gitkeep
--------------------------------------------------------------------------------
/docs/images/Data Ingestion@2x (1).png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/c17hawke/FSDS_NOV_deepCNNClassifier/e54341d242a39256c54a90acdead18bc53031677/docs/images/Data Ingestion@2x (1).png
--------------------------------------------------------------------------------
/dvc.lock:
--------------------------------------------------------------------------------
1 | schema: '2.0'
2 | stages:
3 | data_ingestion:
4 | cmd: python src/deepClassifier/pipeline/stage_01_data_ingestion.py
5 | deps:
6 | - path: configs/config.yaml
7 | md5: 11cc5642ba2725648da24e6823250f7d
8 | size: 831
9 | - path: src/deepClassifier/pipeline/stage_01_data_ingestion.py
10 | md5: f172a70cf599c77bbef01c290e2c9f6c
11 | size: 717
12 | outs:
13 | - path: artifacts/data_ingestion/PetImages
14 | md5:
3a5cf86541f22c236620ce5b952ad684.dir 15 | size: 848866410 16 | nfiles: 24998 17 | prepare_base_model: 18 | cmd: python src/deepClassifier/pipeline/stage_02_prepare_base_model.py 19 | deps: 20 | - path: configs/config.yaml 21 | md5: 11cc5642ba2725648da24e6823250f7d 22 | size: 831 23 | - path: src/deepClassifier/pipeline/stage_02_prepare_base_model.py 24 | md5: 2d3e3ea4e6b3b70a34bb9e158e24c2ea 25 | size: 793 26 | params: 27 | params.yaml: 28 | CLASSES: 2 29 | IMAGE_SIZE: 30 | - 224 31 | - 224 32 | - 3 33 | INCLUDE_TOP: false 34 | LEARNING_RATE: 0.01 35 | WEIGHTS: imagenet 36 | outs: 37 | - path: artifacts/prepare_base_model 38 | md5: ab6c456706dce2780076b6ff43c99bcd.dir 39 | size: 118053368 40 | nfiles: 2 41 | training: 42 | cmd: python src/deepClassifier/pipeline/stage_03_training.py 43 | deps: 44 | - path: artifacts/data_ingestion/PetImages 45 | md5: 3a5cf86541f22c236620ce5b952ad684.dir 46 | size: 848866410 47 | nfiles: 24998 48 | - path: artifacts/prepare_base_model 49 | md5: ab6c456706dce2780076b6ff43c99bcd.dir 50 | size: 118053368 51 | nfiles: 2 52 | - path: configs/config.yaml 53 | md5: 11cc5642ba2725648da24e6823250f7d 54 | size: 831 55 | - path: src/deepClassifier/components/prepare_callback.py 56 | md5: b27788e9a2ef6b98bb6c03d9cd76ee48 57 | size: 916 58 | - path: src/deepClassifier/pipeline/stage_03_training.py 59 | md5: b544965e110cbf58357a4947c0120426 60 | size: 1005 61 | params: 62 | params.yaml: 63 | AUGMENTATION: true 64 | BATCH_SIZE: 16 65 | EPOCHS: 1 66 | IMAGE_SIZE: 67 | - 224 68 | - 224 69 | - 3 70 | outs: 71 | - path: artifacts/training/model.h5 72 | md5: 2b8c5b8d8c3c27a11cb57775fc840236 73 | size: 59135136 74 | evaluation: 75 | cmd: python src/deepClassifier/pipeline/stage_04_evaluation.py 76 | deps: 77 | - path: artifacts/data_ingestion/PetImages 78 | md5: 3a5cf86541f22c236620ce5b952ad684.dir 79 | size: 848866410 80 | nfiles: 24998 81 | - path: artifacts/training/model.h5 82 | md5: 2b8c5b8d8c3c27a11cb57775fc840236 83 | size: 59135136 84 | - path: configs/config.yaml 85 | md5: 11cc5642ba2725648da24e6823250f7d 86 | size: 831 87 | - path: src/deepClassifier/pipeline/stage_04_evaluation.py 88 | md5: 16274682083d12ad866d6e96b19cc5af 89 | size: 699 90 | params: 91 | params.yaml: 92 | BATCH_SIZE: 16 93 | IMAGE_SIZE: 94 | - 224 95 | - 224 96 | - 3 97 | outs: 98 | - path: scores.json 99 | md5: 2a2199d913c0e929d1f22b2c09aa8bb6 100 | size: 73 101 | -------------------------------------------------------------------------------- /dvc.yaml: -------------------------------------------------------------------------------- 1 | stages: 2 | data_ingestion: 3 | cmd: python src/deepClassifier/pipeline/stage_01_data_ingestion.py 4 | deps: 5 | - src/deepClassifier/pipeline/stage_01_data_ingestion.py 6 | - configs/config.yaml 7 | outs: 8 | - artifacts/data_ingestion/PetImages 9 | 10 | prepare_base_model: 11 | cmd: python src/deepClassifier/pipeline/stage_02_prepare_base_model.py 12 | deps: 13 | - src/deepClassifier/pipeline/stage_02_prepare_base_model.py 14 | - configs/config.yaml 15 | params: 16 | - IMAGE_SIZE 17 | - INCLUDE_TOP 18 | - CLASSES 19 | - WEIGHTS 20 | - LEARNING_RATE 21 | outs: 22 | - artifacts/prepare_base_model 23 | 24 | 25 | training: 26 | cmd: python src/deepClassifier/pipeline/stage_03_training.py 27 | deps: 28 | - src/deepClassifier/pipeline/stage_03_training.py 29 | - src/deepClassifier/components/prepare_callback.py 30 | - configs/config.yaml 31 | - artifacts/data_ingestion/PetImages 32 | - artifacts/prepare_base_model 33 | params: 34 | - IMAGE_SIZE 35 | - EPOCHS 36 | - 
BATCH_SIZE
37 | - AUGMENTATION
38 | outs:
39 | - artifacts/training/model.h5
40 | 
41 | evaluation:
42 | cmd: python src/deepClassifier/pipeline/stage_04_evaluation.py
43 | deps:
44 | - src/deepClassifier/pipeline/stage_04_evaluation.py
45 | - configs/config.yaml
46 | - artifacts/data_ingestion/PetImages
47 | - artifacts/training/model.h5
48 | params:
49 | - IMAGE_SIZE
50 | - BATCH_SIZE
51 | metrics:
52 | - scores.json:
53 | cache: false
--------------------------------------------------------------------------------
/init_setup.sh:
--------------------------------------------------------------------------------
1 | echo [$(date)]: "START"
2 | echo [$(date)]: "creating env with python 3.8 version"
3 | conda create --prefix ./env python=3.8 -y
4 | echo [$(date)]: "activating the environment"
5 | source activate ./env
6 | echo [$(date)]: "installing the dev requirements"
7 | pip install -r requirements_dev.txt
8 | echo [$(date)]: "END"
--------------------------------------------------------------------------------
/params.yaml:
--------------------------------------------------------------------------------
1 | AUGMENTATION: True
2 | IMAGE_SIZE: [224, 224, 3] # as per the VGG16 model
3 | BATCH_SIZE: 16
4 | INCLUDE_TOP: False
5 | EPOCHS: 1
6 | CLASSES: 2
7 | WEIGHTS: imagenet
8 | LEARNING_RATE: 0.01
9 | 
--------------------------------------------------------------------------------
/prediction_service/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM python:3.8-slim
2 | WORKDIR /app
3 | COPY . .
4 | RUN pip install -r requirements.txt
5 | CMD ["streamlit", "run", "app.py"]
--------------------------------------------------------------------------------
/prediction_service/app.py:
--------------------------------------------------------------------------------
1 | import streamlit as st
2 | from PIL import Image
3 | import tensorflow as tf
4 | import numpy as np
5 | """
6 | # deep Classifier project
7 | 
8 | """
9 | model = tf.keras.models.load_model("model.h5")
10 | uploaded_file = st.file_uploader("Choose a file")
11 | if uploaded_file is not None:
12 |     # read the uploaded file as a PIL image
13 | 
14 |     image = Image.open(uploaded_file)
15 |     img = image.resize((224,224))
16 |     img_array = np.array(img)
17 |     img_array = np.expand_dims(img_array, axis=0)  # [batch_size, row, col, channel]
18 |     result = model.predict(img_array)  # e.g. [[0.99, 0.01]] for a single image
19 | 
20 |     argmax_index = np.argmax(result, axis=1)  # e.g. [0]
21 |     if argmax_index[0] == 0:
22 |         st.image(image, caption="predicted: cat")
23 |     else:
24 |         st.image(image, caption='predicted: dog')
25 | 
26 | 
--------------------------------------------------------------------------------
/prediction_service/requirements.txt:
--------------------------------------------------------------------------------
1 | streamlit
2 | tensorflow
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ['setuptools>=42.0', "wheel"]
3 | build-backend = "setuptools.build_meta"
4 | 
5 | [tool.pytest.ini_options]
6 | testpaths = [
7 |     "tests"
8 | ]
9 | 
10 | [tool.mypy]
11 | mypy_path = "src"
12 | ignore_missing_imports = true
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | tensorflow
2 | dvc
3 | pandas
4 | notebook
5 | numpy
6 | matplotlib
7 | 
seaborn 8 | python-box==6.0.2 9 | pyYAML 10 | tqdm 11 | ensure==1.0.2 12 | joblib 13 | types-PyYAML 14 | scipy 15 | mlflow==1.26.1 16 | -e . 17 | -------------------------------------------------------------------------------- /requirements_dev.txt: -------------------------------------------------------------------------------- 1 | tensorflow 2 | dvc 3 | pandas 4 | notebook 5 | numpy 6 | matplotlib 7 | seaborn 8 | python-box==6.0.2 9 | pyYAML 10 | tqdm 11 | ensure==1.0.2 12 | joblib 13 | types-PyYAML 14 | scipy 15 | mlflow==1.26.1 16 | -e . 17 | 18 | ## Dev requirements 19 | pytest==7.1.3 20 | tox==3.25.1 21 | black==22.8.0 22 | flake8==5.0.4 23 | mypy==0.971 -------------------------------------------------------------------------------- /research/mlflow_dir/argv_ex.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | default = 0.3 4 | 5 | alpha = float(sys.argv[1]) if len(sys.argv) > 1 else default 6 | l1_ratio = float(sys.argv[2]) if len(sys.argv) > 2 else default 7 | 8 | args = sys.argv 9 | print(args) 10 | print(alpha, l1_ratio) 11 | 12 | # python filename.py 0.6 0.7 -------------------------------------------------------------------------------- /research/mlflow_dir/example.py: -------------------------------------------------------------------------------- 1 | # The data set used in this example is from http://archive.ics.uci.edu/ml/datasets/Wine+Quality 2 | # P. Cortez, A. Cerdeira, F. Almeida, T. Matos and J. Reis. 3 | # Modeling wine preferences by data mining from physicochemical properties. In Decision Support Systems, Elsevier, 47(4):547-553, 2009. 4 | 5 | import os 6 | import warnings 7 | import sys 8 | 9 | import pandas as pd 10 | import numpy as np 11 | from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score 12 | from sklearn.model_selection import train_test_split 13 | from sklearn.linear_model import ElasticNet 14 | from urllib.parse import urlparse 15 | import mlflow 16 | import mlflow.sklearn 17 | 18 | import logging 19 | 20 | logging.basicConfig(level=logging.WARN) 21 | logger = logging.getLogger(__name__) 22 | 23 | 24 | def eval_metrics(actual, pred): 25 | rmse = np.sqrt(mean_squared_error(actual, pred)) 26 | mae = mean_absolute_error(actual, pred) 27 | r2 = r2_score(actual, pred) 28 | return rmse, mae, r2 29 | 30 | 31 | if __name__ == "__main__": 32 | warnings.filterwarnings("ignore") 33 | np.random.seed(40) 34 | 35 | # Read the wine-quality csv file from the URL 36 | csv_url = ( 37 | "http://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv" 38 | ) 39 | try: 40 | data = pd.read_csv(csv_url, sep=";") 41 | except Exception as e: 42 | logger.exception( 43 | "Unable to download training & test CSV, check your internet connection. Error: %s", e 44 | ) 45 | 46 | # Split the data into training and test sets. (0.75, 0.25) split. 
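    # Note (added): train_test_split defaults to a 0.75/0.25 split. If the
    # download above failed, `data` was never assigned, so the next line
    # raises NameError, because the except block only logs the error.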
47 | train, test = train_test_split(data) 48 | 49 | # The predicted column is "quality" which is a scalar from [3, 9] 50 | train_x = train.drop(["quality"], axis=1) 51 | test_x = test.drop(["quality"], axis=1) 52 | train_y = train[["quality"]] 53 | test_y = test[["quality"]] 54 | 55 | alpha = float(sys.argv[1]) if len(sys.argv) > 1 else 0.5 56 | l1_ratio = float(sys.argv[2]) if len(sys.argv) > 2 else 0.5 57 | 58 | with mlflow.start_run(): 59 | lr = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, random_state=42) 60 | lr.fit(train_x, train_y) 61 | 62 | predicted_qualities = lr.predict(test_x) 63 | 64 | (rmse, mae, r2) = eval_metrics(test_y, predicted_qualities) 65 | 66 | print("Elasticnet model (alpha=%f, l1_ratio=%f):" % (alpha, l1_ratio)) 67 | print(" RMSE: %s" % rmse) 68 | print(" MAE: %s" % mae) 69 | print(" R2: %s" % r2) 70 | 71 | mlflow.log_param("alpha", alpha) 72 | mlflow.log_param("l1_ratio", l1_ratio) 73 | mlflow.log_metric("rmse", rmse) 74 | mlflow.log_metric("r2", r2) 75 | mlflow.log_metric("mae", mae) 76 | 77 | remote_server_uri = "https://dagshub.com/c17hawke/FSDS_NOV_deepCNNClassifier.mlflow" 78 | mlflow.set_tracking_uri(remote_server_uri) 79 | tracking_url_type_store = urlparse(mlflow.get_tracking_uri()).scheme 80 | 81 | # Model registry does not work with file store 82 | if tracking_url_type_store != "file": 83 | 84 | # Register the model 85 | # There are other ways to use the Model Registry, which depends on the use case, 86 | # please refer to the doc for more information: 87 | # https://mlflow.org/docs/latest/model-registry.html#api-workflow 88 | mlflow.sklearn.log_model(lr, "model", registered_model_name="ElasticnetWineModel") 89 | else: 90 | mlflow.sklearn.log_model(lr, "model") -------------------------------------------------------------------------------- /research/st_01.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from collections import namedtuple\n", 10 | "import os\n", 11 | "os.chdir(\"../\")" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 2, 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "DataIngestionConfig = namedtuple(\"DataIngestionConfig\", [\n", 21 | " \"root_dir\",\n", 22 | " \"source_URL\",\n", 23 | " \"local_data_file\",\n", 24 | " \"unzip_dir\"\n", 25 | "])" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 3, 31 | "metadata": {}, 32 | "outputs": [], 33 | "source": [ 34 | "from dataclasses import dataclass\n", 35 | "from pathlib import Path\n", 36 | "\n", 37 | "\n", 38 | "@dataclass(frozen=True)\n", 39 | "class DataIngestionConfig:\n", 40 | " root_dir: Path\n", 41 | " source_URL: str\n", 42 | " local_data_file: Path\n", 43 | " unzip_dir: Path" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": 4, 49 | "metadata": {}, 50 | "outputs": [], 51 | "source": [ 52 | "from deepClassifier.constants import *\n", 53 | "from deepClassifier.utils import read_yaml, create_directories" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": 5, 59 | "metadata": {}, 60 | "outputs": [], 61 | "source": [ 62 | "class ConfigurationManager:\n", 63 | " def __init__(\n", 64 | " self, \n", 65 | " config_filepath = CONFIG_FILE_PATH,\n", 66 | " params_filepath = PARAMS_FILE_PATH):\n", 67 | " self.config = read_yaml(config_filepath)\n", 68 | " self.params = read_yaml(params_filepath)\n", 69 | " 
create_directories([self.config.artifacts_root])\n", 70 | "\n", 71 | " def get_data_ingestion_config(self) -> DataIngestionConfig:\n", 72 | " config = self.config.data_ingestion\n", 73 | " \n", 74 | " create_directories([config.root_dir])\n", 75 | "\n", 76 | " data_ingestion_config = DataIngestionConfig(\n", 77 | " root_dir=config.root_dir,\n", 78 | " source_URL=config.source_URL,\n", 79 | " local_data_file=config.local_data_file,\n", 80 | " unzip_dir=config.unzip_dir \n", 81 | " )\n", 82 | "\n", 83 | " return data_ingestion_config" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": 6, 89 | "metadata": {}, 90 | "outputs": [], 91 | "source": [ 92 | "import os\n", 93 | "import urllib.request as request\n", 94 | "from zipfile import ZipFile\n", 95 | "\n", 96 | "class DataIngestion:\n", 97 | " def __init__(self, config: DataIngestionConfig):\n", 98 | " self.config = config\n", 99 | "\n", 100 | " def download_file(self):\n", 101 | " if not os.path.exists(self.config.local_data_file):\n", 102 | " filename, headers = request.urlretrieve(\n", 103 | " url = self.config.source_URL,\n", 104 | " filename = self.config.local_data_file\n", 105 | " )\n", 106 | "\n", 107 | " def _get_updated_list_of_files(self, list_of_files):\n", 108 | " return [f for f in list_of_files if f.endswith(\".jpg\") and (\"Cat\" in f or \"Dog\" in f)]\n", 109 | "\n", 110 | " def _preprocess(self, zf: ZipFile, f: str, working_dir: str):\n", 111 | " target_filepath = os.path.join(working_dir, f)\n", 112 | " if not os.path.exists(target_filepath):\n", 113 | " zf.extract(f, working_dir)\n", 114 | " \n", 115 | " if os.path.getsize(target_filepath) == 0:\n", 116 | " os.remove(target_filepath)\n", 117 | "\n", 118 | " def unzip_and_clean(self):\n", 119 | " with ZipFile(file=self.config.local_data_file, mode=\"r\") as zf:\n", 120 | " list_of_files = zf.namelist()\n", 121 | " updated_list_of_files = self._get_updated_list_of_files(list_of_files)\n", 122 | " for f in updated_list_of_files:\n", 123 | " self._preprocess(zf, f, self.config.unzip_dir)" 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": 7, 129 | "metadata": {}, 130 | "outputs": [ 131 | { 132 | "name": "stdout", 133 | "output_type": "stream", 134 | "text": [ 135 | "[2022-09-17 10:34:33,486: INFO: common]: yaml file: configs\\config.yaml loaded successfully\n", 136 | "[2022-09-17 10:34:33,489: INFO: common]: yaml file: params.yaml loaded successfully\n", 137 | "[2022-09-17 10:34:33,491: INFO: common]: created directory at: artifacts\n", 138 | "[2022-09-17 10:34:33,493: INFO: common]: created directory at: artifacts/data_ingestion\n" 139 | ] 140 | } 141 | ], 142 | "source": [ 143 | "try:\n", 144 | " config = ConfigurationManager()\n", 145 | " data_ingestion_config = config.get_data_ingestion_config()\n", 146 | " data_ingestion = DataIngestion(config=data_ingestion_config)\n", 147 | " data_ingestion.download_file()\n", 148 | " data_ingestion.unzip_and_clean()\n", 149 | "except Exception as e:\n", 150 | " raise e" 151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": null, 156 | "metadata": {}, 157 | "outputs": [], 158 | "source": [] 159 | } 160 | ], 161 | "metadata": { 162 | "kernelspec": { 163 | "display_name": "Python 3.8.13 (conda)", 164 | "language": "python", 165 | "name": "python3" 166 | }, 167 | "language_info": { 168 | "codemirror_mode": { 169 | "name": "ipython", 170 | "version": 3 171 | }, 172 | "file_extension": ".py", 173 | "mimetype": "text/x-python", 174 | "name": "python", 175 | 
"nbconvert_exporter": "python", 176 | "pygments_lexer": "ipython3", 177 | "version": "3.8.13" 178 | }, 179 | "orig_nbformat": 4, 180 | "vscode": { 181 | "interpreter": { 182 | "hash": "37206e62ba8fc6a6f0b961435078c80557a69ac7a5dd6249b6601b5385f5de67" 183 | } 184 | } 185 | }, 186 | "nbformat": 4, 187 | "nbformat_minor": 2 188 | } 189 | -------------------------------------------------------------------------------- /research/st_02.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import os\n", 10 | "os.chdir(\"../\")" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 16, 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "from dataclasses import dataclass\n", 20 | "from pathlib import Path\n", 21 | "\n", 22 | "\n", 23 | "@dataclass(frozen=True)\n", 24 | "class PrepareBaseModelConfig:\n", 25 | " root_dir: Path\n", 26 | " base_model_path: Path\n", 27 | " updated_base_model_path: Path\n", 28 | " params_image_size: list\n", 29 | " params_learning_rate: float\n", 30 | " params_include_top: bool\n", 31 | " params_weights: str\n", 32 | " params_classes: int" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": 17, 38 | "metadata": {}, 39 | "outputs": [], 40 | "source": [ 41 | "from deepClassifier.constants import *\n", 42 | "from deepClassifier.utils import read_yaml, create_directories" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": 18, 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [ 51 | "class ConfigurationManager:\n", 52 | " def __init__(\n", 53 | " self, \n", 54 | " config_filepath = CONFIG_FILE_PATH,\n", 55 | " params_filepath = PARAMS_FILE_PATH):\n", 56 | " self.config = read_yaml(config_filepath)\n", 57 | " self.params = read_yaml(params_filepath)\n", 58 | " create_directories([self.config.artifacts_root])\n", 59 | "\n", 60 | " def get_prepare_base_model_config(self) -> PrepareBaseModelConfig:\n", 61 | " config = self.config.prepare_base_model\n", 62 | " \n", 63 | " create_directories([config.root_dir])\n", 64 | "\n", 65 | " prepare_base_model_config = PrepareBaseModelConfig(\n", 66 | " root_dir=Path(config.root_dir),\n", 67 | " base_model_path=Path(config.base_model_path),\n", 68 | " updated_base_model_path=Path(config.updated_base_model_path),\n", 69 | " params_image_size=self.params.IMAGE_SIZE,\n", 70 | " params_learning_rate=self.params.LEARNING_RATE,\n", 71 | " params_include_top=self.params.INCLUDE_TOP,\n", 72 | " params_weights=self.params.WEIGHTS,\n", 73 | " params_classes=self.params.CLASSES\n", 74 | " )\n", 75 | "\n", 76 | " return prepare_base_model_config" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": 23, 82 | "metadata": {}, 83 | "outputs": [], 84 | "source": [ 85 | "import os\n", 86 | "import urllib.request as request\n", 87 | "from zipfile import ZipFile\n", 88 | "import tensorflow as tf\n", 89 | "\n", 90 | "class PrepareBaseModel:\n", 91 | " def __init__(self, config: PrepareBaseModelConfig):\n", 92 | " self.config = config\n", 93 | "\n", 94 | " def get_base_model(self):\n", 95 | " self.model = tf.keras.applications.vgg16.VGG16(\n", 96 | " input_shape=self.config.params_image_size,\n", 97 | " weights=self.config.params_weights,\n", 98 | " include_top=self.config.params_include_top\n", 99 | " )\n", 100 | "\n", 101 | " self.save_model(path=self.config.base_model_path, model=self.model)\n", 
102 | "\n",
103 | "\n",
104 | "    @staticmethod\n",
105 | "    def _prepare_full_model(model, classes, freeze_all, freeze_till, learning_rate):\n",
106 | "        if freeze_all:\n",
107 | "            for layer in model.layers:\n",
108 | "                layer.trainable = False\n",
109 | "        elif (freeze_till is not None) and (freeze_till > 0):\n",
110 | "            for layer in model.layers[:-freeze_till]:\n",
111 | "                layer.trainable = False\n",
112 | "\n",
113 | "        flatten_in = tf.keras.layers.Flatten()(model.output)\n",
114 | "        prediction = tf.keras.layers.Dense(\n",
115 | "            units=classes,\n",
116 | "            activation=\"softmax\"\n",
117 | "        )(flatten_in)\n",
118 | "\n",
119 | "        full_model = tf.keras.models.Model(\n",
120 | "            inputs=model.input,\n",
121 | "            outputs=prediction\n",
122 | "        )\n",
123 | "\n",
124 | "        full_model.compile(\n",
125 | "            optimizer=tf.keras.optimizers.SGD(learning_rate=learning_rate),\n",
126 | "            loss=tf.keras.losses.CategoricalCrossentropy(),\n",
127 | "            metrics=[\"accuracy\"]\n",
128 | "        )\n",
129 | "\n",
130 | "        full_model.summary()\n",
131 | "        return full_model\n",
132 | "\n",
133 | "    def update_base_model(self):\n",
134 | "        self.full_model = self._prepare_full_model(\n",
135 | "            model=self.model,\n",
136 | "            classes=self.config.params_classes,\n",
137 | "            freeze_all=True,\n",
138 | "            freeze_till=None,\n",
139 | "            learning_rate=self.config.params_learning_rate\n",
140 | "        )\n",
141 | "\n",
142 | "        self.save_model(path=self.config.updated_base_model_path, model=self.full_model)\n",
143 | "\n",
144 | "    @staticmethod\n",
145 | "    def save_model(path: Path, model: tf.keras.Model):\n",
146 | "        model.save(path)\n",
147 | "\n"
148 | ]
149 | },
150 | {
151 | "cell_type": "code",
152 | "execution_count": 24,
153 | "metadata": {},
154 | "outputs": [
155 | {
156 | "name": "stdout",
157 | "output_type": "stream",
158 | "text": [
159 | "Model: \"model_1\"\n",
160 | "_________________________________________________________________\n",
161 | " Layer (type) Output Shape Param # \n",
162 | "=================================================================\n",
163 | " input_3 (InputLayer) [(None, 224, 224, 3)] 0 \n",
164 | " \n",
165 | " block1_conv1 (Conv2D) (None, 224, 224, 64) 1792 \n",
166 | " \n",
167 | " block1_conv2 (Conv2D) (None, 224, 224, 64) 36928 \n",
168 | " \n",
169 | " block1_pool (MaxPooling2D) (None, 112, 112, 64) 0 \n",
170 | " \n",
171 | " block2_conv1 (Conv2D) (None, 112, 112, 128) 73856 \n",
172 | " \n",
173 | " block2_conv2 (Conv2D) (None, 112, 112, 128) 147584 \n",
174 | " \n",
175 | " block2_pool (MaxPooling2D) (None, 56, 56, 128) 0 \n",
176 | " \n",
177 | " block3_conv1 (Conv2D) (None, 56, 56, 256) 295168 \n",
178 | " \n",
179 | " block3_conv2 (Conv2D) (None, 56, 56, 256) 590080 \n",
180 | " \n",
181 | " block3_conv3 (Conv2D) (None, 56, 56, 256) 590080 \n",
182 | " \n",
183 | " block3_pool (MaxPooling2D) (None, 28, 28, 256) 0 \n",
184 | " \n",
185 | " block4_conv1 (Conv2D) (None, 28, 28, 512) 1180160 \n",
186 | " \n",
187 | " block4_conv2 (Conv2D) (None, 28, 28, 512) 2359808 \n",
188 | " \n",
189 | " block4_conv3 (Conv2D) (None, 28, 28, 512) 2359808 \n",
190 | " \n",
191 | " block4_pool (MaxPooling2D) (None, 14, 14, 512) 0 \n",
192 | " \n",
193 | " block5_conv1 (Conv2D) (None, 14, 14, 512) 2359808 \n",
194 | " \n",
195 | " block5_conv2 (Conv2D) (None, 14, 14, 512) 2359808 \n",
196 | " \n",
197 | " block5_conv3 (Conv2D) (None, 14, 14, 512) 2359808 \n",
198 | " \n",
199 | " block5_pool (MaxPooling2D) (None, 7, 7, 512) 0 \n",
200 | " \n",
201 | " flatten_2 (Flatten) (None, 25088) 0 \n",
202 | " \n",
203 | "
dense_2 (Dense) (None, 2) 50178 \n", 204 | " \n", 205 | "=================================================================\n", 206 | "Total params: 14,764,866\n", 207 | "Trainable params: 50,178\n", 208 | "Non-trainable params: 14,714,688\n", 209 | "_________________________________________________________________\n" 210 | ] 211 | } 212 | ], 213 | "source": [ 214 | "try:\n", 215 | " config = ConfigurationManager()\n", 216 | " prepare_base_model_config = config.get_prepare_base_model_config()\n", 217 | " prepare_base_model = PrepareBaseModel(config=prepare_base_model_config)\n", 218 | " prepare_base_model.get_base_model()\n", 219 | " prepare_base_model.update_base_model()\n", 220 | "except Exception as e:\n", 221 | " raise e" 222 | ] 223 | }, 224 | { 225 | "cell_type": "code", 226 | "execution_count": 15, 227 | "metadata": {}, 228 | "outputs": [ 229 | { 230 | "data": { 231 | "text/plain": [ 232 | "4" 233 | ] 234 | }, 235 | "execution_count": 15, 236 | "metadata": {}, 237 | "output_type": "execute_result" 238 | } 239 | ], 240 | "source": [ 241 | "class Power:\n", 242 | " def __init__(self,x):\n", 243 | " self.x = x\n", 244 | "\n", 245 | " def __call__(self, factor):\n", 246 | " return self.x**factor\n", 247 | "\n", 248 | " def __call__(self, factor):\n", 249 | " return self.x\n", 250 | "\n", 251 | " def square(self):\n", 252 | " return self.x**2\n", 253 | "\n", 254 | "\n", 255 | "result = Power(4)(3)\n", 256 | "result" 257 | ] 258 | }, 259 | { 260 | "cell_type": "code", 261 | "execution_count": 14, 262 | "metadata": {}, 263 | "outputs": [ 264 | { 265 | "data": { 266 | "text/plain": [ 267 | "64" 268 | ] 269 | }, 270 | "execution_count": 14, 271 | "metadata": {}, 272 | "output_type": "execute_result" 273 | } 274 | ], 275 | "source": [ 276 | "result = Power(4)\n", 277 | "result(3)" 278 | ] 279 | }, 280 | { 281 | "cell_type": "code", 282 | "execution_count": 13, 283 | "metadata": {}, 284 | "outputs": [ 285 | { 286 | "data": { 287 | "text/plain": [ 288 | "16" 289 | ] 290 | }, 291 | "execution_count": 13, 292 | "metadata": {}, 293 | "output_type": "execute_result" 294 | } 295 | ], 296 | "source": [ 297 | "\n", 298 | "result = Power(4)\n", 299 | "result.square()" 300 | ] 301 | }, 302 | { 303 | "cell_type": "code", 304 | "execution_count": null, 305 | "metadata": {}, 306 | "outputs": [], 307 | "source": [] 308 | } 309 | ], 310 | "metadata": { 311 | "kernelspec": { 312 | "display_name": "Python 3.8.13", 313 | "language": "python", 314 | "name": "python3" 315 | }, 316 | "language_info": { 317 | "codemirror_mode": { 318 | "name": "ipython", 319 | "version": 3 320 | }, 321 | "file_extension": ".py", 322 | "mimetype": "text/x-python", 323 | "name": "python", 324 | "nbconvert_exporter": "python", 325 | "pygments_lexer": "ipython3", 326 | "version": "3.8.13" 327 | }, 328 | "orig_nbformat": 4, 329 | "vscode": { 330 | "interpreter": { 331 | "hash": "37206e62ba8fc6a6f0b961435078c80557a69ac7a5dd6249b6601b5385f5de67" 332 | } 333 | } 334 | }, 335 | "nbformat": 4, 336 | "nbformat_minor": 2 337 | } 338 | -------------------------------------------------------------------------------- /research/st_03.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import os\n", 10 | "os.chdir(\"../\")" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 2, 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "from dataclasses 
import dataclass\n", 20 | "from pathlib import Path\n", 21 | "\n", 22 | "\n", 23 | "@dataclass(frozen=True)\n", 24 | "class PrepareCallbacksConfig:\n", 25 | " root_dir: Path\n", 26 | " tensorboard_root_log_dir: Path\n", 27 | " checkpoint_model_filepath: Path" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 3, 33 | "metadata": {}, 34 | "outputs": [], 35 | "source": [ 36 | "from deepClassifier.constants import *\n", 37 | "from deepClassifier.utils import read_yaml, create_directories" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 4, 43 | "metadata": {}, 44 | "outputs": [], 45 | "source": [ 46 | "class ConfigurationManager:\n", 47 | " def __init__(\n", 48 | " self, \n", 49 | " config_filepath = CONFIG_FILE_PATH,\n", 50 | " params_filepath = PARAMS_FILE_PATH):\n", 51 | " self.config = read_yaml(config_filepath)\n", 52 | " self.params = read_yaml(params_filepath)\n", 53 | " create_directories([self.config.artifacts_root])\n", 54 | "\n", 55 | " def get_prepare_callback_config(self) -> PrepareCallbacksConfig:\n", 56 | " config = self.config.prepare_callbacks\n", 57 | " model_ckpt_dir = os.path.dirname(config.checkpoint_model_filepath)\n", 58 | " create_directories([\n", 59 | " Path(model_ckpt_dir),\n", 60 | " Path(config.tensorboard_root_log_dir)\n", 61 | " ])\n", 62 | "\n", 63 | " prepare_callback_config = PrepareCallbacksConfig(\n", 64 | " root_dir=Path(config.root_dir),\n", 65 | " tensorboard_root_log_dir=Path(config.tensorboard_root_log_dir),\n", 66 | " checkpoint_model_filepath=Path(config.checkpoint_model_filepath)\n", 67 | " )\n", 68 | "\n", 69 | " return prepare_callback_config" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": 8, 75 | "metadata": {}, 76 | "outputs": [], 77 | "source": [ 78 | "import os\n", 79 | "import urllib.request as request\n", 80 | "from zipfile import ZipFile\n", 81 | "import tensorflow as tf\n", 82 | "import time\n", 83 | "\n", 84 | "class PrepareCallback:\n", 85 | " def __init__(self, config: PrepareCallbacksConfig):\n", 86 | " self.config = config\n", 87 | "\n", 88 | " @property\n", 89 | " def _create_tb_callbacks(self):\n", 90 | " timestamp = time.strftime(\"%Y-%m-%d-%H-%M-%S\")\n", 91 | " tb_running_log_dir = os.path.join(\n", 92 | " self.config.tensorboard_root_log_dir,\n", 93 | " f\"tb_logs_at_{timestamp}\",\n", 94 | " )\n", 95 | " return tf.keras.callbacks.TensorBoard(log_dir=tb_running_log_dir)\n", 96 | "\n", 97 | " @property\n", 98 | " def _create_ckpt_callbacks(self):\n", 99 | " return tf.keras.callbacks.ModelCheckpoint(\n", 100 | " filepath=self.config.checkpoint_model_filepath,\n", 101 | " save_best_only=True\n", 102 | " )\n", 103 | "\n", 104 | " def get_tb_ckpt_callbacks(self):\n", 105 | " return [\n", 106 | " self._create_tb_callbacks,\n", 107 | " self._create_ckpt_callbacks\n", 108 | " ]\n", 109 | "\n" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": 9, 115 | "metadata": {}, 116 | "outputs": [], 117 | "source": [ 118 | "try:\n", 119 | " config = ConfigurationManager()\n", 120 | " prepare_callbacks_config = config.get_prepare_callback_config()\n", 121 | " prepare_callbacks = PrepareCallback(config=prepare_callbacks_config)\n", 122 | " callback_list = prepare_callbacks.get_tb_ckpt_callbacks()\n", 123 | " \n", 124 | "except Exception as e:\n", 125 | " raise e" 126 | ] 127 | }, 128 | { 129 | "cell_type": "code", 130 | "execution_count": null, 131 | "metadata": {}, 132 | "outputs": [ 133 | { 134 | "data": { 135 | "text/plain": [ 136 | "'x/y'" 137 | ] 138 | 
}, 139 | "execution_count": 7, 140 | "metadata": {}, 141 | "output_type": "execute_result" 142 | } 143 | ], 144 | "source": [ 145 | "import os\n", 146 | "os.path.dirname(\"x/y/z.txt\")" 147 | ] 148 | }, 149 | { 150 | "cell_type": "code", 151 | "execution_count": 7, 152 | "metadata": {}, 153 | "outputs": [ 154 | { 155 | "data": { 156 | "text/plain": [ 157 | "'tb_logs_at_2022-09-18-10-48-16'" 158 | ] 159 | }, 160 | "execution_count": 7, 161 | "metadata": {}, 162 | "output_type": "execute_result" 163 | } 164 | ], 165 | "source": [ 166 | "import time\n", 167 | "teimstamp = time.strftime(\"%Y-%m-%d-%H-%M-%S\")\n", 168 | "f\"tb_logs_at_{teimstamp}\"" 169 | ] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "execution_count": null, 174 | "metadata": {}, 175 | "outputs": [], 176 | "source": [] 177 | } 178 | ], 179 | "metadata": { 180 | "kernelspec": { 181 | "display_name": "Python 3.8.13 64-bit", 182 | "language": "python", 183 | "name": "python3" 184 | }, 185 | "language_info": { 186 | "codemirror_mode": { 187 | "name": "ipython", 188 | "version": 3 189 | }, 190 | "file_extension": ".py", 191 | "mimetype": "text/x-python", 192 | "name": "python", 193 | "nbconvert_exporter": "python", 194 | "pygments_lexer": "ipython3", 195 | "version": "3.8.13" 196 | }, 197 | "orig_nbformat": 4, 198 | "vscode": { 199 | "interpreter": { 200 | "hash": "37206e62ba8fc6a6f0b961435078c80557a69ac7a5dd6249b6601b5385f5de67" 201 | } 202 | } 203 | }, 204 | "nbformat": 4, 205 | "nbformat_minor": 2 206 | } 207 | -------------------------------------------------------------------------------- /research/st_04.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import os\n", 10 | "os.chdir(\"../\")" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 2, 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "from dataclasses import dataclass\n", 20 | "from pathlib import Path\n", 21 | "\n", 22 | "\n", 23 | "@dataclass(frozen=True)\n", 24 | "class TrainingConfig:\n", 25 | " root_dir: Path\n", 26 | " trained_model_path: Path\n", 27 | " updated_base_model_path: Path\n", 28 | " training_data: Path\n", 29 | " params_epochs: int\n", 30 | " params_batch_size: int\n", 31 | " params_is_augmentation: bool\n", 32 | " params_image_size: list\n", 33 | "\n", 34 | "@dataclass(frozen=True)\n", 35 | "class PrepareCallbacksConfig:\n", 36 | " root_dir: Path\n", 37 | " tensorboard_root_log_dir: Path\n", 38 | " checkpoint_model_filepath: Path" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 3, 44 | "metadata": {}, 45 | "outputs": [], 46 | "source": [ 47 | "from deepClassifier.constants import *\n", 48 | "from deepClassifier.utils import read_yaml, create_directories\n", 49 | "import tensorflow as tf" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": 4, 55 | "metadata": {}, 56 | "outputs": [], 57 | "source": [ 58 | "class ConfigurationManager:\n", 59 | " def __init__(\n", 60 | " self, \n", 61 | " config_filepath = CONFIG_FILE_PATH,\n", 62 | " params_filepath = PARAMS_FILE_PATH):\n", 63 | " self.config = read_yaml(config_filepath)\n", 64 | " self.params = read_yaml(params_filepath)\n", 65 | " create_directories([self.config.artifacts_root])\n", 66 | "\n", 67 | " def get_prepare_callback_config(self) -> PrepareCallbacksConfig:\n", 68 | " config = self.config.prepare_callbacks\n", 69 | " model_ckpt_dir = 
os.path.dirname(config.checkpoint_model_filepath)\n", 70 | " create_directories([\n", 71 | " Path(model_ckpt_dir),\n", 72 | " Path(config.tensorboard_root_log_dir)\n", 73 | " ])\n", 74 | "\n", 75 | " prepare_callback_config = PrepareCallbacksConfig(\n", 76 | " root_dir=Path(config.root_dir),\n", 77 | " tensorboard_root_log_dir=Path(config.tensorboard_root_log_dir),\n", 78 | " checkpoint_model_filepath=Path(config.checkpoint_model_filepath)\n", 79 | " )\n", 80 | "\n", 81 | " return prepare_callback_config\n", 82 | "\n", 83 | " def get_training_config(self) -> TrainingConfig:\n", 84 | " training = self.config.training\n", 85 | " prepare_base_model = self.config.prepare_base_model\n", 86 | " params = self.params\n", 87 | " training_data = os.path.join(self.config.data_ingestion.unzip_dir, \"PetImages\")\n", 88 | " create_directories([\n", 89 | " Path(training.root_dir)\n", 90 | " ])\n", 91 | "\n", 92 | " training_config = TrainingConfig(\n", 93 | " root_dir=Path(training.root_dir),\n", 94 | " trained_model_path=Path(training.trained_model_path),\n", 95 | " updated_base_model_path=Path(prepare_base_model.updated_base_model_path),\n", 96 | " training_data=Path(training_data),\n", 97 | " params_epochs=params.EPOCHS,\n", 98 | " params_batch_size=params.BATCH_SIZE,\n", 99 | " params_is_augmentation=params.AUGMENTATION,\n", 100 | " params_image_size=params.IMAGE_SIZE\n", 101 | " )\n", 102 | "\n", 103 | " return training_config" 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": 5, 109 | "metadata": {}, 110 | "outputs": [], 111 | "source": [ 112 | "import time\n", 113 | "\n", 114 | "class PrepareCallback:\n", 115 | " def __init__(self, config: PrepareCallbacksConfig):\n", 116 | " self.config = config\n", 117 | "\n", 118 | " @property\n", 119 | " def _create_tb_callbacks(self):\n", 120 | " timestamp = time.strftime(\"%Y-%m-%d-%H-%M-%S\")\n", 121 | " tb_running_log_dir = os.path.join(\n", 122 | " self.config.tensorboard_root_log_dir,\n", 123 | " f\"tb_logs_at_{timestamp}\",\n", 124 | " )\n", 125 | " return tf.keras.callbacks.TensorBoard(log_dir=tb_running_log_dir)\n", 126 | "\n", 127 | " @property\n", 128 | " def _create_ckpt_callbacks(self):\n", 129 | " return tf.keras.callbacks.ModelCheckpoint(\n", 130 | " filepath=self.config.checkpoint_model_filepath,\n", 131 | " save_best_only=True\n", 132 | " )\n", 133 | "\n", 134 | " def get_tb_ckpt_callbacks(self):\n", 135 | " return [\n", 136 | " self._create_tb_callbacks,\n", 137 | " self._create_ckpt_callbacks\n", 138 | " ]\n", 139 | "\n" 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": 6, 145 | "metadata": {}, 146 | "outputs": [], 147 | "source": [ 148 | "import os\n", 149 | "import urllib.request as request\n", 150 | "from zipfile import ZipFile\n", 151 | "import tensorflow as tf\n", 152 | "import time\n", 153 | "\n", 154 | "class Training:\n", 155 | " def __init__(self, config: TrainingConfig):\n", 156 | " self.config = config\n", 157 | "\n", 158 | " def get_base_model(self):\n", 159 | " self.model = tf.keras.models.load_model(\n", 160 | " self.config.updated_base_model_path\n", 161 | " )\n", 162 | "\n", 163 | " def train_valid_generator(self):\n", 164 | "\n", 165 | " datagenerator_kwargs = dict(\n", 166 | " rescale = 1./255,\n", 167 | " validation_split=0.20\n", 168 | " )\n", 169 | "\n", 170 | " dataflow_kwargs = dict(\n", 171 | " target_size=self.config.params_image_size[:-1],\n", 172 | " batch_size=self.config.params_batch_size,\n", 173 | " interpolation=\"bilinear\"\n", 174 | " )\n", 175 | "\n", 
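    "        # note (added): validation_split=0.20 above reserves 20% of the\n",
    "        # images; the subset argument passed to flow_from_directory selects\n",
    "        # the \"validation\" or \"training\" portion of that split\n",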
176 | " valid_datagenerator = tf.keras.preprocessing.image.ImageDataGenerator(\n", 177 | " **datagenerator_kwargs\n", 178 | " )\n", 179 | "\n", 180 | " self.valid_generator = valid_datagenerator.flow_from_directory(\n", 181 | " directory=self.config.training_data,\n", 182 | " subset=\"validation\",\n", 183 | " shuffle=False,\n", 184 | " **dataflow_kwargs\n", 185 | " )\n", 186 | "\n", 187 | " if self.config.params_is_augmentation:\n", 188 | " train_datagenerator = tf.keras.preprocessing.image.ImageDataGenerator(\n", 189 | " rotation_range=40,\n", 190 | " horizontal_flip=True,\n", 191 | " width_shift_range=0.2,\n", 192 | " height_shift_range=0.2,\n", 193 | " shear_range=0.2,\n", 194 | " zoom_range=0.2,\n", 195 | " **datagenerator_kwargs\n", 196 | " )\n", 197 | " else:\n", 198 | " train_datagenerator = valid_datagenerator\n", 199 | "\n", 200 | " self.train_generator = train_datagenerator.flow_from_directory(\n", 201 | " directory=self.config.training_data,\n", 202 | " subset=\"training\",\n", 203 | " shuffle=True,\n", 204 | " **dataflow_kwargs\n", 205 | " )\n", 206 | "\n", 207 | " @staticmethod\n", 208 | " def save_model(path: Path, model: tf.keras.Model):\n", 209 | " model.save(path)\n", 210 | "\n", 211 | "\n", 212 | " def train(self, callback_list: list):\n", 213 | " self.steps_per_epoch = self.train_generator.samples // self.train_generator.batch_size\n", 214 | " self.validation_steps = self.valid_generator.samples // self.valid_generator.batch_size\n", 215 | "\n", 216 | " self.model.fit(\n", 217 | " self.train_generator,\n", 218 | " epochs=self.config.params_epochs,\n", 219 | " steps_per_epoch=self.steps_per_epoch,\n", 220 | " validation_steps=self.validation_steps,\n", 221 | " validation_data=self.valid_generator,\n", 222 | " callbacks=callback_list\n", 223 | " )\n", 224 | "\n", 225 | " self.save_model(\n", 226 | " path=self.config.trained_model_path,\n", 227 | " model=self.model\n", 228 | " )" 229 | ] 230 | }, 231 | { 232 | "cell_type": "code", 233 | "execution_count": 7, 234 | "metadata": {}, 235 | "outputs": [], 236 | "source": [ 237 | "# !pip install scipy" 238 | ] 239 | }, 240 | { 241 | "cell_type": "code", 242 | "execution_count": 8, 243 | "metadata": {}, 244 | "outputs": [ 245 | { 246 | "name": "stdout", 247 | "output_type": "stream", 248 | "text": [ 249 | "Found 4998 images belonging to 2 classes.\n", 250 | "Found 20000 images belonging to 2 classes.\n", 251 | "1234/1250 [============================>.] 
- ETA: 4s - loss: 6.3841 - accuracy: 0.6860" 252 | ] 253 | }, 254 | { 255 | "name": "stderr", 256 | "output_type": "stream", 257 | "text": [ 258 | "f:\\LIVE_CLASS\\FSDS_NOV\\CodeBase\\FSDS_NOV_deepCNNClassifier\\env\\lib\\site-packages\\PIL\\TiffImagePlugin.py:845: UserWarning: Truncated File Read\n", 259 | " warnings.warn(str(msg))\n" 260 | ] 261 | }, 262 | { 263 | "name": "stdout", 264 | "output_type": "stream", 265 | "text": [ 266 | "1250/1250 [==============================] - 403s 313ms/step - loss: 6.3217 - accuracy: 0.6881 - val_loss: 1.0078 - val_accuracy: 0.8996\n" 267 | ] 268 | } 269 | ], 270 | "source": [ 271 | "try:\n", 272 | " config = ConfigurationManager()\n", 273 | " prepare_callbacks_config = config.get_prepare_callback_config()\n", 274 | " prepare_callbacks = PrepareCallback(config=prepare_callbacks_config)\n", 275 | " callback_list = prepare_callbacks.get_tb_ckpt_callbacks()\n", 276 | " \n", 277 | " training_config = config.get_training_config()\n", 278 | " training = Training(config=training_config)\n", 279 | " training.get_base_model()\n", 280 | " training.train_valid_generator()\n", 281 | " training.train(\n", 282 | " callback_list=callback_list\n", 283 | " )\n", 284 | " \n", 285 | "except Exception as e:\n", 286 | " raise e" 287 | ] 288 | }, 289 | { 290 | "cell_type": "code", 291 | "execution_count": 9, 292 | "metadata": {}, 293 | "outputs": [], 294 | "source": [ 295 | "def example(x, **kwargs):\n", 296 | " print(locals())" 297 | ] 298 | }, 299 | { 300 | "cell_type": "code", 301 | "execution_count": 12, 302 | "metadata": {}, 303 | "outputs": [ 304 | { 305 | "name": "stdout", 306 | "output_type": "stream", 307 | "text": [ 308 | "{'x': 3, 'kwargs': {'y': 4, 'z': 55}}\n" 309 | ] 310 | } 311 | ], 312 | "source": [ 313 | "extra = dict(y=4, z=55)\n", 314 | "\n", 315 | "example(x=3, **extra)" 316 | ] 317 | }, 318 | { 319 | "cell_type": "code", 320 | "execution_count": 1, 321 | "metadata": {}, 322 | "outputs": [], 323 | "source": [ 324 | "import os\n", 325 | "os.chdir(\"../\")" 326 | ] 327 | }, 328 | { 329 | "cell_type": "code", 330 | "execution_count": 2, 331 | "metadata": {}, 332 | "outputs": [], 333 | "source": [ 334 | "import tensorflow as tf" 335 | ] 336 | }, 337 | { 338 | "cell_type": "code", 339 | "execution_count": 3, 340 | "metadata": {}, 341 | "outputs": [], 342 | "source": [ 343 | "model = tf.keras.models.load_model(\"./artifacts/training/model.h5\")" 344 | ] 345 | }, 346 | { 347 | "cell_type": "code", 348 | "execution_count": 26, 349 | "metadata": {}, 350 | "outputs": [], 351 | "source": [ 352 | "from PIL import Image\n", 353 | "\n", 354 | "img = Image.open(\"./artifacts/data_ingestion/PetImages/Cat/0.jpg\")\n", 355 | "img = img.resize((224,224))" 356 | ] 357 | }, 358 | { 359 | "cell_type": "code", 360 | "execution_count": 27, 361 | "metadata": {}, 362 | "outputs": [], 363 | "source": [ 364 | "import numpy as np" 365 | ] 366 | }, 367 | { 368 | "cell_type": "code", 369 | "execution_count": 28, 370 | "metadata": {}, 371 | "outputs": [], 372 | "source": [ 373 | "img_array = np.array(img)" 374 | ] 375 | }, 376 | { 377 | "cell_type": "code", 378 | "execution_count": 29, 379 | "metadata": {}, 380 | "outputs": [ 381 | { 382 | "data": { 383 | "text/plain": [ 384 | "(224, 224, 3)" 385 | ] 386 | }, 387 | "execution_count": 29, 388 | "metadata": {}, 389 | "output_type": "execute_result" 390 | } 391 | ], 392 | "source": [ 393 | "img_array.shape" 394 | ] 395 | }, 396 | { 397 | "cell_type": "code", 398 | "execution_count": 16, 399 | "metadata": {}, 400 | "outputs": [ 401 | { 
402 | "data": { 403 | "text/plain": [ 404 | "(1, 224, 224, 3)" 405 | ] 406 | }, 407 | "execution_count": 16, 408 | "metadata": {}, 409 | "output_type": "execute_result" 410 | } 411 | ], 412 | "source": [ 413 | "img_array = np.expand_dims(img_array, axis=0)\n", 414 | "img_array.shape" 415 | ] 416 | }, 417 | { 418 | "cell_type": "code", 419 | "execution_count": 18, 420 | "metadata": {}, 421 | "outputs": [ 422 | { 423 | "name": "stdout", 424 | "output_type": "stream", 425 | "text": [ 426 | "1/1 [==============================] - 0s 39ms/step\n" 427 | ] 428 | }, 429 | { 430 | "data": { 431 | "text/plain": [ 432 | "array([[1., 0.]], dtype=float32)" 433 | ] 434 | }, 435 | "execution_count": 18, 436 | "metadata": {}, 437 | "output_type": "execute_result" 438 | } 439 | ], 440 | "source": [ 441 | "result = model.predict(img_array)\n", 442 | "result" 443 | ] 444 | }, 445 | { 446 | "cell_type": "code", 447 | "execution_count": 24, 448 | "metadata": {}, 449 | "outputs": [ 450 | { 451 | "name": "stdout", 452 | "output_type": "stream", 453 | "text": [ 454 | "predicted: cat\n" 455 | ] 456 | } 457 | ], 458 | "source": [ 459 | "argmax_index = np.argmax(result, axis=1)\n", 460 | "if argmax_index[0] == 0:\n", 461 | " print(\"predicted: cat\")\n", 462 | "else:\n", 463 | " print(\"predicted: dog\")" 464 | ] 465 | }, 466 | { 467 | "cell_type": "code", 468 | "execution_count": null, 469 | "metadata": {}, 470 | "outputs": [], 471 | "source": [] 472 | } 473 | ], 474 | "metadata": { 475 | "kernelspec": { 476 | "display_name": "Python 3.8.13 64-bit", 477 | "language": "python", 478 | "name": "python3" 479 | }, 480 | "language_info": { 481 | "codemirror_mode": { 482 | "name": "ipython", 483 | "version": 3 484 | }, 485 | "file_extension": ".py", 486 | "mimetype": "text/x-python", 487 | "name": "python", 488 | "nbconvert_exporter": "python", 489 | "pygments_lexer": "ipython3", 490 | "version": "3.8.13" 491 | }, 492 | "orig_nbformat": 4, 493 | "vscode": { 494 | "interpreter": { 495 | "hash": "37206e62ba8fc6a6f0b961435078c80557a69ac7a5dd6249b6601b5385f5de67" 496 | } 497 | } 498 | }, 499 | "nbformat": 4, 500 | "nbformat_minor": 2 501 | } 502 | -------------------------------------------------------------------------------- /research/st_05_trails.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import tensorflow as tf" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 2, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "import os\n", 19 | "os.chdir(\"../\")" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 16, 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [ 28 | "os.environ[\"MLFLOW_TRACKING_URI\"]=\"https://dagshub.com/c17hawke/FSDS_NOV_deepCNNClassifier.mlflow\"\n", 29 | "os.environ[\"MLFLOW_TRACKING_USERNAME\"]=\"c17hawke\"\n", 30 | "os.environ[\"MLFLOW_TRACKING_PASSWORD\"]=\"84215e85b5e87347572d9d272c798b2b1ff2a546\"" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 17, 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [ 39 | "model = tf.keras.models.load_model(\"artifacts/training/model.h5\")" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 18, 45 | "metadata": {}, 46 | "outputs": [], 47 | "source": [ 48 | "from dataclasses import dataclass\n", 49 | "from pathlib import Path\n", 50 | "\n", 51 | "@dataclass(frozen=True)\n", 
52 | "class EvaluationConfig:\n", 53 | " path_of_model: Path\n", 54 | " training_data: Path\n", 55 | " all_params: dict\n", 56 | " mlflow_uri: str\n", 57 | " params_image_size: list\n", 58 | " params_batch_size: int\n" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": 19, 64 | "metadata": {}, 65 | "outputs": [], 66 | "source": [ 67 | "from deepClassifier.constants import *\n", 68 | "from deepClassifier.utils import read_yaml, create_directories, save_json\n", 69 | "\n", 70 | "class ConfigurationManager:\n", 71 | " def __init__(\n", 72 | " self, \n", 73 | " config_filepath = CONFIG_FILE_PATH,\n", 74 | " params_filepath = PARAMS_FILE_PATH):\n", 75 | " self.config = read_yaml(config_filepath)\n", 76 | " self.params = read_yaml(params_filepath)\n", 77 | " create_directories([self.config.artifacts_root])\n", 78 | "\n", 79 | " def get_validation_config(self) -> EvaluationConfig:\n", 80 | " eval_config = EvaluationConfig(\n", 81 | " path_of_model=\"artifacts/training/model.h5\",\n", 82 | " training_data=\"artifacts/data_ingestion/PetImages\",\n", 83 | " mlflow_uri=\"https://dagshub.com/c17hawke/FSDS_NOV_deepCNNClassifier.mlflow\",\n", 84 | " all_params=self.params,\n", 85 | " params_image_size=self.params.IMAGE_SIZE,\n", 86 | " params_batch_size=self.params.BATCH_SIZE\n", 87 | " )\n", 88 | " return eval_config" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": 22, 94 | "metadata": {}, 95 | "outputs": [], 96 | "source": [ 97 | "\n", 98 | "import tensorflow as tf\n", 99 | "from pathlib import Path\n", 100 | "import mlflow\n", 101 | "import mlflow.keras\n", 102 | "from urllib.parse import urlparse\n", 103 | "\n", 104 | "class Evaluation:\n", 105 | " def __init__(self, config: EvaluationConfig):\n", 106 | " self.config = config\n", 107 | "\n", 108 | " def _valid_generator(self):\n", 109 | "\n", 110 | " datagenerator_kwargs = dict(\n", 111 | " rescale = 1./255,\n", 112 | " validation_split=0.30\n", 113 | " )\n", 114 | "\n", 115 | " dataflow_kwargs = dict(\n", 116 | " target_size=self.config.params_image_size[:-1],\n", 117 | " batch_size=self.config.params_batch_size,\n", 118 | " interpolation=\"bilinear\"\n", 119 | " )\n", 120 | "\n", 121 | " valid_datagenerator = tf.keras.preprocessing.image.ImageDataGenerator(\n", 122 | " **datagenerator_kwargs\n", 123 | " )\n", 124 | "\n", 125 | " self.valid_generator = valid_datagenerator.flow_from_directory(\n", 126 | " directory=self.config.training_data,\n", 127 | " subset=\"validation\",\n", 128 | " shuffle=False,\n", 129 | " **dataflow_kwargs\n", 130 | " )\n", 131 | "\n", 132 | "\n", 133 | " @staticmethod\n", 134 | " def load_model(path: Path) -> tf.keras.Model:\n", 135 | " return tf.keras.models.load_model(path)\n", 136 | "\n", 137 | "\n", 138 | " def evaluation(self):\n", 139 | " self.model = self.load_model(self.config.path_of_model)\n", 140 | " self._valid_generator()\n", 141 | " self.score = model.evaluate(self.valid_generator)\n", 142 | "\n", 143 | " def save_score(self):\n", 144 | " scores = {\"loss\": self.score[0], \"accuracy\": self.score[1]}\n", 145 | " save_json(path=Path(\"scores.json\"), data=scores)\n", 146 | "\n", 147 | " def log_into_mlflow(self):\n", 148 | " mlflow.set_registry_uri(self.config.mlflow_uri)\n", 149 | " tracking_url_type_store = urlparse(mlflow.get_tracking_uri()).scheme\n", 150 | " with mlflow.start_run():\n", 151 | " mlflow.log_params(self.config.all_params)\n", 152 | " mlflow.log_metrics(\n", 153 | " {\"loss\": self.score[0], \"accuracy\": self.score[1]}\n", 154 | " )\n", 155 | " # 
Model registry does not work with file store\n", 156 | " if tracking_url_type_store != \"file\":\n", 157 | "\n", 158 | " # Register the model\n", 159 | " # There are other ways to use the Model Registry, which depends on the use case,\n", 160 | " # please refer to the doc for more information:\n", 161 | " # https://mlflow.org/docs/latest/model-registry.html#api-workflow\n", 162 | " mlflow.keras.log_model(self.model, \"model\", registered_model_name=\"VGG16Model\")\n", 163 | " else:\n", 164 | " mlflow.keras.log_model(self.model, \"model\")\n" 165 | ] 166 | }, 167 | { 168 | "cell_type": "code", 169 | "execution_count": 23, 170 | "metadata": {}, 171 | "outputs": [ 172 | { 173 | "name": "stdout", 174 | "output_type": "stream", 175 | "text": [ 176 | "Found 7498 images belonging to 2 classes.\n", 177 | "469/469 [==============================] - 47s 100ms/step - loss: 7.2357 - accuracy: 0.6538\n", 178 | "INFO:tensorflow:Assets written to: C:\\Users\\sunny\\AppData\\Local\\Temp\\tmpkae92jcs\\model\\data\\model\\assets\n" 179 | ] 180 | }, 181 | { 182 | "name": "stderr", 183 | "output_type": "stream", 184 | "text": [ 185 | "Successfully registered model ''.\n", 186 | "2022/09/25 11:59:15 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation. Model name: VGG16Model, version 2\n", 187 | "Created version '2' of model 'VGG16Model'.\n" 188 | ] 189 | } 190 | ], 191 | "source": [ 192 | "try:\n", 193 | " config = ConfigurationManager()\n", 194 | " val_config = config.get_validation_config()\n", 195 | " evaluation = Evaluation(val_config)\n", 196 | " evaluation.evaluation()\n", 197 | " evaluation.save_score()\n", 198 | " evaluation.log_into_mlflow()\n", 199 | " \n", 200 | "except Exception as e:\n", 201 | " raise e" 202 | ] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "execution_count": null, 207 | "metadata": {}, 208 | "outputs": [], 209 | "source": [] 210 | } 211 | ], 212 | "metadata": { 213 | "kernelspec": { 214 | "display_name": "Python 3.8.13 64-bit", 215 | "language": "python", 216 | "name": "python3" 217 | }, 218 | "language_info": { 219 | "codemirror_mode": { 220 | "name": "ipython", 221 | "version": 3 222 | }, 223 | "file_extension": ".py", 224 | "mimetype": "text/x-python", 225 | "name": "python", 226 | "nbconvert_exporter": "python", 227 | "pygments_lexer": "ipython3", 228 | "version": "3.8.13" 229 | }, 230 | "orig_nbformat": 4, 231 | "vscode": { 232 | "interpreter": { 233 | "hash": "37206e62ba8fc6a6f0b961435078c80557a69ac7a5dd6249b6601b5385f5de67" 234 | } 235 | } 236 | }, 237 | "nbformat": 4, 238 | "nbformat_minor": 2 239 | } 240 | -------------------------------------------------------------------------------- /research/trials.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from box import ConfigBox" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 2, 15 | "metadata": {}, 16 | "outputs": [ 17 | { 18 | "data": { 19 | "text/plain": [ 20 | "'value'" 21 | ] 22 | }, 23 | "execution_count": 2, 24 | "metadata": {}, 25 | "output_type": "execute_result" 26 | } 27 | ], 28 | "source": [ 29 | "d = {\"key\": \"value\", \"key1\": \"value1\"}\n", 30 | "d[\"key\"]" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 4, 36 | "metadata": {}, 37 | "outputs": [ 38 | { 39 | "data": { 40 | "text/plain": [ 41 | "ConfigBox({'key': 'value', 
'key1': 'value1'})" 42 | ] 43 | }, 44 | "execution_count": 4, 45 | "metadata": {}, 46 | "output_type": "execute_result" 47 | } 48 | ], 49 | "source": [ 50 | "d2 = ConfigBox({\"key\": \"value\", \"key1\": \"value1\"})\n", 51 | "d2" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": 5, 57 | "metadata": {}, 58 | "outputs": [ 59 | { 60 | "data": { 61 | "text/plain": [ 62 | "'value'" 63 | ] 64 | }, 65 | "execution_count": 5, 66 | "metadata": {}, 67 | "output_type": "execute_result" 68 | } 69 | ], 70 | "source": [ 71 | "d2.key" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": 7, 77 | "metadata": {}, 78 | "outputs": [], 79 | "source": [ 80 | "from ensure import ensure_annotations" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": 8, 86 | "metadata": {}, 87 | "outputs": [], 88 | "source": [ 89 | "@ensure_annotations\n", 90 | "def get_product(x: int, y: int) -> int:\n", 91 | " return x*y" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": 9, 97 | "metadata": {}, 98 | "outputs": [ 99 | { 100 | "data": { 101 | "text/plain": [ 102 | "6" 103 | ] 104 | }, 105 | "execution_count": 9, 106 | "metadata": {}, 107 | "output_type": "execute_result" 108 | } 109 | ], 110 | "source": [ 111 | "get_product(x=3, y=2)" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": 10, 117 | "metadata": {}, 118 | "outputs": [ 119 | { 120 | "ename": "EnsureError", 121 | "evalue": "Argument y of type to does not match annotation type ", 122 | "output_type": "error", 123 | "traceback": [ 124 | "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", 125 | "\u001b[1;31mEnsureError\u001b[0m Traceback (most recent call last)", 126 | "Cell \u001b[1;32mIn [10], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m \u001b[43mget_product\u001b[49m\u001b[43m(\u001b[49m\u001b[43mx\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m3\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43my\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mhii\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n", 127 | "File \u001b[1;32mf:\\LIVE_CLASS\\FSDS_NOV\\CodeBase\\FSDS_NOV_deepCNNClassifier\\env\\lib\\site-packages\\ensure\\main.py:845\u001b[0m, in \u001b[0;36mWrappedFunctionReturn.__call__\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m 840\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39misinstance\u001b[39m(value, templ):\n\u001b[0;32m 841\u001b[0m msg \u001b[39m=\u001b[39m (\n\u001b[0;32m 842\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mArgument \u001b[39m\u001b[39m{arg}\u001b[39;00m\u001b[39m of type \u001b[39m\u001b[39m{valt}\u001b[39;00m\u001b[39m to \u001b[39m\u001b[39m{f}\u001b[39;00m\u001b[39m \u001b[39m\u001b[39m\"\u001b[39m\n\u001b[0;32m 843\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mdoes not match annotation type \u001b[39m\u001b[39m{t}\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[0;32m 844\u001b[0m )\n\u001b[1;32m--> 845\u001b[0m \u001b[39mraise\u001b[39;00m EnsureError(msg\u001b[39m.\u001b[39mformat(\n\u001b[0;32m 846\u001b[0m arg\u001b[39m=\u001b[39marg, f\u001b[39m=\u001b[39m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mf, t\u001b[39m=\u001b[39mtempl, valt\u001b[39m=\u001b[39m\u001b[39mtype\u001b[39m(value)\n\u001b[0;32m 847\u001b[0m ))\n\u001b[0;32m 849\u001b[0m return_val \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mf(\u001b[39m*\u001b[39margs, 
\u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n\u001b[0;32m 850\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39misinstance\u001b[39m(return_val, \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mreturn_templ):\n", 128 | "\u001b[1;31mEnsureError\u001b[0m: Argument y of type to does not match annotation type " 129 | ] 130 | } 131 | ], 132 | "source": [ 133 | "get_product(x=3, y=\"hii\")\n" 134 | ] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "execution_count": 13, 139 | "metadata": {}, 140 | "outputs": [], 141 | "source": [ 142 | "@ensure_annotations\n", 143 | "def get_product(x: int, y: int) -> str:\n", 144 | " return str(x*y)" 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": 14, 150 | "metadata": {}, 151 | "outputs": [ 152 | { 153 | "data": { 154 | "text/plain": [ 155 | "'6'" 156 | ] 157 | }, 158 | "execution_count": 14, 159 | "metadata": {}, 160 | "output_type": "execute_result" 161 | } 162 | ], 163 | "source": [ 164 | "get_product(x=3, y=2)" 165 | ] 166 | }, 167 | { 168 | "cell_type": "code", 169 | "execution_count": null, 170 | "metadata": {}, 171 | "outputs": [], 172 | "source": [] 173 | } 174 | ], 175 | "metadata": { 176 | "kernelspec": { 177 | "display_name": "Python 3.8.13 (conda)", 178 | "language": "python", 179 | "name": "python3" 180 | }, 181 | "language_info": { 182 | "codemirror_mode": { 183 | "name": "ipython", 184 | "version": 3 185 | }, 186 | "file_extension": ".py", 187 | "mimetype": "text/x-python", 188 | "name": "python", 189 | "nbconvert_exporter": "python", 190 | "pygments_lexer": "ipython3", 191 | "version": "3.8.13" 192 | }, 193 | "orig_nbformat": 4, 194 | "vscode": { 195 | "interpreter": { 196 | "hash": "37206e62ba8fc6a6f0b961435078c80557a69ac7a5dd6249b6601b5385f5de67" 197 | } 198 | } 199 | }, 200 | "nbformat": 4, 201 | "nbformat_minor": 2 202 | } 203 | -------------------------------------------------------------------------------- /research/trials.ipynbexample.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c17hawke/FSDS_NOV_deepCNNClassifier/e54341d242a39256c54a90acdead18bc53031677/research/trials.ipynbexample.py -------------------------------------------------------------------------------- /scores.json: -------------------------------------------------------------------------------- 1 | { 2 | "loss": 7.235665798187256, 3 | "accuracy": 0.6537743210792542 4 | } -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | license = MIT 3 | license_file = LICENSE 4 | classifier = 5 | Programming Language :: Python :: 3.8 6 | Operating System :: OS Independent 7 | 8 | [options] 9 | install_requires = 10 | ensure==1.0.2 11 | python_requires = >=3.7 12 | 13 | [options.extras_require] 14 | testing = 15 | pytest>=7.1.3 16 | mypy>=0.971 17 | flake8>=5.0.4 18 | tox>=3.25.1 19 | black>=22.8.0 20 | 21 | [options.package_data] 22 | deepClassifier = py.typed 23 | 24 | [flake8] 25 | max-line-length = 160 26 | exclude = __init__.py 27 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import setuptools 2 | 3 | with open("README.md", "r", encoding="utf-8") as f: 4 | long_description = f.read() 5 | 6 | __version__ = "0.0.0" 7 | 8 | REPO_NAME = "FSDS_NOV_deepCNNClassifier" 9 | AUTHOR_USER_NAME = 
"c17hawke" 10 | SRC_REPO = "deepClassifier" 11 | AUTHOR_EMAIL = "sunny.c17hawke@gmail.com" 12 | 13 | setuptools.setup( 14 | name=SRC_REPO, 15 | version=__version__, 16 | author=AUTHOR_USER_NAME, 17 | author_email=AUTHOR_EMAIL, 18 | description="A small python package for CNN app", 19 | long_description=long_description, 20 | long_description_content="text/markdown", 21 | url=f"https://github.com/{AUTHOR_USER_NAME}/{REPO_NAME}", 22 | project_urls={ 23 | "Bug Tracker": f"https://github.com/{AUTHOR_USER_NAME}/{REPO_NAME}/issues", 24 | }, 25 | package_dir={"": "src"}, 26 | packages=setuptools.find_packages(where="src") 27 | ) 28 | -------------------------------------------------------------------------------- /src/deepClassifier/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import logging 4 | 5 | logging_str = "[%(asctime)s: %(levelname)s: %(module)s]: %(message)s" 6 | log_dir = "logs" 7 | log_filepath = os.path.join(log_dir, "running_logs.log") 8 | os.makedirs(log_dir, exist_ok=True) 9 | 10 | logging.basicConfig( 11 | level=logging.INFO, 12 | format=logging_str, 13 | handlers=[ 14 | logging.FileHandler(log_filepath), 15 | # logging.StreamHandler(sys.stdout), 16 | ]) 17 | 18 | logger = logging.getLogger("deepClassifierLogger") -------------------------------------------------------------------------------- /src/deepClassifier/components/__init__.py: -------------------------------------------------------------------------------- 1 | from deepClassifier.components.data_ingestion import DataIngestion 2 | from deepClassifier.components.prepare_base_model import PrepareBaseModel 3 | from deepClassifier.components.prepare_callback import PrepareCallback 4 | from deepClassifier.components.training import Training 5 | from deepClassifier.components.evaluation import Evaluation -------------------------------------------------------------------------------- /src/deepClassifier/components/data_ingestion.py: -------------------------------------------------------------------------------- 1 | import os 2 | import urllib.request as request 3 | from zipfile import ZipFile 4 | from deepClassifier.entity import DataIngestionConfig 5 | from deepClassifier import logger 6 | from deepClassifier.utils import get_size 7 | from tqdm import tqdm 8 | from pathlib import Path 9 | 10 | 11 | class DataIngestion: 12 | def __init__(self, config: DataIngestionConfig): 13 | self.config = config 14 | 15 | def download_file(self): 16 | logger.info("Trying to download file...") 17 | if not os.path.exists(self.config.local_data_file): 18 | logger.info("Download started...") 19 | filename, headers = request.urlretrieve( 20 | url=self.config.source_URL, 21 | filename=self.config.local_data_file 22 | ) 23 | logger.info(f"{filename} download! 
with the following info: \n{headers}") 24 | else: 25 | logger.info(f"File already exists of size: {get_size(Path(self.config.local_data_file))}") 26 | 27 | def _get_updated_list_of_files(self, list_of_files): 28 | return [f for f in list_of_files if f.endswith(".jpg") and ("Cat" in f or "Dog" in f)] 29 | 30 | def _preprocess(self, zf: ZipFile, f: str, working_dir: str): 31 | target_filepath = os.path.join(working_dir, f) 32 | if not os.path.exists(target_filepath): 33 | zf.extract(f, working_dir) 34 | 35 | if os.path.getsize(target_filepath) == 0: 36 | logger.info(f"removing file:{target_filepath} of size: {get_size(Path(target_filepath))}") 37 | os.remove(target_filepath) 38 | 39 | def unzip_and_clean(self): 40 | logger.info(f"unzipping file and removing unwanted files") 41 | with ZipFile(file=self.config.local_data_file, mode="r") as zf: 42 | list_of_files = zf.namelist() 43 | updated_list_of_files = self._get_updated_list_of_files(list_of_files) 44 | for f in tqdm(updated_list_of_files): 45 | self._preprocess(zf, f, self.config.unzip_dir) 46 | 47 | def create_test_data(self): 48 | """ 49 | separate 30% of the data into test data 50 | """ 51 | pass -------------------------------------------------------------------------------- /src/deepClassifier/components/evaluation.py: -------------------------------------------------------------------------------- 1 | 2 | import tensorflow as tf 3 | from pathlib import Path 4 | from deepClassifier.entity import EvaluationConfig 5 | from deepClassifier.utils import save_json 6 | 7 | class Evaluation: 8 | def __init__(self, config: EvaluationConfig): 9 | self.config = config 10 | 11 | def _valid_generator(self): 12 | 13 | datagenerator_kwargs = dict( 14 | rescale = 1./255, 15 | validation_split=0.30 16 | ) 17 | 18 | dataflow_kwargs = dict( 19 | target_size=self.config.params_image_size[:-1], 20 | batch_size=self.config.params_batch_size, 21 | interpolation="bilinear" 22 | ) 23 | 24 | valid_datagenerator = tf.keras.preprocessing.image.ImageDataGenerator( 25 | **datagenerator_kwargs 26 | ) 27 | 28 | self.valid_generator = valid_datagenerator.flow_from_directory( 29 | directory=self.config.training_data, 30 | subset="validation", 31 | shuffle=False, 32 | **dataflow_kwargs 33 | ) 34 | 35 | 36 | @staticmethod 37 | def load_model(path: Path) -> tf.keras.Model: 38 | return tf.keras.models.load_model(path) 39 | 40 | 41 | def evaluation(self): 42 | model = self.load_model(self.config.path_of_model) 43 | self._valid_generator() 44 | self.score = model.evaluate(self.valid_generator) 45 | 46 | def save_score(self): 47 | scores = {"loss": self.score[0], "accuracy": self.score[1]} 48 | save_json(path=Path("scores.json"), data=scores) -------------------------------------------------------------------------------- /src/deepClassifier/components/prepare_base_model.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from deepClassifier.entity import PrepareBaseModelConfig 3 | import tensorflow as tf 4 | 5 | class PrepareBaseModel: 6 | def __init__(self, config: PrepareBaseModelConfig): 7 | self.config = config 8 | 9 | def get_base_model(self): 10 | self.model = tf.keras.applications.vgg16.VGG16( 11 | input_shape=self.config.params_image_size, 12 | weights=self.config.params_weights, 13 | include_top=self.config.params_include_top 14 | ) 15 | 16 | self.save_model(path=self.config.base_model_path, model=self.model) 17 | 18 | 19 | @staticmethod 20 | def _prepare_full_model(model, classes, freeze_all, freeze_till, 
learning_rate): 21 | if freeze_all: 22 | model.trainable = False 23 | elif (freeze_till is not None) and (freeze_till > 0): 24 | for layer in model.layers[:-freeze_till]: 25 | layer.trainable = False 26 | 27 | flatten_in = tf.keras.layers.Flatten()(model.output) 28 | prediction = tf.keras.layers.Dense( 29 | units=classes, 30 | activation="softmax" 31 | )(flatten_in) 32 | 33 | full_model = tf.keras.models.Model( 34 | inputs=model.input, 35 | outputs=prediction 36 | ) 37 | 38 | full_model.compile( 39 | optimizer=tf.keras.optimizers.SGD(learning_rate=learning_rate), 40 | loss=tf.keras.losses.CategoricalCrossentropy(), 41 | metrics=["accuracy"] 42 | ) 43 | 44 | full_model.summary() 45 | return full_model 46 | 47 | def update_base_model(self): 48 | self.full_model = self._prepare_full_model( 49 | model=self.model, 50 | classes=self.config.params_classes, 51 | freeze_all=True, 52 | freeze_till=None, 53 | learning_rate=self.config.params_learning_rate 54 | ) 55 | 56 | self.save_model(path=self.config.updated_base_model_path, model=self.full_model) 57 | 58 | @staticmethod 59 | def save_model(path: Path, model: tf.keras.Model): 60 | model.save(path) 61 | 62 | -------------------------------------------------------------------------------- /src/deepClassifier/components/prepare_callback.py: -------------------------------------------------------------------------------- 1 | import os 2 | from deepClassifier.entity import PrepareCallbacksConfig 3 | import tensorflow as tf 4 | import time 5 | 6 | class PrepareCallback: 7 | def __init__(self, config: PrepareCallbacksConfig): 8 | self.config = config 9 | 10 | @property 11 | def _create_tb_callbacks(self): 12 | timestamp = time.strftime("%Y-%m-%d-%H-%M-%S") 13 | tb_running_log_dir = os.path.join( 14 | self.config.tensorboard_root_log_dir, 15 | f"tb_logs_at_{timestamp}", 16 | ) 17 | return tf.keras.callbacks.TensorBoard(log_dir=tb_running_log_dir) 18 | 19 | @property 20 | def _create_ckpt_callbacks(self): 21 | return tf.keras.callbacks.ModelCheckpoint( 22 | filepath=self.config.checkpoint_model_filepath, 23 | save_best_only=True 24 | ) 25 | 26 | def get_tb_ckpt_callbacks(self): 27 | return [ 28 | self._create_tb_callbacks, 29 | self._create_ckpt_callbacks 30 | ] 31 | 32 | -------------------------------------------------------------------------------- /src/deepClassifier/components/training.py: -------------------------------------------------------------------------------- 1 | from deepClassifier.entity import TrainingConfig 2 | import tensorflow as tf 3 | from pathlib import Path 4 | 5 | class Training: 6 | def __init__(self, config: TrainingConfig): 7 | self.config = config 8 | 9 | def get_base_model(self): 10 | self.model = tf.keras.models.load_model( 11 | self.config.updated_base_model_path 12 | ) 13 | 14 | def train_valid_generator(self): 15 | 16 | datagenerator_kwargs = dict( 17 | rescale = 1./255, 18 | validation_split=0.20 19 | ) 20 | 21 | dataflow_kwargs = dict( 22 | target_size=self.config.params_image_size[:-1], 23 | batch_size=self.config.params_batch_size, 24 | interpolation="bilinear" 25 | ) 26 | 27 | valid_datagenerator = tf.keras.preprocessing.image.ImageDataGenerator( 28 | **datagenerator_kwargs 29 | ) 30 | 31 | self.valid_generator = valid_datagenerator.flow_from_directory( 32 | directory=self.config.training_data, 33 | subset="validation", 34 | shuffle=False, 35 | **dataflow_kwargs 36 | ) 37 | 38 | if self.config.params_is_augmentation: 39 | train_datagenerator = tf.keras.preprocessing.image.ImageDataGenerator( 40 | rotation_range=40, 
41 | horizontal_flip=True, 42 | width_shift_range=0.2, 43 | height_shift_range=0.2, 44 | shear_range=0.2, 45 | zoom_range=0.2, 46 | **datagenerator_kwargs 47 | ) 48 | else: 49 | train_datagenerator = valid_datagenerator 50 | 51 | self.train_generator = train_datagenerator.flow_from_directory( 52 | directory=self.config.training_data, 53 | subset="training", 54 | shuffle=True, 55 | **dataflow_kwargs 56 | ) 57 | 58 | @staticmethod 59 | def save_model(path: Path, model: tf.keras.Model): 60 | model.save(path) 61 | 62 | 63 | def train(self, callback_list: list): 64 | self.steps_per_epoch = self.train_generator.samples // self.train_generator.batch_size 65 | self.validation_steps = self.valid_generator.samples // self.valid_generator.batch_size 66 | 67 | self.model.fit( 68 | self.train_generator, 69 | epochs=self.config.params_epochs, 70 | steps_per_epoch=self.steps_per_epoch, 71 | validation_steps=self.validation_steps, 72 | validation_data=self.valid_generator, 73 | callbacks=callback_list 74 | ) 75 | 76 | self.save_model( 77 | path=self.config.trained_model_path, 78 | model=self.model 79 | ) -------------------------------------------------------------------------------- /src/deepClassifier/config/__init__.py: -------------------------------------------------------------------------------- 1 | from deepClassifier.config.configuration import ConfigurationManager -------------------------------------------------------------------------------- /src/deepClassifier/config/configuration.py: -------------------------------------------------------------------------------- 1 | from deepClassifier.constants import CONFIG_FILE_PATH, PARAMS_FILE_PATH 2 | from deepClassifier.utils import read_yaml, create_directories 3 | from deepClassifier.entity import ( 4 | DataIngestionConfig, 5 | PrepareBaseModelConfig, 6 | PrepareCallbacksConfig, 7 | TrainingConfig, 8 | EvaluationConfig 9 | ) 10 | from pathlib import Path 11 | import os 12 | 13 | class ConfigurationManager: 14 | def __init__( 15 | self, 16 | config_filepath = CONFIG_FILE_PATH, 17 | params_filepath = PARAMS_FILE_PATH): 18 | self.config = read_yaml(config_filepath) 19 | self.params = read_yaml(params_filepath) 20 | create_directories([self.config.artifacts_root]) 21 | 22 | def get_data_ingestion_config(self) -> DataIngestionConfig: 23 | config = self.config.data_ingestion 24 | 25 | create_directories([config.root_dir]) 26 | 27 | data_ingestion_config = DataIngestionConfig( 28 | root_dir=config.root_dir, 29 | source_URL=config.source_URL, 30 | local_data_file=config.local_data_file, 31 | unzip_dir=config.unzip_dir 32 | ) 33 | 34 | return data_ingestion_config 35 | 36 | def get_prepare_base_model_config(self) -> PrepareBaseModelConfig: 37 | config = self.config.prepare_base_model 38 | 39 | create_directories([config.root_dir]) 40 | 41 | prepare_base_model_config = PrepareBaseModelConfig( 42 | root_dir=Path(config.root_dir), 43 | base_model_path=Path(config.base_model_path), 44 | updated_base_model_path=Path(config.updated_base_model_path), 45 | params_image_size=self.params.IMAGE_SIZE, 46 | params_learning_rate=self.params.LEARNING_RATE, 47 | params_include_top=self.params.INCLUDE_TOP, 48 | params_weights=self.params.WEIGHTS, 49 | params_classes=self.params.CLASSES 50 | ) 51 | 52 | return prepare_base_model_config 53 | 54 | def get_prepare_callback_config(self) -> PrepareCallbacksConfig: 55 | config = self.config.prepare_callbacks 56 | model_ckpt_dir = os.path.dirname(config.checkpoint_model_filepath) 57 | create_directories([ 58 | Path(model_ckpt_dir), 
59 | Path(config.tensorboard_root_log_dir) 60 | ]) 61 | 62 | prepare_callback_config = PrepareCallbacksConfig( 63 | root_dir=Path(config.root_dir), 64 | tensorboard_root_log_dir=Path(config.tensorboard_root_log_dir), 65 | checkpoint_model_filepath=Path(config.checkpoint_model_filepath) 66 | ) 67 | 68 | return prepare_callback_config 69 | 70 | def get_training_config(self) -> TrainingConfig: 71 | training = self.config.training 72 | prepare_base_model = self.config.prepare_base_model 73 | params = self.params 74 | training_data = os.path.join(self.config.data_ingestion.unzip_dir, "PetImages") 75 | create_directories([ 76 | Path(training.root_dir) 77 | ]) 78 | 79 | training_config = TrainingConfig( 80 | root_dir=Path(training.root_dir), 81 | trained_model_path=Path(training.trained_model_path), 82 | updated_base_model_path=Path(prepare_base_model.updated_base_model_path), 83 | training_data=Path(training_data), 84 | params_epochs=params.EPOCHS, 85 | params_batch_size=params.BATCH_SIZE, 86 | params_is_augmentation=params.AUGMENTATION, 87 | params_image_size=params.IMAGE_SIZE 88 | ) 89 | 90 | return training_config 91 | 92 | def get_validation_config(self) -> EvaluationConfig: 93 | eval_config = EvaluationConfig( 94 | path_of_model=self.config.training.trained_model_path, 95 | training_data=self.config.data_ingestion.unzip_dir, 96 | params_image_size=self.params.IMAGE_SIZE, 97 | params_batch_size=self.params.BATCH_SIZE 98 | ) 99 | return eval_config -------------------------------------------------------------------------------- /src/deepClassifier/constants/__init__.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | CONFIG_FILE_PATH = Path("configs/config.yaml") 4 | PARAMS_FILE_PATH = Path("params.yaml") 5 | -------------------------------------------------------------------------------- /src/deepClassifier/entity/__init__.py: -------------------------------------------------------------------------------- 1 | from deepClassifier.entity.config_entity import ( 2 | DataIngestionConfig, 3 | PrepareBaseModelConfig, 4 | PrepareCallbacksConfig, 5 | TrainingConfig, 6 | EvaluationConfig 7 | ) -------------------------------------------------------------------------------- /src/deepClassifier/entity/config_entity.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from pathlib import Path 3 | 4 | 5 | @dataclass(frozen=True) 6 | class DataIngestionConfig: 7 | root_dir: Path 8 | source_URL: str 9 | local_data_file: Path 10 | unzip_dir: Path 11 | 12 | 13 | @dataclass(frozen=True) 14 | class PrepareBaseModelConfig: 15 | root_dir: Path 16 | base_model_path: Path 17 | updated_base_model_path: Path 18 | params_image_size: list 19 | params_learning_rate: float 20 | params_include_top: bool 21 | params_weights: str 22 | params_classes: int 23 | 24 | @dataclass(frozen=True) 25 | class PrepareCallbacksConfig: 26 | root_dir: Path 27 | tensorboard_root_log_dir: Path 28 | checkpoint_model_filepath: Path 29 | 30 | 31 | @dataclass(frozen=True) 32 | class TrainingConfig: 33 | root_dir: Path 34 | trained_model_path: Path 35 | updated_base_model_path: Path 36 | training_data: Path 37 | params_epochs: int 38 | params_batch_size: int 39 | params_is_augmentation: bool 40 | params_image_size: list 41 | 42 | @dataclass(frozen=True) 43 | class EvaluationConfig: 44 | path_of_model: Path 45 | training_data: Path 46 | params_image_size: list 47 | params_batch_size: int 
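Note: every entity above is a @dataclass(frozen=True), so each pipeline stage receives a read-only config object and cannot mutate it mid-run. A minimal sketch of that behavior (the field values below are illustrative placeholders, not the repo's real config):

from dataclasses import FrozenInstanceError
from pathlib import Path
from deepClassifier.entity import DataIngestionConfig

# placeholder values, for illustration only
cfg = DataIngestionConfig(
    root_dir=Path("artifacts/data_ingestion"),
    source_URL="https://example.com/data.zip",
    local_data_file=Path("artifacts/data_ingestion/data.zip"),
    unzip_dir=Path("artifacts/data_ingestion"),
)

try:
    cfg.unzip_dir = Path("elsewhere")  # frozen=True turns this assignment into an error
except FrozenInstanceError as err:
    print(f"config entities are immutable: {err}")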
-------------------------------------------------------------------------------- /src/deepClassifier/pipeline/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c17hawke/FSDS_NOV_deepCNNClassifier/e54341d242a39256c54a90acdead18bc53031677/src/deepClassifier/pipeline/__init__.py -------------------------------------------------------------------------------- /src/deepClassifier/pipeline/stage_01_data_ingestion.py: -------------------------------------------------------------------------------- 1 | from deepClassifier.config import ConfigurationManager 2 | from deepClassifier.components import DataIngestion 3 | from deepClassifier import logger 4 | 5 | STAGE_NAME = "Data Ingestion stage" 6 | 7 | def main(): 8 | config = ConfigurationManager() 9 | data_ingestion_config = config.get_data_ingestion_config() 10 | data_ingestion = DataIngestion(config=data_ingestion_config) 11 | data_ingestion.download_file() 12 | data_ingestion.unzip_and_clean() 13 | 14 | if __name__ == '__main__': 15 | try: 16 | logger.info(f">>>>>> stage {STAGE_NAME} started <<<<<<") 17 | main() 18 | logger.info(f">>>>>> stage {STAGE_NAME} completed <<<<<<\n\nx==========x") 19 | except Exception as e: 20 | logger.exception(e) 21 | raise e -------------------------------------------------------------------------------- /src/deepClassifier/pipeline/stage_02_prepare_base_model.py: -------------------------------------------------------------------------------- 1 | from deepClassifier.config import ConfigurationManager 2 | from deepClassifier.components import PrepareBaseModel 3 | from deepClassifier import logger 4 | 5 | STAGE_NAME = "Prepare base model" 6 | 7 | def main(): 8 | config = ConfigurationManager() 9 | prepare_base_model_config = config.get_prepare_base_model_config() 10 | prepare_base_model = PrepareBaseModel(config=prepare_base_model_config) 11 | prepare_base_model.get_base_model() 12 | prepare_base_model.update_base_model() 13 | 14 | if __name__ == '__main__': 15 | try: 16 | logger.info(f"*******************") 17 | logger.info(f">>>>>> stage {STAGE_NAME} started <<<<<<") 18 | main() 19 | logger.info(f">>>>>> stage {STAGE_NAME} completed <<<<<<\n\nx==========x") 20 | except Exception as e: 21 | logger.exception(e) 22 | raise e -------------------------------------------------------------------------------- /src/deepClassifier/pipeline/stage_03_training.py: -------------------------------------------------------------------------------- 1 | from deepClassifier.config import ConfigurationManager 2 | from deepClassifier.components import PrepareCallback, Training 3 | from deepClassifier import logger 4 | 5 | STAGE_NAME = "Training" 6 | 7 | def main(): 8 | config = ConfigurationManager() 9 | prepare_callbacks_config = config.get_prepare_callback_config() 10 | prepare_callbacks = PrepareCallback(config=prepare_callbacks_config) 11 | callback_list = prepare_callbacks.get_tb_ckpt_callbacks() 12 | 13 | training_config = config.get_training_config() 14 | training = Training(config=training_config) 15 | training.get_base_model() 16 | training.train_valid_generator() 17 | training.train( 18 | callback_list=callback_list 19 | ) 20 | 21 | if __name__ == '__main__': 22 | try: 23 | logger.info(f"*******************") 24 | logger.info(f">>>>>> stage {STAGE_NAME} started <<<<<<") 25 | main() 26 | logger.info(f">>>>>> stage {STAGE_NAME} completed <<<<<<\n\nx==========x") 27 | except Exception as e: 28 | logger.exception(e) 29 | raise e 
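The stage scripts are independent entry points; in this repo they are chained via dvc.yaml, but a standalone runner could sequence them as in this sketch (a hypothetical main.py, not a file in this repo), including the evaluation stage that follows:

# hypothetical main.py: run all four stages in order, without DVC
from deepClassifier import logger
from deepClassifier.pipeline import (
    stage_01_data_ingestion,
    stage_02_prepare_base_model,
    stage_03_training,
    stage_04_evaluation,
)

STAGES = [
    ("Data Ingestion stage", stage_01_data_ingestion.main),
    ("Prepare base model", stage_02_prepare_base_model.main),
    ("Training", stage_03_training.main),
    ("Evaluation stage", stage_04_evaluation.main),
]

for stage_name, stage_main in STAGES:
    logger.info(f">>>>>> stage {stage_name} started <<<<<<")
    stage_main()  # each stage builds its own config via ConfigurationManager
    logger.info(f">>>>>> stage {stage_name} completed <<<<<<")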
-------------------------------------------------------------------------------- /src/deepClassifier/pipeline/stage_04_evaluation.py: -------------------------------------------------------------------------------- 1 | from deepClassifier.config import ConfigurationManager 2 | from deepClassifier.components import Evaluation 3 | from deepClassifier import logger 4 | 5 | STAGE_NAME = "Evaluation stage" 6 | 7 | def main(): 8 | config = ConfigurationManager() 9 | val_config = config.get_validation_config() 10 | evaluation = Evaluation(val_config) 11 | evaluation.evaluation() 12 | evaluation.save_score() 13 | 14 | if __name__ == '__main__': 15 | try: 16 | logger.info(f"*******************") 17 | logger.info(f">>>>>> stage {STAGE_NAME} started <<<<<<") 18 | main() 19 | logger.info(f">>>>>> stage {STAGE_NAME} completed <<<<<<\n\nx==========x") 20 | except Exception as e: 21 | logger.exception(e) 22 | raise e -------------------------------------------------------------------------------- /src/deepClassifier/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from deepClassifier.utils.common import * -------------------------------------------------------------------------------- /src/deepClassifier/utils/common.py: -------------------------------------------------------------------------------- 1 | import os 2 | from box.exceptions import BoxValueError 3 | import yaml 4 | from deepClassifier import logger 5 | import json 6 | import joblib 7 | from ensure import ensure_annotations 8 | from box import ConfigBox 9 | from pathlib import Path 10 | from typing import Any 11 | 12 | @ensure_annotations 13 | def read_yaml(path_to_yaml: Path) -> ConfigBox: 14 | """reads a yaml file and returns its content as a ConfigBox 15 | 16 | Args: 17 | path_to_yaml (Path): path to the yaml file 18 | 19 | Raises: 20 | ValueError: if yaml file is empty 21 | e: any other exception raised while reading the file 22 | 23 | Returns: 24 | ConfigBox: ConfigBox type 25 | """ 26 | try: 27 | with open(path_to_yaml) as yaml_file: 28 | content = yaml.safe_load(yaml_file) 29 | logger.info(f"yaml file: {path_to_yaml} loaded successfully") 30 | return ConfigBox(content) 31 | except BoxValueError: 32 | raise ValueError("yaml file is empty") 33 | except Exception as e: 34 | raise e 35 | 36 | @ensure_annotations 37 | def create_directories(path_to_directories: list, verbose=True): 38 | """create list of directories 39 | 40 | Args: 41 | path_to_directories (list): list of path of directories 42 | verbose (bool, optional): log each created directory. Defaults to True. 
43 | """ 44 | for path in path_to_directories: 45 | os.makedirs(path, exist_ok=True) 46 | if verbose: 47 | logger.info(f"created directory at: {path}") 48 | 49 | @ensure_annotations 50 | def save_json(path: Path, data: dict): 51 | """save json data 52 | 53 | Args: 54 | path (Path): path to json file 55 | data (dict): data to be saved in json file 56 | """ 57 | with open(path, "w") as f: 58 | json.dump(data, f, indent=4) 59 | 60 | logger.info(f"json file saved at: {path}") 61 | 62 | @ensure_annotations 63 | def load_json(path: Path) -> ConfigBox: 64 | """load json files data 65 | 66 | Args: 67 | path (Path): path to json file 68 | 69 | Returns: 70 | ConfigBox: data as class attributes instead of dict 71 | """ 72 | with open(path) as f: 73 | content = json.load(f) 74 | 75 | logger.info(f"json file loaded succesfully from: {path}") 76 | return ConfigBox(content) 77 | 78 | @ensure_annotations 79 | def save_bin(data: Any, path: Path): 80 | """save binary file 81 | 82 | Args: 83 | data (Any): data to be saved as binary 84 | path (Path): path to binary file 85 | """ 86 | joblib.dump(value=data, filename=path) 87 | logger.info(f"binary file saved at: {path}") 88 | 89 | @ensure_annotations 90 | def load_bin(path: Path) -> Any: 91 | """load binary data 92 | 93 | Args: 94 | path (Path): path to binary file 95 | 96 | Returns: 97 | Any: object stored in the file 98 | """ 99 | data = joblib.load(path) 100 | logger.info(f"binary file loaded from: {path}") 101 | return data 102 | 103 | @ensure_annotations 104 | def get_size(path: Path) -> str: 105 | """get size in KB 106 | 107 | Args: 108 | path (Path): path of the file 109 | 110 | Returns: 111 | str: size in KB 112 | """ 113 | size_in_kb = round(os.path.getsize(path)/1024) 114 | return f"~ {size_in_kb} KB" -------------------------------------------------------------------------------- /template.py: -------------------------------------------------------------------------------- 1 | import os 2 | from pathlib import Path 3 | import logging 4 | 5 | logging.basicConfig(level=logging.INFO, format='[%(asctime)s]: %(message)s: ') 6 | 7 | package_name = "deepClassifier" 8 | 9 | list_of_files = [ 10 | ".github/workflows/.gitkeep", 11 | f"src/{package_name}/__init__.py", 12 | f"src/{package_name}/components/__init__.py", 13 | f"src/{package_name}/utils/__init__.py", 14 | f"src/{package_name}/config/__init__.py", 15 | f"src/{package_name}/pipeline/__init__.py", 16 | f"src/{package_name}/entity/__init__.py", 17 | f"src/{package_name}/constants/__init__.py", 18 | "tests/__init__.py", 19 | "tests/unit/__init__.py", 20 | "tests/integration/__init__.py", 21 | "configs/config.yaml", 22 | "dvc.yaml", 23 | "params.yaml", 24 | "init_setup.sh", 25 | "requirements.txt", 26 | "requirements_dev.txt", 27 | "setup.py", 28 | "setup.cfg", 29 | "pyproject.toml", 30 | "tox.ini", 31 | "research/trials.ipynb", 32 | ] 33 | 34 | for filepath in list_of_files: 35 | filepath = Path(filepath) 36 | filedir, filename = os.path.split(filepath) 37 | if filedir != "": 38 | os.makedirs(filedir, exist_ok=True) 39 | logging.info(f"Creating directory: {filedir} for file: {filename}") 40 | 41 | if (not os.path.exists(filepath)) or (os.path.getsize(filepath) == 0): 42 | with open(filepath, "w") as f: 43 | pass # create an empty file 44 | logging.info(f"Creating empty file: {filepath}") 45 | 46 | else: 47 | logging.info(f"{filename} already exists") -------------------------------------------------------------------------------- /tests/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/c17hawke/FSDS_NOV_deepCNNClassifier/e54341d242a39256c54a90acdead18bc53031677/tests/__init__.py -------------------------------------------------------------------------------- /tests/data/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c17hawke/FSDS_NOV_deepCNNClassifier/e54341d242a39256c54a90acdead18bc53031677/tests/data/.gitkeep -------------------------------------------------------------------------------- /tests/data/demo.yaml: -------------------------------------------------------------------------------- 1 | key: value -------------------------------------------------------------------------------- /tests/data/empty.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c17hawke/FSDS_NOV_deepCNNClassifier/e54341d242a39256c54a90acdead18bc53031677/tests/data/empty.yaml -------------------------------------------------------------------------------- /tests/data/sample_data.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c17hawke/FSDS_NOV_deepCNNClassifier/e54341d242a39256c54a90acdead18bc53031677/tests/data/sample_data.zip -------------------------------------------------------------------------------- /tests/integration/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c17hawke/FSDS_NOV_deepCNNClassifier/e54341d242a39256c54a90acdead18bc53031677/tests/integration/__init__.py -------------------------------------------------------------------------------- /tests/integration/test_data_ingestion.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from deepClassifier.entity import DataIngestionConfig 3 | from deepClassifier.components import DataIngestion 4 | from pathlib import Path 5 | import os 6 | 7 | class Test_DataIngestion: 8 | data_ingestion_config = DataIngestionConfig( 9 | root_dir="tests/data/", 10 | source_URL="https://raw.githubusercontent.com/c17hawke/raw_data/main/sample_data.zip", 11 | local_data_file="tests/data/data_integration.zip", 12 | unzip_dir="tests/data/") 13 | 14 | def test_download(self): 15 | data_ingestion = DataIngestion(config=self.data_ingestion_config) 16 | data_ingestion.download_file() 17 | assert os.path.exists(self.data_ingestion_config.local_data_file) 18 | 19 | def test_unzip(self): 20 | data_ingestion = DataIngestion(config=self.data_ingestion_config) 21 | data_ingestion.unzip_and_clean() 22 | assert os.path.isdir(Path("tests/data/PetImages")) 23 | assert os.path.isdir(Path("tests/data/PetImages/Cat")) 24 | assert os.path.isdir(Path("tests/data/PetImages/Dog")) 25 | 26 | 27 | 28 | 29 | -------------------------------------------------------------------------------- /tests/integration/test_int.py: -------------------------------------------------------------------------------- 1 | def test_dummy(): 2 | assert True -------------------------------------------------------------------------------- /tests/unit/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c17hawke/FSDS_NOV_deepCNNClassifier/e54341d242a39256c54a90acdead18bc53031677/tests/unit/__init__.py 
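The ingestion tests here (and the unit-level download test that follows) write into tests/data/ and need network access for the download step. A sketch of an isolated variant using pytest's built-in tmp_path fixture (an illustration, not a test that exists in this repo):

import os
from deepClassifier.entity import DataIngestionConfig
from deepClassifier.components import DataIngestion

def test_download_isolated(tmp_path):
    # tmp_path is a fresh per-test directory provided by pytest,
    # so nothing is left behind in tests/data/
    config = DataIngestionConfig(
        root_dir=tmp_path,
        source_URL="https://raw.githubusercontent.com/c17hawke/raw_data/main/sample_data.zip",
        local_data_file=tmp_path / "data.zip",
        unzip_dir=tmp_path,
    )
    data_ingestion = DataIngestion(config=config)
    data_ingestion.download_file()
    assert os.path.exists(config.local_data_file)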
-------------------------------------------------------------------------------- /tests/unit/test_data_ingestion.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from deepClassifier.entity import DataIngestionConfig 3 | from deepClassifier.components import DataIngestion 4 | from pathlib import Path 5 | import os 6 | 7 | class Test_DataIngestion_download: 8 | data_ingestion_config = DataIngestionConfig( 9 | root_dir="tests/data/", 10 | source_URL="https://raw.githubusercontent.com/c17hawke/raw_data/main/sample_data.zip", 11 | local_data_file="tests/data/data.zip", 12 | unzip_dir="tests/data/") 13 | 14 | def test_download(self): 15 | data_ingestion = DataIngestion(config=self.data_ingestion_config) 16 | data_ingestion.download_file() 17 | assert os.path.exists(self.data_ingestion_config.local_data_file) 18 | 19 | 20 | class Test_DataIngestion_unzip: 21 | data_ingestion_config = DataIngestionConfig( 22 | root_dir="tests/data/", 23 | source_URL="", 24 | local_data_file="tests/data/sample_data.zip", 25 | unzip_dir="tests/data/") 26 | 27 | def test_unzip(self): 28 | data_ingestion = DataIngestion(config=self.data_ingestion_config) 29 | data_ingestion.unzip_and_clean() 30 | assert os.path.isdir(Path("tests/data/PetImages")) 31 | assert os.path.isdir(Path("tests/data/PetImages/Cat")) 32 | assert os.path.isdir(Path("tests/data/PetImages/Dog")) 33 | 34 | 35 | -------------------------------------------------------------------------------- /tests/unit/test_utils.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from deepClassifier.utils import read_yaml 3 | from pathlib import Path 4 | from box import ConfigBox 5 | from ensure.main import EnsureError 6 | 7 | 8 | class Test_read_yaml: 9 | yaml_files = [ 10 | "tests/data/empty.yaml", 11 | "tests/data/demo.yaml" 12 | ] 13 | 14 | def test_read_yaml_empty(self): 15 | with pytest.raises(ValueError): 16 | read_yaml(Path(self.yaml_files[0])) 17 | 18 | def test_read_yaml_return_type(self): 19 | response = read_yaml(Path(self.yaml_files[-1])) 20 | assert isinstance(response, ConfigBox) 21 | 22 | @pytest.mark.parametrize("path_to_yaml", yaml_files) 23 | def test_read_yaml_bad_type(self, path_to_yaml): 24 | with pytest.raises(EnsureError): 25 | read_yaml(path_to_yaml) 26 | 27 | 28 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = python3.8 3 | 4 | [gh-actions] 5 | python = 6 | 3.8: python3.8 7 | 8 | [testenv] 9 | deps = -rrequirements_dev.txt 10 | commands = 11 | # stop the build if there are Python syntax errors or undefined names 12 | flake8 src --count --select=E9,F63,F7,F82 --show-source --statistics 13 | # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide 14 | flake8 src --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics 15 | # type linting 16 | mypy src/ 17 | # pytest unit 18 | pytest -v tests/unit 19 | # pytest integration 20 | pytest -v tests/integration --------------------------------------------------------------------------------
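One detail worth calling out from test_utils.py: test_read_yaml_bad_type passes a plain str, and the @ensure_annotations decorator on read_yaml rejects it before the file is even opened, the same runtime check demonstrated with get_product in research/trials.ipynb. A minimal sketch, assuming it is run from the repo root so tests/data/demo.yaml resolves:

from pathlib import Path
from ensure.main import EnsureError
from deepClassifier.utils import read_yaml

try:
    read_yaml("tests/data/demo.yaml")  # a plain str violates the Path annotation
except EnsureError:
    print("read_yaml requires a pathlib.Path, not a str")

content = read_yaml(Path("tests/data/demo.yaml"))  # demo.yaml holds: key: value
print(content.key)  # ConfigBox exposes keys as attributes -> "value"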