├── .dvc
│   ├── .gitignore
│   └── config
├── .dvcignore
├── .github
│   └── workflows
│       ├── .gitkeep
│       └── ci.yml
├── .gitignore
├── LICENSE
├── README.md
├── artifacts
│   ├── .gitignore
│   └── data_ingestion
│       └── .gitignore
├── configs
│   └── config.yaml
├── docs
│   └── images
│       ├── .gitkeep
│       └── Data Ingestion@2x (1).png
├── dvc.lock
├── dvc.yaml
├── init_setup.sh
├── params.yaml
├── prediction_service
│   ├── Dockerfile
│   ├── app.py
│   └── requirements.txt
├── pyproject.toml
├── requirements.txt
├── requirements_dev.txt
├── research
│   ├── mlflow_dir
│   │   ├── argv_ex.py
│   │   └── example.py
│   ├── st_01.ipynb
│   ├── st_02.ipynb
│   ├── st_03.ipynb
│   ├── st_04.ipynb
│   ├── st_05_trails.ipynb
│   ├── trials.ipynb
│   └── trials.ipynbexample.py
├── scores.json
├── setup.cfg
├── setup.py
├── src
│   └── deepClassifier
│       ├── __init__.py
│       ├── components
│       │   ├── __init__.py
│       │   ├── data_ingestion.py
│       │   ├── evaluation.py
│       │   ├── prepare_base_model.py
│       │   ├── prepare_callback.py
│       │   └── training.py
│       ├── config
│       │   ├── __init__.py
│       │   └── configuration.py
│       ├── constants
│       │   └── __init__.py
│       ├── entity
│       │   ├── __init__.py
│       │   └── config_entity.py
│       ├── pipeline
│       │   ├── __init__.py
│       │   ├── stage_01_data_ingestion.py
│       │   ├── stage_02_prepare_base_model.py
│       │   ├── stage_03_training.py
│       │   └── stage_04_evaluation.py
│       └── utils
│           ├── __init__.py
│           └── common.py
├── template.py
├── tests
│   ├── __init__.py
│   ├── data
│   │   ├── .gitkeep
│   │   ├── demo.yaml
│   │   ├── empty.yaml
│   │   └── sample_data.zip
│   ├── integration
│   │   ├── __init__.py
│   │   ├── test_data_ingestion.py
│   │   └── test_int.py
│   └── unit
│       ├── __init__.py
│       ├── test_data_ingestion.py
│       └── test_utils.py
└── tox.ini
--------------------------------------------------------------------------------
/.dvc/.gitignore:
--------------------------------------------------------------------------------
1 | /config.local
2 | /tmp
3 | /cache
--------------------------------------------------------------------------------
/.dvc/config:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/c17hawke/FSDS_NOV_deepCNNClassifier/e54341d242a39256c54a90acdead18bc53031677/.dvc/config
--------------------------------------------------------------------------------
/.dvcignore:
--------------------------------------------------------------------------------
1 | # Add patterns of files dvc should ignore, which could improve
2 | # the performance.
Learn more at 3 | # https://dvc.org/doc/user-guide/dvcignore 4 | -------------------------------------------------------------------------------- /.github/workflows/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c17hawke/FSDS_NOV_deepCNNClassifier/e54341d242a39256c54a90acdead18bc53031677/.github/workflows/.gitkeep -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CV application 2 | 3 | on: 4 | push: 5 | branches: [ "main" ] 6 | pull_request: 7 | branches: [ "main" ] 8 | 9 | permissions: 10 | contents: read 11 | 12 | jobs: 13 | build: 14 | 15 | runs-on: ${{ matrix.os }} 16 | strategy: 17 | matrix: 18 | os: [ubuntu-latest, windows-latest] 19 | python-version: ["3.8"] 20 | 21 | steps: 22 | - uses: actions/checkout@v3 23 | - name: Set up Python ${{ matrix.python-version }} 24 | uses: actions/setup-python@v3 25 | with: 26 | python-version: ${{ matrix.python-version }} 27 | - name: Install dependencies 28 | run: | 29 | python -m pip install --upgrade pip 30 | pip install flake8 pytest tox tox-gh-actions 31 | pip install -r requirements.txt 32 | - name: Test with tox 33 | run: tox 34 | 35 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow
95 | __pypackages__/
96 | 
97 | # Celery stuff
98 | celerybeat-schedule
99 | celerybeat.pid
100 | 
101 | # SageMath parsed files
102 | *.sage.py
103 | 
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 | 
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 | 
117 | # Rope project settings
118 | .ropeproject
119 | 
120 | # mkdocs documentation
121 | /site
122 | 
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 | 
128 | # Pyre type checker
129 | .pyre/
130 | 
131 | 
132 | # vscode
133 | .vscode/
134 | *.jpg
135 | *.zip
136 | artifacts/prepare_base_model/base_model.h5
137 | artifacts/prepare_base_model/base_model_updated.h5
138 | artifacts/prepare_callbacks/checkpoint_dir/model.h5
139 | artifacts/prepare_callbacks/tensorboard_log_dir/tb_logs_at_2022-09-18-12-21-34/train/events.out.tfevents.1663483897.C17HAWKE.5048.0.v2
140 | artifacts/prepare_callbacks/tensorboard_log_dir/tb_logs_at_2022-09-18-12-21-34/validation/events.out.tfevents.1663484259.C17HAWKE.5048.1.v2
141 | artifacts/training/model.h5
142 | artifacts/prepare_callbacks/tensorboard_log_dir/tb_logs_at_2022-09-18-12-59-28/train/events.out.tfevents.1663486170.C17HAWKE.28200.0.v2
143 | artifacts/prepare_callbacks/tensorboard_log_dir/tb_logs_at_2022-09-24-11-00-35/train/events.out.tfevents.1663997438.C17HAWKE.2228.0.v2
144 | artifacts/prepare_callbacks/tensorboard_log_dir/tb_logs_at_2022-09-24-11-00-35/validation/events.out.tfevents.1663997774.C17HAWKE.2228.1.v2
145 | mlruns/*
146 | research/mlflow_dir/mlruns/*
147 | prediction_service/model.h5
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2022 SUNNY BHAVEEN CHANDRA
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Deep Classifier project
2 | 
3 | ## Workflow
4 | 
5 | 1. Update config.yaml
6 | 2. Update secrets.yaml [Optional]
7 | 3. Update params.yaml
8 | 4. Update the entity
9 | 5. Update the configuration manager in src config
10 | 6. Update the components
11 | 7. Update the pipeline
12 | 8. Test-run the pipeline stage
13 | 9. Run `tox` to test your package
14 | 10. Update the dvc.yaml
15 | 11. Run `dvc repro` to run all the pipeline stages (example commands below)
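
A minimal sketch of steps 8-11 (illustrative commands; `stage_01_data_ingestion.py` is one of this repo's pipeline scripts, so substitute whichever stage you changed):

```bash
# test-run a single pipeline stage, e.g. data ingestion
python src/deepClassifier/pipeline/stage_01_data_ingestion.py

# run the package's test suite
tox

# re-run every stage tracked in dvc.yaml
dvc repro
```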
16 | 
17 | ![img](https://raw.githubusercontent.com/c17hawke/FSDS_NOV_deepCNNClassifier/main/docs/images/Data%20Ingestion%402x%20(1).png)
18 | 
19 | 
20 | STEP 1: Set the env variables | Get them from DagsHub -> remote tab -> mlflow tab
21 | 
22 | MLFLOW_TRACKING_URI=https://dagshub.com/c17hawke/FSDS_NOV_deepCNNClassifier.mlflow \
23 | MLFLOW_TRACKING_USERNAME=c17hawke \
24 | MLFLOW_TRACKING_PASSWORD=<> \
25 | 
26 | STEP 2: Install mlflow
27 | 
28 | STEP 3: Set the remote URI
29 | 
30 | STEP 4: Use mlflow's context manager to start a run, then log metrics, params, and the model
31 | 
32 | 
33 | ## Sample data for testing
34 | https://raw.githubusercontent.com/c17hawke/raw_data/main/sample_data.zip
--------------------------------------------------------------------------------
/artifacts/.gitignore:
--------------------------------------------------------------------------------
1 | /prepare_base_model
2 | 
--------------------------------------------------------------------------------
/artifacts/data_ingestion/.gitignore:
--------------------------------------------------------------------------------
1 | /PetImages
2 | 
--------------------------------------------------------------------------------
/configs/config.yaml:
--------------------------------------------------------------------------------
1 | artifacts_root: artifacts
2 | 
3 | 
4 | data_ingestion:
5 |   root_dir: artifacts/data_ingestion
6 |   source_URL: https://download.microsoft.com/download/3/E/1/3E1C3F21-ECDB-4869-8368-6DEBA77B919F/kagglecatsanddogs_5340.zip
7 |   local_data_file: artifacts/data_ingestion/data.zip
8 |   unzip_dir: artifacts/data_ingestion
9 | 
10 | prepare_base_model:
11 |   root_dir: artifacts/prepare_base_model
12 |   base_model_path: artifacts/prepare_base_model/base_model.h5
13 |   updated_base_model_path: artifacts/prepare_base_model/base_model_updated.h5
14 | 
15 | 
16 | prepare_callbacks:
17 |   root_dir: artifacts/prepare_callbacks
18 |   tensorboard_root_log_dir: artifacts/prepare_callbacks/tensorboard_log_dir
19 |   checkpoint_model_filepath: artifacts/prepare_callbacks/checkpoint_dir/model.h5
20 | 
21 | training:
22 |   root_dir: artifacts/training
23 |   trained_model_path: artifacts/training/model.h5
24 | 
--------------------------------------------------------------------------------
/docs/images/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/c17hawke/FSDS_NOV_deepCNNClassifier/e54341d242a39256c54a90acdead18bc53031677/docs/images/.gitkeep
--------------------------------------------------------------------------------
/docs/images/Data Ingestion@2x (1).png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/c17hawke/FSDS_NOV_deepCNNClassifier/e54341d242a39256c54a90acdead18bc53031677/docs/images/Data Ingestion@2x (1).png
--------------------------------------------------------------------------------
/dvc.lock:
--------------------------------------------------------------------------------
1 | schema: '2.0'
2 | stages:
3 | data_ingestion:
4 | cmd: python src/deepClassifier/pipeline/stage_01_data_ingestion.py
5 | deps:
6 | - path: configs/config.yaml
7 | md5: 11cc5642ba2725648da24e6823250f7d
8 | size: 831
9 | - path: src/deepClassifier/pipeline/stage_01_data_ingestion.py
10 | md5: f172a70cf599c77bbef01c290e2c9f6c
11 | size: 717
12 | outs:
13 | - path: artifacts/data_ingestion/PetImages
14 | md5:
3a5cf86541f22c236620ce5b952ad684.dir 15 | size: 848866410 16 | nfiles: 24998 17 | prepare_base_model: 18 | cmd: python src/deepClassifier/pipeline/stage_02_prepare_base_model.py 19 | deps: 20 | - path: configs/config.yaml 21 | md5: 11cc5642ba2725648da24e6823250f7d 22 | size: 831 23 | - path: src/deepClassifier/pipeline/stage_02_prepare_base_model.py 24 | md5: 2d3e3ea4e6b3b70a34bb9e158e24c2ea 25 | size: 793 26 | params: 27 | params.yaml: 28 | CLASSES: 2 29 | IMAGE_SIZE: 30 | - 224 31 | - 224 32 | - 3 33 | INCLUDE_TOP: false 34 | LEARNING_RATE: 0.01 35 | WEIGHTS: imagenet 36 | outs: 37 | - path: artifacts/prepare_base_model 38 | md5: ab6c456706dce2780076b6ff43c99bcd.dir 39 | size: 118053368 40 | nfiles: 2 41 | training: 42 | cmd: python src/deepClassifier/pipeline/stage_03_training.py 43 | deps: 44 | - path: artifacts/data_ingestion/PetImages 45 | md5: 3a5cf86541f22c236620ce5b952ad684.dir 46 | size: 848866410 47 | nfiles: 24998 48 | - path: artifacts/prepare_base_model 49 | md5: ab6c456706dce2780076b6ff43c99bcd.dir 50 | size: 118053368 51 | nfiles: 2 52 | - path: configs/config.yaml 53 | md5: 11cc5642ba2725648da24e6823250f7d 54 | size: 831 55 | - path: src/deepClassifier/components/prepare_callback.py 56 | md5: b27788e9a2ef6b98bb6c03d9cd76ee48 57 | size: 916 58 | - path: src/deepClassifier/pipeline/stage_03_training.py 59 | md5: b544965e110cbf58357a4947c0120426 60 | size: 1005 61 | params: 62 | params.yaml: 63 | AUGMENTATION: true 64 | BATCH_SIZE: 16 65 | EPOCHS: 1 66 | IMAGE_SIZE: 67 | - 224 68 | - 224 69 | - 3 70 | outs: 71 | - path: artifacts/training/model.h5 72 | md5: 2b8c5b8d8c3c27a11cb57775fc840236 73 | size: 59135136 74 | evaluation: 75 | cmd: python src/deepClassifier/pipeline/stage_04_evaluation.py 76 | deps: 77 | - path: artifacts/data_ingestion/PetImages 78 | md5: 3a5cf86541f22c236620ce5b952ad684.dir 79 | size: 848866410 80 | nfiles: 24998 81 | - path: artifacts/training/model.h5 82 | md5: 2b8c5b8d8c3c27a11cb57775fc840236 83 | size: 59135136 84 | - path: configs/config.yaml 85 | md5: 11cc5642ba2725648da24e6823250f7d 86 | size: 831 87 | - path: src/deepClassifier/pipeline/stage_04_evaluation.py 88 | md5: 16274682083d12ad866d6e96b19cc5af 89 | size: 699 90 | params: 91 | params.yaml: 92 | BATCH_SIZE: 16 93 | IMAGE_SIZE: 94 | - 224 95 | - 224 96 | - 3 97 | outs: 98 | - path: scores.json 99 | md5: 2a2199d913c0e929d1f22b2c09aa8bb6 100 | size: 73 101 | -------------------------------------------------------------------------------- /dvc.yaml: -------------------------------------------------------------------------------- 1 | stages: 2 | data_ingestion: 3 | cmd: python src/deepClassifier/pipeline/stage_01_data_ingestion.py 4 | deps: 5 | - src/deepClassifier/pipeline/stage_01_data_ingestion.py 6 | - configs/config.yaml 7 | outs: 8 | - artifacts/data_ingestion/PetImages 9 | 10 | prepare_base_model: 11 | cmd: python src/deepClassifier/pipeline/stage_02_prepare_base_model.py 12 | deps: 13 | - src/deepClassifier/pipeline/stage_02_prepare_base_model.py 14 | - configs/config.yaml 15 | params: 16 | - IMAGE_SIZE 17 | - INCLUDE_TOP 18 | - CLASSES 19 | - WEIGHTS 20 | - LEARNING_RATE 21 | outs: 22 | - artifacts/prepare_base_model 23 | 24 | 25 | training: 26 | cmd: python src/deepClassifier/pipeline/stage_03_training.py 27 | deps: 28 | - src/deepClassifier/pipeline/stage_03_training.py 29 | - src/deepClassifier/components/prepare_callback.py 30 | - configs/config.yaml 31 | - artifacts/data_ingestion/PetImages 32 | - artifacts/prepare_base_model 33 | params: 34 | - IMAGE_SIZE 35 | - EPOCHS 36 | - 
BATCH_SIZE
37 | - AUGMENTATION
38 | outs:
39 | - artifacts/training/model.h5
40 | 
41 | evaluation:
42 | cmd: python src/deepClassifier/pipeline/stage_04_evaluation.py
43 | deps:
44 | - src/deepClassifier/pipeline/stage_04_evaluation.py
45 | - configs/config.yaml
46 | - artifacts/data_ingestion/PetImages
47 | - artifacts/training/model.h5
48 | params:
49 | - IMAGE_SIZE
50 | - BATCH_SIZE
51 | metrics:
52 | - scores.json:
53 | cache: false
--------------------------------------------------------------------------------
/init_setup.sh:
--------------------------------------------------------------------------------
1 | echo [$(date)]: "START"
2 | echo [$(date)]: "creating env with python 3.8 version"
3 | conda create --prefix ./env python=3.8 -y
4 | echo [$(date)]: "activating the environment"
5 | source activate ./env
6 | echo [$(date)]: "installing the dev requirements"
7 | pip install -r requirements_dev.txt
8 | echo [$(date)]: "END"
--------------------------------------------------------------------------------
/params.yaml:
--------------------------------------------------------------------------------
1 | AUGMENTATION: True
2 | IMAGE_SIZE: [224, 224, 3] # as per the VGG16 model
3 | BATCH_SIZE: 16
4 | INCLUDE_TOP: False
5 | EPOCHS: 1
6 | CLASSES: 2
7 | WEIGHTS: imagenet
8 | LEARNING_RATE: 0.01
9 | 
--------------------------------------------------------------------------------
/prediction_service/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM python:3.8-slim
2 | WORKDIR /app
3 | COPY . .
4 | RUN pip install -r requirements.txt
5 | CMD ["streamlit", "run", "app.py"]
--------------------------------------------------------------------------------
/prediction_service/app.py:
--------------------------------------------------------------------------------
1 | import streamlit as st
2 | from PIL import Image
3 | import tensorflow as tf
4 | import numpy as np
5 | """
6 | # deep Classifier project
7 | 
8 | """
9 | model = tf.keras.models.load_model("model.h5")
10 | uploaded_file = st.file_uploader("Choose a file")
11 | if uploaded_file is not None:
12 |     # read the uploaded file as a PIL image
13 | 
14 |     image = Image.open(uploaded_file)
15 |     img = image.resize((224,224))
16 |     img_array = np.array(img)
17 |     img_array = np.expand_dims(img_array, axis=0)  # [batch_size, row, col, channel]
18 |     result = model.predict(img_array)  # e.g. [[0.99, 0.01]] for a single image
19 | 
20 |     argmax_index = np.argmax(result, axis=1)  # e.g. [0]
21 |     if argmax_index[0] == 0:
22 |         st.image(image, caption="predicted: cat")
23 |     else:
24 |         st.image(image, caption='predicted: dog')
25 | 
26 | 
--------------------------------------------------------------------------------
/prediction_service/requirements.txt:
--------------------------------------------------------------------------------
1 | streamlit
2 | tensorflow
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ['setuptools>=42.0', "wheel"]
3 | build-backend = "setuptools.build_meta"
4 | 
5 | [tool.pytest.ini_options]
6 | testpaths = [
7 |     "tests"
8 | ]
9 | 
10 | [tool.mypy]
11 | mypy_path = "src"
12 | ignore_missing_imports = true
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | tensorflow
2 | dvc
3 | pandas
4 | notebook
5 | numpy
6 | matplotlib
7 | 
seaborn 8 | python-box==6.0.2 9 | pyYAML 10 | tqdm 11 | ensure==1.0.2 12 | joblib 13 | types-PyYAML 14 | scipy 15 | mlflow==1.26.1 16 | -e . 17 | -------------------------------------------------------------------------------- /requirements_dev.txt: -------------------------------------------------------------------------------- 1 | tensorflow 2 | dvc 3 | pandas 4 | notebook 5 | numpy 6 | matplotlib 7 | seaborn 8 | python-box==6.0.2 9 | pyYAML 10 | tqdm 11 | ensure==1.0.2 12 | joblib 13 | types-PyYAML 14 | scipy 15 | mlflow==1.26.1 16 | -e . 17 | 18 | ## Dev requirements 19 | pytest==7.1.3 20 | tox==3.25.1 21 | black==22.8.0 22 | flake8==5.0.4 23 | mypy==0.971 -------------------------------------------------------------------------------- /research/mlflow_dir/argv_ex.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | default = 0.3 4 | 5 | alpha = float(sys.argv[1]) if len(sys.argv) > 1 else default 6 | l1_ratio = float(sys.argv[2]) if len(sys.argv) > 2 else default 7 | 8 | args = sys.argv 9 | print(args) 10 | print(alpha, l1_ratio) 11 | 12 | # python filename.py 0.6 0.7 -------------------------------------------------------------------------------- /research/mlflow_dir/example.py: -------------------------------------------------------------------------------- 1 | # The data set used in this example is from http://archive.ics.uci.edu/ml/datasets/Wine+Quality 2 | # P. Cortez, A. Cerdeira, F. Almeida, T. Matos and J. Reis. 3 | # Modeling wine preferences by data mining from physicochemical properties. In Decision Support Systems, Elsevier, 47(4):547-553, 2009. 4 | 5 | import os 6 | import warnings 7 | import sys 8 | 9 | import pandas as pd 10 | import numpy as np 11 | from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score 12 | from sklearn.model_selection import train_test_split 13 | from sklearn.linear_model import ElasticNet 14 | from urllib.parse import urlparse 15 | import mlflow 16 | import mlflow.sklearn 17 | 18 | import logging 19 | 20 | logging.basicConfig(level=logging.WARN) 21 | logger = logging.getLogger(__name__) 22 | 23 | 24 | def eval_metrics(actual, pred): 25 | rmse = np.sqrt(mean_squared_error(actual, pred)) 26 | mae = mean_absolute_error(actual, pred) 27 | r2 = r2_score(actual, pred) 28 | return rmse, mae, r2 29 | 30 | 31 | if __name__ == "__main__": 32 | warnings.filterwarnings("ignore") 33 | np.random.seed(40) 34 | 35 | # Read the wine-quality csv file from the URL 36 | csv_url = ( 37 | "http://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv" 38 | ) 39 | try: 40 | data = pd.read_csv(csv_url, sep=";") 41 | except Exception as e: 42 | logger.exception( 43 | "Unable to download training & test CSV, check your internet connection. Error: %s", e 44 | ) 45 | 46 | # Split the data into training and test sets. (0.75, 0.25) split. 
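    # Note (added): train_test_split defaults to a 0.75/0.25 split. If the
    # download above failed, `data` was never assigned, so the next line
    # raises NameError, because the except block only logs the error.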
47 | train, test = train_test_split(data) 48 | 49 | # The predicted column is "quality" which is a scalar from [3, 9] 50 | train_x = train.drop(["quality"], axis=1) 51 | test_x = test.drop(["quality"], axis=1) 52 | train_y = train[["quality"]] 53 | test_y = test[["quality"]] 54 | 55 | alpha = float(sys.argv[1]) if len(sys.argv) > 1 else 0.5 56 | l1_ratio = float(sys.argv[2]) if len(sys.argv) > 2 else 0.5 57 | 58 | with mlflow.start_run(): 59 | lr = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, random_state=42) 60 | lr.fit(train_x, train_y) 61 | 62 | predicted_qualities = lr.predict(test_x) 63 | 64 | (rmse, mae, r2) = eval_metrics(test_y, predicted_qualities) 65 | 66 | print("Elasticnet model (alpha=%f, l1_ratio=%f):" % (alpha, l1_ratio)) 67 | print(" RMSE: %s" % rmse) 68 | print(" MAE: %s" % mae) 69 | print(" R2: %s" % r2) 70 | 71 | mlflow.log_param("alpha", alpha) 72 | mlflow.log_param("l1_ratio", l1_ratio) 73 | mlflow.log_metric("rmse", rmse) 74 | mlflow.log_metric("r2", r2) 75 | mlflow.log_metric("mae", mae) 76 | 77 | remote_server_uri = "https://dagshub.com/c17hawke/FSDS_NOV_deepCNNClassifier.mlflow" 78 | mlflow.set_tracking_uri(remote_server_uri) 79 | tracking_url_type_store = urlparse(mlflow.get_tracking_uri()).scheme 80 | 81 | # Model registry does not work with file store 82 | if tracking_url_type_store != "file": 83 | 84 | # Register the model 85 | # There are other ways to use the Model Registry, which depends on the use case, 86 | # please refer to the doc for more information: 87 | # https://mlflow.org/docs/latest/model-registry.html#api-workflow 88 | mlflow.sklearn.log_model(lr, "model", registered_model_name="ElasticnetWineModel") 89 | else: 90 | mlflow.sklearn.log_model(lr, "model") -------------------------------------------------------------------------------- /research/st_01.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from collections import namedtuple\n", 10 | "import os\n", 11 | "os.chdir(\"../\")" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 2, 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "DataIngestionConfig = namedtuple(\"DataIngestionConfig\", [\n", 21 | " \"root_dir\",\n", 22 | " \"source_URL\",\n", 23 | " \"local_data_file\",\n", 24 | " \"unzip_dir\"\n", 25 | "])" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 3, 31 | "metadata": {}, 32 | "outputs": [], 33 | "source": [ 34 | "from dataclasses import dataclass\n", 35 | "from pathlib import Path\n", 36 | "\n", 37 | "\n", 38 | "@dataclass(frozen=True)\n", 39 | "class DataIngestionConfig:\n", 40 | " root_dir: Path\n", 41 | " source_URL: str\n", 42 | " local_data_file: Path\n", 43 | " unzip_dir: Path" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": 4, 49 | "metadata": {}, 50 | "outputs": [], 51 | "source": [ 52 | "from deepClassifier.constants import *\n", 53 | "from deepClassifier.utils import read_yaml, create_directories" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": 5, 59 | "metadata": {}, 60 | "outputs": [], 61 | "source": [ 62 | "class ConfigurationManager:\n", 63 | " def __init__(\n", 64 | " self, \n", 65 | " config_filepath = CONFIG_FILE_PATH,\n", 66 | " params_filepath = PARAMS_FILE_PATH):\n", 67 | " self.config = read_yaml(config_filepath)\n", 68 | " self.params = read_yaml(params_filepath)\n", 69 | " 
create_directories([self.config.artifacts_root])\n", 70 | "\n", 71 | " def get_data_ingestion_config(self) -> DataIngestionConfig:\n", 72 | " config = self.config.data_ingestion\n", 73 | " \n", 74 | " create_directories([config.root_dir])\n", 75 | "\n", 76 | " data_ingestion_config = DataIngestionConfig(\n", 77 | " root_dir=config.root_dir,\n", 78 | " source_URL=config.source_URL,\n", 79 | " local_data_file=config.local_data_file,\n", 80 | " unzip_dir=config.unzip_dir \n", 81 | " )\n", 82 | "\n", 83 | " return data_ingestion_config" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": 6, 89 | "metadata": {}, 90 | "outputs": [], 91 | "source": [ 92 | "import os\n", 93 | "import urllib.request as request\n", 94 | "from zipfile import ZipFile\n", 95 | "\n", 96 | "class DataIngestion:\n", 97 | " def __init__(self, config: DataIngestionConfig):\n", 98 | " self.config = config\n", 99 | "\n", 100 | " def download_file(self):\n", 101 | " if not os.path.exists(self.config.local_data_file):\n", 102 | " filename, headers = request.urlretrieve(\n", 103 | " url = self.config.source_URL,\n", 104 | " filename = self.config.local_data_file\n", 105 | " )\n", 106 | "\n", 107 | " def _get_updated_list_of_files(self, list_of_files):\n", 108 | " return [f for f in list_of_files if f.endswith(\".jpg\") and (\"Cat\" in f or \"Dog\" in f)]\n", 109 | "\n", 110 | " def _preprocess(self, zf: ZipFile, f: str, working_dir: str):\n", 111 | " target_filepath = os.path.join(working_dir, f)\n", 112 | " if not os.path.exists(target_filepath):\n", 113 | " zf.extract(f, working_dir)\n", 114 | " \n", 115 | " if os.path.getsize(target_filepath) == 0:\n", 116 | " os.remove(target_filepath)\n", 117 | "\n", 118 | " def unzip_and_clean(self):\n", 119 | " with ZipFile(file=self.config.local_data_file, mode=\"r\") as zf:\n", 120 | " list_of_files = zf.namelist()\n", 121 | " updated_list_of_files = self._get_updated_list_of_files(list_of_files)\n", 122 | " for f in updated_list_of_files:\n", 123 | " self._preprocess(zf, f, self.config.unzip_dir)" 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": 7, 129 | "metadata": {}, 130 | "outputs": [ 131 | { 132 | "name": "stdout", 133 | "output_type": "stream", 134 | "text": [ 135 | "[2022-09-17 10:34:33,486: INFO: common]: yaml file: configs\\config.yaml loaded successfully\n", 136 | "[2022-09-17 10:34:33,489: INFO: common]: yaml file: params.yaml loaded successfully\n", 137 | "[2022-09-17 10:34:33,491: INFO: common]: created directory at: artifacts\n", 138 | "[2022-09-17 10:34:33,493: INFO: common]: created directory at: artifacts/data_ingestion\n" 139 | ] 140 | } 141 | ], 142 | "source": [ 143 | "try:\n", 144 | " config = ConfigurationManager()\n", 145 | " data_ingestion_config = config.get_data_ingestion_config()\n", 146 | " data_ingestion = DataIngestion(config=data_ingestion_config)\n", 147 | " data_ingestion.download_file()\n", 148 | " data_ingestion.unzip_and_clean()\n", 149 | "except Exception as e:\n", 150 | " raise e" 151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": null, 156 | "metadata": {}, 157 | "outputs": [], 158 | "source": [] 159 | } 160 | ], 161 | "metadata": { 162 | "kernelspec": { 163 | "display_name": "Python 3.8.13 (conda)", 164 | "language": "python", 165 | "name": "python3" 166 | }, 167 | "language_info": { 168 | "codemirror_mode": { 169 | "name": "ipython", 170 | "version": 3 171 | }, 172 | "file_extension": ".py", 173 | "mimetype": "text/x-python", 174 | "name": "python", 175 | 
"nbconvert_exporter": "python", 176 | "pygments_lexer": "ipython3", 177 | "version": "3.8.13" 178 | }, 179 | "orig_nbformat": 4, 180 | "vscode": { 181 | "interpreter": { 182 | "hash": "37206e62ba8fc6a6f0b961435078c80557a69ac7a5dd6249b6601b5385f5de67" 183 | } 184 | } 185 | }, 186 | "nbformat": 4, 187 | "nbformat_minor": 2 188 | } 189 | -------------------------------------------------------------------------------- /research/st_02.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import os\n", 10 | "os.chdir(\"../\")" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 16, 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "from dataclasses import dataclass\n", 20 | "from pathlib import Path\n", 21 | "\n", 22 | "\n", 23 | "@dataclass(frozen=True)\n", 24 | "class PrepareBaseModelConfig:\n", 25 | " root_dir: Path\n", 26 | " base_model_path: Path\n", 27 | " updated_base_model_path: Path\n", 28 | " params_image_size: list\n", 29 | " params_learning_rate: float\n", 30 | " params_include_top: bool\n", 31 | " params_weights: str\n", 32 | " params_classes: int" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": 17, 38 | "metadata": {}, 39 | "outputs": [], 40 | "source": [ 41 | "from deepClassifier.constants import *\n", 42 | "from deepClassifier.utils import read_yaml, create_directories" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": 18, 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [ 51 | "class ConfigurationManager:\n", 52 | " def __init__(\n", 53 | " self, \n", 54 | " config_filepath = CONFIG_FILE_PATH,\n", 55 | " params_filepath = PARAMS_FILE_PATH):\n", 56 | " self.config = read_yaml(config_filepath)\n", 57 | " self.params = read_yaml(params_filepath)\n", 58 | " create_directories([self.config.artifacts_root])\n", 59 | "\n", 60 | " def get_prepare_base_model_config(self) -> PrepareBaseModelConfig:\n", 61 | " config = self.config.prepare_base_model\n", 62 | " \n", 63 | " create_directories([config.root_dir])\n", 64 | "\n", 65 | " prepare_base_model_config = PrepareBaseModelConfig(\n", 66 | " root_dir=Path(config.root_dir),\n", 67 | " base_model_path=Path(config.base_model_path),\n", 68 | " updated_base_model_path=Path(config.updated_base_model_path),\n", 69 | " params_image_size=self.params.IMAGE_SIZE,\n", 70 | " params_learning_rate=self.params.LEARNING_RATE,\n", 71 | " params_include_top=self.params.INCLUDE_TOP,\n", 72 | " params_weights=self.params.WEIGHTS,\n", 73 | " params_classes=self.params.CLASSES\n", 74 | " )\n", 75 | "\n", 76 | " return prepare_base_model_config" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": 23, 82 | "metadata": {}, 83 | "outputs": [], 84 | "source": [ 85 | "import os\n", 86 | "import urllib.request as request\n", 87 | "from zipfile import ZipFile\n", 88 | "import tensorflow as tf\n", 89 | "\n", 90 | "class PrepareBaseModel:\n", 91 | " def __init__(self, config: PrepareBaseModelConfig):\n", 92 | " self.config = config\n", 93 | "\n", 94 | " def get_base_model(self):\n", 95 | " self.model = tf.keras.applications.vgg16.VGG16(\n", 96 | " input_shape=self.config.params_image_size,\n", 97 | " weights=self.config.params_weights,\n", 98 | " include_top=self.config.params_include_top\n", 99 | " )\n", 100 | "\n", 101 | " self.save_model(path=self.config.base_model_path, model=self.model)\n", 
102 | "\n",
103 | "\n",
104 | "    @staticmethod\n",
105 | "    def _prepare_full_model(model, classes, freeze_all, freeze_till, learning_rate):\n",
106 | "        if freeze_all:\n",
107 | "            for layer in model.layers:\n",
108 | "                layer.trainable = False\n",
109 | "        elif (freeze_till is not None) and (freeze_till > 0):\n",
110 | "            for layer in model.layers[:-freeze_till]:\n",
111 | "                layer.trainable = False\n",
112 | "\n",
113 | "        flatten_in = tf.keras.layers.Flatten()(model.output)\n",
114 | "        prediction = tf.keras.layers.Dense(\n",
115 | "            units=classes,\n",
116 | "            activation=\"softmax\"\n",
117 | "        )(flatten_in)\n",
118 | "\n",
119 | "        full_model = tf.keras.models.Model(\n",
120 | "            inputs=model.input,\n",
121 | "            outputs=prediction\n",
122 | "        )\n",
123 | "\n",
124 | "        full_model.compile(\n",
125 | "            optimizer=tf.keras.optimizers.SGD(learning_rate=learning_rate),\n",
126 | "            loss=tf.keras.losses.CategoricalCrossentropy(),\n",
127 | "            metrics=[\"accuracy\"]\n",
128 | "        )\n",
129 | "\n",
130 | "        full_model.summary()\n",
131 | "        return full_model\n",
132 | "\n",
133 | "    def update_base_model(self):\n",
134 | "        self.full_model = self._prepare_full_model(\n",
135 | "            model=self.model,\n",
136 | "            classes=self.config.params_classes,\n",
137 | "            freeze_all=True,\n",
138 | "            freeze_till=None,\n",
139 | "            learning_rate=self.config.params_learning_rate\n",
140 | "        )\n",
141 | "\n",
142 | "        self.save_model(path=self.config.updated_base_model_path, model=self.full_model)\n",
143 | "\n",
144 | "    @staticmethod\n",
145 | "    def save_model(path: Path, model: tf.keras.Model):\n",
146 | "        model.save(path)\n",
147 | "\n"
148 | ]
149 | },
150 | {
151 | "cell_type": "code",
152 | "execution_count": 24,
153 | "metadata": {},
154 | "outputs": [
155 | {
156 | "name": "stdout",
157 | "output_type": "stream",
158 | "text": [
159 | "Model: \"model_1\"\n",
160 | "_________________________________________________________________\n",
161 | " Layer (type) Output Shape Param # \n",
162 | "=================================================================\n",
163 | " input_3 (InputLayer) [(None, 224, 224, 3)] 0 \n",
164 | " \n",
165 | " block1_conv1 (Conv2D) (None, 224, 224, 64) 1792 \n",
166 | " \n",
167 | " block1_conv2 (Conv2D) (None, 224, 224, 64) 36928 \n",
168 | " \n",
169 | " block1_pool (MaxPooling2D) (None, 112, 112, 64) 0 \n",
170 | " \n",
171 | " block2_conv1 (Conv2D) (None, 112, 112, 128) 73856 \n",
172 | " \n",
173 | " block2_conv2 (Conv2D) (None, 112, 112, 128) 147584 \n",
174 | " \n",
175 | " block2_pool (MaxPooling2D) (None, 56, 56, 128) 0 \n",
176 | " \n",
177 | " block3_conv1 (Conv2D) (None, 56, 56, 256) 295168 \n",
178 | " \n",
179 | " block3_conv2 (Conv2D) (None, 56, 56, 256) 590080 \n",
180 | " \n",
181 | " block3_conv3 (Conv2D) (None, 56, 56, 256) 590080 \n",
182 | " \n",
183 | " block3_pool (MaxPooling2D) (None, 28, 28, 256) 0 \n",
184 | " \n",
185 | " block4_conv1 (Conv2D) (None, 28, 28, 512) 1180160 \n",
186 | " \n",
187 | " block4_conv2 (Conv2D) (None, 28, 28, 512) 2359808 \n",
188 | " \n",
189 | " block4_conv3 (Conv2D) (None, 28, 28, 512) 2359808 \n",
190 | " \n",
191 | " block4_pool (MaxPooling2D) (None, 14, 14, 512) 0 \n",
192 | " \n",
193 | " block5_conv1 (Conv2D) (None, 14, 14, 512) 2359808 \n",
194 | " \n",
195 | " block5_conv2 (Conv2D) (None, 14, 14, 512) 2359808 \n",
196 | " \n",
197 | " block5_conv3 (Conv2D) (None, 14, 14, 512) 2359808 \n",
198 | " \n",
199 | " block5_pool (MaxPooling2D) (None, 7, 7, 512) 0 \n",
200 | " \n",
201 | " flatten_2 (Flatten) (None, 25088) 0 \n",
202 | " \n",
203 | "
dense_2 (Dense) (None, 2) 50178 \n", 204 | " \n", 205 | "=================================================================\n", 206 | "Total params: 14,764,866\n", 207 | "Trainable params: 50,178\n", 208 | "Non-trainable params: 14,714,688\n", 209 | "_________________________________________________________________\n" 210 | ] 211 | } 212 | ], 213 | "source": [ 214 | "try:\n", 215 | " config = ConfigurationManager()\n", 216 | " prepare_base_model_config = config.get_prepare_base_model_config()\n", 217 | " prepare_base_model = PrepareBaseModel(config=prepare_base_model_config)\n", 218 | " prepare_base_model.get_base_model()\n", 219 | " prepare_base_model.update_base_model()\n", 220 | "except Exception as e:\n", 221 | " raise e" 222 | ] 223 | }, 224 | { 225 | "cell_type": "code", 226 | "execution_count": 15, 227 | "metadata": {}, 228 | "outputs": [ 229 | { 230 | "data": { 231 | "text/plain": [ 232 | "4" 233 | ] 234 | }, 235 | "execution_count": 15, 236 | "metadata": {}, 237 | "output_type": "execute_result" 238 | } 239 | ], 240 | "source": [ 241 | "class Power:\n", 242 | " def __init__(self,x):\n", 243 | " self.x = x\n", 244 | "\n", 245 | " def __call__(self, factor):\n", 246 | " return self.x**factor\n", 247 | "\n", 248 | " def __call__(self, factor):\n", 249 | " return self.x\n", 250 | "\n", 251 | " def square(self):\n", 252 | " return self.x**2\n", 253 | "\n", 254 | "\n", 255 | "result = Power(4)(3)\n", 256 | "result" 257 | ] 258 | }, 259 | { 260 | "cell_type": "code", 261 | "execution_count": 14, 262 | "metadata": {}, 263 | "outputs": [ 264 | { 265 | "data": { 266 | "text/plain": [ 267 | "64" 268 | ] 269 | }, 270 | "execution_count": 14, 271 | "metadata": {}, 272 | "output_type": "execute_result" 273 | } 274 | ], 275 | "source": [ 276 | "result = Power(4)\n", 277 | "result(3)" 278 | ] 279 | }, 280 | { 281 | "cell_type": "code", 282 | "execution_count": 13, 283 | "metadata": {}, 284 | "outputs": [ 285 | { 286 | "data": { 287 | "text/plain": [ 288 | "16" 289 | ] 290 | }, 291 | "execution_count": 13, 292 | "metadata": {}, 293 | "output_type": "execute_result" 294 | } 295 | ], 296 | "source": [ 297 | "\n", 298 | "result = Power(4)\n", 299 | "result.square()" 300 | ] 301 | }, 302 | { 303 | "cell_type": "code", 304 | "execution_count": null, 305 | "metadata": {}, 306 | "outputs": [], 307 | "source": [] 308 | } 309 | ], 310 | "metadata": { 311 | "kernelspec": { 312 | "display_name": "Python 3.8.13", 313 | "language": "python", 314 | "name": "python3" 315 | }, 316 | "language_info": { 317 | "codemirror_mode": { 318 | "name": "ipython", 319 | "version": 3 320 | }, 321 | "file_extension": ".py", 322 | "mimetype": "text/x-python", 323 | "name": "python", 324 | "nbconvert_exporter": "python", 325 | "pygments_lexer": "ipython3", 326 | "version": "3.8.13" 327 | }, 328 | "orig_nbformat": 4, 329 | "vscode": { 330 | "interpreter": { 331 | "hash": "37206e62ba8fc6a6f0b961435078c80557a69ac7a5dd6249b6601b5385f5de67" 332 | } 333 | } 334 | }, 335 | "nbformat": 4, 336 | "nbformat_minor": 2 337 | } 338 | -------------------------------------------------------------------------------- /research/st_03.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import os\n", 10 | "os.chdir(\"../\")" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 2, 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "from dataclasses 
import dataclass\n", 20 | "from pathlib import Path\n", 21 | "\n", 22 | "\n", 23 | "@dataclass(frozen=True)\n", 24 | "class PrepareCallbacksConfig:\n", 25 | " root_dir: Path\n", 26 | " tensorboard_root_log_dir: Path\n", 27 | " checkpoint_model_filepath: Path" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 3, 33 | "metadata": {}, 34 | "outputs": [], 35 | "source": [ 36 | "from deepClassifier.constants import *\n", 37 | "from deepClassifier.utils import read_yaml, create_directories" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 4, 43 | "metadata": {}, 44 | "outputs": [], 45 | "source": [ 46 | "class ConfigurationManager:\n", 47 | " def __init__(\n", 48 | " self, \n", 49 | " config_filepath = CONFIG_FILE_PATH,\n", 50 | " params_filepath = PARAMS_FILE_PATH):\n", 51 | " self.config = read_yaml(config_filepath)\n", 52 | " self.params = read_yaml(params_filepath)\n", 53 | " create_directories([self.config.artifacts_root])\n", 54 | "\n", 55 | " def get_prepare_callback_config(self) -> PrepareCallbacksConfig:\n", 56 | " config = self.config.prepare_callbacks\n", 57 | " model_ckpt_dir = os.path.dirname(config.checkpoint_model_filepath)\n", 58 | " create_directories([\n", 59 | " Path(model_ckpt_dir),\n", 60 | " Path(config.tensorboard_root_log_dir)\n", 61 | " ])\n", 62 | "\n", 63 | " prepare_callback_config = PrepareCallbacksConfig(\n", 64 | " root_dir=Path(config.root_dir),\n", 65 | " tensorboard_root_log_dir=Path(config.tensorboard_root_log_dir),\n", 66 | " checkpoint_model_filepath=Path(config.checkpoint_model_filepath)\n", 67 | " )\n", 68 | "\n", 69 | " return prepare_callback_config" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": 8, 75 | "metadata": {}, 76 | "outputs": [], 77 | "source": [ 78 | "import os\n", 79 | "import urllib.request as request\n", 80 | "from zipfile import ZipFile\n", 81 | "import tensorflow as tf\n", 82 | "import time\n", 83 | "\n", 84 | "class PrepareCallback:\n", 85 | " def __init__(self, config: PrepareCallbacksConfig):\n", 86 | " self.config = config\n", 87 | "\n", 88 | " @property\n", 89 | " def _create_tb_callbacks(self):\n", 90 | " timestamp = time.strftime(\"%Y-%m-%d-%H-%M-%S\")\n", 91 | " tb_running_log_dir = os.path.join(\n", 92 | " self.config.tensorboard_root_log_dir,\n", 93 | " f\"tb_logs_at_{timestamp}\",\n", 94 | " )\n", 95 | " return tf.keras.callbacks.TensorBoard(log_dir=tb_running_log_dir)\n", 96 | "\n", 97 | " @property\n", 98 | " def _create_ckpt_callbacks(self):\n", 99 | " return tf.keras.callbacks.ModelCheckpoint(\n", 100 | " filepath=self.config.checkpoint_model_filepath,\n", 101 | " save_best_only=True\n", 102 | " )\n", 103 | "\n", 104 | " def get_tb_ckpt_callbacks(self):\n", 105 | " return [\n", 106 | " self._create_tb_callbacks,\n", 107 | " self._create_ckpt_callbacks\n", 108 | " ]\n", 109 | "\n" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": 9, 115 | "metadata": {}, 116 | "outputs": [], 117 | "source": [ 118 | "try:\n", 119 | " config = ConfigurationManager()\n", 120 | " prepare_callbacks_config = config.get_prepare_callback_config()\n", 121 | " prepare_callbacks = PrepareCallback(config=prepare_callbacks_config)\n", 122 | " callback_list = prepare_callbacks.get_tb_ckpt_callbacks()\n", 123 | " \n", 124 | "except Exception as e:\n", 125 | " raise e" 126 | ] 127 | }, 128 | { 129 | "cell_type": "code", 130 | "execution_count": null, 131 | "metadata": {}, 132 | "outputs": [ 133 | { 134 | "data": { 135 | "text/plain": [ 136 | "'x/y'" 137 | ] 138 | 
}, 139 | "execution_count": 7, 140 | "metadata": {}, 141 | "output_type": "execute_result" 142 | } 143 | ], 144 | "source": [ 145 | "import os\n", 146 | "os.path.dirname(\"x/y/z.txt\")" 147 | ] 148 | }, 149 | { 150 | "cell_type": "code", 151 | "execution_count": 7, 152 | "metadata": {}, 153 | "outputs": [ 154 | { 155 | "data": { 156 | "text/plain": [ 157 | "'tb_logs_at_2022-09-18-10-48-16'" 158 | ] 159 | }, 160 | "execution_count": 7, 161 | "metadata": {}, 162 | "output_type": "execute_result" 163 | } 164 | ], 165 | "source": [ 166 | "import time\n", 167 | "teimstamp = time.strftime(\"%Y-%m-%d-%H-%M-%S\")\n", 168 | "f\"tb_logs_at_{teimstamp}\"" 169 | ] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "execution_count": null, 174 | "metadata": {}, 175 | "outputs": [], 176 | "source": [] 177 | } 178 | ], 179 | "metadata": { 180 | "kernelspec": { 181 | "display_name": "Python 3.8.13 64-bit", 182 | "language": "python", 183 | "name": "python3" 184 | }, 185 | "language_info": { 186 | "codemirror_mode": { 187 | "name": "ipython", 188 | "version": 3 189 | }, 190 | "file_extension": ".py", 191 | "mimetype": "text/x-python", 192 | "name": "python", 193 | "nbconvert_exporter": "python", 194 | "pygments_lexer": "ipython3", 195 | "version": "3.8.13" 196 | }, 197 | "orig_nbformat": 4, 198 | "vscode": { 199 | "interpreter": { 200 | "hash": "37206e62ba8fc6a6f0b961435078c80557a69ac7a5dd6249b6601b5385f5de67" 201 | } 202 | } 203 | }, 204 | "nbformat": 4, 205 | "nbformat_minor": 2 206 | } 207 | -------------------------------------------------------------------------------- /research/st_04.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import os\n", 10 | "os.chdir(\"../\")" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 2, 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "from dataclasses import dataclass\n", 20 | "from pathlib import Path\n", 21 | "\n", 22 | "\n", 23 | "@dataclass(frozen=True)\n", 24 | "class TrainingConfig:\n", 25 | " root_dir: Path\n", 26 | " trained_model_path: Path\n", 27 | " updated_base_model_path: Path\n", 28 | " training_data: Path\n", 29 | " params_epochs: int\n", 30 | " params_batch_size: int\n", 31 | " params_is_augmentation: bool\n", 32 | " params_image_size: list\n", 33 | "\n", 34 | "@dataclass(frozen=True)\n", 35 | "class PrepareCallbacksConfig:\n", 36 | " root_dir: Path\n", 37 | " tensorboard_root_log_dir: Path\n", 38 | " checkpoint_model_filepath: Path" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 3, 44 | "metadata": {}, 45 | "outputs": [], 46 | "source": [ 47 | "from deepClassifier.constants import *\n", 48 | "from deepClassifier.utils import read_yaml, create_directories\n", 49 | "import tensorflow as tf" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": 4, 55 | "metadata": {}, 56 | "outputs": [], 57 | "source": [ 58 | "class ConfigurationManager:\n", 59 | " def __init__(\n", 60 | " self, \n", 61 | " config_filepath = CONFIG_FILE_PATH,\n", 62 | " params_filepath = PARAMS_FILE_PATH):\n", 63 | " self.config = read_yaml(config_filepath)\n", 64 | " self.params = read_yaml(params_filepath)\n", 65 | " create_directories([self.config.artifacts_root])\n", 66 | "\n", 67 | " def get_prepare_callback_config(self) -> PrepareCallbacksConfig:\n", 68 | " config = self.config.prepare_callbacks\n", 69 | " model_ckpt_dir = 
os.path.dirname(config.checkpoint_model_filepath)\n", 70 | " create_directories([\n", 71 | " Path(model_ckpt_dir),\n", 72 | " Path(config.tensorboard_root_log_dir)\n", 73 | " ])\n", 74 | "\n", 75 | " prepare_callback_config = PrepareCallbacksConfig(\n", 76 | " root_dir=Path(config.root_dir),\n", 77 | " tensorboard_root_log_dir=Path(config.tensorboard_root_log_dir),\n", 78 | " checkpoint_model_filepath=Path(config.checkpoint_model_filepath)\n", 79 | " )\n", 80 | "\n", 81 | " return prepare_callback_config\n", 82 | "\n", 83 | " def get_training_config(self) -> TrainingConfig:\n", 84 | " training = self.config.training\n", 85 | " prepare_base_model = self.config.prepare_base_model\n", 86 | " params = self.params\n", 87 | " training_data = os.path.join(self.config.data_ingestion.unzip_dir, \"PetImages\")\n", 88 | " create_directories([\n", 89 | " Path(training.root_dir)\n", 90 | " ])\n", 91 | "\n", 92 | " training_config = TrainingConfig(\n", 93 | " root_dir=Path(training.root_dir),\n", 94 | " trained_model_path=Path(training.trained_model_path),\n", 95 | " updated_base_model_path=Path(prepare_base_model.updated_base_model_path),\n", 96 | " training_data=Path(training_data),\n", 97 | " params_epochs=params.EPOCHS,\n", 98 | " params_batch_size=params.BATCH_SIZE,\n", 99 | " params_is_augmentation=params.AUGMENTATION,\n", 100 | " params_image_size=params.IMAGE_SIZE\n", 101 | " )\n", 102 | "\n", 103 | " return training_config" 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": 5, 109 | "metadata": {}, 110 | "outputs": [], 111 | "source": [ 112 | "import time\n", 113 | "\n", 114 | "class PrepareCallback:\n", 115 | " def __init__(self, config: PrepareCallbacksConfig):\n", 116 | " self.config = config\n", 117 | "\n", 118 | " @property\n", 119 | " def _create_tb_callbacks(self):\n", 120 | " timestamp = time.strftime(\"%Y-%m-%d-%H-%M-%S\")\n", 121 | " tb_running_log_dir = os.path.join(\n", 122 | " self.config.tensorboard_root_log_dir,\n", 123 | " f\"tb_logs_at_{timestamp}\",\n", 124 | " )\n", 125 | " return tf.keras.callbacks.TensorBoard(log_dir=tb_running_log_dir)\n", 126 | "\n", 127 | " @property\n", 128 | " def _create_ckpt_callbacks(self):\n", 129 | " return tf.keras.callbacks.ModelCheckpoint(\n", 130 | " filepath=self.config.checkpoint_model_filepath,\n", 131 | " save_best_only=True\n", 132 | " )\n", 133 | "\n", 134 | " def get_tb_ckpt_callbacks(self):\n", 135 | " return [\n", 136 | " self._create_tb_callbacks,\n", 137 | " self._create_ckpt_callbacks\n", 138 | " ]\n", 139 | "\n" 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": 6, 145 | "metadata": {}, 146 | "outputs": [], 147 | "source": [ 148 | "import os\n", 149 | "import urllib.request as request\n", 150 | "from zipfile import ZipFile\n", 151 | "import tensorflow as tf\n", 152 | "import time\n", 153 | "\n", 154 | "class Training:\n", 155 | " def __init__(self, config: TrainingConfig):\n", 156 | " self.config = config\n", 157 | "\n", 158 | " def get_base_model(self):\n", 159 | " self.model = tf.keras.models.load_model(\n", 160 | " self.config.updated_base_model_path\n", 161 | " )\n", 162 | "\n", 163 | " def train_valid_generator(self):\n", 164 | "\n", 165 | " datagenerator_kwargs = dict(\n", 166 | " rescale = 1./255,\n", 167 | " validation_split=0.20\n", 168 | " )\n", 169 | "\n", 170 | " dataflow_kwargs = dict(\n", 171 | " target_size=self.config.params_image_size[:-1],\n", 172 | " batch_size=self.config.params_batch_size,\n", 173 | " interpolation=\"bilinear\"\n", 174 | " )\n", 175 | "\n", 
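    "        # note (added): validation_split=0.20 above reserves 20% of the\n",
    "        # images; the subset argument passed to flow_from_directory selects\n",
    "        # the \"validation\" or \"training\" portion of that split\n",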
176 | " valid_datagenerator = tf.keras.preprocessing.image.ImageDataGenerator(\n", 177 | " **datagenerator_kwargs\n", 178 | " )\n", 179 | "\n", 180 | " self.valid_generator = valid_datagenerator.flow_from_directory(\n", 181 | " directory=self.config.training_data,\n", 182 | " subset=\"validation\",\n", 183 | " shuffle=False,\n", 184 | " **dataflow_kwargs\n", 185 | " )\n", 186 | "\n", 187 | " if self.config.params_is_augmentation:\n", 188 | " train_datagenerator = tf.keras.preprocessing.image.ImageDataGenerator(\n", 189 | " rotation_range=40,\n", 190 | " horizontal_flip=True,\n", 191 | " width_shift_range=0.2,\n", 192 | " height_shift_range=0.2,\n", 193 | " shear_range=0.2,\n", 194 | " zoom_range=0.2,\n", 195 | " **datagenerator_kwargs\n", 196 | " )\n", 197 | " else:\n", 198 | " train_datagenerator = valid_datagenerator\n", 199 | "\n", 200 | " self.train_generator = train_datagenerator.flow_from_directory(\n", 201 | " directory=self.config.training_data,\n", 202 | " subset=\"training\",\n", 203 | " shuffle=True,\n", 204 | " **dataflow_kwargs\n", 205 | " )\n", 206 | "\n", 207 | " @staticmethod\n", 208 | " def save_model(path: Path, model: tf.keras.Model):\n", 209 | " model.save(path)\n", 210 | "\n", 211 | "\n", 212 | " def train(self, callback_list: list):\n", 213 | " self.steps_per_epoch = self.train_generator.samples // self.train_generator.batch_size\n", 214 | " self.validation_steps = self.valid_generator.samples // self.valid_generator.batch_size\n", 215 | "\n", 216 | " self.model.fit(\n", 217 | " self.train_generator,\n", 218 | " epochs=self.config.params_epochs,\n", 219 | " steps_per_epoch=self.steps_per_epoch,\n", 220 | " validation_steps=self.validation_steps,\n", 221 | " validation_data=self.valid_generator,\n", 222 | " callbacks=callback_list\n", 223 | " )\n", 224 | "\n", 225 | " self.save_model(\n", 226 | " path=self.config.trained_model_path,\n", 227 | " model=self.model\n", 228 | " )" 229 | ] 230 | }, 231 | { 232 | "cell_type": "code", 233 | "execution_count": 7, 234 | "metadata": {}, 235 | "outputs": [], 236 | "source": [ 237 | "# !pip install scipy" 238 | ] 239 | }, 240 | { 241 | "cell_type": "code", 242 | "execution_count": 8, 243 | "metadata": {}, 244 | "outputs": [ 245 | { 246 | "name": "stdout", 247 | "output_type": "stream", 248 | "text": [ 249 | "Found 4998 images belonging to 2 classes.\n", 250 | "Found 20000 images belonging to 2 classes.\n", 251 | "1234/1250 [============================>.] 
- ETA: 4s - loss: 6.3841 - accuracy: 0.6860" 252 | ] 253 | }, 254 | { 255 | "name": "stderr", 256 | "output_type": "stream", 257 | "text": [ 258 | "f:\\LIVE_CLASS\\FSDS_NOV\\CodeBase\\FSDS_NOV_deepCNNClassifier\\env\\lib\\site-packages\\PIL\\TiffImagePlugin.py:845: UserWarning: Truncated File Read\n", 259 | " warnings.warn(str(msg))\n" 260 | ] 261 | }, 262 | { 263 | "name": "stdout", 264 | "output_type": "stream", 265 | "text": [ 266 | "1250/1250 [==============================] - 403s 313ms/step - loss: 6.3217 - accuracy: 0.6881 - val_loss: 1.0078 - val_accuracy: 0.8996\n" 267 | ] 268 | } 269 | ], 270 | "source": [ 271 | "try:\n", 272 | " config = ConfigurationManager()\n", 273 | " prepare_callbacks_config = config.get_prepare_callback_config()\n", 274 | " prepare_callbacks = PrepareCallback(config=prepare_callbacks_config)\n", 275 | " callback_list = prepare_callbacks.get_tb_ckpt_callbacks()\n", 276 | " \n", 277 | " training_config = config.get_training_config()\n", 278 | " training = Training(config=training_config)\n", 279 | " training.get_base_model()\n", 280 | " training.train_valid_generator()\n", 281 | " training.train(\n", 282 | " callback_list=callback_list\n", 283 | " )\n", 284 | " \n", 285 | "except Exception as e:\n", 286 | " raise e" 287 | ] 288 | }, 289 | { 290 | "cell_type": "code", 291 | "execution_count": 9, 292 | "metadata": {}, 293 | "outputs": [], 294 | "source": [ 295 | "def example(x, **kwargs):\n", 296 | " print(locals())" 297 | ] 298 | }, 299 | { 300 | "cell_type": "code", 301 | "execution_count": 12, 302 | "metadata": {}, 303 | "outputs": [ 304 | { 305 | "name": "stdout", 306 | "output_type": "stream", 307 | "text": [ 308 | "{'x': 3, 'kwargs': {'y': 4, 'z': 55}}\n" 309 | ] 310 | } 311 | ], 312 | "source": [ 313 | "extra = dict(y=4, z=55)\n", 314 | "\n", 315 | "example(x=3, **extra)" 316 | ] 317 | }, 318 | { 319 | "cell_type": "code", 320 | "execution_count": 1, 321 | "metadata": {}, 322 | "outputs": [], 323 | "source": [ 324 | "import os\n", 325 | "os.chdir(\"../\")" 326 | ] 327 | }, 328 | { 329 | "cell_type": "code", 330 | "execution_count": 2, 331 | "metadata": {}, 332 | "outputs": [], 333 | "source": [ 334 | "import tensorflow as tf" 335 | ] 336 | }, 337 | { 338 | "cell_type": "code", 339 | "execution_count": 3, 340 | "metadata": {}, 341 | "outputs": [], 342 | "source": [ 343 | "model = tf.keras.models.load_model(\"./artifacts/training/model.h5\")" 344 | ] 345 | }, 346 | { 347 | "cell_type": "code", 348 | "execution_count": 26, 349 | "metadata": {}, 350 | "outputs": [], 351 | "source": [ 352 | "from PIL import Image\n", 353 | "\n", 354 | "img = Image.open(\"./artifacts/data_ingestion/PetImages/Cat/0.jpg\")\n", 355 | "img = img.resize((224,224))" 356 | ] 357 | }, 358 | { 359 | "cell_type": "code", 360 | "execution_count": 27, 361 | "metadata": {}, 362 | "outputs": [], 363 | "source": [ 364 | "import numpy as np" 365 | ] 366 | }, 367 | { 368 | "cell_type": "code", 369 | "execution_count": 28, 370 | "metadata": {}, 371 | "outputs": [], 372 | "source": [ 373 | "img_array = np.array(img)" 374 | ] 375 | }, 376 | { 377 | "cell_type": "code", 378 | "execution_count": 29, 379 | "metadata": {}, 380 | "outputs": [ 381 | { 382 | "data": { 383 | "text/plain": [ 384 | "(224, 224, 3)" 385 | ] 386 | }, 387 | "execution_count": 29, 388 | "metadata": {}, 389 | "output_type": "execute_result" 390 | } 391 | ], 392 | "source": [ 393 | "img_array.shape" 394 | ] 395 | }, 396 | { 397 | "cell_type": "code", 398 | "execution_count": 16, 399 | "metadata": {}, 400 | "outputs": [ 401 | { 
402 | "data": { 403 | "text/plain": [ 404 | "(1, 224, 224, 3)" 405 | ] 406 | }, 407 | "execution_count": 16, 408 | "metadata": {}, 409 | "output_type": "execute_result" 410 | } 411 | ], 412 | "source": [ 413 | "img_array = np.expand_dims(img_array, axis=0)\n", 414 | "img_array.shape" 415 | ] 416 | }, 417 | { 418 | "cell_type": "code", 419 | "execution_count": 18, 420 | "metadata": {}, 421 | "outputs": [ 422 | { 423 | "name": "stdout", 424 | "output_type": "stream", 425 | "text": [ 426 | "1/1 [==============================] - 0s 39ms/step\n" 427 | ] 428 | }, 429 | { 430 | "data": { 431 | "text/plain": [ 432 | "array([[1., 0.]], dtype=float32)" 433 | ] 434 | }, 435 | "execution_count": 18, 436 | "metadata": {}, 437 | "output_type": "execute_result" 438 | } 439 | ], 440 | "source": [ 441 | "result = model.predict(img_array)\n", 442 | "result" 443 | ] 444 | }, 445 | { 446 | "cell_type": "code", 447 | "execution_count": 24, 448 | "metadata": {}, 449 | "outputs": [ 450 | { 451 | "name": "stdout", 452 | "output_type": "stream", 453 | "text": [ 454 | "predicted: cat\n" 455 | ] 456 | } 457 | ], 458 | "source": [ 459 | "argmax_index = np.argmax(result, axis=1)\n", 460 | "if argmax_index[0] == 0:\n", 461 | " print(\"predicted: cat\")\n", 462 | "else:\n", 463 | " print(\"predicted: dog\")" 464 | ] 465 | }, 466 | { 467 | "cell_type": "code", 468 | "execution_count": null, 469 | "metadata": {}, 470 | "outputs": [], 471 | "source": [] 472 | } 473 | ], 474 | "metadata": { 475 | "kernelspec": { 476 | "display_name": "Python 3.8.13 64-bit", 477 | "language": "python", 478 | "name": "python3" 479 | }, 480 | "language_info": { 481 | "codemirror_mode": { 482 | "name": "ipython", 483 | "version": 3 484 | }, 485 | "file_extension": ".py", 486 | "mimetype": "text/x-python", 487 | "name": "python", 488 | "nbconvert_exporter": "python", 489 | "pygments_lexer": "ipython3", 490 | "version": "3.8.13" 491 | }, 492 | "orig_nbformat": 4, 493 | "vscode": { 494 | "interpreter": { 495 | "hash": "37206e62ba8fc6a6f0b961435078c80557a69ac7a5dd6249b6601b5385f5de67" 496 | } 497 | } 498 | }, 499 | "nbformat": 4, 500 | "nbformat_minor": 2 501 | } 502 | -------------------------------------------------------------------------------- /research/st_05_trails.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import tensorflow as tf" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 2, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "import os\n", 19 | "os.chdir(\"../\")" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 16, 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [ 28 | "os.environ[\"MLFLOW_TRACKING_URI\"]=\"https://dagshub.com/c17hawke/FSDS_NOV_deepCNNClassifier.mlflow\"\n", 29 | "os.environ[\"MLFLOW_TRACKING_USERNAME\"]=\"c17hawke\"\n", 30 | "os.environ[\"MLFLOW_TRACKING_PASSWORD\"]=\"84215e85b5e87347572d9d272c798b2b1ff2a546\"" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 17, 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [ 39 | "model = tf.keras.models.load_model(\"artifacts/training/model.h5\")" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 18, 45 | "metadata": {}, 46 | "outputs": [], 47 | "source": [ 48 | "from dataclasses import dataclass\n", 49 | "from pathlib import Path\n", 50 | "\n", 51 | "@dataclass(frozen=True)\n", 
52 | "class EvaluationConfig:\n", 53 | " path_of_model: Path\n", 54 | " training_data: Path\n", 55 | " all_params: dict\n", 56 | " mlflow_uri: str\n", 57 | " params_image_size: list\n", 58 | " params_batch_size: int\n" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": 19, 64 | "metadata": {}, 65 | "outputs": [], 66 | "source": [ 67 | "from deepClassifier.constants import *\n", 68 | "from deepClassifier.utils import read_yaml, create_directories, save_json\n", 69 | "\n", 70 | "class ConfigurationManager:\n", 71 | " def __init__(\n", 72 | " self, \n", 73 | " config_filepath = CONFIG_FILE_PATH,\n", 74 | " params_filepath = PARAMS_FILE_PATH):\n", 75 | " self.config = read_yaml(config_filepath)\n", 76 | " self.params = read_yaml(params_filepath)\n", 77 | " create_directories([self.config.artifacts_root])\n", 78 | "\n", 79 | " def get_validation_config(self) -> EvaluationConfig:\n", 80 | " eval_config = EvaluationConfig(\n", 81 | " path_of_model=\"artifacts/training/model.h5\",\n", 82 | " training_data=\"artifacts/data_ingestion/PetImages\",\n", 83 | " mlflow_uri=\"https://dagshub.com/c17hawke/FSDS_NOV_deepCNNClassifier.mlflow\",\n", 84 | " all_params=self.params,\n", 85 | " params_image_size=self.params.IMAGE_SIZE,\n", 86 | " params_batch_size=self.params.BATCH_SIZE\n", 87 | " )\n", 88 | " return eval_config" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": 22, 94 | "metadata": {}, 95 | "outputs": [], 96 | "source": [ 97 | "\n", 98 | "import tensorflow as tf\n", 99 | "from pathlib import Path\n", 100 | "import mlflow\n", 101 | "import mlflow.keras\n", 102 | "from urllib.parse import urlparse\n", 103 | "\n", 104 | "class Evaluation:\n", 105 | " def __init__(self, config: EvaluationConfig):\n", 106 | " self.config = config\n", 107 | "\n", 108 | " def _valid_generator(self):\n", 109 | "\n", 110 | " datagenerator_kwargs = dict(\n", 111 | " rescale = 1./255,\n", 112 | " validation_split=0.30\n", 113 | " )\n", 114 | "\n", 115 | " dataflow_kwargs = dict(\n", 116 | " target_size=self.config.params_image_size[:-1],\n", 117 | " batch_size=self.config.params_batch_size,\n", 118 | " interpolation=\"bilinear\"\n", 119 | " )\n", 120 | "\n", 121 | " valid_datagenerator = tf.keras.preprocessing.image.ImageDataGenerator(\n", 122 | " **datagenerator_kwargs\n", 123 | " )\n", 124 | "\n", 125 | " self.valid_generator = valid_datagenerator.flow_from_directory(\n", 126 | " directory=self.config.training_data,\n", 127 | " subset=\"validation\",\n", 128 | " shuffle=False,\n", 129 | " **dataflow_kwargs\n", 130 | " )\n", 131 | "\n", 132 | "\n", 133 | " @staticmethod\n", 134 | " def load_model(path: Path) -> tf.keras.Model:\n", 135 | " return tf.keras.models.load_model(path)\n", 136 | "\n", 137 | "\n", 138 | " def evaluation(self):\n", 139 | " self.model = self.load_model(self.config.path_of_model)\n", 140 | " self._valid_generator()\n", 141 | " self.score = model.evaluate(self.valid_generator)\n", 142 | "\n", 143 | " def save_score(self):\n", 144 | " scores = {\"loss\": self.score[0], \"accuracy\": self.score[1]}\n", 145 | " save_json(path=Path(\"scores.json\"), data=scores)\n", 146 | "\n", 147 | " def log_into_mlflow(self):\n", 148 | " mlflow.set_registry_uri(self.config.mlflow_uri)\n", 149 | " tracking_url_type_store = urlparse(mlflow.get_tracking_uri()).scheme\n", 150 | " with mlflow.start_run():\n", 151 | " mlflow.log_params(self.config.all_params)\n", 152 | " mlflow.log_metrics(\n", 153 | " {\"loss\": self.score[0], \"accuracy\": self.score[1]}\n", 154 | " )\n", 155 | " # 
Model registry does not work with file store\n", 156 | " if tracking_url_type_store != \"file\":\n", 157 | "\n", 158 | " # Register the model\n", 159 | " # There are other ways to use the Model Registry, which depends on the use case,\n", 160 | " # please refer to the doc for more information:\n", 161 | " # https://mlflow.org/docs/latest/model-registry.html#api-workflow\n", 162 | " mlflow.keras.log_model(self.model, \"model\", registered_model_name=\"VGG16Model\")\n", 163 | " else:\n", 164 | " mlflow.keras.log_model(self.model, \"model\")\n" 165 | ] 166 | }, 167 | { 168 | "cell_type": "code", 169 | "execution_count": 23, 170 | "metadata": {}, 171 | "outputs": [ 172 | { 173 | "name": "stdout", 174 | "output_type": "stream", 175 | "text": [ 176 | "Found 7498 images belonging to 2 classes.\n", 177 | "469/469 [==============================] - 47s 100ms/step - loss: 7.2357 - accuracy: 0.6538\n", 178 | "INFO:tensorflow:Assets written to: C:\\Users\\sunny\\AppData\\Local\\Temp\\tmpkae92jcs\\model\\data\\model\\assets\n" 179 | ] 180 | }, 181 | { 182 | "name": "stderr", 183 | "output_type": "stream", 184 | "text": [ 185 | "Successfully registered model ''.\n", 186 | "2022/09/25 11:59:15 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation. Model name: VGG16Model, version 2\n", 187 | "Created version '2' of model 'VGG16Model'.\n" 188 | ] 189 | } 190 | ], 191 | "source": [ 192 | "try:\n", 193 | " config = ConfigurationManager()\n", 194 | " val_config = config.get_validation_config()\n", 195 | " evaluation = Evaluation(val_config)\n", 196 | " evaluation.evaluation()\n", 197 | " evaluation.save_score()\n", 198 | " evaluation.log_into_mlflow()\n", 199 | " \n", 200 | "except Exception as e:\n", 201 | " raise e" 202 | ] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "execution_count": null, 207 | "metadata": {}, 208 | "outputs": [], 209 | "source": [] 210 | } 211 | ], 212 | "metadata": { 213 | "kernelspec": { 214 | "display_name": "Python 3.8.13 64-bit", 215 | "language": "python", 216 | "name": "python3" 217 | }, 218 | "language_info": { 219 | "codemirror_mode": { 220 | "name": "ipython", 221 | "version": 3 222 | }, 223 | "file_extension": ".py", 224 | "mimetype": "text/x-python", 225 | "name": "python", 226 | "nbconvert_exporter": "python", 227 | "pygments_lexer": "ipython3", 228 | "version": "3.8.13" 229 | }, 230 | "orig_nbformat": 4, 231 | "vscode": { 232 | "interpreter": { 233 | "hash": "37206e62ba8fc6a6f0b961435078c80557a69ac7a5dd6249b6601b5385f5de67" 234 | } 235 | } 236 | }, 237 | "nbformat": 4, 238 | "nbformat_minor": 2 239 | } 240 | -------------------------------------------------------------------------------- /research/trials.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from box import ConfigBox" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 2, 15 | "metadata": {}, 16 | "outputs": [ 17 | { 18 | "data": { 19 | "text/plain": [ 20 | "'value'" 21 | ] 22 | }, 23 | "execution_count": 2, 24 | "metadata": {}, 25 | "output_type": "execute_result" 26 | } 27 | ], 28 | "source": [ 29 | "d = {\"key\": \"value\", \"key1\": \"value1\"}\n", 30 | "d[\"key\"]" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 4, 36 | "metadata": {}, 37 | "outputs": [ 38 | { 39 | "data": { 40 | "text/plain": [ 41 | "ConfigBox({'key': 'value', 
'key1': 'value1'})" 42 | ] 43 | }, 44 | "execution_count": 4, 45 | "metadata": {}, 46 | "output_type": "execute_result" 47 | } 48 | ], 49 | "source": [ 50 | "d2 = ConfigBox({\"key\": \"value\", \"key1\": \"value1\"})\n", 51 | "d2" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": 5, 57 | "metadata": {}, 58 | "outputs": [ 59 | { 60 | "data": { 61 | "text/plain": [ 62 | "'value'" 63 | ] 64 | }, 65 | "execution_count": 5, 66 | "metadata": {}, 67 | "output_type": "execute_result" 68 | } 69 | ], 70 | "source": [ 71 | "d2.key" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": 7, 77 | "metadata": {}, 78 | "outputs": [], 79 | "source": [ 80 | "from ensure import ensure_annotations" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": 8, 86 | "metadata": {}, 87 | "outputs": [], 88 | "source": [ 89 | "@ensure_annotations\n", 90 | "def get_product(x: int, y: int) -> int:\n", 91 | " return x*y" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": 9, 97 | "metadata": {}, 98 | "outputs": [ 99 | { 100 | "data": { 101 | "text/plain": [ 102 | "6" 103 | ] 104 | }, 105 | "execution_count": 9, 106 | "metadata": {}, 107 | "output_type": "execute_result" 108 | } 109 | ], 110 | "source": [ 111 | "get_product(x=3, y=2)" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": 10, 117 | "metadata": {}, 118 | "outputs": [ 119 | { 120 | "ename": "EnsureError", 121 | "evalue": "Argument y of type to does not match annotation type ", 122 | "output_type": "error", 123 | "traceback": [ 124 | "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", 125 | "\u001b[1;31mEnsureError\u001b[0m Traceback (most recent call last)", 126 | "Cell \u001b[1;32mIn [10], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m \u001b[43mget_product\u001b[49m\u001b[43m(\u001b[49m\u001b[43mx\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m3\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43my\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mhii\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n", 127 | "File \u001b[1;32mf:\\LIVE_CLASS\\FSDS_NOV\\CodeBase\\FSDS_NOV_deepCNNClassifier\\env\\lib\\site-packages\\ensure\\main.py:845\u001b[0m, in \u001b[0;36mWrappedFunctionReturn.__call__\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m 840\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39misinstance\u001b[39m(value, templ):\n\u001b[0;32m 841\u001b[0m msg \u001b[39m=\u001b[39m (\n\u001b[0;32m 842\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mArgument \u001b[39m\u001b[39m{arg}\u001b[39;00m\u001b[39m of type \u001b[39m\u001b[39m{valt}\u001b[39;00m\u001b[39m to \u001b[39m\u001b[39m{f}\u001b[39;00m\u001b[39m \u001b[39m\u001b[39m\"\u001b[39m\n\u001b[0;32m 843\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mdoes not match annotation type \u001b[39m\u001b[39m{t}\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[0;32m 844\u001b[0m )\n\u001b[1;32m--> 845\u001b[0m \u001b[39mraise\u001b[39;00m EnsureError(msg\u001b[39m.\u001b[39mformat(\n\u001b[0;32m 846\u001b[0m arg\u001b[39m=\u001b[39marg, f\u001b[39m=\u001b[39m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mf, t\u001b[39m=\u001b[39mtempl, valt\u001b[39m=\u001b[39m\u001b[39mtype\u001b[39m(value)\n\u001b[0;32m 847\u001b[0m ))\n\u001b[0;32m 849\u001b[0m return_val \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mf(\u001b[39m*\u001b[39margs, 
\u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n\u001b[0;32m 850\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39misinstance\u001b[39m(return_val, \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mreturn_templ):\n", 128 | "\u001b[1;31mEnsureError\u001b[0m: Argument y of type to does not match annotation type " 129 | ] 130 | } 131 | ], 132 | "source": [ 133 | "get_product(x=3, y=\"hii\")\n" 134 | ] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "execution_count": 13, 139 | "metadata": {}, 140 | "outputs": [], 141 | "source": [ 142 | "@ensure_annotations\n", 143 | "def get_product(x: int, y: int) -> str:\n", 144 | " return str(x*y)" 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": 14, 150 | "metadata": {}, 151 | "outputs": [ 152 | { 153 | "data": { 154 | "text/plain": [ 155 | "'6'" 156 | ] 157 | }, 158 | "execution_count": 14, 159 | "metadata": {}, 160 | "output_type": "execute_result" 161 | } 162 | ], 163 | "source": [ 164 | "get_product(x=3, y=2)" 165 | ] 166 | }, 167 | { 168 | "cell_type": "code", 169 | "execution_count": null, 170 | "metadata": {}, 171 | "outputs": [], 172 | "source": [] 173 | } 174 | ], 175 | "metadata": { 176 | "kernelspec": { 177 | "display_name": "Python 3.8.13 (conda)", 178 | "language": "python", 179 | "name": "python3" 180 | }, 181 | "language_info": { 182 | "codemirror_mode": { 183 | "name": "ipython", 184 | "version": 3 185 | }, 186 | "file_extension": ".py", 187 | "mimetype": "text/x-python", 188 | "name": "python", 189 | "nbconvert_exporter": "python", 190 | "pygments_lexer": "ipython3", 191 | "version": "3.8.13" 192 | }, 193 | "orig_nbformat": 4, 194 | "vscode": { 195 | "interpreter": { 196 | "hash": "37206e62ba8fc6a6f0b961435078c80557a69ac7a5dd6249b6601b5385f5de67" 197 | } 198 | } 199 | }, 200 | "nbformat": 4, 201 | "nbformat_minor": 2 202 | } 203 | -------------------------------------------------------------------------------- /research/trials.ipynbexample.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c17hawke/FSDS_NOV_deepCNNClassifier/e54341d242a39256c54a90acdead18bc53031677/research/trials.ipynbexample.py -------------------------------------------------------------------------------- /scores.json: -------------------------------------------------------------------------------- 1 | { 2 | "loss": 7.235665798187256, 3 | "accuracy": 0.6537743210792542 4 | } -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | license = MIT 3 | license_file = LICENSE 4 | classifier = 5 | Programming Language :: Python :: 3.8 6 | Operating System :: OS Independent 7 | 8 | [options] 9 | install_requires = 10 | ensure==1.0.2 11 | python_requires = >=3.7 12 | 13 | [options.extras_require] 14 | testing = 15 | pytest>=7.1.3 16 | mypy>=0.971 17 | flake8>=5.0.4 18 | tox>=3.25.1 19 | black>=22.8.0 20 | 21 | [options.package_data] 22 | deepClassifier = py.typed 23 | 24 | [flake8] 25 | max-line-length = 160 26 | exclude = __init__.py 27 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import setuptools 2 | 3 | with open("README.md", "r", encoding="utf-8") as f: 4 | long_description = f.read() 5 | 6 | __version__ = "0.0.0" 7 | 8 | REPO_NAME = "FSDS_NOV_deepCNNClassifier" 9 | AUTHOR_USER_NAME = 
"c17hawke" 10 | SRC_REPO = "deepClassifier" 11 | AUTHOR_EMAIL = "sunny.c17hawke@gmail.com" 12 | 13 | setuptools.setup( 14 | name=SRC_REPO, 15 | version=__version__, 16 | author=AUTHOR_USER_NAME, 17 | author_email=AUTHOR_EMAIL, 18 | description="A small python package for CNN app", 19 | long_description=long_description, 20 | long_description_content="text/markdown", 21 | url=f"https://github.com/{AUTHOR_USER_NAME}/{REPO_NAME}", 22 | project_urls={ 23 | "Bug Tracker": f"https://github.com/{AUTHOR_USER_NAME}/{REPO_NAME}/issues", 24 | }, 25 | package_dir={"": "src"}, 26 | packages=setuptools.find_packages(where="src") 27 | ) 28 | -------------------------------------------------------------------------------- /src/deepClassifier/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import logging 4 | 5 | logging_str = "[%(asctime)s: %(levelname)s: %(module)s]: %(message)s" 6 | log_dir = "logs" 7 | log_filepath = os.path.join(log_dir, "running_logs.log") 8 | os.makedirs(log_dir, exist_ok=True) 9 | 10 | logging.basicConfig( 11 | level=logging.INFO, 12 | format=logging_str, 13 | handlers=[ 14 | logging.FileHandler(log_filepath), 15 | # logging.StreamHandler(sys.stdout), 16 | ]) 17 | 18 | logger = logging.getLogger("deepClassifierLogger") -------------------------------------------------------------------------------- /src/deepClassifier/components/__init__.py: -------------------------------------------------------------------------------- 1 | from deepClassifier.components.data_ingestion import DataIngestion 2 | from deepClassifier.components.prepare_base_model import PrepareBaseModel 3 | from deepClassifier.components.prepare_callback import PrepareCallback 4 | from deepClassifier.components.training import Training 5 | from deepClassifier.components.evaluation import Evaluation -------------------------------------------------------------------------------- /src/deepClassifier/components/data_ingestion.py: -------------------------------------------------------------------------------- 1 | import os 2 | import urllib.request as request 3 | from zipfile import ZipFile 4 | from deepClassifier.entity import DataIngestionConfig 5 | from deepClassifier import logger 6 | from deepClassifier.utils import get_size 7 | from tqdm import tqdm 8 | from pathlib import Path 9 | 10 | 11 | class DataIngestion: 12 | def __init__(self, config: DataIngestionConfig): 13 | self.config = config 14 | 15 | def download_file(self): 16 | logger.info("Trying to download file...") 17 | if not os.path.exists(self.config.local_data_file): 18 | logger.info("Download started...") 19 | filename, headers = request.urlretrieve( 20 | url=self.config.source_URL, 21 | filename=self.config.local_data_file 22 | ) 23 | logger.info(f"{filename} download! 
with the following info: \n{headers}") 24 | else: 25 | logger.info(f"File already exists of size: {get_size(Path(self.config.local_data_file))}") 26 | 27 | def _get_updated_list_of_files(self, list_of_files): 28 | return [f for f in list_of_files if f.endswith(".jpg") and ("Cat" in f or "Dog" in f)] 29 | 30 | def _preprocess(self, zf: ZipFile, f: str, working_dir: str): 31 | target_filepath = os.path.join(working_dir, f) 32 | if not os.path.exists(target_filepath): 33 | zf.extract(f, working_dir) 34 | 35 | if os.path.getsize(target_filepath) == 0: 36 | logger.info(f"removing file:{target_filepath} of size: {get_size(Path(target_filepath))}") 37 | os.remove(target_filepath) 38 | 39 | def unzip_and_clean(self): 40 | logger.info(f"unzipping file and removing unwanted files") 41 | with ZipFile(file=self.config.local_data_file, mode="r") as zf: 42 | list_of_files = zf.namelist() 43 | updated_list_of_files = self._get_updated_list_of_files(list_of_files) 44 | for f in tqdm(updated_list_of_files): 45 | self._preprocess(zf, f, self.config.unzip_dir) 46 | 47 | def create_test_data(self): 48 | """ 49 | separate 30% of the data into test data 50 | """ 51 | pass -------------------------------------------------------------------------------- /src/deepClassifier/components/evaluation.py: -------------------------------------------------------------------------------- 1 | 2 | import tensorflow as tf 3 | from pathlib import Path 4 | from deepClassifier.entity import EvaluationConfig 5 | from deepClassifier.utils import save_json 6 | 7 | class Evaluation: 8 | def __init__(self, config: EvaluationConfig): 9 | self.config = config 10 | 11 | def _valid_generator(self): 12 | 13 | datagenerator_kwargs = dict( 14 | rescale = 1./255, 15 | validation_split=0.30 16 | ) 17 | 18 | dataflow_kwargs = dict( 19 | target_size=self.config.params_image_size[:-1], 20 | batch_size=self.config.params_batch_size, 21 | interpolation="bilinear" 22 | ) 23 | 24 | valid_datagenerator = tf.keras.preprocessing.image.ImageDataGenerator( 25 | **datagenerator_kwargs 26 | ) 27 | 28 | self.valid_generator = valid_datagenerator.flow_from_directory( 29 | directory=self.config.training_data, 30 | subset="validation", 31 | shuffle=False, 32 | **dataflow_kwargs 33 | ) 34 | 35 | 36 | @staticmethod 37 | def load_model(path: Path) -> tf.keras.Model: 38 | return tf.keras.models.load_model(path) 39 | 40 | 41 | def evaluation(self): 42 | model = self.load_model(self.config.path_of_model) 43 | self._valid_generator() 44 | self.score = model.evaluate(self.valid_generator) 45 | 46 | def save_score(self): 47 | scores = {"loss": self.score[0], "accuracy": self.score[1]} 48 | save_json(path=Path("scores.json"), data=scores) -------------------------------------------------------------------------------- /src/deepClassifier/components/prepare_base_model.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from deepClassifier.entity import PrepareBaseModelConfig 3 | import tensorflow as tf 4 | 5 | class PrepareBaseModel: 6 | def __init__(self, config: PrepareBaseModelConfig): 7 | self.config = config 8 | 9 | def get_base_model(self): 10 | self.model = tf.keras.applications.vgg16.VGG16( 11 | input_shape=self.config.params_image_size, 12 | weights=self.config.params_weights, 13 | include_top=self.config.params_include_top 14 | ) 15 | 16 | self.save_model(path=self.config.base_model_path, model=self.model) 17 | 18 | 19 | @staticmethod 20 | def _prepare_full_model(model, classes, freeze_all, freeze_till, 
learning_rate): 21 | if freeze_all: 22 | model.trainable = False 23 | elif (freeze_till is not None) and (freeze_till > 0): 24 | for layer in model.layers[:-freeze_till]: 25 | layer.trainable = False 26 | 27 | flatten_in = tf.keras.layers.Flatten()(model.output) 28 | prediction = tf.keras.layers.Dense( 29 | units=classes, 30 | activation="softmax" 31 | )(flatten_in) 32 | 33 | full_model = tf.keras.models.Model( 34 | inputs=model.input, 35 | outputs=prediction 36 | ) 37 | 38 | full_model.compile( 39 | optimizer=tf.keras.optimizers.SGD(learning_rate=learning_rate), 40 | loss=tf.keras.losses.CategoricalCrossentropy(), 41 | metrics=["accuracy"] 42 | ) 43 | 44 | full_model.summary() 45 | return full_model 46 | 47 | def update_base_model(self): 48 | self.full_model = self._prepare_full_model( 49 | model=self.model, 50 | classes=self.config.params_classes, 51 | freeze_all=True, 52 | freeze_till=None, 53 | learning_rate=self.config.params_learning_rate 54 | ) 55 | 56 | self.save_model(path=self.config.updated_base_model_path, model=self.full_model) 57 | 58 | @staticmethod 59 | def save_model(path: Path, model: tf.keras.Model): 60 | model.save(path) 61 | 62 | -------------------------------------------------------------------------------- /src/deepClassifier/components/prepare_callback.py: -------------------------------------------------------------------------------- 1 | import os 2 | from deepClassifier.entity import PrepareCallbacksConfig 3 | import tensorflow as tf 4 | import time 5 | 6 | class PrepareCallback: 7 | def __init__(self, config: PrepareCallbacksConfig): 8 | self.config = config 9 | 10 | @property 11 | def _create_tb_callbacks(self): 12 | timestamp = time.strftime("%Y-%m-%d-%H-%M-%S") 13 | tb_running_log_dir = os.path.join( 14 | self.config.tensorboard_root_log_dir, 15 | f"tb_logs_at_{timestamp}", 16 | ) 17 | return tf.keras.callbacks.TensorBoard(log_dir=tb_running_log_dir) 18 | 19 | @property 20 | def _create_ckpt_callbacks(self): 21 | return tf.keras.callbacks.ModelCheckpoint( 22 | filepath=self.config.checkpoint_model_filepath, 23 | save_best_only=True 24 | ) 25 | 26 | def get_tb_ckpt_callbacks(self): 27 | return [ 28 | self._create_tb_callbacks, 29 | self._create_ckpt_callbacks 30 | ] 31 | 32 | -------------------------------------------------------------------------------- /src/deepClassifier/components/training.py: -------------------------------------------------------------------------------- 1 | from deepClassifier.entity import TrainingConfig 2 | import tensorflow as tf 3 | from pathlib import Path 4 | 5 | class Training: 6 | def __init__(self, config: TrainingConfig): 7 | self.config = config 8 | 9 | def get_base_model(self): 10 | self.model = tf.keras.models.load_model( 11 | self.config.updated_base_model_path 12 | ) 13 | 14 | def train_valid_generator(self): 15 | 16 | datagenerator_kwargs = dict( 17 | rescale = 1./255, 18 | validation_split=0.20 19 | ) 20 | 21 | dataflow_kwargs = dict( 22 | target_size=self.config.params_image_size[:-1], 23 | batch_size=self.config.params_batch_size, 24 | interpolation="bilinear" 25 | ) 26 | 27 | valid_datagenerator = tf.keras.preprocessing.image.ImageDataGenerator( 28 | **datagenerator_kwargs 29 | ) 30 | 31 | self.valid_generator = valid_datagenerator.flow_from_directory( 32 | directory=self.config.training_data, 33 | subset="validation", 34 | shuffle=False, 35 | **dataflow_kwargs 36 | ) 37 | 38 | if self.config.params_is_augmentation: 39 | train_datagenerator = tf.keras.preprocessing.image.ImageDataGenerator( 40 | rotation_range=40, 
41 | horizontal_flip=True, 42 | width_shift_range=0.2, 43 | height_shift_range=0.2, 44 | shear_range=0.2, 45 | zoom_range=0.2, 46 | **datagenerator_kwargs 47 | ) 48 | else: 49 | train_datagenerator = valid_datagenerator 50 | 51 | self.train_generator = train_datagenerator.flow_from_directory( 52 | directory=self.config.training_data, 53 | subset="training", 54 | shuffle=True, 55 | **dataflow_kwargs 56 | ) 57 | 58 | @staticmethod 59 | def save_model(path: Path, model: tf.keras.Model): 60 | model.save(path) 61 | 62 | 63 | def train(self, callback_list: list): 64 | self.steps_per_epoch = self.train_generator.samples // self.train_generator.batch_size 65 | self.validation_steps = self.valid_generator.samples // self.valid_generator.batch_size 66 | 67 | self.model.fit( 68 | self.train_generator, 69 | epochs=self.config.params_epochs, 70 | steps_per_epoch=self.steps_per_epoch, 71 | validation_steps=self.validation_steps, 72 | validation_data=self.valid_generator, 73 | callbacks=callback_list 74 | ) 75 | 76 | self.save_model( 77 | path=self.config.trained_model_path, 78 | model=self.model 79 | ) -------------------------------------------------------------------------------- /src/deepClassifier/config/__init__.py: -------------------------------------------------------------------------------- 1 | from deepClassifier.config.configuration import ConfigurationManager -------------------------------------------------------------------------------- /src/deepClassifier/config/configuration.py: -------------------------------------------------------------------------------- 1 | from deepClassifier.constants import CONFIG_FILE_PATH, PARAMS_FILE_PATH 2 | from deepClassifier.utils import read_yaml, create_directories 3 | from deepClassifier.entity import ( 4 | DataIngestionConfig, 5 | PrepareBaseModelConfig, 6 | PrepareCallbacksConfig, 7 | TrainingConfig, 8 | EvaluationConfig 9 | ) 10 | from pathlib import Path 11 | import os 12 | 13 | class ConfigurationManager: 14 | def __init__( 15 | self, 16 | config_filepath = CONFIG_FILE_PATH, 17 | params_filepath = PARAMS_FILE_PATH): 18 | self.config = read_yaml(config_filepath) 19 | self.params = read_yaml(params_filepath) 20 | create_directories([self.config.artifacts_root]) 21 | 22 | def get_data_ingestion_config(self) -> DataIngestionConfig: 23 | config = self.config.data_ingestion 24 | 25 | create_directories([config.root_dir]) 26 | 27 | data_ingestion_config = DataIngestionConfig( 28 | root_dir=config.root_dir, 29 | source_URL=config.source_URL, 30 | local_data_file=config.local_data_file, 31 | unzip_dir=config.unzip_dir 32 | ) 33 | 34 | return data_ingestion_config 35 | 36 | def get_prepare_base_model_config(self) -> PrepareBaseModelConfig: 37 | config = self.config.prepare_base_model 38 | 39 | create_directories([config.root_dir]) 40 | 41 | prepare_base_model_config = PrepareBaseModelConfig( 42 | root_dir=Path(config.root_dir), 43 | base_model_path=Path(config.base_model_path), 44 | updated_base_model_path=Path(config.updated_base_model_path), 45 | params_image_size=self.params.IMAGE_SIZE, 46 | params_learning_rate=self.params.LEARNING_RATE, 47 | params_include_top=self.params.INCLUDE_TOP, 48 | params_weights=self.params.WEIGHTS, 49 | params_classes=self.params.CLASSES 50 | ) 51 | 52 | return prepare_base_model_config 53 | 54 | def get_prepare_callback_config(self) -> PrepareCallbacksConfig: 55 | config = self.config.prepare_callbacks 56 | model_ckpt_dir = os.path.dirname(config.checkpoint_model_filepath) 57 | create_directories([ 58 | Path(model_ckpt_dir), 
59 | Path(config.tensorboard_root_log_dir) 60 | ]) 61 | 62 | prepare_callback_config = PrepareCallbacksConfig( 63 | root_dir=Path(config.root_dir), 64 | tensorboard_root_log_dir=Path(config.tensorboard_root_log_dir), 65 | checkpoint_model_filepath=Path(config.checkpoint_model_filepath) 66 | ) 67 | 68 | return prepare_callback_config 69 | 70 | def get_training_config(self) -> TrainingConfig: 71 | training = self.config.training 72 | prepare_base_model = self.config.prepare_base_model 73 | params = self.params 74 | training_data = os.path.join(self.config.data_ingestion.unzip_dir, "PetImages") 75 | create_directories([ 76 | Path(training.root_dir) 77 | ]) 78 | 79 | training_config = TrainingConfig( 80 | root_dir=Path(training.root_dir), 81 | trained_model_path=Path(training.trained_model_path), 82 | updated_base_model_path=Path(prepare_base_model.updated_base_model_path), 83 | training_data=Path(training_data), 84 | params_epochs=params.EPOCHS, 85 | params_batch_size=params.BATCH_SIZE, 86 | params_is_augmentation=params.AUGMENTATION, 87 | params_image_size=params.IMAGE_SIZE 88 | ) 89 | 90 | return training_config 91 | 92 | def get_validation_config(self) -> EvaluationConfig: 93 | eval_config = EvaluationConfig( 94 | path_of_model=self.config.training.trained_model_path, 95 | training_data=self.config.data_ingestion.unzip_dir, 96 | params_image_size=self.params.IMAGE_SIZE, 97 | params_batch_size=self.params.BATCH_SIZE 98 | ) 99 | return eval_config -------------------------------------------------------------------------------- /src/deepClassifier/constants/__init__.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | CONFIG_FILE_PATH = Path("configs/config.yaml") 4 | PARAMS_FILE_PATH = Path("params.yaml") 5 | -------------------------------------------------------------------------------- /src/deepClassifier/entity/__init__.py: -------------------------------------------------------------------------------- 1 | from deepClassifier.entity.config_entity import ( 2 | DataIngestionConfig, 3 | PrepareBaseModelConfig, 4 | PrepareCallbacksConfig, 5 | TrainingConfig, 6 | EvaluationConfig 7 | ) -------------------------------------------------------------------------------- /src/deepClassifier/entity/config_entity.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from pathlib import Path 3 | 4 | 5 | @dataclass(frozen=True) 6 | class DataIngestionConfig: 7 | root_dir: Path 8 | source_URL: str 9 | local_data_file: Path 10 | unzip_dir: Path 11 | 12 | 13 | @dataclass(frozen=True) 14 | class PrepareBaseModelConfig: 15 | root_dir: Path 16 | base_model_path: Path 17 | updated_base_model_path: Path 18 | params_image_size: list 19 | params_learning_rate: float 20 | params_include_top: bool 21 | params_weights: str 22 | params_classes: int 23 | 24 | @dataclass(frozen=True) 25 | class PrepareCallbacksConfig: 26 | root_dir: Path 27 | tensorboard_root_log_dir: Path 28 | checkpoint_model_filepath: Path 29 | 30 | 31 | @dataclass(frozen=True) 32 | class TrainingConfig: 33 | root_dir: Path 34 | trained_model_path: Path 35 | updated_base_model_path: Path 36 | training_data: Path 37 | params_epochs: int 38 | params_batch_size: int 39 | params_is_augmentation: bool 40 | params_image_size: list 41 | 42 | @dataclass(frozen=True) 43 | class EvaluationConfig: 44 | path_of_model: Path 45 | training_data: Path 46 | params_image_size: list 47 | params_batch_size: int 
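Note: every entity above is a @dataclass(frozen=True), so each pipeline stage receives a read-only config object and cannot mutate it mid-run. A minimal sketch of that behavior (the field values below are illustrative placeholders, not the repo's real config):

from dataclasses import FrozenInstanceError
from pathlib import Path
from deepClassifier.entity import DataIngestionConfig

# placeholder values, for illustration only
cfg = DataIngestionConfig(
    root_dir=Path("artifacts/data_ingestion"),
    source_URL="https://example.com/data.zip",
    local_data_file=Path("artifacts/data_ingestion/data.zip"),
    unzip_dir=Path("artifacts/data_ingestion"),
)

try:
    cfg.unzip_dir = Path("elsewhere")  # frozen=True turns this assignment into an error
except FrozenInstanceError as err:
    print(f"config entities are immutable: {err}")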
-------------------------------------------------------------------------------- /src/deepClassifier/pipeline/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c17hawke/FSDS_NOV_deepCNNClassifier/e54341d242a39256c54a90acdead18bc53031677/src/deepClassifier/pipeline/__init__.py -------------------------------------------------------------------------------- /src/deepClassifier/pipeline/stage_01_data_ingestion.py: -------------------------------------------------------------------------------- 1 | from deepClassifier.config import ConfigurationManager 2 | from deepClassifier.components import DataIngestion 3 | from deepClassifier import logger 4 | 5 | STAGE_NAME = "Data Ingestion stage" 6 | 7 | def main(): 8 | config = ConfigurationManager() 9 | data_ingestion_config = config.get_data_ingestion_config() 10 | data_ingestion = DataIngestion(config=data_ingestion_config) 11 | data_ingestion.download_file() 12 | data_ingestion.unzip_and_clean() 13 | 14 | if __name__ == '__main__': 15 | try: 16 | logger.info(f">>>>>> stage {STAGE_NAME} started <<<<<<") 17 | main() 18 | logger.info(f">>>>>> stage {STAGE_NAME} completed <<<<<<\n\nx==========x") 19 | except Exception as e: 20 | logger.exception(e) 21 | raise e -------------------------------------------------------------------------------- /src/deepClassifier/pipeline/stage_02_prepare_base_model.py: -------------------------------------------------------------------------------- 1 | from deepClassifier.config import ConfigurationManager 2 | from deepClassifier.components import PrepareBaseModel 3 | from deepClassifier import logger 4 | 5 | STAGE_NAME = "Prepare base model" 6 | 7 | def main(): 8 | config = ConfigurationManager() 9 | prepare_base_model_config = config.get_prepare_base_model_config() 10 | prepare_base_model = PrepareBaseModel(config=prepare_base_model_config) 11 | prepare_base_model.get_base_model() 12 | prepare_base_model.update_base_model() 13 | 14 | if __name__ == '__main__': 15 | try: 16 | logger.info(f"*******************") 17 | logger.info(f">>>>>> stage {STAGE_NAME} started <<<<<<") 18 | main() 19 | logger.info(f">>>>>> stage {STAGE_NAME} completed <<<<<<\n\nx==========x") 20 | except Exception as e: 21 | logger.exception(e) 22 | raise e -------------------------------------------------------------------------------- /src/deepClassifier/pipeline/stage_03_training.py: -------------------------------------------------------------------------------- 1 | from deepClassifier.config import ConfigurationManager 2 | from deepClassifier.components import PrepareCallback, Training 3 | from deepClassifier import logger 4 | 5 | STAGE_NAME = "Training" 6 | 7 | def main(): 8 | config = ConfigurationManager() 9 | prepare_callbacks_config = config.get_prepare_callback_config() 10 | prepare_callbacks = PrepareCallback(config=prepare_callbacks_config) 11 | callback_list = prepare_callbacks.get_tb_ckpt_callbacks() 12 | 13 | training_config = config.get_training_config() 14 | training = Training(config=training_config) 15 | training.get_base_model() 16 | training.train_valid_generator() 17 | training.train( 18 | callback_list=callback_list 19 | ) 20 | 21 | if __name__ == '__main__': 22 | try: 23 | logger.info(f"*******************") 24 | logger.info(f">>>>>> stage {STAGE_NAME} started <<<<<<") 25 | main() 26 | logger.info(f">>>>>> stage {STAGE_NAME} completed <<<<<<\n\nx==========x") 27 | except Exception as e: 28 | logger.exception(e) 29 | raise e 
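The stage scripts are independent entry points; in this repo they are chained via dvc.yaml, but a standalone runner could sequence them as in this sketch (a hypothetical main.py, not a file in this repo), including the evaluation stage that follows:

# hypothetical main.py: run all four stages in order, without DVC
from deepClassifier import logger
from deepClassifier.pipeline import (
    stage_01_data_ingestion,
    stage_02_prepare_base_model,
    stage_03_training,
    stage_04_evaluation,
)

STAGES = [
    ("Data Ingestion stage", stage_01_data_ingestion.main),
    ("Prepare base model", stage_02_prepare_base_model.main),
    ("Training", stage_03_training.main),
    ("Evaluation stage", stage_04_evaluation.main),
]

for stage_name, stage_main in STAGES:
    logger.info(f">>>>>> stage {stage_name} started <<<<<<")
    stage_main()  # each stage builds its own config via ConfigurationManager
    logger.info(f">>>>>> stage {stage_name} completed <<<<<<")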
-------------------------------------------------------------------------------- /src/deepClassifier/pipeline/stage_04_evaluation.py: -------------------------------------------------------------------------------- 1 | from deepClassifier.config import ConfigurationManager 2 | from deepClassifier.components import Evaluation 3 | from deepClassifier import logger 4 | 5 | STAGE_NAME = "Evaluation stage" 6 | 7 | def main(): 8 | config = ConfigurationManager() 9 | val_config = config.get_validation_config() 10 | evaluation = Evaluation(val_config) 11 | evaluation.evaluation() 12 | evaluation.save_score() 13 | 14 | if __name__ == '__main__': 15 | try: 16 | logger.info(f"*******************") 17 | logger.info(f">>>>>> stage {STAGE_NAME} started <<<<<<") 18 | main() 19 | logger.info(f">>>>>> stage {STAGE_NAME} completed <<<<<<\n\nx==========x") 20 | except Exception as e: 21 | logger.exception(e) 22 | raise e -------------------------------------------------------------------------------- /src/deepClassifier/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from deepClassifier.utils.common import * -------------------------------------------------------------------------------- /src/deepClassifier/utils/common.py: -------------------------------------------------------------------------------- 1 | import os 2 | from box.exceptions import BoxValueError 3 | import yaml 4 | from deepClassifier import logger 5 | import json 6 | import joblib 7 | from ensure import ensure_annotations 8 | from box import ConfigBox 9 | from pathlib import Path 10 | from typing import Any 11 | 12 | @ensure_annotations 13 | def read_yaml(path_to_yaml: Path) -> ConfigBox: 14 | """reads a yaml file and returns its content as a ConfigBox 15 | 16 | Args: 17 | path_to_yaml (Path): path to the yaml file 18 | 19 | Raises: 20 | ValueError: if yaml file is empty 21 | e: any other exception raised while reading the file 22 | 23 | Returns: 24 | ConfigBox: ConfigBox type 25 | """ 26 | try: 27 | with open(path_to_yaml) as yaml_file: 28 | content = yaml.safe_load(yaml_file) 29 | logger.info(f"yaml file: {path_to_yaml} loaded successfully") 30 | return ConfigBox(content) 31 | except BoxValueError: 32 | raise ValueError("yaml file is empty") 33 | except Exception as e: 34 | raise e 35 | 36 | @ensure_annotations 37 | def create_directories(path_to_directories: list, verbose=True): 38 | """create list of directories 39 | 40 | Args: 41 | path_to_directories (list): list of path of directories 42 | verbose (bool, optional): log each created directory. Defaults to True. 
43 | """ 44 | for path in path_to_directories: 45 | os.makedirs(path, exist_ok=True) 46 | if verbose: 47 | logger.info(f"created directory at: {path}") 48 | 49 | @ensure_annotations 50 | def save_json(path: Path, data: dict): 51 | """save json data 52 | 53 | Args: 54 | path (Path): path to json file 55 | data (dict): data to be saved in json file 56 | """ 57 | with open(path, "w") as f: 58 | json.dump(data, f, indent=4) 59 | 60 | logger.info(f"json file saved at: {path}") 61 | 62 | @ensure_annotations 63 | def load_json(path: Path) -> ConfigBox: 64 | """load json files data 65 | 66 | Args: 67 | path (Path): path to json file 68 | 69 | Returns: 70 | ConfigBox: data as class attributes instead of dict 71 | """ 72 | with open(path) as f: 73 | content = json.load(f) 74 | 75 | logger.info(f"json file loaded succesfully from: {path}") 76 | return ConfigBox(content) 77 | 78 | @ensure_annotations 79 | def save_bin(data: Any, path: Path): 80 | """save binary file 81 | 82 | Args: 83 | data (Any): data to be saved as binary 84 | path (Path): path to binary file 85 | """ 86 | joblib.dump(value=data, filename=path) 87 | logger.info(f"binary file saved at: {path}") 88 | 89 | @ensure_annotations 90 | def load_bin(path: Path) -> Any: 91 | """load binary data 92 | 93 | Args: 94 | path (Path): path to binary file 95 | 96 | Returns: 97 | Any: object stored in the file 98 | """ 99 | data = joblib.load(path) 100 | logger.info(f"binary file loaded from: {path}") 101 | return data 102 | 103 | @ensure_annotations 104 | def get_size(path: Path) -> str: 105 | """get size in KB 106 | 107 | Args: 108 | path (Path): path of the file 109 | 110 | Returns: 111 | str: size in KB 112 | """ 113 | size_in_kb = round(os.path.getsize(path)/1024) 114 | return f"~ {size_in_kb} KB" -------------------------------------------------------------------------------- /template.py: -------------------------------------------------------------------------------- 1 | import os 2 | from pathlib import Path 3 | import logging 4 | 5 | logging.basicConfig(level=logging.INFO, format='[%(asctime)s]: %(message)s: ') 6 | 7 | package_name = "deepClassifier" 8 | 9 | list_of_files = [ 10 | ".github/workflows/.gitkeep", 11 | f"src/{package_name}/__init__.py", 12 | f"src/{package_name}/components/__init__.py", 13 | f"src/{package_name}/utils/__init__.py", 14 | f"src/{package_name}/config/__init__.py", 15 | f"src/{package_name}/pipeline/__init__.py", 16 | f"src/{package_name}/entity/__init__.py", 17 | f"src/{package_name}/constants/__init__.py", 18 | "tests/__init__.py", 19 | "tests/unit/__init__.py", 20 | "tests/integration/__init__.py", 21 | "configs/config.yaml", 22 | "dvc.yaml", 23 | "params.yaml", 24 | "init_setup.sh", 25 | "requirements.txt", 26 | "requirements_dev.txt", 27 | "setup.py", 28 | "setup.cfg", 29 | "pyproject.toml", 30 | "tox.ini", 31 | "research/trials.ipynb", 32 | ] 33 | 34 | for filepath in list_of_files: 35 | filepath = Path(filepath) 36 | filedir, filename = os.path.split(filepath) 37 | if filedir != "": 38 | os.makedirs(filedir, exist_ok=True) 39 | logging.info(f"Creating directory: {filedir} for file: {filename}") 40 | 41 | if (not os.path.exists(filepath)) or (os.path.getsize(filepath) == 0): 42 | with open(filepath, "w") as f: 43 | pass # create an empty file 44 | logging.info(f"Creating empty file: {filepath}") 45 | 46 | else: 47 | logging.info(f"{filename} already exists") -------------------------------------------------------------------------------- /tests/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/c17hawke/FSDS_NOV_deepCNNClassifier/e54341d242a39256c54a90acdead18bc53031677/tests/__init__.py -------------------------------------------------------------------------------- /tests/data/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c17hawke/FSDS_NOV_deepCNNClassifier/e54341d242a39256c54a90acdead18bc53031677/tests/data/.gitkeep -------------------------------------------------------------------------------- /tests/data/demo.yaml: -------------------------------------------------------------------------------- 1 | key: value -------------------------------------------------------------------------------- /tests/data/empty.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c17hawke/FSDS_NOV_deepCNNClassifier/e54341d242a39256c54a90acdead18bc53031677/tests/data/empty.yaml -------------------------------------------------------------------------------- /tests/data/sample_data.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c17hawke/FSDS_NOV_deepCNNClassifier/e54341d242a39256c54a90acdead18bc53031677/tests/data/sample_data.zip -------------------------------------------------------------------------------- /tests/integration/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c17hawke/FSDS_NOV_deepCNNClassifier/e54341d242a39256c54a90acdead18bc53031677/tests/integration/__init__.py -------------------------------------------------------------------------------- /tests/integration/test_data_ingestion.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from deepClassifier.entity import DataIngestionConfig 3 | from deepClassifier.components import DataIngestion 4 | from pathlib import Path 5 | import os 6 | 7 | class Test_DataIngestion: 8 | data_ingestion_config = DataIngestionConfig( 9 | root_dir="tests/data/", 10 | source_URL="https://raw.githubusercontent.com/c17hawke/raw_data/main/sample_data.zip", 11 | local_data_file="tests/data/data_integration.zip", 12 | unzip_dir="tests/data/") 13 | 14 | def test_download(self): 15 | data_ingestion = DataIngestion(config=self.data_ingestion_config) 16 | data_ingestion.download_file() 17 | assert os.path.exists(self.data_ingestion_config.local_data_file) 18 | 19 | def test_unzip(self): 20 | data_ingestion = DataIngestion(config=self.data_ingestion_config) 21 | data_ingestion.unzip_and_clean() 22 | assert os.path.isdir(Path("tests/data/PetImages")) 23 | assert os.path.isdir(Path("tests/data/PetImages/Cat")) 24 | assert os.path.isdir(Path("tests/data/PetImages/Dog")) 25 | 26 | 27 | 28 | 29 | -------------------------------------------------------------------------------- /tests/integration/test_int.py: -------------------------------------------------------------------------------- 1 | def test_dummy(): 2 | assert True -------------------------------------------------------------------------------- /tests/unit/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c17hawke/FSDS_NOV_deepCNNClassifier/e54341d242a39256c54a90acdead18bc53031677/tests/unit/__init__.py 
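The ingestion tests here (and the unit-level download test that follows) write into tests/data/ and need network access for the download step. A sketch of an isolated variant using pytest's built-in tmp_path fixture (an illustration, not a test that exists in this repo):

import os
from deepClassifier.entity import DataIngestionConfig
from deepClassifier.components import DataIngestion

def test_download_isolated(tmp_path):
    # tmp_path is a fresh per-test directory provided by pytest,
    # so nothing is left behind in tests/data/
    config = DataIngestionConfig(
        root_dir=tmp_path,
        source_URL="https://raw.githubusercontent.com/c17hawke/raw_data/main/sample_data.zip",
        local_data_file=tmp_path / "data.zip",
        unzip_dir=tmp_path,
    )
    data_ingestion = DataIngestion(config=config)
    data_ingestion.download_file()
    assert os.path.exists(config.local_data_file)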
-------------------------------------------------------------------------------- /tests/unit/test_data_ingestion.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from deepClassifier.entity import DataIngestionConfig 3 | from deepClassifier.components import DataIngestion 4 | from pathlib import Path 5 | import os 6 | 7 | class Test_DataIngestion_download: 8 | data_ingestion_config = DataIngestionConfig( 9 | root_dir="tests/data/", 10 | source_URL="https://raw.githubusercontent.com/c17hawke/raw_data/main/sample_data.zip", 11 | local_data_file="tests/data/data.zip", 12 | unzip_dir="tests/data/") 13 | 14 | def test_download(self): 15 | data_ingestion = DataIngestion(config=self.data_ingestion_config) 16 | data_ingestion.download_file() 17 | assert os.path.exists(self.data_ingestion_config.local_data_file) 18 | 19 | 20 | class Test_DataIngestion_unzip: 21 | data_ingestion_config = DataIngestionConfig( 22 | root_dir="tests/data/", 23 | source_URL="", 24 | local_data_file="tests/data/sample_data.zip", 25 | unzip_dir="tests/data/") 26 | 27 | def test_unzip(self): 28 | data_ingestion = DataIngestion(config=self.data_ingestion_config) 29 | data_ingestion.unzip_and_clean() 30 | assert os.path.isdir(Path("tests/data/PetImages")) 31 | assert os.path.isdir(Path("tests/data/PetImages/Cat")) 32 | assert os.path.isdir(Path("tests/data/PetImages/Dog")) 33 | 34 | 35 | -------------------------------------------------------------------------------- /tests/unit/test_utils.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from deepClassifier.utils import read_yaml 3 | from pathlib import Path 4 | from box import ConfigBox 5 | from ensure.main import EnsureError 6 | 7 | 8 | class Test_read_yaml: 9 | yaml_files = [ 10 | "tests/data/empty.yaml", 11 | "tests/data/demo.yaml" 12 | ] 13 | 14 | def test_read_yaml_empty(self): 15 | with pytest.raises(ValueError): 16 | read_yaml(Path(self.yaml_files[0])) 17 | 18 | def test_read_yaml_return_type(self): 19 | response = read_yaml(Path(self.yaml_files[-1])) 20 | assert isinstance(response, ConfigBox) 21 | 22 | @pytest.mark.parametrize("path_to_yaml", yaml_files) 23 | def test_read_yaml_bad_type(self, path_to_yaml): 24 | with pytest.raises(EnsureError): 25 | read_yaml(path_to_yaml) 26 | 27 | 28 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = python3.8 3 | 4 | [gh-actions] 5 | python = 6 | 3.8: python3.8 7 | 8 | [testenv] 9 | deps = -rrequirements_dev.txt 10 | commands = 11 | # stop the build if there are Python syntax errors or undefined names 12 | flake8 src --count --select=E9,F63,F7,F82 --show-source --statistics 13 | # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide 14 | flake8 src --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics 15 | # type linting 16 | mypy src/ 17 | # pytest unit 18 | pytest -v tests/unit 19 | # pytest integration 20 | pytest -v tests/integration --------------------------------------------------------------------------------
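One detail worth calling out from test_utils.py: test_read_yaml_bad_type passes a plain str, and the @ensure_annotations decorator on read_yaml rejects it before the file is even opened, the same runtime check demonstrated with get_product in research/trials.ipynb. A minimal sketch, assuming it is run from the repo root so tests/data/demo.yaml resolves:

from pathlib import Path
from ensure.main import EnsureError
from deepClassifier.utils import read_yaml

try:
    read_yaml("tests/data/demo.yaml")  # a plain str violates the Path annotation
except EnsureError:
    print("read_yaml requires a pathlib.Path, not a str")

content = read_yaml(Path("tests/data/demo.yaml"))  # demo.yaml holds: key: value
print(content.key)  # ConfigBox exposes keys as attributes -> "value"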