├── .dvc
│   ├── .gitignore
│   └── config
├── .dvcignore
├── .github
│   └── workflows
│       └── main.yaml
├── .gitignore
├── Dockerfile
├── LICENSE
├── README.md
├── app.py
├── config
│   └── config.yaml
├── dvc.lock
├── dvc.yaml
├── inputImage.jpg
├── main.py
├── model
│   └── model.h5
├── params.yaml
├── requirements.txt
├── research
│   ├── 01_data_ingestion.ipynb
│   ├── 02_prepare_base_model.ipynb
│   ├── 03_model_training.ipynb
│   ├── 04_model_evaluation_with_mlflow.ipynb
│   └── trials.ipynb
├── scores.json
├── setup.py
├── src
│   └── cnnClassifier
│       ├── __init__.py
│       ├── components
│       │   ├── __init__.py
│       │   ├── data_ingestion.py
│       │   ├── model_evaluation_mlflow.py
│       │   ├── model_training.py
│       │   └── prepare_base_model.py
│       ├── config
│       │   ├── __init__.py
│       │   └── configuration.py
│       ├── constants
│       │   └── __init__.py
│       ├── entity
│       │   ├── __init__.py
│       │   └── config_entity.py
│       ├── pipeline
│       │   ├── __init__.py
│       │   ├── prediction.py
│       │   ├── stage_01_data_ingestion.py
│       │   ├── stage_02_prepare_base_model.py
│       │   ├── stage_03_model_training.py
│       │   └── stage_04_model_evaluation.py
│       └── utils
│           ├── __init__.py
│           └── common.py
├── template.py
└── templates
    └── index.html

--------------------------------------------------------------------------------
/.dvc/.gitignore:
--------------------------------------------------------------------------------
1 | /config.local
2 | /tmp
3 | /cache
--------------------------------------------------------------------------------
/.dvc/config:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/krishnaik06/Kidney-Disease-Classification-Deep-Learning-Project/0e50432a49baa216e12867f9feed1b63505dda86/.dvc/config
--------------------------------------------------------------------------------
/.dvcignore:
--------------------------------------------------------------------------------
1 | # Add patterns of files dvc should ignore, which could improve
2 | # the performance. Learn more at
3 | # https://dvc.org/doc/user-guide/dvcignore
4 |
--------------------------------------------------------------------------------
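The checked-in `.dvcignore` above carries only the template comment. Entries use the same glob syntax as `.gitignore`; a purely hypothetical illustration (not part of this repo):

```
# keep DVC from scanning scratch outputs
*.tmp
logs/
```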
/.github/workflows/main.yaml:
--------------------------------------------------------------------------------
 1 | name: workflow
 2 |
 3 | on:
 4 |   push:
 5 |     branches:
 6 |       - main
 7 |     paths-ignore:
 8 |       - 'README.md'
 9 |
10 | permissions:
11 |   id-token: write
12 |   contents: read
13 |
14 | jobs:
15 |   integration:
16 |     name: Continuous Integration
17 |     runs-on: ubuntu-latest
18 |     steps:
19 |       - name: Checkout Code
20 |         uses: actions/checkout@v3
21 |
22 |       - name: Lint code
23 |         run: echo "Linting repository"
24 |
25 |       - name: Run unit tests
26 |         run: echo "Running unit tests"
27 |
28 |   build-and-push-ecr-image:
29 |     name: Continuous Delivery
30 |     needs: integration
31 |     runs-on: ubuntu-latest
32 |     steps:
33 |       - name: Checkout Code
34 |         uses: actions/checkout@v3
35 |
36 |       - name: Install Utilities
37 |         run: |
38 |           sudo apt-get update
39 |           sudo apt-get install -y jq unzip
40 |       - name: Configure AWS credentials
41 |         uses: aws-actions/configure-aws-credentials@v1
42 |         with:
43 |           aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
44 |           aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
45 |           aws-region: ${{ secrets.AWS_REGION }}
46 |
47 |       - name: Login to Amazon ECR
48 |         id: login-ecr
49 |         uses: aws-actions/amazon-ecr-login@v1
50 |
51 |       - name: Build, tag, and push image to Amazon ECR
52 |         id: build-image
53 |         env:
54 |           ECR_REGISTRY: ${{ steps.login-ecr.outputs.registry }}
55 |           ECR_REPOSITORY: ${{ secrets.ECR_REPOSITORY_NAME }}
56 |           IMAGE_TAG: latest
57 |         run: |
58 |           # Build a docker container and
59 |           # push it to ECR so that it can
60 |           # be deployed to ECS.
61 |           docker build -t $ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG .
62 |           docker push $ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG
63 |           echo "::set-output name=image::$ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG"
64 |
65 |
66 |   Continuous-Deployment:
67 |     needs: build-and-push-ecr-image
68 |     runs-on: self-hosted
69 |     steps:
70 |       - name: Checkout
71 |         uses: actions/checkout@v3
72 |
73 |       - name: Configure AWS credentials
74 |         uses: aws-actions/configure-aws-credentials@v1
75 |         with:
76 |           aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
77 |           aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
78 |           aws-region: ${{ secrets.AWS_REGION }}
79 |
80 |       - name: Login to Amazon ECR
81 |         id: login-ecr
82 |         uses: aws-actions/amazon-ecr-login@v1
83 |
84 |
85 |       - name: Pull latest images
86 |         run: |
87 |           docker pull ${{secrets.AWS_ECR_LOGIN_URI}}/${{ secrets.ECR_REPOSITORY_NAME }}:latest
88 |
89 |       # - name: Stop and remove container if running
90 |       #   run: |
91 |       #     docker ps -q --filter "name=cnncls" | grep -q .
&& docker stop cnncls && docker rm -fv cnncls 92 | 93 | - name: Run Docker Image to serve users 94 | run: | 95 | docker run -d -p 8080:8080 --name=cnncls -e 'AWS_ACCESS_KEY_ID=${{ secrets.AWS_ACCESS_KEY_ID }}' -e 'AWS_SECRET_ACCESS_KEY=${{ secrets.AWS_SECRET_ACCESS_KEY }}' -e 'AWS_REGION=${{ secrets.AWS_REGION }}' ${{secrets.AWS_ECR_LOGIN_URI}}/${{ secrets.ECR_REPOSITORY_NAME }}:latest 96 | - name: Clean previous images and containers 97 | run: | 98 | docker system prune -f -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 160 | #.idea/ 161 | 162 | artifacts/* -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.8-slim-buster 2 | 3 | RUN apt update -y && apt install awscli -y 4 | WORKDIR /app 5 | 6 | COPY . /app 7 | RUN pip install -r requirements.txt 8 | 9 | CMD ["python3", "app.py"] -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Krish Naik 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Kidney-Disease-Classification-MLflow-DVC 2 | 3 | 4 | ## Workflows 5 | 6 | 1. Update config.yaml 7 | 2. Update secrets.yaml [Optional] 8 | 3. Update params.yaml 9 | 4. Update the entity 10 | 5. Update the configuration manager in src config 11 | 6. Update the components 12 | 7. Update the pipeline 13 | 8. Update the main.py 14 | 9. Update the dvc.yaml 15 | 10. app.py 16 | 17 | # How to run? 
18 | ### STEPS:
19 |
20 | Clone the repository
21 |
22 | ```bash
23 | git clone https://github.com/krishnaik06/Kidney-Disease-Classification-Deep-Learning-Project
24 | ```
25 | ### STEP 01- Create a conda environment after opening the repository
26 |
27 | ```bash
28 | conda create -n cnncls python=3.8 -y
29 | ```
30 |
31 | ```bash
32 | conda activate cnncls
33 | ```
34 |
35 |
36 | ### STEP 02- Install the requirements
37 | ```bash
38 | pip install -r requirements.txt
39 | ```
40 |
41 | ```bash
42 | # Finally, run the following command
43 | python app.py
44 | ```
45 |
46 | Now,
47 | ```bash
48 | # open http://localhost:8080 in your browser (app.py serves on port 8080)
49 | ```
50 |
51 |
52 |
53 |
54 |
55 |
56 | ## MLflow
57 |
58 | - [Documentation](https://mlflow.org/docs/latest/index.html)
59 |
60 | - [MLflow tutorial](https://youtu.be/qdcHHrsXA48?si=bD5vDS60akNphkem)
61 |
62 | ##### cmd
63 | - mlflow ui
64 |
65 | ### dagshub
66 | [dagshub](https://dagshub.com/)
67 |
68 | MLFLOW_TRACKING_URI=https://dagshub.com/entbappy/Kidney-Disease-Classification-MLflow-DVC.mlflow \
69 | MLFLOW_TRACKING_USERNAME=entbappy \
70 | MLFLOW_TRACKING_PASSWORD=6824692c47a369aa6f9eac5b10041d5c8edbcef0 \
71 | python script.py
72 |
73 | Run this to export as env variables:
74 |
75 | ```bash
76 |
77 | export MLFLOW_TRACKING_URI=https://dagshub.com/entbappy/Kidney-Disease-Classification-MLflow-DVC.mlflow
78 |
79 | export MLFLOW_TRACKING_USERNAME=entbappy
80 |
81 | export MLFLOW_TRACKING_PASSWORD=6824692c47a369aa6f9eac5b10041d5c8edbcef0
82 |
83 | ```
84 |
85 |
86 | ### DVC cmd
87 |
88 | 1. dvc init
89 | 2. dvc repro
90 | 3. dvc dag
91 |
92 |
93 | ## About MLflow & DVC
94 |
95 | MLflow
96 |
97 | - It's production grade
98 | - Traces all of your experiments
99 | - Logging & tagging your models
100 |
101 |
102 | DVC
103 |
104 | - It's very lightweight, suited for POC only
105 | - Lightweight experiments tracker
106 | - It can perform orchestration (creating pipelines)
107 |
108 |
109 |
110 | # AWS-CICD-Deployment-with-Github-Actions
111 |
112 | ## 1. Login to AWS console.
113 |
114 | ## 2. Create IAM user for deployment
115 |
116 | #with specific access
117 |
118 | 1. EC2 access: It is a virtual machine
119 |
120 | 2. ECR: Elastic Container Registry, to save your docker image in AWS
121 |
122 |
123 | #Description: About the deployment
124 |
125 | 1. Build docker image of the source code
126 |
127 | 2. Push your docker image to ECR
128 |
129 | 3. Launch your EC2
130 |
131 | 4. Pull your image from ECR in EC2
132 |
133 | 5. Launch your docker image in EC2
134 |
135 | #Policy:
136 |
137 | 1. AmazonEC2ContainerRegistryFullAccess
138 |
139 | 2. AmazonEC2FullAccess
140 |
141 |
142 | ## 3. Create ECR repo to store/save docker image
143 | - Save the URI: 566373416292.dkr.ecr.us-east-1.amazonaws.com/chicken
144 |
145 |
146 | ## 4. Create EC2 machine (Ubuntu)
147 |
148 | ## 5. Open EC2 and install docker in the EC2 machine:
149 |
150 |
151 | #optional
152 |
153 | sudo apt-get update -y
154 |
155 | sudo apt-get upgrade
156 |
157 | #required
158 |
159 | curl -fsSL https://get.docker.com -o get-docker.sh
160 |
161 | sudo sh get-docker.sh
162 |
163 | sudo usermod -aG docker ubuntu
164 |
165 | newgrp docker
166 |
167 | ## 6. Configure EC2 as self-hosted runner:
168 | Settings > Actions > Runners > New self-hosted runner > choose OS > then run the commands one by one
169 |
170 |
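Before registering the runner, it helps to confirm that the Docker install from step 5 actually works for the `ubuntu` user. A quick sanity check (hypothetical session, not part of the original setup notes):

```bash
# Should print "Hello from Docker!" without sudo if the usermod above took effect
docker run --rm hello-world
```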
171 | ## 7. Setup github secrets:
172 |
173 | AWS_ACCESS_KEY_ID=
174 |
175 | AWS_SECRET_ACCESS_KEY=
176 |
177 | AWS_REGION = us-east-1
178 |
179 | AWS_ECR_LOGIN_URI = demo>> 566373416292.dkr.ecr.ap-south-1.amazonaws.com
180 |
181 | ECR_REPOSITORY_NAME = simple-app
182 |
183 |
--------------------------------------------------------------------------------
/app.py:
--------------------------------------------------------------------------------
 1 | from flask import Flask, request, jsonify, render_template
 2 | import os
 3 | from flask_cors import CORS, cross_origin
 4 | from cnnClassifier.utils.common import decodeImage
 5 | from cnnClassifier.pipeline.prediction import PredictionPipeline
 6 |
 7 |
 8 |
 9 | os.putenv('LANG', 'en_US.UTF-8')
10 | os.putenv('LC_ALL', 'en_US.UTF-8')
11 |
12 | app = Flask(__name__)
13 | CORS(app)
14 |
15 |
16 | class ClientApp:
17 |     def __init__(self):
18 |         self.filename = "inputImage.jpg"
19 |         self.classifier = PredictionPipeline(self.filename)
20 |
21 |
22 | @app.route("/", methods=['GET'])
23 | @cross_origin()
24 | def home():
25 |     return render_template('index.html')
26 |
27 |
28 |
29 |
30 | @app.route("/train", methods=['GET','POST'])
31 | @cross_origin()
32 | def trainRoute():
33 |     os.system("python main.py")
34 |     # os.system("dvc repro")
35 |     return "Training done successfully!"
36 |
37 |
38 |
39 | @app.route("/predict", methods=['POST'])
40 | @cross_origin()
41 | def predictRoute():
42 |     image = request.json['image']
43 |     decodeImage(image, clApp.filename)
44 |     result = clApp.classifier.predict()
45 |     return jsonify(result)
46 |
47 |
48 | if __name__ == "__main__":
49 |     clApp = ClientApp()
50 |
51 |     app.run(host='0.0.0.0', port=8080)  # for AWS
52 |
53 |
--------------------------------------------------------------------------------
/config/config.yaml:
--------------------------------------------------------------------------------
 1 | artifacts_root: artifacts
 2 |
 3 |
 4 | data_ingestion:
 5 |   root_dir: artifacts/data_ingestion
 6 |   source_URL: https://drive.google.com/file/d/1vlhZ5c7abUKF8xXERIw6m9Te8fW7ohw3/view?usp=sharing
 7 |   local_data_file: artifacts/data_ingestion/data.zip
 8 |   unzip_dir: artifacts/data_ingestion
 9 |
10 |
11 | prepare_base_model:
12 |   root_dir: artifacts/prepare_base_model
13 |   base_model_path: artifacts/prepare_base_model/base_model.h5
14 |   updated_base_model_path: artifacts/prepare_base_model/base_model_updated.h5
15 |
16 |
17 |
18 |
19 | training:
20 |   root_dir: artifacts/training
21 |   trained_model_path: artifacts/training/model.h5
22 |
--------------------------------------------------------------------------------
/dvc.lock:
--------------------------------------------------------------------------------
 1 | schema: '2.0'
 2 | stages:
 3 |   data_ingestion:
 4 |     cmd: python src/cnnClassifier/pipeline/stage_01_data_ingestion.py
 5 |     deps:
 6 |     - path: config/config.yaml
 7 |       hash: md5
 8 |       md5: b4b65d6956da078d7e0a360eb4018eac
 9 |       size: 588
10 |     - path: src/cnnClassifier/pipeline/stage_01_data_ingestion.py
11 |       hash: md5
12 |       md5: fd612007c0aaccdf7b95571f53f0454f
13 |       size: 910
14 |     outs:
15 |     - path: artifacts/data_ingestion/kidney-ct-scan-image
16 |       hash: md5
17 |       md5: 33ed59dbe5dec8ce2bb8e489b55203e4.dir
18 |       size: 58936381
19 |       nfiles: 465
20 |   prepare_base_model:
21 |     cmd: python src/cnnClassifier/pipeline/stage_02_prepare_base_model.py
22 |     deps:
23 |     - path: config/config.yaml
24 |       hash: md5
25 |       md5: b4b65d6956da078d7e0a360eb4018eac
26 |       size: 588
27 |     - path: src/cnnClassifier/pipeline/stage_02_prepare_base_model.py
28 |       hash: md5
29
| md5: e8e39a301f4b90d1b4f2c86acc66ef32 30 | size: 999 31 | params: 32 | params.yaml: 33 | CLASSES: 2 34 | IMAGE_SIZE: 35 | - 224 36 | - 224 37 | - 3 38 | INCLUDE_TOP: false 39 | LEARNING_RATE: 0.01 40 | WEIGHTS: imagenet 41 | outs: 42 | - path: artifacts/prepare_base_model 43 | hash: md5 44 | md5: db5e77fa73dd4d15cf5a783646457181.dir 45 | size: 118054560 46 | nfiles: 2 47 | training: 48 | cmd: python src/cnnClassifier/pipeline/stage_03_model_training.py 49 | deps: 50 | - path: artifacts/data_ingestion/kidney-ct-scan-image 51 | hash: md5 52 | md5: 33ed59dbe5dec8ce2bb8e489b55203e4.dir 53 | size: 58936381 54 | nfiles: 465 55 | - path: artifacts/prepare_base_model 56 | hash: md5 57 | md5: db5e77fa73dd4d15cf5a783646457181.dir 58 | size: 118054560 59 | nfiles: 2 60 | - path: config/config.yaml 61 | hash: md5 62 | md5: b4b65d6956da078d7e0a360eb4018eac 63 | size: 588 64 | - path: src/cnnClassifier/pipeline/stage_03_model_training.py 65 | hash: md5 66 | md5: 316063ef5fe008654c19c00f5863d9d3 67 | size: 929 68 | params: 69 | params.yaml: 70 | AUGMENTATION: true 71 | BATCH_SIZE: 16 72 | EPOCHS: 1 73 | IMAGE_SIZE: 74 | - 224 75 | - 224 76 | - 3 77 | outs: 78 | - path: artifacts/training/model.h5 79 | hash: md5 80 | md5: a1edc90fa58890855ca59b8c0f04816f 81 | size: 59337520 82 | evaluation: 83 | cmd: python src/cnnClassifier/pipeline/stage_04_model_evaluation.py 84 | deps: 85 | - path: artifacts/data_ingestion/kidney-ct-scan-image 86 | hash: md5 87 | md5: 33ed59dbe5dec8ce2bb8e489b55203e4.dir 88 | size: 58936381 89 | nfiles: 465 90 | - path: artifacts/training/model.h5 91 | hash: md5 92 | md5: a1edc90fa58890855ca59b8c0f04816f 93 | size: 59337520 94 | - path: config/config.yaml 95 | hash: md5 96 | md5: b4b65d6956da078d7e0a360eb4018eac 97 | size: 588 98 | - path: src/cnnClassifier/pipeline/stage_04_model_evaluation.py 99 | hash: md5 100 | md5: e5acfe1c4e9e0a3f116bebf9898cb1ee 101 | size: 938 102 | params: 103 | params.yaml: 104 | BATCH_SIZE: 16 105 | IMAGE_SIZE: 106 | - 224 107 | - 224 108 | - 3 109 | outs: 110 | - path: scores.json 111 | hash: md5 112 | md5: 5df350113349a34928c05c2ce4bcb685 113 | size: 73 114 | -------------------------------------------------------------------------------- /dvc.yaml: -------------------------------------------------------------------------------- 1 | stages: 2 | data_ingestion: 3 | cmd: python src/cnnClassifier/pipeline/stage_01_data_ingestion.py 4 | deps: 5 | - src/cnnClassifier/pipeline/stage_01_data_ingestion.py 6 | - config/config.yaml 7 | outs: 8 | - artifacts/data_ingestion/kidney-ct-scan-image 9 | 10 | 11 | prepare_base_model: 12 | cmd: python src/cnnClassifier/pipeline/stage_02_prepare_base_model.py 13 | deps: 14 | - src/cnnClassifier/pipeline/stage_02_prepare_base_model.py 15 | - config/config.yaml 16 | params: 17 | - IMAGE_SIZE 18 | - INCLUDE_TOP 19 | - CLASSES 20 | - WEIGHTS 21 | - LEARNING_RATE 22 | outs: 23 | - artifacts/prepare_base_model 24 | 25 | 26 | training: 27 | cmd: python src/cnnClassifier/pipeline/stage_03_model_training.py 28 | deps: 29 | - src/cnnClassifier/pipeline/stage_03_model_training.py 30 | - config/config.yaml 31 | - artifacts/data_ingestion/kidney-ct-scan-image 32 | - artifacts/prepare_base_model 33 | params: 34 | - IMAGE_SIZE 35 | - EPOCHS 36 | - BATCH_SIZE 37 | - AUGMENTATION 38 | outs: 39 | - artifacts/training/model.h5 40 | 41 | 42 | evaluation: 43 | cmd: python src/cnnClassifier/pipeline/stage_04_model_evaluation.py 44 | deps: 45 | - src/cnnClassifier/pipeline/stage_04_model_evaluation.py 46 | - config/config.yaml 47 | - 
artifacts/data_ingestion/kidney-ct-scan-image 48 | - artifacts/training/model.h5 49 | params: 50 | - IMAGE_SIZE 51 | - BATCH_SIZE 52 | metrics: 53 | - scores.json: 54 | cache: false 55 | -------------------------------------------------------------------------------- /inputImage.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/krishnaik06/Kidney-Disease-Classification-Deep-Learning-Project/0e50432a49baa216e12867f9feed1b63505dda86/inputImage.jpg -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | from cnnClassifier import logger 2 | from cnnClassifier.pipeline.stage_01_data_ingestion import DataIngestionTrainingPipeline 3 | from cnnClassifier.pipeline.stage_02_prepare_base_model import PrepareBaseModelTrainingPipeline 4 | from cnnClassifier.pipeline.stage_03_model_training import ModelTrainingPipeline 5 | from cnnClassifier.pipeline.stage_04_model_evaluation import EvaluationPipeline 6 | 7 | 8 | 9 | 10 | STAGE_NAME = "Data Ingestion stage" 11 | try: 12 | logger.info(f">>>>>> stage {STAGE_NAME} started <<<<<<") 13 | data_ingestion = DataIngestionTrainingPipeline() 14 | data_ingestion.main() 15 | logger.info(f">>>>>> stage {STAGE_NAME} completed <<<<<<\n\nx==========x") 16 | except Exception as e: 17 | logger.exception(e) 18 | raise e 19 | 20 | 21 | 22 | 23 | 24 | STAGE_NAME = "Prepare base model" 25 | try: 26 | logger.info(f"*******************") 27 | logger.info(f">>>>>> stage {STAGE_NAME} started <<<<<<") 28 | prepare_base_model = PrepareBaseModelTrainingPipeline() 29 | prepare_base_model.main() 30 | logger.info(f">>>>>> stage {STAGE_NAME} completed <<<<<<\n\nx==========x") 31 | except Exception as e: 32 | logger.exception(e) 33 | raise e 34 | 35 | 36 | 37 | STAGE_NAME = "Training" 38 | try: 39 | logger.info(f"*******************") 40 | logger.info(f">>>>>> stage {STAGE_NAME} started <<<<<<") 41 | model_trainer = ModelTrainingPipeline() 42 | model_trainer.main() 43 | logger.info(f">>>>>> stage {STAGE_NAME} completed <<<<<<\n\nx==========x") 44 | except Exception as e: 45 | logger.exception(e) 46 | raise e 47 | 48 | 49 | 50 | 51 | STAGE_NAME = "Evaluation stage" 52 | try: 53 | logger.info(f"*******************") 54 | logger.info(f">>>>>> stage {STAGE_NAME} started <<<<<<") 55 | model_evalution = EvaluationPipeline() 56 | model_evalution.main() 57 | logger.info(f">>>>>> stage {STAGE_NAME} completed <<<<<<\n\nx==========x") 58 | 59 | except Exception as e: 60 | logger.exception(e) 61 | raise e 62 | 63 | 64 | -------------------------------------------------------------------------------- /model/model.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/krishnaik06/Kidney-Disease-Classification-Deep-Learning-Project/0e50432a49baa216e12867f9feed1b63505dda86/model/model.h5 -------------------------------------------------------------------------------- /params.yaml: -------------------------------------------------------------------------------- 1 | AUGMENTATION: True 2 | IMAGE_SIZE: [224, 224, 3] # as per VGG 16 model 3 | BATCH_SIZE: 16 4 | INCLUDE_TOP: False 5 | EPOCHS: 1 6 | CLASSES: 2 7 | WEIGHTS: imagenet 8 | LEARNING_RATE: 0.01 9 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | tensorflow==2.12.0 2 
| pandas 3 | dvc 4 | mlflow==2.2.2 5 | notebook 6 | numpy 7 | matplotlib 8 | seaborn 9 | python-box==6.0.2 10 | pyYAML 11 | tqdm 12 | ensure==1.0.2 13 | joblib 14 | types-PyYAML 15 | scipy 16 | Flask 17 | Flask-Cors 18 | gdown 19 | -e . -------------------------------------------------------------------------------- /research/01_data_ingestion.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import os" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 2, 15 | "metadata": {}, 16 | "outputs": [ 17 | { 18 | "data": { 19 | "text/plain": [ 20 | "'d:\\\\Bappy\\\\YouTube\\\\Kidney-Disease-Classification-Deep-Learning-Project\\\\research'" 21 | ] 22 | }, 23 | "execution_count": 2, 24 | "metadata": {}, 25 | "output_type": "execute_result" 26 | } 27 | ], 28 | "source": [ 29 | "%pwd" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 3, 35 | "metadata": {}, 36 | "outputs": [], 37 | "source": [ 38 | "os.chdir(\"../\")" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 4, 44 | "metadata": {}, 45 | "outputs": [ 46 | { 47 | "data": { 48 | "text/plain": [ 49 | "'d:\\\\Bappy\\\\YouTube\\\\Kidney-Disease-Classification-Deep-Learning-Project'" 50 | ] 51 | }, 52 | "execution_count": 4, 53 | "metadata": {}, 54 | "output_type": "execute_result" 55 | } 56 | ], 57 | "source": [ 58 | "%pwd" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": 5, 64 | "metadata": {}, 65 | "outputs": [], 66 | "source": [ 67 | "from dataclasses import dataclass\n", 68 | "from pathlib import Path\n", 69 | "\n", 70 | "\n", 71 | "@dataclass(frozen=True)\n", 72 | "class DataIngestionConfig:\n", 73 | " root_dir: Path\n", 74 | " source_URL: str\n", 75 | " local_data_file: Path\n", 76 | " unzip_dir: Path" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": 6, 82 | "metadata": {}, 83 | "outputs": [], 84 | "source": [ 85 | "from cnnClassifier.constants import *\n", 86 | "from cnnClassifier.utils.common import read_yaml, create_directories" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": 7, 92 | "metadata": {}, 93 | "outputs": [], 94 | "source": [ 95 | "class ConfigurationManager:\n", 96 | " def __init__(\n", 97 | " self,\n", 98 | " config_filepath = CONFIG_FILE_PATH,\n", 99 | " params_filepath = PARAMS_FILE_PATH):\n", 100 | "\n", 101 | " self.config = read_yaml(config_filepath)\n", 102 | " self.params = read_yaml(params_filepath)\n", 103 | "\n", 104 | " create_directories([self.config.artifacts_root])\n", 105 | "\n", 106 | "\n", 107 | " \n", 108 | " def get_data_ingestion_config(self) -> DataIngestionConfig:\n", 109 | " config = self.config.data_ingestion\n", 110 | "\n", 111 | " create_directories([config.root_dir])\n", 112 | "\n", 113 | " data_ingestion_config = DataIngestionConfig(\n", 114 | " root_dir=config.root_dir,\n", 115 | " source_URL=config.source_URL,\n", 116 | " local_data_file=config.local_data_file,\n", 117 | " unzip_dir=config.unzip_dir \n", 118 | " )\n", 119 | "\n", 120 | " return data_ingestion_config" 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": 8, 126 | "metadata": {}, 127 | "outputs": [], 128 | "source": [ 129 | "import os\n", 130 | "import zipfile\n", 131 | "import gdown\n", 132 | "from cnnClassifier import logger\n", 133 | "from cnnClassifier.utils.common import get_size" 134 | ] 135 | }, 
136 | { 137 | "cell_type": "code", 138 | "execution_count": 9, 139 | "metadata": {}, 140 | "outputs": [], 141 | "source": [ 142 | "class DataIngestion:\n", 143 | " def __init__(self, config: DataIngestionConfig):\n", 144 | " self.config = config\n", 145 | "\n", 146 | " \n", 147 | " def download_file(self)-> str:\n", 148 | " '''\n", 149 | " Fetch data from the url\n", 150 | " '''\n", 151 | "\n", 152 | " try: \n", 153 | " dataset_url = self.config.source_URL\n", 154 | " zip_download_dir = self.config.local_data_file\n", 155 | " os.makedirs(\"artifacts/data_ingestion\", exist_ok=True)\n", 156 | " logger.info(f\"Downloading data from {dataset_url} into file {zip_download_dir}\")\n", 157 | "\n", 158 | " file_id = dataset_url.split(\"/\")[-2]\n", 159 | " prefix = 'https://drive.google.com/uc?/export=download&id='\n", 160 | " gdown.download(prefix+file_id,zip_download_dir)\n", 161 | "\n", 162 | " logger.info(f\"Downloaded data from {dataset_url} into file {zip_download_dir}\")\n", 163 | "\n", 164 | " except Exception as e:\n", 165 | " raise e\n", 166 | " \n", 167 | " \n", 168 | "\n", 169 | " def extract_zip_file(self):\n", 170 | " \"\"\"\n", 171 | " zip_file_path: str\n", 172 | " Extracts the zip file into the data directory\n", 173 | " Function returns None\n", 174 | " \"\"\"\n", 175 | " unzip_path = self.config.unzip_dir\n", 176 | " os.makedirs(unzip_path, exist_ok=True)\n", 177 | " with zipfile.ZipFile(self.config.local_data_file, 'r') as zip_ref:\n", 178 | " zip_ref.extractall(unzip_path)\n", 179 | "\n" 180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "execution_count": 10, 185 | "metadata": {}, 186 | "outputs": [ 187 | { 188 | "name": "stdout", 189 | "output_type": "stream", 190 | "text": [ 191 | "[2023-09-29 19:30:37,399: INFO: common: yaml file: config\\config.yaml loaded successfully]\n", 192 | "[2023-09-29 19:30:37,407: INFO: common: yaml file: params.yaml loaded successfully]\n", 193 | "[2023-09-29 19:30:37,408: INFO: common: created directory at: artifacts]\n", 194 | "[2023-09-29 19:30:37,410: INFO: common: created directory at: artifacts/data_ingestion]\n", 195 | "[2023-09-29 19:30:37,411: INFO: 3172177572: Downloading data from https://drive.google.com/file/d/1vlhZ5c7abUKF8xXERIw6m9Te8fW7ohw3/view?usp=sharing into file artifacts/data_ingestion/data.zip]\n" 196 | ] 197 | }, 198 | { 199 | "name": "stderr", 200 | "output_type": "stream", 201 | "text": [ 202 | "Downloading...\n", 203 | "From (uriginal): https://drive.google.com/uc?/export=download&id=1vlhZ5c7abUKF8xXERIw6m9Te8fW7ohw3\n", 204 | "From (redirected): https://drive.google.com/uc?/export=download&id=1vlhZ5c7abUKF8xXERIw6m9Te8fW7ohw3&confirm=t&uuid=c57857b4-dc46-4b68-8e4f-7fc6264eacfe\n", 205 | "To: d:\\Bappy\\YouTube\\Kidney-Disease-Classification-Deep-Learning-Project\\artifacts\\data_ingestion\\data.zip\n", 206 | "100%|██████████| 57.7M/57.7M [01:38<00:00, 587kB/s]" 207 | ] 208 | }, 209 | { 210 | "name": "stdout", 211 | "output_type": "stream", 212 | "text": [ 213 | "[2023-09-29 19:32:18,290: INFO: 3172177572: Downloaded data from https://drive.google.com/file/d/1vlhZ5c7abUKF8xXERIw6m9Te8fW7ohw3/view?usp=sharing into file artifacts/data_ingestion/data.zip]\n" 214 | ] 215 | }, 216 | { 217 | "name": "stderr", 218 | "output_type": "stream", 219 | "text": [ 220 | "\n" 221 | ] 222 | } 223 | ], 224 | "source": [ 225 | "try:\n", 226 | " config = ConfigurationManager()\n", 227 | " data_ingestion_config = config.get_data_ingestion_config()\n", 228 | " data_ingestion = DataIngestion(config=data_ingestion_config)\n", 229 | " 
data_ingestion.download_file()\n", 230 | " data_ingestion.extract_zip_file()\n", 231 | "except Exception as e:\n", 232 | " raise e" 233 | ] 234 | }, 235 | { 236 | "cell_type": "code", 237 | "execution_count": null, 238 | "metadata": {}, 239 | "outputs": [], 240 | "source": [] 241 | } 242 | ], 243 | "metadata": { 244 | "kernelspec": { 245 | "display_name": "kidney", 246 | "language": "python", 247 | "name": "python3" 248 | }, 249 | "language_info": { 250 | "codemirror_mode": { 251 | "name": "ipython", 252 | "version": 3 253 | }, 254 | "file_extension": ".py", 255 | "mimetype": "text/x-python", 256 | "name": "python", 257 | "nbconvert_exporter": "python", 258 | "pygments_lexer": "ipython3", 259 | "version": "3.8.18" 260 | }, 261 | "orig_nbformat": 4 262 | }, 263 | "nbformat": 4, 264 | "nbformat_minor": 2 265 | } 266 | -------------------------------------------------------------------------------- /research/02_prepare_base_model.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import os" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 2, 15 | "metadata": {}, 16 | "outputs": [ 17 | { 18 | "data": { 19 | "text/plain": [ 20 | "'d:\\\\Bappy\\\\YouTube\\\\Kidney-Disease-Classification-Deep-Learning-Project\\\\research'" 21 | ] 22 | }, 23 | "execution_count": 2, 24 | "metadata": {}, 25 | "output_type": "execute_result" 26 | } 27 | ], 28 | "source": [ 29 | "%pwd" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 3, 35 | "metadata": {}, 36 | "outputs": [], 37 | "source": [ 38 | "os.chdir(\"../\")" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 4, 44 | "metadata": {}, 45 | "outputs": [ 46 | { 47 | "data": { 48 | "text/plain": [ 49 | "'d:\\\\Bappy\\\\YouTube\\\\Kidney-Disease-Classification-Deep-Learning-Project'" 50 | ] 51 | }, 52 | "execution_count": 4, 53 | "metadata": {}, 54 | "output_type": "execute_result" 55 | } 56 | ], 57 | "source": [ 58 | "%pwd" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": 5, 64 | "metadata": {}, 65 | "outputs": [], 66 | "source": [ 67 | "from dataclasses import dataclass\n", 68 | "from pathlib import Path\n", 69 | "\n", 70 | "\n", 71 | "@dataclass(frozen=True)\n", 72 | "class PrepareBaseModelConfig:\n", 73 | " root_dir: Path\n", 74 | " base_model_path: Path\n", 75 | " updated_base_model_path: Path\n", 76 | " params_image_size: list\n", 77 | " params_learning_rate: float\n", 78 | " params_include_top: bool\n", 79 | " params_weights: str\n", 80 | " params_classes: int" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": 6, 86 | "metadata": {}, 87 | "outputs": [], 88 | "source": [ 89 | "from cnnClassifier.constants import *\n", 90 | "from cnnClassifier.utils.common import read_yaml, create_directories" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": 7, 96 | "metadata": {}, 97 | "outputs": [], 98 | "source": [ 99 | "class ConfigurationManager:\n", 100 | " def __init__(\n", 101 | " self,\n", 102 | " config_filepath = CONFIG_FILE_PATH,\n", 103 | " params_filepath = PARAMS_FILE_PATH):\n", 104 | "\n", 105 | " self.config = read_yaml(config_filepath)\n", 106 | " self.params = read_yaml(params_filepath)\n", 107 | "\n", 108 | " create_directories([self.config.artifacts_root])\n", 109 | "\n", 110 | " \n", 111 | "\n", 112 | " def get_prepare_base_model_config(self) -> 
PrepareBaseModelConfig:\n", 113 | " config = self.config.prepare_base_model\n", 114 | " \n", 115 | " create_directories([config.root_dir])\n", 116 | "\n", 117 | " prepare_base_model_config = PrepareBaseModelConfig(\n", 118 | " root_dir=Path(config.root_dir),\n", 119 | " base_model_path=Path(config.base_model_path),\n", 120 | " updated_base_model_path=Path(config.updated_base_model_path),\n", 121 | " params_image_size=self.params.IMAGE_SIZE,\n", 122 | " params_learning_rate=self.params.LEARNING_RATE,\n", 123 | " params_include_top=self.params.INCLUDE_TOP,\n", 124 | " params_weights=self.params.WEIGHTS,\n", 125 | " params_classes=self.params.CLASSES\n", 126 | " )\n", 127 | "\n", 128 | " return prepare_base_model_config" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": 8, 134 | "metadata": {}, 135 | "outputs": [], 136 | "source": [ 137 | "import os\n", 138 | "import urllib.request as request\n", 139 | "from zipfile import ZipFile\n", 140 | "import tensorflow as tf" 141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": 9, 146 | "metadata": {}, 147 | "outputs": [], 148 | "source": [ 149 | "class PrepareBaseModel:\n", 150 | " def __init__(self, config: PrepareBaseModelConfig):\n", 151 | " self.config = config\n", 152 | "\n", 153 | " \n", 154 | " def get_base_model(self):\n", 155 | " self.model = tf.keras.applications.vgg16.VGG16(\n", 156 | " input_shape=self.config.params_image_size,\n", 157 | " weights=self.config.params_weights,\n", 158 | " include_top=self.config.params_include_top\n", 159 | " )\n", 160 | "\n", 161 | " self.save_model(path=self.config.base_model_path, model=self.model)\n", 162 | "\n", 163 | " \n", 164 | "\n", 165 | " @staticmethod\n", 166 | " def _prepare_full_model(model, classes, freeze_all, freeze_till, learning_rate):\n", 167 | " if freeze_all:\n", 168 | " for layer in model.layers:\n", 169 | " model.trainable = False\n", 170 | " elif (freeze_till is not None) and (freeze_till > 0):\n", 171 | " for layer in model.layers[:-freeze_till]:\n", 172 | " model.trainable = False\n", 173 | "\n", 174 | " flatten_in = tf.keras.layers.Flatten()(model.output)\n", 175 | " prediction = tf.keras.layers.Dense(\n", 176 | " units=classes,\n", 177 | " activation=\"softmax\"\n", 178 | " )(flatten_in)\n", 179 | "\n", 180 | " full_model = tf.keras.models.Model(\n", 181 | " inputs=model.input,\n", 182 | " outputs=prediction\n", 183 | " )\n", 184 | "\n", 185 | " full_model.compile(\n", 186 | " optimizer=tf.keras.optimizers.SGD(learning_rate=learning_rate),\n", 187 | " loss=tf.keras.losses.CategoricalCrossentropy(),\n", 188 | " metrics=[\"accuracy\"]\n", 189 | " )\n", 190 | "\n", 191 | " full_model.summary()\n", 192 | " return full_model\n", 193 | " \n", 194 | " \n", 195 | " def update_base_model(self):\n", 196 | " self.full_model = self._prepare_full_model(\n", 197 | " model=self.model,\n", 198 | " classes=self.config.params_classes,\n", 199 | " freeze_all=True,\n", 200 | " freeze_till=None,\n", 201 | " learning_rate=self.config.params_learning_rate\n", 202 | " )\n", 203 | "\n", 204 | " self.save_model(path=self.config.updated_base_model_path, model=self.full_model)\n", 205 | "\n", 206 | " \n", 207 | " \n", 208 | " @staticmethod\n", 209 | " def save_model(path: Path, model: tf.keras.Model):\n", 210 | " model.save(path)\n", 211 | "\n" 212 | ] 213 | }, 214 | { 215 | "cell_type": "code", 216 | "execution_count": 10, 217 | "metadata": {}, 218 | "outputs": [ 219 | { 220 | "name": "stdout", 221 | "output_type": "stream", 222 | "text": [ 223 | 
"[2023-10-02 09:52:42,695: INFO: common: yaml file: config\\config.yaml loaded successfully]\n", 224 | "[2023-10-02 09:52:42,700: INFO: common: yaml file: params.yaml loaded successfully]\n", 225 | "[2023-10-02 09:52:42,702: INFO: common: created directory at: artifacts]\n", 226 | "[2023-10-02 09:52:42,703: INFO: common: created directory at: artifacts/prepare_base_model]\n", 227 | "[2023-10-02 09:52:43,136: WARNING: saving_utils: Compiled the loaded model, but the compiled metrics have yet to be built. `model.compile_metrics` will be empty until you train or evaluate the model.]\n", 228 | "Model: \"model\"\n", 229 | "_________________________________________________________________\n", 230 | " Layer (type) Output Shape Param # \n", 231 | "=================================================================\n", 232 | " input_1 (InputLayer) [(None, 224, 224, 3)] 0 \n", 233 | " \n", 234 | " block1_conv1 (Conv2D) (None, 224, 224, 64) 1792 \n", 235 | " \n", 236 | " block1_conv2 (Conv2D) (None, 224, 224, 64) 36928 \n", 237 | " \n", 238 | " block1_pool (MaxPooling2D) (None, 112, 112, 64) 0 \n", 239 | " \n", 240 | " block2_conv1 (Conv2D) (None, 112, 112, 128) 73856 \n", 241 | " \n", 242 | " block2_conv2 (Conv2D) (None, 112, 112, 128) 147584 \n", 243 | " \n", 244 | " block2_pool (MaxPooling2D) (None, 56, 56, 128) 0 \n", 245 | " \n", 246 | " block3_conv1 (Conv2D) (None, 56, 56, 256) 295168 \n", 247 | " \n", 248 | " block3_conv2 (Conv2D) (None, 56, 56, 256) 590080 \n", 249 | " \n", 250 | " block3_conv3 (Conv2D) (None, 56, 56, 256) 590080 \n", 251 | " \n", 252 | " block3_pool (MaxPooling2D) (None, 28, 28, 256) 0 \n", 253 | " \n", 254 | " block4_conv1 (Conv2D) (None, 28, 28, 512) 1180160 \n", 255 | " \n", 256 | " block4_conv2 (Conv2D) (None, 28, 28, 512) 2359808 \n", 257 | " \n", 258 | " block4_conv3 (Conv2D) (None, 28, 28, 512) 2359808 \n", 259 | " \n", 260 | " block4_pool (MaxPooling2D) (None, 14, 14, 512) 0 \n", 261 | " \n", 262 | " block5_conv1 (Conv2D) (None, 14, 14, 512) 2359808 \n", 263 | " \n", 264 | " block5_conv2 (Conv2D) (None, 14, 14, 512) 2359808 \n", 265 | " \n", 266 | " block5_conv3 (Conv2D) (None, 14, 14, 512) 2359808 \n", 267 | " \n", 268 | " block5_pool (MaxPooling2D) (None, 7, 7, 512) 0 \n", 269 | " \n", 270 | " flatten (Flatten) (None, 25088) 0 \n", 271 | " \n", 272 | " dense (Dense) (None, 2) 50178 \n", 273 | " \n", 274 | "=================================================================\n", 275 | "Total params: 14,764,866\n", 276 | "Trainable params: 50,178\n", 277 | "Non-trainable params: 14,714,688\n", 278 | "_________________________________________________________________\n" 279 | ] 280 | } 281 | ], 282 | "source": [ 283 | "try:\n", 284 | " config = ConfigurationManager()\n", 285 | " prepare_base_model_config = config.get_prepare_base_model_config()\n", 286 | " prepare_base_model = PrepareBaseModel(config=prepare_base_model_config)\n", 287 | " prepare_base_model.get_base_model()\n", 288 | " prepare_base_model.update_base_model()\n", 289 | "except Exception as e:\n", 290 | " raise e" 291 | ] 292 | }, 293 | { 294 | "cell_type": "code", 295 | "execution_count": null, 296 | "metadata": {}, 297 | "outputs": [], 298 | "source": [] 299 | } 300 | ], 301 | "metadata": { 302 | "kernelspec": { 303 | "display_name": "kidney", 304 | "language": "python", 305 | "name": "python3" 306 | }, 307 | "language_info": { 308 | "codemirror_mode": { 309 | "name": "ipython", 310 | "version": 3 311 | }, 312 | "file_extension": ".py", 313 | "mimetype": "text/x-python", 314 | "name": "python", 315 | 
"nbconvert_exporter": "python", 316 | "pygments_lexer": "ipython3", 317 | "version": "3.8.18" 318 | }, 319 | "orig_nbformat": 4 320 | }, 321 | "nbformat": 4, 322 | "nbformat_minor": 2 323 | } 324 | -------------------------------------------------------------------------------- /research/03_model_training.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import os" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 2, 15 | "metadata": {}, 16 | "outputs": [ 17 | { 18 | "data": { 19 | "text/plain": [ 20 | "'d:\\\\Bappy\\\\YouTube\\\\Kidney-Disease-Classification-Deep-Learning-Project\\\\research'" 21 | ] 22 | }, 23 | "execution_count": 2, 24 | "metadata": {}, 25 | "output_type": "execute_result" 26 | } 27 | ], 28 | "source": [ 29 | "%pwd" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 3, 35 | "metadata": {}, 36 | "outputs": [], 37 | "source": [ 38 | "os.chdir(\"../\")" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 4, 44 | "metadata": {}, 45 | "outputs": [ 46 | { 47 | "data": { 48 | "text/plain": [ 49 | "'d:\\\\Bappy\\\\YouTube\\\\Kidney-Disease-Classification-Deep-Learning-Project'" 50 | ] 51 | }, 52 | "execution_count": 4, 53 | "metadata": {}, 54 | "output_type": "execute_result" 55 | } 56 | ], 57 | "source": [ 58 | "%pwd\n" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": 5, 64 | "metadata": {}, 65 | "outputs": [], 66 | "source": [ 67 | "from dataclasses import dataclass\n", 68 | "from pathlib import Path\n", 69 | "\n", 70 | "\n", 71 | "@dataclass(frozen=True)\n", 72 | "class TrainingConfig:\n", 73 | " root_dir: Path\n", 74 | " trained_model_path: Path\n", 75 | " updated_base_model_path: Path\n", 76 | " training_data: Path\n", 77 | " params_epochs: int\n", 78 | " params_batch_size: int\n", 79 | " params_is_augmentation: bool\n", 80 | " params_image_size: list" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": 6, 86 | "metadata": {}, 87 | "outputs": [], 88 | "source": [ 89 | "from cnnClassifier.constants import *\n", 90 | "from cnnClassifier.utils.common import read_yaml, create_directories\n", 91 | "import tensorflow as tf" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": 7, 97 | "metadata": {}, 98 | "outputs": [], 99 | "source": [ 100 | "class ConfigurationManager:\n", 101 | " def __init__(\n", 102 | " self,\n", 103 | " config_filepath = CONFIG_FILE_PATH,\n", 104 | " params_filepath = PARAMS_FILE_PATH):\n", 105 | "\n", 106 | " self.config = read_yaml(config_filepath)\n", 107 | " self.params = read_yaml(params_filepath)\n", 108 | "\n", 109 | " create_directories([self.config.artifacts_root])\n", 110 | "\n", 111 | "\n", 112 | " \n", 113 | " def get_training_config(self) -> TrainingConfig:\n", 114 | " training = self.config.training\n", 115 | " prepare_base_model = self.config.prepare_base_model\n", 116 | " params = self.params\n", 117 | " training_data = os.path.join(self.config.data_ingestion.unzip_dir, \"kidney-ct-scan-image\")\n", 118 | " create_directories([\n", 119 | " Path(training.root_dir)\n", 120 | " ])\n", 121 | "\n", 122 | " training_config = TrainingConfig(\n", 123 | " root_dir=Path(training.root_dir),\n", 124 | " trained_model_path=Path(training.trained_model_path),\n", 125 | " updated_base_model_path=Path(prepare_base_model.updated_base_model_path),\n", 126 | " 
training_data=Path(training_data),\n", 127 | " params_epochs=params.EPOCHS,\n", 128 | " params_batch_size=params.BATCH_SIZE,\n", 129 | " params_is_augmentation=params.AUGMENTATION,\n", 130 | " params_image_size=params.IMAGE_SIZE\n", 131 | " )\n", 132 | "\n", 133 | " return training_config" 134 | ] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "execution_count": 8, 139 | "metadata": {}, 140 | "outputs": [], 141 | "source": [ 142 | "import os\n", 143 | "import urllib.request as request\n", 144 | "from zipfile import ZipFile\n", 145 | "import tensorflow as tf\n", 146 | "import time" 147 | ] 148 | }, 149 | { 150 | "cell_type": "code", 151 | "execution_count": 10, 152 | "metadata": {}, 153 | "outputs": [], 154 | "source": [ 155 | "class Training:\n", 156 | " def __init__(self, config: TrainingConfig):\n", 157 | " self.config = config\n", 158 | "\n", 159 | " \n", 160 | " def get_base_model(self):\n", 161 | " self.model = tf.keras.models.load_model(\n", 162 | " self.config.updated_base_model_path\n", 163 | " )\n", 164 | "\n", 165 | " def train_valid_generator(self):\n", 166 | "\n", 167 | " datagenerator_kwargs = dict(\n", 168 | " rescale = 1./255,\n", 169 | " validation_split=0.20\n", 170 | " )\n", 171 | "\n", 172 | " dataflow_kwargs = dict(\n", 173 | " target_size=self.config.params_image_size[:-1],\n", 174 | " batch_size=self.config.params_batch_size,\n", 175 | " interpolation=\"bilinear\"\n", 176 | " )\n", 177 | "\n", 178 | " valid_datagenerator = tf.keras.preprocessing.image.ImageDataGenerator(\n", 179 | " **datagenerator_kwargs\n", 180 | " )\n", 181 | "\n", 182 | " self.valid_generator = valid_datagenerator.flow_from_directory(\n", 183 | " directory=self.config.training_data,\n", 184 | " subset=\"validation\",\n", 185 | " shuffle=False,\n", 186 | " **dataflow_kwargs\n", 187 | " )\n", 188 | "\n", 189 | " if self.config.params_is_augmentation:\n", 190 | " train_datagenerator = tf.keras.preprocessing.image.ImageDataGenerator(\n", 191 | " rotation_range=40,\n", 192 | " horizontal_flip=True,\n", 193 | " width_shift_range=0.2,\n", 194 | " height_shift_range=0.2,\n", 195 | " shear_range=0.2,\n", 196 | " zoom_range=0.2,\n", 197 | " **datagenerator_kwargs\n", 198 | " )\n", 199 | " else:\n", 200 | " train_datagenerator = valid_datagenerator\n", 201 | "\n", 202 | " self.train_generator = train_datagenerator.flow_from_directory(\n", 203 | " directory=self.config.training_data,\n", 204 | " subset=\"training\",\n", 205 | " shuffle=True,\n", 206 | " **dataflow_kwargs\n", 207 | " )\n", 208 | "\n", 209 | " \n", 210 | " @staticmethod\n", 211 | " def save_model(path: Path, model: tf.keras.Model):\n", 212 | " model.save(path)\n", 213 | "\n", 214 | "\n", 215 | "\n", 216 | " \n", 217 | " def train(self):\n", 218 | " self.steps_per_epoch = self.train_generator.samples // self.train_generator.batch_size\n", 219 | " self.validation_steps = self.valid_generator.samples // self.valid_generator.batch_size\n", 220 | "\n", 221 | " self.model.fit(\n", 222 | " self.train_generator,\n", 223 | " epochs=self.config.params_epochs,\n", 224 | " steps_per_epoch=self.steps_per_epoch,\n", 225 | " validation_steps=self.validation_steps,\n", 226 | " validation_data=self.valid_generator\n", 227 | " )\n", 228 | "\n", 229 | " self.save_model(\n", 230 | " path=self.config.trained_model_path,\n", 231 | " model=self.model\n", 232 | " )\n", 233 | "\n" 234 | ] 235 | }, 236 | { 237 | "cell_type": "code", 238 | "execution_count": 11, 239 | "metadata": {}, 240 | "outputs": [ 241 | { 242 | "name": "stdout", 243 | "output_type": "stream", 244 
| "text": [ 245 | "[2023-10-02 10:21:07,888: INFO: common: yaml file: config\\config.yaml loaded successfully]\n", 246 | "[2023-10-02 10:21:07,890: INFO: common: yaml file: params.yaml loaded successfully]\n", 247 | "[2023-10-02 10:21:07,892: INFO: common: created directory at: artifacts]\n", 248 | "[2023-10-02 10:21:07,893: INFO: common: created directory at: artifacts\\training]\n", 249 | "Found 93 images belonging to 2 classes.\n", 250 | "Found 372 images belonging to 2 classes.\n", 251 | "23/23 [==============================] - 38s 2s/step - loss: 12.0650 - accuracy: 0.5618 - val_loss: 14.3907 - val_accuracy: 0.6000\n" 252 | ] 253 | } 254 | ], 255 | "source": [ 256 | "try:\n", 257 | " config = ConfigurationManager()\n", 258 | " training_config = config.get_training_config()\n", 259 | " training = Training(config=training_config)\n", 260 | " training.get_base_model()\n", 261 | " training.train_valid_generator()\n", 262 | " training.train()\n", 263 | " \n", 264 | "except Exception as e:\n", 265 | " raise e\n" 266 | ] 267 | }, 268 | { 269 | "cell_type": "code", 270 | "execution_count": null, 271 | "metadata": {}, 272 | "outputs": [], 273 | "source": [] 274 | } 275 | ], 276 | "metadata": { 277 | "kernelspec": { 278 | "display_name": "kidney", 279 | "language": "python", 280 | "name": "python3" 281 | }, 282 | "language_info": { 283 | "codemirror_mode": { 284 | "name": "ipython", 285 | "version": 3 286 | }, 287 | "file_extension": ".py", 288 | "mimetype": "text/x-python", 289 | "name": "python", 290 | "nbconvert_exporter": "python", 291 | "pygments_lexer": "ipython3", 292 | "version": "3.8.18" 293 | }, 294 | "orig_nbformat": 4 295 | }, 296 | "nbformat": 4, 297 | "nbformat_minor": 2 298 | } 299 | -------------------------------------------------------------------------------- /research/04_model_evaluation_with_mlflow.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import os" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 2, 15 | "metadata": {}, 16 | "outputs": [ 17 | { 18 | "data": { 19 | "text/plain": [ 20 | "'d:\\\\Bappy\\\\YouTube\\\\Kidney-Disease-Classification-Deep-Learning-Project\\\\research'" 21 | ] 22 | }, 23 | "execution_count": 2, 24 | "metadata": {}, 25 | "output_type": "execute_result" 26 | } 27 | ], 28 | "source": [ 29 | "%pwd" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 3, 35 | "metadata": {}, 36 | "outputs": [], 37 | "source": [ 38 | "os.chdir(\"../\")" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 4, 44 | "metadata": {}, 45 | "outputs": [ 46 | { 47 | "data": { 48 | "text/plain": [ 49 | "'d:\\\\Bappy\\\\YouTube\\\\Kidney-Disease-Classification-Deep-Learning-Project'" 50 | ] 51 | }, 52 | "execution_count": 4, 53 | "metadata": {}, 54 | "output_type": "execute_result" 55 | } 56 | ], 57 | "source": [ 58 | "%pwd" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": 5, 64 | "metadata": {}, 65 | "outputs": [], 66 | "source": [ 67 | "os.environ[\"MLFLOW_TRACKING_URI\"]=\"https://dagshub.com/entbappy/Kidney-Disease-Classification-MLflow-DVC.mlflow\"\n", 68 | "os.environ[\"MLFLOW_TRACKING_USERNAME\"]=\"entbappy\"\n", 69 | "os.environ[\"MLFLOW_TRACKING_PASSWORD\"]=\"6824692c47a369aa6f9eac5b10041d5c8edbcef0\"" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": 6, 75 | "metadata": {}, 76 | 
"outputs": [], 77 | "source": [ 78 | "import tensorflow as tf" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": 7, 84 | "metadata": {}, 85 | "outputs": [], 86 | "source": [ 87 | "model = tf.keras.models.load_model(\"artifacts/training/model.h5\")" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": 8, 93 | "metadata": {}, 94 | "outputs": [], 95 | "source": [ 96 | "from dataclasses import dataclass\n", 97 | "from pathlib import Path\n", 98 | "\n", 99 | "@dataclass(frozen=True)\n", 100 | "class EvaluationConfig:\n", 101 | " path_of_model: Path\n", 102 | " training_data: Path\n", 103 | " all_params: dict\n", 104 | " mlflow_uri: str\n", 105 | " params_image_size: list\n", 106 | " params_batch_size: int" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": 9, 112 | "metadata": {}, 113 | "outputs": [], 114 | "source": [ 115 | "from cnnClassifier.constants import *\n", 116 | "from cnnClassifier.utils.common import read_yaml, create_directories, save_json" 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": 10, 122 | "metadata": {}, 123 | "outputs": [], 124 | "source": [ 125 | "class ConfigurationManager:\n", 126 | " def __init__(\n", 127 | " self, \n", 128 | " config_filepath = CONFIG_FILE_PATH,\n", 129 | " params_filepath = PARAMS_FILE_PATH):\n", 130 | " self.config = read_yaml(config_filepath)\n", 131 | " self.params = read_yaml(params_filepath)\n", 132 | " create_directories([self.config.artifacts_root])\n", 133 | "\n", 134 | " \n", 135 | " def get_evaluation_config(self) -> EvaluationConfig:\n", 136 | " eval_config = EvaluationConfig(\n", 137 | " path_of_model=\"artifacts/training/model.h5\",\n", 138 | " training_data=\"artifacts/data_ingestion/kidney-ct-scan-image\",\n", 139 | " mlflow_uri=\"https://dagshub.com/entbappy/Kidney-Disease-Classification-MLflow-DVC.mlflow\",\n", 140 | " all_params=self.params,\n", 141 | " params_image_size=self.params.IMAGE_SIZE,\n", 142 | " params_batch_size=self.params.BATCH_SIZE\n", 143 | " )\n", 144 | " return eval_config\n", 145 | "\n", 146 | "\n" 147 | ] 148 | }, 149 | { 150 | "cell_type": "code", 151 | "execution_count": 11, 152 | "metadata": {}, 153 | "outputs": [], 154 | "source": [ 155 | "import tensorflow as tf\n", 156 | "from pathlib import Path\n", 157 | "import mlflow\n", 158 | "import mlflow.keras\n", 159 | "from urllib.parse import urlparse" 160 | ] 161 | }, 162 | { 163 | "cell_type": "code", 164 | "execution_count": 12, 165 | "metadata": {}, 166 | "outputs": [], 167 | "source": [ 168 | "class Evaluation:\n", 169 | " def __init__(self, config: EvaluationConfig):\n", 170 | " self.config = config\n", 171 | "\n", 172 | " \n", 173 | " def _valid_generator(self):\n", 174 | "\n", 175 | " datagenerator_kwargs = dict(\n", 176 | " rescale = 1./255,\n", 177 | " validation_split=0.30\n", 178 | " )\n", 179 | "\n", 180 | " dataflow_kwargs = dict(\n", 181 | " target_size=self.config.params_image_size[:-1],\n", 182 | " batch_size=self.config.params_batch_size,\n", 183 | " interpolation=\"bilinear\"\n", 184 | " )\n", 185 | "\n", 186 | " valid_datagenerator = tf.keras.preprocessing.image.ImageDataGenerator(\n", 187 | " **datagenerator_kwargs\n", 188 | " )\n", 189 | "\n", 190 | " self.valid_generator = valid_datagenerator.flow_from_directory(\n", 191 | " directory=self.config.training_data,\n", 192 | " subset=\"validation\",\n", 193 | " shuffle=False,\n", 194 | " **dataflow_kwargs\n", 195 | " )\n", 196 | "\n", 197 | "\n", 198 | " @staticmethod\n", 199 | " def 
load_model(path: Path) -> tf.keras.Model:\n", 200 | " return tf.keras.models.load_model(path)\n", 201 | " \n", 202 | "\n", 203 | " def evaluation(self):\n", 204 | " self.model = self.load_model(self.config.path_of_model)\n", 205 | " self._valid_generator()\n", 206 | " self.score = self.model.evaluate(self.valid_generator)\n", 207 | " self.save_score()\n", 208 | "\n", 209 | " def save_score(self):\n", 210 | " scores = {\"loss\": self.score[0], \"accuracy\": self.score[1]}\n", 211 | " save_json(path=Path(\"scores.json\"), data=scores)\n", 212 | "\n", 213 | " \n", 214 | " def log_into_mlflow(self):\n", 215 | " mlflow.set_registry_uri(self.config.mlflow_uri)\n", 216 | " tracking_url_type_store = urlparse(mlflow.get_tracking_uri()).scheme\n", 217 | " \n", 218 | " with mlflow.start_run():\n", 219 | " mlflow.log_params(self.config.all_params)\n", 220 | " mlflow.log_metrics(\n", 221 | " {\"loss\": self.score[0], \"accuracy\": self.score[1]}\n", 222 | " )\n", 223 | " # Model registry does not work with file store\n", 224 | " if tracking_url_type_store != \"file\":\n", 225 | "\n", 226 | " # Register the model\n", 227 | " # There are other ways to use the Model Registry, which depends on the use case,\n", 228 | " # please refer to the doc for more information:\n", 229 | " # https://mlflow.org/docs/latest/model-registry.html#api-workflow\n", 230 | " mlflow.keras.log_model(self.model, \"model\", registered_model_name=\"VGG16Model\")\n", 231 | " else:\n", 232 | " mlflow.keras.log_model(self.model, \"model\")\n" 233 | ] 234 | }, 235 | { 236 | "cell_type": "code", 237 | "execution_count": 13, 238 | "metadata": {}, 239 | "outputs": [ 240 | { 241 | "name": "stdout", 242 | "output_type": "stream", 243 | "text": [ 244 | "[2023-10-02 15:10:13,653: INFO: common: yaml file: config\\config.yaml loaded successfully]\n", 245 | "[2023-10-02 15:10:13,655: INFO: common: yaml file: params.yaml loaded successfully]\n", 246 | "[2023-10-02 15:10:13,656: INFO: common: created directory at: artifacts]\n", 247 | "Found 139 images belonging to 2 classes.\n", 248 | "9/9 [==============================] - 12s 1s/step - loss: 1.9542 - accuracy: 0.6115\n", 249 | "[2023-10-02 15:10:26,038: INFO: common: json file saved at: scores.json]\n" 250 | ] 251 | }, 252 | { 253 | "name": "stderr", 254 | "output_type": "stream", 255 | "text": [ 256 | "2023/10/02 15:10:28 WARNING mlflow.tensorflow: You are saving a TensorFlow Core model or Keras model without a signature. Inference with mlflow.pyfunc.spark_udf() will not work unless the model's pyfunc representation accepts pandas DataFrames as inference inputs.\n" 257 | ] 258 | }, 259 | { 260 | "name": "stdout", 261 | "output_type": "stream", 262 | "text": [ 263 | "[2023-10-02 15:10:29,308: WARNING: save: Found untraced functions such as _jit_compiled_convolution_op, _jit_compiled_convolution_op, _jit_compiled_convolution_op, _jit_compiled_convolution_op, _jit_compiled_convolution_op while saving (showing 5 of 14).
These functions will not be directly callable after loading.]\n", 264 | "INFO:tensorflow:Assets written to: C:\\Users\\bokti\\AppData\\Local\\Temp\\tmpny8egng1\\model\\data\\model\\assets\n", 265 | "[2023-10-02 15:10:29,857: INFO: builder_impl: Assets written to: C:\\Users\\bokti\\AppData\\Local\\Temp\\tmpny8egng1\\model\\data\\model\\assets]\n" 266 | ] 267 | }, 268 | { 269 | "name": "stderr", 270 | "output_type": "stream", 271 | "text": [ 272 | "d:\\Softwares\\anaconda3\\envs\\kidney\\lib\\site-packages\\_distutils_hack\\__init__.py:33: UserWarning: Setuptools is replacing distutils.\n", 273 | " warnings.warn(\"Setuptools is replacing distutils.\")\n", 274 | "Registered model 'VGG16Model' already exists. Creating a new version of this model...\n", 275 | "2023/10/02 15:12:07 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation. Model name: VGG16Model, version 4\n", 276 | "Created version '4' of model 'VGG16Model'.\n" 277 | ] 278 | } 279 | ], 280 | "source": [ 281 | "try:\n", 282 | " config = ConfigurationManager()\n", 283 | " eval_config = config.get_evaluation_config()\n", 284 | " evaluation = Evaluation(eval_config)\n", 285 | " evaluation.evaluation()\n", 286 | " evaluation.log_into_mlflow()\n", 287 | "\n", 288 | "except Exception as e:\n", 289 | " raise e" 290 | ] 291 | }, 292 | { 293 | "cell_type": "code", 294 | "execution_count": null, 295 | "metadata": {}, 296 | "outputs": [], 297 | "source": [] 298 | } 299 | ], 300 | "metadata": { 301 | "kernelspec": { 302 | "display_name": "kidney", 303 | "language": "python", 304 | "name": "python3" 305 | }, 306 | "language_info": { 307 | "codemirror_mode": { 308 | "name": "ipython", 309 | "version": 3 310 | }, 311 | "file_extension": ".py", 312 | "mimetype": "text/x-python", 313 | "name": "python", 314 | "nbconvert_exporter": "python", 315 | "pygments_lexer": "ipython3", 316 | "version": "3.8.18" 317 | }, 318 | "orig_nbformat": 4 319 | }, 320 | "nbformat": 4, 321 | "nbformat_minor": 2 322 | } 323 | -------------------------------------------------------------------------------- /research/trials.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "# config box\n", 10 | "d = {\"key\":\"val\", \"key1\":\"val1\"}" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 3, 16 | "metadata": {}, 17 | "outputs": [ 18 | { 19 | "data": { 20 | "text/plain": [ 21 | "'val1'" 22 | ] 23 | }, 24 | "execution_count": 3, 25 | "metadata": {}, 26 | "output_type": "execute_result" 27 | } 28 | ], 29 | "source": [ 30 | "d['key1']" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 5, 36 | "metadata": {}, 37 | "outputs": [ 38 | { 39 | "ename": "AttributeError", 40 | "evalue": "'dict' object has no attribute 'key1'", 41 | "output_type": "error", 42 | "traceback": [ 43 | "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", 44 | "\u001b[1;31mAttributeError\u001b[0m Traceback (most recent call last)", 45 | "\u001b[1;32md:\\Bappy\\YouTube\\Kidney-Disease-Classification-Deep-Learning-Project\\research\\trials.ipynb Cell 3\u001b[0m line \u001b[0;36m1\n\u001b[1;32m----> 1\u001b[0m d\u001b[39m.\u001b[39;49mkey1\n", 46 | "\u001b[1;31mAttributeError\u001b[0m: 'dict' object has no attribute 'key1'" 47 | ] 48 | } 49 | ], 50 | "source": [ 51 | "d.key1" 52 | ] 53 | }, 54 | { 55 
| "cell_type": "code", 56 | "execution_count": 6, 57 | "metadata": {}, 58 | "outputs": [], 59 | "source": [ 60 | "from box import ConfigBox" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": 7, 66 | "metadata": {}, 67 | "outputs": [], 68 | "source": [ 69 | "d2 = ConfigBox({\"key\":\"val\", \"key1\":\"val1\"})" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": 9, 75 | "metadata": {}, 76 | "outputs": [ 77 | { 78 | "data": { 79 | "text/plain": [ 80 | "'val1'" 81 | ] 82 | }, 83 | "execution_count": 9, 84 | "metadata": {}, 85 | "output_type": "execute_result" 86 | } 87 | ], 88 | "source": [ 89 | "d2.key1" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": 10, 95 | "metadata": {}, 96 | "outputs": [ 97 | { 98 | "data": { 99 | "text/plain": [ 100 | "box.config_box.ConfigBox" 101 | ] 102 | }, 103 | "execution_count": 10, 104 | "metadata": {}, 105 | "output_type": "execute_result" 106 | } 107 | ], 108 | "source": [ 109 | "type(d2)" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": 11, 115 | "metadata": {}, 116 | "outputs": [ 117 | { 118 | "data": { 119 | "text/plain": [ 120 | "dict" 121 | ] 122 | }, 123 | "execution_count": 11, 124 | "metadata": {}, 125 | "output_type": "execute_result" 126 | } 127 | ], 128 | "source": [ 129 | "type(d)" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": 12, 135 | "metadata": {}, 136 | "outputs": [], 137 | "source": [ 138 | "# ensure annotations \n", 139 | "\n", 140 | "def get_prodict(x:int, y:int) -> int:\n", 141 | " return x*y" 142 | ] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "execution_count": 13, 147 | "metadata": {}, 148 | "outputs": [ 149 | { 150 | "data": { 151 | "text/plain": [ 152 | "6" 153 | ] 154 | }, 155 | "execution_count": 13, 156 | "metadata": {}, 157 | "output_type": "execute_result" 158 | } 159 | ], 160 | "source": [ 161 | "get_prodict(x=2, y=3)" 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "execution_count": 14, 167 | "metadata": {}, 168 | "outputs": [ 169 | { 170 | "data": { 171 | "text/plain": [ 172 | "'33'" 173 | ] 174 | }, 175 | "execution_count": 14, 176 | "metadata": {}, 177 | "output_type": "execute_result" 178 | } 179 | ], 180 | "source": [ 181 | "get_prodict(x=2, y=\"3\")" 182 | ] 183 | }, 184 | { 185 | "cell_type": "code", 186 | "execution_count": 15, 187 | "metadata": {}, 188 | "outputs": [], 189 | "source": [ 190 | "from ensure import ensure_annotations" 191 | ] 192 | }, 193 | { 194 | "cell_type": "code", 195 | "execution_count": 16, 196 | "metadata": {}, 197 | "outputs": [], 198 | "source": [ 199 | "@ensure_annotations\n", 200 | "def get_prodict(x:int, y:int) -> int:\n", 201 | " return x*y" 202 | ] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "execution_count": 17, 207 | "metadata": {}, 208 | "outputs": [ 209 | { 210 | "data": { 211 | "text/plain": [ 212 | "6" 213 | ] 214 | }, 215 | "execution_count": 17, 216 | "metadata": {}, 217 | "output_type": "execute_result" 218 | } 219 | ], 220 | "source": [ 221 | "get_prodict(x=2, y=3)" 222 | ] 223 | }, 224 | { 225 | "cell_type": "code", 226 | "execution_count": 18, 227 | "metadata": {}, 228 | "outputs": [ 229 | { 230 | "ename": "EnsureError", 231 | "evalue": "Argument y of type to does not match annotation type ", 232 | "output_type": "error", 233 | "traceback": [ 234 | "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", 235 | "\u001b[1;31mEnsureError\u001b[0m Traceback (most recent call last)", 236 | 
"\u001b[1;32md:\\Bappy\\YouTube\\Kidney-Disease-Classification-Deep-Learning-Project\\research\\trials.ipynb Cell 15\u001b[0m line \u001b[0;36m1\n\u001b[1;32m----> 1\u001b[0m get_prodict(x\u001b[39m=\u001b[39;49m\u001b[39m2\u001b[39;49m, y\u001b[39m=\u001b[39;49m\u001b[39m\"\u001b[39;49m\u001b[39m3\u001b[39;49m\u001b[39m\"\u001b[39;49m)\n", 237 | "File \u001b[1;32md:\\Softwares\\anaconda3\\envs\\kidney\\lib\\site-packages\\ensure\\main.py:845\u001b[0m, in \u001b[0;36mWrappedFunctionReturn.__call__\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m 840\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39misinstance\u001b[39m(value, templ):\n\u001b[0;32m 841\u001b[0m msg \u001b[39m=\u001b[39m (\n\u001b[0;32m 842\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mArgument \u001b[39m\u001b[39m{arg}\u001b[39;00m\u001b[39m of type \u001b[39m\u001b[39m{valt}\u001b[39;00m\u001b[39m to \u001b[39m\u001b[39m{f}\u001b[39;00m\u001b[39m \u001b[39m\u001b[39m\"\u001b[39m\n\u001b[0;32m 843\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mdoes not match annotation type \u001b[39m\u001b[39m{t}\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[0;32m 844\u001b[0m )\n\u001b[1;32m--> 845\u001b[0m \u001b[39mraise\u001b[39;00m EnsureError(msg\u001b[39m.\u001b[39mformat(\n\u001b[0;32m 846\u001b[0m arg\u001b[39m=\u001b[39marg, f\u001b[39m=\u001b[39m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mf, t\u001b[39m=\u001b[39mtempl, valt\u001b[39m=\u001b[39m\u001b[39mtype\u001b[39m(value)\n\u001b[0;32m 847\u001b[0m ))\n\u001b[0;32m 849\u001b[0m return_val \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mf(\u001b[39m*\u001b[39margs, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n\u001b[0;32m 850\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39misinstance\u001b[39m(return_val, \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mreturn_templ):\n", 238 | "\u001b[1;31mEnsureError\u001b[0m: Argument y of type to does not match annotation type " 239 | ] 240 | } 241 | ], 242 | "source": [ 243 | "get_prodict(x=2, y=\"3\")" 244 | ] 245 | }, 246 | { 247 | "cell_type": "markdown", 248 | "metadata": {}, 249 | "source": [ 250 | "## Donwload data from gdrive" 251 | ] 252 | }, 253 | { 254 | "cell_type": "code", 255 | "execution_count": 1, 256 | "metadata": {}, 257 | "outputs": [], 258 | "source": [ 259 | "import gdown" 260 | ] 261 | }, 262 | { 263 | "cell_type": "code", 264 | "execution_count": 2, 265 | "metadata": {}, 266 | "outputs": [], 267 | "source": [ 268 | "url=\"https://drive.google.com/file/d/1vlhZ5c7abUKF8xXERIw6m9Te8fW7ohw3/view?usp=sharing\"" 269 | ] 270 | }, 271 | { 272 | "cell_type": "code", 273 | "execution_count": 5, 274 | "metadata": {}, 275 | "outputs": [ 276 | { 277 | "data": { 278 | "text/plain": [ 279 | "'1vlhZ5c7abUKF8xXERIw6m9Te8fW7ohw3'" 280 | ] 281 | }, 282 | "execution_count": 5, 283 | "metadata": {}, 284 | "output_type": "execute_result" 285 | } 286 | ], 287 | "source": [ 288 | "file_id = url.split(\"/\")[-2]\n", 289 | "file_id" 290 | ] 291 | }, 292 | { 293 | "cell_type": "code", 294 | "execution_count": 6, 295 | "metadata": {}, 296 | "outputs": [ 297 | { 298 | "name": "stderr", 299 | "output_type": "stream", 300 | "text": [ 301 | "Downloading...\n", 302 | "From (uriginal): https://drive.google.com/uc?/export=download&id=1vlhZ5c7abUKF8xXERIw6m9Te8fW7ohw3\n", 303 | "From (redirected): https://drive.google.com/uc?/export=download&id=1vlhZ5c7abUKF8xXERIw6m9Te8fW7ohw3&confirm=t&uuid=1d1b34c8-24e5-4b4c-b682-9f1d92b28169\n", 304 | "To: 
d:\\Bappy\\YouTube\\Kidney-Disease-Classification-Deep-Learning-Project\\research\\kidney-CT-Scan-data.zip\n", 305 | "100%|██████████| 57.7M/57.7M [01:36<00:00, 595kB/s]\n" 306 | ] 307 | }, 308 | { 309 | "data": { 310 | "text/plain": [ 311 | "'kidney-CT-Scan-data.zip'" 312 | ] 313 | }, 314 | "execution_count": 6, 315 | "metadata": {}, 316 | "output_type": "execute_result" 317 | } 318 | ], 319 | "source": [ 320 | "prefix = 'https://drive.google.com/uc?/export=download&id='\n", 321 | "gdown.download(prefix+file_id, \"kidney-CT-Scan-data.zip\")" 322 | ] 323 | }, 324 | { 325 | "cell_type": "code", 326 | "execution_count": null, 327 | "metadata": {}, 328 | "outputs": [], 329 | "source": [] 330 | } 331 | ], 332 | "metadata": { 333 | "kernelspec": { 334 | "display_name": "kidney", 335 | "language": "python", 336 | "name": "python3" 337 | }, 338 | "language_info": { 339 | "codemirror_mode": { 340 | "name": "ipython", 341 | "version": 3 342 | }, 343 | "file_extension": ".py", 344 | "mimetype": "text/x-python", 345 | "name": "python", 346 | "nbconvert_exporter": "python", 347 | "pygments_lexer": "ipython3", 348 | "version": "3.8.18" 349 | }, 350 | "orig_nbformat": 4 351 | }, 352 | "nbformat": 4, 353 | "nbformat_minor": 2 354 | } 355 | -------------------------------------------------------------------------------- /scores.json: -------------------------------------------------------------------------------- 1 | { 2 | "loss": 24.858373641967773, 3 | "accuracy": 0.5179855823516846 4 | } -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import setuptools 2 | 3 | with open("README.md", "r", encoding="utf-8") as f: 4 | long_description = f.read() 5 | 6 | 7 | __version__ = "0.0.0" 8 | 9 | REPO_NAME = "Kidney-Disease-Classification-Deep-Learning-Project" 10 | AUTHOR_USER_NAME = "krishnaik06" 11 | SRC_REPO = "cnnClassifier" 12 | AUTHOR_EMAIL = "entbappy73@gmail.com" 13 | 14 | 15 | setuptools.setup( 16 | name=SRC_REPO, 17 | version=__version__, 18 | author=AUTHOR_USER_NAME, 19 | author_email=AUTHOR_EMAIL, 20 | description="A small Python package for a CNN app", 21 | long_description=long_description, 22 | long_description_content_type="text/markdown", 23 | url=f"https://github.com/{AUTHOR_USER_NAME}/{REPO_NAME}", 24 | project_urls={ 25 | "Bug Tracker": f"https://github.com/{AUTHOR_USER_NAME}/{REPO_NAME}/issues", 26 | }, 27 | package_dir={"": "src"}, 28 | packages=setuptools.find_packages(where="src") 29 | ) -------------------------------------------------------------------------------- /src/cnnClassifier/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import logging 4 | 5 | logging_str = "[%(asctime)s: %(levelname)s: %(module)s: %(message)s]" 6 | 7 | log_dir = "logs" 8 | log_filepath = os.path.join(log_dir,"running_logs.log") 9 | os.makedirs(log_dir, exist_ok=True) 10 | 11 | 12 | logging.basicConfig( 13 | level= logging.INFO, 14 | format= logging_str, 15 | 16 | handlers=[ 17 | logging.FileHandler(log_filepath), 18 | logging.StreamHandler(sys.stdout) 19 | ] 20 | ) 21 | 22 | logger = logging.getLogger("cnnClassifierLogger")
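# A quick smoke test for the shared package logger configured above -- a
# minimal sketch, assuming the package is importable (e.g. after running
# `pip install -e .` from the repo root). Messages land both in
# logs/running_logs.log and on stdout:
#
#     from cnnClassifier import logger
#     logger.info("logger is wired up")
#     # e.g. [2023-10-02 10:21:07,888: INFO: mymodule: logger is wired up]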
-------------------------------------------------------------------------------- /src/cnnClassifier/components/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/krishnaik06/Kidney-Disease-Classification-Deep-Learning-Project/0e50432a49baa216e12867f9feed1b63505dda86/src/cnnClassifier/components/__init__.py -------------------------------------------------------------------------------- /src/cnnClassifier/components/data_ingestion.py: -------------------------------------------------------------------------------- 1 | import os 2 | import zipfile 3 | import gdown 4 | from cnnClassifier import logger 5 | from cnnClassifier.utils.common import get_size 6 | from cnnClassifier.entity.config_entity import (DataIngestionConfig) 7 | 8 | 9 | 10 | class DataIngestion: 11 | def __init__(self, config: DataIngestionConfig): 12 | self.config = config 13 | 14 | 15 | def download_file(self) -> None: 16 | ''' 17 | Fetch data from the URL 18 | ''' 19 | 20 | try: 21 | dataset_url = self.config.source_URL 22 | zip_download_dir = self.config.local_data_file 23 | os.makedirs("artifacts/data_ingestion", exist_ok=True) 24 | logger.info(f"Downloading data from {dataset_url} into file {zip_download_dir}") 25 | 26 | file_id = dataset_url.split("/")[-2] 27 | prefix = 'https://drive.google.com/uc?/export=download&id=' 28 | gdown.download(prefix+file_id,zip_download_dir) 29 | 30 | logger.info(f"Downloaded data from {dataset_url} into file {zip_download_dir}") 31 | 32 | except Exception as e: 33 | raise e 34 | 35 | 36 | 37 | def extract_zip_file(self): 38 | """ 39 | 40 | Extracts the zip file into the data directory 41 | Function returns None 42 | """ 43 | unzip_path = self.config.unzip_dir 44 | os.makedirs(unzip_path, exist_ok=True) 45 | with zipfile.ZipFile(self.config.local_data_file, 'r') as zip_ref: 46 | zip_ref.extractall(unzip_path) 47 | 48 | -------------------------------------------------------------------------------- /src/cnnClassifier/components/model_evaluation_mlflow.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from pathlib import Path 3 | import mlflow 4 | import mlflow.keras 5 | from urllib.parse import urlparse 6 | from cnnClassifier.entity.config_entity import EvaluationConfig 7 | from cnnClassifier.utils.common import read_yaml, create_directories,save_json 8 | 9 | 10 | class Evaluation: 11 | def __init__(self, config: EvaluationConfig): 12 | self.config = config 13 | 14 | 15 | def _valid_generator(self): 16 | 17 | datagenerator_kwargs = dict( 18 | rescale = 1./255, 19 | validation_split=0.30 20 | ) 21 | 22 | dataflow_kwargs = dict( 23 | target_size=self.config.params_image_size[:-1], 24 | batch_size=self.config.params_batch_size, 25 | interpolation="bilinear" 26 | ) 27 | 28 | valid_datagenerator = tf.keras.preprocessing.image.ImageDataGenerator( 29 | **datagenerator_kwargs 30 | ) 31 | 32 | self.valid_generator = valid_datagenerator.flow_from_directory( 33 | directory=self.config.training_data, 34 | subset="validation", 35 | shuffle=False, 36 | **dataflow_kwargs 37 | ) 38 | 39 | 40 | @staticmethod 41 | def load_model(path: Path) -> tf.keras.Model: 42 | return tf.keras.models.load_model(path) 43 | 44 | 45 | def evaluation(self): 46 | self.model = self.load_model(self.config.path_of_model) 47 | self._valid_generator() 48 | self.score = self.model.evaluate(self.valid_generator) 49 | self.save_score() 50 | 51 | def save_score(self): 52 | scores = {"loss": self.score[0], "accuracy": self.score[1]} 53 | save_json(path=Path("scores.json"), data=scores) 54 | 55 | 56 | def log_into_mlflow(self): 57 | mlflow.set_registry_uri(self.config.mlflow_uri)
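# urlparse() keeps only the scheme of the active tracking URI: "https" for a
# remote server such as the DagsHub URI configured above, "file" for a plain
# local ./mlruns store. As the comment below notes, the MLflow Model Registry
# is not available on file stores, so the model is registered only when the
# run is logged to a remote tracking server.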
58 | tracking_url_type_store = urlparse(mlflow.get_tracking_uri()).scheme 59 | 60 | with mlflow.start_run(): 61 | mlflow.log_params(self.config.all_params) 62 | mlflow.log_metrics( 63 | {"loss": self.score[0], "accuracy": self.score[1]} 64 | ) 65 | # Model registry does not work with file store 66 | if tracking_url_type_store != "file": 67 | 68 | # Register the model 69 | # There are other ways to use the Model Registry, which depends on the use case, 70 | # please refer to the doc for more information: 71 | # https://mlflow.org/docs/latest/model-registry.html#api-workflow 72 | mlflow.keras.log_model(self.model, "model", registered_model_name="VGG16Model") 73 | else: 74 | mlflow.keras.log_model(self.model, "model") 75 | -------------------------------------------------------------------------------- /src/cnnClassifier/components/model_training.py: -------------------------------------------------------------------------------- 1 | import os 2 | import urllib.request as request 3 | from zipfile import ZipFile 4 | import tensorflow as tf 5 | import time 6 | from pathlib import Path 7 | from cnnClassifier.entity.config_entity import TrainingConfig 8 | 9 | 10 | class Training: 11 | def __init__(self, config: TrainingConfig): 12 | self.config = config 13 | 14 | 15 | def get_base_model(self): 16 | self.model = tf.keras.models.load_model( 17 | self.config.updated_base_model_path 18 | ) 19 | 20 | def train_valid_generator(self): 21 | 22 | datagenerator_kwargs = dict( 23 | rescale = 1./255, 24 | validation_split=0.20 25 | ) 26 | 27 | dataflow_kwargs = dict( 28 | target_size=self.config.params_image_size[:-1], 29 | batch_size=self.config.params_batch_size, 30 | interpolation="bilinear" 31 | ) 32 | 33 | valid_datagenerator = tf.keras.preprocessing.image.ImageDataGenerator( 34 | **datagenerator_kwargs 35 | ) 36 | 37 | self.valid_generator = valid_datagenerator.flow_from_directory( 38 | directory=self.config.training_data, 39 | subset="validation", 40 | shuffle=False, 41 | **dataflow_kwargs 42 | ) 43 | 44 | if self.config.params_is_augmentation: 45 | train_datagenerator = tf.keras.preprocessing.image.ImageDataGenerator( 46 | rotation_range=40, 47 | horizontal_flip=True, 48 | width_shift_range=0.2, 49 | height_shift_range=0.2, 50 | shear_range=0.2, 51 | zoom_range=0.2, 52 | **datagenerator_kwargs 53 | ) 54 | else: 55 | train_datagenerator = valid_datagenerator 56 | 57 | self.train_generator = train_datagenerator.flow_from_directory( 58 | directory=self.config.training_data, 59 | subset="training", 60 | shuffle=True, 61 | **dataflow_kwargs 62 | ) 63 | 64 | 65 | @staticmethod 66 | def save_model(path: Path, model: tf.keras.Model): 67 | model.save(path) 68 | 69 | 70 | 71 | 72 | def train(self): 73 | self.steps_per_epoch = self.train_generator.samples // self.train_generator.batch_size 74 | self.validation_steps = self.valid_generator.samples // self.valid_generator.batch_size 75 | 76 | self.model.fit( 77 | self.train_generator, 78 | epochs=self.config.params_epochs, 79 | steps_per_epoch=self.steps_per_epoch, 80 | validation_steps=self.validation_steps, 81 | validation_data=self.valid_generator 82 | ) 83 | 84 | self.save_model( 85 | path=self.config.trained_model_path, 86 | model=self.model 87 | ) 88 | 89 | -------------------------------------------------------------------------------- /src/cnnClassifier/components/prepare_base_model.py: -------------------------------------------------------------------------------- 1 | import os 2 | import urllib.request as request 3 | from zipfile import ZipFile 4 | 
import tensorflow as tf 5 | from pathlib import Path 6 | from cnnClassifier.entity.config_entity import PrepareBaseModelConfig 7 | 8 | 9 | class PrepareBaseModel: 10 | def __init__(self, config: PrepareBaseModelConfig): 11 | self.config = config 12 | 13 | 14 | def get_base_model(self): 15 | self.model = tf.keras.applications.vgg16.VGG16( 16 | input_shape=self.config.params_image_size, 17 | weights=self.config.params_weights, 18 | include_top=self.config.params_include_top 19 | ) 20 | 21 | self.save_model(path=self.config.base_model_path, model=self.model) 22 | 23 | 24 | 25 | @staticmethod 26 | def _prepare_full_model(model, classes, freeze_all, freeze_till, learning_rate): 27 | if freeze_all: 28 | for layer in model.layers: 29 | layer.trainable = False 30 | elif (freeze_till is not None) and (freeze_till > 0): 31 | for layer in model.layers[:-freeze_till]: 32 | layer.trainable = False 33 | 34 | flatten_in = tf.keras.layers.Flatten()(model.output) 35 | prediction = tf.keras.layers.Dense( 36 | units=classes, 37 | activation="softmax" 38 | )(flatten_in) 39 | 40 | full_model = tf.keras.models.Model( 41 | inputs=model.input, 42 | outputs=prediction 43 | ) 44 | 45 | full_model.compile( 46 | optimizer=tf.keras.optimizers.SGD(learning_rate=learning_rate), 47 | loss=tf.keras.losses.CategoricalCrossentropy(), 48 | metrics=["accuracy"] 49 | ) 50 | 51 | full_model.summary() 52 | return full_model 53 | 54 | 55 | def update_base_model(self): 56 | self.full_model = self._prepare_full_model( 57 | model=self.model, 58 | classes=self.config.params_classes, 59 | freeze_all=True, 60 | freeze_till=None, 61 | learning_rate=self.config.params_learning_rate 62 | ) 63 | 64 | self.save_model(path=self.config.updated_base_model_path, model=self.full_model) 65 | 66 | 67 | 68 | @staticmethod 69 | def save_model(path: Path, model: tf.keras.Model): 70 | model.save(path) 71 | 72 |
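# A minimal driver for the component above -- a sketch only, assuming
# config/config.yaml provides the prepare_base_model paths and params.yaml
# provides IMAGE_SIZE, LEARNING_RATE, INCLUDE_TOP, WEIGHTS and CLASSES. With
# freeze_all=True every VGG16 layer is frozen, so only the freshly attached
# softmax head is updated during training:
#
#     from cnnClassifier.config.configuration import ConfigurationManager
#     from cnnClassifier.components.prepare_base_model import PrepareBaseModel
#
#     config = ConfigurationManager()
#     prepare_base_model = PrepareBaseModel(config=config.get_prepare_base_model_config())
#     prepare_base_model.get_base_model()     # download the VGG16 conv base and save it
#     prepare_base_model.update_base_model()  # freeze it, add the Dense head, save it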
-------------------------------------------------------------------------------- /src/cnnClassifier/config/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/krishnaik06/Kidney-Disease-Classification-Deep-Learning-Project/0e50432a49baa216e12867f9feed1b63505dda86/src/cnnClassifier/config/__init__.py -------------------------------------------------------------------------------- /src/cnnClassifier/config/configuration.py: -------------------------------------------------------------------------------- 1 | from cnnClassifier.constants import * 2 | import os 3 | from cnnClassifier.utils.common import read_yaml, create_directories,save_json 4 | from cnnClassifier.entity.config_entity import (DataIngestionConfig, 5 | PrepareBaseModelConfig, 6 | TrainingConfig, 7 | EvaluationConfig) 8 | 9 | 10 | class ConfigurationManager: 11 | def __init__( 12 | self, 13 | config_filepath = CONFIG_FILE_PATH, 14 | params_filepath = PARAMS_FILE_PATH): 15 | 16 | self.config = read_yaml(config_filepath) 17 | self.params = read_yaml(params_filepath) 18 | 19 | create_directories([self.config.artifacts_root]) 20 | 21 | 22 | 23 | def get_data_ingestion_config(self) -> DataIngestionConfig: 24 | config = self.config.data_ingestion 25 | 26 | create_directories([config.root_dir]) 27 | 28 | data_ingestion_config = DataIngestionConfig( 29 | root_dir=config.root_dir, 30 | source_URL=config.source_URL, 31 | local_data_file=config.local_data_file, 32 | unzip_dir=config.unzip_dir 33 | ) 34 | 35 | return data_ingestion_config 36 | 37 | 38 | 39 | 40 | def get_prepare_base_model_config(self) -> PrepareBaseModelConfig: 41 | config = self.config.prepare_base_model 42 | 43 | create_directories([config.root_dir]) 44 | 45 | prepare_base_model_config = PrepareBaseModelConfig( 46 | root_dir=Path(config.root_dir), 47 | base_model_path=Path(config.base_model_path), 48 | updated_base_model_path=Path(config.updated_base_model_path), 49 | params_image_size=self.params.IMAGE_SIZE, 50 | params_learning_rate=self.params.LEARNING_RATE, 51 | params_include_top=self.params.INCLUDE_TOP, 52 | params_weights=self.params.WEIGHTS, 53 | params_classes=self.params.CLASSES 54 | ) 55 | 56 | return prepare_base_model_config 57 | 58 | 59 | 60 | 61 | def get_training_config(self) -> TrainingConfig: 62 | training = self.config.training 63 | prepare_base_model = self.config.prepare_base_model 64 | params = self.params 65 | training_data = os.path.join(self.config.data_ingestion.unzip_dir, "kidney-ct-scan-image") 66 | create_directories([ 67 | Path(training.root_dir) 68 | ]) 69 | 70 | training_config = TrainingConfig( 71 | root_dir=Path(training.root_dir), 72 | trained_model_path=Path(training.trained_model_path), 73 | updated_base_model_path=Path(prepare_base_model.updated_base_model_path), 74 | training_data=Path(training_data), 75 | params_epochs=params.EPOCHS, 76 | params_batch_size=params.BATCH_SIZE, 77 | params_is_augmentation=params.AUGMENTATION, 78 | params_image_size=params.IMAGE_SIZE 79 | ) 80 | 81 | return training_config 82 | 83 | 84 | 85 | def get_evaluation_config(self) -> EvaluationConfig: 86 | eval_config = EvaluationConfig( 87 | path_of_model="artifacts/training/model.h5", 88 | training_data="artifacts/data_ingestion/kidney-ct-scan-image", 89 | mlflow_uri="https://dagshub.com/entbappy/Kidney-Disease-Classification-MLflow-DVC.mlflow", 90 | all_params=self.params, 91 | params_image_size=self.params.IMAGE_SIZE, 92 | params_batch_size=self.params.BATCH_SIZE 93 | ) 94 | return eval_config 95 | 96 | -------------------------------------------------------------------------------- /src/cnnClassifier/constants/__init__.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | CONFIG_FILE_PATH = Path("config/config.yaml") 4 | PARAMS_FILE_PATH = Path("params.yaml") -------------------------------------------------------------------------------- /src/cnnClassifier/entity/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/krishnaik06/Kidney-Disease-Classification-Deep-Learning-Project/0e50432a49baa216e12867f9feed1b63505dda86/src/cnnClassifier/entity/__init__.py -------------------------------------------------------------------------------- /src/cnnClassifier/entity/config_entity.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from pathlib import Path 3 | 4 | 5 | @dataclass(frozen=True) 6 | class DataIngestionConfig: 7 | root_dir: Path 8 | source_URL: str 9 | local_data_file: Path 10 | unzip_dir: Path 11 | 12 | 13 | 14 | @dataclass(frozen=True) 15 | class PrepareBaseModelConfig: 16 | root_dir: Path 17 | base_model_path: Path 18 | updated_base_model_path: Path 19 | params_image_size: list 20 | params_learning_rate: float 21 | params_include_top: bool 22 | params_weights: str 23 | params_classes: int 24 | 25 | 26 | 27 | @dataclass(frozen=True) 28 | class TrainingConfig: 29 | root_dir: Path 30 | trained_model_path: Path 31 | updated_base_model_path: Path 32 | training_data: Path 33 |
params_epochs: int 34 | params_batch_size: int 35 | params_is_augmentation: bool 36 | params_image_size: list 37 | 38 | 39 | 40 | @dataclass(frozen=True) 41 | class EvaluationConfig: 42 | path_of_model: Path 43 | training_data: Path 44 | all_params: dict 45 | mlflow_uri: str 46 | params_image_size: list 47 | params_batch_size: int -------------------------------------------------------------------------------- /src/cnnClassifier/pipeline/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/krishnaik06/Kidney-Disease-Classification-Deep-Learning-Project/0e50432a49baa216e12867f9feed1b63505dda86/src/cnnClassifier/pipeline/__init__.py -------------------------------------------------------------------------------- /src/cnnClassifier/pipeline/prediction.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from tensorflow.keras.models import load_model 3 | from tensorflow.keras.preprocessing import image 4 | import os 5 | 6 | 7 | 8 | class PredictionPipeline: 9 | def __init__(self,filename): 10 | self.filename =filename 11 | 12 | 13 | 14 | def predict(self): 15 | # load model 16 | model = load_model(os.path.join("model", "model.h5")) 17 | 18 | imagename = self.filename 19 | test_image = image.load_img(imagename, target_size = (224,224)) 20 | test_image = image.img_to_array(test_image) 21 | test_image = np.expand_dims(test_image, axis = 0) 22 | result = np.argmax(model.predict(test_image), axis=1) 23 | print(result) 24 | 25 | if result[0] == 1: 26 | prediction = 'Tumor' 27 | return [{ "image" : prediction}] 28 | else: 29 | prediction = 'Normal' 30 | return [{ "image" : prediction}] -------------------------------------------------------------------------------- /src/cnnClassifier/pipeline/stage_01_data_ingestion.py: -------------------------------------------------------------------------------- 1 | from cnnClassifier.config.configuration import ConfigurationManager 2 | from cnnClassifier.components.data_ingestion import DataIngestion 3 | from cnnClassifier import logger 4 | 5 | STAGE_NAME = "Data Ingestion stage" 6 | 7 | 8 | class DataIngestionTrainingPipeline: 9 | def __init__(self): 10 | pass 11 | 12 | def main(self): 13 | config = ConfigurationManager() 14 | data_ingestion_config = config.get_data_ingestion_config() 15 | data_ingestion = DataIngestion(config=data_ingestion_config) 16 | data_ingestion.download_file() 17 | data_ingestion.extract_zip_file() 18 | 19 | 20 | 21 | if __name__ == '__main__': 22 | try: 23 | logger.info(f">>>>>> stage {STAGE_NAME} started <<<<<<") 24 | obj = DataIngestionTrainingPipeline() 25 | obj.main() 26 | logger.info(f">>>>>> stage {STAGE_NAME} completed <<<<<<\n\nx==========x") 27 | except Exception as e: 28 | logger.exception(e) 29 | raise e 30 | -------------------------------------------------------------------------------- /src/cnnClassifier/pipeline/stage_02_prepare_base_model.py: -------------------------------------------------------------------------------- 1 | from cnnClassifier.config.configuration import ConfigurationManager 2 | from cnnClassifier.components.prepare_base_model import PrepareBaseModel 3 | from cnnClassifier import logger 4 | 5 | 6 | STAGE_NAME = "Prepare base model" 7 | 8 | 9 | class PrepareBaseModelTrainingPipeline: 10 | def __init__(self): 11 | pass 12 | 13 | def main(self): 14 | config = ConfigurationManager() 15 | prepare_base_model_config = config.get_prepare_base_model_config() 16 | 
prepare_base_model = PrepareBaseModel(config=prepare_base_model_config) 17 | prepare_base_model.get_base_model() 18 | prepare_base_model.update_base_model() 19 | 20 | 21 | 22 | if __name__ == '__main__': 23 | try: 24 | logger.info(f"*******************") 25 | logger.info(f">>>>>> stage {STAGE_NAME} started <<<<<<") 26 | obj = PrepareBaseModelTrainingPipeline() 27 | obj.main() 28 | logger.info(f">>>>>> stage {STAGE_NAME} completed <<<<<<\n\nx==========x") 29 | except Exception as e: 30 | logger.exception(e) 31 | raise e -------------------------------------------------------------------------------- /src/cnnClassifier/pipeline/stage_03_model_training.py: -------------------------------------------------------------------------------- 1 | from cnnClassifier.config.configuration import ConfigurationManager 2 | from cnnClassifier.components.model_training import Training 3 | from cnnClassifier import logger 4 | 5 | 6 | 7 | STAGE_NAME = "Training" 8 | 9 | 10 | 11 | class ModelTrainingPipeline: 12 | def __init__(self): 13 | pass 14 | 15 | def main(self): 16 | config = ConfigurationManager() 17 | training_config = config.get_training_config() 18 | training = Training(config=training_config) 19 | training.get_base_model() 20 | training.train_valid_generator() 21 | training.train() 22 | 23 | 24 | 25 | if __name__ == '__main__': 26 | try: 27 | logger.info(f"*******************") 28 | logger.info(f">>>>>> stage {STAGE_NAME} started <<<<<<") 29 | obj = ModelTrainingPipeline() 30 | obj.main() 31 | logger.info(f">>>>>> stage {STAGE_NAME} completed <<<<<<\n\nx==========x") 32 | except Exception as e: 33 | logger.exception(e) 34 | raise e 35 | 36 | -------------------------------------------------------------------------------- /src/cnnClassifier/pipeline/stage_04_model_evaluation.py: -------------------------------------------------------------------------------- 1 | from cnnClassifier.config.configuration import ConfigurationManager 2 | from cnnClassifier.components.model_evaluation_mlflow import Evaluation 3 | from cnnClassifier import logger 4 | 5 | 6 | 7 | STAGE_NAME = "Evaluation stage" 8 | 9 | 10 | class EvaluationPipeline: 11 | def __init__(self): 12 | pass 13 | 14 | def main(self): 15 | config = ConfigurationManager() 16 | eval_config = config.get_evaluation_config() 17 | evaluation = Evaluation(eval_config) 18 | evaluation.evaluation() 19 | evaluation.save_score() 20 | # evaluation.log_into_mlflow() 21 | 22 | 23 | 24 | 25 | if __name__ == '__main__': 26 | try: 27 | logger.info(f"*******************") 28 | logger.info(f">>>>>> stage {STAGE_NAME} started <<<<<<") 29 | obj = EvaluationPipeline() 30 | obj.main() 31 | logger.info(f">>>>>> stage {STAGE_NAME} completed <<<<<<\n\nx==========x") 32 | except Exception as e: 33 | logger.exception(e) 34 | raise e 35 | -------------------------------------------------------------------------------- /src/cnnClassifier/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/krishnaik06/Kidney-Disease-Classification-Deep-Learning-Project/0e50432a49baa216e12867f9feed1b63505dda86/src/cnnClassifier/utils/__init__.py -------------------------------------------------------------------------------- /src/cnnClassifier/utils/common.py: -------------------------------------------------------------------------------- 1 | import os 2 | from box.exceptions import BoxValueError 3 | import yaml 4 | from cnnClassifier import logger 5 | import json 6 | import joblib 7 | from ensure import 
ensure_annotations 8 | from box import ConfigBox 9 | from pathlib import Path 10 | from typing import Any 11 | import base64 12 | 13 | 14 | 15 | @ensure_annotations 16 | def read_yaml(path_to_yaml: Path) -> ConfigBox: 17 | """reads a yaml file and returns its content as a ConfigBox 18 | 19 | Args: 20 | path_to_yaml (Path): path like input 21 | 22 | Raises: 23 | ValueError: if yaml file is empty 24 | e: any other error raised while reading the file 25 | 26 | Returns: 27 | ConfigBox: ConfigBox type 28 | """ 29 | try: 30 | with open(path_to_yaml) as yaml_file: 31 | content = yaml.safe_load(yaml_file) 32 | logger.info(f"yaml file: {path_to_yaml} loaded successfully") 33 | return ConfigBox(content) 34 | except BoxValueError: 35 | raise ValueError("yaml file is empty") 36 | except Exception as e: 37 | raise e 38 | 39 | 40 | 41 | @ensure_annotations 42 | def create_directories(path_to_directories: list, verbose=True): 43 | """create list of directories 44 | 45 | Args: 46 | path_to_directories (list): list of path of directories 47 | verbose (bool, optional): log each created directory. Defaults to True. 48 | """ 49 | for path in path_to_directories: 50 | os.makedirs(path, exist_ok=True) 51 | if verbose: 52 | logger.info(f"created directory at: {path}") 53 | 54 | 55 | @ensure_annotations 56 | def save_json(path: Path, data: dict): 57 | """save json data 58 | 59 | Args: 60 | path (Path): path to json file 61 | data (dict): data to be saved in json file 62 | """ 63 | with open(path, "w") as f: 64 | json.dump(data, f, indent=4) 65 | 66 | logger.info(f"json file saved at: {path}") 67 | 68 | 69 | 70 | 71 | @ensure_annotations 72 | def load_json(path: Path) -> ConfigBox: 73 | """load json files data 74 | 75 | Args: 76 | path (Path): path to json file 77 | 78 | Returns: 79 | ConfigBox: data as class attributes instead of dict 80 | """ 81 | with open(path) as f: 82 | content = json.load(f) 83 | 84 | logger.info(f"json file loaded successfully from: {path}") 85 | return ConfigBox(content) 86 | 87 | 88 | @ensure_annotations 89 | def save_bin(data: Any, path: Path): 90 | """save binary file 91 | 92 | Args: 93 | data (Any): data to be saved as binary 94 | path (Path): path to binary file 95 | """ 96 | joblib.dump(value=data, filename=path) 97 | logger.info(f"binary file saved at: {path}") 98 | 99 | 100 | @ensure_annotations 101 | def load_bin(path: Path) -> Any: 102 | """load binary data 103 | 104 | Args: 105 | path (Path): path to binary file 106 | 107 | Returns: 108 | Any: object stored in the file 109 | """ 110 | data = joblib.load(path) 111 | logger.info(f"binary file loaded from: {path}") 112 | return data 113 | 114 | @ensure_annotations 115 | def get_size(path: Path) -> str: 116 | """get size in KB 117 | 118 | Args: 119 | path (Path): path of the file 120 | 121 | Returns: 122 | str: size in KB 123 | """ 124 | size_in_kb = round(os.path.getsize(path)/1024) 125 | return f"~ {size_in_kb} KB" 126 | 127 | 128 | def decodeImage(imgstring, fileName): 129 | imgdata = base64.b64decode(imgstring) 130 | with open(fileName, 'wb') as f: 131 | f.write(imgdata) 132 | 133 | 134 | 135 | def encodeImageIntoBase64(croppedImagePath): 136 | with open(croppedImagePath, "rb") as f: 137 | return base64.b64encode(f.read()) 138 | 139 | 140 |
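# A round-trip sketch for the two base64 helpers above (the file names are
# hypothetical). encodeImageIntoBase64 returns bytes, and base64.b64decode
# inside decodeImage accepts bytes directly:
#
#     encoded = encodeImageIntoBase64("inputImage.jpg")
#     decodeImage(encoded, "restoredImage.jpg")  # writes a byte-identical copy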
-------------------------------------------------------------------------------- /template.py: -------------------------------------------------------------------------------- 1 | import os 2 | from pathlib import Path 3 | import logging 4 | 5 | # logging string 6 | logging.basicConfig(level=logging.INFO, format='[%(asctime)s]: %(message)s:') 7 | 8 | project_name = 'cnnClassifier' 9 | 10 | list_of_files = [ 11 | ".github/workflows/.gitkeep", 12 | f"src/{project_name}/__init__.py", 13 | f"src/{project_name}/components/__init__.py", 14 | f"src/{project_name}/utils/__init__.py", 15 | f"src/{project_name}/config/__init__.py", 16 | f"src/{project_name}/config/configuration.py", 17 | f"src/{project_name}/pipeline/__init__.py", 18 | f"src/{project_name}/entity/__init__.py", 19 | f"src/{project_name}/constants/__init__.py", 20 | "config/config.yaml", 21 | "dvc.yaml", 22 | "params.yaml", 23 | "requirements.txt", 24 | "setup.py", 25 | "research/trials.ipynb", 26 | "templates/index.html" 27 | 28 | 29 | ] 30 | 31 | 32 | for filepath in list_of_files: 33 | filepath = Path(filepath) 34 | filedir, filename = os.path.split(filepath) 35 | 36 | 37 | if filedir !="": 38 | os.makedirs(filedir, exist_ok=True) 39 | logging.info(f"Creating directory: {filedir} for the file: {filename}") 40 | 41 | if (not os.path.exists(filepath)) or (os.path.getsize(filepath) == 0): 42 | with open(filepath, "w") as f: 43 | pass 44 | logging.info(f"Creating empty file: {filepath}") 45 | 46 | 47 | else: 48 | logging.info(f"{filename} already exists") -------------------------------------------------------------------------------- /templates/index.html: -------------------------------------------------------------------------------- [markup stripped during extraction: the page is titled "cnncls" and renders an "Object Classification" UI with an image upload preview, "Upload" and "Predict" buttons, and a "Prediction Results" panel; the original file also carried inline CSS and JavaScript that did not survive flattening.] --------------------------------------------------------------------------------
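With every stage in place, the prediction pipeline can be smoke-tested from a Python shell. A minimal sketch, assuming the package is installed, the command is run from the repository root (so model/model.h5 resolves), and the image path is illustrative:

from cnnClassifier.pipeline.prediction import PredictionPipeline

# The pipeline resizes the image to 224x224, runs the Keras model, and maps
# class index 1 to "Tumor" and index 0 to "Normal".
result = PredictionPipeline("inputImage.jpg").predict()
print(result)  # e.g. [{"image": "Tumor"}] or [{"image": "Normal"}]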