├── .dvc ├── .gitignore ├── config └── plots │ ├── confusion.json │ ├── confusion_normalized.json │ ├── default.json │ ├── linear.json │ ├── scatter.json │ └── smooth.json ├── .dvcignore ├── .github └── workflows │ ├── basic.yaml │ └── build_docker_image.yaml ├── .gitignore ├── LICENSE ├── README.md ├── images ├── basic_flow.png ├── docker_flow.png ├── dvc.png ├── ecr_flow.png ├── hydra.png ├── kibana_flow.png ├── lambda_flow.png ├── onnx.jpeg ├── pl.jpeg ├── summary.png └── wandb.png ├── week_0_project_setup ├── README.md ├── data.py ├── experimental_notebooks │ └── data_exploration.ipynb ├── inference.py ├── model.py ├── requirements.txt └── train.py ├── week_1_wandb_logging ├── README.md ├── data.py ├── experimental_notebooks │ └── data_exploration.ipynb ├── inference.py ├── model.py ├── requirements.txt └── train.py ├── week_2_hydra_config ├── README.md ├── configs │ ├── config.yaml │ ├── model │ │ └── default.yaml │ ├── processing │ │ └── default.yaml │ └── training │ │ └── default.yaml ├── data.py ├── experimental_notebooks │ └── data_exploration.ipynb ├── inference.py ├── model.py ├── requirements.txt └── train.py ├── week_3_dvc ├── README.md ├── configs │ ├── config.yaml │ ├── model │ │ └── default.yaml │ ├── processing │ │ └── default.yaml │ └── training │ │ └── default.yaml ├── data.py ├── dvcfiles │ └── trained_model.dvc ├── experimental_notebooks │ └── data_exploration.ipynb ├── inference.py ├── model.py ├── models │ └── .gitignore ├── requirements.txt └── train.py ├── week_4_onnx ├── README.md ├── configs │ ├── config.yaml │ ├── model │ │ └── default.yaml │ ├── processing │ │ └── default.yaml │ └── training │ │ └── default.yaml ├── convert_model_to_onnx.py ├── data.py ├── dvcfiles │ └── trained_model.dvc ├── experimental_notebooks │ └── data_exploration.ipynb ├── inference.py ├── inference_onnx.py ├── model.py ├── requirements.txt ├── train.py └── utils.py ├── week_5_docker ├── Dockerfile ├── README.md ├── app.py ├── configs │ ├── config.yaml │ ├── 
model │ │ └── default.yaml │ ├── processing │ │ └── default.yaml │ └── training │ │ └── default.yaml ├── convert_model_to_onnx.py ├── data.py ├── docker-compose.yml ├── dvcfiles │ └── trained_model.dvc ├── experimental_notebooks │ └── data_exploration.ipynb ├── inference.py ├── inference_onnx.py ├── model.py ├── requirements.txt ├── requirements_inference.txt ├── train.py └── utils.py ├── week_6_github_actions ├── Dockerfile ├── README.md ├── app.py ├── configs │ ├── config.yaml │ ├── model │ │ └── default.yaml │ ├── processing │ │ └── default.yaml │ └── training │ │ └── default.yaml ├── convert_model_to_onnx.py ├── data.py ├── docker-compose.yml ├── dvcfiles │ └── trained_model.dvc ├── experimental_notebooks │ └── data_exploration.ipynb ├── inference.py ├── inference_onnx.py ├── model.py ├── parse_json.py ├── requirements.txt ├── requirements_inference.txt ├── train.py └── utils.py ├── week_7_ecr ├── Dockerfile ├── README.md ├── app.py ├── configs │ ├── config.yaml │ ├── model │ │ └── default.yaml │ ├── processing │ │ └── default.yaml │ └── training │ │ └── default.yaml ├── convert_model_to_onnx.py ├── data.py ├── docker-compose.yml ├── dvcfiles │ └── trained_model.dvc ├── experimental_notebooks │ └── data_exploration.ipynb ├── inference.py ├── inference_onnx.py ├── model.py ├── parse_json.py ├── requirements.txt ├── requirements_inference.txt ├── train.py └── utils.py ├── week_8_serverless ├── Dockerfile ├── README.md ├── app.py ├── configs │ ├── config.yaml │ ├── model │ │ └── default.yaml │ ├── processing │ │ └── default.yaml │ └── training │ │ └── default.yaml ├── convert_model_to_onnx.py ├── data.py ├── docker-compose.yml ├── dvcfiles │ └── trained_model.dvc ├── experimental_notebooks │ └── data_exploration.ipynb ├── inference.py ├── inference_onnx.py ├── lambda_handler.py ├── model.py ├── parse_json.py ├── requirements.txt ├── requirements_inference.txt ├── train.py └── utils.py └── week_9_monitoring ├── Dockerfile ├── README.md ├── app.py ├── configs ├── 
config.yaml ├── model │ └── default.yaml ├── processing │ └── default.yaml └── training │ └── default.yaml ├── convert_model_to_onnx.py ├── data.py ├── docker-compose.yml ├── dvcfiles └── trained_model.dvc ├── experimental_notebooks └── data_exploration.ipynb ├── inference.py ├── inference_onnx.py ├── lambda_handler.py ├── model.py ├── parse_json.py ├── requirements.txt ├── requirements_inference.txt ├── train.py └── utils.py /.dvc/.gitignore: -------------------------------------------------------------------------------- 1 | /config.local 2 | /tmp 3 | /cache 4 | -------------------------------------------------------------------------------- /.dvc/config: -------------------------------------------------------------------------------- 1 | [core] 2 | remote = model-store 3 | ['remote "storage"'] 4 | url = gdrive://19JK5AFbqOBlrFVwDHjTrf9uvQFtS0954 5 | ['remote "model-store"'] 6 | url = s3://models-dvc/trained_models/ 7 | -------------------------------------------------------------------------------- /.dvc/plots/confusion.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://vega.github.io/schema/vega-lite/v4.json", 3 | "data": { 4 | "values": "" 5 | }, 6 | "title": "", 7 | "facet": { 8 | "field": "rev", 9 | "type": "nominal" 10 | }, 11 | "spec": { 12 | "transform": [ 13 | { 14 | "aggregate": [ 15 | { 16 | "op": "count", 17 | "as": "xy_count" 18 | } 19 | ], 20 | "groupby": [ 21 | "", 22 | "" 23 | ] 24 | }, 25 | { 26 | "impute": "xy_count", 27 | "groupby": [ 28 | "rev", 29 | "" 30 | ], 31 | "key": "", 32 | "value": 0 33 | }, 34 | { 35 | "impute": "xy_count", 36 | "groupby": [ 37 | "rev", 38 | "" 39 | ], 40 | "key": "", 41 | "value": 0 42 | }, 43 | { 44 | "joinaggregate": [ 45 | { 46 | "op": "max", 47 | "field": "xy_count", 48 | "as": "max_count" 49 | } 50 | ], 51 | "groupby": [] 52 | }, 53 | { 54 | "calculate": "datum.xy_count / datum.max_count", 55 | "as": "percent_of_max" 56 | } 57 | ], 58 | 
"encoding": { 59 | "x": { 60 | "field": "", 61 | "type": "nominal", 62 | "sort": "ascending", 63 | "title": "" 64 | }, 65 | "y": { 66 | "field": "", 67 | "type": "nominal", 68 | "sort": "ascending", 69 | "title": "" 70 | } 71 | }, 72 | "layer": [ 73 | { 74 | "mark": "rect", 75 | "width": 300, 76 | "height": 300, 77 | "encoding": { 78 | "color": { 79 | "field": "xy_count", 80 | "type": "quantitative", 81 | "title": "", 82 | "scale": { 83 | "domainMin": 0, 84 | "nice": true 85 | } 86 | } 87 | } 88 | }, 89 | { 90 | "mark": "text", 91 | "encoding": { 92 | "text": { 93 | "field": "xy_count", 94 | "type": "quantitative" 95 | }, 96 | "color": { 97 | "condition": { 98 | "test": "datum.percent_of_max > 0.5", 99 | "value": "white" 100 | }, 101 | "value": "black" 102 | } 103 | } 104 | } 105 | ] 106 | } 107 | } 108 | -------------------------------------------------------------------------------- /.dvc/plots/confusion_normalized.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://vega.github.io/schema/vega-lite/v4.json", 3 | "data": { 4 | "values": "" 5 | }, 6 | "title": "", 7 | "facet": { 8 | "field": "rev", 9 | "type": "nominal" 10 | }, 11 | "spec": { 12 | "transform": [ 13 | { 14 | "aggregate": [ 15 | { 16 | "op": "count", 17 | "as": "xy_count" 18 | } 19 | ], 20 | "groupby": [ 21 | "", 22 | "" 23 | ] 24 | }, 25 | { 26 | "impute": "xy_count", 27 | "groupby": [ 28 | "rev", 29 | "" 30 | ], 31 | "key": "", 32 | "value": 0 33 | }, 34 | { 35 | "impute": "xy_count", 36 | "groupby": [ 37 | "rev", 38 | "" 39 | ], 40 | "key": "", 41 | "value": 0 42 | }, 43 | { 44 | "joinaggregate": [ 45 | { 46 | "op": "sum", 47 | "field": "xy_count", 48 | "as": "sum_y" 49 | } 50 | ], 51 | "groupby": [ 52 | "" 53 | ] 54 | }, 55 | { 56 | "calculate": "datum.xy_count / datum.sum_y", 57 | "as": "percent_of_y" 58 | } 59 | ], 60 | "encoding": { 61 | "x": { 62 | "field": "", 63 | "type": "nominal", 64 | "sort": "ascending", 65 | "title": "" 66 
| }, 67 | "y": { 68 | "field": "", 69 | "type": "nominal", 70 | "sort": "ascending", 71 | "title": "" 72 | } 73 | }, 74 | "layer": [ 75 | { 76 | "mark": "rect", 77 | "width": 300, 78 | "height": 300, 79 | "encoding": { 80 | "color": { 81 | "field": "percent_of_y", 82 | "type": "quantitative", 83 | "title": "", 84 | "scale": { 85 | "domain": [ 86 | 0, 87 | 1 88 | ] 89 | } 90 | } 91 | } 92 | }, 93 | { 94 | "mark": "text", 95 | "encoding": { 96 | "text": { 97 | "field": "percent_of_y", 98 | "type": "quantitative", 99 | "format": ".2f" 100 | }, 101 | "color": { 102 | "condition": { 103 | "test": "datum.percent_of_y > 0.5", 104 | "value": "white" 105 | }, 106 | "value": "black" 107 | } 108 | } 109 | } 110 | ] 111 | } 112 | } 113 | -------------------------------------------------------------------------------- /.dvc/plots/default.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://vega.github.io/schema/vega-lite/v4.json", 3 | "data": { 4 | "values": "" 5 | }, 6 | "title": "", 7 | "width": 300, 8 | "height": 300, 9 | "mark": { 10 | "type": "line" 11 | }, 12 | "encoding": { 13 | "x": { 14 | "field": "", 15 | "type": "quantitative", 16 | "title": "" 17 | }, 18 | "y": { 19 | "field": "", 20 | "type": "quantitative", 21 | "title": "", 22 | "scale": { 23 | "zero": false 24 | } 25 | }, 26 | "color": { 27 | "field": "rev", 28 | "type": "nominal" 29 | } 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /.dvc/plots/linear.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://vega.github.io/schema/vega-lite/v4.json", 3 | "data": { 4 | "values": "" 5 | }, 6 | "title": "", 7 | "width": 300, 8 | "height": 300, 9 | "layer": [ 10 | { 11 | "encoding": { 12 | "x": { 13 | "field": "", 14 | "type": "quantitative", 15 | "title": "" 16 | }, 17 | "y": { 18 | "field": "", 19 | "type": "quantitative", 20 | "title": "", 
21 | "scale": { 22 | "zero": false 23 | } 24 | }, 25 | "color": { 26 | "field": "rev", 27 | "type": "nominal" 28 | } 29 | }, 30 | "layer": [ 31 | { 32 | "mark": "line" 33 | }, 34 | { 35 | "selection": { 36 | "label": { 37 | "type": "single", 38 | "nearest": true, 39 | "on": "mouseover", 40 | "encodings": [ 41 | "x" 42 | ], 43 | "empty": "none", 44 | "clear": "mouseout" 45 | } 46 | }, 47 | "mark": "point", 48 | "encoding": { 49 | "opacity": { 50 | "condition": { 51 | "selection": "label", 52 | "value": 1 53 | }, 54 | "value": 0 55 | } 56 | } 57 | } 58 | ] 59 | }, 60 | { 61 | "transform": [ 62 | { 63 | "filter": { 64 | "selection": "label" 65 | } 66 | } 67 | ], 68 | "layer": [ 69 | { 70 | "mark": { 71 | "type": "rule", 72 | "color": "gray" 73 | }, 74 | "encoding": { 75 | "x": { 76 | "field": "", 77 | "type": "quantitative" 78 | } 79 | } 80 | }, 81 | { 82 | "encoding": { 83 | "text": { 84 | "type": "quantitative", 85 | "field": "" 86 | }, 87 | "x": { 88 | "field": "", 89 | "type": "quantitative" 90 | }, 91 | "y": { 92 | "field": "", 93 | "type": "quantitative" 94 | } 95 | }, 96 | "layer": [ 97 | { 98 | "mark": { 99 | "type": "text", 100 | "align": "left", 101 | "dx": 5, 102 | "dy": -5 103 | }, 104 | "encoding": { 105 | "color": { 106 | "type": "nominal", 107 | "field": "rev" 108 | } 109 | } 110 | } 111 | ] 112 | } 113 | ] 114 | } 115 | ] 116 | } 117 | -------------------------------------------------------------------------------- /.dvc/plots/scatter.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://vega.github.io/schema/vega-lite/v4.json", 3 | "data": { 4 | "values": "" 5 | }, 6 | "title": "", 7 | "width": 300, 8 | "height": 300, 9 | "layer": [ 10 | { 11 | "encoding": { 12 | "x": { 13 | "field": "", 14 | "type": "quantitative", 15 | "title": "" 16 | }, 17 | "y": { 18 | "field": "", 19 | "type": "quantitative", 20 | "title": "", 21 | "scale": { 22 | "zero": false 23 | } 24 | }, 25 | "color": { 26 | 
"field": "rev", 27 | "type": "nominal" 28 | } 29 | }, 30 | "layer": [ 31 | { 32 | "mark": "point" 33 | }, 34 | { 35 | "selection": { 36 | "label": { 37 | "type": "single", 38 | "nearest": true, 39 | "on": "mouseover", 40 | "encodings": [ 41 | "x" 42 | ], 43 | "empty": "none", 44 | "clear": "mouseout" 45 | } 46 | }, 47 | "mark": "point", 48 | "encoding": { 49 | "opacity": { 50 | "condition": { 51 | "selection": "label", 52 | "value": 1 53 | }, 54 | "value": 0 55 | } 56 | } 57 | } 58 | ] 59 | }, 60 | { 61 | "transform": [ 62 | { 63 | "filter": { 64 | "selection": "label" 65 | } 66 | } 67 | ], 68 | "layer": [ 69 | { 70 | "encoding": { 71 | "text": { 72 | "type": "quantitative", 73 | "field": "" 74 | }, 75 | "x": { 76 | "field": "", 77 | "type": "quantitative" 78 | }, 79 | "y": { 80 | "field": "", 81 | "type": "quantitative" 82 | } 83 | }, 84 | "layer": [ 85 | { 86 | "mark": { 87 | "type": "text", 88 | "align": "left", 89 | "dx": 5, 90 | "dy": -5 91 | }, 92 | "encoding": { 93 | "color": { 94 | "type": "nominal", 95 | "field": "rev" 96 | } 97 | } 98 | } 99 | ] 100 | } 101 | ] 102 | } 103 | ] 104 | } 105 | -------------------------------------------------------------------------------- /.dvc/plots/smooth.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://vega.github.io/schema/vega-lite/v4.json", 3 | "data": { 4 | "values": "" 5 | }, 6 | "title": "", 7 | "mark": { 8 | "type": "line" 9 | }, 10 | "encoding": { 11 | "x": { 12 | "field": "", 13 | "type": "quantitative", 14 | "title": "" 15 | }, 16 | "y": { 17 | "field": "", 18 | "type": "quantitative", 19 | "title": "", 20 | "scale": { 21 | "zero": false 22 | } 23 | }, 24 | "color": { 25 | "field": "rev", 26 | "type": "nominal" 27 | } 28 | }, 29 | "transform": [ 30 | { 31 | "loess": "", 32 | "on": "", 33 | "groupby": [ 34 | "rev" 35 | ], 36 | "bandwidth": 0.3 37 | } 38 | ] 39 | } 40 | 
-------------------------------------------------------------------------------- /.dvcignore: -------------------------------------------------------------------------------- 1 | # Add patterns of files dvc should ignore, which could improve 2 | # the performance. Learn more at 3 | # https://dvc.org/doc/user-guide/dvcignore 4 | -------------------------------------------------------------------------------- /.github/workflows/basic.yaml: -------------------------------------------------------------------------------- 1 | name: GitHub Actions Basic Flow 2 | on: [push] 3 | jobs: 4 | Basic-workflow: 5 | runs-on: ubuntu-latest 6 | steps: 7 | - name: Basic Information 8 | run: | 9 | echo "🎬 The job was automatically triggered by a ${{ github.event_name }} event." 10 | echo "💻 This job is now running on a ${{ runner.os }} server hosted by GitHub!" 11 | echo "🎋 Workflow is running on the branch ${{ github.ref }}" 12 | - name: Checking out the repository 13 | uses: actions/checkout@v2 14 | - name: Information after checking out 15 | run: | 16 | echo "💡 The ${{ github.repository }} repository has been cloned to the runner." 17 | echo "🖥️ The workflow is now ready to test your code on the runner." 18 | - name: List files in the repository 19 | run: | 20 | ls ${{ github.workspace }} 21 | - run: echo "🍏 This job's status is ${{ job.status }}." 
-------------------------------------------------------------------------------- /.github/workflows/build_docker_image.yaml: -------------------------------------------------------------------------------- 1 | name: Create Docker Container 2 | 3 | on: [push] 4 | 5 | jobs: 6 | mlops-container: 7 | runs-on: ubuntu-latest 8 | defaults: 9 | run: 10 | working-directory: ./week_9_monitoring 11 | steps: 12 | - name: Checkout 13 | uses: actions/checkout@v2 14 | with: 15 | ref: ${{ github.ref }} 16 | - name: Configure AWS Credentials 17 | uses: aws-actions/configure-aws-credentials@v1 18 | with: 19 | aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} 20 | aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} 21 | aws-region: us-west-2 22 | - name: Build container 23 | run: | 24 | docker build --build-arg AWS_ACCOUNT_ID=${{ secrets.AWS_ACCOUNT_ID }} \ 25 | --build-arg AWS_ACCESS_KEY_ID=${{ secrets.AWS_ACCESS_KEY_ID }} \ 26 | --build-arg AWS_SECRET_ACCESS_KEY=${{ secrets.AWS_SECRET_ACCESS_KEY }} \ 27 | --tag mlops-basics . 
28 | - name: Push2ECR 29 | id: ecr 30 | uses: jwalton/gh-ecr-push@v1 31 | with: 32 | access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} 33 | secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} 34 | region: us-west-2 35 | image: mlops-basics:latest 36 | 37 | - name: Update lambda with image 38 | run: aws lambda update-function-code --function-name MLOps-Basics --image-uri 246113150184.dkr.ecr.us-west-2.amazonaws.com/mlops-basics:latest 39 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | .vscode/ 131 | */logs/* 132 | */models/* 133 | */wandb/* 134 | */outputs/* 135 | */multirun/* 136 | 137 | .DS_Store 138 | */.DS_Store -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 raviraja 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /images/basic_flow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/graviraja/MLOps-Basics/558adce8203aed827952d0ca6516e188589fb930/images/basic_flow.png -------------------------------------------------------------------------------- /images/docker_flow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/graviraja/MLOps-Basics/558adce8203aed827952d0ca6516e188589fb930/images/docker_flow.png -------------------------------------------------------------------------------- /images/dvc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/graviraja/MLOps-Basics/558adce8203aed827952d0ca6516e188589fb930/images/dvc.png -------------------------------------------------------------------------------- /images/ecr_flow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/graviraja/MLOps-Basics/558adce8203aed827952d0ca6516e188589fb930/images/ecr_flow.png -------------------------------------------------------------------------------- /images/hydra.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/graviraja/MLOps-Basics/558adce8203aed827952d0ca6516e188589fb930/images/hydra.png -------------------------------------------------------------------------------- /images/kibana_flow.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/graviraja/MLOps-Basics/558adce8203aed827952d0ca6516e188589fb930/images/kibana_flow.png -------------------------------------------------------------------------------- /images/lambda_flow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/graviraja/MLOps-Basics/558adce8203aed827952d0ca6516e188589fb930/images/lambda_flow.png -------------------------------------------------------------------------------- /images/onnx.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/graviraja/MLOps-Basics/558adce8203aed827952d0ca6516e188589fb930/images/onnx.jpeg -------------------------------------------------------------------------------- /images/pl.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/graviraja/MLOps-Basics/558adce8203aed827952d0ca6516e188589fb930/images/pl.jpeg -------------------------------------------------------------------------------- /images/summary.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/graviraja/MLOps-Basics/558adce8203aed827952d0ca6516e188589fb930/images/summary.png -------------------------------------------------------------------------------- /images/wandb.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/graviraja/MLOps-Basics/558adce8203aed827952d0ca6516e188589fb930/images/wandb.png -------------------------------------------------------------------------------- /week_0_project_setup/README.md: -------------------------------------------------------------------------------- 1 | 2 | **Note: The purpose of the project to explore the libraries and learn how to use them. 
Not to build a SOTA model.** 3 | 4 | ## Requirements: 5 | 6 | This project uses Python 3.8 7 | 8 | Create a virtual env with the following command: 9 | 10 | ``` 11 | conda create --name project-setup python=3.8 12 | conda activate project-setup 13 | ``` 14 | 15 | Install the requirements: 16 | 17 | ``` 18 | pip install -r requirements.txt 19 | ``` 20 | 21 | ## Running 22 | 23 | ### Training 24 | 25 | After installing the requirements, in order to train the model simply run: 26 | 27 | ``` 28 | python train.py 29 | ``` 30 | 31 | ### Inference 32 | 33 | After training, update the model checkpoint path in the code and run 34 | 35 | ``` 36 | python inference.py 37 | ``` 38 | 39 | ### Running notebooks 40 | 41 | I am using [Jupyter lab](https://jupyter.org/install) to run the notebooks. 42 | 43 | Since I am using a virtualenv, when I run the command `jupyter lab` it might or might not use the virtualenv. 44 | 45 | To make sure to use the virutalenv, run the following commands before running `jupyter lab` 46 | 47 | ``` 48 | conda install ipykernel 49 | python -m ipykernel install --user --name project-setup 50 | pip install ipywidgets 51 | ``` 52 | 53 | 54 | -------------------------------------------------------------------------------- /week_0_project_setup/data.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import datasets 3 | import pytorch_lightning as pl 4 | 5 | from datasets import load_dataset 6 | from transformers import AutoTokenizer 7 | 8 | 9 | class DataModule(pl.LightningDataModule): 10 | def __init__(self, model_name="google/bert_uncased_L-2_H-128_A-2", batch_size=32): 11 | super().__init__() 12 | 13 | self.batch_size = batch_size 14 | self.tokenizer = AutoTokenizer.from_pretrained(model_name) 15 | 16 | def prepare_data(self): 17 | cola_dataset = load_dataset("glue", "cola") 18 | self.train_data = cola_dataset["train"] 19 | self.val_data = cola_dataset["validation"] 20 | 21 | def tokenize_data(self, 
example): 22 | return self.tokenizer( 23 | example["sentence"], 24 | truncation=True, 25 | padding="max_length", 26 | max_length=512, 27 | ) 28 | 29 | def setup(self, stage=None): 30 | # we set up only relevant datasets when stage is specified 31 | if stage == "fit" or stage is None: 32 | self.train_data = self.train_data.map(self.tokenize_data, batched=True) 33 | self.train_data.set_format( 34 | type="torch", columns=["input_ids", "attention_mask", "label"] 35 | ) 36 | 37 | self.val_data = self.val_data.map(self.tokenize_data, batched=True) 38 | self.val_data.set_format( 39 | type="torch", columns=["input_ids", "attention_mask", "label"] 40 | ) 41 | 42 | def train_dataloader(self): 43 | return torch.utils.data.DataLoader( 44 | self.train_data, batch_size=self.batch_size, shuffle=True 45 | ) 46 | 47 | def val_dataloader(self): 48 | return torch.utils.data.DataLoader( 49 | self.val_data, batch_size=self.batch_size, shuffle=False 50 | ) 51 | 52 | 53 | if __name__ == "__main__": 54 | data_model = DataModule() 55 | data_model.prepare_data() 56 | data_model.setup() 57 | print(next(iter(data_model.train_dataloader()))["input_ids"].shape) 58 | -------------------------------------------------------------------------------- /week_0_project_setup/inference.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from model import ColaModel 3 | from data import DataModule 4 | 5 | 6 | class ColaPredictor: 7 | def __init__(self, model_path): 8 | self.model_path = model_path 9 | self.model = ColaModel.load_from_checkpoint(model_path) 10 | self.model.eval() 11 | self.model.freeze() 12 | self.processor = DataModule() 13 | self.softmax = torch.nn.Softmax(dim=0) 14 | self.lables = ["unacceptable", "acceptable"] 15 | 16 | def predict(self, text): 17 | inference_sample = {"sentence": text} 18 | processed = self.processor.tokenize_data(inference_sample) 19 | logits = self.model( 20 | torch.tensor([processed["input_ids"]]), 21 | 
torch.tensor([processed["attention_mask"]]), 22 | ) 23 | scores = self.softmax(logits[0]).tolist() 24 | predictions = [] 25 | for score, label in zip(scores, self.lables): 26 | predictions.append({"label": label, "score": score}) 27 | return predictions 28 | 29 | 30 | if __name__ == "__main__": 31 | sentence = "The boy is sitting on a bench" 32 | predictor = ColaPredictor("./models/epoch=0-step=267.ckpt") 33 | print(predictor.predict(sentence)) 34 | -------------------------------------------------------------------------------- /week_0_project_setup/model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import pytorch_lightning as pl 4 | import torch.nn.functional as F 5 | from transformers import AutoModel 6 | from sklearn.metrics import accuracy_score 7 | 8 | 9 | class ColaModel(pl.LightningModule): 10 | def __init__(self, model_name="google/bert_uncased_L-2_H-128_A-2", lr=1e-2): 11 | super(ColaModel, self).__init__() 12 | self.save_hyperparameters() 13 | 14 | self.bert = AutoModel.from_pretrained(model_name) 15 | self.W = nn.Linear(self.bert.config.hidden_size, 2) 16 | self.num_classes = 2 17 | 18 | def forward(self, input_ids, attention_mask): 19 | outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask) 20 | 21 | h_cls = outputs.last_hidden_state[:, 0] 22 | logits = self.W(h_cls) 23 | return logits 24 | 25 | def training_step(self, batch, batch_idx): 26 | logits = self.forward(batch["input_ids"], batch["attention_mask"]) 27 | loss = F.cross_entropy(logits, batch["label"]) 28 | self.log("train_loss", loss, prog_bar=True) 29 | return loss 30 | 31 | def validation_step(self, batch, batch_idx): 32 | logits = self.forward(batch["input_ids"], batch["attention_mask"]) 33 | loss = F.cross_entropy(logits, batch["label"]) 34 | _, preds = torch.max(logits, dim=1) 35 | val_acc = accuracy_score(preds.cpu(), batch["label"].cpu()) 36 | val_acc = torch.tensor(val_acc) 37 | 
self.log("val_loss", loss, prog_bar=True) 38 | self.log("val_acc", val_acc, prog_bar=True) 39 | 40 | def configure_optimizers(self): 41 | return torch.optim.Adam(self.parameters(), lr=self.hparams["lr"]) 42 | -------------------------------------------------------------------------------- /week_0_project_setup/requirements.txt: -------------------------------------------------------------------------------- 1 | pytorch-lightning==1.2.10 2 | datasets==1.6.2 3 | transformers==4.5.1 4 | scikit-learn==0.24.2 -------------------------------------------------------------------------------- /week_0_project_setup/train.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import pytorch_lightning as pl 3 | from pytorch_lightning.callbacks import ModelCheckpoint 4 | from pytorch_lightning.callbacks.early_stopping import EarlyStopping 5 | 6 | from data import DataModule 7 | from model import ColaModel 8 | 9 | 10 | def main(): 11 | cola_data = DataModule() 12 | cola_model = ColaModel() 13 | 14 | checkpoint_callback = ModelCheckpoint( 15 | dirpath="./models", monitor="val_loss", mode="min" 16 | ) 17 | early_stopping_callback = EarlyStopping( 18 | monitor="val_loss", patience=3, verbose=True, mode="min" 19 | ) 20 | 21 | trainer = pl.Trainer( 22 | default_root_dir="logs", 23 | gpus=(1 if torch.cuda.is_available() else 0), 24 | max_epochs=5, 25 | fast_dev_run=False, 26 | logger=pl.loggers.TensorBoardLogger("logs/", name="cola", version=1), 27 | callbacks=[checkpoint_callback, early_stopping_callback], 28 | ) 29 | trainer.fit(cola_model, cola_data) 30 | 31 | 32 | if __name__ == "__main__": 33 | main() 34 | -------------------------------------------------------------------------------- /week_1_wandb_logging/README.md: -------------------------------------------------------------------------------- 1 | 2 | **Note: The purpose of the project to explore the libraries and learn how to use them. 
Not to build a SOTA model.** 3 | 4 | ## Requirements: 5 | 6 | This project uses Python 3.8 7 | 8 | Create a virtual env with the following command: 9 | 10 | ``` 11 | conda create --name project-setup python=3.8 12 | conda activate project-setup 13 | ``` 14 | 15 | Install the requirements: 16 | 17 | ``` 18 | pip install -r requirements.txt 19 | ``` 20 | 21 | ## Running 22 | 23 | ### Training 24 | 25 | After installing the requirements, in order to train the model simply run: 26 | 27 | ``` 28 | python train.py 29 | ``` 30 | 31 | ### Monitoring 32 | 33 | Once the training is completed in the end of the logs you will see something like: 34 | 35 | ``` 36 | wandb: Synced 5 W&B file(s), 4 media file(s), 3 artifact file(s) and 0 other file(s) 37 | wandb: 38 | wandb: Synced proud-mountain-77: https://wandb.ai/raviraja/MLOps%20Basics/runs/3vp1twdc 39 | ``` 40 | 41 | Follow the link to see the wandb dashboard which contains all the plots. 42 | 43 | ### Inference 44 | 45 | After training, update the model checkpoint path in the code and run 46 | 47 | ``` 48 | python inference.py 49 | ``` 50 | 51 | ### Running notebooks 52 | 53 | I am using [Jupyter lab](https://jupyter.org/install) to run the notebooks. 54 | 55 | Since I am using a virtualenv, when I run the command `jupyter lab` it might or might not use the virtualenv. 
To make sure to use the virtualenv, run the following commands before running `jupyter lab`
class ColaPredictor:
    """Loads a trained ColaModel checkpoint and scores single sentences."""

    def __init__(self, model_path):
        self.model_path = model_path
        self.model = ColaModel.load_from_checkpoint(model_path)
        self.model.eval()
        self.model.freeze()
        self.processor = DataModule()
        # BUG FIX: softmax must normalize over the class dimension (dim=-1).
        # With dim=0 it normalized over the batch dimension of size 1, so
        # every class received a "probability" of exactly 1.0.
        self.softmax = torch.nn.Softmax(dim=-1)
        # NOTE: "lables" is a historical misspelling kept so any existing
        # caller reading this attribute keeps working.
        self.lables = ["unacceptable", "acceptable"]

    def predict(self, text):
        """Return [{"label": ..., "score": ...}, ...] class scores for `text`."""
        inference_sample = {"sentence": text}
        processed = self.processor.tokenize_data(inference_sample)
        logits = self.model(
            torch.tensor([processed["input_ids"]]),
            torch.tensor([processed["attention_mask"]]),
        )
        # logits[0] is the (1, num_classes) logits tensor; the trailing [0]
        # drops the batch dimension so scores zips correctly with the labels
        # (the original zipped a nested list against the label strings).
        scores = self.softmax(logits[0]).tolist()[0]
        predictions = []
        for score, label in zip(scores, self.lables):
            predictions.append({"label": label, "score": score})
        return predictions
class ColaModel(pl.LightningModule):
    """BERT-based binary classifier for CoLA with W&B metric logging.

    Wraps a HuggingFace AutoModelForSequenceClassification head (2 labels)
    and tracks accuracy/precision/recall/F1 via torchmetrics.
    """

    def __init__(self, model_name="google/bert_uncased_L-2_H-128_A-2", lr=3e-5):
        super(ColaModel, self).__init__()
        # Stores model_name/lr into self.hparams (read in configure_optimizers).
        self.save_hyperparameters()

        self.bert = AutoModelForSequenceClassification.from_pretrained(
            model_name, num_labels=2
        )
        self.num_classes = 2
        # NOTE(review): these constructor signatures match the pinned (older)
        # torchmetrics release; newer releases renamed F1 to F1Score and
        # require a `task` argument — confirm before upgrading.
        self.train_accuracy_metric = torchmetrics.Accuracy()
        self.val_accuracy_metric = torchmetrics.Accuracy()
        self.f1_metric = torchmetrics.F1(num_classes=self.num_classes)
        self.precision_macro_metric = torchmetrics.Precision(
            average="macro", num_classes=self.num_classes
        )
        self.recall_macro_metric = torchmetrics.Recall(
            average="macro", num_classes=self.num_classes
        )
        self.precision_micro_metric = torchmetrics.Precision(average="micro")
        self.recall_micro_metric = torchmetrics.Recall(average="micro")

    def forward(self, input_ids, attention_mask, labels=None):
        """Run the HF model; when `labels` is given the output includes `.loss`."""
        outputs = self.bert(
            input_ids=input_ids, attention_mask=attention_mask, labels=labels
        )
        return outputs

    def training_step(self, batch, batch_idx):
        """Log training loss/accuracy and return the loss for backprop."""
        outputs = self.forward(
            batch["input_ids"], batch["attention_mask"], labels=batch["label"]
        )
        # loss = F.cross_entropy(logits, batch["label"])
        preds = torch.argmax(outputs.logits, 1)
        train_acc = self.train_accuracy_metric(preds, batch["label"])
        self.log("train/loss", outputs.loss, prog_bar=True, on_epoch=True)
        self.log("train/acc", train_acc, prog_bar=True, on_epoch=True)
        return outputs.loss

    def validation_step(self, batch, batch_idx):
        """Log validation loss plus accuracy/P/R/F1; return labels and logits.

        The returned dict is collected by Lightning and handed to
        validation_epoch_end for confusion-matrix logging.
        """
        labels = batch["label"]
        # Passing labels makes the HF model compute the loss internally.
        outputs = self.forward(
            batch["input_ids"], batch["attention_mask"], labels=batch["label"]
        )
        preds = torch.argmax(outputs.logits, 1)

        # Metrics
        valid_acc = self.val_accuracy_metric(preds, labels)
        precision_macro = self.precision_macro_metric(preds, labels)
        recall_macro = self.recall_macro_metric(preds, labels)
        precision_micro = self.precision_micro_metric(preds, labels)
        recall_micro = self.recall_micro_metric(preds, labels)
        f1 = self.f1_metric(preds, labels)

        # Logging metrics
        # NOTE(review): loss is logged on_step while the other metrics are
        # on_epoch — confirm this asymmetry is intended, since "valid/loss"
        # is the monitor for checkpointing and early stopping in train.py.
        self.log("valid/loss", outputs.loss, prog_bar=True, on_step=True)
        self.log("valid/acc", valid_acc, prog_bar=True, on_epoch=True)
        self.log("valid/precision_macro", precision_macro, prog_bar=True, on_epoch=True)
        self.log("valid/recall_macro", recall_macro, prog_bar=True, on_epoch=True)
        self.log("valid/precision_micro", precision_micro, prog_bar=True, on_epoch=True)
        self.log("valid/recall_micro", recall_micro, prog_bar=True, on_epoch=True)
        self.log("valid/f1", f1, prog_bar=True, on_epoch=True)
        return {"labels": labels, "logits": outputs.logits}

    def validation_epoch_end(self, outputs):
        """Aggregate per-batch outputs and log a confusion matrix to W&B.

        NOTE(review): `.numpy()` assumes the gathered tensors are on CPU; a
        GPU run would need `.cpu()` first — confirm.
        """
        labels = torch.cat([x["labels"] for x in outputs])
        logits = torch.cat([x["logits"] for x in outputs])
        # `preds` is only used by the commented-out alternatives below.
        preds = torch.argmax(logits, 1)

        ## There are multiple ways to track the metrics
        # 1. Confusion matrix plotting using inbuilt W&B method
        self.logger.experiment.log(
            {
                "conf": wandb.plot.confusion_matrix(
                    probs=logits.numpy(), y_true=labels.numpy()
                )
            }
        )

        # 2. Confusion Matrix plotting using scikit-learn method
        # wandb.log({"cm": wandb.sklearn.plot_confusion_matrix(labels.numpy(), preds)})

        # 3. Confusion Matrix plotting using Seaborn
        # data = confusion_matrix(labels.numpy(), preds.numpy())
        # df_cm = pd.DataFrame(data, columns=np.unique(labels), index=np.unique(labels))
        # df_cm.index.name = "Actual"
        # df_cm.columns.name = "Predicted"
        # plt.figure(figsize=(7, 4))
        # plot = sns.heatmap(
        #     df_cm, cmap="Blues", annot=True, annot_kws={"size": 16}
        # )  # font size
        # self.logger.experiment.log({"Confusion Matrix": wandb.Image(plot)})

        # self.logger.experiment.log(
        #     {"roc": wandb.plot.roc_curve(labels.numpy(), logits.numpy())}
        # )

    def configure_optimizers(self):
        """Adam over all parameters with the lr stored by save_hyperparameters()."""
        return torch.optim.Adam(self.parameters(), lr=self.hparams["lr"])
def main():
    """Train the CoLA classifier and log metrics/samples to Weights & Biases."""
    cola_data = DataModule()
    cola_model = ColaModel()

    checkpoint_callback = ModelCheckpoint(
        dirpath="./models",
        # ModelCheckpoint appends the ".ckpt" suffix itself; spelling it in
        # `filename` produced "best-checkpoint.ckpt.ckpt" on disk (the
        # week_2 script already uses the suffix-free form).
        filename="best-checkpoint",
        monitor="valid/loss",
        mode="min",
    )

    early_stopping_callback = EarlyStopping(
        monitor="valid/loss", patience=3, verbose=True, mode="min"
    )

    wandb_logger = WandbLogger(project="MLOps Basics", entity="raviraja")
    trainer = pl.Trainer(
        max_epochs=1,
        logger=wandb_logger,
        callbacks=[
            checkpoint_callback,
            SamplesVisualisationLogger(cola_data),
            early_stopping_callback,
        ],
        log_every_n_steps=10,
        deterministic=True,
        # limit_train_batches=0.25,
        # limit_val_batches=0.25
    )
    trainer.fit(cola_model, cola_data)
    # End the active W&B run (matches the week_2/week_3 training scripts).
    wandb.finish()
Not to build a SOTA model.** 3 | 4 | ## Requirements: 5 | 6 | This project uses Python 3.8 7 | 8 | Create a virtual env with the following command: 9 | 10 | ``` 11 | conda create --name project-setup python=3.8 12 | conda activate project-setup 13 | ``` 14 | 15 | Install the requirements: 16 | 17 | ``` 18 | pip install -r requirements.txt 19 | ``` 20 | 21 | ## Running 22 | 23 | ### Training 24 | 25 | After installing the requirements, in order to train the model simply run: 26 | 27 | ``` 28 | python train.py 29 | ``` 30 | 31 | ### Monitoring 32 | 33 | Once the training is completed in the end of the logs you will see something like: 34 | 35 | ``` 36 | wandb: Synced 5 W&B file(s), 4 media file(s), 3 artifact file(s) and 0 other file(s) 37 | wandb: 38 | wandb: Synced proud-mountain-77: https://wandb.ai/raviraja/MLOps%20Basics/runs/3vp1twdc 39 | ``` 40 | 41 | Follow the link to see the wandb dashboard which contains all the plots. 42 | 43 | ### Inference 44 | 45 | After training, update the model checkpoint path in the code and run 46 | 47 | ``` 48 | python inference.py 49 | ``` 50 | 51 | ### Running notebooks 52 | 53 | I am using [Jupyter lab](https://jupyter.org/install) to run the notebooks. 54 | 55 | Since I am using a virtualenv, when I run the command `jupyter lab` it might or might not use the virtualenv. 
class DataModule(pl.LightningDataModule):
    """Lightning data module for the GLUE CoLA dataset.

    Downloads CoLA, tokenizes sentences to fixed-length encodings with a
    HuggingFace tokenizer, and serves train/validation DataLoaders.
    """

    def __init__(
        self,
        model_name="google/bert_uncased_L-2_H-128_A-2",
        batch_size=64,
        max_length=128,
    ):
        super().__init__()
        self.batch_size = batch_size
        self.max_length = max_length
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)

    def prepare_data(self):
        """Download the CoLA split of GLUE and keep the train/validation sets."""
        dataset = load_dataset("glue", "cola")
        self.train_data = dataset["train"]
        self.val_data = dataset["validation"]

    def tokenize_data(self, example):
        """Tokenize one example (or a batch of them) to padded encodings."""
        return self.tokenizer(
            example["sentence"],
            max_length=self.max_length,
            padding="max_length",
            truncation=True,
        )

    def setup(self, stage=None):
        """Tokenize and tensor-format the datasets for the fit stage."""
        # Only the fit stage (or an unspecified stage) needs preparation here.
        if stage not in (None, "fit"):
            return

        self.train_data = self.train_data.map(self.tokenize_data, batched=True)
        self.train_data.set_format(
            type="torch", columns=["input_ids", "attention_mask", "label"]
        )

        self.val_data = self.val_data.map(self.tokenize_data, batched=True)
        self.val_data.set_format(
            type="torch",
            columns=["input_ids", "attention_mask", "label"],
            # Keep non-tensor columns (e.g. the raw sentence) available too.
            output_all_columns=True,
        )

    def train_dataloader(self):
        """Shuffled DataLoader over the training split."""
        return torch.utils.data.DataLoader(
            dataset=self.train_data, batch_size=self.batch_size, shuffle=True
        )

    def val_dataloader(self):
        """Deterministic DataLoader over the validation split."""
        return torch.utils.data.DataLoader(
            dataset=self.val_data, batch_size=self.batch_size, shuffle=False
        )
class ColaPredictor:
    """Loads a trained ColaModel checkpoint and scores single sentences."""

    def __init__(self, model_path):
        self.model_path = model_path
        self.model = ColaModel.load_from_checkpoint(model_path)
        self.model.eval()
        self.model.freeze()
        self.processor = DataModule()
        # BUG FIX: softmax must normalize over the class dimension (dim=-1).
        # With dim=0 it normalized over the batch dimension of size 1, so
        # every class received a "probability" of exactly 1.0.
        self.softmax = torch.nn.Softmax(dim=-1)
        # NOTE: "lables" is a historical misspelling kept so any existing
        # caller reading this attribute keeps working.
        self.lables = ["unacceptable", "acceptable"]

    def predict(self, text):
        """Return [{"label": ..., "score": ...}, ...] class scores for `text`."""
        inference_sample = {"sentence": text}
        processed = self.processor.tokenize_data(inference_sample)
        logits = self.model(
            torch.tensor([processed["input_ids"]]),
            torch.tensor([processed["attention_mask"]]),
        )
        # logits[0] is the (1, num_classes) logits tensor; [0] drops batch dim.
        scores = self.softmax(logits[0]).tolist()[0]
        predictions = []
        for score, label in zip(scores, self.lables):
            predictions.append({"label": label, "score": score})
        return predictions
    def validation_step(self, batch, batch_idx):
        """Score one validation batch and log loss plus accuracy/P/R/F1.

        Returns a dict of labels and logits that Lightning collects and
        passes to validation_epoch_end for confusion-matrix logging.
        """
        labels = batch["label"]
        # Passing labels makes the HF model compute the loss internally.
        outputs = self.forward(
            batch["input_ids"], batch["attention_mask"], labels=batch["label"]
        )
        preds = torch.argmax(outputs.logits, 1)

        # Metrics
        valid_acc = self.val_accuracy_metric(preds, labels)
        precision_macro = self.precision_macro_metric(preds, labels)
        recall_macro = self.recall_macro_metric(preds, labels)
        precision_micro = self.precision_micro_metric(preds, labels)
        recall_micro = self.recall_micro_metric(preds, labels)
        f1 = self.f1_metric(preds, labels)

        # Logging metrics
        # NOTE(review): loss is logged on_step while the other metrics are
        # on_epoch — confirm this asymmetry is intended, since "valid/loss"
        # is the monitor for checkpointing and early stopping in train.py.
        self.log("valid/loss", outputs.loss, prog_bar=True, on_step=True)
        self.log("valid/acc", valid_acc, prog_bar=True, on_epoch=True)
        self.log("valid/precision_macro", precision_macro, prog_bar=True, on_epoch=True)
        self.log("valid/recall_macro", recall_macro, prog_bar=True, on_epoch=True)
        self.log("valid/precision_micro", precision_micro, prog_bar=True, on_epoch=True)
        self.log("valid/recall_micro", recall_micro, prog_bar=True, on_epoch=True)
        self.log("valid/f1", f1, prog_bar=True, on_epoch=True)
        return {"labels": labels, "logits": outputs.logits}
    def configure_optimizers(self):
        """Adam over all parameters with the lr stored by save_hyperparameters()."""
        return torch.optim.Adam(self.parameters(), lr=self.hparams["lr"])
@hydra.main(config_path="./configs", config_name="config")
def main(cfg):
    """Hydra entry point: build data module and model from `cfg`, then train.

    `cfg` is the composed config from ./configs/config.yaml (model,
    processing and training groups). NOTE(review): Hydra typically changes
    the working directory to its run dir, so relative paths like "./models"
    likely resolve inside the Hydra output directory — confirm.
    """
    logger.info(OmegaConf.to_yaml(cfg, resolve=True))
    logger.info(f"Using the model: {cfg.model.name}")
    logger.info(f"Using the tokenizer: {cfg.model.tokenizer}")
    cola_data = DataModule(
        cfg.model.tokenizer, cfg.processing.batch_size, cfg.processing.max_length
    )
    cola_model = ColaModel(cfg.model.name)

    # Keep the best (lowest validation-loss) checkpoint; ModelCheckpoint
    # appends the ".ckpt" suffix to `filename` itself.
    checkpoint_callback = ModelCheckpoint(
        dirpath="./models",
        filename="best-checkpoint",
        monitor="valid/loss",
        mode="min",
    )

    early_stopping_callback = EarlyStopping(
        monitor="valid/loss", patience=3, verbose=True, mode="min"
    )

    wandb_logger = WandbLogger(project="MLOps Basics", entity="raviraja")
    trainer = pl.Trainer(
        max_epochs=cfg.training.max_epochs,
        logger=wandb_logger,
        callbacks=[checkpoint_callback, SamplesVisualisationLogger(cola_data), early_stopping_callback],
        log_every_n_steps=cfg.training.log_every_n_steps,
        deterministic=cfg.training.deterministic,
        limit_train_batches=cfg.training.limit_train_batches,
        limit_val_batches=cfg.training.limit_val_batches,
    )
    trainer.fit(cola_model, cola_data)
    # End the active W&B run before the process exits.
    wandb.finish()
-------------------------------------------------------------------------------- /week_3_dvc/README.md: -------------------------------------------------------------------------------- 1 | 2 | **Note: The purpose of the project to explore the libraries and learn how to use them. Not to build a SOTA model.** 3 | 4 | ## Requirements: 5 | 6 | This project uses Python 3.8 7 | 8 | Create a virtual env with the following command: 9 | 10 | ``` 11 | conda create --name project-setup python=3.8 12 | conda activate project-setup 13 | ``` 14 | 15 | Install the requirements: 16 | 17 | ``` 18 | pip install -r requirements.txt 19 | ``` 20 | 21 | ## Running 22 | 23 | ### Training 24 | 25 | After installing the requirements, in order to train the model simply run: 26 | 27 | ``` 28 | python train.py 29 | ``` 30 | 31 | ### Monitoring 32 | 33 | Once the training is completed in the end of the logs you will see something like: 34 | 35 | ``` 36 | wandb: Synced 5 W&B file(s), 4 media file(s), 3 artifact file(s) and 0 other file(s) 37 | wandb: 38 | wandb: Synced proud-mountain-77: https://wandb.ai/raviraja/MLOps%20Basics/runs/3vp1twdc 39 | ``` 40 | 41 | Follow the link to see the wandb dashboard which contains all the plots. 42 | 43 | ### Inference 44 | 45 | After training, update the model checkpoint path in the code and run 46 | 47 | ``` 48 | python inference.py 49 | ``` 50 | 51 | ### Versioning data 52 | 53 | Refer to the blog: [DVC Configuration](https://www.ravirajag.dev/blog/mlops-dvc) 54 | 55 | ### Running notebooks 56 | 57 | I am using [Jupyter lab](https://jupyter.org/install) to run the notebooks. 58 | 59 | Since I am using a virtualenv, when I run the command `jupyter lab` it might or might not use the virtualenv. 
class DataModule(pl.LightningDataModule):
    """LightningDataModule for the GLUE CoLA dataset.

    Downloads CoLA via `datasets`, tokenizes sentences with the configured
    HuggingFace tokenizer, and serves train/validation DataLoaders.
    """

    def __init__(
        self,
        model_name="google/bert_uncased_L-2_H-128_A-2",
        batch_size=64,
        max_length=128,
    ):
        super().__init__()

        self.batch_size = batch_size
        self.max_length = max_length
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)

    def prepare_data(self):
        """Download the CoLA split of GLUE.

        NOTE(review): Lightning recommends not assigning state in
        prepare_data (it is not called on every process in distributed
        runs); confirm this is acceptable for the single-process setup.
        """
        cola_dataset = load_dataset("glue", "cola")
        self.train_data = cola_dataset["train"]
        self.val_data = cola_dataset["validation"]

    def tokenize_data(self, example):
        """Tokenize one example (or a batch) to fixed-length padded encodings."""
        return self.tokenizer(
            example["sentence"],
            truncation=True,
            padding="max_length",
            max_length=self.max_length,
        )

    def setup(self, stage=None):
        """Tokenize and tensor-format the datasets for the fit stage."""
        # we set up only relevant datasets when stage is specified
        if stage == "fit" or stage is None:
            self.train_data = self.train_data.map(self.tokenize_data, batched=True)
            self.train_data.set_format(
                type="torch", columns=["input_ids", "attention_mask", "label"]
            )

            self.val_data = self.val_data.map(self.tokenize_data, batched=True)
            self.val_data.set_format(
                type="torch",
                columns=["input_ids", "attention_mask", "label"],
                # keep raw columns (e.g. the sentence text) alongside tensors
                output_all_columns=True,
            )

    def train_dataloader(self):
        """Shuffled DataLoader over the training split."""
        return torch.utils.data.DataLoader(
            self.train_data, batch_size=self.batch_size, shuffle=True
        )

    def val_dataloader(self):
        """Deterministic DataLoader over the validation split."""
        return torch.utils.data.DataLoader(
            self.val_data, batch_size=self.batch_size, shuffle=False
        )
| -------------------------------------------------------------------------------- /week_3_dvc/inference.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from model import ColaModel 3 | from data import DataModule 4 | 5 | 6 | class ColaPredictor: 7 | def __init__(self, model_path): 8 | self.model_path = model_path 9 | self.model = ColaModel.load_from_checkpoint(model_path) 10 | self.model.eval() 11 | self.model.freeze() 12 | self.processor = DataModule() 13 | self.softmax = torch.nn.Softmax(dim=0) 14 | self.lables = ["unacceptable", "acceptable"] 15 | 16 | def predict(self, text): 17 | inference_sample = {"sentence": text} 18 | processed = self.processor.tokenize_data(inference_sample) 19 | logits = self.model( 20 | torch.tensor([processed["input_ids"]]), 21 | torch.tensor([processed["attention_mask"]]), 22 | ) 23 | scores = self.softmax(logits[0]).tolist()[0] 24 | predictions = [] 25 | for score, label in zip(scores, self.lables): 26 | predictions.append({"label": label, "score": score}) 27 | return predictions 28 | 29 | 30 | if __name__ == "__main__": 31 | sentence = "The boy is sitting on a bench" 32 | predictor = ColaPredictor("./models/best-checkpoint.ckpt") 33 | print(predictor.predict(sentence)) 34 | -------------------------------------------------------------------------------- /week_3_dvc/model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import wandb 3 | import hydra 4 | import numpy as np 5 | import pandas as pd 6 | import torchmetrics 7 | import pytorch_lightning as pl 8 | from transformers import AutoModelForSequenceClassification 9 | from omegaconf import OmegaConf, DictConfig 10 | from sklearn.metrics import confusion_matrix 11 | import matplotlib.pyplot as plt 12 | import seaborn as sns 13 | 14 | 15 | class ColaModel(pl.LightningModule): 16 | def __init__(self, model_name="google/bert_uncased_L-2_H-128_A-2", lr=3e-5): 17 | 
super(ColaModel, self).__init__() 18 | self.save_hyperparameters() 19 | 20 | self.bert = AutoModelForSequenceClassification.from_pretrained( 21 | model_name, num_labels=2 22 | ) 23 | self.num_classes = 2 24 | self.train_accuracy_metric = torchmetrics.Accuracy() 25 | self.val_accuracy_metric = torchmetrics.Accuracy() 26 | self.f1_metric = torchmetrics.F1(num_classes=self.num_classes) 27 | self.precision_macro_metric = torchmetrics.Precision( 28 | average="macro", num_classes=self.num_classes 29 | ) 30 | self.recall_macro_metric = torchmetrics.Recall( 31 | average="macro", num_classes=self.num_classes 32 | ) 33 | self.precision_micro_metric = torchmetrics.Precision(average="micro") 34 | self.recall_micro_metric = torchmetrics.Recall(average="micro") 35 | 36 | def forward(self, input_ids, attention_mask, labels=None): 37 | outputs = self.bert( 38 | input_ids=input_ids, attention_mask=attention_mask, labels=labels 39 | ) 40 | return outputs 41 | 42 | def training_step(self, batch, batch_idx): 43 | outputs = self.forward( 44 | batch["input_ids"], batch["attention_mask"], labels=batch["label"] 45 | ) 46 | # loss = F.cross_entropy(logits, batch["label"]) 47 | preds = torch.argmax(outputs.logits, 1) 48 | train_acc = self.train_accuracy_metric(preds, batch["label"]) 49 | self.log("train/loss", outputs.loss, prog_bar=True, on_epoch=True) 50 | self.log("train/acc", train_acc, prog_bar=True, on_epoch=True) 51 | return outputs.loss 52 | 53 | def validation_step(self, batch, batch_idx): 54 | labels = batch["label"] 55 | outputs = self.forward( 56 | batch["input_ids"], batch["attention_mask"], labels=batch["label"] 57 | ) 58 | preds = torch.argmax(outputs.logits, 1) 59 | 60 | # Metrics 61 | valid_acc = self.val_accuracy_metric(preds, labels) 62 | precision_macro = self.precision_macro_metric(preds, labels) 63 | recall_macro = self.recall_macro_metric(preds, labels) 64 | precision_micro = self.precision_micro_metric(preds, labels) 65 | recall_micro = 
self.recall_micro_metric(preds, labels) 66 | f1 = self.f1_metric(preds, labels) 67 | 68 | # Logging metrics 69 | self.log("valid/loss", outputs.loss, prog_bar=True, on_step=True) 70 | self.log("valid/acc", valid_acc, prog_bar=True, on_epoch=True) 71 | self.log("valid/precision_macro", precision_macro, prog_bar=True, on_epoch=True) 72 | self.log("valid/recall_macro", recall_macro, prog_bar=True, on_epoch=True) 73 | self.log("valid/precision_micro", precision_micro, prog_bar=True, on_epoch=True) 74 | self.log("valid/recall_micro", recall_micro, prog_bar=True, on_epoch=True) 75 | self.log("valid/f1", f1, prog_bar=True, on_epoch=True) 76 | return {"labels": labels, "logits": outputs.logits} 77 | 78 | def validation_epoch_end(self, outputs): 79 | labels = torch.cat([x["labels"] for x in outputs]) 80 | logits = torch.cat([x["logits"] for x in outputs]) 81 | preds = torch.argmax(logits, 1) 82 | 83 | ## There are multiple ways to track the metrics 84 | # 1. Confusion matrix plotting using inbuilt W&B method 85 | self.logger.experiment.log( 86 | { 87 | "conf": wandb.plot.confusion_matrix( 88 | probs=logits.numpy(), y_true=labels.numpy() 89 | ) 90 | } 91 | ) 92 | 93 | # 2. Confusion Matrix plotting using scikit-learn method 94 | # wandb.log({"cm": wandb.sklearn.plot_confusion_matrix(labels.numpy(), preds)}) 95 | 96 | # 3. 
Confusion Matrix plotting using Seaborn 97 | # data = confusion_matrix(labels.numpy(), preds.numpy()) 98 | # df_cm = pd.DataFrame(data, columns=np.unique(labels), index=np.unique(labels)) 99 | # df_cm.index.name = "Actual" 100 | # df_cm.columns.name = "Predicted" 101 | # plt.figure(figsize=(7, 4)) 102 | # plot = sns.heatmap( 103 | # df_cm, cmap="Blues", annot=True, annot_kws={"size": 16} 104 | # ) # font size 105 | # self.logger.experiment.log({"Confusion Matrix": wandb.Image(plot)}) 106 | 107 | # self.logger.experiment.log( 108 | # {"roc": wandb.plot.roc_curve(labels.numpy(), logits.numpy())} 109 | # ) 110 | 111 | def configure_optimizers(self): 112 | return torch.optim.Adam(self.parameters(), lr=self.hparams["lr"]) 113 | -------------------------------------------------------------------------------- /week_3_dvc/models/.gitignore: -------------------------------------------------------------------------------- 1 | /best-checkpoint.ckpt 2 | -------------------------------------------------------------------------------- /week_3_dvc/requirements.txt: -------------------------------------------------------------------------------- 1 | pytorch-lightning==1.2.10 2 | datasets==1.6.2 3 | transformers==4.5.1 4 | scikit-learn==0.24.2 5 | wandb 6 | torchmetrics 7 | matplotlib 8 | seaborn 9 | hydra-core 10 | omegaconf 11 | hydra_colorlog -------------------------------------------------------------------------------- /week_3_dvc/train.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import hydra 3 | import wandb 4 | import logging 5 | 6 | import pandas as pd 7 | import pytorch_lightning as pl 8 | from omegaconf.omegaconf import OmegaConf 9 | from pytorch_lightning.callbacks import ModelCheckpoint 10 | from pytorch_lightning.callbacks.early_stopping import EarlyStopping 11 | from pytorch_lightning.loggers import WandbLogger 12 | 13 | from data import DataModule 14 | from model import ColaModel 15 | 16 | logger = 
logging.getLogger(__name__) 17 | 18 | 19 | class SamplesVisualisationLogger(pl.Callback): 20 | def __init__(self, datamodule): 21 | super().__init__() 22 | 23 | self.datamodule = datamodule 24 | 25 | def on_validation_end(self, trainer, pl_module): 26 | val_batch = next(iter(self.datamodule.val_dataloader())) 27 | sentences = val_batch["sentence"] 28 | 29 | outputs = pl_module(val_batch["input_ids"], val_batch["attention_mask"]) 30 | preds = torch.argmax(outputs.logits, 1) 31 | labels = val_batch["label"] 32 | 33 | df = pd.DataFrame( 34 | {"Sentence": sentences, "Label": labels.numpy(), "Predicted": preds.numpy()} 35 | ) 36 | 37 | wrong_df = df[df["Label"] != df["Predicted"]] 38 | trainer.logger.experiment.log( 39 | { 40 | "examples": wandb.Table(dataframe=wrong_df, allow_mixed_types=True), 41 | "global_step": trainer.global_step, 42 | } 43 | ) 44 | 45 | 46 | @hydra.main(config_path="./configs", config_name="config") 47 | def main(cfg): 48 | logger.info(OmegaConf.to_yaml(cfg, resolve=True)) 49 | logger.info(f"Using the model: {cfg.model.name}") 50 | logger.info(f"Using the tokenizer: {cfg.model.tokenizer}") 51 | cola_data = DataModule( 52 | cfg.model.tokenizer, cfg.processing.batch_size, cfg.processing.max_length 53 | ) 54 | cola_model = ColaModel(cfg.model.name) 55 | 56 | root_dir = hydra.utils.get_original_cwd() 57 | checkpoint_callback = ModelCheckpoint( 58 | dirpath=f"{root_dir}/models", 59 | filename="best-checkpoint", 60 | monitor="valid/loss", 61 | mode="min", 62 | ) 63 | 64 | early_stopping_callback = EarlyStopping( 65 | monitor="valid/loss", patience=3, verbose=True, mode="min" 66 | ) 67 | 68 | wandb_logger = WandbLogger(project="MLOps Basics", entity="raviraja") 69 | trainer = pl.Trainer( 70 | max_epochs=cfg.training.max_epochs, 71 | logger=wandb_logger, 72 | callbacks=[checkpoint_callback, SamplesVisualisationLogger(cola_data), early_stopping_callback], 73 | log_every_n_steps=cfg.training.log_every_n_steps, 74 | deterministic=cfg.training.deterministic, 
75 | # limit_train_batches=cfg.training.limit_train_batches, 76 | # limit_val_batches=cfg.training.limit_val_batches, 77 | ) 78 | trainer.fit(cola_model, cola_data) 79 | wandb.finish() 80 | 81 | 82 | if __name__ == "__main__": 83 | main() 84 | -------------------------------------------------------------------------------- /week_4_onnx/README.md: -------------------------------------------------------------------------------- 1 | 2 | **Note: The purpose of the project to explore the libraries and learn how to use them. Not to build a SOTA model.** 3 | 4 | ## Requirements: 5 | 6 | This project uses Python 3.8 7 | 8 | Create a virtual env with the following command: 9 | 10 | ``` 11 | conda create --name project-setup python=3.8 12 | conda activate project-setup 13 | ``` 14 | 15 | Install the requirements: 16 | 17 | ``` 18 | pip install -r requirements.txt 19 | ``` 20 | 21 | ## Running 22 | 23 | ### Training 24 | 25 | After installing the requirements, in order to train the model simply run: 26 | 27 | ``` 28 | python train.py 29 | ``` 30 | 31 | ### Monitoring 32 | 33 | Once the training is completed in the end of the logs you will see something like: 34 | 35 | ``` 36 | wandb: Synced 5 W&B file(s), 4 media file(s), 3 artifact file(s) and 0 other file(s) 37 | wandb: 38 | wandb: Synced proud-mountain-77: https://wandb.ai/raviraja/MLOps%20Basics/runs/3vp1twdc 39 | ``` 40 | 41 | Follow the link to see the wandb dashboard which contains all the plots. 
42 | 43 | ### Versioning data 44 | 45 | Refer to the blog: [DVC Configuration](https://www.ravirajag.dev/blog/mlops-dvc) 46 | 47 | ### Exporting model to ONNX 48 | 49 | Once the model is trained, convert the model using the following command: 50 | 51 | ``` 52 | python convert_model_to_onnx.py 53 | ``` 54 | 55 | ### Inference 56 | 57 | #### Inference using standard pytorch 58 | 59 | ``` 60 | python inference.py 61 | ``` 62 | 63 | #### Inference using ONNX Runtime 64 | 65 | ``` 66 | python inference_onnx.py 67 | ``` 68 | 69 | 70 | ### Running notebooks 71 | 72 | I am using [Jupyter lab](https://jupyter.org/install) to run the notebooks. 73 | 74 | Since I am using a virtualenv, when I run the command `jupyter lab` it might or might not use the virtualenv. 75 | 76 | To make sure to use the virtualenv, run the following commands before running `jupyter lab` 77 | 78 | ``` 79 | conda install ipykernel 80 | python -m ipykernel install --user --name project-setup 81 | pip install ipywidgets 82 | ``` -------------------------------------------------------------------------------- /week_4_onnx/configs/config.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - model: default 3 | - processing: default 4 | - training: default 5 | - override hydra/job_logging: colorlog 6 | - override hydra/hydra_logging: colorlog -------------------------------------------------------------------------------- /week_4_onnx/configs/model/default.yaml: -------------------------------------------------------------------------------- 1 | name: google/bert_uncased_L-2_H-128_A-2 # model used for training the classifier 2 | tokenizer: google/bert_uncased_L-2_H-128_A-2 # tokenizer used for processing the data -------------------------------------------------------------------------------- /week_4_onnx/configs/processing/default.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 64 2 | max_length: 128 
-------------------------------------------------------------------------------- /week_4_onnx/configs/training/default.yaml: -------------------------------------------------------------------------------- 1 | max_epochs: 1 2 | log_every_n_steps: 10 3 | deterministic: true 4 | limit_train_batches: 0.25 5 | limit_val_batches: ${training.limit_train_batches} -------------------------------------------------------------------------------- /week_4_onnx/convert_model_to_onnx.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import hydra 3 | import logging 4 | 5 | from omegaconf.omegaconf import OmegaConf 6 | 7 | from model import ColaModel 8 | from data import DataModule 9 | 10 | logger = logging.getLogger(__name__) 11 | 12 | 13 | @hydra.main(config_path="./configs", config_name="config") 14 | def convert_model(cfg): 15 | root_dir = hydra.utils.get_original_cwd() 16 | model_path = f"{root_dir}/models/best-checkpoint.ckpt" 17 | logger.info(f"Loading pre-trained model from: {model_path}") 18 | cola_model = ColaModel.load_from_checkpoint(model_path) 19 | 20 | data_model = DataModule( 21 | cfg.model.tokenizer, cfg.processing.batch_size, cfg.processing.max_length 22 | ) 23 | data_model.prepare_data() 24 | data_model.setup() 25 | input_batch = next(iter(data_model.train_dataloader())) 26 | input_sample = { 27 | "input_ids": input_batch["input_ids"][0].unsqueeze(0), 28 | "attention_mask": input_batch["attention_mask"][0].unsqueeze(0), 29 | } 30 | 31 | # Export the model 32 | logger.info(f"Converting the model into ONNX format") 33 | torch.onnx.export( 34 | cola_model, # model being run 35 | ( 36 | input_sample["input_ids"], 37 | input_sample["attention_mask"], 38 | ), # model input (or a tuple for multiple inputs) 39 | f"{root_dir}/models/model.onnx", # where to save the model (can be a file or file-like object) 40 | export_params=True, 41 | opset_version=10, 42 | input_names=["input_ids", "attention_mask"], # the model's 
input names 43 | output_names=["output"], # the model's output names 44 | dynamic_axes={ 45 | "input_ids": {0: "batch_size"}, # variable length axes 46 | "attention_mask": {0: "batch_size"}, 47 | "output": {0: "batch_size"}, 48 | }, 49 | ) 50 | 51 | logger.info( 52 | f"Model converted successfully. ONNX format model is at: {root_dir}/models/model.onnx" 53 | ) 54 | 55 | 56 | if __name__ == "__main__": 57 | convert_model() 58 | -------------------------------------------------------------------------------- /week_4_onnx/data.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import datasets 3 | import pytorch_lightning as pl 4 | 5 | from datasets import load_dataset 6 | from transformers import AutoTokenizer 7 | 8 | 9 | class DataModule(pl.LightningDataModule): 10 | def __init__( 11 | self, 12 | model_name="google/bert_uncased_L-2_H-128_A-2", 13 | batch_size=64, 14 | max_length=128, 15 | ): 16 | super().__init__() 17 | 18 | self.batch_size = batch_size 19 | self.max_length = max_length 20 | self.tokenizer = AutoTokenizer.from_pretrained(model_name) 21 | 22 | def prepare_data(self): 23 | cola_dataset = load_dataset("glue", "cola") 24 | self.train_data = cola_dataset["train"] 25 | self.val_data = cola_dataset["validation"] 26 | 27 | def tokenize_data(self, example): 28 | return self.tokenizer( 29 | example["sentence"], 30 | truncation=True, 31 | padding="max_length", 32 | max_length=self.max_length, 33 | ) 34 | 35 | def setup(self, stage=None): 36 | # we set up only relevant datasets when stage is specified 37 | if stage == "fit" or stage is None: 38 | self.train_data = self.train_data.map(self.tokenize_data, batched=True) 39 | self.train_data.set_format( 40 | type="torch", columns=["input_ids", "attention_mask", "label"] 41 | ) 42 | 43 | self.val_data = self.val_data.map(self.tokenize_data, batched=True) 44 | self.val_data.set_format( 45 | type="torch", 46 | columns=["input_ids", "attention_mask", "label"], 47 | 
output_all_columns=True, 48 | ) 49 | 50 | def train_dataloader(self): 51 | return torch.utils.data.DataLoader( 52 | self.train_data, batch_size=self.batch_size, shuffle=True 53 | ) 54 | 55 | def val_dataloader(self): 56 | return torch.utils.data.DataLoader( 57 | self.val_data, batch_size=self.batch_size, shuffle=False 58 | ) 59 | 60 | 61 | if __name__ == "__main__": 62 | data_model = DataModule() 63 | data_model.prepare_data() 64 | data_model.setup() 65 | print(next(iter(data_model.train_dataloader()))["input_ids"].shape) 66 | -------------------------------------------------------------------------------- /week_4_onnx/dvcfiles/trained_model.dvc: -------------------------------------------------------------------------------- 1 | wdir: ../models 2 | outs: 3 | - md5: c2f5c0a1954209865b9be1945f33ed6e 4 | size: 17567709 5 | path: best-checkpoint.ckpt 6 | -------------------------------------------------------------------------------- /week_4_onnx/inference.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from model import ColaModel 3 | from data import DataModule 4 | from utils import timing 5 | 6 | 7 | class ColaPredictor: 8 | def __init__(self, model_path): 9 | self.model_path = model_path 10 | self.model = ColaModel.load_from_checkpoint(model_path) 11 | self.model.eval() 12 | self.model.freeze() 13 | self.processor = DataModule() 14 | self.softmax = torch.nn.Softmax(dim=1) 15 | self.lables = ["unacceptable", "acceptable"] 16 | 17 | @timing 18 | def predict(self, text): 19 | inference_sample = {"sentence": text} 20 | processed = self.processor.tokenize_data(inference_sample) 21 | logits = self.model( 22 | torch.tensor([processed["input_ids"]]), 23 | torch.tensor([processed["attention_mask"]]), 24 | ) 25 | scores = self.softmax(logits[0]).tolist()[0] 26 | predictions = [] 27 | for score, label in zip(scores, self.lables): 28 | predictions.append({"label": label, "score": score}) 29 | return predictions 30 | 31 
| 32 | if __name__ == "__main__": 33 | sentence = "The boy is sitting on a bench" 34 | predictor = ColaPredictor("./models/best-checkpoint.ckpt") 35 | print(predictor.predict(sentence)) 36 | sentences = ["The boy is sitting on a bench"] * 10 37 | for sentence in sentences: 38 | predictor.predict(sentence) 39 | -------------------------------------------------------------------------------- /week_4_onnx/inference_onnx.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import onnxruntime as ort 3 | from scipy.special import softmax 4 | 5 | from data import DataModule 6 | from utils import timing 7 | 8 | 9 | class ColaONNXPredictor: 10 | def __init__(self, model_path): 11 | self.ort_session = ort.InferenceSession(model_path) 12 | self.processor = DataModule() 13 | self.lables = ["unacceptable", "acceptable"] 14 | 15 | @timing 16 | def predict(self, text): 17 | inference_sample = {"sentence": text} 18 | processed = self.processor.tokenize_data(inference_sample) 19 | 20 | ort_inputs = { 21 | "input_ids": np.expand_dims(processed["input_ids"], axis=0), 22 | "attention_mask": np.expand_dims(processed["attention_mask"], axis=0), 23 | } 24 | ort_outs = self.ort_session.run(None, ort_inputs) 25 | scores = softmax(ort_outs[0])[0] 26 | predictions = [] 27 | for score, label in zip(scores, self.lables): 28 | predictions.append({"label": label, "score": score}) 29 | return predictions 30 | 31 | 32 | if __name__ == "__main__": 33 | sentence = "The boy is sitting on a bench" 34 | predictor = ColaONNXPredictor("./models/model.onnx") 35 | print(predictor.predict(sentence)) 36 | sentences = ["The boy is sitting on a bench"] * 10 37 | for sentence in sentences: 38 | predictor.predict(sentence) 39 | -------------------------------------------------------------------------------- /week_4_onnx/model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import wandb 3 | import 
hydra 4 | import numpy as np 5 | import pandas as pd 6 | import torchmetrics 7 | import pytorch_lightning as pl 8 | from transformers import AutoModelForSequenceClassification 9 | from omegaconf import OmegaConf, DictConfig 10 | from sklearn.metrics import confusion_matrix 11 | import matplotlib.pyplot as plt 12 | import seaborn as sns 13 | 14 | 15 | class ColaModel(pl.LightningModule): 16 | def __init__(self, model_name="google/bert_uncased_L-2_H-128_A-2", lr=3e-5): 17 | super(ColaModel, self).__init__() 18 | self.save_hyperparameters() 19 | 20 | self.bert = AutoModelForSequenceClassification.from_pretrained( 21 | model_name, num_labels=2 22 | ) 23 | self.num_classes = 2 24 | self.train_accuracy_metric = torchmetrics.Accuracy() 25 | self.val_accuracy_metric = torchmetrics.Accuracy() 26 | self.f1_metric = torchmetrics.F1(num_classes=self.num_classes) 27 | self.precision_macro_metric = torchmetrics.Precision( 28 | average="macro", num_classes=self.num_classes 29 | ) 30 | self.recall_macro_metric = torchmetrics.Recall( 31 | average="macro", num_classes=self.num_classes 32 | ) 33 | self.precision_micro_metric = torchmetrics.Precision(average="micro") 34 | self.recall_micro_metric = torchmetrics.Recall(average="micro") 35 | 36 | def forward(self, input_ids, attention_mask, labels=None): 37 | outputs = self.bert( 38 | input_ids=input_ids, attention_mask=attention_mask, labels=labels 39 | ) 40 | return outputs 41 | 42 | def training_step(self, batch, batch_idx): 43 | outputs = self.forward( 44 | batch["input_ids"], batch["attention_mask"], labels=batch["label"] 45 | ) 46 | # loss = F.cross_entropy(logits, batch["label"]) 47 | preds = torch.argmax(outputs.logits, 1) 48 | train_acc = self.train_accuracy_metric(preds, batch["label"]) 49 | self.log("train/loss", outputs.loss, prog_bar=True, on_epoch=True) 50 | self.log("train/acc", train_acc, prog_bar=True, on_epoch=True) 51 | return outputs.loss 52 | 53 | def validation_step(self, batch, batch_idx): 54 | labels = 
batch["label"] 55 | outputs = self.forward( 56 | batch["input_ids"], batch["attention_mask"], labels=batch["label"] 57 | ) 58 | preds = torch.argmax(outputs.logits, 1) 59 | 60 | # Metrics 61 | valid_acc = self.val_accuracy_metric(preds, labels) 62 | precision_macro = self.precision_macro_metric(preds, labels) 63 | recall_macro = self.recall_macro_metric(preds, labels) 64 | precision_micro = self.precision_micro_metric(preds, labels) 65 | recall_micro = self.recall_micro_metric(preds, labels) 66 | f1 = self.f1_metric(preds, labels) 67 | 68 | # Logging metrics 69 | self.log("valid/loss", outputs.loss, prog_bar=True, on_step=True) 70 | self.log("valid/acc", valid_acc, prog_bar=True, on_epoch=True) 71 | self.log("valid/precision_macro", precision_macro, prog_bar=True, on_epoch=True) 72 | self.log("valid/recall_macro", recall_macro, prog_bar=True, on_epoch=True) 73 | self.log("valid/precision_micro", precision_micro, prog_bar=True, on_epoch=True) 74 | self.log("valid/recall_micro", recall_micro, prog_bar=True, on_epoch=True) 75 | self.log("valid/f1", f1, prog_bar=True, on_epoch=True) 76 | return {"labels": labels, "logits": outputs.logits} 77 | 78 | def validation_epoch_end(self, outputs): 79 | labels = torch.cat([x["labels"] for x in outputs]) 80 | logits = torch.cat([x["logits"] for x in outputs]) 81 | preds = torch.argmax(logits, 1) 82 | 83 | ## There are multiple ways to track the metrics 84 | # 1. Confusion matrix plotting using inbuilt W&B method 85 | self.logger.experiment.log( 86 | { 87 | "conf": wandb.plot.confusion_matrix( 88 | probs=logits.numpy(), y_true=labels.numpy() 89 | ) 90 | } 91 | ) 92 | 93 | # 2. Confusion Matrix plotting using scikit-learn method 94 | # wandb.log({"cm": wandb.sklearn.plot_confusion_matrix(labels.numpy(), preds)}) 95 | 96 | # 3. 
Confusion Matrix plotting using Seaborn 97 | # data = confusion_matrix(labels.numpy(), preds.numpy()) 98 | # df_cm = pd.DataFrame(data, columns=np.unique(labels), index=np.unique(labels)) 99 | # df_cm.index.name = "Actual" 100 | # df_cm.columns.name = "Predicted" 101 | # plt.figure(figsize=(7, 4)) 102 | # plot = sns.heatmap( 103 | # df_cm, cmap="Blues", annot=True, annot_kws={"size": 16} 104 | # ) # font size 105 | # self.logger.experiment.log({"Confusion Matrix": wandb.Image(plot)}) 106 | 107 | # self.logger.experiment.log( 108 | # {"roc": wandb.plot.roc_curve(labels.numpy(), logits.numpy())} 109 | # ) 110 | 111 | def configure_optimizers(self): 112 | return torch.optim.Adam(self.parameters(), lr=self.hparams["lr"]) 113 | -------------------------------------------------------------------------------- /week_4_onnx/requirements.txt: -------------------------------------------------------------------------------- 1 | pytorch-lightning==1.2.10 2 | datasets==1.6.2 3 | transformers==4.5.1 4 | scikit-learn==0.24.2 5 | wandb 6 | torchmetrics 7 | matplotlib 8 | seaborn 9 | hydra-core 10 | omegaconf 11 | hydra_colorlog -------------------------------------------------------------------------------- /week_4_onnx/train.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import hydra 3 | import wandb 4 | import logging 5 | 6 | import pandas as pd 7 | import pytorch_lightning as pl 8 | from omegaconf.omegaconf import OmegaConf 9 | from pytorch_lightning.callbacks import ModelCheckpoint 10 | from pytorch_lightning.callbacks.early_stopping import EarlyStopping 11 | from pytorch_lightning.loggers import WandbLogger 12 | 13 | from data import DataModule 14 | from model import ColaModel 15 | 16 | logger = logging.getLogger(__name__) 17 | 18 | 19 | class SamplesVisualisationLogger(pl.Callback): 20 | def __init__(self, datamodule): 21 | super().__init__() 22 | 23 | self.datamodule = datamodule 24 | 25 | def on_validation_end(self, 
trainer, pl_module): 26 | val_batch = next(iter(self.datamodule.val_dataloader())) 27 | sentences = val_batch["sentence"] 28 | 29 | outputs = pl_module(val_batch["input_ids"], val_batch["attention_mask"]) 30 | preds = torch.argmax(outputs.logits, 1) 31 | labels = val_batch["label"] 32 | 33 | df = pd.DataFrame( 34 | {"Sentence": sentences, "Label": labels.numpy(), "Predicted": preds.numpy()} 35 | ) 36 | 37 | wrong_df = df[df["Label"] != df["Predicted"]] 38 | trainer.logger.experiment.log( 39 | { 40 | "examples": wandb.Table(dataframe=wrong_df, allow_mixed_types=True), 41 | "global_step": trainer.global_step, 42 | } 43 | ) 44 | 45 | 46 | @hydra.main(config_path="./configs", config_name="config") 47 | def main(cfg): 48 | logger.info(OmegaConf.to_yaml(cfg, resolve=True)) 49 | logger.info(f"Using the model: {cfg.model.name}") 50 | logger.info(f"Using the tokenizer: {cfg.model.tokenizer}") 51 | cola_data = DataModule( 52 | cfg.model.tokenizer, cfg.processing.batch_size, cfg.processing.max_length 53 | ) 54 | cola_model = ColaModel(cfg.model.name) 55 | 56 | root_dir = hydra.utils.get_original_cwd() 57 | checkpoint_callback = ModelCheckpoint( 58 | dirpath=f"{root_dir}/models", 59 | filename="best-checkpoint", 60 | monitor="valid/loss", 61 | mode="min", 62 | ) 63 | 64 | early_stopping_callback = EarlyStopping( 65 | monitor="valid/loss", patience=3, verbose=True, mode="min" 66 | ) 67 | 68 | wandb_logger = WandbLogger(project="MLOps Basics", entity="raviraja") 69 | trainer = pl.Trainer( 70 | max_epochs=cfg.training.max_epochs, 71 | logger=wandb_logger, 72 | callbacks=[checkpoint_callback, SamplesVisualisationLogger(cola_data), early_stopping_callback], 73 | log_every_n_steps=cfg.training.log_every_n_steps, 74 | deterministic=cfg.training.deterministic, 75 | # limit_train_batches=cfg.training.limit_train_batches, 76 | # limit_val_batches=cfg.training.limit_val_batches, 77 | ) 78 | trainer.fit(cola_model, cola_data) 79 | wandb.finish() 80 | 81 | 82 | if __name__ == "__main__": 
83 | main() 84 | -------------------------------------------------------------------------------- /week_4_onnx/utils.py: -------------------------------------------------------------------------------- 1 | import time 2 | from functools import wraps 3 | 4 | 5 | def timing(f):
 6 | """Decorator for timing functions 7 | Usage: 8 | @timing 9 | def function(a): 10 | pass 11 | """ 12 | 13 | @wraps(f) 14 | def wrapper(*args, **kwargs): 15 | start = time.time() 16 | result = f(*args, **kwargs) 17 | end = time.time() 18 | print("function:%r took: %2.5f sec" % (f.__name__, end - start)) 19 | return result 20 | 21 | return wrapper 22 | -------------------------------------------------------------------------------- /week_5_docker/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM huggingface/transformers-pytorch-cpu:latest 2 | COPY ./ /app 3 | WORKDIR /app 4 | # the repo ships requirements_inference.txt (there is no requirements_prod.txt),
so install the inference-only dependency set 5 | RUN pip install -r requirements_inference.txt 6 | ENV LC_ALL=C.UTF-8 7 | ENV LANG=C.UTF-8 8 | EXPOSE 8000 9 | CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8000"] 10 | -------------------------------------------------------------------------------- /week_5_docker/README.md: -------------------------------------------------------------------------------- 1 | 2 | **Note: The purpose of the project is to explore the libraries and learn how to use them. 
Not to build a SOTA model.** 3 | 4 | ## Requirements: 5 | 6 | This project uses Python 3.8 7 | 8 | Create a virtual env with the following command: 9 | 10 | ``` 11 | conda create --name project-setup python=3.8 12 | conda activate project-setup 13 | ``` 14 | 15 | Install the requirements: 16 | 17 | ``` 18 | pip install -r requirements.txt 19 | ``` 20 | 21 | ## Running 22 | 23 | ### Training 24 | 25 | After installing the requirements, in order to train the model simply run: 26 | 27 | ``` 28 | python train.py 29 | ``` 30 | 31 | ### Monitoring 32 | 33 | Once the training is completed in the end of the logs you will see something like: 34 | 35 | ``` 36 | wandb: Synced 5 W&B file(s), 4 media file(s), 3 artifact file(s) and 0 other file(s) 37 | wandb: 38 | wandb: Synced proud-mountain-77: https://wandb.ai/raviraja/MLOps%20Basics/runs/3vp1twdc 39 | ``` 40 | 41 | Follow the link to see the wandb dashboard which contains all the plots. 42 | 43 | ### Versioning data 44 | 45 | Refer to the blog: [DVC Configuration](https://www.ravirajag.dev/blog/mlops-dvc) 46 | 47 | ### Exporting model to ONNX 48 | 49 | Once the model is trained, convert the model using the following command: 50 | 51 | ``` 52 | python convert_model_to_onnx.py 53 | ``` 54 | 55 | ### Inference 56 | 57 | #### Inference using standard pytorch 58 | 59 | ``` 60 | python inference.py 61 | ``` 62 | 63 | #### Inference using ONNX Runtime 64 | 65 | ``` 66 | python inference_onnx.py 67 | ``` 68 | 69 | ### Docker 70 | 71 | Install the docker using the [instructions here](https://docs.docker.com/engine/install/) 72 | 73 | Build the image using the command 74 | 75 | ```shell 76 | docker build -t inference:latest . 
77 | ``` 78 | 79 | Then run the container using the command 80 | 81 | ```shell 82 | docker run -p 8000:8000 --name inference_container inference:latest 83 | ``` 84 | 85 | (or) 86 | 87 | Build and run the container using the command 88 | 89 | ```shell 90 | docker-compose up 91 | ``` 92 | 93 | 94 | ### Running notebooks 95 | 96 | I am using [Jupyter lab](https://jupyter.org/install) to run the notebooks. 97 | 98 | Since I am using a virtualenv, when I run the command `jupyter lab` it might or might not use the virtualenv. 99 | 100 | To make sure to use the virtualenv, run the following commands before running `jupyter lab` 101 | 102 | ``` 103 | conda install ipykernel 104 | python -m ipykernel install --user --name project-setup 105 | pip install ipywidgets 106 | ``` -------------------------------------------------------------------------------- /week_5_docker/app.py: -------------------------------------------------------------------------------- 1 | from fastapi import FastAPI 2 | from inference_onnx import ColaONNXPredictor 3 | app = FastAPI(title="MLOps Basics App") 4 | 5 | predictor = ColaONNXPredictor("./models/model.onnx") 6 | 7 | @app.get("/") 8 | async def home_page(): 9 | return "

Sample prediction API

" 10 | 11 | 12 | @app.get("/predict") 13 | async def get_prediction(text: str): 14 | result = predictor.predict(text) 15 | return result -------------------------------------------------------------------------------- /week_5_docker/configs/config.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - model: default 3 | - processing: default 4 | - training: default 5 | - override hydra/job_logging: colorlog 6 | - override hydra/hydra_logging: colorlog -------------------------------------------------------------------------------- /week_5_docker/configs/model/default.yaml: -------------------------------------------------------------------------------- 1 | name: google/bert_uncased_L-2_H-128_A-2 # model used for training the classifier 2 | tokenizer: google/bert_uncased_L-2_H-128_A-2 # tokenizer used for processing the data -------------------------------------------------------------------------------- /week_5_docker/configs/processing/default.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 64 2 | max_length: 128 -------------------------------------------------------------------------------- /week_5_docker/configs/training/default.yaml: -------------------------------------------------------------------------------- 1 | max_epochs: 1 2 | log_every_n_steps: 10 3 | deterministic: true 4 | limit_train_batches: 0.25 5 | limit_val_batches: ${training.limit_train_batches} -------------------------------------------------------------------------------- /week_5_docker/convert_model_to_onnx.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import hydra 3 | import logging 4 | 5 | from omegaconf.omegaconf import OmegaConf 6 | 7 | from model import ColaModel 8 | from data import DataModule 9 | 10 | logger = logging.getLogger(__name__) 11 | 12 | 13 | @hydra.main(config_path="./configs", config_name="config") 14 | 
@hydra.main(config_path="./configs", config_name="config")
def convert_model(cfg):
    """Export the best trained Lightning checkpoint to ONNX.

    Reads ``models/best-checkpoint.ckpt`` relative to the original working
    directory (hydra changes the cwd per run, hence ``get_original_cwd``)
    and writes ``models/model.onnx`` next to it.

    Args:
        cfg: hydra config; uses cfg.model.tokenizer and cfg.processing.*.
    """
    root_dir = hydra.utils.get_original_cwd()
    model_path = f"{root_dir}/models/best-checkpoint.ckpt"
    logger.info("Loading pre-trained model from: %s", model_path)
    cola_model = ColaModel.load_from_checkpoint(model_path)

    data_model = DataModule(
        cfg.model.tokenizer, cfg.processing.batch_size, cfg.processing.max_length
    )
    data_model.prepare_data()
    data_model.setup()
    # A single real tokenized sample (batch dim kept via unsqueeze) is enough
    # to trace the graph for export.
    input_batch = next(iter(data_model.train_dataloader()))
    input_sample = {
        "input_ids": input_batch["input_ids"][0].unsqueeze(0),
        "attention_mask": input_batch["attention_mask"][0].unsqueeze(0),
    }

    # Export the model.
    # fix: was f-strings with no placeholders (and eager formatting in a
    # logging call) — use plain strings / lazy %-style logging args.
    logger.info("Converting the model into ONNX format")
    torch.onnx.export(
        cola_model,  # model being run
        (
            input_sample["input_ids"],
            input_sample["attention_mask"],
        ),  # model input (or a tuple for multiple inputs)
        f"{root_dir}/models/model.onnx",  # where to save the model
        export_params=True,
        opset_version=10,
        input_names=["input_ids", "attention_mask"],  # the model's input names
        output_names=["output"],  # the model's output names
        # batch dimension is dynamic so inference can use any batch size
        dynamic_axes={
            "input_ids": {0: "batch_size"},
            "attention_mask": {0: "batch_size"},
            "output": {0: "batch_size"},
        },
    )

    logger.info(
        "Model converted successfully. ONNX format model is at: %s/models/model.onnx",
        root_dir,
    )


if __name__ == "__main__":
    convert_model()
class ColaPredictor:
    """CoLA acceptability predictor backed by a trained Lightning checkpoint."""

    def __init__(self, model_path):
        """Load the checkpoint, freeze it for inference, and prepare the tokenizer.

        Args:
            model_path: path to a ``ColaModel`` Lightning checkpoint.
        """
        self.model_path = model_path
        self.model = ColaModel.load_from_checkpoint(model_path)
        self.model.eval()
        self.model.freeze()
        # DataModule is only used here for its tokenizer (tokenize_data).
        self.processor = DataModule()
        self.softmax = torch.nn.Softmax(dim=1)
        # fix: internal attribute was misspelled "lables"
        self.labels = ["unacceptable", "acceptable"]

    @timing
    def predict(self, text):
        """Classify one sentence.

        Returns:
            list of ``{"label": str, "score": float}`` dicts, one per class,
            with softmax scores in label order (unacceptable, acceptable).
        """
        inference_sample = {"sentence": text}
        processed = self.processor.tokenize_data(inference_sample)
        # Wrap in an extra list to add the batch dimension the model expects.
        logits = self.model(
            torch.tensor([processed["input_ids"]]),
            torch.tensor([processed["attention_mask"]]),
        )
        scores = self.softmax(logits[0]).tolist()[0]
        return [
            {"label": label, "score": score}
            for score, label in zip(scores, self.labels)
        ]


if __name__ == "__main__":
    sentence = "The boy is sitting on a bench"
    predictor = ColaPredictor("./models/best-checkpoint.ckpt")
    print(predictor.predict(sentence))
    sentences = ["The boy is sitting on a bench"] * 10
    for sentence in sentences:
        predictor.predict(sentence)
class ColaModel(pl.LightningModule):
    """BERT-based binary classifier for the CoLA (linguistic acceptability) task.

    Wraps a HuggingFace sequence-classification head and logs train/val
    metrics (accuracy, macro/micro precision & recall, F1) to W&B.
    """

    def __init__(self, model_name="google/bert_uncased_L-2_H-128_A-2", lr=3e-5):
        super(ColaModel, self).__init__()
        self.save_hyperparameters()  # exposes model_name / lr via self.hparams

        self.bert = AutoModelForSequenceClassification.from_pretrained(
            model_name, num_labels=2
        )
        self.num_classes = 2
        # Separate metric instances per split so running state never mixes.
        self.train_accuracy_metric = torchmetrics.Accuracy()
        self.val_accuracy_metric = torchmetrics.Accuracy()
        self.f1_metric = torchmetrics.F1(num_classes=self.num_classes)
        self.precision_macro_metric = torchmetrics.Precision(
            average="macro", num_classes=self.num_classes
        )
        self.recall_macro_metric = torchmetrics.Recall(
            average="macro", num_classes=self.num_classes
        )
        self.precision_micro_metric = torchmetrics.Precision(average="micro")
        self.recall_micro_metric = torchmetrics.Recall(average="micro")

    def forward(self, input_ids, attention_mask, labels=None):
        """Delegate to the HF model; when labels are given, HF also computes the loss."""
        outputs = self.bert(
            input_ids=input_ids, attention_mask=attention_mask, labels=labels
        )
        return outputs

    def training_step(self, batch, batch_idx):
        outputs = self.forward(
            batch["input_ids"], batch["attention_mask"], labels=batch["label"]
        )
        # loss = F.cross_entropy(logits, batch["label"])
        preds = torch.argmax(outputs.logits, 1)
        train_acc = self.train_accuracy_metric(preds, batch["label"])
        self.log("train/loss", outputs.loss, prog_bar=True, on_epoch=True)
        self.log("train/acc", train_acc, prog_bar=True, on_epoch=True)
        return outputs.loss

    def validation_step(self, batch, batch_idx):
        labels = batch["label"]
        outputs = self.forward(
            batch["input_ids"], batch["attention_mask"], labels=batch["label"]
        )
        preds = torch.argmax(outputs.logits, 1)

        # Metrics
        valid_acc = self.val_accuracy_metric(preds, labels)
        precision_macro = self.precision_macro_metric(preds, labels)
        recall_macro = self.recall_macro_metric(preds, labels)
        precision_micro = self.precision_micro_metric(preds, labels)
        recall_micro = self.recall_micro_metric(preds, labels)
        f1 = self.f1_metric(preds, labels)

        # Logging metrics
        self.log("valid/loss", outputs.loss, prog_bar=True, on_step=True)
        self.log("valid/acc", valid_acc, prog_bar=True, on_epoch=True)
        self.log("valid/precision_macro", precision_macro, prog_bar=True, on_epoch=True)
        self.log("valid/recall_macro", recall_macro, prog_bar=True, on_epoch=True)
        self.log("valid/precision_micro", precision_micro, prog_bar=True, on_epoch=True)
        self.log("valid/recall_micro", recall_micro, prog_bar=True, on_epoch=True)
        self.log("valid/f1", f1, prog_bar=True, on_epoch=True)
        # Returned dict is collected by Lightning and handed to
        # validation_epoch_end for epoch-level plots.
        return {"labels": labels, "logits": outputs.logits}

    def validation_epoch_end(self, outputs):
        labels = torch.cat([x["labels"] for x in outputs])
        logits = torch.cat([x["logits"] for x in outputs])
        preds = torch.argmax(logits, 1)

        ## There are multiple ways to track the metrics
        # 1. Confusion matrix plotting using inbuilt W&B method
        # NOTE(review): .numpy() assumes the tensors are already on CPU —
        # would need .cpu() first when training on GPU; confirm.
        self.logger.experiment.log(
            {
                "conf": wandb.plot.confusion_matrix(
                    probs=logits.numpy(), y_true=labels.numpy()
                )
            }
        )

        # 2. Confusion Matrix plotting using scikit-learn method
        # wandb.log({"cm": wandb.sklearn.plot_confusion_matrix(labels.numpy(), preds)})

        # 3. Confusion Matrix plotting using Seaborn
        # data = confusion_matrix(labels.numpy(), preds.numpy())
        # df_cm = pd.DataFrame(data, columns=np.unique(labels), index=np.unique(labels))
        # df_cm.index.name = "Actual"
        # df_cm.columns.name = "Predicted"
        # plt.figure(figsize=(7, 4))
        # plot = sns.heatmap(
        #     df_cm, cmap="Blues", annot=True, annot_kws={"size": 16}
        # )  # font size
        # self.logger.experiment.log({"Confusion Matrix": wandb.Image(plot)})

        # self.logger.experiment.log(
        #     {"roc": wandb.plot.roc_curve(labels.numpy(), logits.numpy())}
        # )

    def configure_optimizers(self):
        """Plain Adam over all parameters at the configured learning rate."""
        return torch.optim.Adam(self.parameters(), lr=self.hparams["lr"])
class SamplesVisualisationLogger(pl.Callback):
    """Callback that logs misclassified validation sentences to W&B as a table."""

    def __init__(self, datamodule):
        super().__init__()

        self.datamodule = datamodule

    def on_validation_end(self, trainer, pl_module):
        """Grab one validation batch, run the model, and log the wrong predictions."""
        batch = next(iter(self.datamodule.val_dataloader()))
        sentences = batch["sentence"]
        labels = batch["label"]

        outputs = pl_module(batch["input_ids"], batch["attention_mask"])
        predictions = torch.argmax(outputs.logits, 1)

        frame = pd.DataFrame(
            {
                "Sentence": sentences,
                "Label": labels.numpy(),
                "Predicted": predictions.numpy(),
            }
        )

        # Only the rows the model got wrong are worth eyeballing.
        mismatches = frame[frame["Label"] != frame["Predicted"]]
        trainer.logger.experiment.log(
            {
                "examples": wandb.Table(dataframe=mismatches, allow_mixed_types=True),
                "global_step": trainer.global_step,
            }
        )
def timing(f):
    """Decorator that prints how long each call to *f* took, in seconds.

    Usage:
        @timing
        def function(a):
            pass

    The wrapped function's return value is passed through unchanged, and
    its metadata (__name__, __doc__) is preserved via functools.wraps.
    """

    @wraps(f)
    def wrapper(*args, **kwargs):
        # fix: use perf_counter instead of time.time — it is monotonic and
        # high-resolution, so short durations are measured reliably and the
        # result cannot go negative on a system clock adjustment.
        start = time.perf_counter()
        result = f(*args, **kwargs)
        end = time.perf_counter()
        print("function:%r took: %2.5f sec" % (f.__name__, end - start))
        return result

    return wrapper
application 25 | EXPOSE 8000 26 | CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8000"] 27 | -------------------------------------------------------------------------------- /week_6_github_actions/README.md: -------------------------------------------------------------------------------- 1 | 2 | **Note: The purpose of the project to explore the libraries and learn how to use them. Not to build a SOTA model.** 3 | 4 | ## Requirements: 5 | 6 | This project uses Python 3.8 7 | 8 | Create a virtual env with the following command: 9 | 10 | ``` 11 | conda create --name project-setup python=3.8 12 | conda activate project-setup 13 | ``` 14 | 15 | Install the requirements: 16 | 17 | ``` 18 | pip install -r requirements.txt 19 | ``` 20 | 21 | ## Running 22 | 23 | ### Training 24 | 25 | After installing the requirements, in order to train the model simply run: 26 | 27 | ``` 28 | python train.py 29 | ``` 30 | 31 | ### Monitoring 32 | 33 | Once the training is completed in the end of the logs you will see something like: 34 | 35 | ``` 36 | wandb: Synced 5 W&B file(s), 4 media file(s), 3 artifact file(s) and 0 other file(s) 37 | wandb: 38 | wandb: Synced proud-mountain-77: https://wandb.ai/raviraja/MLOps%20Basics/runs/3vp1twdc 39 | ``` 40 | 41 | Follow the link to see the wandb dashboard which contains all the plots. 
42 | 43 | ### Versioning data 44 | 45 | Refer to the blog: [DVC Configuration](https://www.ravirajag.dev/blog/mlops-dvc) 46 | 47 | ### Exporting model to ONNX 48 | 49 | Once the model is trained, convert the model using the following command: 50 | 51 | ``` 52 | python convert_model_to_onnx.py 53 | ``` 54 | 55 | ### Inference 56 | 57 | #### Inference using standard pytorch 58 | 59 | ``` 60 | python inference.py 61 | ``` 62 | 63 | #### Inference using ONNX Runtime 64 | 65 | ``` 66 | python inference_onnx.py 67 | ``` 68 | 69 | ### Google Service account 70 | 71 | Create service account using the steps mentioned here: [Create service account](https://www.ravirajag.dev/blog/mlops-github-actions) 72 | 73 | ### Configuring dvc 74 | 75 | ``` 76 | dvc init 77 | dvc remote add -d storage gdrive://19JK5AFbqOBlrFVwDHjTrf9uvQFtS0954 78 | dvc remote modify storage gdrive_use_service_account true 79 | dvc remote modify storage gdrive_service_account_json_file_path creds.json 80 | ``` 81 | 82 | `creds.json` is the file created during service account creation 83 | 84 | 85 | ### Docker 86 | 87 | Install the docker using the [instructions here](https://docs.docker.com/engine/install/) 88 | 89 | Build the image using the command 90 | 91 | ```shell 92 | docker build -t inference:latest . 93 | ``` 94 | 95 | Then run the container using the command 96 | 97 | ```shell 98 | docker run -p 8000:8000 --name inference_container inference:latest 99 | ``` 100 | 101 | (or) 102 | 103 | Build and run the container using the command 104 | 105 | ```shell 106 | docker-compose up 107 | ``` 108 | 109 | 110 | ### Running notebooks 111 | 112 | I am using [Jupyter lab](https://jupyter.org/install) to run the notebooks. 113 | 114 | Since I am using a virtualenv, when I run the command `jupyter lab` it might or might not use the virtualenv. 
from fastapi import FastAPI
from inference_onnx import ColaONNXPredictor

app = FastAPI(title="MLOps Basics App")

# Loaded once at import time so every request reuses the same ONNX session.
predictor = ColaONNXPredictor("./models/model.onnx")


@app.get("/")
async def home_page():
    # NOTE(review): the original markup wrapping this text was lost during
    # extraction (likely an <h2> element) — confirm against the repository.
    return "Sample prediction API"


@app.get("/predict")
async def get_prediction(text: str):
    # Returns the predictor's output: a list of {"label", "score"} dicts,
    # one per class, which FastAPI serializes to JSON.
    result = predictor.predict(text)
    return result
@hydra.main(config_path="./configs", config_name="config")
def convert_model(cfg):
    """Export the best trained Lightning checkpoint to ONNX.

    Reads ``models/best-checkpoint.ckpt`` relative to the original working
    directory (hydra changes the cwd per run, hence ``get_original_cwd``)
    and writes ``models/model.onnx`` next to it.
    """
    root_dir = hydra.utils.get_original_cwd()
    model_path = f"{root_dir}/models/best-checkpoint.ckpt"
    logger.info(f"Loading pre-trained model from: {model_path}")
    cola_model = ColaModel.load_from_checkpoint(model_path)

    data_model = DataModule(
        cfg.model.tokenizer, cfg.processing.batch_size, cfg.processing.max_length
    )
    data_model.prepare_data()
    data_model.setup()
    # A single real tokenized sample (batch dim kept via unsqueeze) is enough
    # to trace the graph for export.
    input_batch = next(iter(data_model.train_dataloader()))
    input_sample = {
        "input_ids": input_batch["input_ids"][0].unsqueeze(0),
        "attention_mask": input_batch["attention_mask"][0].unsqueeze(0),
    }

    # Export the model
    logger.info(f"Converting the model into ONNX format")
    torch.onnx.export(
        cola_model,  # model being run
        (
            input_sample["input_ids"],
            input_sample["attention_mask"],
        ),  # model input (or a tuple for multiple inputs)
        f"{root_dir}/models/model.onnx",  # where to save the model (can be a file or file-like object)
        export_params=True,
        opset_version=10,
        input_names=["input_ids", "attention_mask"],  # the model's input names
        output_names=["output"],  # the model's output names
        # batch dimension is dynamic so inference can use any batch size
        dynamic_axes={
            "input_ids": {0: "batch_size"},  # variable length axes
            "attention_mask": {0: "batch_size"},
            "output": {0: "batch_size"},
        },
    )

    logger.info(
        f"Model converted successfully. ONNX format model is at: {root_dir}/models/model.onnx"
    )


if __name__ == "__main__":
    convert_model()
class DataModule(pl.LightningDataModule):
    """Lightning data module for the GLUE/CoLA dataset.

    Downloads the dataset, tokenizes sentences with a HuggingFace tokenizer,
    and serves train/validation dataloaders.
    """

    def __init__(
        self,
        model_name="google/bert_uncased_L-2_H-128_A-2",
        batch_size=64,
        max_length=128,
    ):
        super().__init__()

        self.batch_size = batch_size
        self.max_length = max_length
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)

    def prepare_data(self):
        # NOTE(review): Lightning runs prepare_data on rank 0 only; assigning
        # instance state here is lost in multi-process training. Appears fine
        # for this single-process project — confirm before scaling out.
        cola_dataset = load_dataset("glue", "cola")
        self.train_data = cola_dataset["train"]
        self.val_data = cola_dataset["validation"]

    def tokenize_data(self, example):
        """Tokenize example["sentence"], padded/truncated to max_length."""
        return self.tokenizer(
            example["sentence"],
            truncation=True,
            padding="max_length",
            max_length=self.max_length,
        )

    def setup(self, stage=None):
        # we set up only relevant datasets when stage is specified
        if stage == "fit" or stage is None:
            self.train_data = self.train_data.map(self.tokenize_data, batched=True)
            self.train_data.set_format(
                type="torch", columns=["input_ids", "attention_mask", "label"]
            )

            self.val_data = self.val_data.map(self.tokenize_data, batched=True)
            # output_all_columns keeps the raw "sentence" strings available for
            # the validation-sample visualisation callback.
            self.val_data.set_format(
                type="torch",
                columns=["input_ids", "attention_mask", "label"],
                output_all_columns=True,
            )

    def train_dataloader(self):
        """Shuffled training dataloader."""
        return torch.utils.data.DataLoader(
            self.train_data, batch_size=self.batch_size, shuffle=True
        )

    def val_dataloader(self):
        """Deterministic (unshuffled) validation dataloader."""
        return torch.utils.data.DataLoader(
            self.val_data, batch_size=self.batch_size, shuffle=False
        )


if __name__ == "__main__":
    data_model = DataModule()
    data_model.prepare_data()
    data_model.setup()
    print(next(iter(data_model.train_dataloader()))["input_ids"].shape)
class ColaONNXPredictor:
    """CoLA acceptability predictor backed by an exported ONNX model."""

    def __init__(self, model_path):
        """Create an ONNX Runtime session for *model_path*.

        DataModule is only used here for its tokenizer (tokenize_data).
        """
        self.ort_session = ort.InferenceSession(model_path)
        self.processor = DataModule()
        # fix: internal attribute was misspelled "lables"
        self.labels = ["unacceptable", "acceptable"]

    @timing
    def predict(self, text):
        """Classify one sentence.

        Returns:
            list of ``{"label": str, "score": float}`` dicts, one per class,
            with softmax scores in label order (unacceptable, acceptable).
        """
        inference_sample = {"sentence": text}
        processed = self.processor.tokenize_data(inference_sample)

        # The exported graph expects a batch dimension, hence expand_dims.
        ort_inputs = {
            "input_ids": np.expand_dims(processed["input_ids"], axis=0),
            "attention_mask": np.expand_dims(processed["attention_mask"], axis=0),
        }
        ort_outs = self.ort_session.run(None, ort_inputs)
        scores = softmax(ort_outs[0])[0]
        # fix: dropped leftover debug print(predictions) — this method is
        # called per-request by the FastAPI app, which only uses the return
        # value; printing spammed the server logs.
        return [
            {"label": label, "score": float(score)}
            for score, label in zip(scores, self.labels)
        ]


if __name__ == "__main__":
    sentence = "The boy is sitting on a bench"
    predictor = ColaONNXPredictor("./models/model.onnx")
    print(predictor.predict(sentence))
    sentences = ["The boy is sitting on a bench"] * 10
    for sentence in sentences:
        predictor.predict(sentence)
/week_6_github_actions/model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import wandb 3 | import hydra 4 | import numpy as np 5 | import pandas as pd 6 | import torchmetrics 7 | import pytorch_lightning as pl 8 | from transformers import AutoModelForSequenceClassification 9 | from omegaconf import OmegaConf, DictConfig 10 | from sklearn.metrics import confusion_matrix 11 | import matplotlib.pyplot as plt 12 | import seaborn as sns 13 | 14 | 15 | class ColaModel(pl.LightningModule): 16 | def __init__(self, model_name="google/bert_uncased_L-2_H-128_A-2", lr=3e-5): 17 | super(ColaModel, self).__init__() 18 | self.save_hyperparameters() 19 | 20 | self.bert = AutoModelForSequenceClassification.from_pretrained( 21 | model_name, num_labels=2 22 | ) 23 | self.num_classes = 2 24 | self.train_accuracy_metric = torchmetrics.Accuracy() 25 | self.val_accuracy_metric = torchmetrics.Accuracy() 26 | self.f1_metric = torchmetrics.F1(num_classes=self.num_classes) 27 | self.precision_macro_metric = torchmetrics.Precision( 28 | average="macro", num_classes=self.num_classes 29 | ) 30 | self.recall_macro_metric = torchmetrics.Recall( 31 | average="macro", num_classes=self.num_classes 32 | ) 33 | self.precision_micro_metric = torchmetrics.Precision(average="micro") 34 | self.recall_micro_metric = torchmetrics.Recall(average="micro") 35 | 36 | def forward(self, input_ids, attention_mask, labels=None): 37 | outputs = self.bert( 38 | input_ids=input_ids, attention_mask=attention_mask, labels=labels 39 | ) 40 | return outputs 41 | 42 | def training_step(self, batch, batch_idx): 43 | outputs = self.forward( 44 | batch["input_ids"], batch["attention_mask"], labels=batch["label"] 45 | ) 46 | # loss = F.cross_entropy(logits, batch["label"]) 47 | preds = torch.argmax(outputs.logits, 1) 48 | train_acc = self.train_accuracy_metric(preds, batch["label"]) 49 | self.log("train/loss", outputs.loss, prog_bar=True, on_epoch=True) 50 | 
self.log("train/acc", train_acc, prog_bar=True, on_epoch=True) 51 | return outputs.loss 52 | 53 | def validation_step(self, batch, batch_idx): 54 | labels = batch["label"] 55 | outputs = self.forward( 56 | batch["input_ids"], batch["attention_mask"], labels=batch["label"] 57 | ) 58 | preds = torch.argmax(outputs.logits, 1) 59 | 60 | # Metrics 61 | valid_acc = self.val_accuracy_metric(preds, labels) 62 | precision_macro = self.precision_macro_metric(preds, labels) 63 | recall_macro = self.recall_macro_metric(preds, labels) 64 | precision_micro = self.precision_micro_metric(preds, labels) 65 | recall_micro = self.recall_micro_metric(preds, labels) 66 | f1 = self.f1_metric(preds, labels) 67 | 68 | # Logging metrics 69 | self.log("valid/loss", outputs.loss, prog_bar=True, on_step=True) 70 | self.log("valid/acc", valid_acc, prog_bar=True, on_epoch=True) 71 | self.log("valid/precision_macro", precision_macro, prog_bar=True, on_epoch=True) 72 | self.log("valid/recall_macro", recall_macro, prog_bar=True, on_epoch=True) 73 | self.log("valid/precision_micro", precision_micro, prog_bar=True, on_epoch=True) 74 | self.log("valid/recall_micro", recall_micro, prog_bar=True, on_epoch=True) 75 | self.log("valid/f1", f1, prog_bar=True, on_epoch=True) 76 | return {"labels": labels, "logits": outputs.logits} 77 | 78 | def validation_epoch_end(self, outputs): 79 | labels = torch.cat([x["labels"] for x in outputs]) 80 | logits = torch.cat([x["logits"] for x in outputs]) 81 | preds = torch.argmax(logits, 1) 82 | 83 | ## There are multiple ways to track the metrics 84 | # 1. Confusion matrix plotting using inbuilt W&B method 85 | self.logger.experiment.log( 86 | { 87 | "conf": wandb.plot.confusion_matrix( 88 | probs=logits.numpy(), y_true=labels.numpy() 89 | ) 90 | } 91 | ) 92 | 93 | # 2. Confusion Matrix plotting using scikit-learn method 94 | # wandb.log({"cm": wandb.sklearn.plot_confusion_matrix(labels.numpy(), preds)}) 95 | 96 | # 3. 
Confusion Matric plotting using Seaborn 97 | # data = confusion_matrix(labels.numpy(), preds.numpy()) 98 | # df_cm = pd.DataFrame(data, columns=np.unique(labels), index=np.unique(labels)) 99 | # df_cm.index.name = "Actual" 100 | # df_cm.columns.name = "Predicted" 101 | # plt.figure(figsize=(7, 4)) 102 | # plot = sns.heatmap( 103 | # df_cm, cmap="Blues", annot=True, annot_kws={"size": 16} 104 | # ) # font size 105 | # self.logger.experiment.log({"Confusion Matrix": wandb.Image(plot)}) 106 | 107 | # self.logger.experiment.log( 108 | # {"roc": wandb.plot.roc_curve(labels.numpy(), logits.numpy())} 109 | # ) 110 | 111 | def configure_optimizers(self): 112 | return torch.optim.Adam(self.parameters(), lr=self.hparams["lr"]) 113 | -------------------------------------------------------------------------------- /week_6_github_actions/parse_json.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | with open('creds.txt') as f: 4 | data = f.read() 5 | 6 | print(data) 7 | # data = json.loads(data, strict=False) 8 | # print(data) 9 | data = eval(data) 10 | print(data) 11 | 12 | with open('test.json', 'w') as f: 13 | json.dump(data, f) 14 | -------------------------------------------------------------------------------- /week_6_github_actions/requirements.txt: -------------------------------------------------------------------------------- 1 | pytorch-lightning==1.2.10 2 | datasets==1.6.2 3 | transformers==4.5.1 4 | scikit-learn==0.24.2 5 | wandb 6 | torchmetrics 7 | matplotlib 8 | seaborn 9 | hydra-core 10 | omegaconf 11 | hydra_colorlog 12 | fastapi 13 | uvicorn 14 | -------------------------------------------------------------------------------- /week_6_github_actions/requirements_inference.txt: -------------------------------------------------------------------------------- 1 | pytorch-lightning==1.2.10 2 | datasets==1.6.2 3 | scikit-learn==0.24.2 4 | hydra-core 5 | omegaconf 6 | hydra_colorlog 7 | onnxruntime 8 | 
fastapi 9 | uvicorn 10 | dvc -------------------------------------------------------------------------------- /week_6_github_actions/train.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import hydra 3 | import wandb 4 | import logging 5 | 6 | import pandas as pd 7 | import pytorch_lightning as pl 8 | from omegaconf.omegaconf import OmegaConf 9 | from pytorch_lightning.callbacks import ModelCheckpoint 10 | from pytorch_lightning.callbacks.early_stopping import EarlyStopping 11 | from pytorch_lightning.loggers import WandbLogger 12 | 13 | from data import DataModule 14 | from model import ColaModel 15 | 16 | logger = logging.getLogger(__name__) 17 | 18 | 19 | class SamplesVisualisationLogger(pl.Callback): 20 | def __init__(self, datamodule): 21 | super().__init__() 22 | 23 | self.datamodule = datamodule 24 | 25 | def on_validation_end(self, trainer, pl_module): 26 | val_batch = next(iter(self.datamodule.val_dataloader())) 27 | sentences = val_batch["sentence"] 28 | 29 | outputs = pl_module(val_batch["input_ids"], val_batch["attention_mask"]) 30 | preds = torch.argmax(outputs.logits, 1) 31 | labels = val_batch["label"] 32 | 33 | df = pd.DataFrame( 34 | {"Sentence": sentences, "Label": labels.numpy(), "Predicted": preds.numpy()} 35 | ) 36 | 37 | wrong_df = df[df["Label"] != df["Predicted"]] 38 | trainer.logger.experiment.log( 39 | { 40 | "examples": wandb.Table(dataframe=wrong_df, allow_mixed_types=True), 41 | "global_step": trainer.global_step, 42 | } 43 | ) 44 | 45 | 46 | @hydra.main(config_path="./configs", config_name="config") 47 | def main(cfg): 48 | logger.info(OmegaConf.to_yaml(cfg, resolve=True)) 49 | logger.info(f"Using the model: {cfg.model.name}") 50 | logger.info(f"Using the tokenizer: {cfg.model.tokenizer}") 51 | cola_data = DataModule( 52 | cfg.model.tokenizer, cfg.processing.batch_size, cfg.processing.max_length 53 | ) 54 | cola_model = ColaModel(cfg.model.name) 55 | 56 | root_dir = 
hydra.utils.get_original_cwd() 57 | checkpoint_callback = ModelCheckpoint( 58 | dirpath=f"{root_dir}/models", 59 | filename="best-checkpoint", 60 | monitor="valid/loss", 61 | mode="min", 62 | ) 63 | 64 | early_stopping_callback = EarlyStopping( 65 | monitor="valid/loss", patience=3, verbose=True, mode="min" 66 | ) 67 | 68 | wandb_logger = WandbLogger(project="MLOps Basics", entity="raviraja") 69 | trainer = pl.Trainer( 70 | max_epochs=cfg.training.max_epochs, 71 | logger=wandb_logger, 72 | callbacks=[checkpoint_callback, SamplesVisualisationLogger(cola_data), early_stopping_callback], 73 | log_every_n_steps=cfg.training.log_every_n_steps, 74 | deterministic=cfg.training.deterministic, 75 | # limit_train_batches=cfg.training.limit_train_batches, 76 | # limit_val_batches=cfg.training.limit_val_batches, 77 | ) 78 | trainer.fit(cola_model, cola_data) 79 | wandb.finish() 80 | 81 | 82 | if __name__ == "__main__": 83 | main() 84 | -------------------------------------------------------------------------------- /week_6_github_actions/utils.py: -------------------------------------------------------------------------------- 1 | import time 2 | from functools import wraps 3 | 4 | 5 | def timing(f): 6 | """Decorator for timing functions 7 | Usage: 8 | @timing 9 | def function(a): 10 | pass 11 | """ 12 | 13 | @wraps(f) 14 | def wrapper(*args, **kwargs): 15 | start = time.time() 16 | result = f(*args, **kwargs) 17 | end = time.time() 18 | print("function:%r took: %2.5f sec" % (f.__name__, end - start)) 19 | return result 20 | 21 | return wrapper 22 | -------------------------------------------------------------------------------- /week_7_ecr/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM huggingface/transformers-pytorch-cpu:latest 2 | 3 | COPY ./ /app 4 | WORKDIR /app 5 | 6 | ARG AWS_ACCESS_KEY_ID 7 | ARG AWS_SECRET_ACCESS_KEY 8 | 9 | 10 | #this envs are experimental 11 | ENV AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID \ 12 | 
AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY 13 | 14 | 15 | # install requirements 16 | RUN pip install "dvc[s3]" 17 | RUN pip install -r requirements_inference.txt 18 | 19 | # initialise dvc 20 | RUN dvc init --no-scm 21 | # configuring remote server in dvc 22 | RUN dvc remote add -d model-store s3://models-dvc/trained_models/ 23 | 24 | RUN cat .dvc/config 25 | # pulling the trained model 26 | RUN dvc pull dvcfiles/trained_model.dvc 27 | 28 | ENV LC_ALL=C.UTF-8 29 | ENV LANG=C.UTF-8 30 | 31 | # running the application 32 | EXPOSE 8000 33 | CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8000"] 34 | -------------------------------------------------------------------------------- /week_7_ecr/README.md: -------------------------------------------------------------------------------- 1 | 2 | **Note: The purpose of the project is to explore the libraries and learn how to use them. Not to build a SOTA model.** 3 | 4 | ## Requirements: 5 | 6 | This project uses Python 3.8 7 | 8 | Create a virtual env with the following command: 9 | 10 | ``` 11 | conda create --name project-setup python=3.8 12 | conda activate project-setup 13 | ``` 14 | 15 | Install the requirements: 16 | 17 | ``` 18 | pip install -r requirements.txt 19 | ``` 20 | 21 | ## Running 22 | 23 | ### Training 24 | 25 | After installing the requirements, in order to train the model simply run: 26 | 27 | ``` 28 | python train.py 29 | ``` 30 | 31 | ### Monitoring 32 | 33 | Once the training is completed, at the end of the logs you will see something like: 34 | 35 | ``` 36 | wandb: Synced 5 W&B file(s), 4 media file(s), 3 artifact file(s) and 0 other file(s) 37 | wandb: 38 | wandb: Synced proud-mountain-77: https://wandb.ai/raviraja/MLOps%20Basics/runs/3vp1twdc 39 | ``` 40 | 41 | Follow the link to see the wandb dashboard which contains all the plots.
 42 | 43 | ### Versioning data 44 | 45 | Refer to the blog: [DVC Configuration](https://www.ravirajag.dev/blog/mlops-dvc) 46 | 47 | ### Exporting model to ONNX 48 | 49 | Once the model is trained, convert the model using the following command: 50 | 51 | ``` 52 | python convert_model_to_onnx.py 53 | ``` 54 | 55 | ### Inference 56 | 57 | #### Inference using standard pytorch 58 | 59 | ``` 60 | python inference.py 61 | ``` 62 | 63 | #### Inference using ONNX Runtime 64 | 65 | ``` 66 | python inference_onnx.py 67 | ``` 68 | 69 | ## S3 & ECR 70 | 71 | Follow the instructions mentioned in the [blog post](https://www.ravirajag.dev/blog/mlops-container-registry) for creating S3 bucket and ECR repository. 72 | 73 | ### Configuring dvc 74 | 75 | ``` 76 | dvc init (this has to be done at root folder) 77 | dvc remote add -d model-store s3://models-dvc/trained_models/ 78 | ``` 79 | 80 | ### AWS credentials 81 | 82 | Create the credentials as mentioned in the [blog post](https://www.ravirajag.dev/blog/mlops-container-registry) 83 | 84 | **Do not share the secrets with others** 85 | 86 | Set the ACCESS key and id values in environment variables. 87 | 88 | ``` 89 | export AWS_ACCESS_KEY_ID= 90 | export AWS_SECRET_ACCESS_KEY= 91 | ``` 92 | 93 | ### Trained model in DVC 94 | 95 | Add the trained model (onnx) to dvc using the following command: 96 | 97 | ```shell 98 | cd dvcfiles 99 | dvc add ../models/model.onnx --file trained_model.dvc 100 | ``` 101 | 102 | Push the model to remote storage 103 | 104 | ```shell 105 | dvc push trained_model.dvc 106 | ``` 107 | 108 | ### Docker 109 | 110 | Install Docker using the [instructions here](https://docs.docker.com/engine/install/) 111 | 112 | Build the image using the command 113 | 114 | ```shell 115 | docker build -t mlops-basics:latest . 
116 | ``` 117 | 118 | Then run the container using the command 119 | 120 | ```shell 121 | docker run -p 8000:8000 --name inference_container mlops-basics:latest 122 | ``` 123 | 124 | (or) 125 | 126 | Build and run the container using the command 127 | 128 | ```shell 129 | docker-compose up 130 | ``` 131 | 132 | ### Pushing the image to ECR 133 | 134 | Follow the instructions mentioned in [blog post](https://www.ravirajag.dev/blog/mlops-container-registry) for creating ECR repository. 135 | 136 | - Authenticating docker client to ECR 137 | 138 | ``` 139 | aws ecr get-login-password --region us-west-2 | docker login --username AWS --password-stdin 246113150184.dkr.ecr.us-west-2.amazonaws.com 140 | ``` 141 | 142 | - Tagging the image 143 | 144 | ``` 145 | docker tag mlops-basics:latest 246113150184.dkr.ecr.us-west-2.amazonaws.com/mlops-basics:latest 146 | ``` 147 | 148 | - Pushing the image 149 | 150 | ``` 151 | docker push 246113150184.dkr.ecr.us-west-2.amazonaws.com/mlops-basics:latest 152 | ``` 153 | 154 | Refer to `.github/workflows/build_docker_image.yaml` file for automatically creating the docker image with trained model and pushing it to ECR. 155 | 156 | 157 | ### Running notebooks 158 | 159 | I am using [Jupyter lab](https://jupyter.org/install) to run the notebooks. 160 | 161 | Since I am using a virtualenv, when I run the command `jupyter lab` it might or might not use the virtualenv. 
 162 | 163 | To make sure to use the virtualenv, run the following commands before running `jupyter lab` 164 | 165 | ``` 166 | conda install ipykernel 167 | python -m ipykernel install --user --name project-setup 168 | pip install ipywidgets 169 | ``` -------------------------------------------------------------------------------- /week_7_ecr/app.py: -------------------------------------------------------------------------------- 1 | from fastapi import FastAPI 2 | from inference_onnx import ColaONNXPredictor 3 | app = FastAPI(title="MLOps Basics App") 4 | 5 | predictor = ColaONNXPredictor("./models/model.onnx") 6 | 7 | @app.get("/") 8 | async def home_page(): 9 | return "

Sample prediction API

" 10 | 11 | 12 | @app.get("/predict") 13 | async def get_prediction(text: str): 14 | result = predictor.predict(text) 15 | return result -------------------------------------------------------------------------------- /week_7_ecr/configs/config.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - model: default 3 | - processing: default 4 | - training: default 5 | - override hydra/job_logging: colorlog 6 | - override hydra/hydra_logging: colorlog -------------------------------------------------------------------------------- /week_7_ecr/configs/model/default.yaml: -------------------------------------------------------------------------------- 1 | name: google/bert_uncased_L-2_H-128_A-2 # model used for training the classifier 2 | tokenizer: google/bert_uncased_L-2_H-128_A-2 # tokenizer used for processing the data -------------------------------------------------------------------------------- /week_7_ecr/configs/processing/default.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 64 2 | max_length: 128 -------------------------------------------------------------------------------- /week_7_ecr/configs/training/default.yaml: -------------------------------------------------------------------------------- 1 | max_epochs: 1 2 | log_every_n_steps: 10 3 | deterministic: true 4 | limit_train_batches: 0.25 5 | limit_val_batches: ${training.limit_train_batches} -------------------------------------------------------------------------------- /week_7_ecr/convert_model_to_onnx.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import hydra 3 | import logging 4 | 5 | from omegaconf.omegaconf import OmegaConf 6 | 7 | from model import ColaModel 8 | from data import DataModule 9 | 10 | logger = logging.getLogger(__name__) 11 | 12 | 13 | @hydra.main(config_path="./configs", config_name="config") 14 | def 
convert_model(cfg): 15 | root_dir = hydra.utils.get_original_cwd() 16 | model_path = f"{root_dir}/models/best-checkpoint.ckpt" 17 | logger.info(f"Loading pre-trained model from: {model_path}") 18 | cola_model = ColaModel.load_from_checkpoint(model_path) 19 | 20 | data_model = DataModule( 21 | cfg.model.tokenizer, cfg.processing.batch_size, cfg.processing.max_length 22 | ) 23 | data_model.prepare_data() 24 | data_model.setup() 25 | input_batch = next(iter(data_model.train_dataloader())) 26 | input_sample = { 27 | "input_ids": input_batch["input_ids"][0].unsqueeze(0), 28 | "attention_mask": input_batch["attention_mask"][0].unsqueeze(0), 29 | } 30 | 31 | # Export the model 32 | logger.info(f"Converting the model into ONNX format") 33 | torch.onnx.export( 34 | cola_model, # model being run 35 | ( 36 | input_sample["input_ids"], 37 | input_sample["attention_mask"], 38 | ), # model input (or a tuple for multiple inputs) 39 | f"{root_dir}/models/model.onnx", # where to save the model (can be a file or file-like object) 40 | export_params=True, 41 | opset_version=10, 42 | input_names=["input_ids", "attention_mask"], # the model's input names 43 | output_names=["output"], # the model's output names 44 | dynamic_axes={ 45 | "input_ids": {0: "batch_size"}, # variable length axes 46 | "attention_mask": {0: "batch_size"}, 47 | "output": {0: "batch_size"}, 48 | }, 49 | ) 50 | 51 | logger.info( 52 | f"Model converted successfully. 
ONNX format model is at: {root_dir}/models/model.onnx" 53 | ) 54 | 55 | 56 | if __name__ == "__main__": 57 | convert_model() 58 | -------------------------------------------------------------------------------- /week_7_ecr/data.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import datasets 3 | import pytorch_lightning as pl 4 | 5 | from datasets import load_dataset 6 | from transformers import AutoTokenizer 7 | 8 | 9 | class DataModule(pl.LightningDataModule): 10 | def __init__( 11 | self, 12 | model_name="google/bert_uncased_L-2_H-128_A-2", 13 | batch_size=64, 14 | max_length=128, 15 | ): 16 | super().__init__() 17 | 18 | self.batch_size = batch_size 19 | self.max_length = max_length 20 | self.tokenizer = AutoTokenizer.from_pretrained(model_name) 21 | 22 | def prepare_data(self): 23 | cola_dataset = load_dataset("glue", "cola") 24 | self.train_data = cola_dataset["train"] 25 | self.val_data = cola_dataset["validation"] 26 | 27 | def tokenize_data(self, example): 28 | return self.tokenizer( 29 | example["sentence"], 30 | truncation=True, 31 | padding="max_length", 32 | max_length=self.max_length, 33 | ) 34 | 35 | def setup(self, stage=None): 36 | # we set up only relevant datasets when stage is specified 37 | if stage == "fit" or stage is None: 38 | self.train_data = self.train_data.map(self.tokenize_data, batched=True) 39 | self.train_data.set_format( 40 | type="torch", columns=["input_ids", "attention_mask", "label"] 41 | ) 42 | 43 | self.val_data = self.val_data.map(self.tokenize_data, batched=True) 44 | self.val_data.set_format( 45 | type="torch", 46 | columns=["input_ids", "attention_mask", "label"], 47 | output_all_columns=True, 48 | ) 49 | 50 | def train_dataloader(self): 51 | return torch.utils.data.DataLoader( 52 | self.train_data, batch_size=self.batch_size, shuffle=True 53 | ) 54 | 55 | def val_dataloader(self): 56 | return torch.utils.data.DataLoader( 57 | self.val_data, 
batch_size=self.batch_size, shuffle=False 58 | ) 59 | 60 | 61 | if __name__ == "__main__": 62 | data_model = DataModule() 63 | data_model.prepare_data() 64 | data_model.setup() 65 | print(next(iter(data_model.train_dataloader()))["input_ids"].shape) 66 | -------------------------------------------------------------------------------- /week_7_ecr/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3" 2 | services: 3 | prediction_api: 4 | build: . 5 | container_name: "inference_container" 6 | ports: 7 | - "8000:8000" -------------------------------------------------------------------------------- /week_7_ecr/dvcfiles/trained_model.dvc: -------------------------------------------------------------------------------- 1 | wdir: ../models 2 | outs: 3 | - md5: 02f3b0034769ba45d758ad1bb9de33a3 4 | size: 17562590 5 | path: model.onnx 6 | -------------------------------------------------------------------------------- /week_7_ecr/inference.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from model import ColaModel 3 | from data import DataModule 4 | from utils import timing 5 | 6 | 7 | class ColaPredictor: 8 | def __init__(self, model_path): 9 | self.model_path = model_path 10 | self.model = ColaModel.load_from_checkpoint(model_path) 11 | self.model.eval() 12 | self.model.freeze() 13 | self.processor = DataModule() 14 | self.softmax = torch.nn.Softmax(dim=1) 15 | self.lables = ["unacceptable", "acceptable"] 16 | 17 | @timing 18 | def predict(self, text): 19 | inference_sample = {"sentence": text} 20 | processed = self.processor.tokenize_data(inference_sample) 21 | logits = self.model( 22 | torch.tensor([processed["input_ids"]]), 23 | torch.tensor([processed["attention_mask"]]), 24 | ) 25 | scores = self.softmax(logits[0]).tolist()[0] 26 | predictions = [] 27 | for score, label in zip(scores, self.lables): 28 | predictions.append({"label": label, "score": 
score}) 29 | return predictions 30 | 31 | 32 | if __name__ == "__main__": 33 | sentence = "The boy is sitting on a bench" 34 | predictor = ColaPredictor("./models/best-checkpoint.ckpt") 35 | print(predictor.predict(sentence)) 36 | sentences = ["The boy is sitting on a bench"] * 10 37 | for sentence in sentences: 38 | predictor.predict(sentence) 39 | -------------------------------------------------------------------------------- /week_7_ecr/inference_onnx.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import onnxruntime as ort 3 | from scipy.special import softmax 4 | 5 | from data import DataModule 6 | from utils import timing 7 | 8 | 9 | class ColaONNXPredictor: 10 | def __init__(self, model_path): 11 | self.ort_session = ort.InferenceSession(model_path) 12 | self.processor = DataModule() 13 | self.lables = ["unacceptable", "acceptable"] 14 | 15 | @timing 16 | def predict(self, text): 17 | inference_sample = {"sentence": text} 18 | processed = self.processor.tokenize_data(inference_sample) 19 | 20 | ort_inputs = { 21 | "input_ids": np.expand_dims(processed["input_ids"], axis=0), 22 | "attention_mask": np.expand_dims(processed["attention_mask"], axis=0), 23 | } 24 | ort_outs = self.ort_session.run(None, ort_inputs) 25 | scores = softmax(ort_outs[0])[0] 26 | predictions = [] 27 | for score, label in zip(scores, self.lables): 28 | predictions.append({"label": label, "score": float(score)}) 29 | print(predictions) 30 | return predictions 31 | 32 | 33 | if __name__ == "__main__": 34 | sentence = "The boy is sitting on a bench" 35 | predictor = ColaONNXPredictor("./models/model.onnx") 36 | print(predictor.predict(sentence)) 37 | sentences = ["The boy is sitting on a bench"] * 10 38 | for sentence in sentences: 39 | predictor.predict(sentence) 40 | -------------------------------------------------------------------------------- /week_7_ecr/model.py: 
-------------------------------------------------------------------------------- 1 | import torch 2 | import wandb 3 | import hydra 4 | import numpy as np 5 | import pandas as pd 6 | import torchmetrics 7 | import pytorch_lightning as pl 8 | from transformers import AutoModelForSequenceClassification 9 | from omegaconf import OmegaConf, DictConfig 10 | from sklearn.metrics import confusion_matrix 11 | import matplotlib.pyplot as plt 12 | import seaborn as sns 13 | 14 | 15 | class ColaModel(pl.LightningModule): 16 | def __init__(self, model_name="google/bert_uncased_L-2_H-128_A-2", lr=3e-5): 17 | super(ColaModel, self).__init__() 18 | self.save_hyperparameters() 19 | 20 | self.bert = AutoModelForSequenceClassification.from_pretrained( 21 | model_name, num_labels=2 22 | ) 23 | self.num_classes = 2 24 | self.train_accuracy_metric = torchmetrics.Accuracy() 25 | self.val_accuracy_metric = torchmetrics.Accuracy() 26 | self.f1_metric = torchmetrics.F1(num_classes=self.num_classes) 27 | self.precision_macro_metric = torchmetrics.Precision( 28 | average="macro", num_classes=self.num_classes 29 | ) 30 | self.recall_macro_metric = torchmetrics.Recall( 31 | average="macro", num_classes=self.num_classes 32 | ) 33 | self.precision_micro_metric = torchmetrics.Precision(average="micro") 34 | self.recall_micro_metric = torchmetrics.Recall(average="micro") 35 | 36 | def forward(self, input_ids, attention_mask, labels=None): 37 | outputs = self.bert( 38 | input_ids=input_ids, attention_mask=attention_mask, labels=labels 39 | ) 40 | return outputs 41 | 42 | def training_step(self, batch, batch_idx): 43 | outputs = self.forward( 44 | batch["input_ids"], batch["attention_mask"], labels=batch["label"] 45 | ) 46 | # loss = F.cross_entropy(logits, batch["label"]) 47 | preds = torch.argmax(outputs.logits, 1) 48 | train_acc = self.train_accuracy_metric(preds, batch["label"]) 49 | self.log("train/loss", outputs.loss, prog_bar=True, on_epoch=True) 50 | self.log("train/acc", train_acc, 
prog_bar=True, on_epoch=True) 51 | return outputs.loss 52 | 53 | def validation_step(self, batch, batch_idx): 54 | labels = batch["label"] 55 | outputs = self.forward( 56 | batch["input_ids"], batch["attention_mask"], labels=batch["label"] 57 | ) 58 | preds = torch.argmax(outputs.logits, 1) 59 | 60 | # Metrics 61 | valid_acc = self.val_accuracy_metric(preds, labels) 62 | precision_macro = self.precision_macro_metric(preds, labels) 63 | recall_macro = self.recall_macro_metric(preds, labels) 64 | precision_micro = self.precision_micro_metric(preds, labels) 65 | recall_micro = self.recall_micro_metric(preds, labels) 66 | f1 = self.f1_metric(preds, labels) 67 | 68 | # Logging metrics 69 | self.log("valid/loss", outputs.loss, prog_bar=True, on_step=True) 70 | self.log("valid/acc", valid_acc, prog_bar=True, on_epoch=True) 71 | self.log("valid/precision_macro", precision_macro, prog_bar=True, on_epoch=True) 72 | self.log("valid/recall_macro", recall_macro, prog_bar=True, on_epoch=True) 73 | self.log("valid/precision_micro", precision_micro, prog_bar=True, on_epoch=True) 74 | self.log("valid/recall_micro", recall_micro, prog_bar=True, on_epoch=True) 75 | self.log("valid/f1", f1, prog_bar=True, on_epoch=True) 76 | return {"labels": labels, "logits": outputs.logits} 77 | 78 | def validation_epoch_end(self, outputs): 79 | labels = torch.cat([x["labels"] for x in outputs]) 80 | logits = torch.cat([x["logits"] for x in outputs]) 81 | preds = torch.argmax(logits, 1) 82 | 83 | ## There are multiple ways to track the metrics 84 | # 1. Confusion matrix plotting using inbuilt W&B method 85 | self.logger.experiment.log( 86 | { 87 | "conf": wandb.plot.confusion_matrix( 88 | probs=logits.numpy(), y_true=labels.numpy() 89 | ) 90 | } 91 | ) 92 | 93 | # 2. Confusion Matrix plotting using scikit-learn method 94 | # wandb.log({"cm": wandb.sklearn.plot_confusion_matrix(labels.numpy(), preds)}) 95 | 96 | # 3. 
Confusion Matric plotting using Seaborn 97 | # data = confusion_matrix(labels.numpy(), preds.numpy()) 98 | # df_cm = pd.DataFrame(data, columns=np.unique(labels), index=np.unique(labels)) 99 | # df_cm.index.name = "Actual" 100 | # df_cm.columns.name = "Predicted" 101 | # plt.figure(figsize=(7, 4)) 102 | # plot = sns.heatmap( 103 | # df_cm, cmap="Blues", annot=True, annot_kws={"size": 16} 104 | # ) # font size 105 | # self.logger.experiment.log({"Confusion Matrix": wandb.Image(plot)}) 106 | 107 | # self.logger.experiment.log( 108 | # {"roc": wandb.plot.roc_curve(labels.numpy(), logits.numpy())} 109 | # ) 110 | 111 | def configure_optimizers(self): 112 | return torch.optim.Adam(self.parameters(), lr=self.hparams["lr"]) 113 | -------------------------------------------------------------------------------- /week_7_ecr/parse_json.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | with open('creds.txt') as f: 4 | data = f.read() 5 | 6 | print(data) 7 | # data = json.loads(data, strict=False) 8 | # print(data) 9 | data = eval(data) 10 | print(data) 11 | 12 | with open('test.json', 'w') as f: 13 | json.dump(data, f) 14 | -------------------------------------------------------------------------------- /week_7_ecr/requirements.txt: -------------------------------------------------------------------------------- 1 | pytorch-lightning==1.2.10 2 | datasets==1.6.2 3 | transformers==4.5.1 4 | scikit-learn==0.24.2 5 | wandb 6 | torchmetrics 7 | matplotlib 8 | seaborn 9 | hydra-core 10 | omegaconf 11 | hydra_colorlog 12 | fastapi 13 | uvicorn 14 | -------------------------------------------------------------------------------- /week_7_ecr/requirements_inference.txt: -------------------------------------------------------------------------------- 1 | pytorch-lightning==1.2.10 2 | datasets==1.6.2 3 | scikit-learn==0.24.2 4 | hydra-core 5 | omegaconf 6 | hydra_colorlog 7 | onnxruntime 8 | fastapi 9 | uvicorn 10 | dvc 
-------------------------------------------------------------------------------- /week_7_ecr/train.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import hydra 3 | import wandb 4 | import logging 5 | 6 | import pandas as pd 7 | import pytorch_lightning as pl 8 | from omegaconf.omegaconf import OmegaConf 9 | from pytorch_lightning.callbacks import ModelCheckpoint 10 | from pytorch_lightning.callbacks.early_stopping import EarlyStopping 11 | from pytorch_lightning.loggers import WandbLogger 12 | 13 | from data import DataModule 14 | from model import ColaModel 15 | 16 | logger = logging.getLogger(__name__) 17 | 18 | 19 | class SamplesVisualisationLogger(pl.Callback): 20 | def __init__(self, datamodule): 21 | super().__init__() 22 | 23 | self.datamodule = datamodule 24 | 25 | def on_validation_end(self, trainer, pl_module): 26 | val_batch = next(iter(self.datamodule.val_dataloader())) 27 | sentences = val_batch["sentence"] 28 | 29 | outputs = pl_module(val_batch["input_ids"], val_batch["attention_mask"]) 30 | preds = torch.argmax(outputs.logits, 1) 31 | labels = val_batch["label"] 32 | 33 | df = pd.DataFrame( 34 | {"Sentence": sentences, "Label": labels.numpy(), "Predicted": preds.numpy()} 35 | ) 36 | 37 | wrong_df = df[df["Label"] != df["Predicted"]] 38 | trainer.logger.experiment.log( 39 | { 40 | "examples": wandb.Table(dataframe=wrong_df, allow_mixed_types=True), 41 | "global_step": trainer.global_step, 42 | } 43 | ) 44 | 45 | 46 | @hydra.main(config_path="./configs", config_name="config") 47 | def main(cfg): 48 | logger.info(OmegaConf.to_yaml(cfg, resolve=True)) 49 | logger.info(f"Using the model: {cfg.model.name}") 50 | logger.info(f"Using the tokenizer: {cfg.model.tokenizer}") 51 | cola_data = DataModule( 52 | cfg.model.tokenizer, cfg.processing.batch_size, cfg.processing.max_length 53 | ) 54 | cola_model = ColaModel(cfg.model.name) 55 | 56 | root_dir = hydra.utils.get_original_cwd() 57 | 
def timing(f):
    """Decorator for timing functions.

    Usage:
        @timing
        def function(a):
            pass

    The wrapped function's return value is passed through unchanged;
    the elapsed time is printed to stdout.
    """

    @wraps(f)
    def wrapper(*args, **kwargs):
        # perf_counter is a monotonic high-resolution timer; time.time()
        # can jump backwards if the system clock is adjusted, producing
        # negative or nonsensical durations.
        start = time.perf_counter()
        result = f(*args, **kwargs)
        end = time.perf_counter()
        print("function:%r took: %2.5f sec" % (f.__name__, end - start))
        return result

    return wrapper
AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY 13 | 14 | RUN yum install git -y && yum -y install gcc-c++ 15 | COPY requirements_inference.txt requirements_inference.txt 16 | RUN pip install -r requirements_inference.txt --no-cache-dir 17 | COPY ./ ./ 18 | ENV PYTHONPATH "${PYTHONPATH}:./" 19 | ENV LC_ALL=C.UTF-8 20 | ENV LANG=C.UTF-8 21 | RUN pip install "dvc[s3]" 22 | # configuring remote server in dvc 23 | RUN dvc init --no-scm 24 | RUN dvc remote add -d model-store s3://models-dvc/trained_models/ 25 | 26 | # pulling the trained model 27 | RUN dvc pull dvcfiles/trained_model.dvc 28 | RUN ls 29 | RUN python lambda_handler.py 30 | RUN chmod -R 0755 $MODEL_DIR 31 | CMD [ "lambda_handler.lambda_handler"] -------------------------------------------------------------------------------- /week_8_serverless/README.md: -------------------------------------------------------------------------------- 1 | 2 | **Note: The purpose of the project to explore the libraries and learn how to use them. Not to build a SOTA model.** 3 | 4 | ## Requirements: 5 | 6 | This project uses Python 3.8 7 | 8 | Create a virtual env with the following command: 9 | 10 | ``` 11 | conda create --name project-setup python=3.8 12 | conda activate project-setup 13 | ``` 14 | 15 | Install the requirements: 16 | 17 | ``` 18 | pip install -r requirements.txt 19 | ``` 20 | 21 | ## Running 22 | 23 | ### Training 24 | 25 | After installing the requirements, in order to train the model simply run: 26 | 27 | ``` 28 | python train.py 29 | ``` 30 | 31 | ### Monitoring 32 | 33 | Once the training is completed in the end of the logs you will see something like: 34 | 35 | ``` 36 | wandb: Synced 5 W&B file(s), 4 media file(s), 3 artifact file(s) and 0 other file(s) 37 | wandb: 38 | wandb: Synced proud-mountain-77: https://wandb.ai/raviraja/MLOps%20Basics/runs/3vp1twdc 39 | ``` 40 | 41 | Follow the link to see the wandb dashboard which contains all the plots. 
42 | 43 | ### Versioning data 44 | 45 | Refer to the blog: [DVC Configuration](https://www.ravirajag.dev/blog/mlops-dvc) 46 | 47 | ### Exporting model to ONNX 48 | 49 | Once the model is trained, convert the model using the following command: 50 | 51 | ``` 52 | python convert_model_to_onnx.py 53 | ``` 54 | 55 | ### Inference 56 | 57 | #### Inference using standard pytorch 58 | 59 | ``` 60 | python inference.py 61 | ``` 62 | 63 | #### Inference using ONNX Runtime 64 | 65 | ``` 66 | python inference_onnx.py 67 | ``` 68 | 69 | ## S3 & ECR 70 | 71 | Follow the instructions mentioned in the [blog post](https://www.ravirajag.dev/blog/mlops-container-registry) for creating S3 bucket and ECR repository. 72 | 73 | ### Configuring dvc 74 | 75 | ``` 76 | dvc init (this has to be done at root folder) 77 | dvc remote add -d model-store s3://models-dvc/trained_models/ 78 | ``` 79 | 80 | ### AWS credentials 81 | 82 | Create the credentials as mentioned in the [blog post](https://www.ravirajag.dev/blog/mlops-container-registry) 83 | 84 | **Do not share the secrets with others** 85 | 86 | Set the ACCESS key and id values in environment variables. 87 | 88 | ``` 89 | export AWS_ACCESS_KEY_ID= 90 | export AWS_SECRET_ACCESS_KEY= 91 | ``` 92 | 93 | ### Trained model in DVC 94 | 95 | Sdd the trained model(onnx) to dvc using the following command: 96 | 97 | ```shell 98 | cd dvcfiles 99 | dvc add ../models/model.onnx --file trained_model.dvc 100 | ``` 101 | 102 | Push the model to remote storage 103 | 104 | ```shell 105 | dvc push trained_model.dvc 106 | ``` 107 | 108 | ### Docker 109 | 110 | Install the docker using the [instructions here](https://docs.docker.com/engine/install/) 111 | 112 | Build the image using the command 113 | 114 | ```shell 115 | docker build -t mlops-basics:latest . 116 | ``` 117 | 118 | **The default command in dockerfile is modified to support the lambda. 
If you want to run without lambda use the last weeks dockerfile.** 119 | 120 | Then run the container using the command 121 | 122 | ```shell 123 | docker run -p 8000:8000 --name inference_container mlops-basics:latest 124 | ``` 125 | 126 | (or) 127 | 128 | Build and run the container using the command 129 | 130 | ```shell 131 | docker-compose up 132 | ``` 133 | 134 | ### Pushing the image to ECR 135 | 136 | Follow the instructions mentioned in [blog post](https://www.ravirajag.dev/blog/mlops-container-registry) for creating ECR repository. 137 | 138 | - Authenticating docker client to ECR 139 | 140 | ``` 141 | aws ecr get-login-password --region us-west-2 | docker login --username AWS --password-stdin 246113150184.dkr.ecr.us-west-2.amazonaws.com 142 | ``` 143 | 144 | - Tagging the image 145 | 146 | ``` 147 | docker tag mlops-basics:latest 246113150184.dkr.ecr.us-west-2.amazonaws.com/mlops-basics:latest 148 | ``` 149 | 150 | - Pushing the image 151 | 152 | ``` 153 | docker push 246113150184.dkr.ecr.us-west-2.amazonaws.com/mlops-basics:latest 154 | ``` 155 | 156 | Refer to `.github/workflows/build_docker_image.yaml` file for automatically creating the docker image with trained model and pushing it to ECR. 157 | 158 | ### Serveless - Lambda 159 | 160 | Refer to the [Blog Post here](https://www.ravirajag.dev/blog/mlops-serverless) for detailed instructions on configuring lambda with the docker image and invoking it using a API. 161 | 162 | 163 | ### Running notebooks 164 | 165 | I am using [Jupyter lab](https://jupyter.org/install) to run the notebooks. 166 | 167 | Since I am using a virtualenv, when I run the command `jupyter lab` it might or might not use the virtualenv. 
To make sure you use the virtualenv, run the following commands before running `jupyter lab`

Sample prediction API

" 10 | 11 | 12 | @app.get("/predict") 13 | async def get_prediction(text: str): 14 | result = predictor.predict(text) 15 | return result -------------------------------------------------------------------------------- /week_8_serverless/configs/config.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - model: default 3 | - processing: default 4 | - training: default 5 | - override hydra/job_logging: colorlog 6 | - override hydra/hydra_logging: colorlog -------------------------------------------------------------------------------- /week_8_serverless/configs/model/default.yaml: -------------------------------------------------------------------------------- 1 | name: google/bert_uncased_L-2_H-128_A-2 # model used for training the classifier 2 | tokenizer: google/bert_uncased_L-2_H-128_A-2 # tokenizer used for processing the data -------------------------------------------------------------------------------- /week_8_serverless/configs/processing/default.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 64 2 | max_length: 128 -------------------------------------------------------------------------------- /week_8_serverless/configs/training/default.yaml: -------------------------------------------------------------------------------- 1 | max_epochs: 1 2 | log_every_n_steps: 10 3 | deterministic: true 4 | limit_train_batches: 0.25 5 | limit_val_batches: ${training.limit_train_batches} -------------------------------------------------------------------------------- /week_8_serverless/convert_model_to_onnx.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import hydra 3 | import logging 4 | 5 | from omegaconf.omegaconf import OmegaConf 6 | 7 | from model import ColaModel 8 | from data import DataModule 9 | 10 | logger = logging.getLogger(__name__) 11 | 12 | 13 | @hydra.main(config_path="./configs", 
config_name="config") 14 | def convert_model(cfg): 15 | root_dir = hydra.utils.get_original_cwd() 16 | model_path = f"{root_dir}/models/best-checkpoint.ckpt" 17 | logger.info(f"Loading pre-trained model from: {model_path}") 18 | cola_model = ColaModel.load_from_checkpoint(model_path) 19 | 20 | data_model = DataModule( 21 | cfg.model.tokenizer, cfg.processing.batch_size, cfg.processing.max_length 22 | ) 23 | data_model.prepare_data() 24 | data_model.setup() 25 | input_batch = next(iter(data_model.train_dataloader())) 26 | input_sample = { 27 | "input_ids": input_batch["input_ids"][0].unsqueeze(0), 28 | "attention_mask": input_batch["attention_mask"][0].unsqueeze(0), 29 | } 30 | 31 | # Export the model 32 | logger.info(f"Converting the model into ONNX format") 33 | torch.onnx.export( 34 | cola_model, # model being run 35 | ( 36 | input_sample["input_ids"], 37 | input_sample["attention_mask"], 38 | ), # model input (or a tuple for multiple inputs) 39 | f"{root_dir}/models/model.onnx", # where to save the model (can be a file or file-like object) 40 | export_params=True, 41 | opset_version=10, 42 | input_names=["input_ids", "attention_mask"], # the model's input names 43 | output_names=["output"], # the model's output names 44 | dynamic_axes={ 45 | "input_ids": {0: "batch_size"}, # variable length axes 46 | "attention_mask": {0: "batch_size"}, 47 | "output": {0: "batch_size"}, 48 | }, 49 | ) 50 | 51 | logger.info( 52 | f"Model converted successfully. 
class DataModule(pl.LightningDataModule):
    """Lightning data module for the GLUE CoLA dataset.

    Downloads the dataset, tokenizes sentences with a HuggingFace
    tokenizer, and serves train/validation dataloaders of fixed-length
    encoded batches.
    """

    def __init__(
        self,
        model_name="google/bert_uncased_L-2_H-128_A-2",
        batch_size=64,
        max_length=128,
    ):
        super().__init__()
        self.batch_size = batch_size
        self.max_length = max_length
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)

    def prepare_data(self):
        """Download CoLA and keep the train/validation splits."""
        dataset = load_dataset("glue", "cola")
        self.train_data = dataset["train"]
        self.val_data = dataset["validation"]

    def tokenize_data(self, example):
        """Encode a sentence (or batch of sentences) to fixed length."""
        return self.tokenizer(
            example["sentence"],
            truncation=True,
            padding="max_length",
            max_length=self.max_length,
        )

    def setup(self, stage=None):
        """Tokenize the relevant splits for the given stage."""
        # Only the fit stage (or an unspecified stage) needs these splits.
        if stage not in (None, "fit"):
            return

        self.train_data = self.train_data.map(self.tokenize_data, batched=True)
        self.train_data.set_format(
            type="torch", columns=["input_ids", "attention_mask", "label"]
        )

        self.val_data = self.val_data.map(self.tokenize_data, batched=True)
        # output_all_columns keeps the raw "sentence" text available for
        # the validation-sample logging callback.
        self.val_data.set_format(
            type="torch",
            columns=["input_ids", "attention_mask", "label"],
            output_all_columns=True,
        )

    def train_dataloader(self):
        return torch.utils.data.DataLoader(
            self.train_data, batch_size=self.batch_size, shuffle=True
        )

    def val_dataloader(self):
        return torch.utils.data.DataLoader(
            self.val_data, batch_size=self.batch_size, shuffle=False
        )


if __name__ == "__main__":
    dm = DataModule()
    dm.prepare_data()
    dm.setup()
    print(next(iter(dm.train_dataloader()))["input_ids"].shape)
class ColaONNXPredictor:
    """Predicts linguistic acceptability of a sentence with an ONNX model.

    Mirrors the interface of the PyTorch ``ColaPredictor`` but runs the
    exported model through ONNX Runtime.
    """

    def __init__(self, model_path):
        """
        Args:
            model_path: Path to the exported ``model.onnx`` file.
        """
        self.ort_session = ort.InferenceSession(model_path)
        # DataModule is reused only for its tokenizer configuration.
        self.processor = DataModule()
        # Fixed: was misspelled "lables".
        self.labels = ["unacceptable", "acceptable"]

    @timing
    def predict(self, text):
        """Return [{"label": ..., "score": ...}, ...] for the given text."""
        inference_sample = {"sentence": text}
        processed = self.processor.tokenize_data(inference_sample)

        # ONNX Runtime expects numpy arrays with an explicit batch axis.
        ort_inputs = {
            "input_ids": np.expand_dims(processed["input_ids"], axis=0),
            "attention_mask": np.expand_dims(processed["attention_mask"], axis=0),
        }
        ort_outs = self.ort_session.run(None, ort_inputs)
        scores = softmax(ort_outs[0])[0]
        # float() converts numpy scalars so the result is JSON-serializable
        # (required by the FastAPI and Lambda callers).
        # Note: a leftover debug print of the predictions was removed here;
        # it spammed stdout on every request.
        return [
            {"label": label, "score": float(score)}
            for score, label in zip(scores, self.labels)
        ]


if __name__ == "__main__":
    sentence = "The boy is sitting on a bench"
    predictor = ColaONNXPredictor("./models/model.onnx")
    print(predictor.predict(sentence))
    sentences = ["The boy is sitting on a bench"] * 10
    for sentence in sentences:
        predictor.predict(sentence)
class ColaModel(pl.LightningModule):
    """LightningModule fine-tuning a small BERT classifier on CoLA.

    Wraps a HuggingFace ``AutoModelForSequenceClassification`` head with
    2 labels (unacceptable/acceptable) and tracks accuracy, precision,
    recall and F1 via torchmetrics during training and validation.
    """

    def __init__(self, model_name="google/bert_uncased_L-2_H-128_A-2", lr=3e-5):
        super(ColaModel, self).__init__()
        # Records model_name and lr into self.hparams; lr is read back in
        # configure_optimizers and both are restored on checkpoint load.
        self.save_hyperparameters()

        self.bert = AutoModelForSequenceClassification.from_pretrained(
            model_name, num_labels=2
        )
        self.num_classes = 2
        # Separate metric objects per phase so their running state never mixes.
        self.train_accuracy_metric = torchmetrics.Accuracy()
        self.val_accuracy_metric = torchmetrics.Accuracy()
        self.f1_metric = torchmetrics.F1(num_classes=self.num_classes)
        self.precision_macro_metric = torchmetrics.Precision(
            average="macro", num_classes=self.num_classes
        )
        self.recall_macro_metric = torchmetrics.Recall(
            average="macro", num_classes=self.num_classes
        )
        self.precision_micro_metric = torchmetrics.Precision(average="micro")
        self.recall_micro_metric = torchmetrics.Recall(average="micro")

    def forward(self, input_ids, attention_mask, labels=None):
        """Run the underlying BERT head.

        When ``labels`` is provided the HuggingFace output also carries
        ``.loss`` alongside ``.logits``.
        """
        outputs = self.bert(
            input_ids=input_ids, attention_mask=attention_mask, labels=labels
        )
        return outputs

    def training_step(self, batch, batch_idx):
        """One optimization step; returns the HF-computed loss."""
        outputs = self.forward(
            batch["input_ids"], batch["attention_mask"], labels=batch["label"]
        )
        # loss = F.cross_entropy(logits, batch["label"])
        preds = torch.argmax(outputs.logits, 1)
        train_acc = self.train_accuracy_metric(preds, batch["label"])
        self.log("train/loss", outputs.loss, prog_bar=True, on_epoch=True)
        self.log("train/acc", train_acc, prog_bar=True, on_epoch=True)
        return outputs.loss

    def validation_step(self, batch, batch_idx):
        """Compute and log validation metrics for one batch.

        Returns labels and logits so validation_epoch_end can build
        epoch-level plots (confusion matrix).
        """
        labels = batch["label"]
        outputs = self.forward(
            batch["input_ids"], batch["attention_mask"], labels=batch["label"]
        )
        preds = torch.argmax(outputs.logits, 1)

        # Metrics
        valid_acc = self.val_accuracy_metric(preds, labels)
        precision_macro = self.precision_macro_metric(preds, labels)
        recall_macro = self.recall_macro_metric(preds, labels)
        precision_micro = self.precision_micro_metric(preds, labels)
        recall_micro = self.recall_micro_metric(preds, labels)
        f1 = self.f1_metric(preds, labels)

        # Logging metrics
        # "valid/loss" is monitored by ModelCheckpoint and EarlyStopping
        # in train.py — keep the key in sync with those callbacks.
        self.log("valid/loss", outputs.loss, prog_bar=True, on_step=True)
        self.log("valid/acc", valid_acc, prog_bar=True, on_epoch=True)
        self.log("valid/precision_macro", precision_macro, prog_bar=True, on_epoch=True)
        self.log("valid/recall_macro", recall_macro, prog_bar=True, on_epoch=True)
        self.log("valid/precision_micro", precision_micro, prog_bar=True, on_epoch=True)
        self.log("valid/recall_micro", recall_micro, prog_bar=True, on_epoch=True)
        self.log("valid/f1", f1, prog_bar=True, on_epoch=True)
        return {"labels": labels, "logits": outputs.logits}

    def validation_epoch_end(self, outputs):
        """Aggregate per-batch outputs and log a confusion matrix to W&B."""
        labels = torch.cat([x["labels"] for x in outputs])
        logits = torch.cat([x["logits"] for x in outputs])
        preds = torch.argmax(logits, 1)

        ## There are multiple ways to track the metrics
        # 1. Confusion matrix plotting using inbuilt W&B method
        # NOTE(review): .numpy() assumes the tensors live on CPU — confirm
        # this still holds if training is moved to GPU.
        self.logger.experiment.log(
            {
                "conf": wandb.plot.confusion_matrix(
                    probs=logits.numpy(), y_true=labels.numpy()
                )
            }
        )

        # 2. Confusion Matrix plotting using scikit-learn method
        # wandb.log({"cm": wandb.sklearn.plot_confusion_matrix(labels.numpy(), preds)})

        # 3. Confusion Matric plotting using Seaborn
        # data = confusion_matrix(labels.numpy(), preds.numpy())
        # df_cm = pd.DataFrame(data, columns=np.unique(labels), index=np.unique(labels))
        # df_cm.index.name = "Actual"
        # df_cm.columns.name = "Predicted"
        # plt.figure(figsize=(7, 4))
        # plot = sns.heatmap(
        #     df_cm, cmap="Blues", annot=True, annot_kws={"size": 16}
        # )  # font size
        # self.logger.experiment.log({"Confusion Matrix": wandb.Image(plot)})

        # self.logger.experiment.log(
        #     {"roc": wandb.plot.roc_curve(labels.numpy(), logits.numpy())}
        # )

    def configure_optimizers(self):
        """Plain Adam over all parameters, lr taken from saved hparams."""
        return torch.optim.Adam(self.parameters(), lr=self.hparams["lr"])
| 6 | print(data) 7 | # data = json.loads(data, strict=False) 8 | # print(data) 9 | data = eval(data) 10 | print(data) 11 | 12 | with open('test.json', 'w') as f: 13 | json.dump(data, f) 14 | -------------------------------------------------------------------------------- /week_8_serverless/requirements.txt: -------------------------------------------------------------------------------- 1 | pytorch-lightning==1.2.10 2 | datasets==1.6.2 3 | transformers==4.5.1 4 | scikit-learn==0.24.2 5 | wandb 6 | torchmetrics 7 | matplotlib 8 | seaborn 9 | hydra-core 10 | omegaconf 11 | hydra_colorlog 12 | fastapi 13 | uvicorn 14 | -------------------------------------------------------------------------------- /week_8_serverless/requirements_inference.txt: -------------------------------------------------------------------------------- 1 | pytorch-lightning==1.2.10 2 | datasets==1.6.2 3 | scikit-learn==0.24.2 4 | hydra-core 5 | omegaconf 6 | hydra_colorlog 7 | onnxruntime 8 | fastapi 9 | uvicorn 10 | dvc 11 | tokenizers==0.10.2 12 | transformers==4.5.1 -------------------------------------------------------------------------------- /week_8_serverless/train.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import hydra 3 | import wandb 4 | import logging 5 | 6 | import pandas as pd 7 | import pytorch_lightning as pl 8 | from omegaconf.omegaconf import OmegaConf 9 | from pytorch_lightning.callbacks import ModelCheckpoint 10 | from pytorch_lightning.callbacks.early_stopping import EarlyStopping 11 | from pytorch_lightning.loggers import WandbLogger 12 | 13 | from data import DataModule 14 | from model import ColaModel 15 | 16 | logger = logging.getLogger(__name__) 17 | 18 | 19 | class SamplesVisualisationLogger(pl.Callback): 20 | def __init__(self, datamodule): 21 | super().__init__() 22 | 23 | self.datamodule = datamodule 24 | 25 | def on_validation_end(self, trainer, pl_module): 26 | val_batch = 
@hydra.main(config_path="./configs", config_name="config")
def main(cfg):
    """Train the CoLA classifier from the composed hydra config."""
    logger.info(OmegaConf.to_yaml(cfg, resolve=True))
    logger.info(f"Using the model: {cfg.model.name}")
    logger.info(f"Using the tokenizer: {cfg.model.tokenizer}")

    data_module = DataModule(
        cfg.model.tokenizer, cfg.processing.batch_size, cfg.processing.max_length
    )
    model = ColaModel(cfg.model.name)

    # hydra changes the working directory per run; resolve the repo root
    # so checkpoints always land in the same models/ folder.
    project_root = hydra.utils.get_original_cwd()
    checkpoint_cb = ModelCheckpoint(
        dirpath=f"{project_root}/models",
        filename="best-checkpoint",
        monitor="valid/loss",
        mode="min",
    )
    early_stop_cb = EarlyStopping(
        monitor="valid/loss", patience=3, verbose=True, mode="min"
    )

    wandb_logger = WandbLogger(project="MLOps Basics", entity="raviraja")
    callbacks = [
        checkpoint_cb,
        SamplesVisualisationLogger(data_module),
        early_stop_cb,
    ]
    trainer = pl.Trainer(
        max_epochs=cfg.training.max_epochs,
        logger=wandb_logger,
        callbacks=callbacks,
        log_every_n_steps=cfg.training.log_every_n_steps,
        deterministic=cfg.training.deterministic,
        # limit_train_batches=cfg.training.limit_train_batches,
        # limit_val_batches=cfg.training.limit_val_batches,
    )
    trainer.fit(model, data_module)
    wandb.finish()


if __name__ == "__main__":
    main()
def timing(f):
    """Decorator for timing functions.

    Usage:
        @timing
        def function(a):
            pass

    The wrapped function's return value is passed through unchanged;
    the elapsed time is printed to stdout.
    """

    @wraps(f)
    def wrapper(*args, **kwargs):
        # perf_counter is a monotonic high-resolution timer; time.time()
        # can jump backwards if the system clock is adjusted, producing
        # negative or nonsensical durations.
        start = time.perf_counter()
        result = f(*args, **kwargs)
        end = time.perf_counter()
        print("function:%r took: %2.5f sec" % (f.__name__, end - start))
        return result

    return wrapper
**Note: The purpose of the project is to explore the libraries and learn how to use them. Not to build a SOTA model.**
72 | 73 | ### Configuring dvc 74 | 75 | ``` 76 | dvc init (this has to be done at root folder) 77 | dvc remote add -d model-store s3://models-dvc/trained_models/ 78 | ``` 79 | 80 | ### AWS credentials 81 | 82 | Create the credentials as mentioned in the [blog post](https://www.ravirajag.dev/blog/mlops-container-registry) 83 | 84 | **Do not share the secrets with others** 85 | 86 | Set the ACCESS key and id values in environment variables. 87 | 88 | ``` 89 | export AWS_ACCESS_KEY_ID= 90 | export AWS_SECRET_ACCESS_KEY= 91 | ``` 92 | 93 | ### Trained model in DVC 94 | 95 | Sdd the trained model(onnx) to dvc using the following command: 96 | 97 | ```shell 98 | cd dvcfiles 99 | dvc add ../models/model.onnx --file trained_model.dvc 100 | ``` 101 | 102 | Push the model to remote storage 103 | 104 | ```shell 105 | dvc push trained_model.dvc 106 | ``` 107 | 108 | ### Docker 109 | 110 | Install the docker using the [instructions here](https://docs.docker.com/engine/install/) 111 | 112 | Build the image using the command 113 | 114 | ```shell 115 | docker build -t mlops-basics:latest . 116 | ``` 117 | 118 | **The default command in dockerfile is modified to support the lambda. If you want to run without lambda use the last weeks dockerfile.** 119 | 120 | Then run the container using the command 121 | 122 | ```shell 123 | docker run -p 8000:8000 --name inference_container mlops-basics:latest 124 | ``` 125 | 126 | (or) 127 | 128 | Build and run the container using the command 129 | 130 | ```shell 131 | docker-compose up 132 | ``` 133 | 134 | ### Pushing the image to ECR 135 | 136 | Follow the instructions mentioned in [blog post](https://www.ravirajag.dev/blog/mlops-container-registry) for creating ECR repository. 
### Serverless - Lambda

Refer to the [Blog Post here](https://www.ravirajag.dev/blog/mlops-serverless) for detailed instructions on configuring lambda with the docker image and invoking it using an API.

### Monitoring - Kibana

Refer to the [Blog Post here](https://www.ravirajag.dev/blog/mlops-monitoring) for detailed instructions on configuring Kibana using an Elasticsearch cluster and integrating it with CloudWatch logs.
To make sure you use the virtualenv, run the following commands before running `jupyter lab`:

Sample prediction API

" 10 | 11 | 12 | @app.get("/predict") 13 | async def get_prediction(text: str): 14 | result = predictor.predict(text) 15 | return result -------------------------------------------------------------------------------- /week_9_monitoring/configs/config.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - model: default 3 | - processing: default 4 | - training: default 5 | - override hydra/job_logging: colorlog 6 | - override hydra/hydra_logging: colorlog -------------------------------------------------------------------------------- /week_9_monitoring/configs/model/default.yaml: -------------------------------------------------------------------------------- 1 | name: google/bert_uncased_L-2_H-128_A-2 # model used for training the classifier 2 | tokenizer: google/bert_uncased_L-2_H-128_A-2 # tokenizer used for processing the data -------------------------------------------------------------------------------- /week_9_monitoring/configs/processing/default.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 64 2 | max_length: 128 -------------------------------------------------------------------------------- /week_9_monitoring/configs/training/default.yaml: -------------------------------------------------------------------------------- 1 | max_epochs: 1 2 | log_every_n_steps: 10 3 | deterministic: true 4 | limit_train_batches: 0.25 5 | limit_val_batches: ${training.limit_train_batches} -------------------------------------------------------------------------------- /week_9_monitoring/convert_model_to_onnx.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import hydra 3 | import logging 4 | 5 | from omegaconf.omegaconf import OmegaConf 6 | 7 | from model import ColaModel 8 | from data import DataModule 9 | 10 | logger = logging.getLogger(__name__) 11 | 12 | 13 | @hydra.main(config_path="./configs", 
@hydra.main(config_path="./configs", config_name="config")
def convert_model(cfg):
    """Load the best Lightning checkpoint and export it to ONNX.

    Reads model/processing settings from the Hydra config, builds one sample
    batch from the CoLA training data to trace the graph, and writes the
    exported model to ``<repo root>/models/model.onnx``.

    Args:
        cfg: Hydra/OmegaConf config with ``model.tokenizer``,
            ``processing.batch_size`` and ``processing.max_length``.
    """
    # Hydra changes the CWD per run; resolve paths against the project root.
    root_dir = hydra.utils.get_original_cwd()
    model_path = f"{root_dir}/models/best-checkpoint.ckpt"
    # Lazy %-style args: the message is only rendered if the level is enabled.
    logger.info("Loading pre-trained model from: %s", model_path)
    cola_model = ColaModel.load_from_checkpoint(model_path)

    data_model = DataModule(
        cfg.model.tokenizer, cfg.processing.batch_size, cfg.processing.max_length
    )
    data_model.prepare_data()
    data_model.setup()
    # A single (1, seq_len) example is enough to trace the ONNX graph; the
    # batch dimension is declared dynamic below.
    input_batch = next(iter(data_model.train_dataloader()))
    input_sample = {
        "input_ids": input_batch["input_ids"][0].unsqueeze(0),
        "attention_mask": input_batch["attention_mask"][0].unsqueeze(0),
    }

    logger.info("Converting the model into ONNX format")
    torch.onnx.export(
        cola_model,  # model being run
        (
            input_sample["input_ids"],
            input_sample["attention_mask"],
        ),  # model input (a tuple for multiple inputs)
        f"{root_dir}/models/model.onnx",  # output path
        export_params=True,
        opset_version=10,
        input_names=["input_ids", "attention_mask"],
        output_names=["output"],
        dynamic_axes={
            "input_ids": {0: "batch_size"},  # variable length axes
            "attention_mask": {0: "batch_size"},
            "output": {0: "batch_size"},
        },
    )

    logger.info(
        "Model converted successfully. ONNX format model is at: %s/models/model.onnx",
        root_dir,
    )
class DataModule(pl.LightningDataModule):
    """Lightning data module for the GLUE/CoLA linguistic-acceptability task.

    Downloads the dataset, tokenizes sentences with a HuggingFace tokenizer,
    and exposes train/validation dataloaders of fixed-length encoded batches.
    """

    def __init__(
        self,
        model_name="google/bert_uncased_L-2_H-128_A-2",
        batch_size=64,
        max_length=128,
    ):
        super().__init__()
        self.batch_size = batch_size
        self.max_length = max_length
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)

    def prepare_data(self):
        """Download CoLA and keep the raw train/validation splits."""
        cola_dataset = load_dataset("glue", "cola")
        self.train_data = cola_dataset["train"]
        self.val_data = cola_dataset["validation"]

    def tokenize_data(self, example):
        """Encode one example's sentence, padded/truncated to ``max_length``."""
        return self.tokenizer(
            example["sentence"],
            truncation=True,
            padding="max_length",
            max_length=self.max_length,
        )

    def setup(self, stage=None):
        # Only materialize the tokenized datasets for the fit stage (or when
        # no stage is given, e.g. direct scripting use).
        if stage not in ("fit", None):
            return

        self.train_data = self.train_data.map(self.tokenize_data, batched=True)
        self.train_data.set_format(
            type="torch", columns=["input_ids", "attention_mask", "label"]
        )

        self.val_data = self.val_data.map(self.tokenize_data, batched=True)
        # Keep non-tensor columns (e.g. the raw sentence) available for the
        # validation samples logger.
        self.val_data.set_format(
            type="torch",
            columns=["input_ids", "attention_mask", "label"],
            output_all_columns=True,
        )

    def train_dataloader(self):
        """Shuffled training dataloader."""
        return torch.utils.data.DataLoader(
            self.train_data, batch_size=self.batch_size, shuffle=True
        )

    def val_dataloader(self):
        """Deterministic (unshuffled) validation dataloader."""
        return torch.utils.data.DataLoader(
            self.val_data, batch_size=self.batch_size, shuffle=False
        )
class ColaPredictor:
    """Predicts CoLA linguistic acceptability from a Lightning checkpoint.

    Loads the trained ``ColaModel``, freezes it, and scores input sentences
    with a softmax over the two classes.
    """

    def __init__(self, model_path):
        self.model_path = model_path
        self.model = ColaModel.load_from_checkpoint(model_path)
        # Inference-only: disable dropout/batchnorm updates and gradients.
        self.model.eval()
        self.model.freeze()
        self.processor = DataModule()
        self.softmax = torch.nn.Softmax(dim=1)
        # Fixed misspelling ("lables") — now consistent with ColaONNXPredictor.
        self.labels = ["unacceptable", "acceptable"]

    @timing
    def predict(self, text):
        """Return ``[{"label": ..., "score": ...}, ...]`` for both classes.

        Args:
            text: the sentence to score.
        """
        inference_sample = {"sentence": text}
        processed = self.processor.tokenize_data(inference_sample)
        logits = self.model(
            torch.tensor([processed["input_ids"]]),
            torch.tensor([processed["attention_mask"]]),
        )
        scores = self.softmax(logits[0]).tolist()[0]
        return [
            {"label": label, "score": score}
            for score, label in zip(scores, self.labels)
        ]
class ColaONNXPredictor:
    """Scores CoLA acceptability using an exported ONNX model.

    Tokenization is delegated to ``DataModule``; inference runs through an
    ONNX Runtime session, so no PyTorch is needed at prediction time.
    """

    def __init__(self, model_path):
        self.ort_session = ort.InferenceSession(model_path)
        self.processor = DataModule()
        self.labels = ["unacceptable", "acceptable"]

    @timing
    def predict(self, text):
        """Return ``{"text": ..., "prediction": {"label": ..., "score": ...}}``."""
        processed = self.processor.tokenize_data({"sentence": text})

        # ONNX expects batched inputs: add a leading batch axis of size 1.
        ort_inputs = {
            "input_ids": np.expand_dims(processed["input_ids"], axis=0),
            "attention_mask": np.expand_dims(processed["attention_mask"], axis=0),
        }
        ort_outs = self.ort_session.run(None, ort_inputs)
        scores = softmax(ort_outs[0])[0]

        best = np.argmax(scores)
        prediction = {}
        prediction['label'] = self.labels[best]
        prediction['score'] = round(float(scores[best]), 2)

        result = {}
        result['text'] = text
        result['prediction'] = prediction
        return result
def lambda_handler(event, context):
    """Lambda entry point: predict linguistic acceptability of a sentence.

    Supports two invocation shapes:
    - API Gateway proxy events (detected via the ``"resource"`` key): the
      sentence is inside a JSON-encoded ``"body"``; returns an HTTP-style
      dict with ``statusCode``/``headers``/``body``.
    - Direct invocations: the sentence is at ``event["sentence"]``; returns
      the raw prediction dict.

    Args:
        event: the Lambda event payload.
        context: the Lambda runtime context (unused).
    """
    if "resource" in event:  # API Gateway proxy integration event
        body = json.loads(event["body"])
        # Lazy %-style logging args avoid rendering when the level is off.
        logger.info("Got the input: %s", body["sentence"])

        response = inferencing_instance.predict(body["sentence"])
        logger.info(json.dumps(response))
        return {
            "statusCode": 200,
            "headers": {},
            "body": json.dumps(response),
        }

    # Direct invocation (e.g. test event or another service calling us).
    logger.info("Got the input: %s", event["sentence"])
    response = inferencing_instance.predict(event["sentence"])
    logger.info(json.dumps(response))
    return response
class ColaModel(pl.LightningModule):
    """BERT-based binary classifier for the CoLA acceptability task.

    Wraps a HuggingFace sequence-classification head and tracks accuracy,
    precision, recall and F1 with torchmetrics; metrics and a confusion
    matrix are logged to Weights & Biases.

    Args:
        model_name: HuggingFace model identifier to fine-tune.
        lr: Adam learning rate (stored in ``hparams`` via save_hyperparameters).
    """

    def __init__(self, model_name="google/bert_uncased_L-2_H-128_A-2", lr=3e-5):
        super().__init__()
        self.save_hyperparameters()

        self.bert = AutoModelForSequenceClassification.from_pretrained(
            model_name, num_labels=2
        )
        self.num_classes = 2
        self.train_accuracy_metric = torchmetrics.Accuracy()
        self.val_accuracy_metric = torchmetrics.Accuracy()
        self.f1_metric = torchmetrics.F1(num_classes=self.num_classes)
        self.precision_macro_metric = torchmetrics.Precision(
            average="macro", num_classes=self.num_classes
        )
        self.recall_macro_metric = torchmetrics.Recall(
            average="macro", num_classes=self.num_classes
        )
        self.precision_micro_metric = torchmetrics.Precision(average="micro")
        self.recall_micro_metric = torchmetrics.Recall(average="micro")

    def forward(self, input_ids, attention_mask, labels=None):
        """Run the underlying HF model; passing labels makes it return a loss."""
        outputs = self.bert(
            input_ids=input_ids, attention_mask=attention_mask, labels=labels
        )
        return outputs

    def training_step(self, batch, batch_idx):
        outputs = self.forward(
            batch["input_ids"], batch["attention_mask"], labels=batch["label"]
        )
        preds = torch.argmax(outputs.logits, 1)
        train_acc = self.train_accuracy_metric(preds, batch["label"])
        self.log("train/loss", outputs.loss, prog_bar=True, on_epoch=True)
        self.log("train/acc", train_acc, prog_bar=True, on_epoch=True)
        return outputs.loss

    def validation_step(self, batch, batch_idx):
        labels = batch["label"]
        outputs = self.forward(
            batch["input_ids"], batch["attention_mask"], labels=batch["label"]
        )
        preds = torch.argmax(outputs.logits, 1)

        # Metrics
        valid_acc = self.val_accuracy_metric(preds, labels)
        precision_macro = self.precision_macro_metric(preds, labels)
        recall_macro = self.recall_macro_metric(preds, labels)
        precision_micro = self.precision_micro_metric(preds, labels)
        recall_micro = self.recall_micro_metric(preds, labels)
        f1 = self.f1_metric(preds, labels)

        # Logging metrics
        self.log("valid/loss", outputs.loss, prog_bar=True, on_step=True)
        self.log("valid/acc", valid_acc, prog_bar=True, on_epoch=True)
        self.log("valid/precision_macro", precision_macro, prog_bar=True, on_epoch=True)
        self.log("valid/recall_macro", recall_macro, prog_bar=True, on_epoch=True)
        self.log("valid/precision_micro", precision_micro, prog_bar=True, on_epoch=True)
        self.log("valid/recall_micro", recall_micro, prog_bar=True, on_epoch=True)
        self.log("valid/f1", f1, prog_bar=True, on_epoch=True)
        # Returned so validation_epoch_end can build the confusion matrix.
        return {"labels": labels, "logits": outputs.logits}

    def validation_epoch_end(self, outputs):
        labels = torch.cat([x["labels"] for x in outputs])
        logits = torch.cat([x["logits"] for x in outputs])

        # Fix: move to CPU before .numpy() so this also works when training
        # on GPU (tensor.numpy() raises on CUDA tensors).
        labels_np = labels.cpu().numpy()
        logits_np = logits.detach().cpu().numpy()

        ## There are multiple ways to track the metrics
        # 1. Confusion matrix plotting using the built-in W&B method
        self.logger.experiment.log(
            {
                "conf": wandb.plot.confusion_matrix(
                    probs=logits_np, y_true=labels_np
                )
            }
        )

        # 2. Confusion matrix plotting using the scikit-learn helper
        # wandb.log({"cm": wandb.sklearn.plot_confusion_matrix(labels_np, preds)})

        # 3. Confusion matrix plotting using Seaborn
        # preds_np = torch.argmax(logits, 1).cpu().numpy()
        # data = confusion_matrix(labels_np, preds_np)
        # df_cm = pd.DataFrame(data, columns=np.unique(labels_np), index=np.unique(labels_np))
        # df_cm.index.name = "Actual"
        # df_cm.columns.name = "Predicted"
        # plt.figure(figsize=(7, 4))
        # plot = sns.heatmap(df_cm, cmap="Blues", annot=True, annot_kws={"size": 16})
        # self.logger.experiment.log({"Confusion Matrix": wandb.Image(plot)})

        # self.logger.experiment.log(
        #     {"roc": wandb.plot.roc_curve(labels_np, logits_np)}
        # )

    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=self.hparams["lr"])
class SamplesVisualisationLogger(pl.Callback):
    """Callback that logs mis-classified validation samples to W&B.

    After each validation run it scores one validation batch and uploads a
    table of the sentences whose prediction disagrees with the label.

    Args:
        datamodule: the ``DataModule`` providing ``val_dataloader()``.
    """

    def __init__(self, datamodule):
        super().__init__()
        self.datamodule = datamodule

    def on_validation_end(self, trainer, pl_module):
        val_batch = next(iter(self.datamodule.val_dataloader()))
        sentences = val_batch["sentence"]

        # NOTE(review): val_batch tensors come from a fresh CPU dataloader;
        # this assumes pl_module accepts CPU inputs — confirm for GPU runs.
        outputs = pl_module(val_batch["input_ids"], val_batch["attention_mask"])
        # Fix: bring predictions to CPU before .numpy() so the callback also
        # works when the module's outputs live on GPU.
        preds = torch.argmax(outputs.logits, 1).cpu()
        labels = val_batch["label"].cpu()

        df = pd.DataFrame(
            {"Sentence": sentences, "Label": labels.numpy(), "Predicted": preds.numpy()}
        )

        # Only upload the rows the model got wrong.
        wrong_df = df[df["Label"] != df["Predicted"]]
        trainer.logger.experiment.log(
            {
                "examples": wandb.Table(dataframe=wrong_df, allow_mixed_types=True),
                "global_step": trainer.global_step,
            }
        )
def timing(f):
    """Decorator that prints how long the wrapped function took.

    Usage:
        @timing
        def function(a):
            pass

    Returns the wrapped function's result unchanged.
    """

    @wraps(f)
    def wrapper(*args, **kwargs):
        # perf_counter is monotonic and high-resolution, unlike time.time(),
        # which can jump backwards/forwards on wall-clock adjustments.
        start = time.perf_counter()
        result = f(*args, **kwargs)
        end = time.perf_counter()
        print("function:%r took: %2.5f sec" % (f.__name__, end - start))
        return result

    return wrapper