├── .github └── FUNDING.yml ├── .gitignore ├── 01-intro ├── 01-what-is-ml.md ├── 02-ml-vs-rules.md ├── 03-supervised-ml.md ├── 04-crisp-dm.md ├── 05-model-selection.md ├── 06-environment.md ├── 07-numpy.md ├── 08-linear-algebra.md ├── 09-pandas.md ├── 10-summary.md ├── README.md ├── homework.md ├── images │ ├── add-code-for-datafile-download.png │ ├── sample-code.png │ ├── sample-data-file.png │ ├── sample-jupyter-notebook.png │ ├── thumbnail-1-01.jpg │ ├── thumbnail-1-02.jpg │ ├── thumbnail-1-03.jpg │ ├── thumbnail-1-04.jpg │ ├── thumbnail-1-05.jpg │ ├── thumbnail-1-07.jpg │ ├── thumbnail-1-08.jpg │ ├── thumbnail-1-09.jpg │ └── thumbnail-1-10.jpg └── notebooks │ ├── 07-numpy.ipynb │ ├── 08-linear-algebra.ipynb │ └── 09-pandas.ipynb ├── 02-regression ├── 01-car-price-intro.md ├── 02-data-preparation.md ├── 03-eda.md ├── 04-validation-framework.md ├── 05-linear-regression-simple.md ├── 06-linear-regression-vector.md ├── 07-linear-regression-training.md ├── 08-baseline-model.md ├── 09-rmse.md ├── 10-car-price-validation.md ├── 11-feature-engineering.md ├── 12-categorical-variables.md ├── 13-regularization.md ├── 14-tuning-model.md ├── 15-using-model.md ├── 16-summary.md ├── 17-explore-more.md ├── README.md ├── homework.md ├── images │ ├── thumbnail-2-01.jpg │ ├── thumbnail-2-02.jpg │ ├── thumbnail-2-03.jpg │ ├── thumbnail-2-04.jpg │ ├── thumbnail-2-05.jpg │ ├── thumbnail-2-06.jpg │ ├── thumbnail-2-07.jpg │ ├── thumbnail-2-08.jpg │ ├── thumbnail-2-09.jpg │ ├── thumbnail-2-10.jpg │ ├── thumbnail-2-11.jpg │ ├── thumbnail-2-12.jpg │ ├── thumbnail-2-13.jpg │ ├── thumbnail-2-14.jpg │ ├── thumbnail-2-15.jpg │ └── thumbnail-2-16.jpg ├── meta.json └── notebook.ipynb ├── 03-classification ├── 01-churn-project.md ├── 02-data-preparation.md ├── 03-validation.md ├── 04-eda.md ├── 05-risk.md ├── 06-mutual-info.md ├── 07-correlation.md ├── 08-ohe.md ├── 09-logistic-regression.md ├── 10-training-log-reg.md ├── 11-log-reg-interpretation.md ├── 12-using-log-reg.md ├── 13-summary.md 
├── 14-explore-more.md ├── README.md ├── homework.md ├── images │ ├── correlation-matrix.png │ ├── thumbnail-3-01.jpg │ ├── thumbnail-3-02.jpg │ ├── thumbnail-3-03.jpg │ ├── thumbnail-3-04.jpg │ ├── thumbnail-3-05.jpg │ ├── thumbnail-3-06.jpg │ ├── thumbnail-3-07.jpg │ ├── thumbnail-3-08.jpg │ ├── thumbnail-3-09.jpg │ ├── thumbnail-3-10.jpg │ ├── thumbnail-3-11.jpg │ ├── thumbnail-3-12.jpg │ └── thumbnail-3-13.jpg ├── meta.csv ├── meta.json ├── notebook-scaling-ohe.ipynb └── notebook.ipynb ├── 04-evaluation ├── 01-overview.md ├── 02-accuracy.md ├── 03-confusion-table.md ├── 04-precision-recall.md ├── 05-roc.md ├── 06-auc.md ├── 07-cross-validation.md ├── 08-summary.md ├── 09-explore-more.md ├── README.md ├── homework.md ├── images │ ├── TPR_FPR.png │ ├── classification_metrics.png │ ├── confusion_matrix.png │ ├── thumbnail-4-01.jpg │ ├── thumbnail-4-02.jpg │ ├── thumbnail-4-03.jpg │ ├── thumbnail-4-04.jpg │ ├── thumbnail-4-05-cont.jpg │ ├── thumbnail-4-05.jpg │ ├── thumbnail-4-06.jpg │ ├── thumbnail-4-07.jpg │ └── thumbnail-4-08.jpg ├── meta.csv ├── meta.json └── notebook.ipynb ├── 05-deployment ├── 01-intro.md ├── 02-pickle.md ├── 03-flask-intro.md ├── 04-flask-deployment.md ├── 05-pipenv.md ├── 06-docker.md ├── 07-aws-eb.md ├── 08-summary.md ├── 09-explore-more.md ├── README.md ├── code │ ├── 05-train-churn-model.ipynb │ ├── Dockerfile │ ├── Pipfile │ ├── Pipfile.lock │ ├── model_C=1.0.bin │ ├── ping.py │ ├── plan.md │ ├── predict-test.py │ ├── predict.py │ └── train.py ├── homework.md ├── images │ ├── thumbnail-5-01.jpg │ ├── thumbnail-5-02.jpg │ ├── thumbnail-5-03.jpg │ ├── thumbnail-5-04.jpg │ ├── thumbnail-5-05.jpg │ ├── thumbnail-5-06.jpg │ ├── thumbnail-5-07.jpg │ └── thumbnail-5-08.jpg ├── meta.csv └── meta.json ├── 06-trees ├── 01-credit-risk.md ├── 02-data-prep.md ├── 03-decision-trees.md ├── 04-decision-tree-learning.md ├── 05-decision-tree-tuning.md ├── 06-random-forest.md ├── 07-boosting.md ├── 08-xgb-tuning.md ├── 09-final-model.md ├── 10-summary.md 
├── 11-explore-more.md ├── README.md ├── homework.md ├── images │ ├── thumbnail-6-01.jpg │ ├── thumbnail-6-02.jpg │ ├── thumbnail-6-03.jpg │ ├── thumbnail-6-04.jpg │ ├── thumbnail-6-05.jpg │ ├── thumbnail-6-06.jpg │ ├── thumbnail-6-07.jpg │ ├── thumbnail-6-08.jpg │ ├── thumbnail-6-09.jpg │ ├── thumbnail-6-10.jpg │ └── thumbnail-6-12.jpg ├── meta.csv ├── meta.json └── notebook.ipynb ├── 08-deep-learning ├── 01-fashion-classification.md ├── 01b-saturn-cloud.md ├── 02-tensorflow-keras.md ├── 03-pretrained-models.md ├── 04-conv-neural-nets.md ├── 05-transfer-learning.md ├── 06-learning-rate.md ├── 07-checkpointing.md ├── 08-more-layers.md ├── 09-dropout.md ├── 10-augmentation.md ├── 11-large-model.md ├── 12-using-model.md ├── 13-summary.md ├── 14-explore-more.md ├── README.md ├── homework.md ├── images │ ├── thumbnail-8-01.jpg │ ├── thumbnail-8-01b.jpg │ ├── thumbnail-8-02.jpg │ ├── thumbnail-8-03.jpg │ ├── thumbnail-8-04.jpg │ ├── thumbnail-8-05.jpg │ ├── thumbnail-8-06.jpg │ ├── thumbnail-8-07.jpg │ ├── thumbnail-8-08.jpg │ ├── thumbnail-8-09.jpg │ ├── thumbnail-8-10.jpg │ ├── thumbnail-8-11.jpg │ ├── thumbnail-8-12.jpg │ └── thumbnail-8-13.jpg ├── install.md ├── meta.csv ├── meta.json └── notebook.ipynb ├── 09-serverless ├── 01-intro.md ├── 02-aws-lambda.md ├── 03-tensorflow-lite.md ├── 04-preparing-code.md ├── 05-docker-image.md ├── 06-creating-lambda.md ├── 07-api-gateway.md ├── 08-summary.md ├── 09-explore-more.md ├── README.md ├── code │ ├── Dockerfile │ ├── convert-model.py │ ├── lambda_function.py │ ├── plan.md │ ├── tensorflow-model.ipynb │ └── test.py ├── homework.md ├── images │ ├── thumbnail-9-01.jpg │ ├── thumbnail-9-02.jpg │ ├── thumbnail-9-03.jpg │ ├── thumbnail-9-04.jpg │ ├── thumbnail-9-05.jpg │ ├── thumbnail-9-06.jpg │ ├── thumbnail-9-07.jpg │ └── thumbnail-9-08.jpg ├── meta.csv ├── meta.json └── updates.md ├── 10-kubernetes ├── 01-overview.md ├── 02-tensorflow-serving.md ├── 03-preprocessing.md ├── 04-docker-compose.md ├── 05-kubernetes-intro.md ├── 
06-kubernetes-simple-service.md ├── 07-kubernetes-tf-serving.md ├── 08-eks.md ├── 09-summary.md ├── 10-explore-more.md ├── README.md ├── code │ ├── Pipfile │ ├── Pipfile.lock │ ├── README.md │ ├── docker-compose.yaml │ ├── gateway.py │ ├── image-gateway.dockerfile │ ├── image-model.dockerfile │ ├── kube-config │ │ ├── eks-config.yaml │ │ ├── gateway-deployment.yaml │ │ ├── gateway-service.yaml │ │ ├── model-deployment.yaml │ │ └── model-service.yaml │ ├── ping │ │ ├── Dockerfile │ │ ├── Pipfile │ │ ├── Pipfile.lock │ │ ├── deployment.yaml │ │ ├── metallb-config.yaml │ │ ├── ping.py │ │ └── service.yaml │ ├── plan.md │ ├── proto.py │ ├── test.py │ └── tf-serving-connect.ipynb ├── homework.md ├── images │ ├── thumbnail-10-01.jpg │ ├── thumbnail-10-02.jpg │ ├── thumbnail-10-03.jpg │ ├── thumbnail-10-04.jpg │ ├── thumbnail-10-05.jpg │ ├── thumbnail-10-06.jpg │ ├── thumbnail-10-07.jpg │ ├── thumbnail-10-08.jpg │ └── thumbnail-10-09.jpg ├── meta.csv └── meta.json ├── 11-kserve ├── 01-overview.md ├── 02-kserve-local.md ├── 03-kserve-sklearn.md ├── 04-kserve-custom-image.md ├── 05-tensorflow-kserve.md ├── 06-kserve-transformers.md ├── 07-kserve-eks-upd.md ├── 07-kserve-eks.md ├── 08-summary.md ├── 09-explore-more.md ├── README.md ├── code │ ├── .gitignore │ ├── README.md │ ├── churn │ │ ├── Pipfile │ │ ├── Pipfile.lock │ │ ├── churn-service.yaml │ │ ├── churn-test.py │ │ ├── churn-train.py │ │ └── model.joblib │ ├── clothes │ │ ├── clothes-service.yaml │ │ ├── convert.py │ │ ├── test-transformer.py │ │ ├── test.ipynb │ │ └── test.py │ ├── eks │ │ ├── clothes-service.yaml │ │ ├── cluster.yaml │ │ └── test-transformer.py │ ├── image_transfomer │ │ ├── Dockerfile │ │ ├── Pipfile │ │ ├── Pipfile.lock │ │ └── image_transformer.py │ ├── iris │ │ ├── iris-example.yaml │ │ ├── iris-request.json │ │ └── iris-test.py │ └── plan.md ├── images │ ├── thumbnail-11-01.jpg │ ├── thumbnail-11-02.jpg │ ├── thumbnail-11-03.jpg │ ├── thumbnail-11-04.jpg │ ├── thumbnail-11-05.jpg │ ├── 
thumbnail-11-06.jpg │ └── thumbnail-11-07.jpg ├── meta.csv └── meta.json ├── README.md ├── after-sign-up.md ├── article └── README.md ├── asking-questions.md ├── bento.md ├── certificates.md ├── cohorts ├── 2021 │ ├── 01-intro │ │ ├── homework-1.ipynb │ │ └── homework.md │ ├── 02-regression │ │ ├── homework.ipynb │ │ └── homework.md │ ├── 03-classification │ │ ├── homework.ipynb │ │ └── homework.md │ ├── 04-evaluation │ │ ├── homework-4-solution.ipynb │ │ ├── homework-4-starter.ipynb │ │ └── homework.md │ ├── 05-deployment │ │ ├── homework.md │ │ └── homework │ │ │ ├── Dockerfile │ │ │ ├── Pipfile │ │ │ ├── Pipfile.lock │ │ │ ├── dv.bin │ │ │ ├── homework.md │ │ │ ├── model1.bin │ │ │ ├── q3_test.py │ │ │ ├── q4_predict.py │ │ │ ├── q4_test.py │ │ │ ├── q6_predict.py │ │ │ └── q6_test.py │ ├── 06-trees │ │ ├── homework-6-solution.ipynb │ │ ├── homework-6-starter.ipynb │ │ └── homework.md │ ├── 07-midterm-project │ │ ├── README.md │ │ ├── week10-office-hours.ipynb │ │ ├── week8-office-hours.ipynb │ │ └── week9-office-hours.ipynb │ ├── 08-deep-learning │ │ ├── CNN_solution.ipynb │ │ ├── homework.md │ │ └── week-11-office-hours.ipynb │ ├── 09-serverless │ │ ├── homework.md │ │ └── homework │ │ │ ├── Dockerfile │ │ │ ├── homework.ipynb │ │ │ ├── homework.py │ │ │ └── test.py │ ├── 10-kubernetes │ │ ├── homework.md │ │ └── homework │ │ │ ├── deployment.yaml │ │ │ └── service.yaml │ ├── 12-capstone │ │ └── README.md │ ├── 13-article │ │ └── README.md │ ├── 14-project │ │ └── README.md │ ├── leaderboard.md │ └── office-hours.md ├── 2022 │ ├── 01-intro │ │ ├── homework.md │ │ └── homework_1.ipynb │ ├── 02-regression │ │ ├── homework.md │ │ └── homework_2.ipynb │ ├── 03-classification │ │ ├── homework.md │ │ └── homework_3.ipynb │ ├── 04-evaluation │ │ ├── homework.md │ │ └── homework_4.ipynb │ ├── 05-deployment │ │ ├── homework.md │ │ └── homework │ │ │ ├── Dockerfile │ │ │ ├── Pipfile │ │ │ ├── Pipfile.lock │ │ │ ├── dv.bin │ │ │ ├── model1.bin │ │ │ ├── q3_test.py │ │ │ 
├── q4_predict.py │ │ │ ├── q4_test.py │ │ │ ├── q6_predict.py │ │ │ └── q6_test.py │ ├── 06-trees │ │ ├── homework.md │ │ ├── homework_6.ipynb │ │ └── homework_6_starter.ipynb │ ├── 07-bento-production │ │ ├── homework.md │ │ ├── locustfile.py │ │ └── resources │ │ │ ├── classify-endpoint.png │ │ │ ├── dragon1.jpeg │ │ │ ├── dragon2.jpeg │ │ │ ├── dragon3.jpeg │ │ │ └── dragon4.jpeg │ ├── 08-deep-learning │ │ ├── homework.md │ │ └── homework_8.ipynb │ ├── 09-serverless │ │ ├── homework.md │ │ └── homework │ │ │ ├── Dockerfile │ │ │ ├── homework.ipynb │ │ │ ├── homework.py │ │ │ └── test.py │ ├── 10-kubernetes │ │ ├── homework.md │ │ └── homework │ │ │ ├── deployment.yaml │ │ │ ├── hpa.yaml │ │ │ ├── service.yaml │ │ │ └── test.py │ ├── README.md │ ├── article.md │ ├── leaderboard.md │ └── projects.md ├── 2023 │ ├── 01-intro │ │ ├── homework.md │ │ └── homework_1.ipynb │ ├── 02-regression │ │ └── homework.md │ ├── 03-classification │ │ ├── homework.md │ │ └── homework_3.ipynb │ ├── 04-evaluation │ │ └── homework.md │ ├── 05-deployment │ │ ├── homework.md │ │ └── homework │ │ │ ├── Dockerfile │ │ │ ├── Pipfile │ │ │ ├── Pipfile.lock │ │ │ ├── dv.bin │ │ │ ├── model1.bin │ │ │ ├── q3_test.py │ │ │ ├── q4_predict.py │ │ │ ├── q4_test.py │ │ │ ├── q6_predict.py │ │ │ └── q6_test.py │ ├── 06-trees │ │ └── homework.md │ ├── 08-deep-learning │ │ ├── homework.ipynb │ │ └── homework.md │ ├── 09-serverless │ │ └── homework.md │ ├── 10-kubernetes │ │ └── homework.md │ ├── README.md │ ├── article.md │ ├── leaderboard.md │ └── projects.md └── 2024 │ ├── 01-intro │ ├── homework.md │ └── homework_1.ipynb │ ├── 02-regression │ ├── homework.ipynb │ └── homework.md │ ├── 03-classification │ ├── homework.md │ └── homework_3.ipynb │ ├── 04-evaluation │ ├── homework.ipynb │ └── homework.md │ ├── 05-deployment │ ├── homework.md │ └── homework │ │ ├── Dockerfile │ │ ├── Pipfile │ │ ├── Pipfile.lock │ │ ├── dv.bin │ │ ├── model1.bin │ │ ├── q3_test.py │ │ ├── q4_predict.py │ │ ├── 
q4_test.py │ │ ├── q6_predict.py │ │ └── q6_test.py │ ├── 06-trees │ ├── homework.ipynb │ └── homework.md │ ├── 08-deep-learning │ ├── homework.md │ └── homework_8.ipynb │ ├── 09-serverless │ ├── homework.md │ └── homework │ │ ├── Dockerfile │ │ ├── entrypoint.sh │ │ ├── homework.dockerfile │ │ ├── homework.ipynb │ │ ├── homework.py │ │ └── test.py │ ├── 10-kubernetes │ ├── homework.md │ └── homework │ │ ├── components.yaml │ │ ├── deployment.yaml │ │ ├── hpa.yaml │ │ ├── service.yaml │ │ └── test.py │ ├── README.md │ ├── article.md │ └── projects.md ├── generate-description.ipynb ├── generate-pages.ipynb ├── images ├── bentoml.png ├── play.png └── zoomcamp.jpg ├── learning-in-public.md └── projects └── README.md /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | custom: ['http://bit.ly/mlbookcamp'] 2 | github: alexeygrigorev 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # generated 2 | .ipynb_checkpoints/ 3 | __pycache__/ 4 | **my_dir/ 5 | **logs/ 6 | **models/ 7 | 8 | # file types 9 | *.h5 10 | *.tflite 11 | *.keras 12 | *.zip 13 | *.pdf 14 | 15 | # data folders 16 | **data/ 17 | 18 | # content-specific 19 | /08-deep-learning/clothing-dataset-small/ 20 | /08-deep-learning/clothing-dataset/ 21 | /08-deep-learning/ImageClassification/ 22 | /08-deep-learning/my_dir/ 23 | 24 | /09-serverless/clothing-model/ 25 | /09-serverless/clothing-model/ 26 | 27 | **midterms_evaluations/ 28 | **samples/ 29 | -------------------------------------------------------------------------------- /01-intro/01-what-is-ml.md: -------------------------------------------------------------------------------- 1 | ## 1.1 Introduction to Machine Learning 2 | 3 | 4 | 5 | [Slides](https://www.slideshare.net/AlexeyGrigorev/ml-zoomcamp-11-introduction-to-machine-learning) 6 | 7 | 8 | ## Notes 9 | 10 | The 
concept of ML is depicted with an example of predicting the price of a car. The ML model 11 | learns from data, represented as some **features** such as year, mileage, among others, and the **target** variable, in this 12 | case, the car's price, by extracting patterns from the data. 13 | 14 | Then, the model is given new data (**without** the target) about cars and predicts their price (target). 15 | 16 | In summary, ML is a process of **extracting patterns from data**, which is of two types: 17 | 18 | * features (information about the object) and 19 | * target (property to predict for unseen objects). 20 | 21 | Therefore, new feature values are presented to the model, and it makes **predictions** from the learned patterns. 22 | 23 | 24 | 25 | 26 | 30 | 31 |
⚠️ 27 | The notes are written by the community.
28 | If you see an error here, please create a PR with a fix. 29 |
32 | 33 | 34 | ## Notes 35 | 36 | * [Notes from Peter Ernicke](https://knowmledge.com/2023/09/09/ml-zoomcamp-2023-introduction-to-machine-learning-part-1/) 37 | 38 | ## Navigation 39 | 40 | * [Machine Learning Zoomcamp course](../) 41 | * [Lesson 1: Introduction to Machine Learning](./) 42 | * Next: [ML vs Rule-Based Systems](02-ml-vs-rules.md) 43 | -------------------------------------------------------------------------------- /01-intro/09-pandas.md: -------------------------------------------------------------------------------- 1 | ## 1.9 Introduction to Pandas 2 | 3 | 4 | 5 | 6 | ## Notes 7 | 8 | 9 | Add notes here (PRs are welcome). 10 | 11 | 12 | 13 | 14 | 18 | 19 |
⚠️ 15 | The notes are written by the community.
16 | If you see an error here, please create a PR with a fix. 17 |
20 | 21 | * [Notes from Peter Ernicke - Part 1/2](https://knowmledge.com/2023/09/16/ml-zoomcamp-2023-introduction-to-machine-learning-part-12/) 22 | * [Notes from Peter Ernicke - Part 2/2](https://knowmledge.com/2023/09/17/ml-zoomcamp-2023-introduction-to-machine-learning-part-13/) 23 | 24 | ## Links 25 | 26 | * [Notebook from the video](notebooks/09-pandas.ipynb) 27 | * [Notebook](https://github.com/alexeygrigorev/mlbookcamp-code/blob/master/appendix-d-pandas.ipynb) 28 | 29 | ## Additional links 30 | 31 | * [Pandas Cheat sheet](https://www.datacamp.com/community/blog/python-pandas-cheat-sheet) 32 | 33 | ## Navigation 34 | 35 | * [Machine Learning Zoomcamp course](../) 36 | * [Lesson 1: Introduction to Machine Learning](./) 37 | * Previous: [Linear Algebra Refresher](08-linear-algebra.md) 38 | * Next: [Summary](10-summary.md) 39 | -------------------------------------------------------------------------------- /01-intro/homework.md: -------------------------------------------------------------------------------- 1 | ## Homework 2 | 3 | * For 2024 cohort homework, check [the 2024 cohort folder](../cohorts/2024/01-intro/homework.md) 4 | * For 2023 cohort homework, check [the 2023 cohort folder](../cohorts/2023/01-intro/homework.md) 5 | * For 2022 cohort homework, check [the 2022 cohort folder](../cohorts/2022/01-intro/homework.md) 6 | * For 2021 cohort homework and solution, check [the 2021 cohort folder](../cohorts/2021/01-intro/) 7 | 8 | 9 | ## Navigation 10 | 11 | * [Machine Learning Zoomcamp course](../) 12 | * [Lesson 1: Introduction to Machine Learning](./) 13 | * Previous: [Summary](10-summary.md) 14 | -------------------------------------------------------------------------------- /01-intro/images/add-code-for-datafile-download.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/01-intro/images/add-code-for-datafile-download.png -------------------------------------------------------------------------------- /01-intro/images/sample-code.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/01-intro/images/sample-code.png -------------------------------------------------------------------------------- /01-intro/images/sample-data-file.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/01-intro/images/sample-data-file.png -------------------------------------------------------------------------------- /01-intro/images/sample-jupyter-notebook.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/01-intro/images/sample-jupyter-notebook.png -------------------------------------------------------------------------------- /01-intro/images/thumbnail-1-01.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/01-intro/images/thumbnail-1-01.jpg -------------------------------------------------------------------------------- /01-intro/images/thumbnail-1-02.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/01-intro/images/thumbnail-1-02.jpg 
-------------------------------------------------------------------------------- /01-intro/images/thumbnail-1-03.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/01-intro/images/thumbnail-1-03.jpg -------------------------------------------------------------------------------- /01-intro/images/thumbnail-1-04.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/01-intro/images/thumbnail-1-04.jpg -------------------------------------------------------------------------------- /01-intro/images/thumbnail-1-05.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/01-intro/images/thumbnail-1-05.jpg -------------------------------------------------------------------------------- /01-intro/images/thumbnail-1-07.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/01-intro/images/thumbnail-1-07.jpg -------------------------------------------------------------------------------- /01-intro/images/thumbnail-1-08.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/01-intro/images/thumbnail-1-08.jpg -------------------------------------------------------------------------------- /01-intro/images/thumbnail-1-09.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/01-intro/images/thumbnail-1-09.jpg -------------------------------------------------------------------------------- /01-intro/images/thumbnail-1-10.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/01-intro/images/thumbnail-1-10.jpg -------------------------------------------------------------------------------- /02-regression/01-car-price-intro.md: -------------------------------------------------------------------------------- 1 | 2 | ## 2.1 Car price prediction project 3 | 4 | 5 | 6 | [Slides](https://www.slideshare.net/AlexeyGrigorev/ml-zoomcamp-21-car-price-prediction-project) 7 | 8 | 9 | ## Notes 10 | 11 | This project is about the creation of a model for helping users to predict car prices. The dataset was obtained from [this 12 | kaggle competition](https://www.kaggle.com/CooperUnion/cardataset). 13 | 14 | **Project plan:** 15 | 16 | * Prepare data and Exploratory data analysis (EDA) 17 | * Use linear regression for predicting price 18 | * Understanding the internals of linear regression 19 | * Evaluating the model with RMSE 20 | * Feature engineering 21 | * Regularization 22 | * Using the model 23 | 24 | The code and dataset are available at this [link](https://github.com/alexeygrigorev/mlbookcamp-code/tree/master/chapter-02-car-price). 25 | 26 | 27 | 28 | 29 | 33 | 34 |
⚠️ 30 | The notes are written by the community.
31 | If you see an error here, please create a PR with a fix. 32 |
35 | 36 | * [Notes from Peter Ernicke](https://knowmledge.com/2023/09/18/ml-zoomcamp-2023-machine-learning-for-regression-part-1/) 37 | 38 | ## Navigation 39 | 40 | * [Machine Learning Zoomcamp course](../) 41 | * [Session 2: Machine Learning for Regression](./) 42 | * Next: [Data preparation](02-data-preparation.md) 43 | -------------------------------------------------------------------------------- /02-regression/02-data-preparation.md: -------------------------------------------------------------------------------- 1 | 2 | ## 2.2 Data preparation 3 | 4 | 5 | 6 | [Slides](https://www.slideshare.net/AlexeyGrigorev/ml-zoomcamp-2-slides) 7 | 8 | 9 | ## Notes 10 | 11 | **Pandas attributes and methods:** 12 | 13 | * `pd.read_csv()` -> read csv files 14 | * `df.head()` -> take a look at the dataframe 15 | * `df.columns` -> retrieve column names of a dataframe 16 | * `df.columns.str.lower()` -> lowercase all the letters 17 | * `df.columns.str.replace(' ', '_')` -> replace the space separator 18 | * `df.dtypes` -> retrieve data types of all features 19 | * `df.index` -> retrieve indices of a dataframe 20 | 21 | The entire code of this project is available in [this jupyter notebook](notebook.ipynb). 22 | 23 | 24 | 25 | 26 | 30 | 31 |
⚠️ 27 | The notes are written by the community.
28 | If you see an error here, please create a PR with a fix. 29 |
32 | 33 | * [Notes from Peter Ernicke](https://knowmledge.com/2023/09/18/ml-zoomcamp-2023-machine-learning-for-regression-part-1/) 34 | 35 | ## Navigation 36 | 37 | * [Machine Learning Zoomcamp course](../) 38 | * [Session 2: Machine Learning for Regression](./) 39 | * Previous: [Car price prediction project](01-car-price-intro.md) 40 | * Next: [Exploratory data analysis](03-eda.md) 41 | -------------------------------------------------------------------------------- /02-regression/03-eda.md: -------------------------------------------------------------------------------- 1 | 2 | ## 2.3 Exploratory data analysis 3 | 4 | 5 | 6 | [Slides](https://www.slideshare.net/AlexeyGrigorev/ml-zoomcamp-2-slides) 7 | 8 | 9 | ## Notes 10 | 11 | **Pandas attributes and methods:** 12 | 13 | * `df[col].unique()` -> return a list of unique values in the series 14 | * `df[col].nunique()` -> return the number of unique values in the series 15 | * `df.isnull().sum()` -> return the number of null values in the dataframe 16 | 17 | **Matplotlib and seaborn methods:** 18 | 19 | * `%matplotlib inline` -> assure that plots are displayed in jupyter notebook's cells 20 | * `sns.histplot()` -> show the histogram of a series 21 | 22 | **Numpy methods:** 23 | * `np.log1p()` -> apply log transformation to a variable, after adding one to each input value. 24 | 25 | Long-tail distributions usually confuse the ML models, so the recommendation is to transform the target variable distribution to a normal one whenever possible. 26 | 27 | The entire code of this project is available in [this jupyter notebook](https://github.com/alexeygrigorev/mlbookcamp-code/blob/master/chapter-02-car-price/02-carprice.ipynb). 28 | 29 | 30 | 31 | 32 | 36 | 37 |
⚠️ 33 | The notes are written by the community.
34 | If you see an error here, please create a PR with a fix. 35 |
38 | 39 | * [Notes from Peter Ernicke](https://knowmledge.com/2023/09/19/ml-zoomcamp-2023-machine-learning-for-regression-part-2/) 40 | 41 | ## Navigation 42 | 43 | * [Machine Learning Zoomcamp course](../) 44 | * [Session 2: Machine Learning for Regression](./) 45 | * Previous: [Data preparation](02-data-preparation.md) 46 | * Next: [Setting up the validation framework](04-validation-framework.md) 47 | -------------------------------------------------------------------------------- /02-regression/06-linear-regression-vector.md: -------------------------------------------------------------------------------- 1 | 2 | ## 2.6 Linear regression: vector form 3 | 4 | 5 | 6 | [Slides](https://www.slideshare.net/AlexeyGrigorev/ml-zoomcamp-2-slides) 7 | 8 | 9 | ## Notes 10 | 11 | The formula of linear regression can be synthesized with the dot product between features and weights. The feature vector includes the *bias* term with an *x* value of one, that is, the term $w_0 x_{i0}$, where $x_{i0} = 1$ for $w_0$. 12 | 13 | When all the records are included, the linear regression can be calculated with the dot product between ***feature matrix*** and ***vector of weights***, obtaining the `y` vector of predictions. 14 | 15 | The entire code of this project is available in [this jupyter notebook](https://github.com/alexeygrigorev/mlbookcamp-code/blob/master/chapter-02-car-price/02-carprice.ipynb). 16 | 17 | 18 | 19 | 20 | 24 | 25 |
⚠️ 21 | The notes are written by the community.
22 | If you see an error here, please create a PR with a fix. 23 |
26 | 27 | * [Notes from Peter Ernicke](https://knowmledge.wordpress.com/2023/09/20/ml-zoomcamp-2023-machine-learning-for-regression-part-5/) 28 | 29 | ## Navigation 30 | 31 | * [Machine Learning Zoomcamp course](../) 32 | * [Session 2: Machine Learning for Regression](./) 33 | * Previous: [Linear regression](05-linear-regression-simple.md) 34 | * Next: [Training linear regression: Normal equation](07-linear-regression-training.md) 35 | -------------------------------------------------------------------------------- /02-regression/07-linear-regression-training.md: -------------------------------------------------------------------------------- 1 | 2 | ## 2.7 Training linear regression: Normal equation 3 | 4 | 5 | 6 | [Slides](https://www.slideshare.net/AlexeyGrigorev/ml-zoomcamp-2-slides) 7 | 8 | 9 | ## Notes 10 | 11 | Obtaining predictions as close as possible to $y$ target values requires the calculation of weights from the general 12 | LR equation. The feature matrix does not 13 | have an inverse because it is not square, so it is required to obtain an approximate solution, which can be 14 | obtained using the **Gram matrix** 15 | (the product of the transposed feature matrix $X^T$ and the feature matrix $X$, i.e. $X^TX$). The vector of weights or coefficients $w$ obtained with this 16 | formula is the closest possible solution to the LR system. 17 | 18 | Normal Equation: 19 | 20 | $w$ = $(X^TX)^{-1}X^Ty$ 21 | 22 | Where: 23 | 24 | $X^TX$ is the Gram Matrix 25 | 26 | 27 | 28 | 29 | The entire code of this project is available in [this jupyter notebook](https://github.com/alexeygrigorev/mlbookcamp-code/blob/master/chapter-02-car-price/02-carprice.ipynb). 30 | 31 | 32 | 33 | 34 | 38 | 39 |
⚠️ 35 | The notes are written by the community.
36 | If you see an error here, please create a PR with a fix. 37 |
40 | 41 | * [Notes from Peter Ernicke](https://knowmledge.com/2023/09/21/ml-zoomcamp-2023-machine-learning-for-regression-part-6/) 42 | 43 | ## Navigation 44 | 45 | * [Machine Learning Zoomcamp course](../) 46 | * [Session 2: Machine Learning for Regression](./) 47 | * Previous: [Linear regression: vector form](06-linear-regression-vector.md) 48 | * Next: [Baseline model for car price prediction project](08-baseline-model.md) 49 | -------------------------------------------------------------------------------- /02-regression/09-rmse.md: -------------------------------------------------------------------------------- 1 | 2 | ## 2.9 Root Mean Squared Error (RMSE) 3 | 4 | 5 | 6 | [Slides](https://www.slideshare.net/AlexeyGrigorev/ml-zoomcamp-2-slides) 7 | 8 | 9 | ## Notes 10 | 11 | * In the previous lesson we found out our predictions were a bit off from the actual target values in the training dataset. We need a way to quantify how good or bad the model is. This is where RMSE can be of help. 12 | * Root Mean Squared Error (RMSE) is a way to evaluate regression models. It measures the error associated with the model being evaluated. This numerical figure can then be used to compare models, enabling us to choose the one that gives the best predictions. 13 | 14 | $$RMSE = \sqrt{ \frac{1}{m} \sum_{i=1}^{m} {(g(x_i) - y_i)^2}}$$ 15 | 16 | - $g(x_i)$ is the prediction 17 | - $y_i$ is the actual value 18 | - $m$ is the number of observations in the dataset (i.e. cars) 19 | 20 | 21 | The entire code of this project is available in [this jupyter notebook](https://github.com/alexeygrigorev/mlbookcamp-code/blob/master/chapter-02-car-price/02-carprice.ipynb). 22 | 23 | 24 | 25 | 26 | 30 | 31 |
⚠️ 27 | The notes are written by the community.
28 | If you see an error here, please create a PR with a fix. 29 |
32 | 33 | * [Notes from Peter Ernicke](https://knowmledge.com/2023/09/22/ml-zoomcamp-2023-machine-learning-for-regression-part-8/) 34 | 35 | ## Navigation 36 | 37 | * [Machine Learning Zoomcamp course](../) 38 | * [Session 2: Machine Learning for Regression](./) 39 | * Previous: [Baseline model for car price prediction project](08-baseline-model.md) 40 | * Next: [Using RMSE on validation data](10-car-price-validation.md) 41 | -------------------------------------------------------------------------------- /02-regression/10-car-price-validation.md: -------------------------------------------------------------------------------- 1 | 2 | ## 2.10 Computing RMSE on validation data 3 | 4 | 5 | 6 | [Slides](https://www.slideshare.net/AlexeyGrigorev/ml-zoomcamp-2-slides) 7 | 8 | 9 | ## Notes 10 | 11 | Calculation of the RMSE on validation partition of the dataset of car price prediction. In this way, we have a metric to evaluate the model's 12 | performance. 13 | 14 | The entire code of this project is available in [this jupyter notebook](https://github.com/alexeygrigorev/mlbookcamp-code/blob/master/chapter-02-car-price/02-carprice.ipynb). 15 | 16 | 17 | 18 | 19 | 23 | 24 |
⚠️ 20 | The notes are written by the community.
21 | If you see an error here, please create a PR with a fix. 22 |
25 | 26 | * [Notes from Peter Ernicke](https://knowmledge.com/2023/09/22/ml-zoomcamp-2023-machine-learning-for-regression-part-8/) 27 | 28 | ## Navigation 29 | 30 | * [Machine Learning Zoomcamp course](../) 31 | * [Session 2: Machine Learning for Regression](./) 32 | * Previous: [Root mean squared error](09-rmse.md) 33 | * Next: [Feature engineering](11-feature-engineering.md) 34 | -------------------------------------------------------------------------------- /02-regression/11-feature-engineering.md: -------------------------------------------------------------------------------- 1 | ## 2.11 Feature engineering 2 | 3 | Feature engineering is the process of creating new features. 4 | 5 | 6 | 7 | [Slides](https://www.slideshare.net/AlexeyGrigorev/ml-zoomcamp-2-slides) 8 | 9 | 10 | ## Notes 11 | 12 | A new feature, the age of the car, was added to the dataset. It is obtained by subtracting each car's year from the maximum year among all cars. 13 | This new feature improved the model performance, measured with the RMSE and by comparing the distributions of the target variable y and the predictions. 14 | 15 | The entire code of this project is available in [this jupyter notebook](https://github.com/alexeygrigorev/mlbookcamp-code/blob/master/chapter-02-car-price/02-carprice.ipynb). 16 | 17 | 18 | 19 | 20 | 24 | 25 |
⚠️ 21 | The notes are written by the community.
22 | If you see an error here, please create a PR with a fix. 23 |
26 | 27 | * [Notes from Peter Ernicke](https://knowmledge.com/2023/09/22/ml-zoomcamp-2023-machine-learning-for-regression-part-9/) 28 | 29 | ## Navigation 30 | 31 | * [Machine Learning Zoomcamp course](../) 32 | * [Session 2: Machine Learning for Regression](./) 33 | * Previous: [Using RMSE on validation data](10-car-price-validation.md) 34 | * Next: [Categorical variables](12-categorical-variables.md) 35 | -------------------------------------------------------------------------------- /02-regression/12-categorical-variables.md: -------------------------------------------------------------------------------- 1 | 2 | ## 2.12 Categorical variables 3 | 4 | 5 | 6 | [Slides](https://www.slideshare.net/AlexeyGrigorev/ml-zoomcamp-2-slides) 7 | 8 | 9 | ## Notes 10 | 11 | Categorical variables are typically represented as strings, and pandas identifies them as object types. However, some variables that appear to be numerical may actually be categorical (e.g., the number of doors a car has). All these categorical variables need to be converted to a numerical form because ML 12 | models can interpret only numerical features. It is possible to incorporate certain categories from a feature, not necessarily all of them. 13 | This transformation from categorical to numerical variables is known as One-Hot encoding. 14 | 15 | The entire code of this project is available in [this jupyter notebook](https://github.com/alexeygrigorev/mlbookcamp-code/blob/master/chapter-02-car-price/02-carprice.ipynb). 16 | 17 | 18 | 19 | 20 | 24 | 25 |
⚠️ 21 | The notes are written by the community.
22 | If you see an error here, please create a PR with a fix. 23 |
26 | 27 | * [Notes from Peter Ernicke](https://knowmledge.com/2023/09/23/ml-zoomcamp-2023-machine-learning-for-regression-part-10/) 28 | 29 | ## Comments 30 | 31 | This way of encoding categorical features is called "one-hot encoding". 32 | We'll learn more about it in Session 3. 33 | 34 | 35 | ## Navigation 36 | 37 | * [Machine Learning Zoomcamp course](../) 38 | * [Session 2: Machine Learning for Regression](./) 39 | * Previous: [Feature engineering](11-feature-engineering.md) 40 | * Next: [Regularization](13-regularization.md) 41 | -------------------------------------------------------------------------------- /02-regression/14-tuning-model.md: -------------------------------------------------------------------------------- 1 | 2 | ## 2.14 Tuning the model 3 | 4 | 5 | 6 | [Slides](https://www.slideshare.net/AlexeyGrigorev/ml-zoomcamp-2-slides) 7 | 8 | 9 | ## Notes 10 | 11 | Tuning the model consisted of finding the best regularization hyperparameter value, using the validation partition of the dataset. The model was then trained with this regularization value. 12 | 13 | The entire code of this project is available in [this jupyter notebook](https://github.com/alexeygrigorev/mlbookcamp-code/blob/master/chapter-02-car-price/02-carprice.ipynb). 14 | 15 | 16 | 17 | 18 | 22 | 23 |
⚠️ 19 | The notes are written by the community.
20 | If you see an error here, please create a PR with a fix. 21 |
24 | 25 | * [Notes from Peter Ernicke](https://knowmledge.com/2023/09/24/ml-zoomcamp-2023-machine-learning-for-regression-part-12/) 26 | 27 | ## Navigation 28 | 29 | * [Machine Learning Zoomcamp course](../) 30 | * [Session 2: Machine Learning for Regression](./) 31 | * Previous: [Regularization](13-regularization.md) 32 | * Next: [Using the model](15-using-model.md) 33 | -------------------------------------------------------------------------------- /02-regression/15-using-model.md: -------------------------------------------------------------------------------- 1 | 2 | ## 2.15 Using the model 3 | 4 | 5 | 6 | [Slides](https://www.slideshare.net/AlexeyGrigorev/ml-zoomcamp-2-slides) 7 | 8 | ## Notes 9 | 10 | After finding the best model and its parameters, it was trained with training and validation partitions and the final RMSE was calculated on the test partition. 11 | 12 | Finally, the final model was used to predict the price of new cars. 13 | 14 | The entire code of this project is available in [this jupyter notebook](https://github.com/alexeygrigorev/mlbookcamp-code/blob/master/chapter-02-car-price/02-carprice.ipynb). 15 | 16 | 17 | 18 | 19 | 23 | 24 |
⚠️ 20 | The notes are written by the community.
21 | If you see an error here, please create a PR with a fix. 22 |
25 | 26 | * [Notes from Peter Ernicke](https://knowmledge.com/2023/09/24/ml-zoomcamp-2023-machine-learning-for-regression-part-12/) 27 | 28 | ## Navigation 29 | 30 | * [Machine Learning Zoomcamp course](../) 31 | * [Session 2: Machine Learning for Regression](./) 32 | * Previous: [Tuning the model](14-tuning-model.md) 33 | * Next: [Car price prediction project summary](16-summary.md) 34 | -------------------------------------------------------------------------------- /02-regression/16-summary.md: -------------------------------------------------------------------------------- 1 | 2 | ## 2.16 Car price prediction project summary 3 | 4 | 5 | 6 | 7 | 8 | ## Notes 9 | 10 | In summary, this session covered some topics, including data preparation, exploratory data analysis, the validation framework, linear regression model, LR vector and 11 | normal forms, the baseline model, root mean squared error, feature engineering, regularization, tuning the model, and using the best model with new data. All these concepts 12 | were explained using the problem to predict the price of cars. 13 | 14 | 15 | 16 | 17 | 21 | 22 |
⚠️ 18 | The notes are written by the community.
19 | If you see an error here, please create a PR with a fix. 20 |
23 | 24 | * [Notes from Maximilien Eyengue](https://github.com/maxim-eyengue/Python-Codes/blob/main/ML_Zoomcamp_2024/02_regression/Summary_Session_02.md) 25 | 26 | ## Navigation 27 | 28 | * [Machine Learning Zoomcamp course](../) 29 | * [Session 2: Machine Learning for Regression](./) 30 | * Previous: [Using the model](15-using-model.md) 31 | * Next: [Explore more](17-explore-more.md) 32 | -------------------------------------------------------------------------------- /02-regression/17-explore-more.md: -------------------------------------------------------------------------------- 1 | 2 | ## 2.17 Explore more 3 | 4 | ### Questions 5 | 6 | * In this project, we included only 5 top features. What happens if we include 10? 7 | 8 | > That's not a graded homework, it's just for you if you want to try more things on this project 9 | 10 | 11 | ### Other projects 12 | 13 | Here are other datasets that you can play with to learn more about the topic: 14 | 15 | * [California housing dataset](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.fetch_california_housing.html) - predict the price of a house 16 | * [Student Performance Data Set](https://archive.ics.uci.edu/ml/datasets/Student+Performance) - predict the performance of students 17 | * UCI ML Repository contains a lot of other datasets suitable for practicing regression - https://archive.ics.uci.edu/ml/datasets.php?task=reg 18 | 19 | 20 | ## Navigation 21 | 22 | * [Machine Learning Zoomcamp course](../) 23 | * [Session 2: Machine Learning for Regression](./) 24 | * Previous: [Car price prediction project summary](16-summary.md) 25 | * Next: [Homework](homework.md) 26 | -------------------------------------------------------------------------------- /02-regression/homework.md: -------------------------------------------------------------------------------- 1 | ## Homework 2 | 3 | * For 2024 cohort homework, check [the 2024 cohort folder](../cohorts/2024/02-regression/homework.md) 4 | * For 2023 
cohort homework, check [the 2023 cohort folder](../cohorts/2023/02-regression/homework.md) 5 | * For 2022 cohort homework, check [the 2022 cohort folder](../cohorts/2022/02-regression/homework.md) 6 | * For 2021 cohort homework and solution, check [the 2021 cohort folder](../cohorts/2021/02-regression/) 7 | 8 | 9 | ## Navigation 10 | 11 | * [Machine Learning Zoomcamp course](../) 12 | * [Session 2: Machine Learning for Regression](./) 13 | * Previous: [Explore more](17-explore-more.md) 14 | -------------------------------------------------------------------------------- /02-regression/images/thumbnail-2-01.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/02-regression/images/thumbnail-2-01.jpg -------------------------------------------------------------------------------- /02-regression/images/thumbnail-2-02.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/02-regression/images/thumbnail-2-02.jpg -------------------------------------------------------------------------------- /02-regression/images/thumbnail-2-03.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/02-regression/images/thumbnail-2-03.jpg -------------------------------------------------------------------------------- /02-regression/images/thumbnail-2-04.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/02-regression/images/thumbnail-2-04.jpg 
-------------------------------------------------------------------------------- /02-regression/images/thumbnail-2-05.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/02-regression/images/thumbnail-2-05.jpg -------------------------------------------------------------------------------- /02-regression/images/thumbnail-2-06.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/02-regression/images/thumbnail-2-06.jpg -------------------------------------------------------------------------------- /02-regression/images/thumbnail-2-07.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/02-regression/images/thumbnail-2-07.jpg -------------------------------------------------------------------------------- /02-regression/images/thumbnail-2-08.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/02-regression/images/thumbnail-2-08.jpg -------------------------------------------------------------------------------- /02-regression/images/thumbnail-2-09.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/02-regression/images/thumbnail-2-09.jpg -------------------------------------------------------------------------------- /02-regression/images/thumbnail-2-10.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/02-regression/images/thumbnail-2-10.jpg -------------------------------------------------------------------------------- /02-regression/images/thumbnail-2-11.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/02-regression/images/thumbnail-2-11.jpg -------------------------------------------------------------------------------- /02-regression/images/thumbnail-2-12.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/02-regression/images/thumbnail-2-12.jpg -------------------------------------------------------------------------------- /02-regression/images/thumbnail-2-13.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/02-regression/images/thumbnail-2-13.jpg -------------------------------------------------------------------------------- /02-regression/images/thumbnail-2-14.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/02-regression/images/thumbnail-2-14.jpg -------------------------------------------------------------------------------- /02-regression/images/thumbnail-2-15.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/02-regression/images/thumbnail-2-15.jpg 
-------------------------------------------------------------------------------- /02-regression/images/thumbnail-2-16.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/02-regression/images/thumbnail-2-16.jpg -------------------------------------------------------------------------------- /02-regression/meta.json: -------------------------------------------------------------------------------- 1 | { 2 | "data": "meta.csv", 3 | "session": 2, 4 | "name": "Machine Learning for Regression" 5 | } -------------------------------------------------------------------------------- /03-classification/01-churn-project.md: -------------------------------------------------------------------------------- 1 | # 3.1 Churn prediction project 2 | 3 | 4 | 5 | [Slides](https://www.slideshare.net/AlexeyGrigorev/ml-zoomcamp-3-machine-learning-for-classification) empty? 6 | 7 | ## Notes 8 | 9 | The project aims to identify customers that are likely to churn, i.e. stop using a service. Each customer has a score associated with the probability of churning. Considering this data, the company would send an email with discounts or other promotions to avoid churning. 10 | 11 | The ML strategy applied to approach this problem is binary classification, which for one instance ($i^{th}$ customer), can be expressed as: 12 | 13 | $$\large g\left(x_{i}\right) = y_{i}$$ 14 | 15 | In the formula, $y_i$ is the model's prediction and belongs to {0,1}, with 0 being the negative value or no churning, and 1 the positive value or churning. The output corresponds to the likelihood of churning. 16 | 17 | In brief, the main idea behind this project is to build a model with historical data from customers and assign a score of the likelihood of churning. 18 | 19 | For this project, we used a [Kaggle dataset](https://www.kaggle.com/blastchar/telco-customer-churn).
20 | 21 | |⚠️|The notes are written by the community.
If you see an error here, please create a PR with a fix.| 22 | |---|:-:| 23 | 24 | * [Notes from Peter Ernicke](https://knowmledge.com/2023/09/25/ml-zoomcamp-2023-machine-learning-for-classification-part-1/) 25 | 26 | ## Navigation 27 | 28 | * [Machine Learning Zoomcamp course](../) 29 | * [Session 3: Machine Learning for Classification](./) 30 | * Next: [Data preparation](02-data-preparation.md) 31 | -------------------------------------------------------------------------------- /03-classification/03-validation.md: -------------------------------------------------------------------------------- 1 | 2 | ## 3.3 Setting up the validation framework 3 | 4 | 5 | 6 | [Slides](https://www.slideshare.net/AlexeyGrigorev/ml-zoomcamp-3-machine-learning-for-classification) 7 | 8 | 9 | ## Notes 10 | 11 | Splitting the dataset with **Scikit-Learn**. 12 | 13 | **Classes, functions, and methods:** 14 | 15 | * `train_test_split` - Scikit-Learn function for splitting a dataset into two parts. The `test_size` argument states how large the test set should be. The `random_state` argument sets a random seed for reproducibility purposes. 16 | * `df.reset_index(drop=True)` - reset the indices of a dataframe and delete the previous ones. 17 | * `df.x.values` - extract the values from x series 18 | * `del df['x']` - delete x series from a dataframe 19 | 20 | The entire code of this project is available in [this jupyter notebook](https://github.com/alexeygrigorev/mlbookcamp-code/blob/master/chapter-03-churn-prediction/03-churn.ipynb). 21 | 22 | 23 | 24 | 25 | 29 | 30 |
⚠️ 26 | The notes are written by the community.
27 | If you see an error here, please create a PR with a fix. 28 |
31 | 32 | * [Notes from Peter Ernicke](https://knowmledge.com/2023/09/27/ml-zoomcamp-2023-machine-learning-for-classification-part-3/) 33 | 34 | ## Navigation 35 | 36 | * [Machine Learning Zoomcamp course](../) 37 | * [Session 3: Machine Learning for Classification](./) 38 | * Previous: [Data preparation](02-data-preparation.md) 39 | * Next: [EDA](04-eda.md) 40 | -------------------------------------------------------------------------------- /03-classification/04-eda.md: -------------------------------------------------------------------------------- 1 | 2 | ## 3.4 EDA 3 | 4 | 5 | 6 | [Slides](https://www.slideshare.net/AlexeyGrigorev/ml-zoomcamp-3-machine-learning-for-classification) 7 | 8 | 9 | ## Notes 10 | 11 | The EDA for this project consisted of: 12 | * Checking missing values 13 | * Looking at the distribution of the target variable (churn) 14 | * Looking at numerical and categorical variables 15 | 16 | **Functions and methods:** 17 | 18 | * `df.isnull().sum()` - returns the number of null values in each column of the dataframe. 19 | * `df.x.value_counts()` - returns the number of values for each category in x series. The `normalize=True` argument returns the proportion (relative frequency) of each category instead of the count. In this project, the mean of churn is equal to the churn rate obtained with the value_counts method. 20 | * `round(x, y)` - round the number x to y decimal places 21 | * `df[x].nunique()` - returns the number of unique values in x series 22 | 23 | The entire code of this project is available in [this jupyter notebook](https://github.com/alexeygrigorev/mlbookcamp-code/blob/master/chapter-03-churn-prediction/03-churn.ipynb). 24 | 25 | 26 | 27 | 28 | 32 | 33 |
⚠️ 29 | The notes are written by the community.
30 | If you see an error here, please create a PR with a fix. 31 |
34 | 35 | * [Notes from Peter Ernicke](https://knowmledge.com/2023/09/27/ml-zoomcamp-2023-machine-learning-for-classification-part-4/) 36 | 37 | ## Navigation 38 | 39 | * [Machine Learning Zoomcamp course](../) 40 | * [Session 3: Machine Learning for Classification](./) 41 | * Previous: [Setting up the validation framework](03-validation.md) 42 | * Next: [Feature importance: Churn rate and risk ratio](05-risk.md) 43 | -------------------------------------------------------------------------------- /03-classification/06-mutual-info.md: -------------------------------------------------------------------------------- 1 | 2 | ## 3.6 Feature importance: Mutual information 3 | 4 | 5 | 6 | [Slides](https://www.slideshare.net/AlexeyGrigorev/ml-zoomcamp-3-machine-learning-for-classification) 7 | 8 | 9 | ## Notes 10 | 11 | Mutual information is a concept from information theory, which measures how much we can learn about one variable if we know the value of another. In this project, we can think of this as how much we learn about churn if we have the information from a particular feature. So, it is a measure of the importance of a categorical variable. 12 | 13 | **Classes, functions, and methods:** 14 | 15 | * `mutual_info_score(x, y)` - Scikit-Learn function for calculating the mutual information between the x target variable and y feature. 16 | * `df[x].apply(y)` - apply a y function to the x series of the df dataframe. 17 | * `df.sort_values(ascending=False).to_frame(name='x')` - sort values in descending order and convert the result to a dataframe whose column is named x. 18 | 19 | The entire code of this project is available in [this jupyter notebook](https://github.com/alexeygrigorev/mlbookcamp-code/blob/master/chapter-03-churn-prediction/03-churn.ipynb). 20 | 21 | 22 | 23 | 24 | 28 | 29 |
⚠️ 25 | The notes are written by the community.
26 | If you see an error here, please create a PR with a fix. 27 |
30 | 31 | * [Notes from Peter Ernicke](https://knowmledge.com/2023/09/28/ml-zoomcamp-2023-machine-learning-for-classification-part-6/) 32 | 33 | ## Navigation 34 | 35 | * [Machine Learning Zoomcamp course](../) 36 | * [Session 3: Machine Learning for Classification](./) 37 | * Previous: [Feature importance: Churn rate and risk ratio](05-risk.md) 38 | * Next: [Feature importance: Correlation](07-correlation.md) 39 | -------------------------------------------------------------------------------- /03-classification/08-ohe.md: -------------------------------------------------------------------------------- 1 | 2 | ## 3.8 One-hot encoding 3 | 4 | 5 | 6 | [Slides](https://www.slideshare.net/AlexeyGrigorev/ml-zoomcamp-3-machine-learning-for-classification) 7 | 8 | 9 | ## Notes 10 | 11 | One-Hot Encoding allows encoding categorical variables as numerical ones. This method represents each category of a variable as one column, and a 1 is assigned if the value belongs to the category or 0 otherwise. 12 | 13 | **Classes, functions, and methods:** 14 | 15 | * `df[x].to_dict(orient='records')` - convert x series to dictionaries, oriented by rows. 16 | * `DictVectorizer().fit_transform(x)` - Scikit-Learn class for one-hot encoding by converting x dictionaries into a sparse matrix. It does not affect the numerical variables. 17 | * `DictVectorizer().get_feature_names_out()` - return the names of the columns in the sparse matrix (older Scikit-Learn versions used `get_feature_names()`, which has since been removed). 18 | 19 | The entire code of this project is available in [this jupyter notebook](https://github.com/alexeygrigorev/mlbookcamp-code/blob/master/chapter-03-churn-prediction/03-churn.ipynb). 20 | 21 | 22 | 23 | 24 | 28 | 29 |
⚠️ 25 | The notes are written by the community.
26 | If you see an error here, please create a PR with a fix. 27 |
30 | 31 | * [Notes from Peter Ernicke](https://knowmledge.com/2023/09/29/ml-zoomcamp-2023-machine-learning-for-classification-part-8/) 32 | 33 | ## Navigation 34 | 35 | * [Machine Learning Zoomcamp course](../) 36 | * [Session 3: Machine Learning for Classification](./) 37 | * Previous: [Feature importance: Correlation](07-correlation.md) 38 | * Next: [Logistic regression](09-logistic-regression.md) 39 | -------------------------------------------------------------------------------- /03-classification/10-training-log-reg.md: -------------------------------------------------------------------------------- 1 | 2 | ## 3.10 Training logistic regression with Scikit-Learn 3 | 4 | 5 | 6 | [Slides](https://www.slideshare.net/AlexeyGrigorev/ml-zoomcamp-3-machine-learning-for-classification) 7 | 8 | 9 | ## Notes 10 | 11 | This video was about training a logistic regression model with Scikit-Learn, applying it to the validation dataset, and calculating its accuracy. 12 | 13 | **Classes, functions, and methods:** 14 | 15 | * `LogisticRegression().fit(x, y)` - Scikit-Learn class for training the logistic regression model on the x feature matrix and the y target. 16 | * `LogisticRegression().coef_[0]` - return the coefficients or weights of the LR model 17 | * `LogisticRegression().intercept_[0]` - return the bias or intercept of the LR model 18 | * `LogisticRegression().predict(x)` - make predictions on the x dataset 19 | * `LogisticRegression().predict_proba(x)` - make predictions on the x dataset by returning two columns with their probabilities for the two categories - soft predictions 20 | 21 | The entire code of this project is available in [this jupyter notebook](https://github.com/alexeygrigorev/mlbookcamp-code/blob/master/chapter-03-churn-prediction/03-churn.ipynb). 22 | 23 | 24 | 25 | 26 | 30 | 31 |
⚠️ 27 | The notes are written by the community.
28 | If you see an error here, please create a PR with a fix. 29 |
32 | 33 | * [Notes from Peter Ernicke](https://knowmledge.com/2023/09/30/ml-zoomcamp-2023-machine-learning-for-classification-part-10/) 34 | 35 | ## Navigation 36 | 37 | * [Machine Learning Zoomcamp course](../) 38 | * [Session 3: Machine Learning for Classification](./) 39 | * Previous: [Logistic regression](09-logistic-regression.md) 40 | * Next: [Model interpretation](11-log-reg-interpretation.md) 41 | -------------------------------------------------------------------------------- /03-classification/11-log-reg-interpretation.md: -------------------------------------------------------------------------------- 1 | 2 | ## 3.11 Model interpretation 3 | 4 | 5 | 6 | [Slides](https://www.slideshare.net/AlexeyGrigorev/ml-zoomcamp-3-machine-learning-for-classification) 7 | 8 | 9 | ## Notes 10 | 11 | This video was about the interpretation of coefficients, and training a model with fewer features. 12 | 13 | In the formula of the logistic regression model, only one of the one-hot encoded categories is multiplied by 1, and the others by 0. In this way, we only consider the appropriate category for each categorical feature. 14 | 15 | **Classes, functions, and methods:** 16 | 17 | * `zip(x,y)` - returns an iterator of tuples that pairs each element of x with the corresponding element of y (wrap it in `list()` or `dict()` to materialize the pairs) 18 | 19 | The entire code of this project is available in [this jupyter notebook](https://github.com/alexeygrigorev/mlbookcamp-code/blob/master/chapter-03-churn-prediction/03-churn.ipynb). 20 | 21 | 22 | 23 | 24 | 28 | 29 |
⚠️ 25 | The notes are written by the community.
26 | If you see an error here, please create a PR with a fix. 27 |
30 | 31 | * [Notes from Peter Ernicke](https://knowmledge.com/2023/10/01/ml-zoomcamp-2023-machine-learning-for-classification-part-11/) 32 | 33 | ## Navigation 34 | 35 | * [Machine Learning Zoomcamp course](../) 36 | * [Session 3: Machine Learning for Classification](./) 37 | * Previous: [Training logistic regression with Scikit-Learn](10-training-log-reg.md) 38 | * Next: [Using the model](12-using-log-reg.md) 39 | -------------------------------------------------------------------------------- /03-classification/12-using-log-reg.md: -------------------------------------------------------------------------------- 1 | 2 | ## 3.12 Using the model 3 | 4 | 5 | 6 | [Slides](https://www.slideshare.net/AlexeyGrigorev/ml-zoomcamp-3-machine-learning-for-classification) 7 | 8 | 9 | ## Notes 10 | 11 | We trained the logistic regression model with the full training dataset (training + validation), considering numerical and categorical features. Then, predictions were made on the test dataset, and we evaluated the model using the accuracy metric. 12 | 13 | In this case, the accuracy on the validation and test datasets was similar, which means that the model is working well. 14 | 15 | 16 | 17 | 18 | 22 | 23 |
⚠️ 19 | The notes are written by the community.
20 | If you see an error here, please create a PR with a fix. 21 |
24 | 25 | * [Notes from Peter Ernicke](https://knowmledge.com/2023/10/01/ml-zoomcamp-2023-machine-learning-for-classification-part-12/) 26 | 27 | ## Navigation 28 | 29 | * [Machine Learning Zoomcamp course](../) 30 | * [Session 3: Machine Learning for Classification](./) 31 | * Previous: [Model interpretation](11-log-reg-interpretation.md) 32 | * Next: [Summary](13-summary.md) 33 | -------------------------------------------------------------------------------- /03-classification/13-summary.md: -------------------------------------------------------------------------------- 1 | 2 | ## 3.13 Summary 3 | 4 | 5 | 6 | [Slides](https://www.slideshare.net/AlexeyGrigorev/ml-zoomcamp-3-machine-learning-for-classification) 7 | 8 | 9 | ## Notes 10 | 11 | In this session, we worked on a project to predict churning in customers from a company. We learned the feature importance of numerical and categorical variables, including risk ratio, mutual information, and correlation coefficient. Also, we understood one-hot encoding and implemented logistic regression with Scikit-Learn. 12 | 13 | 14 | 15 | 16 | 20 | 21 |
⚠️ 17 | The notes are written by the community.
18 | If you see an error here, please create a PR with a fix. 19 |
22 | 23 | 24 | ## Navigation 25 | 26 | * [Machine Learning Zoomcamp course](../) 27 | * [Session 3: Machine Learning for Classification](./) 28 | * Previous: [Using the model](12-using-log-reg.md) 29 | * Next: [Explore more](14-explore-more.md) -------------------------------------------------------------------------------- /03-classification/14-explore-more.md: -------------------------------------------------------------------------------- 1 | 2 | ## 3.14 Explore more 3 | 4 | More things 5 | 6 | * Try to exclude the least useful features 7 | 8 | Use scikit-learn in the project from last week 9 | 10 | * Re-implement train/val/test split using scikit-learn in the project from the last week 11 | * Also, instead of our own linear regression, use `LinearRegression` (not regularized) and `Ridge` (regularized). Find the best regularization parameter for Ridge 12 | * There are other ways to implement one-hot encoding. E.g. using the `OneHotEncoder` class. Check how to use it [here](notebook-scaling-ohe.ipynb). 13 | * Sometimes numerical features require scaling, especially for iterative solvers like "lbfgs". Check how to use `StandardScaler` for that [here](notebook-scaling-ohe.ipynb).
14 | 15 | 16 | Other projects 17 | 18 | * Lead scoring - https://www.kaggle.com/ashydv/leads-dataset 19 | * Default prediction - https://archive.ics.uci.edu/ml/datasets/default+of+credit+card+clients 20 | 21 | 22 | ## Navigation 23 | 24 | * [Machine Learning Zoomcamp course](../) 25 | * [Session 3: Machine Learning for Classification](./) 26 | * Previous: [Summary](13-summary.md) 27 | * Next: [Homework](homework.md) 28 | -------------------------------------------------------------------------------- /03-classification/homework.md: -------------------------------------------------------------------------------- 1 | ## Homework 2 | 3 | * For 2024 cohort homework, check [the 2024 cohort folder](../cohorts/2024/03-classification/homework.md) 4 | * For 2023 cohort homework, check [the 2023 cohort folder](../cohorts/2023/03-classification/homework.md) 5 | * For 2022 cohort homework, check [the 2022 cohort folder](../cohorts/2022/03-classification/homework.md) 6 | * For 2021 cohort homework and solution, check [the 2021 cohort folder](../cohorts/2021/03-classification/) 7 | 8 | 9 | ## Navigation 10 | 11 | * [Machine Learning Zoomcamp course](../) 12 | * [Session 3: Machine Learning for Classification](./) 13 | * Previous: [Explore more](14-explore-more.md) 14 | -------------------------------------------------------------------------------- /03-classification/images/correlation-matrix.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/03-classification/images/correlation-matrix.png -------------------------------------------------------------------------------- /03-classification/images/thumbnail-3-01.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/03-classification/images/thumbnail-3-01.jpg -------------------------------------------------------------------------------- /03-classification/images/thumbnail-3-02.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/03-classification/images/thumbnail-3-02.jpg -------------------------------------------------------------------------------- /03-classification/images/thumbnail-3-03.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/03-classification/images/thumbnail-3-03.jpg -------------------------------------------------------------------------------- /03-classification/images/thumbnail-3-04.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/03-classification/images/thumbnail-3-04.jpg -------------------------------------------------------------------------------- /03-classification/images/thumbnail-3-05.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/03-classification/images/thumbnail-3-05.jpg -------------------------------------------------------------------------------- /03-classification/images/thumbnail-3-06.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/03-classification/images/thumbnail-3-06.jpg 
-------------------------------------------------------------------------------- /03-classification/images/thumbnail-3-07.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/03-classification/images/thumbnail-3-07.jpg -------------------------------------------------------------------------------- /03-classification/images/thumbnail-3-08.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/03-classification/images/thumbnail-3-08.jpg -------------------------------------------------------------------------------- /03-classification/images/thumbnail-3-09.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/03-classification/images/thumbnail-3-09.jpg -------------------------------------------------------------------------------- /03-classification/images/thumbnail-3-10.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/03-classification/images/thumbnail-3-10.jpg -------------------------------------------------------------------------------- /03-classification/images/thumbnail-3-11.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/03-classification/images/thumbnail-3-11.jpg -------------------------------------------------------------------------------- /03-classification/images/thumbnail-3-12.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/03-classification/images/thumbnail-3-12.jpg -------------------------------------------------------------------------------- /03-classification/images/thumbnail-3-13.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/03-classification/images/thumbnail-3-13.jpg -------------------------------------------------------------------------------- /03-classification/meta.json: -------------------------------------------------------------------------------- 1 | { 2 | "data": "meta.csv", 3 | "session": 3, 4 | "name": "Machine Learning for Classification" 5 | } -------------------------------------------------------------------------------- /04-evaluation/01-overview.md: -------------------------------------------------------------------------------- 1 | 2 | ## 4.1 Evaluation metrics: session overview 3 | 4 | 5 | 6 | 7 | [Slides](https://www.slideshare.net/AlexeyGrigorev/ml-zoomcamp-4-evaluation-metrics-for-classification) 8 | 9 | 10 | ## Notes 11 | 12 | The fourth week of Machine Learning Zoomcamp is about different metrics to evaluate a binary classifier. These measures include accuracy, confusion table, precision, recall, ROC curves (TPR, FPR, random model, and ideal model), AUROC, and cross-validation. 13 | 14 | For this project, we used a [Kaggle dataset](https://www.kaggle.com/blastchar/telco-customer-churn) about churn prediction. 15 | 16 | Add notes from the video (PRs are welcome) 17 | 18 | 19 | 20 | 21 | 25 | 26 |
⚠️ 22 | The notes are written by the community.
23 | If you see an error here, please create a PR with a fix. 24 |
27 | 28 | * [Notes from Peter Ernicke](https://knowmledge.com/2023/10/02/ml-zoomcamp-2023-evaluation-metrics-for-classification-part-1/) 29 | 30 | ## Navigation 31 | 32 | * [Machine Learning Zoomcamp course](../) 33 | * [Session 4: Evaluation Metrics for Classification](./) 34 | * Next: [Accuracy and dummy model](02-accuracy.md) 35 | -------------------------------------------------------------------------------- /04-evaluation/06-auc.md: -------------------------------------------------------------------------------- 1 | ## 4.6 ROC AUC 2 | 3 | 4 | 5 | [Slides](https://www.slideshare.net/AlexeyGrigorev/ml-zoomcamp-4-evaluation-metrics-for-classification) 6 | 7 | 8 | ## Notes 9 | 10 | The area under the ROC curve can tell us how good our model is with a single value. The AUROC of a random model is 0.5, while for an ideal one it is 1. 11 | 12 | In other words, AUC can be interpreted as the probability that a randomly selected positive example has a greater score than a randomly selected negative example. 13 | 14 | **Classes and methods:** 15 | 16 | * `auc(x, y)` - sklearn.metrics function for calculating the area under the curve defined by the x and y points. For ROC curves x would be the false positive rate, and y the true positive rate. 17 | * `roc_auc_score(x, y)` - sklearn.metrics function for calculating the area under the ROC curve directly from the true labels `x` and the predicted scores `y`. 18 | * `randint(x, y, size=z)` - np.random function for generating random integers from the “discrete uniform” distribution; from `x` (inclusive) to `y` (exclusive) of size `z`. 19 | 20 | The entire code of this project is available in [this jupyter notebook](notebook.ipynb). 21 | 22 | Add notes from the video (PRs are welcome) 23 | 24 | 25 | 26 | 27 | 31 | 32 |
⚠️ 28 | The notes are written by the community.
29 | If you see an error here, please create a PR with a fix. 30 |
33 | 34 | * [Notes from Peter Ernicke](https://knowmledge.com/2023/10/07/ml-zoomcamp-2023-evaluation-metrics-for-classification-part-6/) 35 | 36 | ## Navigation 37 | 38 | * [Machine Learning Zoomcamp course](../) 39 | * [Session 4: Evaluation Metrics for Classification](./) 40 | * Previous: [ROC Curves](05-roc.md) 41 | * Next: [Cross-Validation](07-cross-validation.md) 42 | -------------------------------------------------------------------------------- /04-evaluation/08-summary.md: -------------------------------------------------------------------------------- 1 | ## 4.8 Summary 2 | 3 | 4 | 5 | [Slides](https://www.slideshare.net/AlexeyGrigorev/ml-zoomcamp-4-evaluation-metrics-for-classification) 6 | 7 | 8 | ## Notes 9 | 10 | General definitions: 11 | 12 | * **Metric:** A single number that describes the performance of a model 13 | * **Accuracy:** Fraction of correct answers; sometimes misleading 14 | * Precision and recall are less misleading when we have class imbalance 15 | * **ROC Curve:** A way to evaluate the performance at all thresholds; okay to use with imbalance 16 | * **K-Fold CV:** More reliable estimate for performance (mean + std) 17 | 18 | In brief, this week was about different metrics to evaluate a binary classifier. These measures included accuracy, confusion table, precision, recall, ROC curves (TPR, FPR, random model, and ideal model), and AUROC. Also, we talked about a different way to estimate the performance of the model and do parameter tuning with cross-validation. 19 | 20 | The code of this project is available in [this jupyter notebook](https://github.com/alexeygrigorev/mlbookcamp-code/blob/master/course-zoomcamp/04-evaluation/notebook.ipynb). 21 | 22 | Add notes from the video (PRs are welcome) 23 | 24 | 25 | 26 | 27 | 31 | 32 |
⚠️ 28 | The notes are written by the community.
29 | If you see an error here, please create a PR with a fix. 30 |
33 | 34 | - [Notes from Maximilien Eyengue](https://github.com/maxim-eyengue/Python-Codes/blob/main/ML_Zoomcamp_2024/04_evaluation/Summary_Session_04.md) 35 | 36 | ## Navigation 37 | 38 | * [Machine Learning Zoomcamp course](../) 39 | * [Session 4: Evaluation Metrics for Classification](./) 40 | * Previous: [Cross-Validation](07-cross-validation.md) 41 | * Next: [Explore more](09-explore-more.md) 42 | -------------------------------------------------------------------------------- /04-evaluation/09-explore-more.md: -------------------------------------------------------------------------------- 1 | ## 4.9 Explore more 2 | 3 | * Check the precision and recall of the dummy classifier that always predicts "FALSE" 4 | * F1 score = 2 P R / (P + R) 5 | * Evaluate precision and recall at different thresholds, plot P vs R - this way you'll get the precision/recall curve (similar to ROC curve) 6 | * Area under the PR curve is also a useful metric 7 | 8 | 9 | ### Other projects 10 | 11 | * Calculate the metrics for the suggested datasets from the previous week 12 | 13 | 14 | ## Navigation 15 | 16 | * [Machine Learning Zoomcamp course](../) 17 | * [Session 4: Evaluation Metrics for Classification](./) 18 | * Previous: [Summary](08-summary.md) 19 | * Next: [Homework](homework.md) -------------------------------------------------------------------------------- /04-evaluation/README.md: -------------------------------------------------------------------------------- 1 | ## 4. 
Evaluation Metrics for Classification 2 | 3 | - 4.1 [Evaluation metrics: session overview](01-overview.md) 4 | - 4.2 [Accuracy and dummy model](02-accuracy.md) 5 | - 4.3 [Confusion table](03-confusion-table.md) 6 | - 4.4 [Precision and Recall](04-precision-recall.md) 7 | - 4.5 [ROC Curves](05-roc.md) 8 | - 4.6 [ROC AUC](06-auc.md) 9 | - 4.7 [Cross-Validation](07-cross-validation.md) 10 | - 4.8 [Summary](08-summary.md) 11 | - 4.9 [Explore more](09-explore-more.md) 12 | - 4.10 [Homework](homework.md) 13 | 14 | ## Community notes 15 | 16 | Did you take notes? You can share them here (or in each unit separately) 17 | * [Some cross-validation methods](https://github.com/razekmaiden/ml_zoomcamp/blob/main/additional_topics/ML_ZOOMCAMP_CROSS_VALIDATION_METHODS.ipynb) 18 | * [Notes from Kwang Yang](https://www.kaggle.com/kwangyangchia/notebook-for-lesson-4-mle) 19 | * [Notes from Sebastián Ayala Ruano](https://github.com/sayalaruano/100DaysOfMLCode/blob/main/Classification/Notes/NotesDay14.md) 20 | * [Notes from Alvaro Navas](https://github.com/ziritrion/ml-zoomcamp/blob/main/notes/04_01_classification_eval_metrics.md) 21 | * [Notes from froukje](https://github.com/froukje/ml-zoomcamp/blob/main/week4/Lecture_4_metrics.ipynb) 22 | * [Notes from Hareesh Tummala](https://github.com/tummala-hareesh/ml_zoomcamp_ht/blob/main/notes/week-4-notes.md) 23 | * [Notes from Memoona Tahira](https://github.com/MemoonaTahira/MLZoomcamp2022/tree/main/Notes/Week_4%20-evaluation_metrics_for_ML_model) 24 | * [Notes from Peter Ernicke](https://knowmledge.com/category/courses/ml-zoomcamp/evaluation-metrics/) 25 | * [Notes from Kemal Dahha](https://github.com/kemaldahha/machine-learning-course/blob/main/week_4_notes.ipynb) 26 | * Add your notes here 27 | 28 | -------------------------------------------------------------------------------- /04-evaluation/homework.md: -------------------------------------------------------------------------------- 1 | ## Homework 2 | 3 | * For 2024 cohort homework, 
check [the 2024 cohort folder](../cohorts/2024/04-evaluation/homework.md) 4 | * For 2023 cohort homework, check [the 2023 cohort folder](../cohorts/2023/04-evaluation/homework.md) 5 | * For 2022 cohort homework, check [the 2022 cohort folder](../cohorts/2022/04-evaluation/homework.md) 6 | * For 2021 cohort homework and solution, check [the 2021 cohort folder](../cohorts/2021/04-evaluation/) 7 | 8 | 9 | ## Navigation 10 | 11 | * [Machine Learning Zoomcamp course](../) 12 | * [Session 4: Evaluation Metrics for Classification](./) 13 | * Previous: [Explore more](09-explore-more.md) 14 | -------------------------------------------------------------------------------- /04-evaluation/images/TPR_FPR.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/04-evaluation/images/TPR_FPR.png -------------------------------------------------------------------------------- /04-evaluation/images/classification_metrics.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/04-evaluation/images/classification_metrics.png -------------------------------------------------------------------------------- /04-evaluation/images/confusion_matrix.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/04-evaluation/images/confusion_matrix.png -------------------------------------------------------------------------------- /04-evaluation/images/thumbnail-4-01.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/04-evaluation/images/thumbnail-4-01.jpg -------------------------------------------------------------------------------- /04-evaluation/images/thumbnail-4-02.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/04-evaluation/images/thumbnail-4-02.jpg -------------------------------------------------------------------------------- /04-evaluation/images/thumbnail-4-03.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/04-evaluation/images/thumbnail-4-03.jpg -------------------------------------------------------------------------------- /04-evaluation/images/thumbnail-4-04.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/04-evaluation/images/thumbnail-4-04.jpg -------------------------------------------------------------------------------- /04-evaluation/images/thumbnail-4-05-cont.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/04-evaluation/images/thumbnail-4-05-cont.jpg -------------------------------------------------------------------------------- /04-evaluation/images/thumbnail-4-05.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/04-evaluation/images/thumbnail-4-05.jpg 
-------------------------------------------------------------------------------- /04-evaluation/images/thumbnail-4-06.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/04-evaluation/images/thumbnail-4-06.jpg -------------------------------------------------------------------------------- /04-evaluation/images/thumbnail-4-07.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/04-evaluation/images/thumbnail-4-07.jpg -------------------------------------------------------------------------------- /04-evaluation/images/thumbnail-4-08.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/04-evaluation/images/thumbnail-4-08.jpg -------------------------------------------------------------------------------- /04-evaluation/meta.csv: -------------------------------------------------------------------------------- 1 | lesson,name,page_name,video,slides,notebook 2 | 1,Evaluation metrics: session overview,01-overview.md,https://www.youtube.com/watch?v=gmg5jw1bM8A,https://www.slideshare.net/AlexeyGrigorev/ml-zoomcamp-4-evaluation-metrics-for-classification,notebook.ipynb 3 | 2,Accuracy and dummy model,02-accuracy.md,https://www.youtube.com/watch?v=FW_l7lB0HUI,https://www.slideshare.net/AlexeyGrigorev/ml-zoomcamp-4-evaluation-metrics-for-classification,notebook.ipynb 4 | 3,Confusion table,03-confusion-table.md,https://www.youtube.com/watch?v=Jt2dDLSlBng,https://www.slideshare.net/AlexeyGrigorev/ml-zoomcamp-4-evaluation-metrics-for-classification,notebook.ipynb 5 | 4,Precision and 
Recall,04-precision-recall.md,https://www.youtube.com/watch?v=gRLP_mlglMM,https://www.slideshare.net/AlexeyGrigorev/ml-zoomcamp-4-evaluation-metrics-for-classification,notebook.ipynb 6 | 5,ROC Curves,05-roc.md,https://www.youtube.com/watch?v=dnBZLk53sQI,https://www.slideshare.net/AlexeyGrigorev/ml-zoomcamp-4-evaluation-metrics-for-classification,notebook.ipynb 7 | 6,ROC AUC,06-auc.md,https://www.youtube.com/watch?v=hvIQPAwkVZo,https://www.slideshare.net/AlexeyGrigorev/ml-zoomcamp-4-evaluation-metrics-for-classification,notebook.ipynb 8 | 7,Cross-Validation,07-cross-validation.md,https://www.youtube.com/watch?v=BIIZaVtUbf4,https://www.slideshare.net/AlexeyGrigorev/ml-zoomcamp-4-evaluation-metrics-for-classification,notebook.ipynb 9 | 8,Summary,08-summary.md,https://www.youtube.com/watch?v=-v8XEQ2AHvQ,https://www.slideshare.net/AlexeyGrigorev/ml-zoomcamp-4-evaluation-metrics-for-classification,notebook.ipynb 10 | 9,Explore more,09-explore-more.md,,,notebook.ipynb 11 | 10,Homework,homework.md,,,notebook.ipynb -------------------------------------------------------------------------------- /04-evaluation/meta.json: -------------------------------------------------------------------------------- 1 | { 2 | "data": "meta.csv", 3 | "session": 4, 4 | "name": "Evaluation Metrics for Classification" 5 | } -------------------------------------------------------------------------------- /05-deployment/08-summary.md: -------------------------------------------------------------------------------- 1 | ## 5.8 Summary 2 | 3 | 4 | 5 | [Slides](https://www.slideshare.net/AlexeyGrigorev/ml-zoomcamp-5-model-deployment) 6 | 7 | 8 | ## Notes 9 | In this chapter we learned these topics: 10 | - We learned how to save the model and load it to re-use it without running the previous code. 11 | - How to deploy the model in a web service. 12 | - How to create a virtual environment. 13 | - How to create a container and run our code in any operating systems. 
14 | - How to deploy our code in a public web service and access it externally from outside a local computer. 15 | 16 | In the next chapter we will learn about algorithms such as Decision trees, Random forests and Gradient boosting as an alternative way of combining decision trees. 17 | 18 | Add notes from the video (PRs are welcome) 19 | 20 | 21 | 22 | 23 | 24 | 28 | 29 |
⚠️ 25 | The notes are written by the community.
26 | If you see an error here, please create a PR with a fix. 27 |
30 | 31 | ## Navigation 32 | 33 | * [Machine Learning Zoomcamp course](../) 34 | * [Session 5: Deploying Machine Learning Models](./) 35 | * Previous: [Deployment to the cloud: AWS Elastic Beanstalk (optional)](07-aws-eb.md) 36 | * Next: [Explore more](09-explore-more.md) 37 | -------------------------------------------------------------------------------- /05-deployment/09-explore-more.md: -------------------------------------------------------------------------------- 1 | 2 | ## 5.9 Explore more 3 | 4 | * Flask is not the only framework for creating web services. Try others, e.g. FastAPI. 5 | * Experiment with other ways of managing environment, e.g. virtual env, conda, poetry. 6 | * Explore other ways of deploying web services, e.g. GCP, Azure, Heroku, Python Anywhere, etc. 7 | 8 | 9 | Add notes from the video (PRs are welcome) 10 | 11 | 12 | 13 | 14 | 15 | 19 | 20 |
⚠️ 16 | The notes are written by the community.
17 | If you see an error here, please create a PR with a fix. 18 |
21 | 22 | 23 | ## Navigation 24 | 25 | * [Machine Learning Zoomcamp course](../) 26 | * [Session 5: Deploying Machine Learning Models](./) 27 | * Previous: [Summary](08-summary.md) 28 | * Next: [Homework](homework.md) 29 | -------------------------------------------------------------------------------- /05-deployment/code/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.8.12-slim 2 | 3 | RUN pip install pipenv 4 | 5 | WORKDIR /app 6 | 7 | COPY ["Pipfile", "Pipfile.lock", "./"] 8 | 9 | RUN pipenv install --system --deploy 10 | 11 | COPY ["predict.py", "model_C=1.0.bin", "./"] 12 | 13 | EXPOSE 9696 14 | 15 | ENTRYPOINT ["gunicorn", "--bind=0.0.0.0:9696", "predict:app"] -------------------------------------------------------------------------------- /05-deployment/code/Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | url = "https://pypi.org/simple" 3 | verify_ssl = true 4 | name = "pypi" 5 | 6 | [packages] 7 | numpy = "*" 8 | scikit-learn = "==0.24.2" 9 | flask = "*" 10 | gunicorn = "*" 11 | 12 | [dev-packages] 13 | 14 | [requires] 15 | python_version = "3.8" 16 | -------------------------------------------------------------------------------- /05-deployment/code/model_C=1.0.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/05-deployment/code/model_C=1.0.bin -------------------------------------------------------------------------------- /05-deployment/code/ping.py: -------------------------------------------------------------------------------- 1 | from flask import Flask 2 | 3 | app = Flask('ping') 4 | 5 | @app.route('/ping', methods=['GET']) 6 | def ping(): 7 | return "PONG" 8 | 9 | if __name__ == "__main__": 10 | app.run(debug=True, host='0.0.0.0', port=9696) 
-------------------------------------------------------------------------------- /05-deployment/code/predict-test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | import requests 5 | 6 | 7 | url = 'http://localhost:9696/predict' 8 | 9 | customer_id = 'xyz-123' 10 | customer = { 11 | "gender": "female", 12 | "seniorcitizen": 0, 13 | "partner": "yes", 14 | "dependents": "no", 15 | "phoneservice": "no", 16 | "multiplelines": "no_phone_service", 17 | "internetservice": "dsl", 18 | "onlinesecurity": "no", 19 | "onlinebackup": "yes", 20 | "deviceprotection": "no", 21 | "techsupport": "no", 22 | "streamingtv": "no", 23 | "streamingmovies": "no", 24 | "contract": "month-to-month", 25 | "paperlessbilling": "yes", 26 | "paymentmethod": "electronic_check", 27 | "tenure": 24, 28 | "monthlycharges": 29.85, 29 | "totalcharges": (24 * 29.85) 30 | } 31 | 32 | 33 | response = requests.post(url, json=customer).json() 34 | print(response) 35 | 36 | if response['churn'] == True: 37 | print('sending promo email to %s' % customer_id) 38 | else: 39 | print('not sending promo email to %s' % customer_id) -------------------------------------------------------------------------------- /05-deployment/code/predict.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | 3 | from flask import Flask 4 | from flask import request 5 | from flask import jsonify 6 | 7 | 8 | model_file = 'model_C=1.0.bin' 9 | 10 | with open(model_file, 'rb') as f_in: 11 | dv, model = pickle.load(f_in) 12 | 13 | app = Flask('churn') 14 | 15 | @app.route('/predict', methods=['POST']) 16 | def predict(): 17 | customer = request.get_json() 18 | 19 | X = dv.transform([customer]) 20 | y_pred = model.predict_proba(X)[0, 1] 21 | churn = y_pred >= 0.5 22 | 23 | result = { 24 | 'churn_probability': float(y_pred), 25 | 'churn': bool(churn) 26 | } 27 | 28 | return jsonify(result) 29 | 30 | 
31 | if __name__ == "__main__": 32 | app.run(debug=True, host='0.0.0.0', port=9696) -------------------------------------------------------------------------------- /05-deployment/homework.md: -------------------------------------------------------------------------------- 1 | ## Homework 2 | 3 | 4 | * For 2024 cohort homework, check [the 2024 cohort folder](../cohorts/2024/) 5 | * For 2023 cohort homework, check [the 2023 cohort folder](../cohorts/2023/) 6 | * For 2022 cohort homework, check [the 2022 cohort folder](../cohorts/2022/) 7 | * For 2021 cohort homework and solution, check [the 2021 cohort folder](../cohorts/2021/05-deployment/) 8 | 9 | 10 | ## Navigation 11 | 12 | * [Machine Learning Zoomcamp course](../) 13 | * [Session 5: Deploying Machine Learning Models](./) 14 | * Previous: [Explore more](09-explore-more.md) 15 | -------------------------------------------------------------------------------- /05-deployment/images/thumbnail-5-01.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/05-deployment/images/thumbnail-5-01.jpg -------------------------------------------------------------------------------- /05-deployment/images/thumbnail-5-02.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/05-deployment/images/thumbnail-5-02.jpg -------------------------------------------------------------------------------- /05-deployment/images/thumbnail-5-03.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/05-deployment/images/thumbnail-5-03.jpg 
-------------------------------------------------------------------------------- /05-deployment/images/thumbnail-5-04.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/05-deployment/images/thumbnail-5-04.jpg -------------------------------------------------------------------------------- /05-deployment/images/thumbnail-5-05.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/05-deployment/images/thumbnail-5-05.jpg -------------------------------------------------------------------------------- /05-deployment/images/thumbnail-5-06.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/05-deployment/images/thumbnail-5-06.jpg -------------------------------------------------------------------------------- /05-deployment/images/thumbnail-5-07.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/05-deployment/images/thumbnail-5-07.jpg -------------------------------------------------------------------------------- /05-deployment/images/thumbnail-5-08.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/05-deployment/images/thumbnail-5-08.jpg -------------------------------------------------------------------------------- /05-deployment/meta.csv: -------------------------------------------------------------------------------- 1 | 
lesson,name,page_name,video,slides 2 | 1,Intro / Session overview,01-intro.md,https://www.youtube.com/watch?v=agIFak9A3m8,https://www.slideshare.net/AlexeyGrigorev/ml-zoomcamp-5-model-deployment 3 | 2,Saving and loading the model,02-pickle.md,https://www.youtube.com/watch?v=EJpqZ7OlwFU,https://www.slideshare.net/AlexeyGrigorev/ml-zoomcamp-5-model-deployment 4 | 3,Web services: introduction to Flask,03-flask-intro.md,https://www.youtube.com/watch?v=W7ubna1Rfv8,https://www.slideshare.net/AlexeyGrigorev/ml-zoomcamp-5-model-deployment 5 | 4,Serving the churn model with Flask,04-flask-deployment.md,https://www.youtube.com/watch?v=Q7ZWPgPnRz8,https://www.slideshare.net/AlexeyGrigorev/ml-zoomcamp-5-model-deployment 6 | 5,Python virtual environment: Pipenv,05-pipenv.md,https://www.youtube.com/watch?v=BMXh8JGROHM,https://www.slideshare.net/AlexeyGrigorev/ml-zoomcamp-5-model-deployment 7 | 6,Environment management: Docker,06-docker.md,https://www.youtube.com/watch?v=wAtyYZ6zvAs,https://www.slideshare.net/AlexeyGrigorev/ml-zoomcamp-5-model-deployment 8 | 7,Deployment to the cloud: AWS Elastic Beanstalk (optional),07-aws-eb.md,https://www.youtube.com/watch?v=HGPJ4ekhcLg,https://www.slideshare.net/AlexeyGrigorev/ml-zoomcamp-5-model-deployment 9 | 8,Summary,08-summary.md,https://www.youtube.com/watch?v=sSAqYSk7Br4,https://www.slideshare.net/AlexeyGrigorev/ml-zoomcamp-5-model-deployment 10 | 9,Explore more,09-explore-more.md,, 11 | 10,Homework,homework.md,, -------------------------------------------------------------------------------- /05-deployment/meta.json: -------------------------------------------------------------------------------- 1 | { 2 | "data": "meta.csv", 3 | "session": 5, 4 | "name": "Deploying Machine Learning Models" 5 | } -------------------------------------------------------------------------------- /06-trees/01-credit-risk.md: -------------------------------------------------------------------------------- 1 | 2 | ## 6.1 Credit risk scoring project 3 | 4 | 
5 | 6 | 7 | [Slides](https://www.slideshare.net/AlexeyGrigorev/ml-zoomcamp-6-decision-trees-and-ensemble-learning) 8 | 9 | 10 | ## Notes 11 | 12 | In this session we'll learn about decision trees and ensemble learning algorithms. The questions that we try to address this week are, "What are decision trees? How are they different from ensemble algorithms? How can we implement and fine-tune these models to make binary classification predictions?" 13 | 14 | To be specific, we'll use [credit scoring data](https://github.com/gastonstat/CreditScoring) to build a model that predicts whether a bank should lend money to a client or not. The bank makes these decisions based on the historical record. 15 | 16 | In the credit scoring classification problem, 17 | - if the model returns 0, this means the client is very likely to pay back the loan and the bank will approve the loan. 18 | - if the model returns 1, then the client is considered a `defaulter` and the bank may not approve the loan. 19 | 20 | Add notes from the video (PRs are welcome) 21 | 22 | 23 | 24 | 25 | 26 | 30 | 31 |
⚠️ 27 | The notes are written by the community.
28 | If you see an error here, please create a PR with a fix. 29 |
32 | 33 | * [Notes from Peter Ernicke](https://knowmledge.com/2023/10/16/ml-zoomcamp-2023-decision-trees-and-ensemble-learning-part-1/) 34 | 35 | ## Navigation 36 | 37 | * [Machine Learning Zoomcamp course](../) 38 | * [Session 6: Decision Trees and Ensemble Learning](./) 39 | * Next: [Data cleaning and preparation](02-data-prep.md) 40 | -------------------------------------------------------------------------------- /06-trees/09-final-model.md: -------------------------------------------------------------------------------- 1 | ## 6.9 Selecting the best model 2 | 3 | 4 | 5 | [Slides](https://www.slideshare.net/AlexeyGrigorev/ml-zoomcamp-6-decision-trees-and-ensemble-learning) 6 | 7 | 8 | ## Notes 9 | 10 | We select the final model from decision tree, random forest, or XGBoost based on the best AUC scores. After that we prepare `df_full_train` and `df_test` to train and evaluate the final model. If there is not much difference between the model's AUC scores on the train and test data, then the model has generalized the patterns well enough. 11 | 12 | Generally, XGBoost models perform better on tabular data than other machine learning models, but the downside is that these models are easy to overfit because of the high number of hyperparameters. Therefore, XGBoost models require a lot more attention to parameter tuning to optimize them. 13 | 14 | Add notes from the video (PRs are welcome) 15 | 16 | 17 | 18 | 19 | 20 | 24 | 25 |
⚠️ 21 | The notes are written by the community.
22 | If you see an error here, please create a PR with a fix. 23 |
26 | 27 | * [Notes from Peter Ernicke](https://knowmledge.com/2023/10/29/ml-zoomcamp-2023-decision-trees-and-ensemble-learning-part-14/) 28 | 29 | ## Navigation 30 | 31 | * [Machine Learning Zoomcamp course](../) 32 | * [Session 6: Decision Trees and Ensemble Learning](./) 33 | * Previous: [XGBoost parameter tuning](08-xgb-tuning.md) 34 | * Next: [Summary](10-summary.md) 35 | -------------------------------------------------------------------------------- /06-trees/10-summary.md: -------------------------------------------------------------------------------- 1 | ## 6.10 Summary 2 | 3 | 4 | 5 | [Slides](https://www.slideshare.net/AlexeyGrigorev/ml-zoomcamp-6-decision-trees-and-ensemble-learning) 6 | 7 | 8 | ## Notes 9 | 10 | - Decision trees learn if-then-else rules from data. 11 | - Finding the best split: select the least impure split. This algorithm can overfit; that's why we control it by limiting the max depth and the size of the group. 12 | - Random forest is a way of combining multiple decision trees. It should have a diverse set of models to make good predictions. 13 | - Gradient boosting trains models sequentially: each model tries to fix errors of the previous model. XGBoost is an implementation of gradient boosting. 14 | 15 | Add notes from the video (PRs are welcome) 16 | 17 | 18 | 19 | 20 | 21 | 25 | 26 |
⚠️ 22 | The notes are written by the community.
23 | If you see an error here, please create a PR with a fix. 24 |
27 | 28 | 29 | ## Navigation 30 | 31 | * [Machine Learning Zoomcamp course](../) 32 | * [Session 6: Decision Trees and Ensemble Learning](./) 33 | * Previous: [Selecting the best model](09-final-model.md) 34 | * Next: [Explore more](11-explore-more.md) 35 | -------------------------------------------------------------------------------- /06-trees/11-explore-more.md: -------------------------------------------------------------------------------- 1 | 2 | ## 6.11 Explore more 3 | 4 | * For this dataset we didn't do EDA or feature engineering. You can do it to get more insights into the problem. 5 | * For random forest, there are more parameters that we can tune. Check max_features and bootstrap. 6 | * There's a variation of random forest called "extremely randomized trees", or "extra trees". Instead of selecting the best split among all possible thresholds, it selects a few thresholds randomly and picks the best one among them. Because of that extra trees are less prone to overfitting. In Scikit-Learn, they are implemented in ExtraTreesClassifier. Try it for this project. 7 | * XGBoost can deal with NAs - we don't have to do fillna for it. Check if not filling NAs helps improve performance. 8 | * Experiment with other XGBoost parameters: subsample and colsample_bytree. 9 | * When selecting the best split, decision trees find the most useful features. This information can be used for understanding which features are more important than others. See example here for random forest (it's the same for plain decision trees) and for xgboost 10 | * Trees can also be used for solving the regression problems: check DecisionTreeRegressor, RandomForestRegressor and the objective=reg:squarederror parameter for XGBoost. 11 | 12 | ## Notes 13 | 14 | Add notes from the video (PRs are welcome) 15 | 16 | 17 | 18 | 19 | 20 | 24 | 25 |
⚠️ 21 | The notes are written by the community.
22 | If you see an error here, please create a PR with a fix. 23 |
26 | 27 | 28 | ## Navigation 29 | 30 | * [Machine Learning Zoomcamp course](../) 31 | * [Session 6: Decision Trees and Ensemble Learning](./) 32 | * Previous: [Summary](10-summary.md) 33 | * Next: [Homework](homework.md) 34 | -------------------------------------------------------------------------------- /06-trees/README.md: -------------------------------------------------------------------------------- 1 | ## 6. Decision Trees and Ensemble Learning 2 | 3 | - 6.1 [Credit risk scoring project](01-credit-risk.md) 4 | - 6.2 [Data cleaning and preparation](02-data-prep.md) 5 | - 6.3 [Decision trees](03-decision-trees.md) 6 | - 6.4 [Decision tree learning algorithm](04-decision-tree-learning.md) 7 | - 6.5 [Decision trees parameter tuning](05-decision-tree-tuning.md) 8 | - 6.6 [Ensemble learning and random forest](06-random-forest.md) 9 | - 6.7 [Gradient boosting and XGBoost](07-boosting.md) 10 | - 6.8 [XGBoost parameter tuning](08-xgb-tuning.md) 11 | - 6.9 [Selecting the best model](09-final-model.md) 12 | - 6.10 [Summary](10-summary.md) 13 | - 6.11 [Explore more](11-explore-more.md) 14 | - 6.12 [Homework](homework.md) 15 | 16 | 17 | ## Community notes 18 | 19 | Did you take notes? 
You can share them here (or in each unit separately) 20 | 21 | * [Kwang Yang's Notes](https://www.kaggle.com/kwangyangchia/notebook-for-lesson-6-mle) 22 | * [Alvaro Navas' Notes](https://github.com/ziritrion/ml-zoomcamp/blob/main/notes/06_trees.md) 23 | * [Notes from froukje](https://github.com/froukje/ml-zoomcamp/blob/main/week6/Lecture_6_Decision_Trees_Ensemble_Learning.ipynb) 24 | * [Decision Trees and Ensembled Learning by Oscar Garcia](https://github.com/ozkary/machine-learning-engineering/tree/main/06-trees) 25 | * [Notes from Peter Ernicke](https://knowmledge.com/category/courses/ml-zoomcamp/decision-trees/) 26 | * [Notes from Maximilien Eyengue](https://github.com/maxim-eyengue/Python-Codes/blob/main/ML_Zoomcamp_2024/06_trees/Summary_Session_06.md) 27 | * [Notes from Kemal Dahha](https://github.com/kemaldahha/machine-learning-course/blob/main/week_6_notes.ipynb) 28 | * Add your notes here 29 | -------------------------------------------------------------------------------- /06-trees/homework.md: -------------------------------------------------------------------------------- 1 | ## Homework 2 | 3 | * For 2024 cohort homework, check [the 2024 cohort folder](../cohorts/2024/06-trees/homework.md) 4 | * For 2023 cohort homework, check [the 2023 cohort folder](../cohorts/2023/) 5 | * For 2022 cohort homework, check [the 2022 cohort folder](../cohorts/2022/) 6 | * For 2021 cohort homework and solution, check [the 2021 cohort folder](../cohorts/2021/06-trees/) 7 | 8 | 9 | ## Navigation 10 | 11 | * [Machine Learning Zoomcamp course](../) 12 | * [Session 6: Decision Trees and Ensemble Learning](./) 13 | * Previous: [Explore more](11-explore-more.md) 14 | -------------------------------------------------------------------------------- /06-trees/images/thumbnail-6-01.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/06-trees/images/thumbnail-6-01.jpg -------------------------------------------------------------------------------- /06-trees/images/thumbnail-6-02.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/06-trees/images/thumbnail-6-02.jpg -------------------------------------------------------------------------------- /06-trees/images/thumbnail-6-03.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/06-trees/images/thumbnail-6-03.jpg -------------------------------------------------------------------------------- /06-trees/images/thumbnail-6-04.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/06-trees/images/thumbnail-6-04.jpg -------------------------------------------------------------------------------- /06-trees/images/thumbnail-6-05.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/06-trees/images/thumbnail-6-05.jpg -------------------------------------------------------------------------------- /06-trees/images/thumbnail-6-06.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/06-trees/images/thumbnail-6-06.jpg -------------------------------------------------------------------------------- 
/06-trees/images/thumbnail-6-07.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/06-trees/images/thumbnail-6-07.jpg -------------------------------------------------------------------------------- /06-trees/images/thumbnail-6-08.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/06-trees/images/thumbnail-6-08.jpg -------------------------------------------------------------------------------- /06-trees/images/thumbnail-6-09.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/06-trees/images/thumbnail-6-09.jpg -------------------------------------------------------------------------------- /06-trees/images/thumbnail-6-10.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/06-trees/images/thumbnail-6-10.jpg -------------------------------------------------------------------------------- /06-trees/images/thumbnail-6-12.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/06-trees/images/thumbnail-6-12.jpg -------------------------------------------------------------------------------- /06-trees/meta.json: -------------------------------------------------------------------------------- 1 | { 2 | "data": "meta.csv", 3 | "session": 6, 4 | "name": "Decision Trees and Ensemble Learning" 5 | } 
-------------------------------------------------------------------------------- /08-deep-learning/07-checkpointing.md: -------------------------------------------------------------------------------- 1 | ## 8.7 Checkpointing 2 | 3 | 4 | 5 | [Slides](https://www.slideshare.net/AlexeyGrigorev/ml-zoomcamp-8-neural-networks-and-deep-learning-250592316) 6 | 7 | 8 | The `ModelCheckpoint` callback is used while training the model to save the model or its weights in a checkpoint file at some interval, so the model or weights can be loaded later to continue training from the saved state or to use for deployment. 9 | 10 | **Classes, functions, and attributes**: 11 | 12 | - `keras.callbacks.ModelCheckpoint`: ModelCheckpoint class from keras callbacks api 13 | - `filepath`: path to save the model file 14 | - `monitor`: the metric name to monitor 15 | - `save_best_only`: only save when the model is considered the best according to the metric provided in `monitor` 16 | - `mode`: overwrite the save file based on either the maximum or the minimum score according to the metric provided in `monitor` 17 | 18 | ## Notes 19 | 20 | Add notes from the video (PRs are welcome) 21 | 22 | * checkpointing saves the model after each training iteration 23 | * checkpoint conditions may include reaching the best performance 24 | * keras callbacks 25 | 26 | 27 | 28 | 29 | 33 | 34 |
⚠️ 30 | The notes are written by the community.
31 | If you see an error here, please create a PR with a fix. 32 |
35 | 36 | * [Notes from Peter Ernicke](https://knowmledge.com/2023/11/24/ml-zoomcamp-2023-deep-learning-part-9/) 37 | 38 | ## Navigation 39 | 40 | * [Machine Learning Zoomcamp course](../) 41 | * [Session 8: Neural Networks and Deep Learning](./) 42 | * Previous: [Adjusting the learning rate](06-learning-rate.md) 43 | * Next: [Adding more layers](08-more-layers.md) 44 | -------------------------------------------------------------------------------- /08-deep-learning/11-large-model.md: -------------------------------------------------------------------------------- 1 | ## 8.11 Training a larger model 2 | 3 | 4 | 5 | [Slides](https://www.slideshare.net/AlexeyGrigorev/ml-zoomcamp-8-neural-networks-and-deep-learning-250592316) 6 | 7 | 8 | In this section we increase the image input size from `150` to `299`, reduce the number of data augmentation parameters and lower the learning rate. This gives us better results than any of the previous experiments. 9 | 10 | ## Notes 11 | 12 | Add notes from the video (PRs are welcome) 13 | 14 | 15 | 16 | 17 | 18 | 22 | 23 |
⚠️ 19 | The notes are written by the community.
20 | If you see an error here, please create a PR with a fix. 21 |
24 | 25 | * [Notes from Peter Ernicke](https://knowmledge.com/2023/11/28/ml-zoomcamp-2023-deep-learning-part-13/) 26 | 27 | ## Navigation 28 | 29 | * [Machine Learning Zoomcamp course](../) 30 | * [Session 8: Neural Networks and Deep Learning](./) 31 | * Previous: [Data augmentation](10-augmentation.md) 32 | * Next: [Using the model](12-using-model.md) 33 | -------------------------------------------------------------------------------- /08-deep-learning/13-summary.md: -------------------------------------------------------------------------------- 1 | ## 8.13 Summary 2 | 3 | 4 | 5 | - We can use pre-trained models for general image classification 6 | - Convolutional layers let us turn an image into a vector 7 | - Dense layers use the vector to make the predictions 8 | - Instead of training a model from scratch, we can use transfer learning and re-use already trained convolutional layers 9 | - First, train a small model (150x150) before training a big one (299x299) 10 | - Learning rate - how fast the model trains. Fast learners aren't always the best ones 11 | - We can save the best model using callbacks and checkpointing 12 | - To avoid overfitting, use dropout and augmentation 13 | 14 | ## Notes 15 | 16 | Add notes from the video (PRs are welcome) 17 | 18 | 19 | 20 | 21 | 22 | 26 | 27 |
⚠️ 23 | The notes are written by the community.
24 | If you see an error here, please create a PR with a fix. 25 |
28 | 29 | 30 | ## Navigation 31 | 32 | * [Machine Learning Zoomcamp course](../) 33 | * [Session 8: Neural Networks and Deep Learning](./) 34 | * Previous: [Using the model](12-using-model.md) 35 | * Next: [Explore more](14-explore-more.md) -------------------------------------------------------------------------------- /08-deep-learning/14-explore-more.md: -------------------------------------------------------------------------------- 1 | ## 8.14 Explore more 2 | 3 | **TODO** 4 | 5 | - Add more data, e.g., Zalando etc 6 | - Albumentations - another way of generating augmentations 7 | - Use PyTorch or MXNet instead of TensorFlow/Keras 8 | - In addition to Xception, there are other architectures - try them 9 | 10 | **Other projects:** 11 | 12 | - cats vs dogs 13 | - Hotdog vs not hotdog 14 | - Category of images 15 | 16 | ## Navigation 17 | 18 | * [Machine Learning Zoomcamp course](../) 19 | * [Session 8: Neural Networks and Deep Learning](./) 20 | * Previous: [Summary](13-summary.md) 21 | * Next: [Homework](homework.md) -------------------------------------------------------------------------------- /08-deep-learning/homework.md: -------------------------------------------------------------------------------- 1 | ## Homework 2 | * For 2024 cohort homework, check [the 2024 cohort folder](../cohorts/2024/08-deep-learning/) 3 | * For 2023 cohort homework, check [the 2023 cohort folder](../cohorts/2023/08-deep-learning/) 4 | * For 2022 cohort homework, check [the 2022 cohort folder](../cohorts/2022/) 5 | * For 2021 cohort homework and solution, check [the 2021 cohort folder](../cohorts/2021/08-deep-learning/) 6 | 7 | 8 | ## Navigation 9 | 10 | * [Machine Learning Zoomcamp course](../) 11 | * [Session 8: Neural Networks and Deep Learning](./) 12 | * Previous: [Explore more](14-explore-more.md) 13 | -------------------------------------------------------------------------------- /08-deep-learning/images/thumbnail-8-01.jpg:
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/08-deep-learning/images/thumbnail-8-01.jpg -------------------------------------------------------------------------------- /08-deep-learning/images/thumbnail-8-01b.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/08-deep-learning/images/thumbnail-8-01b.jpg -------------------------------------------------------------------------------- /08-deep-learning/images/thumbnail-8-02.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/08-deep-learning/images/thumbnail-8-02.jpg -------------------------------------------------------------------------------- /08-deep-learning/images/thumbnail-8-03.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/08-deep-learning/images/thumbnail-8-03.jpg -------------------------------------------------------------------------------- /08-deep-learning/images/thumbnail-8-04.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/08-deep-learning/images/thumbnail-8-04.jpg -------------------------------------------------------------------------------- /08-deep-learning/images/thumbnail-8-05.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/08-deep-learning/images/thumbnail-8-05.jpg -------------------------------------------------------------------------------- /08-deep-learning/images/thumbnail-8-06.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/08-deep-learning/images/thumbnail-8-06.jpg -------------------------------------------------------------------------------- /08-deep-learning/images/thumbnail-8-07.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/08-deep-learning/images/thumbnail-8-07.jpg -------------------------------------------------------------------------------- /08-deep-learning/images/thumbnail-8-08.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/08-deep-learning/images/thumbnail-8-08.jpg -------------------------------------------------------------------------------- /08-deep-learning/images/thumbnail-8-09.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/08-deep-learning/images/thumbnail-8-09.jpg -------------------------------------------------------------------------------- /08-deep-learning/images/thumbnail-8-10.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/08-deep-learning/images/thumbnail-8-10.jpg 
-------------------------------------------------------------------------------- /08-deep-learning/images/thumbnail-8-11.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/08-deep-learning/images/thumbnail-8-11.jpg -------------------------------------------------------------------------------- /08-deep-learning/images/thumbnail-8-12.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/08-deep-learning/images/thumbnail-8-12.jpg -------------------------------------------------------------------------------- /08-deep-learning/images/thumbnail-8-13.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/08-deep-learning/images/thumbnail-8-13.jpg -------------------------------------------------------------------------------- /08-deep-learning/meta.json: -------------------------------------------------------------------------------- 1 | { 2 | "data": "meta.csv", 3 | "session": 8, 4 | "name": "Neural Networks and Deep Learning" 5 | } -------------------------------------------------------------------------------- /09-serverless/01-intro.md: -------------------------------------------------------------------------------- 1 | 2 | ## 9.1 Introduction to Serverless 3 | 4 | 5 | 6 | In the last session, we built and trained a clothes classification deep learning model using `Keras` and `TensorFlow`. This session focuses on deploying it. The model categorizes images of clothing items (e.g., 👕 t-shirts, 👖 pants, etc.) uploaded by users on a website. 
Deployment will be done using **AWS Lambda**, a serverless solution to execute code without managing servers, and instead of `TensorFlow`, we will use `TensorFlow-lite`. 7 | 8 | Refer to [updates.md](updates.md) for info on running TF lite 9 | in 2024. 10 | 11 | 12 | ## Notes 13 | 14 | Add notes from the video (PRs are welcome) 15 | 16 | * introduction to the topic of the week: deploying a deep learning model to the cloud, aws lambda and tensorflow lite 17 | 18 | 19 | 20 | 21 | 25 | 26 |
⚠️ 22 | The notes are written by the community.
23 | If you see an error here, please create a PR with a fix. 24 |
27 | 28 | * [Notes from Peter Ernicke](https://knowmledge.com/2023/11/30/ml-zoomcamp-2023-serverless-part-1/) 29 | 30 | ## Navigation 31 | 32 | * [Machine Learning Zoomcamp course](../) 33 | * [Session 9: Serverless Deep Learning](./) 34 | * Next: [AWS Lambda](02-aws-lambda.md) 35 | -------------------------------------------------------------------------------- /09-serverless/04-preparing-code.md: -------------------------------------------------------------------------------- 1 | 2 | ## 9.4 Preparing the code for Lambda 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | ## Notes 11 | 12 | Add notes from the video (PRs are welcome) 13 | 14 | 15 | 16 | 17 | 18 | 22 | 23 |
⚠️ 19 | The notes are written by the community.
20 | If you see an error here, please create a PR with a fix. 21 |
24 | 25 | * [Notes from Peter Ernicke](https://knowmledge.com/2023/12/03/ml-zoomcamp-2023-serverless-part-4/) 26 | 27 | ## Navigation 28 | 29 | * [Machine Learning Zoomcamp course](../) 30 | * [Session 9: Serverless Deep Learning](./) 31 | * Previous: [TensorFlow Lite](03-tensorflow-lite.md) 32 | * Next: [Preparing a Docker image](05-docker-image.md) 33 | -------------------------------------------------------------------------------- /09-serverless/06-creating-lambda.md: -------------------------------------------------------------------------------- 1 | 2 | ## 9.6 Creating the lambda function 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | ## Notes 11 | 12 | Add notes from the video (PRs are welcome) 13 | 14 | 15 | 16 | 17 | 18 | 22 | 23 |
⚠️ 19 | The notes are written by the community.
20 | If you see an error here, please create a PR with a fix. 21 |
24 | 25 | * [Notes from Peter Ernicke](https://knowmledge.com/2023/12/05/ml-zoomcamp-2023-serverless-part-6/) 26 | 27 | ## Navigation 28 | 29 | * [Machine Learning Zoomcamp course](../) 30 | * [Session 9: Serverless Deep Learning](./) 31 | * Previous: [Preparing a Docker image](05-docker-image.md) 32 | * Next: [API Gateway: exposing the lambda function](07-api-gateway.md) 33 | -------------------------------------------------------------------------------- /09-serverless/07-api-gateway.md: -------------------------------------------------------------------------------- 1 | 2 | ## 9.7 API Gateway: exposing the lambda function 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | ## Notes 11 | 12 | Add notes from the video (PRs are welcome) 13 | 14 | 15 | 16 | 17 | 18 | 22 | 23 |
⚠️ 19 | The notes are written by the community.
20 | If you see an error here, please create a PR with a fix. 21 |
24 | 25 | * [Notes from Peter Ernicke](https://knowmledge.com/2023/12/06/ml-zoomcamp-2023-serverless-part-7/) 26 | 27 | ## Navigation 28 | 29 | * [Machine Learning Zoomcamp course](../) 30 | * [Session 9: Serverless Deep Learning](./) 31 | * Previous: [Creating the lambda function](06-creating-lambda.md) 32 | * Next: [Summary](08-summary.md) 33 | -------------------------------------------------------------------------------- /09-serverless/08-summary.md: -------------------------------------------------------------------------------- 1 | ## 9.8 Summary 2 | 3 | 4 | 5 | 6 | ## Notes 7 | 8 | Add notes from the video (PRs are welcome) 9 | 10 | 11 | 12 | 13 | 14 | 18 | 19 |
⚠️ 15 | The notes are written by the community.
16 | If you see an error here, please create a PR with a fix. 17 |
20 | 21 | 22 | ## Navigation 23 | 24 | * [Machine Learning Zoomcamp course](../) 25 | * [Session 9: Serverless Deep Learning](./) 26 | * Previous: [API Gateway: exposing the lambda function](07-api-gateway.md) 27 | * Next: [Explore more](09-explore-more.md) -------------------------------------------------------------------------------- /09-serverless/09-explore-more.md: -------------------------------------------------------------------------------- 1 | ## 9.9 Explore more 2 | 3 | * Try similar serverless services from Google Cloud and Microsoft Azure 4 | * Deploy cats vs dogs and other Keras models with AWS Lambda 5 | * AWS Lambda is also good for other libraries, not just Tensorflow. You can deploy Scikit-Learn and XGBoost models with it as well 6 | 7 | 8 | ## Navigation 9 | 10 | * [Machine Learning Zoomcamp course](../) 11 | * [Session 9: Serverless Deep Learning](./) 12 | * Previous: [Summary](08-summary.md) 13 | * Next: [Homework](homework.md) -------------------------------------------------------------------------------- /09-serverless/README.md: -------------------------------------------------------------------------------- 1 | ## 9. Serverless Deep Learning 2 | 3 | - 9.1 [Introduction to Serverless](01-intro.md) 4 | - 9.2 [AWS Lambda](02-aws-lambda.md) 5 | - 9.3 [TensorFlow Lite](03-tensorflow-lite.md) 6 | - 9.4 [Preparing the code for Lambda](04-preparing-code.md) 7 | - 9.5 [Preparing a Docker image](05-docker-image.md) 8 | - 9.6 [Creating the lambda function](06-creating-lambda.md) 9 | - 9.7 [API Gateway: exposing the lambda function](07-api-gateway.md) 10 | - 9.8 [Summary](08-summary.md) 11 | - 9.9 [Explore more](09-explore-more.md) 12 | - 9.10 [Homework](homework.md) 13 | 14 | 15 | 16 | ## Community notes 17 | 18 | Did you take notes? 
You can share them here (or in each unit separately) 19 | 20 | * [Alvaro Navas' Notes](https://github.com/ziritrion/ml-zoomcamp/blob/main/notes/09_serverless.md) 21 | * [Notes from froukje](https://github.com/froukje/ml-zoomcamp/blob/main/week9/Lecture_9_serverless.ipynb) 22 | * [Notes from Memoona Tahira](https://github.com/MemoonaTahira/MLZoomcamp2022/tree/main/Notes/Week_9-Serverless) 23 | * [Notes from Oscar Garcia](https://github.com/ozkary/machine-learning-engineering/tree/main/09-serverless) 24 | * [Notes from Peter Ernicke](https://knowmledge.com/category/courses/ml-zoomcamp/serverless-deployment/) 25 | * [Notes from Maximilien Eyengue](https://github.com/maxim-eyengue/Python-Codes/blob/main/ML_Zoomcamp_2024/09_serverless/Summary_Session_09.md) 26 | 27 | * Add your notes here 28 | -------------------------------------------------------------------------------- /09-serverless/code/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM public.ecr.aws/lambda/python:3.10 2 | 3 | RUN pip install keras-image-helper 4 | RUN pip install https://github.com/alexeygrigorev/tflite-aws-lambda/raw/main/tflite/tflite_runtime-2.14.0-cp310-cp310-linux_x86_64.whl 5 | 6 | COPY clothing-model.tflite . 7 | COPY lambda_function.py . 
8 | 9 | CMD [ "lambda_function.lambda_handler" ] -------------------------------------------------------------------------------- /09-serverless/code/convert-model.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow import keras 3 | 4 | model = keras.models.load_model('clothing-model.h5') 5 | 6 | converter = tf.lite.TFLiteConverter.from_keras_model(model) 7 | 8 | tflite_model = converter.convert() 9 | 10 | with open('clothing-model.tflite', 'wb') as f_out: 11 | f_out.write(tflite_model) -------------------------------------------------------------------------------- /09-serverless/code/lambda_function.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | # Module-level setup below runs once at import; predict() and lambda_handler() reuse these globals. 4 | import tflite_runtime.interpreter as tflite 5 | from keras_image_helper import create_preprocessor 6 | 7 | # Image preprocessor configured for 'xception' with a 299x299 target size. 8 | preprocessor = create_preprocessor('xception', target_size=(299, 299)) 9 | 10 | # Load the TF-Lite model file and allocate its tensors before any inference call. 11 | interpreter = tflite.Interpreter(model_path='clothing-model.tflite') 12 | interpreter.allocate_tensors() 13 | # Tensor indices of the model's first input and first output. 14 | input_index = interpreter.get_input_details()[0]['index'] 15 | output_index = interpreter.get_output_details()[0]['index'] 16 | 17 | # Label names; predict() zips them positionally with the output scores, so the order here matters. 18 | classes = [ 19 | 'dress', 20 | 'hat', 21 | 'longsleeve', 22 | 'outwear', 23 | 'pants', 24 | 'shirt', 25 | 'shoes', 26 | 'shorts', 27 | 'skirt', 28 | 't-shirt' 29 | ] 30 | 31 | # url = 'http://bit.ly/mlbookcamp-pants' 32 | # Run the model on the image at `url` and return a {class name: score} dict (from_url presumably downloads and preprocesses the image - verify). 33 | def predict(url): 34 | X = preprocessor.from_url(url) 35 | # Set the input tensor, run inference, then read the output tensor. 36 | interpreter.set_tensor(input_index, X) 37 | interpreter.invoke() 38 | preds = interpreter.get_tensor(output_index) 39 | # First row of the output, converted to a plain Python list. 40 | float_predictions = preds[0].tolist() 41 | 42 | return dict(zip(classes, float_predictions)) 43 | 44 | # Entry point named in the Dockerfile CMD: reads event['url'] and returns predict()'s result; `context` is not used. 45 | def lambda_handler(event, context): 46 | url = event['url'] 47 | result = predict(url) 48 | return result 49 | 50 | 51 | 
-------------------------------------------------------------------------------- /09-serverless/code/plan.md: -------------------------------------------------------------------------------- 1 | # 9. Serverless Deep Learning 2 | 3 | We'll deploy the clothes classification model we trained previously. 4 | 5 | ## 9.1 Introduction to Serverless 6 | 7 | * What we'll cover this week 8 | 9 | 10 | ## 9.2 AWS Lambda 11 | 12 | * Intro to AWS Lambda 13 | * Serverless vs serverful 14 | 15 | 16 | ## 9.3 TensorFlow Lite 17 | 18 | * Why not TensorFlow 19 | * Converting the model 20 | * Using the TF-Lite model for making predictions 21 | 22 | 23 | ## 9.4 Preparing the Lambda code 24 | 25 | * Moving the code from notebook to script 26 | * Testing it locally 27 | 28 | 29 | ## 9.5 Preparing a Docker image 30 | 31 | * Lambda base images 32 | * Preparing the Dockerfile 33 | * Using the right TF-Lite wheel 34 | 35 | 36 | ## 9.6 Creating the lambda function 37 | 38 | * Publishing the image to AWS ECR 39 | * Creating the function 40 | * Configuring it 41 | * Testing the function from the AWS Console 42 | * Pricing 43 | 44 | 45 | ## 9.7 API Gateway: exposing the lambda function 46 | 47 | * Creating and configuring the gateway 48 | 49 | 50 | ## 9.8 Summary 51 | 52 | * AWS Lambda is a way of deploying models without having to worry about servers 53 | * Tensorflow Lite is a lightweight alternative to Tensorflow that only focuses on inference 54 | * To deploy your code, package it in a Docker container 55 | * Expose the lambda function via API Gateway 56 | 57 | 58 | ## 9.9 Explore more 59 | 60 | * Try similar serverless services from Google Cloud and Microsoft Azure 61 | * Deploy cats vs dogs and other Keras models with AWS Lambda 62 | * AWS Lambda is also good for other libraries, not just Tensorflow. You can deploy Scikit-Learn and XGBoost models with it as well. 
63 | -------------------------------------------------------------------------------- /09-serverless/code/test.py: -------------------------------------------------------------------------------- 1 | import requests 2 | 3 | url = 'http://localhost:8080/2015-03-31/functions/function/invocations' 4 | 5 | data = {'url': 'http://bit.ly/mlbookcamp-pants'} 6 | 7 | result = requests.post(url, json=data).json() 8 | print(result) 9 | -------------------------------------------------------------------------------- /09-serverless/homework.md: -------------------------------------------------------------------------------- 1 | ## Homework 2 | 3 | * For 2023 cohort homework, check [the 2023 cohort folder](../cohorts/2023/) 4 | * For 2022 cohort homework, check [the 2022 cohort folder](../cohorts/2022/) 5 | * For 2021 cohort homework and solution, check [the 2021 cohort folder](../cohorts/2021/09-serverless/) 6 | 7 | 8 | ## Navigation 9 | 10 | * [Machine Learning Zoomcamp course](../) 11 | * [Session 9: Serverless Deep Learning](./) 12 | * Previous: [Explore more](09-explore-more.md) -------------------------------------------------------------------------------- /09-serverless/images/thumbnail-9-01.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/09-serverless/images/thumbnail-9-01.jpg -------------------------------------------------------------------------------- /09-serverless/images/thumbnail-9-02.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/09-serverless/images/thumbnail-9-02.jpg -------------------------------------------------------------------------------- /09-serverless/images/thumbnail-9-03.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/09-serverless/images/thumbnail-9-03.jpg -------------------------------------------------------------------------------- /09-serverless/images/thumbnail-9-04.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/09-serverless/images/thumbnail-9-04.jpg -------------------------------------------------------------------------------- /09-serverless/images/thumbnail-9-05.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/09-serverless/images/thumbnail-9-05.jpg -------------------------------------------------------------------------------- /09-serverless/images/thumbnail-9-06.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/09-serverless/images/thumbnail-9-06.jpg -------------------------------------------------------------------------------- /09-serverless/images/thumbnail-9-07.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/09-serverless/images/thumbnail-9-07.jpg -------------------------------------------------------------------------------- /09-serverless/images/thumbnail-9-08.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/09-serverless/images/thumbnail-9-08.jpg -------------------------------------------------------------------------------- /09-serverless/meta.csv: -------------------------------------------------------------------------------- 1 | lesson,name,page_name,video,slides,notebook 2 | 1,Introduction to Serverless,01-intro.md,https://www.youtube.com/watch?v=JLIVwIsU6RA,, 3 | 2,AWS Lambda,02-aws-lambda.md,https://www.youtube.com/watch?v=_UX8-2WhHZo,, 4 | 3,TensorFlow Lite,03-tensorflow-lite.md,https://www.youtube.com/watch?v=OzZA4mSBE0Q,,code/tensorflow-model.ipynb 5 | 4,Preparing the code for Lambda,04-preparing-code.md,https://www.youtube.com/watch?v=XXBUivsHhec,, 6 | 5,Preparing a Docker image,05-docker-image.md,https://www.youtube.com/watch?v=y4_YQjfOsDo,, 7 | 6,Creating the lambda function,06-creating-lambda.md,https://www.youtube.com/watch?v=kBch5oD5BkY,, 8 | 7,API Gateway: exposing the lambda function,07-api-gateway.md,https://www.youtube.com/watch?v=wyZ9aqQOXvs,, 9 | 8,Summary,08-summary.md,https://www.youtube.com/watch?v=bu3nPiHCNLU,, 10 | 9,Explore more,09-explore-more.md,,, 11 | 10,Homework,homework.md,,, -------------------------------------------------------------------------------- /09-serverless/meta.json: -------------------------------------------------------------------------------- 1 | { 2 | "data": "meta.csv", 3 | "session": 9, 4 | "name": "Serverless Deep Learning" 5 | } -------------------------------------------------------------------------------- /09-serverless/updates.md: -------------------------------------------------------------------------------- 1 | ## Python 3.12 vs TF Lite 2.17 2 | 3 | The latest versions of TF Lite don't support Python 3.12 yet. 4 | 5 | As a workaround, we can use the previous version of TF Lite 6 | to serve the models created by TensorFlow 2.17. 
We tested 7 | it with TF Lite 2.14 and the deep learning models we use 8 | in the course work successfully with this setup. 9 | 10 | Here's how you do it: 11 | 12 | 13 | First, use Python 3.10. It means that you will need to use 14 | `public.ecr.aws/lambda/python:3.10` as the base image: 15 | 16 | ```docker 17 | FROM public.ecr.aws/lambda/python:3.10 18 | ``` 19 | 20 | Second, use numpy 1.23.1: 21 | 22 | ```docker 23 | RUN pip install numpy==1.23.1 24 | ``` 25 | 26 | When installing the TF Lite interpreter for AWS Lambda, 27 | make sure you skip installing its dependencies by passing the `--no-deps` flag: 28 | 29 | ```docker 30 | RUN pip install --no-deps https://github.com/alexeygrigorev/tflite-aws-lambda/raw/main/tflite/tflite_runtime-2.14.0-cp310-cp310-linux_x86_64.whl 31 | ``` 32 | 33 | If you don't do it, pip will try to upgrade the version of numpy 34 | and your code won't work (as the tflite runtime was compiled 35 | with numpy 1, not numpy 2). 36 | 37 | 38 | 39 | 40 | 41 | -------------------------------------------------------------------------------- /10-kubernetes/01-overview.md: -------------------------------------------------------------------------------- 1 | 2 | ## 10.1 Overview 3 | 4 | 5 | 6 | 7 | [Slides](https://www.slideshare.net/AlexeyGrigorev/ml-zoomcamp-10-kubernetes) 8 | 9 | 10 | ## Notes 11 | 12 | Add notes from the video (PRs are welcome) 13 | 14 | * same use case as in the session before: classifying images of clothes 15 | * using tensorflow serving, written in C++, with focus on inference 16 | * gRPC binary protocol 17 | * deploying to kubernetes 18 | * 1st component: gateway (download image, resize, turn into numpy array - computationally not expensive - can be done with CPU) 19 | * 2nd component: model (matrix multiplications - computationally expensive - thus use GPU) 20 | * scaling the two components independently: i.e. 
5 gateways handing images to 1 model 21 | * two components in two different Docker containers (lesson four) 22 | * kubernetes main concepts (lesson five) 23 | * running kubernetes on your local machine (lesson six) 24 | * deploy the two services to kubernetes (lesson seven) 25 | * move from local to cloud (lesson eight) 26 | 27 | 28 | 29 | 30 | 34 | 35 | 
⚠️ 31 | The notes are written by the community.
32 | If you see an error here, please create a PR with a fix. 33 |
36 | 37 | 38 | ## Navigation 39 | 40 | * [Machine Learning Zoomcamp course](../) 41 | * [Session 10: Kubernetes and TensorFlow Serving](./) 42 | * Next: [TensorFlow Serving](02-tensorflow-serving.md) 43 | -------------------------------------------------------------------------------- /10-kubernetes/09-summary.md: -------------------------------------------------------------------------------- 1 | ## 10.9 Summary 2 | 3 | 4 | 5 | In session 10, we covered the following concepts: 6 | 7 | - TF-Serving is a system for deploying TensorFlow models 8 | - When using TF-Serving, we need a component for pre-processing 9 | - Kubernetes is a container orchestration platform 10 | - To deploy something on Kubernetes, we need to specify a deployment and a service 11 | - You can use Docker Compose and Kind for local experiments 12 | 13 | ## Notes 14 | 15 | Add notes from the video (PRs are welcome) 16 | 17 | 18 | 19 | 20 | 21 | 25 | 26 | 
⚠️ 22 | The notes are written by the community.
23 | If you see an error here, please create a PR with a fix. 24 |
27 | 28 | 29 | ## Navigation 30 | 31 | * [Machine Learning Zoomcamp course](../) 32 | * [Session 10: Kubernetes and TensorFlow Serving](./) 33 | * Previous: [Deploying to EKS](08-eks.md) 34 | * Next: [Explore more](10-explore-more.md) -------------------------------------------------------------------------------- /10-kubernetes/10-explore-more.md: -------------------------------------------------------------------------------- 1 | 2 | ## 10.10 Explore more 3 | 4 | - Other local Kubernetes: minikube, k3d, k3s, microk8s, EKS Anywhere 5 | - [Rancher Desktop](https://rancherdesktop.io/) 6 | - [Docker Desktop](https://www.docker.com/products/docker-desktop/) 7 | - [Lens](https://k8slens.dev/) 8 | - Many cloud providers have Kubernetes: GCP, Azure, DigitalOcean and others. Look for "Managed Kubernetes" in your favourite search engine 9 | - Deploy the model from previous modules and from your project with Kubernetes 10 | - Learn about Kubernetes namespaces. Here we used the default namespace 11 | 12 | ## Notes 13 | 14 | Add notes from the video (PRs are welcome) 15 | 16 | 17 | 18 | 19 | 20 | 24 | 25 | 
⚠️ 21 | The notes are written by the community.
22 | If you see an error here, please create a PR with a fix. 23 |
26 | 27 | 28 | ## Navigation 29 | 30 | * [Machine Learning Zoomcamp course](../) 31 | * [Session 10: Kubernetes and TensorFlow Serving](./) 32 | * Previous: [Summary](09-summary.md) 33 | * Next: [Homework](homework.md) -------------------------------------------------------------------------------- /10-kubernetes/README.md: -------------------------------------------------------------------------------- 1 | ## 10. Kubernetes and TensorFlow Serving 2 | 3 | - 10.1 [Overview](01-overview.md) 4 | - 10.2 [TensorFlow Serving](02-tensorflow-serving.md) 5 | - 10.3 [Creating a pre-processing service](03-preprocessing.md) 6 | - 10.4 [Running everything locally with Docker-compose](04-docker-compose.md) 7 | - 10.5 [Introduction to Kubernetes](05-kubernetes-intro.md) 8 | - 10.6 [Deploying a simple service to Kubernetes](06-kubernetes-simple-service.md) 9 | - 10.7 [Deploying TensorFlow models to Kubernetes](07-kubernetes-tf-serving.md) 10 | - 10.8 [Deploying to EKS](08-eks.md) 11 | - 10.9 [Summary](09-summary.md) 12 | - 10.10 [Explore more](10-explore-more.md) 13 | - 10.11 [Homework](homework.md) 14 | 15 | 16 | ## Community notes 17 | 18 | Did you take notes? 
You can share them here (or in each unit separately) 19 | 20 | * [Alvaro Navas' Notes](https://github.com/ziritrion/ml-zoomcamp/blob/main/notes/10_kubernetes.md) 21 | * [Notes from Oscar Garcia](https://github.com/ozkary/machine-learning-engineering/tree/main/10-kubernetes) 22 | * [Notes from Maximilien Eyengue](https://github.com/maxim-eyengue/Python-Codes/blob/main/ML_Zoomcamp_2024/10_kubernetes/Summary_Session_10.md) 23 | * Add your notes here 24 | -------------------------------------------------------------------------------- /10-kubernetes/code/Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | url = "https://pypi.org/simple" 3 | verify_ssl = true 4 | name = "pypi" 5 | 6 | [packages] 7 | grpcio = "==1.42.0" 8 | flask = "*" 9 | gunicorn = "*" 10 | keras-image-helper = "*" 11 | tensorflow-protobuf = "==2.7.0" 12 | 13 | [dev-packages] 14 | 15 | [requires] 16 | python_version = "3.8" 17 | -------------------------------------------------------------------------------- /10-kubernetes/code/docker-compose.yaml: -------------------------------------------------------------------------------- 1 | version: "3.9" 2 | services: 3 | clothing-model: 4 | image: zoomcamp-10-model:xception-v4-001 5 | gateway: 6 | image: zoomcamp-10-gateway:002 7 | environment: 8 | - TF_SERVING_HOST=clothing-model:8500 9 | ports: 10 | - "9696:9696" -------------------------------------------------------------------------------- /10-kubernetes/code/image-gateway.dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.8.12-slim 2 | 3 | RUN pip install pipenv 4 | 5 | WORKDIR /app 6 | 7 | COPY ["Pipfile", "Pipfile.lock", "./"] 8 | 9 | RUN pipenv install --system --deploy 10 | 11 | COPY ["gateway.py", "proto.py", "./"] 12 | 13 | EXPOSE 9696 14 | 15 | ENTRYPOINT ["gunicorn", "--bind=0.0.0.0:9696", "gateway:app"] 
-------------------------------------------------------------------------------- /10-kubernetes/code/image-model.dockerfile: -------------------------------------------------------------------------------- 1 | FROM tensorflow/serving:2.7.0 2 | 3 | COPY clothing-model /models/clothing-model/1 4 | ENV MODEL_NAME="clothing-model" -------------------------------------------------------------------------------- /10-kubernetes/code/kube-config/eks-config.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: eksctl.io/v1alpha5 2 | kind: ClusterConfig 3 | 4 | metadata: 5 | name: mlzoomcamp-eks 6 | region: eu-west-1 7 | 8 | nodeGroups: 9 | - name: ng-m5-xlarge 10 | instanceType: m5.xlarge 11 | desiredCapacity: 1 -------------------------------------------------------------------------------- /10-kubernetes/code/kube-config/gateway-deployment.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: gateway 5 | spec: 6 | selector: 7 | matchLabels: 8 | app: gateway 9 | template: 10 | metadata: 11 | labels: 12 | app: gateway 13 | spec: 14 | containers: 15 | - name: gateway 16 | image: 387546586013.dkr.ecr.eu-west-1.amazonaws.com/mlzoomcamp-images:zoomcamp-10-gateway-002 17 | resources: 18 | limits: 19 | memory: "128Mi" 20 | cpu: "100m" 21 | ports: 22 | - containerPort: 9696 23 | env: 24 | - name: TF_SERVING_HOST 25 | value: tf-serving-clothing-model.default.svc.cluster.local:8500 26 | -------------------------------------------------------------------------------- /10-kubernetes/code/kube-config/gateway-service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: gateway 5 | spec: 6 | type: LoadBalancer 7 | selector: 8 | app: gateway 9 | ports: 10 | - port: 80 11 | targetPort: 9696 12 | 
-------------------------------------------------------------------------------- /10-kubernetes/code/kube-config/model-deployment.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: tf-serving-clothing-model 5 | spec: 6 | replicas: 1 7 | selector: 8 | matchLabels: 9 | app: tf-serving-clothing-model 10 | template: 11 | metadata: 12 | labels: 13 | app: tf-serving-clothing-model 14 | spec: 15 | containers: 16 | - name: tf-serving-clothing-model 17 | image: 387546586013.dkr.ecr.eu-west-1.amazonaws.com/mlzoomcamp-images:zoomcamp-10-model-xception-v4-001 18 | resources: 19 | limits: 20 | memory: "512Mi" 21 | cpu: "0.5" 22 | ports: 23 | - containerPort: 8500 24 | -------------------------------------------------------------------------------- /10-kubernetes/code/kube-config/model-service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: tf-serving-clothing-model 5 | spec: 6 | selector: 7 | app: tf-serving-clothing-model 8 | ports: 9 | - port: 8500 10 | targetPort: 8500 11 | -------------------------------------------------------------------------------- /10-kubernetes/code/ping/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.8.12-slim 2 | 3 | RUN pip install pipenv 4 | 5 | WORKDIR /app 6 | 7 | COPY ["Pipfile", "Pipfile.lock", "./"] 8 | 9 | RUN pipenv install --system --deploy 10 | 11 | COPY "ping.py" . 
12 | 13 | EXPOSE 9696 14 | 15 | ENTRYPOINT ["gunicorn", "--bind=0.0.0.0:9696", "ping:app"] -------------------------------------------------------------------------------- /10-kubernetes/code/ping/Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | url = "https://pypi.org/simple" 3 | verify_ssl = true 4 | name = "pypi" 5 | 6 | [packages] 7 | flask = "*" 8 | gunicorn = "*" 9 | 10 | [dev-packages] 11 | 12 | [requires] 13 | python_version = "3.8" 14 | -------------------------------------------------------------------------------- /10-kubernetes/code/ping/deployment.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: ping-deployment 5 | spec: 6 | replicas: 1 7 | selector: 8 | matchLabels: 9 | app: ping 10 | template: 11 | metadata: 12 | labels: 13 | app: ping 14 | spec: 15 | containers: 16 | - name: ping-pod 17 | image: ping:v001 18 | resources: 19 | limits: 20 | memory: "128Mi" 21 | cpu: "200m" 22 | ports: 23 | - containerPort: 9696 24 | -------------------------------------------------------------------------------- /10-kubernetes/code/ping/metallb-config.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: metallb.io/v1beta1 2 | kind: IPAddressPool 3 | metadata: 4 | name: example 5 | namespace: metallb-system 6 | spec: 7 | addresses: 8 | - 172.20.255.200-172.20.255.250 9 | --- 10 | apiVersion: metallb.io/v1beta1 11 | kind: L2Advertisement 12 | metadata: 13 | name: empty 14 | namespace: metallb-system -------------------------------------------------------------------------------- /10-kubernetes/code/ping/ping.py: -------------------------------------------------------------------------------- 1 | from flask import Flask 2 | 3 | app = Flask('ping') 4 | 5 | @app.route('/ping', methods=['GET']) 6 | def ping(): 7 | return "PONG" 8 | 9 | if __name__ == 
"__main__": 10 | app.run(debug=True, host='0.0.0.0', port=9696) -------------------------------------------------------------------------------- /10-kubernetes/code/ping/service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: ping 5 | spec: 6 | type: LoadBalancer 7 | selector: 8 | app: ping 9 | ports: 10 | - port: 80 11 | targetPort: 9696 12 | -------------------------------------------------------------------------------- /10-kubernetes/code/proto.py: -------------------------------------------------------------------------------- 1 | from tensorflow.core.framework import tensor_pb2, tensor_shape_pb2, types_pb2 2 | 3 | 4 | def dtypes_as_dtype(dtype): 5 | if dtype == "float32": 6 | return types_pb2.DT_FLOAT 7 | raise Exception("dtype %s is not supported" % dtype) 8 | 9 | 10 | def make_tensor_proto(data): 11 | shape = data.shape 12 | dims = [tensor_shape_pb2.TensorShapeProto.Dim(size=i) for i in shape] 13 | proto_shape = tensor_shape_pb2.TensorShapeProto(dim=dims) 14 | 15 | proto_dtype = dtypes_as_dtype(data.dtype) 16 | 17 | tensor_proto = tensor_pb2.TensorProto(dtype=proto_dtype, tensor_shape=proto_shape) 18 | tensor_proto.tensor_content = data.tostring() 19 | 20 | return tensor_proto 21 | 22 | 23 | def np_to_protobuf(data): 24 | if data.dtype != "float32": 25 | data = data.astype("float32") 26 | return make_tensor_proto(data) 27 | -------------------------------------------------------------------------------- /10-kubernetes/code/test.py: -------------------------------------------------------------------------------- 1 | import requests 2 | 3 | url = 'http://localhost:9696/predict' 4 | 5 | data = {'url': 'http://bit.ly/mlbookcamp-pants'} 6 | 7 | result = requests.post(url, json=data).json() 8 | print(result) -------------------------------------------------------------------------------- /10-kubernetes/homework.md: 
-------------------------------------------------------------------------------- 1 | ## Homework 2 | 3 | * For 2024 cohort homework, check [the 2024 cohort folder](../cohorts/2024/) 4 | * For 2023 cohort homework, check [the 2023 cohort folder](../cohorts/2023/) 5 | * For 2022 cohort homework, check [the 2022 cohort folder](../cohorts/2022/) 6 | * For 2021 cohort homework and solution, check [the 2021 cohort folder](../cohorts/2021/10-kubernetes/) 7 | 8 | 9 | ## Navigation 10 | 11 | * [Machine Learning Zoomcamp course](../) 12 | * [Session 10: Kubernetes and TensorFlow Serving](./) 13 | * Previous: [Explore more](10-explore-more.md) -------------------------------------------------------------------------------- /10-kubernetes/images/thumbnail-10-01.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/10-kubernetes/images/thumbnail-10-01.jpg -------------------------------------------------------------------------------- /10-kubernetes/images/thumbnail-10-02.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/10-kubernetes/images/thumbnail-10-02.jpg -------------------------------------------------------------------------------- /10-kubernetes/images/thumbnail-10-03.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/10-kubernetes/images/thumbnail-10-03.jpg -------------------------------------------------------------------------------- /10-kubernetes/images/thumbnail-10-04.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/10-kubernetes/images/thumbnail-10-04.jpg -------------------------------------------------------------------------------- /10-kubernetes/images/thumbnail-10-05.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/10-kubernetes/images/thumbnail-10-05.jpg -------------------------------------------------------------------------------- /10-kubernetes/images/thumbnail-10-06.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/10-kubernetes/images/thumbnail-10-06.jpg -------------------------------------------------------------------------------- /10-kubernetes/images/thumbnail-10-07.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/10-kubernetes/images/thumbnail-10-07.jpg -------------------------------------------------------------------------------- /10-kubernetes/images/thumbnail-10-08.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/10-kubernetes/images/thumbnail-10-08.jpg -------------------------------------------------------------------------------- /10-kubernetes/images/thumbnail-10-09.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/10-kubernetes/images/thumbnail-10-09.jpg 
-------------------------------------------------------------------------------- /10-kubernetes/meta.csv: -------------------------------------------------------------------------------- 1 | lesson,name,page_name,video,slides,notebook 2 | 1,Overview,01-overview.md,https://www.youtube.com/watch?v=mvPER7YfTkw,https://www.slideshare.net/AlexeyGrigorev/ml-zoomcamp-10-kubernetes, 3 | 2,TensorFlow Serving,02-tensorflow-serving.md,https://www.youtube.com/watch?v=deXR2fThYDw,, 4 | 3,Creating a pre-processing service,03-preprocessing.md,https://www.youtube.com/watch?v=OIlrS14Zi0o,,code/tf-serving-connect.ipynb 5 | 4,Running everything locally with Docker-compose,04-docker-compose.md,https://www.youtube.com/watch?v=ZhQQfpWfkKY,https://www.slideshare.net/AlexeyGrigorev/ml-zoomcamp-10-kubernetes, 6 | 5,Introduction to Kubernetes,05-kubernetes-intro.md,https://www.youtube.com/watch?v=UjVkpszDzgk,https://www.slideshare.net/AlexeyGrigorev/ml-zoomcamp-10-kubernetes, 7 | 6,Deploying a simple service to Kubernetes,06-kubernetes-simple-service.md,https://www.youtube.com/watch?v=PPUCVRIV9t8,https://www.slideshare.net/AlexeyGrigorev/ml-zoomcamp-10-kubernetes, 8 | 7,Deploying TensorFlow models to Kubernetes,07-kubernetes-tf-serving.md,https://www.youtube.com/watch?v=6vHLMdnjO2w,, 9 | 8,Deploying to EKS,08-eks.md,https://www.youtube.com/watch?v=89jxeddZtC0,, 10 | 9,Summary,09-summary.md,https://www.youtube.com/watch?v=J5LMRTIu4jY,, 11 | 10,Explore more,10-explore-more.md,,, 12 | 11,Homework,homework.md,,, -------------------------------------------------------------------------------- /10-kubernetes/meta.json: -------------------------------------------------------------------------------- 1 | { 2 | "data": "meta.csv", 3 | "session": 10, 4 | "name": "Kubernetes and TensorFlow Serving" 5 | } -------------------------------------------------------------------------------- /11-kserve/01-overview.md: -------------------------------------------------------------------------------- 1 | 2 | ## 
11.1 Overview 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | ## Notes 11 | 12 | Add notes from the video (PRs are welcome) 13 | 14 | 15 | 16 | 17 | 18 | 22 | 23 |
⚠️ 19 | The notes are written by the community.
20 | If you see an error here, please create a PR with a fix. 21 |
24 | 25 | 26 | ## Navigation 27 | 28 | * [Machine Learning Zoomcamp course](../) 29 | * [Session 11: KServe](./) 30 | * Next: [Running KServe locally](02-kserve-local.md) -------------------------------------------------------------------------------- /11-kserve/02-kserve-local.md: -------------------------------------------------------------------------------- 1 | 2 | ## 11.2 Running KServe locally 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | ## Notes 11 | 12 | Add notes from the video (PRs are welcome) 13 | 14 | 15 | 16 | 17 | 18 | 22 | 23 |
⚠️ 19 | The notes are written by the community.
20 | If you see an error here, please create a PR with a fix. 21 |
24 | 25 | ### Updated Instructions (09 July, 2022) 26 | 27 | In the `iris-example.yaml` file, instead of `"gs://kfserving-samples/models/sklearn/iris"`, use `"gs://kfserving-examples/models/sklearn/1.0/model"` as the URL in storageUri. 28 | 29 | Also make sure to use the following versions 30 | - kind: 0.11.1 (via https://kind.sigs.k8s.io/dl/v0.11.1/kind-linux-amd64) 31 | - kubectl: 1.21.1 (via https://dl.k8s.io/release/v1.21.1/bin/linux/amd64/kubectl) 32 | - kindest/node image: 1.21.1 (via `kind create cluster --image kindest/node:v1.21.1`) 33 | - kserve=0.8 (via https://raw.githubusercontent.com/kserve/kserve/release-0.8/hack/quick_install.sh) 34 | 35 | ## Navigation 36 | 37 | * [Machine Learning Zoomcamp course](../) 38 | * [Session 11: KServe](./) 39 | * Previous: [Overview](01-overview.md) 40 | * Next: [Deploying a Scikit-Learn model with KServe](03-kserve-sklearn.md) 41 | -------------------------------------------------------------------------------- /11-kserve/03-kserve-sklearn.md: -------------------------------------------------------------------------------- 1 | 2 | ## 11.3 Deploying a Scikit-Learn model with KServe 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | ## Notes 11 | 12 | Add notes from the video (PRs are welcome) 13 | 14 | 15 | 16 | 17 | 18 | 22 | 23 |
⚠️ 19 | The notes are written by the community.
20 | If you see an error here, please create a PR with a fix. 21 |
24 | 25 | 26 | ## Navigation 27 | 28 | * [Machine Learning Zoomcamp course](../) 29 | * [Session 11: KServe](./) 30 | * Previous: [Running KServe locally](02-kserve-local.md) 31 | * Next: [Deploying custom Scikit-Learn images with KServe](04-kserve-custom-image.md) -------------------------------------------------------------------------------- /11-kserve/04-kserve-custom-image.md: -------------------------------------------------------------------------------- 1 | 2 | ## 11.4 Deploying custom Scikit-Learn images with KServe 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | ## Notes 11 | 12 | Add notes from the video (PRs are welcome) 13 | 14 | 15 | 16 | 17 | 18 | 22 | 23 |
⚠️ 19 | The notes are written by the community.
20 | If you see an error here, please create a PR with a fix. 21 |
24 | 25 | 26 | ## Navigation 27 | 28 | * [Machine Learning Zoomcamp course](../) 29 | * [Session 11: KServe](./) 30 | * Previous: [Deploying a Scikit-Learn model with KServe](03-kserve-sklearn.md) 31 | * Next: [Serving TensorFlow models with KServe](05-tensorflow-kserve.md) -------------------------------------------------------------------------------- /11-kserve/05-tensorflow-kserve.md: -------------------------------------------------------------------------------- 1 | 2 | ## 11.5 Serving TensorFlow models with KServe 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | ## Notes 11 | 12 | Add notes from the video (PRs are welcome) 13 | 14 | 15 | 16 | 17 | 18 | 22 | 23 |
⚠️ 19 | The notes are written by the community.
20 | If you see an error here, please create a PR with a fix. 21 |
24 | 25 | * For PyTorch users, [see this repo](https://github.com/mmg10/torchserve_kserve) on how to serve a PyTorch model using KServe. 26 | 27 | ## Navigation 28 | 29 | * [Machine Learning Zoomcamp course](../) 30 | * [Session 11: KServe](./) 31 | * Previous: [Deploying custom Scikit-Learn images with KServe](04-kserve-custom-image.md) 32 | * Next: [KServe transformers](06-kserve-transformers.md) 33 | -------------------------------------------------------------------------------- /11-kserve/06-kserve-transformers.md: -------------------------------------------------------------------------------- 1 | 2 | ## 11.6 KServe transformers 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | ## Notes 11 | 12 | Add notes from the video (PRs are welcome) 13 | 14 | 15 | 16 | 17 | 18 | 22 | 23 |
⚠️ 19 | The notes are written by the community.
20 | If you see an error here, please create a PR with a fix. 21 |
24 | 25 | 26 | ## Navigation 27 | 28 | * [Machine Learning Zoomcamp course](../) 29 | * [Session 11: KServe](./) 30 | * Previous: [Serving TensorFlow models with KServe](05-tensorflow-kserve.md) 31 | * Next: [Deploying with KServe and EKS](07-kserve-eks.md) -------------------------------------------------------------------------------- /11-kserve/07-kserve-eks.md: -------------------------------------------------------------------------------- 1 | 2 | ## 11.7 Deploying with KServe and EKS 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | ## Notes 11 | 12 | Add notes from the video (PRs are welcome) 13 | 14 | 15 | 16 | 17 | 21 | 22 |
⚠️ 18 | The notes are written by the community.
19 | If you see an error here, please create a PR with a fix. 20 |
23 | 24 | ### Updated Instructions (13 July, 2022) 25 | See [the instructions here](07-kserve-eks-upd.md) 26 | 27 | ## Navigation 28 | 29 | * [Machine Learning Zoomcamp course](../) 30 | * [Session 11: KServe](./) 31 | * Previous: [KServe transformers](06-kserve-transformers.md) 32 | * Next: [Summary](08-summary.md) 33 | -------------------------------------------------------------------------------- /11-kserve/08-summary.md: -------------------------------------------------------------------------------- 1 | 2 | ## 11.8 Summary 3 | 4 | Coming soon (or not so soon) 5 | 6 | 7 | ## Notes 8 | 9 | Add notes from the video (PRs are welcome) 10 | 11 | 12 | 13 | 14 | 15 | 19 | 20 |
⚠️ 16 | The notes are written by the community.
17 | If you see an error here, please create a PR with a fix. 18 |
21 | 22 | 23 | ## Navigation 24 | 25 | * [Machine Learning Zoomcamp course](../) 26 | * [Session 11: KServe](./) 27 | * Previous: [Deploying with KServe and EKS](07-kserve-eks.md) 28 | * Next: [Explore more](09-explore-more.md) -------------------------------------------------------------------------------- /11-kserve/09-explore-more.md: -------------------------------------------------------------------------------- 1 | 2 | ## 11.9 Explore more 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | ## Notes 11 | 12 | Add notes from the video (PRs are welcome) 13 | 14 | 15 | 16 | 17 | 18 | 22 | 23 |
⚠️ 19 | The notes are written by the community.
20 | If you see an error here, please create a PR with a fix. 21 |
24 | 25 | 26 | ## Navigation 27 | 28 | * [Machine Learning Zoomcamp course](../) 29 | * [Session 11: KServe](./) 30 | * Previous: [Summary](08-summary.md) -------------------------------------------------------------------------------- /11-kserve/README.md: -------------------------------------------------------------------------------- 1 | ## 11. KServe 2 | 3 | - 11.1 [Overview](01-overview.md) 4 | - 11.2 [Running KServe locally](02-kserve-local.md) 5 | - 11.3 [Deploying a Scikit-Learn model with KServe](03-kserve-sklearn.md) 6 | - 11.4 [Deploying custom Scikit-Learn images with KServe](04-kserve-custom-image.md) 7 | - 11.5 [Serving TensorFlow models with KServe](05-tensorflow-kserve.md) 8 | - 11.6 [KServe transformers](06-kserve-transformers.md) 9 | - 11.7 [Deploying with KServe and EKS](07-kserve-eks.md) 10 | - 11.8 [Summary](08-summary.md) 11 | - 11.9 [Explore more](09-explore-more.md) -------------------------------------------------------------------------------- /11-kserve/code/.gitignore: -------------------------------------------------------------------------------- 1 | kserve -------------------------------------------------------------------------------- /11-kserve/code/README.md: -------------------------------------------------------------------------------- 1 | 2 | ```bash 3 | kubectl port-forward -n istio-system service/istio-ingressgateway 8080:80 4 | 5 | 6 | SERVICE_NAME="sklearn-iris" 7 | HOST="${SERVICE_NAME}.default.example.com" 8 | ACTUAL_HOST="http://localhost:8080" 9 | URL="${ACTUAL_HOST}/v1/models/${SERVICE_NAME}:predict" 10 | 11 | curl -H "Host: ${HOST}" \ 12 | ${URL} \ 13 | -d @iris-request.json 14 | 15 | 16 | docker build -t kserve-sklearnserver:predict_proba-3.8-1.0 -f sklearn.Dockerfile . 
17 | 18 | docker run -it --rm \ 19 | -v "$(pwd)/model.joblib:/mnt/models/model.joblib" \ 20 | -p 8081:8080 \ 21 | kserve-sklearnserver:predict_proba-3.8-1.0 \ 22 | --model_dir=/mnt/models \ 23 | --model_name=churn 24 | ``` -------------------------------------------------------------------------------- /11-kserve/code/churn/Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | url = "https://pypi.org/simple" 3 | verify_ssl = true 4 | name = "pypi" 5 | 6 | [packages] 7 | scikit-learn = "==1.0" 8 | joblib = "*" 9 | pandas = "*" 10 | 11 | [dev-packages] 12 | 13 | [requires] 14 | python_version = "3.8" 15 | -------------------------------------------------------------------------------- /11-kserve/code/churn/churn-service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: "serving.kserve.io/v1beta1" 2 | kind: "InferenceService" 3 | metadata: 4 | name: "churn" 5 | spec: 6 | predictor: 7 | sklearn: 8 | #image: kserve-sklearnserver:predict_proba-3.8-1.0 9 | image: agrigorev/sklearnserver:3.8-1.0-predict-proba 10 | storageUri: "http://172.31.13.90:8000/churn/model.joblib" 11 | resources: 12 | requests: 13 | cpu: 300m 14 | memory: 256Mi 15 | limits: 16 | cpu: 500m 17 | memory: 512Mi -------------------------------------------------------------------------------- /11-kserve/code/churn/churn-test.py: -------------------------------------------------------------------------------- 1 | import requests 2 | 3 | service_name = 'churn' 4 | host = f'{service_name}.default.example.com' 5 | 6 | actual_domain = 'http://localhost:8080' 7 | url = f'{actual_domain}/v1/models/{service_name}:predict' 8 | 9 | 10 | headers = {'Host': host} 11 | 12 | request = { 13 | "instances": [ 14 | {'contract': 'one_year', 'tenure': 34, 'monthlycharges': 56.95}, 15 | {'contract': 'month-to-month', 'tenure': 13, 'monthlycharges': 49.95} 16 | ] 17 | } 18 | 19 | response = requests.post(url, 
json=request, headers=headers) 20 | print(response.json()) -------------------------------------------------------------------------------- /11-kserve/code/churn/churn-train.py: -------------------------------------------------------------------------------- 1 | import joblib 2 | 3 | import pandas as pd 4 | import numpy as np 5 | 6 | from sklearn.pipeline import Pipeline 7 | from sklearn.feature_extraction import DictVectorizer 8 | from sklearn.linear_model import LogisticRegression 9 | 10 | url = 'https://github.com/alexeygrigorev/mlbookcamp-code/blob/master/chapter-03-churn-prediction/WA_Fn-UseC_-Telco-Customer-Churn.csv?raw=true' 11 | df = pd.read_csv(url) 12 | 13 | df.columns = df.columns.str.lower().str.replace(' ', '_') 14 | 15 | categorical_columns = list(df.dtypes[df.dtypes == 'object'].index) 16 | 17 | for c in categorical_columns: 18 | df[c] = df[c].str.lower().str.replace(' ', '_') 19 | 20 | df.totalcharges = pd.to_numeric(df.totalcharges, errors='coerce') 21 | df.totalcharges = df.totalcharges.fillna(0) 22 | 23 | df.churn = (df.churn == 'yes').astype(int) 24 | 25 | 26 | numerical = ['tenure', 'monthlycharges'] 27 | categorical = ['contract'] 28 | 29 | 30 | dicts = df[categorical + numerical].to_dict(orient='records') 31 | 32 | pipeline = Pipeline([ 33 | ('vectorizer', DictVectorizer(sparse=False)), 34 | ('model', LogisticRegression(C=1.0, solver='liblinear')) 35 | ]) 36 | 37 | pipeline.fit(dicts, df.churn.values) 38 | 39 | joblib.dump(pipeline, 'model.joblib') 40 | 41 | -------------------------------------------------------------------------------- /11-kserve/code/churn/model.joblib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/11-kserve/code/churn/model.joblib -------------------------------------------------------------------------------- /11-kserve/code/clothes/clothes-service.yaml: 
-------------------------------------------------------------------------------- 1 | apiVersion: "serving.kserve.io/v1beta1" 2 | kind: "InferenceService" 3 | metadata: 4 | name: "clothes" 5 | spec: 6 | transformer: 7 | containers: 8 | - image: "agrigorev/kfserving-keras-transformer:0.0.1" 9 | name: user-container 10 | env: 11 | - name: MODEL_INPUT_SIZE 12 | value: "299,299" 13 | - name: KERAS_MODEL_NAME 14 | value: "xception" 15 | - name: MODEL_LABELS 16 | value: "dress,hat,longsleeve,outwear,pants,shirt,shoes,shorts,skirt,t-shirt" 17 | resources: 18 | requests: 19 | cpu: 300m 20 | memory: 256Mi 21 | limits: 22 | cpu: 500m 23 | memory: 512Mi 24 | predictor: 25 | tensorflow: 26 | storageUri: "http://172.31.13.90:8000/clothes/clothing-model/clothing-model.zip" 27 | resources: 28 | requests: 29 | cpu: 500m 30 | memory: 512Mi 31 | limits: 32 | cpu: 1000m 33 | memory: 512Mi -------------------------------------------------------------------------------- /11-kserve/code/clothes/convert.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow import keras 3 | 4 | model = keras.models.load_model('xception_v4_large_08_0.894.h5') 5 | 6 | tf.saved_model.save(model, 'clothing-model') 7 | -------------------------------------------------------------------------------- /11-kserve/code/clothes/test-transformer.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | import requests 5 | 6 | 7 | service_name = 'clothes' 8 | host = f'{service_name}.default.example.com' 9 | 10 | actual_domain = 'http://localhost:8080' 11 | service_url = f'{actual_domain}/v1/models/{service_name}:predict' 12 | 13 | headers = {'Host': host} 14 | 15 | 16 | request = { 17 | "instances": [ 18 | {'url': 'http://bit.ly/mlbookcamp-pants'}, 19 | {'url': 'http://bit.ly/mlbookcamp-pants'} 20 | ] 21 | } 22 | 23 | 24 | response = requests.post(service_url, 
json=request, headers=headers) 25 | 26 | print(response) 27 | print(response.content) 28 | print(response.json()) 29 | -------------------------------------------------------------------------------- /11-kserve/code/clothes/test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | import requests 5 | 6 | from keras_image_helper import create_preprocessor 7 | 8 | preprocessor = create_preprocessor('xception', target_size=(299, 299)) 9 | 10 | 11 | service_name = 'clothes' 12 | host = f'{service_name}.default.example.com' 13 | 14 | actual_domain = 'http://localhost:8080' 15 | service_url = f'{actual_domain}/v1/models/{service_name}:predict' 16 | 17 | headers = {'Host': host} 18 | 19 | 20 | url = 'http://bit.ly/mlbookcamp-pants' 21 | X = preprocessor.from_url(url) 22 | 23 | 24 | request = { 25 | "instances": X.tolist() 26 | } 27 | 28 | 29 | response = requests.post(service_url, json=request, headers=headers).json() 30 | 31 | predictions = response['predictions'] 32 | 33 | 34 | classes = [ 35 | 'dress', 36 | 'hat', 37 | 'longsleeve', 38 | 'outwear', 39 | 'pants', 40 | 'shirt', 41 | 'shoes', 42 | 'shorts', 43 | 'skirt', 44 | 't-shirt' 45 | ] 46 | 47 | 48 | pred = predictions[0] 49 | 50 | print(dict(zip(classes, pred))) 51 | -------------------------------------------------------------------------------- /11-kserve/code/eks/clothes-service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: "serving.kserve.io/v1beta1" 2 | kind: "InferenceService" 3 | metadata: 4 | name: "clothes" 5 | spec: 6 | transformer: 7 | containers: 8 | - image: "agrigorev/kfserving-keras-transformer:0.0.1" 9 | name: user-container 10 | env: 11 | - name: MODEL_INPUT_SIZE 12 | value: "299,299" 13 | - name: KERAS_MODEL_NAME 14 | value: "xception" 15 | - name: MODEL_LABELS 16 | value: "dress,hat,longsleeve,outwear,pants,shirt,shoes,shorts,skirt,t-shirt" 17 | 
resources: 18 | requests: 19 | cpu: 300m 20 | memory: 256Mi 21 | limits: 22 | cpu: 500m 23 | memory: 512Mi 24 | predictor: 25 | serviceAccountName: sa 26 | tensorflow: 27 | storageUri: "s3://alexey-mlzookcamp-models/clothing-model" 28 | resources: 29 | requests: 30 | cpu: 500m 31 | memory: 512Mi 32 | limits: 33 | cpu: 1000m 34 | memory: 512Mi -------------------------------------------------------------------------------- /11-kserve/code/eks/cluster.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: eksctl.io/v1alpha5 2 | kind: ClusterConfig 3 | 4 | metadata: 5 | name: mlzoomcamp-eks 6 | region: eu-west-1 7 | version: "1.21" 8 | 9 | nodeGroups: 10 | - name: ng 11 | desiredCapacity: 2 12 | instanceType: m5.xlarge 13 | -------------------------------------------------------------------------------- /11-kserve/code/eks/test-transformer.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | import requests 5 | 6 | 7 | service_name = 'clothes' 8 | actual_domain = 'https://clothes.default.kubeflow.mlbookcamp.com' 9 | service_url = f'{actual_domain}/v1/models/{service_name}:predict' 10 | 11 | request = { 12 | "instances": [ 13 | {'url': 'http://bit.ly/mlbookcamp-pants'}, 14 | {'url': 'http://bit.ly/mlbookcamp-pants'} 15 | ] 16 | } 17 | 18 | 19 | response = requests.post(service_url, json=request) 20 | 21 | print(response) 22 | print(response.content) 23 | print(response.json()) 24 | -------------------------------------------------------------------------------- /11-kserve/code/image_transfomer/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.8.12-slim 2 | 3 | RUN pip install pipenv 4 | 5 | WORKDIR /app 6 | 7 | COPY ["Pipfile", "Pipfile.lock", "./"] 8 | 9 | RUN pipenv install --system --deploy 10 | 11 | COPY "image_tranformer.py" . 
12 | 13 | ENTRYPOINT ["python", "image_tranformer.py"] -------------------------------------------------------------------------------- /11-kserve/code/image_transfomer/Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | url = "https://pypi.org/simple" 3 | verify_ssl = true 4 | name = "pypi" 5 | 6 | [packages] 7 | kserve = "==0.7.0" 8 | keras-image-helper = "*" 9 | 10 | [dev-packages] 11 | 12 | [requires] 13 | python_version = "3.8" 14 | -------------------------------------------------------------------------------- /11-kserve/code/iris/iris-example.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: "serving.kserve.io/v1beta1" 2 | kind: "InferenceService" 3 | metadata: 4 | name: "sklearn-iris" 5 | spec: 6 | predictor: 7 | sklearn: 8 | storageUri: "gs://kfserving-samples/models/sklearn/iris" -------------------------------------------------------------------------------- /11-kserve/code/iris/iris-request.json: -------------------------------------------------------------------------------- 1 | { 2 | "instances": [ 3 | [6.8, 2.8, 4.8, 1.4], 4 | [6.0, 3.4, 4.5, 1.6] 5 | ] 6 | } -------------------------------------------------------------------------------- /11-kserve/code/iris/iris-test.py: -------------------------------------------------------------------------------- 1 | import requests 2 | 3 | service_name = 'sklearn-iris' 4 | host = f'{service_name}.default.example.com' 5 | 6 | actual_domain = 'http://localhost:8080' 7 | url = f'{actual_domain}/v1/models/{service_name}:predict' 8 | 9 | headers = { 10 | 'Host': host 11 | } 12 | 13 | request = { 14 | "instances": [ 15 | [6.8, 2.8, 4.8, 1.4], 16 | [6.0, 3.4, 4.5, 1.6] 17 | ] 18 | } 19 | 20 | response = requests.post(url, json=request, headers=headers) 21 | print(response.json()) -------------------------------------------------------------------------------- /11-kserve/code/plan.md: 
-------------------------------------------------------------------------------- 1 | # 11. Deploying models with KServe (formerly Kubeflow Serving) 2 | 3 | This week we'll learn how to use KServe for deploying ML models. 4 | 5 | We'll deploy two models: 6 | 7 | * Churn prediction model (Scikit-Learn) 8 | * Clothing classification model (TensorFlow) 9 | 10 | 11 | ## 11.1 Overview 12 | 13 | * Kubeflow and KServe 14 | * What we'll cover this week 15 | * Two-tier architecture 16 | 17 | 18 | ## 11.2 Running KServe locally 19 | 20 | * Installing KServe locally with kind 21 | * Deploying an example model from documentation 22 | 23 | 24 | ## 11.3 Deploying a Scikit-Learn model with KServe 25 | 26 | * Training the churn model with specific Scikit-Learn version 27 | * Deploying the churn prediction model with KServe 28 | 29 | 30 | ## 11.4 Deploying custom Scikit-Learn images with KServe 31 | 32 | * Customizing the Scikit-Learn image 33 | * Running KServe service locally 34 | 35 | 36 | ## 11.5 Serving TensorFlow models with KServe 37 | 38 | * Converting the Keras model to saved_model format 39 | * Deploying the model 40 | * Preparing the input 41 | 42 | 43 | ## 11.6 KServe transformers 44 | 45 | * Why do we need transformers 46 | * Creating a service for pre- and post-processing 47 | * Using existing transformers 48 | 49 | 50 | ## 11.7 Deploying with KServe and EKS 51 | 52 | * Creating an EKS cluster 53 | * Installing KServe on EKS 54 | * Configuring the domain 55 | * Setting up S3 access 56 | * Deploying the clothing model 57 | 58 | [Guide used during the video](https://github.com/alexeygrigorev/kubeflow-deep-learning/blob/main/guide.md) 59 | 60 | 61 | ## 11.8 Summary 62 | 63 | * Less yaml, faster deployment 64 | * Less stability 65 | * The need for Ops is not gone 66 | 67 | 68 | ## 11.9 Explore more 69 | 70 | * Helm charts 71 | * Kubeflow, Kubeflow pipelines 72 | * Sagemaker 73 | * A lot of vendors that take care of Ops 74 | 75 | 76 | 
-------------------------------------------------------------------------------- /11-kserve/images/thumbnail-11-01.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/11-kserve/images/thumbnail-11-01.jpg -------------------------------------------------------------------------------- /11-kserve/images/thumbnail-11-02.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/11-kserve/images/thumbnail-11-02.jpg -------------------------------------------------------------------------------- /11-kserve/images/thumbnail-11-03.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/11-kserve/images/thumbnail-11-03.jpg -------------------------------------------------------------------------------- /11-kserve/images/thumbnail-11-04.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/11-kserve/images/thumbnail-11-04.jpg -------------------------------------------------------------------------------- /11-kserve/images/thumbnail-11-05.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/11-kserve/images/thumbnail-11-05.jpg -------------------------------------------------------------------------------- /11-kserve/images/thumbnail-11-06.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/11-kserve/images/thumbnail-11-06.jpg -------------------------------------------------------------------------------- /11-kserve/images/thumbnail-11-07.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/11-kserve/images/thumbnail-11-07.jpg -------------------------------------------------------------------------------- /11-kserve/meta.csv: -------------------------------------------------------------------------------- 1 | lesson,name,page_name,video,slides,notebook 2 | 1,Overview,01-overview.md,https://www.youtube.com/watch?v=CFWK05dQoGM,, 3 | 2,Running KServe locally,02-kserve-local.md,https://www.youtube.com/watch?v=A3tFt14iTEI,, 4 | 3,Deploying a Scikit-Learn model with KServe,03-kserve-sklearn.md,https://www.youtube.com/watch?v=8kBIDggLwgE,, 5 | 4,Deploying custom Scikit-Learn images with KServe,04-kserve-custom-image.md,https://www.youtube.com/watch?v=REGNWrHZiCw,, 6 | 5,Serving TensorFlow models with KServe,05-tensorflow-kserve.md,https://www.youtube.com/watch?v=iXwQb2LRKGI,, 7 | 6,KServe transformers,06-kserve-transformers.md,https://www.youtube.com/watch?v=haowRqeAqJU,, 8 | 7,Deploying with KServe and EKS,07-kserve-eks.md,https://www.youtube.com/watch?v=MpuTzBSYBBI,, 9 | 8,Summary,08-summary.md,,, 10 | 9,Explore more,09-explore-more.md,,, -------------------------------------------------------------------------------- /11-kserve/meta.json: -------------------------------------------------------------------------------- 1 | { 2 | "data": "meta.csv", 3 | "session": 11, 4 | "name": "KServe" 5 | } -------------------------------------------------------------------------------- /after-sign-up.md: -------------------------------------------------------------------------------- 1 | ## Thank you! 
2 | 3 | Thanks for signing up for the course. 4 | 5 | The process of adding you to the mailing list is not automated yet, 6 | but you will hear from us closer to the course start. 7 | 8 | To make sure you don't miss any announcements: 9 | 10 | - Register in [DataTalks.Club's Slack](https://datatalks.club/slack.html) and join the [`#course-ml-zoomcamp`](https://app.slack.com/client/T01ATQK62F8/C0288NJ5XSA) channel 11 | - Join the [course Telegram channel with announcements](https://t.me/mlzoomcamp) 12 | - [Tweet about the course!](https://clicktotweet.com/XZ6b9) 13 | - Subscribe to [DataTalks.Club's YouTube channel](https://www.youtube.com/c/DataTalksClub) and check 14 | [the course playlist](https://www.youtube.com/playlist?list=PL3MmuxUbc_hIhxl5Ji8t4O6lPAOpHaCLR) 15 | - Subscribe to our [public Google Calendar](https://calendar.google.com/calendar/?cid=cGtjZ2tkbGc1OG9yb2lxa2Vwc2g4YXMzMmNAZ3JvdXAuY2FsZW5kYXIuZ29vZ2xlLmNvbQ) (it works from Desktop only) 16 | - Check our [Technical FAQ](https://docs.google.com/document/d/1LpPanc33QJJ6BSsyxVg-pWNMplal84TdZtq10naIhD8/edit) if you have questions 17 | 18 | See you in September! 19 | -------------------------------------------------------------------------------- /asking-questions.md: -------------------------------------------------------------------------------- 1 | ## Asking questions 2 | 3 | If you have any questions, ask them 4 | in the [`#course-ml-zoomcamp`](https://app.slack.com/client/T01ATQK62F8/C0288NJ5XSA) channel in [DataTalks.Club](https://datatalks.club) Slack. 5 | 6 | To keep our discussion in Slack more organized, we ask you to follow these suggestions: 7 | 8 | * Before asking a question, check the [FAQ](https://docs.google.com/document/d/1LpPanc33QJJ6BSsyxVg-pWNMplal84TdZtq10naIhD8/edit). 9 | * Use threads. When you have a problem, first briefly describe the problem 10 | and then put the actual error in the thread - so it doesn't take the entire screen. 
11 | * Instead of screenshots, it's better to copy-paste the error you're getting in text. 12 | Use ` ``` ` for formatting your code. 13 | It's very difficult to read text from screenshots. 14 | * Please don't take pictures of your code with a phone. It's even harder to read. Follow the previous suggestion, 15 | and in rare cases when you need to show what happens on your screen, take a screenshot. 16 | * You don't need to tag the instructors when you have a problem. We will see it eventually. 17 | * If somebody helped you with your problem and it's not in the [FAQ](https://docs.google.com/document/d/1LpPanc33QJJ6BSsyxVg-pWNMplal84TdZtq10naIhD8/edit), please add it there. 18 | It'll help other students. 19 | 20 | -------------------------------------------------------------------------------- /bento.md: -------------------------------------------------------------------------------- 1 | 2 | ## 7. [Production-Ready Machine Learning (BentoML)](07-bentoml-production/) 3 | 4 | - 7.1 [Intro/Session Overview](07-bentoml-production/01-intro.md) 5 | - 7.2 [Building Your Prediction Service with BentoML](07-bentoml-production/02-build-bento-service.md) 6 | - 7.3 [Deploying Your Prediction Service](07-bentoml-production/03-deploy-bento-service.md) 7 | - 7.4 [Sending, Receiving and Validating Data](07-bentoml-production/04-validation.md) 8 | - 7.5 [High-Performance Serving](07-bentoml-production/05-high-performance.md) 9 | - 7.6 [Bento Production Deployment](07-bentoml-production/06-production-deployment.md) 10 | - 7.7 [(Optional) Advanced Example: Deploying Stable Diffusion Model](07-bentoml-production/07-stable-diffusion.md) 11 | - 7.8 [Summary](07-bentoml-production/08-summary.md) 12 | - 7.9 [Homework](07-bentoml-production/homework.md) 13 | 14 | 15 | ## Supporters and partners 16 | 17 | Thanks to the course sponsors for making it possible to run this course. 18 | 19 |

20 | 21 | 22 | 23 |

24 | -------------------------------------------------------------------------------- /certificates.md: -------------------------------------------------------------------------------- 1 | ## Getting your certificate 2 | 3 | Congratulations on finishing the course! 4 | 5 | Here's how you can get your certificate. 6 | 7 | First, get your certificate id using the `compute_certificate_id` function: 8 | 9 | ```python 10 | from hashlib import sha1 11 | 12 | def compute_hash(email): 13 | return sha1(email.encode('utf-8')).hexdigest() 14 | 15 | def compute_certificate_id(email): 16 | email_clean = email.lower().strip() 17 | return compute_hash(email_clean + '_') 18 | ``` 19 | 20 | > **Note** that this is not the same hash as you have on the leaderboard 21 | > There's an extra "_" added to your email, so the hash is different. 22 | 23 | 24 | Then use this hash to get the URL 25 | 26 | ```python 27 | cohort = 2023 28 | course = 'ml-zoomcamp' 29 | your_id = compute_certificate_id('never.give.up@gmail.com') 30 | url = f"https://certificate.datatalks.club/{course}/{cohort}/{your_id}.pdf" 31 | print(url) 32 | ``` 33 | 34 | Example: https://certificate.datatalks.club/ml-zoomcamp/2023/fe629854d45c559e9c10b3b8458ea392fdeb68a9.pdf 35 | 36 | 37 | ## Adding to LinkedIn 38 | 39 | You can add your certificate to LinkedIn: 40 | 41 | * Log in to your LinkedIn account, then go to your profile. 42 | * On the right, in the "Add profile" section dropdown, choose "Background" and then select the drop-down triangle next to "Licenses & Certifications". 43 | * In "Name", enter "Machine Learning Zoomcamp". 44 | * In "Issuing Organization", enter "DataTalksClub". 45 | * (Optional) In "Issue Date", enter the time when the certificate was created. 46 | * (Optional) Select the checkbox This certification does not expire. 47 | * Put your certificate ID. 48 | * In "Certification URL", enter the URL for your certificate. 
49 | 50 | [Adapted from here](https://support.edx.org/hc/en-us/articles/206501938-How-can-I-add-my-certificate-to-my-LinkedIn-profile-) 51 | -------------------------------------------------------------------------------- /cohorts/2021/05-deployment/homework/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM agrigorev/zoomcamp-model:3.8.12-slim 2 | 3 | RUN pip install pipenv 4 | 5 | COPY ["Pipfile", "Pipfile.lock", "./"] 6 | 7 | RUN pipenv install --system --deploy 8 | 9 | COPY ["q6_predict.py", "./"] 10 | 11 | EXPOSE 9696 12 | 13 | ENTRYPOINT ["gunicorn", "--bind=0.0.0.0:9696", "q6_predict:app"] -------------------------------------------------------------------------------- /cohorts/2021/05-deployment/homework/Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | url = "https://pypi.org/simple" 3 | verify_ssl = true 4 | name = "pypi" 5 | 6 | [packages] 7 | scikit-learn = "==1.0" 8 | flask = "*" 9 | gunicorn = "*" 10 | 11 | [dev-packages] 12 | 13 | [requires] 14 | python_version = "3.8" 15 | -------------------------------------------------------------------------------- /cohorts/2021/05-deployment/homework/dv.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/cohorts/2021/05-deployment/homework/dv.bin -------------------------------------------------------------------------------- /cohorts/2021/05-deployment/homework/homework.md: -------------------------------------------------------------------------------- 1 | ../homework.md -------------------------------------------------------------------------------- /cohorts/2021/05-deployment/homework/model1.bin: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/cohorts/2021/05-deployment/homework/model1.bin -------------------------------------------------------------------------------- /cohorts/2021/05-deployment/homework/q3_test.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | 3 | def load(filename): 4 | with open(filename, 'rb') as f_in: 5 | return pickle.load(f_in) 6 | 7 | 8 | dv = load('dv.bin') 9 | model = load('model1.bin') 10 | 11 | customer = {"contract": "two_year", "tenure": 12, "monthlycharges": 19.7} 12 | 13 | X = dv.transform([customer]) 14 | y_pred = model.predict_proba(X)[0, 1] 15 | 16 | print(y_pred) -------------------------------------------------------------------------------- /cohorts/2021/05-deployment/homework/q4_predict.py: -------------------------------------------------------------------------------- 1 | from flask import Flask 2 | from flask import request 3 | from flask import jsonify 4 | 5 | import pickle 6 | 7 | def load(filename): 8 | with open(filename, 'rb') as f_in: 9 | return pickle.load(f_in) 10 | 11 | 12 | dv = load('dv.bin') 13 | model = load('model1.bin') 14 | 15 | app = Flask('churn') 16 | 17 | @app.route('/predict', methods=['POST']) 18 | def predict(): 19 | customer = request.get_json() 20 | 21 | X = dv.transform([customer]) 22 | y_pred = model.predict_proba(X)[0, 1] 23 | churn = y_pred >= 0.5 24 | 25 | result = { 26 | 'churn_probability': float(y_pred), 27 | 'churn': bool(churn) 28 | } 29 | 30 | return jsonify(result) 31 | 32 | 33 | if __name__ == "__main__": 34 | app.run(debug=True, host='0.0.0.0', port=9696) 35 | 36 | -------------------------------------------------------------------------------- /cohorts/2021/05-deployment/homework/q4_test.py: -------------------------------------------------------------------------------- 1 | 2 | import requests 3 | 4 | url = "http://localhost:9696/predict" 5 | 6 | 
import os
import pickle

from flask import Flask
from flask import request
from flask import jsonify


def load(filename):
    """Unpickle and return the object stored in `filename`.

    NOTE(review): unpickling can execute arbitrary code, so only point this
    at artifacts that come from a trusted source.
    """
    with open(filename, 'rb') as f_in:
        return pickle.load(f_in)


# Both artifacts are loaded once at import time and reused by every request.
dv = load('dv.bin')
model = load('model2.bin')

app = Flask('churn')


@app.route('/predict', methods=['POST'])
def predict():
    """Score the customer posted as a JSON object.

    Responds with JSON holding `churn_probability` (float) and `churn`
    (bool, true when the probability is at least 0.5). A body that is not a
    JSON object is answered with HTTP 400 instead of failing inside
    `dv.transform`.
    """
    # silent=True yields None for a missing or malformed JSON body, so every
    # bad-input case is handled by the single check below.
    customer = request.get_json(silent=True)
    if not isinstance(customer, dict):
        return jsonify({'error': 'request body must be a JSON object'}), 400

    X = dv.transform([customer])
    # One row was submitted; column 1 is reported as the churn probability.
    y_pred = model.predict_proba(X)[0, 1]
    churn = y_pred >= 0.5

    result = {
        'churn_probability': float(y_pred),
        'churn': bool(churn)
    }

    return jsonify(result)


if __name__ == "__main__":
    # The Werkzeug debugger lets a client run arbitrary code, so it must not
    # be on by default while listening on every interface. Opt in explicitly
    # with FLASK_DEBUG=1 for local development.
    debug = os.getenv('FLASK_DEBUG', '0') == '1'
    app.run(debug=debug, host='0.0.0.0', port=9696)
#!/usr/bin/env python
# coding: utf-8

# import tensorflow.lite as tflite
import tflite_runtime.interpreter as tflite

import os
import numpy as np

from io import BytesIO
from urllib import request

from PIL import Image


# Path of the TFLite model file; can be overridden through the environment.
MODEL_NAME = os.getenv('MODEL_NAME', 'dogs-vs-cats.tflite')


def download_image(url, timeout=30):
    """Download `url` and open the bytes as a PIL image.

    `timeout` is the number of seconds to wait on the network before
    `urlopen` raises; without it a stalled server would block the handler
    until the whole invocation is killed.

    NOTE(review): `url` comes straight from the event and `urlopen` also
    accepts non-HTTP schemes such as file:// - consider restricting it.
    """
    with request.urlopen(url, timeout=timeout) as resp:
        buffer = resp.read()
    stream = BytesIO(buffer)
    img = Image.open(stream)
    return img


def prepare_image(img, target_size):
    """Convert `img` to RGB if needed and resize it to `target_size`."""
    if img.mode != 'RGB':
        img = img.convert('RGB')
    img = img.resize(target_size, Image.NEAREST)
    return img


def prepare_input(x):
    """Scale the array by 1/255."""
    return x / 255.0


# The interpreter is created once at import time and reused by every call.
interpreter = tflite.Interpreter(model_path=MODEL_NAME)
interpreter.allocate_tensors()

input_index = interpreter.get_input_details()[0]['index']
output_index = interpreter.get_output_details()[0]['index']


# 'https://upload.wikimedia.org/wikipedia/commons/9/9a/Pug_600.jpg'

def predict(url):
    """Run the model on the image at `url` and return `preds[0, 0]` as float."""
    img = download_image(url)
    img = prepare_image(img, target_size=(150, 150))

    x = np.array(img, dtype='float32')
    # Wrap the single image in an outer array so the model gets a batch of one.
    X = np.array([x])
    X = prepare_input(X)

    interpreter.set_tensor(input_index, X)
    interpreter.invoke()

    preds = interpreter.get_tensor(output_index)

    return float(preds[0, 0])


def lambda_handler(event, context):
    """Lambda entry point: read `event['url']` and return the prediction."""
    url = event['url']
    pred = predict(url)
    result = {
        'prediction': pred
    }

    return result
Third project 2 | 3 | The requirements are the same as for the [capstone project](../12-capstone/) 4 | 5 | 6 | * Submit your project [here](https://forms.gle/2V2qyDKeUhGwnfn4A) 7 | * Deadline: January 26, 23:00 CET 8 | * Evaluate your peers 9 | * Project evaluation assignment: [here](https://docs.google.com/spreadsheets/d/e/2PACX-1vSXSWfDcoeefJZbt4anSRZQVuDGzkij8eiSdWoCRD3GbKSAYj-6BSA5X9M0w5CstxtMXU2jjuTtWr_v/pubhtml) 10 | * Submit the results [here](https://forms.gle/WYAcXaawfcpGtZGD7) 11 | * Deadline: February 2, 22:00 CET 12 | 13 | -------------------------------------------------------------------------------- /cohorts/2021/office-hours.md: -------------------------------------------------------------------------------- 1 | ## Office Hours 2 | 3 | -------------------------------------------------------------------------------- /cohorts/2022/05-deployment/homework/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM svizor/zoomcamp-model:3.9.12-slim 2 | 3 | RUN pip install pipenv 4 | 5 | COPY ["Pipfile", "Pipfile.lock", "./"] 6 | 7 | RUN pipenv install --system --deploy 8 | 9 | COPY ["q6_predict.py", "./"] 10 | 11 | EXPOSE 9696 12 | 13 | ENTRYPOINT ["waitress-serve", "--listen=0.0.0.0:9696", "q6_predict:app"] -------------------------------------------------------------------------------- /cohorts/2022/05-deployment/homework/Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | url = "https://pypi.org/simple" 3 | verify_ssl = true 4 | name = "pypi" 5 | 6 | [packages] 7 | scikit-learn = "==1.0.2" 8 | flask = "*" 9 | waitress = "*" 10 | 11 | [dev-packages] 12 | 13 | [requires] 14 | python_version = "3.9" 15 | -------------------------------------------------------------------------------- /cohorts/2022/05-deployment/homework/dv.bin: -------------------------------------------------------------------------------- 
def load(filename: str):
    """Return the object unpickled from the file at `filename`."""
    with open(filename, mode='rb') as source:
        obj = pickle.load(source)
    return obj
import os
import pickle

from flask import Flask
from flask import request
from flask import jsonify


def load(filename: str):
    """Unpickle and return the object stored in `filename`.

    NOTE(review): unpickling can execute arbitrary code, so only point this
    at artifacts that come from a trusted source.
    """
    with open(filename, 'rb') as f_in:
        return pickle.load(f_in)


# Both artifacts are loaded once at import time and reused by every request.
dv = load('dv.bin')
model = load('model2.bin')

# NOTE(review): 'credict-card' looks like a typo for 'credit-card'; it is a
# runtime string, so it is left unchanged here.
app = Flask('credict-card')


@app.route('/predict', methods=['POST'])
def predict():
    """Score the client posted as a JSON object.

    Responds with JSON holding `get_card_probability` (float) and `get_card`
    (bool, true when the probability is at least 0.5). A body that is not a
    JSON object is answered with HTTP 400 instead of failing inside
    `dv.transform`.
    """
    # silent=True yields None for a missing or malformed JSON body, so every
    # bad-input case is handled by the single check below.
    client = request.get_json(silent=True)
    if not isinstance(client, dict):
        return jsonify({'error': 'request body must be a JSON object'}), 400

    X = dv.transform([client])
    # One row was submitted; column 1 is reported as the probability.
    y_pred = model.predict_proba(X)[0, 1]
    get_card = y_pred >= 0.5

    result = {
        'get_card_probability': float(y_pred),
        'get_card': bool(get_card)
    }

    return jsonify(result)


if __name__ == "__main__":
    # The Werkzeug debugger lets a client run arbitrary code, so it must not
    # be on by default while listening on every interface. Opt in explicitly
    # with FLASK_DEBUG=1 for local development.
    debug = os.getenv('FLASK_DEBUG', '0') == '1'
    app.run(debug=debug, host='0.0.0.0', port=9696)
class MLZoomUser(HttpUser):
    """
    Load-test user that posts the module-level `sample` to `/classify`.

    Usage:
        Start locust load testing client with:

            locust -H http://localhost:3000

        Open browser at http://0.0.0.0:8089, adjust desired number of users and spawn
        rate for the load test from the Web UI and start swarming.
    """

    # Wait-time setting for this user, built with locust's `between` helper.
    wait_time = between(0.01, 2)

    @task
    def classify(self):
        # The request body is always the same fixed sample.
        self.client.post("/classify", json=sample)
-------------------------------------------------------------------------------- /cohorts/2022/07-bento-production/resources/dragon3.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/cohorts/2022/07-bento-production/resources/dragon3.jpeg -------------------------------------------------------------------------------- /cohorts/2022/07-bento-production/resources/dragon4.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/cohorts/2022/07-bento-production/resources/dragon4.jpeg -------------------------------------------------------------------------------- /cohorts/2022/09-serverless/homework/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM svizor42/zoomcamp-dino-dragon-lambda:v2 2 | 3 | RUN pip install keras-image-helper 4 | RUN pip install https://github.com/alexeygrigorev/tflite-aws-lambda/raw/main/tflite/tflite_runtime-2.7.0-cp39-cp39-linux_x86_64.whl 5 | 6 | COPY homework.py . 
#!/usr/bin/env python
# coding: utf-8

# import tensorflow.lite as tflite
import tflite_runtime.interpreter as tflite

import os
import numpy as np

from io import BytesIO
from urllib import request

from PIL import Image


# Path of the TFLite model file; can be overridden through the environment.
MODEL_NAME = os.getenv('MODEL_NAME', 'dino-vs-dragon-v2.tflite')


def download_image(url, timeout=30):
    """Download `url` and open the bytes as a PIL image.

    `timeout` is the number of seconds to wait on the network before
    `urlopen` raises; without it a stalled server would block the handler
    until the whole invocation is killed.

    NOTE(review): `url` comes straight from the event and `urlopen` also
    accepts non-HTTP schemes such as file:// - consider restricting it.
    """
    with request.urlopen(url, timeout=timeout) as resp:
        buffer = resp.read()
    stream = BytesIO(buffer)
    img = Image.open(stream)
    return img


def prepare_image(img, target_size):
    """Convert `img` to RGB if needed and resize it to `target_size`."""
    if img.mode != 'RGB':
        img = img.convert('RGB')
    img = img.resize(target_size, Image.NEAREST)
    return img


def prepare_input(x):
    """Scale the array by 1/255."""
    return x / 255.0


# The interpreter is created once at import time and reused by every call.
interpreter = tflite.Interpreter(model_path=MODEL_NAME)
interpreter.allocate_tensors()

input_index = interpreter.get_input_details()[0]['index']
output_index = interpreter.get_output_details()[0]['index']


# 'https://upload.wikimedia.org/wikipedia/en/e/e9/GodzillaEncounterModel.jpg'

def predict(url):
    """Run the model on the image at `url` and return `preds[0, 0]` as float."""
    img = download_image(url)
    img = prepare_image(img, target_size=(150, 150))

    x = np.array(img, dtype='float32')
    # Wrap the single image in an outer array so the model gets a batch of one.
    X = np.array([x])
    X = prepare_input(X)

    interpreter.set_tensor(input_index, X)
    interpreter.invoke()

    preds = interpreter.get_tensor(output_index)

    return float(preds[0, 0])


def lambda_handler(event, context):
    """Lambda entry point: read `event['url']` and return the prediction."""
    url = event['url']
    pred = predict(url)
    result = {
        'prediction': pred
    }

    return result
-------------------------------------------------------------------------------- /cohorts/2022/09-serverless/homework/test.py: -------------------------------------------------------------------------------- 1 | import requests 2 | 3 | url = 'http://localhost:8080/2015-03-31/functions/function/invocations' 4 | 5 | data = {'url': 'https://upload.wikimedia.org/wikipedia/en/e/e9/GodzillaEncounterModel.jpg'} 6 | 7 | result = requests.post(url, json=data).json() 8 | print(result) -------------------------------------------------------------------------------- /cohorts/2022/10-kubernetes/homework/deployment.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: credit-card 5 | spec: 6 | selector: 7 | matchLabels: 8 | app: credit-card 9 | replicas: 1 10 | template: 11 | metadata: 12 | labels: 13 | app: credit-card 14 | spec: 15 | containers: 16 | - name: credit-card 17 | image: zoomcamp-model:v001 18 | resources: 19 | requests: 20 | memory: "64Mi" 21 | cpu: "100m" 22 | limits: 23 | memory: "128Mi" 24 | cpu: "200m" 25 | ports: 26 | - containerPort: 9696 -------------------------------------------------------------------------------- /cohorts/2022/10-kubernetes/homework/hpa.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: autoscaling/v1 2 | kind: HorizontalPodAutoscaler 3 | metadata: 4 | name: credit-card-v1 5 | spec: 6 | scaleTargetRef: 7 | apiVersion: apps/v1 8 | kind: Deployment 9 | name: credit-card 10 | minReplicas: 1 11 | maxReplicas: 3 12 | targetCPUUtilizationPercentage: 20 -------------------------------------------------------------------------------- /cohorts/2022/10-kubernetes/homework/service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: credit-card 5 | spec: 6 | type: LoadBalancer 7 | selector: 8 | app: 
credit-card 9 | ports: 10 | - port: 80 11 | targetPort: 9696 -------------------------------------------------------------------------------- /cohorts/2022/10-kubernetes/homework/test.py: -------------------------------------------------------------------------------- 1 | import requests 2 | from time import sleep 3 | 4 | 5 | url = "http://localhost:9696/predict" 6 | client = {"reports": 0, "share": 0.245, "expenditure": 3.438, "owner": "yes"} 7 | 8 | while True: 9 | sleep(0.1) 10 | response = requests.post(url, json=client).json() 11 | print(response) 12 | -------------------------------------------------------------------------------- /cohorts/2022/article.md: -------------------------------------------------------------------------------- 1 | ## Article (optional) 2 | 3 | The best way to learn about something is to teach it. 4 | 5 | In this part of the course, we'll explore some topics that weren't 6 | covered in the course. 7 | 8 | You'll need to learn about this topic and then write about what 9 | you learned in an article. 10 | 11 | ## Submitting the results 12 | 13 | * Submit your article to [`#course-ml-zoomcamp-articles`](https://app.slack.com/client/T01ATQK62F8/C02QXBFS1PU) - 14 | just share the link to your article there 15 | * Submit the same link to [this form](https://forms.gle/6KKQg5EZPjtBpbb29) so we can link it to your message from the channel 16 | 17 | ## Evaluating 18 | 19 | We'll use voting for scoring your articles. 20 | 21 | * Check the articles in the [`#course-ml-zoomcamp-articles`](https://app.slack.com/client/T01ATQK62F8/C02QXBFS1PU) channel and put a :+1: reaction to articles that you liked 22 | * The top voted articles will get 20 points 23 | 24 | 25 | ## Deadline 26 | 27 | The deadline for finishing the article is 30 January, 22:00 CET. 
28 | 29 | 30 | ## Articles from 2022 31 | 32 | * https://medium.com/@sotoblanco263542/track-your-machine-learning-experiments-with-w-b-d5f9431e6bc2 33 | * https://blog.aaishamuhammad.co.za/posts/onnx_machine_learning/ 34 | * https://github.com/ElenaNKn/scaling_methods 35 | * https://medium.com/@alexandervalverdeguillen/math-optimization-methods-for-machine-learning-8837eba9e3fe 36 | * https://rzabolotin.hashnode.dev/deploying-ml-model-via-telegram-bot 37 | * https://github.com/bsenst/mlbookcamp-2022/blob/main/social-media-analysis-ml-zoomcamp.ipynb 38 | * https://github.com/mary435/Telegram.git 39 | 40 | 41 | 42 | ### Past articles 43 | 44 | See examples of articles from the 2021 cohort [here](../../article/README.md) 45 | -------------------------------------------------------------------------------- /cohorts/2023/05-deployment/homework/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM svizor/zoomcamp-model:3.10.12-slim 2 | 3 | RUN pip install pipenv 4 | COPY ["Pipfile", "Pipfile.lock", "./"] 5 | RUN pipenv install --system --deploy 6 | 7 | COPY ["q6_predict.py", "./"] 8 | EXPOSE 9696 9 | ENTRYPOINT ["waitress-serve", "--listen=0.0.0.0:9696", "q6_predict:app"] -------------------------------------------------------------------------------- /cohorts/2023/05-deployment/homework/Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | url = "https://pypi.org/simple" 3 | verify_ssl = true 4 | name = "pypi" 5 | 6 | [packages] 7 | scikit-learn = "==1.3.1" 8 | flask = "*" 9 | waitress = "*" 10 | 11 | [dev-packages] 12 | 13 | [requires] 14 | python_version = "3.10" 15 | -------------------------------------------------------------------------------- /cohorts/2023/05-deployment/homework/dv.bin: -------------------------------------------------------------------------------- 
def load(filename: str):
    """Return the object unpickled from the file at `filename`."""
    with open(filename, mode='rb') as source:
        obj = pickle.load(source)
    return obj
import os
import pickle

from flask import Flask
from flask import request
from flask import jsonify


def load(filename: str):
    """Unpickle and return the object stored in `filename`.

    NOTE(review): unpickling can execute arbitrary code, so only point this
    at artifacts that come from a trusted source.
    """
    with open(filename, 'rb') as f_in:
        return pickle.load(f_in)


# Both artifacts are loaded once at import time and reused by every request.
dv = load('dv.bin')
model = load('model2.bin')

app = Flask('get-credit')


@app.route('/predict', methods=['POST'])
def predict():
    """Score the client posted as a JSON object.

    Responds with JSON holding `get_credit_probability` (float) and
    `get_credit` (bool, true when the probability is at least 0.5). A body
    that is not a JSON object is answered with HTTP 400 instead of failing
    inside `dv.transform`.
    """
    # silent=True yields None for a missing or malformed JSON body, so every
    # bad-input case is handled by the single check below.
    client = request.get_json(silent=True)
    if not isinstance(client, dict):
        return jsonify({'error': 'request body must be a JSON object'}), 400

    X = dv.transform([client])
    # One row was submitted; column 1 is reported as the probability.
    y_pred = model.predict_proba(X)[0, 1]
    get_credit = y_pred >= 0.5

    result = {
        'get_credit_probability': float(y_pred),
        'get_credit': bool(get_credit)
    }

    return jsonify(result)


if __name__ == "__main__":
    # The Werkzeug debugger lets a client run arbitrary code, so it must not
    # be on by default while listening on every interface. Opt in explicitly
    # with FLASK_DEBUG=1 for local development.
    debug = os.getenv('FLASK_DEBUG', '0') == '1'
    app.run(debug=debug, host='0.0.0.0', port=9696)
-------------------------------------------------------------------------------- /cohorts/2023/article.md: -------------------------------------------------------------------------------- 1 | ## Article (optional) 2 | 3 | The best way to learn about something is to teach it. 4 | 5 | In this part of the course, we'll explore some topics that weren't 6 | covered in the course. 7 | 8 | You'll need to learn about this topic and then write about what 9 | you learned in an article. 10 | 11 | 12 | [More information about articles](../../article/README.md) 13 | 14 | ## Submitting the results 15 | 16 | * Submit your article to [`#course-ml-zoomcamp-articles`](https://app.slack.com/client/T01ATQK62F8/C02QXBFS1PU) - 17 | just share the link to your article there 18 | * Submit the same link to [this form](https://forms.gle/Q5v2K19QitvzegoE9) so we can link it to your message from the channel 19 | 20 | ## Evaluating 21 | 22 | We'll use voting for scoring your articles. 23 | 24 | * Check the articles in the [`#course-ml-zoomcamp-articles`](https://app.slack.com/client/T01ATQK62F8/C02QXBFS1PU) channel and put a :+1: reaction to articles that you liked 25 | * The top voted articles will get 20 points 26 | 27 | 28 | ## Deadline 29 | 30 | The deadline for finishing the article is 31 January, 23:00 CET. 
31 | 32 | 33 | ## Articles from 2023 34 | 35 | * [A Learning Based Approach For Predicting Heart Disease by Oscar Garcia](https://www.ozkary.dev/ai-engineering-predicting-heart-disease-machine-learning/) 36 | * [Understanding Convolutional Layers in a Convolutional Neural Network](https://marcosbenicio.github.io/2023/11/27/cnn.html) by Marcos Benício 37 | * [From Pixels to Predictions: A Journey into Image Segmentation](https://knowmledge.com/2024/01/26/from-pixels-to-predictions-a-journey-into-image-segmentation/) by Peter Ernicke 38 | * Add your article above this line 39 | 40 | 41 | ### Past articles 42 | 43 | See examples of articles from the previous cohorts [here](../../article/README.md) 44 | -------------------------------------------------------------------------------- /cohorts/2024/05-deployment/homework/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM svizor/zoomcamp-model:3.11.5-slim 2 | 3 | RUN pip install pipenv 4 | COPY ["Pipfile", "Pipfile.lock", "./"] 5 | RUN pipenv install --system --deploy 6 | 7 | COPY ["q6_predict.py", "./"] 8 | EXPOSE 9696 9 | ENTRYPOINT ["waitress-serve", "--listen=0.0.0.0:9696", "q6_predict:app"] -------------------------------------------------------------------------------- /cohorts/2024/05-deployment/homework/Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | url = "https://pypi.org/simple" 3 | verify_ssl = true 4 | name = "pypi" 5 | 6 | [packages] 7 | scikit-learn = "==1.5.2" 8 | flask = "*" 9 | waitress = "*" 10 | 11 | [dev-packages] 12 | 13 | [requires] 14 | python_version = "3.11" 15 | -------------------------------------------------------------------------------- /cohorts/2024/05-deployment/homework/dv.bin: -------------------------------------------------------------------------------- 
def load(filename: str):
    """Return the object unpickled from the file at `filename`."""
    with open(filename, mode='rb') as source:
        obj = pickle.load(source)
    return obj
import os
import pickle

from flask import Flask
from flask import request
from flask import jsonify


def load(filename: str):
    """Unpickle and return the object stored in `filename`.

    NOTE(review): unpickling can execute arbitrary code, so only point this
    at artifacts that come from a trusted source.
    """
    with open(filename, 'rb') as f_in:
        return pickle.load(f_in)


# Both artifacts are loaded once at import time and reused by every request.
dv = load('dv.bin')
model = load('model2.bin')

app = Flask('has-subscribed')


@app.route('/predict', methods=['POST'])
def predict():
    """Score the client posted as a JSON object.

    Responds with JSON holding `has_subscribed_probability` (float) and
    `has_subscribed` (bool, true when the probability is at least 0.5). A
    body that is not a JSON object is answered with HTTP 400 instead of
    failing inside `dv.transform`.
    """
    # silent=True yields None for a missing or malformed JSON body, so every
    # bad-input case is handled by the single check below.
    client = request.get_json(silent=True)
    if not isinstance(client, dict):
        return jsonify({'error': 'request body must be a JSON object'}), 400

    X = dv.transform([client])
    # One row was submitted; column 1 is reported as the probability.
    y_pred = model.predict_proba(X)[0, 1]
    has_subscribed = y_pred >= 0.5

    result = {
        'has_subscribed_probability': float(y_pred),
        'has_subscribed': bool(has_subscribed)
    }

    return jsonify(result)


if __name__ == "__main__":
    # The Werkzeug debugger lets a client run arbitrary code, so it must not
    # be on by default while listening on every interface. Opt in explicitly
    # with FLASK_DEBUG=1 for local development.
    debug = os.getenv('FLASK_DEBUG', '0') == '1'
    app.run(debug=debug, host='0.0.0.0', port=9696)
json=client).json() 8 | 9 | print(response) 10 | -------------------------------------------------------------------------------- /cohorts/2024/09-serverless/homework/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM agrigorev/model-2024-hairstyle:v3 2 | 3 | RUN pip install keras-image-helper 4 | RUN pip install --no-deps https://github.com/alexeygrigorev/tflite-aws-lambda/raw/main/tflite/tflite_runtime-2.14.0-cp310-cp310-linux_x86_64.whl 5 | 6 | COPY homework.py . 7 | ENV MODEL_NAME=model_2024_hairstyle_v2.tflite 8 | 9 | CMD [ "homework.lambda_handler" ] -------------------------------------------------------------------------------- /cohorts/2024/09-serverless/homework/entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | if [ -z "${AWS_LAMBDA_RUNTIME_API}" ]; then 3 | exec /usr/local/bin/aws-lambda-rie /usr/bin/python3.12 -m awslambdaric "$@" 4 | else 5 | exec /usr/bin/python3.12 -m awslambdaric "$@" 6 | fi -------------------------------------------------------------------------------- /cohorts/2024/09-serverless/homework/homework.dockerfile: -------------------------------------------------------------------------------- 1 | FROM public.ecr.aws/lambda/python:3.10 2 | 3 | COPY model_2024_hairstyle_v2.tflite . 
4 | 5 | RUN pip install numpy==1.23.1 6 | -------------------------------------------------------------------------------- /cohorts/2024/09-serverless/homework/homework.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | # import tensorflow.lite as tflite 5 | import tflite_runtime.interpreter as tflite 6 | 7 | import os 8 | import numpy as np 9 | 10 | from io import BytesIO 11 | from urllib import request 12 | 13 | from PIL import Image 14 | 15 | 16 | MODEL_NAME = os.getenv('MODEL_NAME', 'model_2024_hairstyle_v2.tflite') 17 | 18 | 19 | def download_image(url): 20 | with request.urlopen(url) as resp: 21 | buffer = resp.read() 22 | stream = BytesIO(buffer) 23 | img = Image.open(stream) 24 | return img 25 | 26 | 27 | def prepare_image(img, target_size): 28 | if img.mode != 'RGB': 29 | img = img.convert('RGB') 30 | img = img.resize(target_size, Image.NEAREST) 31 | return img 32 | 33 | 34 | def prepare_input(x): 35 | return x / 255.0 36 | 37 | 38 | interpreter = tflite.Interpreter(model_path=MODEL_NAME) 39 | interpreter.allocate_tensors() 40 | 41 | input_index = interpreter.get_input_details()[0]['index'] 42 | output_index = interpreter.get_output_details()[0]['index'] 43 | 44 | 45 | # https://habrastorage.org/webt/yf/_d/ok/yf_dokzqy3vcritme8ggnzqlvwa.jpeg 46 | 47 | def predict(url): 48 | img = download_image(url) 49 | img = prepare_image(img, target_size=(200, 200)) 50 | 51 | x = np.array(img, dtype='float32') 52 | X = np.array([x]) 53 | X = prepare_input(X) 54 | 55 | interpreter.set_tensor(input_index, X) 56 | interpreter.invoke() 57 | 58 | preds = interpreter.get_tensor(output_index) 59 | 60 | return float(preds[0, 0]) 61 | 62 | 63 | def lambda_handler(event, context): 64 | url = event['url'] 65 | pred = predict(url) 66 | result = { 67 | 'prediction': pred 68 | } 69 | 70 | return result -------------------------------------------------------------------------------- 
/cohorts/2024/09-serverless/homework/test.py: -------------------------------------------------------------------------------- 1 | import requests 2 | 3 | url = 'http://localhost:8080/2015-03-31/functions/function/invocations' 4 | 5 | data = {'url': 'https://habrastorage.org/webt/yf/_d/ok/yf_dokzqy3vcritme8ggnzqlvwa.jpeg'} 6 | 7 | result = requests.post(url, json=data).json() 8 | print(result) -------------------------------------------------------------------------------- /cohorts/2024/10-kubernetes/homework/deployment.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: subscription 5 | spec: 6 | selector: 7 | matchLabels: 8 | app: subscription 9 | replicas: 1 10 | template: 11 | metadata: 12 | labels: 13 | app: subscription 14 | spec: 15 | containers: 16 | - name: subscription 17 | image: svizor/zoomcamp-model:3.11.5-hw10 18 | resources: 19 | requests: 20 | memory: "64Mi" 21 | cpu: "100m" 22 | limits: 23 | memory: "128Mi" 24 | cpu: "200m" 25 | ports: 26 | - containerPort: 9696 -------------------------------------------------------------------------------- /cohorts/2024/10-kubernetes/homework/hpa.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: autoscaling/v1 2 | kind: HorizontalPodAutoscaler 3 | metadata: 4 | name: subscription-v1 5 | spec: 6 | scaleTargetRef: 7 | apiVersion: apps/v1 8 | kind: Deployment 9 | name: subscription 10 | minReplicas: 1 11 | maxReplicas: 3 12 | targetCPUUtilizationPercentage: 20 -------------------------------------------------------------------------------- /cohorts/2024/10-kubernetes/homework/service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: subscription 5 | spec: 6 | type: LoadBalancer 7 | selector: 8 | app: subscription 9 | ports: 10 | - port: 80 11 | targetPort: 9696 
-------------------------------------------------------------------------------- /cohorts/2024/10-kubernetes/homework/test.py: -------------------------------------------------------------------------------- 1 | import requests 2 | from time import sleep 3 | 4 | 5 | url = "http://localhost:9696/predict" 6 | client = {"job": "management", "duration": 400, "poutcome": "success"} 7 | 8 | while True: 9 | sleep(0.1) 10 | response = requests.post(url, json=client).json() 11 | print(response) 12 | -------------------------------------------------------------------------------- /cohorts/2024/article.md: -------------------------------------------------------------------------------- 1 | ## Article (optional) 2 | 3 | The best way to learn about something is to teach it. 4 | 5 | In this part of the course, we'll explore some topics that weren't 6 | covered in the course. 7 | 8 | You'll need to learn about this topic and then write about what 9 | you learned in an article. 10 | 11 | 12 | [More information about articles](../../article/README.md) 13 | 14 | ## Submitting the results 15 | 16 | * Submit your article to [`#course-ml-zoomcamp-articles`](https://app.slack.com/client/T01ATQK62F8/C02QXBFS1PU) - 17 | just share the link to your article there 18 | * Submit the same link to [this form](https://courses.datatalks.club/ml-zoomcamp-2024/homework/article) so we can link it to your message from the channel 19 | 20 | ## Evaluating 21 | 22 | We'll use voting for scoring your articles. 
23 | 24 | * Check the articles in the [`#course-ml-zoomcamp-articles`](https://app.slack.com/client/T01ATQK62F8/C02QXBFS1PU) channel and put a :+1: reaction to articles that you liked 25 | * The top voted articles will get 20 points 26 | 27 | 28 | ## Deadline 29 | 30 | The deadline for finishing the article is 31 January 31 | 32 | 33 | ## Articles from 2024 34 | 35 | 36 | * [You will like my offer](https://markogolovko.com/blog/you-will-like-my-offer/) by Marko Golovko 37 | * [Natural Language Processing using spaCy, TensorFlow and BERT model architecture](https://volcano-camp-325.notion.site/Natural-Language-Processing-using-spaCy-TensorFlow-and-BERT-model-architecture-1895067176b380d09484d4b0338b0c5e?pvs=4) by Alexander Daniel Rios 38 | * [Setting up the environments for ML Zoomcamp 2024 - macOS](https://medium.com/@till.meineke/setting-up-the-environments-for-ml-zoomcamp-2024-eceb6e42e36e) by Till Meineke 39 | 40 | 41 | ### Past articles 42 | 43 | See examples of articles from the previous cohorts [here](../../article/README.md) 44 | -------------------------------------------------------------------------------- /cohorts/2024/projects.md: -------------------------------------------------------------------------------- 1 | # Projects 2 | 3 | See more information about projects [here](../../projects/) 4 | 5 | ## Evaluation 6 | 7 | Evaluation via [course management system](https://courses.datatalks.club/ml-zoomcamp-2024/) 8 | 9 | * [Evaluation criteria](https://docs.google.com/spreadsheets/d/e/2PACX-1vQCwqAtkjl07MTW-SxWUK9GUvMQ3Pv_fF8UadcuIYLgHa0PlNu9BRWtfLgivI8xSCncQs82HDwGXSm3/pubhtml) 10 | * [Video with instructions](https://www.loom.com/share/4f5c155c550e48ddb54b71ba76516b04) 11 | 12 | 13 | ## Midterm Project 14 | 15 | - Project due date: 26 November 2024, 00:00 CET (Berlin time) 16 | - Submit your project [here](https://courses.datatalks.club/ml-zoomcamp-2024/project/midterm) 17 | - Evaluation due date: 03 December 2024, 00:00 CET (Berlin time) 18 | - 
[Projects of Midterm Project](https://courses.datatalks.club/ml-zoomcamp-2024/project/midterm/list) 19 | 20 | 21 | ## Capstone 1 22 | 23 | - Project due date: 14 January 2025, 00:00 CET (Berlin time) 24 | - Submit your project [here](https://courses.datatalks.club/ml-zoomcamp-2024/project/capstone1) 25 | - Evaluation due date: 21 January 2025, 00:00 CET (Berlin time) 26 | - [Projects of Capstone 1](https://courses.datatalks.club/ml-zoomcamp-2024/project/capstone1/list) 27 | 28 | ## Capstone 2 29 | 30 | - Project due date: 28 January 2025, 00:00 CET (Berlin time) 31 | - Submit your project [here](https://courses.datatalks.club/ml-zoomcamp-2024/project/capstone2) 32 | - Evaluation due date: 04 February 2025, 00:00 CET (Berlin time) 33 | - [Projects of Capstone 2](https://courses.datatalks.club/ml-zoomcamp-2024/project/capstone2/list) 34 | -------------------------------------------------------------------------------- /images/bentoml.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/images/bentoml.png -------------------------------------------------------------------------------- /images/play.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/images/play.png -------------------------------------------------------------------------------- /images/zoomcamp.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/images/zoomcamp.jpg -------------------------------------------------------------------------------- /learning-in-public.md: -------------------------------------------------------------------------------- 1 | # Learning in 
public 2 | 3 | Most people learn in private: they consume content but don't tell 4 | anyone about it. There's nothing wrong with it. 5 | 6 | But we want to encourage you to document your progress and 7 | share it publicly on social media. 8 | 9 | It helps you get noticed and will lead to: 10 | 11 | * Expanding your network: meeting new people and making new friends 12 | * Being invited to meetups, conferences and podcasts 13 | * Landing a job or getting clients 14 | * Many other good things 15 | 16 | Here's a more comprehensive reading on why you want to do it: https://github.com/readme/guides/publishing-your-work 17 | 18 | 19 | ## Learning in Public for Zoomcamps 20 | 21 | When you submit your homework or project, you can also submit 22 | learning in public posts: 23 | 24 | 25 | 26 | You can watch this video to see what your learning in public posts may look like: 27 | 28 | 29 | 30 | 31 | 32 | 33 | Send a PR if you want to suggest improvements for this document 34 | --------------------------------------------------------------------------------