├── .github
└── FUNDING.yml
├── .gitignore
├── 01-intro
├── 01-what-is-ml.md
├── 02-ml-vs-rules.md
├── 03-supervised-ml.md
├── 04-crisp-dm.md
├── 05-model-selection.md
├── 06-environment.md
├── 07-numpy.md
├── 08-linear-algebra.md
├── 09-pandas.md
├── 10-summary.md
├── README.md
├── homework.md
├── images
│ ├── add-code-for-datafile-download.png
│ ├── sample-code.png
│ ├── sample-data-file.png
│ ├── sample-jupyter-notebook.png
│ ├── thumbnail-1-01.jpg
│ ├── thumbnail-1-02.jpg
│ ├── thumbnail-1-03.jpg
│ ├── thumbnail-1-04.jpg
│ ├── thumbnail-1-05.jpg
│ ├── thumbnail-1-07.jpg
│ ├── thumbnail-1-08.jpg
│ ├── thumbnail-1-09.jpg
│ └── thumbnail-1-10.jpg
└── notebooks
│ ├── 07-numpy.ipynb
│ ├── 08-linear-algebra.ipynb
│ └── 09-pandas.ipynb
├── 02-regression
├── 01-car-price-intro.md
├── 02-data-preparation.md
├── 03-eda.md
├── 04-validation-framework.md
├── 05-linear-regression-simple.md
├── 06-linear-regression-vector.md
├── 07-linear-regression-training.md
├── 08-baseline-model.md
├── 09-rmse.md
├── 10-car-price-validation.md
├── 11-feature-engineering.md
├── 12-categorical-variables.md
├── 13-regularization.md
├── 14-tuning-model.md
├── 15-using-model.md
├── 16-summary.md
├── 17-explore-more.md
├── README.md
├── homework.md
├── images
│ ├── thumbnail-2-01.jpg
│ ├── thumbnail-2-02.jpg
│ ├── thumbnail-2-03.jpg
│ ├── thumbnail-2-04.jpg
│ ├── thumbnail-2-05.jpg
│ ├── thumbnail-2-06.jpg
│ ├── thumbnail-2-07.jpg
│ ├── thumbnail-2-08.jpg
│ ├── thumbnail-2-09.jpg
│ ├── thumbnail-2-10.jpg
│ ├── thumbnail-2-11.jpg
│ ├── thumbnail-2-12.jpg
│ ├── thumbnail-2-13.jpg
│ ├── thumbnail-2-14.jpg
│ ├── thumbnail-2-15.jpg
│ └── thumbnail-2-16.jpg
├── meta.json
└── notebook.ipynb
├── 03-classification
├── 01-churn-project.md
├── 02-data-preparation.md
├── 03-validation.md
├── 04-eda.md
├── 05-risk.md
├── 06-mutual-info.md
├── 07-correlation.md
├── 08-ohe.md
├── 09-logistic-regression.md
├── 10-training-log-reg.md
├── 11-log-reg-interpretation.md
├── 12-using-log-reg.md
├── 13-summary.md
├── 14-explore-more.md
├── README.md
├── homework.md
├── images
│ ├── correlation-matrix.png
│ ├── thumbnail-3-01.jpg
│ ├── thumbnail-3-02.jpg
│ ├── thumbnail-3-03.jpg
│ ├── thumbnail-3-04.jpg
│ ├── thumbnail-3-05.jpg
│ ├── thumbnail-3-06.jpg
│ ├── thumbnail-3-07.jpg
│ ├── thumbnail-3-08.jpg
│ ├── thumbnail-3-09.jpg
│ ├── thumbnail-3-10.jpg
│ ├── thumbnail-3-11.jpg
│ ├── thumbnail-3-12.jpg
│ └── thumbnail-3-13.jpg
├── meta.csv
├── meta.json
├── notebook-scaling-ohe.ipynb
└── notebook.ipynb
├── 04-evaluation
├── 01-overview.md
├── 02-accuracy.md
├── 03-confusion-table.md
├── 04-precision-recall.md
├── 05-roc.md
├── 06-auc.md
├── 07-cross-validation.md
├── 08-summary.md
├── 09-explore-more.md
├── README.md
├── homework.md
├── images
│ ├── TPR_FPR.png
│ ├── classification_metrics.png
│ ├── confusion_matrix.png
│ ├── thumbnail-4-01.jpg
│ ├── thumbnail-4-02.jpg
│ ├── thumbnail-4-03.jpg
│ ├── thumbnail-4-04.jpg
│ ├── thumbnail-4-05-cont.jpg
│ ├── thumbnail-4-05.jpg
│ ├── thumbnail-4-06.jpg
│ ├── thumbnail-4-07.jpg
│ └── thumbnail-4-08.jpg
├── meta.csv
├── meta.json
└── notebook.ipynb
├── 05-deployment
├── 01-intro.md
├── 02-pickle.md
├── 03-flask-intro.md
├── 04-flask-deployment.md
├── 05-pipenv.md
├── 06-docker.md
├── 07-aws-eb.md
├── 08-summary.md
├── 09-explore-more.md
├── README.md
├── code
│ ├── 05-train-churn-model.ipynb
│ ├── Dockerfile
│ ├── Pipfile
│ ├── Pipfile.lock
│ ├── model_C=1.0.bin
│ ├── ping.py
│ ├── plan.md
│ ├── predict-test.py
│ ├── predict.py
│ └── train.py
├── homework.md
├── images
│ ├── thumbnail-5-01.jpg
│ ├── thumbnail-5-02.jpg
│ ├── thumbnail-5-03.jpg
│ ├── thumbnail-5-04.jpg
│ ├── thumbnail-5-05.jpg
│ ├── thumbnail-5-06.jpg
│ ├── thumbnail-5-07.jpg
│ └── thumbnail-5-08.jpg
├── meta.csv
└── meta.json
├── 06-trees
├── 01-credit-risk.md
├── 02-data-prep.md
├── 03-decision-trees.md
├── 04-decision-tree-learning.md
├── 05-decision-tree-tuning.md
├── 06-random-forest.md
├── 07-boosting.md
├── 08-xgb-tuning.md
├── 09-final-model.md
├── 10-summary.md
├── 11-explore-more.md
├── README.md
├── homework.md
├── images
│ ├── thumbnail-6-01.jpg
│ ├── thumbnail-6-02.jpg
│ ├── thumbnail-6-03.jpg
│ ├── thumbnail-6-04.jpg
│ ├── thumbnail-6-05.jpg
│ ├── thumbnail-6-06.jpg
│ ├── thumbnail-6-07.jpg
│ ├── thumbnail-6-08.jpg
│ ├── thumbnail-6-09.jpg
│ ├── thumbnail-6-10.jpg
│ └── thumbnail-6-12.jpg
├── meta.csv
├── meta.json
└── notebook.ipynb
├── 08-deep-learning
├── 01-fashion-classification.md
├── 01b-saturn-cloud.md
├── 02-tensorflow-keras.md
├── 03-pretrained-models.md
├── 04-conv-neural-nets.md
├── 05-transfer-learning.md
├── 06-learning-rate.md
├── 07-checkpointing.md
├── 08-more-layers.md
├── 09-dropout.md
├── 10-augmentation.md
├── 11-large-model.md
├── 12-using-model.md
├── 13-summary.md
├── 14-explore-more.md
├── README.md
├── homework.md
├── images
│ ├── thumbnail-8-01.jpg
│ ├── thumbnail-8-01b.jpg
│ ├── thumbnail-8-02.jpg
│ ├── thumbnail-8-03.jpg
│ ├── thumbnail-8-04.jpg
│ ├── thumbnail-8-05.jpg
│ ├── thumbnail-8-06.jpg
│ ├── thumbnail-8-07.jpg
│ ├── thumbnail-8-08.jpg
│ ├── thumbnail-8-09.jpg
│ ├── thumbnail-8-10.jpg
│ ├── thumbnail-8-11.jpg
│ ├── thumbnail-8-12.jpg
│ └── thumbnail-8-13.jpg
├── install.md
├── meta.csv
├── meta.json
└── notebook.ipynb
├── 09-serverless
├── 01-intro.md
├── 02-aws-lambda.md
├── 03-tensorflow-lite.md
├── 04-preparing-code.md
├── 05-docker-image.md
├── 06-creating-lambda.md
├── 07-api-gateway.md
├── 08-summary.md
├── 09-explore-more.md
├── README.md
├── code
│ ├── Dockerfile
│ ├── convert-model.py
│ ├── lambda_function.py
│ ├── plan.md
│ ├── tensorflow-model.ipynb
│ └── test.py
├── homework.md
├── images
│ ├── thumbnail-9-01.jpg
│ ├── thumbnail-9-02.jpg
│ ├── thumbnail-9-03.jpg
│ ├── thumbnail-9-04.jpg
│ ├── thumbnail-9-05.jpg
│ ├── thumbnail-9-06.jpg
│ ├── thumbnail-9-07.jpg
│ └── thumbnail-9-08.jpg
├── meta.csv
├── meta.json
└── updates.md
├── 10-kubernetes
├── 01-overview.md
├── 02-tensorflow-serving.md
├── 03-preprocessing.md
├── 04-docker-compose.md
├── 05-kubernetes-intro.md
├── 06-kubernetes-simple-service.md
├── 07-kubernetes-tf-serving.md
├── 08-eks.md
├── 09-summary.md
├── 10-explore-more.md
├── README.md
├── code
│ ├── Pipfile
│ ├── Pipfile.lock
│ ├── README.md
│ ├── docker-compose.yaml
│ ├── gateway.py
│ ├── image-gateway.dockerfile
│ ├── image-model.dockerfile
│ ├── kube-config
│ │ ├── eks-config.yaml
│ │ ├── gateway-deployment.yaml
│ │ ├── gateway-service.yaml
│ │ ├── model-deployment.yaml
│ │ └── model-service.yaml
│ ├── ping
│ │ ├── Dockerfile
│ │ ├── Pipfile
│ │ ├── Pipfile.lock
│ │ ├── deployment.yaml
│ │ ├── metallb-config.yaml
│ │ ├── ping.py
│ │ └── service.yaml
│ ├── plan.md
│ ├── proto.py
│ ├── test.py
│ └── tf-serving-connect.ipynb
├── homework.md
├── images
│ ├── thumbnail-10-01.jpg
│ ├── thumbnail-10-02.jpg
│ ├── thumbnail-10-03.jpg
│ ├── thumbnail-10-04.jpg
│ ├── thumbnail-10-05.jpg
│ ├── thumbnail-10-06.jpg
│ ├── thumbnail-10-07.jpg
│ ├── thumbnail-10-08.jpg
│ └── thumbnail-10-09.jpg
├── meta.csv
└── meta.json
├── 11-kserve
├── 01-overview.md
├── 02-kserve-local.md
├── 03-kserve-sklearn.md
├── 04-kserve-custom-image.md
├── 05-tensorflow-kserve.md
├── 06-kserve-transformers.md
├── 07-kserve-eks-upd.md
├── 07-kserve-eks.md
├── 08-summary.md
├── 09-explore-more.md
├── README.md
├── code
│ ├── .gitignore
│ ├── README.md
│ ├── churn
│ │ ├── Pipfile
│ │ ├── Pipfile.lock
│ │ ├── churn-service.yaml
│ │ ├── churn-test.py
│ │ ├── churn-train.py
│ │ └── model.joblib
│ ├── clothes
│ │ ├── clothes-service.yaml
│ │ ├── convert.py
│ │ ├── test-transformer.py
│ │ ├── test.ipynb
│ │ └── test.py
│ ├── eks
│ │ ├── clothes-service.yaml
│ │ ├── cluster.yaml
│ │ └── test-transformer.py
│ ├── image_transfomer
│ │ ├── Dockerfile
│ │ ├── Pipfile
│ │ ├── Pipfile.lock
│ │ └── image_transformer.py
│ ├── iris
│ │ ├── iris-example.yaml
│ │ ├── iris-request.json
│ │ └── iris-test.py
│ └── plan.md
├── images
│ ├── thumbnail-11-01.jpg
│ ├── thumbnail-11-02.jpg
│ ├── thumbnail-11-03.jpg
│ ├── thumbnail-11-04.jpg
│ ├── thumbnail-11-05.jpg
│ ├── thumbnail-11-06.jpg
│ └── thumbnail-11-07.jpg
├── meta.csv
└── meta.json
├── README.md
├── after-sign-up.md
├── article
└── README.md
├── asking-questions.md
├── bento.md
├── certificates.md
├── cohorts
├── 2021
│ ├── 01-intro
│ │ ├── homework-1.ipynb
│ │ └── homework.md
│ ├── 02-regression
│ │ ├── homework.ipynb
│ │ └── homework.md
│ ├── 03-classification
│ │ ├── homework.ipynb
│ │ └── homework.md
│ ├── 04-evaluation
│ │ ├── homework-4-solution.ipynb
│ │ ├── homework-4-starter.ipynb
│ │ └── homework.md
│ ├── 05-deployment
│ │ ├── homework.md
│ │ └── homework
│ │ │ ├── Dockerfile
│ │ │ ├── Pipfile
│ │ │ ├── Pipfile.lock
│ │ │ ├── dv.bin
│ │ │ ├── homework.md
│ │ │ ├── model1.bin
│ │ │ ├── q3_test.py
│ │ │ ├── q4_predict.py
│ │ │ ├── q4_test.py
│ │ │ ├── q6_predict.py
│ │ │ └── q6_test.py
│ ├── 06-trees
│ │ ├── homework-6-solution.ipynb
│ │ ├── homework-6-starter.ipynb
│ │ └── homework.md
│ ├── 07-midterm-project
│ │ ├── README.md
│ │ ├── week10-office-hours.ipynb
│ │ ├── week8-office-hours.ipynb
│ │ └── week9-office-hours.ipynb
│ ├── 08-deep-learning
│ │ ├── CNN_solution.ipynb
│ │ ├── homework.md
│ │ └── week-11-office-hours.ipynb
│ ├── 09-serverless
│ │ ├── homework.md
│ │ └── homework
│ │ │ ├── Dockerfile
│ │ │ ├── homework.ipynb
│ │ │ ├── homework.py
│ │ │ └── test.py
│ ├── 10-kubernetes
│ │ ├── homework.md
│ │ └── homework
│ │ │ ├── deployment.yaml
│ │ │ └── service.yaml
│ ├── 12-capstone
│ │ └── README.md
│ ├── 13-article
│ │ └── README.md
│ ├── 14-project
│ │ └── README.md
│ ├── leaderboard.md
│ └── office-hours.md
├── 2022
│ ├── 01-intro
│ │ ├── homework.md
│ │ └── homework_1.ipynb
│ ├── 02-regression
│ │ ├── homework.md
│ │ └── homework_2.ipynb
│ ├── 03-classification
│ │ ├── homework.md
│ │ └── homework_3.ipynb
│ ├── 04-evaluation
│ │ ├── homework.md
│ │ └── homework_4.ipynb
│ ├── 05-deployment
│ │ ├── homework.md
│ │ └── homework
│ │ │ ├── Dockerfile
│ │ │ ├── Pipfile
│ │ │ ├── Pipfile.lock
│ │ │ ├── dv.bin
│ │ │ ├── model1.bin
│ │ │ ├── q3_test.py
│ │ │ ├── q4_predict.py
│ │ │ ├── q4_test.py
│ │ │ ├── q6_predict.py
│ │ │ └── q6_test.py
│ ├── 06-trees
│ │ ├── homework.md
│ │ ├── homework_6.ipynb
│ │ └── homework_6_starter.ipynb
│ ├── 07-bento-production
│ │ ├── homework.md
│ │ ├── locustfile.py
│ │ └── resources
│ │ │ ├── classify-endpoint.png
│ │ │ ├── dragon1.jpeg
│ │ │ ├── dragon2.jpeg
│ │ │ ├── dragon3.jpeg
│ │ │ └── dragon4.jpeg
│ ├── 08-deep-learning
│ │ ├── homework.md
│ │ └── homework_8.ipynb
│ ├── 09-serverless
│ │ ├── homework.md
│ │ └── homework
│ │ │ ├── Dockerfile
│ │ │ ├── homework.ipynb
│ │ │ ├── homework.py
│ │ │ └── test.py
│ ├── 10-kubernetes
│ │ ├── homework.md
│ │ └── homework
│ │ │ ├── deployment.yaml
│ │ │ ├── hpa.yaml
│ │ │ ├── service.yaml
│ │ │ └── test.py
│ ├── README.md
│ ├── article.md
│ ├── leaderboard.md
│ └── projects.md
├── 2023
│ ├── 01-intro
│ │ ├── homework.md
│ │ └── homework_1.ipynb
│ ├── 02-regression
│ │ └── homework.md
│ ├── 03-classification
│ │ ├── homework.md
│ │ └── homework_3.ipynb
│ ├── 04-evaluation
│ │ └── homework.md
│ ├── 05-deployment
│ │ ├── homework.md
│ │ └── homework
│ │ │ ├── Dockerfile
│ │ │ ├── Pipfile
│ │ │ ├── Pipfile.lock
│ │ │ ├── dv.bin
│ │ │ ├── model1.bin
│ │ │ ├── q3_test.py
│ │ │ ├── q4_predict.py
│ │ │ ├── q4_test.py
│ │ │ ├── q6_predict.py
│ │ │ └── q6_test.py
│ ├── 06-trees
│ │ └── homework.md
│ ├── 08-deep-learning
│ │ ├── homework.ipynb
│ │ └── homework.md
│ ├── 09-serverless
│ │ └── homework.md
│ ├── 10-kubernetes
│ │ └── homework.md
│ ├── README.md
│ ├── article.md
│ ├── leaderboard.md
│ └── projects.md
└── 2024
│ ├── 01-intro
│ ├── homework.md
│ └── homework_1.ipynb
│ ├── 02-regression
│ ├── homework.ipynb
│ └── homework.md
│ ├── 03-classification
│ ├── homework.md
│ └── homework_3.ipynb
│ ├── 04-evaluation
│ ├── homework.ipynb
│ └── homework.md
│ ├── 05-deployment
│ ├── homework.md
│ └── homework
│ │ ├── Dockerfile
│ │ ├── Pipfile
│ │ ├── Pipfile.lock
│ │ ├── dv.bin
│ │ ├── model1.bin
│ │ ├── q3_test.py
│ │ ├── q4_predict.py
│ │ ├── q4_test.py
│ │ ├── q6_predict.py
│ │ └── q6_test.py
│ ├── 06-trees
│ ├── homework.ipynb
│ └── homework.md
│ ├── 08-deep-learning
│ ├── homework.md
│ └── homework_8.ipynb
│ ├── 09-serverless
│ ├── homework.md
│ └── homework
│ │ ├── Dockerfile
│ │ ├── entrypoint.sh
│ │ ├── homework.dockerfile
│ │ ├── homework.ipynb
│ │ ├── homework.py
│ │ └── test.py
│ ├── 10-kubernetes
│ ├── homework.md
│ └── homework
│ │ ├── components.yaml
│ │ ├── deployment.yaml
│ │ ├── hpa.yaml
│ │ ├── service.yaml
│ │ └── test.py
│ ├── README.md
│ ├── article.md
│ └── projects.md
├── generate-description.ipynb
├── generate-pages.ipynb
├── images
├── bentoml.png
├── play.png
└── zoomcamp.jpg
├── learning-in-public.md
└── projects
└── README.md
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | custom: ['http://bit.ly/mlbookcamp']
2 | github: alexeygrigorev
3 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # generated
2 | .ipynb_checkpoints/
3 | __pycache__/
4 | **my_dir/
5 | **logs/
6 | **models/
7 |
8 | # file types
9 | *.h5
10 | *.tflite
11 | *.keras
12 | *.zip
13 | *.pdf
14 |
15 | # data folders
16 | **data/
17 |
18 | # content-specific
19 | /08-deep-learning/clothing-dataset-small/
20 | /08-deep-learning/clothing-dataset/
21 | /08-deep-learning/ImageClassification/
22 | /08-deep-learning/my_dir/
23 |
24 | /09-serverless/clothing-model/
25 | /09-serverless/clothing-model/
26 |
27 | **midterms_evaluations/
28 | **samples/
29 |
--------------------------------------------------------------------------------
/01-intro/01-what-is-ml.md:
--------------------------------------------------------------------------------
1 | ## 1.1 Introduction to Machine Learning
2 |
3 |
4 |
5 | [Slides](https://www.slideshare.net/AlexeyGrigorev/ml-zoomcamp-11-introduction-to-machine-learning)
6 |
7 |
8 | ## Notes
9 |
10 | The concept of ML is depicted with an example of predicting the price of a car. The ML model
11 | learns from data, represented as some **features** such as year, mileage, among others, and the **target** variable, in this
12 | case, the car's price, by extracting patterns from the data.
13 |
14 | Then, the model is given new data (**without** the target) about cars and predicts their price (target).
15 |
16 | In summary, ML is a process of **extracting patterns from data**, where the data is of two types:
17 |
18 | * features (information about the object) and
19 | * target (property to predict for unseen objects).
20 |
21 | Therefore, new feature values are presented to the model, and it makes **predictions** from the learned patterns.
22 |
23 |
24 |
25 | ⚠️ |
26 |
27 | The notes are written by the community.
28 | If you see an error here, please create a PR with a fix.
29 | |
30 |
31 |
32 |
33 |
34 | ## Notes
35 |
36 | * [Notes from Peter Ernicke](https://knowmledge.com/2023/09/09/ml-zoomcamp-2023-introduction-to-machine-learning-part-1/)
37 |
38 | ## Navigation
39 |
40 | * [Machine Learning Zoomcamp course](../)
41 | * [Lesson 1: Introduction to Machine Learning](./)
42 | * Next: [ML vs Rule-Based Systems](02-ml-vs-rules.md)
43 |
--------------------------------------------------------------------------------
/01-intro/09-pandas.md:
--------------------------------------------------------------------------------
1 | ## 1.9 Introduction to Pandas
2 |
3 |
4 |
5 |
6 | ## Notes
7 |
8 |
9 | Add notes here (PRs are welcome).
10 |
11 |
12 |
13 | ⚠️ |
14 |
15 | The notes are written by the community.
16 | If you see an error here, please create a PR with a fix.
17 | |
18 |
19 |
20 |
21 | * [Notes from Peter Ernicke - Part 1/2](https://knowmledge.com/2023/09/16/ml-zoomcamp-2023-introduction-to-machine-learning-part-12/)
22 | * [Notes from Peter Ernicke - Part 2/2](https://knowmledge.com/2023/09/17/ml-zoomcamp-2023-introduction-to-machine-learning-part-13/)
23 |
24 | ## Links
25 |
26 | * [Notebook from the video](notebooks/09-pandas.ipynb)
27 | * [Notebook](https://github.com/alexeygrigorev/mlbookcamp-code/blob/master/appendix-d-pandas.ipynb)
28 |
29 | ## Additional links
30 |
31 | * [Pandas Cheat sheet](https://www.datacamp.com/community/blog/python-pandas-cheat-sheet)
32 |
33 | ## Navigation
34 |
35 | * [Machine Learning Zoomcamp course](../)
36 | * [Lesson 1: Introduction to Machine Learning](./)
37 | * Previous: [Linear Algebra Refresher](08-linear-algebra.md)
38 | * Next: [Summary](10-summary.md)
39 |
--------------------------------------------------------------------------------
/01-intro/homework.md:
--------------------------------------------------------------------------------
1 | ## Homework
2 |
3 | * For 2024 cohort homework, check [the 2024 cohort folder](../cohorts/2024/01-intro/homework.md)
4 | * For 2023 cohort homework, check [the 2023 cohort folder](../cohorts/2023/01-intro/homework.md)
5 | * For 2022 cohort homework, check [the 2022 cohort folder](../cohorts/2022/01-intro/homework.md)
6 | * For 2021 cohort homework and solution, check [the 2021 cohort folder](../cohorts/2021/01-intro/)
7 |
8 |
9 | ## Navigation
10 |
11 | * [Machine Learning Zoomcamp course](../)
12 | * [Lesson 1: Introduction to Machine Learning](./)
13 | * Previous: [Summary](10-summary.md)
14 |
--------------------------------------------------------------------------------
/01-intro/images/add-code-for-datafile-download.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/01-intro/images/add-code-for-datafile-download.png
--------------------------------------------------------------------------------
/01-intro/images/sample-code.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/01-intro/images/sample-code.png
--------------------------------------------------------------------------------
/01-intro/images/sample-data-file.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/01-intro/images/sample-data-file.png
--------------------------------------------------------------------------------
/01-intro/images/sample-jupyter-notebook.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/01-intro/images/sample-jupyter-notebook.png
--------------------------------------------------------------------------------
/01-intro/images/thumbnail-1-01.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/01-intro/images/thumbnail-1-01.jpg
--------------------------------------------------------------------------------
/01-intro/images/thumbnail-1-02.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/01-intro/images/thumbnail-1-02.jpg
--------------------------------------------------------------------------------
/01-intro/images/thumbnail-1-03.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/01-intro/images/thumbnail-1-03.jpg
--------------------------------------------------------------------------------
/01-intro/images/thumbnail-1-04.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/01-intro/images/thumbnail-1-04.jpg
--------------------------------------------------------------------------------
/01-intro/images/thumbnail-1-05.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/01-intro/images/thumbnail-1-05.jpg
--------------------------------------------------------------------------------
/01-intro/images/thumbnail-1-07.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/01-intro/images/thumbnail-1-07.jpg
--------------------------------------------------------------------------------
/01-intro/images/thumbnail-1-08.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/01-intro/images/thumbnail-1-08.jpg
--------------------------------------------------------------------------------
/01-intro/images/thumbnail-1-09.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/01-intro/images/thumbnail-1-09.jpg
--------------------------------------------------------------------------------
/01-intro/images/thumbnail-1-10.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/01-intro/images/thumbnail-1-10.jpg
--------------------------------------------------------------------------------
/02-regression/01-car-price-intro.md:
--------------------------------------------------------------------------------
1 |
2 | ## 2.1 Car price prediction project
3 |
4 |
5 |
6 | [Slides](https://www.slideshare.net/AlexeyGrigorev/ml-zoomcamp-21-car-price-prediction-project)
7 |
8 |
9 | ## Notes
10 |
11 | This project is about the creation of a model for helping users to predict car prices. The dataset was obtained from [this
12 | kaggle competition](https://www.kaggle.com/CooperUnion/cardataset).
13 |
14 | **Project plan:**
15 |
16 | * Prepare data and Exploratory data analysis (EDA)
17 | * Use linear regression for predicting price
18 | * Understanding the internals of linear regression
19 | * Evaluating the model with RMSE
20 | * Feature engineering
21 | * Regularization
22 | * Using the model
23 |
24 | The code and dataset are available at this [link](https://github.com/alexeygrigorev/mlbookcamp-code/tree/master/chapter-02-car-price).
25 |
26 |
27 |
28 | ⚠️ |
29 |
30 | The notes are written by the community.
31 | If you see an error here, please create a PR with a fix.
32 | |
33 |
34 |
35 |
36 | * [Notes from Peter Ernicke](https://knowmledge.com/2023/09/18/ml-zoomcamp-2023-machine-learning-for-regression-part-1/)
37 |
38 | ## Navigation
39 |
40 | * [Machine Learning Zoomcamp course](../)
41 | * [Session 2: Machine Learning for Regression](./)
42 | * Next: [Data preparation](02-data-preparation.md)
43 |
--------------------------------------------------------------------------------
/02-regression/02-data-preparation.md:
--------------------------------------------------------------------------------
1 |
2 | ## 2.2 Data preparation
3 |
4 |
5 |
6 | [Slides](https://www.slideshare.net/AlexeyGrigorev/ml-zoomcamp-2-slides)
7 |
8 |
9 | ## Notes
10 |
11 | **Pandas attributes and methods:**
12 |
13 | * `pd.read_csv()` -> read csv files
14 | * `df.head()` -> take a look at the dataframe
15 | * `df.columns` -> retrieve column names of a dataframe
16 | * `df.columns.str.lower()` -> lowercase all the letters
17 | * `df.columns.str.replace(' ', '_')` -> replace the space separator
18 | * `df.dtypes` -> retrieve data types of all features
19 | * `df.index` -> retrieve indices of a dataframe
20 |
21 | The entire code of this project is available in [this jupyter notebook](notebook.ipynb).
22 |
23 |
24 |
25 | ⚠️ |
26 |
27 | The notes are written by the community.
28 | If you see an error here, please create a PR with a fix.
29 | |
30 |
31 |
32 |
33 | * [Notes from Peter Ernicke](https://knowmledge.com/2023/09/18/ml-zoomcamp-2023-machine-learning-for-regression-part-1/)
34 |
35 | ## Navigation
36 |
37 | * [Machine Learning Zoomcamp course](../)
38 | * [Session 2: Machine Learning for Regression](./)
39 | * Previous: [Car price prediction project](01-car-price-intro.md)
40 | * Next: [Exploratory data analysis](03-eda.md)
41 |
--------------------------------------------------------------------------------
/02-regression/03-eda.md:
--------------------------------------------------------------------------------
1 |
2 | ## 2.3 Exploratory data analysis
3 |
4 |
5 |
6 | [Slides](https://www.slideshare.net/AlexeyGrigorev/ml-zoomcamp-2-slides)
7 |
8 |
9 | ## Notes
10 |
11 | **Pandas attributes and methods:**
12 |
13 | * `df[col].unique()` -> return a list of unique values in the series
14 | * `df[col].nunique()` -> return the number of unique values in the series
15 | * `df.isnull().sum()` -> return the number of null values in each column of the dataframe
16 |
17 | **Matplotlib and seaborn methods:**
18 |
19 | * `%matplotlib inline` -> ensure that plots are displayed in jupyter notebook's cells
20 | * `sns.histplot()` -> show the histogram of a series
21 |
22 | **Numpy methods:**
23 | * `np.log1p()` -> apply log transformation to a variable, after adding one to each input value.
24 |
25 | Long-tail distributions usually confuse the ML models, so the recommendation is to transform the target variable distribution to a normal one whenever possible.
26 |
27 | The entire code of this project is available in [this jupyter notebook](https://github.com/alexeygrigorev/mlbookcamp-code/blob/master/chapter-02-car-price/02-carprice.ipynb).
28 |
29 |
30 |
31 | ⚠️ |
32 |
33 | The notes are written by the community.
34 | If you see an error here, please create a PR with a fix.
35 | |
36 |
37 |
38 |
39 | * [Notes from Peter Ernicke](https://knowmledge.com/2023/09/19/ml-zoomcamp-2023-machine-learning-for-regression-part-2/)
40 |
41 | ## Navigation
42 |
43 | * [Machine Learning Zoomcamp course](../)
44 | * [Session 2: Machine Learning for Regression](./)
45 | * Previous: [Data preparation](02-data-preparation.md)
46 | * Next: [Setting up the validation framework](04-validation-framework.md)
47 |
--------------------------------------------------------------------------------
/02-regression/06-linear-regression-vector.md:
--------------------------------------------------------------------------------
1 |
2 | ## 2.6 Linear regression: vector form
3 |
4 |
5 |
6 | [Slides](https://www.slideshare.net/AlexeyGrigorev/ml-zoomcamp-2-slides)
7 |
8 |
9 | ## Notes
10 |
11 | The formula of linear regression can be synthesized with the dot product between features and weights. The feature vector includes the *bias* term with an *x* value of one, i.e. the term $w_0 x_{i0}$, where $x_{i0} = 1$ for $w_0$.
12 |
13 | When all the records are included, the linear regression can be calculated with the dot product between ***feature matrix*** and ***vector of weights***, obtaining the `y` vector of predictions.
14 |
15 | The entire code of this project is available in [this jupyter notebook](https://github.com/alexeygrigorev/mlbookcamp-code/blob/master/chapter-02-car-price/02-carprice.ipynb).
16 |
17 |
18 |
19 | ⚠️ |
20 |
21 | The notes are written by the community.
22 | If you see an error here, please create a PR with a fix.
23 | |
24 |
25 |
26 |
27 | * [Notes from Peter Ernicke](https://knowmledge.wordpress.com/2023/09/20/ml-zoomcamp-2023-machine-learning-for-regression-part-5/)
28 |
29 | ## Navigation
30 |
31 | * [Machine Learning Zoomcamp course](../)
32 | * [Session 2: Machine Learning for Regression](./)
33 | * Previous: [Linear regression](05-linear-regression-simple.md)
34 | * Next: [Training linear regression: Normal equation](07-linear-regression-training.md)
35 |
--------------------------------------------------------------------------------
/02-regression/07-linear-regression-training.md:
--------------------------------------------------------------------------------
1 |
2 | ## 2.7 Training linear regression: Normal equation
3 |
4 |
5 |
6 | [Slides](https://www.slideshare.net/AlexeyGrigorev/ml-zoomcamp-2-slides)
7 |
8 |
9 | ## Notes
10 |
11 | Obtaining predictions as close as possible to the target values $y$ requires calculating the weights from the general
12 | LR equation $Xw = y$. The feature matrix $X$ generally does not
13 | have an inverse because it is not square, so we need an approximate solution, which can be
14 | obtained using the **Gram matrix**
15 | (the product of the transposed feature matrix $X^T$ and the feature matrix $X$, i.e. $X^TX$). The vector of weights or coefficients $w$ obtained with this
16 | formula is the closest possible solution to the LR system.
17 |
18 | Normal Equation:
19 |
20 | $w$ = $(X^TX)^{-1}X^Ty$
21 |
22 | Where:
23 |
24 | $X^TX$ is the Gram Matrix
25 |
26 |
27 |
28 |
29 | The entire code of this project is available in [this jupyter notebook](https://github.com/alexeygrigorev/mlbookcamp-code/blob/master/chapter-02-car-price/02-carprice.ipynb).
30 |
31 |
32 |
33 | ⚠️ |
34 |
35 | The notes are written by the community.
36 | If you see an error here, please create a PR with a fix.
37 | |
38 |
39 |
40 |
41 | * [Notes from Peter Ernicke](https://knowmledge.com/2023/09/21/ml-zoomcamp-2023-machine-learning-for-regression-part-6/)
42 |
43 | ## Navigation
44 |
45 | * [Machine Learning Zoomcamp course](../)
46 | * [Session 2: Machine Learning for Regression](./)
47 | * Previous: [Linear regression: vector form](06-linear-regression-vector.md)
48 | * Next: [Baseline model for car price prediction project](08-baseline-model.md)
49 |
--------------------------------------------------------------------------------
/02-regression/09-rmse.md:
--------------------------------------------------------------------------------
1 |
2 | ## 2.9 Root Mean Squared Error (RMSE)
3 |
4 |
5 |
6 | [Slides](https://www.slideshare.net/AlexeyGrigorev/ml-zoomcamp-2-slides)
7 |
8 |
9 | ## Notes
10 |
11 | * In the previous lesson we found out our predictions were a bit off from the actual target values in the training dataset. We need a way to quantify how good or bad the model is. This is where RMSE can be of help.
12 | * Root Mean Squared Error (RMSE) is a way to evaluate regression models. It measures the error associated with the model being evaluated. This numerical figure can then be used to compare models, enabling us to choose the one that gives the best predictions.
13 |
14 | $$RMSE = \sqrt{ \frac{1}{m} \sum_{i=1}^{m} {(g(x_i) - y_i)^2}}$$
15 |
16 | - $g(x_i)$ is the prediction
17 | - $y_i$ is the actual value
18 | - $m$ is the number of observations in the dataset (i.e. cars)
19 |
20 |
21 | The entire code of this project is available in [this jupyter notebook](https://github.com/alexeygrigorev/mlbookcamp-code/blob/master/chapter-02-car-price/02-carprice.ipynb).
22 |
23 |
24 |
25 | ⚠️ |
26 |
27 | The notes are written by the community.
28 | If you see an error here, please create a PR with a fix.
29 | |
30 |
31 |
32 |
33 | * [Notes from Peter Ernicke](https://knowmledge.com/2023/09/22/ml-zoomcamp-2023-machine-learning-for-regression-part-8/)
34 |
35 | ## Navigation
36 |
37 | * [Machine Learning Zoomcamp course](../)
38 | * [Session 2: Machine Learning for Regression](./)
39 | * Previous: [Baseline model for car price prediction project](08-baseline-model.md)
40 | * Next: [Computing RMSE on validation data](10-car-price-validation.md)
41 |
--------------------------------------------------------------------------------
/02-regression/10-car-price-validation.md:
--------------------------------------------------------------------------------
1 |
2 | ## 2.10 Computing RMSE on validation data
3 |
4 |
5 |
6 | [Slides](https://www.slideshare.net/AlexeyGrigorev/ml-zoomcamp-2-slides)
7 |
8 |
9 | ## Notes
10 |
11 | Calculation of the RMSE on validation partition of the dataset of car price prediction. In this way, we have a metric to evaluate the model's
12 | performance.
13 |
14 | The entire code of this project is available in [this jupyter notebook](https://github.com/alexeygrigorev/mlbookcamp-code/blob/master/chapter-02-car-price/02-carprice.ipynb).
15 |
16 |
17 |
18 | ⚠️ |
19 |
20 | The notes are written by the community.
21 | If you see an error here, please create a PR with a fix.
22 | |
23 |
24 |
25 |
26 | * [Notes from Peter Ernicke](https://knowmledge.com/2023/09/22/ml-zoomcamp-2023-machine-learning-for-regression-part-8/)
27 |
28 | ## Navigation
29 |
30 | * [Machine Learning Zoomcamp course](../)
31 | * [Session 2: Machine Learning for Regression](./)
32 | * Previous: [Root mean squared error](09-rmse.md)
33 | * Next: [Feature engineering](11-feature-engineering.md)
34 |
--------------------------------------------------------------------------------
/02-regression/11-feature-engineering.md:
--------------------------------------------------------------------------------
1 | ## 2.11 Feature engineering
2 |
3 | Feature engineering is the process of creating new features
4 |
5 |
6 |
7 | [Slides](https://www.slideshare.net/AlexeyGrigorev/ml-zoomcamp-2-slides)
8 |
9 |
10 | ## Notes
11 |
12 | A new feature, the age of the car, was added to the dataset. It is computed by subtracting each car's year from the maximum year found in the dataset.
13 | This new feature improved the model's performance, as shown by a lower RMSE and by comparing the distributions of the target variable y and the predictions.
14 |
15 | The entire code of this project is available in [this jupyter notebook](https://github.com/alexeygrigorev/mlbookcamp-code/blob/master/chapter-02-car-price/02-carprice.ipynb).
16 |
17 |
18 |
19 | ⚠️ |
20 |
21 | The notes are written by the community.
22 | If you see an error here, please create a PR with a fix.
23 | |
24 |
25 |
26 |
27 | * [Notes from Peter Ernicke](https://knowmledge.com/2023/09/22/ml-zoomcamp-2023-machine-learning-for-regression-part-9/)
28 |
29 | ## Navigation
30 |
31 | * [Machine Learning Zoomcamp course](../)
32 | * [Session 2: Machine Learning for Regression](./)
33 | * Previous: [Using RMSE on validation data](10-car-price-validation.md)
34 | * Next: [Categorical variables](12-categorical-variables.md)
35 |
--------------------------------------------------------------------------------
/02-regression/12-categorical-variables.md:
--------------------------------------------------------------------------------
1 |
2 | ## 2.12 Categorical variables
3 |
4 |
5 |
6 | [Slides](https://www.slideshare.net/AlexeyGrigorev/ml-zoomcamp-2-slides)
7 |
8 |
9 | ## Notes
10 |
11 | Categorical variables are typically represented as strings, and pandas identifies them as object types. However, some variables that appear to be numerical may actually be categorical (e.g., the number of doors a car has). All these categorical variables need to be converted to a numerical form because ML
12 | models can interpret only numerical features. It is possible to incorporate certain categories from a feature, not necessarily all of them.
13 | This transformation from categorical to numerical variables is known as One-Hot encoding.
14 |
15 | The entire code of this project is available in [this jupyter notebook](https://github.com/alexeygrigorev/mlbookcamp-code/blob/master/chapter-02-car-price/02-carprice.ipynb).
16 |
17 |
18 |
19 | ⚠️ |
20 |
21 | The notes are written by the community.
22 | If you see an error here, please create a PR with a fix.
23 | |
24 |
25 |
26 |
27 | * [Notes from Peter Ernicke](https://knowmledge.com/2023/09/23/ml-zoomcamp-2023-machine-learning-for-regression-part-10/)
28 |
29 | ## Comments
30 |
31 | This way of encoding categorical features is called "one-hot encoding".
32 | We'll learn more about it in Session 3.
33 |
34 |
35 | ## Navigation
36 |
37 | * [Machine Learning Zoomcamp course](../)
38 | * [Session 2: Machine Learning for Regression](./)
39 | * Previous: [Feature engineering](11-feature-engineering.md)
40 | * Next: [Regularization](13-regularization.md)
41 |
--------------------------------------------------------------------------------
/02-regression/14-tuning-model.md:
--------------------------------------------------------------------------------
1 |
2 | ## 2.14 Tuning the model
3 |
4 |
5 |
6 | [Slides](https://www.slideshare.net/AlexeyGrigorev/ml-zoomcamp-2-slides)
7 |
8 |
9 | ## Notes
10 |
11 | Tuning the model consisted of finding the best regularization hyperparameter value, using the validation partition of the dataset. The model was then trained with this regularization value.
12 |
13 | The entire code of this project is available in [this jupyter notebook](https://github.com/alexeygrigorev/mlbookcamp-code/blob/master/chapter-02-car-price/02-carprice.ipynb).
14 |
15 |
16 |
17 | ⚠️ |
18 |
19 | The notes are written by the community.
20 | If you see an error here, please create a PR with a fix.
21 | |
22 |
23 |
24 |
25 | * [Notes from Peter Ernicke](https://knowmledge.com/2023/09/24/ml-zoomcamp-2023-machine-learning-for-regression-part-12/)
26 |
27 | ## Navigation
28 |
29 | * [Machine Learning Zoomcamp course](../)
30 | * [Session 2: Machine Learning for Regression](./)
31 | * Previous: [Regularization](13-regularization.md)
32 | * Next: [Using the model](15-using-model.md)
33 |
--------------------------------------------------------------------------------
/02-regression/15-using-model.md:
--------------------------------------------------------------------------------
1 |
2 | ## 2.15 Using the model
3 |
4 |
5 |
6 | [Slides](https://www.slideshare.net/AlexeyGrigorev/ml-zoomcamp-2-slides)
7 |
8 | ## Notes
9 |
10 | After finding the best model and its parameters, it was trained with training and validation partitions and the final RMSE was calculated on the test partition.
11 |
12 | Finally, the final model was used to predict the price of new cars.
13 |
14 | The entire code of this project is available in [this jupyter notebook](https://github.com/alexeygrigorev/mlbookcamp-code/blob/master/chapter-02-car-price/02-carprice.ipynb).
15 |
16 |
17 |
18 | ⚠️ |
19 |
20 | The notes are written by the community.
21 | If you see an error here, please create a PR with a fix.
22 | |
23 |
24 |
25 |
26 | * [Notes from Peter Ernicke](https://knowmledge.com/2023/09/24/ml-zoomcamp-2023-machine-learning-for-regression-part-12/)
27 |
28 | ## Navigation
29 |
30 | * [Machine Learning Zoomcamp course](../)
31 | * [Session 2: Machine Learning for Regression](./)
32 | * Previous: [Tuning the model](14-tuning-model.md)
33 | * Next: [Car price prediction project summary](16-summary.md)
34 |
--------------------------------------------------------------------------------
/02-regression/16-summary.md:
--------------------------------------------------------------------------------
1 |
2 | ## 2.16 Car price prediction project summary
3 |
4 |
5 |
6 |
7 |
8 | ## Notes
9 |
10 | In summary, this session covered some topics, including data preparation, exploratory data analysis, the validation framework, linear regression model, LR vector and
11 | normal forms, the baseline model, root mean squared error, feature engineering, regularization, tuning the model, and using the best model with new data. All these concepts
12 | were explained using the problem to predict the price of cars.
13 |
14 |
15 |
16 | ⚠️ |
17 |
18 | The notes are written by the community.
19 | If you see an error here, please create a PR with a fix.
20 | |
21 |
22 |
23 |
24 | * [Notes from Maximilien Eyengue](https://github.com/maxim-eyengue/Python-Codes/blob/main/ML_Zoomcamp_2024/02_regression/Summary_Session_02.md)
25 |
26 | ## Navigation
27 |
28 | * [Machine Learning Zoomcamp course](../)
29 | * [Session 2: Machine Learning for Regression](./)
30 | * Previous: [Using the model](15-using-model.md)
31 | * Next: [Explore more](17-explore-more.md)
32 |
--------------------------------------------------------------------------------
/02-regression/17-explore-more.md:
--------------------------------------------------------------------------------
1 |
2 | ## 2.17 Explore more
3 |
4 | ### Questions
5 |
6 | * In this project, we included only 5 top features. What happens if we include 10?
7 |
8 | > That's not a graded homework, it's just for you if you want to try more things on this project
9 |
10 |
11 | ### Other projects
12 |
13 | Here are other datasets that you can play with to learn more about the topic:
14 |
15 | * [California housing dataset](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.fetch_california_housing.html) - predict the price of a house
16 | * [Student Performance Data Set](https://archive.ics.uci.edu/ml/datasets/Student+Performance) - predict the performance of students
17 | * UCI ML Repository contains a lot of other datasets suitable for practicing regression - https://archive.ics.uci.edu/ml/datasets.php?task=reg
18 |
19 |
20 | ## Navigation
21 |
22 | * [Machine Learning Zoomcamp course](../)
23 | * [Session 2: Machine Learning for Regression](./)
24 | * Previous: [Car price prediction project summary](16-summary.md)
25 | * Next: [Homework](homework.md)
26 |
--------------------------------------------------------------------------------
/02-regression/homework.md:
--------------------------------------------------------------------------------
1 | ## Homework
2 |
3 | * For 2024 cohort homework, check [the 2024 cohort folder](../cohorts/2024/02-regression/homework.md)
4 | * For 2023 cohort homework, check [the 2023 cohort folder](../cohorts/2023/02-regression/homework.md)
5 | * For 2022 cohort homework, check [the 2022 cohort folder](../cohorts/2022/02-regression/homework.md)
6 | * For 2021 cohort homework and solution, check [the 2021 cohort folder](../cohorts/2021/02-regression/)
7 |
8 |
9 | ## Navigation
10 |
11 | * [Machine Learning Zoomcamp course](../)
12 | * [Session 2: Machine Learning for Regression](./)
13 | * Previous: [Explore more](17-explore-more.md)
14 |
--------------------------------------------------------------------------------
/02-regression/images/thumbnail-2-01.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/02-regression/images/thumbnail-2-01.jpg
--------------------------------------------------------------------------------
/02-regression/images/thumbnail-2-02.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/02-regression/images/thumbnail-2-02.jpg
--------------------------------------------------------------------------------
/02-regression/images/thumbnail-2-03.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/02-regression/images/thumbnail-2-03.jpg
--------------------------------------------------------------------------------
/02-regression/images/thumbnail-2-04.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/02-regression/images/thumbnail-2-04.jpg
--------------------------------------------------------------------------------
/02-regression/images/thumbnail-2-05.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/02-regression/images/thumbnail-2-05.jpg
--------------------------------------------------------------------------------
/02-regression/images/thumbnail-2-06.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/02-regression/images/thumbnail-2-06.jpg
--------------------------------------------------------------------------------
/02-regression/images/thumbnail-2-07.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/02-regression/images/thumbnail-2-07.jpg
--------------------------------------------------------------------------------
/02-regression/images/thumbnail-2-08.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/02-regression/images/thumbnail-2-08.jpg
--------------------------------------------------------------------------------
/02-regression/images/thumbnail-2-09.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/02-regression/images/thumbnail-2-09.jpg
--------------------------------------------------------------------------------
/02-regression/images/thumbnail-2-10.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/02-regression/images/thumbnail-2-10.jpg
--------------------------------------------------------------------------------
/02-regression/images/thumbnail-2-11.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/02-regression/images/thumbnail-2-11.jpg
--------------------------------------------------------------------------------
/02-regression/images/thumbnail-2-12.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/02-regression/images/thumbnail-2-12.jpg
--------------------------------------------------------------------------------
/02-regression/images/thumbnail-2-13.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/02-regression/images/thumbnail-2-13.jpg
--------------------------------------------------------------------------------
/02-regression/images/thumbnail-2-14.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/02-regression/images/thumbnail-2-14.jpg
--------------------------------------------------------------------------------
/02-regression/images/thumbnail-2-15.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/02-regression/images/thumbnail-2-15.jpg
--------------------------------------------------------------------------------
/02-regression/images/thumbnail-2-16.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/02-regression/images/thumbnail-2-16.jpg
--------------------------------------------------------------------------------
/02-regression/meta.json:
--------------------------------------------------------------------------------
1 | {
2 | "data": "meta.csv",
3 | "session": 2,
4 | "name": "Machine Learning for Regression"
5 | }
--------------------------------------------------------------------------------
/03-classification/01-churn-project.md:
--------------------------------------------------------------------------------
1 | # 3.1 Churn prediction project
2 |
3 |
4 |
5 | [Slides](https://www.slideshare.net/AlexeyGrigorev/ml-zoomcamp-3-machine-learning-for-classification) empty?
6 |
7 | ## Notes
8 |
9 | The project aims to identify customers that are likely to churn or stop using a service. Each customer has a score associated with the probability of churning. Considering this data, the company would send an email with discounts or other promotions to avoid churning.
10 |
11 | The ML strategy applied to approach this problem is binary classification, which for one instance ($i^{th}$ customer), can be expressed as:
12 |
13 | $$\large g\left(x_{i}\right) = y_{i}$$
14 |
15 | In the formula, $y_i$ is the model's prediction and belongs to {0,1}, with 0 being the negative value or no churning, and 1 the positive value or churning. The output corresponds to the likelihood of churning.
16 |
17 | In brief, the main idea behind this project is to build a model with historical data from customers and assign a score of the likelihood of churning.
18 |
19 | For this project, we used a [Kaggle dataset](https://www.kaggle.com/blastchar/telco-customer-churn).
20 |
21 | |⚠️|The notes are written by the community.<br>If you see an error here, please create a PR with a fix.|
22 | |---|:-:|
23 |
24 | * [Notes from Peter Ernicke](https://knowmledge.com/2023/09/25/ml-zoomcamp-2023-machine-learning-for-classification-part-1/)
25 |
26 | ## Navigation
27 |
28 | * [Machine Learning Zoomcamp course](../)
29 | * [Session 3: Machine Learning for Classification](./)
30 | * Next: [Data preparation](02-data-preparation.md)
31 |
--------------------------------------------------------------------------------
/03-classification/03-validation.md:
--------------------------------------------------------------------------------
1 |
2 | ## 3.3 Setting up the validation framework
3 |
4 |
5 |
6 | [Slides](https://www.slideshare.net/AlexeyGrigorev/ml-zoomcamp-3-machine-learning-for-classification)
7 |
8 |
9 | ## Notes
10 |
11 | Splitting the dataset with **Scikit-Learn**.
12 |
13 | **Classes, functions, and methods:**
14 |
15 | * `train_test_split` - Scikit-Learn function for splitting a dataset into two parts. The `test_size` argument states how large the test set should be. The `random_state` argument sets a random seed for reproducibility purposes.
16 | * `df.reset_index(drop=True)` - reset the indices of a dataframe and delete the previous ones.
17 | * `df.x.values` - extract the values from x series
18 | * `del df['x']` - delete x series from a dataframe
19 |
20 | The entire code of this project is available in [this jupyter notebook](https://github.com/alexeygrigorev/mlbookcamp-code/blob/master/chapter-03-churn-prediction/03-churn.ipynb).
21 |
22 |
23 |
24 | ⚠️ |
25 |
26 | The notes are written by the community.
27 | If you see an error here, please create a PR with a fix.
28 | |
29 |
30 |
31 |
32 | * [Notes from Peter Ernicke](https://knowmledge.com/2023/09/27/ml-zoomcamp-2023-machine-learning-for-classification-part-3/)
33 |
34 | ## Navigation
35 |
36 | * [Machine Learning Zoomcamp course](../)
37 | * [Session 3: Machine Learning for Classification](./)
38 | * Previous: [Data preparation](02-data-preparation.md)
39 | * Next: [EDA](04-eda.md)
40 |
--------------------------------------------------------------------------------
/03-classification/04-eda.md:
--------------------------------------------------------------------------------
1 |
2 | ## 3.4 EDA
3 |
4 |
5 |
6 | [Slides](https://www.slideshare.net/AlexeyGrigorev/ml-zoomcamp-3-machine-learning-for-classification)
7 |
8 |
9 | ## Notes
10 |
11 | The EDA for this project consisted of:
12 | * Checking missing values
13 | * Looking at the distribution of the target variable (churn)
14 | * Looking at numerical and categorical variables
15 |
16 | **Functions and methods:**
17 |
18 | * `df.isnull().sum()` - returns the number of null values in the dataframe.
19 | * `df.x.value_counts()` - returns the number of occurrences of each category in x series. The `normalize=True` argument returns the proportion of each category instead of the count. In this project, the mean of churn is equal to the churn rate obtained with the value_counts method.
20 | * `round(x, y)` - round an x number with y decimal places
21 | * `df[x].nunique()` - returns the number of unique values in x series
22 |
23 | The entire code of this project is available in [this jupyter notebook](https://github.com/alexeygrigorev/mlbookcamp-code/blob/master/chapter-03-churn-prediction/03-churn.ipynb).
24 |
25 |
26 |
27 | ⚠️ |
28 |
29 | The notes are written by the community.
30 | If you see an error here, please create a PR with a fix.
31 | |
32 |
33 |
34 |
35 | * [Notes from Peter Ernicke](https://knowmledge.com/2023/09/27/ml-zoomcamp-2023-machine-learning-for-classification-part-4/)
36 |
37 | ## Navigation
38 |
39 | * [Machine Learning Zoomcamp course](../)
40 | * [Session 3: Machine Learning for Classification](./)
41 | * Previous: [Setting up the validation framework](03-validation.md)
42 | * Next: [Feature importance: Churn rate and risk ratio](05-risk.md)
43 |
--------------------------------------------------------------------------------
/03-classification/06-mutual-info.md:
--------------------------------------------------------------------------------
1 |
2 | ## 3.6 Feature importance: Mutual information
3 |
4 |
5 |
6 | [Slides](https://www.slideshare.net/AlexeyGrigorev/ml-zoomcamp-3-machine-learning-for-classification)
7 |
8 |
9 | ## Notes
10 |
11 | Mutual information is a concept from information theory, which measures how much we can learn about one variable if we know the value of another. In this project, we can think of this as how much we learn about churn if we have the information from a particular feature. So, it is a measure of the importance of a categorical variable.
12 |
13 | **Classes, functions, and methods:**
14 |
15 | * `mutual_info_score(x, y)` - Scikit-Learn function for calculating the mutual information between the x target variable and y feature.
16 | * `df[x].apply(y)` - apply a y function to the x series of the df dataframe.
17 | * `df.sort_values(ascending=False).to_frame(name='x')` - sort the values in descending order and convert the result to a dataframe whose column is named x.
18 |
19 | The entire code of this project is available in [this jupyter notebook](https://github.com/alexeygrigorev/mlbookcamp-code/blob/master/chapter-03-churn-prediction/03-churn.ipynb).
20 |
21 |
22 |
23 | ⚠️ |
24 |
25 | The notes are written by the community.
26 | If you see an error here, please create a PR with a fix.
27 | |
28 |
29 |
30 |
31 | * [Notes from Peter Ernicke](https://knowmledge.com/2023/09/28/ml-zoomcamp-2023-machine-learning-for-classification-part-6/)
32 |
33 | ## Navigation
34 |
35 | * [Machine Learning Zoomcamp course](../)
36 | * [Session 3: Machine Learning for Classification](./)
37 | * Previous: [Feature importance: Churn rate and risk ratio](05-risk.md)
38 | * Next: [Feature importance: Correlation](07-correlation.md)
39 |
--------------------------------------------------------------------------------
/03-classification/08-ohe.md:
--------------------------------------------------------------------------------
1 |
2 | ## 3.8 One-hot encoding
3 |
4 |
5 |
6 | [Slides](https://www.slideshare.net/AlexeyGrigorev/ml-zoomcamp-3-machine-learning-for-classification)
7 |
8 |
9 | ## Notes
10 |
11 | One-Hot Encoding allows encoding categorical variables in numerical ones. This method represents each category of a variable as one column, and a 1 is assigned if the value belongs to the category or 0 otherwise.
12 |
13 | **Classes, functions, and methods:**
14 |
15 | * `df[x].to_dict(orient='records')` - convert x series to dictionaries, oriented by rows.
16 | * `DictVectorizer().fit_transform(x)` - Scikit-Learn class for one-hot encoding by converting x dictionaries into a sparse matrix. It does not affect the numerical variables.
17 | * `DictVectorizer().get_feature_names_out()` - return the names of the columns in the sparse matrix (older Scikit-Learn versions used `get_feature_names()`, which was removed in version 1.2).
18 |
19 | The entire code of this project is available in [this jupyter notebook](https://github.com/alexeygrigorev/mlbookcamp-code/blob/master/chapter-03-churn-prediction/03-churn.ipynb).
20 |
21 |
22 |
23 | ⚠️ |
24 |
25 | The notes are written by the community.
26 | If you see an error here, please create a PR with a fix.
27 | |
28 |
29 |
30 |
31 | * [Notes from Peter Ernicke](https://knowmledge.com/2023/09/29/ml-zoomcamp-2023-machine-learning-for-classification-part-8/)
32 |
33 | ## Navigation
34 |
35 | * [Machine Learning Zoomcamp course](../)
36 | * [Session 3: Machine Learning for Classification](./)
37 | * Previous: [Feature importance: Correlation](07-correlation.md)
38 | * Next: [Logistic regression](09-logistic-regression.md)
39 |
--------------------------------------------------------------------------------
/03-classification/10-training-log-reg.md:
--------------------------------------------------------------------------------
1 |
2 | ## 3.10 Training logistic regression with Scikit-Learn
3 |
4 |
5 |
6 | [Slides](https://www.slideshare.net/AlexeyGrigorev/ml-zoomcamp-3-machine-learning-for-classification)
7 |
8 |
9 | ## Notes
10 |
11 | This video was about training a logistic regression model with Scikit-Learn, applying it to the validation dataset, and calculating its accuracy.
12 |
13 | **Classes, functions, and methods:**
14 |
15 | * `LogisticRegression().fit(x, y)` - train the Scikit-Learn logistic regression model on the x feature matrix and the y target values.
16 | * `LogisticRegression().coef_[0]` - return the coefficients or weights of the LR model
17 | * `LogisticRegression().intercept_[0]` - return the bias or intercept of the LR model
18 | * `LogisticRegression().predict(x)` - make predictions on the x dataset
19 | * `LogisticRegression().predict_proba(x)` - make predictions on the x dataset by returning two columns with their probabilities for the two categories - soft predictions
20 |
21 | The entire code of this project is available in [this jupyter notebook](https://github.com/alexeygrigorev/mlbookcamp-code/blob/master/chapter-03-churn-prediction/03-churn.ipynb).
22 |
23 |
24 |
25 | ⚠️ |
26 |
27 | The notes are written by the community.
28 | If you see an error here, please create a PR with a fix.
29 | |
30 |
31 |
32 |
33 | * [Notes from Peter Ernicke](https://knowmledge.com/2023/09/30/ml-zoomcamp-2023-machine-learning-for-classification-part-10/)
34 |
35 | ## Navigation
36 |
37 | * [Machine Learning Zoomcamp course](../)
38 | * [Session 3: Machine Learning for Classification](./)
39 | * Previous: [Logistic regression](09-logistic-regression.md)
40 | * Next: [Model interpretation](11-log-reg-interpretation.md)
41 |
--------------------------------------------------------------------------------
/03-classification/11-log-reg-interpretation.md:
--------------------------------------------------------------------------------
1 |
2 | ## 3.11 Model interpretation
3 |
4 |
5 |
6 | [Slides](https://www.slideshare.net/AlexeyGrigorev/ml-zoomcamp-3-machine-learning-for-classification)
7 |
8 |
9 | ## Notes
10 |
11 | This video was about the interpretation of coefficients, and training a model with fewer features.
12 |
13 | In the formula of the logistic regression model, only one of the one-hot encoded categories is multiplied by 1, and the other by 0. In this way, we only consider the appropriate category for each categorical feature.
14 |
15 | **Classes, functions, and methods:**
16 |
17 | * `zip(x,y)` - returns an iterator of tuples, pairing each element of x with its corresponding element of y (wrap it in `list()` or `dict()` to materialize the pairs)
18 |
19 | The entire code of this project is available in [this jupyter notebook](https://github.com/alexeygrigorev/mlbookcamp-code/blob/master/chapter-03-churn-prediction/03-churn.ipynb).
20 |
21 |
22 |
23 | ⚠️ |
24 |
25 | The notes are written by the community.
26 | If you see an error here, please create a PR with a fix.
27 | |
28 |
29 |
30 |
31 | * [Notes from Peter Ernicke](https://knowmledge.com/2023/10/01/ml-zoomcamp-2023-machine-learning-for-classification-part-11/)
32 |
33 | ## Navigation
34 |
35 | * [Machine Learning Zoomcamp course](../)
36 | * [Session 3: Machine Learning for Classification](./)
37 | * Previous: [Training logistic regression with Scikit-Learn](10-training-log-reg.md)
38 | * Next: [Using the model](12-using-log-reg.md)
39 |
--------------------------------------------------------------------------------
/03-classification/12-using-log-reg.md:
--------------------------------------------------------------------------------
1 |
2 | ## 3.12 Using the model
3 |
4 |
5 |
6 | [Slides](https://www.slideshare.net/AlexeyGrigorev/ml-zoomcamp-3-machine-learning-for-classification)
7 |
8 |
9 | ## Notes
10 |
11 | We trained the logistic regression model with the full training dataset (training + validation), considering numerical and categorical features. Then, predictions were made on the test dataset, and we evaluated the model using the accuracy metric.
12 |
13 | In this case, the accuracy on the validation and test datasets was similar, which suggests that the model generalizes well to unseen data.
14 |
15 |
16 |
17 | ⚠️ |
18 |
19 | The notes are written by the community.
20 | If you see an error here, please create a PR with a fix.
21 | |
22 |
23 |
24 |
25 | * [Notes from Peter Ernicke](https://knowmledge.com/2023/10/01/ml-zoomcamp-2023-machine-learning-for-classification-part-12/)
26 |
27 | ## Navigation
28 |
29 | * [Machine Learning Zoomcamp course](../)
30 | * [Session 3: Machine Learning for Classification](./)
31 | * Previous: [Model interpretation](11-log-reg-interpretation.md)
32 | * Next: [Summary](13-summary.md)
33 |
--------------------------------------------------------------------------------
/03-classification/13-summary.md:
--------------------------------------------------------------------------------
1 |
2 | ## 3.13 Summary
3 |
4 |
5 |
6 | [Slides](https://www.slideshare.net/AlexeyGrigorev/ml-zoomcamp-3-machine-learning-for-classification)
7 |
8 |
9 | ## Notes
10 |
11 | In this session, we worked on a project to predict churning in customers from a company. We learned the feature importance of numerical and categorical variables, including risk ratio, mutual information, and correlation coefficient. Also, we understood one-hot encoding and implemented logistic regression with Scikit-Learn.
12 |
13 |
14 |
15 | ⚠️ |
16 |
17 | The notes are written by the community.
18 | If you see an error here, please create a PR with a fix.
19 | |
20 |
21 |
22 |
23 |
24 | ## Navigation
25 |
26 | * [Machine Learning Zoomcamp course](../)
27 | * [Session 3: Machine Learning for Classification](./)
28 | * Previous: [Using the model](12-using-log-reg.md)
29 | * Next: [Explore more](14-explore-more.md)
--------------------------------------------------------------------------------
/03-classification/14-explore-more.md:
--------------------------------------------------------------------------------
1 |
2 | ## 3.14 Explore more
3 |
4 | More things
5 |
6 | * Try to exclude least useful features
7 |
8 | Use scikit-learn in project of last week
9 |
10 | * Re-implement train/val/test split using scikit-learn in the project from the last week
11 | * Also, instead of our own linear regression, use `LinearRegression` (not regularized) and `RidgeRegression` (regularized). Find the best regularization parameter for Ridge
12 | * There are other ways to implement one-hot encoding. E.g. using the `OneHotEncoding` class. Check how to use it [here](notebook-scaling-ohe.ipynb).
13 | * Sometimes numerical features require scaling, especially for iterative solvers like "lbfgs". Check how to use `StandardScaler` for that [here](notebook-scaling-ohe.ipynb).
14 |
15 |
16 | Other projects
17 |
18 | * Lead scoring - https://www.kaggle.com/ashydv/leads-dataset
19 | * Default prediction - https://archive.ics.uci.edu/ml/datasets/default+of+credit+card+clients
20 |
21 |
22 | ## Navigation
23 |
24 | * [Machine Learning Zoomcamp course](../)
25 | * [Session 3: Machine Learning for Classification](./)
26 | * Previous: [Summary](13-summary.md)
27 | * Next: [Homework](homework.md)
28 |
--------------------------------------------------------------------------------
/03-classification/homework.md:
--------------------------------------------------------------------------------
1 | ## Homework
2 |
3 | * For 2024 cohort homework, check [the 2024 cohort folder](../cohorts/2024/03-classification/homework.md)
4 | * For 2023 cohort homework, check [the 2023 cohort folder](../cohorts/2023/03-classification/homework.md)
5 | * For 2022 cohort homework, check [the 2022 cohort folder](../cohorts/2022/03-classification/homework.md)
6 | * For 2021 cohort homework and solution, check [the 2021 cohort folder](../cohorts/2021/03-classification/)
7 |
8 |
9 | ## Navigation
10 |
11 | * [Machine Learning Zoomcamp course](../)
12 | * [Session 3: Machine Learning for Classification](./)
13 | * Previous: [Explore more](14-explore-more.md)
14 |
--------------------------------------------------------------------------------
/03-classification/images/correlation-matrix.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/03-classification/images/correlation-matrix.png
--------------------------------------------------------------------------------
/03-classification/images/thumbnail-3-01.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/03-classification/images/thumbnail-3-01.jpg
--------------------------------------------------------------------------------
/03-classification/images/thumbnail-3-02.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/03-classification/images/thumbnail-3-02.jpg
--------------------------------------------------------------------------------
/03-classification/images/thumbnail-3-03.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/03-classification/images/thumbnail-3-03.jpg
--------------------------------------------------------------------------------
/03-classification/images/thumbnail-3-04.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/03-classification/images/thumbnail-3-04.jpg
--------------------------------------------------------------------------------
/03-classification/images/thumbnail-3-05.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/03-classification/images/thumbnail-3-05.jpg
--------------------------------------------------------------------------------
/03-classification/images/thumbnail-3-06.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/03-classification/images/thumbnail-3-06.jpg
--------------------------------------------------------------------------------
/03-classification/images/thumbnail-3-07.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/03-classification/images/thumbnail-3-07.jpg
--------------------------------------------------------------------------------
/03-classification/images/thumbnail-3-08.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/03-classification/images/thumbnail-3-08.jpg
--------------------------------------------------------------------------------
/03-classification/images/thumbnail-3-09.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/03-classification/images/thumbnail-3-09.jpg
--------------------------------------------------------------------------------
/03-classification/images/thumbnail-3-10.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/03-classification/images/thumbnail-3-10.jpg
--------------------------------------------------------------------------------
/03-classification/images/thumbnail-3-11.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/03-classification/images/thumbnail-3-11.jpg
--------------------------------------------------------------------------------
/03-classification/images/thumbnail-3-12.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/03-classification/images/thumbnail-3-12.jpg
--------------------------------------------------------------------------------
/03-classification/images/thumbnail-3-13.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/03-classification/images/thumbnail-3-13.jpg
--------------------------------------------------------------------------------
/03-classification/meta.json:
--------------------------------------------------------------------------------
1 | {
2 | "data": "meta.csv",
3 | "session": 3,
4 | "name": "Machine Learning for Classification"
5 | }
--------------------------------------------------------------------------------
/04-evaluation/01-overview.md:
--------------------------------------------------------------------------------
1 |
2 | ## 4.1 Evaluation metrics: session overview
3 |
4 |
5 |
6 |
7 | [Slides](https://www.slideshare.net/AlexeyGrigorev/ml-zoomcamp-4-evaluation-metrics-for-classification)
8 |
9 |
10 | ## Notes
11 |
12 | The fourth week of Machine Learning Zoomcamp is about different metrics to evaluate a binary classifier. These measures include accuracy, confusion table, precision, recall, ROC curves (TPR, FPR, random model, and ideal model), AUROC, and cross-validation.
13 |
14 | For this project, we used a [Kaggle dataset](https://www.kaggle.com/blastchar/telco-customer-churn) about churn prediction.
15 |
16 | Add notes from the video (PRs are welcome)
17 |
18 |
19 |
20 | ⚠️ |
21 |
22 | The notes are written by the community.
23 | If you see an error here, please create a PR with a fix.
24 | |
25 |
26 |
27 |
28 | * [Notes from Peter Ernicke](https://knowmledge.com/2023/10/02/ml-zoomcamp-2023-evaluation-metrics-for-classification-part-1/)
29 |
30 | ## Navigation
31 |
32 | * [Machine Learning Zoomcamp course](../)
33 | * [Session 4: Evaluation Metrics for Classification](./)
34 | * Next: [Accuracy and dummy model](02-accuracy.md)
35 |
--------------------------------------------------------------------------------
/04-evaluation/06-auc.md:
--------------------------------------------------------------------------------
1 | ## 4.6 ROC AUC
2 |
3 |
4 |
5 | [Slides](https://www.slideshare.net/AlexeyGrigorev/ml-zoomcamp-4-evaluation-metrics-for-classification)
6 |
7 |
8 | ## Notes
9 |
10 | The area under the ROC curve (AUROC) tells us how good our model is with a single value. The AUROC of a random model is 0.5, while for an ideal model it is 1.
11 |
12 | In other words, AUC can be interpreted as the probability that a randomly selected positive example has a greater score than a randomly selected negative example.
13 |
14 | **Classes and methods:**
15 |
16 | * `auc(x, y)` - sklearn.metrics function for calculating the area under the curve defined by the x and y points. For ROC curves, x would be the false positive rate and y the true positive rate.
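17 | * `roc_auc_score(y_true, y_score)` - sklearn.metrics function for calculating the area under the ROC curve directly from the true labels (`y_true`) and the predicted scores or probabilities (`y_score`), without computing the false positive and true positive rates first.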
18 | * `randint(x, y, size=z)` - np.random function for generating random integers from the “discrete uniform” distribution; from `x` (inclusive) to `y` (exclusive) of size `z`.
19 |
20 | The entire code of this project is available in [this jupyter notebook](notebook.ipynb).
21 |
22 | Add notes from the video (PRs are welcome)
23 |
24 |
25 |
26 | ⚠️ |
27 |
28 | The notes are written by the community.
29 | If you see an error here, please create a PR with a fix.
30 | |
31 |
32 |
33 |
34 | * [Notes from Peter Ernicke](https://knowmledge.com/2023/10/07/ml-zoomcamp-2023-evaluation-metrics-for-classification-part-6/)
35 |
36 | ## Navigation
37 |
38 | * [Machine Learning Zoomcamp course](../)
39 | * [Session 4: Evaluation Metrics for Classification](./)
40 | * Previous: [ROC Curves](05-roc.md)
41 | * Next: [Cross-Validation](07-cross-validation.md)
42 |
--------------------------------------------------------------------------------
/04-evaluation/08-summary.md:
--------------------------------------------------------------------------------
1 | ## 4.8 Summary
2 |
3 |
4 |
5 | [Slides](https://www.slideshare.net/AlexeyGrigorev/ml-zoomcamp-4-evaluation-metrics-for-classification)
6 |
7 |
8 | ## Notes
9 |
10 | General definitions:
11 |
12 | * **Metric:** A single number that describes the performance of a model
13 | * **Accuracy:** Fraction of correct answers; sometimes misleading
14 | * Precision and recall are less misleading when we have class imbalance
15 | * **ROC Curve:** A way to evaluate the performance at all thresholds; okay to use with imbalance
16 | * **K-Fold CV:** More reliable estimate for performance (mean + std)
17 |
18 | In brief, this week was about different metrics to evaluate a binary classifier. These measures included accuracy, confusion table, precision, recall, ROC curves (TPR, FPR, random model, and ideal model), and AUROC. We also talked about cross-validation as a different way to estimate the performance of the model and to tune its parameters.
19 |
20 | The code of this project is available in [this jupyter notebook](https://github.com/alexeygrigorev/mlbookcamp-code/blob/master/course-zoomcamp/04-evaluation/notebook.ipynb).
21 |
22 | Add notes from the video (PRs are welcome)
23 |
24 |
25 |
26 | ⚠️ |
27 |
28 | The notes are written by the community.
29 | If you see an error here, please create a PR with a fix.
30 | |
31 |
32 |
33 |
34 | - [Notes from Maximilien Eyengue](https://github.com/maxim-eyengue/Python-Codes/blob/main/ML_Zoomcamp_2024/04_evaluation/Summary_Session_04.md)
35 |
36 | ## Navigation
37 |
38 | * [Machine Learning Zoomcamp course](../)
39 | * [Session 4: Evaluation Metrics for Classification](./)
40 | * Previous: [Cross-Validation](07-cross-validation.md)
41 | * Next: [Explore more](09-explore-more.md)
42 |
--------------------------------------------------------------------------------
/04-evaluation/09-explore-more.md:
--------------------------------------------------------------------------------
1 | ## 4.9 Explore more
2 |
3 | * Check the precision and recall of the dummy classifier that always predicts "FALSE"
4 | * F1 score = 2 P R / (P + R)
5 | * Evaluate precision and recall at different thresholds, plot P vs R - this way you'll get the precision/recall curve (similar to ROC curve)
6 | * Area under the PR curve is also a useful metric
7 |
8 |
9 | ### Other projects
10 |
11 | * Calculate the metrics for the suggested datasets from the previous week
12 |
13 |
14 | ## Navigation
15 |
16 | * [Machine Learning Zoomcamp course](../)
17 | * [Session 4: Evaluation Metrics for Classification](./)
18 | * Previous: [Summary](08-summary.md)
19 | * Next: [Homework](homework.md)
--------------------------------------------------------------------------------
/04-evaluation/README.md:
--------------------------------------------------------------------------------
1 | ## 4. Evaluation Metrics for Classification
2 |
3 | - 4.1 [Evaluation metrics: session overview](01-overview.md)
4 | - 4.2 [Accuracy and dummy model](02-accuracy.md)
5 | - 4.3 [Confusion table](03-confusion-table.md)
6 | - 4.4 [Precision and Recall](04-precision-recall.md)
7 | - 4.5 [ROC Curves](05-roc.md)
8 | - 4.6 [ROC AUC](06-auc.md)
9 | - 4.7 [Cross-Validation](07-cross-validation.md)
10 | - 4.8 [Summary](08-summary.md)
11 | - 4.9 [Explore more](09-explore-more.md)
12 | - 4.10 [Homework](homework.md)
13 |
14 | ## Community notes
15 |
16 | Did you take notes? You can share them here (or in each unit separately)
17 | * [Some cross-validation methods](https://github.com/razekmaiden/ml_zoomcamp/blob/main/additional_topics/ML_ZOOMCAMP_CROSS_VALIDATION_METHODS.ipynb)
18 | * [Notes from Kwang Yang](https://www.kaggle.com/kwangyangchia/notebook-for-lesson-4-mle)
19 | * [Notes from Sebastián Ayala Ruano](https://github.com/sayalaruano/100DaysOfMLCode/blob/main/Classification/Notes/NotesDay14.md)
20 | * [Notes from Alvaro Navas](https://github.com/ziritrion/ml-zoomcamp/blob/main/notes/04_01_classification_eval_metrics.md)
21 | * [Notes from froukje](https://github.com/froukje/ml-zoomcamp/blob/main/week4/Lecture_4_metrics.ipynb)
22 | * [Notes from Hareesh Tummala](https://github.com/tummala-hareesh/ml_zoomcamp_ht/blob/main/notes/week-4-notes.md)
23 | * [Notes from Memoona Tahira](https://github.com/MemoonaTahira/MLZoomcamp2022/tree/main/Notes/Week_4%20-evaluation_metrics_for_ML_model)
24 | * [Notes from Peter Ernicke](https://knowmledge.com/category/courses/ml-zoomcamp/evaluation-metrics/)
25 | * [Notes from Kemal Dahha](https://github.com/kemaldahha/machine-learning-course/blob/main/week_4_notes.ipynb)
26 | * Add your notes here
27 |
28 |
--------------------------------------------------------------------------------
/04-evaluation/homework.md:
--------------------------------------------------------------------------------
1 | ## Homework
2 |
3 | * For 2024 cohort homework, check [the 2024 cohort folder](../cohorts/2024/04-evaluation/homework.md)
4 | * For 2023 cohort homework, check [the 2023 cohort folder](../cohorts/2023/04-evaluation/homework.md)
5 | * For 2022 cohort homework, check [the 2022 cohort folder](../cohorts/2022/04-evaluation/homework.md)
6 | * For 2021 cohort homework and solution, check [the 2021 cohort folder](../cohorts/2021/04-evaluation/)
7 |
8 |
9 | ## Navigation
10 |
11 | * [Machine Learning Zoomcamp course](../)
12 | * [Session 4: Evaluation Metrics for Classification](./)
13 | * Previous: [Explore more](09-explore-more.md)
14 |
--------------------------------------------------------------------------------
/04-evaluation/images/TPR_FPR.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/04-evaluation/images/TPR_FPR.png
--------------------------------------------------------------------------------
/04-evaluation/images/classification_metrics.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/04-evaluation/images/classification_metrics.png
--------------------------------------------------------------------------------
/04-evaluation/images/confusion_matrix.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/04-evaluation/images/confusion_matrix.png
--------------------------------------------------------------------------------
/04-evaluation/images/thumbnail-4-01.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/04-evaluation/images/thumbnail-4-01.jpg
--------------------------------------------------------------------------------
/04-evaluation/images/thumbnail-4-02.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/04-evaluation/images/thumbnail-4-02.jpg
--------------------------------------------------------------------------------
/04-evaluation/images/thumbnail-4-03.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/04-evaluation/images/thumbnail-4-03.jpg
--------------------------------------------------------------------------------
/04-evaluation/images/thumbnail-4-04.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/04-evaluation/images/thumbnail-4-04.jpg
--------------------------------------------------------------------------------
/04-evaluation/images/thumbnail-4-05-cont.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/04-evaluation/images/thumbnail-4-05-cont.jpg
--------------------------------------------------------------------------------
/04-evaluation/images/thumbnail-4-05.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/04-evaluation/images/thumbnail-4-05.jpg
--------------------------------------------------------------------------------
/04-evaluation/images/thumbnail-4-06.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/04-evaluation/images/thumbnail-4-06.jpg
--------------------------------------------------------------------------------
/04-evaluation/images/thumbnail-4-07.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/04-evaluation/images/thumbnail-4-07.jpg
--------------------------------------------------------------------------------
/04-evaluation/images/thumbnail-4-08.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/04-evaluation/images/thumbnail-4-08.jpg
--------------------------------------------------------------------------------
/04-evaluation/meta.csv:
--------------------------------------------------------------------------------
1 | lesson,name,page_name,video,slides,notebook
2 | 1,Evaluation metrics: session overview,01-overview.md,https://www.youtube.com/watch?v=gmg5jw1bM8A,https://www.slideshare.net/AlexeyGrigorev/ml-zoomcamp-4-evaluation-metrics-for-classification,notebook.ipynb
3 | 2,Accuracy and dummy model,02-accuracy.md,https://www.youtube.com/watch?v=FW_l7lB0HUI,https://www.slideshare.net/AlexeyGrigorev/ml-zoomcamp-4-evaluation-metrics-for-classification,notebook.ipynb
4 | 3,Confusion table,03-confusion-table.md,https://www.youtube.com/watch?v=Jt2dDLSlBng,https://www.slideshare.net/AlexeyGrigorev/ml-zoomcamp-4-evaluation-metrics-for-classification,notebook.ipynb
5 | 4,Precision and Recall,04-precision-recall.md,https://www.youtube.com/watch?v=gRLP_mlglMM,https://www.slideshare.net/AlexeyGrigorev/ml-zoomcamp-4-evaluation-metrics-for-classification,notebook.ipynb
6 | 5,ROC Curves,05-roc.md,https://www.youtube.com/watch?v=dnBZLk53sQI,https://www.slideshare.net/AlexeyGrigorev/ml-zoomcamp-4-evaluation-metrics-for-classification,notebook.ipynb
7 | 6,ROC AUC,06-auc.md,https://www.youtube.com/watch?v=hvIQPAwkVZo,https://www.slideshare.net/AlexeyGrigorev/ml-zoomcamp-4-evaluation-metrics-for-classification,notebook.ipynb
8 | 7,Cross-Validation,07-cross-validation.md,https://www.youtube.com/watch?v=BIIZaVtUbf4,https://www.slideshare.net/AlexeyGrigorev/ml-zoomcamp-4-evaluation-metrics-for-classification,notebook.ipynb
9 | 8,Summary,08-summary.md,https://www.youtube.com/watch?v=-v8XEQ2AHvQ,https://www.slideshare.net/AlexeyGrigorev/ml-zoomcamp-4-evaluation-metrics-for-classification,notebook.ipynb
10 | 9,Explore more,09-explore-more.md,,,notebook.ipynb
11 | 10,Homework,homework.md,,,notebook.ipynb
--------------------------------------------------------------------------------
/04-evaluation/meta.json:
--------------------------------------------------------------------------------
1 | {
2 | "data": "meta.csv",
3 | "session": 4,
4 | "name": "Evaluation Metrics for Classification"
5 | }
--------------------------------------------------------------------------------
/05-deployment/08-summary.md:
--------------------------------------------------------------------------------
1 | ## 5.8 Summary
2 |
3 |
4 |
5 | [Slides](https://www.slideshare.net/AlexeyGrigorev/ml-zoomcamp-5-model-deployment)
6 |
7 |
8 | ## Notes
9 | In this chapter we learned these topics:
10 | - How to save the model and load it to re-use it without running the previous code.
11 | - How to deploy the model in a web service.
12 | - How to create a virtual environment.
13 | - How to create a container and run our code on any operating system.
14 | - How to deploy our code in a public web service and access it externally from outside a local computer.
15 |
16 | In the next chapter we will learn about algorithms such as Decision trees, Random forests and Gradient boosting as an alternative way of combining decision trees.
17 |
18 | Add notes from the video (PRs are welcome)
19 |
20 |
21 |
22 |
23 | ⚠️ |
24 |
25 | The notes are written by the community.
26 | If you see an error here, please create a PR with a fix.
27 | |
28 |
29 |
30 |
31 | ## Navigation
32 |
33 | * [Machine Learning Zoomcamp course](../)
34 | * [Session 5: Deploying Machine Learning Models](./)
35 | * Previous: [Deployment to the cloud: AWS Elastic Beanstalk (optional)](07-aws-eb.md)
36 | * Next: [Explore more](09-explore-more.md)
37 |
--------------------------------------------------------------------------------
/05-deployment/09-explore-more.md:
--------------------------------------------------------------------------------
1 |
2 | ## 5.9 Explore more
3 |
4 | * Flask is not the only framework for creating web services. Try others, e.g. FastAPI.
5 | * Experiment with other ways of managing environment, e.g. virtual env, conda, poetry.
6 | * Explore other ways of deploying web services, e.g. GCP, Azure, Heroku, Python Anywhere, etc.
7 |
8 |
9 | Add notes from the video (PRs are welcome)
10 |
11 |
12 |
13 |
14 | ⚠️ |
15 |
16 | The notes are written by the community.
17 | If you see an error here, please create a PR with a fix.
18 | |
19 |
20 |
21 |
22 |
23 | ## Navigation
24 |
25 | * [Machine Learning Zoomcamp course](../)
26 | * [Session 5: Deploying Machine Learning Models](./)
27 | * Previous: [Summary](08-summary.md)
28 | * Next: [Homework](homework.md)
29 |
--------------------------------------------------------------------------------
/05-deployment/code/Dockerfile:
--------------------------------------------------------------------------------
1 | # Slim Python base image; 3.8 matches python_version in the Pipfile.
2 | FROM python:3.8.12-slim
3 |
4 | # pipenv is used below to install the locked dependencies.
5 | RUN pip install pipenv
6 |
7 | WORKDIR /app
8 |
9 | # Copy only the dependency manifests first, before the application files.
10 | COPY Pipfile Pipfile.lock ./
11 |
12 | # --system installs into the image's interpreter instead of a virtualenv;
13 | # --deploy aborts the build if Pipfile.lock is out of date.
14 | RUN pipenv install --system --deploy
15 |
16 | # Application code and the pickled model it loads at start-up.
17 | COPY predict.py model_C=1.0.bin ./
18 |
19 | EXPOSE 9696
20 |
21 | # Serve the Flask app object `app` from predict.py with gunicorn.
22 | ENTRYPOINT ["gunicorn", "--bind", "0.0.0.0:9696", "predict:app"]
--------------------------------------------------------------------------------
/05-deployment/code/Pipfile:
--------------------------------------------------------------------------------
1 | [[source]]
2 | url = "https://pypi.org/simple"
3 | verify_ssl = true
4 | name = "pypi"
5 |
6 | [packages]
7 | numpy = "*"
8 | scikit-learn = "==0.24.2"
9 | flask = "*"
10 | gunicorn = "*"
11 |
12 | [dev-packages]
13 |
14 | [requires]
15 | python_version = "3.8"
16 |
--------------------------------------------------------------------------------
/05-deployment/code/model_C=1.0.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/05-deployment/code/model_C=1.0.bin
--------------------------------------------------------------------------------
/05-deployment/code/ping.py:
--------------------------------------------------------------------------------
1 | from flask import Flask
2 |
3 | app = Flask('ping')
4 |
5 | @app.route('/ping', methods=['GET'])
6 | def ping():
7 | return "PONG"
8 |
9 | if __name__ == "__main__":
10 | app.run(debug=True, host='0.0.0.0', port=9696)
--------------------------------------------------------------------------------
/05-deployment/code/predict-test.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # coding: utf-8
3 |
4 | import requests
5 |
6 |
7 | url = 'http://localhost:9696/predict'
8 |
9 | customer_id = 'xyz-123'
10 | customer = {
11 | "gender": "female",
12 | "seniorcitizen": 0,
13 | "partner": "yes",
14 | "dependents": "no",
15 | "phoneservice": "no",
16 | "multiplelines": "no_phone_service",
17 | "internetservice": "dsl",
18 | "onlinesecurity": "no",
19 | "onlinebackup": "yes",
20 | "deviceprotection": "no",
21 | "techsupport": "no",
22 | "streamingtv": "no",
23 | "streamingmovies": "no",
24 | "contract": "month-to-month",
25 | "paperlessbilling": "yes",
26 | "paymentmethod": "electronic_check",
27 | "tenure": 24,
28 | "monthlycharges": 29.85,
29 | "totalcharges": (24 * 29.85)
30 | }
31 |
32 |
33 | response = requests.post(url, json=customer).json()
34 | print(response)
35 |
36 | if response['churn'] == True:
37 | print('sending promo email to %s' % customer_id)
38 | else:
39 | print('not sending promo email to %s' % customer_id)
--------------------------------------------------------------------------------
/05-deployment/code/predict.py:
--------------------------------------------------------------------------------
1 | import pickle
2 |
3 | from flask import Flask
4 | from flask import request
5 | from flask import jsonify
6 |
7 |
8 | model_file = 'model_C=1.0.bin'
9 |
10 | with open(model_file, 'rb') as f_in:
11 | dv, model = pickle.load(f_in)
12 |
13 | app = Flask('churn')
14 |
15 | @app.route('/predict', methods=['POST'])
16 | def predict():
17 | customer = request.get_json()
18 |
19 | X = dv.transform([customer])
20 | y_pred = model.predict_proba(X)[0, 1]
21 | churn = y_pred >= 0.5
22 |
23 | result = {
24 | 'churn_probability': float(y_pred),
25 | 'churn': bool(churn)
26 | }
27 |
28 | return jsonify(result)
29 |
30 |
31 | if __name__ == "__main__":
32 | app.run(debug=True, host='0.0.0.0', port=9696)
--------------------------------------------------------------------------------
/05-deployment/homework.md:
--------------------------------------------------------------------------------
1 | ## Homework
2 |
3 |
4 | * For 2024 cohort homework, check [the 2024 cohort folder](../cohorts/2024/)
5 | * For 2023 cohort homework, check [the 2023 cohort folder](../cohorts/2023/)
6 | * For 2022 cohort homework, check [the 2022 cohort folder](../cohorts/2022/)
7 | * For 2021 cohort homework and solution, check [the 2021 cohort folder](../cohorts/2021/05-deployment/)
8 |
9 |
10 | ## Navigation
11 |
12 | * [Machine Learning Zoomcamp course](../)
13 | * [Session 5: Deploying Machine Learning Models](./)
14 | * Previous: [Explore more](09-explore-more.md)
15 |
--------------------------------------------------------------------------------
/05-deployment/images/thumbnail-5-01.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/05-deployment/images/thumbnail-5-01.jpg
--------------------------------------------------------------------------------
/05-deployment/images/thumbnail-5-02.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/05-deployment/images/thumbnail-5-02.jpg
--------------------------------------------------------------------------------
/05-deployment/images/thumbnail-5-03.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/05-deployment/images/thumbnail-5-03.jpg
--------------------------------------------------------------------------------
/05-deployment/images/thumbnail-5-04.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/05-deployment/images/thumbnail-5-04.jpg
--------------------------------------------------------------------------------
/05-deployment/images/thumbnail-5-05.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/05-deployment/images/thumbnail-5-05.jpg
--------------------------------------------------------------------------------
/05-deployment/images/thumbnail-5-06.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/05-deployment/images/thumbnail-5-06.jpg
--------------------------------------------------------------------------------
/05-deployment/images/thumbnail-5-07.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/05-deployment/images/thumbnail-5-07.jpg
--------------------------------------------------------------------------------
/05-deployment/images/thumbnail-5-08.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/05-deployment/images/thumbnail-5-08.jpg
--------------------------------------------------------------------------------
/05-deployment/meta.csv:
--------------------------------------------------------------------------------
1 | lesson,name,page_name,video,slides
2 | 1,Intro / Session overview,01-intro.md,https://www.youtube.com/watch?v=agIFak9A3m8,https://www.slideshare.net/AlexeyGrigorev/ml-zoomcamp-5-model-deployment
3 | 2,Saving and loading the model,02-pickle.md,https://www.youtube.com/watch?v=EJpqZ7OlwFU,https://www.slideshare.net/AlexeyGrigorev/ml-zoomcamp-5-model-deployment
4 | 3,Web services: introduction to Flask,03-flask-intro.md,https://www.youtube.com/watch?v=W7ubna1Rfv8,https://www.slideshare.net/AlexeyGrigorev/ml-zoomcamp-5-model-deployment
5 | 4,Serving the churn model with Flask,04-flask-deployment.md,https://www.youtube.com/watch?v=Q7ZWPgPnRz8,https://www.slideshare.net/AlexeyGrigorev/ml-zoomcamp-5-model-deployment
6 | 5,Python virtual environment: Pipenv,05-pipenv.md,https://www.youtube.com/watch?v=BMXh8JGROHM,https://www.slideshare.net/AlexeyGrigorev/ml-zoomcamp-5-model-deployment
7 | 6,Environment management: Docker,06-docker.md,https://www.youtube.com/watch?v=wAtyYZ6zvAs,https://www.slideshare.net/AlexeyGrigorev/ml-zoomcamp-5-model-deployment
8 | 7,Deployment to the cloud: AWS Elastic Beanstalk (optional),07-aws-eb.md,https://www.youtube.com/watch?v=HGPJ4ekhcLg,https://www.slideshare.net/AlexeyGrigorev/ml-zoomcamp-5-model-deployment
9 | 8,Summary,08-summary.md,https://www.youtube.com/watch?v=sSAqYSk7Br4,https://www.slideshare.net/AlexeyGrigorev/ml-zoomcamp-5-model-deployment
10 | 9,Explore more,09-explore-more.md,,
11 | 10,Homework,homework.md,,
--------------------------------------------------------------------------------
/05-deployment/meta.json:
--------------------------------------------------------------------------------
1 | {
2 | "data": "meta.csv",
3 | "session": 5,
4 | "name": "Deploying Machine Learning Models"
5 | }
--------------------------------------------------------------------------------
/06-trees/01-credit-risk.md:
--------------------------------------------------------------------------------
1 |
2 | ## 6.1 Credit risk scoring project
3 |
4 |
5 |
6 |
7 | [Slides](https://www.slideshare.net/AlexeyGrigorev/ml-zoomcamp-6-decision-trees-and-ensemble-learning)
8 |
9 |
10 | ## Notes
11 |
12 | In this session we'll learn about decision trees and ensemble learning algorithms. The questions that we try to address this week are, "What are decision trees? How are they different from ensemble algorithms? How can we implement and fine-tune these models to make binary classification predictions?"
13 |
14 | To be specific, we'll use [credit scoring data](https://github.com/gastonstat/CreditScoring) to build a model that predicts whether a bank should grant a loan to a client or not. The bank makes these decisions based on historical records.
15 |
16 | In the credit scoring classification problem,
17 | - if the model returns 0, this means the client is very likely to pay back the loan and the bank will approve the loan.
18 | - if the model returns 1, then the client is considered as a `defaulter` and the bank may not approve the loan.
19 |
20 | Add notes from the video (PRs are welcome)
21 |
22 |
23 |
24 |
25 | ⚠️ |
26 |
27 | The notes are written by the community.
28 | If you see an error here, please create a PR with a fix.
29 | |
30 |
31 |
32 |
33 | * [Notes from Peter Ernicke](https://knowmledge.com/2023/10/16/ml-zoomcamp-2023-decision-trees-and-ensemble-learning-part-1/)
34 |
35 | ## Navigation
36 |
37 | * [Machine Learning Zoomcamp course](../)
38 | * [Session 6: Decision Trees and Ensemble Learning](./)
39 | * Next: [Data cleaning and preparation](02-data-prep.md)
40 |
--------------------------------------------------------------------------------
/06-trees/09-final-model.md:
--------------------------------------------------------------------------------
1 | ## 6.9 Selecting the best model
2 |
3 |
4 |
5 | [Slides](https://www.slideshare.net/AlexeyGrigorev/ml-zoomcamp-6-decision-trees-and-ensemble-learning)
6 |
7 |
8 | ## Notes
9 |
10 | We select the final model from decision tree, random forest, or XGBoost based on the best AUC scores. After that, we prepare `df_full_train` and `df_test` to train and evaluate the final model. If there is not much difference between the model's AUC scores on the train and test data, then the model has generalized the patterns well enough.
11 |
12 | Generally, XGBoost models perform better on tabular data than other machine learning models, but the downside is that these models are easy to overfit because of the high number of hyperparameters. Therefore, XGBoost models require a lot more attention to parameter tuning to optimize them.
13 |
14 | Add notes from the video (PRs are welcome)
15 |
16 |
17 |
18 |
19 | ⚠️ |
20 |
21 | The notes are written by the community.
22 | If you see an error here, please create a PR with a fix.
23 | |
24 |
25 |
26 |
27 | * [Notes from Peter Ernicke](https://knowmledge.com/2023/10/29/ml-zoomcamp-2023-decision-trees-and-ensemble-learning-part-14/)
28 |
29 | ## Navigation
30 |
31 | * [Machine Learning Zoomcamp course](../)
32 | * [Session 6: Decision Trees and Ensemble Learning](./)
33 | * Previous: [XGBoost parameter tuning](08-xgb-tuning.md)
34 | * Next: [Summary](10-summary.md)
35 |
--------------------------------------------------------------------------------
/06-trees/10-summary.md:
--------------------------------------------------------------------------------
1 | ## 6.10 Summary
2 |
3 |
4 |
5 | [Slides](https://www.slideshare.net/AlexeyGrigorev/ml-zoomcamp-6-decision-trees-and-ensemble-learning)
6 |
7 |
8 | ## Notes
9 |
10 | - Decision trees learn if-then-else rules from data.
11 | - Finding the best split: select the least impure split. This algorithm can overfit; that's why we control it by limiting the max depth and the size of the group.
12 | - Random forest is a way of combining multiple decision trees. It should have a diverse set of models to make good predictions.
13 | - Gradient boosting trains models sequentially: each model tries to fix errors of the previous model. XGBoost is an implementation of gradient boosting.
14 |
15 | Add notes from the video (PRs are welcome)
16 |
17 |
18 |
19 |
20 | ⚠️ |
21 |
22 | The notes are written by the community.
23 | If you see an error here, please create a PR with a fix.
24 | |
25 |
26 |
27 |
28 |
29 | ## Navigation
30 |
31 | * [Machine Learning Zoomcamp course](../)
32 | * [Session 6: Decision Trees and Ensemble Learning](./)
33 | * Previous: [Selecting the best model](09-final-model.md)
34 | * Next: [Explore more](11-explore-more.md)
35 |
--------------------------------------------------------------------------------
/06-trees/11-explore-more.md:
--------------------------------------------------------------------------------
1 |
2 | ## 6.11 Explore more
3 |
4 | * For this dataset we didn't do EDA or feature engineering. You can do it to get more insights into the problem.
5 | * For random forest, there are more parameters that we can tune. Check max_features and bootstrap.
6 | * There's a variation of random forest called "extremely randomized trees", or "extra trees". Instead of selecting the best split among all possible thresholds, it selects a few thresholds randomly and picks the best one among them. Because of that, extra trees are less prone to overfitting. In Scikit-Learn, they are implemented in ExtraTreesClassifier. Try it for this project.
7 | * XGBoost can deal with NAs - we don't have to do fillna for it. Check whether not filling NAs helps improve performance.
8 | * Experiment with other XGBoost parameters: subsample and colsample_bytree.
9 | * When selecting the best split, decision trees find the most useful features. This information can be used for understanding which features are more important than others. See example here for random forest (it's the same for plain decision trees) and for xgboost.
10 | * Trees can also be used for solving the regression problems: check DecisionTreeRegressor, RandomForestRegressor and the objective=reg:squarederror parameter for XGBoost.
11 |
12 | ## Notes
13 |
14 | Add notes from the video (PRs are welcome)
15 |
16 |
17 |
18 |
19 | ⚠️ |
20 |
21 | The notes are written by the community.
22 | If you see an error here, please create a PR with a fix.
23 | |
24 |
25 |
26 |
27 |
28 | ## Navigation
29 |
30 | * [Machine Learning Zoomcamp course](../)
31 | * [Session 6: Decision Trees and Ensemble Learning](./)
32 | * Previous: [Summary](10-summary.md)
33 | * Next: [Homework](homework.md)
34 |
--------------------------------------------------------------------------------
/06-trees/README.md:
--------------------------------------------------------------------------------
1 | ## 6. Decision Trees and Ensemble Learning
2 |
3 | - 6.1 [Credit risk scoring project](01-credit-risk.md)
4 | - 6.2 [Data cleaning and preparation](02-data-prep.md)
5 | - 6.3 [Decision trees](03-decision-trees.md)
6 | - 6.4 [Decision tree learning algorithm](04-decision-tree-learning.md)
7 | - 6.5 [Decision trees parameter tuning](05-decision-tree-tuning.md)
8 | - 6.6 [Ensemble learning and random forest](06-random-forest.md)
9 | - 6.7 [Gradient boosting and XGBoost](07-boosting.md)
10 | - 6.8 [XGBoost parameter tuning](08-xgb-tuning.md)
11 | - 6.9 [Selecting the best model](09-final-model.md)
12 | - 6.10 [Summary](10-summary.md)
13 | - 6.11 [Explore more](11-explore-more.md)
14 | - 6.12 [Homework](homework.md)
15 |
16 |
17 | ## Community notes
18 |
19 | Did you take notes? You can share them here (or in each unit separately)
20 |
21 | * [Kwang Yang's Notes](https://www.kaggle.com/kwangyangchia/notebook-for-lesson-6-mle)
22 | * [Alvaro Navas' Notes](https://github.com/ziritrion/ml-zoomcamp/blob/main/notes/06_trees.md)
23 | * [Notes from froukje](https://github.com/froukje/ml-zoomcamp/blob/main/week6/Lecture_6_Decision_Trees_Ensemble_Learning.ipynb)
24 | * [Decision Trees and Ensembled Learning by Oscar Garcia](https://github.com/ozkary/machine-learning-engineering/tree/main/06-trees)
25 | * [Notes from Peter Ernicke](https://knowmledge.com/category/courses/ml-zoomcamp/decision-trees/)
26 | * [Notes from Maximilien Eyengue](https://github.com/maxim-eyengue/Python-Codes/blob/main/ML_Zoomcamp_2024/06_trees/Summary_Session_06.md)
27 | * [Notes from Kemal Dahha](https://github.com/kemaldahha/machine-learning-course/blob/main/week_6_notes.ipynb)
28 | * Add your notes here
29 |
--------------------------------------------------------------------------------
/06-trees/homework.md:
--------------------------------------------------------------------------------
1 | ## Homework
2 |
3 | * For 2024 cohort homework, check [the 2024 cohort folder](../cohorts/2024/06-trees/homework.md)
4 | * For 2023 cohort homework, check [the 2023 cohort folder](../cohorts/2023/)
5 | * For 2022 cohort homework, check [the 2022 cohort folder](../cohorts/2022/)
6 | * For 2021 cohort homework and solution, check [the 2021 cohort folder](../cohorts/2021/06-trees/)
7 |
8 |
9 | ## Navigation
10 |
11 | * [Machine Learning Zoomcamp course](../)
12 | * [Session 6: Decision Trees and Ensemble Learning](./)
13 | * Previous: [Explore more](11-explore-more.md)
14 |
--------------------------------------------------------------------------------
/06-trees/images/thumbnail-6-01.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/06-trees/images/thumbnail-6-01.jpg
--------------------------------------------------------------------------------
/06-trees/images/thumbnail-6-02.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/06-trees/images/thumbnail-6-02.jpg
--------------------------------------------------------------------------------
/06-trees/images/thumbnail-6-03.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/06-trees/images/thumbnail-6-03.jpg
--------------------------------------------------------------------------------
/06-trees/images/thumbnail-6-04.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/06-trees/images/thumbnail-6-04.jpg
--------------------------------------------------------------------------------
/06-trees/images/thumbnail-6-05.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/06-trees/images/thumbnail-6-05.jpg
--------------------------------------------------------------------------------
/06-trees/images/thumbnail-6-06.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/06-trees/images/thumbnail-6-06.jpg
--------------------------------------------------------------------------------
/06-trees/images/thumbnail-6-07.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/06-trees/images/thumbnail-6-07.jpg
--------------------------------------------------------------------------------
/06-trees/images/thumbnail-6-08.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/06-trees/images/thumbnail-6-08.jpg
--------------------------------------------------------------------------------
/06-trees/images/thumbnail-6-09.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/06-trees/images/thumbnail-6-09.jpg
--------------------------------------------------------------------------------
/06-trees/images/thumbnail-6-10.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/06-trees/images/thumbnail-6-10.jpg
--------------------------------------------------------------------------------
/06-trees/images/thumbnail-6-12.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/06-trees/images/thumbnail-6-12.jpg
--------------------------------------------------------------------------------
/06-trees/meta.json:
--------------------------------------------------------------------------------
1 | {
2 | "data": "meta.csv",
3 | "session": 6,
4 | "name": "Decision Trees and Ensemble Learning"
5 | }
--------------------------------------------------------------------------------
/08-deep-learning/07-checkpointing.md:
--------------------------------------------------------------------------------
1 | ## 8.7 Checkpointing
2 |
3 |
4 |
5 | [Slides](https://www.slideshare.net/AlexeyGrigorev/ml-zoomcamp-8-neural-networks-and-deep-learning-250592316)
6 |
7 |
8 | The `ModelCheckpoint` callback is used while training the model to save the model or its weights to a checkpoint file at some interval, so that the model or weights can be loaded later to continue training from the saved state or to be used for deployment.
9 |
10 | **Classes, functions, and attributes**:
11 |
12 | - `keras.callbacks.ModelCheckpoint`: ModelCheckpoint class from keras callbacks api
13 | - `filepath`: path to save the model file
14 | - `monitor`: the metric name to monitor
15 | - `save_best_only`: only save when the model is considered the best according to the metric provided in `monitor`
16 | - `mode`: overwrite the save file based on either the maximum or the minimum score according to the metric provided in `monitor`
17 |
18 | ## Notes
19 |
20 | Add notes from the video (PRs are welcome)
21 |
22 | * checkpointing saves the model after each training iteration
23 | * checkpoint conditions may include reaching the best performance
24 | * keras callbacks
25 |
26 |
27 |
28 | ⚠️ |
29 |
30 | The notes are written by the community.
31 | If you see an error here, please create a PR with a fix.
32 | |
33 |
34 |
35 |
36 | * [Notes from Peter Ernicke](https://knowmledge.com/2023/11/24/ml-zoomcamp-2023-deep-learning-part-9/)
37 |
38 | ## Navigation
39 |
40 | * [Machine Learning Zoomcamp course](../)
41 | * [Session 8: Neural Networks and Deep Learning](./)
42 | * Previous: [Adjusting the learning rate](06-learning-rate.md)
43 | * Next: [Adding more layers](08-more-layers.md)
44 |
--------------------------------------------------------------------------------
/08-deep-learning/11-large-model.md:
--------------------------------------------------------------------------------
1 | ## 8.11 Training a larger model
2 |
3 |
4 |
5 | [Slides](https://www.slideshare.net/AlexeyGrigorev/ml-zoomcamp-8-neural-networks-and-deep-learning-250592316)
6 |
7 |
8 | In this section we increase the image input size from `150` to `299`, reduce the amount of data augmentation parameters and lower the learning rate. This gives us better results than any of the previous experiments.
9 |
10 | ## Notes
11 |
12 | Add notes from the video (PRs are welcome)
13 |
14 |
15 |
16 |
17 | ⚠️ |
18 |
19 | The notes are written by the community.
20 | If you see an error here, please create a PR with a fix.
21 | |
22 |
23 |
24 |
25 | * [Notes from Peter Ernicke](https://knowmledge.com/2023/11/28/ml-zoomcamp-2023-deep-learning-part-13/)
26 |
27 | ## Navigation
28 |
29 | * [Machine Learning Zoomcamp course](../)
30 | * [Session 8: Neural Networks and Deep Learning](./)
31 | * Previous: [Data augmentation](10-augmentation.md)
32 | * Next: [Using the model](12-using-model.md)
33 |
--------------------------------------------------------------------------------
/08-deep-learning/13-summary.md:
--------------------------------------------------------------------------------
1 | ## 8.13 Summary
2 |
3 |
4 |
5 | - We can use pre-trained models for general image classification
6 | - Convolutional layers let us turn an image into a vector
7 | - Dense layers use the vector to make the predictions
8 | - Instead of training a model from scratch, we can use transfer learning and re-use already trained convolutional layers
9 | - First, train a small model (150x150) before training a big one (299x299)
10 | - Learning rate - how fast the model trains. Fast learners aren't always the best ones
11 | - We can save the best model using callbacks and checkpointing
12 | - To avoid overfitting, use dropout and augmentation
13 |
14 | ## Notes
15 |
16 | Add notes from the video (PRs are welcome)
17 |
18 |
19 |
20 |
21 | ⚠️ |
22 |
23 | The notes are written by the community.
24 | If you see an error here, please create a PR with a fix.
25 | |
26 |
27 |
28 |
29 |
30 | ## Navigation
31 |
32 | * [Machine Learning Zoomcamp course](../)
33 | * [Session 8: Neural Networks and Deep Learning](./)
34 | * Previous: [Using the model](12-using-model.md)
35 | * Next: [Explore more](14-explore-more.md)
--------------------------------------------------------------------------------
/08-deep-learning/14-explore-more.md:
--------------------------------------------------------------------------------
1 | ## 8.14 Explore more
2 |
3 | **TODO**
4 |
5 | - Add more data, e.g., Zalando, etc.
6 | - Albumentations - another way of generating augmentations
7 | - Use PyTorch or MXNet instead of TensorFlow/Keras
8 | - In addition to Xception, there are other architectures - try them
9 |
10 | **Other projects:**
11 |
12 | - cats vs dogs
13 | - Hotdog vs not hotdog
14 | - Category of images
15 |
16 | ## Navigation
17 |
18 | * [Machine Learning Zoomcamp course](../)
19 | * [Session 8: Neural Networks and Deep Learning](./)
20 | * Previous: [Summary](13-summary.md)
21 | * Next: [Homework](homework.md)
--------------------------------------------------------------------------------
/08-deep-learning/homework.md:
--------------------------------------------------------------------------------
1 | ## Homework
2 | * For 2024 cohort homework, check [the 2024 cohort folder](../cohorts/2024/08-deep-learning/)
3 | * For 2023 cohort homework, check [the 2023 cohort folder](../cohorts/2023/08-deep-learning/)
4 | * For 2022 cohort homework, check [the 2022 cohort folder](../cohorts/2022/)
5 | * For 2021 cohort homework and solution, check [the 2021 cohort folder](../cohorts/2021/08-deep-learning/)
6 |
7 |
8 | ## Navigation
9 |
10 | * [Machine Learning Zoomcamp course](../)
11 | * [Session 8: Neural Networks and Deep Learning](./)
12 | * Previous: [Explore more](14-explore-more.md)
13 |
--------------------------------------------------------------------------------
/08-deep-learning/images/thumbnail-8-01.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/08-deep-learning/images/thumbnail-8-01.jpg
--------------------------------------------------------------------------------
/08-deep-learning/images/thumbnail-8-01b.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/08-deep-learning/images/thumbnail-8-01b.jpg
--------------------------------------------------------------------------------
/08-deep-learning/images/thumbnail-8-02.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/08-deep-learning/images/thumbnail-8-02.jpg
--------------------------------------------------------------------------------
/08-deep-learning/images/thumbnail-8-03.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/08-deep-learning/images/thumbnail-8-03.jpg
--------------------------------------------------------------------------------
/08-deep-learning/images/thumbnail-8-04.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/08-deep-learning/images/thumbnail-8-04.jpg
--------------------------------------------------------------------------------
/08-deep-learning/images/thumbnail-8-05.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/08-deep-learning/images/thumbnail-8-05.jpg
--------------------------------------------------------------------------------
/08-deep-learning/images/thumbnail-8-06.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/08-deep-learning/images/thumbnail-8-06.jpg
--------------------------------------------------------------------------------
/08-deep-learning/images/thumbnail-8-07.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/08-deep-learning/images/thumbnail-8-07.jpg
--------------------------------------------------------------------------------
/08-deep-learning/images/thumbnail-8-08.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/08-deep-learning/images/thumbnail-8-08.jpg
--------------------------------------------------------------------------------
/08-deep-learning/images/thumbnail-8-09.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/08-deep-learning/images/thumbnail-8-09.jpg
--------------------------------------------------------------------------------
/08-deep-learning/images/thumbnail-8-10.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/08-deep-learning/images/thumbnail-8-10.jpg
--------------------------------------------------------------------------------
/08-deep-learning/images/thumbnail-8-11.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/08-deep-learning/images/thumbnail-8-11.jpg
--------------------------------------------------------------------------------
/08-deep-learning/images/thumbnail-8-12.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/08-deep-learning/images/thumbnail-8-12.jpg
--------------------------------------------------------------------------------
/08-deep-learning/images/thumbnail-8-13.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/08-deep-learning/images/thumbnail-8-13.jpg
--------------------------------------------------------------------------------
/08-deep-learning/meta.json:
--------------------------------------------------------------------------------
1 | {
2 | "data": "meta.csv",
3 | "session": 8,
4 | "name": "Neural Networks and Deep Learning"
5 | }
--------------------------------------------------------------------------------
/09-serverless/01-intro.md:
--------------------------------------------------------------------------------
1 |
2 | ## 9.1 Introduction to Serverless
3 |
4 |
5 |
6 | In the last session, we built and trained a clothes classification deep learning model using `Keras` and `TensorFlow`. This session focuses on deploying it. The model categorizes images of clothing items (e.g., 👕 t-shirts, 👖 pants, etc.) uploaded by users on a website. Deployment will be done using **AWS Lambda**, a serverless solution to execute code without managing servers, and instead of `TensorFlow`, we will use `TensorFlow-lite`.
7 |
8 | Refer to [updates.md](updates.md) for info on running TF lite
9 | in 2024.
10 |
11 |
12 | ## Notes
13 |
14 | Add notes from the video (PRs are welcome)
15 |
16 | * introduction to the topic of the week: deploying a deep learning model to the cloud, aws lambda and tensorflow lite
17 |
18 |
19 |
20 | ⚠️ |
21 |
22 | The notes are written by the community.
23 | If you see an error here, please create a PR with a fix.
24 | |
25 |
26 |
27 |
28 | * [Notes from Peter Ernicke](https://knowmledge.com/2023/11/30/ml-zoomcamp-2023-serverless-part-1/)
29 |
30 | ## Navigation
31 |
32 | * [Machine Learning Zoomcamp course](../)
33 | * [Session 9: Serverless Deep Learning](./)
34 | * Next: [AWS Lambda](02-aws-lambda.md)
35 |
--------------------------------------------------------------------------------
/09-serverless/04-preparing-code.md:
--------------------------------------------------------------------------------
1 |
2 | ## 9.4 Preparing the code for Lambda
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 | ## Notes
11 |
12 | Add notes from the video (PRs are welcome)
13 |
14 |
15 |
16 |
17 | ⚠️ |
18 |
19 | The notes are written by the community.
20 | If you see an error here, please create a PR with a fix.
21 | |
22 |
23 |
24 |
25 | * [Notes from Peter Ernicke](https://knowmledge.com/2023/12/03/ml-zoomcamp-2023-serverless-part-4/)
26 |
27 | ## Navigation
28 |
29 | * [Machine Learning Zoomcamp course](../)
30 | * [Session 9: Serverless Deep Learning](./)
31 | * Previous: [TensorFlow Lite](03-tensorflow-lite.md)
32 | * Next: [Preparing a Docker image](05-docker-image.md)
33 |
--------------------------------------------------------------------------------
/09-serverless/06-creating-lambda.md:
--------------------------------------------------------------------------------
1 |
2 | ## 9.6 Creating the lambda function
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 | ## Notes
11 |
12 | Add notes from the video (PRs are welcome)
13 |
14 |
15 |
16 |
17 | ⚠️ |
18 |
19 | The notes are written by the community.
20 | If you see an error here, please create a PR with a fix.
21 | |
22 |
23 |
24 |
25 | * [Notes from Peter Ernicke](https://knowmledge.com/2023/12/05/ml-zoomcamp-2023-serverless-part-6/)
26 |
27 | ## Navigation
28 |
29 | * [Machine Learning Zoomcamp course](../)
30 | * [Session 9: Serverless Deep Learning](./)
31 | * Previous: [Preparing a Docker image](05-docker-image.md)
32 | * Next: [API Gateway: exposing the lambda function](07-api-gateway.md)
33 |
--------------------------------------------------------------------------------
/09-serverless/07-api-gateway.md:
--------------------------------------------------------------------------------
1 |
2 | ## 9.7 API Gateway: exposing the lambda function
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 | ## Notes
11 |
12 | Add notes from the video (PRs are welcome)
13 |
14 |
15 |
16 |
17 | ⚠️ |
18 |
19 | The notes are written by the community.
20 | If you see an error here, please create a PR with a fix.
21 | |
22 |
23 |
24 |
25 | * [Notes from Peter Ernicke](https://knowmledge.com/2023/12/06/ml-zoomcamp-2023-serverless-part-7/)
26 |
27 | ## Navigation
28 |
29 | * [Machine Learning Zoomcamp course](../)
30 | * [Session 9: Serverless Deep Learning](./)
31 | * Previous: [Creating the lambda function](06-creating-lambda.md)
32 | * Next: [Summary](08-summary.md)
33 |
--------------------------------------------------------------------------------
/09-serverless/08-summary.md:
--------------------------------------------------------------------------------
1 | ## 9.8 Summary
2 |
3 |
4 |
5 |
6 | ## Notes
7 |
8 | Add notes from the video (PRs are welcome)
9 |
10 |
11 |
12 |
13 | ⚠️ |
14 |
15 | The notes are written by the community.
16 | If you see an error here, please create a PR with a fix.
17 | |
18 |
19 |
20 |
21 |
22 | ## Navigation
23 |
24 | * [Machine Learning Zoomcamp course](../)
25 | * [Session 9: Serverless Deep Learning](./)
26 | * Previous: [API Gateway: exposing the lambda function](07-api-gateway.md)
27 | * Next: [Explore more](09-explore-more.md)
--------------------------------------------------------------------------------
/09-serverless/09-explore-more.md:
--------------------------------------------------------------------------------
1 | ## 9.9 Explore more
2 |
3 | * Try similar serverless services from Google Cloud and Microsoft Azure
4 | * Deploy cats vs dogs and other Keras models with AWS Lambda
5 | * AWS Lambda is also good for other libraries, not just Tensorflow. You can deploy Scikit-Learn and XGBoost models with it as well
6 |
7 |
8 | ## Navigation
9 |
10 | * [Machine Learning Zoomcamp course](../)
11 | * [Session 9: Serverless Deep Learning](./)
12 | * Previous: [Summary](08-summary.md)
13 | * Next: [Homework](homework.md)
--------------------------------------------------------------------------------
/09-serverless/README.md:
--------------------------------------------------------------------------------
1 | ## 9. Serverless Deep Learning
2 |
3 | - 9.1 [Introduction to Serverless](01-intro.md)
4 | - 9.2 [AWS Lambda](02-aws-lambda.md)
5 | - 9.3 [TensorFlow Lite](03-tensorflow-lite.md)
6 | - 9.4 [Preparing the code for Lambda](04-preparing-code.md)
7 | - 9.5 [Preparing a Docker image](05-docker-image.md)
8 | - 9.6 [Creating the lambda function](06-creating-lambda.md)
9 | - 9.7 [API Gateway: exposing the lambda function](07-api-gateway.md)
10 | - 9.8 [Summary](08-summary.md)
11 | - 9.9 [Explore more](09-explore-more.md)
12 | - 9.10 [Homework](homework.md)
13 |
14 |
15 |
16 | ## Community notes
17 |
18 | Did you take notes? You can share them here (or in each unit separately)
19 |
20 | * [Alvaro Navas' Notes](https://github.com/ziritrion/ml-zoomcamp/blob/main/notes/09_serverless.md)
21 | * [Notes from froukje](https://github.com/froukje/ml-zoomcamp/blob/main/week9/Lecture_9_serverless.ipynb)
22 | * [Notes from Memoona Tahira](https://github.com/MemoonaTahira/MLZoomcamp2022/tree/main/Notes/Week_9-Serverless)
23 | * [Notes from Oscar Garcia](https://github.com/ozkary/machine-learning-engineering/tree/main/09-serverless)
24 | * [Notes from Peter Ernicke](https://knowmledge.com/category/courses/ml-zoomcamp/serverless-deployment/)
25 | * [Notes from Maximilien Eyengue](https://github.com/maxim-eyengue/Python-Codes/blob/main/ML_Zoomcamp_2024/09_serverless/Summary_Session_09.md)
26 |
27 | * Add your notes here
28 |
--------------------------------------------------------------------------------
/09-serverless/code/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM public.ecr.aws/lambda/python:3.10
2 |
3 | # Pin numpy 1.x: the tflite_runtime 2.14 wheel below was compiled with numpy 1, not numpy 2.
4 | RUN pip install numpy==1.23.1 keras-image-helper
5 | # --no-deps stops pip from upgrading the pinned numpy while installing the TF Lite runtime.
6 | RUN pip install --no-deps https://github.com/alexeygrigorev/tflite-aws-lambda/raw/main/tflite/tflite_runtime-2.14.0-cp310-cp310-linux_x86_64.whl
7 |
8 | COPY clothing-model.tflite .
9 | COPY lambda_function.py .
10 |
11 | CMD [ "lambda_function.lambda_handler" ]
--------------------------------------------------------------------------------
/09-serverless/code/convert-model.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | from tensorflow import keras
3 | # Load the trained Keras model from clothing-model.h5.
4 | model = keras.models.load_model('clothing-model.h5')
5 | # Build a TF-Lite converter from the in-memory Keras model.
6 | converter = tf.lite.TFLiteConverter.from_keras_model(model)
7 | # Run the conversion; the result is written out in binary mode below.
8 | tflite_model = converter.convert()
9 | # Save as clothing-model.tflite, the file name the Dockerfile copies and lambda_function.py loads.
10 | with open('clothing-model.tflite', 'wb') as f_out:
11 | f_out.write(tflite_model)
--------------------------------------------------------------------------------
/09-serverless/code/lambda_function.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # coding: utf-8
3 |
4 | import tflite_runtime.interpreter as tflite
5 | from keras_image_helper import create_preprocessor
6 |
7 | # Image preprocessor from keras_image_helper, configured for 'xception' with a 299x299 target size.
8 | preprocessor = create_preprocessor('xception', target_size=(299, 299))
9 |
10 | # Load the TF-Lite model once at import time; predict() reuses this module-level interpreter.
11 | interpreter = tflite.Interpreter(model_path='clothing-model.tflite')
12 | interpreter.allocate_tensors()
13 | # Tensor indices of the model's first input and first output, used by predict().
14 | input_index = interpreter.get_input_details()[0]['index']
15 | output_index = interpreter.get_output_details()[0]['index']
16 |
17 | # Class labels; predict() zips them positionally with the model output, so the order matters.
18 | classes = [
19 | 'dress',
20 | 'hat',
21 | 'longsleeve',
22 | 'outwear',
23 | 'pants',
24 | 'shirt',
25 | 'shoes',
26 | 'shorts',
27 | 'skirt',
28 | 't-shirt'
29 | ]
30 |
31 | # url = 'http://bit.ly/mlbookcamp-pants'
32 |
33 | def predict(url):  # Returns {class name: model output value} for the image that `url` points to.
34 | X = preprocessor.from_url(url)
35 | # Feed the preprocessed input to the TF-Lite interpreter and read back the output tensor.
36 | interpreter.set_tensor(input_index, X)
37 | interpreter.invoke()
38 | preds = interpreter.get_tensor(output_index)
39 | # Take the first row (presumably the single image in the batch; TODO confirm) as a plain list.
40 | float_predictions = preds[0].tolist()
41 | # Pair every label in `classes` with the value at the same position.
42 | return dict(zip(classes, float_predictions))
43 |
44 |
45 | def lambda_handler(event, context):  # Handler named in the Dockerfile CMD; `context` is not used.
46 | url = event['url']  # raises KeyError when the event has no 'url' key
47 | result = predict(url)
48 | return result
49 |
50 |
51 |
--------------------------------------------------------------------------------
/09-serverless/code/plan.md:
--------------------------------------------------------------------------------
1 | # 9. Serverless Deep Learning
2 |
3 | We'll deploy the clothes classification model we trained previously.
4 |
5 | ## 9.1 Introduction to Serverless
6 |
7 | * What we'll cover this week
8 |
9 |
10 | ## 9.2 AWS Lambda
11 |
12 | * Intro to AWS Lambda
13 | * Serverless vs serverful
14 |
15 |
16 | ## 9.3 TensorFlow Lite
17 |
18 | * Why not TensorFlow
19 | * Converting the model
20 | * Using the TF-Lite model for making predictions
21 |
22 |
23 | ## 9.4 Preparing the Lambda code
24 |
25 | * Moving the code from notebook to script
26 | * Testing it locally
27 |
28 |
29 | ## 9.5 Preparing a Docker image
30 |
31 | * Lambda base images
32 | * Preparing the Dockerfile
33 | * Using the right TF-Lite wheel
34 |
35 |
36 | ## 9.6 Creating the lambda function
37 |
38 | * Publishing the image to AWS ECR
39 | * Creating the function
40 | * Configuring it
41 | * Testing the function from the AWS Console
42 | * Pricing
43 |
44 |
45 | ## 9.7 API Gateway: exposing the lambda function
46 |
47 | * Creating and configuring the gateway
48 |
49 |
50 | ## 9.8 Summary
51 |
52 | * AWS Lambda is a way of deploying models without having to worry about servers
53 | * Tensorflow Lite is a lightweight alternative to Tensorflow that only focuses on inference
54 | * To deploy your code, package it in a Docker container
55 | * Expose the lambda function via API Gateway
56 |
57 |
58 | ## 9.9 Explore more
59 |
60 | * Try similar serverless services from Google Cloud and Microsoft Azure
61 | * Deploy cats vs dogs and other Keras models with AWS Lambda
62 | * AWS Lambda is also good for other libraries, not just Tensorflow. You can deploy Scikit-Learn and XGBoost models with it as well.
63 |
--------------------------------------------------------------------------------
/09-serverless/code/test.py:
--------------------------------------------------------------------------------
1 | import requests
2 | # Invocation endpoint on localhost:8080; assumes the Lambda container is running locally with that port published (TODO confirm).
3 | url = 'http://localhost:8080/2015-03-31/functions/function/invocations'
4 | # Event payload; lambda_handler reads its 'url' key.
5 | data = {'url': 'http://bit.ly/mlbookcamp-pants'}
6 | # POST the event as JSON and print the decoded JSON response.
7 | result = requests.post(url, json=data).json()
8 | print(result)
9 |
--------------------------------------------------------------------------------
/09-serverless/homework.md:
--------------------------------------------------------------------------------
1 | ## Homework
2 |
3 | * For 2023 cohort homework, check [the 2023 cohort folder](../cohorts/2023/)
4 | * For 2022 cohort homework, check [the 2022 cohort folder](../cohorts/2022/)
5 | * For 2021 cohort homework and solution, check [the 2021 cohort folder](../cohorts/2021/09-serverless/)
6 |
7 |
8 | ## Navigation
9 |
10 | * [Machine Learning Zoomcamp course](../)
11 | * [Session 9: Serverless Deep Learning](./)
12 | * Previous: [Explore more](09-explore-more.md)
--------------------------------------------------------------------------------
/09-serverless/images/thumbnail-9-01.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/09-serverless/images/thumbnail-9-01.jpg
--------------------------------------------------------------------------------
/09-serverless/images/thumbnail-9-02.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/09-serverless/images/thumbnail-9-02.jpg
--------------------------------------------------------------------------------
/09-serverless/images/thumbnail-9-03.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/09-serverless/images/thumbnail-9-03.jpg
--------------------------------------------------------------------------------
/09-serverless/images/thumbnail-9-04.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/09-serverless/images/thumbnail-9-04.jpg
--------------------------------------------------------------------------------
/09-serverless/images/thumbnail-9-05.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/09-serverless/images/thumbnail-9-05.jpg
--------------------------------------------------------------------------------
/09-serverless/images/thumbnail-9-06.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/09-serverless/images/thumbnail-9-06.jpg
--------------------------------------------------------------------------------
/09-serverless/images/thumbnail-9-07.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/09-serverless/images/thumbnail-9-07.jpg
--------------------------------------------------------------------------------
/09-serverless/images/thumbnail-9-08.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/09-serverless/images/thumbnail-9-08.jpg
--------------------------------------------------------------------------------
/09-serverless/meta.csv:
--------------------------------------------------------------------------------
1 | lesson,name,page_name,video,slides,notebook
2 | 1,Introduction to Serverless,01-intro.md,https://www.youtube.com/watch?v=JLIVwIsU6RA,,
3 | 2,AWS Lambda,02-aws-lambda.md,https://www.youtube.com/watch?v=_UX8-2WhHZo,,
4 | 3,TensorFlow Lite,03-tensorflow-lite.md,https://www.youtube.com/watch?v=OzZA4mSBE0Q,,code/tensorflow-model.ipynb
5 | 4,Preparing the code for Lambda,04-preparing-code.md,https://www.youtube.com/watch?v=XXBUivsHhec,,
6 | 5,Preparing a Docker image,05-docker-image.md,https://www.youtube.com/watch?v=y4_YQjfOsDo,,
7 | 6,Creating the lambda function,06-creating-lambda.md,https://www.youtube.com/watch?v=kBch5oD5BkY,,
8 | 7,API Gateway: exposing the lambda function,07-api-gateway.md,https://www.youtube.com/watch?v=wyZ9aqQOXvs,,
9 | 8,Summary,08-summary.md,https://www.youtube.com/watch?v=bu3nPiHCNLU,,
10 | 9,Explore more,09-explore-more.md,,,
11 | 10,Homework,homework.md,,,
--------------------------------------------------------------------------------
/09-serverless/meta.json:
--------------------------------------------------------------------------------
1 | {
2 | "data": "meta.csv",
3 | "session": 9,
4 | "name": "Serverless Deep Learning"
5 | }
--------------------------------------------------------------------------------
/09-serverless/updates.md:
--------------------------------------------------------------------------------
1 | ## Python 3.12 vs TF Lite 2.17
2 |
3 | The latest versions of TF Lite don't support Python 3.12 yet.
4 |
5 | As a workaround, we can use the previous version of TF Lite
6 | to serve the models created by TensorFlow 2.17. We tested
7 | it with TF Lite 2.14 and the deep learning models we use
8 | in the course work successfully with this setup.
9 |
10 | Here's how you do it:
11 |
12 |
13 | First, use Python 3.10. It means that you will need to use
14 | `public.ecr.aws/lambda/python:3.10` as the base image:
15 |
16 | ```docker
17 | FROM public.ecr.aws/lambda/python:3.10
18 | ```
19 |
20 | Second, use numpy 1.23.1:
21 |
22 | ```docker
23 | RUN pip install numpy==1.23.1
24 | ```
25 |
26 | When installing the TF Lite interpreter for AWS Lambda,
27 | pass the `--no-deps` flag so that pip doesn't install its dependencies:
28 |
29 | ```docker
30 | RUN pip install --no-deps https://github.com/alexeygrigorev/tflite-aws-lambda/raw/main/tflite/tflite_runtime-2.14.0-cp310-cp310-linux_x86_64.whl
31 | ```
32 |
33 | If you don't do it, pip will try to upgrade the version of numpy
34 | and your code won't work (as the tflite runtime was compiled
35 | with numpy 1, not numpy 2).
36 |
37 |
38 |
39 |
40 |
41 |
--------------------------------------------------------------------------------
/10-kubernetes/01-overview.md:
--------------------------------------------------------------------------------
1 |
2 | ## 10.1 Overview
3 |
4 |
5 |
6 |
7 | [Slides](https://www.slideshare.net/AlexeyGrigorev/ml-zoomcamp-10-kubernetes)
8 |
9 |
10 | ## Notes
11 |
12 | Add notes from the video (PRs are welcome)
13 |
14 | * same use case as in the session before: classifying images of clothes
15 | * using tensorflow serving, written in C++, with focus on inference
16 | * gRPC binary protocol
17 | * deploying to kubernetes
18 | * 1st component: gateway (download image, resize, turn into numpy array - computationally not expensive - can be done with CPU)
19 | * 2nd component: model (matrix multiplications - computationally expensive - thus use GPU)
20 | * scaling the two components independently: i.e. 5 gateways handing images to 1 model
21 | * two components in two different Docker containers (lesson four)
22 | * kubernetes main concepts (lesson five)
23 | * running kubernetes on your local machine (lesson six)
24 | * deploy the two services to kubernetes (lesson seven)
25 | * move from local to cloud (lesson eight)
26 |
27 |
28 |
29 | ⚠️ |
30 |
31 | The notes are written by the community.
32 | If you see an error here, please create a PR with a fix.
33 | |
34 |
35 |
36 |
37 |
38 | ## Navigation
39 |
40 | * [Machine Learning Zoomcamp course](../)
41 | * [Session 10: Kubernetes and TensorFlow Serving](./)
42 | * Next: [TensorFlow Serving](02-tensorflow-serving.md)
43 |
--------------------------------------------------------------------------------
/10-kubernetes/09-summary.md:
--------------------------------------------------------------------------------
1 | ## 10.9 Summary
2 |
3 |
4 |
5 | In session 10, we covered the following concepts:
6 |
7 | - TF-Serving is a system for deploying TensorFlow models
8 | - When using TF-Serving, we need a component for pre-processing
9 | - Kubernetes is a container orchestration platform
10 | - To deploy something on Kubernetes, we need to specify a deployment and a service
11 | - You can use Docker compose and Kind for local experiments
12 |
13 | ## Notes
14 |
15 | Add notes from the video (PRs are welcome)
16 |
17 |
18 |
19 |
20 | ⚠️ |
21 |
22 | The notes are written by the community.
23 | If you see an error here, please create a PR with a fix.
24 | |
25 |
26 |
27 |
28 |
29 | ## Navigation
30 |
31 | * [Machine Learning Zoomcamp course](../)
32 | * [Session 10: Kubernetes and TensorFlow Serving](./)
33 | * Previous: [Deploying to EKS](08-eks.md)
34 | * Next: [Explore more](10-explore-more.md)
--------------------------------------------------------------------------------
/10-kubernetes/10-explore-more.md:
--------------------------------------------------------------------------------
1 |
2 | ## 10.10 Explore more
3 |
4 | - Other local Kubernetes: minikube, k3d, k3s, microk8s, EKS Anywhere
5 | - [Rancher desktop](https://rancherdesktop.io/)
6 | - [Docker desktop](https://www.docker.com/products/docker-desktop/)
7 | - [Lens](https://k8slens.dev/)
8 | - Many cloud providers have Kubernetes: GCP, Azure, DigitalOcean and others. Look for "Managed Kubernetes" in your favourite search engine
9 | - Deploy the model from previous modules and from your project with Kubernetes
10 | - Learn about Kubernetes namespaces. Here we used the default namespace
11 |
12 | ## Notes
13 |
14 | Add notes from the video (PRs are welcome)
15 |
16 |
17 |
18 |
19 | ⚠️ |
20 |
21 | The notes are written by the community.
22 | If you see an error here, please create a PR with a fix.
23 | |
24 |
25 |
26 |
27 |
28 | ## Navigation
29 |
30 | * [Machine Learning Zoomcamp course](../)
31 | * [Session 10: Kubernetes and TensorFlow Serving](./)
32 | * Previous: [Summary](09-summary.md)
33 | * Next: [Homework](homework.md)
--------------------------------------------------------------------------------
/10-kubernetes/README.md:
--------------------------------------------------------------------------------
1 | ## 10. Kubernetes and TensorFlow Serving
2 |
3 | - 10.1 [Overview](01-overview.md)
4 | - 10.2 [TensorFlow Serving](02-tensorflow-serving.md)
5 | - 10.3 [Creating a pre-processing service](03-preprocessing.md)
6 | - 10.4 [Running everything locally with Docker-compose](04-docker-compose.md)
7 | - 10.5 [Introduction to Kubernetes](05-kubernetes-intro.md)
8 | - 10.6 [Deploying a simple service to Kubernetes](06-kubernetes-simple-service.md)
9 | - 10.7 [Deploying TensorFlow models to Kubernetes](07-kubernetes-tf-serving.md)
10 | - 10.8 [Deploying to EKS](08-eks.md)
11 | - 10.9 [Summary](09-summary.md)
12 | - 10.10 [Explore more](10-explore-more.md)
13 | - 10.11 [Homework](homework.md)
14 |
15 |
16 | ## Community notes
17 |
18 | Did you take notes? You can share them here (or in each unit separately)
19 |
20 | * [Alvaro Navas' Notes](https://github.com/ziritrion/ml-zoomcamp/blob/main/notes/10_kubernetes.md)
21 | * [Notes from Oscar Garcia](https://github.com/ozkary/machine-learning-engineering/tree/main/10-kubernetes)
22 | * [Notes from Maximilien Eyengue](https://github.com/maxim-eyengue/Python-Codes/blob/main/ML_Zoomcamp_2024/10_kubernetes/Summary_Session_10.md)
23 | * Add your notes here
24 |
--------------------------------------------------------------------------------
/10-kubernetes/code/Pipfile:
--------------------------------------------------------------------------------
1 | [[source]]
2 | url = "https://pypi.org/simple"
3 | verify_ssl = true
4 | name = "pypi"
5 |
6 | [packages]
7 | grpcio = "==1.42.0"
8 | flask = "*"
9 | gunicorn = "*"
10 | keras-image-helper = "*"
11 | tensorflow-protobuf = "==2.7.0"
12 |
13 | [dev-packages]
14 |
15 | [requires]
16 | python_version = "3.8"
17 |
--------------------------------------------------------------------------------
/10-kubernetes/code/docker-compose.yaml:
--------------------------------------------------------------------------------
1 | version: "3.9"
2 | services:
3 | clothing-model:
4 | image: zoomcamp-10-model:xception-v4-001
5 | gateway:
6 | image: zoomcamp-10-gateway:002
7 | environment:
8 | - TF_SERVING_HOST=clothing-model:8500
9 | ports:
10 | - "9696:9696"
--------------------------------------------------------------------------------
/10-kubernetes/code/image-gateway.dockerfile:
--------------------------------------------------------------------------------
1 | FROM python:3.8.12-slim
2 |
3 | RUN pip install pipenv
4 |
5 | WORKDIR /app
6 |
7 | COPY ["Pipfile", "Pipfile.lock", "./"]
8 |
9 | RUN pipenv install --system --deploy
10 |
11 | COPY ["gateway.py", "proto.py", "./"]
12 |
13 | EXPOSE 9696
14 |
15 | ENTRYPOINT ["gunicorn", "--bind=0.0.0.0:9696", "gateway:app"]
--------------------------------------------------------------------------------
/10-kubernetes/code/image-model.dockerfile:
--------------------------------------------------------------------------------
1 | FROM tensorflow/serving:2.7.0
2 |
3 | COPY clothing-model /models/clothing-model/1
4 | ENV MODEL_NAME="clothing-model"
--------------------------------------------------------------------------------
/10-kubernetes/code/kube-config/eks-config.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: eksctl.io/v1alpha5
2 | kind: ClusterConfig
3 |
4 | metadata:
5 | name: mlzoomcamp-eks
6 | region: eu-west-1
7 |
8 | nodeGroups:
9 | - name: ng-m5-xlarge
10 | instanceType: m5.xlarge
11 | desiredCapacity: 1
--------------------------------------------------------------------------------
/10-kubernetes/code/kube-config/gateway-deployment.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: apps/v1
2 | kind: Deployment
3 | metadata:
4 | name: gateway
5 | spec:
6 | selector:
7 | matchLabels:
8 | app: gateway
9 | template:
10 | metadata:
11 | labels:
12 | app: gateway
13 | spec:
14 | containers:
15 | - name: gateway
16 | image: 387546586013.dkr.ecr.eu-west-1.amazonaws.com/mlzoomcamp-images:zoomcamp-10-gateway-002
17 | resources:
18 | limits:
19 | memory: "128Mi"
20 | cpu: "100m"
21 | ports:
22 | - containerPort: 9696
23 | env:
24 | - name: TF_SERVING_HOST
25 | value: tf-serving-clothing-model.default.svc.cluster.local:8500
26 |
--------------------------------------------------------------------------------
/10-kubernetes/code/kube-config/gateway-service.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | kind: Service
3 | metadata:
4 | name: gateway
5 | spec:
6 | type: LoadBalancer
7 | selector:
8 | app: gateway
9 | ports:
10 | - port: 80
11 | targetPort: 9696
12 |
--------------------------------------------------------------------------------
/10-kubernetes/code/kube-config/model-deployment.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: apps/v1
2 | kind: Deployment
3 | metadata:
4 | name: tf-serving-clothing-model
5 | spec:
6 | replicas: 1
7 | selector:
8 | matchLabels:
9 | app: tf-serving-clothing-model
10 | template:
11 | metadata:
12 | labels:
13 | app: tf-serving-clothing-model
14 | spec:
15 | containers:
16 | - name: tf-serving-clothing-model
17 | image: 387546586013.dkr.ecr.eu-west-1.amazonaws.com/mlzoomcamp-images:zoomcamp-10-model-xception-v4-001
18 | resources:
19 | limits:
20 | memory: "512Mi"
21 | cpu: "0.5"
22 | ports:
23 | - containerPort: 8500
24 |
--------------------------------------------------------------------------------
/10-kubernetes/code/kube-config/model-service.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | kind: Service
3 | metadata:
4 | name: tf-serving-clothing-model
5 | spec:
6 | selector:
7 | app: tf-serving-clothing-model
8 | ports:
9 | - port: 8500
10 | targetPort: 8500
11 |
--------------------------------------------------------------------------------
/10-kubernetes/code/ping/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM python:3.8.12-slim
2 |
3 | RUN pip install pipenv
4 |
5 | WORKDIR /app
6 |
7 | COPY ["Pipfile", "Pipfile.lock", "./"]
8 |
9 | RUN pipenv install --system --deploy
10 |
11 | COPY "ping.py" .
12 |
13 | EXPOSE 9696
14 |
15 | ENTRYPOINT ["gunicorn", "--bind=0.0.0.0:9696", "ping:app"]
--------------------------------------------------------------------------------
/10-kubernetes/code/ping/Pipfile:
--------------------------------------------------------------------------------
1 | [[source]]
2 | url = "https://pypi.org/simple"
3 | verify_ssl = true
4 | name = "pypi"
5 |
6 | [packages]
7 | flask = "*"
8 | gunicorn = "*"
9 |
10 | [dev-packages]
11 |
12 | [requires]
13 | python_version = "3.8"
14 |
--------------------------------------------------------------------------------
/10-kubernetes/code/ping/deployment.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: apps/v1
2 | kind: Deployment
3 | metadata:
4 | name: ping-deployment
5 | spec:
6 | replicas: 1
7 | selector:
8 | matchLabels:
9 | app: ping
10 | template:
11 | metadata:
12 | labels:
13 | app: ping
14 | spec:
15 | containers:
16 | - name: ping-pod
17 | image: ping:v001
18 | resources:
19 | limits:
20 | memory: "128Mi"
21 | cpu: "200m"
22 | ports:
23 | - containerPort: 9696
24 |
--------------------------------------------------------------------------------
/10-kubernetes/code/ping/metallb-config.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: metallb.io/v1beta1
2 | kind: IPAddressPool
3 | metadata:
4 | name: example
5 | namespace: metallb-system
6 | spec:
7 | addresses:
8 | - 172.20.255.200-172.20.255.250
9 | ---
10 | apiVersion: metallb.io/v1beta1
11 | kind: L2Advertisement
12 | metadata:
13 | name: empty
14 | namespace: metallb-system
--------------------------------------------------------------------------------
/10-kubernetes/code/ping/ping.py:
--------------------------------------------------------------------------------
1 | from flask import Flask
2 | # Minimal Flask app exposing a single /ping route.
3 | app = Flask('ping')
4 | # GET /ping answers with the body "PONG".
5 | @app.route('/ping', methods=['GET'])
6 | def ping():
7 | return "PONG"
8 | # Runs Flask's built-in server only when executed directly; the Dockerfile starts the app with gunicorn instead.
9 | if __name__ == "__main__":
10 | app.run(debug=True, host='0.0.0.0', port=9696)
--------------------------------------------------------------------------------
/10-kubernetes/code/ping/service.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | kind: Service
3 | metadata:
4 | name: ping
5 | spec:
6 | type: LoadBalancer
7 | selector:
8 | app: ping
9 | ports:
10 | - port: 80
11 | targetPort: 9696
12 |
--------------------------------------------------------------------------------
/10-kubernetes/code/proto.py:
--------------------------------------------------------------------------------
1 | from tensorflow.core.framework import tensor_pb2, tensor_shape_pb2, types_pb2
2 |
3 |
4 | def dtypes_as_dtype(dtype):  # Map a dtype to the matching types_pb2 constant.
5 | if dtype == "float32":
6 | return types_pb2.DT_FLOAT
7 | raise Exception("dtype %s is not supported" % dtype)  # anything other than float32 is rejected
8 |
9 |
10 | def make_tensor_proto(data):
11 | shape = data.shape
12 | dims = [tensor_shape_pb2.TensorShapeProto.Dim(size=i) for i in shape]
13 | proto_shape = tensor_shape_pb2.TensorShapeProto(dim=dims)
14 |
15 | proto_dtype = dtypes_as_dtype(data.dtype)
16 |
17 | tensor_proto = tensor_pb2.TensorProto(dtype=proto_dtype, tensor_shape=proto_shape)
18 | tensor_proto.tensor_content = data.tostring()
19 |
20 | return tensor_proto
21 |
22 |
23 | def np_to_protobuf(data):  # Convert an array to a TensorProto, casting it to float32 first when needed.
24 | if data.dtype != "float32":
25 | data = data.astype("float32")
26 | return make_tensor_proto(data)
27 |
--------------------------------------------------------------------------------
/10-kubernetes/code/test.py:
--------------------------------------------------------------------------------
1 | import requests
2 | # Gateway endpoint; 9696 is the port published in docker-compose.yaml. The /predict route is presumably defined in gateway.py (verify).
3 | url = 'http://localhost:9696/predict'
4 | # Request payload with the URL of the image to classify.
5 | data = {'url': 'http://bit.ly/mlbookcamp-pants'}
6 | # POST the payload as JSON and print the decoded JSON response.
7 | result = requests.post(url, json=data).json()
8 | print(result)
--------------------------------------------------------------------------------
/10-kubernetes/homework.md:
--------------------------------------------------------------------------------
1 | ## Homework
2 |
3 | * For 2024 cohort homework, check [the 2024 cohort folder](../cohorts/2024/)
4 | * For 2023 cohort homework, check [the 2023 cohort folder](../cohorts/2023/)
5 | * For 2022 cohort homework, check [the 2022 cohort folder](../cohorts/2022/)
6 | * For 2021 cohort homework and solution, check [the 2021 cohort folder](../cohorts/2021/10-kubernetes/)
7 |
8 |
9 | ## Navigation
10 |
11 | * [Machine Learning Zoomcamp course](../)
12 | * [Session 10: Kubernetes and TensorFlow Serving](./)
13 | * Previous: [Explore more](10-explore-more.md)
--------------------------------------------------------------------------------
/10-kubernetes/images/thumbnail-10-01.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/10-kubernetes/images/thumbnail-10-01.jpg
--------------------------------------------------------------------------------
/10-kubernetes/images/thumbnail-10-02.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/10-kubernetes/images/thumbnail-10-02.jpg
--------------------------------------------------------------------------------
/10-kubernetes/images/thumbnail-10-03.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/10-kubernetes/images/thumbnail-10-03.jpg
--------------------------------------------------------------------------------
/10-kubernetes/images/thumbnail-10-04.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/10-kubernetes/images/thumbnail-10-04.jpg
--------------------------------------------------------------------------------
/10-kubernetes/images/thumbnail-10-05.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/10-kubernetes/images/thumbnail-10-05.jpg
--------------------------------------------------------------------------------
/10-kubernetes/images/thumbnail-10-06.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/10-kubernetes/images/thumbnail-10-06.jpg
--------------------------------------------------------------------------------
/10-kubernetes/images/thumbnail-10-07.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/10-kubernetes/images/thumbnail-10-07.jpg
--------------------------------------------------------------------------------
/10-kubernetes/images/thumbnail-10-08.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/10-kubernetes/images/thumbnail-10-08.jpg
--------------------------------------------------------------------------------
/10-kubernetes/images/thumbnail-10-09.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/10-kubernetes/images/thumbnail-10-09.jpg
--------------------------------------------------------------------------------
/10-kubernetes/meta.csv:
--------------------------------------------------------------------------------
1 | lesson,name,page_name,video,slides,notebook
2 | 1,Overview,01-overview.md,https://www.youtube.com/watch?v=mvPER7YfTkw,https://www.slideshare.net/AlexeyGrigorev/ml-zoomcamp-10-kubernetes,
3 | 2,TensorFlow Serving,02-tensorflow-serving.md,https://www.youtube.com/watch?v=deXR2fThYDw,,
4 | 3,Creating a pre-processing service,03-preprocessing.md,https://www.youtube.com/watch?v=OIlrS14Zi0o,,code/tf-serving-connect.ipynb
5 | 4,Running everything locally with Docker-compose,04-docker-compose.md,https://www.youtube.com/watch?v=ZhQQfpWfkKY,https://www.slideshare.net/AlexeyGrigorev/ml-zoomcamp-10-kubernetes,
6 | 5,Introduction to Kubernetes,05-kubernetes-intro.md,https://www.youtube.com/watch?v=UjVkpszDzgk,https://www.slideshare.net/AlexeyGrigorev/ml-zoomcamp-10-kubernetes,
7 | 6,Deploying a simple service to Kubernetes,06-kubernetes-simple-service.md,https://www.youtube.com/watch?v=PPUCVRIV9t8,https://www.slideshare.net/AlexeyGrigorev/ml-zoomcamp-10-kubernetes,
8 | 7,Deploying TensorFlow models to Kubernetes,07-kubernetes-tf-serving.md,https://www.youtube.com/watch?v=6vHLMdnjO2w,,
9 | 8,Deploying to EKS,08-eks.md,https://www.youtube.com/watch?v=89jxeddZtC0,,
10 | 9,Summary,09-summary.md,https://www.youtube.com/watch?v=J5LMRTIu4jY,,
11 | 10,Explore more,10-explore-more.md,,,
12 | 11,Homework,homework.md,,,
--------------------------------------------------------------------------------
/10-kubernetes/meta.json:
--------------------------------------------------------------------------------
1 | {
2 | "data": "meta.csv",
3 | "session": 10,
4 | "name": "Kubernetes and TensorFlow Serving"
5 | }
--------------------------------------------------------------------------------
/11-kserve/01-overview.md:
--------------------------------------------------------------------------------
1 |
2 | ## 11.1 Overview
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 | ## Notes
11 |
12 | Add notes from the video (PRs are welcome)
13 |
14 |
15 |
16 |
17 | ⚠️ |
18 |
19 | The notes are written by the community.
20 | If you see an error here, please create a PR with a fix.
21 | |
22 |
23 |
24 |
25 |
26 | ## Navigation
27 |
28 | * [Machine Learning Zoomcamp course](../)
29 | * [Session 11: KServe](./)
30 | * Next: [Running KServe locally](02-kserve-local.md)
--------------------------------------------------------------------------------
/11-kserve/02-kserve-local.md:
--------------------------------------------------------------------------------
1 |
2 | ## 11.2 Running KServe locally
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 | ## Notes
11 |
12 | Add notes from the video (PRs are welcome)
13 |
14 |
15 |
16 |
17 | ⚠️ |
18 |
19 | The notes are written by the community.
20 | If you see an error here, please create a PR with a fix.
21 | |
22 |
23 |
24 |
25 | ### Updated Instructions (09 July, 2022)
26 |
27 | In the `iris-example.yaml` file, instead of `"gs://kfserving-samples/models/sklearn/iris"`, use `"gs://kfserving-examples/models/sklearn/1.0/model"` as the URL in storageUri.
28 |
29 | Also make sure to use the following versions:
30 | - kind: 0.11.1 (via https://kind.sigs.k8s.io/dl/v0.11.1/kind-linux-amd64)
31 | - kubectl: 1.21.1 (via https://dl.k8s.io/release/v1.21.1/bin/linux/amd64/kubectl)
32 | - kindest/node image: 1.21.1 (via `kind create cluster --image kindest/node:v1.21.1`)
33 | - kserve: 0.8 (via https://raw.githubusercontent.com/kserve/kserve/release-0.8/hack/quick_install.sh)
34 |
35 | ## Navigation
36 |
37 | * [Machine Learning Zoomcamp course](../)
38 | * [Session 11: KServe](./)
39 | * Previous: [Overview](01-overview.md)
40 | * Next: [Deploying a Scikit-Learn model with KServe](03-kserve-sklearn.md)
41 |
--------------------------------------------------------------------------------
/11-kserve/03-kserve-sklearn.md:
--------------------------------------------------------------------------------
1 |
2 | ## 11.3 Deploying a Scikit-Learn model with KServe
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 | ## Notes
11 |
12 | Add notes from the video (PRs are welcome)
13 |
14 |
15 |
16 |
17 | ⚠️ |
18 |
19 | The notes are written by the community.
20 | If you see an error here, please create a PR with a fix.
21 | |
22 |
23 |
24 |
25 |
26 | ## Navigation
27 |
28 | * [Machine Learning Zoomcamp course](../)
29 | * [Session 11: KServe](./)
30 | * Previous: [Running KServe locally](02-kserve-local.md)
31 | * Next: [Deploying custom Scikit-Learn images with KServe](04-kserve-custom-image.md)
--------------------------------------------------------------------------------
/11-kserve/04-kserve-custom-image.md:
--------------------------------------------------------------------------------
1 |
2 | ## 11.4 Deploying custom Scikit-Learn images with KServe
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 | ## Notes
11 |
12 | Add notes from the video (PRs are welcome)
13 |
14 |
15 |
16 |
17 | ⚠️ |
18 |
19 | The notes are written by the community.
20 | If you see an error here, please create a PR with a fix.
21 | |
22 |
23 |
24 |
25 |
26 | ## Navigation
27 |
28 | * [Machine Learning Zoomcamp course](../)
29 | * [Session 11: KServe](./)
30 | * Previous: [Deploying a Scikit-Learn model with KServe](03-kserve-sklearn.md)
31 | * Next: [Serving TensorFlow models with KServe](05-tensorflow-kserve.md)
--------------------------------------------------------------------------------
/11-kserve/05-tensorflow-kserve.md:
--------------------------------------------------------------------------------
1 |
2 | ## 11.5 Serving TensorFlow models with KServe
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 | ## Notes
11 |
12 | Add notes from the video (PRs are welcome)
13 |
14 |
15 |
16 |
17 | ⚠️ |
18 |
19 | The notes are written by the community.
20 | If you see an error here, please create a PR with a fix.
21 | |
22 |
23 |
24 |
25 | * For PyTorch users, [see this repo](https://github.com/mmg10/torchserve_kserve) on how to serve a PyTorch model using KServe.
26 |
27 | ## Navigation
28 |
29 | * [Machine Learning Zoomcamp course](../)
30 | * [Session 11: KServe](./)
31 | * Previous: [Deploying custom Scikit-Learn images with KServe](04-kserve-custom-image.md)
32 | * Next: [KServe transformers](06-kserve-transformers.md)
33 |
--------------------------------------------------------------------------------
/11-kserve/06-kserve-transformers.md:
--------------------------------------------------------------------------------
1 |
2 | ## 11.6 KServe transformers
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 | ## Notes
11 |
12 | Add notes from the video (PRs are welcome)
13 |
14 |
15 |
16 |
17 | ⚠️ |
18 |
19 | The notes are written by the community.
20 | If you see an error here, please create a PR with a fix.
21 | |
22 |
23 |
24 |
25 |
26 | ## Navigation
27 |
28 | * [Machine Learning Zoomcamp course](../)
29 | * [Session 11: KServe](./)
30 | * Previous: [Serving TensorFlow models with KServe](05-tensorflow-kserve.md)
31 | * Next: [Deploying with KServe and EKS](07-kserve-eks.md)
--------------------------------------------------------------------------------
/11-kserve/07-kserve-eks.md:
--------------------------------------------------------------------------------
1 |
2 | ## 11.7 Deploying with KServe and EKS
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 | ## Notes
11 |
12 | Add notes from the video (PRs are welcome)
13 |
14 |
15 |
16 | ⚠️ |
17 |
18 | The notes are written by the community.
19 | If you see an error here, please create a PR with a fix.
20 | |
21 |
22 |
23 |
24 | ### Updated Instructions (13 July, 2022)
25 | See [the instructions here](07-kserve-eks-upd.md)
26 |
27 | ## Navigation
28 |
29 | * [Machine Learning Zoomcamp course](../)
30 | * [Session 11: KServe](./)
31 | * Previous: [KServe transformers](06-kserve-transformers.md)
32 | * Next: [Summary](08-summary.md)
33 |
--------------------------------------------------------------------------------
/11-kserve/08-summary.md:
--------------------------------------------------------------------------------
1 |
2 | ## 11.8 Summary
3 |
4 | Coming soon (or not so soon)
5 |
6 |
7 | ## Notes
8 |
9 | Add notes from the video (PRs are welcome)
10 |
11 |
12 |
13 |
14 | ⚠️ |
15 |
16 | The notes are written by the community.
17 | If you see an error here, please create a PR with a fix.
18 | |
19 |
20 |
21 |
22 |
23 | ## Navigation
24 |
25 | * [Machine Learning Zoomcamp course](../)
26 | * [Session 11: KServe](./)
27 | * Previous: [Deploying with KServe and EKS](07-kserve-eks.md)
28 | * Next: [Explore more](09-explore-more.md)
--------------------------------------------------------------------------------
/11-kserve/09-explore-more.md:
--------------------------------------------------------------------------------
1 |
2 | ## 11.9 Explore more
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 | ## Notes
11 |
12 | Add notes from the video (PRs are welcome)
13 |
14 |
15 |
16 |
17 | ⚠️ |
18 |
19 | The notes are written by the community.
20 | If you see an error here, please create a PR with a fix.
21 | |
22 |
23 |
24 |
25 |
26 | ## Navigation
27 |
28 | * [Machine Learning Zoomcamp course](../)
29 | * [Session 11: KServe](./)
30 | * Previous: [Summary](08-summary.md)
--------------------------------------------------------------------------------
/11-kserve/README.md:
--------------------------------------------------------------------------------
1 | ## 11. KServe
2 |
3 | - 11.1 [Overview](01-overview.md)
4 | - 11.2 [Running KServe locally](02-kserve-local.md)
5 | - 11.3 [Deploying a Scikit-Learn model with KServe](03-kserve-sklearn.md)
6 | - 11.4 [Deploying custom Scikit-Learn images with KServe](04-kserve-custom-image.md)
7 | - 11.5 [Serving TensorFlow models with KServe](05-tensorflow-kserve.md)
8 | - 11.6 [KServe transformers](06-kserve-transformers.md)
9 | - 11.7 [Deploying with KServe and EKS](07-kserve-eks.md)
10 | - 11.8 [Summary](08-summary.md)
11 | - 11.9 [Explore more](09-explore-more.md)
--------------------------------------------------------------------------------
/11-kserve/code/.gitignore:
--------------------------------------------------------------------------------
1 | kserve
--------------------------------------------------------------------------------
/11-kserve/code/README.md:
--------------------------------------------------------------------------------
1 |
2 | ```bash
3 | kubectl port-forward -n istio-system service/istio-ingressgateway 8080:80
4 |
5 |
6 | SERVICE_NAME="sklearn-iris"
7 | HOST="${SERVICE_NAME}.default.example.com"
8 | ACTUAL_HOST="http://localhost:8080"
9 | URL="${ACTUAL_HOST}/v1/models/${SERVICE_NAME}:predict"
10 |
11 | curl -H "Host: ${HOST}" \
12 | ${URL} \
13 | -d @iris-request.json
14 |
15 |
16 | docker build -t kserve-sklearnserver:predict_proba-3.8-1.0 -f sklearn.Dockerfile .
17 |
18 | docker run -it --rm \
19 | -v "$(pwd)/model.joblib:/mnt/models/model.joblib" \
20 | -p 8081:8080 \
21 | kserve-sklearnserver:predict_proba-3.8-1.0 \
22 | --model_dir=/mnt/models \
23 | --model_name=churn
24 | ```
--------------------------------------------------------------------------------
/11-kserve/code/churn/Pipfile:
--------------------------------------------------------------------------------
1 | [[source]]
2 | url = "https://pypi.org/simple"
3 | verify_ssl = true
4 | name = "pypi"
5 |
6 | [packages]
7 | scikit-learn = "==1.0"
8 | joblib = "*"
9 | pandas = "*"
10 |
11 | [dev-packages]
12 |
13 | [requires]
14 | python_version = "3.8"
15 |
--------------------------------------------------------------------------------
/11-kserve/code/churn/churn-service.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: "serving.kserve.io/v1beta1"
2 | kind: "InferenceService"
3 | metadata:
4 | name: "churn"
5 | spec:
6 | predictor:
7 | sklearn:
8 | #image: kserve-sklearnserver:predict_proba-3.8-1.0
9 | image: agrigorev/sklearnserver:3.8-1.0-predict-proba
10 | storageUri: "http://172.31.13.90:8000/churn/model.joblib"
11 | resources:
12 | requests:
13 | cpu: 300m
14 | memory: 256Mi
15 | limits:
16 | cpu: 500m
17 | memory: 512Mi
--------------------------------------------------------------------------------
/11-kserve/code/churn/churn-test.py:
--------------------------------------------------------------------------------
"""Post two sample customers to the `churn` model's predict endpoint and print the reply."""
import requests

service_name = 'churn'

# Address the request is actually sent to.
actual_domain = 'http://localhost:8080'
url = f'{actual_domain}/v1/models/{service_name}:predict'

# Hostname of the service, passed along in the Host header.
host = f'{service_name}.default.example.com'
headers = {'Host': host}

# The two example customers that are sent as instances.
first_customer = {'contract': 'one_year', 'tenure': 34, 'monthlycharges': 56.95}
second_customer = {'contract': 'month-to-month', 'tenure': 13, 'monthlycharges': 49.95}

request = {
    "instances": [first_customer, second_customer]
}

response = requests.post(url, json=request, headers=headers)
parsed = response.json()
print(parsed)
--------------------------------------------------------------------------------
/11-kserve/code/churn/churn-train.py:
--------------------------------------------------------------------------------
"""Fit a DictVectorizer + LogisticRegression pipeline on the churn data and save it to model.joblib."""
import joblib

import pandas as pd
import numpy as np

from sklearn.pipeline import Pipeline
from sklearn.feature_extraction import DictVectorizer
from sklearn.linear_model import LogisticRegression


def normalize(strings):
    """Lower-case the string values and replace spaces with underscores."""
    return strings.str.lower().str.replace(' ', '_')


url = 'https://github.com/alexeygrigorev/mlbookcamp-code/blob/master/chapter-03-churn-prediction/WA_Fn-UseC_-Telco-Customer-Churn.csv?raw=true'
df = pd.read_csv(url)

# Column names get the same normalization as the string values below.
df.columns = normalize(df.columns)

# Every column whose dtype is "object" is normalized in place.
is_object = df.dtypes == 'object'
categorical_columns = list(df.dtypes[is_object].index)

for column in categorical_columns:
    df[column] = normalize(df[column])

# Values that cannot be parsed as numbers become NaN and are then replaced with 0.
total_charges = pd.to_numeric(df.totalcharges, errors='coerce')
df.totalcharges = total_charges.fillna(0)

# Target: 1 where the churn column equals 'yes', 0 otherwise.
df.churn = (df.churn == 'yes').astype(int)


categorical = ['contract']
numerical = ['tenure', 'monthlycharges']

# One dictionary per row, restricted to the selected features.
feature_columns = categorical + numerical
dicts = df[feature_columns].to_dict(orient='records')

vectorizer = DictVectorizer(sparse=False)
classifier = LogisticRegression(C=1.0, solver='liblinear')

pipeline = Pipeline([
    ('vectorizer', vectorizer),
    ('model', classifier),
])

pipeline.fit(dicts, df.churn.values)

joblib.dump(pipeline, 'model.joblib')
40 |
41 |
--------------------------------------------------------------------------------
/11-kserve/code/churn/model.joblib:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/11-kserve/code/churn/model.joblib
--------------------------------------------------------------------------------
/11-kserve/code/clothes/clothes-service.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: "serving.kserve.io/v1beta1"
2 | kind: "InferenceService"
3 | metadata:
4 | name: "clothes"
5 | spec:
6 | transformer:
7 | containers:
8 | - image: "agrigorev/kfserving-keras-transformer:0.0.1"
9 | name: user-container
10 | env:
11 | - name: MODEL_INPUT_SIZE
12 | value: "299,299"
13 | - name: KERAS_MODEL_NAME
14 | value: "xception"
15 | - name: MODEL_LABELS
16 | value: "dress,hat,longsleeve,outwear,pants,shirt,shoes,shorts,skirt,t-shirt"
17 | resources:
18 | requests:
19 | cpu: 300m
20 | memory: 256Mi
21 | limits:
22 | cpu: 500m
23 | memory: 512Mi
24 | predictor:
25 | tensorflow:
26 | storageUri: "http://172.31.13.90:8000/clothes/clothing-model/clothing-model.zip"
27 | resources:
28 | requests:
29 | cpu: 500m
30 | memory: 512Mi
31 | limits:
32 | cpu: 1000m
33 | memory: 512Mi
--------------------------------------------------------------------------------
/11-kserve/code/clothes/convert.py:
--------------------------------------------------------------------------------
"""Load the Keras .h5 model and save it again with tf.saved_model.save."""
import tensorflow as tf
from tensorflow import keras

# Model file to read and the location the converted model is written to.
source_path = 'xception_v4_large_08_0.894.h5'
export_path = 'clothing-model'

model = keras.models.load_model(source_path)

tf.saved_model.save(model, export_path)
7 |
--------------------------------------------------------------------------------
/11-kserve/code/clothes/test-transformer.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# coding: utf-8
"""Send image URLs to the `clothes` service's predict endpoint and print the reply."""

import requests


service_name = 'clothes'

# Address the request is actually sent to.
actual_domain = 'http://localhost:8080'
service_url = f'{actual_domain}/v1/models/{service_name}:predict'

# Hostname of the service, passed along in the Host header.
host = f'{service_name}.default.example.com'
headers = {'Host': host}


# The same image URL is submitted twice, as two separate instances.
instances = [
    {'url': 'http://bit.ly/mlbookcamp-pants'},
    {'url': 'http://bit.ly/mlbookcamp-pants'},
]
request = {"instances": instances}


response = requests.post(service_url, json=request, headers=headers)

# Printed in three forms: the response object, its raw body, and the body parsed as JSON.
print(response)
print(response.content)
print(response.json())
29 |
--------------------------------------------------------------------------------
/11-kserve/code/clothes/test.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# coding: utf-8
"""Preprocess one image, send it to the `clothes` service and print the first prediction keyed by class."""

import requests

from keras_image_helper import create_preprocessor

# Class names, in the order they are paired with the values of a prediction.
classes = [
    'dress', 'hat', 'longsleeve', 'outwear', 'pants',
    'shirt', 'shoes', 'shorts', 'skirt', 't-shirt',
]

preprocessor = create_preprocessor('xception', target_size=(299, 299))


service_name = 'clothes'

# Address the request is actually sent to.
actual_domain = 'http://localhost:8080'
service_url = f'{actual_domain}/v1/models/{service_name}:predict'

# Hostname of the service, passed along in the Host header.
host = f'{service_name}.default.example.com'
headers = {'Host': host}


# Download and preprocess the image; the result is sent as nested lists.
url = 'http://bit.ly/mlbookcamp-pants'
X = preprocessor.from_url(url)

request = {"instances": X.tolist()}


reply = requests.post(service_url, json=request, headers=headers)
response = reply.json()

predictions = response['predictions']
pred = predictions[0]

labelled = {name: value for name, value in zip(classes, pred)}
print(labelled)
51 |
--------------------------------------------------------------------------------
/11-kserve/code/eks/clothes-service.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: "serving.kserve.io/v1beta1"
2 | kind: "InferenceService"
3 | metadata:
4 | name: "clothes"
5 | spec:
6 | transformer:
7 | containers:
8 | - image: "agrigorev/kfserving-keras-transformer:0.0.1"
9 | name: user-container
10 | env:
11 | - name: MODEL_INPUT_SIZE
12 | value: "299,299"
13 | - name: KERAS_MODEL_NAME
14 | value: "xception"
15 | - name: MODEL_LABELS
16 | value: "dress,hat,longsleeve,outwear,pants,shirt,shoes,shorts,skirt,t-shirt"
17 | resources:
18 | requests:
19 | cpu: 300m
20 | memory: 256Mi
21 | limits:
22 | cpu: 500m
23 | memory: 512Mi
24 | predictor:
25 | serviceAccountName: sa
26 | tensorflow:
27 | storageUri: "s3://alexey-mlzookcamp-models/clothing-model"
28 | resources:
29 | requests:
30 | cpu: 500m
31 | memory: 512Mi
32 | limits:
33 | cpu: 1000m
34 | memory: 512Mi
--------------------------------------------------------------------------------
/11-kserve/code/eks/cluster.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: eksctl.io/v1alpha5
2 | kind: ClusterConfig
3 |
4 | metadata:
5 | name: mlzoomcamp-eks
6 | region: eu-west-1
7 | version: "1.21"
8 |
9 | nodeGroups:
10 | - name: ng
11 | desiredCapacity: 2
12 | instanceType: m5.xlarge
13 |
--------------------------------------------------------------------------------
/11-kserve/code/eks/test-transformer.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# coding: utf-8
"""Send image URLs to the `clothes` service at its public domain and print the reply."""

import requests


service_name = 'clothes'

# The service is addressed directly by its domain; no Host header is set.
actual_domain = 'https://clothes.default.kubeflow.mlbookcamp.com'
service_url = f'{actual_domain}/v1/models/{service_name}:predict'

# The same image URL is submitted twice, as two separate instances.
instances = [
    {'url': 'http://bit.ly/mlbookcamp-pants'},
    {'url': 'http://bit.ly/mlbookcamp-pants'},
]
request = {"instances": instances}


response = requests.post(service_url, json=request)

# Printed in three forms: the response object, its raw body, and the body parsed as JSON.
print(response)
print(response.content)
print(response.json())
24 |
--------------------------------------------------------------------------------
/11-kserve/code/image_transfomer/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM python:3.8.12-slim
2 |
3 | RUN pip install pipenv
4 |
5 | WORKDIR /app
6 |
7 | COPY ["Pipfile", "Pipfile.lock", "./"]
8 |
9 | RUN pipenv install --system --deploy
10 |
11 | COPY "image_tranformer.py" .
12 |
13 | ENTRYPOINT ["python", "image_tranformer.py"]
--------------------------------------------------------------------------------
/11-kserve/code/image_transfomer/Pipfile:
--------------------------------------------------------------------------------
1 | [[source]]
2 | url = "https://pypi.org/simple"
3 | verify_ssl = true
4 | name = "pypi"
5 |
6 | [packages]
7 | kserve = "==0.7.0"
8 | keras-image-helper = "*"
9 |
10 | [dev-packages]
11 |
12 | [requires]
13 | python_version = "3.8"
14 |
--------------------------------------------------------------------------------
/11-kserve/code/iris/iris-example.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: "serving.kserve.io/v1beta1"
2 | kind: "InferenceService"
3 | metadata:
4 | name: "sklearn-iris"
5 | spec:
6 | predictor:
7 | sklearn:
8 | storageUri: "gs://kfserving-samples/models/sklearn/iris"
--------------------------------------------------------------------------------
/11-kserve/code/iris/iris-request.json:
--------------------------------------------------------------------------------
1 | {
2 | "instances": [
3 | [6.8, 2.8, 4.8, 1.4],
4 | [6.0, 3.4, 4.5, 1.6]
5 | ]
6 | }
--------------------------------------------------------------------------------
/11-kserve/code/iris/iris-test.py:
--------------------------------------------------------------------------------
"""Post two sample rows to the `sklearn-iris` model's predict endpoint and print the reply."""
import requests

service_name = 'sklearn-iris'

# Address the request is actually sent to.
actual_domain = 'http://localhost:8080'
url = f'{actual_domain}/v1/models/{service_name}:predict'

# Hostname of the service, passed along in the Host header.
host = f'{service_name}.default.example.com'
headers = {'Host': host}

# Each instance is a list of four numeric values.
rows = [
    [6.8, 2.8, 4.8, 1.4],
    [6.0, 3.4, 4.5, 1.6],
]
request = {"instances": rows}

response = requests.post(url, json=request, headers=headers)
parsed = response.json()
print(parsed)
--------------------------------------------------------------------------------
/11-kserve/code/plan.md:
--------------------------------------------------------------------------------
1 | # 11. Deploying models with KServe (formerly Kubeflow Serving)
2 |
3 | This week we'll learn how to use KServe for deploying ML models.
4 |
5 | We'll deploy two models:
6 |
7 | * Churn prediction model (Scikit-Learn)
8 | * Clothing classification model (TensorFlow)
9 |
10 |
11 | ## 11.1 Overview
12 |
13 | * Kubeflow and KServe
14 | * What we'll cover this week
15 | * Two-tier architecture
16 |
17 |
18 | ## 11.2 Running KServe locally
19 |
20 | * Installing KServe locally with kind
21 | * Deploying an example model from documentation
22 |
23 |
24 | ## 11.3 Deploying a Scikit-Learn model with KServe
25 |
26 | * Training the churn model with specific Scikit-Learn version
27 | * Deploying the churn prediction model with KServe
28 |
29 |
30 | ## 11.4 Deploying custom Scikit-Learn images with KServe
31 |
32 | * Customizing the Scikit-Learn image
33 | * Running KServe service locally
34 |
35 |
36 | ## 11.5 Serving TensorFlow models with KServe
37 |
38 | * Converting the Keras model to saved_model format
39 | * Deploying the model
40 | * Preparing the input
41 |
42 |
43 | ## 11.6 KServe transformers
44 |
45 | * Why do we need transformers
46 | * Creating a service for pre- and post-processing
47 | * Using existing transformers
48 |
49 |
50 | ## 11.7 Deploying with KServe and EKS
51 |
52 | * Creating an EKS cluster
53 | * Installing KServe on EKS
54 | * Configuring the domain
55 | * Setting up S3 access
56 | * Deploying the clothing model
57 |
58 | [Guide used during the video](https://github.com/alexeygrigorev/kubeflow-deep-learning/blob/main/guide.md)
59 |
60 |
61 | ## 11.8 Summary
62 |
63 | * Less yaml, faster deployment
64 | * Less stability
65 | * The need for Ops is not gone
66 |
67 |
68 | ## 11.9 Explore more
69 |
70 | * Helm charts
71 | * Kubeflow, Kubeflow pipelines
72 | * Sagemaker
73 | * A lot of vendors that take care of Ops
74 |
75 |
76 |
--------------------------------------------------------------------------------
/11-kserve/images/thumbnail-11-01.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/11-kserve/images/thumbnail-11-01.jpg
--------------------------------------------------------------------------------
/11-kserve/images/thumbnail-11-02.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/11-kserve/images/thumbnail-11-02.jpg
--------------------------------------------------------------------------------
/11-kserve/images/thumbnail-11-03.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/11-kserve/images/thumbnail-11-03.jpg
--------------------------------------------------------------------------------
/11-kserve/images/thumbnail-11-04.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/11-kserve/images/thumbnail-11-04.jpg
--------------------------------------------------------------------------------
/11-kserve/images/thumbnail-11-05.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/11-kserve/images/thumbnail-11-05.jpg
--------------------------------------------------------------------------------
/11-kserve/images/thumbnail-11-06.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/11-kserve/images/thumbnail-11-06.jpg
--------------------------------------------------------------------------------
/11-kserve/images/thumbnail-11-07.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/11-kserve/images/thumbnail-11-07.jpg
--------------------------------------------------------------------------------
/11-kserve/meta.csv:
--------------------------------------------------------------------------------
1 | lesson,name,page_name,video,slides,notebook
2 | 1,Overview,01-overview.md,https://www.youtube.com/watch?v=CFWK05dQoGM,,
3 | 2,Running KServe locally,02-kserve-local.md,https://www.youtube.com/watch?v=A3tFt14iTEI,,
4 | 3,Deploying a Scikit-Learn model with KServe,03-kserve-sklearn.md,https://www.youtube.com/watch?v=8kBIDggLwgE,,
5 | 4,Deploying custom Scikit-Learn images with KServe,04-kserve-custom-image.md,https://www.youtube.com/watch?v=REGNWrHZiCw,,
6 | 5,Serving TensorFlow models with KServe,05-tensorflow-kserve.md,https://www.youtube.com/watch?v=iXwQb2LRKGI,,
7 | 6,KServe transformers,06-kserve-transformers.md,https://www.youtube.com/watch?v=haowRqeAqJU,,
8 | 7,Deploying with KServe and EKS,07-kserve-eks.md,https://www.youtube.com/watch?v=MpuTzBSYBBI,,
9 | 8,Summary,08-summary.md,,,
10 | 9,Explore more,09-explore-more.md,,,
--------------------------------------------------------------------------------
/11-kserve/meta.json:
--------------------------------------------------------------------------------
1 | {
2 | "data": "meta.csv",
3 | "session": 11,
4 | "name": "KServe"
5 | }
--------------------------------------------------------------------------------
/after-sign-up.md:
--------------------------------------------------------------------------------
1 | ## Thank you!
2 |
3 | Thanks for signing up for the course.
4 |
5 | The process of adding you to the mailing list is not automated yet,
6 | but you will hear from us closer to the course start.
7 |
8 | To make sure you don't miss any announcements:
9 |
10 | - Register in [DataTalks.Club's Slack](https://datatalks.club/slack.html) and join the [`#course-ml-zoomcamp`](https://app.slack.com/client/T01ATQK62F8/C0288NJ5XSA) channel
11 | - Join the [course Telegram channel with announcements](https://t.me/mlzoomcamp)
12 | - [Tweet about the course!](https://clicktotweet.com/XZ6b9)
13 | - Subscribe to [DataTalks.Club's YouTube channel](https://www.youtube.com/c/DataTalksClub) and check
14 | [the course playlist](https://www.youtube.com/playlist?list=PL3MmuxUbc_hIhxl5Ji8t4O6lPAOpHaCLR)
15 | - Subscribe to our [public Google Calendar](https://calendar.google.com/calendar/?cid=cGtjZ2tkbGc1OG9yb2lxa2Vwc2g4YXMzMmNAZ3JvdXAuY2FsZW5kYXIuZ29vZ2xlLmNvbQ) (it works from Desktop only)
16 | - Check our [Technical FAQ](https://docs.google.com/document/d/1LpPanc33QJJ6BSsyxVg-pWNMplal84TdZtq10naIhD8/edit) if you have questions
17 |
18 | See you in September!
19 |
--------------------------------------------------------------------------------
/asking-questions.md:
--------------------------------------------------------------------------------
1 | ## Asking questions
2 |
3 | If you have any questions, ask them
4 | in the [`#course-ml-zoomcamp`](https://app.slack.com/client/T01ATQK62F8/C0288NJ5XSA) channel in the [DataTalks.Club](https://datatalks.club) Slack.
5 |
6 | To keep our discussion in Slack more organized, we ask you to follow these suggestions:
7 |
8 | * Before asking a question, check [FAQ](https://docs.google.com/document/d/1LpPanc33QJJ6BSsyxVg-pWNMplal84TdZtq10naIhD8/edit).
9 | * Use threads. When you have a problem, first describe it briefly
10 | and then put the actual error in the thread, so it doesn't take up the entire screen.
11 | * Instead of screenshots, it's better to copy-paste the error you're getting in text.
12 | Use ` ``` ` for formatting your code.
13 | It's very difficult to read text from screenshots.
14 | * Please don't take pictures of your code with a phone. It's even harder to read. Follow the previous suggestion,
15 | and in rare cases when you need to show what happens on your screen, take a screenshot.
16 | * You don't need to tag the instructors when you have a problem. We will see it eventually.
17 | * If somebody helped you with your problem and it's not in [FAQ](https://docs.google.com/document/d/1LpPanc33QJJ6BSsyxVg-pWNMplal84TdZtq10naIhD8/edit), please add it there.
18 | It'll help other students.
19 |
20 |
--------------------------------------------------------------------------------
/bento.md:
--------------------------------------------------------------------------------
1 |
2 | ## 7. [Production-Ready Machine Learning (BentoML)](07-bentoml-production/)
3 |
4 | - 7.1 [Intro/Session Overview](07-bentoml-production/01-intro.md)
5 | - 7.2 [Building Your Prediction Service with BentoML](07-bentoml-production/02-build-bento-service.md)
6 | - 7.3 [Deploying Your Prediction Service](07-bentoml-production/03-deploy-bento-service.md)
7 | - 7.4 [Sending, Receiving and Validating Data](07-bentoml-production/04-validation.md)
8 | - 7.5 [High-Performance Serving](07-bentoml-production/05-high-performance.md)
9 | - 7.6 [Bento Production Deployment](07-bentoml-production/06-production-deployment.md)
10 | - 7.7 [(Optional) Advanced Example: Deploying Stable Diffusion Model](07-bentoml-production/07-stable-diffusion.md)
11 | - 7.8 [Summary](07-bentoml-production/08-summary.md)
12 | - 7.9 [Homework](07-bentoml-production/homework.md)
13 |
14 |
15 | ## Supporters and partners
16 |
17 | Thanks to the course sponsors for making it possible to run this course
18 |
19 |
20 |
21 |
22 |
23 |
24 |
--------------------------------------------------------------------------------
/certificates.md:
--------------------------------------------------------------------------------
1 | ## Getting your certificate
2 |
3 | Congratulations on finishing the course!
4 |
5 | Here's how you can get your certificate.
6 |
7 | First, get your certificate id using the `compute_certificate_id` function:
8 |
9 | ```python
from hashlib import sha1


def compute_hash(email):
    """Return the hexadecimal SHA-1 digest of `email` encoded as UTF-8."""
    encoded = email.encode('utf-8')
    return sha1(encoded).hexdigest()


def compute_certificate_id(email):
    """Return the certificate id: the hash of the lower-cased, stripped email followed by '_'."""
    email_clean = email.lower().strip()
    salted = email_clean + '_'
    return compute_hash(salted)
18 | ```
19 |
20 | > **Note** that this is not the same hash as the one you have on the leaderboard.
21 | > There's an extra "_" added to your email, so the hash is different.
22 |
23 |
24 | Then use this hash to get the URL:
25 |
26 | ```python
# Cohort and course name are both part of the certificate URL path.
cohort = 2023
course = 'ml-zoomcamp'
# Replace the example address with your own email.
your_id = compute_certificate_id('never.give.up@gmail.com')
url = f"https://certificate.datatalks.club/{course}/{cohort}/{your_id}.pdf"
print(url)
32 | ```
33 |
34 | Example: https://certificate.datatalks.club/ml-zoomcamp/2023/fe629854d45c559e9c10b3b8458ea392fdeb68a9.pdf
35 |
36 |
37 | ## Adding to LinkedIn
38 |
39 | You can add your certificate to LinkedIn:
40 |
41 | * Log in to your LinkedIn account, then go to your profile.
42 | * On the right, in the "Add profile" section dropdown, choose "Background" and then select the drop-down triangle next to "Licenses & Certifications".
43 | * In "Name", enter "Machine Learning Zoomcamp".
44 | * In "Issuing Organization", enter "DataTalksClub".
45 | * (Optional) In "Issue Date", enter the time when the certificate was created.
46 | * (Optional) Select the checkbox "This certification does not expire".
47 | * Enter your certificate ID (the hash computed above).
48 | * In "Certification URL", enter the URL for your certificate.
49 |
50 | [Adapted from here](https://support.edx.org/hc/en-us/articles/206501938-How-can-I-add-my-certificate-to-my-LinkedIn-profile-)
51 |
--------------------------------------------------------------------------------
/cohorts/2021/05-deployment/homework/Dockerfile:
--------------------------------------------------------------------------------
# Base image for the homework service.
# NOTE(review): q6_predict.py loads dv.bin and model2.bin, which are not copied
# below — presumably they ship inside this base image; confirm.
FROM agrigorev/zoomcamp-model:3.8.12-slim

# pipenv is used to install the dependencies pinned in Pipfile.lock.
RUN pip install pipenv

# Copy only the dependency manifests before installing.
COPY ["Pipfile", "Pipfile.lock", "./"]

# --system installs into the image's Python instead of a virtualenv;
# --deploy fails the build if Pipfile.lock is out of date with Pipfile.
RUN pipenv install --system --deploy

# Application code.
COPY ["q6_predict.py", "./"]

# Port the server below binds to.
EXPOSE 9696

# Serve the `app` object from q6_predict.py with gunicorn.
ENTRYPOINT ["gunicorn", "--bind=0.0.0.0:9696", "q6_predict:app"]
--------------------------------------------------------------------------------
/cohorts/2021/05-deployment/homework/Pipfile:
--------------------------------------------------------------------------------
1 | [[source]]
2 | url = "https://pypi.org/simple"
3 | verify_ssl = true
4 | name = "pypi"
5 |
6 | [packages]
7 | scikit-learn = "==1.0"
8 | flask = "*"
9 | gunicorn = "*"
10 |
11 | [dev-packages]
12 |
13 | [requires]
14 | python_version = "3.8"
15 |
--------------------------------------------------------------------------------
/cohorts/2021/05-deployment/homework/dv.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/cohorts/2021/05-deployment/homework/dv.bin
--------------------------------------------------------------------------------
/cohorts/2021/05-deployment/homework/homework.md:
--------------------------------------------------------------------------------
1 | ../homework.md
--------------------------------------------------------------------------------
/cohorts/2021/05-deployment/homework/model1.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/cohorts/2021/05-deployment/homework/model1.bin
--------------------------------------------------------------------------------
/cohorts/2021/05-deployment/homework/q3_test.py:
--------------------------------------------------------------------------------
1 | import pickle
2 |
def load(filename):
    """Unpickle and return the object stored in the file at ``filename``.

    NOTE(review): unpickling can execute arbitrary code; only use this on
    files from a trusted source.
    """
    with open(filename, 'rb') as pickled_file:
        obj = pickle.load(pickled_file)
    return obj
6 |
7 |
# Artifacts are read from the current working directory.
dv = load('dv.bin')
model = load('model1.bin')

customer = {
    "contract": "two_year",
    "tenure": 12,
    "monthlycharges": 19.7,
}

# Score the single customer and print the value at [0, 1] of predict_proba.
features = dv.transform([customer])
y_pred = model.predict_proba(features)[0, 1]

print(y_pred)
--------------------------------------------------------------------------------
/cohorts/2021/05-deployment/homework/q4_predict.py:
--------------------------------------------------------------------------------
1 | from flask import Flask
2 | from flask import request
3 | from flask import jsonify
4 |
5 | import pickle
6 |
def load(filename):
    """Unpickle and return the object stored in the file at ``filename``.

    NOTE(review): unpickling can execute arbitrary code; only use this on
    files from a trusted source.
    """
    with open(filename, 'rb') as pickled_file:
        obj = pickle.load(pickled_file)
    return obj
10 |
11 |
# Load both artifacts once at import time so every request reuses them.
# presumably `dv` is a fitted vectorizer (it is only used via .transform) — verify
dv = load('dv.bin')
model = load('model1.bin')

# Flask application the /predict route is registered on.
app = Flask('churn')
16 |
@app.route('/predict', methods=['POST'])
def predict():
    """Score the JSON object posted to ``/predict``.

    The request body is vectorized with ``dv`` and scored with
    ``model.predict_proba``; the value at ``[0, 1]`` is returned as
    ``churn_probability`` together with ``churn``, which is true when that
    value is at least 0.5.
    """
    payload = request.get_json()
    features = dv.transform([payload])
    probability = model.predict_proba(features)[0, 1]

    return jsonify({
        'churn_probability': float(probability),
        'churn': bool(probability >= 0.5),
    })
31 |
32 |
# When executed directly, start Flask's built-in server on all interfaces, port 9696.
# NOTE(review): debug=True combined with host='0.0.0.0' exposes the interactive
# debugger to the network — confirm this is only used for local development.
if __name__ == "__main__":
    app.run(debug=True, host='0.0.0.0', port=9696)
35 |
36 |
--------------------------------------------------------------------------------
/cohorts/2021/05-deployment/homework/q4_test.py:
--------------------------------------------------------------------------------
1 |
2 | import requests
3 |
url = "http://localhost:9696/predict"

customer = {"contract": "two_year", "tenure": 1, "monthlycharges": 10}

# POST the customer as JSON and decode the JSON reply.
http_reply = requests.post(url, json=customer)
response = http_reply.json()

print(response)
11 |
12 |
--------------------------------------------------------------------------------
/cohorts/2021/05-deployment/homework/q6_predict.py:
--------------------------------------------------------------------------------
1 | from flask import Flask
2 | from flask import request
3 | from flask import jsonify
4 |
5 | import pickle
6 |
def load(filename):
    """Unpickle and return the object stored in the file at ``filename``.

    NOTE(review): unpickling can execute arbitrary code; only use this on
    files from a trusted source.
    """
    with open(filename, 'rb') as pickled_file:
        obj = pickle.load(pickled_file)
    return obj
10 |
11 |
# Load both artifacts once at import time so every request reuses them.
# presumably `dv` is a fitted vectorizer (it is only used via .transform) — verify
dv = load('dv.bin')
model = load('model2.bin')

# Flask application the /predict route is registered on.
app = Flask('churn')
16 |
@app.route('/predict', methods=['POST'])
def predict():
    """Score the JSON object posted to ``/predict``.

    The request body is vectorized with ``dv`` and scored with
    ``model.predict_proba``; the value at ``[0, 1]`` is returned as
    ``churn_probability`` together with ``churn``, which is true when that
    value is at least 0.5.
    """
    payload = request.get_json()
    features = dv.transform([payload])
    probability = model.predict_proba(features)[0, 1]

    return jsonify({
        'churn_probability': float(probability),
        'churn': bool(probability >= 0.5),
    })
31 |
32 |
# When executed directly, start Flask's built-in server on all interfaces, port 9696.
# NOTE(review): debug=True combined with host='0.0.0.0' exposes the interactive
# debugger to the network — confirm this is only used for local development.
if __name__ == "__main__":
    app.run(debug=True, host='0.0.0.0', port=9696)
35 |
36 |
--------------------------------------------------------------------------------
/cohorts/2021/05-deployment/homework/q6_test.py:
--------------------------------------------------------------------------------
1 |
2 | import requests
3 |
url = "http://localhost:9696/predict"

customer = {"contract": "two_year", "tenure": 12, "monthlycharges": 10}

# POST the customer as JSON and decode the JSON reply.
http_reply = requests.post(url, json=customer)
response = http_reply.json()

print(response)
10 |
11 |
--------------------------------------------------------------------------------
/cohorts/2021/09-serverless/homework/Dockerfile:
--------------------------------------------------------------------------------
# Base image for the lambda function.
# NOTE(review): the model file named in MODEL_NAME below is not copied here —
# presumably it ships inside this base image; confirm.
FROM agrigorev/zoomcamp-cats-dogs-lambda:v2

# NOTE(review): homework.py does not import keras_image_helper; confirm this
# install is still needed.
RUN pip install keras-image-helper
# TFLite runtime wheel; the file name targets CPython 3.8 on linux x86_64.
RUN pip install https://github.com/alexeygrigorev/tflite-aws-lambda/raw/main/tflite/tflite_runtime-2.7.0-cp38-cp38-linux_x86_64.whl

COPY homework.py .
# homework.py reads MODEL_NAME via os.getenv to locate the model file.
ENV MODEL_NAME=cats-dogs-v2.tflite

# Handler in <module>.<function> form: lambda_handler in homework.py.
CMD [ "homework.lambda_handler" ]
--------------------------------------------------------------------------------
/cohorts/2021/09-serverless/homework/homework.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # coding: utf-8
3 |
4 | # import tensorflow.lite as tflite
5 | import tflite_runtime.interpreter as tflite
6 |
7 | import os
8 | import numpy as np
9 |
10 | from io import BytesIO
11 | from urllib import request
12 |
13 | from PIL import Image
14 |
15 |
# Path of the .tflite model file; the MODEL_NAME environment variable overrides the default.
MODEL_NAME = os.getenv('MODEL_NAME', 'dogs-vs-cats.tflite')
17 |
18 |
def download_image(url):
    """Fetch ``url`` and open the downloaded bytes as a PIL image."""
    with request.urlopen(url) as response:
        raw_bytes = response.read()
    return Image.open(BytesIO(raw_bytes))
25 |
26 |
def prepare_image(img, target_size):
    """Return ``img`` in RGB mode, resized to ``target_size``.

    The image is converted only when its mode is not already ``'RGB'``;
    resizing uses ``Image.NEAREST`` resampling.
    """
    rgb_image = img if img.mode == 'RGB' else img.convert('RGB')
    return rgb_image.resize(target_size, Image.NEAREST)
32 |
33 |
def prepare_input(x):
    """Return ``x`` divided by 255.0."""
    scale = 255.0
    return x / scale
36 |
37 |
# Build the TFLite interpreter once at import time; predict() reuses it for every call.
interpreter = tflite.Interpreter(model_path=MODEL_NAME)
interpreter.allocate_tensors()

# Tensor indices of the first input and first output, used by predict()
# with set_tensor/get_tensor.
input_index = interpreter.get_input_details()[0]['index']
output_index = interpreter.get_output_details()[0]['index']
43 |
44 |
45 | # 'https://upload.wikimedia.org/wikipedia/commons/9/9a/Pug_600.jpg'
46 |
def predict(url):
    """Download the image at ``url`` and return the model output for it.

    The image is resized to 150x150, converted to a float32 array, wrapped
    in a one-element batch and divided by 255.0 before being fed to the
    module-level interpreter. Returns element ``[0, 0]`` of the output
    tensor as a Python float.
    """
    image = prepare_image(download_image(url), target_size=(150, 150))

    pixels = np.array(image, dtype='float32')
    batch = prepare_input(np.array([pixels]))

    interpreter.set_tensor(input_index, batch)
    interpreter.invoke()
    output = interpreter.get_tensor(output_index)

    return float(output[0, 0])
61 |
62 |
def lambda_handler(event, context):
    """Lambda entry point: score ``event['url']`` and wrap the result.

    ``context`` is accepted but not used. Returns a dict with the single
    key ``'prediction'``.
    """
    return {'prediction': predict(event['url'])}
71 |
72 |
--------------------------------------------------------------------------------
/cohorts/2021/09-serverless/homework/test.py:
--------------------------------------------------------------------------------
1 | import requests
2 |
# Invocation URL of the lambda container on localhost:8080.
url = 'http://localhost:8080/2015-03-31/functions/function/invocations'

data = {'url': 'https://upload.wikimedia.org/wikipedia/commons/1/18/Vombatus_ursinus_-Maria_Island_National_Park.jpg'}

# POST the event as JSON and print the decoded JSON reply.
http_reply = requests.post(url, json=data)
result = http_reply.json()
print(result)
--------------------------------------------------------------------------------
/cohorts/2021/10-kubernetes/homework/deployment.yaml:
--------------------------------------------------------------------------------
# Deployment for the churn prediction service.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: churn
spec:
  # Must match the pod template labels below.
  selector:
    matchLabels:
      app: churn
  template:
    metadata:
      labels:
        app: churn
    spec:
      containers:
      - name: churn
        image: churn-model:v001
        # Only limits are set here; no explicit requests are declared.
        resources:
          limits:
            memory: "128Mi"
            cpu: "500m"
        ports:
        # Port the service's targetPort (service.yaml) points at.
        - containerPort: 9696
--------------------------------------------------------------------------------
/cohorts/2021/10-kubernetes/homework/service.yaml:
--------------------------------------------------------------------------------
# Service exposing the churn pods.
apiVersion: v1
kind: Service
metadata:
  name: churn
spec:
  type: LoadBalancer
  # Routes to pods labelled app=churn (the label set in deployment.yaml).
  selector:
    app: churn
  ports:
  # Service port 80 forwards to container port 9696.
  - port: 80
    targetPort: 9696
--------------------------------------------------------------------------------
/cohorts/2021/14-project/README.md:
--------------------------------------------------------------------------------
1 | ## 14. Third project
2 |
3 | The requirements are the same as for the [capstone project](../12-capstone/)
4 |
5 |
6 | * Submit your project [here](https://forms.gle/2V2qyDKeUhGwnfn4A)
7 | * Deadline: January 26, 23:00 CET
8 | * Evaluate your peers
9 | * Project evaluation assignment: [here](https://docs.google.com/spreadsheets/d/e/2PACX-1vSXSWfDcoeefJZbt4anSRZQVuDGzkij8eiSdWoCRD3GbKSAYj-6BSA5X9M0w5CstxtMXU2jjuTtWr_v/pubhtml)
10 | * Submit the results [here](https://forms.gle/WYAcXaawfcpGtZGD7)
11 | * Deadline: February 2, 22:00 CET
12 |
13 |
--------------------------------------------------------------------------------
/cohorts/2021/office-hours.md:
--------------------------------------------------------------------------------
1 | ## Office Hours
2 |
3 |
--------------------------------------------------------------------------------
/cohorts/2022/05-deployment/homework/Dockerfile:
--------------------------------------------------------------------------------
# Base image for the homework service.
# NOTE(review): q6_predict.py loads dv.bin and model2.bin, which are not copied
# below — presumably they ship inside this base image; confirm.
FROM svizor/zoomcamp-model:3.9.12-slim

# pipenv is used to install the dependencies pinned in Pipfile.lock.
RUN pip install pipenv

# Copy only the dependency manifests before installing.
COPY ["Pipfile", "Pipfile.lock", "./"]

# --system installs into the image's Python instead of a virtualenv;
# --deploy fails the build if Pipfile.lock is out of date with Pipfile.
RUN pipenv install --system --deploy

# Application code.
COPY ["q6_predict.py", "./"]

# Port the server below listens on.
EXPOSE 9696

# Serve the `app` object from q6_predict.py with waitress.
ENTRYPOINT ["waitress-serve", "--listen=0.0.0.0:9696", "q6_predict:app"]
--------------------------------------------------------------------------------
/cohorts/2022/05-deployment/homework/Pipfile:
--------------------------------------------------------------------------------
1 | [[source]]
2 | url = "https://pypi.org/simple"
3 | verify_ssl = true
4 | name = "pypi"
5 |
6 | [packages]
7 | scikit-learn = "==1.0.2"
8 | flask = "*"
9 | waitress = "*"
10 |
11 | [dev-packages]
12 |
13 | [requires]
14 | python_version = "3.9"
15 |
--------------------------------------------------------------------------------
/cohorts/2022/05-deployment/homework/dv.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/cohorts/2022/05-deployment/homework/dv.bin
--------------------------------------------------------------------------------
/cohorts/2022/05-deployment/homework/model1.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/cohorts/2022/05-deployment/homework/model1.bin
--------------------------------------------------------------------------------
/cohorts/2022/05-deployment/homework/q3_test.py:
--------------------------------------------------------------------------------
1 | import pickle
2 |
3 |
def load(filename: str):
    """Unpickle and return the object stored in the file at ``filename``.

    NOTE(review): unpickling can execute arbitrary code; only use this on
    files from a trusted source.
    """
    with open(filename, 'rb') as pickled_file:
        obj = pickle.load(pickled_file)
    return obj
7 |
8 |
# Artifacts are read from the current working directory.
dv = load('dv.bin')
model = load('model1.bin')

client = {
    "reports": 0,
    "share": 0.001694,
    "expenditure": 0.12,
    "owner": "yes",
}

# Score the single client and print the value at [0, 1] of predict_proba.
features = dv.transform([client])
y_pred = model.predict_proba(features)[0, 1]

print(y_pred)
--------------------------------------------------------------------------------
/cohorts/2022/05-deployment/homework/q4_predict.py:
--------------------------------------------------------------------------------
1 | import pickle
2 |
3 | from flask import Flask
4 | from flask import request
5 | from flask import jsonify
6 |
7 |
def load(filename: str):
    """Unpickle and return the object stored in the file at ``filename``.

    NOTE(review): unpickling can execute arbitrary code; only use this on
    files from a trusted source.
    """
    with open(filename, 'rb') as pickled_file:
        obj = pickle.load(pickled_file)
    return obj
11 |
12 |
# Load both artifacts once at import time so every request reuses them.
# presumably `dv` is a fitted vectorizer (it is only used via .transform) — verify
dv = load('dv.bin')
model = load('model1.bin')

# Flask application the /predict route is registered on.
app = Flask('credict-card')
17 |
18 |
@app.route('/predict', methods=['POST'])
def predict():
    """Score the JSON object posted to ``/predict``.

    The request body is vectorized with ``dv`` and scored with
    ``model.predict_proba``; the value at ``[0, 1]`` is returned as
    ``get_card_probability`` together with ``get_card``, which is true when
    that value is at least 0.5.
    """
    payload = request.get_json()
    features = dv.transform([payload])
    probability = model.predict_proba(features)[0, 1]

    return jsonify({
        'get_card_probability': float(probability),
        'get_card': bool(probability >= 0.5),
    })
33 |
34 |
# When executed directly, start Flask's built-in server on all interfaces, port 9696.
# NOTE(review): debug=True combined with host='0.0.0.0' exposes the interactive
# debugger to the network — confirm this is only used for local development.
if __name__ == "__main__":
    app.run(debug=True, host='0.0.0.0', port=9696)
37 |
--------------------------------------------------------------------------------
/cohorts/2022/05-deployment/homework/q4_test.py:
--------------------------------------------------------------------------------
1 | import requests
2 |
3 |
url = "http://localhost:9696/predict"

client = {"reports": 0, "share": 0.245, "expenditure": 3.438, "owner": "yes"}

# POST the client as JSON and decode the JSON reply.
http_reply = requests.post(url, json=client)
response = http_reply.json()

print(response)
10 |
--------------------------------------------------------------------------------
/cohorts/2022/05-deployment/homework/q6_predict.py:
--------------------------------------------------------------------------------
1 | import pickle
2 |
3 | from flask import Flask
4 | from flask import request
5 | from flask import jsonify
6 |
7 |
def load(filename: str):
    """Unpickle and return the object stored in the file at ``filename``.

    NOTE(review): unpickling can execute arbitrary code; only use this on
    files from a trusted source.
    """
    with open(filename, 'rb') as pickled_file:
        obj = pickle.load(pickled_file)
    return obj
11 |
12 |
# Load both artifacts once at import time so every request reuses them.
# presumably `dv` is a fitted vectorizer (it is only used via .transform) — verify
dv = load('dv.bin')
model = load('model2.bin')

# Flask application the /predict route is registered on.
app = Flask('credict-card')
17 |
18 |
@app.route('/predict', methods=['POST'])
def predict():
    """Score the JSON object posted to ``/predict``.

    The request body is vectorized with ``dv`` and scored with
    ``model.predict_proba``; the value at ``[0, 1]`` is returned as
    ``get_card_probability`` together with ``get_card``, which is true when
    that value is at least 0.5.
    """
    payload = request.get_json()
    features = dv.transform([payload])
    probability = model.predict_proba(features)[0, 1]

    return jsonify({
        'get_card_probability': float(probability),
        'get_card': bool(probability >= 0.5),
    })
33 |
34 |
# When executed directly, start Flask's built-in server on all interfaces, port 9696.
# NOTE(review): debug=True combined with host='0.0.0.0' exposes the interactive
# debugger to the network — confirm this is only used for local development.
if __name__ == "__main__":
    app.run(debug=True, host='0.0.0.0', port=9696)
37 |
--------------------------------------------------------------------------------
/cohorts/2022/05-deployment/homework/q6_test.py:
--------------------------------------------------------------------------------
1 | import requests
2 |
3 |
url = "http://localhost:9696/predict"

client = {"reports": 0, "share": 0.245, "expenditure": 3.438, "owner": "yes"}

# POST the client as JSON and decode the JSON reply.
http_reply = requests.post(url, json=client)
response = http_reply.json()

print(response)
10 |
--------------------------------------------------------------------------------
/cohorts/2022/07-bento-production/locustfile.py:
--------------------------------------------------------------------------------
1 | from locust import task
2 | from locust import between
3 | from locust import HttpUser
4 |
# JSON body sent with every request: one row of four numbers.
# TODO confirm it matches the input schema of the /classify endpoint.
sample = [[6.4,3.5,4.5,1.2]]
6 |
class MLZoomUser(HttpUser):
    """
    Simulated user that repeatedly POSTs ``sample`` to ``/classify``.

    Usage:
        Start the locust load testing client with:

            locust -H http://localhost:3000

        Then open the web UI at http://0.0.0.0:8089, choose the number of
        users and the spawn rate for the load test, and start swarming.
    """

    # Wait time between tasks, configured with between(0.01, 2).
    wait_time = between(0.01, 2)

    @task
    def classify(self):
        """POST the module-level ``sample`` as the JSON body to ``/classify``."""
        self.client.post("/classify", json=sample)
23 |
--------------------------------------------------------------------------------
/cohorts/2022/07-bento-production/resources/classify-endpoint.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/cohorts/2022/07-bento-production/resources/classify-endpoint.png
--------------------------------------------------------------------------------
/cohorts/2022/07-bento-production/resources/dragon1.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/cohorts/2022/07-bento-production/resources/dragon1.jpeg
--------------------------------------------------------------------------------
/cohorts/2022/07-bento-production/resources/dragon2.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/cohorts/2022/07-bento-production/resources/dragon2.jpeg
--------------------------------------------------------------------------------
/cohorts/2022/07-bento-production/resources/dragon3.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/cohorts/2022/07-bento-production/resources/dragon3.jpeg
--------------------------------------------------------------------------------
/cohorts/2022/07-bento-production/resources/dragon4.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/cohorts/2022/07-bento-production/resources/dragon4.jpeg
--------------------------------------------------------------------------------
/cohorts/2022/09-serverless/homework/Dockerfile:
--------------------------------------------------------------------------------
# Base image for the lambda function.
# NOTE(review): the model file named in MODEL_NAME below is not copied here —
# presumably it ships inside this base image; confirm.
FROM svizor42/zoomcamp-dino-dragon-lambda:v2

# NOTE(review): homework.py does not import keras_image_helper; confirm this
# install is still needed.
RUN pip install keras-image-helper
# TFLite runtime wheel; the file name targets CPython 3.9 on linux x86_64.
RUN pip install https://github.com/alexeygrigorev/tflite-aws-lambda/raw/main/tflite/tflite_runtime-2.7.0-cp39-cp39-linux_x86_64.whl

COPY homework.py .
# homework.py reads MODEL_NAME via os.getenv to locate the model file.
ENV MODEL_NAME=dino-vs-dragon-v2.tflite

# Handler in <module>.<function> form: lambda_handler in homework.py.
CMD [ "homework.lambda_handler" ]
--------------------------------------------------------------------------------
/cohorts/2022/09-serverless/homework/homework.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # coding: utf-8
3 |
4 | # import tensorflow.lite as tflite
5 | import tflite_runtime.interpreter as tflite
6 |
7 | import os
8 | import numpy as np
9 |
10 | from io import BytesIO
11 | from urllib import request
12 |
13 | from PIL import Image
14 |
15 |
# Path of the .tflite model file; the MODEL_NAME environment variable overrides the default.
MODEL_NAME = os.getenv('MODEL_NAME', 'dino-vs-dragon-v2.tflite')
17 |
18 |
def download_image(url):
    """Fetch ``url`` and open the downloaded bytes as a PIL image."""
    with request.urlopen(url) as response:
        raw_bytes = response.read()
    return Image.open(BytesIO(raw_bytes))
25 |
26 |
def prepare_image(img, target_size):
    """Return ``img`` in RGB mode, resized to ``target_size``.

    The image is converted only when its mode is not already ``'RGB'``;
    resizing uses ``Image.NEAREST`` resampling.
    """
    rgb_image = img if img.mode == 'RGB' else img.convert('RGB')
    return rgb_image.resize(target_size, Image.NEAREST)
32 |
33 |
def prepare_input(x):
    """Return ``x`` divided by 255.0."""
    scale = 255.0
    return x / scale
36 |
37 |
# Build the TFLite interpreter once at import time; predict() reuses it for every call.
interpreter = tflite.Interpreter(model_path=MODEL_NAME)
interpreter.allocate_tensors()

# Tensor indices of the first input and first output, used by predict()
# with set_tensor/get_tensor.
input_index = interpreter.get_input_details()[0]['index']
output_index = interpreter.get_output_details()[0]['index']
43 |
44 |
45 | # 'https://upload.wikimedia.org/wikipedia/en/e/e9/GodzillaEncounterModel.jpg'
46 |
def predict(url):
    """Download the image at ``url`` and return the model output for it.

    The image is resized to 150x150, converted to a float32 array, wrapped
    in a one-element batch and divided by 255.0 before being fed to the
    module-level interpreter. Returns element ``[0, 0]`` of the output
    tensor as a Python float.
    """
    image = prepare_image(download_image(url), target_size=(150, 150))

    pixels = np.array(image, dtype='float32')
    batch = prepare_input(np.array([pixels]))

    interpreter.set_tensor(input_index, batch)
    interpreter.invoke()
    output = interpreter.get_tensor(output_index)

    return float(output[0, 0])
61 |
62 |
def lambda_handler(event, context):
    """Lambda entry point: score ``event['url']`` and wrap the result.

    ``context`` is accepted but not used. Returns a dict with the single
    key ``'prediction'``.
    """
    return {'prediction': predict(event['url'])}
--------------------------------------------------------------------------------
/cohorts/2022/09-serverless/homework/test.py:
--------------------------------------------------------------------------------
1 | import requests
2 |
# Invocation URL of the lambda container on localhost:8080.
url = 'http://localhost:8080/2015-03-31/functions/function/invocations'

data = {'url': 'https://upload.wikimedia.org/wikipedia/en/e/e9/GodzillaEncounterModel.jpg'}

# POST the event as JSON and print the decoded JSON reply.
http_reply = requests.post(url, json=data)
result = http_reply.json()
print(result)
--------------------------------------------------------------------------------
/cohorts/2022/10-kubernetes/homework/deployment.yaml:
--------------------------------------------------------------------------------
# Deployment for the credit-card prediction service.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: credit-card
spec:
  # Must match the pod template labels below.
  selector:
    matchLabels:
      app: credit-card
  # Starting replica count; hpa.yaml targets this Deployment and may scale it.
  replicas: 1
  template:
    metadata:
      labels:
        app: credit-card
    spec:
      containers:
      - name: credit-card
        image: zoomcamp-model:v001
        resources:
          # CPU request is the baseline the autoscaler's utilization target is measured against.
          requests:
            memory: "64Mi"
            cpu: "100m"
          limits:
            memory: "128Mi"
            cpu: "200m"
        ports:
        # Port the service's targetPort (service.yaml) points at.
        - containerPort: 9696
--------------------------------------------------------------------------------
/cohorts/2022/10-kubernetes/homework/hpa.yaml:
--------------------------------------------------------------------------------
# Horizontal autoscaler for the credit-card Deployment.
apiVersion: autoscaling/v1
kind: HorizontalPodAutoscaler
metadata:
  name: credit-card-v1
spec:
  # The workload being scaled: the Deployment named credit-card.
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: credit-card
  minReplicas: 1
  maxReplicas: 3
  # Target average CPU utilization, as a percentage of the pods' CPU request.
  targetCPUUtilizationPercentage: 20
--------------------------------------------------------------------------------
/cohorts/2022/10-kubernetes/homework/service.yaml:
--------------------------------------------------------------------------------
# Service exposing the credit-card pods.
apiVersion: v1
kind: Service
metadata:
  name: credit-card
spec:
  type: LoadBalancer
  # Routes to pods labelled app=credit-card (the label set in deployment.yaml).
  selector:
    app: credit-card
  ports:
  # Service port 80 forwards to container port 9696.
  - port: 80
    targetPort: 9696
--------------------------------------------------------------------------------
/cohorts/2022/10-kubernetes/homework/test.py:
--------------------------------------------------------------------------------
1 | import requests
2 | from time import sleep
3 |
4 |
url = "http://localhost:9696/predict"
client = {"reports": 0, "share": 0.245, "expenditure": 3.438, "owner": "yes"}

# Send the same request forever, pausing 0.1 s before each one; stop with Ctrl+C.
while True:
    sleep(0.1)
    http_reply = requests.post(url, json=client)
    response = http_reply.json()
    print(response)
12 |
--------------------------------------------------------------------------------
/cohorts/2022/article.md:
--------------------------------------------------------------------------------
1 | ## Article (optional)
2 |
3 | The best way to learn about something is to teach it.
4 |
5 | In this part of the course, we'll explore some topics that weren't
6 | covered in the course.
7 |
8 | You'll need to learn about this topic and then write about what
9 | you learned in an article.
10 |
11 | ## Submitting the results
12 |
13 | * Submit your article to [`#course-ml-zoomcamp-articles`](https://app.slack.com/client/T01ATQK62F8/C02QXBFS1PU) -
14 | just share the link to your article there
15 | * Submit the same link to [this form](https://forms.gle/6KKQg5EZPjtBpbb29) so we could link it to your message from the channel
16 |
17 | ## Evaluating
18 |
19 | We'll use voting for scoring your articles.
20 |
21 | * Check the articles in the [`#course-ml-zoomcamp-articles`](https://app.slack.com/client/T01ATQK62F8/C02QXBFS1PU) channel and put a :+1: reaction to articles that you liked
22 | * The top voted articles will get 20 points
23 |
24 |
25 | ## Deadline
26 |
27 | The deadline for finishing the article is 30 January, 22:00 CET.
28 |
29 |
30 | ## Articles from 2022
31 |
32 | * https://medium.com/@sotoblanco263542/track-your-machine-learning-experiments-with-w-b-d5f9431e6bc2
33 | * https://blog.aaishamuhammad.co.za/posts/onnx_machine_learning/
34 | * https://github.com/ElenaNKn/scaling_methods
35 | * https://medium.com/@alexandervalverdeguillen/math-optimization-methods-for-machine-learning-8837eba9e3fe
36 | * https://rzabolotin.hashnode.dev/deploying-ml-model-via-telegram-bot
37 | * https://github.com/bsenst/mlbookcamp-2022/blob/main/social-media-analysis-ml-zoomcamp.ipynb
38 | * https://github.com/mary435/Telegram.git
39 |
40 |
41 |
42 | ### Past articles
43 |
44 | See examples of articles from the 2021 cohort [here](../../article/README.md)
45 |
--------------------------------------------------------------------------------
/cohorts/2023/05-deployment/homework/Dockerfile:
--------------------------------------------------------------------------------
# Base image for the homework service.
# NOTE(review): q6_predict.py loads dv.bin and model2.bin, which are not copied
# below — presumably they ship inside this base image; confirm.
FROM svizor/zoomcamp-model:3.10.12-slim

# Install pipenv, then the locked dependencies.
# --system installs into the image's Python instead of a virtualenv;
# --deploy fails the build if Pipfile.lock is out of date with Pipfile.
RUN pip install pipenv
COPY ["Pipfile", "Pipfile.lock", "./"]
RUN pipenv install --system --deploy

# Application code, the port it listens on, and the waitress command that
# serves the `app` object from q6_predict.py.
COPY ["q6_predict.py", "./"]
EXPOSE 9696
ENTRYPOINT ["waitress-serve", "--listen=0.0.0.0:9696", "q6_predict:app"]
--------------------------------------------------------------------------------
/cohorts/2023/05-deployment/homework/Pipfile:
--------------------------------------------------------------------------------
1 | [[source]]
2 | url = "https://pypi.org/simple"
3 | verify_ssl = true
4 | name = "pypi"
5 |
6 | [packages]
7 | scikit-learn = "==1.3.1"
8 | flask = "*"
9 | waitress = "*"
10 |
11 | [dev-packages]
12 |
13 | [requires]
14 | python_version = "3.10"
15 |
--------------------------------------------------------------------------------
/cohorts/2023/05-deployment/homework/dv.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/cohorts/2023/05-deployment/homework/dv.bin
--------------------------------------------------------------------------------
/cohorts/2023/05-deployment/homework/model1.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/cohorts/2023/05-deployment/homework/model1.bin
--------------------------------------------------------------------------------
/cohorts/2023/05-deployment/homework/q3_test.py:
--------------------------------------------------------------------------------
1 | import pickle
2 |
3 |
def load(filename: str):
    """Unpickle and return the object stored in the file at ``filename``.

    NOTE(review): unpickling can execute arbitrary code; only use this on
    files from a trusted source.
    """
    with open(filename, 'rb') as pickled_file:
        obj = pickle.load(pickled_file)
    return obj
7 |
8 |
# Artifacts are read from the current working directory.
dv = load('dv.bin')
model = load('model1.bin')

client = {
    "job": "retired",
    "duration": 445,
    "poutcome": "success",
}

# Score the single client and print the value at [0, 1] of predict_proba.
features = dv.transform([client])
y_pred = model.predict_proba(features)[0, 1]

print(y_pred)
18 |
--------------------------------------------------------------------------------
/cohorts/2023/05-deployment/homework/q4_predict.py:
--------------------------------------------------------------------------------
1 | import pickle
2 |
3 | from flask import Flask
4 | from flask import request
5 | from flask import jsonify
6 |
7 |
def load(filename: str):
    """Unpickle and return the object stored in the file at ``filename``.

    NOTE(review): unpickling can execute arbitrary code; only use this on
    files from a trusted source.
    """
    with open(filename, 'rb') as pickled_file:
        obj = pickle.load(pickled_file)
    return obj
11 |
12 |
# Load both artifacts once at import time so every request reuses them.
# presumably `dv` is a fitted vectorizer (it is only used via .transform) — verify
dv = load('dv.bin')
model = load('model1.bin')

# Flask application the /predict route is registered on.
app = Flask('get-credit')
17 |
18 |
@app.route('/predict', methods=['POST'])
def predict():
    """Score the JSON object posted to ``/predict``.

    The request body is vectorized with ``dv`` and scored with
    ``model.predict_proba``; the value at ``[0, 1]`` is returned as
    ``get_credit_probability`` together with ``get_credit``, which is true
    when that value is at least 0.5.
    """
    payload = request.get_json()
    features = dv.transform([payload])
    probability = model.predict_proba(features)[0, 1]

    return jsonify({
        'get_credit_probability': float(probability),
        'get_credit': bool(probability >= 0.5),
    })
33 |
34 |
# When executed directly, start Flask's built-in server on all interfaces, port 9696.
# NOTE(review): debug=True combined with host='0.0.0.0' exposes the interactive
# debugger to the network — confirm this is only used for local development.
if __name__ == "__main__":
    app.run(debug=True, host='0.0.0.0', port=9696)
37 |
--------------------------------------------------------------------------------
/cohorts/2023/05-deployment/homework/q4_test.py:
--------------------------------------------------------------------------------
1 | import requests
2 |
3 |
url = "http://localhost:9696/predict"

client = {"job": "unknown", "duration": 270, "poutcome": "failure"}

# POST the client as JSON and decode the JSON reply.
http_reply = requests.post(url, json=client)
response = http_reply.json()

print(response)
10 |
--------------------------------------------------------------------------------
/cohorts/2023/05-deployment/homework/q6_predict.py:
--------------------------------------------------------------------------------
1 | import pickle
2 |
3 | from flask import Flask
4 | from flask import request
5 | from flask import jsonify
6 |
7 |
def load(filename: str):
    """Unpickle and return the object stored in the file at ``filename``.

    NOTE(review): unpickling can execute arbitrary code; only use this on
    files from a trusted source.
    """
    with open(filename, 'rb') as pickled_file:
        obj = pickle.load(pickled_file)
    return obj
11 |
12 |
# Load both artifacts once at import time so every request reuses them.
# presumably `dv` is a fitted vectorizer (it is only used via .transform) — verify
dv = load('dv.bin')
model = load('model2.bin')

# Flask application the /predict route is registered on.
app = Flask('get-credit')
17 |
18 |
@app.route('/predict', methods=['POST'])
def predict():
    """Score the JSON object posted to ``/predict``.

    The request body is vectorized with ``dv`` and scored with
    ``model.predict_proba``; the value at ``[0, 1]`` is returned as
    ``get_credit_probability`` together with ``get_credit``, which is true
    when that value is at least 0.5.
    """
    payload = request.get_json()
    features = dv.transform([payload])
    probability = model.predict_proba(features)[0, 1]

    return jsonify({
        'get_credit_probability': float(probability),
        'get_credit': bool(probability >= 0.5),
    })
33 |
34 |
# When executed directly, start Flask's built-in server on all interfaces, port 9696.
# NOTE(review): debug=True combined with host='0.0.0.0' exposes the interactive
# debugger to the network — confirm this is only used for local development.
if __name__ == "__main__":
    app.run(debug=True, host='0.0.0.0', port=9696)
37 |
--------------------------------------------------------------------------------
/cohorts/2023/05-deployment/homework/q6_test.py:
--------------------------------------------------------------------------------
1 | import requests
2 |
3 |
url = "http://localhost:9696/predict"

client = {"job": "retired", "duration": 445, "poutcome": "success"}

# POST the client as JSON and decode the JSON reply.
http_reply = requests.post(url, json=client)
response = http_reply.json()

print(response)
10 |
--------------------------------------------------------------------------------
/cohorts/2023/article.md:
--------------------------------------------------------------------------------
1 | ## Article (optional)
2 |
3 | The best way to learn about something is to teach it.
4 |
5 | In this part of the course, we'll explore some topics that weren't
6 | covered in the course.
7 |
8 | You'll need to learn about this topic and then write about what
9 | you learned in an article.
10 |
11 |
12 | [More information about articles](../../article/README.md)
13 |
14 | ## Submitting the results
15 |
16 | * Submit your article to [`#course-ml-zoomcamp-articles`](https://app.slack.com/client/T01ATQK62F8/C02QXBFS1PU) -
17 | just share the link to your article there
18 | * Submit the same link to [this form](https://forms.gle/Q5v2K19QitvzegoE9) so we can link it to your message from the channel
19 |
20 | ## Evaluating
21 |
22 | We'll use voting for scoring your articles.
23 |
24 | * Check the articles in the [`#course-ml-zoomcamp-articles`](https://app.slack.com/client/T01ATQK62F8/C02QXBFS1PU) channel and add a :+1: reaction to the articles you like
25 | * The top voted articles will get 20 points
26 |
27 |
28 | ## Deadline
29 |
30 | The deadline for finishing the article is 31 January, 23:00 CET.
31 |
32 |
33 | ## Articles from 2023
34 |
35 | * [A Learning Based Approach For Predicting Heart Disease](https://www.ozkary.dev/ai-engineering-predicting-heart-disease-machine-learning/) by Oscar Garcia
36 | * [Understanding Convolutional Layers in a Convolutional Neural Network](https://marcosbenicio.github.io/2023/11/27/cnn.html) by Marcos Benício
37 | * [From Pixels to Predictions: A Journey into Image Segmentation](https://knowmledge.com/2024/01/26/from-pixels-to-predictions-a-journey-into-image-segmentation/) by Peter Ernicke
38 | * Add your article above this line
39 |
40 |
41 | ### Past articles
42 |
43 | See examples of articles from the previous cohorts [here](../../article/README.md)
44 |
--------------------------------------------------------------------------------
/cohorts/2024/05-deployment/homework/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM svizor/zoomcamp-model:3.11.5-slim
2 |
3 | RUN pip install pipenv
4 | COPY ["Pipfile", "Pipfile.lock", "./"]
5 | RUN pipenv install --system --deploy
6 |
7 | COPY ["q6_predict.py", "./"]
8 | EXPOSE 9696
9 | ENTRYPOINT ["waitress-serve", "--listen=0.0.0.0:9696", "q6_predict:app"]
--------------------------------------------------------------------------------
/cohorts/2024/05-deployment/homework/Pipfile:
--------------------------------------------------------------------------------
1 | [[source]]
2 | url = "https://pypi.org/simple"
3 | verify_ssl = true
4 | name = "pypi"
5 |
6 | [packages]
7 | scikit-learn = "==1.5.2"
8 | flask = "*"
9 | waitress = "*"
10 |
11 | [dev-packages]
12 |
13 | [requires]
14 | python_version = "3.11"
15 |
--------------------------------------------------------------------------------
/cohorts/2024/05-deployment/homework/dv.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/cohorts/2024/05-deployment/homework/dv.bin
--------------------------------------------------------------------------------
/cohorts/2024/05-deployment/homework/model1.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/cohorts/2024/05-deployment/homework/model1.bin
--------------------------------------------------------------------------------
/cohorts/2024/05-deployment/homework/q3_test.py:
--------------------------------------------------------------------------------
1 | import pickle
2 |
3 |
4 | def load(filename: str):
5 | with open(filename, 'rb') as f_in:
6 | return pickle.load(f_in)
7 |
8 |
9 | dv = load('dv.bin')
10 | model = load('model1.bin')
11 |
12 | client = {"job": "management", "duration": 400, "poutcome": "success"}
13 |
14 | X = dv.transform([client])
15 | y_pred = model.predict_proba(X)[0, 1]
16 |
17 | print(y_pred)
18 |
--------------------------------------------------------------------------------
/cohorts/2024/05-deployment/homework/q4_predict.py:
--------------------------------------------------------------------------------
1 | import pickle
2 |
3 | from flask import Flask
4 | from flask import request
5 | from flask import jsonify
6 |
7 |
8 | def load(filename: str):
9 | with open(filename, 'rb') as f_in:
10 | return pickle.load(f_in)
11 |
12 |
13 | dv = load('dv.bin')
14 | model = load('model1.bin')
15 |
16 | app = Flask('has-subscribed')
17 |
18 |
19 | @app.route('/predict', methods=['POST'])
20 | def predict():
21 | client = request.get_json()
22 |
23 | X = dv.transform([client])
24 | y_pred = model.predict_proba(X)[0, 1]
25 | has_subscribed = y_pred >= 0.5
26 |
27 | result = {
28 | 'has_subscribed_probability': float(y_pred),
29 | 'has_subscribed': bool(has_subscribed)
30 | }
31 |
32 | return jsonify(result)
33 |
34 |
35 | if __name__ == "__main__":
36 | app.run(debug=True, host='0.0.0.0', port=9696)
37 |
--------------------------------------------------------------------------------
/cohorts/2024/05-deployment/homework/q4_test.py:
--------------------------------------------------------------------------------
1 | import requests
2 |
3 |
4 | url = "http://localhost:9696/predict"
5 |
6 | client = {"job": "student", "duration": 280, "poutcome": "failure"}
7 | response = requests.post(url, json=client).json()
8 |
9 | print(response)
10 |
--------------------------------------------------------------------------------
/cohorts/2024/05-deployment/homework/q6_predict.py:
--------------------------------------------------------------------------------
1 | import pickle
2 |
3 | from flask import Flask
4 | from flask import request
5 | from flask import jsonify
6 |
7 |
8 | def load(filename: str):
9 | with open(filename, 'rb') as f_in:
10 | return pickle.load(f_in)
11 |
12 |
13 | dv = load('dv.bin')
14 | model = load('model2.bin')
15 |
16 | app = Flask('has-subscribed')
17 |
18 |
19 | @app.route('/predict', methods=['POST'])
20 | def predict():
21 | client = request.get_json()
22 |
23 | X = dv.transform([client])
24 | y_pred = model.predict_proba(X)[0, 1]
25 | has_subscribed = y_pred >= 0.5
26 |
27 | result = {
28 | 'has_subscribed_probability': float(y_pred),
29 | 'has_subscribed': bool(has_subscribed)
30 | }
31 |
32 | return jsonify(result)
33 |
34 |
35 | if __name__ == "__main__":
36 | app.run(debug=True, host='0.0.0.0', port=9696)
37 |
--------------------------------------------------------------------------------
/cohorts/2024/05-deployment/homework/q6_test.py:
--------------------------------------------------------------------------------
1 | import requests
2 |
3 |
4 | url = "http://localhost:9696/predict"
5 |
6 | client = {"job": "management", "duration": 400, "poutcome": "success"}
7 | response = requests.post(url, json=client).json()
8 |
9 | print(response)
10 |
--------------------------------------------------------------------------------
/cohorts/2024/09-serverless/homework/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM agrigorev/model-2024-hairstyle:v3
2 |
3 | RUN pip install keras-image-helper
4 | RUN pip install --no-deps https://github.com/alexeygrigorev/tflite-aws-lambda/raw/main/tflite/tflite_runtime-2.14.0-cp310-cp310-linux_x86_64.whl
5 |
6 | COPY homework.py .
7 | ENV MODEL_NAME=model_2024_hairstyle_v2.tflite
8 |
9 | CMD [ "homework.lambda_handler" ]
--------------------------------------------------------------------------------
/cohorts/2024/09-serverless/homework/entrypoint.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | # Start `python3.12 -m awslambdaric`, forwarding every script argument to it.
3 | if [ -n "${AWS_LAMBDA_RUNTIME_API}" ]; then
4 |   # AWS_LAMBDA_RUNTIME_API is set and non-empty: run awslambdaric directly.
5 |   exec /usr/bin/python3.12 -m awslambdaric "$@"
6 | else
7 |   # Unset or empty: launch the same command through aws-lambda-rie instead.
8 |   exec /usr/local/bin/aws-lambda-rie /usr/bin/python3.12 -m awslambdaric "$@"
9 | fi
--------------------------------------------------------------------------------
/cohorts/2024/09-serverless/homework/homework.dockerfile:
--------------------------------------------------------------------------------
1 | FROM public.ecr.aws/lambda/python:3.10
2 |
3 | COPY model_2024_hairstyle_v2.tflite .
4 |
5 | RUN pip install numpy==1.23.1
6 |
--------------------------------------------------------------------------------
/cohorts/2024/09-serverless/homework/homework.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # coding: utf-8
3 |
4 | # import tensorflow.lite as tflite
5 | import tflite_runtime.interpreter as tflite
6 |
7 | import os
8 | import numpy as np
9 |
10 | from io import BytesIO
11 | from urllib import request
12 |
13 | from PIL import Image
14 |
15 |
16 | MODEL_NAME = os.getenv('MODEL_NAME', 'model_2024_hairstyle_v2.tflite')
17 |
18 |
19 | def download_image(url):
20 | with request.urlopen(url) as resp:
21 | buffer = resp.read()
22 | stream = BytesIO(buffer)
23 | img = Image.open(stream)
24 | return img
25 |
26 |
27 | def prepare_image(img, target_size):
28 | if img.mode != 'RGB':
29 | img = img.convert('RGB')
30 | img = img.resize(target_size, Image.NEAREST)
31 | return img
32 |
33 |
34 | def prepare_input(x):
35 | return x / 255.0
36 |
37 |
38 | interpreter = tflite.Interpreter(model_path=MODEL_NAME)
39 | interpreter.allocate_tensors()
40 |
41 | input_index = interpreter.get_input_details()[0]['index']
42 | output_index = interpreter.get_output_details()[0]['index']
43 |
44 |
45 | # https://habrastorage.org/webt/yf/_d/ok/yf_dokzqy3vcritme8ggnzqlvwa.jpeg
46 |
47 | def predict(url):
48 | img = download_image(url)
49 | img = prepare_image(img, target_size=(200, 200))
50 |
51 | x = np.array(img, dtype='float32')
52 | X = np.array([x])
53 | X = prepare_input(X)
54 |
55 | interpreter.set_tensor(input_index, X)
56 | interpreter.invoke()
57 |
58 | preds = interpreter.get_tensor(output_index)
59 |
60 | return float(preds[0, 0])
61 |
62 |
63 | def lambda_handler(event, context):
64 | url = event['url']
65 | pred = predict(url)
66 | result = {
67 | 'prediction': pred
68 | }
69 |
70 | return result
--------------------------------------------------------------------------------
/cohorts/2024/09-serverless/homework/test.py:
--------------------------------------------------------------------------------
1 | import requests
2 |
3 | url = 'http://localhost:8080/2015-03-31/functions/function/invocations'
4 |
5 | data = {'url': 'https://habrastorage.org/webt/yf/_d/ok/yf_dokzqy3vcritme8ggnzqlvwa.jpeg'}
6 |
7 | result = requests.post(url, json=data).json()
8 | print(result)
--------------------------------------------------------------------------------
/cohorts/2024/10-kubernetes/homework/deployment.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: apps/v1
2 | kind: Deployment
3 | metadata:
4 | name: subscription
5 | spec:
6 | selector:
7 | matchLabels:
8 | app: subscription
9 | replicas: 1
10 | template:
11 | metadata:
12 | labels:
13 | app: subscription
14 | spec:
15 | containers:
16 | - name: subscription
17 | image: svizor/zoomcamp-model:3.11.5-hw10
18 | resources:
19 | requests:
20 | memory: "64Mi"
21 | cpu: "100m"
22 | limits:
23 | memory: "128Mi"
24 | cpu: "200m"
25 | ports:
26 | - containerPort: 9696
--------------------------------------------------------------------------------
/cohorts/2024/10-kubernetes/homework/hpa.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: autoscaling/v1
2 | kind: HorizontalPodAutoscaler
3 | metadata:
4 | name: subscription-v1
5 | spec:
6 | scaleTargetRef:
7 | apiVersion: apps/v1
8 | kind: Deployment
9 | name: subscription
10 | minReplicas: 1
11 | maxReplicas: 3
12 | targetCPUUtilizationPercentage: 20
--------------------------------------------------------------------------------
/cohorts/2024/10-kubernetes/homework/service.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | kind: Service
3 | metadata:
4 | name: subscription
5 | spec:
6 | type: LoadBalancer
7 | selector:
8 | app: subscription
9 | ports:
10 | - port: 80
11 | targetPort: 9696
--------------------------------------------------------------------------------
/cohorts/2024/10-kubernetes/homework/test.py:
--------------------------------------------------------------------------------
1 | import requests
2 | from time import sleep
3 |
4 |
5 | url = "http://localhost:9696/predict"
6 | client = {"job": "management", "duration": 400, "poutcome": "success"}
7 |
8 | while True:
9 | sleep(0.1)
10 | response = requests.post(url, json=client).json()
11 | print(response)
12 |
--------------------------------------------------------------------------------
/cohorts/2024/article.md:
--------------------------------------------------------------------------------
1 | ## Article (optional)
2 |
3 | The best way to learn about something is to teach it.
4 |
5 | In this part of the course, we'll explore some topics that weren't
6 | covered in the course.
7 |
8 | You'll need to pick one of these topics, learn about it, and then write
9 | about what you learned in an article.
10 |
11 |
12 | [More information about articles](../../article/README.md)
13 |
14 | ## Submitting the results
15 |
16 | * Submit your article to [`#course-ml-zoomcamp-articles`](https://app.slack.com/client/T01ATQK62F8/C02QXBFS1PU) -
17 | just share the link to your article there
18 | * Submit the same link to [this form](https://courses.datatalks.club/ml-zoomcamp-2024/homework/article) so we can link it to your message from the channel
19 |
20 | ## Evaluating
21 |
22 | We'll use voting for scoring your articles.
23 |
24 | * Check the articles in the [`#course-ml-zoomcamp-articles`](https://app.slack.com/client/T01ATQK62F8/C02QXBFS1PU) channel and add a :+1: reaction to the articles you like
25 | * The top voted articles will get 20 points
26 |
27 |
28 | ## Deadline
29 |
30 | The deadline for finishing the article is 31 January.
31 |
32 |
33 | ## Articles from 2024
34 |
35 |
36 | * [You will like my offer](https://markogolovko.com/blog/you-will-like-my-offer/) by Marko Golovko
37 | * [Natural Language Processing using spaCy, TensorFlow and BERT model architecture](https://volcano-camp-325.notion.site/Natural-Language-Processing-using-spaCy-TensorFlow-and-BERT-model-architecture-1895067176b380d09484d4b0338b0c5e?pvs=4) by Alexander Daniel Rios
38 | * [Setting up the environments for ML Zoomcamp 2024 - macOS](https://medium.com/@till.meineke/setting-up-the-environments-for-ml-zoomcamp-2024-eceb6e42e36e) by Till Meineke
39 |
40 |
41 | ### Past articles
42 |
43 | See examples of articles from the previous cohorts [here](../../article/README.md)
44 |
--------------------------------------------------------------------------------
/cohorts/2024/projects.md:
--------------------------------------------------------------------------------
1 | # Projects
2 |
3 | See more information about projects [here](../../projects/)
4 |
5 | ## Evaluation
6 |
7 | Evaluation via [course management system](https://courses.datatalks.club/ml-zoomcamp-2024/)
8 |
9 | * [Evaluation criteria](https://docs.google.com/spreadsheets/d/e/2PACX-1vQCwqAtkjl07MTW-SxWUK9GUvMQ3Pv_fF8UadcuIYLgHa0PlNu9BRWtfLgivI8xSCncQs82HDwGXSm3/pubhtml)
10 | * [Video with instructions](https://www.loom.com/share/4f5c155c550e48ddb54b71ba76516b04)
11 |
12 |
13 | ## Midterm Project
14 |
15 | - Project due date: 26 November 2024, 00:00 CET (Berlin time)
16 | - Submit your project [here](https://courses.datatalks.club/ml-zoomcamp-2024/project/midterm)
17 | - Evaluation due date: 03 December 2024, 00:00 CET (Berlin time)
18 | - [Projects of Midterm Project](https://courses.datatalks.club/ml-zoomcamp-2024/project/midterm/list)
19 |
20 |
21 | ## Capstone 1
22 |
23 | - Project due date: 14 January 2025, 00:00 CET (Berlin time)
24 | - Submit your project [here](https://courses.datatalks.club/ml-zoomcamp-2024/project/capstone1)
25 | - Evaluation due date: 21 January 2025, 00:00 CET (Berlin time)
26 | - [Projects of Capstone 1](https://courses.datatalks.club/ml-zoomcamp-2024/project/capstone1/list)
27 |
28 | ## Capstone 2
29 |
30 | - Project due date: 28 January 2025, 00:00 CET (Berlin time)
31 | - Submit your project [here](https://courses.datatalks.club/ml-zoomcamp-2024/project/capstone2)
32 | - Evaluation due date: 04 February 2025, 00:00 CET (Berlin time)
33 | - [Projects of Capstone 2](https://courses.datatalks.club/ml-zoomcamp-2024/project/capstone2/list)
34 |
--------------------------------------------------------------------------------
/images/bentoml.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/images/bentoml.png
--------------------------------------------------------------------------------
/images/play.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/images/play.png
--------------------------------------------------------------------------------
/images/zoomcamp.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DataTalksClub/machine-learning-zoomcamp/9c89731ffdc0649af4e49ad2a70b4b508ca730df/images/zoomcamp.jpg
--------------------------------------------------------------------------------
/learning-in-public.md:
--------------------------------------------------------------------------------
1 | # Learning in public
2 |
3 | Most people learn in private: they consume content but don't tell
4 | anyone about it. There's nothing wrong with it.
5 |
6 | But we want to encourage you to document your progress and
7 | share it publicly on social media.
8 |
9 | It helps you get noticed and will lead to:
10 |
11 | * Expanding your network: meeting new people and making new friends
12 | * Being invited to meetups, conferences and podcasts
13 | * Landing a job or getting clients
14 | * Many other good things
15 |
16 | Here's a more comprehensive read on why you should do it: https://github.com/readme/guides/publishing-your-work
17 |
18 |
19 | ## Learning in Public for Zoomcamps
20 |
21 | When you submit your homework or project, you can also submit
22 | learning in public posts:
23 |
24 |
25 |
26 | You can watch this video to see what your learning in public posts may look like:
27 |
28 |
29 |
30 |
31 |
32 |
33 | Send a PR if you want to suggest improvements for this document
34 |
--------------------------------------------------------------------------------