├── 06-best-practices ├── code │ ├── tests │ │ ├── __init__.py │ │ ├── data.b64 │ │ └── model_test.py │ ├── .gitignore │ ├── scripts │ │ ├── publish.sh │ │ ├── test_cloud_e2e.sh │ │ └── deploy_manual.sh │ ├── infrastructure │ │ ├── modules │ │ │ ├── s3 │ │ │ │ ├── variables.tf │ │ │ │ └── main.tf │ │ │ ├── kinesis │ │ │ │ ├── main.tf │ │ │ │ └── variables.tf │ │ │ ├── ecr │ │ │ │ ├── variables.tf │ │ │ │ └── main.tf │ │ │ └── lambda │ │ │ │ ├── variables.tf │ │ │ │ ├── main.tf │ │ │ │ └── iam.tf │ │ ├── vars │ │ │ ├── stg.tfvars │ │ │ └── prod.tfvars │ │ ├── variables.tf │ │ └── main.tf │ ├── integraton-test │ │ ├── model │ │ │ ├── requirements.txt │ │ │ ├── model.pkl │ │ │ ├── python_env.yaml │ │ │ ├── conda.yaml │ │ │ └── MLmodel │ │ ├── docker-compose.yaml │ │ ├── test_docker.py │ │ ├── event.json │ │ ├── run.sh │ │ └── test_kinesis.py │ ├── .vscode │ │ └── settings.json │ ├── Dockerfile │ ├── Pipfile │ ├── plan.md │ ├── pyproject.toml │ ├── lambda_function.py │ ├── Makefile │ ├── .pre-commit-config.yaml │ ├── model.py │ └── README.md ├── homework_solution │ ├── tests │ │ ├── __init__.py │ │ └── test_batch.py │ ├── model.bin │ ├── docker-compose.yaml │ ├── Pipfile │ ├── Dockerfile │ ├── integration_test.sh │ ├── integration_test.py │ └── batch.py ├── ci_cd_zoomcamp.png ├── homework │ ├── model.bin │ ├── Pipfile │ ├── Dockerfile │ └── batch.py ├── AWS-stream-pipeline.png ├── images │ ├── thumbnail-6-1.jpg │ ├── thumbnail-6-2.jpg │ ├── thumbnail-6-3.jpg │ ├── thumbnail-6-4.jpg │ ├── thumbnail-6-5.jpg │ └── thumbnail-6-6.jpg ├── meta.json └── docs.md ├── 05-monitoring ├── evidently_service │ ├── datasets │ │ └── do_not_delete │ ├── requirements.txt │ ├── Dockerfile │ ├── config │ │ ├── grafana_datasources.yaml │ │ ├── grafana_dashboards.yaml │ │ └── prometheus.yml │ └── config.yaml ├── images │ ├── thumbnail-5-01.jpg │ ├── thumbnail-5-02.jpg │ ├── thumbnail-5-03.jpg │ ├── thumbnail-5-04.jpg │ └── thumbnail-5-05.jpg ├── prediction_service │ ├── lin_reg.bin │ ├── requirements.txt │ ├── Dockerfile │ └── app.py ├── homework │ ├── prediction_service │ │ ├── lin_reg.bin │ │ ├── lin_reg_V2.bin │ │ ├── Pipfile │ │ ├── Dockerfile │ │ └── app.py │ ├── requirements.txt │ ├── prefect-monitoring │ │ ├── clean_mongo.py │ │ ├── Pipfile │ │ ├── prepare_reference_data.py │ │ ├── monitor_profile.ipynb │ │ ├── send_data.py │ │ ├── monitor_profile_solution.ipynb │ │ ├── prefect_monitoring.py │ │ └── prefect_monitoring_solution.py │ ├── test.py │ ├── prepare.py │ ├── docker-compose-homework.yml │ ├── docker-compose-homework-solution.yml │ └── model_training.py ├── requirements.txt ├── test.py ├── prepare.py ├── send_data.py ├── meta.json ├── docker-compose.yml ├── prefect_example.py └── README.md ├── images ├── play.png ├── banner.png ├── prefect.png ├── mlops-world.png └── IMG_20230323_134059_927.png ├── 04-deployment ├── homework │ ├── Dockerfile │ ├── model.bin │ ├── Pipfile │ ├── homework.dockerfile │ ├── batch.py │ └── starter.ipynb ├── web-service │ ├── lin_reg.bin │ ├── test.py │ ├── Pipfile │ ├── Dockerfile │ ├── README.md │ └── predict.py ├── images │ ├── thumbnail-4-01.jpg │ ├── thumbnail-4-02.jpg │ ├── thumbnail-4-03.jpg │ ├── thumbnail-4-04.jpg │ ├── thumbnail-4-05.jpg │ └── thumbnail-4-06.jpg ├── web-service-mlflow │ ├── dict_vectorizer.bin │ ├── test.py │ ├── Pipfile │ ├── README.md │ └── predict.py ├── batch │ ├── README.md │ ├── Pipfile │ ├── score_deploy.py │ ├── score_backfill.py │ └── score.py ├── streaming │ ├── Pipfile │ ├── Dockerfile │ ├── test.py │ ├── test_docker.py │ └── 
lambda_function.py ├── meta.json └── README.md ├── 03-orchestration ├── .gitignore ├── requirements.txt ├── images │ ├── thumbnail-3-01.jpg │ ├── thumbnail-3-02.jpg │ ├── thumbnail-3-03.jpg │ ├── thumbnail-3-04.jpg │ ├── thumbnail-3-05.jpg │ └── thumbnail-3-06.jpg ├── work-queue.py ├── windows.md ├── meta.json ├── homework.py ├── homework_solution.py └── README.md ├── 02-experiment-tracking ├── requirements.txt ├── images │ ├── ec2_os.png │ ├── key_pair.png │ ├── s3_bucket.png │ ├── db_password.png │ ├── db_settings.png │ ├── postgresql.png │ ├── security_group.png │ ├── thumbnail-2-01.jpg │ ├── thumbnail-2-02.jpg │ ├── thumbnail-2-03.jpg │ ├── thumbnail-2-04.jpg │ ├── thumbnail-2-05.jpg │ ├── thumbnail-2-06.jpg │ ├── thumbnail-2-07.jpg │ ├── db_configuration.png │ ├── select_key_pair.png │ ├── ec2_instance_type.png │ └── postgresql_inbound_rule.png ├── homework │ ├── train.py │ ├── hpo.py │ ├── preprocess_data.py │ └── register_model.py ├── meta.json ├── README.md ├── running-mlflow-examples │ ├── scenario-1.ipynb │ ├── scenario-2.ipynb │ └── scenario-3.ipynb └── mlflow_on_aws.md ├── 01-intro ├── images │ ├── thumbnail-1-01.jpg │ ├── thumbnail-1-02.jpg │ ├── thumbnail-1-03.jpg │ ├── thumbnail-1-04.jpg │ └── thumbnail-1-05.jpg ├── meta.json └── homework.md ├── .gitignore ├── after-sign-up.md ├── asking-questions.md ├── certificate.md └── .github └── workflows ├── ci-tests.yml └── cd-deploy.yml /06-best-practices/code/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /06-best-practices/code/.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | -------------------------------------------------------------------------------- /06-best-practices/homework_solution/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /05-monitoring/evidently_service/datasets/do_not_delete: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /images/play.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/My-Machine-Learning-Projects-CT/mlops-zoomcamp/main/images/play.png -------------------------------------------------------------------------------- /images/banner.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/My-Machine-Learning-Projects-CT/mlops-zoomcamp/main/images/banner.png -------------------------------------------------------------------------------- /images/prefect.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/My-Machine-Learning-Projects-CT/mlops-zoomcamp/main/images/prefect.png -------------------------------------------------------------------------------- /06-best-practices/code/scripts/publish.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | echo "publishing image ${LOCAL_IMAGE_NAME} to ECR..." 
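# --- Editorial note: the remainder of publish.sh is not shown in this dump. ---
# What follows is a hedged sketch of the tag-and-push steps such a publish script
# typically performs, not the original file's contents. ECR_REGISTRY and
# ECR_REPO_NAME are hypothetical placeholder values introduced for illustration.

ECR_REGISTRY="123456789012.dkr.ecr.eu-west-1.amazonaws.com"  # hypothetical account/region
ECR_REPO_NAME="stream_model_duration"                        # hypothetical repository name
REMOTE_IMAGE="${ECR_REGISTRY}/${ECR_REPO_NAME}:latest"

# Authenticate Docker against ECR, retag the locally built image, and push it
aws ecr get-login-password | docker login --username AWS --password-stdin "${ECR_REGISTRY}"
docker tag "${LOCAL_IMAGE_NAME}" "${REMOTE_IMAGE}"
docker push "${REMOTE_IMAGE}"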
-------------------------------------------------------------------------------- /04-deployment/homework/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.9.7-slim 2 | 3 | WORKDIR /app 4 | COPY [ "model2.bin", "model.bin" ] 5 | -------------------------------------------------------------------------------- /images/mlops-world.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/My-Machine-Learning-Projects-CT/mlops-zoomcamp/main/images/mlops-world.png -------------------------------------------------------------------------------- /03-orchestration/.gitignore: -------------------------------------------------------------------------------- 1 | data/ 2 | .ipynb_checkpoints 3 | models/* 4 | mlruns/* 5 | .vscode/ 6 | ./DS_Store 7 | *.db 8 | *.DS_Store -------------------------------------------------------------------------------- /06-best-practices/code/infrastructure/modules/s3/variables.tf: -------------------------------------------------------------------------------- 1 | variable "bucket_name" { 2 | description = "Name of the bucket" 3 | } 4 | -------------------------------------------------------------------------------- /02-experiment-tracking/requirements.txt: -------------------------------------------------------------------------------- 1 | mlflow 2 | jupyter 3 | scikit-learn 4 | pandas 5 | seaborn 6 | hyperopt 7 | xgboost 8 | fastparquet 9 | boto3 -------------------------------------------------------------------------------- /04-deployment/homework/model.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/My-Machine-Learning-Projects-CT/mlops-zoomcamp/main/04-deployment/homework/model.bin -------------------------------------------------------------------------------- /01-intro/images/thumbnail-1-01.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/My-Machine-Learning-Projects-CT/mlops-zoomcamp/main/01-intro/images/thumbnail-1-01.jpg -------------------------------------------------------------------------------- /01-intro/images/thumbnail-1-02.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/My-Machine-Learning-Projects-CT/mlops-zoomcamp/main/01-intro/images/thumbnail-1-02.jpg -------------------------------------------------------------------------------- /01-intro/images/thumbnail-1-03.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/My-Machine-Learning-Projects-CT/mlops-zoomcamp/main/01-intro/images/thumbnail-1-03.jpg -------------------------------------------------------------------------------- /01-intro/images/thumbnail-1-04.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/My-Machine-Learning-Projects-CT/mlops-zoomcamp/main/01-intro/images/thumbnail-1-04.jpg -------------------------------------------------------------------------------- /01-intro/images/thumbnail-1-05.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/My-Machine-Learning-Projects-CT/mlops-zoomcamp/main/01-intro/images/thumbnail-1-05.jpg -------------------------------------------------------------------------------- /03-orchestration/requirements.txt: 
-------------------------------------------------------------------------------- 1 | mlflow 2 | jupyter 3 | scikit-learn 4 | pandas 5 | seaborn 6 | hyperopt 7 | xgboost 8 | fastparquet 9 | prefect==2.3.1 -------------------------------------------------------------------------------- /images/IMG_20230323_134059_927.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/My-Machine-Learning-Projects-CT/mlops-zoomcamp/main/images/IMG_20230323_134059_927.png -------------------------------------------------------------------------------- /04-deployment/web-service/lin_reg.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/My-Machine-Learning-Projects-CT/mlops-zoomcamp/main/04-deployment/web-service/lin_reg.bin -------------------------------------------------------------------------------- /06-best-practices/ci_cd_zoomcamp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/My-Machine-Learning-Projects-CT/mlops-zoomcamp/main/06-best-practices/ci_cd_zoomcamp.png -------------------------------------------------------------------------------- /06-best-practices/homework/model.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/My-Machine-Learning-Projects-CT/mlops-zoomcamp/main/06-best-practices/homework/model.bin -------------------------------------------------------------------------------- /02-experiment-tracking/images/ec2_os.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/My-Machine-Learning-Projects-CT/mlops-zoomcamp/main/02-experiment-tracking/images/ec2_os.png -------------------------------------------------------------------------------- /04-deployment/images/thumbnail-4-01.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/My-Machine-Learning-Projects-CT/mlops-zoomcamp/main/04-deployment/images/thumbnail-4-01.jpg -------------------------------------------------------------------------------- /04-deployment/images/thumbnail-4-02.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/My-Machine-Learning-Projects-CT/mlops-zoomcamp/main/04-deployment/images/thumbnail-4-02.jpg -------------------------------------------------------------------------------- /04-deployment/images/thumbnail-4-03.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/My-Machine-Learning-Projects-CT/mlops-zoomcamp/main/04-deployment/images/thumbnail-4-03.jpg -------------------------------------------------------------------------------- /04-deployment/images/thumbnail-4-04.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/My-Machine-Learning-Projects-CT/mlops-zoomcamp/main/04-deployment/images/thumbnail-4-04.jpg -------------------------------------------------------------------------------- /04-deployment/images/thumbnail-4-05.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/My-Machine-Learning-Projects-CT/mlops-zoomcamp/main/04-deployment/images/thumbnail-4-05.jpg 
-------------------------------------------------------------------------------- /04-deployment/images/thumbnail-4-06.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/My-Machine-Learning-Projects-CT/mlops-zoomcamp/main/04-deployment/images/thumbnail-4-06.jpg -------------------------------------------------------------------------------- /05-monitoring/images/thumbnail-5-01.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/My-Machine-Learning-Projects-CT/mlops-zoomcamp/main/05-monitoring/images/thumbnail-5-01.jpg -------------------------------------------------------------------------------- /05-monitoring/images/thumbnail-5-02.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/My-Machine-Learning-Projects-CT/mlops-zoomcamp/main/05-monitoring/images/thumbnail-5-02.jpg -------------------------------------------------------------------------------- /05-monitoring/images/thumbnail-5-03.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/My-Machine-Learning-Projects-CT/mlops-zoomcamp/main/05-monitoring/images/thumbnail-5-03.jpg -------------------------------------------------------------------------------- /05-monitoring/images/thumbnail-5-04.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/My-Machine-Learning-Projects-CT/mlops-zoomcamp/main/05-monitoring/images/thumbnail-5-04.jpg -------------------------------------------------------------------------------- /05-monitoring/images/thumbnail-5-05.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/My-Machine-Learning-Projects-CT/mlops-zoomcamp/main/05-monitoring/images/thumbnail-5-05.jpg -------------------------------------------------------------------------------- /02-experiment-tracking/images/key_pair.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/My-Machine-Learning-Projects-CT/mlops-zoomcamp/main/02-experiment-tracking/images/key_pair.png -------------------------------------------------------------------------------- /02-experiment-tracking/images/s3_bucket.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/My-Machine-Learning-Projects-CT/mlops-zoomcamp/main/02-experiment-tracking/images/s3_bucket.png -------------------------------------------------------------------------------- /03-orchestration/images/thumbnail-3-01.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/My-Machine-Learning-Projects-CT/mlops-zoomcamp/main/03-orchestration/images/thumbnail-3-01.jpg -------------------------------------------------------------------------------- /03-orchestration/images/thumbnail-3-02.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/My-Machine-Learning-Projects-CT/mlops-zoomcamp/main/03-orchestration/images/thumbnail-3-02.jpg -------------------------------------------------------------------------------- /03-orchestration/images/thumbnail-3-03.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/My-Machine-Learning-Projects-CT/mlops-zoomcamp/main/03-orchestration/images/thumbnail-3-03.jpg -------------------------------------------------------------------------------- /03-orchestration/images/thumbnail-3-04.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/My-Machine-Learning-Projects-CT/mlops-zoomcamp/main/03-orchestration/images/thumbnail-3-04.jpg -------------------------------------------------------------------------------- /03-orchestration/images/thumbnail-3-05.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/My-Machine-Learning-Projects-CT/mlops-zoomcamp/main/03-orchestration/images/thumbnail-3-05.jpg -------------------------------------------------------------------------------- /03-orchestration/images/thumbnail-3-06.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/My-Machine-Learning-Projects-CT/mlops-zoomcamp/main/03-orchestration/images/thumbnail-3-06.jpg -------------------------------------------------------------------------------- /06-best-practices/AWS-stream-pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/My-Machine-Learning-Projects-CT/mlops-zoomcamp/main/06-best-practices/AWS-stream-pipeline.png -------------------------------------------------------------------------------- /06-best-practices/images/thumbnail-6-1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/My-Machine-Learning-Projects-CT/mlops-zoomcamp/main/06-best-practices/images/thumbnail-6-1.jpg -------------------------------------------------------------------------------- /06-best-practices/images/thumbnail-6-2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/My-Machine-Learning-Projects-CT/mlops-zoomcamp/main/06-best-practices/images/thumbnail-6-2.jpg -------------------------------------------------------------------------------- /06-best-practices/images/thumbnail-6-3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/My-Machine-Learning-Projects-CT/mlops-zoomcamp/main/06-best-practices/images/thumbnail-6-3.jpg -------------------------------------------------------------------------------- /06-best-practices/images/thumbnail-6-4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/My-Machine-Learning-Projects-CT/mlops-zoomcamp/main/06-best-practices/images/thumbnail-6-4.jpg -------------------------------------------------------------------------------- /06-best-practices/images/thumbnail-6-5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/My-Machine-Learning-Projects-CT/mlops-zoomcamp/main/06-best-practices/images/thumbnail-6-5.jpg -------------------------------------------------------------------------------- /06-best-practices/images/thumbnail-6-6.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/My-Machine-Learning-Projects-CT/mlops-zoomcamp/main/06-best-practices/images/thumbnail-6-6.jpg 
-------------------------------------------------------------------------------- /02-experiment-tracking/images/db_password.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/My-Machine-Learning-Projects-CT/mlops-zoomcamp/main/02-experiment-tracking/images/db_password.png -------------------------------------------------------------------------------- /02-experiment-tracking/images/db_settings.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/My-Machine-Learning-Projects-CT/mlops-zoomcamp/main/02-experiment-tracking/images/db_settings.png -------------------------------------------------------------------------------- /02-experiment-tracking/images/postgresql.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/My-Machine-Learning-Projects-CT/mlops-zoomcamp/main/02-experiment-tracking/images/postgresql.png -------------------------------------------------------------------------------- /05-monitoring/prediction_service/lin_reg.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/My-Machine-Learning-Projects-CT/mlops-zoomcamp/main/05-monitoring/prediction_service/lin_reg.bin -------------------------------------------------------------------------------- /06-best-practices/code/integraton-test/model/requirements.txt: -------------------------------------------------------------------------------- 1 | mlflow 2 | cloudpickle==2.0.0 3 | psutil==5.8.0 4 | scikit-learn==1.0.2 5 | typing-extensions==3.10.0.2 6 | -------------------------------------------------------------------------------- /06-best-practices/homework_solution/model.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/My-Machine-Learning-Projects-CT/mlops-zoomcamp/main/06-best-practices/homework_solution/model.bin -------------------------------------------------------------------------------- /02-experiment-tracking/images/security_group.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/My-Machine-Learning-Projects-CT/mlops-zoomcamp/main/02-experiment-tracking/images/security_group.png -------------------------------------------------------------------------------- /02-experiment-tracking/images/thumbnail-2-01.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/My-Machine-Learning-Projects-CT/mlops-zoomcamp/main/02-experiment-tracking/images/thumbnail-2-01.jpg -------------------------------------------------------------------------------- /02-experiment-tracking/images/thumbnail-2-02.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/My-Machine-Learning-Projects-CT/mlops-zoomcamp/main/02-experiment-tracking/images/thumbnail-2-02.jpg -------------------------------------------------------------------------------- /02-experiment-tracking/images/thumbnail-2-03.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/My-Machine-Learning-Projects-CT/mlops-zoomcamp/main/02-experiment-tracking/images/thumbnail-2-03.jpg -------------------------------------------------------------------------------- 
/02-experiment-tracking/images/thumbnail-2-04.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/My-Machine-Learning-Projects-CT/mlops-zoomcamp/main/02-experiment-tracking/images/thumbnail-2-04.jpg -------------------------------------------------------------------------------- /02-experiment-tracking/images/thumbnail-2-05.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/My-Machine-Learning-Projects-CT/mlops-zoomcamp/main/02-experiment-tracking/images/thumbnail-2-05.jpg -------------------------------------------------------------------------------- /02-experiment-tracking/images/thumbnail-2-06.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/My-Machine-Learning-Projects-CT/mlops-zoomcamp/main/02-experiment-tracking/images/thumbnail-2-06.jpg -------------------------------------------------------------------------------- /02-experiment-tracking/images/thumbnail-2-07.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/My-Machine-Learning-Projects-CT/mlops-zoomcamp/main/02-experiment-tracking/images/thumbnail-2-07.jpg -------------------------------------------------------------------------------- /02-experiment-tracking/images/db_configuration.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/My-Machine-Learning-Projects-CT/mlops-zoomcamp/main/02-experiment-tracking/images/db_configuration.png -------------------------------------------------------------------------------- /02-experiment-tracking/images/select_key_pair.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/My-Machine-Learning-Projects-CT/mlops-zoomcamp/main/02-experiment-tracking/images/select_key_pair.png -------------------------------------------------------------------------------- /02-experiment-tracking/images/ec2_instance_type.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/My-Machine-Learning-Projects-CT/mlops-zoomcamp/main/02-experiment-tracking/images/ec2_instance_type.png -------------------------------------------------------------------------------- /04-deployment/web-service-mlflow/dict_vectorizer.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/My-Machine-Learning-Projects-CT/mlops-zoomcamp/main/04-deployment/web-service-mlflow/dict_vectorizer.bin -------------------------------------------------------------------------------- /05-monitoring/homework/prediction_service/lin_reg.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/My-Machine-Learning-Projects-CT/mlops-zoomcamp/main/05-monitoring/homework/prediction_service/lin_reg.bin -------------------------------------------------------------------------------- /06-best-practices/code/integraton-test/model/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/My-Machine-Learning-Projects-CT/mlops-zoomcamp/main/06-best-practices/code/integraton-test/model/model.pkl -------------------------------------------------------------------------------- 
/02-experiment-tracking/images/postgresql_inbound_rule.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/My-Machine-Learning-Projects-CT/mlops-zoomcamp/main/02-experiment-tracking/images/postgresql_inbound_rule.png -------------------------------------------------------------------------------- /05-monitoring/homework/prediction_service/lin_reg_V2.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/My-Machine-Learning-Projects-CT/mlops-zoomcamp/main/05-monitoring/homework/prediction_service/lin_reg_V2.bin -------------------------------------------------------------------------------- /06-best-practices/homework_solution/docker-compose.yaml: -------------------------------------------------------------------------------- 1 | services: 2 | s3: 3 | image: localstack/localstack 4 | ports: 5 | - "4566:4566" 6 | environment: 7 | - SERVICES=s3 -------------------------------------------------------------------------------- /05-monitoring/evidently_service/requirements.txt: -------------------------------------------------------------------------------- 1 | dataclasses==0.6 2 | Flask~=2.0.1 3 | pandas~=1.1.5 4 | Werkzeug~=2.0.1 5 | requests~=2.26.0 6 | prometheus_client~=0.11.0 7 | pyyaml~=5.4.1 8 | pyarrow -------------------------------------------------------------------------------- /06-best-practices/code/integraton-test/model/python_env.yaml: -------------------------------------------------------------------------------- 1 | python: 3.9.7 2 | build_dependencies: 3 | - pip==22.1 4 | - setuptools==58.0.4 5 | - wheel==0.37.0 6 | dependencies: 7 | - -r requirements.txt 8 | -------------------------------------------------------------------------------- /04-deployment/batch/README.md: -------------------------------------------------------------------------------- 1 | ## Batch deployment 2 | 3 | * Turn the notebook for training a model into a notebook for applying the model 4 | * Turn the notebook into a script 5 | * Clean it and parametrize 6 | -------------------------------------------------------------------------------- /05-monitoring/prediction_service/requirements.txt: -------------------------------------------------------------------------------- 1 | scikit-learn 2 | dataclasses==0.6 3 | Flask~=2.0.1 4 | pandas~=1.1.5 5 | Werkzeug~=2.0.1 6 | requests~=2.26.0 7 | prometheus_client~=0.11.0 8 | pyyaml~=5.4.1 9 | evidently 10 | pymongo -------------------------------------------------------------------------------- /06-best-practices/code/tests/data.b64: -------------------------------------------------------------------------------- 1 | ewogICAgICAgICJyaWRlIjogewogICAgICAgICAgICAiUFVMb2NhdGlvbklEIjogMTMwLAogICAgICAgICAgICAiRE9Mb2NhdGlvbklEIjogMjA1LAogICAgICAgICAgICAidHJpcF9kaXN0YW5jZSI6IDMuNjYKICAgICAgICB9LCAKICAgICAgICAicmlkZV9pZCI6IDI1NgogICAgfQ== 2 | -------------------------------------------------------------------------------- /05-monitoring/requirements.txt: -------------------------------------------------------------------------------- 1 | scikit-learn 2 | dataclasses==0.6 3 | Flask~=2.0.1 4 | pandas>=1.1.5 5 | Werkzeug~=2.0.1 6 | requests~=2.26.0 7 | prometheus_client~=0.11.0 8 | pyyaml~=5.4.1 9 | tqdm 10 | pyarrow 11 | prefect>=2.0b 12 | pymongo 13 | evidently 14 | -------------------------------------------------------------------------------- /06-best-practices/code/infrastructure/modules/s3/main.tf: 
-------------------------------------------------------------------------------- 1 | resource "aws_s3_bucket" "s3_bucket" { 2 | bucket = var.bucket_name 3 | acl = "private" 4 | force_destroy = true 5 | } 6 | 7 | output "name" { 8 | value = aws_s3_bucket.s3_bucket.bucket 9 | } 10 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | data/ 2 | .ipynb_checkpoints 3 | .bin 4 | *.db 5 | 6 | *.parquet 7 | *.html 8 | *.csv 9 | 10 | .venv 11 | venv 12 | .idea 13 | **/artifacts/ 14 | **/models/ 15 | 16 | __pycache__/ 17 | **.env 18 | **.terraform/ 19 | **.terraform.lock* 20 | **terraform.tfstate* 21 | -------------------------------------------------------------------------------- /04-deployment/streaming/Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | url = "https://pypi.org/simple" 3 | verify_ssl = true 4 | name = "pypi" 5 | 6 | [packages] 7 | boto3 = "*" 8 | mlflow = "*" 9 | scikit-learn = "==1.0.2" 10 | 11 | [dev-packages] 12 | 13 | [requires] 14 | python_version = "3.9" 15 | -------------------------------------------------------------------------------- /04-deployment/web-service/test.py: -------------------------------------------------------------------------------- 1 | import requests 2 | 3 | ride = { 4 | "PULocationID": 10, 5 | "DOLocationID": 50, 6 | "trip_distance": 40 7 | } 8 | 9 | url = 'http://localhost:9696/predict' 10 | response = requests.post(url, json=ride) 11 | print(response.json()) 12 | -------------------------------------------------------------------------------- /04-deployment/web-service-mlflow/test.py: -------------------------------------------------------------------------------- 1 | import requests 2 | 3 | ride = { 4 | "PULocationID": 10, 5 | "DOLocationID": 50, 6 | "trip_distance": 40 7 | } 8 | 9 | url = 'http://localhost:9696/predict' 10 | response = requests.post(url, json=ride) 11 | print(response.json()) 12 | -------------------------------------------------------------------------------- /05-monitoring/homework/requirements.txt: -------------------------------------------------------------------------------- 1 | scikit-learn==1.0.2 2 | dataclasses==0.6 3 | Flask~=2.0.1 4 | pandas>=1.1.5 5 | Werkzeug~=2.0.1 6 | requests~=2.26.0 7 | prometheus_client~=0.11.0 8 | pyyaml~=5.4.1 9 | tqdm 10 | pyarrow 11 | prefect==2.0b8 12 | pymongo 13 | evidently 14 | pipenv 15 | -------------------------------------------------------------------------------- /04-deployment/homework/Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | url = "https://pypi.org/simple" 3 | verify_ssl = true 4 | name = "pypi" 5 | 6 | [packages] 7 | scikit-learn = "==1.0.2" 8 | pandas = "*" 9 | pyarrow = "*" 10 | s3fs = "*" 11 | 12 | [dev-packages] 13 | 14 | [requires] 15 | python_version = "3.9" 16 | -------------------------------------------------------------------------------- /06-best-practices/code/integraton-test/model/conda.yaml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python=3.9.7 5 | - pip<=22.1 6 | - pip: 7 | - mlflow 8 | - cloudpickle==2.0.0 9 | - psutil==5.8.0 10 | - scikit-learn==1.0.2 11 | - typing-extensions==3.10.0.2 12 | name: mlflow-env 13 | -------------------------------------------------------------------------------- 
/06-best-practices/homework/Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | url = "https://pypi.org/simple" 3 | verify_ssl = true 4 | name = "pypi" 5 | 6 | [packages] 7 | scikit-learn = "==1.0.2" 8 | pandas = "*" 9 | pyarrow = "*" 10 | s3fs = "*" 11 | 12 | [dev-packages] 13 | 14 | [requires] 15 | python_version = "3.9" 16 | -------------------------------------------------------------------------------- /04-deployment/web-service/Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | url = "https://pypi.org/simple" 3 | verify_ssl = true 4 | name = "pypi" 5 | 6 | [packages] 7 | scikit-learn = "==1.0.2" 8 | flask = "*" 9 | gunicorn = "*" 10 | 11 | [dev-packages] 12 | requests = "*" 13 | 14 | [requires] 15 | python_version = "3.9" 16 | -------------------------------------------------------------------------------- /06-best-practices/code/.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "python.testing.pytestArgs": [ 3 | "tests" 4 | ], 5 | "python.testing.unittestEnabled": false, 6 | "python.testing.pytestEnabled": true, 7 | "python.linting.pylintEnabled": true, 8 | "python.linting.enabled": true 9 | } 10 | -------------------------------------------------------------------------------- /06-best-practices/homework_solution/Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | url = "https://pypi.org/simple" 3 | verify_ssl = true 4 | name = "pypi" 5 | 6 | [packages] 7 | scikit-learn = "==1.0.2" 8 | pandas = "*" 9 | pyarrow = "*" 10 | s3fs = "*" 11 | 12 | [dev-packages] 13 | pytest = "*" 14 | 15 | [requires] 16 | python_version = "3.9" 17 | -------------------------------------------------------------------------------- /04-deployment/streaming/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM public.ecr.aws/lambda/python:3.9 2 | 3 | RUN pip install -U pip 4 | RUN pip install pipenv 5 | 6 | COPY [ "Pipfile", "Pipfile.lock", "./" ] 7 | 8 | RUN pipenv install --system --deploy 9 | 10 | COPY [ "lambda_function.py", "./" ] 11 | 12 | CMD [ "lambda_function.lambda_handler" ] 13 | -------------------------------------------------------------------------------- /05-monitoring/homework/prefect-monitoring/clean_mongo.py: -------------------------------------------------------------------------------- 1 | from pymongo import MongoClient 2 | 3 | MONGO_CLIENT_ADDRESS = "mongodb://localhost:27017/" 4 | MONGO_DATABASE = "prediction_service" 5 | 6 | 7 | if __name__ == "__main__": 8 | client = MongoClient(MONGO_CLIENT_ADDRESS) 9 | client.drop_database(MONGO_DATABASE) 10 | -------------------------------------------------------------------------------- /06-best-practices/code/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM public.ecr.aws/lambda/python:3.9 2 | 3 | RUN pip install -U pip 4 | RUN pip install pipenv 5 | 6 | COPY [ "Pipfile", "Pipfile.lock", "./" ] 7 | 8 | RUN pipenv install --system --deploy 9 | 10 | COPY [ "lambda_function.py", "model.py", "./" ] 11 | 12 | CMD [ "lambda_function.lambda_handler" ] 13 | -------------------------------------------------------------------------------- /04-deployment/web-service-mlflow/Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | url = "https://pypi.org/simple" 3 | 
verify_ssl = true 4 | name = "pypi" 5 | 6 | [packages] 7 | scikit-learn = "==1.0.2" 8 | flask = "*" 9 | gunicorn = "*" 10 | mlflow = "*" 11 | boto3 = "*" 12 | 13 | [dev-packages] 14 | requests = "*" 15 | 16 | [requires] 17 | python_version = "3.9" 18 | -------------------------------------------------------------------------------- /04-deployment/batch/Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | url = "https://pypi.org/simple" 3 | verify_ssl = true 4 | name = "pypi" 5 | 6 | [packages] 7 | scikit-learn = "==1.0.2" 8 | prefect = "==2.0b6" 9 | mlflow = "*" 10 | pandas = "*" 11 | boto3 = "*" 12 | pyarrow = "*" 13 | s3fs = "*" 14 | 15 | [dev-packages] 16 | 17 | [requires] 18 | python_version = "3.9" 19 | -------------------------------------------------------------------------------- /04-deployment/homework/homework.dockerfile: -------------------------------------------------------------------------------- 1 | FROM agrigorev/zoomcamp-model:mlops-3.9.7-slim 2 | 3 | RUN pip install -U pip 4 | RUN pip install pipenv 5 | 6 | WORKDIR /app 7 | 8 | COPY [ "Pipfile", "Pipfile.lock", "./" ] 9 | 10 | RUN pipenv install --system --deploy 11 | 12 | COPY [ "batch.py", "batch.py" ] 13 | 14 | ENTRYPOINT [ "python", "batch.py" ] -------------------------------------------------------------------------------- /05-monitoring/test.py: -------------------------------------------------------------------------------- 1 | import requests 2 | 3 | url = 'http://127.0.0.1:9696/predict' 4 | 5 | ride = { 6 | 'lpep_pickup_datetime': '2021-01-01 00:15:56', 7 | 'PULocationID': 43, 8 | 'DOLocationID': 151, 9 | 'passenger_count': 1.0, 10 | 'trip_distance': 1.01 11 | } 12 | 13 | response = requests.post(url, json=ride).json() 14 | print(response) 15 | -------------------------------------------------------------------------------- /06-best-practices/homework/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.9.7-slim 2 | 3 | RUN pip install -U pip 4 | RUN pip install pipenv 5 | 6 | WORKDIR /app 7 | 8 | COPY [ "Pipfile", "Pipfile.lock", "./" ] 9 | 10 | RUN pipenv install --system --deploy 11 | 12 | COPY [ "batch.py", "batch.py" ] 13 | COPY [ "model.bin", "model.bin" ] 14 | 15 | ENTRYPOINT [ "python", "batch.py" ] -------------------------------------------------------------------------------- /06-best-practices/homework_solution/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.9.7-slim 2 | 3 | RUN pip install -U pip 4 | RUN pip install pipenv 5 | 6 | WORKDIR /app 7 | 8 | COPY [ "Pipfile", "Pipfile.lock", "./" ] 9 | 10 | RUN pipenv install --system --deploy 11 | 12 | COPY [ "batch.py", "batch.py" ] 13 | COPY [ "model.bin", "model.bin" ] 14 | 15 | ENTRYPOINT [ "python", "batch.py" ] -------------------------------------------------------------------------------- /05-monitoring/evidently_service/Dockerfile: -------------------------------------------------------------------------------- 1 | # syntax=docker/dockerfile:1 2 | 3 | FROM python:3.8-slim-buster 4 | 5 | WORKDIR /app 6 | 7 | COPY requirements.txt requirements.txt 8 | 9 | RUN pip3 install -r requirements.txt 10 | 11 | RUN pip3 install evidently==0.1.51.dev0 12 | 13 | COPY app.py . 
14 | 15 | CMD [ "python3", "-m" , "flask", "run", "--host=0.0.0.0", "--port=8085"] -------------------------------------------------------------------------------- /04-deployment/web-service/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.9.7-slim 2 | 3 | RUN pip install -U pip 4 | RUN pip install pipenv 5 | 6 | WORKDIR /app 7 | 8 | COPY [ "Pipfile", "Pipfile.lock", "./" ] 9 | 10 | RUN pipenv install --system --deploy 11 | 12 | COPY [ "predict.py", "lin_reg.bin", "./" ] 13 | 14 | EXPOSE 9696 15 | 16 | ENTRYPOINT [ "gunicorn", "--bind=0.0.0.0:9696", "predict:app" ] -------------------------------------------------------------------------------- /06-best-practices/code/infrastructure/vars/stg.tfvars: -------------------------------------------------------------------------------- 1 | source_stream_name = "stg_ride_events" 2 | output_stream_name = "stg_ride_predictions" 3 | model_bucket = "stg-mlflow-models-code-owners" 4 | lambda_function_local_path = "../lambda_function.py" 5 | docker_image_local_path = "../Dockerfile" 6 | ecr_repo_name = "stg_stream_model_duration" 7 | lambda_function_name = "stg_prediction_lambda" 8 | -------------------------------------------------------------------------------- /05-monitoring/homework/prediction_service/Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | url = "https://pypi.org/simple" 3 | verify_ssl = true 4 | name = "pypi" 5 | 6 | [packages] 7 | scikit-learn = "==1.0.2" 8 | flask = "==2.0.1" 9 | pandas = "==1.1.5" 10 | evidently = "*" 11 | pymongo = "*" 12 | gunicorn = "*" 13 | 14 | [dev-packages] 15 | pyarrow = "*" 16 | 17 | [requires] 18 | python_version = "3.8" 19 | -------------------------------------------------------------------------------- /05-monitoring/homework/prefect-monitoring/Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | url = "https://pypi.org/simple" 3 | verify_ssl = true 4 | name = "pypi" 5 | 6 | [packages] 7 | scikit-learn = "==1.0.2" 8 | pyarrow = "*" 9 | prefect = "==2.0b8" 10 | pandas = "*" 11 | pymongo = "*" 12 | psutil = "==5.9.1" 13 | evidently = "*" 14 | 15 | [dev-packages] 16 | 17 | [requires] 18 | python_version = "3.8" 19 | -------------------------------------------------------------------------------- /06-best-practices/code/infrastructure/vars/prod.tfvars: -------------------------------------------------------------------------------- 1 | source_stream_name = "prod_ride_events" 2 | output_stream_name = "prod_ride_predictions" 3 | model_bucket = "prod-mlflow-models-code-owners" 4 | lambda_function_local_path = "../lambda_function.py" 5 | docker_image_local_path = "../Dockerfile" 6 | ecr_repo_name = "prod_stream_model_duration" 7 | lambda_function_name = "prod_prediction_lambda" 8 | -------------------------------------------------------------------------------- /05-monitoring/prediction_service/Dockerfile: -------------------------------------------------------------------------------- 1 | # syntax=docker/dockerfile:1 2 | 3 | FROM python:3.8-slim-buster 4 | 5 | WORKDIR /app 6 | 7 | COPY requirements.txt requirements.txt 8 | 9 | RUN pip3 install -r requirements.txt 10 | 11 | RUN pip3 install evidently 12 | 13 | COPY app.py . 14 | COPY lin_reg.bin . 
15 | 16 | CMD [ "python3", "-m" , "flask", "run", "--host=0.0.0.0", "--port=9696"] -------------------------------------------------------------------------------- /06-best-practices/code/Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | url = "https://pypi.org/simple" 3 | verify_ssl = true 4 | name = "pypi" 5 | 6 | [packages] 7 | boto3 = "*" 8 | mlflow = "*" 9 | scikit-learn = "==1.0.2" 10 | 11 | [dev-packages] 12 | pytest = "*" 13 | deepdiff = "*" 14 | pylint = "==2.14.4" 15 | black = "*" 16 | isort = "*" 17 | pre-commit = "*" 18 | 19 | [requires] 20 | python_version = "3.9" 21 | -------------------------------------------------------------------------------- /06-best-practices/code/plan.md: -------------------------------------------------------------------------------- 1 | ## Plan 2 | 3 | - [x] Testing the code: unit tests with pytest 4 | - [x] Integration tests with docker-compose 5 | - [x] Testing cloud services with LocalStack 6 | - [x] Code quality: linting and formatting 7 | - [x] Git pre-commit hooks 8 | - [x] Makefiles and make 9 | - [ ] Staging and production environments 10 | - [ ] Infrastructure as Code 11 | - [ ] CI/CD and GitHub Actions 12 | -------------------------------------------------------------------------------- /05-monitoring/homework/prediction_service/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.8-slim-buster 2 | 3 | RUN pip install -U pip 4 | RUN pip install pipenv 5 | 6 | WORKDIR /app 7 | 8 | COPY [ "Pipfile", "Pipfile.lock", "./" ] 9 | 10 | RUN pipenv install --system --deploy 11 | 12 | COPY [ "app.py", "lin_reg.bin", "lin_reg_V2.bin", "./" ] 13 | 14 | EXPOSE 9696 15 | 16 | ENTRYPOINT ["gunicorn", "--bind=0.0.0.0:9696", "app:app" ] 17 | -------------------------------------------------------------------------------- /04-deployment/web-service/README.md: -------------------------------------------------------------------------------- 1 | ## Deploying a model as a web-service 2 | 3 | * Creating a virtual environment with Pipenv 4 | * Creating a script for predicting 5 | * Putting the script into a Flask app 6 | * Packaging the app to Docker 7 | 8 | 9 | ```bash 10 | docker build -t ride-duration-prediction-service:v1 . 
11 | ``` 12 | 13 | ```bash 14 | docker run -it --rm -p 9696:9696 ride-duration-prediction-service:v1 15 | ``` 16 | -------------------------------------------------------------------------------- /05-monitoring/evidently_service/config/grafana_datasources.yaml: -------------------------------------------------------------------------------- 1 | # config file version 2 | apiVersion: 1 3 | 4 | # list of datasources that should be deleted from the database 5 | deleteDatasources: 6 | - name: Prometheus 7 | orgId: 1 8 | 9 | # list of datasources to insert/update depending 10 | # what's available in the database 11 | datasources: 12 | - name: Prometheus 13 | type: prometheus 14 | access: proxy 15 | url: http://prometheus.:9090 -------------------------------------------------------------------------------- /06-best-practices/code/infrastructure/modules/kinesis/main.tf: -------------------------------------------------------------------------------- 1 | # Create Kinesis Data Stream 2 | 3 | resource "aws_kinesis_stream" "stream" { 4 | name = var.stream_name 5 | shard_count = var.shard_count 6 | retention_period = var.retention_period 7 | shard_level_metrics = var.shard_level_metrics 8 | tags = { 9 | CreatedBy = var.tags 10 | } 11 | } 12 | 13 | output "stream_arn" { 14 | value = aws_kinesis_stream.stream.arn 15 | } 16 | -------------------------------------------------------------------------------- /06-best-practices/code/pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.pylint.messages_control] 2 | 3 | disable = [ 4 | "missing-function-docstring", 5 | "missing-final-newline", 6 | "missing-class-docstring", 7 | "missing-module-docstring", 8 | "invalid-name", 9 | "too-few-public-methods" 10 | ] 11 | 12 | [tool.black] 13 | line-length = 88 14 | target-version = ['py39'] 15 | skip-string-normalization = true 16 | 17 | [tool.isort] 18 | multi_line_output = 3 19 | length_sort = true 20 | -------------------------------------------------------------------------------- /06-best-practices/code/integraton-test/model/MLmodel: -------------------------------------------------------------------------------- 1 | artifact_path: model 2 | flavors: 3 | python_function: 4 | env: conda.yaml 5 | loader_module: mlflow.sklearn 6 | model_path: model.pkl 7 | python_version: 3.9.7 8 | sklearn: 9 | code: null 10 | pickled_model: model.pkl 11 | serialization_format: cloudpickle 12 | sklearn_version: 1.0.2 13 | mlflow_version: 1.26.1 14 | model_uuid: 78edf19ceea5463aadce7d84f3f9bc82 15 | run_id: e1efc53e9bd149078b0c12aeaa6365df 16 | utc_time_created: '2022-06-01 12:49:55.846831' 17 | -------------------------------------------------------------------------------- /04-deployment/batch/score_deploy.py: -------------------------------------------------------------------------------- 1 | from prefect.deployments import Deployment 2 | from prefect.orion.schemas.schedules import CronSchedule 3 | from score import ride_duration_prediction 4 | 5 | deployment = Deployment.build_from_flow( 6 | flow=ride_duration_prediction, 7 | name="ride_duration_prediction", 8 | parameters={ 9 | "taxi_type": "green", 10 | "run_id": "e1efc53e9bd149078b0c12aeaa6365df", 11 | }, 12 | schedule=CronSchedule(cron="0 3 2 * *"), 13 | work_queue_name="ml", 14 | ) 15 | 16 | deployment.apply() 17 | -------------------------------------------------------------------------------- /06-best-practices/code/lambda_function.py: -------------------------------------------------------------------------------- 1 | 
import os 2 | 3 | import model 4 | 5 | PREDICTIONS_STREAM_NAME = os.getenv('PREDICTIONS_STREAM_NAME', 'ride_predictions') 6 | RUN_ID = os.getenv('RUN_ID') 7 | TEST_RUN = os.getenv('TEST_RUN', 'False') == 'True' 8 | 9 | 10 | model_service = model.init( 11 | prediction_stream_name=PREDICTIONS_STREAM_NAME, 12 | run_id=RUN_ID, 13 | test_run=TEST_RUN, 14 | ) 15 | 16 | 17 | def lambda_handler(event, context): 18 | # pylint: disable=unused-argument 19 | return model_service.lambda_handler(event) 20 | -------------------------------------------------------------------------------- /05-monitoring/homework/prefect-monitoring/prepare_reference_data.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import pyarrow.parquet as pq 3 | 4 | 5 | data_files = ["../datasets/green_tripdata_2021-03.parquet", "../datasets/green_tripdata_2021-04.parquet"] 6 | output_file = "green_tripdata_2021-03to04.parquet" 7 | 8 | df = pd.DataFrame() 9 | for file in data_files: 10 | data = pq.read_table(file).to_pandas() 11 | df = pd.concat([data, df], ignore_index=True) 12 | 13 | df.to_parquet( 14 | output_file, 15 | engine='pyarrow', 16 | compression=None, 17 | index=False 18 | ) 19 | -------------------------------------------------------------------------------- /05-monitoring/evidently_service/config.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | taxi: 3 | column_mapping: 4 | categorical_features: 5 | - 'PULocationID' 6 | - 'DOLocationID' 7 | numerical_features: 8 | - 'trip_distance' 9 | data_format: 10 | header: true 11 | separator: ',' 12 | monitors: 13 | - data_drift 14 | reference_file: ./datasets/green_tripdata_2021-01.parquet 15 | service: 16 | calculation_period_sec: 2 17 | min_reference_size: 30 18 | moving_reference: false 19 | datasets_path: datasets 20 | use_reference: true 21 | window_size: 5 22 | -------------------------------------------------------------------------------- /06-best-practices/code/Makefile: -------------------------------------------------------------------------------- 1 | LOCAL_TAG:=$(shell date +"%Y-%m-%d-%H-%M") 2 | LOCAL_IMAGE_NAME:=stream-model-duration:${LOCAL_TAG} 3 | 4 | test: 5 | pytest tests/ 6 | 7 | quality_checks: 8 | isort . 9 | black . 10 | pylint --recursive=y . 11 | 12 | build: quality_checks test 13 | docker build -t ${LOCAL_IMAGE_NAME} . 
14 | 15 | integration_test: build 16 | LOCAL_IMAGE_NAME=${LOCAL_IMAGE_NAME} bash integraton-test/run.sh 17 | 18 | publish: build integration_test 19 | LOCAL_IMAGE_NAME=${LOCAL_IMAGE_NAME} bash scripts/publish.sh 20 | 21 | setup: 22 | pipenv install --dev 23 | pre-commit install -------------------------------------------------------------------------------- /06-best-practices/code/integraton-test/docker-compose.yaml: -------------------------------------------------------------------------------- 1 | services: 2 | backend: 3 | image: ${LOCAL_IMAGE_NAME} 4 | ports: 5 | - "8080:8080" 6 | environment: 7 | - PREDICTIONS_STREAM_NAME=${PREDICTIONS_STREAM_NAME} 8 | - RUN_ID=Test123 9 | - AWS_DEFAULT_REGION=eu-west-1 10 | - MODEL_LOCATION=/app/model 11 | - KINESIS_ENDPOINT_URL=http://kinesis:4566/ 12 | - AWS_ACCESS_KEY_ID=abc 13 | - AWS_SECRET_ACCESS_KEY=xyz 14 | volumes: 15 | - "./model:/app/model" 16 | kinesis: 17 | image: localstack/localstack 18 | ports: 19 | - "4566:4566" 20 | environment: 21 | - SERVICES=kinesis 22 | -------------------------------------------------------------------------------- /06-best-practices/homework_solution/integration_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | 4 | 5 | docker-compose up -d 6 | 7 | sleep 5 8 | 9 | export INPUT_FILE_PATTERN="s3://nyc-duration/in/{year:04d}-{month:02d}.parquet" 10 | export OUTPUT_FILE_PATTERN="s3://nyc-duration/out/{year:04d}-{month:02d}.parquet" 11 | export S3_ENDPOINT_URL="http://localhost:4566" 12 | 13 | 14 | aws --endpoint-url="${S3_ENDPOINT_URL}" s3 mb s3://nyc-duration 15 | 16 | pipenv run python integration_test.py 17 | 18 | ERROR_CODE=$? 19 | 20 | if [ ${ERROR_CODE} != 0 ]; then 21 | docker-compose logs 22 | docker-compose down 23 | exit ${ERROR_CODE} 24 | fi 25 | 26 | echo "yay tests work!" 
27 | 28 | docker-compose down -------------------------------------------------------------------------------- /05-monitoring/homework/test.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pprint 3 | from pymongo import MongoClient 4 | 5 | import requests 6 | 7 | MONGODB_ADDRESS = os.getenv("MONGODB_ADDRESS", "mongodb://127.0.0.1:27017/") 8 | FLASK_URL = "http://127.0.0.1:9696/predict-duration" 9 | 10 | 11 | mongo_client = MongoClient(MONGODB_ADDRESS) 12 | mongo_db = mongo_client['prediction_service'] 13 | mongo_collection = mongo_db['data'] 14 | ride_test_data = { 15 | "PULocationID": 10, 16 | "DOLocationID": 50, 17 | "trip_distance": 40 18 | } 19 | 20 | 21 | if __name__ == "__main__": 22 | requests.post(url=FLASK_URL, json=ride_test_data) 23 | for coll in mongo_collection.find(): 24 | pprint.pprint(coll) 25 | -------------------------------------------------------------------------------- /05-monitoring/prepare.py: -------------------------------------------------------------------------------- 1 | from tqdm import tqdm 2 | import requests 3 | 4 | files = [("green_tripdata_2022-01.parquet", "."), ("green_tripdata_2021-01.parquet", "./evidently_service/datasets")] 5 | 6 | print(f"Download files:") 7 | for file, path in files: 8 | url = f"https://d37ci6vzurychx.cloudfront.net/trip-data/{file}" 9 | resp = requests.get(url, stream=True) 10 | save_path = f"{path}/{file}" 11 | with open(save_path, "wb") as handle: 12 | for data in tqdm(resp.iter_content(), 13 | desc=f"{file}", 14 | postfix=f"save to {save_path}", 15 | total=int(resp.headers["Content-Length"])): 16 | handle.write(data) 17 | -------------------------------------------------------------------------------- /04-deployment/batch/score_backfill.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | from dateutil.relativedelta import relativedelta 3 | 4 | from prefect import flow 5 | 6 | import score 7 | 8 | 9 | @flow 10 | def ride_duration_prediction_backfill(): 11 | start_date = datetime(year=2021, month=3, day=1) 12 | end_date = datetime(year=2022, month=4, day=1) 13 | 14 | d = start_date 15 | 16 | while d <= end_date: 17 | score.ride_duration_prediction( 18 | taxi_type='green', 19 | run_id='e1efc53e9bd149078b0c12aeaa6365df', 20 | run_date=d 21 | ) 22 | 23 | d = d + relativedelta(months=1) 24 | 25 | 26 | if __name__ == '__main__': 27 | ride_duration_prediction_backfill() -------------------------------------------------------------------------------- /06-best-practices/code/infrastructure/modules/ecr/variables.tf: -------------------------------------------------------------------------------- 1 | variable "ecr_repo_name" { 2 | type = string 3 | description = "ECR repo name" 4 | } 5 | 6 | variable "ecr_image_tag" { 7 | type = string 8 | description = "ECR image tag" 9 | default = "latest" 10 | } 11 | 12 | variable "lambda_function_local_path" { 13 | type = string 14 | description = "Local path to lambda function / python file" 15 | } 16 | 17 | variable "docker_image_local_path" { 18 | type = string 19 | description = "Local path to Dockerfile" 20 | } 21 | 22 | variable "region" { 23 | type = string 24 | description = "region" 25 | default = "eu-west-1" 26 | } 27 | 28 | variable "account_id" { 29 | } 30 | -------------------------------------------------------------------------------- /03-orchestration/work-queue.py: -------------------------------------------------------------------------------- 1 | from 
prefect import flow 2 | 3 | @flow 4 | def myflow(): 5 | print("hello") 6 | 7 | from prefect.deployments import Deployment 8 | from prefect.orion.schemas.schedules import IntervalSchedule 9 | from datetime import timedelta 10 | 11 | deployment_dev = Deployment.build_from_flow( 12 | flow=myflow, 13 | name="model_training-dev", 14 | schedule=IntervalSchedule(interval=timedelta(minutes=5)), 15 | work_queue_name="dev" 16 | ) 17 | 18 | deployment_dev.apply() 19 | 20 | deployment_prod = Deployment.build_from_flow( 21 | flow=myflow, 22 | name="model_training-prod", 23 | schedule=IntervalSchedule(interval=timedelta(minutes=5)), 24 | work_queue_name="prod" 25 | ) 26 | 27 | deployment_prod.apply() 28 | 29 | -------------------------------------------------------------------------------- /06-best-practices/code/infrastructure/variables.tf: -------------------------------------------------------------------------------- 1 | variable "aws_region" { 2 | description = "AWS region to create resources" 3 | default = "eu-west-1" 4 | } 5 | 6 | variable "project_id" { 7 | description = "project_id" 8 | default = "mlops-zoomcamp" 9 | } 10 | 11 | variable "source_stream_name" { 12 | description = "" 13 | } 14 | 15 | variable "output_stream_name" { 16 | description = "" 17 | } 18 | 19 | variable "model_bucket" { 20 | description = "s3_bucket" 21 | } 22 | 23 | variable "lambda_function_local_path" { 24 | description = "" 25 | } 26 | 27 | variable "docker_image_local_path" { 28 | description = "" 29 | } 30 | 31 | variable "ecr_repo_name" { 32 | description = "" 33 | } 34 | 35 | variable "lambda_function_name" { 36 | description = "" 37 | } -------------------------------------------------------------------------------- /05-monitoring/homework/prepare.py: -------------------------------------------------------------------------------- 1 | from tqdm import tqdm 2 | import requests 3 | 4 | files = ["green_tripdata_2021-03.parquet", "green_tripdata_2021-04.parquet", "green_tripdata_2021-05.parquet"] 5 | path = "./datasets" 6 | print(f"Download files:") 7 | for file in files: 8 | 9 | # Change the url based on what works for you whether s3 or cloudfront 10 | url = f"https://d37ci6vzurychx.cloudfront.net/trip-data/{file}" 11 | resp = requests.get(url, stream=True) 12 | save_path = f"{path}/{file}" 13 | with open(save_path, "wb") as handle: 14 | for data in tqdm(resp.iter_content(), 15 | desc=f"{file}", 16 | postfix=f"save to {save_path}", 17 | total=int(resp.headers["Content-Length"])): 18 | handle.write(data) 19 | -------------------------------------------------------------------------------- /06-best-practices/code/scripts/test_cloud_e2e.sh: -------------------------------------------------------------------------------- 1 | export KINESIS_STREAM_INPUT="stg_ride_events-mlops-zoomcamp" 2 | export KINESIS_STREAM_OUTPUT="stg_ride_predictions-mlops-zoomcamp" 3 | 4 | SHARD_ID=$(aws kinesis put-record \ 5 | --stream-name ${KINESIS_STREAM_INPUT} \ 6 | --partition-key 1 --cli-binary-format raw-in-base64-out \ 7 | --data '{"ride": { 8 | "PULocationID": 130, 9 | "DOLocationID": 205, 10 | "trip_distance": 3.66 11 | }, 12 | "ride_id": 156}' \ 13 | --query 'ShardId' 14 | ) 15 | 16 | #SHARD_ITERATOR=$(aws kinesis get-shard-iterator --shard-id ${SHARD_ID} --shard-iterator-type TRIM_HORIZON --stream-name ${KINESIS_STREAM_OUTPUT} --query 'ShardIterator') 17 | 18 | #aws kinesis get-records --shard-iterator $SHARD_ITERATOR 19 | -------------------------------------------------------------------------------- 
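Note on the end-to-end test script above: `test_cloud_e2e.sh` only pushes a test event to the staging input stream and leaves the read-back commands commented out. Below is a minimal sketch of how that verification step could be completed, assuming the same staging output stream, a single shard, an authenticated AWS CLI, and `jq` available; the shard id shown is illustrative rather than taken from the original script.

```bash
# Hypothetical completion of the commented-out read-back step (not part of the original script).
export KINESIS_STREAM_OUTPUT="stg_ride_predictions-mlops-zoomcamp"

# Take an iterator at the start of the (assumed single) shard of the output stream
SHARD_ITERATOR=$(aws kinesis get-shard-iterator \
    --stream-name ${KINESIS_STREAM_OUTPUT} \
    --shard-id shardId-000000000000 \
    --shard-iterator-type TRIM_HORIZON \
    --query 'ShardIterator' \
    --output text)

# Read the records and decode the base64-encoded payload of the first one
aws kinesis get-records --shard-iterator ${SHARD_ITERATOR} \
    | jq -r '.Records[0].Data' \
    | base64 --decode
```

If the Lambda has processed the event, the decoded payload should include the prediction written for `ride_id` 156.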
/04-deployment/web-service-mlflow/README.md: -------------------------------------------------------------------------------- 1 | ## Getting the model for deployment from MLflow 2 | 3 | * Take the code from the previous video 4 | * Train another model, register with MLflow 5 | * Put the model into a scikit-learn pipeline 6 | * Model deployment with tracking server 7 | * Model deployment without the tracking server 8 | 9 | Starting the MLflow server with S3: 10 | 11 | ```bash 12 | mlflow server \ 13 | --backend-store-uri=sqlite:///mlflow.db \ 14 | --default-artifact-root=s3://mlflow-models-alexey/ 15 | ``` 16 | 17 | Downloading the artifact 18 | 19 | ```bash 20 | export MLFLOW_TRACKING_URI="http://127.0.0.1:5000" 21 | export MODEL_RUN_ID="6dd459b11b4e48dc862f4e1019d166f6" 22 | 23 | mlflow artifacts download \ 24 | --run-id ${MODEL_RUN_ID} \ 25 | --artifact-path model \ 26 | --dst-path . 27 | ``` -------------------------------------------------------------------------------- /05-monitoring/homework/docker-compose-homework.yml: -------------------------------------------------------------------------------- 1 | version: "3.7" 2 | 3 | volumes: 4 | mongo_data: {} 5 | 6 | networks: 7 | front-tier: 8 | back-tier: 9 | 10 | services: 11 | prediction_service: 12 | build: 13 | context: prediction_service 14 | dockerfile: Dockerfile 15 | depends_on: 16 | - mongo 17 | environment: 18 | MONGO_DATABASE: "prediction_service" 19 | MONGO_ADDRESS: "mongodb://mongo.:27017/" 20 | MODEL_VERSION: "1" 21 | MODEL_FILE: "lin_reg.bin" 22 | 23 | ports: 24 | - 9696:9696 25 | networks: 26 | - back-tier 27 | - front-tier 28 | 29 | mongo: 30 | image: mongo 31 | ports: 32 | - 27017:27017 33 | volumes: 34 | - mongo_data:/data/db 35 | networks: 36 | - back-tier 37 | - front-tier 38 | -------------------------------------------------------------------------------- /05-monitoring/homework/docker-compose-homework-solution.yml: -------------------------------------------------------------------------------- 1 | version: "3.7" 2 | 3 | volumes: 4 | mongo_data: {} 5 | 6 | networks: 7 | front-tier: 8 | back-tier: 9 | 10 | services: 11 | prediction_service: 12 | build: 13 | context: prediction_service 14 | dockerfile: Dockerfile 15 | depends_on: 16 | - mongo 17 | environment: 18 | MONGO_DATABASE: "prediction_service" 19 | MONGO_ADDRESS: "mongodb://mongo.:27017/" 20 | MODEL_VERSION: "2" 21 | MODEL_FILE: "lin_reg_V2.bin" 22 | 23 | ports: 24 | - 9696:9696 25 | networks: 26 | - back-tier 27 | - front-tier 28 | 29 | mongo: 30 | image: mongo 31 | ports: 32 | - 27017:27017 33 | volumes: 34 | - mongo_data:/data/db 35 | networks: 36 | - back-tier 37 | - front-tier 38 | -------------------------------------------------------------------------------- /06-best-practices/code/infrastructure/modules/lambda/variables.tf: -------------------------------------------------------------------------------- 1 | variable "source_stream_name" { 2 | type = string 3 | description = "Source Kinesis Data Streams stream name" 4 | } 5 | 6 | variable "source_stream_arn" { 7 | type = string 8 | description = "Source Kinesis Data Streams stream name" 9 | } 10 | 11 | variable "output_stream_name" { 12 | description = "Name of output stream where all the events will be passed" 13 | } 14 | 15 | variable "output_stream_arn" { 16 | description = "ARN of output stream where all the events will be passed" 17 | } 18 | 19 | variable "model_bucket" { 20 | description = "Name of the bucket" 21 | } 22 | 23 | variable "lambda_function_name" { 24 | description = "Name of the 
lambda function" 25 | } 26 | 27 | variable "image_uri" { 28 | description = "ECR image uri" 29 | } 30 | -------------------------------------------------------------------------------- /06-best-practices/code/infrastructure/modules/kinesis/variables.tf: -------------------------------------------------------------------------------- 1 | variable "stream_name" { 2 | type = string 3 | description = "Kinesis stream name" 4 | } 5 | 6 | variable "shard_count" { 7 | type = number 8 | description = "Kinesis stream shard count" 9 | } 10 | 11 | variable "retention_period" { 12 | type = number 13 | description = "Kinesis stream retention period" 14 | } 15 | 16 | variable "shard_level_metrics" { 17 | type = list(string) 18 | description = "shard_level_metrics" 19 | default = [ 20 | "IncomingBytes", 21 | "OutgoingBytes", 22 | "OutgoingRecords", 23 | "ReadProvisionedThroughputExceeded", 24 | "WriteProvisionedThroughputExceeded", 25 | "IncomingRecords", 26 | "IteratorAgeMilliseconds", 27 | ] 28 | } 29 | 30 | variable "tags" { 31 | description = "Tags for kinesis stream" 32 | default = "mlops-zoomcamp" 33 | } 34 | -------------------------------------------------------------------------------- /04-deployment/web-service/predict.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | 3 | from flask import Flask, request, jsonify 4 | 5 | with open('lin_reg.bin', 'rb') as f_in: 6 | (dv, model) = pickle.load(f_in) 7 | 8 | 9 | def prepare_features(ride): 10 | features = {} 11 | features['PU_DO'] = '%s_%s' % (ride['PULocationID'], ride['DOLocationID']) 12 | features['trip_distance'] = ride['trip_distance'] 13 | return features 14 | 15 | 16 | def predict(features): 17 | X = dv.transform(features) 18 | preds = model.predict(X) 19 | return float(preds[0]) 20 | 21 | 22 | app = Flask('duration-prediction') 23 | 24 | 25 | @app.route('/predict', methods=['POST']) 26 | def predict_endpoint(): 27 | ride = request.get_json() 28 | 29 | features = prepare_features(ride) 30 | pred = predict(features) 31 | 32 | result = { 33 | 'duration': pred 34 | } 35 | 36 | return jsonify(result) 37 | 38 | 39 | if __name__ == "__main__": 40 | app.run(debug=True, host='0.0.0.0', port=9696) -------------------------------------------------------------------------------- /01-intro/meta.json: -------------------------------------------------------------------------------- 1 | { 2 | "module": { 3 | "number": 1, 4 | "title": "Introduction" 5 | }, 6 | "units": [ 7 | { 8 | "number": 1, 9 | "title": "Introduction", 10 | "youtube": "https://www.youtube.com/watch?v=s0uaFZSzwfI" 11 | }, 12 | { 13 | "number": 2, 14 | "title": "Environment preparation", 15 | "youtube": "https://www.youtube.com/watch?v=IXSiYkP23zo" 16 | }, 17 | { 18 | "number": 3, 19 | "title": "(Optional) Training a ride duration prediction model", 20 | "youtube": "https://www.youtube.com/watch?v=iRunifGSHFc" 21 | }, 22 | { 23 | "number": 4, 24 | "title": "Course overview", 25 | "youtube": "https://www.youtube.com/watch?v=teP9KWkP6SM" 26 | }, 27 | { 28 | "number": 5, 29 | "title": "MLOps maturity model", 30 | "youtube": "https://www.youtube.com/watch?v=XwTH8BDGzYk" 31 | }, 32 | { 33 | "number": 6, 34 | "title": "Homework", 35 | "youtube": "" 36 | } 37 | ] 38 | } -------------------------------------------------------------------------------- /05-monitoring/evidently_service/config/grafana_dashboards.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: 1 2 | 3 | providers: 4 | # an 
unique provider name. Required 5 | - name: 'Evidently Dashboards' 6 | # Org id. Default to 1 7 | orgId: 1 8 | # name of the dashboard folder. 9 | folder: '' 10 | # folder UID. will be automatically generated if not specified 11 | folderUid: '' 12 | # provider type. Default to 'file' 13 | type: file 14 | # disable dashboard deletion 15 | disableDeletion: false 16 | # how often Grafana will scan for changed dashboards 17 | updateIntervalSeconds: 10 18 | # allow updating provisioned dashboards from the UI 19 | allowUiUpdates: false 20 | options: 21 | # path to dashboard files on disk. Required when using the 'file' type 22 | path: /opt/grafana/dashboards 23 | # use folder names from filesystem to create folders in Grafana 24 | foldersFromFilesStructure: true 25 | -------------------------------------------------------------------------------- /06-best-practices/code/integraton-test/test_docker.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=duplicate-code 2 | 3 | import json 4 | 5 | import requests 6 | from deepdiff import DeepDiff 7 | 8 | with open('event.json', 'rt', encoding='utf-8') as f_in: 9 | event = json.load(f_in) 10 | 11 | 12 | url = 'http://localhost:8080/2015-03-31/functions/function/invocations' 13 | actual_response = requests.post(url, json=event).json() 14 | print('actual response:') 15 | 16 | print(json.dumps(actual_response, indent=2)) 17 | 18 | expected_response = { 19 | 'predictions': [ 20 | { 21 | 'model': 'ride_duration_prediction_model', 22 | 'version': 'Test123', 23 | 'prediction': { 24 | 'ride_duration': 21.3, 25 | 'ride_id': 256, 26 | }, 27 | } 28 | ] 29 | } 30 | 31 | 32 | diff = DeepDiff(actual_response, expected_response, significant_digits=1) 33 | print(f'diff={diff}') 34 | 35 | assert 'type_changes' not in diff 36 | assert 'values_changed' not in diff 37 | -------------------------------------------------------------------------------- /05-monitoring/homework/prefect-monitoring/monitor_profile.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 48, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from pymongo import MongoClient\n", 10 | "import pprint" 11 | ] 12 | } 13 | ], 14 | "metadata": { 15 | "kernelspec": { 16 | "display_name": "Python 3.9.12 ('prediction_service_practice-b8Zbdkaa')", 17 | "language": "python", 18 | "name": "python3" 19 | }, 20 | "language_info": { 21 | "codemirror_mode": { 22 | "name": "ipython", 23 | "version": 3 24 | }, 25 | "file_extension": ".py", 26 | "mimetype": "text/x-python", 27 | "name": "python", 28 | "nbconvert_exporter": "python", 29 | "pygments_lexer": "ipython3", 30 | "version": "3.9.12" 31 | }, 32 | "orig_nbformat": 4, 33 | "vscode": { 34 | "interpreter": { 35 | "hash": "63df8a96dcc14a3f8fc6f13bb4daf95ac616547a440980d0dc62a5d5ed05a07e" 36 | } 37 | } 38 | }, 39 | "nbformat": 4, 40 | "nbformat_minor": 2 41 | } 42 | -------------------------------------------------------------------------------- /02-experiment-tracking/homework/train.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import pickle 4 | 5 | from sklearn.ensemble import RandomForestRegressor 6 | from sklearn.metrics import mean_squared_error 7 | 8 | 9 | def load_pickle(filename: str): 10 | with open(filename, "rb") as f_in: 11 | return pickle.load(f_in) 12 | 13 | 14 | def run(data_path): 15 | 16 | X_train, y_train = 
load_pickle(os.path.join(data_path, "train.pkl")) 17 | X_valid, y_valid = load_pickle(os.path.join(data_path, "valid.pkl")) 18 | 19 | rf = RandomForestRegressor(max_depth=10, random_state=0) 20 | rf.fit(X_train, y_train) 21 | y_pred = rf.predict(X_valid) 22 | 23 | rmse = mean_squared_error(y_valid, y_pred, squared=False) 24 | 25 | 26 | if __name__ == '__main__': 27 | 28 | parser = argparse.ArgumentParser() 29 | parser.add_argument( 30 | "--data_path", 31 | default="./output", 32 | help="the location where the processed NYC taxi trip data was saved." 33 | ) 34 | args = parser.parse_args() 35 | 36 | run(args.data_path) 37 | -------------------------------------------------------------------------------- /05-monitoring/send_data.py: -------------------------------------------------------------------------------- 1 | import json 2 | import uuid 3 | from datetime import datetime 4 | from time import sleep 5 | 6 | import pyarrow.parquet as pq 7 | import requests 8 | 9 | table = pq.read_table("green_tripdata_2022-01.parquet") 10 | data = table.to_pylist() 11 | 12 | 13 | class DateTimeEncoder(json.JSONEncoder): 14 | def default(self, o): 15 | if isinstance(o, datetime): 16 | return o.isoformat() 17 | return json.JSONEncoder.default(self, o) 18 | 19 | 20 | with open("target.csv", 'w') as f_target: 21 | for row in data: 22 | row['id'] = str(uuid.uuid4()) 23 | duration = (row['lpep_dropoff_datetime'] - row['lpep_pickup_datetime']).total_seconds() / 60 24 | if duration != 0.0: 25 | f_target.write(f"{row['id']},{duration}\n") 26 | resp = requests.post("http://127.0.0.1:9696/predict", 27 | headers={"Content-Type": "application/json"}, 28 | data=json.dumps(row, cls=DateTimeEncoder)).json() 29 | print(f"prediction: {resp['duration']}") 30 | sleep(1) 31 | -------------------------------------------------------------------------------- /after-sign-up.md: -------------------------------------------------------------------------------- 1 | ## Thank you! 2 | 3 | Thanks for signing up for the course. 4 | 5 | The process of adding you to the mailing list is not automated yet, 6 | but you will hear from us closer to the course start. 7 | 8 | To make sure you don't miss any announcements: 9 | 10 | - Register in [DataTalks.Club's Slack](https://datatalks.club/slack.html) and join the [`#course-mlops-zoomcamp`](https://app.slack.com/client/T01ATQK62F8/C02R98X7DS9) channel 11 | - Join the [course Telegram channel with announcements](https://t.me/dtc_courses) 12 | - [Tweet about the course!](https://ctt.ac/fH67W) 13 | - Subscribe to [DataTalks.Club's YouTube channel](https://www.youtube.com/c/DataTalksClub) and check 14 | [the course playlist](https://www.youtube.com/playlist?list=PL3MmuxUbc_hIUISrluw_A7wDSmfOhErJK) 15 | - [Technical FAQ](https://docs.google.com/document/d/12TlBfhIiKtyBv8RnsoJR6F72bkPDGEvPOItJIxaEzE0/edit) 16 | - Subscribe to our [public Google Calendar](https://calendar.google.com/calendar/?cid=M3Jzbmg0ZDA2aHVsY2M1ZjcyNDJtODNyMTRAZ3JvdXAuY2FsZW5kYXIuZ29vZ2xlLmNvbQ) (it works from Desktop only) 17 | 18 | See you in May! 
19 | -------------------------------------------------------------------------------- /06-best-practices/code/integraton-test/event.json: -------------------------------------------------------------------------------- 1 | { 2 | "Records": [ 3 | { 4 | "kinesis": { 5 | "kinesisSchemaVersion": "1.0", 6 | "partitionKey": "1", 7 | "sequenceNumber": "49630081666084879290581185630324770398608704880802529282", 8 | "data": "ewogICAgICAgICJyaWRlIjogewogICAgICAgICAgICAiUFVMb2NhdGlvbklEIjogMTMwLAogICAgICAgICAgICAiRE9Mb2NhdGlvbklEIjogMjA1LAogICAgICAgICAgICAidHJpcF9kaXN0YW5jZSI6IDMuNjYKICAgICAgICB9LCAKICAgICAgICAicmlkZV9pZCI6IDI1NgogICAgfQ==", 9 | "approximateArrivalTimestamp": 1654161514.132 10 | }, 11 | "eventSource": "aws:kinesis", 12 | "eventVersion": "1.0", 13 | "eventID": "shardId-000000000000:49630081666084879290581185630324770398608704880802529282", 14 | "eventName": "aws:kinesis:record", 15 | "invokeIdentityArn": "arn:aws:iam::387546586013:role/lambda-kinesis-role", 16 | "awsRegion": "eu-west-1", 17 | "eventSourceARN": "arn:aws:kinesis:eu-west-1:387546586013:stream/ride_events" 18 | } 19 | ] 20 | } 21 | -------------------------------------------------------------------------------- /05-monitoring/meta.json: -------------------------------------------------------------------------------- 1 | { 2 | "module": { 3 | "number": 5, 4 | "title": "Model Monitoring" 5 | }, 6 | "units": [ 7 | { 8 | "number": 1, 9 | "title": "Monitoring for ML-based services", 10 | "youtube": "https://www.youtube.com/watch?v=gMiT11Bp05A" 11 | }, 12 | { 13 | "number": 2, 14 | "title": "Setting up the environment", 15 | "youtube": "https://www.youtube.com/watch?v=VkkpVXW53bo" 16 | }, 17 | { 18 | "number": 3, 19 | "title": "Creating a prediction service and simulating traffic", 20 | "youtube": "https://www.youtube.com/watch?v=umQ3Mo5G1o8" 21 | }, 22 | { 23 | "number": 4, 24 | "title": "Realtime monitoring walktrough (Prometheus, Evidently, Grafana)", 25 | "youtube": "https://www.youtube.com/watch?v=r_m4VFEJ8yY" 26 | }, 27 | { 28 | "number": 5, 29 | "title": "Batch monitoring walktrough (Prefect, MongoDB, Evidently)", 30 | "youtube": "https://www.youtube.com/watch?v=KefdYuue_FE" 31 | }, 32 | { 33 | "number": 6, 34 | "title": "Homework", 35 | "youtube": "" 36 | } 37 | ] 38 | } -------------------------------------------------------------------------------- /04-deployment/web-service-mlflow/predict.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pickle 3 | 4 | import mlflow 5 | from flask import Flask, request, jsonify 6 | 7 | 8 | RUN_ID = os.getenv('RUN_ID') 9 | 10 | logged_model = f's3://mlflow-models-alexey/1/{RUN_ID}/artifacts/model' 11 | # logged_model = f'runs:/{RUN_ID}/model' 12 | model = mlflow.pyfunc.load_model(logged_model) 13 | 14 | 15 | def prepare_features(ride): 16 | features = {} 17 | features['PU_DO'] = '%s_%s' % (ride['PULocationID'], ride['DOLocationID']) 18 | features['trip_distance'] = ride['trip_distance'] 19 | return features 20 | 21 | 22 | def predict(features): 23 | preds = model.predict(features) 24 | return float(preds[0]) 25 | 26 | 27 | app = Flask('duration-prediction') 28 | 29 | 30 | @app.route('/predict', methods=['POST']) 31 | def predict_endpoint(): 32 | ride = request.get_json() 33 | 34 | features = prepare_features(ride) 35 | pred = predict(features) 36 | 37 | result = { 38 | 'duration': pred, 39 | 'model_version': RUN_ID 40 | } 41 | 42 | return jsonify(result) 43 | 44 | 45 | if __name__ == "__main__": 46 | app.run(debug=True, 
host='0.0.0.0', port=9696) 47 | -------------------------------------------------------------------------------- /06-best-practices/code/.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # See https://pre-commit.com for more information 2 | # See https://pre-commit.com/hooks.html for more hooks 3 | repos: 4 | - repo: https://github.com/pre-commit/pre-commit-hooks 5 | rev: v3.2.0 6 | hooks: 7 | - id: trailing-whitespace 8 | - id: end-of-file-fixer 9 | - id: check-yaml 10 | - id: check-added-large-files 11 | - repo: https://github.com/pycqa/isort 12 | rev: 5.10.1 13 | hooks: 14 | - id: isort 15 | name: isort (python) 16 | - repo: https://github.com/psf/black 17 | rev: 22.6.0 18 | hooks: 19 | - id: black 20 | language_version: python3.9 21 | - repo: local 22 | hooks: 23 | - id: pylint 24 | name: pylint 25 | entry: pylint 26 | language: system 27 | types: [python] 28 | args: [ 29 | "-rn", # Only display messages 30 | "-sn", # Don't display the score 31 | "--recursive=y" 32 | ] 33 | - repo: local 34 | hooks: 35 | - id: pytest-check 36 | name: pytest-check 37 | entry: pytest 38 | language: system 39 | pass_filenames: false 40 | always_run: true 41 | args: [ 42 | "tests/" 43 | ] 44 | -------------------------------------------------------------------------------- /05-monitoring/homework/prefect-monitoring/send_data.py: -------------------------------------------------------------------------------- 1 | import json 2 | import uuid 3 | from datetime import datetime 4 | 5 | import pyarrow.parquet as pq 6 | import requests 7 | 8 | table = pq.read_table("../datasets/green_tripdata_2021-05.parquet")\ 9 | .to_pandas()\ 10 | .sample(n=5000, random_state=42) #5000 rows sampled 11 | data = table.copy() 12 | 13 | 14 | class DateTimeEncoder(json.JSONEncoder): 15 | def default(self, o): 16 | if isinstance(o, datetime): 17 | return o.isoformat() 18 | return json.JSONEncoder.default(self, o) 19 | 20 | 21 | with open("target.csv", 'w') as f_target: 22 | for index, row in data.iterrows(): 23 | row['id'] = str(uuid.uuid4()) 24 | duration = (row['lpep_dropoff_datetime'] - row['lpep_pickup_datetime']).total_seconds() / 60 25 | if duration >= 1 and duration <= 60: 26 | f_target.write(f"{row['id']},{duration}\n") 27 | resp = requests.post("http://127.0.0.1:9696/predict-duration", 28 | headers={"Content-Type": "application/json"}, 29 | data=row.to_json()).json() 30 | print(f"prediction: {resp['data']['duration']}") 31 | -------------------------------------------------------------------------------- /03-orchestration/windows.md: -------------------------------------------------------------------------------- 1 | ## Prefect on Windows 2 | 3 | If you use WSL, you should have no problems running Prefect Orion. 4 | 5 | But if you aren't, there is just a slight tweak to installation instructions if you are on Windows. 6 | 7 | You will need to install 2.0b7 (to be released soon). 2.0b7 will officially support Windows. Use this instead of 2.0b5 shows in the lectures. 8 | 9 | ``` 10 | pip install prefect==2.0b7 11 | ``` 12 | 13 | Note that 2.0b5 and 2.0b7 are not compatible because 2.0b7 contains breaking changes. If you run into issues, you can reset the Prefect database by doing: 14 | 15 | ``` 16 | prefect orion database reset 17 | ``` 18 | 19 | This command will clear the data held by Orion. 20 | 21 | ### Docker 22 | 23 | You can also try running Prefect in Docker. 
For example: 24 | 25 | ``` 26 | docker run -it --rm \ 27 | -p 4200:4200 \ 28 | prefecthq/prefect:2.0b5-python3.8 \ 29 | prefect orion start --host=0.0.0.0 30 | ``` 31 | 32 | and then view it from `localhost:4200`. 33 | 34 | ### Prefect Cloud 35 | 36 | You can also just use Cloud so you don't have to host Prefect Orion yourself. Instructions can be found here: 37 | 38 | https://orion-docs.prefect.io/ui/cloud-getting-started/ 39 | -------------------------------------------------------------------------------- /06-best-practices/code/integraton-test/run.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | if [[ -z "${GITHUB_ACTIONS}" ]]; then 4 | cd "$(dirname "$0")" 5 | fi 6 | 7 | if [ "${LOCAL_IMAGE_NAME}" == "" ]; then 8 | LOCAL_TAG=`date +"%Y-%m-%d-%H-%M"` 9 | export LOCAL_IMAGE_NAME="stream-model-duration:${LOCAL_TAG}" 10 | echo "LOCAL_IMAGE_NAME is not set, building a new image with tag ${LOCAL_IMAGE_NAME}" 11 | docker build -t ${LOCAL_IMAGE_NAME} .. 12 | else 13 | echo "no need to build image ${LOCAL_IMAGE_NAME}" 14 | fi 15 | 16 | export PREDICTIONS_STREAM_NAME="ride_predictions" 17 | 18 | docker-compose up -d 19 | 20 | sleep 5 21 | 22 | aws --endpoint-url=http://localhost:4566 \ 23 | kinesis create-stream \ 24 | --stream-name ${PREDICTIONS_STREAM_NAME} \ 25 | --shard-count 1 26 | 27 | pipenv run python test_docker.py 28 | 29 | ERROR_CODE=$? 30 | 31 | if [ ${ERROR_CODE} != 0 ]; then 32 | docker-compose logs 33 | docker-compose down 34 | exit ${ERROR_CODE} 35 | fi 36 | 37 | 38 | pipenv run python test_kinesis.py 39 | 40 | ERROR_CODE=$? 41 | 42 | if [ ${ERROR_CODE} != 0 ]; then 43 | docker-compose logs 44 | docker-compose down 45 | exit ${ERROR_CODE} 46 | fi 47 | 48 | 49 | docker-compose down 50 | -------------------------------------------------------------------------------- /04-deployment/streaming/test.py: -------------------------------------------------------------------------------- 1 | 2 | import lambda_function 3 | 4 | event = { 5 | "Records": [ 6 | { 7 | "kinesis": { 8 | "kinesisSchemaVersion": "1.0", 9 | "partitionKey": "1", 10 | "sequenceNumber": "49630081666084879290581185630324770398608704880802529282", 11 | "data": "ewogICAgICAgICJyaWRlIjogewogICAgICAgICAgICAiUFVMb2NhdGlvbklEIjogMTMwLAogICAgICAgICAgICAiRE9Mb2NhdGlvbklEIjogMjA1LAogICAgICAgICAgICAidHJpcF9kaXN0YW5jZSI6IDMuNjYKICAgICAgICB9LCAKICAgICAgICAicmlkZV9pZCI6IDI1NgogICAgfQ==", 12 | "approximateArrivalTimestamp": 1654161514.132 13 | }, 14 | "eventSource": "aws:kinesis", 15 | "eventVersion": "1.0", 16 | "eventID": "shardId-000000000000:49630081666084879290581185630324770398608704880802529282", 17 | "eventName": "aws:kinesis:record", 18 | "invokeIdentityArn": "arn:aws:iam::387546586013:role/lambda-kinesis-role", 19 | "awsRegion": "eu-west-1", 20 | "eventSourceARN": "arn:aws:kinesis:eu-west-1:387546586013:stream/ride_events" 21 | } 22 | ] 23 | } 24 | 25 | 26 | result = lambda_function.lambda_handler(event, None) 27 | print(result) 28 | -------------------------------------------------------------------------------- /06-best-practices/homework_solution/integration_test.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from datetime import datetime 4 | import pandas as pd 5 | 6 | import batch 7 | 8 | def dt(hour, minute, second=0): 9 | return datetime(2021, 1, 1, hour, minute, second) 10 | 11 | 12 | S3_ENDPOINT_URL = os.getenv('S3_ENDPOINT_URL') 13 | 14 | options = { 15 | 'client_kwargs': { 16 | 
'endpoint_url': S3_ENDPOINT_URL 17 | } 18 | } 19 | 20 | data = [ 21 | (None, None, dt(1, 2), dt(1, 10)), 22 | (1, 1, dt(1, 2), dt(1, 10)), 23 | (1, 1, dt(1, 2, 0), dt(1, 2, 50)), 24 | (1, 1, dt(1, 2, 0), dt(2, 2, 1)), 25 | ] 26 | 27 | columns = ['PUlocationID', 'DOlocationID', 'pickup_datetime', 'dropOff_datetime'] 28 | df_input = pd.DataFrame(data, columns=columns) 29 | 30 | 31 | input_file = batch.get_input_path(2021, 1) 32 | output_file = batch.get_output_path(2021, 1) 33 | 34 | df_input.to_parquet( 35 | input_file, 36 | engine='pyarrow', 37 | compression=None, 38 | index=False, 39 | storage_options=options 40 | ) 41 | 42 | 43 | os.system('python batch.py 2021 1') 44 | 45 | 46 | df_actual = pd.read_parquet(output_file, storage_options=options) 47 | 48 | 49 | assert abs(df_actual['predicted_duration'].sum() - 69.28) < 0.1 -------------------------------------------------------------------------------- /06-best-practices/meta.json: -------------------------------------------------------------------------------- 1 | { 2 | "module": { 3 | "number": 6, 4 | "title": "Best Practices" 5 | }, 6 | "units": [ 7 | { 8 | "number": "1", 9 | "title": "Testing Python code with pytest", 10 | "youtube": "https://www.youtube.com/watch?v=CJp1eFQP5nk" 11 | }, 12 | { 13 | "number": "2", 14 | "title": "Integration tests with docker-compose", 15 | "youtube": "https://www.youtube.com/watch?v=lBX0Gl7Z1ck" 16 | }, 17 | { 18 | "number": "3", 19 | "title": "Testing cloud services with LocalStack", 20 | "youtube": "https://www.youtube.com/watch?v=9yMO86SYvuI" 21 | }, 22 | { 23 | "number": "4", 24 | "title": "Code quality: linting and formatting", 25 | "youtube": "https://www.youtube.com/watch?v=uImvWE-iSDQ" 26 | }, 27 | { 28 | "number": "5", 29 | "title": "Git pre-commit hooks", 30 | "youtube": "https://www.youtube.com/watch?v=lmMZ7Axk2T8" 31 | }, 32 | { 33 | "number": "6", 34 | "title": "Makefiles and make", 35 | "youtube": "https://www.youtube.com/watch?v=F6DZdvbRZQQ" 36 | }, 37 | { 38 | "number": "X", 39 | "title": "Homework", 40 | "youtube": "" 41 | } 42 | ] 43 | } -------------------------------------------------------------------------------- /03-orchestration/meta.json: -------------------------------------------------------------------------------- 1 | { 2 | "module": { 3 | "number": 3, 4 | "title": "Orchestration and ML Pipelines" 5 | }, 6 | "units": [ 7 | { 8 | "number": 1, 9 | "title": "Negative engineering and workflow orchestration", 10 | "youtube": "https://www.youtube.com/watch?v=eKzCjNXoCTc" 11 | }, 12 | { 13 | "number": 2, 14 | "title": "Introduction to Prefect 2.0", 15 | "youtube": "https://www.youtube.com/watch?v=Yb6NJwI7bXw" 16 | }, 17 | { 18 | "number": 3, 19 | "title": "First Prefect flow and basics", 20 | "youtube": "https://www.youtube.com/watch?v=MCFpURG506w" 21 | }, 22 | { 23 | "number": 4, 24 | "title": "Remote Prefect Orion deployment", 25 | "youtube": "https://www.youtube.com/watch?v=ComkSIAB0k4" 26 | }, 27 | { 28 | "number": 5, 29 | "title": "Deployment of Prefect flow", 30 | "youtube": "https://www.youtube.com/watch?v=xw9JfaWPPps" 31 | }, 32 | { 33 | "number": 6, 34 | "title": "MLOps Zoomcamp 3.6 - (Optional) Work queues and agents", 35 | "youtube": "https://www.youtube.com/watch?v=oDSf0ThKsso" 36 | }, 37 | { 38 | "number": 7, 39 | "title": "Homework", 40 | "youtube": "" 41 | } 42 | ] 43 | } -------------------------------------------------------------------------------- /06-best-practices/homework_solution/tests/test_batch.py: 
-------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | 3 | import pandas as pd 4 | 5 | import batch 6 | 7 | 8 | def dt(hour, minute, second=0): 9 | return datetime(2021, 1, 1, hour, minute, second) 10 | 11 | 12 | def test_prepare_data(): 13 | data = [ 14 | (None, None, dt(1, 2), dt(1, 10)), 15 | (1, 1, dt(1, 2), dt(1, 10)), 16 | (1, 1, dt(1, 2, 0), dt(1, 2, 50)), 17 | (1, 1, dt(1, 2, 0), dt(2, 2, 1)), 18 | ] 19 | 20 | categorical = ['PUlocationID', 'DOlocationID'] 21 | columns = ['PUlocationID', 'DOlocationID', 'pickup_datetime', 'dropOff_datetime'] 22 | df = pd.DataFrame(data, columns=columns) 23 | 24 | df_actual = batch.prepare_data(df, categorical) 25 | 26 | data_expected = [ 27 | ('-1', '-1', 8.0), 28 | ( '1', '1', 8.0), 29 | ] 30 | 31 | columns_test = ['PUlocationID', 'DOlocationID', 'duration'] 32 | df_expected = pd.DataFrame(data_expected, columns=columns_test) 33 | print(df_actual) 34 | 35 | assert (df_actual['PUlocationID'] == df_expected['PUlocationID']).all() 36 | assert (df_actual['DOlocationID'] == df_expected['DOlocationID']).all() 37 | assert (df_actual['duration'] - df_expected['duration']).abs().sum() < 0.0000001 38 | 39 | 40 | -------------------------------------------------------------------------------- /04-deployment/streaming/test_docker.py: -------------------------------------------------------------------------------- 1 | import requests 2 | 3 | event = { 4 | "Records": [ 5 | { 6 | "kinesis": { 7 | "kinesisSchemaVersion": "1.0", 8 | "partitionKey": "1", 9 | "sequenceNumber": "49630081666084879290581185630324770398608704880802529282", 10 | "data": "ewogICAgICAgICJyaWRlIjogewogICAgICAgICAgICAiUFVMb2NhdGlvbklEIjogMTMwLAogICAgICAgICAgICAiRE9Mb2NhdGlvbklEIjogMjA1LAogICAgICAgICAgICAidHJpcF9kaXN0YW5jZSI6IDMuNjYKICAgICAgICB9LCAKICAgICAgICAicmlkZV9pZCI6IDI1NgogICAgfQ==", 11 | "approximateArrivalTimestamp": 1654161514.132 12 | }, 13 | "eventSource": "aws:kinesis", 14 | "eventVersion": "1.0", 15 | "eventID": "shardId-000000000000:49630081666084879290581185630324770398608704880802529282", 16 | "eventName": "aws:kinesis:record", 17 | "invokeIdentityArn": "arn:aws:iam::387546586013:role/lambda-kinesis-role", 18 | "awsRegion": "eu-west-1", 19 | "eventSourceARN": "arn:aws:kinesis:eu-west-1:387546586013:stream/ride_events" 20 | } 21 | ] 22 | } 23 | 24 | 25 | url = 'http://localhost:8080/2015-03-31/functions/function/invocations' 26 | response = requests.post(url, json=event) 27 | print(response.json()) 28 | -------------------------------------------------------------------------------- /asking-questions.md: -------------------------------------------------------------------------------- 1 | ## Asking questions 2 | 3 | If you have any questions, ask them 4 | in the [`#course-mlops-zoomcamp`](https://app.slack.com/client/T01ATQK62F8/C02R98X7DS9) channel in [DataTalks.Club](https://datatalks.club) slack. 5 | 6 | To keep our discussion in Slack more organized, we ask you to follow these suggestions: 7 | 8 | * Before asking a question, check [FAQ](https://docs.google.com/document/d/12TlBfhIiKtyBv8RnsoJR6F72bkPDGEvPOItJIxaEzE0/edit). 9 | * Use threads. When you have a problem, first describe the problem shortly 10 | and then put the actual error in the thread - so it doesn't take the entire screen. 11 | * Instead of screenshots, it's better to copy-paste the error you're getting in text. 12 | Use ` ``` ` for formatting your code. 13 | It's very difficult to read text from screenshots. 
14 | * Please don't take pictures of your code with a phone. It's even harder to read. Follow the previous suggestion, 15 | and in rare cases when you need to show what happens on your screen, take a screenshot. 16 | * You don't need to tag the instructors when you have a problem. We will see it eventually. 17 | * If somebody helped you with your problem and it's not in [FAQ](https://docs.google.com/document/d/12TlBfhIiKtyBv8RnsoJR6F72bkPDGEvPOItJIxaEzE0/edit), please add it there. 18 | It'll help other students. 19 | -------------------------------------------------------------------------------- /06-best-practices/code/scripts/deploy_manual.sh: -------------------------------------------------------------------------------- 1 | AWS_REGION="eu-west-1" 2 | 3 | # Dynamically generated by TF 4 | export MODEL_BUCKET_PROD="stg-mlflow-models-code-owners-mlops-zoomcamp" 5 | export PREDICTIONS_STREAM_NAME="stg_ride_predictions-mlops-zoomcamp" 6 | export LAMBDA_FUNCTION="stg_prediction_lambda_mlops-zoomcamp" 7 | 8 | # Model artifacts bucket from the previous weeks (MLflow experiments) 9 | export MODEL_BUCKET_DEV="mlflow-models-alexey" 10 | 11 | # Get latest RUN_ID from latest S3 partition. 12 | # NOT FOR PRODUCTION! 13 | # In practice, this is generally picked up from your experiment tracking tool such as MLflow or DVC 14 | export RUN_ID=$(aws s3api list-objects-v2 --bucket ${MODEL_BUCKET_DEV} \ 15 | --query 'sort_by(Contents, &LastModified)[-1].Key' --output=text | cut -f2 -d/) 16 | 17 | # NOT FOR PRODUCTION! 18 | # Just mocking the artifacts from training process in the Prod env 19 | aws s3 sync s3://${MODEL_BUCKET_DEV} s3://${MODEL_BUCKET_PROD} 20 | 21 | # Set new var RUN_ID in existing set of vars. 22 | variables="{PREDICTIONS_STREAM_NAME=${PREDICTIONS_STREAM_NAME}, MODEL_BUCKET=${MODEL_BUCKET_PROD}, RUN_ID=${RUN_ID}}" 23 | 24 | # https://docs.aws.amazon.com/lambda/latest/dg/configuration-envvars.html 25 | aws lambda update-function-configuration --function-name ${LAMBDA_FUNCTION} --environment "Variables=${variables}" 26 | -------------------------------------------------------------------------------- /04-deployment/meta.json: -------------------------------------------------------------------------------- 1 | { 2 | "module": { 3 | "number": 4, 4 | "title": "Model Deployment" 5 | }, 6 | "units": [ 7 | { 8 | "number": 1, 9 | "title": "Three ways of deploying a model", 10 | "youtube": "https://www.youtube.com/watch?v=JMGe4yIoBRA" 11 | }, 12 | { 13 | "number": 2, 14 | "title": "Web-services: Deploying models with Flask and Docker", 15 | "youtube": "https://www.youtube.com/watch?v=D7wfMAdgdF8" 16 | }, 17 | { 18 | "number": 3, 19 | "title": "Web-services: Getting the models from the model registry (MLflow)", 20 | "youtube": "https://www.youtube.com/watch?v=aewOpHSCkqI" 21 | }, 22 | { 23 | "number": 4, 24 | "title": "(Optional) Streaming: Deploying models with Kinesis and Lambda ", 25 | "youtube": "https://www.youtube.com/watch?v=TCqr9HNcrsI" 26 | }, 27 | { 28 | "number": 5, 29 | "title": "Batch: Preparing a scoring script", 30 | "youtube": "https://www.youtube.com/watch?v=18Lbaaeigek" 31 | }, 32 | { 33 | "number": 6, 34 | "title": "MLOps Zoomcamp 4.6 - Batch: Scheduling batch scoring jobs with Prefect", 35 | "youtube": "https://www.youtube.com/watch?v=ekT_JW213Tc" 36 | }, 37 | { 38 | "number": 7, 39 | "title": "Homework", 40 | "youtube": "" 41 | } 42 | ] 43 | } -------------------------------------------------------------------------------- /02-experiment-tracking/meta.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "module": { 3 | "number": 2, 4 | "title": "Experiment tracking and model management" 5 | }, 6 | "units": [ 7 | { 8 | "number": 1, 9 | "title": "Experiment tracking intro", 10 | "youtube": "https://www.youtube.com/watch?v=MiA7LQin9c8" 11 | }, 12 | { 13 | "number": 2, 14 | "title": "Getting started with MLflow", 15 | "youtube": "https://www.youtube.com/watch?v=cESCQE9J3ZE" 16 | }, 17 | { 18 | "number": 3, 19 | "title": "Experiment tracking with MLflow", 20 | "youtube": "https://www.youtube.com/watch?v=iaJz-T7VWec" 21 | }, 22 | { 23 | "number": 4, 24 | "title": "Model management", 25 | "youtube": "https://www.youtube.com/watch?v=OVUPIX88q88" 26 | }, 27 | { 28 | "number": 5, 29 | "title": "Model registry", 30 | "youtube": "https://www.youtube.com/watch?v=TKHU7HAvGH8" 31 | }, 32 | { 33 | "number": 6, 34 | "title": "MLflow in practice", 35 | "youtube": "https://www.youtube.com/watch?v=1ykg4YmbFVA" 36 | }, 37 | { 38 | "number": 7, 39 | "title": "MLflow: benefits, limitations and alternatives", 40 | "youtube": "https://www.youtube.com/watch?v=Lugy1JPsBRY" 41 | }, 42 | { 43 | "number": 8, 44 | "title": "Homework", 45 | "youtube": "" 46 | } 47 | ] 48 | } -------------------------------------------------------------------------------- /06-best-practices/code/infrastructure/modules/lambda/main.tf: -------------------------------------------------------------------------------- 1 | resource "aws_lambda_function" "kinesis_lambda" { 2 | function_name = var.lambda_function_name 3 | # This can also be any base image to bootstrap the lambda config, unrelated to your Inference service on ECR 4 | # which would be anyway updated regularly via a CI/CD pipeline 5 | image_uri = var.image_uri # required-argument 6 | package_type = "Image" 7 | role = aws_iam_role.iam_lambda.arn 8 | tracing_config { 9 | mode = "Active" 10 | } 11 | // This step is optional (environment) 12 | environment { 13 | variables = { 14 | PREDICTIONS_STREAM_NAME = var.output_stream_name 15 | MODEL_BUCKET = var.model_bucket 16 | } 17 | } 18 | timeout = 180 19 | } 20 | 21 | # Lambda Invoke & Event Source Mapping: 22 | 23 | resource "aws_lambda_function_event_invoke_config" "kinesis_lambda_event" { 24 | function_name = aws_lambda_function.kinesis_lambda.function_name 25 | maximum_event_age_in_seconds = 60 26 | maximum_retry_attempts = 0 27 | } 28 | 29 | resource "aws_lambda_event_source_mapping" "kinesis_mapping" { 30 | event_source_arn = var.source_stream_arn 31 | function_name = aws_lambda_function.kinesis_lambda.arn 32 | starting_position = "LATEST" 33 | depends_on = [ 34 | aws_iam_role_policy_attachment.kinesis_processing 35 | ] 36 | // enabled = var.lambda_event_source_mapping_enabled 37 | // batch_size = var.lambda_event_source_mapping_batch_size 38 | } 39 | -------------------------------------------------------------------------------- /06-best-practices/code/integraton-test/test_kinesis.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=duplicate-code 2 | 3 | import os 4 | import json 5 | from pprint import pprint 6 | 7 | import boto3 8 | from deepdiff import DeepDiff 9 | 10 | kinesis_endpoint = os.getenv('KINESIS_ENDPOINT_URL', "http://localhost:4566") 11 | kinesis_client = boto3.client('kinesis', endpoint_url=kinesis_endpoint) 12 | 13 | stream_name = os.getenv('PREDICTIONS_STREAM_NAME', 'ride_predictions') 14 | shard_id = 'shardId-000000000000' 15 | 16 | 17 | shard_iterator_response = 
kinesis_client.get_shard_iterator( 18 | StreamName=stream_name, 19 | ShardId=shard_id, 20 | ShardIteratorType='TRIM_HORIZON', 21 | ) 22 | 23 | shard_iterator_id = shard_iterator_response['ShardIterator'] 24 | 25 | 26 | records_response = kinesis_client.get_records( 27 | ShardIterator=shard_iterator_id, 28 | Limit=1, 29 | ) 30 | 31 | 32 | records = records_response['Records'] 33 | pprint(records) 34 | 35 | 36 | assert len(records) == 1 37 | 38 | 39 | actual_record = json.loads(records[0]['Data']) 40 | pprint(actual_record) 41 | 42 | expected_record = { 43 | 'model': 'ride_duration_prediction_model', 44 | 'version': 'Test123', 45 | 'prediction': { 46 | 'ride_duration': 21.3, 47 | 'ride_id': 256, 48 | }, 49 | } 50 | 51 | diff = DeepDiff(actual_record, expected_record, significant_digits=1) 52 | print(f'diff={diff}') 53 | 54 | assert 'values_changed' not in diff 55 | assert 'type_changes' not in diff 56 | 57 | 58 | print('all good') 59 | -------------------------------------------------------------------------------- /06-best-practices/homework/batch.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | import sys 5 | import pickle 6 | import pandas as pd 7 | 8 | 9 | year = int(sys.argv[1]) 10 | month = int(sys.argv[2]) 11 | 12 | input_file = f'https://raw.githubusercontent.com/alexeygrigorev/datasets/master/nyc-tlc/fhv/fhv_tripdata_{year:04d}-{month:02d}.parquet' 13 | output_file = f's3://nyc-duration-prediction-alexey/taxi_type=fhv/year={year:04d}/month={month:02d}/predictions.parquet' 14 | 15 | 16 | with open('model.bin', 'rb') as f_in: 17 | dv, lr = pickle.load(f_in) 18 | 19 | 20 | categorical = ['PUlocationID', 'DOlocationID'] 21 | 22 | def read_data(filename): 23 | df = pd.read_parquet(filename) 24 | 25 | df['duration'] = df.dropOff_datetime - df.pickup_datetime 26 | df['duration'] = df.duration.dt.total_seconds() / 60 27 | 28 | df = df[(df.duration >= 1) & (df.duration <= 60)].copy() 29 | 30 | df[categorical] = df[categorical].fillna(-1).astype('int').astype('str') 31 | 32 | return df 33 | 34 | 35 | df = read_data(input_file) 36 | df['ride_id'] = f'{year:04d}/{month:02d}_' + df.index.astype('str') 37 | 38 | 39 | dicts = df[categorical].to_dict(orient='records') 40 | X_val = dv.transform(dicts) 41 | y_pred = lr.predict(X_val) 42 | 43 | 44 | print('predicted mean duration:', y_pred.mean()) 45 | 46 | 47 | df_result = pd.DataFrame() 48 | df_result['ride_id'] = df['ride_id'] 49 | df_result['predicted_duration'] = y_pred 50 | 51 | df_result.to_parquet(output_file, engine='pyarrow', index=False) -------------------------------------------------------------------------------- /04-deployment/homework/batch.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | import sys 5 | import pickle 6 | import pandas as pd 7 | 8 | 9 | year = int(sys.argv[1]) # 2021 10 | month = int(sys.argv[2]) #2 11 | 12 | input_file = f's3://nyc-tlc/trip data/fhv_tripdata_{year:04d}-{month:02d}.parquet' 13 | output_file = f's3://nyc-duration-prediction-alexey/taxi_type=fhv/year={year:04d}/month={month:02d}/predictions.parquet' 14 | 15 | 16 | with open('model.bin', 'rb') as f_in: 17 | dv, lr = pickle.load(f_in) 18 | 19 | 20 | categorical = ['PUlocationID', 'DOlocationID'] 21 | 22 | def read_data(filename): 23 | df = pd.read_parquet(filename) 24 | 25 | df['duration'] = df.dropOff_datetime - df.pickup_datetime 26 | df['duration'] = 
df.duration.dt.total_seconds() / 60 27 | 28 | df = df[(df.duration >= 1) & (df.duration <= 60)].copy() 29 | 30 | df[categorical] = df[categorical].fillna(-1).astype('int').astype('str') 31 | 32 | return df 33 | 34 | 35 | df = read_data(input_file) 36 | df['ride_id'] = f'{year:04d}/{month:02d}_' + df.index.astype('str') 37 | 38 | 39 | dicts = df[categorical].to_dict(orient='records') 40 | X_val = dv.transform(dicts) 41 | y_pred = lr.predict(X_val) 42 | 43 | 44 | print('predicted mean duration:', y_pred.mean()) 45 | 46 | 47 | df_result = pd.DataFrame() 48 | df_result['ride_id'] = df['ride_id'] 49 | df_result['predicted_duration'] = y_pred 50 | 51 | 52 | df_result.to_parquet( 53 | output_file, 54 | engine='pyarrow', 55 | compression=None, 56 | index=False 57 | ) -------------------------------------------------------------------------------- /05-monitoring/prediction_service/app.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pickle 3 | 4 | import requests 5 | from flask import Flask 6 | from flask import request 7 | from flask import jsonify 8 | 9 | from pymongo import MongoClient 10 | 11 | 12 | MODEL_FILE = os.getenv('MODEL_FILE', 'lin_reg.bin') 13 | 14 | EVIDENTLY_SERVICE_ADDRESS = os.getenv('EVIDENTLY_SERVICE', 'http://127.0.0.1:5000') 15 | MONGODB_ADDRESS = os.getenv("MONGODB_ADDRESS", "mongodb://127.0.0.1:27017") 16 | 17 | with open(MODEL_FILE, 'rb') as f_in: 18 | dv, model = pickle.load(f_in) 19 | 20 | 21 | app = Flask('duration') 22 | mongo_client = MongoClient(MONGODB_ADDRESS) 23 | db = mongo_client.get_database("prediction_service") 24 | collection = db.get_collection("data") 25 | 26 | 27 | @app.route('/predict', methods=['POST']) 28 | def predict(): 29 | record = request.get_json() 30 | 31 | record['PU_DO'] = '%s_%s' % (record['PULocationID'], record['DOLocationID']) 32 | 33 | X = dv.transform([record]) 34 | y_pred = model.predict(X) 35 | 36 | result = { 37 | 'duration': float(y_pred), 38 | } 39 | 40 | save_to_db(record, float(y_pred)) 41 | send_to_evidently_service(record, float(y_pred)) 42 | return jsonify(result) 43 | 44 | 45 | def save_to_db(record, prediction): 46 | rec = record.copy() 47 | rec['prediction'] = prediction 48 | collection.insert_one(rec) 49 | 50 | 51 | def send_to_evidently_service(record, prediction): 52 | rec = record.copy() 53 | rec['prediction'] = prediction 54 | requests.post(f"{EVIDENTLY_SERVICE_ADDRESS}/iterate/taxi", json=[rec]) 55 | 56 | 57 | if __name__ == "__main__": 58 | app.run(debug=True, host='0.0.0.0', port=9696) -------------------------------------------------------------------------------- /06-best-practices/code/infrastructure/modules/ecr/main.tf: -------------------------------------------------------------------------------- 1 | resource "aws_ecr_repository" "repo" { 2 | name = var.ecr_repo_name 3 | image_tag_mutability = "MUTABLE" 4 | 5 | image_scanning_configuration { 6 | scan_on_push = false 7 | } 8 | 9 | force_delete = true 10 | } 11 | 12 | # In practice, the Image build-and-push step is handled separately by the CI/CD pipeline and not the IaC script. 
13 | # But because the lambda config would fail without an existing Image URI in ECR, 14 | # we can also upload any base image to bootstrap the lambda config, unrelated to your Inference logic 15 | resource null_resource ecr_image { 16 | triggers = { 17 | python_file = md5(file(var.lambda_function_local_path)) 18 | docker_file = md5(file(var.docker_image_local_path)) 19 | } 20 | 21 | provisioner "local-exec" { 22 | command = <` to any timeseries scraped from this config. 30 | 31 | - job_name: 'prometheus' 32 | 33 | # Override the global default and scrape targets from this job every 5 seconds. 34 | scrape_interval: 5s 35 | 36 | static_configs: 37 | - targets: ['localhost:9090'] 38 | 39 | 40 | # - job_name: 'cadvisor' 41 | # 42 | # # Override the global default and scrape targets from this job every 5 seconds. 43 | # scrape_interval: 5s 44 | # 45 | # dns_sd_configs: 46 | # - names: 47 | # - 'tasks.cadvisor' 48 | # type: 'A' 49 | # port: 8080 50 | 51 | # static_configs: 52 | # - targets: ['cadvisor:8080'] 53 | 54 | # - job_name: 'node-exporter' 55 | # 56 | # # Override the global default and scrape targets from this job every 5 seconds. 57 | # scrape_interval: 5s 58 | # 59 | # dns_sd_configs: 60 | # - names: 61 | # - 'tasks.node-exporter' 62 | # type: 'A' 63 | # port: 9100 64 | 65 | # - job_name: 'pushgateway' 66 | # scrape_interval: 10s 67 | # dns_\sd_configs: 68 | # - names: 69 | # - 'tasks.pushgateway' 70 | # type: 'A' 71 | # port: 9091 72 | 73 | # static_configs: 74 | # - targets: ['node-exporter:9100'] 75 | - job_name: 'service' 76 | scrape_interval: 10s 77 | static_configs: 78 | - targets: ['evidently_service.:8085'] -------------------------------------------------------------------------------- /05-monitoring/homework/model_training.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | 3 | import pandas as pd 4 | import pyarrow.parquet as pq 5 | from sklearn.feature_extraction import DictVectorizer 6 | from sklearn.linear_model import LinearRegression 7 | 8 | 9 | def read_dataframe(filename): 10 | df = pq.read_table(filename).to_pandas() 11 | 12 | df.lpep_dropoff_datetime = pd.to_datetime(df.lpep_dropoff_datetime) 13 | df.lpep_pickup_datetime = pd.to_datetime(df.lpep_pickup_datetime) 14 | 15 | df['duration'] = df.lpep_dropoff_datetime - df.lpep_pickup_datetime 16 | df.duration = df.duration.apply(lambda td: td.total_seconds() / 60) 17 | 18 | df = df[(df.duration >= 1) & (df.duration <= 60)] 19 | 20 | categorical = ['PULocationID', 'DOLocationID'] 21 | df[categorical] = df[categorical].astype(str) 22 | 23 | return df 24 | 25 | def add_features(train_data="./datasets/green_tripdata_2021-03.parquet", 26 | additional_training_data=None): 27 | df_train = read_dataframe(train_data) 28 | 29 | if additional_training_data: 30 | extra_data = read_dataframe(additional_training_data) 31 | df_train = pd.concat([df_train, extra_data], axis=0, ignore_index=True) 32 | 33 | 34 | 35 | df_train['PU_DO'] = df_train['PULocationID'] + '_' + df_train['DOLocationID'] 36 | 37 | categorical = ['PU_DO'] 38 | numerical = ['trip_distance'] 39 | 40 | dv = DictVectorizer() 41 | 42 | train_dicts = df_train[categorical + numerical].to_dict(orient='records') 43 | X_train = dv.fit_transform(train_dicts) 44 | 45 | target = 'duration' 46 | y_train = df_train[target].values 47 | 48 | return X_train, y_train, dv 49 | 50 | 51 | 52 | 53 | if __name__ == "__main__": 54 | X_train, y_train, dv = add_features() 55 | 56 | print("Training model with one month of data") 57 | lr 
= LinearRegression() 58 | lr.fit(X_train, y_train) 59 | 60 | 61 | with open('prediction_service/lin_reg.bin', 'wb') as f_out: 62 | pickle.dump((dv, lr), f_out) 63 | 64 | X_train, y_train, dv = add_features(additional_training_data="./datasets/green_tripdata_2021-04.parquet") 65 | print("Training model with two months of data") 66 | lr = LinearRegression() 67 | lr.fit(X_train, y_train) 68 | 69 | with open('prediction_service/lin_reg_V2.bin', 'wb') as f_out: 70 | pickle.dump((dv, lr), f_out) 71 | -------------------------------------------------------------------------------- /06-best-practices/code/infrastructure/main.tf: -------------------------------------------------------------------------------- 1 | # Make sure to create state bucket beforehand 2 | terraform { 3 | required_version = ">= 1.0" 4 | backend "s3" { 5 | bucket = "tf-state-mlops-zoomcamp" 6 | key = "mlops-zoomcamp-stg.tfstate" 7 | region = "eu-west-1" 8 | encrypt = true 9 | } 10 | } 11 | 12 | provider "aws" { 13 | region = var.aws_region 14 | } 15 | 16 | data "aws_caller_identity" "current_identity" {} 17 | 18 | locals { 19 | account_id = data.aws_caller_identity.current_identity.account_id 20 | } 21 | 22 | # ride_events 23 | module "source_kinesis_stream" { 24 | source = "./modules/kinesis" 25 | retention_period = 48 26 | shard_count = 2 27 | stream_name = "${var.source_stream_name}-${var.project_id}" 28 | tags = var.project_id 29 | } 30 | 31 | # ride_predictions 32 | module "output_kinesis_stream" { 33 | source = "./modules/kinesis" 34 | retention_period = 48 35 | shard_count = 2 36 | stream_name = "${var.output_stream_name}-${var.project_id}" 37 | tags = var.project_id 38 | } 39 | 40 | # model bucket 41 | module "s3_bucket" { 42 | source = "./modules/s3" 43 | bucket_name = "${var.model_bucket}-${var.project_id}" 44 | } 45 | 46 | # image registry 47 | module "ecr_image" { 48 | source = "./modules/ecr" 49 | ecr_repo_name = "${var.ecr_repo_name}_${var.project_id}" 50 | account_id = local.account_id 51 | lambda_function_local_path = var.lambda_function_local_path 52 | docker_image_local_path = var.docker_image_local_path 53 | } 54 | 55 | module "lambda_function" { 56 | source = "./modules/lambda" 57 | image_uri = module.ecr_image.image_uri 58 | lambda_function_name = "${var.lambda_function_name}_${var.project_id}" 59 | model_bucket = module.s3_bucket.name 60 | output_stream_name = "${var.output_stream_name}-${var.project_id}" 61 | output_stream_arn = module.output_kinesis_stream.stream_arn 62 | source_stream_name = "${var.source_stream_name}-${var.project_id}" 63 | source_stream_arn = module.source_kinesis_stream.stream_arn 64 | } 65 | 66 | # For CI/CD 67 | output "lambda_function" { 68 | value = "${var.lambda_function_name}_${var.project_id}" 69 | } 70 | 71 | output "model_bucket" { 72 | value = module.s3_bucket.name 73 | } 74 | 75 | output "predictions_stream_name" { 76 | value = "${var.output_stream_name}-${var.project_id}" 77 | } 78 | 79 | output "ecr_repo" { 80 | value = "${var.ecr_repo_name}_${var.project_id}" 81 | } 82 | -------------------------------------------------------------------------------- /03-orchestration/homework.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | from sklearn.feature_extraction import DictVectorizer 4 | from sklearn.linear_model import LinearRegression 5 | from sklearn.metrics import mean_squared_error 6 | 7 | def read_data(path): 8 | df = pd.read_parquet(path) 9 | return df 10 | 11 | def prepare_features(df, categorical, 
train=True): 12 | df['duration'] = df.dropOff_datetime - df.pickup_datetime 13 | df['duration'] = df.duration.dt.total_seconds() / 60 14 | df = df[(df.duration >= 1) & (df.duration <= 60)].copy() 15 | 16 | mean_duration = df.duration.mean() 17 | if train: 18 | print(f"The mean duration of training is {mean_duration}") 19 | else: 20 | print(f"The mean duration of validation is {mean_duration}") 21 | 22 | df[categorical] = df[categorical].fillna(-1).astype('int').astype('str') 23 | return df 24 | 25 | def train_model(df, categorical): 26 | 27 | train_dicts = df[categorical].to_dict(orient='records') 28 | dv = DictVectorizer() 29 | X_train = dv.fit_transform(train_dicts) 30 | y_train = df.duration.values 31 | 32 | print(f"The shape of X_train is {X_train.shape}") 33 | print(f"The DictVectorizer has {len(dv.feature_names_)} features") 34 | 35 | lr = LinearRegression() 36 | lr.fit(X_train, y_train) 37 | y_pred = lr.predict(X_train) 38 | mse = mean_squared_error(y_train, y_pred, squared=False) 39 | print(f"The MSE of training is: {mse}") 40 | return lr, dv 41 | 42 | def run_model(df, categorical, dv, lr): 43 | val_dicts = df[categorical].to_dict(orient='records') 44 | X_val = dv.transform(val_dicts) 45 | y_pred = lr.predict(X_val) 46 | y_val = df.duration.values 47 | 48 | mse = mean_squared_error(y_val, y_pred, squared=False) 49 | print(f"The MSE of validation is: {mse}") 50 | return 51 | 52 | def main(train_path: str = './data/fhv_tripdata_2021-01.parquet', 53 | val_path: str = './data/fhv_tripdata_2021-02.parquet'): 54 | 55 | categorical = ['PUlocationID', 'DOlocationID'] 56 | 57 | df_train = read_data(train_path) 58 | df_train_processed = prepare_features(df_train, categorical) 59 | 60 | df_val = read_data(val_path) 61 | df_val_processed = prepare_features(df_val, categorical, False) 62 | 63 | # train the model 64 | lr, dv = train_model(df_train_processed, categorical) 65 | run_model(df_val_processed, categorical, dv, lr) 66 | 67 | main() 68 | -------------------------------------------------------------------------------- /05-monitoring/homework/prediction_service/app.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import pickle 4 | import uuid 5 | 6 | from flask import Flask, jsonify, request 7 | from pymongo import MongoClient 8 | 9 | 10 | MONGO_ADDRESS = os.getenv("MONGO_ADDRESS", "mongodb://localhost:27017/") 11 | MONGO_DATABASE = os.getenv("MONGO_DATABASE", "ride_prediction") 12 | LOGGED_MODEL = os.getenv("MODEL_FILE", "lin_reg.bin") 13 | MODEL_VERSION = os.getenv("MODEL_VERSION", "1") 14 | 15 | with open(LOGGED_MODEL, 'rb') as f_in: 16 | dv, model = pickle.load(f_in) 17 | 18 | 19 | mongo_client = MongoClient(MONGO_ADDRESS) 20 | mongo_db = mongo_client[MONGO_DATABASE] 21 | mongo_collection = mongo_db.get_collection("data") 22 | 23 | 24 | app = Flask("Ride-Prediction-Service") 25 | logging.basicConfig(level=logging.INFO) 26 | 27 | 28 | def prepare_features(ride): 29 | """Function to prepare features before making prediction""" 30 | 31 | record = ride.copy() 32 | record['PU_DO'] = '%s_%s' % (record['PULocationID'], record['DOLocationID']) 33 | 34 | features = dv.transform([record]) 35 | 36 | return features, record 37 | 38 | 39 | def save_db(record, pred_result): 40 | """Save data to mongo db collection""" 41 | 42 | rec = record.copy() 43 | rec["prediction"] = pred_result[0] 44 | mongo_collection.insert_one(rec) 45 | 46 | 47 | 48 | @app.route("/", methods=["GET"]) 49 | def get_info(): 50 | """Function to provide 
info about the app""" 51 | info = """ Ride Prediction Service 52 | 53 | Data Request Example 54 | 55 | "ride = { 56 | "PULocationID": 10, 57 | "DOLocationID": 50, 58 | "trip_distance": 40 59 | }" 60 | 61 | 62 | 
""" 63 | return info 64 | 65 | @app.route("/predict-duration", methods=["POST"]) 66 | def predict_duration(): 67 | """Function to predict duration""" 68 | 69 | ride = request.get_json() 70 | features, record = prepare_features(ride) 71 | 72 | prediction = model.predict(features) 73 | ride_id = str(uuid.uuid4()) 74 | pred_data = { 75 | "ride_id": ride_id, 76 | "PU_DO": record["PU_DO"], 77 | "trip_distance": record["trip_distance"], 78 | "status": 200, 79 | "duration": prediction[0], 80 | "model_version": MODEL_VERSION 81 | } 82 | 83 | save_db(record, prediction) 84 | 85 | result = { 86 | "statusCode": 200, 87 | "data" : pred_data 88 | } 89 | 90 | return jsonify(result) 91 | 92 | 93 | if __name__ == "__main__": 94 | app.run(debug=True, host="0.0.0.0", port=9696) 95 | -------------------------------------------------------------------------------- /06-best-practices/code/tests/model_test.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import model 4 | 5 | 6 | def read_text(file): 7 | test_directory = Path(__file__).parent 8 | 9 | with open(test_directory / file, 'rt', encoding='utf-8') as f_in: 10 | return f_in.read().strip() 11 | 12 | 13 | def test_base64_decode(): 14 | base64_input = read_text('data.b64') 15 | 16 | actual_result = model.base64_decode(base64_input) 17 | expected_result = { 18 | "ride": { 19 | "PULocationID": 130, 20 | "DOLocationID": 205, 21 | "trip_distance": 3.66, 22 | }, 23 | "ride_id": 256, 24 | } 25 | 26 | assert actual_result == expected_result 27 | 28 | 29 | def test_prepare_features(): 30 | model_service = model.ModelService(None) 31 | 32 | ride = { 33 | "PULocationID": 130, 34 | "DOLocationID": 205, 35 | "trip_distance": 3.66, 36 | } 37 | 38 | actual_features = model_service.prepare_features(ride) 39 | 40 | expected_fetures = { 41 | "PU_DO": "130_205", 42 | "trip_distance": 3.66, 43 | } 44 | 45 | assert actual_features == expected_fetures 46 | 47 | 48 | class ModelMock: 49 | def __init__(self, value): 50 | self.value = value 51 | 52 | def predict(self, X): 53 | n = len(X) 54 | return [self.value] * n 55 | 56 | 57 | def test_predict(): 58 | model_mock = ModelMock(10.0) 59 | model_service = model.ModelService(model_mock) 60 | 61 | features = { 62 | "PU_DO": "130_205", 63 | "trip_distance": 3.66, 64 | } 65 | 66 | actual_prediction = model_service.predict(features) 67 | expected_prediction = 10.0 68 | 69 | assert actual_prediction == expected_prediction 70 | 71 | 72 | def test_lambda_handler(): 73 | model_mock = ModelMock(10.0) 74 | model_version = 'Test123' 75 | model_service = model.ModelService(model_mock, model_version) 76 | 77 | base64_input = read_text('data.b64') 78 | 79 | event = { 80 | "Records": [ 81 | { 82 | "kinesis": { 83 | "data": base64_input, 84 | }, 85 | } 86 | ] 87 | } 88 | 89 | actual_predictions = model_service.lambda_handler(event) 90 | expected_predictions = { 91 | 'predictions': [ 92 | { 93 | 'model': 'ride_duration_prediction_model', 94 | 'version': model_version, 95 | 'prediction': { 96 | 'ride_duration': 10.0, 97 | 'ride_id': 256, 98 | }, 99 | } 100 | ] 101 | } 102 | 103 | assert actual_predictions == expected_predictions 104 | -------------------------------------------------------------------------------- /02-experiment-tracking/homework/preprocess_data.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import pickle 4 | 5 | import pandas as pd 6 | from sklearn.feature_extraction import 
DictVectorizer 7 | 8 | 9 | def dump_pickle(obj, filename): 10 | with open(filename, "wb") as f_out: 11 | return pickle.dump(obj, f_out) 12 | 13 | 14 | def read_dataframe(filename: str): 15 | df = pd.read_parquet(filename) 16 | 17 | df['duration'] = df.lpep_dropoff_datetime - df.lpep_pickup_datetime 18 | df.duration = df.duration.apply(lambda td: td.total_seconds() / 60) 19 | df = df[(df.duration >= 1) & (df.duration <= 60)] 20 | 21 | categorical = ['PULocationID', 'DOLocationID'] 22 | df[categorical] = df[categorical].astype(str) 23 | 24 | return df 25 | 26 | 27 | def preprocess(df: pd.DataFrame, dv: DictVectorizer, fit_dv: bool = False): 28 | df['PU_DO'] = df['PULocationID'] + '_' + df['DOLocationID'] 29 | categorical = ['PU_DO'] 30 | numerical = ['trip_distance'] 31 | dicts = df[categorical + numerical].to_dict(orient='records') 32 | if fit_dv: 33 | X = dv.fit_transform(dicts) 34 | else: 35 | X = dv.transform(dicts) 36 | return X, dv 37 | 38 | 39 | def run(raw_data_path: str, dest_path: str, dataset: str = "green"): 40 | # load parquet files 41 | df_train = read_dataframe( 42 | os.path.join(raw_data_path, f"{dataset}_tripdata_2021-01.parquet") 43 | ) 44 | df_valid = read_dataframe( 45 | os.path.join(raw_data_path, f"{dataset}_tripdata_2021-02.parquet") 46 | ) 47 | df_test = read_dataframe( 48 | os.path.join(raw_data_path, f"{dataset}_tripdata_2021-03.parquet") 49 | ) 50 | 51 | # extract the target 52 | target = 'duration' 53 | y_train = df_train[target].values 54 | y_valid = df_valid[target].values 55 | y_test = df_test[target].values 56 | 57 | # fit the dictvectorizer and preprocess data 58 | dv = DictVectorizer() 59 | X_train, dv = preprocess(df_train, dv, fit_dv=True) 60 | X_valid, _ = preprocess(df_valid, dv, fit_dv=False) 61 | X_test, _ = preprocess(df_test, dv, fit_dv=False) 62 | 63 | # create dest_path folder unless it already exists 64 | os.makedirs(dest_path, exist_ok=True) 65 | 66 | # save dictvectorizer and datasets 67 | dump_pickle(dv, os.path.join(dest_path, "dv.pkl")) 68 | dump_pickle((X_train, y_train), os.path.join(dest_path, "train.pkl")) 69 | dump_pickle((X_valid, y_valid), os.path.join(dest_path, "valid.pkl")) 70 | dump_pickle((X_test, y_test), os.path.join(dest_path, "test.pkl")) 71 | 72 | 73 | if __name__ == '__main__': 74 | 75 | parser = argparse.ArgumentParser() 76 | parser.add_argument( 77 | "--raw_data_path", 78 | help="the location where the raw NYC taxi trip data was saved" 79 | ) 80 | parser.add_argument( 81 | "--dest_path", 82 | help="the location where the resulting files will be saved." 
83 | ) 84 | args = parser.parse_args() 85 | 86 | run(args.raw_data_path, args.dest_path) 87 | -------------------------------------------------------------------------------- /06-best-practices/homework_solution/batch.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | import os 5 | import sys 6 | import pickle 7 | import pandas as pd 8 | 9 | 10 | def prepare_data(df, categorical): 11 | df['duration'] = df.dropOff_datetime - df.pickup_datetime 12 | df['duration'] = df.duration.dt.total_seconds() / 60 13 | 14 | df = df[(df.duration >= 1) & (df.duration <= 60)].copy() 15 | 16 | df[categorical] = df[categorical].fillna(-1).astype('int').astype('str') 17 | return df 18 | 19 | 20 | def read_data(filename, categorical): 21 | S3_ENDPOINT_URL = os.getenv('S3_ENDPOINT_URL') 22 | 23 | if S3_ENDPOINT_URL is not None: 24 | options = { 25 | 'client_kwargs': { 26 | 'endpoint_url': S3_ENDPOINT_URL 27 | } 28 | } 29 | 30 | df = pd.read_parquet(filename, storage_options=options) 31 | else: 32 | df = pd.read_parquet(filename) 33 | 34 | return prepare_data(df, categorical) 35 | 36 | 37 | def write_date(filename, df): 38 | S3_ENDPOINT_URL = os.getenv('S3_ENDPOINT_URL') 39 | 40 | if S3_ENDPOINT_URL is not None: 41 | options = { 42 | 'client_kwargs': { 43 | 'endpoint_url': S3_ENDPOINT_URL 44 | } 45 | } 46 | 47 | df.to_parquet(filename, engine='pyarrow', index=False, storage_options=options) 48 | else: 49 | df.to_parquet(filename, engine='pyarrow', index=False) 50 | 51 | 52 | def get_input_path(year, month): 53 | default_input_pattern = 'https://raw.githubusercontent.com/alexeygrigorev/datasets/master/nyc-tlc/fhv/fhv_tripdata_{year:04d}-{month:02d}.parquet' 54 | input_pattern = os.getenv('INPUT_FILE_PATTERN', default_input_pattern) 55 | return input_pattern.format(year=year, month=month) 56 | 57 | 58 | def get_output_path(year, month): 59 | default_output_pattern = 's3://nyc-duration-prediction-alexey/taxi_type=fhv/year={year:04d}/month={month:02d}/predictions.parquet' 60 | output_pattern = os.getenv('OUTPUT_FILE_PATTERN', default_output_pattern) 61 | return output_pattern.format(year=year, month=month) 62 | 63 | 64 | def main(year, month): 65 | input_file = get_input_path(year, month) 66 | output_file = get_output_path(year, month) 67 | 68 | with open('model.bin', 'rb') as f_in: 69 | dv, lr = pickle.load(f_in) 70 | 71 | categorical = ['PUlocationID', 'DOlocationID'] 72 | 73 | df = read_data(input_file, categorical) 74 | df['ride_id'] = f'{year:04d}/{month:02d}_' + df.index.astype('str') 75 | 76 | dicts = df[categorical].to_dict(orient='records') 77 | X_val = dv.transform(dicts) 78 | y_pred = lr.predict(X_val) 79 | 80 | print('predicted mean duration:', y_pred.mean()) 81 | 82 | df_result = pd.DataFrame() 83 | df_result['ride_id'] = df['ride_id'] 84 | df_result['predicted_duration'] = y_pred 85 | 86 | write_date(output_file, df_result) 87 | 88 | 89 | 90 | if __name__ == '__main__': 91 | year = int(sys.argv[1]) 92 | month = int(sys.argv[2]) 93 | main(year, month) -------------------------------------------------------------------------------- /01-intro/homework.md: -------------------------------------------------------------------------------- 1 | ## 1.6 Homework 2 | 3 | The goal of this homework is to train a simple model for predicting the duration of a ride - similar to what we did in this module. 4 | 5 | 6 | ## Q1. 
Downloading the data 7 | 8 | We'll use [the same NYC taxi dataset](https://www1.nyc.gov/site/tlc/about/tlc-trip-record-data.page), 9 | but instead of "Green Taxi Trip Records", we'll use "For-Hire Vehicle Trip Records". 10 | 11 | Download the data for January and February 2021. 12 | 13 | Note that you need "For-Hire Vehicle Trip Records", not "High Volume For-Hire Vehicle Trip Records". 14 | 15 | Read the data for January. How many records are there? 16 | 17 | * 1054112 18 | * 1154112 19 | * 1254112 20 | * 1354112 21 | 22 | 23 | ## Q2. Computing duration 24 | 25 | Now let's compute the `duration` variable. It should contain the duration of a ride in minutes. 26 | 27 | What's the average trip duration in January? 28 | 29 | * 15.16 30 | * 19.16 31 | * 24.16 32 | * 29.16 33 | 34 | ## Data preparation 35 | 36 | Check the distribution of the duration variable. There are some outliers. 37 | 38 | Let's remove them and keep only the records where the duration was between 1 and 60 minutes (inclusive). 39 | 40 | How many records did you drop? 41 | 42 | ## Q3. Missing values 43 | 44 | The features we'll use for our model are the pickup and dropoff location IDs. 45 | 46 | But they have a lot of missing values there. Let's replace them with "-1". 47 | 48 | What's the fractions of missing values for the pickup location ID? I.e. fraction of "-1"s after you filled the NAs. 49 | 50 | * 53% 51 | * 63% 52 | * 73% 53 | * 83% 54 | 55 | ## Q4. One-hot encoding 56 | 57 | Let's apply one-hot encoding to the pickup and dropoff location IDs. We'll use only these two features for our model. 58 | 59 | * Turn the dataframe into a list of dictionaries 60 | * Fit a dictionary vectorizer 61 | * Get a feature matrix from it 62 | 63 | What's the dimensionality of this matrix? (The number of columns). 64 | 65 | * 2 66 | * 152 67 | * 352 68 | * 525 69 | * 725 70 | 71 | ## Q5. Training a model 72 | 73 | Now let's use the feature matrix from the previous step to train a model. 74 | 75 | * Train a plain linear regression model with default parameters 76 | * Calculate the RMSE of the model on the training data 77 | 78 | What's the RMSE on train? 79 | 80 | * 5.52 81 | * 10.52 82 | * 15.52 83 | * 20.52 84 | 85 | 86 | ## Q6. Evaluating the model 87 | 88 | Now let's apply this model to the validation dataset (Feb 2021). 89 | 90 | What's the RMSE on validation? 91 | 92 | * 6.01 93 | * 11.01 94 | * 16.01 95 | * 21.01 96 | 97 | ## Submit the results 98 | 99 | Submit your results here: https://forms.gle/V8q5rv7QRoZ13Sft6 100 | 101 | It's possible that your answers won't match exactly. If it's the case, select the closest one. 102 | 103 | 104 | ## Deadline 105 | 106 | The deadline for submitting is 24 May 2022 (Tuesday) 23:00 CET. After that, the form will be closed. 
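Before jumping to the solution, here is a minimal sketch of the pipeline that Q1–Q5 walk through, assuming the January FHV file has been downloaded locally (the path and column names below follow the FHV parquet schema used elsewhere in this repo); the actual numbers are left for you to compute:

```python
import pandas as pd
from sklearn.feature_extraction import DictVectorizer
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Q1/Q2: read the January data and compute the trip duration in minutes
df = pd.read_parquet('./data/fhv_tripdata_2021-01.parquet')  # assumed local path
df['duration'] = (df.dropOff_datetime - df.pickup_datetime).dt.total_seconds() / 60

# Data preparation: keep only rides between 1 and 60 minutes (inclusive)
df = df[(df.duration >= 1) & (df.duration <= 60)].copy()

# Q3: fill the missing pickup/dropoff location IDs with -1
categorical = ['PUlocationID', 'DOlocationID']
df[categorical] = df[categorical].fillna(-1).astype('int').astype('str')

# Q4: one-hot encode the two location IDs with a DictVectorizer
dv = DictVectorizer()
X_train = dv.fit_transform(df[categorical].to_dict(orient='records'))
y_train = df.duration.values

# Q5: train a plain linear regression and compute the RMSE on train
lr = LinearRegression()
lr.fit(X_train, y_train)
print(mean_squared_error(y_train, lr.predict(X_train), squared=False))
```

For Q6, repeat the preparation on the February file and use `dv.transform` (not `fit_transform`) before predicting.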
107 | 108 | 109 | ## Solution 110 | 111 | * [Video](https://www.youtube.com/watch?v=feH1PMLyu-Q&list=PL3MmuxUbc_hIUISrluw_A7wDSmfOhErJK&index=9) 112 | * [Notebook](homework.ipynb) 113 | -------------------------------------------------------------------------------- /02-experiment-tracking/homework/register_model.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import pickle 4 | 5 | import mlflow 6 | from hyperopt import hp, space_eval 7 | from hyperopt.pyll import scope 8 | from mlflow.entities import ViewType 9 | from mlflow.tracking import MlflowClient 10 | from sklearn.ensemble import RandomForestRegressor 11 | from sklearn.metrics import mean_squared_error 12 | 13 | HPO_EXPERIMENT_NAME = "random-forest-hyperopt" 14 | EXPERIMENT_NAME = "random-forest-best-models" 15 | 16 | mlflow.set_tracking_uri("http://127.0.0.1:5000") 17 | mlflow.set_experiment(EXPERIMENT_NAME) 18 | mlflow.sklearn.autolog() 19 | 20 | SPACE = { 21 | 'max_depth': scope.int(hp.quniform('max_depth', 1, 20, 1)), 22 | 'n_estimators': scope.int(hp.quniform('n_estimators', 10, 50, 1)), 23 | 'min_samples_split': scope.int(hp.quniform('min_samples_split', 2, 10, 1)), 24 | 'min_samples_leaf': scope.int(hp.quniform('min_samples_leaf', 1, 4, 1)), 25 | 'random_state': 42 26 | } 27 | 28 | 29 | def load_pickle(filename): 30 | with open(filename, "rb") as f_in: 31 | return pickle.load(f_in) 32 | 33 | 34 | def train_and_log_model(data_path, params): 35 | X_train, y_train = load_pickle(os.path.join(data_path, "train.pkl")) 36 | X_valid, y_valid = load_pickle(os.path.join(data_path, "valid.pkl")) 37 | X_test, y_test = load_pickle(os.path.join(data_path, "test.pkl")) 38 | 39 | with mlflow.start_run(): 40 | params = space_eval(SPACE, params) 41 | rf = RandomForestRegressor(**params) 42 | rf.fit(X_train, y_train) 43 | 44 | # evaluate model on the validation and test sets 45 | valid_rmse = mean_squared_error(y_valid, rf.predict(X_valid), squared=False) 46 | mlflow.log_metric("valid_rmse", valid_rmse) 47 | test_rmse = mean_squared_error(y_test, rf.predict(X_test), squared=False) 48 | mlflow.log_metric("test_rmse", test_rmse) 49 | 50 | 51 | def run(data_path, log_top): 52 | 53 | client = MlflowClient() 54 | 55 | # retrieve the top_n model runs and log the models to MLflow 56 | experiment = client.get_experiment_by_name(HPO_EXPERIMENT_NAME) 57 | runs = client.search_runs( 58 | experiment_ids=experiment.experiment_id, 59 | run_view_type=ViewType.ACTIVE_ONLY, 60 | max_results=log_top, 61 | order_by=["metrics.rmse ASC"] 62 | ) 63 | for run in runs: 64 | train_and_log_model(data_path=data_path, params=run.data.params) 65 | 66 | # select the model with the lowest test RMSE 67 | experiment = client.get_experiment_by_name(EXPERIMENT_NAME) 68 | # best_run = client.search_runs( ... )[0] 69 | 70 | # register the best model 71 | # mlflow.register_model( ... ) 72 | 73 | 74 | if __name__ == '__main__': 75 | 76 | parser = argparse.ArgumentParser() 77 | parser.add_argument( 78 | "--data_path", 79 | default="./output", 80 | help="the location where the processed NYC taxi trip data was saved." 81 | ) 82 | parser.add_argument( 83 | "--top_n", 84 | default=5, 85 | type=int, 86 | help="the top 'top_n' models will be evaluated to decide which model to promote." 
87 | ) 88 | args = parser.parse_args() 89 | 90 | run(args.data_path, args.top_n) 91 | -------------------------------------------------------------------------------- /04-deployment/README.md: -------------------------------------------------------------------------------- 1 | # 4. Model Deployment 2 | 3 | ## 4.1 Three ways of deploying a model 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | ## 4.2 Web-services: Deploying models with Flask and Docker 12 | 13 | 14 | 15 | 16 | 17 | 18 | [See code here](web-service/) 19 | 20 | 21 | ## 4.3 Web-services: Getting the models from the model registry (MLflow) 22 | 23 | 24 | 25 | 26 | 27 | 28 | [See code here](web-service-mlflow/) 29 | 30 | 31 | ## 4.4 (Optional) Streaming: Deploying models with Kinesis and Lambda 32 | 33 | 34 | 35 | 36 | 37 | 38 | [See code here](streaming/) 39 | 40 | 41 | ## 4.5 Batch: Preparing a scoring script 42 | 43 | 44 | 45 | 46 | 47 | 48 | [See code here](batch/) 49 | 50 | 51 | ## 4.6 MLOps Zoomcamp 4.6 - Batch: Scheduling batch scoring jobs with Prefect 52 | 53 | **Note:** There are several changes to deployment in Prefect 2.3.1 since 2.0b8: 54 | - `DeploymentSpec` in 2.0b8 now becomes `Deployment`. 55 | - `work_queue_name` is used instead of `tags` to submit the deployment to the a specific work queue. 56 | - You don't need to create a work queue before using the work queue. A work queue will be created if it doesn't exist. 57 | - `flow_location` is replaced with `flow` 58 | - `flow_runner` and `flow_storage` are no longer supported 59 | 60 | ```python 61 | from prefect.deployments import Deployment 62 | from prefect.orion.schemas.schedules import CronSchedule 63 | from score import ride_duration_prediction 64 | 65 | deployment = Deployment.build_from_flow( 66 | flow=ride_duration_prediction, 67 | name="ride_duration_prediction", 68 | parameters={ 69 | "taxi_type": "green", 70 | "run_id": "e1efc53e9bd149078b0c12aeaa6365df", 71 | }, 72 | schedule=CronSchedule(cron="0 3 2 * *"), 73 | work_queue_name="ml", 74 | ) 75 | 76 | deployment.apply() 77 | ``` 78 | 79 | 80 | 81 | 82 | 83 | ## 4.7 Choosing the right way of deployment 84 | 85 | COMING SOON 86 | 87 | 88 | ## 4.8 Homework 89 | 90 | More information here: [homework.md](homework.md) 91 | 92 | 93 | ## Notes 94 | 95 | Did you take notes? Add them here: 96 | 97 | * [Notes on model deployment (+ creating a modeling package) by Ron M.](https://particle1331.github.io/inefficient-networks/notebooks/mlops/04-deployment/notes.html) 98 | * [Notes on Model Deployment using Google Cloud Platform, by M. 
Ayoub C.](https://gist.github.com/Qfl3x/de2a9b98a370749a4b17a4c94ef46185) 99 | * [Week4: Notes on Model Deployment by Bhagabat](https://github.com/BPrasad123/MLOps_Zoomcamp/tree/main/Week4) 100 | * [Week 4: Deployment notes by Ayoub.B](https://github.com/ayoub-berdeddouch/mlops-journey/blob/main/deployment-04.md) 101 | * Send a PR, add your notes above this line 102 | -------------------------------------------------------------------------------- /05-monitoring/prefect_example.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import pickle 4 | 5 | import pandas 6 | from prefect import flow, task 7 | from pymongo import MongoClient 8 | import pyarrow.parquet as pq 9 | 10 | from evidently import ColumnMapping 11 | 12 | from evidently.dashboard import Dashboard 13 | from evidently.dashboard.tabs import DataDriftTab,RegressionPerformanceTab 14 | 15 | from evidently.model_profile import Profile 16 | from evidently.model_profile.sections import DataDriftProfileSection, RegressionPerformanceProfileSection 17 | 18 | 19 | @task 20 | def upload_target(filename): 21 | client = MongoClient("mongodb://localhost:27018/") 22 | collection = client.get_database("prediction_service").get_collection("data") 23 | with open(filename) as f_target: 24 | for line in f_target.readlines(): 25 | row = line.split(",") 26 | collection.update_one({"id": row[0]}, {"$set": {"target": float(row[1])}}) 27 | client.close() 28 | 29 | 30 | @task 31 | def load_reference_data(filename): 32 | MODEL_FILE = os.getenv('MODEL_FILE', './prediction_service/lin_reg.bin') 33 | with open(MODEL_FILE, 'rb') as f_in: 34 | dv, model = pickle.load(f_in) 35 | reference_data = pq.read_table(filename).to_pandas() 36 | # Create features 37 | reference_data['PU_DO'] = reference_data['PULocationID'].astype(str) + "_" + reference_data['DOLocationID'].astype(str) 38 | 39 | # add target column 40 | reference_data['target'] = reference_data.lpep_dropoff_datetime - reference_data.lpep_pickup_datetime 41 | reference_data.target = reference_data.target.apply(lambda td: td.total_seconds() / 60) 42 | reference_data = reference_data[(reference_data.target >= 1) & (reference_data.target <= 60)] 43 | features = ['PU_DO', 'PULocationID', 'DOLocationID', 'trip_distance'] 44 | x_pred = dv.transform(reference_data[features].to_dict(orient='records')) 45 | reference_data['prediction'] = model.predict(x_pred) 46 | return reference_data 47 | 48 | 49 | @task 50 | def fetch_data(): 51 | client = MongoClient("mongodb://localhost:27018/") 52 | data = client.get_database("prediction_service").get_collection("data").find() 53 | df = pandas.DataFrame(list(data)) 54 | return df 55 | 56 | 57 | @task 58 | def run_evidently(ref_data, data): 59 | ref_data.drop('ehail_fee', axis=1, inplace=True) 60 | data.drop('ehail_fee', axis=1, inplace=True) # drop empty column (until Evidently will work with it properly) 61 | profile = Profile(sections=[DataDriftProfileSection(), RegressionPerformanceProfileSection()]) 62 | mapping = ColumnMapping(prediction="prediction", numerical_features=['trip_distance'], 63 | categorical_features=['PULocationID', 'DOLocationID'], 64 | datetime_features=[]) 65 | profile.calculate(ref_data, data, mapping) 66 | 67 | dashboard = Dashboard(tabs=[DataDriftTab(), RegressionPerformanceTab(verbose_level=0)]) 68 | dashboard.calculate(ref_data, data, mapping) 69 | return json.loads(profile.json()), dashboard 70 | 71 | 72 | @task 73 | def save_report(result): 74 | client = 
MongoClient("mongodb://localhost:27018/") 75 | client.get_database("prediction_service").get_collection("report").insert_one(result[0]) 76 | 77 | 78 | @task 79 | def save_html_report(result): 80 | result[1].save("evidently_report_example.html") 81 | 82 | 83 | @flow 84 | def batch_analyze(): 85 | upload_target("target.csv") 86 | ref_data = load_reference_data("./evidently_service/datasets/green_tripdata_2021-01.parquet") 87 | data = fetch_data() 88 | result = run_evidently(ref_data, data) 89 | save_report(result) 90 | save_html_report(result) 91 | 92 | batch_analyze() 93 | -------------------------------------------------------------------------------- /02-experiment-tracking/README.md: -------------------------------------------------------------------------------- 1 | # 2. Experiment tracking and model management 2 | 3 | 4 | * [Slides](https://drive.google.com/file/d/1YtkAtOQS3wvY7yts_nosVlXrLQBq5q37/view?usp=sharing) 5 | 6 | 7 | ## 2.1 Experiment tracking intro 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | ## 2.2 Getting started with MLflow 16 | 17 | 18 | 19 | 20 | 21 | Note: in the videos, Cristian uses Jupyter in VS code and runs everything locally 22 | 23 | But if you set up a VM in the previous module, you can keep using it 24 | and use the usual Jupyter from your browser. There's no significant 25 | difference between using Jupyter with VS code and without 26 | 27 | 28 | ## 2.3 Experiment tracking with MLflow 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | ## 2.4 Model management 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | ## 2.5 Model registry 45 | 46 | 47 | 48 | 49 | 50 | 51 | ## 2.6 MLflow in practice 52 | 53 | 54 | 55 | 56 | 57 | 58 | ## 2.7 MLflow: benefits, limitations and alternatives 59 | 60 | 61 | 62 | 63 | 64 | 65 | ## 2.7 Homework 66 | 67 | More information here: [homework.md](homework.md) 68 | 69 | 70 | ## Notes 71 | 72 | Did you take notes? Add them here: 73 | 74 | * [Notes/General Docs on MLflow by Ayoub](https://gist.github.com/Qfl3x/ccff6b0708358c040e437d52af0c2e43) 75 | * [Minimalist MLflow code reference by Anna V](https://github.com/annnvv/mlops_zoomcamp/blob/main/notes/module2_notes_MLflow.md) 76 | * [Notes from second lesson by Neimv](https://gitlab.com/neimv/mlops/-/blob/main/lessons_weeks/notes_2.md) 77 | * [2nd Week Experiment & Tracking notes by Ayoub.B](https://github.com/ayoub-berdeddouch/mlops-journey/blob/main/experiment_tracking_02.md) 78 | * [Notes on Experiment Tracking with MLflow (Jupyter Book) by particle1331](https://particle1331.github.io/inefficient-networks/notebooks/mlops/2-mlflow/2-mlflow.html) 79 | * [Week 2: Experiment & Tracking Notes by Bengsoon Chuah](https://github.com/bengsoon/mlops-zoomcamp/blob/main/02-experiment-tracking/notes/Experiment_Tracking_notes.md) 80 | * [2.4 Model Management Notes by Alvaro Pena](https://github.com/alvarofps/mlops-zoomcamp/blob/main/02-experiment-tracking/my-notes/2.4%20Model%20management.md) 81 | * [Notes by Alvaro Navas](https://github.com/ziritrion/mlopszoomcamp/blob/main/notes/2_experiment.md) 82 | * [Notebook from froukje](https://github.com/froukje/ml-ops-zoomcamp/blob/master/02-experiment-tracking/week02.ipynb) and [notes](https://medium.com/@falbrechtg/getting-started-with-mlflow-tracking-46a0089d6a73) 83 | * [Blog post on setting up MLFlow on GCP by Isaac Kargar](https://kargarisaac.github.io/blog/mlops/data%20engineering/2022/06/15/MLFlow-on-GCP.html). 
84 | * [Week2: Experiment tracking notes and notebook by Bhagabat](https://github.com/BPrasad123/MLOps_Zoomcamp/tree/main/Week2) 85 | * [Notes of ML-flow by Jaime Cabrera-Salcedo](https://github.com/jaimeh94/MLOps-Zoomcamp/tree/main/02-experiment-tracking) 86 | * Send a PR, add your notes above this line 87 | -------------------------------------------------------------------------------- /05-monitoring/README.md: -------------------------------------------------------------------------------- 1 | # 5. Model Monitoring 2 | 3 | ## 5.1 Monitoring for ML-based services 4 | 5 | 6 | 7 | 8 | 9 | 10 | [Slides](https://drive.google.com/file/d/1wcMU75ZcNNJie4ELjsKPkITIL93wHykt/view?usp=sharing) 11 | 12 | 13 | ## 5.2 Setting up the environment 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | ## 5.3 Creating a prediction service and simulating traffic 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | ## 5.4 Realtime monitoring walkthrough (Prometheus, Evidently, Grafana) 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | ## 5.5 Batch monitoring walkthrough (Prefect, MongoDB, Evidently) 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | ## 5.6 Homework 46 | 47 | More information here: [homework.md](homework.md). Please also give us feedback for this week's videos using 48 | [this form](https://forms.gle/fb7dJKjyb1oeNeAz9) 49 | 50 | 51 | ## Notes 52 | 53 | Did you take notes? Add them here: 54 | 55 | * [Week 5 notes by M. Ayoub C.](https://gist.github.com/Qfl3x/aa6b1bec35fb645ded0371c46e8aafd1) 56 | * [week 5: Monitoring notes Ayoub.B](https://github.com/ayoub-berdeddouch/mlops-journey/blob/main/monitoring-05.md) 57 | * Send a PR, add your notes above this line 58 | 59 | 60 | 61 | # Monitoring example 62 | 63 | ## Prerequisites 64 | 65 | You need the following tools installed: 66 | - `docker` 67 | - `docker-compose` (included in Docker Desktop for Mac and Docker Desktop for Windows) 68 | 69 | ## Preparation 70 | 71 | Note: all actions are expected to be executed in the repo folder. 72 | 73 | - Create a virtual environment and activate it (e.g.
`python -m venv venv && source ./venv/bin/activate`) 74 | - Install required packages `pip install -r requirements.txt` 75 | - Run `python prepare.py` for downloading datasets 76 | 77 | ## Monitoring Example 78 | 79 | ### Starting services 80 | 81 | To start all required services, execute: 82 | ```bash 83 | docker-compose up 84 | ``` 85 | 86 | It will start following services: 87 | - `prometheus` - TSDB for metrics 88 | - `grafana` - Visual tool for metrics 89 | - `mongo` - MongoDB, for storing raw data, predictions, targets and profile reports 90 | - `evidently_service` - Evindently RT-monitoring service (draft example) 91 | - `prediction_service` - main service, which makes predictions 92 | 93 | ### Sending data 94 | 95 | To start sending data to service, execute: 96 | ```bash 97 | python send_data.py 98 | ``` 99 | 100 | This script will send every second single row from dataset to prediction service along with creating file `target.csv` with actual results (so it can be loaded after) 101 | 102 | ## Batch Monitoring Example 103 | 104 | After you stop sending data to service, you can run batch monitoring pipeline (using Prefect) by running script: 105 | 106 | ```bash 107 | python prefect_example.py 108 | ``` 109 | 110 | This script will: 111 | - load `target.csv` to MongoDB 112 | - download dataset from MongoDB 113 | - Run Evidently Model Profile and Evidently Report on this data 114 | - Save Profile data back to MongoDB 115 | - Save Report to `evidently_report_example.html` 116 | 117 | You can look at Prefect steps in Prefect Orion UI 118 | (to start it execute `prefect orion start`) 119 | -------------------------------------------------------------------------------- /03-orchestration/homework_solution.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | from sklearn.feature_extraction import DictVectorizer 4 | from sklearn.linear_model import LinearRegression 5 | from sklearn.metrics import mean_squared_error 6 | 7 | from prefect import task, flow, get_run_logger 8 | from datetime import datetime 9 | import pickle 10 | 11 | @task 12 | def read_data(path): 13 | df = pd.read_parquet(path) 14 | return df 15 | 16 | @task 17 | def prepare_features(df, categorical, train=True): 18 | logger = get_run_logger() 19 | df['duration'] = df.dropOff_datetime - df.pickup_datetime 20 | df['duration'] = df.duration.dt.total_seconds() / 60 21 | df = df[(df.duration >= 1) & (df.duration <= 60)].copy() 22 | 23 | mean_duration = df.duration.mean() 24 | if train: 25 | logger.info(f"The mean duration of training is {mean_duration}") 26 | else: 27 | logger.info(f"The mean duration of validation is {mean_duration}") 28 | 29 | df[categorical] = df[categorical].fillna(-1).astype('int').astype('str') 30 | return df 31 | 32 | @task 33 | def train_model(df, categorical): 34 | logger = get_run_logger() 35 | train_dicts = df[categorical].to_dict(orient='records') 36 | dv = DictVectorizer() 37 | X_train = dv.fit_transform(train_dicts) 38 | y_train = df.duration.values 39 | 40 | logger.info(f"The shape of X_train is {X_train.shape}") 41 | logger.info(f"The DictVectorizer has {len(dv.feature_names_)} features") 42 | 43 | lr = LinearRegression() 44 | lr.fit(X_train, y_train) 45 | y_pred = lr.predict(X_train) 46 | mse = mean_squared_error(y_train, y_pred, squared=False) 47 | logger.info(f"The MSE of training is: {mse}") 48 | return lr, dv 49 | 50 | @task 51 | def run_model(df, categorical, dv, lr): 52 | logger = get_run_logger() 53 | val_dicts = 
df[categorical].to_dict(orient='records') 54 | X_val = dv.transform(val_dicts) 55 | y_pred = lr.predict(X_val) 56 | y_val = df.duration.values 57 | 58 | mse = mean_squared_error(y_val, y_pred, squared=False) 59 | logger.info(f"The MSE of validation is: {mse}") 60 | return 61 | 62 | @task 63 | def get_paths(date): 64 | from dateutil.relativedelta import relativedelta 65 | if date: 66 | processed_date = datetime.strptime(date, "%Y-%m-%d") 67 | else: 68 | processed_date = datetime.today() 69 | train_date = processed_date - relativedelta(months=2) 70 | val_date = processed_date - relativedelta(months=1) 71 | train_path = f"./data/fhv_tripdata_{train_date.year}-{str(train_date.month).zfill(2)}.parquet" 72 | val_path = f"./data/fhv_tripdata_{val_date.year}-{str(val_date.month).zfill(2)}.parquet" 73 | return train_path, val_path 74 | 75 | @flow 76 | def main(date=None): 77 | train_path, val_path = get_paths(date).result() 78 | 79 | categorical = ['PUlocationID', 'DOlocationID'] 80 | 81 | df_train = read_data(train_path) 82 | df_train_processed = prepare_features(df_train, categorical) 83 | 84 | df_val = read_data(val_path) 85 | df_val_processed = prepare_features(df_val, categorical, False) 86 | 87 | # train the model 88 | lr, dv = train_model(df_train_processed, categorical).result() 89 | run_model(df_val_processed, categorical, dv, lr) 90 | 91 | if date is None: 92 | date = datetime.today.strftime("%Y-%m-%d") 93 | with open(f'./models/dv-{date}.b', 'wb') as f_out: 94 | pickle.dump(dv, f_out) 95 | 96 | # main("2021-08-15") 97 | 98 | from prefect.deployments import Deployment 99 | from prefect.orion.schemas.schedules import CronSchedule 100 | from prefect.flow_runners import SubprocessFlowRunner 101 | 102 | Deployment( 103 | flow=main, 104 | name="model_training", 105 | schedule=CronSchedule(cron="0 9 15 * *"), 106 | flow_runner=SubprocessFlowRunner(), 107 | ) 108 | -------------------------------------------------------------------------------- /06-best-practices/code/model.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import base64 4 | 5 | import boto3 6 | import mlflow 7 | 8 | 9 | def get_model_location(run_id): 10 | model_location = os.getenv('MODEL_LOCATION') 11 | 12 | if model_location is not None: 13 | return model_location 14 | 15 | model_bucket = os.getenv('MODEL_BUCKET', 'mlflow-models-alexey') 16 | experiment_id = os.getenv('MLFLOW_EXPERIMENT_ID', '1') 17 | 18 | model_location = f's3://{model_bucket}/{experiment_id}/{run_id}/artifacts/model' 19 | return model_location 20 | 21 | 22 | def load_model(run_id): 23 | model_path = get_model_location(run_id) 24 | model = mlflow.pyfunc.load_model(model_path) 25 | return model 26 | 27 | 28 | def base64_decode(encoded_data): 29 | decoded_data = base64.b64decode(encoded_data).decode('utf-8') 30 | ride_event = json.loads(decoded_data) 31 | return ride_event 32 | 33 | 34 | class ModelService: 35 | def __init__(self, model, model_version=None, callbacks=None): 36 | self.model = model 37 | self.model_version = model_version 38 | self.callbacks = callbacks or [] 39 | 40 | def prepare_features(self, ride): 41 | features = {} 42 | features['PU_DO'] = f"{ride['PULocationID']}_{ride['DOLocationID']}" 43 | features['trip_distance'] = ride['trip_distance'] 44 | return features 45 | 46 | def predict(self, features): 47 | pred = self.model.predict(features) 48 | return float(pred[0]) 49 | 50 | def lambda_handler(self, event): 51 | # print(json.dumps(event)) 52 | 53 | predictions_events = 
[] 54 | 55 | for record in event['Records']: 56 | encoded_data = record['kinesis']['data'] 57 | ride_event = base64_decode(encoded_data) 58 | 59 | # print(ride_event) 60 | ride = ride_event['ride'] 61 | ride_id = ride_event['ride_id'] 62 | 63 | features = self.prepare_features(ride) 64 | prediction = self.predict(features) 65 | 66 | prediction_event = { 67 | 'model': 'ride_duration_prediction_model', 68 | 'version': self.model_version, 69 | 'prediction': {'ride_duration': prediction, 'ride_id': ride_id}, 70 | } 71 | 72 | for callback in self.callbacks: 73 | callback(prediction_event) 74 | 75 | predictions_events.append(prediction_event) 76 | 77 | return {'predictions': predictions_events} 78 | 79 | 80 | class KinesisCallback: 81 | def __init__(self, kinesis_client, prediction_stream_name): 82 | self.kinesis_client = kinesis_client 83 | self.prediction_stream_name = prediction_stream_name 84 | 85 | def put_record(self, prediction_event): 86 | ride_id = prediction_event['prediction']['ride_id'] 87 | 88 | self.kinesis_client.put_record( 89 | StreamName=self.prediction_stream_name, 90 | Data=json.dumps(prediction_event), 91 | PartitionKey=str(ride_id), 92 | ) 93 | 94 | 95 | def create_kinesis_client(): 96 | endpoint_url = os.getenv('KINESIS_ENDPOINT_URL') 97 | 98 | if endpoint_url is None: 99 | return boto3.client('kinesis') 100 | 101 | return boto3.client('kinesis', endpoint_url=endpoint_url) 102 | 103 | 104 | def init(prediction_stream_name: str, run_id: str, test_run: bool): 105 | model = load_model(run_id) 106 | 107 | callbacks = [] 108 | 109 | if not test_run: 110 | kinesis_client = create_kinesis_client() 111 | kinesis_callback = KinesisCallback(kinesis_client, prediction_stream_name) 112 | callbacks.append(kinesis_callback.put_record) 113 | 114 | model_service = ModelService(model=model, model_version=run_id, callbacks=callbacks) 115 | 116 | return model_service 117 | -------------------------------------------------------------------------------- /03-orchestration/README.md: -------------------------------------------------------------------------------- 1 | # 3. Orchestration and ML Pipelines 2 | 3 | **Note:** [`orchestration.py`](orchestration.py) is a ready final version. The rest of the files were worked on together during the video tutorials. 4 | 5 | **Note** With Prefect version [`2.2.1`](https://github.com/PrefectHQ/prefect/blob/orion/RELEASE-NOTES.md#20b8) or later `DeploymentSpec`'s are now just `Deployment`'s. 6 | 7 | ## 3.1 Negative engineering and workflow orchestration 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | ## 3.2 Introduction to Prefect 2.0 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | ## 3.3 First Prefect flow and basics 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | ## 3.4 Remote Prefect Orion deployment 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | ## 3.5 Deployment of Prefect flow 40 | 41 | **Note:** There are several changes to deployment in Prefect 2.3.1 since 2.0b8: 42 | - `DeploymentSpec` in 2.0b8 now becomes `Deployment`. 43 | - `work_queue_name` is used instead of `tags` to submit the deployment to the a specific work queue. 44 | - You don't need to create a work queue before using the work queue. A work queue will be created if it doesn't exist. 
45 | 46 | ```python 47 | from prefect.deployments import Deployment 48 | from prefect.orion.schemas.schedules import IntervalSchedule 49 | from datetime import timedelta 50 | 51 | deployment = Deployment.build_from_flow( 52 | flow=main, 53 | name="model_training", 54 | schedule=IntervalSchedule(interval=timedelta(minutes=5)), 55 | work_queue_name="ml" 56 | ) 57 | 58 | deployment.apply() 59 | ``` 60 | 61 | 62 | 63 | 64 | 65 | Links: 66 | 67 | * [Instructions for Hosting Prefect Orion](https://discourse.prefect.io/t/hosting-an-orion-instance-on-a-cloud-vm/967) 68 | 69 | 70 | ## 3.6 MLOps Zoomcamp 3.6 - (Optional) Work queues and agents 71 | 72 | 73 | 74 | 75 | 76 | 77 | ## 3.7 Homework 78 | 79 | More information here: [homework.md](homework.md) 80 | 81 | 82 | ## Notes 83 | 84 | Did you take notes? Add them here: 85 | 86 | * [Week 3, Prefect Introduction and S3 Bucket configuration with Prefect by M. Ayoub C.](https://gist.github.com/Qfl3x/8dd69b8173f027b9468016c118f3b6a5) 87 | * [Notes from froukje](https://github.com/froukje/ml-ops-zoomcamp/blob/master/03-orchestration/week03_orchestration.ipynb) 88 | * [Minimalist code notes from Anna V](https://github.com/annnvv/mlops_zoomcamp/blob/main/notes/module3_notes_prefect.md) 89 | * [Getting Started on Prefect 2.0 + Deploying worfklows for MLflow Staging by Ron Medina (Jupyter Book)](https://particle1331.github.io/inefficient-networks/notebooks/mlops/3-prefect/3-prefect.html) 90 | * [Quickstart your homework by Zioalex](https://github.com/zioalex/mlops-zoomcamp/blob/week3/03-orchestration/homework_quickstart.md) 91 | * [Notes from Maxime M](https://github.com/maxmarkov/mlops-zoomcamp/blob/master/lecture-notes/WEEK-3/03-orchestration.md) 92 | * [Week3: Prefect introduction and homework notes by Bhagabat](https://github.com/BPrasad123/MLOps_Zoomcamp/tree/main/Week3) 93 | * [Week 3: Orchestration notes by Ayoub.B](https://github.com/ayoub-berdeddouch/mlops-journey/blob/main/orchestration-03.md) 94 | * Send a PR, add your notes above this line 95 | -------------------------------------------------------------------------------- /05-monitoring/homework/prefect-monitoring/prefect_monitoring.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import pickle 4 | 5 | import pandas 6 | import pyarrow.parquet as pq 7 | from evidently import ColumnMapping 8 | from evidently.dashboard import Dashboard 9 | from evidently.dashboard.tabs import DataDriftTab, RegressionPerformanceTab 10 | from evidently.model_profile import Profile 11 | from evidently.model_profile.sections import ( 12 | DataDriftProfileSection, RegressionPerformanceProfileSection) 13 | from prefect import flow, task 14 | from pymongo import MongoClient 15 | 16 | MONGO_CLIENT_ADDRESS = "mongodb://localhost:27017/" 17 | MONGO_DATABASE = "prediction_service" 18 | PREDICTION_COLLECTION = "data" 19 | REPORT_COLLECTION = "report" 20 | REFERENCE_DATA_FILE = "../datasets/green_tripdata_2021-03.parquet" # Modify this for Q7 21 | TARGET_DATA_FILE = "target.csv" 22 | MODEL_FILE = os.getenv('MODEL_FILE', '../prediction_service/lin_reg.bin') # Modify this for Q7 23 | 24 | @task 25 | def upload_target(filename): 26 | client = MongoClient(MONGO_CLIENT_ADDRESS) 27 | collection = client.get_database(MONGO_DATABASE).get_collection(PREDICTION_COLLECTION) 28 | with open(filename) as f_target: 29 | for line in f_target.readlines(): 30 | row = line.split(",") 31 | collection.update_one({"id": row[0]}, 32 | {"$set": {"target": float(row[1])}} 33 | ) 34 
| 35 | 36 | 37 | @task 38 | def load_reference_data(filename): 39 | 40 | with open(MODEL_FILE, 'rb') as f_in: 41 | dv, model = pickle.load(f_in) 42 | reference_data = pq.read_table(filename).to_pandas().sample(n=5000,random_state=42) #Monitoring for 1st 5000 records 43 | # Create features 44 | reference_data['PU_DO'] = reference_data['PULocationID'].astype(str) + "_" + reference_data['DOLocationID'].astype(str) 45 | 46 | # add target column 47 | reference_data['target'] = reference_data.lpep_dropoff_datetime - reference_data.lpep_pickup_datetime 48 | reference_data.target = reference_data.target.apply(lambda td: td.total_seconds() / 60) 49 | reference_data = reference_data[(reference_data.target >= 1) & (reference_data.target <= 60)] 50 | features = ['PU_DO', 'PULocationID', 'DOLocationID', 'trip_distance'] 51 | x_pred = dv.transform(reference_data[features].to_dict(orient='records')) 52 | reference_data['prediction'] = model.predict(x_pred) 53 | return reference_data 54 | 55 | 56 | @task 57 | def fetch_data(): 58 | client = MongoClient(MONGO_CLIENT_ADDRESS) 59 | data = client.get_database(MONGO_DATABASE).get_collection(PREDICTION_COLLECTION).find() 60 | df = pandas.DataFrame(list(data)) 61 | return df 62 | 63 | @task 64 | def run_evidently(ref_data, data): 65 | 66 | ref_data.drop(['ehail_fee'], axis=1, inplace=True) 67 | data.drop('ehail_fee', axis=1, inplace=True) # drop empty column (until Evidently will work with it properly) 68 | 69 | profile = Profile(sections=[DataDriftProfileSection(), RegressionPerformanceProfileSection()]) 70 | mapping = ColumnMapping(prediction="prediction", numerical_features=['trip_distance'], 71 | categorical_features=['PULocationID', 'DOLocationID'], 72 | datetime_features=[]) 73 | profile.calculate(ref_data, data, mapping) 74 | 75 | dashboard = Dashboard(tabs=[DataDriftTab(), RegressionPerformanceTab(verbose_level=0)]) 76 | dashboard.calculate(ref_data, data, mapping) 77 | return json.loads(profile.json()), dashboard 78 | 79 | 80 | @task 81 | def save_report(result): 82 | pass 83 | 84 | @task 85 | def save_html_report(result): 86 | pass 87 | 88 | 89 | @flow 90 | def batch_analyze(): 91 | upload_target(TARGET_DATA_FILE) 92 | ref_data = load_reference_data(REFERENCE_DATA_FILE).result() 93 | data = fetch_data().result() 94 | profile, dashboard = run_evidently(ref_data, data).result() 95 | save_report(profile) 96 | save_html_report(dashboard) 97 | 98 | batch_analyze() 99 | -------------------------------------------------------------------------------- /06-best-practices/docs.md: -------------------------------------------------------------------------------- 1 | ## Extra Material 2 | 3 | ### Concepts of IaC and Terraform 4 | 5 | #### Summary 6 | 7 | **Infrastructure-as-Code (IaC)**: 8 | * Define and automate operations around you application's infrastructure. 9 | * Can use version control to track changes made to infrastructure 10 | * Easy to replicate the configuration across different environments such as development, staging, and production. 11 | 12 | 13 | #### Reference Material 14 | 15 | We have already covered Terraform concepts at a deeper level in the [Data Engineering Zoomcamp](https://github.com/DataTalksClub/data-engineering-zoomcamp), and will not be repeating some of those basic concepts again. 
You can find the content here for your reference: 16 | 17 | **Notes**: 18 | * [Terraform Overview](https://github.com/DataTalksClub/data-engineering-zoomcamp/blob/main/week_1_basics_n_setup/1_terraform_gcp/1_terraform_overview.md) 19 | 20 | **Videos**: 21 | 22 | 1. For an introduction to Terraform and IaC concepts, please refer to [this video](https://www.youtube.com/watch?v=Hajwnmj0xfQ&list=PL3MmuxUbc_hJed7dXYoJw8DoCuVHhGEQb&index=11) 23 | (from the DE Zoomcamp), especially the sections in the time-codes: 24 | 25 | * 00:00 Introduction 26 | * 00:35 What is Terraform? 27 | * 01:10 What is IaC? 28 | * 01:43 Advantages of IaC 29 | * 14:48 Installing Terraform 30 | * 02:28 More on Installing Terraform 31 | 32 | 2. For a quickstart tutorial, and understanding the main components of a basic Terraform script, please refer to [this video](https://www.youtube.com/watch?v=dNkEgO-CExg&list=PL3MmuxUbc_hJed7dXYoJw8DoCuVHhGEQb&index=12) 33 | (from the DE Zoomcamp). Please note that this example uses GCP as a cloud provider, while for MLOps Zoomcamp we are using AWS. 34 | 35 | * 00:00 Introduction 36 | * 00:20 .terraform-version 37 | * 01:04 main.tf 38 | * 01:23 terraform declaration 39 | * 03:25 provider plugins 40 | * 04:00 resource example - google_storage_bucket 41 | * 05:42 provider credentials 42 | * 06:34 variables.tf 43 | * 10:54 overview of terraform commands 44 | * 13:35 running terraform commands 45 | * 18:08 recap 46 | 47 | In case you're using GCP instead of AWS, following is some setup material: 48 | * [Local Setup for Terraform and GCP](https://github.com/DataTalksClub/data-engineering-zoomcamp/tree/main/week_1_basics_n_setup/1_terraform_gcp) 49 | * [GCP Overview](https://github.com/DataTalksClub/data-engineering-zoomcamp/blob/main/week_1_basics_n_setup/1_terraform_gcp/2_gcp_overview.md) 50 | * [main.tf](https://github.com/DataTalksClub/data-engineering-zoomcamp/blob/main/week_1_basics_n_setup/1_terraform_gcp/terraform/main.tf) 51 | 52 | #### References 53 | * Terraform with AWS: [Getting Started](https://learn.hashicorp.com/collections/terraform/aws-get-started) and [AWS provider library](https://registry.terraform.io/providers/hashicorp/aws/latest/docs) 54 | * Terraform Modules: [Define](https://www.terraform.io/language/modules/develop) and [Call](https://www.terraform.io/language/modules/syntax) 55 | 56 | 57 | ### Concepts of CI/CD and GitHub Actions 58 | 59 | #### Summary 60 | * Using GitHub Actions to create workflows to automatically test a pull request, 61 | build and push a Docker image, and deploy the updated lambda service to production. 62 | * Creating specific YAML files in GitHub repo, to automatically kick off a series of automation steps. 63 | * Motivation on automating your further tasks with GitHub Actions: 64 | * Orchestrating a continuous training pipeline (CT) to retrain your model and generate updated model artifacts in production 65 | * Integrating the model registry (MLflow, DVC etc.) to fetch the latest model version or experiment ID 66 | * and many more... 
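As a concrete illustration of the model-registry point above, the sketch below shows how a CI/CD step could look up the latest registered model version before deploying. It is only a sketch: it assumes an MLflow tracking server reachable through `MLFLOW_TRACKING_URI` in the CI environment, and `"ride-duration-model"` is a hypothetical registered model name.

```python
from mlflow.tracking import MlflowClient

# assumes MLFLOW_TRACKING_URI is exported in the CI environment;
# "ride-duration-model" is a hypothetical registered model name
client = MlflowClient()

for version in client.get_latest_versions("ride-duration-model", stages=["Production"]):
    # the run id / source URI can then be handed to the build-push and deploy steps
    print(version.version, version.run_id, version.source)
```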
67 | 68 | 69 | #### Reference Material 70 | * [GitHub Actions & Workflows](https://docs.github.com/en/actions/using-workflows) 71 | * [Build-Push image to ECR](https://docs.github.com/en/actions/deployment/deploying-to-your-cloud-provider/deploying-to-amazon-elastic-container-service) 72 | * [Python tests](https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python) 73 | -------------------------------------------------------------------------------- /02-experiment-tracking/running-mlflow-examples/scenario-1.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Scenario 1: A single data scientist participating in an ML competition\n", 8 | "\n", 9 | "MLflow setup:\n", 10 | "* Tracking server: no\n", 11 | "* Backend store: local filesystem\n", 12 | "* Artifacts store: local filesystem\n", 13 | "\n", 14 | "The experiments can be explored locally by launching the MLflow UI." 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": null, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "import mlflow" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": null, 29 | "metadata": {}, 30 | "outputs": [], 31 | "source": [ 32 | "print(f\"tracking URI: '{mlflow.get_tracking_uri()}'\")" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": null, 38 | "metadata": {}, 39 | "outputs": [], 40 | "source": [ 41 | "mlflow.list_experiments()" 42 | ] 43 | }, 44 | { 45 | "cell_type": "markdown", 46 | "metadata": {}, 47 | "source": [ 48 | "### Creating an experiment and logging a new run" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": null, 54 | "metadata": {}, 55 | "outputs": [], 56 | "source": [ 57 | "from sklearn.linear_model import LogisticRegression\n", 58 | "from sklearn.datasets import load_iris\n", 59 | "from sklearn.metrics import accuracy_score\n", 60 | "\n", 61 | "mlflow.set_experiment(\"my-experiment-1\")\n", 62 | "\n", 63 | "with mlflow.start_run():\n", 64 | "\n", 65 | " X, y = load_iris(return_X_y=True)\n", 66 | "\n", 67 | " params = {\"C\": 0.1, \"random_state\": 42}\n", 68 | " mlflow.log_params(params)\n", 69 | "\n", 70 | " lr = LogisticRegression(**params).fit(X, y)\n", 71 | " y_pred = lr.predict(X)\n", 72 | " mlflow.log_metric(\"accuracy\", accuracy_score(y, y_pred))\n", 73 | "\n", 74 | " mlflow.sklearn.log_model(lr, artifact_path=\"models\")\n", 75 | " print(f\"default artifacts URI: '{mlflow.get_artifact_uri()}'\")" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": null, 81 | "metadata": {}, 82 | "outputs": [], 83 | "source": [ 84 | "mlflow.list_experiments()" 85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": null, 90 | "metadata": {}, 91 | "outputs": [], 92 | "source": [] 93 | }, 94 | { 95 | "cell_type": "markdown", 96 | "metadata": {}, 97 | "source": [ 98 | "### Interacting with the model registry" 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": null, 104 | "metadata": {}, 105 | "outputs": [], 106 | "source": [ 107 | "from mlflow.tracking import MlflowClient\n", 108 | "\n", 109 | "\n", 110 | "client = MlflowClient()" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": null, 116 | "metadata": {}, 117 | "outputs": [], 118 | "source": [ 119 | "from mlflow.exceptions import MlflowException\n", 120 | "\n", 121 | "try:\n", 122 | " 
client.list_registered_models()\n", 123 | "except MlflowException:\n", 124 | " print(\"It's not possible to access the model registry :(\")" 125 | ] 126 | }, 127 | { 128 | "cell_type": "code", 129 | "execution_count": null, 130 | "metadata": {}, 131 | "outputs": [], 132 | "source": [] 133 | } 134 | ], 135 | "metadata": { 136 | "interpreter": { 137 | "hash": "0848c9d6c7d415ad6c477ff7ff8e98694d1a4aa96d0deee89244642e6b630036" 138 | }, 139 | "kernelspec": { 140 | "display_name": "Python 3.9.12 ('exp-tracking-env')", 141 | "language": "python", 142 | "name": "python3" 143 | }, 144 | "language_info": { 145 | "codemirror_mode": { 146 | "name": "ipython", 147 | "version": 3 148 | }, 149 | "file_extension": ".py", 150 | "mimetype": "text/x-python", 151 | "name": "python", 152 | "nbconvert_exporter": "python", 153 | "pygments_lexer": "ipython3", 154 | "version": "3.9.12" 155 | }, 156 | "orig_nbformat": 4 157 | }, 158 | "nbformat": 4, 159 | "nbformat_minor": 2 160 | } 161 | -------------------------------------------------------------------------------- /04-deployment/batch/score.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | import os 5 | import sys 6 | 7 | import uuid 8 | import pickle 9 | 10 | from datetime import datetime 11 | 12 | import pandas as pd 13 | 14 | import mlflow 15 | 16 | from prefect import task, flow, get_run_logger 17 | from prefect.context import get_run_context 18 | 19 | from dateutil.relativedelta import relativedelta 20 | 21 | from sklearn.feature_extraction import DictVectorizer 22 | from sklearn.ensemble import RandomForestRegressor 23 | from sklearn.metrics import mean_squared_error 24 | from sklearn.pipeline import make_pipeline 25 | 26 | 27 | def generate_uuids(n): 28 | ride_ids = [] 29 | for i in range(n): 30 | ride_ids.append(str(uuid.uuid4())) 31 | return ride_ids 32 | 33 | 34 | def read_dataframe(filename: str): 35 | df = pd.read_parquet(filename) 36 | 37 | df['duration'] = df.lpep_dropoff_datetime - df.lpep_pickup_datetime 38 | df.duration = df.duration.dt.total_seconds() / 60 39 | df = df[(df.duration >= 1) & (df.duration <= 60)] 40 | 41 | df['ride_id'] = generate_uuids(len(df)) 42 | 43 | return df 44 | 45 | 46 | def prepare_dictionaries(df: pd.DataFrame): 47 | categorical = ['PULocationID', 'DOLocationID'] 48 | df[categorical] = df[categorical].astype(str) 49 | 50 | df['PU_DO'] = df['PULocationID'] + '_' + df['DOLocationID'] 51 | 52 | categorical = ['PU_DO'] 53 | numerical = ['trip_distance'] 54 | dicts = df[categorical + numerical].to_dict(orient='records') 55 | return dicts 56 | 57 | 58 | def load_model(run_id): 59 | logged_model = f's3://mlflow-models-alexey/1/{run_id}/artifacts/model' 60 | model = mlflow.pyfunc.load_model(logged_model) 61 | return model 62 | 63 | 64 | def save_results(df, y_pred, run_id, output_file): 65 | df_result = pd.DataFrame() 66 | df_result['ride_id'] = df['ride_id'] 67 | df_result['lpep_pickup_datetime'] = df['lpep_pickup_datetime'] 68 | df_result['PULocationID'] = df['PULocationID'] 69 | df_result['DOLocationID'] = df['DOLocationID'] 70 | df_result['actual_duration'] = df['duration'] 71 | df_result['predicted_duration'] = y_pred 72 | df_result['diff'] = df_result['actual_duration'] - df_result['predicted_duration'] 73 | df_result['model_version'] = run_id 74 | 75 | df_result.to_parquet(output_file, index=False) 76 | 77 | 78 | @task 79 | def apply_model(input_file, run_id, output_file): 80 | logger = get_run_logger() 81 | 82 | 
logger.info(f'reading the data from {input_file}...') 83 | df = read_dataframe(input_file) 84 | dicts = prepare_dictionaries(df) 85 | 86 | logger.info(f'loading the model with RUN_ID={run_id}...') 87 | model = load_model(run_id) 88 | 89 | logger.info(f'applying the model...') 90 | y_pred = model.predict(dicts) 91 | 92 | logger.info(f'saving the result to {output_file}...') 93 | 94 | save_results(df, y_pred, run_id, output_file) 95 | return output_file 96 | 97 | 98 | def get_paths(run_date, taxi_type, run_id): 99 | prev_month = run_date - relativedelta(months=1) 100 | year = prev_month.year 101 | month = prev_month.month 102 | 103 | input_file = f's3://nyc-tlc/trip data/{taxi_type}_tripdata_{year:04d}-{month:02d}.parquet' 104 | output_file = f's3://nyc-duration-prediction-alexey/taxi_type={taxi_type}/year={year:04d}/month={month:02d}/{run_id}.parquet' 105 | 106 | return input_file, output_file 107 | 108 | 109 | @flow 110 | def ride_duration_prediction( 111 | taxi_type: str, 112 | run_id: str, 113 | run_date: datetime = None): 114 | if run_date is None: 115 | ctx = get_run_context() 116 | run_date = ctx.flow_run.expected_start_time 117 | 118 | input_file, output_file = get_paths(run_date, taxi_type, run_id) 119 | 120 | apply_model( 121 | input_file=input_file, 122 | run_id=run_id, 123 | output_file=output_file 124 | ) 125 | 126 | 127 | def run(): 128 | taxi_type = sys.argv[1] # 'green' 129 | year = int(sys.argv[2]) # 2021 130 | month = int(sys.argv[3]) # 3 131 | 132 | run_id = sys.argv[4] # 'e1efc53e9bd149078b0c12aeaa6365df' 133 | 134 | ride_duration_prediction( 135 | taxi_type=taxi_type, 136 | run_id=run_id, 137 | run_date=datetime(year=year, month=month, day=1) 138 | ) 139 | 140 | 141 | if __name__ == '__main__': 142 | run() 143 | 144 | 145 | 146 | 147 | -------------------------------------------------------------------------------- /06-best-practices/code/infrastructure/modules/lambda/iam.tf: -------------------------------------------------------------------------------- 1 | resource "aws_iam_role" "iam_lambda" { 2 | name = "iam_${var.lambda_function_name}" 3 | assume_role_policy = <= 1) & (reference_data.target <= 60)] 51 | features = ['PU_DO', 'PULocationID', 'DOLocationID', 'trip_distance'] 52 | x_pred = dv.transform(reference_data[features].to_dict(orient='records')) 53 | reference_data['prediction'] = model.predict(x_pred) 54 | return reference_data 55 | 56 | 57 | @task 58 | def fetch_data(): 59 | client = MongoClient(MONGO_CLIENT_ADDRESS) 60 | data = client.get_database(MONGO_DATABASE).get_collection(PREDICTION_COLLECTION).find() 61 | df = pandas.DataFrame(list(data)) 62 | return df 63 | 64 | @task 65 | def run_evidently(ref_data, data): 66 | 67 | ref_data.drop(['ehail_fee'], axis=1, inplace=True) 68 | data.drop('ehail_fee', axis=1, inplace=True) # drop empty column (until Evidently will work with it properly) 69 | 70 | profile = Profile(sections=[DataDriftProfileSection(), RegressionPerformanceProfileSection()]) 71 | mapping = ColumnMapping(prediction="prediction", numerical_features=['trip_distance'], 72 | categorical_features=['PULocationID', 'DOLocationID'], 73 | datetime_features=[]) 74 | profile.calculate(ref_data, data, mapping) 75 | 76 | dashboard = Dashboard(tabs=[DataDriftTab(), RegressionPerformanceTab(verbose_level=0)]) 77 | dashboard.calculate(ref_data, data, mapping) 78 | return json.loads(profile.json()), dashboard 79 | 80 | 81 | @task 82 | def save_report(result): 83 | """Save evidendtly profile for ride prediction to mongo server""" 84 | 85 | client = 
MongoClient(MONGO_CLIENT_ADDRESS) 86 | collection = client.get_database(MONGO_DATABASE).get_collection(REPORT_COLLECTION) 87 | collection.insert_one(result) 88 | 89 | @task 90 | def save_html_report(result, filename_suffix=None): 91 | """Create evidently html report file for ride prediction""" 92 | 93 | if filename_suffix is None: 94 | filename_suffix = datetime.now().strftime('%Y-%m-%d-%H-%M') 95 | 96 | result.save(f"ride_prediction_drift_report_{filename_suffix}.html") 97 | 98 | 99 | @flow 100 | def batch_analyze(): 101 | upload_target(TARGET_DATA_FILE) 102 | ref_data = load_reference_data(REFERENCE_DATA_FILE).result() 103 | data = fetch_data().result() 104 | profile, dashboard = run_evidently(ref_data, data).result() 105 | save_report(profile) 106 | save_html_report(dashboard) 107 | 108 | batch_analyze() 109 | -------------------------------------------------------------------------------- /.github/workflows/cd-deploy.yml: -------------------------------------------------------------------------------- 1 | name: CD-Deploy 2 | on: 3 | push: 4 | branches: 5 | - 'develop' 6 | # paths: 7 | # - '06-best-practices/code/**' 8 | 9 | jobs: 10 | build-push-deploy: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - name: Check out repo 14 | uses: actions/checkout@v3 15 | - name: Configure AWS Credentials 16 | uses: aws-actions/configure-aws-credentials@v1 17 | with: 18 | aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} 19 | aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} 20 | aws-region: "eu-west-1" 21 | - uses: hashicorp/setup-terraform@v2 22 | with: 23 | terraform_wrapper: false 24 | 25 | # Define the infrastructure 26 | - name: TF plan 27 | id: tf-plan 28 | working-directory: '06-best-practices/code/infrastructure' 29 | run: | 30 | terraform init -backend-config="key=mlops-zoomcamp-prod.tfstate" -reconfigure && terraform plan -var-file=vars/prod.tfvars 31 | 32 | - name: TF Apply 33 | id: tf-apply 34 | working-directory: '06-best-practices/code/infrastructure' 35 | if: ${{ steps.tf-plan.outcome }} == 'success' 36 | run: | 37 | terraform apply -auto-approve -var-file=vars/prod.tfvars 38 | echo "::set-output name=ecr_repo::$(terraform output ecr_repo | xargs)" 39 | echo "::set-output name=predictions_stream_name::$(terraform output predictions_stream_name | xargs)" 40 | echo "::set-output name=model_bucket::$(terraform output model_bucket | xargs)" 41 | echo "::set-output name=lambda_function::$(terraform output lambda_function | xargs)" 42 | 43 | # Build-Push 44 | - name: Login to Amazon ECR 45 | id: login-ecr 46 | uses: aws-actions/amazon-ecr-login@v1 47 | 48 | - name: Build, tag, and push image to Amazon ECR 49 | id: build-image-step 50 | working-directory: "06-best-practices/code" 51 | env: 52 | ECR_REGISTRY: ${{ steps.login-ecr.outputs.registry }} 53 | ECR_REPOSITORY: ${{ steps.tf-apply.outputs.ecr_repo }} 54 | IMAGE_TAG: "latest" # ${{ github.sha }} 55 | run: | 56 | docker build -t ${ECR_REGISTRY}/${ECR_REPOSITORY}:${IMAGE_TAG} . 57 | docker push $ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG 58 | echo "::set-output name=image_uri::$ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG" 59 | 60 | # Deploy 61 | - name: Get model artifacts 62 | # The steps here are not suited for production. 63 | # In practice, retrieving the latest model version or RUN_ID from a service like MLflow or DVC can also be integrated into a CI/CD pipeline. 64 | # But due to the limited scope of this workshop, we would be keeping things simple. 
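        # Below, the key of the most recently modified object in the dev model bucket is used
        # to derive RUN_ID, and the dev bucket is synced into the prod bucket created by Terraform.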
65 | # In practice, you would also have a separate training pipeline to write new model artifacts to your Model Bucket in Prod. 66 | 67 | id: get-model-artifacts 68 | working-directory: "06-best-practices/code" 69 | env: 70 | MODEL_BUCKET_DEV: "mlflow-models-alexey" 71 | MODEL_BUCKET_PROD: ${{ steps.tf-apply.outputs.model_bucket }} 72 | run: | 73 | export RUN_ID=$(aws s3api list-objects-v2 --bucket ${MODEL_BUCKET_DEV} \ 74 | --query 'sort_by(Contents, &LastModified)[-1].Key' --output=text | cut -f2 -d/) 75 | aws s3 sync s3://${MODEL_BUCKET_DEV} s3://${MODEL_BUCKET_PROD} 76 | echo "::set-output name=run_id::${RUN_ID}" 77 | 78 | - name: Update Lambda 79 | env: 80 | LAMBDA_FUNCTION: ${{ steps.tf-apply.outputs.lambda_function }} 81 | PREDICTIONS_STREAM_NAME: ${{ steps.tf-apply.outputs.predictions_stream_name }} 82 | MODEL_BUCKET: ${{ steps.tf-apply.outputs.model_bucket }} 83 | RUN_ID: ${{ steps.get-model-artifacts.outputs.run_id }} 84 | run: | 85 | variables="{ \ 86 | PREDICTIONS_STREAM_NAME=$PREDICTIONS_STREAM_NAME, MODEL_BUCKET=$MODEL_BUCKET, RUN_ID=$RUN_ID \ 87 | }" 88 | 89 | STATE=$(aws lambda get-function --function-name $LAMBDA_FUNCTION --region "eu-west-1" --query 'Configuration.LastUpdateStatus' --output text) 90 | while [[ "$STATE" == "InProgress" ]] 91 | do 92 | echo "sleep 5sec ...." 93 | sleep 5s 94 | STATE=$(aws lambda get-function --function-name $LAMBDA_FUNCTION --region "eu-west-1" --query 'Configuration.LastUpdateStatus' --output text) 95 | echo $STATE 96 | done 97 | 98 | aws lambda update-function-configuration --function-name $LAMBDA_FUNCTION \ 99 | --environment "Variables=${variables}" 100 | -------------------------------------------------------------------------------- /02-experiment-tracking/mlflow_on_aws.md: -------------------------------------------------------------------------------- 1 | # Basic AWS setup 2 | 3 | This tutorials explains how to configure a remote tracking server on AWS. We will use an RDS database as the backend store and an s3 bucket as the artifact store. 4 | 5 | 1. First, you need to [create an AWS account](https://aws.amazon.com/free). If you open a new account, AWS allows you to use some of their products for free but take into account that **you may be charged for using the AWS services**. More information [here](https://youtu.be/rkKvzCskpLE) and [here](https://aws.amazon.com/premiumsupport/knowledge-center/free-tier-charges/). 6 | 7 | 2. Launch a new EC2 instance. 8 | 9 | For this, you can select one of the instance types that are free tier eligible. For example, we will select an Amazon Linux OS (`Amazon Linux 2 AMI (HVM) - Kernel 5.10, SSD Volume Type`) and a `t2.micro` instance type, which are free tier eligible. 10 | 11 | 12 | 13 | 14 | 15 | 16 | You'll also need to create a new key pair so later you can connect to the new instance using SSH. Click on "Create new key pair" and complete the details like in the image below: 17 | 18 | 19 | 20 | Select the new key pair and then click on "Launch Instance". 21 | 22 | 23 | 24 | Finally, you have to edit the security group so the EC2 instance accepts SSH (port 22) and HTTP connections (port 5000): 25 | 26 | 27 | 28 | 3. Create an s3 bucket to be used as the artifact store. 29 | 30 | Go to s3 and click on "Create bucket". Fill in the bucket name as in the image below and let all the other configurations with their default values. 
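    If you prefer the AWS CLI over the console, the same bucket can be created with a single command (the name below is only an example; bucket names must be globally unique, and the bucket is created in your default CLI region):

    ```bash
    aws s3 mb s3://mlflow-artifacts-remote
    ```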
31 | 32 | 33 | 34 | Note: s3 bucket names must be unique across all AWS account in all the AWS Regions within a partition, that means that once a bucket is created, the name of that bucket cannot be used by another AWS account within the same region. If you get an error saying that the bucket name was already taken you can fix it easily by just changing the name to something like `mlflow-artifacts-remote-2` or another name. 35 | 36 | 4. Create a new PostgreSQL database to be used as the backend store 37 | 38 | Go to the RDS Console and click on "Create database". Make sure to select "PostgreSQL" engine type and the "Free tier" template. 39 | 40 | 41 | 42 | Select a name for your DB instance, set the master username as "mlflow" and tick the option "Auto generate a password" so Amazon RDS generate a password automatically. 43 | 44 | 45 | 46 | Finally, on the section "Additional configuration" specify a database name so RDS automatically creates an initial database for you. 47 | 48 | 49 | 50 | After clicking on "launch database" you will be able to check the newly generated password, but take into account that the automatically generated password will be shown only once! 51 | 52 | 53 | 54 | You can use the default values for all the other configurations. 55 | 56 | Take note of the following information: 57 | 58 | * master username 59 | * password 60 | * initial database name 61 | * endpoint 62 | 63 | Once the DB instance is created, go to the RDS console, select the new db and under "Connectivity & security" select the VPC security group. Modify the security group by adding a new inbound rule that allows postgreSQL connections on the port 5432 from the security group of the EC2 instance. This way, the server will be able to connect to the postgres database. 64 | 65 | 66 | 67 | 5. Connect to the EC2 instance and launch the tracking server. 68 | 69 | Go to the EC2 Console and find the instance launched on the step 2. Click on "Connect" and then follow the steps described in the tab "SSH". 70 | 71 | Run the following commands to install the dependencies, configure the environment and launch the server: 72 | * `sudo yum update` 73 | * `pip3 install mlflow boto3 psycopg2-binary` 74 | * `aws configure` # you'll need to input your AWS credentials here 75 | * `mlflow server -h 0.0.0.0 -p 5000 --backend-store-uri postgresql://DB_USER:DB_PASSWORD@DB_ENDPOINT:5432/DB_NAME --default-artifact-root s3://S3_BUCKET_NAME` 76 | 77 | Note: before launching the server, check that the instance can access the s3 bucket created in the step number 3. To do that, just run this command from the EC2 instance: `aws s3 ls`. You should see the bucket listed in the result. 78 | 79 | 6. Access the remote tracking server from your local machine. 80 | 81 | Open a new tab on your web browser and go to this address: `http://:5000` (you can find the instance's public DNS by checking the details of your instance in the EC2 Console). -------------------------------------------------------------------------------- /06-best-practices/code/README.md: -------------------------------------------------------------------------------- 1 | ## Code snippets 2 | 3 | ### Building and running Docker images 4 | 5 | ```bash 6 | docker build -t stream-model-duration:v2 . 
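# the v2 tag above is only for local runs; the CD workflow (.github/workflows/cd-deploy.yml) pushes the image to ECR tagged as "latest"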
7 | ``` 8 | 9 | ```bash 10 | docker run -it --rm \ 11 | -p 8080:8080 \ 12 | -e PREDICTIONS_STREAM_NAME="ride_predictions" \ 13 | -e RUN_ID="e1efc53e9bd149078b0c12aeaa6365df" \ 14 | -e TEST_RUN="True" \ 15 | -e AWS_DEFAULT_REGION="eu-west-1" \ 16 | stream-model-duration:v2 17 | ``` 18 | 19 | Mounting the model folder: 20 | 21 | ``` 22 | docker run -it --rm \ 23 | -p 8080:8080 \ 24 | -e PREDICTIONS_STREAM_NAME="ride_predictions" \ 25 | -e RUN_ID="Test123" \ 26 | -e MODEL_LOCATION="/app/model" \ 27 | -e TEST_RUN="True" \ 28 | -e AWS_DEFAULT_REGION="eu-west-1" \ 29 | -v $(pwd)/model:/app/model \ 30 | stream-model-duration:v2 31 | ``` 32 | 33 | ### Specifying endpoint URL 34 | 35 | ```bash 36 | aws --endpoint-url=http://localhost:4566 \ 37 | kinesis list-streams 38 | ``` 39 | 40 | ```bash 41 | aws --endpoint-url=http://localhost:4566 \ 42 | kinesis create-stream \ 43 | --stream-name ride_predictions \ 44 | --shard-count 1 45 | ``` 46 | 47 | ```bash 48 | aws --endpoint-url=http://localhost:4566 \ 49 | kinesis get-shard-iterator \ 50 | --shard-id ${SHARD} \ 51 | --shard-iterator-type TRIM_HORIZON \ 52 | --stream-name ${PREDICTIONS_STREAM_NAME} \ 53 | --query 'ShardIterator' 54 | ``` 55 | 56 | ### Unable to locate credentials 57 | 58 | If you get `'Unable to locate credentials'` error, add these 59 | env variables to the `docker-compose.yaml` file: 60 | 61 | ```yaml 62 | - AWS_ACCESS_KEY_ID=abc 63 | - AWS_SECRET_ACCESS_KEY=xyz 64 | ``` 65 | 66 | ### Make 67 | 68 | Without make: 69 | 70 | ``` 71 | isort . 72 | black . 73 | pylint --recursive=y . 74 | pytest tests/ 75 | ``` 76 | 77 | With make: 78 | 79 | ``` 80 | make quality_checks 81 | make test 82 | ``` 83 | 84 | 85 | To prepare the project, run 86 | 87 | ```bash 88 | make setup 89 | ``` 90 | 91 | 92 | ### IaC 93 | w/ Terraform 94 | 95 | #### Setup 96 | 97 | **Installation**: 98 | 99 | * [aws-cli](https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html) (both versions are fine) 100 | * [terraform client](https://www.terraform.io/downloads) 101 | 102 | **Configuration**: 103 | 104 | 1. If you've already created an AWS account, head to the IAM section, generate your secret-key, and download it locally. 105 | [Instructions](https://docs.aws.amazon.com/cli/latest/userguide/getting-started-prereqs.html) 106 | 107 | 2. [Configure]((https://docs.aws.amazon.com/cli/latest/userguide/getting-started-quickstart.html)) `aws-cli` with your downloaded AWS secret keys: 108 | ```shell 109 | $ aws configure 110 | AWS Access Key ID [None]: xxx 111 | AWS Secret Access Key [None]: xxx 112 | Default region name [None]: eu-west-1 113 | Default output format [None]: 114 | ``` 115 | 116 | 3. Verify aws config: 117 | ```shell 118 | $ aws sts get-caller-identity 119 | ``` 120 | 121 | 4. (Optional) Configuring with `aws profile`: [here](https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-sourcing-external.html) and [here](https://registry.terraform.io/providers/hashicorp/aws/latest/docs#using-an-external-credentials-process) 122 | 123 |
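Optionally, verify that both CLIs are installed and on your `PATH` before moving on to the execution steps:

```shell
terraform version
aws --version
```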
124 | 125 | #### Execution 126 | 127 | 128 | 1. To create infra (manually, in order to test on staging env) 129 | ```shell 130 | # Initialize state file (.tfstate) 131 | terraform init 132 | 133 | # Check changes to new infra plan 134 | terraform plan -var-file=vars/stg.tfvars 135 | ``` 136 | 137 | ```shell 138 | # Create new infra 139 | terraform apply -var-file=vars/stg.tfvars 140 | ``` 141 | 142 | 2. To prepare aws env (copy model artifacts, set env-vars for lambda etc.): 143 | ``` 144 | . ./scripts/deploy_manual.sh 145 | ``` 146 | 147 | 3. To test the pipeline end-to-end with our new cloud infra: 148 | ``` 149 | . ./scripts/test_cloud_e2e.sh 150 | ``` 151 | 152 | 4. And then check on CloudWatch logs. Or try `get-records` on the `output_kinesis_stream` (refer to `integration_test`) 153 | 154 | 5. Destroy infra after use: 155 | ```shell 156 | # Delete infra after your work, to avoid costs on any running services 157 | terraform destroy 158 | ``` 159 | 160 |
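Once `terraform apply` has finished, the resource names for the environment can be read back at any time with `terraform output` (run from the `infrastructure` directory); these are the same outputs the CD workflow captures to drive the image push and the Lambda configuration update:

```shell
terraform output ecr_repo
terraform output predictions_stream_name
terraform output model_bucket
terraform output lambda_function
```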
161 | 162 | ### CI/CD 163 | 164 | 1. Create a PR (feature branch): `.github/workflows/ci-tests.yml` 165 | * Env setup, Unit test, Integration test, Terraform plan 166 | 2. Merge PR to `develop`: `.github/workflows/cd-deploy.yml` 167 | * Terraform plan, Terraform apply, Docker build & ECR push, Update Lambda config 168 | 169 | ### Notes 170 | 171 | * Unfortunately, the `RUN_ID` (if set via the `ENV` or `ARG` in `Dockerfile`), disappears during lambda invocation. 172 | We'll set it via `aws lambda update-function-configuration` CLI command (refer to `deploy_manual.sh` or `.github/workflows/cd-deploy.yml`) 173 | 174 | -------------------------------------------------------------------------------- /02-experiment-tracking/running-mlflow-examples/scenario-3.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Scenario 3: Multiple data scientists working on multiple ML models\n", 8 | "\n", 9 | "MLflow setup:\n", 10 | "* Tracking server: yes, remote server (EC2).\n", 11 | "* Backend store: postgresql database.\n", 12 | "* Artifacts store: s3 bucket.\n", 13 | "\n", 14 | "The experiments can be explored by accessing the remote server.\n", 15 | "\n", 16 | "The exampe uses AWS to host a remote server. In order to run the example you'll need an AWS account. Follow the steps described in the file `mlflow_on_aws.md` to create a new AWS account and launch the tracking server. " 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": null, 22 | "metadata": {}, 23 | "outputs": [], 24 | "source": [ 25 | "import mlflow\n", 26 | "import os\n", 27 | "\n", 28 | "os.environ[\"AWS_PROFILE\"] = \"\" # fill in with your AWS profile. 
More info: https://docs.aws.amazon.com/sdk-for-java/latest/developer-guide/setup.html#setup-credentials\n", 29 | "\n", 30 | "TRACKING_SERVER_HOST = \"\" # fill in with the public DNS of the EC2 instance\n", 31 | "mlflow.set_tracking_uri(f\"http://{TRACKING_SERVER_HOST}:5000\")" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": null, 37 | "metadata": {}, 38 | "outputs": [], 39 | "source": [ 40 | "print(f\"tracking URI: '{mlflow.get_tracking_uri()}'\")" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": null, 46 | "metadata": {}, 47 | "outputs": [], 48 | "source": [ 49 | "mlflow.list_experiments()" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": null, 55 | "metadata": {}, 56 | "outputs": [], 57 | "source": [ 58 | "from sklearn.linear_model import LogisticRegression\n", 59 | "from sklearn.datasets import load_iris\n", 60 | "from sklearn.metrics import accuracy_score\n", 61 | "\n", 62 | "mlflow.set_experiment(\"my-experiment-1\")\n", 63 | "\n", 64 | "with mlflow.start_run():\n", 65 | "\n", 66 | " X, y = load_iris(return_X_y=True)\n", 67 | "\n", 68 | " params = {\"C\": 0.1, \"random_state\": 42}\n", 69 | " mlflow.log_params(params)\n", 70 | "\n", 71 | " lr = LogisticRegression(**params).fit(X, y)\n", 72 | " y_pred = lr.predict(X)\n", 73 | " mlflow.log_metric(\"accuracy\", accuracy_score(y, y_pred))\n", 74 | "\n", 75 | " mlflow.sklearn.log_model(lr, artifact_path=\"models\")\n", 76 | " print(f\"default artifacts URI: '{mlflow.get_artifact_uri()}'\")" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": null, 82 | "metadata": {}, 83 | "outputs": [], 84 | "source": [ 85 | "mlflow.list_experiments()" 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": null, 91 | "metadata": {}, 92 | "outputs": [], 93 | "source": [] 94 | }, 95 | { 96 | "cell_type": "markdown", 97 | "metadata": {}, 98 | "source": [ 99 | "### Interacting with the model registry" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": null, 105 | "metadata": {}, 106 | "outputs": [], 107 | "source": [ 108 | "from mlflow.tracking import MlflowClient\n", 109 | "\n", 110 | "\n", 111 | "client = MlflowClient(f\"http://{TRACKING_SERVER_HOST}:5000\")" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": null, 117 | "metadata": {}, 118 | "outputs": [], 119 | "source": [ 120 | "client.list_registered_models()" 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": null, 126 | "metadata": {}, 127 | "outputs": [], 128 | "source": [ 129 | "run_id = client.list_run_infos(experiment_id='1')[0].run_id\n", 130 | "mlflow.register_model(\n", 131 | " model_uri=f\"runs:/{run_id}/models\",\n", 132 | " name='iris-classifier'\n", 133 | ")" 134 | ] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "execution_count": null, 139 | "metadata": {}, 140 | "outputs": [], 141 | "source": [] 142 | } 143 | ], 144 | "metadata": { 145 | "interpreter": { 146 | "hash": "0848c9d6c7d415ad6c477ff7ff8e98694d1a4aa96d0deee89244642e6b630036" 147 | }, 148 | "kernelspec": { 149 | "display_name": "Python 3.9.12 ('exp-tracking-env')", 150 | "language": "python", 151 | "name": "python3" 152 | }, 153 | "language_info": { 154 | "codemirror_mode": { 155 | "name": "ipython", 156 | "version": 3 157 | }, 158 | "file_extension": ".py", 159 | "mimetype": "text/x-python", 160 | "name": "python", 161 | "nbconvert_exporter": "python", 162 | "pygments_lexer": "ipython3", 163 | "version": "3.9.12" 164 | }, 165 | 
"orig_nbformat": 4 166 | }, 167 | "nbformat": 4, 168 | "nbformat_minor": 2 169 | } 170 | -------------------------------------------------------------------------------- /04-deployment/homework/starter.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "id": "2c51efaa", 7 | "metadata": {}, 8 | "outputs": [ 9 | { 10 | "name": "stdout", 11 | "output_type": "stream", 12 | "text": [ 13 | "scikit-learn==1.0.2\n", 14 | "scikit-learn-intelex==2021.20210714.120553\n" 15 | ] 16 | } 17 | ], 18 | "source": [ 19 | "!pip freeze | grep scikit-learn" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 1, 25 | "id": "0ef880a0", 26 | "metadata": {}, 27 | "outputs": [], 28 | "source": [ 29 | "import pickle\n", 30 | "import pandas as pd" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 15, 36 | "id": "920cff32", 37 | "metadata": {}, 38 | "outputs": [], 39 | "source": [ 40 | "year = 2021\n", 41 | "month = 2\n", 42 | "\n", 43 | "input_file = f'https://nyc-tlc.s3.amazonaws.com/trip+data/fhv_tripdata_{year:04d}-{month:02d}.parquet'\n", 44 | "output_file = f'output/fhv_tripdata_{year:04d}-{month:02d}.parquet'" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": 2, 50 | "id": "7836ccfd", 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "with open('model.bin', 'rb') as f_in:\n", 55 | " dv, lr = pickle.load(f_in)" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": 3, 61 | "id": "41c08294", 62 | "metadata": {}, 63 | "outputs": [], 64 | "source": [ 65 | "categorical = ['PUlocationID', 'DOlocationID']\n", 66 | "\n", 67 | "def read_data(filename):\n", 68 | " df = pd.read_parquet(filename)\n", 69 | " \n", 70 | " df['duration'] = df.dropOff_datetime - df.pickup_datetime\n", 71 | " df['duration'] = df.duration.dt.total_seconds() / 60\n", 72 | "\n", 73 | " df = df[(df.duration >= 1) & (df.duration <= 60)].copy()\n", 74 | "\n", 75 | " df[categorical] = df[categorical].fillna(-1).astype('int').astype('str')\n", 76 | " \n", 77 | " return df" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": 10, 83 | "id": "4854399a", 84 | "metadata": {}, 85 | "outputs": [], 86 | "source": [ 87 | "df = read_data(input_file)\n", 88 | "df['ride_id'] = f'{year:04d}/{month:02d}_' + df.index.astype('str')" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": 6, 94 | "id": "669fda0a", 95 | "metadata": {}, 96 | "outputs": [], 97 | "source": [ 98 | "dicts = df[categorical].to_dict(orient='records')\n", 99 | "X_val = dv.transform(dicts)\n", 100 | "y_pred = lr.predict(X_val)" 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": 7, 106 | "id": "914b15a5", 107 | "metadata": {}, 108 | "outputs": [ 109 | { 110 | "data": { 111 | "text/plain": [ 112 | "16.191691679979066" 113 | ] 114 | }, 115 | "execution_count": 7, 116 | "metadata": {}, 117 | "output_type": "execute_result" 118 | } 119 | ], 120 | "source": [ 121 | "y_pred.mean()" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": 13, 127 | "id": "037e3d22", 128 | "metadata": {}, 129 | "outputs": [], 130 | "source": [ 131 | "df_result = pd.DataFrame()\n", 132 | "df_result['ride_id'] = df['ride_id']\n", 133 | "df_result['predicted_duration'] = y_pred" 134 | ] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "execution_count": 17, 139 | "id": "7a5753be", 140 | "metadata": {}, 141 | "outputs": [], 142 | 
"source": [ 143 | "df_result.to_parquet(\n", 144 | " output_file,\n", 145 | " engine='pyarrow',\n", 146 | " compression=None,\n", 147 | " index=False\n", 148 | ")" 149 | ] 150 | }, 151 | { 152 | "cell_type": "code", 153 | "execution_count": 18, 154 | "id": "f0b3b58c", 155 | "metadata": {}, 156 | "outputs": [ 157 | { 158 | "name": "stdout", 159 | "output_type": "stream", 160 | "text": [ 161 | "total 19M\r\n", 162 | "-rw-rw-r-- 1 ubuntu ubuntu 19M Jun 30 08:43 fhv_tripdata_2021-02.parquet\r\n" 163 | ] 164 | } 165 | ], 166 | "source": [ 167 | "!ls -lh output/" 168 | ] 169 | }, 170 | { 171 | "cell_type": "code", 172 | "execution_count": null, 173 | "id": "0dbe3e15", 174 | "metadata": {}, 175 | "outputs": [], 176 | "source": [] 177 | } 178 | ], 179 | "metadata": { 180 | "kernelspec": { 181 | "display_name": "Python 3 (ipykernel)", 182 | "language": "python", 183 | "name": "python3" 184 | }, 185 | "language_info": { 186 | "codemirror_mode": { 187 | "name": "ipython", 188 | "version": 3 189 | }, 190 | "file_extension": ".py", 191 | "mimetype": "text/x-python", 192 | "name": "python", 193 | "nbconvert_exporter": "python", 194 | "pygments_lexer": "ipython3", 195 | "version": "3.9.7" 196 | } 197 | }, 198 | "nbformat": 4, 199 | "nbformat_minor": 5 200 | } 201 | --------------------------------------------------------------------------------