├── .github └── workflows │ ├── documentation.yaml │ ├── json_to_md.py │ ├── serve.yaml │ └── workloads.yaml ├── .gitignore ├── .pre-commit-config.yaml ├── LICENSE ├── Makefile ├── README.md ├── datasets ├── dataset.csv ├── holdout.csv ├── projects.csv └── tags.csv ├── deploy ├── cluster_compute.yaml ├── cluster_env.yaml ├── jobs │ ├── workloads.sh │ └── workloads.yaml └── services │ ├── serve_model.py │ └── serve_model.yaml ├── docs ├── index.md └── madewithml │ ├── data.md │ ├── evaluate.md │ ├── models.md │ ├── predict.md │ ├── serve.md │ ├── train.md │ ├── tune.md │ └── utils.md ├── madewithml ├── __init__.py ├── config.py ├── data.py ├── evaluate.py ├── models.py ├── predict.py ├── serve.py ├── train.py ├── tune.py └── utils.py ├── mkdocs.yml ├── notebooks ├── benchmarks.ipynb ├── clear_cell_nums.py └── madewithml.ipynb ├── pyproject.toml ├── requirements.txt └── tests ├── code ├── conftest.py ├── test_data.py ├── test_predict.py ├── test_train.py ├── test_tune.py ├── test_utils.py └── utils.py ├── data ├── conftest.py └── test_dataset.py └── model ├── conftest.py ├── test_behavioral.py └── utils.py /.github/workflows/documentation.yaml: -------------------------------------------------------------------------------- 1 | name: documentation 2 | on: 3 | push: 4 | branches: 5 | - main 6 | 7 | jobs: 8 | build-docs: 9 | runs-on: ubuntu-22.04 10 | steps: 11 | # Set up dependencies 12 | - uses: actions/checkout@v3 13 | - uses: actions/setup-python@v4 14 | with: 15 | python-version: '3.10.11' 16 | cache: 'pip' 17 | - run: python3 -m pip install mkdocs==1.4.2 mkdocstrings==0.21.2 "mkdocstrings[python]>=0.18" 18 | 19 | # Deploy docs 20 | - name: Deploy documentation 21 | run: mkdocs gh-deploy --force 22 | -------------------------------------------------------------------------------- /.github/workflows/json_to_md.py: -------------------------------------------------------------------------------- 1 | import json 2 | import sys 3 | 4 | 5 | def to_markdown(data): 6 | markdown = "" 7 | for key, value in data.items(): 8 | markdown += f"**{key}:**\n\n" 9 | if isinstance(value, dict): 10 | markdown += "| Key | Value |\n| --- | --- |\n" 11 | for nested_key, nested_value in value.items(): 12 | nested_value = ( 13 | round(nested_value, 3) 14 | if isinstance(nested_value, float) 15 | else {k: round(v, 3) for k, v in nested_value.items()} 16 | if isinstance(nested_value, dict) 17 | else nested_value 18 | ) 19 | markdown += f"| {nested_key} | {nested_value} |\n" 20 | elif isinstance(value, list) and all(isinstance(item, dict) for item in value): 21 | if value: 22 | headers = sorted(set().union(*[item.keys() for item in value])) 23 | markdown += "| " + " | ".join(headers) + " |\n| " + " | ".join(["---"] * len(headers)) + " |\n" 24 | for item in value: 25 | value_list = [ 26 | "{:.3e}".format(float(item.get(header, ""))) if not str(item.get(header, "")).isdigit() else str(item.get(header, "")) 27 | for header in headers 28 | ] 29 | markdown += "| " + " | ".join(value_list) + " |\n" 30 | else: 31 | markdown += "(empty list)\n" 32 | else: 33 | markdown += f"{value}\n" 34 | markdown += "\n" 35 | return markdown 36 | 37 | 38 | def json_to_markdown(json_fp, md_fp): 39 | """Convert a json file to markdown.""" 40 | # Read JSON file 41 | with open(json_fp, "r") as file: 42 | data = json.load(file) 43 | 44 | # Convert to markdown 45 | markdown = to_markdown(data) 46 | 47 | # Save to markdown file 48 | with open(md_fp, "w") as file: 49 | file.write(markdown) 50 | return markdown 51 | 52 | 53 | if __name__ == 
"__main__": 54 | # Check if the correct number of arguments is provided 55 | if len(sys.argv) < 3: 56 | print("Usage: python script.py ") 57 | sys.exit(1) 58 | 59 | # Get the JSON file path and output Markdown file path from command-line arguments 60 | json_file = sys.argv[1] 61 | md_file = sys.argv[2] 62 | 63 | # Call the JSON to Markdown conversion function 64 | json_to_markdown(json_file, md_file) 65 | -------------------------------------------------------------------------------- /.github/workflows/serve.yaml: -------------------------------------------------------------------------------- 1 | name: serve 2 | on: 3 | workflow_dispatch: # manual 4 | push: 5 | branches: 6 | - main 7 | permissions: write-all 8 | 9 | jobs: 10 | serve: 11 | runs-on: ubuntu-22.04 12 | steps: 13 | 14 | # Configure AWS credentials 15 | - name: Configure AWS credentials 16 | uses: aws-actions/configure-aws-credentials@v2 17 | with: 18 | role-to-assume: arn:aws:iam::593241322649:role/github-actions-madewithml 19 | role-session-name: s3access 20 | aws-region: us-west-2 21 | 22 | # Set up dependencies 23 | - uses: actions/checkout@v3 24 | - uses: actions/setup-python@v4 25 | with: 26 | python-version: '3.10.11' 27 | cache: 'pip' 28 | - run: python3 -m pip install anyscale==0.5.131 typer==0.9.0 29 | 30 | # Serve model 31 | - name: Serve model 32 | run: | 33 | export ANYSCALE_HOST=${{ secrets.ANYSCALE_HOST }} 34 | export ANYSCALE_CLI_TOKEN=${{ secrets.ANYSCALE_CLI_TOKEN }} 35 | anyscale service rollout --service-config-file deploy/services/serve_model.yaml 36 | -------------------------------------------------------------------------------- /.github/workflows/workloads.yaml: -------------------------------------------------------------------------------- 1 | name: workloads 2 | on: 3 | workflow_dispatch: # manual 4 | pull_request: 5 | branches: 6 | - main 7 | permissions: write-all 8 | 9 | jobs: 10 | workloads: 11 | runs-on: ubuntu-22.04 12 | steps: 13 | 14 | # Configure AWS credentials 15 | - name: Configure AWS credentials 16 | uses: aws-actions/configure-aws-credentials@v2 17 | with: 18 | role-to-assume: arn:aws:iam::593241322649:role/github-actions-madewithml 19 | role-session-name: s3access 20 | aws-region: us-west-2 21 | 22 | # Set up dependencies 23 | - uses: actions/checkout@v3 24 | - uses: actions/setup-python@v4 25 | with: 26 | python-version: '3.10.11' 27 | cache: 'pip' 28 | - run: python3 -m pip install anyscale==0.5.131 typer==0.9.0 29 | 30 | # Run workloads 31 | - name: Workloads 32 | run: | 33 | export ANYSCALE_HOST=${{ secrets.ANYSCALE_HOST }} 34 | export ANYSCALE_CLI_TOKEN=${{ secrets.ANYSCALE_CLI_TOKEN }} 35 | anyscale jobs submit deploy/jobs/workloads.yaml --wait 36 | 37 | # Read results from S3 38 | - name: Read results from S3 39 | run: | 40 | mkdir results 41 | aws s3 cp s3://madewithml/${{ github.actor }}/results/ results/ --recursive 42 | python .github/workflows/json_to_md.py results/training_results.json results/training_results.md 43 | python .github/workflows/json_to_md.py results/evaluation_results.json results/evaluation_results.md 44 | 45 | # Comment results to PR 46 | - name: Comment training results on PR 47 | uses: thollander/actions-comment-pull-request@v2 48 | with: 49 | filePath: results/training_results.md 50 | - name: Comment evaluation results on PR 51 | uses: thollander/actions-comment-pull-request@v2 52 | with: 53 | filePath: results/evaluation_results.md 54 | -------------------------------------------------------------------------------- /.gitignore: 
-------------------------------------------------------------------------------- 1 | # Data 2 | logs/ 3 | stores/ 4 | mlflow/ 5 | results/ 6 | workspaces/ 7 | efs/ 8 | 9 | # VSCode 10 | .vscode/ 11 | .idea 12 | 13 | # Byte-compiled / optimized / DLL files 14 | __pycache__/ 15 | *.py[cod] 16 | *$py.class 17 | 18 | # C extensions 19 | *.so 20 | 21 | # Distribution / packaging 22 | .Python 23 | build/ 24 | develop-eggs/ 25 | dist/ 26 | downloads/ 27 | eggs/ 28 | .eggs/ 29 | lib/ 30 | lib64/ 31 | parts/ 32 | sdist/ 33 | var/ 34 | wheels/ 35 | pip-wheel-metadata/ 36 | share/python-wheels/ 37 | *.egg-info/ 38 | .installed.cfg 39 | *.egg 40 | MANIFEST 41 | 42 | # PyInstaller 43 | *.manifest 44 | *.spec 45 | 46 | # Installer logs 47 | pip-log.txt 48 | pip-delete-this-directory.txt 49 | 50 | # Unit test / coverage reports 51 | htmlcov/ 52 | .tox/ 53 | .nox/ 54 | .coverage 55 | .coverage.* 56 | .cache 57 | nosetests.xml 58 | coverage.xml 59 | *.cover 60 | *.py,cover 61 | .hypothesis/ 62 | .pytest_cache/ 63 | 64 | # Flask: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy: 69 | .scrapy 70 | 71 | # Sphinx 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # IPython 78 | .ipynb_checkpoints 79 | profile_default/ 80 | ipython_config.py 81 | 82 | # pyenv 83 | .python-version 84 | 85 | # PEP 582 86 | __pypackages__/ 87 | 88 | # Celery 89 | celerybeat-schedule 90 | celerybeat.pid 91 | 92 | # Environment 93 | .env 94 | .venv 95 | env/ 96 | venv/ 97 | ENV/ 98 | env.bak/ 99 | venv.bak/ 100 | 101 | # mkdocs 102 | site/ 103 | 104 | # Airflow 105 | airflow/airflow.db 106 | 107 | # MacOS 108 | .DS_Store 109 | 110 | # Clean up 111 | .trash/ 112 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # See https://pre-commit.com for more information 2 | # See https://pre-commit.com/hooks.html for more hooks 3 | repos: 4 | - repo: https://github.com/pre-commit/pre-commit-hooks 5 | rev: v4.5.0 6 | hooks: 7 | - id: trailing-whitespace 8 | - id: end-of-file-fixer 9 | - id: check-merge-conflict 10 | - id: check-yaml 11 | - id: check-added-large-files 12 | args: ['--maxkb=1000'] 13 | exclude: "notebooks" 14 | - id: check-yaml 15 | exclude: "mkdocs.yml" 16 | - repo: local 17 | hooks: 18 | - id: clean 19 | name: clean 20 | entry: make 21 | args: ["clean"] 22 | language: system 23 | pass_filenames: false 24 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Made With ML 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # Makefile 2 | SHELL = /bin/bash 3 | 4 | # Styling 5 | .PHONY: style 6 | style: 7 | black . 8 | flake8 9 | python3 -m isort . 10 | pyupgrade 11 | 12 | # Cleaning 13 | .PHONY: clean 14 | clean: style 15 | python notebooks/clear_cell_nums.py 16 | find . -type f -name "*.DS_Store" -ls -delete 17 | find . | grep -E "(__pycache__|\.pyc|\.pyo)" | xargs rm -rf 18 | find . | grep -E ".pytest_cache" | xargs rm -rf 19 | find . | grep -E ".ipynb_checkpoints" | xargs rm -rf 20 | rm -rf .coverage* 21 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |
2 |

 Made With ML

3 | Design · Develop · Deploy · Iterate 4 |
5 | Join 40K+ developers in learning how to responsibly deliver value with ML. 6 |
7 |
8 | 9 |
10 | 11 |
12 |   13 |   14 |   15 | 16 |
17 | 🔥  Among the top ML repositories on GitHub 18 |
19 | 20 |
21 |
22 | 23 | ## Lessons 24 | 25 | Learn how to combine machine learning with software engineering to design, develop, deploy and iterate on production-grade ML applications. 26 | 27 | - Lessons: https://madewithml.com/ 28 | - Code: [GokuMohandas/Made-With-ML](https://github.com/GokuMohandas/Made-With-ML) 29 | 30 | 31 | lessons 32 | 33 | 34 | ## Overview 35 | 36 | In this course, we'll go from experimentation (design + development) to production (deployment + iteration). We'll do this iteratively by motivating the components that will enable us to build a *reliable* production system. 37 | 38 |
39 |   Be sure to watch the video below for a quick overview of what we'll be building. 40 |
41 | 42 |
43 | Course overview video 44 |
45 | 46 |
47 | 48 | - **💡 First principles**: before we jump straight into the code, we develop a first principles understanding for every machine learning concept. 49 | - **💻 Best practices**: implement software engineering best practices as we develop and deploy our machine learning models. 50 | - **📈 Scale**: easily scale ML workloads (data, train, tune, serve) in Python without having to learn completely new languages. 51 | - **⚙️ MLOps**: connect MLOps components (tracking, testing, serving, orchestration, etc.) as we build an end-to-end machine learning system. 52 | - **🚀 Dev to Prod**: learn how to quickly and reliably go from development to production without any changes to our code or infra management. 53 | - **🐙 CI/CD**: learn how to create mature CI/CD workflows to continuously train and deploy better models in a modular way that integrates with any stack. 54 | 55 | ## Audience 56 | 57 | Machine learning is not a separate industry, instead, it's a powerful way of thinking about data that's not reserved for any one type of person. 58 | 59 | - **👩‍💻 All developers**: whether software/infra engineer or data scientist, ML is increasingly becoming a key part of the products that you'll be developing. 60 | - **👩‍🎓 College graduates**: learn the practical skills required for industry and bridge gap between the university curriculum and what industry expects. 61 | - **👩‍💼 Product/Leadership**: who want to develop a technical foundation so that they can build amazing (and reliable) products powered by machine learning. 62 | 63 | ## Set up 64 | 65 | Be sure to go through the [course](https://madewithml/#course) for a much more detailed walkthrough of the content on this repository. We will have instructions for both local laptop and Anyscale clusters for the sections below, so be sure to toggle the ► dropdown based on what you're using (Anyscale instructions will be toggled on by default). If you do want to run this course with Anyscale, where we'll provide the **structure**, **compute (GPUs)** and **community** to learn everything in one day, join our next upcoming live cohort → [sign up here](https://4190urw86oh.typeform.com/madewithml)! 66 | 67 | ### Cluster 68 | 69 | We'll start by setting up our cluster with the environment and compute configurations. 70 | 71 |
72 | Local
73 | Your personal laptop (single machine) will act as the cluster, where one CPU will be the head node and some of the remaining CPUs will be the worker nodes. All of the code in this course will work on any personal laptop, though it will be slower than executing the same workloads on a larger cluster. 74 |
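For a quick sanity check of what Ray will actually see on your laptop, here's a minimal sketch (assuming `ray` has already been installed from `requirements.txt`):

```python
import ray

# Start (or connect to) a local Ray cluster on this machine.
ray.init()

# Total CPUs/GPUs/memory that Ray detected and can schedule workloads on.
print(ray.cluster_resources())
```

The `--num-workers` and `--cpu-per-worker` values passed to the scripts later should fit within these totals.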
75 | 76 |
77 | Anyscale
78 | 79 | We can create an [Anyscale Workspace](https://docs.anyscale.com/develop/workspaces/get-started) using the [webpage UI](https://console.anyscale.com/o/madewithml/workspaces/add/blank). 80 | 81 | ```md 82 | - Workspace name: `madewithml` 83 | - Project: `madewithml` 84 | - Cluster environment name: `madewithml-cluster-env` 85 | # Toggle `Select from saved configurations` 86 | - Compute config: `madewithml-cluster-compute-g5.4xlarge` 87 | ``` 88 | 89 | > Alternatively, we can use the [CLI](https://docs.anyscale.com/reference/anyscale-cli) to create the workspace via `anyscale workspace create ...` 90 | 91 |
92 | 93 |
94 | Other (cloud platforms, K8s, on-prem)
95 | 96 | If you don't want to do this course locally or via Anyscale, you have the following options: 97 | 98 | - On [AWS and GCP](https://docs.ray.io/en/latest/cluster/vms/index.html#cloud-vm-index). Community-supported Azure and Aliyun integrations also exist. 99 | - On [Kubernetes](https://docs.ray.io/en/latest/cluster/kubernetes/index.html#kuberay-index), via the officially supported KubeRay project. 100 | - Deploy Ray manually [on-prem](https://docs.ray.io/en/latest/cluster/vms/user-guides/launching-clusters/on-premises.html#on-prem) or onto platforms [not listed here](https://docs.ray.io/en/latest/cluster/vms/user-guides/community/index.html#ref-cluster-setup). 101 | 102 |
103 | 104 | ### Git setup 105 | 106 | Create a repository by following these instructions: [Create a new repository](https://github.com/new) → name it `Made-With-ML` → Toggle `Add a README file` (**very important** as this creates a `main` branch) → Click `Create repository` (scroll down) 107 | 108 | Now we're ready to clone the repository that has all of our code: 109 | 110 | ```bash 111 | git clone https://github.com/GokuMohandas/Made-With-ML.git . 112 | ``` 113 | 114 | ### Credentials 115 | 116 | ```bash 117 | touch .env 118 | ``` 119 | ```bash 120 | # Inside .env 121 | GITHUB_USERNAME="CHANGE_THIS_TO_YOUR_USERNAME" # ← CHANGE THIS 122 | ``` 123 | ```bash 124 | source .env 125 | ``` 126 | 127 | ### Virtual environment 128 | 129 |
130 | Local
131 | 132 | ```bash 133 | export PYTHONPATH=$PYTHONPATH:$PWD 134 | python3 -m venv venv # recommend using Python 3.10 135 | source venv/bin/activate # on Windows: venv\Scripts\activate 136 | python3 -m pip install --upgrade pip setuptools wheel 137 | python3 -m pip install -r requirements.txt 138 | pre-commit install 139 | pre-commit autoupdate 140 | ``` 141 | 142 | > Highly recommend using Python `3.10` and using [pyenv](https://github.com/pyenv/pyenv) (mac) or [pyenv-win](https://github.com/pyenv-win/pyenv-win) (windows). 143 | 144 |
145 | 146 |
147 | Anyscale
148 | 149 | Our environment with the appropriate Python version and libraries is already all set for us through the cluster environment we used when setting up our Anyscale Workspace. So we just need to run these commands: 150 | ```bash 151 | export PYTHONPATH=$PYTHONPATH:$PWD 152 | pre-commit install 153 | pre-commit autoupdate 154 | ``` 155 | 156 |
157 | 158 | ## Notebook 159 | 160 | Start by exploring the [jupyter notebook](notebooks/madewithml.ipynb) to interactively walk through the core machine learning workloads. 161 | 162 |
163 | 164 |
165 | 166 |
167 | Local
168 | 169 | ```bash 170 | # Start notebook 171 | jupyter lab notebooks/madewithml.ipynb 172 | ``` 173 | 174 |
175 | 176 |
177 | Anyscale
178 | 179 | Click on the Jupyter icon    at the top right corner of our Anyscale Workspace page and this will open up our JupyterLab instance in a new tab. Then navigate to the `notebooks` directory and open up the `madewithml.ipynb` notebook. 180 | 181 |
182 | 183 | 184 | ## Scripts 185 | 186 | Now we'll execute the same workloads using the clean Python scripts following software engineering best practices (testing, documentation, logging, serving, versioning, etc.) The code we've implemented in our notebook will be refactored into the following scripts: 187 | 188 | ```bash 189 | madewithml 190 | ├── config.py 191 | ├── data.py 192 | ├── evaluate.py 193 | ├── models.py 194 | ├── predict.py 195 | ├── serve.py 196 | ├── train.py 197 | ├── tune.py 198 | └── utils.py 199 | ``` 200 | 201 | **Note**: Change the `--num-workers`, `--cpu-per-worker`, and `--gpu-per-worker` input argument values below based on your system's resources. For example, if you're on a local laptop, a reasonable configuration would be `--num-workers 6 --cpu-per-worker 1 --gpu-per-worker 0`. 202 | 203 | ### Training 204 | ```bash 205 | export EXPERIMENT_NAME="llm" 206 | export DATASET_LOC="https://raw.githubusercontent.com/GokuMohandas/Made-With-ML/main/datasets/dataset.csv" 207 | export TRAIN_LOOP_CONFIG='{"dropout_p": 0.5, "lr": 1e-4, "lr_factor": 0.8, "lr_patience": 3}' 208 | python madewithml/train.py \ 209 | --experiment-name "$EXPERIMENT_NAME" \ 210 | --dataset-loc "$DATASET_LOC" \ 211 | --train-loop-config "$TRAIN_LOOP_CONFIG" \ 212 | --num-workers 1 \ 213 | --cpu-per-worker 3 \ 214 | --gpu-per-worker 1 \ 215 | --num-epochs 10 \ 216 | --batch-size 256 \ 217 | --results-fp results/training_results.json 218 | ``` 219 | 220 | ### Tuning 221 | ```bash 222 | export EXPERIMENT_NAME="llm" 223 | export DATASET_LOC="https://raw.githubusercontent.com/GokuMohandas/Made-With-ML/main/datasets/dataset.csv" 224 | export TRAIN_LOOP_CONFIG='{"dropout_p": 0.5, "lr": 1e-4, "lr_factor": 0.8, "lr_patience": 3}' 225 | export INITIAL_PARAMS="[{\"train_loop_config\": $TRAIN_LOOP_CONFIG}]" 226 | python madewithml/tune.py \ 227 | --experiment-name "$EXPERIMENT_NAME" \ 228 | --dataset-loc "$DATASET_LOC" \ 229 | --initial-params "$INITIAL_PARAMS" \ 230 | --num-runs 2 \ 231 | --num-workers 1 \ 232 | --cpu-per-worker 3 \ 233 | --gpu-per-worker 1 \ 234 | --num-epochs 10 \ 235 | --batch-size 256 \ 236 | --results-fp results/tuning_results.json 237 | ``` 238 | 239 | ### Experiment tracking 240 | 241 | We'll use [MLflow](https://mlflow.org/) to track our experiments and store our models and the [MLflow Tracking UI](https://www.mlflow.org/docs/latest/tracking.html#tracking-ui) to view our experiments. We have been saving our experiments to a local directory but note that in an actual production setting, we would have a central location to store all of our experiments. It's easy/inexpensive to spin up your own MLflow server for all of your team members to track their experiments on or use a managed solution like [Weights & Biases](https://wandb.ai/site), [Comet](https://www.comet.ml/), etc. 242 | 243 | ```bash 244 | export MODEL_REGISTRY=$(python -c "from madewithml import config; print(config.MODEL_REGISTRY)") 245 | mlflow server -h 0.0.0.0 -p 8080 --backend-store-uri $MODEL_REGISTRY 246 | ``` 247 | 248 |
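Besides the UI, we can also sanity-check the tracking store programmatically. Here's a rough sketch (assuming the same local `MODEL_REGISTRY` store and `llm` experiment used above, with `val_loss` logged during training), which is roughly the lookup that `madewithml/predict.py get-best-run-id` performs for us later:

```python
import mlflow
from mlflow.tracking import MlflowClient

from madewithml import config

# Point MLflow at the same local store the training/tuning scripts wrote to
# (the same value we passed to --backend-store-uri above).
mlflow.set_tracking_uri(str(config.MODEL_REGISTRY))
client = MlflowClient()

# Look up the experiment and list its runs, best validation loss first.
experiment = client.get_experiment_by_name("llm")
runs = client.search_runs(
    experiment_ids=[experiment.experiment_id],
    order_by=["metrics.val_loss ASC"],
)
print(runs[0].info.run_id, runs[0].data.metrics)
```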
249 | Local
250 | 251 | If you're running this notebook on your local laptop then head on over to http://localhost:8080/ to view your MLflow dashboard. 252 | 253 |
254 | 255 |
256 | Anyscale
257 | 258 | If you're on Anyscale Workspaces, then we need to first expose the port of the MLflow server. Run the following command on your Anyscale Workspace terminal to generate the public URL to your MLflow server. 259 | 260 | ```bash 261 | APP_PORT=8080 262 | echo https://$APP_PORT-port-$ANYSCALE_SESSION_DOMAIN 263 | ``` 264 | 265 |
266 | 267 | ### Evaluation 268 | ```bash 269 | export EXPERIMENT_NAME="llm" 270 | export RUN_ID=$(python madewithml/predict.py get-best-run-id --experiment-name $EXPERIMENT_NAME --metric val_loss --mode ASC) 271 | export HOLDOUT_LOC="https://raw.githubusercontent.com/GokuMohandas/Made-With-ML/main/datasets/holdout.csv" 272 | python madewithml/evaluate.py \ 273 | --run-id $RUN_ID \ 274 | --dataset-loc $HOLDOUT_LOC \ 275 | --results-fp results/evaluation_results.json 276 | ``` 277 | ```json 278 | { 279 | "timestamp": "June 09, 2023 09:26:18 AM", 280 | "run_id": "6149e3fec8d24f1492d4a4cabd5c06f6", 281 | "overall": { 282 | "precision": 0.9076136428670714, 283 | "recall": 0.9057591623036649, 284 | "f1": 0.9046792827719773, 285 | "num_samples": 191.0 286 | }, 287 | ... 288 | ``` 289 | 290 | ### Inference 291 | ```bash 292 | export EXPERIMENT_NAME="llm" 293 | export RUN_ID=$(python madewithml/predict.py get-best-run-id --experiment-name $EXPERIMENT_NAME --metric val_loss --mode ASC) 294 | python madewithml/predict.py predict \ 295 | --run-id $RUN_ID \ 296 | --title "Transfer learning with transformers" \ 297 | --description "Using transformers for transfer learning on text classification tasks." 298 | ``` 299 | ```json 300 | [{ 301 | "prediction": [ 302 | "natural-language-processing" 303 | ], 304 | "probabilities": { 305 | "computer-vision": 0.0009767753, 306 | "mlops": 0.0008223939, 307 | "natural-language-processing": 0.99762577, 308 | "other": 0.000575123 309 | } 310 | }] 311 | ``` 312 | 313 | ### Serving 314 | 315 |
316 | Local
317 | 318 | ```bash 319 | # Start 320 | ray start --head 321 | ``` 322 | 323 | ```bash 324 | # Set up 325 | export EXPERIMENT_NAME="llm" 326 | export RUN_ID=$(python madewithml/predict.py get-best-run-id --experiment-name $EXPERIMENT_NAME --metric val_loss --mode ASC) 327 | python madewithml/serve.py --run_id $RUN_ID 328 | ``` 329 | 330 | Once the application is running, we can use it via cURL, Python, etc.: 331 | 332 | ```python 333 | # via Python 334 | import json 335 | import requests 336 | title = "Transfer learning with transformers" 337 | description = "Using transformers for transfer learning on text classification tasks." 338 | json_data = json.dumps({"title": title, "description": description}) 339 | requests.post("http://127.0.0.1:8000/predict", data=json_data).json() 340 | ``` 341 | 342 | ```bash 343 | ray stop # shutdown 344 | ``` 345 | 346 |
347 | 348 |
349 | Anyscale
350 | 351 | In Anyscale Workspaces, Ray is already running so we don't have to manually start/shutdown like we have to do locally. 352 | 353 | ```bash 354 | # Set up 355 | export EXPERIMENT_NAME="llm" 356 | export RUN_ID=$(python madewithml/predict.py get-best-run-id --experiment-name $EXPERIMENT_NAME --metric val_loss --mode ASC) 357 | python madewithml/serve.py --run_id $RUN_ID 358 | ``` 359 | 360 | Once the application is running, we can use it via cURL, Python, etc.: 361 | 362 | ```python 363 | # via Python 364 | import json 365 | import requests 366 | title = "Transfer learning with transformers" 367 | description = "Using transformers for transfer learning on text classification tasks." 368 | json_data = json.dumps({"title": title, "description": description}) 369 | requests.post("http://127.0.0.1:8000/predict", data=json_data).json() 370 | ``` 371 | 372 |
373 | 374 | ### Testing 375 | ```bash 376 | # Code 377 | python3 -m pytest tests/code --verbose --disable-warnings 378 | 379 | # Data 380 | export DATASET_LOC="https://raw.githubusercontent.com/GokuMohandas/Made-With-ML/main/datasets/dataset.csv" 381 | pytest --dataset-loc=$DATASET_LOC tests/data --verbose --disable-warnings 382 | 383 | # Model 384 | export EXPERIMENT_NAME="llm" 385 | export RUN_ID=$(python madewithml/predict.py get-best-run-id --experiment-name $EXPERIMENT_NAME --metric val_loss --mode ASC) 386 | pytest --run-id=$RUN_ID tests/model --verbose --disable-warnings 387 | 388 | # Coverage 389 | python3 -m pytest tests/code --cov madewithml --cov-report html --disable-warnings # html report 390 | python3 -m pytest tests/code --cov madewithml --cov-report term --disable-warnings # terminal report 391 | ``` 392 | 393 | ## Production 394 | 395 | From this point onwards, in order to deploy our application into production, we'll need to either be on Anyscale or on a [cloud VM](https://docs.ray.io/en/latest/cluster/vms/index.html#cloud-vm-index) / [on-prem](https://docs.ray.io/en/latest/cluster/vms/user-guides/launching-clusters/on-premises.html#on-prem) cluster you manage yourself (w/ Ray). If not on Anyscale, the commands will be [slightly different](https://docs.ray.io/en/latest/cluster/running-applications/job-submission/index.html) but the concepts will be the same. 396 | 397 | > If you don't want to set up all of this yourself, we highly recommend joining our [upcoming live cohort](https://4190urw86oh.typeform.com/madewithml){:target="_blank"} where we'll provide an environment with all of this infrastructure already set up for you so that you can just focus on the machine learning. 398 | 399 |
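If you're managing your own Ray cluster instead of using Anyscale, the closest equivalent to submitting an Anyscale Job is the [Ray Jobs API](https://docs.ray.io/en/latest/cluster/running-applications/job-submission/index.html). Here's a minimal sketch (the address and the trimmed set of flags are assumptions; point the client at your own head node and reuse the full commands from the sections above):

```python
from ray.job_submission import JobSubmissionClient

# Connect to the job server on the cluster's head node (assumed address).
client = JobSubmissionClient("http://127.0.0.1:8265")

# Upload the current repo to the cluster and run the training workload there.
job_id = client.submit_job(
    entrypoint=(
        "python madewithml/train.py --experiment-name llm "
        "--dataset-loc https://raw.githubusercontent.com/GokuMohandas/Made-With-ML/main/datasets/dataset.csv "
        "--num-workers 1 --cpu-per-worker 3 --gpu-per-worker 1"
    ),
    runtime_env={"working_dir": ".", "pip": "requirements.txt"},
)
print(job_id)
```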
400 | 401 |
402 | 403 | ### Authentication 404 | 405 | These credentials below are **automatically** set for us if we're using Anyscale Workspaces. We **do not** need to set these credentials explicitly on Workspaces but we do if we're running this locally or on a cluster outside of where our Anyscale Jobs and Services are configured to run. 406 | 407 | ``` bash 408 | export ANYSCALE_HOST=https://console.anyscale.com 409 | export ANYSCALE_CLI_TOKEN=$YOUR_CLI_TOKEN # retrieved from Anyscale credentials page 410 | ``` 411 | 412 | ### Cluster environment 413 | 414 | The cluster environment determines **where** our workloads will be executed (OS, dependencies, etc.). We've already created this [cluster environment](./deploy/cluster_env.yaml) for us but this is how we can create/update one ourselves. 415 | 416 | ```bash 417 | export CLUSTER_ENV_NAME="madewithml-cluster-env" 418 | anyscale cluster-env build deploy/cluster_env.yaml --name $CLUSTER_ENV_NAME 419 | ``` 420 | 421 | ### Compute configuration 422 | 423 | The compute configuration determines **what** resources our workloads will be executed on. We've already created this [compute configuration](./deploy/cluster_compute.yaml) for us but this is how we can create it ourselves. 424 | 425 | ```bash 426 | export CLUSTER_COMPUTE_NAME="madewithml-cluster-compute-g5.4xlarge" 427 | anyscale cluster-compute create deploy/cluster_compute.yaml --name $CLUSTER_COMPUTE_NAME 428 | ``` 429 | 430 | ### Anyscale jobs 431 | 432 | Now we're ready to execute our ML workloads. We've decided to combine them all together into one [job](./deploy/jobs/workloads.yaml) but we could have also created separate jobs for each workload (train, evaluate, etc.). We'll start by editing the `$GITHUB_USERNAME` slots inside our [`workloads.yaml`](./deploy/jobs/workloads.yaml) file: 433 | ```yaml 434 | runtime_env: 435 | working_dir: . 436 | upload_path: s3://madewithml/$GITHUB_USERNAME/jobs # <--- CHANGE USERNAME (case-sensitive) 437 | env_vars: 438 | GITHUB_USERNAME: $GITHUB_USERNAME # <--- CHANGE USERNAME (case-sensitive) 439 | ``` 440 | 441 | The `runtime_env` here specifies that we should upload our current `working_dir` to an S3 bucket so that all of our workers have access to the code when we execute an Anyscale Job. The `GITHUB_USERNAME` is used later to save results from our workloads to S3 so that we can retrieve them later (ex. for serving). 442 | 443 | Now we're ready to submit our job to execute our ML workloads: 444 | ```bash 445 | anyscale job submit deploy/jobs/workloads.yaml 446 | ``` 447 | 448 | ### Anyscale Services 449 | 450 | After our ML workloads have been executed, we're ready to launch our service to serve our model to production. Similar to our Anyscale Jobs configs, be sure to change the `$GITHUB_USERNAME` in [`serve_model.yaml`](./deploy/services/serve_model.yaml). 451 | 452 | ```yaml 453 | ray_serve_config: 454 | import_path: deploy.services.serve_model:entrypoint 455 | runtime_env: 456 | working_dir: .
457 | upload_path: s3://madewithml/$GITHUB_USERNAME/services # <--- CHANGE USERNAME (case-sensitive) 458 | env_vars: 459 | GITHUB_USERNAME: $GITHUB_USERNAME # <--- CHANGE USERNAME (case-sensitive) 460 | ``` 461 | 462 | Now we're ready to launch our service: 463 | ```bash 464 | # Rollout service 465 | anyscale service rollout -f deploy/services/serve_model.yaml 466 | 467 | # Query 468 | curl -X POST -H "Content-Type: application/json" -H "Authorization: Bearer $SECRET_TOKEN" -d '{ 469 | "title": "Transfer learning with transformers", 470 | "description": "Using transformers for transfer learning on text classification tasks." 471 | }' $SERVICE_ENDPOINT/predict/ 472 | 473 | # Rollback (to previous version of the Service) 474 | anyscale service rollback -f $SERVICE_CONFIG --name $SERVICE_NAME 475 | 476 | # Terminate 477 | anyscale service terminate --name $SERVICE_NAME 478 | ``` 479 | 480 | ### CI/CD 481 | 482 | We're not going to manually deploy our application every time we make a change. Instead, we'll automate this process using GitHub Actions! 483 | 484 |
485 | 486 |
487 | 488 | 1. Create a new github branch to save our changes to and execute CI/CD workloads: 489 | ```bash 490 | git remote set-url origin https://github.com/$GITHUB_USERNAME/Made-With-ML.git # <-- CHANGE THIS to your username 491 | git checkout -b dev 492 | ``` 493 | 494 | 2. We'll start by adding the necessary credentials to the [`/settings/secrets/actions`](https://github.com/GokuMohandas/Made-With-ML/settings/secrets/actions) page of our GitHub repository. 495 | 496 | ``` bash 497 | export ANYSCALE_HOST=https://console.anyscale.com 498 | export ANYSCALE_CLI_TOKEN=$YOUR_CLI_TOKEN # retrieved from https://console.anyscale.com/o/madewithml/credentials 499 | ``` 500 | 501 | 3. Now we can make changes to our code (not on `main` branch) and push them to GitHub. But in order to push our code to GitHub, we'll need to first authenticate with our credentials before pushing to our repository: 502 | 503 | ```bash 504 | git config --global user.name $GITHUB_USERNAME # <-- CHANGE THIS to your username 505 | git config --global user.email you@example.com # <-- CHANGE THIS to your email 506 | git add . 507 | git commit -m "" # <-- CHANGE THIS to your message 508 | git push origin dev 509 | ``` 510 | 511 | Now you will be prompted to enter your username and password (personal access token). Follow these steps to get personal access token: [New GitHub personal access token](https://github.com/settings/tokens/new) → Add a name → Toggle `repo` and `workflow` → Click `Generate token` (scroll down) → Copy the token and paste it when prompted for your password. 512 | 513 | 4. Now we can start a PR from this branch to our `main` branch and this will trigger the [workloads workflow](/.github/workflows/workloads.yaml). If the workflow (Anyscale Jobs) succeeds, this will produce comments with the training and evaluation results directly on the PR. 514 | 515 |
516 | 517 |
518 | 519 | 5. If we like the results, we can merge the PR into the `main` branch. This will trigger the [serve workflow](/.github/workflows/serve.yaml) which will roll out our new service to production! 520 | 521 | ### Continual learning 522 | 523 | With our CI/CD workflow in place to deploy our application, we can now focus on continually improving our model. It becomes really easy to extend this foundation to connect to scheduled runs (cron), [data pipelines](https://madewithml.com/courses/mlops/data-engineering/), drift detected through [monitoring](https://madewithml.com/courses/mlops/monitoring/), [online evaluation](https://madewithml.com/courses/mlops/evaluation/#online-evaluation), etc. And we can easily add additional context such as comparing any experiment with what's currently in production (even directly in the PR), etc. 524 | 525 |
526 | 527 |
528 | 529 | ## FAQ 530 | 531 | ### Jupyter notebook kernels 532 | 533 | Issues with configuring the notebooks with jupyter? By default, jupyter will use the kernel with our virtual environment but we can also manually add it to jupyter: 534 | ```bash 535 | python3 -m ipykernel install --user --name=venv 536 | ``` 537 | Now we can open up a notebook → Kernel (top menu bar) → Change Kernel → `venv`. To ever delete this kernel, we can do the following: 538 | ```bash 539 | jupyter kernelspec list 540 | jupyter kernelspec uninstall venv 541 | ``` 542 | -------------------------------------------------------------------------------- /datasets/holdout.csv: -------------------------------------------------------------------------------- 1 | id,created_on,title,description,tag 2 | 19,2020-03-03 13:54:31,Diffusion to Vector,Reference implementation of Diffusion2Vec (Complenet 2018) built on Gensim and NetworkX. ,other 3 | 26,2020-03-07 23:11:58,Graph Wavelet Neural Network,"A PyTorch implementation of ""Graph Wavelet Neural Network"" (ICLR 2019) ",other 4 | 44,2020-03-08 00:32:58,Capsule Graph Neural Network,"A PyTorch implementation of ""Capsule Graph Neural Network"" (ICLR 2019).",other 5 | 80,2020-03-20 05:59:32,NeRF: Neural Radiance Fields,Representing scenes as neural radiance fields for view synthesis.,computer-vision 6 | 84,2020-03-20 15:18:43,Mention Classifier,"Category prediction model 7 | This repo contains AllenNLP model for prediction of Named Entity categories by its mentions.",natural-language-processing 8 | 107,2020-03-21 23:09:03,Plant Fruit Classifier,Building a world-class image classifier model with a custom dataset.,computer-vision 9 | 126,2020-03-25 15:05:27,Unet Implementation is Keras with GPU,Vector Map generation from aerial imagery using deep learning GeoSpatial UNET,computer-vision 10 | 130,2020-03-25 16:55:31,Gymnast Pose Analysis,"Pose modelling for gymnasts using open-pose and open-cv. 
11 | ",computer-vision 12 | 131,2020-03-25 17:00:54,EfficientDet: Scalable and Efficient Object Detection,Implementation EfficientDet: Scalable and Efficient Object Detection in PyTorch.,computer-vision 13 | 136,2020-03-26 17:22:36,Finetune: Scikit-learn Style Model Finetuning for NLP,Finetune is a library that allows users to leverage state-of-the-art pretrained NLP models for a wide variety of downstream tasks.,natural-language-processing 14 | 141,2020-03-28 17:41:42,First Order Motion Model for Image Animation,Generating a video sequence so that an object in a source image is animated according to the motion of a driving video.,computer-vision 15 | 142,2020-03-28 17:49:20,TorchIO: Medical Image Processing in Deep Learning and PyTorch,Tools for medical image processing in deep learning and PyTorch,computer-vision 16 | 144,2020-03-29 18:23:06,Finetuning Transformers with JAX + Haiku,"Walking through a port of the RoBERTa pre-trained model to JAX + Haiku, then fine-tuning the model to solve a downstream task.",natural-language-processing 17 | 218,2020-04-06 11:29:57,Distributional RL using TensorFlow2,🐳 Implementation of various Distributional Reinforcement Learning Algorithms using TensorFlow2.,other 18 | 220,2020-04-06 15:19:59,Module 2: Convolutional Neural Networks - CS231n ,In Lecture 5 we move from fully-connected neural networks to convolutional neural networks.,computer-vision 19 | 249,2020-04-06 19:20:12,makesense.ai,Free to use online tool for labelling photos.,computer-vision 20 | 264,2020-04-06 21:33:32,The Unreasonable Effectiveness of Recurrent Neural Networks,A close look at how RNNs are able to perform so well.,natural-language-processing 21 | 268,2020-04-06 21:51:55,A Gentle Introduction to Text Summarization in Machine Learning,Text summarization is the technique for generating a concise and precise summary of voluminous texts while focusing on the sections that convey useful info.,natural-language-processing 22 | 285,2020-04-07 03:45:03,A (Long) Peek into Reinforcement Learning,"In this post, we are gonna briefly go over the field of Reinforcement Learning (RL), from fundamental concepts to classic algorithms.",other 23 | 305,2020-04-07 20:00:37,Question Answering with a Fine-Tuned BERT,What does it mean for BERT to achieve “human-level performance on Question Answering”?,natural-language-processing 24 | 314,2020-04-08 00:06:21,The Autonomous Learning Library,A PyTorch library for building deep reinforcement learning agents.,other 25 | 317,2020-04-08 00:14:27,COCO Annotator,"✏️ Web-based image segmentation tool for object detection, localization and key points.",computer-vision 26 | 328,2020-04-08 14:29:22,ProteinGCN: Protein model quality assessment using GCNs,Source code for the paper: ProteinGCN: Protein model quality assessment using Graph Convolutional Networks.,other 27 | 344,2020-04-08 16:11:28,Tokenizers,💥Fast State-of-the-Art Tokenizers optimized for Research and Production.,natural-language-processing 28 | 353,2020-04-08 17:08:41,Keras OCR,A packaged and flexible version of the CRAFT text detector and Keras CRNN recognition model. 
,computer-vision 29 | 384,2020-04-08 21:22:25,Visualizing Memorization in RNNs,Inspecting gradient magnitudes in context can be a powerful tool to see when recurrent units use short-term or long-term contextual understanding.,natural-language-processing 30 | 407,2020-04-08 23:00:02,AllenNLP,"An open-source NLP research library, built on PyTorch.",natural-language-processing 31 | 410,2020-04-08 23:09:15,Frameworks for Machine Learning Model Management,This blog post will follow up by comparing three different tools developed to support reproducible machine learning model development.,mlops 32 | 414,2020-04-08 23:18:04,TensorBoard.dev ,"Easily host, track, and share your ML experiments for free.",mlops 33 | 415,2020-04-08 23:21:13,BertViz,"Tool for visualizing attention in the Transformer model (BERT, GPT-2, Albert, XLNet, RoBERTa, CTRL, etc.)",natural-language-processing 34 | 426,2020-04-09 16:37:10,The Transformer Family,"This post presents how the vanilla Transformer can be improved for longer-term attention span, less memory and computation consumption, RL task solving, etc.",natural-language-processing 35 | 437,2020-04-10 17:14:11,Pruning Bert to Accelerate Inference,"After previously discussing various ways of accelerating models like BERT, in this blog post we empirically evaluate the pruning approach.",natural-language-processing 36 | 438,2020-04-10 17:26:39,Compressing Bert for Faster Prediction,"In this blog post, we discuss ways to make huge models like BERT smaller and faster. ",natural-language-processing 37 | 451,2020-04-10 20:10:28,Evaluation Metrics for Language Modeling,"In this article, we will focus on traditional intrinsic metrics that are extremely useful during the process of training the language model itself. ",natural-language-processing 38 | 454,2020-04-10 20:27:12,All The Ways You Can Compress BERT,In this post I’ll list and briefly taxonomize all the papers I’ve seen compressing BERT. ,natural-language-processing 39 | 458,2020-04-10 20:58:41,"Limitations of Deep Learning for Vision, and How We Might Fix The",This is an opinion paper about the strengths and weaknesses of Deep Nets for vision.,computer-vision 40 | 487,2020-04-14 21:15:35,Face Alignment in Full Pose Range: A 3D Total Solution,Face Alignment in Full Pose Range: A 3D Total Solution.,computer-vision 41 | 488,2020-04-14 21:21:51,V2V-PoseNet Pytorch,PyTorch implementation of V2V-PoseNet with IntegralPose/PoseFix loss.,computer-vision 42 | 496,2020-04-14 23:14:59,Fast- Neural Style,Pytorch implementation of an algorithm for artistic style transfer. 
,computer-vision 43 | 497,2020-04-14 23:21:16,Torchvision Object Detection Finetuning Tutorial,Finetuning a pre-trained Mask R-CNN model in the Penn-Fudan Database for Pedestrian Detection and Segmentation.,computer-vision 44 | 559,2020-04-16 16:18:26,Creating an End-to-End Machine Learning Application,"A complete, end-to-end ML application, implemented in both TensorFlow 2.0 and PyTorch.",mlops 45 | 561,2020-04-16 16:27:31,How Docker Can Help You Become A More Effective Data Scientist,A look at Docker from the perspective of a data scientist.,mlops 46 | 569,2020-04-18 13:32:36,An Introduction to Transfer Learning and HuggingFace,In this talk I'll start by introducing the recent breakthroughs in NLP that resulted from the combination of Transfer Learning schemes and Transformer architect,natural-language-processing 47 | 570,2020-04-19 17:40:48,Introduction to Image Inpainting With Deep Learning,"In this article, we are going to learn how to do “image inpainting”, i.e. fill in missing parts of images precisely using deep learning.",computer-vision 48 | 579,2020-04-20 00:53:19,Transfer Learning & Fine-Tuning With Keras,Your 100% up-to-date guide to transfer learning & fine-tuning with Keras.,computer-vision 49 | 582,2020-04-20 21:38:50,CS285: Deep Reinforcement Learning,"A course on deep reinforcement learning, transfer and multi-task learning.",other 50 | 594,2020-04-21 23:25:53,TorchServe & TorchElastic PyTorch Libraries for Serving/Training,The officially supported way to deploy and manage models with PyTorch.,mlops 51 | 600,2020-04-22 17:37:25,Building a Simple Chatbot from Scratch in Python (using NLTK),A look at retrieval based and generative conversational AI for creating chatbots.,natural-language-processing 52 | 612,2020-04-23 13:56:46,Implementing DCGANs using PyTorch C++ API (Libtorch),"The blog discusses the paper review of DCGANs and implementation using PyTorch C++ API in detail. From loading models to visualizing batch of the data, in C++! ",computer-vision 53 | 620,2020-04-23 17:26:26,ELECTRA ,"Explaining the new self-supervised task for language representation learning, ELECTRA which uses ""replace token detection"".",natural-language-processing 54 | 624,2020-04-24 00:42:41,How to Train a New Language Model From Scratch Using Transformers,"In this post we’ll demo how to train a “small” model (84 M parameters = 6 layers, 768 hidden size, 12 attention heads).",natural-language-processing 55 | 629,2020-04-24 05:01:26,ARIMA Modeling - Guide to Time Series Forecasting in Python,"How ARIMA models works . How to train and forecast using ARIMA, SARIMA, SARIMAX and find the optimal model with Python",other 56 | 649,2020-04-28 03:42:29,Spektral,Graph Neural Networks with Keras and Tensorflow 2.,other 57 | 666,2020-04-29 12:10:43,AIDeveloper,"GUI-based software for training, evaluating and applying deep neural nets for image classification ",computer-vision 58 | 671,2020-04-29 23:22:43,MedCAT - Medical Concept Annotation Tool,A tool used to extract information from Electronic Health Records (EHRs) and link it to biomedical ontologies like SNOMED-CT and UMLS.,natural-language-processing 59 | 681,2020-05-01 16:25:34,The AI Economist,Improving Equality and Productivity with AI-Driven Tax Policies,other 60 | 684,2020-05-01 16:48:19,WT5?! 
Training Text-to-Text Models to Explain their Predictions,We leverage the text-to-text framework proposed by Raffel et al.(2019) to train language models to output a natural text explanation alongside their prediction.,natural-language-processing 61 | 689,2020-05-01 17:51:53,Ensemble Forecasts ,"Time series forecasting using classical methods (ETS, Holt-Winter's, SARIMA) and Prophet. I show and discuss advantages of Ensemble Forecast",other 62 | 703,2020-05-04 05:09:59,Implementing Graph Neural Networks with JAX,I’ll talk about my experience on how to build and train Graph Neural Networks (GNNs) with JAX.,other 63 | 705,2020-05-04 14:13:13,Deep Learning With Graph-Structured Representations,Novel approaches based on the theme of structuring the representations and computations of neural network-based models in the form of a graph.,other 64 | 706,2020-05-04 14:18:58,GNNExplainer: Generating Explanations for Graph Neural Networks,General tool for explaining predictions made by graph neural networks (GNNs).,other 65 | 710,2020-05-05 04:01:24,Differential Subspace Search in High-Dimensional Latent Space,"Differential subspace search to allow efficient iterative user exploration in such a space, without relying on domain- or data-specific assumptions.",computer-vision 66 | 723,2020-05-05 19:45:50,DeepWay: Autonomous navigation for blind.,I have tried to make something which can be used by blind people to navigate around the streets. Have a look at the video and GitHub repo for details.,computer-vision 67 | 737,2020-05-06 18:06:04,Nature-Scene Classification using FASTAI,Classifying Nature-scene images using deep learning with fastai library,computer-vision 68 | 738,2020-05-06 20:33:00,Machine-Learning-Single-Layer-Multiclass-Perceptron,Implemented a Single Layer Perceptron and applied it on the MNIST dataset for multi-class classification using NumPy.,computer-vision 69 | 780,2020-05-08 12:06:30,Med7 - clinical natural language processing for EHR,"Med7 is a transferable clinical natural language processing model for electronic health records, compatible with spaCy, for named-entity recognition task",natural-language-processing 70 | 784,2020-05-08 14:59:08,Haystack — Neural Question Answering At Scale,Scaling Question Answering models to find answers in large document stores via retriever and reader approach.,natural-language-processing 71 | 785,2020-05-08 17:13:36,SimCLR in TensorFlow 2,(Minimally) implements SimCLR (https://arxiv.org/abs/2002.05709) in TensorFlow 2.,computer-vision 72 | 787,2020-05-08 18:15:56,Semantic Cord19 Paper Explorer,Semantic research paper explorer to search Research Papers in COVID and CoronaVirus. 
Can be easily modified to any Research Paper Database,natural-language-processing 73 | 807,2020-05-11 02:25:51,Introduction to Machine Learning Problem Framing,This course helps you frame machine learning (ML) problems.,mlops 74 | 834,2020-05-13 04:36:33,TailorGAN: Making User-Defined Fashion Designs,Generate a photo-realistic image which combines the texture from reference A and the new attribute from reference B.,computer-vision 75 | 843,2020-05-13 14:49:21,T5 fine-tuning,A colab notebook to showcase how to fine-tune T5 model on various NLP tasks (especially non text-2-text tasks with text-2-text approach),natural-language-processing 76 | 854,2020-05-14 12:05:20,ASAP: Pooling for Graph Neural Network (AAAI 2020),ASAP is a sparse and differentiable pooling method that addresses the limitations of previous graph pooling layers.,other 77 | 878,2020-05-16 05:27:56,Exploratory Data Analysis on MS COCO Style Datasets,A Simple Toolkit to do exploratory data analysis on MS COCO style formatted datasets.,computer-vision 78 | 898,2020-05-17 05:11:22,Single-Stage Semantic Segmentation from Image Labels,"We attain competitive results by training a single network model 79 | for segmentation in a self-supervised fashion using only 80 | image-level annotations",computer-vision 81 | 906,2020-05-18 14:50:45,NLPAug,Data augmentation for NLP,natural-language-processing 82 | 916,2020-05-19 08:11:05,Get Subreddit Suggestions for a Post,"Trained on 4M Reddit posts from 4k Subreddits. End-to-end ML pipeline built with fasttext and FastAPI, deployed to Valohai.",natural-language-processing 83 | 917,2020-05-19 13:45:03,Transfer Learning In NLP,A brief history of Transfer Learning In NLP,natural-language-processing 84 | 919,2020-05-20 02:29:48,IntelliCode Compose: Code Generation Using Transformer,"Code completion tool which is capable of predicting sequences of code tokens of arbitrary types, generating up to entire lines of syntactically correct code.",natural-language-processing 85 | 943,2020-05-22 06:27:43,Transfer Learning in NLP with Tensorflow Hub and Keras,Learn how to integrate and finetune tensorflow-hub modules in Tensorflow 2.0,natural-language-processing 86 | 946,2020-05-22 07:57:14,Replicating Airbnb's Amenity Detection (documentary series),Airbnb's engineering team shared an article on how they used computer vision to detection amenities in photos. It read like a recipe so I replicated it.,computer-vision 87 | 965,2020-05-24 08:14:30,GANs in Computer Vision : An article review series ,"An article series where we review the most important research papers on GANs from 2015 to today. 6 articles, 20 papers, 20000 words",computer-vision 88 | 991,2020-05-27 05:09:20,NLP Viewer 🤗,A simple website for browsing popular NLP datasets.,natural-language-processing 89 | 999,2020-05-28 03:32:05,MediaPipe,"Simplest way for researchers and developers to build world-class ML solutions and applications for mobile, edge, cloud and the web. ",computer-vision 90 | 1011,2020-05-29 02:57:44,ML in Production - Deployment Series,"A multi-part blog series on deploying machine learning models in an automated, reproducible, and auditable manner.",mlops 91 | 1019,2020-05-29 08:14:05,Visual Object Tracking using Adaptive Correlation Filters,This article gives step by step tutorial with code on understanding MOSSE tracking algorithm,computer-vision 92 | 1032,2020-05-29 14:50:28,Pix2Pix with Tf-js,"Implementation of web friendly ML models using TensorFlow.js. 
pix2pix, face segmentation, fast style transfer and many more ...",computer-vision 93 | 1056,2020-05-30 09:08:31,Font Recognition Using Deep Learning - DeepFont ( Adobe ),DeepFont Paper is a technique created by Adobe.Inc to detect font from images using deep learning . They published their work as a paper for the public .,computer-vision 94 | 1078,2020-05-31 05:04:44,Building Footprint Extraction,The project retrieves satellite imagery from Google and performs building footprint extraction using a U-Net. ,computer-vision 95 | 1114,2020-06-01 21:00:24,Reinforcement Learning in JAX,"Implementation of interesting Deep Reinforcement Learning Algorithms using JAX based libraries (flax, haiku and rlax) As of now tasks come from OpenAI gym",other 96 | 1155,2020-06-03 15:22:11,GaborNet,Modified network architecture that focuses on improving convergence and reducing training complexity.,computer-vision 97 | 1159,2020-06-03 18:17:01,Learning To Classify Images Without Labels,A two-step approach where feature learning and clustering are decoupled.,computer-vision 98 | 1167,2020-06-04 03:58:21,From Pre-trained Word Embeddings to Pre-trained Language Models,from Static Word Embedding to Dynamic (Contextualized) Word Embedding.,natural-language-processing 99 | 1172,2020-06-04 07:01:13,Converting images to TF Records,A Colab Notebook showing how to convert an image dataset (for classification) to TF Records and more.,computer-vision 100 | 1266,2020-06-09 16:09:08,Text Classification using Bert from Tensorflow-Hub,This Tutorial helps to learn about Bert Models for Classification task on a #Tweet dataset.,natural-language-processing 101 | 1286,2020-06-10 17:24:19,Exploring Knowledge Captured in Probability of Strings,An exploration of simple knowledge captured by language models with code examples,natural-language-processing 102 | 1363,2020-06-13 13:46:44,Short Notes on Batch Constrained Deep Reinforcement Learning,Blog article on Off-Policy Deep Reinforcement Learning without Exploration paper by Fujimoto et al. (ICML 2019),other 103 | 1426,2020-06-15 02:34:27,From GRU to Transformer,How recurrent units and self-attention are related to each other.,natural-language-processing 104 | 1430,2020-06-15 04:24:12,Melanoma Classification,This was Shubhamai 3-week project for working a new kaggle competition and deploying a web application to predicting benign or malignant based on images.,computer-vision 105 | 1434,2020-06-15 07:52:13,Universal Sentence Encoder Visually Explained,A deep-dive into how Universal Sentence Encoder learns to generate fixed-length sentence embeddings,natural-language-processing 106 | 1445,2020-06-15 17:49:16,Image Smoothing via L0 Gradient Minimization,This is a edge-aware image smoothing algorithm. This algorithm tries to smoothen the image while preserving the global structural information of the image. 
,computer-vision 107 | 1450,2020-06-15 21:00:47,BERT NLP — How To Build a Question Answering Bot,Understanding the intuition with hands-on PyTorch code for BERT fine-tuned on SQuAD.,natural-language-processing 108 | 1451,2020-06-16 01:21:09,EfficientDet (PyTorch),A PyTorch implementation of EfficientDet faithful to the original Google implementation with ported weights.,computer-vision 109 | 1459,2020-06-16 03:06:10,SuperGlue: Learning Feature Matching with Graph Neural Networks,"SuperGlue, a neural network that matches two sets of local features by jointly finding correspondences and rejecting non-matchable points.",other 110 | 1462,2020-06-16 03:28:40,Open Compound Domain Adaptation,"Pytorch implementation for ""Open Compound Domain Adaptation""",computer-vision 111 | 1485,2020-06-17 16:33:50,Sudoku-Game-Solver,This is a Computer Vision Application that solves a 9x9 sudoku board game using Deep Learning and Backtracking algorithm.,computer-vision 112 | 1488,2020-06-17 19:27:36,Smart Picture Editor,Tool to automatically remove unwanted objects from photos,computer-vision 113 | 1494,2020-06-18 00:14:40,Object Goal Navigation using Goal-oriented Semantic Exploration,Embodied interactive learning for object detection by using semantic curiosity to learn an exploration policy on set of the training environments.,computer-vision 114 | 1501,2020-06-18 18:17:18,Traffic-Sign-Recognition-Using-Deep-Learning,"The training dataset contains around 39,000 images while test dataset contains around 12,000 images containing 43 different classes. We will be using Convolutio",computer-vision 115 | 1508,2020-06-19 06:43:47,Long Form Question Answering with ELI5,A model for open domain long form question answering.,natural-language-processing 116 | 1511,2020-06-19 06:54:23,RepNet - Class Agnostic Video Repetition Counting in the Wild,Counting Out Time: Class Agnostic Video Repetition Counting in the Wild,computer-vision 117 | 1515,2020-06-19 16:37:10,"Cut, Paste and Learn: Surprisingly Easy Synthesis for Detection",Generate synthetic scenes and bounding box annotations for object detection.,computer-vision 118 | 1524,2020-06-20 10:42:25,Machine Learning Projects ,"This Repo contains projects done by me while learning the basics. All the familiar types of regression, classification, and clustering methods have been used.",natural-language-processing 119 | 1540,2020-06-21 13:03:19,codeBERT - Masked Language Model for source code ,Tutorial to use codeBERT a MLM for Python code. Model trained from scratch using roBERTa,natural-language-processing 120 | 1588,2020-06-24 03:29:51,Multi-task Training with Hugging Face Transformers and NLP, A recipe for multi-task training with Transformers' Trainer and NLP datasets.,natural-language-processing 121 | 1600,2020-06-25 00:45:26,BERT Distillation with Catalyst,How to distill BERT with Catalyst.,natural-language-processing 122 | 1628,2020-06-28 06:12:20,Deep Reinforcement Learning Amidst Lifelong Non-Stationarity,"How can robots learn in changing, open-world environments? We introduce dynamic-parameter MDPs, to capture environments with persistent, unobserved changes. 
",other 123 | 1654,2020-06-30 03:58:46,3D Detection and Domain Adaptation,1st Place Solution for Waymo Open Dataset Challenge,computer-vision 124 | 1659,2020-07-01 02:26:20,Evaluation of Text Generation: A Survey,Evaluation methods of natural language generation (NLG) and language modeling.,natural-language-processing 125 | 1661,2020-07-01 06:42:59,SpineNet: A Novel Architecture for Object Detection,"A meta architecture called a scale-permuted model that enables two major improvements on backbone architecture design,iscovered with neural architecture search.",computer-vision 126 | 1665,2020-07-01 07:17:48,BERTology Meets Biology,Interpreting Attention in Protein Language Models.,natural-language-processing 127 | 1681,2020-07-03 04:02:52,A Survey on Deep Learning for Localization and Mapping,Towards the Age of Spatial Machine Intelligence,computer-vision 128 | 1685,2020-07-03 04:12:28,Text Data Cleanup - Dynamic Embedding Visualisation,Identify noisy text in a Machine Translation dataset through dynamic text embedding visualisation.,natural-language-processing 129 | 1689,2020-07-03 04:29:04,Offline Reinforcement Learning,"Challenges, algorithms and benchmarks.",other 130 | 1692,2020-07-03 04:42:45,Low-Dimensional Hyperbolic Knowledge Graph Embeddings,Low-dimensional knowledge graph embeddings that simultaneously capture hierarchical relations and logical patterns.,other 131 | 1703,2020-07-04 09:22:50,Awesome Deep RL,This project is built for people who are learning and researching on the latest deep reinforcement learning methods.,other 132 | 1709,2020-07-05 05:25:34,Anti-Patterns in NLP (8 types of NLP idiots),A talk which discusses the recurring industrial problems in making NLP solutions. ,natural-language-processing 133 | 1715,2020-07-06 18:25:16,Image Classifier,Pure JavaScript Image Classifier,computer-vision 134 | 1717,2020-07-07 04:09:35,TaBERT,Pretraining for Joint Understanding of Textual and Tabular Data,natural-language-processing 135 | 1719,2020-07-07 04:17:11,Texthero,"Text preprocessing, representation and visualization from zero to hero.",natural-language-processing 136 | 1743,2020-07-09 01:51:41,How to Benchmark Models with Transformers,HuggingFace's Transformer library allows users to benchmark models for both TensorFlow 2 and PyTorch using the PyTorchBenchmark and TensorFlowBenchmark classes.,natural-language-processing 137 | 1756,2020-07-10 02:53:13,Linear Attention Transformer,A fully featured Transformer that mixes (QKᵀ)V local attention with Q(KᵀV) global attention (scales linearly with respect to sequence length).,natural-language-processing 138 | 1770,2020-07-11 05:12:49,imgaug,"Image augmentation for machine learning experiments. 139 | 140 | ",computer-vision 141 | 1779,2020-07-11 05:48:03,All Models and checkpoints - Hugging Face,"Massive (and growing) collection of NLP models are nearly any NLP tasks, especially those involving the use of transformers.",natural-language-processing 142 | 1799,2020-07-11 06:49:38,FlashText,"Extract Keywords from sentence or Replace keywords in sentences. 143 | 144 | ",natural-language-processing 145 | 1804,2020-07-11 07:04:25,Text Preprocessing in Python using spaCy library,"In this article, we have explored Text Preprocessing in Python using spaCy library in detail. This is the fundamental step to prepare data for applications.",natural-language-processing 146 | 1805,2020-07-11 07:12:32,Segmentation Models,"Segmentation models with pretrained backbones. Keras and TensorFlow Keras. 
147 | 148 | ",computer-vision 149 | 1825,2020-07-11 08:43:20,MLflow: A Machine Learning Lifecycle Platform,Open source platform for the machine learning lifecycle.,mlops 150 | 1827,2020-07-11 08:56:02,token2index,"A lightweight but powerful library to build token indices for NLP tasks, compatible with major Deep Learning frameworks like PyTorch and Tensorflow.",natural-language-processing 151 | 1853,2020-07-13 20:23:32,The Transformer Neural Network Architecture Explained,"⚙️ It is time to explain how Transformers work. If you are looking for an easy explanation, you are exactly right!",natural-language-processing 152 | 1858,2020-07-14 03:30:14,QSVM,Quantum SVM for sentiment analysis,natural-language-processing 153 | 1866,2020-07-14 22:58:15,PYthon Automated Term Extraction,"Term extraction algorithms such as C-Value, Basic, Combo Basic, Weirdness and Term Extractor using spaCy POS tagging.",natural-language-processing 154 | 1870,2020-07-15 20:38:36,Interpretability and Analysis of Models for NLP,An in-depth look at interpretability and analysis of models for NLP (ACL 2020).,natural-language-processing 155 | 1888,2020-07-17 16:53:37,Monitoring Machine Learning Models in Production,Once you have deployed your machine learning model to production it rapidly becomes apparent that the work is not over.,mlops 156 | 1901,2020-07-19 08:31:43,Quora Question Pair Similarity,"Identify which questions asked on Quora are duplicates of questions that have already been asked. Using Text features, classifying them as duplicates or not. 157 | 158 | ",natural-language-processing 159 | 1905,2020-07-19 14:51:57,PyTorch CNN Trainer,A simple package to fine-tune CNNs from torchvision and Pytorch Image models by Ross Wightman.,computer-vision 160 | 1934,2020-07-21 01:47:01,Graphein,Protein Graph Library,other 161 | 1935,2020-07-21 04:44:52,Integrated Gradients in TensorFlow 2,"In this tutorial, you will walk through an implementation of IG step-by-step in TensorFlow 2 to understand the pixel feature importances of an image classifier.",computer-vision 162 | 1950,2020-07-23 00:42:09,GPT-3: A Hitchhiker's Guide,Post to guide your thinking on GPT-3.,natural-language-processing 163 | 1959,2020-07-24 10:00:13,TeachEasy: Web app for Text Summarization & Q/A generation,An intuitive Streamlit based web app for Text Summarization and Question Answer generation so as to reduce the work for School teachers.,natural-language-processing 164 | 1961,2020-07-24 10:38:52,Python Template for All Projects,"A template that gives the batteries required to package code, CI checks, auto build and deploy docs, easy PyPi publishing support and docker files.",mlops 165 | 1964,2020-07-25 02:52:36,MLOps Tutorial Series,How to create an automatic model training & testing setup using GitHub Actions and Continuous Machine Learning (CML).,mlops 166 | 1972,2020-07-27 02:54:19,Evolution of Representations in the Transformer,"The evolution of representations of individual tokens in Transformers trained with different training objectives (MT, LM, MLM - BERT-style).",natural-language-processing 167 | 1975,2020-07-27 14:09:26,Ensemble methods for object detection,"In this repository, we provide the code for ensembling the output of object detection models, and applying test-time augmentation for object detection. 
This lib",computer-vision 168 | 1976,2020-07-27 14:12:03,Close-Domain fine-tuning for table detection,"In this project, we show the benefits of using models trained on a close domain, using the TableBank dataset, for fine-tuning table detection models. In additio",computer-vision 169 | 1997,2020-07-29 16:13:46,Image Classification by @carrycooldude,Image Classification using TFLite and ImageNet by @carrycooldude,computer-vision 170 | 2007,2020-07-30 14:47:39,CLoDSA: A Tool for Augmentation in Computer Vision tasks,"CLoDSA is an open-source image augmentation library for object classification, localization, detection, semantic segmentation and instance segmentation. It supp",computer-vision 171 | 2010,2020-07-30 15:00:43,FrImCla: A framework for image classification," 172 | FrImCla is an open-source framework for Image Classification using traditional and deep learning techniques. It supports a wide variety of deep learning and c",computer-vision 173 | 2011,2020-07-30 15:02:04,UFOD: A Unified Framework for Object Detection,UFOD is an open-source framework that enables the training and comparison of object detection models on custom datasets using different underlying frameworks an,computer-vision 174 | 2023,2020-08-01 14:46:19,Why You Should Do NLP Beyond English,7000+ languages are spoken around the world but NLP research has mostly focused on English. This post outlines why you should work on languages other than Eng.,natural-language-processing 175 | 2025,2020-08-01 14:57:11,Haystack — Neural Question Answering At Scale,"🔍 Transformers at scale for question answering & search 176 | 177 | ",natural-language-processing 178 | 2034,2020-08-03 04:00:29,Finding Similar Documents with Transformers,How transformers can help us distill text documents into points in N-dimensional vector spaces.,natural-language-processing 179 | 2040,2020-08-04 18:00:56,A Barebones Image Retrieval System,This project presents a simple framework to retrieve images similar to a query image.,computer-vision 180 | 2056,2020-08-06 00:30:49,Fast Sentence Embeddings (fse),Fast Sentence Embeddings is a Python library that serves as an addition to Gensim.,natural-language-processing 181 | 2131,2020-08-13 01:39:01,How to Trust Your Deep Learning Code,"We will focus on how to write reusable unit tests, so that you “Don’t repeat yourself”.",mlops 182 | 2137,2020-08-13 02:10:03,Unpopular Opinion - Data Scientists Should Be More End-to-End,I believe data scientists can be more effective by being end-to-end.,mlops 183 | 2172,2020-08-18 04:12:18,Compression of Deep Learning Models for Text: A Survey,"In this survey, we discuss six different types of methods for compression of such models to enable their deployment in real industry NLP projects.",natural-language-processing 184 | 2186,2020-08-18 23:24:41,AI in Medicine and Imaging - Stanford Symposium 2020,Through the AIMI Symposium we hope to address gaps and barriers in the field and catalyze more evidence-based solutions to improve health for all.,computer-vision 185 | 2195,2020-08-20 20:45:52,Streamlit Terran Timeline,A face-recognition timeline generator tool for any kind of video!,computer-vision 186 | 2199,2020-08-21 08:37:20,How to Set Up Continuous Integration for Machine Learning,How to Set Up Continuous Integration for Machine Learning with Github Actions and Neptune: Step by Step Guide.,mlops 187 | 2200,2020-08-21 12:45:54,Bad passwords and the NIST guidelines,"Example project provided by DataCamp. 
In this project, you will write code that automatically detects and flags the bad passwords.",natural-language-processing 188 | 2232,2020-08-27 11:00:34,GenRL,GenRL is a PyTorch-First Reinforcement Learning library centered around reproducible and generalizable algorithm implementations.,other 189 | 2246,2020-08-30 06:05:21,Questgen- An NLP library for state-of-the-art Question Generation,"Questgen AI is an opensource, easy to use NLP library for Question generation. It can generate MCQs, Boolean (Yes/No), FAQs and also paraphrase any question. 190 | ",natural-language-processing 191 | 2250,2020-08-31 09:20:55,Text Data Augmentation with MarianMT,Learn how to use machine translation models in Hugging Face Transformers for data augmentation.,natural-language-processing 192 | 2262,2020-09-03 12:10:24,R.U.Stoked,NLP (Sentiment Analysis) project to demonstrate a pipeline of data from the very first stage of data collection through ML model deployment.,natural-language-processing 193 | 2266,2020-09-04 01:42:26,Wav2Lip: Accurately Lip-syncing Videos In The Wild,A Lip Sync Expert Is All You Need for Speech to Lip Generation In the Wild,computer-vision 194 | 2271,2020-09-05 07:10:06,Latest advancements in video streaming with AI,"AI developments in video streaming using Super-resolution, Per-title encoding, P2P",computer-vision 195 | 2289,2020-09-08 04:12:41,ElasticTransformers,Making BERT stretchy. Semantic Elasticsearch with Sentence Transformers.,natural-language-processing 196 | 2310,2020-09-12 12:33:20,Image Super-Resolution,In this project we learn how to train a super-resolution model ESPCN on DIV2K dataset to upscale images using AI by 3x,computer-vision 197 | 2312,2020-09-12 22:33:56,Codequestion,Ask coding questions directly from the terminal.,natural-language-processing 198 | 2336,2020-09-19 08:40:37,G-SimCLR,TensorFlow implementation of G-SimCLR. ,computer-vision 199 | 2339,2020-09-19 11:17:48,Neural CDEs for Long Time-Series via the Log-ODE Method,NCDEs for Long Time-Series via the Log-ODE Method.,other 200 | 2350,2020-09-22 03:07:29,"Part 1: Deep Representations, a way towards neural style transfer",A top down approach to conceiving neural style transfer,computer-vision 201 | 2366,2020-09-25 02:26:00,Help-Me-Read: Text Summarization using Flask and HuggingFace.,"Text summarization, translation and Questions Answers generation using HuggingFace and deployed using Flask, Streamlit. Detailed guide on github. 
",natural-language-processing 202 | 2367,2020-09-25 07:39:43,Interactive Analysis of Sentence Embeddings,Learn how to interactively explore sentence embedding and labels in Tensorflow Embedding Projector.,natural-language-processing 203 | 2390,2020-09-28 05:46:03,mini-pokedex end to end tutorial - Gotta classify 'em all!,"Build a Pokemon image classifier to classify the awesome starters Pikachu, Charmander, Squirtle, and Bulbasaur.",computer-vision 204 | 2394,2020-09-28 22:46:36,Why Data Quality is Key to Successful ML Ops,A look at ML Ops and highlight how and why data quality is key to ML Ops workflows.,mlops 205 | 2403,2020-09-30 22:15:07,Easy Data Augmentation (EDA),Easy Data Augmentation Techniques for Boosting Performance on Text Classification Tasks,natural-language-processing 206 | 2413,2020-10-01 23:50:04,Keeping Data Pipelines healthy w/ Great Expectations GH Actions,"We show you how you can use GitHub Actions together with the open source project Great Expectations to automatically test, document, and profile data pipelines.",mlops 207 | 2428,2020-10-05 02:09:23,Efficient Transformers: A Survey,"Characterizes a large and thoughtful selection of recent efficiency-flavored ""X-former"" models.",natural-language-processing 208 | 2429,2020-10-05 02:16:34,Meta-learning for Few-shot Natural Language Processing: A Survey,"Clear definitions, progress summary and some common datasets of applying meta-learning to few-shot NLP.",natural-language-processing 209 | -------------------------------------------------------------------------------- /datasets/tags.csv: -------------------------------------------------------------------------------- 1 | tag 2 | computer-vision 3 | computer-vision 4 | graph-learning 5 | reinforcement-learning 6 | graph-learning 7 | graph-learning 8 | graph-learning 9 | graph-learning 10 | graph-learning 11 | computer-vision 12 | computer-vision 13 | computer-vision 14 | computer-vision 15 | computer-vision 16 | computer-vision 17 | computer-vision 18 | graph-learning 19 | natural-language-processing 20 | mlops 21 | computer-vision 22 | computer-vision 23 | computer-vision 24 | natural-language-processing 25 | natural-language-processing 26 | natural-language-processing 27 | computer-vision 28 | computer-vision 29 | computer-vision 30 | computer-vision 31 | computer-vision 32 | computer-vision 33 | reinforcement-learning 34 | natural-language-processing 35 | natural-language-processing 36 | natural-language-processing 37 | computer-vision 38 | natural-language-processing 39 | mlops 40 | computer-vision 41 | natural-language-processing 42 | computer-vision 43 | natural-language-processing 44 | graph-learning 45 | computer-vision 46 | graph-learning 47 | computer-vision 48 | computer-vision 49 | mlops 50 | natural-language-processing 51 | natural-language-processing 52 | computer-vision 53 | natural-language-processing 54 | natural-language-processing 55 | computer-vision 56 | natural-language-processing 57 | natural-language-processing 58 | computer-vision 59 | computer-vision 60 | natural-language-processing 61 | time-series 62 | natural-language-processing 63 | natural-language-processing 64 | natural-language-processing 65 | natural-language-processing 66 | natural-language-processing 67 | natural-language-processing 68 | natural-language-processing 69 | natural-language-processing 70 | natural-language-processing 71 | natural-language-processing 72 | natural-language-processing 73 | natural-language-processing 74 | natural-language-processing 75 | 
natural-language-processing 76 | natural-language-processing 77 | natural-language-processing 78 | natural-language-processing 79 | reinforcement-learning 80 | computer-vision 81 | reinforcement-learning 82 | reinforcement-learning 83 | natural-language-processing 84 | reinforcement-learning 85 | computer-vision 86 | natural-language-processing 87 | computer-vision 88 | computer-vision 89 | graph-learning 90 | graph-learning 91 | natural-language-processing 92 | natural-language-processing 93 | natural-language-processing 94 | natural-language-processing 95 | computer-vision 96 | natural-language-processing 97 | computer-vision 98 | computer-vision 99 | natural-language-processing 100 | natural-language-processing 101 | natural-language-processing 102 | mlops 103 | mlops 104 | natural-language-processing 105 | natural-language-processing 106 | natural-language-processing 107 | natural-language-processing 108 | reinforcement-learning 109 | reinforcement-learning 110 | graph-learning 111 | computer-vision 112 | natural-language-processing 113 | natural-language-processing 114 | computer-vision 115 | computer-vision 116 | natural-language-processing 117 | computer-vision 118 | mlops 119 | natural-language-processing 120 | computer-vision 121 | natural-language-processing 122 | time-series 123 | computer-vision 124 | natural-language-processing 125 | natural-language-processing 126 | natural-language-processing 127 | computer-vision 128 | natural-language-processing 129 | natural-language-processing 130 | natural-language-processing 131 | natural-language-processing 132 | natural-language-processing 133 | natural-language-processing 134 | natural-language-processing 135 | natural-language-processing 136 | computer-vision 137 | natural-language-processing 138 | computer-vision 139 | computer-vision 140 | computer-vision 141 | natural-language-processing 142 | natural-language-processing 143 | natural-language-processing 144 | natural-language-processing 145 | natural-language-processing 146 | natural-language-processing 147 | natural-language-processing 148 | computer-vision 149 | computer-vision 150 | computer-vision 151 | computer-vision 152 | computer-vision 153 | computer-vision 154 | computer-vision 155 | computer-vision 156 | computer-vision 157 | computer-vision 158 | computer-vision 159 | computer-vision 160 | computer-vision 161 | computer-vision 162 | computer-vision 163 | computer-vision 164 | natural-language-processing 165 | computer-vision 166 | time-series 167 | computer-vision 168 | time-series 169 | natural-language-processing 170 | computer-vision 171 | computer-vision 172 | natural-language-processing 173 | mlops 174 | computer-vision 175 | computer-vision 176 | natural-language-processing 177 | computer-vision 178 | mlops 179 | natural-language-processing 180 | mlops 181 | natural-language-processing 182 | natural-language-processing 183 | computer-vision 184 | natural-language-processing 185 | natural-language-processing 186 | reinforcement-learning 187 | computer-vision 188 | computer-vision 189 | computer-vision 190 | natural-language-processing 191 | natural-language-processing 192 | graph-learning 193 | reinforcement-learning 194 | natural-language-processing 195 | computer-vision 196 | natural-language-processing 197 | natural-language-processing 198 | natural-language-processing 199 | natural-language-processing 200 | natural-language-processing 201 | computer-vision 202 | computer-vision 203 | natural-language-processing 204 | computer-vision 205 | graph-learning 206 
| natural-language-processing 207 | natural-language-processing 208 | time-series 209 | computer-vision 210 | natural-language-processing 211 | natural-language-processing 212 | natural-language-processing 213 | computer-vision 214 | natural-language-processing 215 | computer-vision 216 | natural-language-processing 217 | natural-language-processing 218 | time-series 219 | time-series 220 | natural-language-processing 221 | computer-vision 222 | graph-learning 223 | computer-vision 224 | natural-language-processing 225 | natural-language-processing 226 | natural-language-processing 227 | natural-language-processing 228 | natural-language-processing 229 | computer-vision 230 | natural-language-processing 231 | computer-vision 232 | computer-vision 233 | computer-vision 234 | graph-learning 235 | mlops 236 | computer-vision 237 | graph-learning 238 | mlops 239 | computer-vision 240 | natural-language-processing 241 | computer-vision 242 | natural-language-processing 243 | reinforcement-learning 244 | computer-vision 245 | computer-vision 246 | reinforcement-learning 247 | natural-language-processing 248 | computer-vision 249 | graph-learning 250 | natural-language-processing 251 | computer-vision 252 | natural-language-processing 253 | natural-language-processing 254 | natural-language-processing 255 | computer-vision 256 | natural-language-processing 257 | natural-language-processing 258 | natural-language-processing 259 | natural-language-processing 260 | reinforcement-learning 261 | computer-vision 262 | natural-language-processing 263 | natural-language-processing 264 | natural-language-processing 265 | natural-language-processing 266 | natural-language-processing 267 | computer-vision 268 | computer-vision 269 | natural-language-processing 270 | time-series 271 | natural-language-processing 272 | reinforcement-learning 273 | natural-language-processing 274 | natural-language-processing 275 | computer-vision 276 | reinforcement-learning 277 | natural-language-processing 278 | natural-language-processing 279 | computer-vision 280 | natural-language-processing 281 | computer-vision 282 | reinforcement-learning 283 | natural-language-processing 284 | natural-language-processing 285 | natural-language-processing 286 | natural-language-processing 287 | natural-language-processing 288 | computer-vision 289 | natural-language-processing 290 | reinforcement-learning 291 | natural-language-processing 292 | time-series 293 | computer-vision 294 | computer-vision 295 | time-series 296 | computer-vision 297 | computer-vision 298 | computer-vision 299 | computer-vision 300 | computer-vision 301 | computer-vision 302 | natural-language-processing 303 | computer-vision 304 | natural-language-processing 305 | mlops 306 | natural-language-processing 307 | natural-language-processing 308 | natural-language-processing 309 | natural-language-processing 310 | computer-vision 311 | computer-vision 312 | time-series 313 | computer-vision 314 | computer-vision 315 | natural-language-processing 316 | natural-language-processing 317 | computer-vision 318 | computer-vision 319 | natural-language-processing 320 | natural-language-processing 321 | computer-vision 322 | natural-language-processing 323 | natural-language-processing 324 | reinforcement-learning 325 | computer-vision 326 | computer-vision 327 | natural-language-processing 328 | natural-language-processing 329 | natural-language-processing 330 | natural-language-processing 331 | natural-language-processing 332 | natural-language-processing 333 | 
natural-language-processing 334 | natural-language-processing 335 | natural-language-processing 336 | computer-vision 337 | natural-language-processing 338 | computer-vision 339 | natural-language-processing 340 | natural-language-processing 341 | natural-language-processing 342 | natural-language-processing 343 | computer-vision 344 | computer-vision 345 | computer-vision 346 | mlops 347 | computer-vision 348 | natural-language-processing 349 | natural-language-processing 350 | computer-vision 351 | computer-vision 352 | computer-vision 353 | natural-language-processing 354 | natural-language-processing 355 | reinforcement-learning 356 | computer-vision 357 | mlops 358 | natural-language-processing 359 | natural-language-processing 360 | natural-language-processing 361 | time-series 362 | computer-vision 363 | natural-language-processing 364 | reinforcement-learning 365 | natural-language-processing 366 | natural-language-processing 367 | reinforcement-learning 368 | computer-vision 369 | reinforcement-learning 370 | natural-language-processing 371 | computer-vision 372 | natural-language-processing 373 | natural-language-processing 374 | natural-language-processing 375 | natural-language-processing 376 | natural-language-processing 377 | natural-language-processing 378 | natural-language-processing 379 | natural-language-processing 380 | computer-vision 381 | natural-language-processing 382 | computer-vision 383 | natural-language-processing 384 | computer-vision 385 | natural-language-processing 386 | computer-vision 387 | natural-language-processing 388 | natural-language-processing 389 | time-series 390 | natural-language-processing 391 | natural-language-processing 392 | computer-vision 393 | natural-language-processing 394 | natural-language-processing 395 | computer-vision 396 | computer-vision 397 | computer-vision 398 | natural-language-processing 399 | mlops 400 | reinforcement-learning 401 | natural-language-processing 402 | natural-language-processing 403 | natural-language-processing 404 | computer-vision 405 | natural-language-processing 406 | natural-language-processing 407 | natural-language-processing 408 | natural-language-processing 409 | natural-language-processing 410 | reinforcement-learning 411 | natural-language-processing 412 | natural-language-processing 413 | computer-vision 414 | natural-language-processing 415 | computer-vision 416 | computer-vision 417 | reinforcement-learning 418 | time-series 419 | mlops 420 | computer-vision 421 | natural-language-processing 422 | computer-vision 423 | computer-vision 424 | computer-vision 425 | computer-vision 426 | computer-vision 427 | computer-vision 428 | computer-vision 429 | time-series 430 | computer-vision 431 | reinforcement-learning 432 | computer-vision 433 | natural-language-processing 434 | computer-vision 435 | reinforcement-learning 436 | computer-vision 437 | natural-language-processing 438 | natural-language-processing 439 | natural-language-processing 440 | computer-vision 441 | computer-vision 442 | mlops 443 | computer-vision 444 | natural-language-processing 445 | computer-vision 446 | reinforcement-learning 447 | natural-language-processing 448 | natural-language-processing 449 | graph-learning 450 | natural-language-processing 451 | reinforcement-learning 452 | computer-vision 453 | computer-vision 454 | mlops 455 | computer-vision 456 | computer-vision 457 | computer-vision 458 | computer-vision 459 | computer-vision 460 | mlops 461 | graph-learning 462 | computer-vision 463 | 
natural-language-processing 464 | natural-language-processing 465 | natural-language-processing 466 | natural-language-processing 467 | mlops 468 | reinforcement-learning 469 | computer-vision 470 | computer-vision 471 | computer-vision 472 | reinforcement-learning 473 | natural-language-processing 474 | graph-learning 475 | natural-language-processing 476 | natural-language-processing 477 | natural-language-processing 478 | computer-vision 479 | graph-learning 480 | reinforcement-learning 481 | reinforcement-learning 482 | natural-language-processing 483 | computer-vision 484 | computer-vision 485 | natural-language-processing 486 | natural-language-processing 487 | computer-vision 488 | natural-language-processing 489 | natural-language-processing 490 | natural-language-processing 491 | time-series 492 | computer-vision 493 | natural-language-processing 494 | computer-vision 495 | natural-language-processing 496 | mlops 497 | computer-vision 498 | computer-vision 499 | time-series 500 | natural-language-processing 501 | natural-language-processing 502 | reinforcement-learning 503 | natural-language-processing 504 | computer-vision 505 | reinforcement-learning 506 | mlops 507 | computer-vision 508 | reinforcement-learning 509 | computer-vision 510 | time-series 511 | computer-vision 512 | natural-language-processing 513 | natural-language-processing 514 | natural-language-processing 515 | natural-language-processing 516 | computer-vision 517 | natural-language-processing 518 | computer-vision 519 | natural-language-processing 520 | natural-language-processing 521 | natural-language-processing 522 | mlops 523 | computer-vision 524 | graph-learning 525 | mlops 526 | computer-vision 527 | computer-vision 528 | computer-vision 529 | natural-language-processing 530 | natural-language-processing 531 | computer-vision 532 | computer-vision 533 | time-series 534 | time-series 535 | natural-language-processing 536 | natural-language-processing 537 | natural-language-processing 538 | natural-language-processing 539 | natural-language-processing 540 | natural-language-processing 541 | mlops 542 | mlops 543 | mlops 544 | mlops 545 | natural-language-processing 546 | computer-vision 547 | computer-vision 548 | computer-vision 549 | computer-vision 550 | mlops 551 | computer-vision 552 | computer-vision 553 | computer-vision 554 | computer-vision 555 | mlops 556 | natural-language-processing 557 | computer-vision 558 | mlops 559 | computer-vision 560 | computer-vision 561 | computer-vision 562 | computer-vision 563 | natural-language-processing 564 | natural-language-processing 565 | computer-vision 566 | natural-language-processing 567 | natural-language-processing 568 | computer-vision 569 | mlops 570 | computer-vision 571 | computer-vision 572 | natural-language-processing 573 | mlops 574 | natural-language-processing 575 | computer-vision 576 | computer-vision 577 | computer-vision 578 | computer-vision 579 | computer-vision 580 | mlops 581 | computer-vision 582 | computer-vision 583 | natural-language-processing 584 | natural-language-processing 585 | natural-language-processing 586 | computer-vision 587 | graph-learning 588 | mlops 589 | computer-vision 590 | computer-vision 591 | natural-language-processing 592 | natural-language-processing 593 | natural-language-processing 594 | computer-vision 595 | natural-language-processing 596 | graph-learning 597 | mlops 598 | mlops 599 | mlops 600 | mlops 601 | computer-vision 602 | mlops 603 | natural-language-processing 604 | computer-vision 605 | 
computer-vision 606 | mlops 607 | computer-vision 608 | computer-vision 609 | natural-language-processing 610 | graph-learning 611 | natural-language-processing 612 | time-series 613 | computer-vision 614 | computer-vision 615 | natural-language-processing 616 | computer-vision 617 | computer-vision 618 | natural-language-processing 619 | computer-vision 620 | computer-vision 621 | computer-vision 622 | computer-vision 623 | computer-vision 624 | natural-language-processing 625 | natural-language-processing 626 | natural-language-processing 627 | mlops 628 | computer-vision 629 | computer-vision 630 | natural-language-processing 631 | computer-vision 632 | natural-language-processing 633 | mlops 634 | mlops 635 | computer-vision 636 | natural-language-processing 637 | natural-language-processing 638 | natural-language-processing 639 | mlops 640 | natural-language-processing 641 | natural-language-processing 642 | computer-vision 643 | computer-vision 644 | mlops 645 | computer-vision 646 | computer-vision 647 | computer-vision 648 | computer-vision 649 | computer-vision 650 | natural-language-processing 651 | computer-vision 652 | computer-vision 653 | natural-language-processing 654 | time-series 655 | mlops 656 | mlops 657 | mlops 658 | reinforcement-learning 659 | time-series 660 | mlops 661 | natural-language-processing 662 | computer-vision 663 | natural-language-processing 664 | natural-language-processing 665 | computer-vision 666 | natural-language-processing 667 | computer-vision 668 | natural-language-processing 669 | computer-vision 670 | computer-vision 671 | natural-language-processing 672 | natural-language-processing 673 | natural-language-processing 674 | natural-language-processing 675 | mlops 676 | mlops 677 | graph-learning 678 | computer-vision 679 | computer-vision 680 | mlops 681 | computer-vision 682 | computer-vision 683 | computer-vision 684 | natural-language-processing 685 | computer-vision 686 | natural-language-processing 687 | natural-language-processing 688 | reinforcement-learning 689 | computer-vision 690 | computer-vision 691 | graph-learning 692 | natural-language-processing 693 | natural-language-processing 694 | reinforcement-learning 695 | natural-language-processing 696 | graph-learning 697 | computer-vision 698 | computer-vision 699 | natural-language-processing 700 | reinforcement-learning 701 | mlops 702 | natural-language-processing 703 | natural-language-processing 704 | natural-language-processing 705 | natural-language-processing 706 | reinforcement-learning 707 | natural-language-processing 708 | natural-language-processing 709 | computer-vision 710 | mlops 711 | natural-language-processing 712 | mlops 713 | computer-vision 714 | mlops 715 | time-series 716 | graph-learning 717 | natural-language-processing 718 | graph-learning 719 | computer-vision 720 | time-series 721 | computer-vision 722 | computer-vision 723 | natural-language-processing 724 | computer-vision 725 | computer-vision 726 | natural-language-processing 727 | time-series 728 | computer-vision 729 | computer-vision 730 | computer-vision 731 | computer-vision 732 | natural-language-processing 733 | graph-learning 734 | computer-vision 735 | computer-vision 736 | computer-vision 737 | natural-language-processing 738 | computer-vision 739 | mlops 740 | reinforcement-learning 741 | graph-learning 742 | time-series 743 | mlops 744 | computer-vision 745 | computer-vision 746 | mlops 747 | computer-vision 748 | computer-vision 749 | natural-language-processing 750 | mlops 751 | mlops 
752 | reinforcement-learning 753 | computer-vision 754 | mlops 755 | time-series 756 | reinforcement-learning 757 | computer-vision 758 | natural-language-processing 759 | computer-vision 760 | natural-language-processing 761 | natural-language-processing 762 | natural-language-processing 763 | computer-vision 764 | reinforcement-learning 765 | computer-vision 766 | -------------------------------------------------------------------------------- /deploy/cluster_compute.yaml: -------------------------------------------------------------------------------- 1 | cloud: education-us-west-2 2 | region: us-west-2 3 | head_node_type: 4 | name: head_node_type 5 | instance_type: g5.4xlarge 6 | worker_node_types: 7 | - name: gpu_worker 8 | instance_type: g5.4xlarge 9 | min_workers: 1 10 | max_workers: 1 11 | use_spot: False 12 | aws: 13 | BlockDeviceMappings: 14 | - DeviceName: "/dev/sda1" 15 | Ebs: 16 | VolumeSize: 500 17 | DeleteOnTermination: true 18 | TagSpecifications: 19 | - ResourceType: instance 20 | Tags: 21 | - Key: as-feature-multi-zone 22 | Value: "true" 23 | -------------------------------------------------------------------------------- /deploy/cluster_env.yaml: -------------------------------------------------------------------------------- 1 | base_image: anyscale/ray:2.7.0optimized-py310-cu118 2 | env_vars: {} 3 | debian_packages: 4 | - curl 5 | 6 | python: 7 | pip_packages: [] 8 | conda_packages: [] 9 | 10 | post_build_cmds: 11 | - python3 -m pip install --upgrade pip setuptools wheel 12 | - python3 -m pip install -r https://raw.githubusercontent.com/GokuMohandas/Made-With-ML/main/requirements.txt 13 | -------------------------------------------------------------------------------- /deploy/jobs/workloads.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export PYTHONPATH=$PYTHONPATH:$PWD 3 | mkdir results 4 | 5 | # Test data 6 | export RESULTS_FILE=results/test_data_results.txt 7 | export DATASET_LOC="https://raw.githubusercontent.com/GokuMohandas/Made-With-ML/main/datasets/dataset.csv" 8 | pytest --dataset-loc=$DATASET_LOC tests/data --verbose --disable-warnings > $RESULTS_FILE 9 | cat $RESULTS_FILE 10 | 11 | # Test code 12 | export RESULTS_FILE=results/test_code_results.txt 13 | python -m pytest tests/code --verbose --disable-warnings > $RESULTS_FILE 14 | cat $RESULTS_FILE 15 | 16 | # Train 17 | export EXPERIMENT_NAME="llm" 18 | export RESULTS_FILE=results/training_results.json 19 | export DATASET_LOC="https://raw.githubusercontent.com/GokuMohandas/Made-With-ML/main/datasets/dataset.csv" 20 | export TRAIN_LOOP_CONFIG='{"dropout_p": 0.5, "lr": 1e-4, "lr_factor": 0.8, "lr_patience": 3}' 21 | python madewithml/train.py \ 22 | --experiment-name "$EXPERIMENT_NAME" \ 23 | --dataset-loc "$DATASET_LOC" \ 24 | --train-loop-config "$TRAIN_LOOP_CONFIG" \ 25 | --num-workers 1 \ 26 | --cpu-per-worker 10 \ 27 | --gpu-per-worker 1 \ 28 | --num-epochs 10 \ 29 | --batch-size 256 \ 30 | --results-fp $RESULTS_FILE 31 | 32 | # Get and save run ID 33 | export RUN_ID=$(python -c "import os; from madewithml import utils; d = utils.load_dict(os.getenv('RESULTS_FILE')); print(d['run_id'])") 34 | export RUN_ID_FILE=results/run_id.txt 35 | echo $RUN_ID > $RUN_ID_FILE # used for serving later 36 | 37 | # Evaluate 38 | export RESULTS_FILE=results/evaluation_results.json 39 | export HOLDOUT_LOC="https://raw.githubusercontent.com/GokuMohandas/Made-With-ML/main/datasets/holdout.csv" 40 | python madewithml/evaluate.py \ 41 | --run-id $RUN_ID \ 42 | 
--dataset-loc $HOLDOUT_LOC \ 43 | --results-fp $RESULTS_FILE 44 | 45 | # Test model 46 | RESULTS_FILE=results/test_model_results.txt 47 | pytest --run-id=$RUN_ID tests/model --verbose --disable-warnings > $RESULTS_FILE 48 | cat $RESULTS_FILE 49 | 50 | # Save to S3 51 | export MODEL_REGISTRY=$(python -c "from madewithml import config; print(config.MODEL_REGISTRY)") 52 | aws s3 cp $MODEL_REGISTRY s3://madewithml/$GITHUB_USERNAME/mlflow/ --recursive 53 | aws s3 cp results/ s3://madewithml/$GITHUB_USERNAME/results/ --recursive 54 | -------------------------------------------------------------------------------- /deploy/jobs/workloads.yaml: -------------------------------------------------------------------------------- 1 | name: workloads 2 | project_id: prj_wn6el5cu9dqwktk6t4cv54x8zh 3 | cluster_env: madewithml-cluster-env 4 | compute_config: madewithml-cluster-compute 5 | runtime_env: 6 | working_dir: . 7 | upload_path: s3://madewithml/GokuMohandas/jobs # <--- CHANGE USERNAME (case-sensitive) 8 | env_vars: 9 | GITHUB_USERNAME: GokuMohandas # <--- CHANGE USERNAME (case-sensitive) 10 | entrypoint: bash deploy/jobs/workloads.sh 11 | max_retries: 0 12 | -------------------------------------------------------------------------------- /deploy/services/serve_model.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | import sys 4 | 5 | sys.path.append(".") 6 | 7 | from madewithml.config import MODEL_REGISTRY # NOQA: E402 8 | from madewithml.serve import ModelDeployment # NOQA: E402 9 | 10 | # Copy from S3 11 | github_username = os.environ.get("GITHUB_USERNAME") 12 | subprocess.check_output(["aws", "s3", "cp", f"s3://madewithml/{github_username}/mlflow/", str(MODEL_REGISTRY), "--recursive"]) 13 | subprocess.check_output(["aws", "s3", "cp", f"s3://madewithml/{github_username}/results/", "./", "--recursive"]) 14 | 15 | # Entrypoint 16 | run_id = [line.strip() for line in open("run_id.txt")][0] 17 | entrypoint = ModelDeployment.bind(run_id=run_id, threshold=0.9) 18 | -------------------------------------------------------------------------------- /deploy/services/serve_model.yaml: -------------------------------------------------------------------------------- 1 | name: madewithml 2 | project_id: prj_wn6el5cu9dqwktk6t4cv54x8zh 3 | cluster_env: madewithml-cluster-env 4 | compute_config: madewithml-cluster-compute 5 | ray_serve_config: 6 | import_path: deploy.services.serve_model:entrypoint 7 | runtime_env: 8 | working_dir: . 9 | upload_path: s3://madewithml/GokuMohandas/services # <--- CHANGE USERNAME (case-sensitive) 10 | env_vars: 11 | GITHUB_USERNAME: GokuMohandas # <--- CHANGE USERNAME (case-sensitive) 12 | rollout_strategy: ROLLOUT # ROLLOUT or IN_PLACE 13 | -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | ## Documentation 2 | 3 | - [madewithml](madewithml/data.md): documentation. 4 | 5 | ## Lessons 6 | 7 | Learn how to combine machine learning with software engineering to design, develop, deploy and iterate on production ML applications. 
8 | 9 | - **Lessons**: [https://madewithml.com/](https://madewithml.com/#course) 10 | - **Code**: [GokuMohandas/Made-With-ML](https://github.com/GokuMohandas/Made-With-ML) 11 | -------------------------------------------------------------------------------- /docs/madewithml/data.md: -------------------------------------------------------------------------------- 1 | ::: madewithml.data 2 | -------------------------------------------------------------------------------- /docs/madewithml/evaluate.md: -------------------------------------------------------------------------------- 1 | ::: madewithml.evaluate 2 | -------------------------------------------------------------------------------- /docs/madewithml/models.md: -------------------------------------------------------------------------------- 1 | ::: madewithml.models 2 | -------------------------------------------------------------------------------- /docs/madewithml/predict.md: -------------------------------------------------------------------------------- 1 | ::: madewithml.predict 2 | -------------------------------------------------------------------------------- /docs/madewithml/serve.md: -------------------------------------------------------------------------------- 1 | ::: madewithml.serve 2 | -------------------------------------------------------------------------------- /docs/madewithml/train.md: -------------------------------------------------------------------------------- 1 | ::: madewithml.train 2 | -------------------------------------------------------------------------------- /docs/madewithml/tune.md: -------------------------------------------------------------------------------- 1 | ::: madewithml.tune 2 | -------------------------------------------------------------------------------- /docs/madewithml/utils.md: -------------------------------------------------------------------------------- 1 | ::: madewithml.utils 2 | -------------------------------------------------------------------------------- /madewithml/__init__.py: -------------------------------------------------------------------------------- 1 | from dotenv import load_dotenv 2 | 3 | load_dotenv() 4 | -------------------------------------------------------------------------------- /madewithml/config.py: -------------------------------------------------------------------------------- 1 | # config.py 2 | import logging 3 | import os 4 | import sys 5 | from pathlib import Path 6 | 7 | import mlflow 8 | 9 | # Directories 10 | ROOT_DIR = Path(__file__).parent.parent.absolute() 11 | LOGS_DIR = Path(ROOT_DIR, "logs") 12 | LOGS_DIR.mkdir(parents=True, exist_ok=True) 13 | EFS_DIR = Path(f"/efs/shared_storage/madewithml/{os.environ.get('GITHUB_USERNAME', '')}") 14 | try: 15 | Path(EFS_DIR).mkdir(parents=True, exist_ok=True) 16 | except OSError: 17 | EFS_DIR = Path(ROOT_DIR, "efs") 18 | Path(EFS_DIR).mkdir(parents=True, exist_ok=True) 19 | 20 | # Config MLflow 21 | MODEL_REGISTRY = Path(f"{EFS_DIR}/mlflow") 22 | Path(MODEL_REGISTRY).mkdir(parents=True, exist_ok=True) 23 | MLFLOW_TRACKING_URI = "file://" + str(MODEL_REGISTRY.absolute()) 24 | mlflow.set_tracking_uri(MLFLOW_TRACKING_URI) 25 | 26 | # Logger 27 | logging_config = { 28 | "version": 1, 29 | "disable_existing_loggers": False, 30 | "formatters": { 31 | "minimal": {"format": "%(message)s"}, 32 | "detailed": {"format": "%(levelname)s %(asctime)s [%(name)s:%(filename)s:%(funcName)s:%(lineno)d]\n%(message)s\n"}, 33 | }, 34 | "handlers": { 35 | "console": { 36 | "class": "logging.StreamHandler", 37 | "stream": 
sys.stdout, 38 | "formatter": "minimal", 39 | "level": logging.DEBUG, 40 | }, 41 | "info": { 42 | "class": "logging.handlers.RotatingFileHandler", 43 | "filename": Path(LOGS_DIR, "info.log"), 44 | "maxBytes": 10485760, # 1 MB 45 | "backupCount": 10, 46 | "formatter": "detailed", 47 | "level": logging.INFO, 48 | }, 49 | "error": { 50 | "class": "logging.handlers.RotatingFileHandler", 51 | "filename": Path(LOGS_DIR, "error.log"), 52 | "maxBytes": 10485760, # 1 MB 53 | "backupCount": 10, 54 | "formatter": "detailed", 55 | "level": logging.ERROR, 56 | }, 57 | }, 58 | "root": { 59 | "handlers": ["console", "info", "error"], 60 | "level": logging.INFO, 61 | "propagate": True, 62 | }, 63 | } 64 | 65 | # Logger 66 | logging.config.dictConfig(logging_config) 67 | logger = logging.getLogger() 68 | 69 | # Constraints 70 | STOPWORDS = [ 71 | "i", 72 | "me", 73 | "my", 74 | "myself", 75 | "we", 76 | "our", 77 | "ours", 78 | "ourselves", 79 | "you", 80 | "you're", 81 | "you've", 82 | "you'll", 83 | "you'd", 84 | "your", 85 | "yours", 86 | "yourself", 87 | "yourselves", 88 | "he", 89 | "him", 90 | "his", 91 | "himself", 92 | "she", 93 | "she's", 94 | "her", 95 | "hers", 96 | "herself", 97 | "it", 98 | "it's", 99 | "its", 100 | "itself", 101 | "they", 102 | "them", 103 | "their", 104 | "theirs", 105 | "themselves", 106 | "what", 107 | "which", 108 | "who", 109 | "whom", 110 | "this", 111 | "that", 112 | "that'll", 113 | "these", 114 | "those", 115 | "am", 116 | "is", 117 | "are", 118 | "was", 119 | "were", 120 | "be", 121 | "been", 122 | "being", 123 | "have", 124 | "has", 125 | "had", 126 | "having", 127 | "do", 128 | "does", 129 | "did", 130 | "doing", 131 | "a", 132 | "an", 133 | "the", 134 | "and", 135 | "but", 136 | "if", 137 | "or", 138 | "because", 139 | "as", 140 | "until", 141 | "while", 142 | "of", 143 | "at", 144 | "by", 145 | "for", 146 | "with", 147 | "about", 148 | "against", 149 | "between", 150 | "into", 151 | "through", 152 | "during", 153 | "before", 154 | "after", 155 | "above", 156 | "below", 157 | "to", 158 | "from", 159 | "up", 160 | "down", 161 | "in", 162 | "out", 163 | "on", 164 | "off", 165 | "over", 166 | "under", 167 | "again", 168 | "further", 169 | "then", 170 | "once", 171 | "here", 172 | "there", 173 | "when", 174 | "where", 175 | "why", 176 | "how", 177 | "all", 178 | "any", 179 | "both", 180 | "each", 181 | "few", 182 | "more", 183 | "most", 184 | "other", 185 | "some", 186 | "such", 187 | "no", 188 | "nor", 189 | "not", 190 | "only", 191 | "own", 192 | "same", 193 | "so", 194 | "than", 195 | "too", 196 | "very", 197 | "s", 198 | "t", 199 | "can", 200 | "will", 201 | "just", 202 | "don", 203 | "don't", 204 | "should", 205 | "should've", 206 | "now", 207 | "d", 208 | "ll", 209 | "m", 210 | "o", 211 | "re", 212 | "ve", 213 | "y", 214 | "ain", 215 | "aren", 216 | "aren't", 217 | "couldn", 218 | "couldn't", 219 | "didn", 220 | "didn't", 221 | "doesn", 222 | "doesn't", 223 | "hadn", 224 | "hadn't", 225 | "hasn", 226 | "hasn't", 227 | "haven", 228 | "haven't", 229 | "isn", 230 | "isn't", 231 | "ma", 232 | "mightn", 233 | "mightn't", 234 | "mustn", 235 | "mustn't", 236 | "needn", 237 | "needn't", 238 | "shan", 239 | "shan't", 240 | "shouldn", 241 | "shouldn't", 242 | "wasn", 243 | "wasn't", 244 | "weren", 245 | "weren't", 246 | "won", 247 | "won't", 248 | "wouldn", 249 | "wouldn't", 250 | ] 251 | -------------------------------------------------------------------------------- /madewithml/data.py: -------------------------------------------------------------------------------- 1 
| import re 2 | from typing import Dict, List, Tuple 3 | 4 | import numpy as np 5 | import pandas as pd 6 | import ray 7 | from ray.data import Dataset 8 | from sklearn.model_selection import train_test_split 9 | from transformers import BertTokenizer 10 | 11 | from madewithml.config import STOPWORDS 12 | 13 | 14 | def load_data(dataset_loc: str, num_samples: int = None) -> Dataset: 15 | """Load data from source into a Ray Dataset. 16 | 17 | Args: 18 | dataset_loc (str): Location of the dataset. 19 | num_samples (int, optional): The number of samples to load. Defaults to None. 20 | 21 | Returns: 22 | Dataset: Our dataset represented by a Ray Dataset. 23 | """ 24 | ds = ray.data.read_csv(dataset_loc) 25 | ds = ds.random_shuffle(seed=1234) 26 | ds = ray.data.from_items(ds.take(num_samples)) if num_samples else ds 27 | return ds 28 | 29 | 30 | def stratify_split( 31 | ds: Dataset, 32 | stratify: str, 33 | test_size: float, 34 | shuffle: bool = True, 35 | seed: int = 1234, 36 | ) -> Tuple[Dataset, Dataset]: 37 | """Split a dataset into train and test splits with equal 38 | amounts of data points from each class in the column we 39 | want to stratify on. 40 | 41 | Args: 42 | ds (Dataset): Input dataset to split. 43 | stratify (str): Name of column to split on. 44 | test_size (float): Proportion of dataset to split for test set. 45 | shuffle (bool, optional): whether to shuffle the dataset. Defaults to True. 46 | seed (int, optional): seed for shuffling. Defaults to 1234. 47 | 48 | Returns: 49 | Tuple[Dataset, Dataset]: the stratified train and test datasets. 50 | """ 51 | 52 | def _add_split(df: pd.DataFrame) -> pd.DataFrame: # pragma: no cover, used in parent function 53 | """Naively split a dataframe into train and test splits. 54 | Add a column specifying whether it's the train or test split.""" 55 | train, test = train_test_split(df, test_size=test_size, shuffle=shuffle, random_state=seed) 56 | train["_split"] = "train" 57 | test["_split"] = "test" 58 | return pd.concat([train, test]) 59 | 60 | def _filter_split(df: pd.DataFrame, split: str) -> pd.DataFrame: # pragma: no cover, used in parent function 61 | """Filter by data points that match the split column's value 62 | and return the dataframe with the _split column dropped.""" 63 | return df[df["_split"] == split].drop("_split", axis=1) 64 | 65 | # Train, test split with stratify 66 | grouped = ds.groupby(stratify).map_groups(_add_split, batch_format="pandas") # group by each unique value in the column we want to stratify on 67 | train_ds = grouped.map_batches(_filter_split, fn_kwargs={"split": "train"}, batch_format="pandas") # combine 68 | test_ds = grouped.map_batches(_filter_split, fn_kwargs={"split": "test"}, batch_format="pandas") # combine 69 | 70 | # Shuffle each split (required) 71 | train_ds = train_ds.random_shuffle(seed=seed) 72 | test_ds = test_ds.random_shuffle(seed=seed) 73 | 74 | return train_ds, test_ds 75 | 76 | 77 | def clean_text(text: str, stopwords: List = STOPWORDS) -> str: 78 | """Clean raw text string. 79 | 80 | Args: 81 | text (str): Raw text to clean. 82 | stopwords (List, optional): list of words to filter out. Defaults to STOPWORDS. 83 | 84 | Returns: 85 | str: cleaned text. 
86 | """ 87 | # Lower 88 | text = text.lower() 89 | 90 | # Remove stopwords 91 | pattern = re.compile(r"\b(" + r"|".join(stopwords) + r")\b\s*") 92 | text = pattern.sub(" ", text) 93 | 94 | # Spacing and filters 95 | text = re.sub(r"([!\"'#$%&()*\+,-./:;<=>?@\\\[\]^_`{|}~])", r" \1 ", text) # add spacing 96 | text = re.sub("[^A-Za-z0-9]+", " ", text) # remove non alphanumeric chars 97 | text = re.sub(" +", " ", text) # remove multiple spaces 98 | text = text.strip() # strip white space at the ends 99 | text = re.sub(r"http\S+", "", text) # remove links 100 | 101 | return text 102 | 103 | 104 | def tokenize(batch: Dict) -> Dict: 105 | """Tokenize the text input in our batch using a tokenizer. 106 | 107 | Args: 108 | batch (Dict): batch of data with the text inputs to tokenize. 109 | 110 | Returns: 111 | Dict: batch of data with the results of tokenization (`input_ids` and `attention_mask`) on the text inputs. 112 | """ 113 | tokenizer = BertTokenizer.from_pretrained("allenai/scibert_scivocab_uncased", return_dict=False) 114 | encoded_inputs = tokenizer(batch["text"].tolist(), return_tensors="np", padding="longest") 115 | return dict(ids=encoded_inputs["input_ids"], masks=encoded_inputs["attention_mask"], targets=np.array(batch["tag"])) 116 | 117 | 118 | def preprocess(df: pd.DataFrame, class_to_index: Dict) -> Dict: 119 | """Preprocess the data in our dataframe. 120 | 121 | Args: 122 | df (pd.DataFrame): Raw dataframe to preprocess. 123 | class_to_index (Dict): Mapping of class names to indices. 124 | 125 | Returns: 126 | Dict: preprocessed data (ids, masks, targets). 127 | """ 128 | df["text"] = df.title + " " + df.description # feature engineering 129 | df["text"] = df.text.apply(clean_text) # clean text 130 | df = df.drop(columns=["id", "created_on", "title", "description"], errors="ignore") # clean dataframe 131 | df = df[["text", "tag"]] # rearrange columns 132 | df["tag"] = df["tag"].map(class_to_index) # label encoding 133 | outputs = tokenize(df) 134 | return outputs 135 | 136 | 137 | class CustomPreprocessor: 138 | """Custom preprocessor class.""" 139 | 140 | def __init__(self, class_to_index={}): 141 | self.class_to_index = class_to_index or {} # mutable defaults 142 | self.index_to_class = {v: k for k, v in self.class_to_index.items()} 143 | 144 | def fit(self, ds): 145 | tags = ds.unique(column="tag") 146 | self.class_to_index = {tag: i for i, tag in enumerate(tags)} 147 | self.index_to_class = {v: k for k, v in self.class_to_index.items()} 148 | return self 149 | 150 | def transform(self, ds): 151 | return ds.map_batches(preprocess, fn_kwargs={"class_to_index": self.class_to_index}, batch_format="pandas") 152 | -------------------------------------------------------------------------------- /madewithml/evaluate.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import json 3 | from collections import OrderedDict 4 | from typing import Dict 5 | 6 | import numpy as np 7 | import ray 8 | import ray.train.torch # NOQA: F401 (imported but unused) 9 | import typer 10 | from ray.data import Dataset 11 | from sklearn.metrics import precision_recall_fscore_support 12 | from snorkel.slicing import PandasSFApplier, slicing_function 13 | from typing_extensions import Annotated 14 | 15 | from madewithml import predict, utils 16 | from madewithml.config import logger 17 | from madewithml.predict import TorchPredictor 18 | 19 | # Initialize Typer CLI app 20 | app = typer.Typer() 21 | 22 | 23 | def get_overall_metrics(y_true: 
np.ndarray, y_pred: np.ndarray) -> Dict: # pragma: no cover, eval workload 24 | """Get overall performance metrics. 25 | 26 | Args: 27 | y_true (np.ndarray): ground truth labels. 28 | y_pred (np.ndarray): predicted labels. 29 | 30 | Returns: 31 | Dict: overall metrics. 32 | """ 33 | metrics = precision_recall_fscore_support(y_true, y_pred, average="weighted") 34 | overall_metrics = { 35 | "precision": metrics[0], 36 | "recall": metrics[1], 37 | "f1": metrics[2], 38 | "num_samples": np.float64(len(y_true)), 39 | } 40 | return overall_metrics 41 | 42 | 43 | def get_per_class_metrics(y_true: np.ndarray, y_pred: np.ndarray, class_to_index: Dict) -> Dict: # pragma: no cover, eval workload 44 | """Get per class performance metrics. 45 | 46 | Args: 47 | y_true (np.ndarray): ground truth labels. 48 | y_pred (np.ndarray): predicted labels. 49 | class_to_index (Dict): dictionary mapping class to index. 50 | 51 | Returns: 52 | Dict: per class metrics. 53 | """ 54 | per_class_metrics = {} 55 | metrics = precision_recall_fscore_support(y_true, y_pred, average=None) 56 | for i, _class in enumerate(class_to_index): 57 | per_class_metrics[_class] = { 58 | "precision": metrics[0][i], 59 | "recall": metrics[1][i], 60 | "f1": metrics[2][i], 61 | "num_samples": np.float64(metrics[3][i]), 62 | } 63 | sorted_per_class_metrics = OrderedDict(sorted(per_class_metrics.items(), key=lambda tag: tag[1]["f1"], reverse=True)) 64 | return sorted_per_class_metrics 65 | 66 | 67 | @slicing_function() 68 | def nlp_llm(x): # pragma: no cover, eval workload 69 | """NLP projects that use LLMs.""" 70 | nlp_project = "natural-language-processing" in x.tag 71 | llm_terms = ["transformer", "llm", "bert"] 72 | llm_project = any(s.lower() in x.text.lower() for s in llm_terms) 73 | return nlp_project and llm_project 74 | 75 | 76 | @slicing_function() 77 | def short_text(x): # pragma: no cover, eval workload 78 | """Projects with short titles and descriptions.""" 79 | return len(x.text.split()) < 8 # less than 8 words 80 | 81 | 82 | def get_slice_metrics(y_true: np.ndarray, y_pred: np.ndarray, ds: Dataset) -> Dict: # pragma: no cover, eval workload 83 | """Get performance metrics for slices. 84 | 85 | Args: 86 | y_true (np.ndarray): ground truth labels. 87 | y_pred (np.ndarray): predicted labels. 88 | ds (Dataset): Ray dataset with labels. 89 | Returns: 90 | Dict: performance metrics for slices. 91 | """ 92 | slice_metrics = {} 93 | df = ds.to_pandas() 94 | df["text"] = df["title"] + " " + df["description"] 95 | slices = PandasSFApplier([nlp_llm, short_text]).apply(df) 96 | for slice_name in slices.dtype.names: 97 | mask = slices[slice_name].astype(bool) 98 | if sum(mask): 99 | metrics = precision_recall_fscore_support(y_true[mask], y_pred[mask], average="micro") 100 | slice_metrics[slice_name] = {} 101 | slice_metrics[slice_name]["precision"] = metrics[0] 102 | slice_metrics[slice_name]["recall"] = metrics[1] 103 | slice_metrics[slice_name]["f1"] = metrics[2] 104 | slice_metrics[slice_name]["num_samples"] = len(y_true[mask]) 105 | return slice_metrics 106 | 107 | 108 | @app.command() 109 | def evaluate( 110 | run_id: Annotated[str, typer.Option(help="id of the specific run to load from")] = None, 111 | dataset_loc: Annotated[str, typer.Option(help="dataset (with labels) to evaluate on")] = None, 112 | results_fp: Annotated[str, typer.Option(help="location to save evaluation results to")] = None, 113 | ) -> Dict: # pragma: no cover, eval workload 114 | """Evaluate on the holdout dataset. 
115 | 116 | Args: 117 | run_id (str): id of the specific run to load from. Defaults to None. 118 | dataset_loc (str): dataset (with labels) to evaluate on. 119 | results_fp (str, optional): location to save evaluation results to. Defaults to None. 120 | 121 | Returns: 122 | Dict: model's performance metrics on the dataset. 123 | """ 124 | # Load 125 | ds = ray.data.read_csv(dataset_loc) 126 | best_checkpoint = predict.get_best_checkpoint(run_id=run_id) 127 | predictor = TorchPredictor.from_checkpoint(best_checkpoint) 128 | 129 | # y_true 130 | preprocessor = predictor.get_preprocessor() 131 | preprocessed_ds = preprocessor.transform(ds) 132 | values = preprocessed_ds.select_columns(cols=["targets"]).take_all() 133 | y_true = np.stack([item["targets"] for item in values]) 134 | 135 | # y_pred 136 | predictions = preprocessed_ds.map_batches(predictor).take_all() 137 | y_pred = np.array([d["output"] for d in predictions]) 138 | 139 | # Metrics 140 | metrics = { 141 | "timestamp": datetime.datetime.now().strftime("%B %d, %Y %I:%M:%S %p"), 142 | "run_id": run_id, 143 | "overall": get_overall_metrics(y_true=y_true, y_pred=y_pred), 144 | "per_class": get_per_class_metrics(y_true=y_true, y_pred=y_pred, class_to_index=preprocessor.class_to_index), 145 | "slices": get_slice_metrics(y_true=y_true, y_pred=y_pred, ds=ds), 146 | } 147 | logger.info(json.dumps(metrics, indent=2)) 148 | if results_fp: # pragma: no cover, saving results 149 | utils.save_dict(d=metrics, path=results_fp) 150 | return metrics 151 | 152 | 153 | if __name__ == "__main__": # pragma: no cover, checked during evaluation workload 154 | app() 155 | -------------------------------------------------------------------------------- /madewithml/models.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | from pathlib import Path 4 | 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | from transformers import BertModel 9 | 10 | 11 | class FinetunedLLM(nn.Module): 12 | def __init__(self, llm, dropout_p, embedding_dim, num_classes): 13 | super(FinetunedLLM, self).__init__() 14 | self.llm = llm 15 | self.dropout_p = dropout_p 16 | self.embedding_dim = embedding_dim 17 | self.num_classes = num_classes 18 | self.dropout = torch.nn.Dropout(dropout_p) 19 | self.fc1 = torch.nn.Linear(embedding_dim, num_classes) 20 | 21 | def forward(self, batch): 22 | ids, masks = batch["ids"], batch["masks"] 23 | seq, pool = self.llm(input_ids=ids, attention_mask=masks) 24 | z = self.dropout(pool) 25 | z = self.fc1(z) 26 | return z 27 | 28 | @torch.inference_mode() 29 | def predict(self, batch): 30 | self.eval() 31 | z = self(batch) 32 | y_pred = torch.argmax(z, dim=1).cpu().numpy() 33 | return y_pred 34 | 35 | @torch.inference_mode() 36 | def predict_proba(self, batch): 37 | self.eval() 38 | z = self(batch) 39 | y_probs = F.softmax(z, dim=1).cpu().numpy() 40 | return y_probs 41 | 42 | def save(self, dp): 43 | with open(Path(dp, "args.json"), "w") as fp: 44 | contents = { 45 | "dropout_p": self.dropout_p, 46 | "embedding_dim": self.embedding_dim, 47 | "num_classes": self.num_classes, 48 | } 49 | json.dump(contents, fp, indent=4, sort_keys=False) 50 | torch.save(self.state_dict(), os.path.join(dp, "model.pt")) 51 | 52 | @classmethod 53 | def load(cls, args_fp, state_dict_fp): 54 | with open(args_fp, "r") as fp: 55 | kwargs = json.load(fp=fp) 56 | llm = BertModel.from_pretrained("allenai/scibert_scivocab_uncased", return_dict=False) 57 | model = cls(llm=llm, **kwargs) 
58 | model.load_state_dict(torch.load(state_dict_fp, map_location=torch.device("cpu"))) 59 | return model 60 | -------------------------------------------------------------------------------- /madewithml/predict.py: -------------------------------------------------------------------------------- 1 | import json 2 | from pathlib import Path 3 | from typing import Any, Dict, Iterable, List 4 | from urllib.parse import urlparse 5 | 6 | import numpy as np 7 | import ray 8 | import typer 9 | from numpyencoder import NumpyEncoder 10 | from ray.air import Result 11 | from ray.train.torch.torch_checkpoint import TorchCheckpoint 12 | from typing_extensions import Annotated 13 | 14 | from madewithml.config import logger, mlflow 15 | from madewithml.data import CustomPreprocessor 16 | from madewithml.models import FinetunedLLM 17 | from madewithml.utils import collate_fn 18 | 19 | # Initialize Typer CLI app 20 | app = typer.Typer() 21 | 22 | 23 | def decode(indices: Iterable[Any], index_to_class: Dict) -> List: 24 | """Decode indices to labels. 25 | 26 | Args: 27 | indices (Iterable[Any]): Iterable (list, array, etc.) with indices. 28 | index_to_class (Dict): mapping between indices and labels. 29 | 30 | Returns: 31 | List: list of labels. 32 | """ 33 | return [index_to_class[index] for index in indices] 34 | 35 | 36 | def format_prob(prob: Iterable, index_to_class: Dict) -> Dict: 37 | """Format probabilities to a dictionary mapping class label to probability. 38 | 39 | Args: 40 | prob (Iterable): probabilities. 41 | index_to_class (Dict): mapping between indices and labels. 42 | 43 | Returns: 44 | Dict: Dictionary mapping class label to probability. 45 | """ 46 | d = {} 47 | for i, item in enumerate(prob): 48 | d[index_to_class[i]] = item 49 | return d 50 | 51 | 52 | class TorchPredictor: 53 | def __init__(self, preprocessor, model): 54 | self.preprocessor = preprocessor 55 | self.model = model 56 | self.model.eval() 57 | 58 | def __call__(self, batch): 59 | results = self.model.predict(collate_fn(batch)) 60 | return {"output": results} 61 | 62 | def predict_proba(self, batch): 63 | results = self.model.predict_proba(collate_fn(batch)) 64 | return {"output": results} 65 | 66 | def get_preprocessor(self): 67 | return self.preprocessor 68 | 69 | @classmethod 70 | def from_checkpoint(cls, checkpoint): 71 | metadata = checkpoint.get_metadata() 72 | preprocessor = CustomPreprocessor(class_to_index=metadata["class_to_index"]) 73 | model = FinetunedLLM.load(Path(checkpoint.path, "args.json"), Path(checkpoint.path, "model.pt")) 74 | return cls(preprocessor=preprocessor, model=model) 75 | 76 | 77 | def predict_proba( 78 | ds: ray.data.dataset.Dataset, 79 | predictor: TorchPredictor, 80 | ) -> List: # pragma: no cover, tested with inference workload 81 | """Predict tags (with probabilities) for input data from a Ray dataset. 82 | 83 | Args: 84 | ds (Dataset): Ray dataset with input features. 85 | predictor (TorchPredictor): loaded predictor from a checkpoint. 86 | 87 | Returns: 88 | List: list of predictions (tag and probabilities) for the input data.
89 | """ 90 | preprocessor = predictor.get_preprocessor() 91 | preprocessed_ds = preprocessor.transform(ds) 92 | outputs = preprocessed_ds.map_batches(predictor.predict_proba) 93 | y_prob = np.array([d["output"] for d in outputs.take_all()]) 94 | results = [] 95 | for i, prob in enumerate(y_prob): 96 | tag = preprocessor.index_to_class[prob.argmax()] 97 | results.append({"prediction": tag, "probabilities": format_prob(prob, preprocessor.index_to_class)}) 98 | return results 99 | 100 | 101 | @app.command() 102 | def get_best_run_id(experiment_name: str = "", metric: str = "", mode: str = "") -> str: # pragma: no cover, mlflow logic 103 | """Get the best run_id from an MLflow experiment. 104 | 105 | Args: 106 | experiment_name (str): name of the experiment. 107 | metric (str): metric to filter by. 108 | mode (str): direction of metric (ASC/DESC). 109 | 110 | Returns: 111 | str: best run id from experiment. 112 | """ 113 | sorted_runs = mlflow.search_runs( 114 | experiment_names=[experiment_name], 115 | order_by=[f"metrics.{metric} {mode}"], 116 | ) 117 | run_id = sorted_runs.iloc[0].run_id 118 | print(run_id) 119 | return run_id 120 | 121 | 122 | def get_best_checkpoint(run_id: str) -> TorchCheckpoint: # pragma: no cover, mlflow logic 123 | """Get the best checkpoint from a specific run. 124 | 125 | Args: 126 | run_id (str): ID of the run to get the best checkpoint from. 127 | 128 | Returns: 129 | TorchCheckpoint: Best checkpoint from the run. 130 | """ 131 | artifact_dir = urlparse(mlflow.get_run(run_id).info.artifact_uri).path # get path from mlflow 132 | results = Result.from_path(artifact_dir) 133 | return results.best_checkpoints[0][0] 134 | 135 | 136 | @app.command() 137 | def predict( 138 | run_id: Annotated[str, typer.Option(help="id of the specific run to load from")] = None, 139 | title: Annotated[str, typer.Option(help="project title")] = None, 140 | description: Annotated[str, typer.Option(help="project description")] = None, 141 | ) -> Dict: # pragma: no cover, tested with inference workload 142 | """Predict the tag for a project given its title and description. 143 | 144 | Args: 145 | run_id (str): id of the specific run to load from. Defaults to None. 146 | title (str, optional): project title. Defaults to None. 147 | description (str, optional): project description. Defaults to None. 148 | 149 | Returns: 150 | Dict: prediction results for the input data.
151 | """ 152 | # Load components 153 | best_checkpoint = get_best_checkpoint(run_id=run_id) 154 | predictor = TorchPredictor.from_checkpoint(best_checkpoint) 155 | 156 | # Predict 157 | sample_ds = ray.data.from_items([{"title": title, "description": description, "tag": "other"}]) 158 | results = predict_proba(ds=sample_ds, predictor=predictor) 159 | logger.info(json.dumps(results, cls=NumpyEncoder, indent=2)) 160 | return results 161 | 162 | 163 | if __name__ == "__main__": # pragma: no cover, application 164 | app() 165 | -------------------------------------------------------------------------------- /madewithml/serve.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | from http import HTTPStatus 4 | from typing import Dict 5 | 6 | import ray 7 | from fastapi import FastAPI 8 | from ray import serve 9 | from starlette.requests import Request 10 | 11 | from madewithml import evaluate, predict 12 | from madewithml.config import MLFLOW_TRACKING_URI, mlflow 13 | 14 | # Define application 15 | app = FastAPI( 16 | title="Made With ML", 17 | description="Classify machine learning projects.", 18 | version="0.1", 19 | ) 20 | 21 | 22 | @serve.deployment(num_replicas="1", ray_actor_options={"num_cpus": 8, "num_gpus": 0}) 23 | @serve.ingress(app) 24 | class ModelDeployment: 25 | def __init__(self, run_id: str, threshold: int = 0.9): 26 | """Initialize the model.""" 27 | self.run_id = run_id 28 | self.threshold = threshold 29 | mlflow.set_tracking_uri(MLFLOW_TRACKING_URI) # so workers have access to model registry 30 | best_checkpoint = predict.get_best_checkpoint(run_id=run_id) 31 | self.predictor = predict.TorchPredictor.from_checkpoint(best_checkpoint) 32 | 33 | @app.get("/") 34 | def _index(self) -> Dict: 35 | """Health check.""" 36 | response = { 37 | "message": HTTPStatus.OK.phrase, 38 | "status-code": HTTPStatus.OK, 39 | "data": {}, 40 | } 41 | return response 42 | 43 | @app.get("/run_id/") 44 | def _run_id(self) -> Dict: 45 | """Get the run ID.""" 46 | return {"run_id": self.run_id} 47 | 48 | @app.post("/evaluate/") 49 | async def _evaluate(self, request: Request) -> Dict: 50 | data = await request.json() 51 | results = evaluate.evaluate(run_id=self.run_id, dataset_loc=data.get("dataset")) 52 | return {"results": results} 53 | 54 | @app.post("/predict/") 55 | async def _predict(self, request: Request): 56 | data = await request.json() 57 | sample_ds = ray.data.from_items([{"title": data.get("title", ""), "description": data.get("description", ""), "tag": ""}]) 58 | results = predict.predict_proba(ds=sample_ds, predictor=self.predictor) 59 | 60 | # Apply custom logic 61 | for i, result in enumerate(results): 62 | pred = result["prediction"] 63 | prob = result["probabilities"] 64 | if prob[pred] < self.threshold: 65 | results[i]["prediction"] = "other" 66 | 67 | return {"results": results} 68 | 69 | 70 | if __name__ == "__main__": 71 | parser = argparse.ArgumentParser() 72 | parser.add_argument("--run_id", help="run ID to use for serving.") 73 | parser.add_argument("--threshold", type=float, default=0.9, help="threshold for `other` class.") 74 | args = parser.parse_args() 75 | ray.init(runtime_env={"env_vars": {"GITHUB_USERNAME": os.environ["GITHUB_USERNAME"]}}) 76 | serve.run(ModelDeployment.bind(run_id=args.run_id, threshold=args.threshold)) 77 | -------------------------------------------------------------------------------- /madewithml/train.py: 
-------------------------------------------------------------------------------- 1 | import datetime 2 | import json 3 | import os 4 | import tempfile 5 | from typing import Tuple 6 | 7 | import numpy as np 8 | import ray 9 | import ray.train as train 10 | import torch 11 | import torch.nn as nn 12 | import torch.nn.functional as F 13 | import typer 14 | from ray.air.integrations.mlflow import MLflowLoggerCallback 15 | from ray.data import Dataset 16 | from ray.train import ( 17 | Checkpoint, 18 | CheckpointConfig, 19 | DataConfig, 20 | RunConfig, 21 | ScalingConfig, 22 | ) 23 | from ray.train.torch import TorchTrainer 24 | from torch.nn.parallel.distributed import DistributedDataParallel 25 | from transformers import BertModel 26 | from typing_extensions import Annotated 27 | 28 | from madewithml import data, utils 29 | from madewithml.config import EFS_DIR, MLFLOW_TRACKING_URI, logger 30 | from madewithml.models import FinetunedLLM 31 | 32 | # Initialize Typer CLI app 33 | app = typer.Typer() 34 | 35 | 36 | def train_step( 37 | ds: Dataset, 38 | batch_size: int, 39 | model: nn.Module, 40 | num_classes: int, 41 | loss_fn: torch.nn.modules.loss._WeightedLoss, 42 | optimizer: torch.optim.Optimizer, 43 | ) -> float: # pragma: no cover, tested via train workload 44 | """Train step. 45 | 46 | Args: 47 | ds (Dataset): dataset to iterate batches from. 48 | batch_size (int): size of each batch. 49 | model (nn.Module): model to train. 50 | num_classes (int): number of classes. 51 | loss_fn (torch.nn.loss._WeightedLoss): loss function to use between labels and predictions. 52 | optimizer (torch.optimizer.Optimizer): optimizer to use for updating the model's weights. 53 | 54 | Returns: 55 | float: cumulative loss for the dataset. 56 | """ 57 | model.train() 58 | loss = 0.0 59 | ds_generator = ds.iter_torch_batches(batch_size=batch_size, collate_fn=utils.collate_fn) 60 | for i, batch in enumerate(ds_generator): 61 | optimizer.zero_grad() # reset gradients 62 | z = model(batch) # forward pass 63 | targets = F.one_hot(batch["targets"], num_classes=num_classes).float() # one-hot (for loss_fn) 64 | J = loss_fn(z, targets) # define loss 65 | J.backward() # backward pass 66 | optimizer.step() # update weights 67 | loss += (J.detach().item() - loss) / (i + 1) # cumulative loss 68 | return loss 69 | 70 | 71 | def eval_step( 72 | ds: Dataset, batch_size: int, model: nn.Module, num_classes: int, loss_fn: torch.nn.modules.loss._WeightedLoss 73 | ) -> Tuple[float, np.array, np.array]: # pragma: no cover, tested via train workload 74 | """Eval step. 75 | 76 | Args: 77 | ds (Dataset): dataset to iterate batches from. 78 | batch_size (int): size of each batch. 79 | model (nn.Module): model to train. 80 | num_classes (int): number of classes. 81 | loss_fn (torch.nn.loss._WeightedLoss): loss function to use between labels and predictions. 82 | 83 | Returns: 84 | Tuple[float, np.array, np.array]: cumulative loss, ground truths and predictions. 
85 | """ 86 | model.eval() 87 | loss = 0.0 88 | y_trues, y_preds = [], [] 89 | ds_generator = ds.iter_torch_batches(batch_size=batch_size, collate_fn=utils.collate_fn) 90 | with torch.inference_mode(): 91 | for i, batch in enumerate(ds_generator): 92 | z = model(batch) 93 | targets = F.one_hot(batch["targets"], num_classes=num_classes).float() # one-hot (for loss_fn) 94 | J = loss_fn(z, targets).item() 95 | loss += (J - loss) / (i + 1) 96 | y_trues.extend(batch["targets"].cpu().numpy()) 97 | y_preds.extend(torch.argmax(z, dim=1).cpu().numpy()) 98 | return loss, np.vstack(y_trues), np.vstack(y_preds) 99 | 100 | 101 | def train_loop_per_worker(config: dict) -> None: # pragma: no cover, tested via train workload 102 | """Training loop that each worker will execute. 103 | 104 | Args: 105 | config (dict): arguments to use for training. 106 | """ 107 | # Hyperparameters 108 | dropout_p = config["dropout_p"] 109 | lr = config["lr"] 110 | lr_factor = config["lr_factor"] 111 | lr_patience = config["lr_patience"] 112 | num_epochs = config["num_epochs"] 113 | batch_size = config["batch_size"] 114 | num_classes = config["num_classes"] 115 | 116 | # Get datasets 117 | utils.set_seeds() 118 | train_ds = train.get_dataset_shard("train") 119 | val_ds = train.get_dataset_shard("val") 120 | 121 | # Model 122 | llm = BertModel.from_pretrained("allenai/scibert_scivocab_uncased", return_dict=False) 123 | model = FinetunedLLM(llm=llm, dropout_p=dropout_p, embedding_dim=llm.config.hidden_size, num_classes=num_classes) 124 | model = train.torch.prepare_model(model) 125 | 126 | # Training components 127 | loss_fn = nn.BCEWithLogitsLoss() 128 | optimizer = torch.optim.Adam(model.parameters(), lr=lr) 129 | scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode="min", factor=lr_factor, patience=lr_patience) 130 | 131 | # Training 132 | num_workers = train.get_context().get_world_size() 133 | batch_size_per_worker = batch_size // num_workers 134 | for epoch in range(num_epochs): 135 | # Step 136 | train_loss = train_step(train_ds, batch_size_per_worker, model, num_classes, loss_fn, optimizer) 137 | val_loss, _, _ = eval_step(val_ds, batch_size_per_worker, model, num_classes, loss_fn) 138 | scheduler.step(val_loss) 139 | 140 | # Checkpoint 141 | with tempfile.TemporaryDirectory() as dp: 142 | if isinstance(model, DistributedDataParallel): # cpu 143 | model.module.save(dp=dp) 144 | else: 145 | model.save(dp=dp) 146 | metrics = dict(epoch=epoch, lr=optimizer.param_groups[0]["lr"], train_loss=train_loss, val_loss=val_loss) 147 | checkpoint = Checkpoint.from_directory(dp) 148 | train.report(metrics, checkpoint=checkpoint) 149 | 150 | 151 | @app.command() 152 | def train_model( 153 | experiment_name: Annotated[str, typer.Option(help="name of the experiment for this training workload.")] = None, 154 | dataset_loc: Annotated[str, typer.Option(help="location of the dataset.")] = None, 155 | train_loop_config: Annotated[str, typer.Option(help="arguments to use for training.")] = None, 156 | num_workers: Annotated[int, typer.Option(help="number of workers to use for training.")] = 1, 157 | cpu_per_worker: Annotated[int, typer.Option(help="number of CPUs to use per worker.")] = 1, 158 | gpu_per_worker: Annotated[int, typer.Option(help="number of GPUs to use per worker.")] = 0, 159 | num_samples: Annotated[int, typer.Option(help="number of samples to use from dataset.")] = None, 160 | num_epochs: Annotated[int, typer.Option(help="number of epochs to train for.")] = 1, 161 | batch_size: Annotated[int, 
typer.Option(help="number of samples per batch.")] = 256, 162 | results_fp: Annotated[str, typer.Option(help="filepath to save results to.")] = None, 163 | ) -> ray.air.result.Result: 164 | """Main train function to train our model as a distributed workload. 165 | 166 | Args: 167 | experiment_name (str): name of the experiment for this training workload. 168 | dataset_loc (str): location of the dataset. 169 | train_loop_config (str): arguments to use for training. 170 | num_workers (int, optional): number of workers to use for training. Defaults to 1. 171 | cpu_per_worker (int, optional): number of CPUs to use per worker. Defaults to 1. 172 | gpu_per_worker (int, optional): number of GPUs to use per worker. Defaults to 0. 173 | num_samples (int, optional): number of samples to use from dataset. 174 | If this is passed in, it will override the config. Defaults to None. 175 | num_epochs (int, optional): number of epochs to train for. 176 | If this is passed in, it will override the config. Defaults to None. 177 | batch_size (int, optional): number of samples per batch. 178 | If this is passed in, it will override the config. Defaults to None. 179 | results_fp (str, optional): filepath to save results to. Defaults to None. 180 | 181 | Returns: 182 | ray.air.result.Result: training results. 183 | """ 184 | # Set up 185 | train_loop_config = json.loads(train_loop_config) 186 | train_loop_config["num_samples"] = num_samples 187 | train_loop_config["num_epochs"] = num_epochs 188 | train_loop_config["batch_size"] = batch_size 189 | 190 | # Scaling config 191 | scaling_config = ScalingConfig( 192 | num_workers=num_workers, 193 | use_gpu=bool(gpu_per_worker), 194 | resources_per_worker={"CPU": cpu_per_worker, "GPU": gpu_per_worker}, 195 | ) 196 | 197 | # Checkpoint config 198 | checkpoint_config = CheckpointConfig( 199 | num_to_keep=1, 200 | checkpoint_score_attribute="val_loss", 201 | checkpoint_score_order="min", 202 | ) 203 | 204 | # MLflow callback 205 | mlflow_callback = MLflowLoggerCallback( 206 | tracking_uri=MLFLOW_TRACKING_URI, 207 | experiment_name=experiment_name, 208 | save_artifact=True, 209 | ) 210 | 211 | # Run config 212 | run_config = RunConfig(callbacks=[mlflow_callback], checkpoint_config=checkpoint_config, storage_path=EFS_DIR, local_dir=EFS_DIR) 213 | 214 | # Dataset 215 | ds = data.load_data(dataset_loc=dataset_loc, num_samples=train_loop_config["num_samples"]) 216 | train_ds, val_ds = data.stratify_split(ds, stratify="tag", test_size=0.2) 217 | tags = train_ds.unique(column="tag") 218 | train_loop_config["num_classes"] = len(tags) 219 | 220 | # Dataset config 221 | options = ray.data.ExecutionOptions(preserve_order=True) 222 | dataset_config = DataConfig(datasets_to_split=["train"], execution_options=options) 223 | 224 | # Preprocess 225 | preprocessor = data.CustomPreprocessor() 226 | preprocessor = preprocessor.fit(train_ds) 227 | train_ds = preprocessor.transform(train_ds) 228 | val_ds = preprocessor.transform(val_ds) 229 | train_ds = train_ds.materialize() 230 | val_ds = val_ds.materialize() 231 | 232 | # Trainer 233 | trainer = TorchTrainer( 234 | train_loop_per_worker=train_loop_per_worker, 235 | train_loop_config=train_loop_config, 236 | scaling_config=scaling_config, 237 | run_config=run_config, 238 | datasets={"train": train_ds, "val": val_ds}, 239 | dataset_config=dataset_config, 240 | metadata={"class_to_index": preprocessor.class_to_index}, 241 | ) 242 | 243 | # Train 244 | results = trainer.fit() 245 | d = { 246 | "timestamp": datetime.datetime.now().strftime("%B 
%d, %Y %I:%M:%S %p"), 247 | "run_id": utils.get_run_id(experiment_name=experiment_name, trial_id=results.metrics["trial_id"]), 248 | "params": results.config["train_loop_config"], 249 | "metrics": utils.dict_to_list(results.metrics_dataframe.to_dict(), keys=["epoch", "train_loss", "val_loss"]), 250 | } 251 | logger.info(json.dumps(d, indent=2)) 252 | if results_fp: # pragma: no cover, saving results 253 | utils.save_dict(d, results_fp) 254 | return results 255 | 256 | 257 | if __name__ == "__main__": # pragma: no cover, application 258 | if ray.is_initialized(): 259 | ray.shutdown() 260 | ray.init(runtime_env={"env_vars": {"GITHUB_USERNAME": os.environ["GITHUB_USERNAME"]}}) 261 | app() 262 | -------------------------------------------------------------------------------- /madewithml/tune.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import json 3 | import os 4 | 5 | import ray 6 | import typer 7 | from ray import tune 8 | from ray.air.config import ( 9 | CheckpointConfig, 10 | DatasetConfig, 11 | RunConfig, 12 | ScalingConfig, 13 | ) 14 | from ray.air.integrations.mlflow import MLflowLoggerCallback 15 | from ray.train.torch import TorchTrainer 16 | from ray.tune import Tuner 17 | from ray.tune.schedulers import AsyncHyperBandScheduler 18 | from ray.tune.search import ConcurrencyLimiter 19 | from ray.tune.search.hyperopt import HyperOptSearch 20 | from typing_extensions import Annotated 21 | 22 | from madewithml import data, train, utils 23 | from madewithml.config import EFS_DIR, MLFLOW_TRACKING_URI, logger 24 | 25 | # Initialize Typer CLI app 26 | app = typer.Typer() 27 | 28 | 29 | @app.command() 30 | def tune_models( 31 | experiment_name: Annotated[str, typer.Option(help="name of the experiment for this training workload.")] = None, 32 | dataset_loc: Annotated[str, typer.Option(help="location of the dataset.")] = None, 33 | initial_params: Annotated[str, typer.Option(help="initial config for the tuning workload.")] = None, 34 | num_workers: Annotated[int, typer.Option(help="number of workers to use for training.")] = 1, 35 | cpu_per_worker: Annotated[int, typer.Option(help="number of CPUs to use per worker.")] = 1, 36 | gpu_per_worker: Annotated[int, typer.Option(help="number of GPUs to use per worker.")] = 0, 37 | num_runs: Annotated[int, typer.Option(help="number of runs in this tuning experiment.")] = 1, 38 | num_samples: Annotated[int, typer.Option(help="number of samples to use from dataset.")] = None, 39 | num_epochs: Annotated[int, typer.Option(help="number of epochs to train for.")] = 1, 40 | batch_size: Annotated[int, typer.Option(help="number of samples per batch.")] = 256, 41 | results_fp: Annotated[str, typer.Option(help="filepath to save results to.")] = None, 42 | ) -> ray.tune.result_grid.ResultGrid: 43 | """Hyperparameter tuning experiment. 44 | 45 | Args: 46 | experiment_name (str): name of the experiment for this training workload. 47 | dataset_loc (str): location of the dataset. 48 | initial_params (str): initial config for the tuning workload. 49 | num_workers (int, optional): number of workers to use for training. Defaults to 1. 50 | cpu_per_worker (int, optional): number of CPUs to use per worker. Defaults to 1. 51 | gpu_per_worker (int, optional): number of GPUs to use per worker. Defaults to 0. 52 | num_runs (int, optional): number of runs in this tuning experiment. Defaults to 1. 53 | num_samples (int, optional): number of samples to use from dataset. 
54 | If this is passed in, it will override the config. Defaults to None. 55 | num_epochs (int, optional): number of epochs to train for. 56 | If this is passed in, it will override the config. Defaults to None. 57 | batch_size (int, optional): number of samples per batch. 58 | If this is passed in, it will override the config. Defaults to None. 59 | results_fp (str, optional): filepath to save the tuning results. Defaults to None. 60 | 61 | Returns: 62 | ray.tune.result_grid.ResultGrid: results of the tuning experiment. 63 | """ 64 | # Set up 65 | utils.set_seeds() 66 | train_loop_config = {} 67 | train_loop_config["num_samples"] = num_samples 68 | train_loop_config["num_epochs"] = num_epochs 69 | train_loop_config["batch_size"] = batch_size 70 | 71 | # Scaling config 72 | scaling_config = ScalingConfig( 73 | num_workers=num_workers, 74 | use_gpu=bool(gpu_per_worker), 75 | resources_per_worker={"CPU": cpu_per_worker, "GPU": gpu_per_worker}, 76 | ) 77 | 78 | # Dataset 79 | ds = data.load_data(dataset_loc=dataset_loc, num_samples=train_loop_config.get("num_samples", None)) 80 | train_ds, val_ds = data.stratify_split(ds, stratify="tag", test_size=0.2) 81 | tags = train_ds.unique(column="tag") 82 | train_loop_config["num_classes"] = len(tags) 83 | 84 | # Dataset config 85 | dataset_config = { 86 | "train": DatasetConfig(fit=False, transform=False, randomize_block_order=False), 87 | "val": DatasetConfig(fit=False, transform=False, randomize_block_order=False), 88 | } 89 | 90 | # Preprocess 91 | preprocessor = data.CustomPreprocessor() 92 | preprocessor = preprocessor.fit(train_ds) 93 | train_ds = preprocessor.transform(train_ds) 94 | val_ds = preprocessor.transform(val_ds) 95 | train_ds = train_ds.materialize() 96 | val_ds = val_ds.materialize() 97 | 98 | # Trainer 99 | trainer = TorchTrainer( 100 | train_loop_per_worker=train.train_loop_per_worker, 101 | train_loop_config=train_loop_config, 102 | scaling_config=scaling_config, 103 | datasets={"train": train_ds, "val": val_ds}, 104 | dataset_config=dataset_config, 105 | metadata={"class_to_index": preprocessor.class_to_index}, 106 | ) 107 | 108 | # Checkpoint configuration 109 | checkpoint_config = CheckpointConfig( 110 | num_to_keep=1, 111 | checkpoint_score_attribute="val_loss", 112 | checkpoint_score_order="min", 113 | ) 114 | 115 | # Run configuration 116 | mlflow_callback = MLflowLoggerCallback( 117 | tracking_uri=MLFLOW_TRACKING_URI, 118 | experiment_name=experiment_name, 119 | save_artifact=True, 120 | ) 121 | run_config = RunConfig(callbacks=[mlflow_callback], checkpoint_config=checkpoint_config, storage_path=EFS_DIR, local_dir=EFS_DIR) 122 | 123 | # Hyperparameters to start with 124 | initial_params = json.loads(initial_params) 125 | search_alg = HyperOptSearch(points_to_evaluate=initial_params) 126 | search_alg = ConcurrencyLimiter(search_alg, max_concurrent=2) # trade off b/w optimization and search space 127 | 128 | # Parameter space 129 | param_space = { 130 | "train_loop_config": { 131 | "dropout_p": tune.uniform(0.3, 0.9), 132 | "lr": tune.loguniform(1e-5, 5e-4), 133 | "lr_factor": tune.uniform(0.1, 0.9), 134 | "lr_patience": tune.uniform(1, 10), 135 | } 136 | } 137 | 138 | # Scheduler 139 | scheduler = AsyncHyperBandScheduler( 140 | max_t=train_loop_config["num_epochs"], # max epoch () per trial 141 | grace_period=1, # min epoch () per trial 142 | ) 143 | 144 | # Tune config 145 | tune_config = tune.TuneConfig( 146 | metric="val_loss", 147 | mode="min", 148 | search_alg=search_alg, 149 | scheduler=scheduler, 150 | 
num_samples=num_runs, 151 | ) 152 | 153 | # Tuner 154 | tuner = Tuner( 155 | trainable=trainer, 156 | run_config=run_config, 157 | param_space=param_space, 158 | tune_config=tune_config, 159 | ) 160 | 161 | # Tune 162 | results = tuner.fit() 163 | best_trial = results.get_best_result(metric="val_loss", mode="min") 164 | d = { 165 | "timestamp": datetime.datetime.now().strftime("%B %d, %Y %I:%M:%S %p"), 166 | "run_id": utils.get_run_id(experiment_name=experiment_name, trial_id=best_trial.metrics["trial_id"]), 167 | "params": best_trial.config["train_loop_config"], 168 | "metrics": utils.dict_to_list(best_trial.metrics_dataframe.to_dict(), keys=["epoch", "train_loss", "val_loss"]), 169 | } 170 | logger.info(json.dumps(d, indent=2)) 171 | if results_fp: # pragma: no cover, saving results 172 | utils.save_dict(d, results_fp) 173 | return results 174 | 175 | 176 | if __name__ == "__main__": # pragma: no cover, application 177 | if ray.is_initialized(): 178 | ray.shutdown() 179 | ray.init(runtime_env={"env_vars": {"GITHUB_USERNAME": os.environ["GITHUB_USERNAME"]}}) 180 | app() 181 | -------------------------------------------------------------------------------- /madewithml/utils.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import random 4 | from typing import Any, Dict, List 5 | 6 | import numpy as np 7 | import torch 8 | from ray.data import DatasetContext 9 | from ray.train.torch import get_device 10 | 11 | from madewithml.config import mlflow 12 | 13 | DatasetContext.get_current().execution_options.preserve_order = True 14 | 15 | 16 | def set_seeds(seed: int = 42): 17 | """Set seeds for reproducibility.""" 18 | np.random.seed(seed) 19 | random.seed(seed) 20 | torch.manual_seed(seed) 21 | torch.cuda.manual_seed(seed) 22 | eval("setattr(torch.backends.cudnn, 'deterministic', True)") 23 | eval("setattr(torch.backends.cudnn, 'benchmark', False)") 24 | os.environ["PYTHONHASHSEED"] = str(seed) 25 | 26 | 27 | def load_dict(path: str) -> Dict: 28 | """Load a dictionary from a JSON's filepath. 29 | 30 | Args: 31 | path (str): location of file. 32 | 33 | Returns: 34 | Dict: loaded JSON data. 35 | """ 36 | with open(path) as fp: 37 | d = json.load(fp) 38 | return d 39 | 40 | 41 | def save_dict(d: Dict, path: str, cls: Any = None, sortkeys: bool = False) -> None: 42 | """Save a dictionary to a specific location. 43 | 44 | Args: 45 | d (Dict): data to save. 46 | path (str): location of where to save the data. 47 | cls (optional): encoder to use on dict data. Defaults to None. 48 | sortkeys (bool, optional): whether to sort keys alphabetically. Defaults to False. 49 | """ 50 | directory = os.path.dirname(path) 51 | if directory and not os.path.exists(directory): # pragma: no cover 52 | os.makedirs(directory) 53 | with open(path, "w") as fp: 54 | json.dump(d, indent=2, fp=fp, cls=cls, sort_keys=sortkeys) 55 | fp.write("\n") 56 | 57 | 58 | def pad_array(arr: np.ndarray, dtype=np.int32) -> np.ndarray: 59 | """Pad an 2D array with zeros until all rows in the 60 | 2D array are of the same length as a the longest 61 | row in the 2D array. 
62 | 63 | Args: 64 | arr (np.array): input array 65 | 66 | Returns: 67 | np.array: zero padded array 68 | """ 69 | max_len = max(len(row) for row in arr) 70 | padded_arr = np.zeros((arr.shape[0], max_len), dtype=dtype) 71 | for i, row in enumerate(arr): 72 | padded_arr[i][: len(row)] = row 73 | return padded_arr 74 | 75 | 76 | def collate_fn(batch: Dict[str, np.ndarray]) -> Dict[str, torch.Tensor]: # pragma: no cover, air internal 77 | """Convert a batch of numpy arrays to tensors (with appropriate padding). 78 | 79 | Args: 80 | batch (Dict[str, np.ndarray]): input batch as a dictionary of numpy arrays. 81 | 82 | Returns: 83 | Dict[str, torch.Tensor]: output batch as a dictionary of tensors. 84 | """ 85 | batch["ids"] = pad_array(batch["ids"]) 86 | batch["masks"] = pad_array(batch["masks"]) 87 | dtypes = {"ids": torch.int32, "masks": torch.int32, "targets": torch.int64} 88 | tensor_batch = {} 89 | for key, array in batch.items(): 90 | tensor_batch[key] = torch.as_tensor(array, dtype=dtypes[key], device=get_device()) 91 | return tensor_batch 92 | 93 | 94 | def get_run_id(experiment_name: str, trial_id: str) -> str: # pragma: no cover, mlflow functionality 95 | """Get the MLflow run ID for a specific Ray trial ID. 96 | 97 | Args: 98 | experiment_name (str): name of the experiment. 99 | trial_id (str): id of the trial. 100 | 101 | Returns: 102 | str: run id of the trial. 103 | """ 104 | trial_name = f"TorchTrainer_{trial_id}" 105 | run = mlflow.search_runs(experiment_names=[experiment_name], filter_string=f"tags.trial_name = '{trial_name}'").iloc[0] 106 | return run.run_id 107 | 108 | 109 | def dict_to_list(data: Dict, keys: List[str]) -> List[Dict[str, Any]]: 110 | """Convert a dictionary to a list of dictionaries. 111 | 112 | Args: 113 | data (Dict): input dictionary. 114 | keys (List[str]): keys to include in the output list of dictionaries. 115 | 116 | Returns: 117 | List[Dict[str, Any]]: output list of dictionaries. 118 | """ 119 | list_of_dicts = [] 120 | for i in range(len(data[keys[0]])): 121 | new_dict = {key: data[key][i] for key in keys} 122 | list_of_dicts.append(new_dict) 123 | return list_of_dicts 124 | -------------------------------------------------------------------------------- /mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: Made With ML 2 | site_url: https://madewithml.com/ 3 | repo_url: https://github.com/GokuMohandas/Made-With-ML/ 4 | nav: 5 | - Home: index.md 6 | - madewithml: 7 | - data: madewithml/data.md 8 | - models: madewithml/models.md 9 | - train: madewithml/train.md 10 | - tune: madewithml/tune.md 11 | - evaluate: madewithml/evaluate.md 12 | - predict: madewithml/predict.md 13 | - serve: madewithml/serve.md 14 | - utils: madewithml/utils.md 15 | theme: readthedocs 16 | plugins: 17 | - mkdocstrings 18 | watch: 19 | - . 
# reload docs for any file changes 20 | -------------------------------------------------------------------------------- /notebooks/clear_cell_nums.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import nbformat 4 | 5 | 6 | def clear_execution_numbers(nb_path): 7 | with open(nb_path, "r", encoding="utf-8") as f: 8 | nb = nbformat.read(f, as_version=4) 9 | for cell in nb["cells"]: 10 | if cell["cell_type"] == "code": 11 | cell["execution_count"] = None 12 | for output in cell["outputs"]: 13 | if "execution_count" in output: 14 | output["execution_count"] = None 15 | with open(nb_path, "w", encoding="utf-8") as f: 16 | nbformat.write(nb, f) 17 | 18 | 19 | if __name__ == "__main__": 20 | NOTEBOOK_DIR = Path(__file__).parent 21 | notebook_fps = list(NOTEBOOK_DIR.glob("**/*.ipynb")) 22 | for fp in notebook_fps: 23 | clear_execution_numbers(fp) 24 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | # Black formatting 2 | [tool.black] 3 | line-length = 150 4 | include = '\.pyi?$' 5 | exclude = ''' 6 | /( 7 | .eggs # exclude a few common directories in the 8 | | .git # root of the project 9 | | .hg 10 | | .mypy_cache 11 | | .tox 12 | | venv 13 | | _build 14 | | buck-out 15 | | build 16 | | dist 17 | )/ 18 | ''' 19 | 20 | # iSort 21 | [tool.isort] 22 | profile = "black" 23 | line_length = 79 24 | multi_line_output = 3 25 | include_trailing_comma = true 26 | virtual_env = "venv" 27 | 28 | [tool.flake8] 29 | exclude = "venv" 30 | ignore = ["E501", "W503", "E226"] 31 | # E501: Line too long 32 | # W503: Line break occurred before binary operator 33 | # E226: Missing white space around arithmetic operator 34 | 35 | [tool.pyupgrade] 36 | py39plus = true 37 | 38 | # Pytest 39 | [tool.pytest.ini_options] 40 | testpaths = ["tests"] 41 | python_files = "test_*.py" 42 | 43 | # Pytest cov 44 | [tool.coverage.run] 45 | omit=["madewithml/evaluate.py", "madewithml/serve.py"] 46 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # Default 2 | hyperopt==0.2.7 3 | ipywidgets>=8 4 | matplotlib==3.7.1 5 | mlflow==2.3.1 6 | nltk==3.8.1 7 | numpy==1.24.3 8 | numpyencoder==0.3.0 9 | pandas==2.0.1 10 | python-dotenv==1.0.0 11 | ray[air]==2.7.0 12 | scikit-learn==1.2.2 13 | snorkel==0.9.9 14 | SQLAlchemy==1.4.48 15 | torch==2.0.0 16 | transformers==4.28.1 17 | 18 | # Notebook 19 | cleanlab==2.3.1 20 | jupyterlab==3.6.3 21 | lime==0.2.0.1 22 | seaborn==0.12.2 23 | wordcloud==1.9.2 24 | 25 | # Documentation 26 | mkdocs==1.4.2 27 | mkdocstrings==0.21.2 28 | mkdocstrings[python]>=0.18 29 | 30 | # Styling 31 | black==23.3.0 32 | flake8==6.0.0 33 | Flake8-pyproject==1.2.3 34 | isort==5.12.0 35 | pyupgrade==3.3.2 36 | 37 | # Testing 38 | great-expectations==0.16.5 39 | pytest==7.3.1 40 | pytest-cov==4.0.0 41 | 42 | # Development 43 | fastapi==0.95.2 44 | pre-commit==3.2.2 45 | typer==0.9.0 46 | 47 | # Deployment 48 | anyscale==0.5.131 49 | -------------------------------------------------------------------------------- /tests/code/conftest.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from madewithml.data import CustomPreprocessor 4 | 5 | 6 | @pytest.fixture 7 | def dataset_loc(): 8 | return 
"https://raw.githubusercontent.com/GokuMohandas/Made-With-ML/main/datasets/dataset.csv" 9 | 10 | 11 | @pytest.fixture 12 | def preprocessor(): 13 | return CustomPreprocessor() 14 | -------------------------------------------------------------------------------- /tests/code/test_data.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import pytest 3 | import ray 4 | 5 | from madewithml import data 6 | 7 | 8 | @pytest.fixture(scope="module") 9 | def df(): 10 | data = [{"title": "a0", "description": "b0", "tag": "c0"}] 11 | df = pd.DataFrame(data) 12 | return df 13 | 14 | 15 | @pytest.fixture(scope="module") 16 | def class_to_index(): 17 | class_to_index = {"c0": 0, "c1": 1} 18 | return class_to_index 19 | 20 | 21 | def test_load_data(dataset_loc): 22 | num_samples = 10 23 | ds = data.load_data(dataset_loc=dataset_loc, num_samples=num_samples) 24 | assert ds.count() == num_samples 25 | 26 | 27 | def test_stratify_split(): 28 | n_per_class = 10 29 | targets = n_per_class * ["c1"] + n_per_class * ["c2"] 30 | ds = ray.data.from_items([dict(target=t) for t in targets]) 31 | train_ds, test_ds = data.stratify_split(ds, stratify="target", test_size=0.5) 32 | train_target_counts = train_ds.to_pandas().target.value_counts().to_dict() 33 | test_target_counts = test_ds.to_pandas().target.value_counts().to_dict() 34 | assert train_target_counts == test_target_counts 35 | 36 | 37 | @pytest.mark.parametrize( 38 | "text, sw, clean_text", 39 | [ 40 | ("hi", [], "hi"), 41 | ("hi you", ["you"], "hi"), 42 | ("hi yous", ["you"], "hi yous"), 43 | ], 44 | ) 45 | def test_clean_text(text, sw, clean_text): 46 | assert data.clean_text(text=text, stopwords=sw) == clean_text 47 | 48 | 49 | def test_preprocess(df, class_to_index): 50 | assert "text" not in df.columns 51 | outputs = data.preprocess(df, class_to_index=class_to_index) 52 | assert set(outputs) == {"ids", "masks", "targets"} 53 | 54 | 55 | def test_fit_transform(dataset_loc, preprocessor): 56 | ds = data.load_data(dataset_loc=dataset_loc) 57 | preprocessor = preprocessor.fit(ds) 58 | preprocessed_ds = preprocessor.transform(ds) 59 | assert len(preprocessor.class_to_index) == 4 60 | assert ds.count() == preprocessed_ds.count() 61 | -------------------------------------------------------------------------------- /tests/code/test_predict.py: -------------------------------------------------------------------------------- 1 | from madewithml import predict 2 | 3 | 4 | def test_decode(): 5 | decoded = predict.decode(indices=[0, 1, 1], index_to_class={0: "x", 1: "y"}) 6 | assert decoded == ["x", "y", "y"] 7 | 8 | 9 | def test_format_prob(): 10 | d = predict.format_prob(prob=[0.1, 0.9], index_to_class={0: "x", 1: "y"}) 11 | assert d == {"x": 0.1, "y": 0.9} 12 | -------------------------------------------------------------------------------- /tests/code/test_train.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | import pytest 4 | import utils 5 | 6 | from madewithml import train 7 | 8 | 9 | @pytest.mark.training 10 | def test_train_model(dataset_loc): 11 | experiment_name = utils.generate_experiment_name(prefix="test_train") 12 | train_loop_config = {"dropout_p": 0.5, "lr": 1e-4, "lr_factor": 0.8, "lr_patience": 3} 13 | result = train.train_model( 14 | experiment_name=experiment_name, 15 | dataset_loc=dataset_loc, 16 | train_loop_config=json.dumps(train_loop_config), 17 | num_workers=6, 18 | cpu_per_worker=1, 19 | gpu_per_worker=0, 20 | 
num_epochs=2, 21 | num_samples=512, 22 | batch_size=256, 23 | results_fp=None, 24 | ) 25 | utils.delete_experiment(experiment_name=experiment_name) 26 | train_loss_list = result.metrics_dataframe.to_dict()["train_loss"] 27 | assert train_loss_list[0] > train_loss_list[1] # loss decreased 28 | -------------------------------------------------------------------------------- /tests/code/test_tune.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | import pytest 4 | import utils 5 | 6 | from madewithml import tune 7 | 8 | 9 | @pytest.mark.training 10 | def test_tune_models(dataset_loc): 11 | num_runs = 2 12 | experiment_name = utils.generate_experiment_name(prefix="test_tune") 13 | initial_params = [ 14 | { 15 | "train_loop_config": { 16 | "dropout_p": 0.5, 17 | "lr": 1e-4, 18 | "lr_factor": 0.8, 19 | "lr_patience": 3, 20 | } 21 | } 22 | ] 23 | results = tune.tune_models( 24 | experiment_name=experiment_name, 25 | dataset_loc=dataset_loc, 26 | initial_params=json.dumps(initial_params), 27 | num_workers=6, 28 | cpu_per_worker=1, 29 | gpu_per_worker=0, 30 | num_runs=num_runs, 31 | num_epochs=1, 32 | num_samples=512, 33 | batch_size=256, 34 | results_fp=None, 35 | ) 36 | utils.delete_experiment(experiment_name=experiment_name) 37 | assert len(results.get_dataframe()) == num_runs 38 | -------------------------------------------------------------------------------- /tests/code/test_utils.py: -------------------------------------------------------------------------------- 1 | import tempfile 2 | from pathlib import Path 3 | 4 | import numpy as np 5 | import pytest 6 | import torch 7 | from ray.train.torch import get_device 8 | 9 | from madewithml import utils 10 | 11 | 12 | def test_set_seed(): 13 | utils.set_seeds() 14 | a = np.random.randn(2, 3) 15 | b = np.random.randn(2, 3) 16 | utils.set_seeds() 17 | x = np.random.randn(2, 3) 18 | y = np.random.randn(2, 3) 19 | assert np.array_equal(a, x) 20 | assert np.array_equal(b, y) 21 | 22 | 23 | def test_save_and_load_dict(): 24 | with tempfile.TemporaryDirectory() as dp: 25 | d = {"hello": "world"} 26 | fp = Path(dp, "d.json") 27 | utils.save_dict(d=d, path=fp) 28 | d = utils.load_dict(path=fp) 29 | assert d["hello"] == "world" 30 | 31 | 32 | def test_pad_array(): 33 | arr = np.array([[1, 2], [1, 2, 3]], dtype="object") 34 | padded_arr = np.array([[1, 2, 0], [1, 2, 3]]) 35 | assert np.array_equal(utils.pad_array(arr), padded_arr) 36 | 37 | 38 | def test_collate_fn(): 39 | batch = { 40 | "ids": np.array([[1, 2], [1, 2, 3]], dtype="object"), 41 | "masks": np.array([[1, 1], [1, 1, 1]], dtype="object"), 42 | "targets": np.array([3, 1]), 43 | } 44 | processed_batch = utils.collate_fn(batch) 45 | expected_batch = { 46 | "ids": torch.as_tensor([[1, 2, 0], [1, 2, 3]], dtype=torch.int32, device=get_device()), 47 | "masks": torch.as_tensor([[1, 1, 0], [1, 1, 1]], dtype=torch.int32, device=get_device()), 48 | "targets": torch.as_tensor([3, 1], dtype=torch.int64, device=get_device()), 49 | } 50 | for k in batch: 51 | assert torch.allclose(processed_batch[k], expected_batch[k]) 52 | 53 | 54 | @pytest.mark.parametrize( 55 | "d, keys, list", 56 | [ 57 | ({"a": [1, 2], "b": [1, 2]}, ["a", "b"], [{"a": 1, "b": 1}, {"a": 2, "b": 2}]), 58 | ({"a": [1, 2], "b": [1, 2]}, ["a"], [{"a": 1}, {"a": 2}]), 59 | ], 60 | ) 61 | def test_dict_to_list(d, keys, list): 62 | assert utils.dict_to_list(d, keys=keys) == list 63 | -------------------------------------------------------------------------------- /tests/code/utils.py: 
-------------------------------------------------------------------------------- 1 | import uuid 2 | 3 | from madewithml.config import mlflow 4 | 5 | 6 | def generate_experiment_name(prefix: str = "test") -> str: 7 | return f"{prefix}-{uuid.uuid4().hex[:8]}" 8 | 9 | 10 | def delete_experiment(experiment_name: str) -> None: 11 | client = mlflow.tracking.MlflowClient() 12 | experiment_id = client.get_experiment_by_name(experiment_name).experiment_id 13 | client.delete_experiment(experiment_id=experiment_id) 14 | -------------------------------------------------------------------------------- /tests/data/conftest.py: -------------------------------------------------------------------------------- 1 | import great_expectations as ge 2 | import pandas as pd 3 | import pytest 4 | 5 | 6 | def pytest_addoption(parser): 7 | """Add option to specify dataset location when executing tests from CLI. 8 | Ex: pytest --dataset-loc=$DATASET_LOC tests/data --verbose --disable-warnings 9 | """ 10 | parser.addoption("--dataset-loc", action="store", default=None, help="Dataset location.") 11 | 12 | 13 | @pytest.fixture(scope="module") 14 | def df(request): 15 | dataset_loc = request.config.getoption("--dataset-loc") 16 | df = ge.dataset.PandasDataset(pd.read_csv(dataset_loc)) 17 | return df 18 | -------------------------------------------------------------------------------- /tests/data/test_dataset.py: -------------------------------------------------------------------------------- 1 | def test_dataset(df): 2 | """Test dataset quality and integrity.""" 3 | column_list = ["id", "created_on", "title", "description", "tag"] 4 | df.expect_table_columns_to_match_ordered_list(column_list=column_list) # schema adherence 5 | tags = ["computer-vision", "natural-language-processing", "mlops", "other"] 6 | df.expect_column_values_to_be_in_set(column="tag", value_set=tags) # expected labels 7 | df.expect_compound_columns_to_be_unique(column_list=["title", "description"]) # data leaks 8 | df.expect_column_values_to_not_be_null(column="tag") # missing values 9 | df.expect_column_values_to_be_unique(column="id") # unique values 10 | df.expect_column_values_to_be_of_type(column="title", type_="str") # type adherence 11 | 12 | # Expectation suite 13 | expectation_suite = df.get_expectation_suite(discard_failed_expectations=False) 14 | results = df.validate(expectation_suite=expectation_suite, only_return_failures=True).to_json_dict() 15 | assert results["success"] 16 | -------------------------------------------------------------------------------- /tests/model/conftest.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from madewithml import predict 4 | from madewithml.predict import TorchPredictor 5 | 6 | 7 | def pytest_addoption(parser): 8 | parser.addoption("--run-id", action="store", default=None, help="Run ID of model to use.") 9 | 10 | 11 | @pytest.fixture(scope="module") 12 | def run_id(request): 13 | return request.config.getoption("--run-id") 14 | 15 | 16 | @pytest.fixture(scope="module") 17 | def predictor(run_id): 18 | best_checkpoint = predict.get_best_checkpoint(run_id=run_id) 19 | predictor = TorchPredictor.from_checkpoint(best_checkpoint) 20 | return predictor 21 | -------------------------------------------------------------------------------- /tests/model/test_behavioral.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import utils 3 | 4 | 5 | @pytest.mark.parametrize( 6 | "input_a, input_b, 
label", 7 | [ 8 | ( 9 | "Transformers applied to NLP have revolutionized machine learning.", 10 | "Transformers applied to NLP have disrupted machine learning.", 11 | "natural-language-processing", 12 | ), 13 | ], 14 | ) 15 | def test_invariance(input_a, input_b, label, predictor): 16 | """INVariance via verb injection (changes should not affect outputs).""" 17 | label_a = utils.get_label(text=input_a, predictor=predictor) 18 | label_b = utils.get_label(text=input_b, predictor=predictor) 19 | assert label_a == label_b == label 20 | 21 | 22 | @pytest.mark.parametrize( 23 | "input, label", 24 | [ 25 | ( 26 | "ML applied to text classification.", 27 | "natural-language-processing", 28 | ), 29 | ( 30 | "ML applied to image classification.", 31 | "computer-vision", 32 | ), 33 | ( 34 | "CNNs for text classification.", 35 | "natural-language-processing", 36 | ), 37 | ], 38 | ) 39 | def test_directional(input, label, predictor): 40 | """DIRectional expectations (changes with known outputs).""" 41 | prediction = utils.get_label(text=input, predictor=predictor) 42 | assert label == prediction 43 | 44 | 45 | @pytest.mark.parametrize( 46 | "input, label", 47 | [ 48 | ( 49 | "Natural language processing is the next big wave in machine learning.", 50 | "natural-language-processing", 51 | ), 52 | ( 53 | "MLOps is the next big wave in machine learning.", 54 | "mlops", 55 | ), 56 | ( 57 | "This is about graph neural networks.", 58 | "other", 59 | ), 60 | ], 61 | ) 62 | def test_mft(input, label, predictor): 63 | """Minimum Functionality Tests (simple input/output pairs).""" 64 | prediction = utils.get_label(text=input, predictor=predictor) 65 | assert label == prediction 66 | -------------------------------------------------------------------------------- /tests/model/utils.py: -------------------------------------------------------------------------------- 1 | import ray 2 | 3 | from madewithml import predict 4 | 5 | 6 | def get_label(text, predictor): 7 | sample_ds = ray.data.from_items([{"title": text, "description": "", "tag": "other"}]) 8 | results = predict.predict_proba(ds=sample_ds, predictor=predictor) 9 | return results[0]["prediction"] 10 | --------------------------------------------------------------------------------
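Appendix: example usage (an illustrative sketch, not a file in the repository). The snippet below shows how the modules above compose for ad-hoc batch inference; it only uses calls that appear in madewithml/predict.py and tests/model/utils.py. The run ID value is a hypothetical placeholder and must come from a real MLflow experiment (for example via predict.get_best_run_id).

import ray
from madewithml import predict
from madewithml.predict import TorchPredictor

run_id = "<mlflow-run-id>"  # hypothetical placeholder; look it up with predict.get_best_run_id(...)
best_checkpoint = predict.get_best_checkpoint(run_id=run_id)  # best checkpoint logged for that run
predictor = TorchPredictor.from_checkpoint(best_checkpoint)  # rebuilds the preprocessor and finetuned model
sample_ds = ray.data.from_items([{"title": "Transformers for text classification", "description": "Fine-tuning BERT on project descriptions.", "tag": "other"}])
results = predict.predict_proba(ds=sample_ds, predictor=predictor)  # [{"prediction": ..., "probabilities": {...}}]
print(results[0]["prediction"])

The same prediction can be made through the Typer CLI (option names assume Typer's default underscore-to-dash conversion): python madewithml/predict.py predict --run-id $RUN_ID --title "Transformers for text classification" --description "Fine-tuning BERT on project descriptions."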
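A second sketch, assuming the service in madewithml/serve.py is running locally on Ray Serve's default HTTP address (127.0.0.1:8000); the /predict/ route and response keys come from ModelDeployment above, while the host, port, and the use of the requests package (not pinned in requirements.txt) are assumptions.

import requests  # assumed to be installed; not part of the pinned requirements

payload = {"title": "Transformers for text classification", "description": "Fine-tuning BERT on project descriptions."}
response = requests.post("http://127.0.0.1:8000/predict/", json=payload)
print(response.json()["results"][0]["prediction"])  # predictions below the probability threshold fall back to "other"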