├── 06_gpu_and_ml
├── comfyui
│ ├── __init__.py
│ ├── .gitignore
│ ├── kjnodes
│ │ ├── mood.jpg
│ │ ├── tram.jpeg
│ │ └── kjnodes_example.py
│ ├── essentials
│ │ ├── vernere.jpg
│ │ ├── simple_mask.png
│ │ └── essentials_example.py
│ ├── ip_adapter
│ │ ├── starry_night.jpg
│ │ └── ip_adapter_example.py
│ ├── memory_snapshot
│ │ └── memory_snapshot_helper
│ │ │ ├── __init__.py
│ │ │ └── prestartup_script.py
│ ├── was_node_suite
│ │ └── was_node_example.py
│ ├── impact
│ │ └── impact_example.py
│ ├── workflow_api.json
│ └── comfyclient.py
├── nsight-profiling
│ ├── .gitignore
│ └── toy.cu
├── openai_whisper
│ ├── finetuning
│ │ ├── .gitignore
│ │ ├── audio
│ │ │ └── common_voice_hi_31822997.mp3
│ │ ├── requirements.txt
│ │ ├── train
│ │ │ ├── logs.py
│ │ │ ├── transcribe.py
│ │ │ └── end_to_end_check.py
│ │ └── readme.md
│ └── pod_transcriber
│ │ ├── app
│ │ ├── __init__.py
│ │ ├── frontend
│ │ │ ├── src
│ │ │ │ ├── vite-env.d.ts
│ │ │ │ ├── components
│ │ │ │ │ ├── Spinner.tsx
│ │ │ │ │ ├── HomeButton.tsx
│ │ │ │ │ └── Footer.tsx
│ │ │ │ ├── main.tsx
│ │ │ │ ├── index.css
│ │ │ │ └── routes
│ │ │ │ │ └── podcast.tsx
│ │ │ ├── postcss.config.cjs
│ │ │ ├── vite.config.ts
│ │ │ ├── tailwind.config.cjs
│ │ │ ├── tsconfig.node.json
│ │ │ ├── .gitignore
│ │ │ ├── tsconfig.json
│ │ │ ├── package.json
│ │ │ └── index.html
│ │ └── config.py
│ │ └── README.md
├── speech-to-text
│ ├── __init__.py
│ ├── streaming-parakeet-frontend
│ │ ├── index.html
│ │ └── audio-processor.js
│ ├── multitalker-frontend
│ │ ├── index.html
│ │ └── audio-processor.js
│ └── streaming-diarization-frontend
│ │ ├── audio-processor.js
│ │ └── index.html
├── blender
│ └── IceModal.blend
├── sam
│ └── cliff_jumping.mp4
├── tensorflow
│ └── tensorboard.png
├── protein-folding
│ ├── gradio_ui.png
│ ├── data
│ │ ├── chai1_quick_inference.json
│ │ ├── chai1_default_inference.json
│ │ ├── chai1_default_input.fasta
│ │ └── boltz_affinity.yaml
│ └── frontend
│ │ ├── index.css
│ │ ├── favicon.svg
│ │ └── background.svg
├── controlnet
│ └── demo_images
│ │ ├── dog.png
│ │ ├── house.png
│ │ ├── modal-logo-bright.png
│ │ └── modal-logo-edges.png
├── hyperparameter-sweep
│ ├── gradio.png
│ ├── shakespeare.jpg
│ ├── tensorboard.png
│ ├── assets
│ │ ├── index.css
│ │ ├── favicon.svg
│ │ └── background.svg
│ └── src
│ │ ├── tokenizer.py
│ │ ├── dataset.py
│ │ └── logs_manager.py
├── dreambooth
│ ├── gradio-image-generate.png
│ ├── instance_example_urls.txt
│ └── assets
│ │ ├── index.css
│ │ ├── favicon.svg
│ │ └── background.svg
├── stable_diffusion
│ └── demo_images
│ │ └── dog.png
├── reinforcement-learning
│ └── config_grpo.yaml
├── import_torch.py
├── llm-serving
│ └── openai_compatible
│ │ ├── locustfile.py
│ │ └── load_test.py
├── embeddings
│ ├── qdrant.py
│ └── wikipedia
│ │ └── download.py
├── gpu_fallbacks.py
└── gpu_snapshot.py
├── 13_sandboxes
├── codelangchain
│ ├── __init__.py
│ ├── src
│ │ ├── __init__.py
│ │ ├── common.py
│ │ ├── retrieval.py
│ │ └── edges.py
│ ├── README.md
│ └── langserve.py
└── sandbox_agent.py
├── 10_integrations
├── dbt
│ ├── .gitignore
│ ├── sample_proj_duckdb_s3
│ │ ├── seeds
│ │ │ ├── .gitkeep
│ │ │ ├── raw_customers.csv
│ │ │ ├── raw_payments.csv
│ │ │ └── raw_orders.csv
│ │ ├── tests
│ │ │ └── .gitkeep
│ │ ├── snapshots
│ │ │ └── .gitkeep
│ │ ├── .gitignore
│ │ ├── models
│ │ │ ├── sources.yml
│ │ │ ├── staging
│ │ │ │ ├── stg_customers.sql
│ │ │ │ ├── stg_orders.sql
│ │ │ │ ├── stg_payments.sql
│ │ │ │ └── schema.yml
│ │ │ ├── orders.sql
│ │ │ └── customers.sql
│ │ ├── profiles.yml
│ │ └── dbt_project.yml
│ └── dbt_docs.png
├── pushgateway_diagram.png
├── pushgateway_grafana.png
├── streamlit
│ ├── streamlit.png
│ ├── app.py
│ └── serve_streamlit.py
├── nyc_yellow_taxi_trips_s3_mount.png
├── ikea-instructions-for-building-a-gpu-rig-for-deep-learning.png
├── tailscale
│ ├── entrypoint.sh
│ └── modal_tailscale.py
└── webscraper_old.py
├── 07_web_endpoints
├── webrtc
│ ├── yolo
│ │ ├── __init__.py
│ │ └── yolo_classes.txt
│ └── frontend
│ │ └── index.html
├── badges_deploy.png
├── fasthtml-checkboxes
│ ├── ui.png
│ ├── constants.py
│ ├── styles.css
│ ├── cbx_locustfile.py
│ └── cbx_load_test.py
├── flask_app.py
├── fasthtml_app.py
├── fastapi_app.py
├── flask_streaming.py
├── streaming.py
└── badges.py
├── 03_scaling_out
├── stock_prices.png
├── basic_grid_search.py
└── dynamic_batching.py
├── 02_building_containers
├── screenshot.png
├── urls.txt
├── install_flash_attn.py
├── import_sklearn.py
└── install_cuda.py
├── misc
├── batch_inference
│ ├── batch_inference_roc.png
│ ├── batch_inference_progress.png
│ └── batch_inference_huggingface.png
├── README.md
├── say_hello_cron.py
├── google_search_generator.py
├── hello_shebang.py
├── queue_simple.py
├── lmdeploy_oai_compatible.py
└── tgi_oai_compatible.py
├── .gitignore
├── internal
├── requirements.txt
├── conftest.py
├── test-event.json
├── examples_test.py
└── run_example.py
├── .pre-commit-config.yaml
├── 01_getting_started
├── get_started.py
├── generators.py
├── inference.py
├── inference_endpoint.py
├── inference_map.py
├── inference_perf.py
└── inference_full.py
├── 08_advanced
├── generators_async.py
├── hello_world_async.py
├── parallel_execution.py
└── poll_delayed_result.py
├── .github
├── workflows
│ ├── typecheck.yml
│ ├── cd.yml
│ ├── build-and-run-example.yml
│ ├── check.yml
│ ├── stale.yml
│ └── run-examples.yml
├── actions
│ └── setup
│ │ └── action.yml
└── pull_request_template.md
├── pyproject.toml
├── LICENSE
├── README.md
├── 05_scheduling
└── schedule_simple.py
└── 09_job_queues
└── doc_ocr_frontend
└── index.html
/06_gpu_and_ml/comfyui/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/13_sandboxes/codelangchain/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/13_sandboxes/codelangchain/src/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/06_gpu_and_ml/comfyui/.gitignore:
--------------------------------------------------------------------------------
1 | comfyui_gen_image.png
2 |
--------------------------------------------------------------------------------
/06_gpu_and_ml/nsight-profiling/.gitignore:
--------------------------------------------------------------------------------
1 | *.nsys-rep
2 |
--------------------------------------------------------------------------------
/10_integrations/dbt/.gitignore:
--------------------------------------------------------------------------------
1 | logs/
2 | sample_proj/logs
--------------------------------------------------------------------------------
/10_integrations/dbt/sample_proj_duckdb_s3/seeds/.gitkeep:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/10_integrations/dbt/sample_proj_duckdb_s3/tests/.gitkeep:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/06_gpu_and_ml/openai_whisper/finetuning/.gitignore:
--------------------------------------------------------------------------------
1 | models/
2 |
--------------------------------------------------------------------------------
/06_gpu_and_ml/openai_whisper/pod_transcriber/app/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/10_integrations/dbt/sample_proj_duckdb_s3/snapshots/.gitkeep:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/07_web_endpoints/webrtc/yolo/__init__.py:
--------------------------------------------------------------------------------
1 | from .yolo import YOLOv10 as YOLOv10
2 |
--------------------------------------------------------------------------------
/10_integrations/dbt/sample_proj_duckdb_s3/.gitignore:
--------------------------------------------------------------------------------
1 |
2 | target/
3 | dbt_packages/
4 | logs/
5 |
--------------------------------------------------------------------------------
/06_gpu_and_ml/speech-to-text/__init__.py:
--------------------------------------------------------------------------------
1 | # ---
2 | # lambda-test: false
3 | # pytest: false
4 | # ---
5 |
--------------------------------------------------------------------------------
/06_gpu_and_ml/openai_whisper/pod_transcriber/app/frontend/src/vite-env.d.ts:
--------------------------------------------------------------------------------
1 | /// <reference types="vite/client" />
2 |
--------------------------------------------------------------------------------
/03_scaling_out/stock_prices.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/modal-labs/modal-examples/HEAD/03_scaling_out/stock_prices.png
--------------------------------------------------------------------------------
/10_integrations/dbt/dbt_docs.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/modal-labs/modal-examples/HEAD/10_integrations/dbt/dbt_docs.png
--------------------------------------------------------------------------------
/06_gpu_and_ml/blender/IceModal.blend:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/modal-labs/modal-examples/HEAD/06_gpu_and_ml/blender/IceModal.blend
--------------------------------------------------------------------------------
/06_gpu_and_ml/sam/cliff_jumping.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/modal-labs/modal-examples/HEAD/06_gpu_and_ml/sam/cliff_jumping.mp4
--------------------------------------------------------------------------------
/07_web_endpoints/badges_deploy.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/modal-labs/modal-examples/HEAD/07_web_endpoints/badges_deploy.png
--------------------------------------------------------------------------------
/02_building_containers/screenshot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/modal-labs/modal-examples/HEAD/02_building_containers/screenshot.png
--------------------------------------------------------------------------------
/06_gpu_and_ml/comfyui/kjnodes/mood.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/modal-labs/modal-examples/HEAD/06_gpu_and_ml/comfyui/kjnodes/mood.jpg
--------------------------------------------------------------------------------
/06_gpu_and_ml/comfyui/kjnodes/tram.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/modal-labs/modal-examples/HEAD/06_gpu_and_ml/comfyui/kjnodes/tram.jpeg
--------------------------------------------------------------------------------
/06_gpu_and_ml/tensorflow/tensorboard.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/modal-labs/modal-examples/HEAD/06_gpu_and_ml/tensorflow/tensorboard.png
--------------------------------------------------------------------------------
/10_integrations/pushgateway_diagram.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/modal-labs/modal-examples/HEAD/10_integrations/pushgateway_diagram.png
--------------------------------------------------------------------------------
/10_integrations/pushgateway_grafana.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/modal-labs/modal-examples/HEAD/10_integrations/pushgateway_grafana.png
--------------------------------------------------------------------------------
/10_integrations/streamlit/streamlit.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/modal-labs/modal-examples/HEAD/10_integrations/streamlit/streamlit.png
--------------------------------------------------------------------------------
/06_gpu_and_ml/protein-folding/gradio_ui.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/modal-labs/modal-examples/HEAD/06_gpu_and_ml/protein-folding/gradio_ui.png
--------------------------------------------------------------------------------
/07_web_endpoints/fasthtml-checkboxes/ui.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/modal-labs/modal-examples/HEAD/07_web_endpoints/fasthtml-checkboxes/ui.png
--------------------------------------------------------------------------------
/06_gpu_and_ml/comfyui/essentials/vernere.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/modal-labs/modal-examples/HEAD/06_gpu_and_ml/comfyui/essentials/vernere.jpg
--------------------------------------------------------------------------------
/06_gpu_and_ml/controlnet/demo_images/dog.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/modal-labs/modal-examples/HEAD/06_gpu_and_ml/controlnet/demo_images/dog.png
--------------------------------------------------------------------------------
/06_gpu_and_ml/controlnet/demo_images/house.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/modal-labs/modal-examples/HEAD/06_gpu_and_ml/controlnet/demo_images/house.png
--------------------------------------------------------------------------------
/06_gpu_and_ml/hyperparameter-sweep/gradio.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/modal-labs/modal-examples/HEAD/06_gpu_and_ml/hyperparameter-sweep/gradio.png
--------------------------------------------------------------------------------
/misc/batch_inference/batch_inference_roc.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/modal-labs/modal-examples/HEAD/misc/batch_inference/batch_inference_roc.png
--------------------------------------------------------------------------------
/06_gpu_and_ml/comfyui/essentials/simple_mask.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/modal-labs/modal-examples/HEAD/06_gpu_and_ml/comfyui/essentials/simple_mask.png
--------------------------------------------------------------------------------
/06_gpu_and_ml/comfyui/ip_adapter/starry_night.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/modal-labs/modal-examples/HEAD/06_gpu_and_ml/comfyui/ip_adapter/starry_night.jpg
--------------------------------------------------------------------------------
/06_gpu_and_ml/dreambooth/gradio-image-generate.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/modal-labs/modal-examples/HEAD/06_gpu_and_ml/dreambooth/gradio-image-generate.png
--------------------------------------------------------------------------------
/06_gpu_and_ml/hyperparameter-sweep/shakespeare.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/modal-labs/modal-examples/HEAD/06_gpu_and_ml/hyperparameter-sweep/shakespeare.jpg
--------------------------------------------------------------------------------
/06_gpu_and_ml/hyperparameter-sweep/tensorboard.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/modal-labs/modal-examples/HEAD/06_gpu_and_ml/hyperparameter-sweep/tensorboard.png
--------------------------------------------------------------------------------
/06_gpu_and_ml/stable_diffusion/demo_images/dog.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/modal-labs/modal-examples/HEAD/06_gpu_and_ml/stable_diffusion/demo_images/dog.png
--------------------------------------------------------------------------------
/10_integrations/nyc_yellow_taxi_trips_s3_mount.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/modal-labs/modal-examples/HEAD/10_integrations/nyc_yellow_taxi_trips_s3_mount.png
--------------------------------------------------------------------------------
/misc/batch_inference/batch_inference_progress.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/modal-labs/modal-examples/HEAD/misc/batch_inference/batch_inference_progress.png
--------------------------------------------------------------------------------
/misc/batch_inference/batch_inference_huggingface.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/modal-labs/modal-examples/HEAD/misc/batch_inference/batch_inference_huggingface.png
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | **/__pycache__
2 | .DS_Store
3 | .vscode
4 |
5 | venv
6 | .venv
7 |
8 | # secrets file for act, tool for local GitHub Actions testing
9 | .secrets
10 |
--------------------------------------------------------------------------------
/internal/requirements.txt:
--------------------------------------------------------------------------------
1 | pytest
2 | jupyter
3 | ipython
4 | nbconvert
5 | jupytext~=1.16.1
6 | pydantic~=1.10.14
7 | mypy==1.2.0
8 | ruff==0.9.6
9 | fastapi
10 |
--------------------------------------------------------------------------------
/06_gpu_and_ml/controlnet/demo_images/modal-logo-bright.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/modal-labs/modal-examples/HEAD/06_gpu_and_ml/controlnet/demo_images/modal-logo-bright.png
--------------------------------------------------------------------------------
/06_gpu_and_ml/controlnet/demo_images/modal-logo-edges.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/modal-labs/modal-examples/HEAD/06_gpu_and_ml/controlnet/demo_images/modal-logo-edges.png
--------------------------------------------------------------------------------
/07_web_endpoints/fasthtml-checkboxes/constants.py:
--------------------------------------------------------------------------------
1 | # ---
2 | # lambda-test: false # auxiliary-file
3 | # ---
4 | N_CHECKBOXES = 100_000 # feel free to increase, if you dare!
5 |
--------------------------------------------------------------------------------
/06_gpu_and_ml/openai_whisper/pod_transcriber/app/frontend/postcss.config.cjs:
--------------------------------------------------------------------------------
1 | module.exports = {
2 | plugins: {
3 | tailwindcss: {},
4 | autoprefixer: {},
5 | },
6 | };
7 |
--------------------------------------------------------------------------------
/internal/conftest.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 |
4 | @pytest.fixture(autouse=True)
5 | def disable_auto_mount(monkeypatch):
6 | monkeypatch.setenv("MODAL_AUTOMOUNT", "0")
7 | yield
8 |
--------------------------------------------------------------------------------
/06_gpu_and_ml/protein-folding/data/chai1_quick_inference.json:
--------------------------------------------------------------------------------
1 | {
2 | "num_trunk_recycles": 1,
3 | "num_diffn_timesteps": 10,
4 | "seed": 42,
5 | "use_esm_embeddings": true
6 | }
7 |
--------------------------------------------------------------------------------
/06_gpu_and_ml/openai_whisper/finetuning/audio/common_voice_hi_31822997.mp3:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/modal-labs/modal-examples/HEAD/06_gpu_and_ml/openai_whisper/finetuning/audio/common_voice_hi_31822997.mp3
--------------------------------------------------------------------------------
/10_integrations/ikea-instructions-for-building-a-gpu-rig-for-deep-learning.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/modal-labs/modal-examples/HEAD/10_integrations/ikea-instructions-for-building-a-gpu-rig-for-deep-learning.png
--------------------------------------------------------------------------------
/06_gpu_and_ml/openai_whisper/finetuning/requirements.txt:
--------------------------------------------------------------------------------
1 | datasets~=3.2.0
2 | evaluate~=0.4.3
3 | jiwer~=3.0.5
4 | librosa~=0.10.0
5 | torch~=2.5.1
6 | torchaudio~=2.5.1
7 | transformers~=4.48.0
8 | accelerate~=1.2.1
9 |
--------------------------------------------------------------------------------
/06_gpu_and_ml/protein-folding/data/chai1_default_inference.json:
--------------------------------------------------------------------------------
1 | {
2 | "num_trunk_recycles": 3,
3 | "num_diffn_timesteps": 200,
4 | "seed": 42,
5 | "use_esm_embeddings": true,
6 | "use_msa_server": true
7 | }
8 |
--------------------------------------------------------------------------------
/06_gpu_and_ml/openai_whisper/pod_transcriber/app/frontend/vite.config.ts:
--------------------------------------------------------------------------------
1 | import { defineConfig } from "vite";
2 | import react from "@vitejs/plugin-react";
3 |
4 | // https://vitejs.dev/config/
5 | export default defineConfig({
6 | plugins: [react()],
7 | });
8 |
--------------------------------------------------------------------------------
/06_gpu_and_ml/openai_whisper/pod_transcriber/app/frontend/tailwind.config.cjs:
--------------------------------------------------------------------------------
1 | /** @type {import('tailwindcss').Config} */
2 | module.exports = {
3 | content: ["./index.html", "./src/**/*.{js,ts,jsx,tsx}"],
4 | theme: {
5 | extend: {},
6 | },
7 | plugins: [],
8 | };
9 |
--------------------------------------------------------------------------------
/06_gpu_and_ml/openai_whisper/pod_transcriber/app/frontend/src/components/Spinner.tsx:
--------------------------------------------------------------------------------
1 | import PulseLoader from "react-spinners/PulseLoader";
2 |
3 | export default function Spinner({ size }: { size: number }) {
4 | return <PulseLoader size={size} />;
5 | }
6 |
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | repos:
2 | - repo: https://github.com/astral-sh/ruff-pre-commit
3 | # keep version here in sync with CI/CD and other modal repos
4 | rev: "v0.9.6"
5 | hooks:
6 | - id: ruff
7 | args: [--fix, --exit-non-zero-on-fix]
8 | - id: ruff-format
9 |
--------------------------------------------------------------------------------
/06_gpu_and_ml/dreambooth/instance_example_urls.txt:
--------------------------------------------------------------------------------
1 | https://modal-public-assets.s3.amazonaws.com/example-dreambooth-app/fkRYgv6.png
2 | https://modal-public-assets.s3.amazonaws.com/example-dreambooth-app/98k9yDg.jpg
3 | https://modal-public-assets.s3.amazonaws.com/example-dreambooth-app/gHlW8Kw.jpg
4 |
--------------------------------------------------------------------------------
/06_gpu_and_ml/openai_whisper/pod_transcriber/app/frontend/src/main.tsx:
--------------------------------------------------------------------------------
1 | import React from "react";
2 | import ReactDOM from "react-dom/client";
3 | import App from "./app";
4 | import "./index.css";
5 |
6 | ReactDOM.createRoot(document.getElementById("root") as HTMLElement).render(
7 | <React.StrictMode><App /></React.StrictMode>
8 | );
9 |
--------------------------------------------------------------------------------
/06_gpu_and_ml/openai_whisper/pod_transcriber/app/frontend/tsconfig.node.json:
--------------------------------------------------------------------------------
1 | {
2 | "compilerOptions": {
3 | "composite": true,
4 | "module": "ESNext",
5 | "moduleResolution": "Node",
6 | "allowSyntheticDefaultImports": true
7 | },
8 | "include": ["vite.config.ts"]
9 | }
10 |
--------------------------------------------------------------------------------
/10_integrations/dbt/sample_proj_duckdb_s3/models/sources.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 |
3 | sources:
4 | - name: external_source
5 | meta:
6 | external_location: "s3://modal-example-dbt-duckdb-s3/sources/{name}.parquet"
7 | tables:
8 | - name: raw_customers
9 | - name: raw_orders
10 | - name: raw_payments
11 |
--------------------------------------------------------------------------------
/01_getting_started/get_started.py:
--------------------------------------------------------------------------------
1 | import modal
2 |
3 | app = modal.App("example-get-started")
4 |
5 |
6 | @app.function()
7 | def square(x):
8 | print("This code is running on a remote worker!")
9 | return x**2
10 |
11 |
12 | @app.local_entrypoint()
13 | def main():
14 | print("the square is", square.remote(42))
15 |
--------------------------------------------------------------------------------
/02_building_containers/urls.txt:
--------------------------------------------------------------------------------
1 | adobe.com
2 | alibaba.com
3 | aliexpress.com
4 | amazon.com
5 | apple.com
6 | baidu.com
7 | bbc.co.uk
8 | bing.com
9 | blogspot.com
10 | booking.com
11 | craigslist.org
12 | dailymail.co.uk
13 | dropbox.com
14 | ebay.com
15 | facebook.com
16 | github.com
17 | google.com
18 | imdb.com
19 | imgur.com
20 | instagram.com
21 |
--------------------------------------------------------------------------------
/06_gpu_and_ml/openai_whisper/pod_transcriber/app/frontend/.gitignore:
--------------------------------------------------------------------------------
1 | # Logs
2 | logs
3 | *.log
4 | npm-debug.log*
5 | yarn-debug.log*
6 | yarn-error.log*
7 | pnpm-debug.log*
8 | lerna-debug.log*
9 |
10 | node_modules
11 | dist
12 | dist-ssr
13 | *.local
14 |
15 | # Editor directories and files
16 | .vscode/*
17 | !.vscode/extensions.json
18 | .idea
19 | .DS_Store
20 | *.suo
21 | *.ntvs*
22 | *.njsproj
23 | *.sln
24 | *.sw?
25 |
--------------------------------------------------------------------------------
/06_gpu_and_ml/openai_whisper/pod_transcriber/app/frontend/src/components/HomeButton.tsx:
--------------------------------------------------------------------------------
1 | import { Link } from "react-router-dom";
2 |
3 | export default function HomeButton() {
4 | return (
5 |
6 |
9 |
10 | );
11 | }
12 |
--------------------------------------------------------------------------------
/06_gpu_and_ml/dreambooth/assets/index.css:
--------------------------------------------------------------------------------
1 | /* Bit of Modal Labs color scheming for the Gradio.app UI
2 |
3 | from https://github.com/modal-labs/modal-examples */
4 |
5 | a {
6 | text-decoration: inherit !important;
7 | }
8 |
9 | gradio-app {
10 | background-image: url(/assets/background.svg) !important;
11 | background-repeat: no-repeat !important;
12 | background-size: 100% auto;
13 | padding-top: 3%;
14 | background-color: black;
15 | }
16 |
--------------------------------------------------------------------------------
/06_gpu_and_ml/protein-folding/frontend/index.css:
--------------------------------------------------------------------------------
1 | /* Bit of Modal Labs color scheming for the Gradio.app UI
2 |
3 | from https://github.com/modal-labs/modal-examples */
4 |
5 | a {
6 | text-decoration: inherit !important;
7 | }
8 |
9 | gradio-app {
10 | background-image: url(/assets/background.svg) !important;
11 | background-repeat: no-repeat !important;
12 | background-size: 100% auto;
13 | padding-top: 3%;
14 | background-color: black;
15 | }
16 |
--------------------------------------------------------------------------------
/06_gpu_and_ml/hyperparameter-sweep/assets/index.css:
--------------------------------------------------------------------------------
1 | /* Bit of Modal Labs color scheming for the Gradio.app UI
2 |
3 | from https://github.com/modal-labs/modal-examples */
4 |
5 | a {
6 | text-decoration: inherit !important;
7 | }
8 |
9 | gradio-app {
10 | background-image: url(/assets/background.svg) !important;
11 | background-repeat: no-repeat !important;
12 | background-size: 100% auto;
13 | padding-top: 3%;
14 | background-color: black;
15 | }
16 |
--------------------------------------------------------------------------------
/misc/README.md:
--------------------------------------------------------------------------------
1 | # Miscellaneous Examples
2 |
3 | This directory contains a variety of examples of ways to use Modal.
4 |
5 | Unlike the examples in the rest of this repository, these examples are not
6 | continually monitored for correctness, so they may become out of date or
7 | incorrect over time.
8 |
9 | If you find an error in one of these examples, please report it in the issues
10 | tab or, even better, submit a pull request to fix it.
11 |
--------------------------------------------------------------------------------
/internal/test-event.json:
--------------------------------------------------------------------------------
1 | {
2 | "event_name": "pull_request",
3 | "pull_request": {
4 | "base": {
5 | "ref": "main",
6 | "sha": "3e3cba16881e73a80887c2f09477e86f0522b072"
7 | },
8 | "head": {
9 | "ref": "charlesfrye/run-examples-again",
10 | "sha": "b639aa6e806d2db555cbf4cfc29f2b93c4d50fcb"
11 | }
12 | },
13 | "repository": {
14 | "full_name": "modal-labs/modal-examples"
15 | },
16 | "ref": "refs/pull/1/merge"
17 | }
18 |
19 |
--------------------------------------------------------------------------------
/06_gpu_and_ml/comfyui/memory_snapshot/memory_snapshot_helper/__init__.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | from aiohttp import web
4 | from server import PromptServer
5 |
6 | # ------- API Endpoints -------
7 |
8 |
9 | @PromptServer.instance.routes.post("/cuda/set_device")
10 | async def set_current_device(request):
11 | os.environ["CUDA_VISIBLE_DEVICES"] = "0"
12 | return web.json_response({"status": "success"})
13 |
14 |
15 | # Empty for ComfyUI node registration
16 | NODE_CLASS_MAPPINGS = {}
17 |
--------------------------------------------------------------------------------
/10_integrations/dbt/sample_proj_duckdb_s3/models/staging/stg_customers.sql:
--------------------------------------------------------------------------------
1 | with source as (
2 |
3 | {#-
4 | Here we load data from the external S3 bucket, which was seeded
5 | by running the `seed` Modal function.
6 | #}
7 | select * from {{ source('external_source', 'raw_customers') }}
8 |
9 | ),
10 |
11 | renamed as (
12 |
13 | select
14 | id as customer_id,
15 | first_name,
16 | last_name
17 |
18 | from source
19 |
20 | )
21 |
22 | select * from renamed
23 |
--------------------------------------------------------------------------------
/08_advanced/generators_async.py:
--------------------------------------------------------------------------------
1 | # # Run async generator function on Modal
2 |
3 | # This example shows how you can run an async generator function on Modal.
4 | # Modal natively supports async/await syntax using asyncio.
5 |
6 | import modal
7 |
8 | app = modal.App("example-generators-async")
9 |
10 |
11 | @app.function()
12 | def f(i):
13 | for j in range(i):
14 | yield j
15 |
16 |
17 | @app.local_entrypoint()
18 | async def run_async():
19 | async for r in f.remote_gen.aio(10):
20 | print(r)
21 |
--------------------------------------------------------------------------------
/misc/say_hello_cron.py:
--------------------------------------------------------------------------------
1 | # # Deploy a cron job with Modal
2 |
3 | # This example shows how you can deploy a cron job with Modal.
4 |
5 | import time
6 | from datetime import datetime, timezone
7 |
8 | import modal
9 |
10 | app = modal.App("example-say-hello-cron")
11 |
12 |
13 | @app.function(schedule=modal.Period(seconds=10))
14 | def say_hello():
15 | start_time = datetime.now(timezone.utc)
16 | for i in range(10):
17 | print(f"Message #{i} from invocation at {start_time}")
18 | time.sleep(1.5)
19 |
--------------------------------------------------------------------------------
/10_integrations/dbt/sample_proj_duckdb_s3/models/staging/stg_orders.sql:
--------------------------------------------------------------------------------
1 | with source as (
2 |
3 | {#-
4 | Here we load data from the external S3 bucket, which was seeded
5 | by running the `seed` Modal function.
6 | #}
7 | select * from {{ source('external_source', 'raw_orders') }}
8 |
9 | ),
10 |
11 | renamed as (
12 |
13 | select
14 | id as order_id,
15 | user_id as customer_id,
16 | order_date,
17 | status
18 |
19 | from source
20 |
21 | )
22 |
23 | select * from renamed
24 |
--------------------------------------------------------------------------------
/06_gpu_and_ml/hyperparameter-sweep/src/tokenizer.py:
--------------------------------------------------------------------------------
1 | # ---
2 | # pytest: false
3 | # ---
4 |
5 |
6 | class Tokenizer:
7 | def __init__(self, text):
8 | self.unique_chars = sorted(set(text)) # sorted to ensure a consistent ordering
9 | self.stoi = {c: i for i, c in enumerate(self.unique_chars)}
10 | self.itos = {i: c for i, c in enumerate(self.unique_chars)}
11 | self.vocab_size = len(self.unique_chars)
12 |
13 | def encode(self, text):
14 | return [self.stoi[c] for c in text]
15 |
16 | def decode(self, tokens):
17 | return [self.itos[int(t)] for t in tokens]
18 |
--------------------------------------------------------------------------------
/06_gpu_and_ml/protein-folding/data/chai1_default_input.fasta:
--------------------------------------------------------------------------------
1 | >protein|name=example-of-long-protein
2 | AGSHSMRYFSTSVSRPGRGEPRFIAVGYVDDTQFVRFDSDAASPRGEPRAPWVEQEGPEYWDRETQKYKRQAQTDRVSLRNLRGYYNQSEAGSHTLQWMFGCDLGPDGRLLRGYDQSAYDGKDYIALNEDLRSWTAADTAAQITQRKWEAAREAEQRRAYLEGTCVEWLRRYLENGKETLQRAEHPKTHVTHHPVSDHEATLRCWALGFYPAEITLTWQWDGEDQTQDTELVETRPAGDGTFQKWAAVVVPSGEEQRYTCHVQHEGLPEPLTLRWEP
3 | >protein|name=example-of-short-protein
4 | AIQRTPKIQVYSRHPAENGKSNFLNCYVSGFHPSDIEVDLLKNGERIEKVEHSDLSFSKDWSFYLLYYTEFTPTEKDEYACRVNHVTLSQPKIVKWDRDM
5 | >protein|name=example-peptide
6 | GAAL
7 | >ligand|name=example-ligand-as-smiles
8 | CCCCCCCCCCCCCC(=O)O
9 |
--------------------------------------------------------------------------------
/.github/workflows/typecheck.yml:
--------------------------------------------------------------------------------
1 | name: Typecheck
2 | on:
3 | push:
4 | branches:
5 | - main
6 | pull_request:
7 | workflow_dispatch:
8 |
9 | jobs:
10 | mypy:
11 | name: MyPy
12 | runs-on: ubuntu-24.04
13 |
14 | steps:
15 | - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # v3
16 |
17 | - uses: actions/setup-python@65d7f2d534ac1bc67fcd62888c5f4f3d2cb2b236 # v4
18 | with:
19 | python-version: "3.11"
20 |
21 | - name: Install mypy
22 | run: pip install mypy==0.950
23 |
24 | - name: Run
25 | run: python3 internal/typecheck.py
26 |
--------------------------------------------------------------------------------
/06_gpu_and_ml/reinforcement-learning/config_grpo.yaml:
--------------------------------------------------------------------------------
1 | compute_environment: LOCAL_MACHINE
2 | debug: false
3 | deepspeed_config:
4 | deepspeed_multinode_launcher: standard
5 | offload_optimizer_device: none
6 | offload_param_device: none
7 | zero3_init_flag: true
8 | zero3_save_16bit_model: true
9 | zero_stage: 3
10 | distributed_type: DEEPSPEED
11 | downcast_bf16: 'no'
12 | machine_rank: 0
13 | main_training_function: main
14 | mixed_precision: bf16
15 | num_machines: 1
16 | num_processes: 3
17 | rdzv_backend: static
18 | same_network: true
19 | tpu_env: []
20 | tpu_use_cluster: false
21 | tpu_use_sudo: false
22 | use_cpu: false
--------------------------------------------------------------------------------
/10_integrations/dbt/sample_proj_duckdb_s3/models/staging/stg_payments.sql:
--------------------------------------------------------------------------------
1 | with source as (
2 |
3 | {#-
4 | Here we load data from the external S3 bucket, which was seeded
5 | by running the `seed` Modal function.
6 | #}
7 | select * from {{ source('external_source', 'raw_payments') }}
8 |
9 | ),
10 |
11 | renamed as (
12 |
13 | select
14 | id as payment_id,
15 | order_id,
16 | payment_method,
17 |
18 | -- `amount` is currently stored in cents, so we convert it to dollars
19 | amount / 100 as amount
20 |
21 | from source
22 |
23 | )
24 |
25 | select * from renamed
26 |
--------------------------------------------------------------------------------
/06_gpu_and_ml/protein-folding/data/boltz_affinity.yaml:
--------------------------------------------------------------------------------
1 | version: 1 # Optional, defaults to 1
2 | sequences:
3 | - protein:
4 | id: A
5 | sequence: MVTPEGNVSLVDESLLVGVTDEDRAVRSAHQFYERLIGLWAPAVMEAAHELGVFAALAEAPADSGELARRLDCDARAMRVLLDALYAYDVIDRIHDTNGFRYLLSAEARECLLPGTLFSLVGKFMHDINVAWPAWRNLAEVVRHGARDTSGAESPNGIAQEDYESLVGGINFWAPPIVTTLSRKLRASGRSGDATASVLDVGCGTGLYSQLLLREFPRWTATGLDVERIATLANAQALRLGVEERFATRAGDFWRGGWGTGYDLVLFANIFHLQTPASAVRLMRHAAACLAPDGLVAVVDQIVDADREPKTPQDRFALLFAASMTNTGGGDAYTFQEYEEWFTAAGLQRIETLDTPMHRILLARRATEPSAVPEGQASENLYFQ
6 | - ligand:
7 | id: B
8 | smiles: 'N[C@@H](Cc1ccc(O)cc1)C(=O)O'
9 | properties:
10 | - affinity:
11 | binder: B
--------------------------------------------------------------------------------
/10_integrations/dbt/sample_proj_duckdb_s3/profiles.yml:
--------------------------------------------------------------------------------
1 | {
2 | "sample_proj":
3 | {
4 | "target": "modal",
5 | "outputs":
6 | {
7 | "modal":
8 | {
9 | "type": "duckdb",
10 | "path": "/tmp/dbt.duckdb",
11 | "extensions": ["httpfs", "parquet"],
12 | "settings":
13 | {
14 | "s3_region": "us-east-1",
15 | "s3_access_key_id": "{{ env_var('AWS_ACCESS_KEY_ID') }}",
16 | "s3_secret_access_key": "{{ env_var('AWS_SECRET_ACCESS_KEY') }}",
17 | },
18 | },
19 | },
20 | },
21 | }
22 |
--------------------------------------------------------------------------------
/01_getting_started/generators.py:
--------------------------------------------------------------------------------
1 | # # Run a generator function on Modal
2 |
3 | # This example shows how you can run a generator function on Modal. We define a
4 | # function that `yields` values and then call it with the [`remote_gen`](https://modal.com/docs/reference/modal.Function#remote_gen) method. The
5 | # `remote_gen` method returns a generator object that can be used to iterate over
6 | # the values produced by the function.
7 |
8 | import modal
9 |
10 | app = modal.App("example-generators")
11 |
12 |
13 | @app.function()
14 | def f(i):
15 | for j in range(i):
16 | yield j
17 |
18 |
19 | @app.local_entrypoint()
20 | def main():
21 | for r in f.remote_gen(10):
22 | print(r)
23 |
--------------------------------------------------------------------------------
/06_gpu_and_ml/openai_whisper/pod_transcriber/app/frontend/tsconfig.json:
--------------------------------------------------------------------------------
1 | {
2 | "compilerOptions": {
3 | "target": "ESNext",
4 | "useDefineForClassFields": true,
5 | "lib": ["DOM", "DOM.Iterable", "ESNext"],
6 | "allowJs": false,
7 | "skipLibCheck": true,
8 | "esModuleInterop": false,
9 | "allowSyntheticDefaultImports": true,
10 | "strict": true,
11 | "forceConsistentCasingInFileNames": true,
12 | "module": "ESNext",
13 | "moduleResolution": "Node",
14 | "resolveJsonModule": true,
15 | "isolatedModules": true,
16 | "noEmit": true,
17 | "jsx": "react-jsx"
18 | },
19 | "include": ["src"],
20 | "references": [{ "path": "./tsconfig.node.json" }]
21 | }
22 |
--------------------------------------------------------------------------------
/.github/workflows/cd.yml:
--------------------------------------------------------------------------------
1 | name: Deploy
2 | on:
3 | push:
4 | branches:
5 | - main
6 | workflow_dispatch:
7 |
8 | jobs:
9 | deploy:
10 | name: Deploy example apps
11 | if: github.ref == 'refs/heads/main'
12 | runs-on: ubuntu-24.04
13 | env:
14 | MODAL_TOKEN_ID: ${{ secrets.MODAL_MODAL_LABS_TOKEN_ID }}
15 | MODAL_TOKEN_SECRET: ${{ secrets.MODAL_MODAL_LABS_TOKEN_SECRET }}
16 | MODAL_ENVIRONMENT: examples
17 |
18 | steps:
19 | - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # v3
20 | with:
21 | fetch-depth: 1
22 | - uses: ./.github/actions/setup
23 |
24 | - name: Run deployment script
25 | run: |
26 | python3 internal/deploy.py --no-dry-run
27 |
--------------------------------------------------------------------------------
/06_gpu_and_ml/openai_whisper/finetuning/train/logs.py:
--------------------------------------------------------------------------------
1 | import logging
2 |
3 |
4 | def get_logger(name, level=logging.INFO):
5 | logger = logging.getLogger(name)
6 | handler = logging.StreamHandler()
7 | handler.setFormatter(
8 | logging.Formatter("%(levelname)s: %(asctime)s: %(name)s %(message)s")
9 | )
10 | logger.addHandler(handler)
11 | logger.setLevel(level)
12 | return logger
13 |
14 |
15 | def setup_logging(*, logger: logging.Logger, log_level: int) -> None:
16 | import datasets
17 | import transformers
18 |
19 | datasets.utils.logging.set_verbosity(log_level)
20 | transformers.utils.logging.set_verbosity(log_level)
21 | transformers.utils.logging.enable_default_handler()
22 | transformers.utils.logging.enable_explicit_format()
23 |
--------------------------------------------------------------------------------
/01_getting_started/inference.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 |
3 | import modal
4 |
5 | app = modal.App("example-inference")
6 | image = modal.Image.debian_slim().uv_pip_install("transformers[torch]")
7 |
8 |
9 | @app.function(gpu="h100", image=image)
10 | def chat(prompt: str | None = None) -> list[dict]:
11 | from transformers import pipeline
12 |
13 | if prompt is None:
14 | prompt = f"/no_think Read this code.\n\n{Path(__file__).read_text()}\nIn one paragraph, what does the code do?"
15 |
16 | print(prompt)
17 | context = [{"role": "user", "content": prompt}]
18 |
19 | chatbot = pipeline(
20 | model="Qwen/Qwen3-1.7B-FP8", device_map="cuda", max_new_tokens=1024
21 | )
22 | result = chatbot(context)
23 | print(result[0]["generated_text"][-1]["content"])
24 |
25 | return result
26 |
--------------------------------------------------------------------------------
/06_gpu_and_ml/import_torch.py:
--------------------------------------------------------------------------------
1 | import modal
2 |
3 | app = modal.App("example-import-torch")
4 |
5 |
6 | torch_image = modal.Image.debian_slim().uv_pip_install(
7 | "torch==2.7",
8 | extra_index_url="https://download.pytorch.org/whl/cu128",
9 | force_build=True, # trigger a build every time, just for demonstration purposes
10 | # remove if you're using this in production!
11 | )
12 |
13 |
14 | @app.function(gpu="B200", image=torch_image)
15 | def torch() -> list[list[int]]:
16 | import math
17 |
18 | import torch
19 |
20 | print(torch.cuda.get_device_properties("cuda:0"))
21 |
22 | matrix = torch.randn(1024, 1024) / math.sqrt(1024)
23 | matrix = matrix @ matrix
24 |
25 | return matrix.detach().cpu().tolist()
26 |
27 |
28 | @app.local_entrypoint()
29 | def main():
30 | print(torch.remote()[:1])
31 |
--------------------------------------------------------------------------------
/06_gpu_and_ml/openai_whisper/pod_transcriber/app/frontend/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "whisper_frontend",
3 | "private": true,
4 | "version": "0.0.0",
5 | "type": "module",
6 | "scripts": {
7 | "dev": "vite",
8 | "build": "tsc && vite build",
9 | "preview": "vite preview"
10 | },
11 | "dependencies": {
12 | "react": "^18.2.0",
13 | "react-dom": "^18.2.0",
14 | "react-feather": "^2.0.10",
15 | "react-router-dom": "^6.4.2",
16 | "react-spinners": "^0.13.6",
17 | "swr": "^1.3.0"
18 | },
19 | "devDependencies": {
20 | "@types/react": "^18.0.17",
21 | "@types/react-dom": "^18.0.6",
22 | "@vitejs/plugin-react": "^2.1.0",
23 | "autoprefixer": "^10.4.12",
24 | "postcss": "^8.4.18",
25 | "tailwindcss": "^3.1.8",
26 | "typescript": "^4.6.4",
27 | "vite": "^3.1.0"
28 | }
29 | }
30 |
--------------------------------------------------------------------------------
/10_integrations/dbt/sample_proj_duckdb_s3/models/staging/schema.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 |
3 | models:
4 | - name: stg_customers
5 | columns:
6 | - name: customer_id
7 | tests:
8 | - unique
9 | - not_null
10 |
11 | - name: stg_orders
12 | columns:
13 | - name: order_id
14 | tests:
15 | - unique
16 | - not_null
17 | - name: status
18 | tests:
19 | - accepted_values:
20 | values:
21 | ["placed", "shipped", "completed", "return_pending", "returned"]
22 |
23 | - name: stg_payments
24 | columns:
25 | - name: payment_id
26 | tests:
27 | - unique
28 | - not_null
29 | - name: payment_method
30 | tests:
31 | - accepted_values:
32 | values: ["credit_card", "coupon", "bank_transfer", "gift_card"]
33 |
--------------------------------------------------------------------------------
/07_web_endpoints/fasthtml-checkboxes/styles.css:
--------------------------------------------------------------------------------
1 | /* This file is used to override the default pico.css styles. */
2 |
3 | body {
4 | background-color: #1d1d1d;
5 | }
6 |
7 | .container {
8 | padding: 2rem;
9 | width: 100%;
10 | max-width: 100%;
11 | }
12 |
13 | [type="checkbox"]:is(:checked, :checked:focus) {
14 | --pico-border-color: #7fee64;
15 | --pico-background-color: #7fee64;
16 | }
17 |
18 | [type="checkbox"]:not(:checked, :checked:focus) {
19 | --pico-border-color: rgba(255, 255, 255, 0.2);
20 | --pico-background-color: rgba(255, 255, 255, 0.05);
21 | }
22 |
23 | :where(select, textarea):not([readonly]):focus,
24 | input:not([type=submit], [type=button], [type=reset], [type=range], [type=file], [readonly]):focus {
25 | --pico-box-shadow: 0 0 0 var(--pico-outline-width) rgba(127, 238, 100, 0.25);
26 | --pico-border-color: rgba(127, 238, 100, 0.50);
27 | }
28 |
--------------------------------------------------------------------------------
/06_gpu_and_ml/openai_whisper/pod_transcriber/app/frontend/src/components/Footer.tsx:
--------------------------------------------------------------------------------
1 | import { Link } from "react-router-dom";
2 | import modalWordmarkImg from "../modal-wordmark.svg";
3 |
4 | export default function Footer() {
5 | return (
6 |
24 | );
25 | }
26 |
--------------------------------------------------------------------------------
/06_gpu_and_ml/openai_whisper/finetuning/readme.md:
--------------------------------------------------------------------------------
1 | ## Fine-tuning OpenAI's whisper model for improved automatic Hindi speech recognition
2 |
3 | The following configuration will finetune the `whisper-small` model for almost 3 hrs,
2 | achieving a word error rate (WER) of about 55-60. Increasing the number of training
5 | epochs should improve performance, decreasing WER.
6 |
7 | You can benchmark this example's performance using Hugging Face's [**autoevaluate leaderboard**](https://huggingface.co/spaces/autoevaluate/leaderboards?dataset=mozilla-foundation%2Fcommon_voice_11_0&only_verified=0&task=automatic-speech-recognition&config=hi&split=test&metric=wer).
8 |
9 | ```bash
10 | modal run -m train.train --num_train_epochs=10
11 | ```
12 |
13 | ### Testing
14 |
15 | Use `modal run -m train.end_to_end_check` to do a full train → serialize → save → load → predict
16 | run in less than 5 minutes, checking that the finetuning program is functional.
17 |
--------------------------------------------------------------------------------
/.github/workflows/build-and-run-example.yml:
--------------------------------------------------------------------------------
1 | name: Build and run random example
2 |
3 | on:
4 | workflow_dispatch:
5 | schedule:
6 | - cron: "23 * * * *"
7 |
8 | env:
9 | TERM: linux
10 | TERMINFO: /etc/terminfo
11 | MODAL_TOKEN_ID: ${{ secrets.MODAL_MODAL_LABS_TOKEN_ID }}
12 | MODAL_TOKEN_SECRET: ${{ secrets.MODAL_MODAL_LABS_TOKEN_SECRET }}
13 | MODAL_ENVIRONMENT: examples
14 |
15 | jobs:
16 | build-and-run:
17 | name: Build a random example from scratch and run it
18 | runs-on: ubuntu-24.04
19 | timeout-minutes: 60
20 | steps:
21 | - name: Checkout Repository
22 | uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # v3
23 | with:
24 | fetch-depth: 1
25 | - uses: ./.github/actions/setup
26 |
27 | - name: Run a random example with MODAL_IGNORE_CACHE set
28 | run: |
29 | MODAL_IGNORE_CACHE=1 python3 -m internal.run_example --timeout 1800
30 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [tool.pytest.ini_options]
2 | filterwarnings = [
3 | "error::DeprecationWarning",
4 | "error::modal.exception.DeprecationError",
5 | "ignore::DeprecationWarning:pytest.*:",
6 | ]
7 | addopts = "--ignore 07_web_endpoints/webrtc/webrtc_yolo_test.py --ignore 06_gpu_and_ml/llm-serving/openai_compatible/load_test.py --ignore 07_web_endpoints/fasthtml-checkboxes/cbx_load_test.py"
8 |
9 | [tool.mypy]
10 | ignore_missing_imports = true
11 | check_untyped_defs = true
12 | no_strict_optional = true
13 |
14 | # https://github.com/python/mypy/issues/10632
15 | [[tool.mypy.overrides]]
16 | module = "requests"
17 | ignore_missing_imports = true
18 |
19 | [tool.ruff]
20 | exclude = [".venv", "venv", "__pycache__"]
21 | line-length = 88
22 | # TODO: Add when available: "E266", "E203"
23 | lint.ignore = ["E501", "E741", "E402"]
24 | lint.select = ['E', 'F', 'W', 'I']
25 |
26 | [tool.ruff.lint.isort]
27 | combine-as-imports = true
28 | known-third-party = ["modal"]
29 |
--------------------------------------------------------------------------------
/06_gpu_and_ml/dreambooth/assets/favicon.svg:
--------------------------------------------------------------------------------
1 |
6 |
--------------------------------------------------------------------------------
/06_gpu_and_ml/protein-folding/frontend/favicon.svg:
--------------------------------------------------------------------------------
1 |
6 |
--------------------------------------------------------------------------------
/10_integrations/dbt/sample_proj_duckdb_s3/dbt_project.yml:
--------------------------------------------------------------------------------
1 | name: "jaffle_shop"
2 | version: "1.0.0"
3 | config-version: 2
4 |
5 | # This setting configures which "profile" dbt uses for this project.
6 | profile: "sample_proj"
7 |
8 | # These configurations specify where dbt should look for different types of files.
9 | # The `model-paths` config, for example, states that models in this project can be
10 | # found in the "models/" directory. You probably won't need to change these!
11 | model-paths: ["models"]
12 | analysis-paths: ["analyses"]
13 | test-paths: ["tests"]
14 | seed-paths: ["seeds"]
15 | macro-paths: ["macros"]
16 | snapshot-paths: ["snapshots"]
17 |
18 | target-path: "target" # directory which will store compiled SQL files
19 | clean-targets: # directories to be removed by `dbt clean`
20 | - "target"
21 | - "dbt_packages"
22 |
23 | # Configuring models
24 | # Full documentation: https://docs.getdbt.com/docs/configuring-models
25 | models:
26 | +materialized: table
27 |
--------------------------------------------------------------------------------
/06_gpu_and_ml/hyperparameter-sweep/assets/favicon.svg:
--------------------------------------------------------------------------------
1 |
6 |
--------------------------------------------------------------------------------
/06_gpu_and_ml/openai_whisper/pod_transcriber/app/frontend/src/index.css:
--------------------------------------------------------------------------------
1 | @tailwind base;
2 | @tailwind components;
3 | @tailwind utilities;
4 |
5 | :root {
6 | font-family: Inter, Avenir, Helvetica, Arial, sans-serif;
7 | font-size: 16px;
8 | line-height: 24px;
9 | font-weight: 401;
10 |
11 | background-color: rgb(249 250 251);
12 |
13 | padding: 0 !important;
14 | }
15 |
16 | .podcast-list li:last-child {
17 | @apply border-b;
18 | }
19 |
20 | .modal-barloader {
21 | margin-top: 10px;
22 | width: 0;
23 | height: 10px;
24 | border-right: 20px solid #333;
25 | border-left: 0px solid #bbffaa;
26 | box-shadow: 0 0 0 1px #bbffaa;
27 | animation: modal-barloader infinite 4s linear;
28 | filter: brightness(95%);
29 | }
30 |
31 | @keyframes modal-barloader {
32 | 0% {
33 | border-right: 20px solid #333;
34 | border-left: 0px solid #bbffaa;
35 | }
36 |
37 | 50% {
38 | border-left: 20px solid #bbffaa;
39 | border-right: 0px solid #333;
40 | }
41 | }
42 |
--------------------------------------------------------------------------------
/misc/google_search_generator.py:
--------------------------------------------------------------------------------
1 | # ---
2 | # runtimes: ["runc", "gvisor"]
3 | # ---
4 | #
5 | # # Use a generator to fetch search results
6 | #
7 | # This is a simple example which
8 | #
9 | # 1. Installs a custom Python package.
10 | # 2. Uses a _generator_ to return results back to the launcher process.
11 |
12 | import modal
13 |
14 | # We build a custom image by adding the `google` package to the base image.
15 | app = modal.App(
16 | "example-google-search-generator",
17 | image=modal.Image.debian_slim().pip_install("google"),
18 | )
19 |
20 | # Next, let's define a _generator_ function that uses our custom image.
21 |
22 |
23 | @app.function()
24 | def scrape(query):
25 | from googlesearch import search
26 |
27 | for url in search(query.encode(), stop=100):
28 | yield url
29 |
30 |
31 | # Finally, let's launch it from the command line with `modal run`:
32 |
33 |
34 | @app.local_entrypoint()
35 | def main(query: str = "modal"):
36 | for url in scrape.remote_gen(query):
37 | print(url)
38 |
--------------------------------------------------------------------------------
/07_web_endpoints/flask_app.py:
--------------------------------------------------------------------------------
1 | # ---
2 | # cmd: ["modal", "serve", "07_web_endpoints/flask_app.py"]
3 | # ---
4 |
5 | # # Deploy Flask app with Modal
6 |
7 | # This example shows how you can deploy a [Flask](https://flask.palletsprojects.com/en/3.0.x/) app with Modal.
8 | # You can serve any app written in a WSGI-compatible web framework (like Flask) on Modal with this pattern. You can serve an app written in an ASGI-compatible framework, like FastAPI, with [`asgi_app`](https://modal.com/docs/guide/webhooks#asgi).
9 |
10 | import modal
11 |
12 | app = modal.App(
13 | "example-flask-app",
14 | image=modal.Image.debian_slim().uv_pip_install("flask"),
15 | )
16 |
17 |
18 | @app.function()
19 | @modal.wsgi_app()
20 | def flask_app():
21 | from flask import Flask, request
22 |
23 | web_app = Flask(__name__)
24 |
25 | @web_app.get("/")
26 | def home():
27 | return "Hello Flask World!"
28 |
29 | @web_app.post("/foo")
30 | def foo():
31 | return request.json
32 |
33 | return web_app
34 |
--------------------------------------------------------------------------------
/07_web_endpoints/webrtc/yolo/yolo_classes.txt:
--------------------------------------------------------------------------------
1 | person
2 | bicycle
3 | car
4 | motorcycle
5 | airplane
6 | bus
7 | train
8 | truck
9 | boat
10 | traffic light
11 | fire hydrant
12 | stop sign
13 | parking meter
14 | bench
15 | bird
16 | cat
17 | dog
18 | horse
19 | sheep
20 | cow
21 | elephant
22 | bear
23 | zebra
24 | giraffe
25 | backpack
26 | umbrella
27 | handbag
28 | tie
29 | suitcase
30 | frisbee
31 | skis
32 | snowboard
33 | sports ball
34 | kite
35 | baseball bat
36 | baseball glove
37 | skateboard
38 | surfboard
39 | tennis racket
40 | bottle
41 | wine glass
42 | cup
43 | fork
44 | knife
45 | spoon
46 | bowl
47 | banana
48 | apple
49 | sandwich
50 | orange
51 | broccoli
52 | carrot
53 | hot dog
54 | pizza
55 | donut
56 | cake
57 | chair
58 | couch
59 | potted plant
60 | bed
61 | dining table
62 | toilet
63 | tv
64 | laptop
65 | mouse
66 | remote
67 | keyboard
68 | cell phone
69 | microwave
70 | oven
71 | toaster
72 | sink
73 | refrigerator
74 | book
75 | clock
76 | vase
77 | scissors
78 | teddy bear
79 | hair drier
80 | toothbrush
81 |
--------------------------------------------------------------------------------
/01_getting_started/inference_endpoint.py:
--------------------------------------------------------------------------------
1 | # ---
2 | # cmd: ["modal", "serve", "01_getting_started/inference_endpoint.py"]
3 | # ---
4 | from pathlib import Path
5 |
6 | import modal
7 |
8 | app = modal.App("example-inference-endpoint")
9 | image = (
10 | modal.Image.debian_slim()
11 | .uv_pip_install("transformers[torch]")
12 | .uv_pip_install("fastapi")
13 | )
14 |
15 |
16 | @app.function(gpu="h100", image=image)
17 | @modal.fastapi_endpoint(docs=True)
18 | def chat(prompt: str | None = None) -> list[dict]:
19 | from transformers import pipeline
20 |
21 | if prompt is None:
22 | prompt = f"/no_think Read this code.\n\n{Path(__file__).read_text()}\nIn one paragraph, what does the code do?"
23 |
24 | print(prompt)
25 | context = [{"role": "user", "content": prompt}]
26 |
27 | chatbot = pipeline(
28 | model="Qwen/Qwen3-1.7B-FP8", device_map="cuda", max_new_tokens=1024
29 | )
30 | result = chatbot(context)
31 | print(result[0]["generated_text"][-1]["content"])
32 |
33 | return result
34 |
--------------------------------------------------------------------------------
/.github/actions/setup/action.yml:
--------------------------------------------------------------------------------
1 | name: setup
2 |
3 | description: Set up a Python environment for the examples.
4 |
5 | inputs:
6 | version:
7 | description: Which Python version to install
8 | required: false
9 | default: "3.11"
10 | devDependencies:
11 | description: Whether to skip installing development dependencies (pass "skip" to skip them)
12 | required: false
13 | default: "no-skip"
14 |
15 | runs:
16 | using: composite
17 | steps:
18 | - name: Install Python
19 | uses: actions/setup-python@8d9ed9ac5c53483de85588cdf95a591a75ab9f55 # v5
20 | with:
21 | python-version: ${{ inputs.version }}
22 |
23 | - name: Install base packages
24 | shell: bash
25 | run: |
26 | pip install uv
27 | uv pip install --system setuptools wheel
28 |
29 | - name: Install development Python packages
30 | if: ${{ inputs.devDependencies != 'skip' }}
31 | shell: bash
32 | run: uv pip install --system -r internal/requirements.txt
33 |
34 | - name: Install the modal client
35 | shell: bash
36 | run: uv pip install --system modal
37 |
--------------------------------------------------------------------------------
/07_web_endpoints/fasthtml_app.py:
--------------------------------------------------------------------------------
1 | # ---
2 | # cmd: ["modal", "serve", "07_web_endpoints/fasthtml_app.py"]
3 | # ---
4 |
5 | # # Deploy a FastHTML app with Modal
6 |
7 | # This example shows how you can deploy a FastHTML app with Modal.
8 | # [FastHTML](https://www.fastht.ml/) is a Python library built on top of [HTMX](https://htmx.org/)
9 | # which allows you to create entire web applications using only Python.
10 |
11 | # The integration is pretty simple, thanks to the ASGI standard.
12 | # You just need to define a function that returns your FastHTML app
13 | # and is decorated with `app.function` and `modal.asgi_app`.
14 |
15 | import modal
16 |
17 | app = modal.App("example-fasthtml-app")
18 |
19 |
20 | @app.function(
21 | image=modal.Image.debian_slim(python_version="3.12").uv_pip_install(
22 | "python-fasthtml==0.5.2"
23 | )
24 | )
25 | @modal.asgi_app()
26 | def serve():
27 | import fasthtml.common as fh
28 |
29 | app = fh.FastHTML()
30 |
31 | @app.get("/")
32 | def home():
33 | return fh.Div(fh.P("Hello World!"), hx_get="/change")
34 |
35 | return app
36 |
--------------------------------------------------------------------------------
/06_gpu_and_ml/llm-serving/openai_compatible/locustfile.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import random
3 |
4 | import locust
5 |
6 | messages = [
7 | {
8 | "role": "system",
9 | "content": "You are a salesman for Modal, the cloud-native serverless Python computing platform.",
10 | },
11 | {
12 | "role": "user",
13 | "content": "Give me two fun date ideas.",
14 | },
15 | ]
16 |
17 |
18 | class WebsiteUser(locust.HttpUser):
19 | wait_time = locust.between(1, 5)
20 | headers = {
21 | "Authorization": "Bearer super-secret-key",
22 | "Accept": "application/json",
23 | }
24 |
25 | @locust.task
26 | def chat_completion(self):
27 | payload = {
28 | "model": "Qwen/Qwen3-8B-FP8",
29 | "messages": messages,
30 | }
31 |
32 | response = self.client.request(
33 | "POST", "/v1/chat/completions", json=payload, headers=self.headers
34 | )
35 | response.raise_for_status()
36 | if random.random() < 0.01:
37 | logging.info(response.json()["choices"][0]["message"]["content"])
38 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2022 Modal Labs
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/13_sandboxes/codelangchain/src/common.py:
--------------------------------------------------------------------------------
1 | """Shared information: image definitions and common utilities."""
2 |
3 | import os
4 | from typing import Any, Dict, TypedDict
5 |
6 | import modal
7 |
8 | PYTHON_VERSION = "3.11"
9 |
10 | image = (
11 | modal.Image.debian_slim(python_version=PYTHON_VERSION)
12 | .uv_pip_install(
13 | "beautifulsoup4~=4.12.3",
14 | "langchain==0.3.4",
15 | "langchain-core==0.3.12",
16 | "langgraph==0.2.39",
17 | "langchain-community==0.3.3",
18 | "langchain-openai==0.2.3",
19 | "pydantic==2.9.1",
20 | )
21 | .env({"LANGCHAIN_TRACING_V2": "true"})
22 | )
23 |
24 |
25 | class GraphState(TypedDict):
26 | """
27 | Represents the state of our graph.
28 |
29 | Attributes:
30 | keys: A dictionary where each key is a string.
31 | """
32 |
33 | keys: Dict[str, Any]
34 |
35 |
36 | os.environ["LANGCHAIN_PROJECT"] = "codelangchain"
37 | os.environ["LANGCHAIN_TRACING"] = "true"
38 |
39 | COLOR = {
40 | "HEADER": "\033[95m",
41 | "BLUE": "\033[94m",
42 | "GREEN": "\033[92m",
43 | "RED": "\033[91m",
44 | "ENDC": "\033[0m",
45 | }
46 |
--------------------------------------------------------------------------------
/misc/hello_shebang.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # # Syntax for making modal scripts executable
3 |
4 | # This example shows how you can add a shebang to a script that is meant to be invoked with `modal run`.
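#
# With the shebang and the executable bit set, the script can also be invoked directly;
# a hypothetical shell session from the repository root:
#
#     chmod +x misc/hello_shebang.py
#     ./misc/hello_shebang.py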
5 |
6 | import sys
7 |
8 | import modal
9 |
10 | app = modal.App("example-hello-world")
11 |
12 |
13 | @app.function()
14 | def f(i):
15 | if i % 2 == 0:
16 | print("hello", i)
17 | else:
18 | print("world", i, file=sys.stderr)
19 |
20 | return i * i
21 |
22 |
23 | @app.local_entrypoint()
24 | def main():
25 | # run the function locally
26 | print(f.local(1000))
27 |
28 | # run the function remotely on Modal
29 | print(f.remote(1002))
30 |
31 | # run the function in parallel and remotely on Modal
32 | total = 0
33 | for ret in f.map(range(200)):
34 | total += ret
35 |
36 | print(total)
37 |
38 |
39 | if __name__ == "__main__":
40 | # Use `modal.enable_output()` to print the app's logs (including image build logs) to the console, just like `modal run` does.
41 | # Use `app.run()` to substitute the `modal run` CLI invocation.
42 | with modal.enable_output(), app.run():
43 | main()
44 |
--------------------------------------------------------------------------------
/06_gpu_and_ml/nsight-profiling/toy.cu:
--------------------------------------------------------------------------------
1 | #include <iostream>
2 |
3 | __device__ void nanosleep_cuda(unsigned int nanoseconds) {
4 | asm volatile ("nanosleep.u32 %0;" :: "r"(nanoseconds));
5 | }
6 |
7 | __global__ void toyKernel(int *d_arr) {
8 | int idx = threadIdx.x + blockIdx.x * blockDim.x;
9 | d_arr[idx] *= 2;
10 | nanosleep_cuda(1000000000); // Sleep for ~1 second
11 | }
12 |
13 |
14 |
15 | int main() {
16 | const int size = 2 << 16;
17 | const int ct = 2 << 10;
18 | int h_arr[size], *d_arr;
19 |
20 | for (int i = 0; i < size; i++) h_arr[i] = i;
21 |
22 | cudaMalloc((void **)&d_arr, size * sizeof(int));
23 | cudaMemcpy(d_arr, h_arr, size * sizeof(int), cudaMemcpyHostToDevice);
24 |
25 | for (int i = 0; i < ct; i++) {
26 | if (i > 0 && (i & (i - 1)) == 0) {
27 | std::cout << i << std::endl;
28 | }
29 | toyKernel<<<4, 64>>>(d_arr);
30 | cudaDeviceSynchronize();
31 | }
32 |
33 | cudaMemcpy(h_arr, d_arr, size * sizeof(int), cudaMemcpyDeviceToHost);
34 | cudaFree(d_arr);
35 |
36 | std::cout << "Computation done!" << std::endl;
37 | return 0;
38 | }
39 |
--------------------------------------------------------------------------------
/06_gpu_and_ml/embeddings/qdrant.py:
--------------------------------------------------------------------------------
1 | from typing import Optional
2 |
3 | import modal
4 |
5 | app = modal.App("example-qdrant")
6 |
7 | image = modal.Image.debian_slim(python_version="3.11").uv_pip_install(
8 | "qdrant-client[fastembed-gpu]==1.13.3"
9 | )
10 |
11 |
12 | @app.function(image=image, gpu="any")
13 | def query(inpt):
14 | from qdrant_client import QdrantClient
15 |
16 | client = QdrantClient(":memory:")
17 |
18 | docs = [
19 | "Qdrant has Langchain integrations",
20 | "Qdrant also has Llama Index integrations",
21 | ]
22 |
23 | print("querying documents:", *docs, sep="\n\t")
24 |
25 | client.add(collection_name="demo_collection", documents=docs)
26 |
27 | print("query:", inpt, sep="\n\t")
28 |
29 | search_results = client.query(
30 | collection_name="demo_collection",
31 | query_text=inpt,
32 | limit=1,
33 | )
34 |
35 | print("result:", search_results[0], sep="\n\t")
36 |
37 | return search_results[0].document
38 |
39 |
40 | @app.local_entrypoint()
41 | def main(inpt: Optional[str] = None):
42 | if not inpt:
43 | inpt = "alpaca"
44 |
45 | print(query.remote(inpt))
46 |
--------------------------------------------------------------------------------
/06_gpu_and_ml/embeddings/wikipedia/download.py:
--------------------------------------------------------------------------------
1 | import modal
2 |
3 | # We first set our configuration variables for our script.
4 | DATASET_DIR = "/data"
5 | DATASET_NAME = "wikipedia"
6 | DATASET_CONFIG = "20220301.en"
7 |
8 |
9 | # We define our Modal Resources that we'll need
10 | volume = modal.Volume.from_name("embedding-wikipedia", create_if_missing=True)
11 | image = modal.Image.debian_slim(python_version="3.9").uv_pip_install(
12 | "datasets==2.16.1", "apache_beam==2.53.0"
13 | )
14 | app = modal.App(image=image)
15 |
16 |
17 | # The default timeout is 5 minutes
18 | # (see https://modal.com/docs/guide/timeouts#handling-timeouts),
19 | # but we override this to 3000s to avoid any potential timeout issues.
20 | @app.function(volumes={DATASET_DIR: volume}, timeout=3000)
21 | def download_dataset():
22 | # Redownload the dataset
23 | import time
24 |
25 | from datasets import load_dataset
26 |
27 | start = time.time()
28 | dataset = load_dataset(DATASET_NAME, DATASET_CONFIG, num_proc=6)
29 | end = time.time()
30 | print(f"Download complete - downloaded files in {end - start}s")
31 |
32 | dataset.save_to_disk(f"{DATASET_DIR}/{DATASET_NAME}")
33 | volume.commit()
34 |
35 |
36 | @app.local_entrypoint()
37 | def main():
38 | download_dataset.remote()
39 |
--------------------------------------------------------------------------------
/01_getting_started/inference_map.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 |
3 | import modal
4 |
5 | app = modal.App("example-inference-map")
6 | image = modal.Image.debian_slim().uv_pip_install("transformers[torch]")
7 |
8 |
9 | @app.function(gpu="h100", image=image)
10 | def chat(prompt: str | None = None) -> list[dict]:
11 | from transformers import pipeline
12 |
13 | if prompt is None:
14 | prompt = f"/no_think Read this code.\n\n{Path(__file__).read_text()}\nIn one paragraph, what does the code do?"
15 |
16 | print(prompt)
17 | context = [{"role": "user", "content": prompt}]
18 |
19 | chatbot = pipeline(
20 | model="Qwen/Qwen3-1.7B-FP8", device_map="cuda", max_new_tokens=1024
21 | )
22 | result = chatbot(context)
23 | print(result[0]["generated_text"][-1]["content"])
24 |
25 | return result
26 |
27 |
28 | @app.local_entrypoint()
29 | def main():
30 | import glob
31 |
32 | root_dir, examples = Path(__file__).parent.parent, []
33 | for path in glob.glob("**/*.py", root_dir=root_dir):
34 | examples.append(
35 | f"/no_think Read this code.\n\n{(root_dir / path).read_text()}\nIn one paragraph, what does the code do?"
36 | )
37 |
38 | for result in chat.map(examples):
39 | print(result[0]["generated_text"][-1]["content"])
40 |
--------------------------------------------------------------------------------
/.github/workflows/check.yml:
--------------------------------------------------------------------------------
1 | name: Check
2 | on:
3 | push:
4 | branches:
5 | - main
6 | pull_request:
7 | workflow_dispatch:
8 |
9 | jobs:
10 | ruff:
11 | name: Ruff
12 | runs-on: ubuntu-24.04
13 |
14 | steps:
15 | - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # v3
16 | with:
17 | fetch-depth: 1
18 | - uses: ./.github/actions/setup
19 |
20 | - run: ruff check
21 |
22 | - run: ruff format --check
23 |
24 | nbconvert:
25 | name: NbConvert
26 | runs-on: ubuntu-24.04
27 |
28 | steps:
29 | - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # v3
30 | with:
31 | fetch-depth: 1
32 | - uses: ./.github/actions/setup
33 |
34 | - name: Check notebooks are cleaned
35 | run: |
36 | jupyter nbconvert --clear-output --inplace 11_notebooks/*.ipynb
37 | git diff --quiet 11_notebooks/*.ipynb && git diff --cached --quiet 11_notebooks/*.ipynb || exit 1
38 |
39 | pytest:
40 | name: Pytest
41 | runs-on: ubuntu-24.04
42 |
43 | steps:
44 | - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # v3
45 | with:
46 | fetch-depth: 1
47 | - uses: ./.github/actions/setup
48 |
49 | - name: Run
50 | run: pytest -v .
51 |
--------------------------------------------------------------------------------
/.github/workflows/stale.yml:
--------------------------------------------------------------------------------
1 | name: Stale
2 | on:
3 | workflow_dispatch:
4 | schedule:
5 | - cron: "30 15 * * *"
6 |
7 | permissions:
8 | contents: write
9 | issues: write
10 | pull-requests: write
11 |
12 | jobs:
13 | stale-prs:
14 | name: Close stale PRs
15 | runs-on: ubuntu-latest
16 | steps:
17 | - uses: actions/stale@5bef64f19d7facfb25b37b414482c7164d639639 # v9
18 | with:
19 | stale-pr-message: |
20 | This PR is stale because it has been open 30 days with no activity.
21 | If the stale label remains and there are no comments, this will be closed in 5 days.
22 | close-pr-message: |
23 | This PR was closed because it has been stalled for 5 days with no activity.
24 | days-before-stale: 30
25 | days-before-close: 5
26 | days-before-issue-stale: -1
27 | delete-branch: true
28 | operations-per-run: 200
29 |
30 | stale-branches:
31 | name: Remove stale branches
32 | runs-on: ubuntu-latest
33 | steps:
34 | - uses: fpicalausa/remove-stale-branches@bfaf2b7f95cfd85485960c9d2d98a0702c84a74c # v1.6.0
35 | with:
36 | operations-per-run: 500
37 | days-before-branch-stale: 30
38 | ignore-unknown-authors: true
39 | default-recipient: "(Unknown author)"
40 |
--------------------------------------------------------------------------------
/10_integrations/tailscale/entrypoint.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | # Custom entrypoint [1] used to login into Tailscale and start both SOCKS5 and HTTP
4 | # proxies. This requires the env var `TAILSCALE_AUTHKEY` to be populated with a
5 | # Tailscale auth key. [2]
6 | #
7 | # [1] https://modal.com/docs/guide/custom-container#entrypoint
8 | # [2] https://tailscale.com/kb/1111/ephemeral-nodes
9 |
10 | set -e
11 |
12 | tailscaled --tun=userspace-networking --socks5-server=localhost:1080 --outbound-http-proxy-listen=localhost:1080 &
13 | tailscale up --authkey=${TAILSCALE_AUTHKEY} --hostname=${MODAL_TASK_ID}
14 |
15 | # Loop until the maximum number of retries is reached
16 | retry_count=0; MAX_RETRIES=5
17 | while [ $retry_count -lt $MAX_RETRIES ]; do
18 | http_status=$(curl -x socks5://localhost:1080 -o /dev/null -L -s -w '%{http_code}' https://www.google.com)
19 |
20 | # Check if the HTTP status code is 200 (OK)
21 | if [ $http_status -eq 200 ]; then
22 | echo "Successfully started SOCKS5 proxy, HTTP proxy, and connected to Tailscale."
23 | exec "$@" # Runs the command passed to the entrypoint script.
24 | exit 0
25 | else
26 | echo "Attempt $((retry_count+1))/$MAX_RETRIES failed: SOCKS5 proxy returned HTTP $http_status"
27 | fi
28 |
29 | retry_count=$((retry_count+1))
30 | sleep 1
31 | done
32 |
33 | echo "Failed to start Tailscale."
34 | exit 1
--------------------------------------------------------------------------------
/08_advanced/hello_world_async.py:
--------------------------------------------------------------------------------
1 | # # Async functions
2 | #
3 | # Modal natively supports async/await syntax using asyncio.
4 |
5 | # First, let's import some global stuff.
6 |
7 | import sys
8 |
9 | import modal
10 |
11 | app = modal.App("example-hello-world-async")
12 |
13 |
14 | # ## Defining a function
15 | #
16 | # Now, let's define a function. The wrapped function can be synchronous or
17 | # asynchronous, but calling it in either context will still work.
18 | # Let's stick to a normal synchronous function
19 |
20 |
21 | @app.function()
22 | def f(i):
23 | if i % 2 == 0:
24 | print("hello", i)
25 | else:
26 | print("world", i, file=sys.stderr)
27 |
28 | return i * i
29 |
30 |
31 | # ## Running the app with asyncio
32 | #
33 | # Let's make the main entrypoint asynchronous. In async contexts, we should
34 | # call the function using `await` or iterate over the map using `async for`.
35 | # Otherwise we would block the event loop while our call is being run.
36 |
37 |
38 | @app.local_entrypoint()
39 | async def run_async():
40 | # Call the function using .remote.aio() in order to run it asynchronously
41 | print(await f.remote.aio(1000))
42 |
43 | # Parallel map.
44 | total = 0
45 | # Call .map asynchronously using f.map.aio(...)
46 | async for ret in f.map.aio(range(20)):
47 | total += ret
48 |
49 | print(total)
50 |
--------------------------------------------------------------------------------
/10_integrations/dbt/sample_proj_duckdb_s3/models/orders.sql:
--------------------------------------------------------------------------------
1 | {% set payment_methods = ['credit_card', 'coupon', 'bank_transfer', 'gift_card'] %}
2 |
3 | with orders as (
4 |
5 | select * from {{ ref('stg_orders') }}
6 |
7 | ),
8 |
9 | payments as (
10 |
11 | select * from {{ ref('stg_payments') }}
12 |
13 | ),
14 |
15 | order_payments as (
16 |
17 | select
18 | order_id,
19 |
20 | {% for payment_method in payment_methods -%}
21 | sum(case when payment_method = '{{ payment_method }}' then amount else 0 end) as {{ payment_method }}_amount,
22 | {% endfor -%}
23 |
24 | sum(amount) as total_amount
25 |
26 | from payments
27 |
28 | group by order_id
29 |
30 | ),
31 |
32 | final as (
33 |
34 | select
35 | orders.order_id,
36 | orders.customer_id,
37 | orders.order_date,
38 | orders.status,
39 |
40 | {% for payment_method in payment_methods -%}
41 |
42 | order_payments.{{ payment_method }}_amount,
43 |
44 | {% endfor -%}
45 |
46 | order_payments.total_amount as amount
47 |
48 | from orders
49 |
50 |
51 | left join order_payments
52 | on orders.order_id = order_payments.order_id
53 |
54 | )
55 |
56 | {{ config(materialized='external', format='parquet', location='s3://modal-example-dbt-duckdb-s3/out/orders.parquet') }}
57 | select * from final
58 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | # Modal Examples
5 |
6 |
7 |
8 | This is a collection of examples for [Modal](https://modal.com/). Use these examples to learn Modal and build your own robust and scalable applications.
9 |
10 | ## Usage
11 |
12 | First, sign up for a free account at [modal.com](https://modal.com/) and follow
13 | the setup instructions to install the `modal` package and set your API key.
14 |
15 | The examples are organized into several folders based on their category. You can
16 | generally run the files in any folder much like you run ordinary Python programs, with a
17 | command like:
18 |
19 | ```bash
20 | modal run 01_getting_started/hello_world.py
21 | ```
22 |
23 | Although these scripts are run on your local machine, they'll communicate with
24 | Modal and run in our cloud, spawning serverless containers on demand.
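
Examples that expose web endpoints are typically started with `modal serve` instead, for
example:

```bash
modal serve 07_web_endpoints/flask_app.py
```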
25 |
26 | ## Examples
27 |
28 | - [**`01_getting_started/`**](01_getting_started) through [**`14_clusters/`**](14_clusters) provide a guided tour through Modal's concepts and capabilities.
29 | - [**`misc/`**](/misc) contains uncategorized, miscellaneous examples.
30 |
31 | _These examples are continuously tested for correctness against Python **3.11**._
32 |
33 | ## License
34 |
35 | The [MIT license](LICENSE).
36 |
--------------------------------------------------------------------------------
/06_gpu_and_ml/comfyui/kjnodes/kjnodes_example.py:
--------------------------------------------------------------------------------
1 | # ---
2 | # cmd: ["modal", "serve", "06_gpu_and_ml/comfyui/kjnodes/kjnodes_example.py"]
3 | # ---
4 |
5 | import subprocess
6 |
7 | import modal
8 |
9 | image = ( # build up a Modal Image to run ComfyUI, step by step
10 | modal.Image.debian_slim( # start from basic Linux with Python
11 | python_version="3.11"
12 | )
13 | .apt_install("git") # install git to clone ComfyUI
14 | .uv_pip_install("comfy-cli==1.2.7") # install comfy-cli
15 | .run_commands( # use comfy-cli to install the ComfyUI repo and its dependencies
16 | "comfy --skip-prompt install --nvidia"
17 | )
18 | .run_commands( # download the ComfyUI KJNodes custom node pack
19 | "comfy node install comfyui-kjnodes"
20 | )
21 | .run_commands(
22 | "comfy --skip-prompt model download --url https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5/resolve/main/v1-5-pruned.safetensors --relative-path models/checkpoints"
23 | )
24 | )
25 |
26 | app = modal.App(name="example-kjnodes", image=image)
27 |
28 |
29 | # Run ComfyUI as an interactive web server
30 | @app.function(
31 | max_containers=1,
32 | scaledown_window=30,
33 | timeout=1800,
34 | gpu="A10G",
35 | )
36 | @modal.concurrent(max_inputs=10)
37 | @modal.web_server(8000, startup_timeout=60)
38 | def ui():
39 | subprocess.Popen("comfy launch -- --listen 0.0.0.0 --port 8000", shell=True)
40 |
--------------------------------------------------------------------------------
/06_gpu_and_ml/comfyui/essentials/essentials_example.py:
--------------------------------------------------------------------------------
1 | # ---
2 | # cmd: ["modal", "serve", "06_gpu_and_ml/comfyui/essentials/essentials_example.py"]
3 | # ---
4 |
5 | import subprocess
6 |
7 | import modal
8 |
9 | image = ( # build up a Modal Image to run ComfyUI, step by step
10 | modal.Image.debian_slim( # start from basic Linux with Python
11 | python_version="3.11"
12 | )
13 | .apt_install("git") # install git to clone ComfyUI
14 | .uv_pip_install("comfy-cli==1.2.7") # install comfy-cli
15 | .run_commands( # use comfy-cli to install the ComfyUI repo and its dependencies
16 | "comfy --skip-prompt install --nvidia"
17 | )
18 | .run_commands( # download the ComfyUI Essentials custom node pack
19 | "comfy node install comfyui_essentials"
20 | )
21 | .run_commands(
22 | "comfy --skip-prompt model download --url https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5/resolve/main/v1-5-pruned.safetensors --relative-path models/checkpoints"
23 | )
24 | )
25 |
26 | app = modal.App(name="example-essentials", image=image)
27 |
28 |
29 | # Run ComfyUI as an interactive web server
30 | @app.function(
31 | max_containers=1,
32 | scaledown_window=30,
33 | timeout=1800,
34 | gpu="A10G",
35 | )
36 | @modal.concurrent(max_inputs=10)
37 | @modal.web_server(8000, startup_timeout=60)
38 | def ui():
39 | subprocess.Popen("comfy launch -- --listen 0.0.0.0 --port 8000", shell=True)
40 |
--------------------------------------------------------------------------------
/06_gpu_and_ml/comfyui/was_node_suite/was_node_example.py:
--------------------------------------------------------------------------------
1 | # ---
2 | # cmd: ["modal", "serve", "06_gpu_and_ml/comfyui/was_node_suite/was_node_example.py"]
3 | # ---
4 |
5 | import subprocess
6 |
7 | import modal
8 |
9 | image = (
10 | modal.Image.debian_slim( # start from basic Linux with Python
11 | python_version="3.11"
12 | )
13 | .apt_install("git") # install git to clone ComfyUI
14 | .uv_pip_install("comfy-cli==1.2.7") # install comfy-cli
15 | .run_commands( # use comfy-cli to install the ComfyUI repo and its dependencies
16 | "comfy --skip-prompt install --nvidia"
17 | )
18 | .run_commands( # install default stable diffusion model for example purposes
19 | "comfy --skip-prompt model download --url https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5/resolve/main/v1-5-pruned.safetensors --relative-path models/checkpoints"
20 | )
21 | .run_commands( # download the WAS Node Suite custom node pack
22 | "comfy node install was-node-suite-comfyui"
23 | )
24 | )
25 |
26 | app = modal.App(name="example-was-node", image=image)
27 |
28 |
29 | # Run ComfyUI as an interactive web server
30 | @app.function(
31 | max_containers=1,
32 | scaledown_window=30,
33 | timeout=1800,
34 | gpu="A10G",
35 | )
36 | @modal.concurrent(max_inputs=10)
37 | @modal.web_server(8000, startup_timeout=60)
38 | def ui():
39 | subprocess.Popen("comfy launch -- --listen 0.0.0.0 --port 8000", shell=True)
40 |
--------------------------------------------------------------------------------
/06_gpu_and_ml/hyperparameter-sweep/src/dataset.py:
--------------------------------------------------------------------------------
1 | # ---
2 | # pytest: false
3 | # ---
4 |
5 | import torch
6 |
7 |
8 | class Dataset:
9 | """Manage text dataset and batching."""
10 |
11 | def __init__(
12 | self,
13 | encoded_text,
14 | train_percent,
15 | batch_size,
16 | context_size,
17 | device,
18 | ):
19 | self.device = device
20 | self.batch_size = batch_size
21 | self.context_size = context_size
22 | assert (train_percent > 0.0) and (train_percent < 1.0), (
23 | "train_percent must be in (0,1)"
24 | )
25 |
26 | # Train/Validation split.
27 | data = torch.tensor(encoded_text, dtype=torch.long)
28 | n = len(data)
29 | self.train_data = data[: int(train_percent * n)]
30 | self.val_data = data[int(train_percent * n) :]
31 |
32 | def get_batch(self, split):
33 | """Get a batch of train or validation data."""
34 | data = self.train_data if split == "train" else self.val_data
35 |
36 | starts = torch.randint(len(data) - self.context_size, (self.batch_size,))
37 |
38 | x = torch.stack([data[start : start + self.context_size] for start in starts])
39 |
40 | # +1 because we want to predict the next token.
41 | y = torch.stack(
42 | [data[start + 1 : start + self.context_size + 1] for start in starts]
43 | )
44 | return x.to(self.device), y.to(self.device)
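
# A minimal usage sketch (hypothetical values, not part of the training pipeline):
#
#     ds = Dataset(
#         encoded_text=list(range(1000)),  # token ids
#         train_percent=0.9,
#         batch_size=32,
#         context_size=8,
#         device="cpu",
#     )
#     x, y = ds.get_batch("train")  # both have shape (32, 8); y is x shifted by one token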
45 |
--------------------------------------------------------------------------------
/05_scheduling/schedule_simple.py:
--------------------------------------------------------------------------------
1 | # ---
2 | # cmd: ["python", "-m", "05_scheduling.schedule_simple"]
3 | # ---
4 |
5 | # # Scheduling remote jobs
6 |
7 | # This example shows how you can schedule remote jobs on Modal.
8 | # You can do this either with:
9 | #
10 | # - [`modal.Period`](https://modal.com/docs/reference/modal.Period) - a time interval between function calls.
11 | # - [`modal.Cron`](https://modal.com/docs/reference/modal.Cron) - a cron expression to specify the schedule.
12 |
13 | # In the code below, the first function runs every
14 | # 5 seconds, and the second function runs every minute. We use the `schedule`
15 | # argument to specify the schedule for each function. The `schedule` argument can
16 | # take a `modal.Period` object to specify a time interval or a `modal.Cron` object
17 | # to specify a cron expression.
18 |
19 | import time
20 | from datetime import datetime
21 |
22 | import modal
23 |
24 | app = modal.App("example-schedule-simple")
25 |
26 |
27 | @app.function(schedule=modal.Period(seconds=5))
28 | def print_time_1():
29 | print(
30 | f"Printing with period 5 seconds: {datetime.now().strftime('%m/%d/%Y, %H:%M:%S')}"
31 | )
32 |
33 |
34 | @app.function(schedule=modal.Cron("* * * * *"))
35 | def print_time_2():
36 | print(
37 | f"Printing with cron every minute: {datetime.now().strftime('%m/%d/%Y, %H:%M:%S')}"
38 | )
39 |
40 |
41 | if __name__ == "__main__":
42 | with modal.enable_output():
43 | with app.run():
44 | time.sleep(60)
45 |
--------------------------------------------------------------------------------
/06_gpu_and_ml/speech-to-text/streaming-parakeet-frontend/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | Audio Transcription
7 |
37 |
38 |
39 | Audio Transcription
40 | Tip: Turn your microphone volume up for better transcription quality.
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
--------------------------------------------------------------------------------
/06_gpu_and_ml/hyperparameter-sweep/src/logs_manager.py:
--------------------------------------------------------------------------------
1 | # ---
2 | # pytest: false
3 | # ---
4 |
5 | from torch.utils.tensorboard import SummaryWriter
6 |
7 |
8 | class LogsManager:
9 | def __init__(self, experiment_name, hparams, num_parameters, tb_log_path):
10 | self.model_name = (
11 | f"{experiment_name}"
12 | f"_context_size={hparams.context_size}_n_heads={hparams.n_heads}"
13 | f"_dropout={hparams.dropout}"
14 | )
15 |
16 | model_log_dir = tb_log_path / f"{experiment_name}/{self.model_name}"
17 | model_log_dir.mkdir(parents=True, exist_ok=True)
18 | self.train_writer = SummaryWriter(log_dir=f"{model_log_dir}/train")
19 | self.val_writer = SummaryWriter(log_dir=f"{model_log_dir}/val")
20 |
21 | # save hyperparameters to TensorBoard for easy reference
22 | pretty_hparams_str = "\n".join(f"{k}: {v}" for k, v in hparams.__dict__.items())
23 | pretty_hparams_str += f"\nNum parameters: {num_parameters}"
24 | self.train_writer.add_text("Hyperparameters", pretty_hparams_str)
25 |
26 | def add_train_scalar(self, name, value, step):
27 | self.train_writer.add_scalar(name, value, step)
28 |
29 | def add_val_scalar(self, name, value, step):
30 | self.val_writer.add_scalar(name, value, step)
31 |
32 | def add_val_text(self, name, text, step):
33 | self.val_writer.add_text(name, text, step)
34 |
35 | def flush(self):
36 | self.train_writer.flush()
37 | self.val_writer.flush()
38 |
--------------------------------------------------------------------------------
/06_gpu_and_ml/speech-to-text/multitalker-frontend/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | Audio Transcription
7 |
37 |
38 |
39 | Parakeet Streaming Transcription
40 | Tip: Turn your microphone volume up for better transcription quality.
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
--------------------------------------------------------------------------------
/08_advanced/parallel_execution.py:
--------------------------------------------------------------------------------
1 | # # Parallel execution on Modal with `spawn` and `gather`
2 |
3 | # This example shows how you can run multiple functions in parallel on Modal.
4 | # We use the `spawn` method to start a function and return a handle to its result.
5 | # The `get` method is used to retrieve the result of the function call.
6 |
7 | import time
8 |
9 | import modal
10 |
11 | app = modal.App("example-parallel-execution")
12 |
13 |
14 | @app.function()
15 | def step1(word):
16 | time.sleep(2)
17 | print("step1 done")
18 | return word
19 |
20 |
21 | @app.function()
22 | def step2(number):
23 | time.sleep(1)
24 | print("step2 done")
25 | if number == 0:
26 | raise ValueError("custom error")
27 | return number
28 |
29 |
30 | @app.local_entrypoint()
31 | def main():
32 | # Start running a function and return a handle to its result.
33 | word_call = step1.spawn("foo")
34 | number_call = step2.spawn(2)
35 |
36 | # Print "foofoo" after 2 seconds.
37 | print(word_call.get() * number_call.get())
38 |
39 | # Alternatively, use `modal.FunctionCall.gather(...)` as a convenience wrapper,
40 | # which returns an error if either call fails.
41 | results = modal.FunctionCall.gather(step1.spawn("bar"), step2.spawn(4))
42 | assert results == ["bar", 4]
43 |
44 | # Raise exception after 2 seconds.
45 | try:
46 | modal.FunctionCall.gather(step1.spawn("bar"), step2.spawn(0))
47 | except ValueError as exc:
48 | assert str(exc) == "custom error"
49 |
--------------------------------------------------------------------------------
/06_gpu_and_ml/comfyui/impact/impact_example.py:
--------------------------------------------------------------------------------
1 | # ---
2 | # cmd: ["modal", "serve", "06_gpu_and_ml/comfyui/impact/impact_example.py"]
3 | # ---
4 |
5 | import subprocess
6 |
7 | import modal
8 |
9 | image = (
10 | modal.Image.debian_slim( # start from basic Linux with Python
11 | python_version="3.11"
12 | )
13 | .apt_install("git") # install git to clone ComfyUI
14 | .uv_pip_install("comfy-cli==1.2.7") # install comfy-cli
15 | .run_commands( # use comfy-cli to install the ComfyUI repo and its dependencies
16 | "comfy --skip-prompt install --nvidia"
17 | )
18 | .run_commands( # download the Impact pack
19 | "comfy node install comfyui-impact-pack"
20 | )
21 | .uv_pip_install("ultralytics==8.3.26") # object detection models
22 | .apt_install( # opengl dependencies
23 | "libgl1-mesa-glx", "libglib2.0-0"
24 | )
25 | .run_commands(
26 | "comfy --skip-prompt model download --url https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5/resolve/main/v1-5-pruned.safetensors --relative-path models/checkpoints",
27 | )
28 | )
29 |
30 | app = modal.App(name="example-impact", image=image)
31 |
32 |
33 | # Run ComfyUI as an interactive web server
34 | @app.function(
35 | max_containers=1,
36 | scaledown_window=30,
37 | timeout=1800,
38 | gpu="A10G",
39 | )
40 | @modal.concurrent(max_inputs=10)
41 | @modal.web_server(8000, startup_timeout=60)
42 | def ui():
43 | subprocess.Popen("comfy launch -- --listen 0.0.0.0 --port 8000", shell=True)
44 |
--------------------------------------------------------------------------------
/07_web_endpoints/fastapi_app.py:
--------------------------------------------------------------------------------
1 | # ---
2 | # cmd: ["modal", "serve", "07_web_endpoints/fastapi_app.py"]
3 | # ---
4 |
5 | # # Deploy FastAPI app with Modal
6 |
7 | # This example shows how you can deploy a [FastAPI](https://fastapi.tiangolo.com/) app with Modal.
8 | # You can serve any app written in an ASGI-compatible web framework (like FastAPI) using this pattern, or you can serve WSGI-compatible frameworks like Flask with [`wsgi_app`](https://modal.com/docs/guide/webhooks#wsgi).
9 |
10 | from typing import Optional
11 |
12 | import modal
13 | from fastapi import FastAPI, Header
14 | from pydantic import BaseModel
15 |
16 | image = modal.Image.debian_slim().uv_pip_install("fastapi[standard]", "pydantic")
17 | app = modal.App("example-fastapi-app", image=image)
18 | web_app = FastAPI()
19 |
20 |
21 | class Item(BaseModel):
22 | name: str
23 |
24 |
25 | @web_app.get("/")
26 | async def handle_root(user_agent: Optional[str] = Header(None)):
27 | print(f"GET / - received user_agent={user_agent}")
28 | return "Hello World"
29 |
30 |
31 | @web_app.post("/foo")
32 | async def handle_foo(item: Item, user_agent: Optional[str] = Header(None)):
33 | print(f"POST /foo - received user_agent={user_agent}, item.name={item.name}")
34 | return item
35 |
36 |
37 | @app.function()
38 | @modal.asgi_app()
39 | def fastapi_app():
40 | return web_app
41 |
42 |
43 | @app.function()
44 | @modal.fastapi_endpoint(method="POST")
45 | def f(item: Item):
46 | return "Hello " + item.name
47 |
48 |
49 | if __name__ == "__main__":
50 | app.deploy("webapp")
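
# After deploying, the `f` endpoint accepts JSON matching the `Item` model; a hypothetical
# request (the URL is a placeholder for the one Modal prints):
#
#     curl -X POST -H "Content-Type: application/json" -d '{"name": "Erik"}' \
#         https://<your-workspace>--example-fastapi-app-f.modal.run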
51 |
--------------------------------------------------------------------------------
/misc/queue_simple.py:
--------------------------------------------------------------------------------
1 | # ---
2 | # cmd: ["python", "misc/queue_simple.py"]
3 | # runtimes: ["runc", "gvisor"]
4 | # ---
5 | #
6 | # # Using a queue to send/receive data
7 | #
8 | # This is an example of how to use queues to send/receive data.
9 | # We don't do it here, but you could imagine doing this _between_ two functions.
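#
# A minimal sketch of that idea (hypothetical app and function names, not used below):
#
#     app = modal.App("example-queue-between-functions")
#
#     @app.function()
#     def producer(q: modal.Queue):
#         q.put("work item")
#
#     @app.function()
#     def consumer(q: modal.Queue):
#         return q.get()
#
# The queue object itself can be passed as an argument to the remote functions.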
10 |
11 |
12 | import asyncio
13 |
14 | import modal
15 | import modal.queue
16 |
17 |
18 | async def run_async(q: modal.Queue) -> None:
19 | await q.put.aio(42)
20 | r = await q.get.aio()
21 | assert r == 42
22 | await q.put_many.aio([42, 43, 44, 45, 46])
23 | await q.put_many.aio([47, 48, 49, 50, 51])
24 | r = await q.get_many.aio(3)
25 | assert r == [42, 43, 44]
26 | r = await q.get_many.aio(99)
27 | assert r == [45, 46, 47, 48, 49, 50, 51]
28 |
29 |
30 | async def many_consumers(q: modal.Queue) -> None:
31 | print("Creating getters")
32 | tasks = [asyncio.create_task(q.get.aio()) for i in range(20)]
33 | print("Putting values")
34 | await q.put_many.aio(list(range(10)))
35 | await asyncio.sleep(1)
36 | # About 10 tasks should now be done
37 | n_done_tasks = sum(1 for t in tasks if t.done())
38 | assert n_done_tasks == 10
39 | # Finish remaining ones
40 | await q.put_many.aio(list(range(10)))
41 | await asyncio.sleep(1)
42 | assert all(t.done() for t in tasks)
43 |
44 |
45 | async def main():
46 | with modal.Queue.ephemeral() as q:
47 | await run_async(q)
48 | await many_consumers(q)
49 |
50 |
51 | if __name__ == "__main__":
52 | asyncio.run(main())
53 |
--------------------------------------------------------------------------------
/internal/examples_test.py:
--------------------------------------------------------------------------------
1 | import importlib
2 | import json
3 | import pathlib
4 | import sys
5 |
6 | import pytest
7 | from utils import (
8 | EXAMPLES_ROOT,
9 | ExampleType,
10 | get_examples,
11 | get_examples_json,
12 | render_example_md,
13 | )
14 |
15 | examples = [ex for ex in get_examples() if ex.type == ExampleType.MODULE]
16 | examples = [ex for ex in examples if ex.metadata.get("pytest", True)]
17 | example_ids = [ex.module for ex in examples]
18 |
19 |
20 | @pytest.fixture(autouse=True)
21 | def disable_auto_mount(monkeypatch):
22 | monkeypatch.setenv("MODAL_AUTOMOUNT", "0")
23 | yield
24 |
25 |
26 | @pytest.fixture(autouse=False)
27 | def add_root_to_syspath(monkeypatch):
28 | sys.path.append(str(EXAMPLES_ROOT))
29 | yield
30 | sys.path.pop()
31 |
32 |
33 | @pytest.mark.parametrize("example", examples, ids=example_ids)
34 | def test_filename(example):
35 | assert not example.repo_filename.startswith("/")
36 | assert pathlib.Path(example.repo_filename).exists()
37 |
38 |
39 | @pytest.mark.parametrize("example", examples, ids=example_ids)
40 | def test_import(example, add_root_to_syspath):
41 | importlib.import_module(example.module)
42 |
43 |
44 | @pytest.mark.parametrize("example", examples, ids=example_ids)
45 | def test_render(example):
46 | md = render_example_md(example)
47 | assert isinstance(md, str)
48 | assert len(md) > 0
49 |
50 |
51 | def test_json():
52 | data = get_examples_json()
53 | examples = json.loads(data)
54 | assert isinstance(examples, list)
55 | assert len(examples) > 0
56 |
--------------------------------------------------------------------------------
/06_gpu_and_ml/gpu_fallbacks.py:
--------------------------------------------------------------------------------
1 | # # Set "fallback" GPUs
2 | #
3 | # GPU availabilities on Modal can fluctuate, especially for
4 | # tightly-constrained requests, like for eight co-located GPUs
5 | # in a specific region.
6 | #
7 | # If your code can run on multiple different GPUs, you can specify
8 | # your GPU request as a list, in order of preference, and whenever
9 | # your Function scales up, we will try to schedule it on each requested GPU type in order.
10 | #
11 | # The code below demonstrates the usage of the `gpu` parameter with a list of GPUs.
12 |
13 | import subprocess
14 |
15 | import modal
16 |
17 | app = modal.App("example-gpu-fallbacks")
18 |
19 |
20 | @app.function(
21 | gpu=["h100", "a100", "any"], # "any" means any of L4, A10, or T4
22 | max_inputs=1, # new container each input, so we re-roll the GPU dice every time
23 | )
24 | async def remote(_idx):
25 | gpu = subprocess.run(
26 | ["nvidia-smi", "--query-gpu=name", "--format=csv,noheader"],
27 | check=True,
28 | text=True,
29 | stdout=subprocess.PIPE,
30 | ).stdout.strip()
31 | print(gpu)
32 | return gpu
33 |
34 |
35 | @app.local_entrypoint()
36 | def local(count: int = 32):
37 | from collections import Counter
38 |
39 | gpu_counter = Counter(remote.map([i for i in range(count)], order_outputs=False))
40 | print(f"ran {gpu_counter.total()} times")
41 | print(f"on the following {len(gpu_counter.keys())} GPUs:", end="\n")
42 | print(
43 | *[f"{gpu.rjust(32)}: {'🔥' * ct}" for gpu, ct in gpu_counter.items()],
44 | sep="\n",
45 | )
46 |
--------------------------------------------------------------------------------
/06_gpu_and_ml/openai_whisper/pod_transcriber/app/config.py:
--------------------------------------------------------------------------------
1 | import dataclasses
2 | import logging
3 | import pathlib
4 |
5 |
6 | @dataclasses.dataclass
7 | class ModelSpec:
8 | name: str
9 | params: str
10 |
11 |
12 | def get_logger(name, level=logging.INFO):
13 | logger = logging.getLogger(name)
14 | handler = logging.StreamHandler()
15 | handler.setFormatter(
16 | logging.Formatter("%(levelname)s: %(asctime)s: %(name)s %(message)s")
17 | )
18 | logger.addHandler(handler)
19 | logger.setLevel(level)
20 | return logger
21 |
22 |
23 | CACHE_DIR = "/cache"
24 | # Where downloaded podcasts are stored, by guid hash.
25 | # Mostly .mp3 files 50-100MiB.
26 | RAW_AUDIO_DIR = pathlib.Path(CACHE_DIR, "raw_audio")
27 | # Stores metadata of individual podcast episodes as JSON.
28 | PODCAST_METADATA_DIR = pathlib.Path(CACHE_DIR, "podcast_metadata")
29 | # Completed episode transcriptions. Stored as flat files with
30 | # files structured as '{guid_hash}-{model_slug}.json'.
31 | TRANSCRIPTIONS_DIR = pathlib.Path(CACHE_DIR, "transcriptions")
33 | # Search index files, refreshed by scheduled functions.
33 | SEARCH_DIR = pathlib.Path(CACHE_DIR, "search")
35 | # Location of model checkpoint.
35 | MODEL_DIR = pathlib.Path(CACHE_DIR, "model")
36 | # Location of web frontend assets.
37 | ASSETS_PATH = pathlib.Path(__file__).parent / "frontend" / "dist"
38 |
39 | transcripts_per_podcast_limit = 2
40 |
41 | supported_parakeet_models = {
42 | "parakeet-tdt-0.6b-v2": ModelSpec(
43 | name="nvidia/parakeet-tdt-0.6b-v2", params="600M"
44 | ),
45 | }
46 |
47 | DEFAULT_MODEL = supported_parakeet_models["parakeet-tdt-0.6b-v2"]
48 |
--------------------------------------------------------------------------------
/10_integrations/dbt/sample_proj_duckdb_s3/models/customers.sql:
--------------------------------------------------------------------------------
1 | with customers as (
2 |
3 | select * from {{ ref('stg_customers') }}
4 |
5 | ),
6 |
7 | orders as (
8 |
9 | select * from {{ ref('stg_orders') }}
10 |
11 | ),
12 |
13 | payments as (
14 |
15 | select * from {{ ref('stg_payments') }}
16 |
17 | ),
18 |
19 | customer_orders as (
20 |
21 | select
22 | customer_id,
23 |
24 | min(order_date) as first_order,
25 | max(order_date) as most_recent_order,
26 | count(order_id) as number_of_orders
27 | from orders
28 |
29 | group by customer_id
30 |
31 | ),
32 |
33 | customer_payments as (
34 |
35 | select
36 | orders.customer_id,
37 | sum(amount) as total_amount
38 |
39 | from payments
40 |
41 | left join orders on
42 | payments.order_id = orders.order_id
43 |
44 | group by orders.customer_id
45 |
46 | ),
47 |
48 | final as (
49 |
50 | select
51 | customers.customer_id,
52 | customers.first_name,
53 | customers.last_name,
54 | customer_orders.first_order,
55 | customer_orders.most_recent_order,
56 | customer_orders.number_of_orders,
57 | customer_payments.total_amount as customer_lifetime_value
58 |
59 | from customers
60 |
61 | left join customer_orders
62 | on customers.customer_id = customer_orders.customer_id
63 |
64 | left join customer_payments
65 | on customers.customer_id = customer_payments.customer_id
66 |
67 | )
68 |
69 | {{ config(materialized='external', format='parquet', location='s3://modal-example-dbt-duckdb-s3/out/customers.parquet') }}
70 | select * from final
71 |
--------------------------------------------------------------------------------
/13_sandboxes/codelangchain/src/retrieval.py:
--------------------------------------------------------------------------------
1 | """Just as a constant function is _technically_ a polynomial, so too is injecting the same information every time _technically_ RAG."""
2 |
3 | from .common import COLOR
4 |
5 | docs_url = "https://huggingface.co/docs/transformers/index"
6 |
7 |
8 | def retrieve_docs(url: str = docs_url, debug=False):
9 | from bs4 import BeautifulSoup as Soup
10 | from langchain_community.document_loaders.recursive_url_loader import (
11 | RecursiveUrlLoader,
12 | )
13 |
14 | print(f"{COLOR['HEADER']}📜: Retrieving documents from {url}{COLOR['ENDC']}")
15 | loader = RecursiveUrlLoader(
16 | url=url,
17 | max_depth=2 // (int(debug) + 1), # retrieve fewer docs in debug mode
18 | extractor=lambda x: Soup(x, "html.parser").text,
19 | )
20 | docs = loader.load()
21 |
22 | # sort the list based on the URLs
23 | d_sorted = sorted(docs, key=lambda x: x.metadata["source"], reverse=True)
24 |
25 | # combine them all together
26 | concatenated_content = "\n\n\n --- \n\n\n".join(
27 | [
28 | "## " + doc.metadata["source"] + "\n\n" + doc.page_content.strip()
29 | for doc in d_sorted
30 | ]
31 | )
32 |
33 | print(
34 | f"{COLOR['HEADER']}📜: Retrieved {len(docs)} documents{COLOR['ENDC']}",
35 | f"{COLOR['GREEN']}{concatenated_content[:100].strip()}{COLOR['ENDC']}",
36 | sep="\n",
37 | )
38 |
39 | if debug:
40 | print(
41 | f"{COLOR['HEADER']}📜: Restricting to at most 30,000 characters{COLOR['ENDC']}"
42 | )
43 | concatenated_content = concatenated_content[:30_000]
44 |
45 | return concatenated_content
46 |
--------------------------------------------------------------------------------
/03_scaling_out/basic_grid_search.py:
--------------------------------------------------------------------------------
1 | # # Hyperparameter search
2 | #
3 | # This example showcases a simple grid search in one dimension, where we try different
4 | # parameters for a model and pick the one with the best results on a holdout set.
5 | #
6 | # ## Defining the image
7 | #
8 | # First, let's build a custom image and install scikit-learn in it.
9 |
10 | import modal
11 |
12 | app = modal.App(
13 | "example-basic-grid-search",
14 | image=modal.Image.debian_slim().uv_pip_install("scikit-learn~=1.5.0"),
15 | )
16 |
17 | # ## The Modal function
18 | #
19 | # Next, define the function. Note that we use the custom image with scikit-learn in it.
20 | # We also take the hyperparameter `k`, which is how many nearest neighbors we use.
21 |
22 |
23 | @app.function()
24 | def fit_knn(k):
25 | from sklearn.datasets import load_digits
26 | from sklearn.model_selection import train_test_split
27 | from sklearn.neighbors import KNeighborsClassifier
28 |
29 | X, y = load_digits(return_X_y=True)
30 | X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
31 |
32 | clf = KNeighborsClassifier(k)
33 | clf.fit(X_train, y_train)
34 | score = float(clf.score(X_test, y_test))
35 | print("k = %3d, score = %.4f" % (k, score))
36 | return score, k
37 |
38 |
39 | # ## Parallel search
40 | #
41 | # To do a hyperparameter search, let's map over this function with different values
42 | # for `k`, and then select for the best score on the holdout set:
43 |
44 |
45 | @app.local_entrypoint()
46 | def main():
47 | # Do a basic hyperparameter search
48 | best_score, best_k = max(fit_knn.map(range(1, 100)))
49 | print("Best k = %3d, score = %.4f" % (best_k, best_score))
50 |
--------------------------------------------------------------------------------
/02_building_containers/install_flash_attn.py:
--------------------------------------------------------------------------------
1 | # # Install Flash Attention on Modal
2 |
3 | # FlashAttention is an optimized CUDA library for Transformer
4 | # scaled-dot-product attention. Dao AI Lab now publishes pre-compiled
5 | # wheels, which makes installation quick. This script shows how to
6 | # 1. Pin an exact wheel that matches CUDA 12 / PyTorch 2.6 / Python 3.13.
7 | # 2. Build a Modal image that installs torch, numpy, and FlashAttention.
8 | # 3. Launch a GPU function to confirm the kernel runs on a GPU.
9 |
10 | import modal
11 |
12 | app = modal.App("example-install-flash-attn")
13 |
14 | # You need to specify an exact release wheel. You can find
15 | # [more on their github](https://github.com/Dao-AILab/flash-attention/releases).
16 |
17 | flash_attn_release = (
18 | "https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/"
19 | "flash_attn-2.7.4.post1+cu12torch2.6cxx11abiFALSE-cp313-cp313-linux_x86_64.whl"
20 | )
21 |
22 | image = modal.Image.debian_slim(python_version="3.13").uv_pip_install(
23 | "torch==2.6.0", "numpy==2.2.4", flash_attn_release
24 | )
25 |
26 |
27 | # And here is a demo verifying that it works:
28 |
29 |
30 | @app.function(gpu="L40S", image=image)
31 | def run_flash_attn():
32 | import torch
33 | from flash_attn import flash_attn_func
34 |
35 | batch_size, seqlen, nheads, headdim, nheads_k = 2, 4, 3, 16, 3
36 |
37 | q = torch.randn(batch_size, seqlen, nheads, headdim, dtype=torch.float16).to("cuda")
38 | k = torch.randn(batch_size, seqlen, nheads_k, headdim, dtype=torch.float16).to(
39 | "cuda"
40 | )
41 | v = torch.randn(batch_size, seqlen, nheads_k, headdim, dtype=torch.float16).to(
42 | "cuda"
43 | )
44 |
45 | out = flash_attn_func(q, k, v)
46 | assert out.shape == (batch_size, seqlen, nheads, headdim)
47 |
--------------------------------------------------------------------------------
/07_web_endpoints/flask_streaming.py:
--------------------------------------------------------------------------------
1 | # ---
2 | # cmd: ["modal", "serve", "07_web_endpoints/flask_streaming.py"]
3 | # ---
4 |
5 | # # Deploy Flask app with streaming results with Modal
6 |
7 | # This example shows how you can deploy a [Flask](https://flask.palletsprojects.com/en/3.0.x/) app with Modal that streams results back to the client.
8 |
9 | import modal
10 |
11 | app = modal.App(
12 | "example-flask-streaming",
13 | image=modal.Image.debian_slim().uv_pip_install("flask"),
14 | )
15 |
16 |
17 | @app.function()
18 | def generate_rows():
19 | """
20 | This creates a large CSV file, about 10MB, which will be streaming downloaded
21 | by a web client.
22 | """
23 | for i in range(10_000):
24 | line = ",".join(str((j + i) * i) for j in range(128))
25 | yield f"{line}\n"
26 |
27 |
28 | @app.function()
29 | @modal.wsgi_app()
30 | def flask_app():
31 | from flask import Flask
32 |
33 | web_app = Flask(__name__)
34 |
35 | # These web handlers follow the example from
36 | # https://flask.palletsprojects.com/en/2.2.x/patterns/streaming/
37 |
38 | @web_app.route("/")
39 | def generate_large_csv():
40 | # Run the function locally in the web app's container.
41 | return generate_rows.local(), {"Content-Type": "text/csv"}
42 |
43 | @web_app.route("/remote")
44 | def generate_large_csv_in_container():
45 | # Run the function remotely in a separate container,
46 | # which will stream back results to the web app container,
47 | # which will stream back to the web client.
48 | #
49 | # This is less efficient, but demonstrates how web serving
50 | # containers can be separated from and cooperate with other
51 | # containers.
52 | return generate_rows.remote(), {"Content-Type": "text/csv"}
53 |
54 | return web_app
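
# Once served, you can stream the generated CSV straight to disk with curl; the URLs are
# placeholders for the one Modal prints:
#
#     curl https://<your-workspace>--example-flask-streaming-flask-app.modal.run/ -o rows.csv
#     curl https://<your-workspace>--example-flask-streaming-flask-app.modal.run/remote -o rows.csv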
55 |
--------------------------------------------------------------------------------
/10_integrations/streamlit/app.py:
--------------------------------------------------------------------------------
1 | # ---
2 | # lambda-test: false # auxiliary-file
3 | # ---
4 | # ## Demo Streamlit application.
5 | #
6 | # This application is the example from https://docs.streamlit.io/library/get-started/create-an-app.
7 | #
8 | # Streamlit is designed to run its apps as Python scripts, not functions, so we separate the Streamlit
9 | # code into this module, away from the Modal application code.
10 |
11 |
12 | def main():
13 | import numpy as np
14 | import pandas as pd
15 | import streamlit as st
16 |
17 | st.title("Uber pickups in NYC!")
18 |
19 | DATE_COLUMN = "date/time"
20 | DATA_URL = (
21 | "https://s3-us-west-2.amazonaws.com/"
22 | "streamlit-demo-data/uber-raw-data-sep14.csv.gz"
23 | )
24 |
25 | @st.cache_data
26 | def load_data(nrows):
27 | data = pd.read_csv(DATA_URL, nrows=nrows)
28 |
29 | def lowercase(x):
30 | return str(x).lower()
31 |
32 | data.rename(lowercase, axis="columns", inplace=True)
33 | data[DATE_COLUMN] = pd.to_datetime(data[DATE_COLUMN])
34 | return data
35 |
36 | data_load_state = st.text("Loading data...")
37 | data = load_data(10000)
38 | data_load_state.text("Done! (using st.cache_data)")
39 |
40 | if st.checkbox("Show raw data"):
41 | st.subheader("Raw data")
42 | st.write(data)
43 |
44 | st.subheader("Number of pickups by hour")
45 | hist_values = np.histogram(data[DATE_COLUMN].dt.hour, bins=24, range=(0, 24))[0]
46 | st.bar_chart(hist_values)
47 |
48 | # Some number in the range 0-23
49 | hour_to_filter = st.slider("hour", 0, 23, 17)
50 | filtered_data = data[data[DATE_COLUMN].dt.hour == hour_to_filter]
51 |
52 | st.subheader("Map of all pickups at %s:00" % hour_to_filter)
53 | st.map(filtered_data)
54 |
55 |
56 | if __name__ == "__main__":
57 | main()
58 |
--------------------------------------------------------------------------------
/06_gpu_and_ml/speech-to-text/multitalker-frontend/audio-processor.js:
--------------------------------------------------------------------------------
1 | class AudioProcessor extends AudioWorkletProcessor {
2 | constructor(options) {
3 | super();
4 |
5 | const processorOptions = options.processorOptions || {};
6 | this.sourceSampleRate = processorOptions.sourceSampleRate || 48000;
7 | this.targetSampleRate = processorOptions.targetSampleRate || 16000;
8 | this.downsampleRatio = this.sourceSampleRate / this.targetSampleRate;
9 |
10 | this.bufferSize = 512; // 32 ms of audio at 16kHz (512 samples)
11 | this.buffer = new Float32Array(this.bufferSize);
12 | this.bufferIndex = 0;
13 |
14 | this.sampleAccumulator = 0;
15 | this.sampleCounter = 0;
16 | }
17 |
18 | process(inputs, outputs) {
19 | const input = inputs[0];
20 | const channel = input[0];
21 |
22 | if (!channel) return true;
23 |
24 | // simple downsampling by averaging
25 | for (let i = 0; i < channel.length; i++) {
26 | this.sampleAccumulator += channel[i];
27 | this.sampleCounter++;
28 |
29 | if (this.sampleCounter >= this.downsampleRatio) {
30 | const downsampledValue = this.sampleAccumulator / this.sampleCounter;
31 | this.buffer[this.bufferIndex++] = downsampledValue;
32 |
33 | this.sampleAccumulator = 0;
34 | this.sampleCounter = 0;
35 |
36 | if (this.bufferIndex >= this.bufferSize) {
37 | const pcmData = new Int16Array(this.bufferSize);
38 | for (let j = 0; j < this.bufferSize; j++) {
39 | pcmData[j] = Math.max(
40 | -32768,
41 | Math.min(32767, Math.round(this.buffer[j] * 32768))
42 | );
43 | }
44 |
45 | this.port.postMessage(pcmData.buffer);
46 |
47 | this.bufferIndex = 0;
48 | }
49 | }
50 | }
51 |
52 | return true;
53 | }
54 | }
55 |
56 | registerProcessor("audio-processor", AudioProcessor);
57 |
--------------------------------------------------------------------------------
/07_web_endpoints/fasthtml-checkboxes/cbx_locustfile.py:
--------------------------------------------------------------------------------
1 | # ---
2 | # lambda-test: false # auxiliary-file
3 | # pytest: false
4 | # ---
5 | import random
6 |
7 | from bs4 import BeautifulSoup
8 | from constants import N_CHECKBOXES
9 | from locust import HttpUser, between, task
10 |
11 |
12 | class CheckboxesUser(HttpUser):
13 | wait_time = between(0.01, 0.1) # Simulates a wait time between requests
14 |
15 | def load_homepage(self):
16 | """
17 | Simulates a user loading the homepage and fetching the state of the checkboxes.
18 | """
19 | response = self.client.get("/")
20 | soup = BeautifulSoup(response.text, "lxml")
21 | main_div = soup.find("main")
22 | self.id = main_div["hx-get"].split("/")[-1]
23 |
24 | @task(10)
25 | def toggle_random_checkboxes(self):
26 | """
27 | Simulates a user toggling a handful of random checkboxes.
28 | """
29 | n_checkboxes = random.binomialvariate( # approximately poisson at 10
30 | n=100,
31 | p=0.1,
32 | )
33 | for _ in range(max(n_checkboxes, 1)): # toggle at least one checkbox
34 | checkbox_id = int(
35 | N_CHECKBOXES * random.random() ** 2
36 | ) # Choose a random checkbox in [0, N_CHECKBOXES), more likely to be closer to 0
37 | self.client.post(
38 | f"/checkbox/toggle/{checkbox_id}",
39 | name="/checkbox/toggle",
40 | )
41 |
42 | @task(1)
43 | def poll_for_diffs(self):
44 | """
45 | Simulates a user polling for any outstanding diffs.
46 | """
47 | self.client.get(f"/diffs/{self.id}", name="/diffs")
48 |
49 | def on_start(self):
50 | """
51 | Called when a simulated user starts, typically used to initialize or login a user.
52 | """
53 | self.id = str(random.randint(1, 9999))
54 | self.load_homepage()
55 |
--------------------------------------------------------------------------------
/06_gpu_and_ml/speech-to-text/streaming-diarization-frontend/audio-processor.js:
--------------------------------------------------------------------------------
1 | class AudioProcessor extends AudioWorkletProcessor {
2 | constructor(options) {
3 | super();
4 |
5 | const processorOptions = options.processorOptions || {};
6 | this.sourceSampleRate = processorOptions.sourceSampleRate || 48000;
7 | this.targetSampleRate = processorOptions.targetSampleRate || 16000;
8 | this.downsampleRatio = this.sourceSampleRate / this.targetSampleRate;
9 |
10 | this.bufferSize = 512; // 32 ms of audio at 16kHz (512 samples)
11 | this.buffer = new Float32Array(this.bufferSize);
12 | this.bufferIndex = 0;
13 |
14 | this.sampleAccumulator = 0;
15 | this.sampleCounter = 0;
16 | }
17 |
18 | process(inputs, outputs) {
19 | const input = inputs[0];
20 | const channel = input[0];
21 |
22 | if (!channel) return true;
23 |
24 | // simple downsampling by averaging
25 | for (let i = 0; i < channel.length; i++) {
26 | this.sampleAccumulator += channel[i];
27 | this.sampleCounter++;
28 |
29 | if (this.sampleCounter >= this.downsampleRatio) {
30 | const downsampledValue = this.sampleAccumulator / this.sampleCounter;
31 | this.buffer[this.bufferIndex++] = downsampledValue;
32 |
33 | this.sampleAccumulator = 0;
34 | this.sampleCounter = 0;
35 |
36 | if (this.bufferIndex >= this.bufferSize) {
37 | const pcmData = new Int16Array(this.bufferSize);
38 | for (let j = 0; j < this.bufferSize; j++) {
39 | pcmData[j] = Math.max(
40 | -32768,
41 | Math.min(32767, Math.round(this.buffer[j] * 32768))
42 | );
43 | }
44 |
45 | this.port.postMessage(pcmData.buffer);
46 |
47 | this.bufferIndex = 0;
48 | }
49 | }
50 | }
51 |
52 | return true;
53 | }
54 | }
55 |
56 | registerProcessor("audio-processor", AudioProcessor);
57 |
--------------------------------------------------------------------------------
/06_gpu_and_ml/speech-to-text/streaming-parakeet-frontend/audio-processor.js:
--------------------------------------------------------------------------------
1 | class AudioProcessor extends AudioWorkletProcessor {
2 | constructor(options) {
3 | super();
4 |
5 | const processorOptions = options.processorOptions || {};
6 | this.sourceSampleRate = processorOptions.sourceSampleRate || 48000;
7 | this.targetSampleRate = processorOptions.targetSampleRate || 16000;
8 | this.downsampleRatio = this.sourceSampleRate / this.targetSampleRate;
9 |
10 | this.bufferSize = 16000; // 1 second of audio at 16kHz
11 | this.buffer = new Float32Array(this.bufferSize);
12 | this.bufferIndex = 0;
13 |
14 | this.sampleAccumulator = 0;
15 | this.sampleCounter = 0;
16 | }
17 |
18 | process(inputs, outputs) {
19 | const input = inputs[0];
20 | const channel = input[0];
21 |
22 | if (!channel) return true;
23 |
24 | // simple downsampling by averaging
25 | for (let i = 0; i < channel.length; i++) {
26 | this.sampleAccumulator += channel[i];
27 | this.sampleCounter++;
28 |
29 | if (this.sampleCounter >= this.downsampleRatio) {
30 | const downsampledValue = this.sampleAccumulator / this.sampleCounter;
31 | this.buffer[this.bufferIndex++] = downsampledValue;
32 |
33 | this.sampleAccumulator = 0;
34 | this.sampleCounter = 0;
35 |
36 | if (this.bufferIndex >= this.bufferSize) {
37 | const pcmData = new Int16Array(this.bufferSize);
38 | for (let j = 0; j < this.bufferSize; j++) {
39 | pcmData[j] = Math.max(
40 | -32768,
41 | Math.min(32767, Math.round(this.buffer[j] * 32768))
42 | );
43 | }
44 |
45 | this.port.postMessage(pcmData.buffer);
46 |
47 | this.bufferIndex = 0;
48 | }
49 | }
50 | }
51 |
52 | return true;
53 | }
54 | }
55 |
56 | registerProcessor("audio-processor", AudioProcessor);
57 |
--------------------------------------------------------------------------------
/10_integrations/dbt/sample_proj_duckdb_s3/seeds/raw_customers.csv:
--------------------------------------------------------------------------------
1 | id,first_name,last_name
2 | 1,Michael,P.
3 | 2,Shawn,M.
4 | 3,Kathleen,P.
5 | 4,Jimmy,C.
6 | 5,Katherine,R.
7 | 6,Sarah,R.
8 | 7,Martin,M.
9 | 8,Frank,R.
10 | 9,Jennifer,F.
11 | 10,Henry,W.
12 | 11,Fred,S.
13 | 12,Amy,D.
14 | 13,Kathleen,M.
15 | 14,Steve,F.
16 | 15,Teresa,H.
17 | 16,Amanda,H.
18 | 17,Kimberly,R.
19 | 18,Johnny,K.
20 | 19,Virginia,F.
21 | 20,Anna,A.
22 | 21,Willie,H.
23 | 22,Sean,H.
24 | 23,Mildred,A.
25 | 24,David,G.
26 | 25,Victor,H.
27 | 26,Aaron,R.
28 | 27,Benjamin,B.
29 | 28,Lisa,W.
30 | 29,Benjamin,K.
31 | 30,Christina,W.
32 | 31,Jane,G.
33 | 32,Thomas,O.
34 | 33,Katherine,M.
35 | 34,Jennifer,S.
36 | 35,Sara,T.
37 | 36,Harold,O.
38 | 37,Shirley,J.
39 | 38,Dennis,J.
40 | 39,Louise,W.
41 | 40,Maria,A.
42 | 41,Gloria,C.
43 | 42,Diana,S.
44 | 43,Kelly,N.
45 | 44,Jane,R.
46 | 45,Scott,B.
47 | 46,Norma,C.
48 | 47,Marie,P.
49 | 48,Lillian,C.
50 | 49,Judy,N.
51 | 50,Billy,L.
52 | 51,Howard,R.
53 | 52,Laura,F.
54 | 53,Anne,B.
55 | 54,Rose,M.
56 | 55,Nicholas,R.
57 | 56,Joshua,K.
58 | 57,Paul,W.
59 | 58,Kathryn,K.
60 | 59,Adam,A.
61 | 60,Norma,W.
62 | 61,Timothy,R.
63 | 62,Elizabeth,P.
64 | 63,Edward,G.
65 | 64,David,C.
66 | 65,Brenda,W.
67 | 66,Adam,W.
68 | 67,Michael,H.
69 | 68,Jesse,E.
70 | 69,Janet,P.
71 | 70,Helen,F.
72 | 71,Gerald,C.
73 | 72,Kathryn,O.
74 | 73,Alan,B.
75 | 74,Harry,A.
76 | 75,Andrea,H.
77 | 76,Barbara,W.
78 | 77,Anne,W.
79 | 78,Harry,H.
80 | 79,Jack,R.
81 | 80,Phillip,H.
82 | 81,Shirley,H.
83 | 82,Arthur,D.
84 | 83,Virginia,R.
85 | 84,Christina,R.
86 | 85,Theresa,M.
87 | 86,Jason,C.
88 | 87,Phillip,B.
89 | 88,Adam,T.
90 | 89,Margaret,J.
91 | 90,Paul,P.
92 | 91,Todd,W.
93 | 92,Willie,O.
94 | 93,Frances,R.
95 | 94,Gregory,H.
96 | 95,Lisa,P.
97 | 96,Jacqueline,A.
98 | 97,Shirley,D.
99 | 98,Nicole,M.
100 | 99,Mary,G.
101 | 100,Jean,M.
102 |
--------------------------------------------------------------------------------
/02_building_containers/import_sklearn.py:
--------------------------------------------------------------------------------
1 | # # Install scikit-learn in a custom image
2 | #
3 | # This builds a custom image which installs the sklearn (scikit-learn) Python package in it.
4 | # It's an example of how you can use packages, even if you don't have them installed locally.
5 | #
6 | # First, the imports
7 |
8 | import time
9 |
10 | import modal
11 |
12 | # Next, define an app, with a custom image that installs `sklearn`.
13 |
14 | app = modal.App(
15 | "example-import-sklearn",
16 | image=modal.Image.debian_slim()
17 | .apt_install("libgomp1")
18 | .uv_pip_install("scikit-learn"),
19 | )
20 |
21 | # The `app.image.imports()` context manager lets us conditionally import in the global scope.
22 | # This is needed because we might not have sklearn and numpy installed locally,
23 | # but we know they are installed inside the custom image.
24 |
25 | with app.image.imports():
26 | import numpy as np
27 | from sklearn import datasets, linear_model
28 |
29 | # Now, let's define a function that uses one of scikit-learn's built-in datasets
30 | # and fits a very simple model (linear regression) to it
31 |
32 |
33 | @app.function()
34 | def fit():
35 | print("Inside run!")
36 | t0 = time.time()
37 | diabetes_X, diabetes_y = datasets.load_diabetes(return_X_y=True)
38 | diabetes_X = diabetes_X[:, np.newaxis, 2]
39 | regr = linear_model.LinearRegression()
40 | regr.fit(diabetes_X, diabetes_y)
41 | return time.time() - t0
42 |
43 |
44 | # Finally, let's trigger the run locally. We also time this. Note that the first time we run this,
45 | # it will build the image. This might take 1-2 min. On subsequent runs, the image
46 | # is already built, and it will run much, much faster.
47 |
48 |
49 | if __name__ == "__main__":
50 | t0 = time.time()
51 | with app.run():
52 | t = fit.remote()
53 | print("Function time spent:", t)
54 | print("Full time spent:", time.time() - t0)
55 |
--------------------------------------------------------------------------------
/13_sandboxes/sandbox_agent.py:
--------------------------------------------------------------------------------
1 | # ---
2 | # cmd: ["python", "13_sandboxes/sandbox_agent.py"]
3 | # pytest: false
4 | # ---
5 |
6 | # # Run Claude Code in a Modal Sandbox
7 |
8 | # This example demonstrates how to run Claude Code in a Modal
9 | # [Sandbox](https://modal.com/docs/guide/sandbox) to analyze a GitHub repository.
10 | # The Sandbox provides an isolated environment where the agent can safely execute code
11 | # and examine files.
12 |
13 | import modal
14 |
15 | app = modal.App.lookup("example-sandbox-agent", create_if_missing=True)
16 |
17 | # First we create a custom Image that has Claude Code installed.
18 | image = (
19 | modal.Image.debian_slim(python_version="3.12")
20 | .apt_install("curl", "git")
21 | .env({"PATH": "/root/.local/bin:$PATH"}) # add claude to path
22 | .run_commands(
23 | "curl -fsSL https://claude.ai/install.sh | bash",
24 | )
25 | )
26 |
27 | with modal.enable_output():
28 | sandbox = modal.Sandbox.create(app=app, image=image)
29 | print(f"Sandbox ID: {sandbox.object_id}")
30 |
31 | # Next we'll clone a repository that Claude Code will work on.
32 | repo_url = "https://github.com/modal-labs/modal-examples"
33 | git_ps = sandbox.exec("git", "clone", "--depth", "1", repo_url, "/repo")
34 | git_ps.wait()
35 | print(f"Cloned '{repo_url}' into /repo.")
36 |
37 | # Finally we'll run Claude Code to analyze the repository.
38 | claude_cmd = ["claude", "-p", "What is in this repository?"]
39 |
40 | print("\nRunning command:", claude_cmd)
41 |
42 | claude_ps = sandbox.exec(
43 | *claude_cmd,
44 | pty=True, # Adding a PTY is important, since Claude requires it
45 | secrets=[
46 | modal.Secret.from_name("anthropic-secret", required_keys=["ANTHROPIC_API_KEY"])
47 | ],
48 | workdir="/repo",
49 | )
50 | claude_ps.wait()
51 |
52 | print("\nAgent stdout:\n")
53 | print(claude_ps.stdout.read())
54 |
55 | stderr = claude_ps.stderr.read()
56 | if stderr != "":
57 | print("Agent stderr:", stderr)
58 |
--------------------------------------------------------------------------------
/02_building_containers/install_cuda.py:
--------------------------------------------------------------------------------
1 | # # Installing the CUDA Toolkit on Modal
2 |
3 | # This code sample is intended to quickly show how different layers of the CUDA stack are used on Modal.
4 | # For greater detail, see our [guide to using CUDA on Modal](https://modal.com/docs/guide/cuda).
5 |
6 | # All Modal Functions with GPUs already have the NVIDIA CUDA drivers,
7 | # NVIDIA System Management Interface, and CUDA Driver API installed.
8 |
9 | import modal
10 |
11 | app = modal.App("example-install-cuda")
12 |
13 |
14 | @app.function(gpu="T4")
15 | def nvidia_smi():
16 | import subprocess
17 |
18 | subprocess.run(["nvidia-smi"], check=True)
19 |
20 |
21 | # This is enough to install and use many CUDA-dependent libraries, like PyTorch.
22 |
23 |
24 | @app.function(gpu="T4", image=modal.Image.debian_slim().uv_pip_install("torch"))
25 | def torch_cuda():
26 | import torch
27 |
28 | print(torch.cuda.get_device_properties("cuda:0"))
29 |
30 |
31 | # If your application or its dependencies need components of the CUDA toolkit,
32 | # like the `nvcc` compiler driver, installed as system libraries or command-line tools,
33 | # you'll need to install those manually.
34 |
35 | # We recommend the official NVIDIA CUDA Docker images from Docker Hub.
36 | # You'll need to add Python 3 and pip with the `add_python` option because the image
37 | # doesn't have these by default.
38 |
39 |
40 | ctk_image = modal.Image.from_registry(
41 | "nvidia/cuda:12.4.0-devel-ubuntu22.04", add_python="3.11"
42 | ).entrypoint([]) # removes chatty prints on entry
43 |
44 |
45 | @app.function(gpu="T4", image=ctk_image)
46 | def nvcc_version():
47 | import subprocess
48 |
49 | return subprocess.run(["nvcc", "--version"], check=True)
50 |
51 |
52 | # You can check that all these functions run by invoking this script with `modal run`.
53 |
54 |
55 | @app.local_entrypoint()
56 | def main():
57 | nvidia_smi.remote()
58 | torch_cuda.remote()
59 | nvcc_version.remote()
60 |
--------------------------------------------------------------------------------
/08_advanced/poll_delayed_result.py:
--------------------------------------------------------------------------------
1 | # ---
2 | # cmd: ["modal", "serve", "08_advanced/poll_delayed_result.py"]
3 | # ---
4 |
5 | # # Polling for a delayed result on Modal
6 |
7 | # This example shows how you can poll for a delayed result on Modal.
8 |
9 | # The function `factor_number` takes a number as input and returns the prime factors of the number. The function could take a long time to run, so we don't want to wait for the result in the web server.
10 | # Instead, we return a URL that the client can poll to get the result.
11 |
12 | import fastapi
13 | import modal
14 | from modal.functions import FunctionCall
15 | from starlette.responses import HTMLResponse, RedirectResponse
16 |
17 | app = modal.App("example-poll-delayed-result")
18 |
19 | web_app = fastapi.FastAPI()
20 |
21 |
22 | @app.function(image=modal.Image.debian_slim().uv_pip_install("primefac"))
23 | def factor_number(number):
24 | import primefac
25 |
26 | return list(primefac.primefac(number)) # could take a long time
27 |
28 |
29 | @web_app.get("/")
30 | async def index():
31 | return HTMLResponse(
32 | """
33 |
37 | """
38 | )
39 |
40 |
41 | @web_app.get("/factors")
42 | async def web_submit(request: fastapi.Request, number: int):
43 | call = factor_number.spawn(
44 | number
45 | ) # returns a FunctionCall without waiting for result
46 | polling_url = request.url.replace(
47 | path="/result", query=f"function_id={call.object_id}"
48 | )
49 | return RedirectResponse(polling_url)
50 |
51 |
52 | @web_app.get("/result")
53 | async def web_poll(function_id: str):
54 | function_call = FunctionCall.from_id(function_id)
55 | try:
56 | result = function_call.get(timeout=0)
57 | except TimeoutError:
58 | result = "not ready"
59 |
60 | return result
61 |
62 |
63 | @app.function()
64 | @modal.asgi_app()
65 | def fastapi_app():
66 | return web_app
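
# Once this is served, a client can submit a number to `/factors`, follow the redirect,
# and poll the `/result` URL until the factorization finishes. A minimal sketch of such
# a client is below; the base URL is a placeholder for your deployment's URL.
#
# ```python
# import time
# import urllib.request
#
# base = "https://<your-workspace>--example-poll-delayed-result-fastapi-app.modal.run"
# with urllib.request.urlopen(f"{base}/factors?number=1234567891011") as response:
#     polling_url = response.geturl()  # urllib follows the redirect to /result?function_id=...
#
# while True:
#     result = urllib.request.urlopen(polling_url).read().decode()
#     if result != '"not ready"':
#         print("factors:", result)
#         break
#     time.sleep(1)
# ```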
67 |
--------------------------------------------------------------------------------
/06_gpu_and_ml/comfyui/memory_snapshot/memory_snapshot_helper/prestartup_script.py:
--------------------------------------------------------------------------------
1 | import os
2 | import shutil
3 | from pathlib import Path
4 |
5 | comfy_dir = Path(__file__).parent.parent.parent / "comfy"
6 |
7 | model_management_path = str(comfy_dir / "model_management.py")
8 | original_model_management_path = str(comfy_dir / "model_management_original.py")
9 | is_patched = os.path.exists(original_model_management_path)
10 |
11 |
12 | def _apply_cuda_safe_patch():
13 | """Apply a permanent patch that avoid torch cuda init during snapshots"""
14 |
15 | shutil.copy(model_management_path, original_model_management_path)
16 | print(
17 | "[memory_snapshot_helper] ==> Applying CUDA-safe patch for model_management.py"
18 | )
19 |
20 | with open(model_management_path, "r") as f:
21 | content = f.read()
22 |
23 | # Find the get_torch_device function and modify the CUDA device access
24 | # The original line uses: return torch.device(torch.cuda.current_device())
25 | # We'll replace it with a check if CUDA is available
26 |
27 | # Define the patched content as a constant
28 | CUDA_SAFE_PATCH = """import os
29 | if torch.cuda.is_available():
30 | return torch.device(torch.cuda.current_device())
31 | else:
32 | logging.info("[memory_snapshot_helper] CUDA is not available, defaulting to cpu")
33 | return torch.device('cpu') # Safe fallback during snapshot"""
34 |
35 | if "return torch.device(torch.cuda.current_device())" in content:
36 | patched_content = content.replace(
37 | "return torch.device(torch.cuda.current_device())", CUDA_SAFE_PATCH
38 | )
39 |
40 | # Save the patched version
41 | with open(model_management_path, "w") as f:
42 | f.write(patched_content)
43 |
44 | print("[memory_snapshot_helper] ==> Successfully patched model_management.py")
45 | else:
46 | raise Exception(
47 | "[memory_snapshot_helper] ==> Failed to patch model_management.py"
48 | )
49 |
50 |
51 | if not is_patched:
52 | _apply_cuda_safe_patch()
53 |
--------------------------------------------------------------------------------
/06_gpu_and_ml/openai_whisper/pod_transcriber/README.md:
--------------------------------------------------------------------------------
1 | # Modal Podcast Transcriber
2 |
3 | This is a complete application that uses [NVIDIA Parakeet ASR](https://docs.nvidia.com/nemo-framework/user-guide/latest/nemotoolkit/asr/models.html#parakeet) to transcribe podcasts. Modal spins up multiple containers for a single transcription run, so hours of audio can be transcribed on-demand in a few minutes.
4 |
5 | You can find our deployment of the app [here](https://modal-labs-examples--parakeet-pod-transcriber-fastapi-app.modal.run/).
6 |
7 | ## Architecture
8 |
9 | The entire application is hosted serverlessly on Modal and consists of 3 components:
10 |
11 | 1. React + Vite SPA ([`app/frontend/`](./app/frontend/))
12 | 2. FastAPI server ([`app/api.py`](./app/api.py))
13 | 3. Modal async job queue ([`app/main.py`](./app/main.py))
14 |
15 | ## Developing locally
16 |
17 | ### Requirements
18 |
19 | - `npm`
20 | - `modal` installed in your current Python virtual environment
21 |
22 | ### Podchaser Secret
23 |
24 | To run this on your own Modal account, you'll need to [create a Podchaser account and create an API key](https://api-docs.podchaser.com/docs/guides/guide-first-podchaser-query/#getting-your-access-token).
25 |
26 | Then, create a [Modal Secret](https://modal.com/secrets/) with the following keys:
27 |
28 | - `PODCHASER_CLIENT_SECRET`
29 | - `PODCHASER_CLIENT_ID`
30 |
31 | You can find both on [their API page](https://www.podchaser.com/profile/settings/api).
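
Modal Functions in the app can then attach this Secret by name and read the keys from the environment. A minimal sketch (the Secret name `podchaser` and the function below are illustrative, not the app's actual code):

```python
import modal

app = modal.App("parakeet-pod-transcriber")


@app.function(secrets=[modal.Secret.from_name("podchaser")])
def search_podcasts(query: str):
    import os

    client_id = os.environ["PODCHASER_CLIENT_ID"]
    client_secret = os.environ["PODCHASER_CLIENT_SECRET"]
    ...  # exchange these credentials for a Podchaser access token and run the query
```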
32 |
33 | ### Vite build
34 |
35 | `cd` into the `app/frontend` directory, and run:
36 |
37 | - `npm install`
38 | - `npx vite build --watch`
39 |
40 | The last command will start a watcher process that will rebuild your static frontend files whenever you make changes to the frontend code.
41 |
42 | ### Serve on Modal
43 |
44 | Once you have `vite build` running, in a separate shell run this to start an ephemeral app on Modal:
45 |
46 | ```shell
47 | modal serve -m app.main
48 | ```
49 |
50 | Pressing `Ctrl+C` will stop your app.
51 |
52 | ### Deploy to Modal
53 |
54 | Once you're happy with your changes, run `modal deploy -m app.main` to deploy your app to Modal.
55 |
--------------------------------------------------------------------------------
/.github/workflows/run-examples.yml:
--------------------------------------------------------------------------------
1 | name: Run changed examples
2 |
3 | on:
4 | pull_request:
5 | branches:
6 | - main
7 | paths:
8 | - "**.py"
9 | push:
10 | branches:
11 | - main
12 | paths:
13 | - "**.py"
14 | workflow_dispatch:
15 |
16 | # Cancel previous runs of the same PR but do not cancel previous runs on main
17 | concurrency:
18 | group: ${{ github.workflow }}-${{ github.ref }}
19 | cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
20 |
21 | env:
22 | TERM: linux
23 | TERMINFO: /etc/terminfo
24 | MODAL_ENVIRONMENT: examples
25 |
26 | jobs:
27 | # Output all changed files in a JSON format compatible with GitHub Actions job matrices
28 | diff-matrix:
29 | name: Generate matrix of changed examples
30 | runs-on: ubuntu-24.04
31 | outputs:
32 | matrix: ${{ steps.diff.outputs.all_changed_files }}
33 |
34 | steps:
35 | - uses: actions/checkout@v3
36 | with:
37 | fetch-depth: 0
38 |
39 | - name: Find changed examples
40 | id: diff
41 | run: python3 -m internal.generate_diff_matrix
42 |
43 | # Run each changed example, using the output of the previous step as a job matrix
44 | run-changed:
45 | name: Run changed example
46 | needs: [diff-matrix]
47 | if: ${{ needs.diff-matrix.outputs.matrix != '[]' &&
48 | needs.diff-matrix.outputs.matrix != '' }}
49 | runs-on: ubuntu-24.04
50 | strategy:
51 | matrix:
52 | file: ${{ fromJson(needs.diff-matrix.outputs.matrix) }}
53 | fail-fast: false
54 |
55 | steps:
56 | - name: Checkout Repository
57 | uses: actions/checkout@v3
58 | with:
59 | fetch-depth: 1
60 | - uses: ./.github/actions/setup
61 |
62 | - name: Run example
63 | run: |
64 | echo "Running ${{ matrix.file }}"
65 | stem=$(basename "${{ matrix.file }}" .py)
66 | python3 -m internal.run_example $stem || exit $?
67 | env:
68 | MODAL_TOKEN_ID: ${{ secrets.MODAL_MODAL_LABS_TOKEN_ID }}
69 | MODAL_TOKEN_SECRET: ${{ secrets.MODAL_MODAL_LABS_TOKEN_SECRET }}
70 |
--------------------------------------------------------------------------------
/01_getting_started/inference_perf.py:
--------------------------------------------------------------------------------
1 | # ---
2 | # cmd: ["python", "01_getting_started/inference_perf.py"]
3 | # deploy: true
4 | # mypy: ignore-errors
5 | # ---
6 | from pathlib import Path
7 |
8 | import modal
9 |
10 | app = modal.App("example-inference-perf")
11 | image = (
12 | modal.Image.debian_slim()
13 | .uv_pip_install("transformers[torch]")
14 | .uv_pip_install("fastapi")
15 | )
16 |
17 | with image.imports():
18 | from transformers import pipeline
19 |
20 | weights_cache = {
21 | "/root/.cache/huggingface": modal.Volume.from_name(
22 | "example-inference", create_if_missing=True
23 | )
24 | }
25 |
26 |
27 | @app.cls(gpu="h100", image=image, volumes=weights_cache, enable_memory_snapshot=True)
28 | class Chat:
29 | @modal.enter()
30 | def init(self):
31 | self.chatbot = pipeline(
32 | model="Qwen/Qwen3-1.7B-FP8", device_map="cuda", max_new_tokens=1024
33 | )
34 |
35 | @modal.fastapi_endpoint(docs=True)
36 | def web(self, prompt: str | None = None) -> list[dict]:
37 | result = self.run.local(prompt)
38 | return result
39 |
40 | @modal.method()
41 | def run(self, prompt: str | None = None) -> list[dict]:
42 | if prompt is None:
43 | prompt = f"/no_think Read this code.\n\n{Path(__file__).read_text()}\nIn one paragraph, what does the code do?"
44 |
45 | print(prompt)
46 | context = [{"role": "user", "content": prompt}]
47 |
48 | result = self.chatbot(context)
49 | print(result[0]["generated_text"][-1]["content"])
50 |
51 | return result
52 |
53 |
54 | if __name__ == "__main__":
55 | import json
56 | import urllib.request
57 | from datetime import datetime
58 |
59 | ChatCls = modal.Cls.from_name(app.name, "Chat")
60 | chat = ChatCls()
61 | print(datetime.now(), "making .remote call to Chat.run")
62 | print(chat.run.remote())
63 | print(datetime.now(), "making web request to", url := chat.web.get_web_url())
64 |
65 | with urllib.request.urlopen(url) as response:
66 | print(datetime.now())
67 | print(json.loads(response.read().decode("utf-8")))
68 |
--------------------------------------------------------------------------------
/06_gpu_and_ml/openai_whisper/finetuning/train/transcribe.py:
--------------------------------------------------------------------------------
1 | import os
2 | from typing import TYPE_CHECKING
3 |
4 | from .logs import get_logger
5 |
6 | if TYPE_CHECKING:
7 | from numpy import ndarray
8 |
9 | logger = get_logger(__name__)
10 |
11 |
12 | def whisper_transcribe_local_file(
13 | model_dir: os.PathLike,
14 | language: str,
15 | filepath: os.PathLike,
16 | sample_rate_hz: int,
17 | ) -> str:
18 | """Convenience function for transcribing a single local audio file with a Whisper model already saved to disk."""
19 | from datasets import Audio, Dataset
20 |
21 | audio_dataset = Dataset.from_dict({"audio": [str(filepath)]}).cast_column(
22 | "audio", Audio(sampling_rate=sample_rate_hz)
23 | )
24 | row = next(iter(audio_dataset))
25 | return whisper_transcribe_audio(
26 | model_dir,
27 | language,
28 | data=row["audio"]["array"],
29 | sample_rate_hz=row["audio"]["sampling_rate"],
30 | )
31 |
32 |
33 | def whisper_transcribe_audio(
34 | model_dir: os.PathLike,
35 | language: str,
36 | data: "ndarray",
37 | sample_rate_hz: int,
38 | ) -> str:
39 | """Transcribes a single audio sample with a Whisper model, for demonstration purposes."""
40 | from transformers import (
41 | WhisperForConditionalGeneration,
42 | WhisperProcessor,
43 | )
44 |
45 | # load model and processor
46 | processor = WhisperProcessor.from_pretrained(model_dir)
47 | model = WhisperForConditionalGeneration.from_pretrained(model_dir)
48 | forced_decoder_ids = processor.get_decoder_prompt_ids(
49 | language=language, task="transcribe"
50 | )
51 | input_features = processor(
52 | data,
53 | sampling_rate=sample_rate_hz,
54 | return_tensors="pt",
55 | ).input_features
56 |
57 | # generate token ids
58 | predicted_ids = model.generate(
59 | input_features, forced_decoder_ids=forced_decoder_ids
60 | )
61 | # decode token ids to text
62 | predicted_transcription = processor.batch_decode(
63 | predicted_ids, skip_special_tokens=True
64 | )[0]
65 | return predicted_transcription
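
# Example usage (the model directory and language below are illustrative; point
# `model_dir` at wherever your fine-tuned checkpoint was saved):
#
# ```python
# text = whisper_transcribe_local_file(
#     model_dir="/model/whisper-small-hi",
#     language="hi",
#     filepath="audio/common_voice_hi_31822997.mp3",
#     sample_rate_hz=16000,
# )
# ```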
66 |
--------------------------------------------------------------------------------
/09_job_queues/doc_ocr_frontend/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 | Receipt Parser
8 |
9 |
10 |
11 |
12 |
13 |
17 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
44 |
45 |
46 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
--------------------------------------------------------------------------------
/07_web_endpoints/webrtc/frontend/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | WebRTC YOLO Demo
5 |
48 |
49 |
50 |
51 |
54 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
--------------------------------------------------------------------------------
/06_gpu_and_ml/dreambooth/assets/background.svg:
--------------------------------------------------------------------------------
1 |
24 |
--------------------------------------------------------------------------------
/06_gpu_and_ml/hyperparameter-sweep/assets/background.svg:
--------------------------------------------------------------------------------
1 |
24 |
--------------------------------------------------------------------------------
/06_gpu_and_ml/protein-folding/frontend/background.svg:
--------------------------------------------------------------------------------
1 |
24 |
--------------------------------------------------------------------------------
/06_gpu_and_ml/comfyui/ip_adapter/ip_adapter_example.py:
--------------------------------------------------------------------------------
1 | # ---
2 | # cmd: ["modal", "serve", "06_gpu_and_ml/comfyui/ip_adapter/ip_adapter_example.py"]
3 | # ---
4 |
5 | import subprocess
6 |
7 | import modal
8 |
9 | image = ( # build up a Modal Image to run ComfyUI, step by step
10 | modal.Image.debian_slim( # start from basic Linux with Python
11 | python_version="3.11"
12 | )
13 | .apt_install("git") # install git to clone ComfyUI
14 | .uv_pip_install("comfy-cli==1.2.7") # install comfy-cli
15 | .run_commands( # use comfy-cli to install the ComfyUI repo and its dependencies
16 | "comfy --skip-prompt install --nvidia"
17 | )
18 | .run_commands( # install the ComfyUI IP-Adapter Plus custom node pack
19 | "comfy node install comfyui_ipadapter_plus"
20 | )
21 | .run_commands("apt install -y wget")
22 | .run_commands( # the Unified Model Loader node requires these two models to be named a specific way, so we use wget instead of the usual comfy model download command
23 | "wget -q -O /root/comfy/ComfyUI/models/clip_vision/CLIP-ViT-H-14-laion2B-s32B-b79K.safetensors https://huggingface.co/h94/IP-Adapter/resolve/main/models/image_encoder/model.safetensors",
24 | )
25 | .run_commands(
26 | "wget -q -O /root/comfy/ComfyUI/models/clip_vision/CLIP-ViT-bigG-14-laion2B-39B-b160k.safetensors, https://huggingface.co/h94/IP-Adapter/resolve/main/sdxl_models/image_encoder/model.safetensors",
27 | )
28 | .run_commands( # download the IP-Adapter model
29 | "comfy --skip-prompt model download --url https://huggingface.co/h94/IP-Adapter/resolve/main/models/ip-adapter_sd15.safetensors --relative-path models/ipadapter"
30 | )
31 | .run_commands(
32 | "comfy --skip-prompt model download --url https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5/resolve/main/v1-5-pruned.safetensors --relative-path models/checkpoints",
33 | )
34 | )
35 |
36 | app = modal.App(name="example-ip-adapter", image=image)
37 |
38 |
39 | # Run ComfyUI as an interactive web server
40 | @app.function(
41 | max_containers=1,
42 | scaledown_window=30,
43 | timeout=1800,
44 | gpu="A10G",
45 | )
46 | @modal.concurrent(max_inputs=10)
47 | @modal.web_server(8000, startup_timeout=60)
48 | def ui():
49 | subprocess.Popen("comfy launch -- --listen 0.0.0.0 --port 8000", shell=True)
50 |
--------------------------------------------------------------------------------
/06_gpu_and_ml/comfyui/workflow_api.json:
--------------------------------------------------------------------------------
1 | {
2 | "6": {
3 | "inputs": {
4 | "text": "Surreal dreamscape with floating islands, upside-down waterfalls, and impossible geometric structures, all bathed in a soft, ethereal light",
5 | "clip": ["30", 1]
6 | },
7 | "class_type": "CLIPTextEncode",
8 | "_meta": {
9 | "title": "CLIP Text Encode (Positive Prompt)"
10 | }
11 | },
12 | "8": {
13 | "inputs": {
14 | "samples": ["31", 0],
15 | "vae": ["30", 2]
16 | },
17 | "class_type": "VAEDecode",
18 | "_meta": {
19 | "title": "VAE Decode"
20 | }
21 | },
22 | "9": {
23 | "inputs": {
24 | "filename_prefix": "ComfyUI",
25 | "images": ["37", 0]
26 | },
27 | "class_type": "SaveImage",
28 | "_meta": {
29 | "title": "Save Image"
30 | }
31 | },
32 | "27": {
33 | "inputs": {
34 | "width": 1024,
35 | "height": 1024,
36 | "batch_size": 1
37 | },
38 | "class_type": "EmptySD3LatentImage",
39 | "_meta": {
40 | "title": "EmptySD3LatentImage"
41 | }
42 | },
43 | "30": {
44 | "inputs": {
45 | "ckpt_name": "flux1-schnell-fp8.safetensors"
46 | },
47 | "class_type": "CheckpointLoaderSimple",
48 | "_meta": {
49 | "title": "Load Checkpoint"
50 | }
51 | },
52 | "31": {
53 | "inputs": {
54 | "seed": 74618958969040,
55 | "steps": 4,
56 | "cfg": 1,
57 | "sampler_name": "euler",
58 | "scheduler": "simple",
59 | "denoise": 1,
60 | "model": ["30", 0],
61 | "positive": ["6", 0],
62 | "negative": ["33", 0],
63 | "latent_image": ["27", 0]
64 | },
65 | "class_type": "KSampler",
66 | "_meta": {
67 | "title": "KSampler"
68 | }
69 | },
70 | "33": {
71 | "inputs": {
72 | "text": "",
73 | "clip": ["30", 1]
74 | },
75 | "class_type": "CLIPTextEncode",
76 | "_meta": {
77 | "title": "CLIP Text Encode (Negative Prompt)"
78 | }
79 | },
80 | "37": {
81 | "inputs": {
82 | "mode": "rescale",
83 | "supersample": "true",
84 | "resampling": "lanczos",
85 | "rescale_factor": 2,
86 | "resize_width": 1024,
87 | "resize_height": 1536,
88 | "image": ["8", 0]
89 | },
90 | "class_type": "Image Resize",
91 | "_meta": {
92 | "title": "Image Resize"
93 | }
94 | }
95 | }
96 |
--------------------------------------------------------------------------------
/internal/run_example.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import os
3 | import random
4 | import subprocess
5 | import sys
6 | import time
7 |
8 | from . import utils
9 |
10 | MINUTES = 60
11 | DEFAULT_TIMEOUT = 12 * MINUTES
12 |
13 |
14 | def run_script(example, timeout=DEFAULT_TIMEOUT):
15 | t0 = time.time()
16 |
17 | print(f"Running example {example.stem} with timeout {timeout}s")
18 |
19 | try:
20 | print(f"cli args: {example.cli_args}")
21 | if "runc" in example.runtimes:
22 | example.env |= {"MODAL_FUNCTION_RUNTIME": "runc"}
23 | process = subprocess.run(
24 | [str(x) for x in example.cli_args],
25 | env=os.environ | example.env | {"MODAL_SERVE_TIMEOUT": "5.0"},
26 | timeout=timeout,
27 | )
28 | total_time = time.time() - t0
29 | if process.returncode == 0:
30 | print(f"Success after {total_time:.2f}s :)")
31 | else:
32 | print(
33 | f"Failed after {total_time:.2f}s with return code {process.returncode} :("
34 | )
35 |
36 | returncode = process.returncode
37 |
38 | except subprocess.TimeoutExpired:
39 | print(f"Past timeout of {timeout}s :(")
40 | returncode = 999
41 |
42 | return returncode
43 |
44 |
45 | def run_single_example(stem, timeout=DEFAULT_TIMEOUT):
46 | examples = utils.get_examples()
47 | for example in examples:
48 | if stem == example.stem and example.metadata.get("lambda-test", True):
49 | return run_script(example, timeout=timeout)
50 | else:
51 | print(f"Could not find example name {stem}")
52 | return 0
53 |
54 |
55 | def run_random_example(timeout=DEFAULT_TIMEOUT):
56 | examples = filter(
57 | lambda ex: ex.metadata and ex.metadata.get("lambda-test", True),
58 | utils.get_examples(),
59 | )
60 | return run_script(random.choice(list(examples)), timeout=timeout)
61 |
62 |
63 | if __name__ == "__main__":
64 | parser = argparse.ArgumentParser()
65 | parser.add_argument("example", nargs="?", default=None)
66 | parser.add_argument("--timeout", type=int, default=DEFAULT_TIMEOUT)
67 | args = parser.parse_args()
68 | print(args)
69 | if args.example:
70 | sys.exit(run_single_example(args.example, timeout=args.timeout))
71 | else:
72 | sys.exit(run_random_example(timeout=args.timeout))
73 |
--------------------------------------------------------------------------------
/10_integrations/webscraper_old.py:
--------------------------------------------------------------------------------
1 | # # Web Scraping on Modal
2 |
3 | # This example shows how you can scrape links from a website and post them to a Slack channel using Modal.
4 |
5 | import os
6 |
7 | import modal
8 |
9 | app = modal.App("example-webscraper")
10 |
11 |
12 | playwright_image = modal.Image.debian_slim(
13 | python_version="3.10"
14 | ).run_commands( # Doesn't work with 3.11 yet
15 | "apt-get update",
16 | "apt-get install -y software-properties-common",
17 | "apt-add-repository non-free",
18 | "apt-add-repository contrib",
19 | "pip install playwright==1.42.0",
20 | "playwright install-deps chromium",
21 | "playwright install chromium",
22 | )
23 |
24 |
25 | @app.function(image=playwright_image)
26 | async def get_links(url: str) -> set[str]:
27 | from playwright.async_api import async_playwright
28 |
29 | async with async_playwright() as p:
30 | browser = await p.chromium.launch()
31 | page = await browser.new_page()
32 | await page.goto(url)
33 | links = await page.eval_on_selector_all(
34 | "a[href]", "elements => elements.map(element => element.href)"
35 | )
36 | await browser.close()
37 |
38 | return set(links)
39 |
40 |
41 | slack_sdk_image = modal.Image.debian_slim(python_version="3.10").uv_pip_install(
42 | "slack-sdk==3.27.1"
43 | )
44 |
45 |
46 | @app.function(
47 | image=slack_sdk_image,
48 | secrets=[
49 | modal.Secret.from_name(
50 | "scraper-slack-secret", required_keys=["SLACK_BOT_TOKEN"]
51 | )
52 | ],
53 | )
54 | def bot_token_msg(channel, message):
55 | import slack_sdk
56 | from slack_sdk.http_retry.builtin_handlers import RateLimitErrorRetryHandler
57 |
58 | client = slack_sdk.WebClient(token=os.environ["SLACK_BOT_TOKEN"])
59 | rate_limit_handler = RateLimitErrorRetryHandler(max_retry_count=3)
60 | client.retry_handlers.append(rate_limit_handler)
61 |
62 | print(f"Posting {message} to #{channel}")
63 | client.chat_postMessage(channel=channel, text=message)
64 |
65 |
66 | @app.function()
67 | def scrape():
68 | links_of_interest = ["http://modal.com"]
69 |
70 | for links in get_links.map(links_of_interest):
71 | for link in links:
72 | bot_token_msg.remote("scraped-links", link)
73 |
74 |
75 | @app.function(schedule=modal.Period(days=1))
76 | def daily_scrape():
77 | scrape.remote()
78 |
79 |
80 | @app.local_entrypoint()
81 | def run():
82 | scrape.remote()
83 |
--------------------------------------------------------------------------------
/06_gpu_and_ml/comfyui/comfyclient.py:
--------------------------------------------------------------------------------
1 | # ---
2 | # cmd: ["python", "06_gpu_and_ml/comfyui/comfyclient.py", "--modal-workspace", "modal-labs-examples", "--prompt", "Spider-Man visits Yosemite, rendered by Blender, trending on artstation"]
3 | # output-directory: "/tmp/comfyui"
4 | # ---
5 |
6 | import argparse
7 | import json
8 | import pathlib
9 | import sys
10 | import time
11 | import urllib.request
12 |
13 | OUTPUT_DIR = pathlib.Path("/tmp/comfyui")
14 | OUTPUT_DIR.mkdir(exist_ok=True, parents=True)
15 |
16 |
17 | def main(args: argparse.Namespace):
18 | url = f"https://{args.modal_workspace}--example-comfyapp-comfyui-api{'-dev' if args.dev else ''}.modal.run/"
19 | data = json.dumps({"prompt": args.prompt}).encode("utf-8")
20 | print(f"Sending request to {url} with prompt: {args.prompt}")
21 | print("Waiting for response...")
22 | start_time = time.time()
23 | req = urllib.request.Request(
24 | url, data=data, headers={"Content-Type": "application/json"}
25 | )
26 | try:
27 | with urllib.request.urlopen(req) as response:
28 | assert response.status == 200, response.status
29 | elapsed = round(time.time() - start_time, 1)
30 | print(f"Image finished generating in {elapsed} seconds!")
31 | filename = OUTPUT_DIR / f"{slugify(args.prompt)}.png"
32 | filename.write_bytes(response.read())
33 | print(f"Saved to '{filename}'")
34 | except urllib.error.HTTPError as e:
35 | if e.code == 404:
36 | print(f"Workflow API not found at {url}")
37 |
38 |
39 | def parse_args(arglist: list[str]) -> argparse.Namespace:
40 | parser = argparse.ArgumentParser()
41 |
42 | parser.add_argument(
43 | "--modal-workspace",
44 | type=str,
45 | required=True,
46 | help="Name of the Modal workspace with the deployed app. Run `modal profile current` to check.",
47 | )
48 | parser.add_argument(
49 | "--prompt",
50 | type=str,
51 | required=True,
52 | help="Prompt for the image generation model.",
53 | )
54 | parser.add_argument(
55 | "--dev",
56 | action="store_true",
57 | help="use this flag when running the ComfyUI server in development mode with `modal serve`",
58 | )
59 |
60 | return parser.parse_args(arglist[1:])
61 |
62 |
63 | def slugify(s: str) -> str:
64 | return s.lower().replace(" ", "-").replace(".", "-").replace("/", "-")[:32]
65 |
66 |
67 | if __name__ == "__main__":
68 | args = parse_args(sys.argv)
69 | main(args)
70 |
--------------------------------------------------------------------------------
/06_gpu_and_ml/gpu_snapshot.py:
--------------------------------------------------------------------------------
1 | # ---
2 | # deploy: true
3 | # cmd: ["python", "06_gpu_and_ml/gpu_snapshot.py"]
4 | # mypy: ignore-errors
5 | # ---
6 |
7 | # # Snapshot GPU memory to speed up cold starts
8 |
9 | # This example demonstrates how to use GPU memory snapshots to speed up model loading.
10 | # Note that GPU memory snapshotting is an experimental feature,
11 | # so test carefully before using in production!
12 | # You can read more about GPU memory snapshotting, and its caveats,
13 | # [here](https://modal.com/docs/guide/memory-snapshot).
14 |
15 | # GPU snapshots can only be used with deployed Functions, so first deploy the App:
16 |
17 | # ```bash
18 | # modal deploy -m 06_gpu_and_ml.gpu_snapshot
19 | # ```
20 |
21 | # Next, invoke the Function:
22 |
23 | # ```bash
24 | # python -m 06_gpu_and_ml.gpu_snapshot
25 | # ```
26 |
27 | # The full code is below:
28 |
29 | import modal
30 |
31 | image = modal.Image.debian_slim().uv_pip_install("sentence-transformers<6")
32 | app_name = "example-gpu-snapshot"
33 | app = modal.App(app_name, image=image)
34 |
35 | snapshot_key = "v1" # change this to invalidate the snapshot cache
36 |
37 | with image.imports(): # import in the global scope so imports can be snapshot
38 | from sentence_transformers import SentenceTransformer
39 |
40 |
41 | @app.cls(
42 | gpu="a10",
43 | enable_memory_snapshot=True,
44 | experimental_options={"enable_gpu_snapshot": True},
45 | )
46 | class SnapshotEmbedder:
47 | @modal.enter(snap=True)
48 | def load(self):
49 | # during enter phase of container lifecycle,
50 | # load the model onto the GPU so it can be snapshot
51 | print("loading model")
52 | self.model = SentenceTransformer("BAAI/bge-small-en-v1.5", device="cuda")
53 | print(f"snapshotting {snapshot_key}")
54 |
55 | @modal.method()
56 | def run(self, sentences: list[str]) -> list[list[float]]:
57 | # later invocations of the Function will start here
58 | embeddings = self.model.encode(sentences, normalize_embeddings=True)
59 | return embeddings.tolist()
60 |
61 |
62 | if __name__ == "__main__":
63 | # after deployment, we can use the class from anywhere
64 | SnapshotEmbedder = modal.Cls.from_name(app_name, "SnapshotEmbedder")
65 | embedder = SnapshotEmbedder()
66 | try:
67 | print("calling Modal Function")
68 | print(embedder.run.remote(sentences=["what is the meaning of life?"]))
69 | except modal.exception.NotFoundError:
70 | raise Exception(
71 | f"To take advantage of GPU snapshots, deploy first with modal deploy {__file__}"
72 | )
73 |
--------------------------------------------------------------------------------
/06_gpu_and_ml/openai_whisper/pod_transcriber/app/frontend/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
9 |
10 | Modal Podcast Transcriber
11 |
12 |
13 |
14 |
17 |
24 |
39 |
66 |
67 |
68 |
69 |
70 |
71 |
--------------------------------------------------------------------------------
/06_gpu_and_ml/openai_whisper/pod_transcriber/app/frontend/src/routes/podcast.tsx:
--------------------------------------------------------------------------------
1 | import useSWR from "swr";
2 | import { useParams } from "react-router-dom";
3 | import { Link } from "react-router-dom";
4 | import HomeButton from "../components/HomeButton";
5 | import Spinner from "../components/Spinner";
6 |
7 | function Episode({
8 | guidHash,
9 | title,
10 | transcribed,
11 | publishDate,
12 | podcastId,
13 | }: {
14 | guidHash: string;
15 | title: string;
16 | transcribed: boolean;
17 | publishDate: string;
18 | podcastId: string;
19 | }) {
20 | return (
21 |
25 | {transcribed ? "📃 " : " "}
26 |
30 | {title}
31 | {" "}
32 | | {publishDate}
33 |
34 | );
35 | }
36 |
37 | export default function Podcast() {
38 | let params = useParams();
39 |
40 | async function fetchData() {
41 | const response = await fetch(`/api/podcast/${params.podcastId}`);
42 | const data = await response.json();
43 | return data;
44 | }
45 |
46 | const { data } = useSWR(`/api/podcast/${params.podcastId}`, fetchData);
47 |
48 | if (!data) {
49 | return (
50 |
51 |
52 |
53 | );
54 | }
55 |
56 | return (
57 |
58 |
59 |
60 |
61 |
62 |
{data.pod_metadata.title}
63 |
64 | {data.pod_metadata.description}
65 |
66 |
67 |
68 |
69 |
70 |
71 | {data.episodes.map((ep) => (
72 |
80 | ))}
81 |
82 |
83 |
84 |
85 | );
86 | }
87 |
--------------------------------------------------------------------------------
/01_getting_started/inference_full.py:
--------------------------------------------------------------------------------
1 | # ---
2 | # cmd: ["python", "01_getting_started/inference_full.py"]
3 | # deploy: true
4 | # mypy: ignore-errors
5 | # ---
6 | from pathlib import Path
7 |
8 | import modal
9 |
10 | app = modal.App("example-inference-full")
11 | image = (
12 | modal.Image.debian_slim()
13 | .uv_pip_install("transformers[torch]")
14 | .uv_pip_install("fastapi")
15 | )
16 |
17 | with image.imports():
18 | from transformers import pipeline
19 |
20 | weights_cache = {
21 | "/root/.cache/huggingface": modal.Volume.from_name(
22 | "example-inference", create_if_missing=True
23 | )
24 | }
25 |
26 |
27 | @app.cls(gpu="h100", image=image, volumes=weights_cache, enable_memory_snapshot=True)
28 | class Chat:
29 | @modal.enter()
30 | def init(self):
31 | self.chatbot = pipeline(
32 | model="Qwen/Qwen3-1.7B-FP8", device_map="cuda", max_new_tokens=1024
33 | )
34 |
35 | @modal.fastapi_endpoint(docs=True)
36 | def web(self, prompt: str | None = None) -> list[dict]:
37 | result = self.run.local(prompt)
38 | return result
39 |
40 | @modal.method()
41 | def run(self, prompt: str | None = None) -> list[dict]:
42 | if prompt is None:
43 | prompt = f"/no_think Read this code.\n\n{Path(__file__).read_text()}\nIn one paragraph, what does the code do?"
44 |
45 | print(prompt)
46 | context = [{"role": "user", "content": prompt}]
47 |
48 | result = self.chatbot(context)
49 | print(result[0]["generated_text"][-1]["content"])
50 |
51 | return result
52 |
53 |
54 | @app.local_entrypoint()
55 | def main():
56 | import glob
57 |
58 | chat = Chat()
59 | root_dir, examples = Path(__file__).parent.parent, []
60 | for path in glob.glob("**/*.py", root_dir=root_dir, recursive=True):
61 | examples.append(
62 | f"/no_think Read this code.\n\n{(root_dir / path).read_text()}\nIn one paragraph, what does the code do?"
63 | )
64 |
65 | for result in chat.run.map(examples):
66 | print(result[0]["generated_text"][-1]["content"])
67 |
68 |
69 | if __name__ == "__main__":
70 | import json
71 | import urllib.request
72 | from datetime import datetime
73 |
74 | ChatCls = modal.Cls.from_name(app.name, "Chat")
75 | chat = ChatCls()
76 | print(datetime.now(), "making .remote call to Chat.run")
77 | print(chat.run.remote())
78 | print(datetime.now(), "making web request to", url := chat.web.get_web_url())
79 |
80 | with urllib.request.urlopen(url) as response:
81 | print(datetime.now())
82 | print(json.loads(response.read().decode("utf-8")))
83 |
--------------------------------------------------------------------------------
/.github/pull_request_template.md:
--------------------------------------------------------------------------------
1 |
4 |
5 | ## Type of Change
6 |
7 |
10 |
11 | - [ ] New example for the GitHub repo
12 | - [ ] New example for the documentation site (Linked from a discoverable page, e.g. via the sidebar in `/docs/examples`)
13 | - [ ] Example updates (Bug fixes, new features, etc.)
14 | - [ ] Other (Changes to the codebase, but not to examples)
15 |
16 | ## Monitoring Checklist
17 |
18 |
24 |
25 | - [ ] Example is configured for testing in the synthetic monitoring system, or `lambda-test: false` is provided in the example frontmatter and I have gotten approval from a maintainer
26 | - [ ] Example is tested by executing with `modal run`, or an alternative `cmd` is provided in the example frontmatter (e.g. `cmd: ["modal", "serve"]`)
27 | - [ ] Example is tested by running the `cmd` with no arguments, or the `args` are provided in the example frontmatter (e.g. `args: ["--prompt", "Formula for room temperature superconductor:"]`)
28 | - [ ] Example does _not_ require third-party dependencies besides `fastapi` to be installed locally (e.g. does not import `requests` or `torch` in the global scope or other code executed locally)
29 |
30 | ## Documentation Site Checklist
31 |
32 |
36 |
37 | ### Content
38 | - [ ] Example is documented with comments throughout, in a [_Literate Programming_](https://en.wikipedia.org/wiki/Literate_programming) style
39 | - [ ] All media assets for the example that are rendered in the documentation site page are retrieved from `modal-cdn.com`
40 |
41 | ### Build Stability
42 | - [ ] Example pins all dependencies in container images
43 | - [ ] Example pins container images to a stable tag like `v1`, not a dynamic tag like `latest`
44 | - [ ] Example specifies a `python_version` for the base image, if it is used
45 | - [ ] Example pins all dependencies to at least [SemVer](https://semver.org/) minor version, `~=x.y.z` or `==x.y`, or we expect this example to work across major versions of the dependency and are committed to maintenance across those versions
46 | - [ ] Example dependencies with `version < 1` are pinned to patch version, `==0.y.z`
47 |
48 | ## Outside Contributors
49 |
50 | You're great! Thanks for your contribution.
51 |
--------------------------------------------------------------------------------
/misc/lmdeploy_oai_compatible.py:
--------------------------------------------------------------------------------
1 | # # Deploy a model with `lmdeploy`
2 | #
3 | # This script deploys a model using [lmdeploy](https://github.com/InternLM/lmdeploy) behind an OpenAI-compatible API.
4 |
5 | import subprocess
6 |
7 | import modal
8 | from modal import App, Image, Secret, gpu
9 |
10 | ########## CONSTANTS ##########
11 |
12 |
13 | # define model for serving and path to store in modal container
14 | MODEL_NAME = "meta-llama/Llama-2-7b-hf"
15 | MODEL_DIR = f"/models/{MODEL_NAME}"
16 | SERVE_MODEL_NAME = "meta--llama-2-7b"
17 | HF_SECRET = Secret.from_name("huggingface-secret")
18 | SECONDS = 60 # for timeout
19 |
20 |
21 | ########## UTILS FUNCTIONS ##########
22 |
23 |
24 | def download_hf_model(model_dir: str, model_name: str):
25 | """Retrieve model from HuggingFace Hub and save into
26 | specified path within the modal container.
27 |
28 | Args:
29 | model_dir (str): Path to save model weights in container.
30 | model_name (str): HuggingFace Model ID.
31 | """
32 | import os
33 |
34 | from huggingface_hub import snapshot_download # type: ignore
35 | from transformers.utils import move_cache # type: ignore
36 |
37 | os.makedirs(model_dir, exist_ok=True)
38 |
39 | snapshot_download(
40 | model_name,
41 | local_dir=model_dir,
42 | # consolidated.safetensors is ignored to prevent an error; see: https://github.com/vllm-project/vllm/pull/5005
43 | ignore_patterns=["*.pt", "*.bin", "consolidated.safetensors"],
44 | token=os.environ["HF_TOKEN"],
45 | )
46 | move_cache()
47 |
48 |
49 | ########## IMAGE DEFINITION ##########
50 |
51 | # define image for modal environment
52 | lmdeploy_image = (
53 | Image.from_registry(
54 | "openmmlab/lmdeploy:v0.4.2",
55 | )
56 | .pip_install(["lmdeploy[all]", "huggingface_hub", "hf-transfer"])
57 | .env({"HF_HUB_ENABLE_HF_TRANSFER": "1"})
58 | .run_function(
59 | download_hf_model,
60 | timeout=60 * SECONDS,
61 | kwargs={"model_dir": MODEL_DIR, "model_name": MODEL_NAME},
62 | secrets=[HF_SECRET],
63 | )
64 | )
65 |
66 | ########## APP SETUP ##########
67 |
68 |
69 | app = App(f"lmdeploy-{SERVE_MODEL_NAME}")
70 |
71 | NO_GPU = 1
72 | TOKEN = "secret12345"
73 |
74 |
75 | @app.function(
76 | image=lmdeploy_image,
77 | gpu=gpu.A10G(count=NO_GPU),
78 | scaledown_window=20 * SECONDS,
79 | )
80 | @modal.concurrent(max_inputs=256) # https://modal.com/docs/guide/concurrent-inputs
81 | @modal.web_server(port=23333, startup_timeout=60 * SECONDS)
82 | def serve():
83 | cmd = f"""
84 | lmdeploy serve api_server {MODEL_DIR} \
85 | --model-name {SERVE_MODEL_NAME} \
86 | --server-port 23333 \
87 | --session-len 4092
88 | """
89 | subprocess.Popen(cmd, shell=True)
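
# Once deployed, the server exposes the OpenAI-compatible API under `/v1` on its
# `modal.run` URL, so any OpenAI client can talk to it. A minimal sketch (the base URL
# is a placeholder for your deployment's URL):
#
# ```python
# from openai import OpenAI
#
# client = OpenAI(
#     base_url="https://<your-workspace>--<app-name>-serve.modal.run/v1",
#     api_key="not-used",  # no key is required unless you configure one on the server
# )
# completion = client.chat.completions.create(
#     model=SERVE_MODEL_NAME,  # "meta--llama-2-7b"
#     messages=[{"role": "user", "content": "Hello!"}],
# )
# print(completion.choices[0].message.content)
# ```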
90 |
--------------------------------------------------------------------------------
/misc/tgi_oai_compatible.py:
--------------------------------------------------------------------------------
1 | # # Run TGI on Modal
2 |
3 | # This example shows how you can run LLMs with the [Text Generation Inference (TGI)](https://huggingface.co/docs/text-generation-inference/en/index) inference framework on Modal.
4 |
5 | import subprocess
6 |
7 | import modal
8 | from modal import App, Image, Secret, gpu
9 |
10 | # define model for serving and path to store in modal container
11 | MODEL_NAME = "meta-llama/Llama-2-7b-hf"
12 | MODEL_DIR = f"/models/{MODEL_NAME}"
13 | SERVE_MODEL_NAME = "meta--llama-2-7b"
14 | HF_SECRET = Secret.from_name("huggingface-secret")
15 | SECONDS = 60 # for timeout
16 |
17 | ########## UTILS FUNCTIONS ##########
18 |
19 |
20 | def download_hf_model(model_dir: str, model_name: str):
21 | """Retrieve model from HuggingFace Hub and save into
22 | specified path within the modal container.
23 |
24 | Args:
25 | model_dir (str): Path to save model weights in container.
26 | model_name (str): HuggingFace Model ID.
27 | """
28 | import os
29 |
30 | from huggingface_hub import snapshot_download # type: ignore
31 | from transformers.utils import move_cache # type: ignore
32 |
33 | os.makedirs(model_dir, exist_ok=True)
34 |
35 | snapshot_download(
36 | model_name,
37 | local_dir=model_dir,
38 | # consolidated.safetensors is ignored to prevent an error; see: https://github.com/vllm-project/vllm/pull/5005
39 | ignore_patterns=["*.pt", "*.bin", "consolidated.safetensors"],
40 | token=os.environ["HF_TOKEN"],
41 | )
42 | move_cache()
43 |
44 |
45 | ########## IMAGE DEFINITION ##########
46 |
47 |
48 | # define image for modal environment
49 | tgi_image = (
50 | Image.from_registry(
51 | "ghcr.io/huggingface/text-generation-inference", add_python="3.10"
52 | )
53 | .dockerfile_commands("ENTRYPOINT []")
54 | .pip_install(["huggingface_hub", "hf-transfer"])
55 | .env({"HF_HUB_ENABLE_HF_TRANSFER": "1"})
56 | .run_function(
57 | download_hf_model,
58 | timeout=20 * SECONDS,
59 | kwargs={"model_dir": MODEL_DIR, "model_name": MODEL_NAME},
60 | secrets=[HF_SECRET],
61 | )
62 | )
63 |
64 |
65 | ########## APP SETUP ##########
66 |
67 |
68 | app = App(f"tgi-{SERVE_MODEL_NAME}")
69 |
70 |
71 | NO_GPU = 1
72 | TOKEN = "secret12345"
73 |
74 |
75 | @app.function(
76 | image=tgi_image,
77 | gpu=gpu.A10G(count=NO_GPU),
78 | scaledown_window=20 * SECONDS,
79 | )
80 | @modal.concurrent(max_inputs=256) # https://modal.com/docs/guide/concurrent-inputs
81 | @modal.web_server(port=3000, startup_timeout=60 * SECONDS)
82 | def serve():
83 | cmd = f"""
84 | text-generation-launcher --model-id {MODEL_DIR} \
85 | --hostname 0.0.0.0 \
86 | --port 3000
87 | """
88 | subprocess.Popen(cmd, shell=True)
89 |
--------------------------------------------------------------------------------
/13_sandboxes/codelangchain/README.md:
--------------------------------------------------------------------------------
1 | # Deploying code agents without all the agonizing pain
2 |
3 | This example deploys a "code agent": a language model that can write and execute
4 | code in a flexible control flow aimed at completing a task or goal.
5 |
6 | It is implemented in LangChain, using the LangGraph library to structure the
7 | agent and the LangServe framework to turn it into a FastAPI app.
8 |
9 | We use Modal to turn that app into a web endpoint. We also use Modal to
10 | "sandbox" the agent's code execution, so that it can't accidentally (or when
11 | prompt injected!) damage the application by executing some inadvisable code.
12 |
13 | Modal's Charles Frye and LangChain's Lance Martin did a
14 | [walkthrough webinar](https://www.youtube.com/watch?v=X3yzWtAkaeo) explaining
15 | the project's context and implementation. Check it out if you're curious!
16 |
17 | ## How to run
18 |
19 | To run this app, you need to `pip install modal` and then create the following
20 | [secrets](https://modal.com/docs/guide/secrets):
21 |
22 | - `openai-secret` with an OpenAI API key, so that we can query OpenAI's models
23 | to power the agent,
24 | - and `langsmith-secret` with a LangSmith API key, so that we can monitor the
25 | agent's behavior with LangSmith.
26 |
27 | Head to the [secret creation dashboard](https://modal.com/secrets/) and follow
28 | the instructions for each secret type.
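
If you prefer the command line, you can also create these secrets with the Modal CLI
(the key values below are placeholders for your own keys):

```bash
modal secret create openai-secret OPENAI_API_KEY=<your-openai-api-key>
modal secret create langsmith-secret LANGCHAIN_API_KEY=<your-langsmith-api-key>
```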
29 |
30 | Then, you can deploy the app with:
31 |
32 | ```bash
33 | modal deploy codelangchain.py
34 | ```
35 |
36 | Navigate to the URL that appears in the output and you'll be dropped into an
37 | interactive "playground" interface where you can send queries to the agent and
38 | receive responses. You should expect it to take about a minute to respond.
39 |
40 | You can also navigate to the `/docs` path to see OpenAPI/Swagger docs, for
41 | everything you'd need to see how to incorporate the agent into your downstream
42 | applications via API requests.
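
Under the hood, LangServe mounts the chain at the `/codelangchain` path, so a request
from your own code might look roughly like this (a sketch; substitute your app's URL):

```bash
curl -X POST "https://YOUR_SUBDOMAIN.modal.run/codelangchain/invoke" \
  -H "Content-Type: application/json" \
  -d '{"input": "How do I reverse a linked list in Python?"}'
```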
43 |
44 | When developing the app, use `modal serve codelangchain.py` to get a
45 | hot-reloading server.
46 |
47 | ## Repo structure
48 |
49 | The web application is defined in `codelangchain.py`.
50 |
51 | It wraps the `agent.py` module, which contains the LangChain agent's definition.
52 | To test the agent in isolation, run `modal run agent.py` in the terminal and
53 | provide a `--question` about Python programming as input.
54 |
55 | Because the agent is a graph, it is defined by specifying nodes and edges, which
56 | are found in `nodes.py` and `edges.py`, respectively.
57 |
58 | The retrieval logic is very simple: all of the data from the relevant docs is
59 | retrieved and put at the beginning of the language model's prompt. You can find
60 | it in `retrieval.py`.
61 |
62 | The definition of the Modal container images and a few other shared utilities
63 | can be found in `common.py`.
64 |
--------------------------------------------------------------------------------
/10_integrations/streamlit/serve_streamlit.py:
--------------------------------------------------------------------------------
1 | # ---
2 | # deploy: true
3 | # cmd: ["modal", "serve", "10_integrations/streamlit/serve_streamlit.py"]
4 | # ---
5 |
6 | # # Run and share Streamlit apps
7 |
8 | # This example shows you how to run a Streamlit app with `modal serve`, and then deploy it as a serverless web app.
9 |
10 | # 
11 |
12 | # This example is structured as two files:
13 |
14 | # 1. This module, which defines the Modal objects (name the script `serve_streamlit.py` locally).
15 |
16 | # 2. `app.py`, which is any Streamlit script to be mounted into the Modal
17 | # function ([download script](https://github.com/modal-labs/modal-examples/blob/main/10_integrations/streamlit/app.py)).
18 |
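# For reference, a minimal `app.py` could look something like this sketch
# (the script linked above is a fuller demo; this is only illustrative):

# ```python
# import streamlit as st
#
# st.title("Hello from Modal + Streamlit!")
# name = st.text_input("Your name", value="world")
# st.write(f"Hello, {name}!")
# ```
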
19 | import shlex
20 | import subprocess
21 | from pathlib import Path
22 |
23 | import modal
24 |
25 | # ## Define container dependencies
26 |
27 | # The `app.py` script imports three third-party packages, so we include these in the example's
28 | # image definition and then add the `app.py` file itself to the image.
29 |
30 | streamlit_script_local_path = Path(__file__).parent / "app.py"
31 | streamlit_script_remote_path = "/root/app.py"
32 |
33 | image = (
34 | modal.Image.debian_slim(python_version="3.11")
35 | .uv_pip_install("streamlit~=1.35.0", "numpy~=1.26.4", "pandas~=2.2.2")
36 | .add_local_file(
37 | streamlit_script_local_path,
38 | streamlit_script_remote_path,
39 | )
40 | )
41 |
42 | app = modal.App(name="example-serve-streamlit", image=image)
43 |
44 | if not streamlit_script_local_path.exists():
45 | raise RuntimeError(
46 | "app.py not found! Place the script with your streamlit app in the same directory."
47 | )
48 |
49 | # ## Spawning the Streamlit server
50 |
51 | # Inside the container, we will run the Streamlit server in a background subprocess using
52 | # `subprocess.Popen`. We also expose port 8000 using the `@web_server` decorator.
53 |
54 |
55 | @app.function()
56 | @modal.concurrent(max_inputs=100)
57 | @modal.web_server(8000)
58 | def run():
59 | target = shlex.quote(streamlit_script_remote_path)
60 | cmd = f"streamlit run {target} --server.port 8000 --server.enableCORS=false --server.enableXsrfProtection=false"
61 | subprocess.Popen(cmd, shell=True)
62 |
63 |
64 | # ## Iterate and Deploy
65 |
66 | # While you're iterating on your Streamlit app, you can run it "ephemerally" with `modal serve`. This will
67 | # run a local process that watches your files and updates the app if anything changes.
68 |
69 | # ```shell
70 | # modal serve serve_streamlit.py
71 | # ```
72 |
73 | # Once you're happy with your changes, you can deploy your application with
74 |
75 | # ```shell
76 | # modal deploy serve_streamlit.py
77 | # ```
78 |
79 | # If successful, this will print a URL for your app that you can navigate to from
80 | # your browser 🎉 .
81 |
--------------------------------------------------------------------------------
/13_sandboxes/codelangchain/src/edges.py:
--------------------------------------------------------------------------------
1 | """Defines functions that transition our agent from one state to another."""
2 |
3 | from typing import Callable
4 |
5 | from .common import GraphState
6 |
7 | EXPECTED_NODES = [
8 | "generate",
9 | "check_code_imports",
10 | "check_code_execution",
11 | "finish",
12 | ]
13 |
14 |
15 | def enrich(graph):
16 | """Adds transition edges to the graph."""
17 |
18 | for node_name in set(EXPECTED_NODES):
19 | assert node_name in graph.nodes, f"Node {node_name} not found in graph"
20 |
21 | graph.add_edge("generate", "check_code_imports")
22 | graph.add_conditional_edges(
23 | "check_code_imports",
24 | EDGE_MAP["decide_to_check_code_exec"],
25 | {
26 | "check_code_execution": "check_code_execution",
27 | "generate": "generate",
28 | },
29 | )
30 | graph.add_edge("check_code_execution", "evaluate_execution")
31 | graph.add_conditional_edges(
32 | "evaluate_execution",
33 | EDGE_MAP["decide_to_finish"],
34 | {
35 | "finish": "finish",
36 | "generate": "generate",
37 | },
38 | )
39 | return graph
40 |
41 |
42 | def decide_to_check_code_exec(state: GraphState) -> str:
43 | """
44 | Determines whether to test code execution, or re-try answer generation.
45 |
46 | Args:
47 | state (dict): The current graph state
48 |
49 | Returns:
50 | str: Next node to call
51 | """
52 |
53 | print("---DECIDE TO TEST CODE EXECUTION---")
54 | state_dict = state["keys"]
55 | error = state_dict["error"]
56 |
57 | if error == "None":
58 | # no error was raised when checking imports,
59 | # so move on to testing code execution
60 | print("---DECISION: TEST CODE EXECUTION---")
61 | return "check_code_execution"
62 | else:
63 | # the import check failed, so re-generate the solution
64 | print("---DECISION: RE-TRY SOLUTION---")
65 | return "generate"
66 |
67 |
68 | def decide_to_finish(state: GraphState) -> str:
69 | """
70 | Determines whether to finish or re-try code generation (up to 3 attempts).
71 |
72 | Args:
73 | state (dict): The current graph state
74 |
75 | Returns:
76 | str: Next node to call
77 | """
78 |
79 | print("---DECIDE TO FINISH---")
80 | state_dict = state["keys"]
81 | evaluation = state_dict["evaluation"]
82 | iterations = state_dict["iterations"]
83 |
84 | if evaluation.decision == "finish" or iterations >= 3:
85 | print("---DECISION: FINISH---")
86 | return "finish"
87 | else:
88 | print("---DECISION: RE-TRY SOLUTION---")
89 | return "generate"
90 |
91 |
92 | EDGE_MAP: dict[str, Callable] = {
93 | "decide_to_check_code_exec": decide_to_check_code_exec,
94 | "decide_to_finish": decide_to_finish,
95 | }
96 |
--------------------------------------------------------------------------------
/07_web_endpoints/streaming.py:
--------------------------------------------------------------------------------
1 | # ---
2 | # cmd: ["modal", "serve", "07_web_endpoints/streaming.py"]
3 | # ---
4 |
5 | # # Deploy a FastAPI app with streaming responses
6 |
7 | # This example shows how you can deploy a [FastAPI](https://fastapi.tiangolo.com/) app with Modal that streams results back to the client.
8 |
9 | import asyncio
10 | import time
11 |
12 | import modal
13 | from fastapi import FastAPI
14 | from fastapi.responses import StreamingResponse
15 |
16 | image = modal.Image.debian_slim().uv_pip_install("fastapi[standard]")
17 | app = modal.App("example-streaming", image=image)
18 |
19 | web_app = FastAPI()
20 |
21 | # This asynchronous generator function simulates
22 | # progressively returning data to the client. The `asyncio.sleep`
23 | # is not necessary, but makes it easier to see the iterative behavior
24 | # of the response.
25 |
26 |
27 | async def fake_video_streamer():
28 | for i in range(10):
29 | yield f"frame {i}: hello world!".encode()
30 | await asyncio.sleep(1.0)
31 |
32 |
33 | # ASGI app with streaming handler.
34 |
35 | # This `fastapi_app` also uses the fake video streamer async generator,
36 | # passing it directly into `StreamingResponse`.
37 |
38 |
39 | @web_app.get("/")
40 | async def main():
41 | return StreamingResponse(fake_video_streamer(), media_type="text/event-stream")
42 |
43 |
44 | @app.function()
45 | @modal.asgi_app()
46 | def fastapi_app():
47 | return web_app
48 |
49 |
50 | # This `hook` web endpoint Modal function calls *another* Modal function,
51 | # and it just works!
52 |
53 |
54 | @app.function()
55 | def sync_fake_video_streamer():
56 | for i in range(10):
57 | yield f"frame {i}: some data\n".encode()
58 | time.sleep(1)
59 |
60 |
61 | @app.function()
62 | @modal.fastapi_endpoint()
63 | def hook():
64 | return StreamingResponse(
65 | sync_fake_video_streamer.remote_gen(), media_type="text/event-stream"
66 | )
67 |
68 |
69 | # This `mapped` web endpoint Modal function does a parallel `.map` on a simple
70 | # Modal function. Using `.starmap` would also work in the same fashion.
71 |
72 |
73 | @app.function()
74 | def map_me(i):
75 | time.sleep(i) # stagger the results for demo purposes
76 | return f"hello from {i}\n"
77 |
78 |
79 | @app.function()
80 | @modal.fastapi_endpoint()
81 | def mapped():
82 | return StreamingResponse(map_me.map(range(10)), media_type="text/event-stream")
83 |
84 |
85 | # To try for yourself, run
86 |
87 | # ```shell
88 | # modal serve streaming.py
89 | # ```
90 |
91 | # and then send requests to the URLs that appear in the terminal output.
92 |
93 | # Make sure that your client does not buffer the server response
94 | # until it receives newline (\n) characters. By default, browsers and `curl` buffer the output,
95 | # though modern browsers should respect the "text/event-stream" content-type header.
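
# For example, `curl` streams the response if you disable buffering with the
# `-N`/`--no-buffer` flag (the URL below is a placeholder for the one printed
# in your terminal):

# ```shell
# curl -N https://YOUR_SUBDOMAIN.modal.run/
# ```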
96 |
--------------------------------------------------------------------------------
/06_gpu_and_ml/llm-serving/openai_compatible/load_test.py:
--------------------------------------------------------------------------------
1 | import os
2 | from datetime import datetime, timezone
3 | from pathlib import Path, PosixPath
4 |
5 | import modal
6 |
7 | if modal.is_local():
8 | workspace = modal.config._profile
9 | environment = modal.config.config.get("environment") or ""
10 | else:
11 | workspace = os.environ["MODAL_WORKSPACE"]
12 | environment = os.environ["MODAL_ENVIRONMENT"]
13 |
14 |
15 | image = (
16 | modal.Image.debian_slim(python_version="3.11")
17 | .uv_pip_install("locust~=2.36.2", "openai~=1.37.1")
18 | .env({"MODAL_WORKSPACE": workspace, "MODAL_ENVIRONMENT": environment})
19 | .add_local_file(
20 | Path(__file__).parent / "locustfile.py",
21 | remote_path="/root/locustfile.py",
22 | )
23 | )
24 |
25 | volume = modal.Volume.from_name("loadtest-vllm-oai-results", create_if_missing=True)
26 | remote_path = Path("/root") / "loadtests"
27 | OUT_DIRECTORY = (
28 | remote_path / datetime.now(timezone.utc).replace(microsecond=0).isoformat()
29 | )
30 |
31 | app = modal.App("loadtest-vllm-oai", image=image, volumes={remote_path: volume})
32 |
33 | workers = 8
34 |
35 | prefix = workspace + (f"-{environment}" if environment else "")
36 | host = f"https://{prefix}--example-vllm-inference-serve.modal.run"
37 |
38 | csv_file = OUT_DIRECTORY / "stats.csv"
39 | default_args = [
40 | "-H",
41 | host,
42 | "--processes",
43 | str(workers),
44 | "--csv",
45 | str(csv_file),
46 | ]
47 |
48 | MINUTES = 60 # seconds
49 |
50 |
51 | @app.function(cpu=workers)
52 | @modal.concurrent(max_inputs=100)
53 | @modal.web_server(port=8089)
54 | def serve():
55 | run_locust.local(default_args)
56 |
57 |
58 | @app.function(cpu=workers, timeout=60 * MINUTES)
59 | def run_locust(args: list, wait=False):
60 | import subprocess
61 |
62 | process = subprocess.Popen(["locust"] + args)
63 | if wait:
64 | process.wait()
65 | return process.returncode
66 |
67 |
68 | @app.local_entrypoint()
69 | def main(
70 | r: float = 1.0,
71 | u: int = 36,
72 | t: str = "1m", # no more than the timeout of run_locust, one hour
73 | ):
74 | args = default_args + [
75 | "--spawn-rate",
76 | str(r),
77 | "--users",
78 | str(u),
79 | "--run-time",
80 | t,
81 | ]
82 |
83 | html_report_file = str(PosixPath(OUT_DIRECTORY / "report.html"))
84 | args += [
85 | "--headless", # run without browser UI
86 | "--autostart", # start test immediately
87 | "--autoquit", # stop once finished...
88 | "10", # ...but wait ten seconds
89 | "--html", # output an HTML-formatted report
90 | html_report_file, # to this location
91 | ]
92 |
93 | if exit_code := run_locust.remote(args, wait=True):
94 | raise SystemExit(exit_code)
95 | else:
96 | print("finished successfully")
97 |
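# To launch a load test from the command line, run something like the following from
# this directory (the flags mirror the local entrypoint's parameters; the values shown
# are just an example):
#
#   modal run load_test.py --r 2 --u 50 --t 5m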
--------------------------------------------------------------------------------
/06_gpu_and_ml/speech-to-text/streaming-diarization-frontend/index.html:
--------------------------------------------------------------------------------
[Markup trimmed. The page is titled "Audio Transcription" and shows the heading "Streaming Speaker Diarization with nvidia/diar_streaming_sortformer_4spk-v2_1", a tip to turn the microphone volume up for better transcription quality, and a level meter for each of Speaker 0 through Speaker 3, initialized to 0.00.]
--------------------------------------------------------------------------------
/07_web_endpoints/fasthtml-checkboxes/cbx_load_test.py:
--------------------------------------------------------------------------------
1 | import os
2 | from datetime import datetime
3 | from pathlib import Path
4 |
5 | import modal
6 |
7 | if modal.is_local():
8 | workspace = modal.config._profile or ""
9 | environment = modal.config.config["environment"] or ""
10 | else:
11 | workspace = os.environ["MODAL_WORKSPACE"] or ""
12 | environment = os.environ["MODAL_ENVIRONMENT"] or ""
13 |
14 |
15 | image = (
16 | modal.Image.debian_slim(python_version="3.12")
17 | .uv_pip_install("locust~=2.29.1", "beautifulsoup4~=4.12.3", "lxml~=5.3.0")
18 | .env({"MODAL_WORKSPACE": workspace, "MODAL_ENVIRONMENT": environment})
19 | .add_local_file(
20 | Path(__file__).parent / "cbx_locustfile.py",
21 | remote_path="/root/locustfile.py",
22 | )
23 | .add_local_file(
24 | Path(__file__).parent / "constants.py",
25 | remote_path="/root/constants.py",
26 | )
27 | )
28 | volume = modal.Volume.from_name("example-cbx-load-test-results", create_if_missing=True)
29 | remote_path = Path("/root") / "loadtests"
30 | OUT_DIRECTORY = remote_path / datetime.utcnow().replace(microsecond=0).isoformat()
31 |
32 | app = modal.App("example-cbx-load-test", image=image, volumes={remote_path: volume})
33 |
34 | workers = 8
35 | host = f"https://{workspace}{'-' + environment if environment else ''}--example-fasthtml-checkboxes-web.modal.run"
36 | csv_file = OUT_DIRECTORY / "stats.csv"
37 | default_args = [
38 | "-H",
39 | host,
40 | "--processes",
41 | str(workers),
42 | "--csv",
43 | csv_file,
44 | ]
45 |
46 | MINUTES = 60 # seconds
47 |
48 |
49 | @app.function(cpu=workers)
50 | @modal.concurrent(max_inputs=100)
51 | @modal.web_server(port=8089)
52 | def serve():
53 | run_locust.local(default_args)
54 |
55 |
56 | @app.function(cpu=workers, timeout=60 * MINUTES)
57 | def run_locust(args: list, wait=False):
58 | import subprocess
59 |
60 | process = subprocess.Popen(["locust"] + args)
61 | if wait:
62 | process.wait()
63 | return process.returncode
64 |
65 |
66 | @app.local_entrypoint()
67 | def main(
68 | r: float = 1.0,
69 | u: int = 36,
70 | t: str = "1m", # no more than the timeout of run_locust, one hour
71 | ):
72 | args = default_args + [
73 | "--spawn-rate",
74 | str(r),
75 | "--users",
76 | str(u),
77 | "--run-time",
78 | t,
79 | ]
80 |
81 | html_report_file = OUT_DIRECTORY / "report.html"
82 | args += [
83 | "--headless", # run without browser UI
84 | "--autostart", # start test immediately
85 | "--autoquit", # stop once finished...
86 | "10", # ...but wait ten seconds
87 | "--html", # output an HTML-formatted report
88 | html_report_file, # to this location
89 | ]
90 |
91 | if exit_code := run_locust.remote(args, wait=True):
92 | raise SystemExit(exit_code)
93 | else:
94 | print("finished successfully")
95 |
--------------------------------------------------------------------------------
/10_integrations/tailscale/modal_tailscale.py:
--------------------------------------------------------------------------------
1 | # ---
2 | # lambda-test: false # missing-secret
3 | # ---
4 |
5 | # # Add Modal Apps to Tailscale
6 |
7 | # This example demonstrates how to integrate Modal with Tailscale (https://tailscale.com).
8 | # It outlines the steps to configure Modal containers so that they join the Tailscale network.
9 |
10 | # We use a custom entrypoint to automatically add containers to a Tailscale network (tailnet).
11 | # This configuration enables the containers to interact with one another and with
12 | # additional applications within the same tailnet.
13 |
14 |
15 | import modal
16 |
17 | # Install Tailscale and copy custom entrypoint script ([entrypoint.sh](https://github.com/modal-labs/modal-examples/blob/main/10_integrations/tailscale/entrypoint.sh)). The script must be
18 | # executable.
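# For reference, such an entrypoint script roughly starts `tailscaled` with userspace
# networking and local SOCKS5/HTTP proxies, then authenticates the node. This is only a
# sketch; the linked `entrypoint.sh` is the real script:

# ```shell
# #!/bin/sh
# tailscaled --tun=userspace-networking \
#   --socks5-server=localhost:1080 --outbound-http-proxy-listen=localhost:1080 &
# tailscale up --authkey=${TAILSCALE_AUTHKEY}
# exec "$@"
# ```
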
19 | image = (
20 | modal.Image.debian_slim(python_version="3.11")
21 | .apt_install("curl")
22 | .run_commands("curl -fsSL https://tailscale.com/install.sh | sh")
23 | .uv_pip_install("requests==2.32.3", "PySocks==1.7.1")
24 | .add_local_file("./entrypoint.sh", "/root/entrypoint.sh", copy=True)
25 | .run_commands("chmod a+x /root/entrypoint.sh")
26 | .entrypoint(["/root/entrypoint.sh"])
27 | )
28 | app = modal.App("example-modal-tailscale", image=image)
29 |
30 | # Packages might not be installed locally. This catches import errors and
31 | # only attempts imports in the container.
32 | with image.imports():
33 | import socket
34 |
35 | import socks
36 |
37 | # Configure Python to use the SOCKS5 proxy globally.
38 | if not modal.is_local():
39 | socks.set_default_proxy(socks.SOCKS5, "0.0.0.0", 1080)
40 | socket.socket = socks.socksocket
41 |
42 |
43 | # Run your function adding a Tailscale secret. We suggest creating a [reusable and ephemeral key](https://tailscale.com/kb/1111/ephemeral-nodes).
44 | @app.function(
45 | secrets=[
46 | modal.Secret.from_name("tailscale-auth", required_keys=["TAILSCALE_AUTHKEY"]),
47 | modal.Secret.from_dict(
48 | {
49 | "ALL_PROXY": "socks5://localhost:1080/",
50 | "HTTP_PROXY": "http://localhost:1080/",
51 | "http_proxy": "http://localhost:1080/",
52 | }
53 | ),
54 | ],
55 | )
56 | def connect_to_machine():
57 | import requests
58 |
59 | # Connect to other machines in your tailnet.
60 | resp = requests.get("http://my-tailscale-machine:5000")
61 | print(resp.content)
62 |
63 |
64 | # Run this script with `modal run modal_tailscale.py`. You will see Tailscale logs
65 | # when the container starts, indicating that you were able to log in successfully and
66 | # that the proxies (SOCKS5 and HTTP) have been created successfully. You will also
67 | # be able to see Modal containers in your Tailscale dashboard in the "Machines" tab.
68 | # Every new container launched will show up as a new "machine". Containers are
69 | # individually addressable using their Tailscale name or IP address.
70 |
--------------------------------------------------------------------------------
/07_web_endpoints/badges.py:
--------------------------------------------------------------------------------
1 | # ---
2 | # cmd: ["modal", "serve", "07_web_endpoints/badges.py"]
3 | # ---
4 |
5 | # # Serve a dynamic SVG badge
6 |
7 | # In this example, we use Modal's [webhook](https://modal.com/docs/guide/webhooks) capability to host a dynamic SVG badge that shows
8 | # you the current number of downloads for a Python package.
9 |
10 | # First let's start off by creating a Modal app, and defining an image with the Python packages we're going to be using:
11 |
12 | import modal
13 |
14 | image = modal.Image.debian_slim().uv_pip_install(
15 | "fastapi[standard]", "pybadges", "pypistats"
16 | )
17 |
18 | app = modal.App("example-badges", image=image)
19 |
20 | # ## Defining the web endpoint
21 |
22 | # In addition to using `@app.function()` to decorate our function, we use the
23 | # [`@modal.fastapi_endpoint` decorator](https://modal.com/docs/guide/webhooks)
24 | # which instructs Modal to create a REST endpoint that serves this function.
25 | # Note that the default method is `GET`, but this can be overridden using the `method` argument.
26 |
27 |
28 | @app.function()
29 | @modal.fastapi_endpoint()
30 | async def package_downloads(package_name: str):
31 | import json
32 |
33 | import pypistats
34 | from fastapi import Response
35 | from pybadges import badge
36 |
37 | stats = json.loads(pypistats.recent(package_name, format="json"))
38 | svg = badge(
39 | left_text=f"{package_name} downloads",
40 | right_text=str(stats["data"]["last_month"]),
41 | right_color="blue",
42 | )
43 |
44 | return Response(content=svg, media_type="image/svg+xml")
45 |
46 |
47 | # In this function, we use `pypistats` to query the most recent stats for our package, and then
48 | # use that as the text for an SVG badge, rendered using `pybadges`.
49 | # Since Modal web endpoints are FastAPI functions under the hood, we return this SVG wrapped in a FastAPI response with the correct media type.
50 | # Also note that FastAPI automatically interprets `package_name` as a [query param](https://fastapi.tiangolo.com/tutorial/query-params/).
51 |
52 | # ## Running and deploying
53 |
54 | # We can now run an ephemeral app on the command line using:
55 |
56 | # ```shell
57 | # modal serve badges.py
58 | # ```
59 |
60 | # This will create a short-lived web URL that exists until you terminate the script.
61 | # It will also hot-reload the code if you make changes to it.
62 |
63 | # If you want to create a persistent URL, you have to deploy the script.
64 | # You can do that with the Modal CLI by running `modal deploy badges.py`.
65 |
66 | # Either way, as soon as we run this command, Modal gives us the link to our brand new
67 | # web endpoint in the output:
68 |
69 | # 
70 |
71 | # We can now visit the link using a web browser, using a `package_name` of our choice in the URL query params.
72 | # For example:
73 | # - `https://YOUR_SUBDOMAIN.modal.run/?package_name=synchronicity`
74 | # - `https://YOUR_SUBDOMAIN.modal.run/?package_name=torch`
75 |
--------------------------------------------------------------------------------
/13_sandboxes/codelangchain/langserve.py:
--------------------------------------------------------------------------------
1 | # ---
2 | # pytest: false
3 | # cmd: ["modal", "serve", "-m", "13_sandboxes.codelangchain.langserve"]
4 | # ---
5 |
6 | # # Deploy LangChain and LangGraph applications with LangServe
7 |
8 | # This code demonstrates how to deploy a
9 | # [LangServe](https://python.langchain.com/docs/langserve/) application on Modal.
10 | # LangServe makes it easy to wrap LangChain and LangGraph applications in a FastAPI server,
11 | # and Modal makes it easy to deploy FastAPI servers.
12 |
13 | # The LangGraph application that it serves is from our [sandboxed LLM coding agent example](https://modal.com/docs/examples/agent).
14 |
15 | # You can find the code for the agent and several other code files associated with this example in the
16 | # [`codelangchain` directory of our examples repo](https://github.com/modal-labs/modal-examples/tree/main/13_sandboxes/codelangchain).
17 |
18 | import modal
19 |
20 | from .agent import construct_graph, create_sandbox
21 | from .src.common import image
22 |
23 | app = modal.App("example-codelangchain-langserve")
24 |
25 | image = image.uv_pip_install("langserve[all]==0.3.0")
26 |
27 |
28 | @app.function(
29 | image=image,
30 | secrets=[ # see the agent.py file for more information on Secrets
31 | modal.Secret.from_name("openai-secret", required_keys=["OPENAI_API_KEY"]),
32 | modal.Secret.from_name("langsmith-secret", required_keys=["LANGCHAIN_API_KEY"]),
33 | ],
34 | )
35 | @modal.asgi_app()
36 | def serve():
37 | from fastapi import FastAPI, responses
38 | from fastapi.middleware.cors import CORSMiddleware
39 | from langchain_core.runnables import RunnableLambda
40 | from langserve import add_routes
41 |
42 | # create a FastAPI app
43 | web_app = FastAPI(
44 | title="CodeLangChain Server",
45 | version="1.0",
46 | description="Writes code and checks if it runs.",
47 | )
48 |
49 | # set all CORS enabled origins
50 | web_app.add_middleware(
51 | CORSMiddleware,
52 | allow_origins=["*"],
53 | allow_credentials=True,
54 | allow_methods=["*"],
55 | allow_headers=["*"],
56 | expose_headers=["*"],
57 | )
58 |
59 | def inp(question: str) -> dict:
60 | return {"keys": {"question": question, "iterations": 0}}
61 |
62 | def out(state: dict) -> str:
63 | if "finish" in state:
64 | return state["finish"]["keys"]["response"]
65 | elif len(state) > 0 and "finish" in state[-1]:
66 | return state[-1]["finish"]["keys"]["response"]
67 | else:
68 | return str(state)
69 |
70 | graph = construct_graph(create_sandbox(app), debug=False).compile()
71 |
72 | chain = RunnableLambda(inp) | graph | RunnableLambda(out)
73 |
74 | add_routes(
75 | web_app,
76 | chain,
77 | path="/codelangchain",
78 | )
79 |
80 | # redirect the root to the interactive playground
81 | @web_app.get("/")
82 | def redirect():
83 | return responses.RedirectResponse(url="/codelangchain/playground")
84 |
85 | # return the FastAPI app and Modal will deploy it for us
86 | return web_app
87 |
--------------------------------------------------------------------------------
/10_integrations/dbt/sample_proj_duckdb_s3/seeds/raw_payments.csv:
--------------------------------------------------------------------------------
1 | id,order_id,payment_method,amount
2 | 1,1,credit_card,1000
3 | 2,2,credit_card,2000
4 | 3,3,coupon,100
5 | 4,4,coupon,2500
6 | 5,5,bank_transfer,1700
7 | 6,6,credit_card,600
8 | 7,7,credit_card,1600
9 | 8,8,credit_card,2300
10 | 9,9,gift_card,2300
11 | 10,9,bank_transfer,0
12 | 11,10,bank_transfer,2600
13 | 12,11,credit_card,2700
14 | 13,12,credit_card,100
15 | 14,13,credit_card,500
16 | 15,13,bank_transfer,1400
17 | 16,14,bank_transfer,300
18 | 17,15,coupon,2200
19 | 18,16,credit_card,1000
20 | 19,17,bank_transfer,200
21 | 20,18,credit_card,500
22 | 21,18,credit_card,800
23 | 22,19,gift_card,600
24 | 23,20,bank_transfer,1500
25 | 24,21,credit_card,1200
26 | 25,22,bank_transfer,800
27 | 26,23,gift_card,2300
28 | 27,24,coupon,2600
29 | 28,25,bank_transfer,2000
30 | 29,25,credit_card,2200
31 | 30,25,coupon,1600
32 | 31,26,credit_card,3000
33 | 32,27,credit_card,2300
34 | 33,28,bank_transfer,1900
35 | 34,29,bank_transfer,1200
36 | 35,30,credit_card,1300
37 | 36,31,credit_card,1200
38 | 37,32,credit_card,300
39 | 38,33,credit_card,2200
40 | 39,34,bank_transfer,1500
41 | 40,35,credit_card,2900
42 | 41,36,bank_transfer,900
43 | 42,37,credit_card,2300
44 | 43,38,credit_card,1500
45 | 44,39,bank_transfer,800
46 | 45,40,credit_card,1400
47 | 46,41,credit_card,1700
48 | 47,42,coupon,1700
49 | 48,43,gift_card,1800
50 | 49,44,gift_card,1100
51 | 50,45,bank_transfer,500
52 | 51,46,bank_transfer,800
53 | 52,47,credit_card,2200
54 | 53,48,bank_transfer,300
55 | 54,49,credit_card,600
56 | 55,49,credit_card,900
57 | 56,50,credit_card,2600
58 | 57,51,credit_card,2900
59 | 58,51,credit_card,100
60 | 59,52,bank_transfer,1500
61 | 60,53,credit_card,300
62 | 61,54,credit_card,1800
63 | 62,54,bank_transfer,1100
64 | 63,55,credit_card,2900
65 | 64,56,credit_card,400
66 | 65,57,bank_transfer,200
67 | 66,58,coupon,1800
68 | 67,58,gift_card,600
69 | 68,59,gift_card,2800
70 | 69,60,credit_card,400
71 | 70,61,bank_transfer,1600
72 | 71,62,gift_card,1400
73 | 72,63,credit_card,2900
74 | 73,64,bank_transfer,2600
75 | 74,65,credit_card,0
76 | 75,66,credit_card,2800
77 | 76,67,bank_transfer,400
78 | 77,67,credit_card,1900
79 | 78,68,credit_card,1600
80 | 79,69,credit_card,1900
81 | 80,70,credit_card,2600
82 | 81,71,credit_card,500
83 | 82,72,credit_card,2900
84 | 83,73,bank_transfer,300
85 | 84,74,credit_card,3000
86 | 85,75,credit_card,1900
87 | 86,76,coupon,200
88 | 87,77,credit_card,0
89 | 88,77,bank_transfer,1900
90 | 89,78,bank_transfer,2600
91 | 90,79,credit_card,1800
92 | 91,79,credit_card,900
93 | 92,80,gift_card,300
94 | 93,81,coupon,200
95 | 94,82,credit_card,800
96 | 95,83,credit_card,100
97 | 96,84,bank_transfer,2500
98 | 97,85,bank_transfer,1700
99 | 98,86,coupon,2300
100 | 99,87,gift_card,3000
101 | 100,87,credit_card,2600
102 | 101,88,credit_card,2900
103 | 102,89,bank_transfer,2200
104 | 103,90,bank_transfer,200
105 | 104,91,credit_card,1900
106 | 105,92,bank_transfer,1500
107 | 106,92,coupon,200
108 | 107,93,gift_card,2600
109 | 108,94,coupon,700
110 | 109,95,coupon,2400
111 | 110,96,gift_card,1700
112 | 111,97,bank_transfer,1400
113 | 112,98,bank_transfer,1000
114 | 113,99,credit_card,2400
115 |
--------------------------------------------------------------------------------
/03_scaling_out/dynamic_batching.py:
--------------------------------------------------------------------------------
1 | # # Dynamic batching for ASCII and character conversion
2 |
3 | # This example demonstrates how to dynamically batch a simple
4 | # application that converts ASCII codes to characters and vice versa.
5 |
6 | # For more details about using dynamic batching and optimizing
7 | # the batching configurations for your application, see
8 | # the [dynamic batching guide](https://modal.com/docs/guide/dynamic-batching).
9 |
10 | # ## Setup
11 |
12 | # Let's start by defining the image for the application.
13 |
14 | import modal
15 |
16 | app = modal.App(
17 | "example-dynamic-batching",
18 | image=modal.Image.debian_slim(python_version="3.11"),
19 | )
20 |
21 |
22 | # ## Defining a Batched Function
23 |
24 | # Now, let's define a function that converts ASCII codes to characters. This
25 | # async Batched Function allows us to convert up to four ASCII codes at once.
26 |
27 |
28 | @app.function()
29 | @modal.batched(max_batch_size=4, wait_ms=1000)
30 | async def asciis_to_chars(asciis: list[int]) -> list[str]:
31 | return [chr(ascii) for ascii in asciis]
32 |
33 |
34 | # If there are fewer than four ASCII codes in the batch, the Function will wait
35 | # for one second, as specified by `wait_ms`, to allow more inputs to arrive before
36 | # returning the result.
37 |
38 | # The input `asciis` to the Function is a list of integers, and the
39 | # output is a list of strings. To allow batching, the input list `asciis`
40 | # and the output list must have the same length.
41 |
42 | # You must invoke the Function with an individual ASCII input, and a single
43 | # character will be returned in response.
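
# For instance, a single call might look like the sketch below (the entrypoint further
# down uses `.map.aio` to submit many inputs at once):

# ```python
# char = asciis_to_chars.remote(65)  # batched behind the scenes; returns "A"
# ```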
44 |
45 | # ## Defining a class with a Batched Method
46 |
47 | # Next, let's define a class that converts characters to ASCII codes. This
48 | # class has an async Batched Method `chars_to_asciis` that converts characters
49 | # to ASCII codes.
50 |
51 | # Note that if a class has a Batched Method, it cannot have any other Methods,
52 | # batched or otherwise.
53 |
54 |
55 | @app.cls()
56 | class AsciiConverter:
57 | @modal.batched(max_batch_size=4, wait_ms=1000)
58 | async def chars_to_asciis(self, chars: list[str]) -> list[int]:
59 | asciis = [ord(char) for char in chars]
60 | return asciis
61 |
62 |
63 | # ## ASCII and character conversion
64 |
65 | # Finally, let's define the `local_entrypoint` that uses the Batched Function
66 | # and Class Method to convert ASCII codes to characters and
67 | # vice versa.
68 |
69 | # We use [`map.aio`](https://modal.com/docs/reference/modal.Function#map) to asynchronously map
70 | # over the ASCII codes and characters. This allows us to invoke the Batched
71 | # Function and the Batched Method over a range of ASCII codes and characters
72 | # in parallel.
73 | #
74 | # Run this script to see which characters correspond to ASCII codes 33 through 38!
75 |
76 |
77 | @app.local_entrypoint()
78 | async def main():
79 | ascii_converter = AsciiConverter()
80 | chars = []
81 | async for char in asciis_to_chars.map.aio(range(33, 39)):
82 | chars.append(char)
83 |
84 | print("Characters:", chars)
85 |
86 | asciis = []
87 | async for ascii in ascii_converter.chars_to_asciis.map.aio(chars):
88 | asciis.append(ascii)
89 |
90 | print("ASCII codes:", asciis)
91 |
--------------------------------------------------------------------------------
/10_integrations/dbt/sample_proj_duckdb_s3/seeds/raw_orders.csv:
--------------------------------------------------------------------------------
1 | id,user_id,order_date,status
2 | 1,1,2018-01-01,returned
3 | 2,3,2018-01-02,completed
4 | 3,94,2018-01-04,completed
5 | 4,50,2018-01-05,completed
6 | 5,64,2018-01-05,completed
7 | 6,54,2018-01-07,completed
8 | 7,88,2018-01-09,completed
9 | 8,2,2018-01-11,returned
10 | 9,53,2018-01-12,completed
11 | 10,7,2018-01-14,completed
12 | 11,99,2018-01-14,completed
13 | 12,59,2018-01-15,completed
14 | 13,84,2018-01-17,completed
15 | 14,40,2018-01-17,returned
16 | 15,25,2018-01-17,completed
17 | 16,39,2018-01-18,completed
18 | 17,71,2018-01-18,completed
19 | 18,64,2018-01-20,returned
20 | 19,54,2018-01-22,completed
21 | 20,20,2018-01-23,completed
22 | 21,71,2018-01-23,completed
23 | 22,86,2018-01-24,completed
24 | 23,22,2018-01-26,return_pending
25 | 24,3,2018-01-27,completed
26 | 25,51,2018-01-28,completed
27 | 26,32,2018-01-28,completed
28 | 27,94,2018-01-29,completed
29 | 28,8,2018-01-29,completed
30 | 29,57,2018-01-31,completed
31 | 30,69,2018-02-02,completed
32 | 31,16,2018-02-02,completed
33 | 32,28,2018-02-04,completed
34 | 33,42,2018-02-04,completed
35 | 34,38,2018-02-06,completed
36 | 35,80,2018-02-08,completed
37 | 36,85,2018-02-10,completed
38 | 37,1,2018-02-10,completed
39 | 38,51,2018-02-10,completed
40 | 39,26,2018-02-11,completed
41 | 40,33,2018-02-13,completed
42 | 41,99,2018-02-14,completed
43 | 42,92,2018-02-16,completed
44 | 43,31,2018-02-17,completed
45 | 44,66,2018-02-17,completed
46 | 45,22,2018-02-17,completed
47 | 46,6,2018-02-19,completed
48 | 47,50,2018-02-20,completed
49 | 48,27,2018-02-21,completed
50 | 49,35,2018-02-21,completed
51 | 50,51,2018-02-23,completed
52 | 51,71,2018-02-24,completed
53 | 52,54,2018-02-25,return_pending
54 | 53,34,2018-02-26,completed
55 | 54,54,2018-02-26,completed
56 | 55,18,2018-02-27,completed
57 | 56,79,2018-02-28,completed
58 | 57,93,2018-03-01,completed
59 | 58,22,2018-03-01,completed
60 | 59,30,2018-03-02,completed
61 | 60,12,2018-03-03,completed
62 | 61,63,2018-03-03,completed
63 | 62,57,2018-03-05,completed
64 | 63,70,2018-03-06,completed
65 | 64,13,2018-03-07,completed
66 | 65,26,2018-03-08,completed
67 | 66,36,2018-03-10,completed
68 | 67,79,2018-03-11,completed
69 | 68,53,2018-03-11,completed
70 | 69,3,2018-03-11,completed
71 | 70,8,2018-03-12,completed
72 | 71,42,2018-03-12,shipped
73 | 72,30,2018-03-14,shipped
74 | 73,19,2018-03-16,completed
75 | 74,9,2018-03-17,shipped
76 | 75,69,2018-03-18,completed
77 | 76,25,2018-03-20,completed
78 | 77,35,2018-03-21,shipped
79 | 78,90,2018-03-23,shipped
80 | 79,52,2018-03-23,shipped
81 | 80,11,2018-03-23,shipped
82 | 81,76,2018-03-23,shipped
83 | 82,46,2018-03-24,shipped
84 | 83,54,2018-03-24,shipped
85 | 84,70,2018-03-26,placed
86 | 85,47,2018-03-26,shipped
87 | 86,68,2018-03-26,placed
88 | 87,46,2018-03-27,placed
89 | 88,91,2018-03-27,shipped
90 | 89,21,2018-03-28,placed
91 | 90,66,2018-03-30,shipped
92 | 91,47,2018-03-31,placed
93 | 92,84,2018-04-02,placed
94 | 93,66,2018-04-03,placed
95 | 94,63,2018-04-03,placed
96 | 95,27,2018-04-04,placed
97 | 96,90,2018-04-06,placed
98 | 97,89,2018-04-07,placed
99 | 98,41,2018-04-07,placed
100 | 99,85,2018-04-09,placed
101 |
--------------------------------------------------------------------------------
/06_gpu_and_ml/openai_whisper/finetuning/train/end_to_end_check.py:
--------------------------------------------------------------------------------
1 | """
2 | A full fine-tuning run on GPUs takes multiple hours, but we
3 | want to be able to validate changes quickly while coding.
4 |
5 | This module contains an end-to-end test that runs only 1 step of training,
6 | before testing that the partially trained model can be serialized, saved to
7 | persistent storage, and then downloaded locally for inference.
8 | """
9 |
10 | import pathlib
11 |
12 | from .config import app_config
13 | from .logs import get_logger
14 | from .train import app, persistent_volume, train
15 | from .transcribe import whisper_transcribe_audio
16 |
17 | logger = get_logger(__name__)
18 |
19 |
20 | # Test model serialization and persistence by starting a new remote
21 | # function that reads back the model files from the temporary network file system disk
22 | # and does a single sentence of translation.
23 | #
24 | # When doing full training runs, the saved model will be loaded in the same way
25 | # but from a *persisted* network file system, which keeps data around even after the Modal
26 | # ephemeral app that ran the training has stopped.
27 |
28 |
29 | @app.function(volumes={app_config.model_dir: persistent_volume})
30 | def test_download_and_tryout_model(run_id: str):
31 | from datasets import Audio, load_dataset
32 | from evaluate import load
33 |
34 | lang, lang_short = (
35 | "french",
36 | "fr",
37 | ) # the language doesn't matter for this test.
38 | model_dir = pathlib.Path(app_config.model_dir, run_id)
39 |
40 | # load streaming dataset and read first audio sample
41 | ds = load_dataset(
42 | app_config.dataset,
43 | lang_short,
44 | split="test",
45 | streaming=True,
46 | trust_remote_code=True,
47 | )
48 | ds = ds.cast_column("audio", Audio(sampling_rate=16_000))
49 | test_row = next(iter(ds))
50 | input_speech = test_row["audio"]
51 |
52 | predicted_transcription = whisper_transcribe_audio(
53 | model_dir=model_dir,
54 | language=lang,
55 | data=input_speech["array"],
56 | sample_rate_hz=input_speech["sampling_rate"],
57 | )
58 | expected_transcription = test_row["sentence"]
59 | wer = load("wer")
60 | wer_score = wer.compute(
61 | predictions=[predicted_transcription],
62 | references=[expected_transcription],
63 | )
64 | logger.info(
65 | f"{expected_transcription=}\n{predicted_transcription=}\n"
66 | f"Word Error Rate (WER): {wer_score}"
67 | )
68 | assert wer_score < 1.0, (
69 | f"Even without finetuning, a WER score of {wer_score} is far too high."
70 | )
71 |
72 |
73 | # This simple entrypoint function just starts an ephemeral app run and calls
74 | # the two test functions in sequence.
75 | #
76 | # Any runtime errors or assertion errors will fail the app and exit non-zero.
77 |
78 |
79 | @app.local_entrypoint()
80 | def run_test():
81 | # Test the `train` function by passing in test-specific configuration
82 | # that does only a minimal number of training steps and saves the model
83 | # to the temporary (i.e. ephemeral) network file system disk.
84 | #
85 | # This should take only ~1 min to run.
86 | train.remote(num_train_epochs=1.0, warmup_steps=0, max_steps=1)
87 | test_download_and_tryout_model.remote(run_id=app.app_id)
88 |
--------------------------------------------------------------------------------