├── 06_gpu_and_ml ├── comfyui │ ├── __init__.py │ ├── .gitignore │ ├── kjnodes │ │ ├── mood.jpg │ │ ├── tram.jpeg │ │ └── kjnodes_example.py │ ├── essentials │ │ ├── vernere.jpg │ │ ├── simple_mask.png │ │ └── essentials_example.py │ ├── ip_adapter │ │ ├── starry_night.jpg │ │ └── ip_adapter_example.py │ ├── memory_snapshot │ │ └── memory_snapshot_helper │ │ │ ├── __init__.py │ │ │ └── prestartup_script.py │ ├── was_node_suite │ │ └── was_node_example.py │ ├── impact │ │ └── impact_example.py │ ├── workflow_api.json │ └── comfyclient.py ├── nsight-profiling │ ├── .gitignore │ └── toy.cu ├── openai_whisper │ ├── finetuning │ │ ├── .gitignore │ │ ├── audio │ │ │ └── common_voice_hi_31822997.mp3 │ │ ├── requirements.txt │ │ ├── train │ │ │ ├── logs.py │ │ │ ├── transcribe.py │ │ │ └── end_to_end_check.py │ │ └── readme.md │ └── pod_transcriber │ │ ├── app │ │ ├── __init__.py │ │ ├── frontend │ │ │ ├── src │ │ │ │ ├── vite-env.d.ts │ │ │ │ ├── components │ │ │ │ │ ├── Spinner.tsx │ │ │ │ │ ├── HomeButton.tsx │ │ │ │ │ └── Footer.tsx │ │ │ │ ├── main.tsx │ │ │ │ ├── index.css │ │ │ │ └── routes │ │ │ │ │ └── podcast.tsx │ │ │ ├── postcss.config.cjs │ │ │ ├── vite.config.ts │ │ │ ├── tailwind.config.cjs │ │ │ ├── tsconfig.node.json │ │ │ ├── .gitignore │ │ │ ├── tsconfig.json │ │ │ ├── package.json │ │ │ └── index.html │ │ └── config.py │ │ └── README.md ├── speech-to-text │ ├── __init__.py │ ├── streaming-parakeet-frontend │ │ ├── index.html │ │ └── audio-processor.js │ ├── multitalker-frontend │ │ ├── index.html │ │ └── audio-processor.js │ └── streaming-diarization-frontend │ │ ├── audio-processor.js │ │ └── index.html ├── blender │ └── IceModal.blend ├── sam │ └── cliff_jumping.mp4 ├── tensorflow │ └── tensorboard.png ├── protein-folding │ ├── gradio_ui.png │ ├── data │ │ ├── chai1_quick_inference.json │ │ ├── chai1_default_inference.json │ │ ├── chai1_default_input.fasta │ │ └── boltz_affinity.yaml │ └── frontend │ │ ├── index.css │ │ ├── favicon.svg │ │ └── background.svg ├── controlnet │ └── demo_images │ │ ├── dog.png │ │ ├── house.png │ │ ├── modal-logo-bright.png │ │ └── modal-logo-edges.png ├── hyperparameter-sweep │ ├── gradio.png │ ├── shakespeare.jpg │ ├── tensorboard.png │ ├── assets │ │ ├── index.css │ │ ├── favicon.svg │ │ └── background.svg │ └── src │ │ ├── tokenizer.py │ │ ├── dataset.py │ │ └── logs_manager.py ├── dreambooth │ ├── gradio-image-generate.png │ ├── instance_example_urls.txt │ └── assets │ │ ├── index.css │ │ ├── favicon.svg │ │ └── background.svg ├── stable_diffusion │ └── demo_images │ │ └── dog.png ├── reinforcement-learning │ └── config_grpo.yaml ├── import_torch.py ├── llm-serving │ └── openai_compatible │ │ ├── locustfile.py │ │ └── load_test.py ├── embeddings │ ├── qdrant.py │ └── wikipedia │ │ └── download.py ├── gpu_fallbacks.py └── gpu_snapshot.py ├── 13_sandboxes ├── codelangchain │ ├── __init__.py │ ├── src │ │ ├── __init__.py │ │ ├── common.py │ │ ├── retrieval.py │ │ └── edges.py │ ├── README.md │ └── langserve.py └── sandbox_agent.py ├── 10_integrations ├── dbt │ ├── .gitignore │ ├── sample_proj_duckdb_s3 │ │ ├── seeds │ │ │ ├── .gitkeep │ │ │ ├── raw_customers.csv │ │ │ ├── raw_payments.csv │ │ │ └── raw_orders.csv │ │ ├── tests │ │ │ └── .gitkeep │ │ ├── snapshots │ │ │ └── .gitkeep │ │ ├── .gitignore │ │ ├── models │ │ │ ├── sources.yml │ │ │ ├── staging │ │ │ │ ├── stg_customers.sql │ │ │ │ ├── stg_orders.sql │ │ │ │ ├── stg_payments.sql │ │ │ │ └── schema.yml │ │ │ ├── orders.sql │ │ │ └── customers.sql │ │ ├── profiles.yml │ │ └── 
dbt_project.yml │ └── dbt_docs.png ├── pushgateway_diagram.png ├── pushgateway_grafana.png ├── streamlit │ ├── streamlit.png │ ├── app.py │ └── serve_streamlit.py ├── nyc_yellow_taxi_trips_s3_mount.png ├── ikea-instructions-for-building-a-gpu-rig-for-deep-learning.png ├── tailscale │ ├── entrypoint.sh │ └── modal_tailscale.py └── webscraper_old.py ├── 07_web_endpoints ├── webrtc │ ├── yolo │ │ ├── __init__.py │ │ └── yolo_classes.txt │ └── frontend │ │ └── index.html ├── badges_deploy.png ├── fasthtml-checkboxes │ ├── ui.png │ ├── constants.py │ ├── styles.css │ ├── cbx_locustfile.py │ └── cbx_load_test.py ├── flask_app.py ├── fasthtml_app.py ├── fastapi_app.py ├── flask_streaming.py ├── streaming.py └── badges.py ├── 03_scaling_out ├── stock_prices.png ├── basic_grid_search.py └── dynamic_batching.py ├── 02_building_containers ├── screenshot.png ├── urls.txt ├── install_flash_attn.py ├── import_sklearn.py └── install_cuda.py ├── misc ├── batch_inference │ ├── batch_inference_roc.png │ ├── batch_inference_progress.png │ └── batch_inference_huggingface.png ├── README.md ├── say_hello_cron.py ├── google_search_generator.py ├── hello_shebang.py ├── queue_simple.py ├── lmdeploy_oai_compatible.py └── tgi_oai_compatible.py ├── .gitignore ├── internal ├── requirements.txt ├── conftest.py ├── test-event.json ├── examples_test.py └── run_example.py ├── .pre-commit-config.yaml ├── 01_getting_started ├── get_started.py ├── generators.py ├── inference.py ├── inference_endpoint.py ├── inference_map.py ├── inference_perf.py └── inference_full.py ├── 08_advanced ├── generators_async.py ├── hello_world_async.py ├── parallel_execution.py └── poll_delayed_result.py ├── .github ├── workflows │ ├── typecheck.yml │ ├── cd.yml │ ├── build-and-run-example.yml │ ├── check.yml │ ├── stale.yml │ └── run-examples.yml ├── actions │ └── setup │ │ └── action.yml └── pull_request_template.md ├── pyproject.toml ├── LICENSE ├── README.md ├── 05_scheduling └── schedule_simple.py └── 09_job_queues └── doc_ocr_frontend └── index.html /06_gpu_and_ml/comfyui/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /13_sandboxes/codelangchain/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /13_sandboxes/codelangchain/src/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /06_gpu_and_ml/comfyui/.gitignore: -------------------------------------------------------------------------------- 1 | comfyui_gen_image.png 2 | -------------------------------------------------------------------------------- /06_gpu_and_ml/nsight-profiling/.gitignore: -------------------------------------------------------------------------------- 1 | *.nsys-rep 2 | -------------------------------------------------------------------------------- /10_integrations/dbt/.gitignore: -------------------------------------------------------------------------------- 1 | logs/ 2 | sample_proj/logs -------------------------------------------------------------------------------- /10_integrations/dbt/sample_proj_duckdb_s3/seeds/.gitkeep: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /10_integrations/dbt/sample_proj_duckdb_s3/tests/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /06_gpu_and_ml/openai_whisper/finetuning/.gitignore: -------------------------------------------------------------------------------- 1 | models/ 2 | -------------------------------------------------------------------------------- /06_gpu_and_ml/openai_whisper/pod_transcriber/app/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /10_integrations/dbt/sample_proj_duckdb_s3/snapshots/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /07_web_endpoints/webrtc/yolo/__init__.py: -------------------------------------------------------------------------------- 1 | from .yolo import YOLOv10 as YOLOv10 2 | -------------------------------------------------------------------------------- /10_integrations/dbt/sample_proj_duckdb_s3/.gitignore: -------------------------------------------------------------------------------- 1 | 2 | target/ 3 | dbt_packages/ 4 | logs/ 5 | -------------------------------------------------------------------------------- /06_gpu_and_ml/speech-to-text/__init__.py: -------------------------------------------------------------------------------- 1 | # --- 2 | # lambda-test: false 3 | # pytest: false 4 | # --- 5 | -------------------------------------------------------------------------------- /06_gpu_and_ml/openai_whisper/pod_transcriber/app/frontend/src/vite-env.d.ts: -------------------------------------------------------------------------------- 1 | /// 2 | -------------------------------------------------------------------------------- /03_scaling_out/stock_prices.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/modal-labs/modal-examples/HEAD/03_scaling_out/stock_prices.png -------------------------------------------------------------------------------- /10_integrations/dbt/dbt_docs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/modal-labs/modal-examples/HEAD/10_integrations/dbt/dbt_docs.png -------------------------------------------------------------------------------- /06_gpu_and_ml/blender/IceModal.blend: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/modal-labs/modal-examples/HEAD/06_gpu_and_ml/blender/IceModal.blend -------------------------------------------------------------------------------- /06_gpu_and_ml/sam/cliff_jumping.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/modal-labs/modal-examples/HEAD/06_gpu_and_ml/sam/cliff_jumping.mp4 -------------------------------------------------------------------------------- /07_web_endpoints/badges_deploy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/modal-labs/modal-examples/HEAD/07_web_endpoints/badges_deploy.png -------------------------------------------------------------------------------- 
/02_building_containers/screenshot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/modal-labs/modal-examples/HEAD/02_building_containers/screenshot.png -------------------------------------------------------------------------------- /06_gpu_and_ml/comfyui/kjnodes/mood.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/modal-labs/modal-examples/HEAD/06_gpu_and_ml/comfyui/kjnodes/mood.jpg -------------------------------------------------------------------------------- /06_gpu_and_ml/comfyui/kjnodes/tram.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/modal-labs/modal-examples/HEAD/06_gpu_and_ml/comfyui/kjnodes/tram.jpeg -------------------------------------------------------------------------------- /06_gpu_and_ml/tensorflow/tensorboard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/modal-labs/modal-examples/HEAD/06_gpu_and_ml/tensorflow/tensorboard.png -------------------------------------------------------------------------------- /10_integrations/pushgateway_diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/modal-labs/modal-examples/HEAD/10_integrations/pushgateway_diagram.png -------------------------------------------------------------------------------- /10_integrations/pushgateway_grafana.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/modal-labs/modal-examples/HEAD/10_integrations/pushgateway_grafana.png -------------------------------------------------------------------------------- /10_integrations/streamlit/streamlit.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/modal-labs/modal-examples/HEAD/10_integrations/streamlit/streamlit.png -------------------------------------------------------------------------------- /06_gpu_and_ml/protein-folding/gradio_ui.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/modal-labs/modal-examples/HEAD/06_gpu_and_ml/protein-folding/gradio_ui.png -------------------------------------------------------------------------------- /07_web_endpoints/fasthtml-checkboxes/ui.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/modal-labs/modal-examples/HEAD/07_web_endpoints/fasthtml-checkboxes/ui.png -------------------------------------------------------------------------------- /06_gpu_and_ml/comfyui/essentials/vernere.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/modal-labs/modal-examples/HEAD/06_gpu_and_ml/comfyui/essentials/vernere.jpg -------------------------------------------------------------------------------- /06_gpu_and_ml/controlnet/demo_images/dog.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/modal-labs/modal-examples/HEAD/06_gpu_and_ml/controlnet/demo_images/dog.png -------------------------------------------------------------------------------- /06_gpu_and_ml/controlnet/demo_images/house.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/modal-labs/modal-examples/HEAD/06_gpu_and_ml/controlnet/demo_images/house.png -------------------------------------------------------------------------------- /06_gpu_and_ml/hyperparameter-sweep/gradio.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/modal-labs/modal-examples/HEAD/06_gpu_and_ml/hyperparameter-sweep/gradio.png -------------------------------------------------------------------------------- /misc/batch_inference/batch_inference_roc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/modal-labs/modal-examples/HEAD/misc/batch_inference/batch_inference_roc.png -------------------------------------------------------------------------------- /06_gpu_and_ml/comfyui/essentials/simple_mask.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/modal-labs/modal-examples/HEAD/06_gpu_and_ml/comfyui/essentials/simple_mask.png -------------------------------------------------------------------------------- /06_gpu_and_ml/comfyui/ip_adapter/starry_night.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/modal-labs/modal-examples/HEAD/06_gpu_and_ml/comfyui/ip_adapter/starry_night.jpg -------------------------------------------------------------------------------- /06_gpu_and_ml/dreambooth/gradio-image-generate.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/modal-labs/modal-examples/HEAD/06_gpu_and_ml/dreambooth/gradio-image-generate.png -------------------------------------------------------------------------------- /06_gpu_and_ml/hyperparameter-sweep/shakespeare.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/modal-labs/modal-examples/HEAD/06_gpu_and_ml/hyperparameter-sweep/shakespeare.jpg -------------------------------------------------------------------------------- /06_gpu_and_ml/hyperparameter-sweep/tensorboard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/modal-labs/modal-examples/HEAD/06_gpu_and_ml/hyperparameter-sweep/tensorboard.png -------------------------------------------------------------------------------- /06_gpu_and_ml/stable_diffusion/demo_images/dog.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/modal-labs/modal-examples/HEAD/06_gpu_and_ml/stable_diffusion/demo_images/dog.png -------------------------------------------------------------------------------- /10_integrations/nyc_yellow_taxi_trips_s3_mount.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/modal-labs/modal-examples/HEAD/10_integrations/nyc_yellow_taxi_trips_s3_mount.png -------------------------------------------------------------------------------- /misc/batch_inference/batch_inference_progress.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/modal-labs/modal-examples/HEAD/misc/batch_inference/batch_inference_progress.png -------------------------------------------------------------------------------- 
/misc/batch_inference/batch_inference_huggingface.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/modal-labs/modal-examples/HEAD/misc/batch_inference/batch_inference_huggingface.png -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | **/__pycache__ 2 | .DS_Store 3 | .vscode 4 | 5 | venv 6 | .venv 7 | 8 | # secrets file for act, tool for local GitHub Actions testing 9 | .secrets 10 | -------------------------------------------------------------------------------- /internal/requirements.txt: -------------------------------------------------------------------------------- 1 | pytest 2 | jupyter 3 | ipython 4 | nbconvert 5 | jupytext~=1.16.1 6 | pydantic~=1.10.14 7 | mypy==1.2.0 8 | ruff==0.9.6 9 | fastapi 10 | -------------------------------------------------------------------------------- /06_gpu_and_ml/controlnet/demo_images/modal-logo-bright.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/modal-labs/modal-examples/HEAD/06_gpu_and_ml/controlnet/demo_images/modal-logo-bright.png -------------------------------------------------------------------------------- /06_gpu_and_ml/controlnet/demo_images/modal-logo-edges.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/modal-labs/modal-examples/HEAD/06_gpu_and_ml/controlnet/demo_images/modal-logo-edges.png -------------------------------------------------------------------------------- /07_web_endpoints/fasthtml-checkboxes/constants.py: -------------------------------------------------------------------------------- 1 | # --- 2 | # lambda-test: false # auxiliary-file 3 | # --- 4 | N_CHECKBOXES = 100_000 # feel free to increase, if you dare! 
5 | -------------------------------------------------------------------------------- /06_gpu_and_ml/openai_whisper/pod_transcriber/app/frontend/postcss.config.cjs: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | plugins: { 3 | tailwindcss: {}, 4 | autoprefixer: {}, 5 | }, 6 | }; 7 | -------------------------------------------------------------------------------- /internal/conftest.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | 4 | @pytest.fixture(autouse=True) 5 | def disable_auto_mount(monkeypatch): 6 | monkeypatch.setenv("MODAL_AUTOMOUNT", "0") 7 | yield 8 | -------------------------------------------------------------------------------- /06_gpu_and_ml/protein-folding/data/chai1_quick_inference.json: -------------------------------------------------------------------------------- 1 | { 2 | "num_trunk_recycles": 1, 3 | "num_diffn_timesteps": 10, 4 | "seed": 42, 5 | "use_esm_embeddings": true 6 | } 7 | -------------------------------------------------------------------------------- /06_gpu_and_ml/openai_whisper/finetuning/audio/common_voice_hi_31822997.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/modal-labs/modal-examples/HEAD/06_gpu_and_ml/openai_whisper/finetuning/audio/common_voice_hi_31822997.mp3 -------------------------------------------------------------------------------- /10_integrations/ikea-instructions-for-building-a-gpu-rig-for-deep-learning.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/modal-labs/modal-examples/HEAD/10_integrations/ikea-instructions-for-building-a-gpu-rig-for-deep-learning.png -------------------------------------------------------------------------------- /06_gpu_and_ml/openai_whisper/finetuning/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets~=3.2.0 2 | evaluate~=0.4.3 3 | jiwer~=3.0.5 4 | librosa~=0.10.0 5 | torch~=2.5.1 6 | torchaudio~=2.5.1 7 | transformers~=4.48.0 8 | accelerate~=1.2.1 9 | -------------------------------------------------------------------------------- /06_gpu_and_ml/protein-folding/data/chai1_default_inference.json: -------------------------------------------------------------------------------- 1 | { 2 | "num_trunk_recycles": 3, 3 | "num_diffn_timesteps": 200, 4 | "seed": 42, 5 | "use_esm_embeddings": true, 6 | "use_msa_server": true 7 | } 8 | -------------------------------------------------------------------------------- /06_gpu_and_ml/openai_whisper/pod_transcriber/app/frontend/vite.config.ts: -------------------------------------------------------------------------------- 1 | import { defineConfig } from "vite"; 2 | import react from "@vitejs/plugin-react"; 3 | 4 | // https://vitejs.dev/config/ 5 | export default defineConfig({ 6 | plugins: [react()], 7 | }); 8 | -------------------------------------------------------------------------------- /06_gpu_and_ml/openai_whisper/pod_transcriber/app/frontend/tailwind.config.cjs: -------------------------------------------------------------------------------- 1 | /** @type {import('tailwindcss').Config} */ 2 | module.exports = { 3 | content: ["./index.html", "./src/**/*.{js,ts,jsx,tsx}"], 4 | theme: { 5 | extend: {}, 6 | }, 7 | plugins: [], 8 | }; 9 | -------------------------------------------------------------------------------- 
/06_gpu_and_ml/openai_whisper/pod_transcriber/app/frontend/src/components/Spinner.tsx: -------------------------------------------------------------------------------- 1 | import PulseLoader from "react-spinners/PulseLoader"; 2 | 3 | export default function Spinner({ size }: { size: number }) { 4 | return ; 5 | } 6 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/astral-sh/ruff-pre-commit 3 | # keep version here in sync with CI/CD and other modal repos 4 | rev: "v0.9.6" 5 | hooks: 6 | - id: ruff 7 | args: [--fix, --exit-non-zero-on-fix] 8 | - id: ruff-format 9 | -------------------------------------------------------------------------------- /06_gpu_and_ml/dreambooth/instance_example_urls.txt: -------------------------------------------------------------------------------- 1 | https://modal-public-assets.s3.amazonaws.com/example-dreambooth-app/fkRYgv6.png 2 | https://modal-public-assets.s3.amazonaws.com/example-dreambooth-app/98k9yDg.jpg 3 | https://modal-public-assets.s3.amazonaws.com/example-dreambooth-app/gHlW8Kw.jpg 4 | -------------------------------------------------------------------------------- /06_gpu_and_ml/openai_whisper/pod_transcriber/app/frontend/src/main.tsx: -------------------------------------------------------------------------------- 1 | import React from "react"; 2 | import ReactDOM from "react-dom/client"; 3 | import App from "./app"; 4 | import "./index.css"; 5 | 6 | ReactDOM.createRoot(document.getElementById("root") as HTMLElement).render( 7 | 8 | ); 9 | -------------------------------------------------------------------------------- /06_gpu_and_ml/openai_whisper/pod_transcriber/app/frontend/tsconfig.node.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "composite": true, 4 | "module": "ESNext", 5 | "moduleResolution": "Node", 6 | "allowSyntheticDefaultImports": true 7 | }, 8 | "include": ["vite.config.ts"] 9 | } 10 | -------------------------------------------------------------------------------- /10_integrations/dbt/sample_proj_duckdb_s3/models/sources.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | sources: 4 | - name: external_source 5 | meta: 6 | external_location: "s3://modal-example-dbt-duckdb-s3/sources/{name}.parquet" 7 | tables: 8 | - name: raw_customers 9 | - name: raw_orders 10 | - name: raw_payments 11 | -------------------------------------------------------------------------------- /01_getting_started/get_started.py: -------------------------------------------------------------------------------- 1 | import modal 2 | 3 | app = modal.App("example-get-started") 4 | 5 | 6 | @app.function() 7 | def square(x): 8 | print("This code is running on a remote worker!") 9 | return x**2 10 | 11 | 12 | @app.local_entrypoint() 13 | def main(): 14 | print("the square is", square.remote(42)) 15 | -------------------------------------------------------------------------------- /02_building_containers/urls.txt: -------------------------------------------------------------------------------- 1 | adobe.com 2 | alibaba.com 3 | aliexpress.com 4 | amazon.com 5 | apple.com 6 | baidu.com 7 | bbc.co.uk 8 | bing.com 9 | blogspot.com 10 | booking.com 11 | craigslist.org 12 | dailymail.co.uk 13 | dropbox.com 14 | ebay.com 15 | facebook.com 16 | github.com 17 | 
google.com 18 | imdb.com 19 | imgur.com 20 | instagram.com 21 | -------------------------------------------------------------------------------- /06_gpu_and_ml/openai_whisper/pod_transcriber/app/frontend/.gitignore: -------------------------------------------------------------------------------- 1 | # Logs 2 | logs 3 | *.log 4 | npm-debug.log* 5 | yarn-debug.log* 6 | yarn-error.log* 7 | pnpm-debug.log* 8 | lerna-debug.log* 9 | 10 | node_modules 11 | dist 12 | dist-ssr 13 | *.local 14 | 15 | # Editor directories and files 16 | .vscode/* 17 | !.vscode/extensions.json 18 | .idea 19 | .DS_Store 20 | *.suo 21 | *.ntvs* 22 | *.njsproj 23 | *.sln 24 | *.sw? 25 | -------------------------------------------------------------------------------- /06_gpu_and_ml/openai_whisper/pod_transcriber/app/frontend/src/components/HomeButton.tsx: -------------------------------------------------------------------------------- 1 | import { Link } from "react-router-dom"; 2 | 3 | export default function HomeButton() { 4 | return ( 5 | 6 | 9 | 10 | ); 11 | } 12 | -------------------------------------------------------------------------------- /06_gpu_and_ml/dreambooth/assets/index.css: -------------------------------------------------------------------------------- 1 | /* Bit of Modal Labs color scheming for the Gradio.app UI 2 | 3 | from https://github.com/modal-labs/modal-examples */ 4 | 5 | a { 6 | text-decoration: inherit !important; 7 | } 8 | 9 | gradio-app { 10 | background-image: url(/assets/background.svg) !important; 11 | background-repeat: no-repeat !important; 12 | background-size: 100% auto; 13 | padding-top: 3%; 14 | background-color: black; 15 | } 16 | -------------------------------------------------------------------------------- /06_gpu_and_ml/protein-folding/frontend/index.css: -------------------------------------------------------------------------------- 1 | /* Bit of Modal Labs color scheming for the Gradio.app UI 2 | 3 | from https://github.com/modal-labs/modal-examples */ 4 | 5 | a { 6 | text-decoration: inherit !important; 7 | } 8 | 9 | gradio-app { 10 | background-image: url(/assets/background.svg) !important; 11 | background-repeat: no-repeat !important; 12 | background-size: 100% auto; 13 | padding-top: 3%; 14 | background-color: black; 15 | } 16 | -------------------------------------------------------------------------------- /06_gpu_and_ml/hyperparameter-sweep/assets/index.css: -------------------------------------------------------------------------------- 1 | /* Bit of Modal Labs color scheming for the Gradio.app UI 2 | 3 | from https://github.com/modal-labs/modal-examples */ 4 | 5 | a { 6 | text-decoration: inherit !important; 7 | } 8 | 9 | gradio-app { 10 | background-image: url(/assets/background.svg) !important; 11 | background-repeat: no-repeat !important; 12 | background-size: 100% auto; 13 | padding-top: 3%; 14 | background-color: black; 15 | } 16 | -------------------------------------------------------------------------------- /misc/README.md: -------------------------------------------------------------------------------- 1 | # Miscellaneous Examples 2 | 3 | This directory contains a variety of examples of ways to use Modal. 4 | 5 | Unlike the examples in the rest of this repository, these examples are not 6 | continually monitored for correctness, so it is possible that they may become 7 | out of date or incorrect over time. 8 | 9 | If you find an error in one of these examples, please report it in the issues 10 | tab or, even better, submit a pull request to fix it.
11 | -------------------------------------------------------------------------------- /internal/test-event.json: -------------------------------------------------------------------------------- 1 | { 2 | "event_name": "pull_request", 3 | "pull_request": { 4 | "base": { 5 | "ref": "main", 6 | "sha": "3e3cba16881e73a80887c2f09477e86f0522b072" 7 | }, 8 | "head": { 9 | "ref": "charlesfrye/run-examples-again", 10 | "sha": "b639aa6e806d2db555cbf4cfc29f2b93c4d50fcb" 11 | } 12 | }, 13 | "repository": { 14 | "full_name": "modal-labs/modal-examples" 15 | }, 16 | "ref": "refs/pull/1/merge" 17 | } 18 | 19 | -------------------------------------------------------------------------------- /06_gpu_and_ml/comfyui/memory_snapshot/memory_snapshot_helper/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from aiohttp import web 4 | from server import PromptServer 5 | 6 | # ------- API Endpoints ------- 7 | 8 | 9 | @PromptServer.instance.routes.post("/cuda/set_device") 10 | async def set_current_device(request): 11 | os.environ["CUDA_VISIBLE_DEVICES"] = "0" 12 | return web.json_response({"status": "success"}) 13 | 14 | 15 | # Empty for ComfyUI node registration 16 | NODE_CLASS_MAPPINGS = {} 17 | -------------------------------------------------------------------------------- /10_integrations/dbt/sample_proj_duckdb_s3/models/staging/stg_customers.sql: -------------------------------------------------------------------------------- 1 | with source as ( 2 | 3 | {#- 4 | Here we load from the external S3 bucket data, which was seeded 5 | by running the `seed` Modal function. 6 | #} 7 | select * from {{ source('external_source', 'raw_customers') }} 8 | 9 | ), 10 | 11 | renamed as ( 12 | 13 | select 14 | id as customer_id, 15 | first_name, 16 | last_name 17 | 18 | from source 19 | 20 | ) 21 | 22 | select * from renamed 23 | -------------------------------------------------------------------------------- /08_advanced/generators_async.py: -------------------------------------------------------------------------------- 1 | # # Run async generator function on Modal 2 | 3 | # This example shows how you can run an async generator function on Modal. 4 | # Modal natively supports async/await syntax using asyncio. 5 | 6 | import modal 7 | 8 | app = modal.App("example-generators-async") 9 | 10 | 11 | @app.function() 12 | def f(i): 13 | for j in range(i): 14 | yield j 15 | 16 | 17 | @app.local_entrypoint() 18 | async def run_async(): 19 | async for r in f.remote_gen.aio(10): 20 | print(r) 21 | -------------------------------------------------------------------------------- /misc/say_hello_cron.py: -------------------------------------------------------------------------------- 1 | # # Deploy a cron job with Modal 2 | 3 | # This example shows how you can deploy a cron job with Modal. 
4 | 5 | import time 6 | from datetime import datetime, timezone 7 | 8 | import modal 9 | 10 | app = modal.App("example-say-hello-cron") 11 | 12 | 13 | @app.function(schedule=modal.Period(seconds=10)) 14 | def say_hello(): 15 | start_time = datetime.now(timezone.utc) 16 | for i in range(10): 17 | print(f"Message #{i} from invocation at {start_time}") 18 | time.sleep(1.5) 19 | -------------------------------------------------------------------------------- /10_integrations/dbt/sample_proj_duckdb_s3/models/staging/stg_orders.sql: -------------------------------------------------------------------------------- 1 | with source as ( 2 | 3 | {#- 4 | Here we load from the external S3 bucket data, which was seeded 5 | by running the `seed` Modal function. 6 | #} 7 | select * from {{ source('external_source', 'raw_orders') }} 8 | 9 | ), 10 | 11 | renamed as ( 12 | 13 | select 14 | id as order_id, 15 | user_id as customer_id, 16 | order_date, 17 | status 18 | 19 | from source 20 | 21 | ) 22 | 23 | select * from renamed 24 | -------------------------------------------------------------------------------- /06_gpu_and_ml/hyperparameter-sweep/src/tokenizer.py: -------------------------------------------------------------------------------- 1 | # --- 2 | # pytest: false 3 | # --- 4 | 5 | 6 | class Tokenizer: 7 | def __init__(self, text): 8 | self.unique_chars = sorted(set(text)) # sorted to ensure consistent 9 | self.stoi = {c: i for i, c in enumerate(self.unique_chars)} 10 | self.itos = {i: c for i, c in enumerate(self.unique_chars)} 11 | self.vocab_size = len(self.unique_chars) 12 | 13 | def encode(self, text): 14 | return [self.stoi[c] for c in text] 15 | 16 | def decode(self, tokens): 17 | return [self.itos[int(t)] for t in tokens] 18 | -------------------------------------------------------------------------------- /06_gpu_and_ml/protein-folding/data/chai1_default_input.fasta: -------------------------------------------------------------------------------- 1 | >protein|name=example-of-long-protein 2 | AGSHSMRYFSTSVSRPGRGEPRFIAVGYVDDTQFVRFDSDAASPRGEPRAPWVEQEGPEYWDRETQKYKRQAQTDRVSLRNLRGYYNQSEAGSHTLQWMFGCDLGPDGRLLRGYDQSAYDGKDYIALNEDLRSWTAADTAAQITQRKWEAAREAEQRRAYLEGTCVEWLRRYLENGKETLQRAEHPKTHVTHHPVSDHEATLRCWALGFYPAEITLTWQWDGEDQTQDTELVETRPAGDGTFQKWAAVVVPSGEEQRYTCHVQHEGLPEPLTLRWEP 3 | >protein|name=example-of-short-protein 4 | AIQRTPKIQVYSRHPAENGKSNFLNCYVSGFHPSDIEVDLLKNGERIEKVEHSDLSFSKDWSFYLLYYTEFTPTEKDEYACRVNHVTLSQPKIVKWDRDM 5 | >protein|name=example-peptide 6 | GAAL 7 | >ligand|name=example-ligand-as-smiles 8 | CCCCCCCCCCCCCC(=O)O 9 | -------------------------------------------------------------------------------- /.github/workflows/typecheck.yml: -------------------------------------------------------------------------------- 1 | name: Typecheck 2 | on: 3 | push: 4 | branches: 5 | - main 6 | pull_request: 7 | workflow_dispatch: 8 | 9 | jobs: 10 | mypy: 11 | name: MyPy 12 | runs-on: ubuntu-24.04 13 | 14 | steps: 15 | - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # v3 16 | 17 | - uses: actions/setup-python@65d7f2d534ac1bc67fcd62888c5f4f3d2cb2b236 # v4 18 | with: 19 | python-version: "3.11" 20 | 21 | - name: Install mypy 22 | run: pip install mypy==0.950 23 | 24 | - name: Run 25 | run: python3 internal/typecheck.py 26 | -------------------------------------------------------------------------------- /06_gpu_and_ml/reinforcement-learning/config_grpo.yaml: -------------------------------------------------------------------------------- 1 | compute_environment: LOCAL_MACHINE 2 | debug: 
false 3 | deepspeed_config: 4 | deepspeed_multinode_launcher: standard 5 | offload_optimizer_device: none 6 | offload_param_device: none 7 | zero3_init_flag: true 8 | zero3_save_16bit_model: true 9 | zero_stage: 3 10 | distributed_type: DEEPSPEED 11 | downcast_bf16: 'no' 12 | machine_rank: 0 13 | main_training_function: main 14 | mixed_precision: bf16 15 | num_machines: 1 16 | num_processes: 3 17 | rdzv_backend: static 18 | same_network: true 19 | tpu_env: [] 20 | tpu_use_cluster: false 21 | tpu_use_sudo: false 22 | use_cpu: false -------------------------------------------------------------------------------- /10_integrations/dbt/sample_proj_duckdb_s3/models/staging/stg_payments.sql: -------------------------------------------------------------------------------- 1 | with source as ( 2 | 3 | {#- 4 | Here we load from the external S3 bucket data, which was seeded 5 | by running the `seed` Modal function. 6 | #} 7 | select * from {{ source('external_source', 'raw_payments') }} 8 | 9 | ), 10 | 11 | renamed as ( 12 | 13 | select 14 | id as payment_id, 15 | order_id, 16 | payment_method, 17 | 18 | -- `amount` is currently stored in cents, so we convert it to dollars 19 | amount / 100 as amount 20 | 21 | from source 22 | 23 | ) 24 | 25 | select * from renamed 26 | -------------------------------------------------------------------------------- /06_gpu_and_ml/protein-folding/data/boltz_affinity.yaml: -------------------------------------------------------------------------------- 1 | version: 1 # Optional, defaults to 1 2 | sequences: 3 | - protein: 4 | id: A 5 | sequence: MVTPEGNVSLVDESLLVGVTDEDRAVRSAHQFYERLIGLWAPAVMEAAHELGVFAALAEAPADSGELARRLDCDARAMRVLLDALYAYDVIDRIHDTNGFRYLLSAEARECLLPGTLFSLVGKFMHDINVAWPAWRNLAEVVRHGARDTSGAESPNGIAQEDYESLVGGINFWAPPIVTTLSRKLRASGRSGDATASVLDVGCGTGLYSQLLLREFPRWTATGLDVERIATLANAQALRLGVEERFATRAGDFWRGGWGTGYDLVLFANIFHLQTPASAVRLMRHAAACLAPDGLVAVVDQIVDADREPKTPQDRFALLFAASMTNTGGGDAYTFQEYEEWFTAAGLQRIETLDTPMHRILLARRATEPSAVPEGQASENLYFQ 6 | - ligand: 7 | id: B 8 | smiles: 'N[C@@H](Cc1ccc(O)cc1)C(=O)O' 9 | properties: 10 | - affinity: 11 | binder: B -------------------------------------------------------------------------------- /10_integrations/dbt/sample_proj_duckdb_s3/profiles.yml: -------------------------------------------------------------------------------- 1 | { 2 | "sample_proj": 3 | { 4 | "target": "modal", 5 | "outputs": 6 | { 7 | "modal": 8 | { 9 | "type": "duckdb", 10 | "path": "/tmp/dbt.duckdb", 11 | "extensions": ["httpfs", "parquet"], 12 | "settings": 13 | { 14 | "s3_region": "us-east-1", 15 | "s3_access_key_id": "{{ env_var('AWS_ACCESS_KEY_ID') }}", 16 | "s3_secret_access_key": "{{ env_var('AWS_SECRET_ACCESS_KEY') }}", 17 | }, 18 | }, 19 | }, 20 | }, 21 | } 22 | -------------------------------------------------------------------------------- /01_getting_started/generators.py: -------------------------------------------------------------------------------- 1 | # # Run a generator function on Modal 2 | 3 | # This example shows how you can run a generator function on Modal. We define a 4 | # function that `yields` values and then call it with the [`remote_gen`](https://modal.com/docs/reference/modal.Function#remote_gen) method. The 5 | # `remote_gen` method returns a generator object that can be used to iterate over 6 | # the values produced by the function. 
7 | 8 | import modal 9 | 10 | app = modal.App("example-generators") 11 | 12 | 13 | @app.function() 14 | def f(i): 15 | for j in range(i): 16 | yield j 17 | 18 | 19 | @app.local_entrypoint() 20 | def main(): 21 | for r in f.remote_gen(10): 22 | print(r) 23 | -------------------------------------------------------------------------------- /06_gpu_and_ml/openai_whisper/pod_transcriber/app/frontend/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "ESNext", 4 | "useDefineForClassFields": true, 5 | "lib": ["DOM", "DOM.Iterable", "ESNext"], 6 | "allowJs": false, 7 | "skipLibCheck": true, 8 | "esModuleInterop": false, 9 | "allowSyntheticDefaultImports": true, 10 | "strict": true, 11 | "forceConsistentCasingInFileNames": true, 12 | "module": "ESNext", 13 | "moduleResolution": "Node", 14 | "resolveJsonModule": true, 15 | "isolatedModules": true, 16 | "noEmit": true, 17 | "jsx": "react-jsx" 18 | }, 19 | "include": ["src"], 20 | "references": [{ "path": "./tsconfig.node.json" }] 21 | } 22 | -------------------------------------------------------------------------------- /.github/workflows/cd.yml: -------------------------------------------------------------------------------- 1 | name: Deploy 2 | on: 3 | push: 4 | branches: 5 | - main 6 | workflow_dispatch: 7 | 8 | jobs: 9 | deploy: 10 | name: Deploy example apps 11 | if: github.ref == 'refs/heads/main' 12 | runs-on: ubuntu-24.04 13 | env: 14 | MODAL_TOKEN_ID: ${{ secrets.MODAL_MODAL_LABS_TOKEN_ID }} 15 | MODAL_TOKEN_SECRET: ${{ secrets.MODAL_MODAL_LABS_TOKEN_SECRET }} 16 | MODAL_ENVIRONMENT: examples 17 | 18 | steps: 19 | - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # v3 20 | with: 21 | fetch-depth: 1 22 | - uses: ./.github/actions/setup 23 | 24 | - name: Run deployment script 25 | run: | 26 | python3 internal/deploy.py --no-dry-run 27 | -------------------------------------------------------------------------------- /06_gpu_and_ml/openai_whisper/finetuning/train/logs.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | 4 | def get_logger(name, level=logging.INFO): 5 | logger = logging.getLogger(name) 6 | handler = logging.StreamHandler() 7 | handler.setFormatter( 8 | logging.Formatter("%(levelname)s: %(asctime)s: %(name)s %(message)s") 9 | ) 10 | logger.addHandler(handler) 11 | logger.setLevel(level) 12 | return logger 13 | 14 | 15 | def setup_logging(*, logger: logging.Logger, log_level: int) -> None: 16 | import datasets 17 | import transformers 18 | 19 | datasets.utils.logging.set_verbosity(log_level) 20 | transformers.utils.logging.set_verbosity(log_level) 21 | transformers.utils.logging.enable_default_handler() 22 | transformers.utils.logging.enable_explicit_format() 23 | -------------------------------------------------------------------------------- /01_getting_started/inference.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import modal 4 | 5 | app = modal.App("example-inference") 6 | image = modal.Image.debian_slim().uv_pip_install("transformers[torch]") 7 | 8 | 9 | @app.function(gpu="h100", image=image) 10 | def chat(prompt: str | None = None) -> list[dict]: 11 | from transformers import pipeline 12 | 13 | if prompt is None: 14 | prompt = f"/no_think Read this code.\n\n{Path(__file__).read_text()}\nIn one paragraph, what does the code do?" 
15 | 16 | print(prompt) 17 | context = [{"role": "user", "content": prompt}] 18 | 19 | chatbot = pipeline( 20 | model="Qwen/Qwen3-1.7B-FP8", device_map="cuda", max_new_tokens=1024 21 | ) 22 | result = chatbot(context) 23 | print(result[0]["generated_text"][-1]["content"]) 24 | 25 | return result 26 | -------------------------------------------------------------------------------- /06_gpu_and_ml/import_torch.py: -------------------------------------------------------------------------------- 1 | import modal 2 | 3 | app = modal.App("example-import-torch") 4 | 5 | 6 | torch_image = modal.Image.debian_slim().uv_pip_install( 7 | "torch==2.7", 8 | extra_index_url="https://download.pytorch.org/whl/cu128", 9 | force_build=True, # trigger a build every time, just for demonstration purposes 10 | # remove if you're using this in production! 11 | ) 12 | 13 | 14 | @app.function(gpu="B200", image=torch_image) 15 | def torch() -> list[list[int]]: 16 | import math 17 | 18 | import torch 19 | 20 | print(torch.cuda.get_device_properties("cuda:0")) 21 | 22 | matrix = torch.randn(1024, 1024) / math.sqrt(1024) 23 | matrix = matrix @ matrix 24 | 25 | return matrix.detach().cpu().tolist() 26 | 27 | 28 | @app.local_entrypoint() 29 | def main(): 30 | print(torch.remote()[:1]) 31 | -------------------------------------------------------------------------------- /06_gpu_and_ml/openai_whisper/pod_transcriber/app/frontend/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "whisper_frontend", 3 | "private": true, 4 | "version": "0.0.0", 5 | "type": "module", 6 | "scripts": { 7 | "dev": "vite", 8 | "build": "tsc && vite build", 9 | "preview": "vite preview" 10 | }, 11 | "dependencies": { 12 | "react": "^18.2.0", 13 | "react-dom": "^18.2.0", 14 | "react-feather": "^2.0.10", 15 | "react-router-dom": "^6.4.2", 16 | "react-spinners": "^0.13.6", 17 | "swr": "^1.3.0" 18 | }, 19 | "devDependencies": { 20 | "@types/react": "^18.0.17", 21 | "@types/react-dom": "^18.0.6", 22 | "@vitejs/plugin-react": "^2.1.0", 23 | "autoprefixer": "^10.4.12", 24 | "postcss": "^8.4.18", 25 | "tailwindcss": "^3.1.8", 26 | "typescript": "^4.6.4", 27 | "vite": "^3.1.0" 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /10_integrations/dbt/sample_proj_duckdb_s3/models/staging/schema.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | models: 4 | - name: stg_customers 5 | columns: 6 | - name: customer_id 7 | tests: 8 | - unique 9 | - not_null 10 | 11 | - name: stg_orders 12 | columns: 13 | - name: order_id 14 | tests: 15 | - unique 16 | - not_null 17 | - name: status 18 | tests: 19 | - accepted_values: 20 | values: 21 | ["placed", "shipped", "completed", "return_pending", "returned"] 22 | 23 | - name: stg_payments 24 | columns: 25 | - name: payment_id 26 | tests: 27 | - unique 28 | - not_null 29 | - name: payment_method 30 | tests: 31 | - accepted_values: 32 | values: ["credit_card", "coupon", "bank_transfer", "gift_card"] 33 | -------------------------------------------------------------------------------- /07_web_endpoints/fasthtml-checkboxes/styles.css: -------------------------------------------------------------------------------- 1 | /* This file is used to override the default pico.css styles. 
*/ 2 | 3 | body { 4 | background-color: #1d1d1d; 5 | } 6 | 7 | .container { 8 | padding: 2rem; 9 | width: 100%; 10 | max-width: 100%; 11 | } 12 | 13 | [type="checkbox"]:is(:checked, :checked:focus) { 14 | --pico-border-color: #7fee64; 15 | --pico-background-color: #7fee64; 16 | } 17 | 18 | [type="checkbox"]:not(:checked, :checked:focus) { 19 | --pico-border-color: rgba(255, 255, 255, 0.2); 20 | --pico-background-color: rgba(255, 255, 255, 0.05); 21 | } 22 | 23 | :where(select, textarea):not([readonly]):focus, 24 | input:not([type=submit], [type=button], [type=reset], [type=range], [type=file], [readonly]):focus { 25 | --pico-box-shadow: 0 0 0 var(--pico-outline-width) rgba(127, 238, 100, 0.25); 26 | --pico-border-color: rgba(127, 238, 100, 0.50); 27 | } 28 | -------------------------------------------------------------------------------- /06_gpu_and_ml/openai_whisper/pod_transcriber/app/frontend/src/components/Footer.tsx: -------------------------------------------------------------------------------- 1 | import { Link } from "react-router-dom"; 2 | import modalWordmarkImg from "../modal-wordmark.svg"; 3 | 4 | export default function Footer() { 5 | return ( 6 |
15 | 16 |
17 | 18 | built with 19 | 20 | 21 |
22 |
23 |
24 | ); 25 | } 26 | -------------------------------------------------------------------------------- /06_gpu_and_ml/openai_whisper/finetuning/readme.md: -------------------------------------------------------------------------------- 1 | ## Fine-tuning OpenAI's whisper model for improved automatic Hindi speech recognition 2 | 3 | The following configuration will finetune the `whisper-small` model for almost 3 hrs, 4 | achieving a word error rate (WER) of about 55-60. Increasing the number of training 5 | epochs should improve performance, decreasing WER. 6 | 7 | You can benchmark this example's performance using Hugging Face's [**autoevaluate leaderboard**](https://huggingface.co/spaces/autoevaluate/leaderboards?dataset=mozilla-foundation%2Fcommon_voice_11_0&only_verified=0&task=automatic-speech-recognition&config=hi&split=test&metric=wer). 8 | 9 | ```bash 10 | modal run -m train.train --num_train_epochs=10 11 | ``` 12 | 13 | ### Testing 14 | 15 | Use `modal run -m train.end_to_end_check` to do a full train → serialize → save → load → predict 16 | run in less than 5 minutes, checking that the finetuning program is functional. 17 | -------------------------------------------------------------------------------- /.github/workflows/build-and-run-example.yml: -------------------------------------------------------------------------------- 1 | name: Build and run random example 2 | 3 | on: 4 | workflow_dispatch: 5 | schedule: 6 | - cron: "23 * * * *" 7 | 8 | env: 9 | TERM: linux 10 | TERMINFO: /etc/terminfo 11 | MODAL_TOKEN_ID: ${{ secrets.MODAL_MODAL_LABS_TOKEN_ID }} 12 | MODAL_TOKEN_SECRET: ${{ secrets.MODAL_MODAL_LABS_TOKEN_SECRET }} 13 | MODAL_ENVIRONMENT: examples 14 | 15 | jobs: 16 | build-and-run: 17 | name: Build a random example from scratch and run it 18 | runs-on: ubuntu-24.04 19 | timeout-minutes: 60 20 | steps: 21 | - name: Checkout Repository 22 | uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # v3 23 | with: 24 | fetch-depth: 1 25 | - uses: ./.github/actions/setup 26 | 27 | - name: Run a random example with MODAL_IGNORE_CACHE set 28 | run: | 29 | MODAL_IGNORE_CACHE=1 python3 -m internal.run_example --timeout 1800 30 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.pytest.ini_options] 2 | filterwarnings = [ 3 | "error::DeprecationWarning", 4 | "error::modal.exception.DeprecationError", 5 | "ignore::DeprecationWarning:pytest.*:", 6 | ] 7 | addopts = "--ignore 07_web_endpoints/webrtc/webrtc_yolo_test.py --ignore 06_gpu_and_ml/llm-serving/openai_compatible/load_test.py --ignore 07_web_endpoints/fasthtml-checkboxes/cbx_load_test.py" 8 | 9 | [tool.mypy] 10 | ignore_missing_imports = true 11 | check_untyped_defs = true 12 | no_strict_optional = true 13 | 14 | # https://github.com/python/mypy/issues/10632 15 | [[tool.mypy.overrides]] 16 | module = "requests" 17 | ignore_missing_imports = true 18 | 19 | [tool.ruff] 20 | exclude = [".venv", "venv", "__pycache__"] 21 | line-length = 88 22 | # TODO: Add when available: "E266", "E203" 23 | lint.ignore = ["E501", "E741", "E402"] 24 | lint.select = ['E', 'F', 'W', 'I'] 25 | 26 | [tool.ruff.lint.isort] 27 | combine-as-imports = true 28 | known-third-party = ["modal"] 29 | -------------------------------------------------------------------------------- /06_gpu_and_ml/dreambooth/assets/favicon.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 
5 | 6 | -------------------------------------------------------------------------------- /06_gpu_and_ml/protein-folding/frontend/favicon.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /10_integrations/dbt/sample_proj_duckdb_s3/dbt_project.yml: -------------------------------------------------------------------------------- 1 | name: "jaffle_shop" 2 | version: "1.0.0" 3 | config-version: 2 4 | 5 | # This setting configures which "profile" dbt uses for this project. 6 | profile: "sample_proj" 7 | 8 | # These configurations specify where dbt should look for different types of files. 9 | # The `model-paths` config, for example, states that models in this project can be 10 | # found in the "models/" directory. You probably won't need to change these! 11 | model-paths: ["models"] 12 | analysis-paths: ["analyses"] 13 | test-paths: ["tests"] 14 | seed-paths: ["seeds"] 15 | macro-paths: ["macros"] 16 | snapshot-paths: ["snapshots"] 17 | 18 | target-path: "target" # directory which will store compiled SQL files 19 | clean-targets: # directories to be removed by `dbt clean` 20 | - "target" 21 | - "dbt_packages" 22 | 23 | # Configuring models 24 | # Full documentation: https://docs.getdbt.com/docs/configuring-models 25 | models: 26 | +materialized: table 27 | -------------------------------------------------------------------------------- /06_gpu_and_ml/hyperparameter-sweep/assets/favicon.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /06_gpu_and_ml/openai_whisper/pod_transcriber/app/frontend/src/index.css: -------------------------------------------------------------------------------- 1 | @tailwind base; 2 | @tailwind components; 3 | @tailwind utilities; 4 | 5 | :root { 6 | font-family: Inter, Avenir, Helvetica, Arial, sans-serif; 7 | font-size: 16px; 8 | line-height: 24px; 9 | font-weight: 401; 10 | 11 | background-color: rgb(249 250 251); 12 | 13 | padding: 0 !important; 14 | } 15 | 16 | .podcast-list li:last-child { 17 | @apply border-b; 18 | } 19 | 20 | .modal-barloader { 21 | margin-top: 10px; 22 | width: 0; 23 | height: 10px; 24 | border-right: 20px solid #333; 25 | border-left: 0px solid #bbffaa; 26 | box-shadow: 0 0 0 1px #bbffaa; 27 | animation: modal-barloader infinite 4s linear; 28 | filter: brightness(95%); 29 | } 30 | 31 | @keyframes modal-barloader { 32 | 0% { 33 | border-right: 20px solid #333; 34 | border-left: 0px solid #bbffaa; 35 | } 36 | 37 | 50% { 38 | border-left: 20px solid #bbffaa; 39 | border-right: 0px solid #333; 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /misc/google_search_generator.py: -------------------------------------------------------------------------------- 1 | # --- 2 | # runtimes: ["runc", "gvisor"] 3 | # --- 4 | # 5 | # # Use a generator to fetch search results 6 | # 7 | # This is a simple example which 8 | # 9 | # 1. Installs a custom Python package. 10 | # 2. Uses a _generator_ to return results back to the launcher process. 11 | 12 | import modal 13 | 14 | # We build a custom image by adding the `google` package to the base image. 
15 | app = modal.App( 16 | "example-google-search-generator", 17 | image=modal.Image.debian_slim().pip_install("google"), 18 | ) 19 | 20 | # Next, let's define a _generator_ function that uses our custom image. 21 | 22 | 23 | @app.function() 24 | def scrape(query): 25 | from googlesearch import search 26 | 27 | for url in search(query.encode(), stop=100): 28 | yield url 29 | 30 | 31 | # Finally, let's launch it from the command line with `modal run`: 32 | 33 | 34 | @app.local_entrypoint() 35 | def main(query: str = "modal"): 36 | for url in scrape.remote_gen(query): 37 | print(url) 38 | -------------------------------------------------------------------------------- /07_web_endpoints/flask_app.py: -------------------------------------------------------------------------------- 1 | # --- 2 | # cmd: ["modal", "serve", "07_web_endpoints/flask_app.py"] 3 | # --- 4 | 5 | # # Deploy Flask app with Modal 6 | 7 | # This example shows how you can deploy a [Flask](https://flask.palletsprojects.com/en/3.0.x/) app with Modal. 8 | # You can serve any app written in a WSGI-compatible web framework (like Flask) on Modal with this pattern. You can serve an app written in an ASGI-compatible framework, like FastAPI, with [`asgi_app`](https://modal.com/docs/guide/webhooks#asgi). 9 | 10 | import modal 11 | 12 | app = modal.App( 13 | "example-flask-app", 14 | image=modal.Image.debian_slim().uv_pip_install("flask"), 15 | ) 16 | 17 | 18 | @app.function() 19 | @modal.wsgi_app() 20 | def flask_app(): 21 | from flask import Flask, request 22 | 23 | web_app = Flask(__name__) 24 | 25 | @web_app.get("/") 26 | def home(): 27 | return "Hello Flask World!" 28 | 29 | @web_app.post("/foo") 30 | def foo(): 31 | return request.json 32 | 33 | return web_app 34 | -------------------------------------------------------------------------------- /07_web_endpoints/webrtc/yolo/yolo_classes.txt: -------------------------------------------------------------------------------- 1 | person 2 | bicycle 3 | car 4 | motorcycle 5 | airplane 6 | bus 7 | train 8 | truck 9 | boat 10 | traffic light 11 | fire hydrant 12 | stop sign 13 | parking meter 14 | bench 15 | bird 16 | cat 17 | dog 18 | horse 19 | sheep 20 | cow 21 | elephant 22 | bear 23 | zebra 24 | giraffe 25 | backpack 26 | umbrella 27 | handbag 28 | tie 29 | suitcase 30 | frisbee 31 | skis 32 | snowboard 33 | sports ball 34 | kite 35 | baseball bat 36 | baseball glove 37 | skateboard 38 | surfboard 39 | tennis racket 40 | bottle 41 | wine glass 42 | cup 43 | fork 44 | knife 45 | spoon 46 | bowl 47 | banana 48 | apple 49 | sandwich 50 | orange 51 | broccoli 52 | carrot 53 | hot dog 54 | pizza 55 | donut 56 | cake 57 | chair 58 | couch 59 | potted plant 60 | bed 61 | dining table 62 | toilet 63 | tv 64 | laptop 65 | mouse 66 | remote 67 | keyboard 68 | cell phone 69 | microwave 70 | oven 71 | toaster 72 | sink 73 | refrigerator 74 | book 75 | clock 76 | vase 77 | scissors 78 | teddy bear 79 | hair drier 80 | toothbrush 81 | -------------------------------------------------------------------------------- /01_getting_started/inference_endpoint.py: -------------------------------------------------------------------------------- 1 | # --- 2 | # cmd: ["modal", "serve", "01_getting_started/inference_endpoint.py"] 3 | # --- 4 | from pathlib import Path 5 | 6 | import modal 7 | 8 | app = modal.App("example-inference-endpoint") 9 | image = ( 10 | modal.Image.debian_slim() 11 | .uv_pip_install("transformers[torch]") 12 | .uv_pip_install("fastapi") 13 | ) 14 | 15 | 16 | @app.function(gpu="h100", 
image=image) 17 | @modal.fastapi_endpoint(docs=True) 18 | def chat(prompt: str | None = None) -> list[dict]: 19 | from transformers import pipeline 20 | 21 | if prompt is None: 22 | prompt = f"/no_think Read this code.\n\n{Path(__file__).read_text()}\nIn one paragraph, what does the code do?" 23 | 24 | print(prompt) 25 | context = [{"role": "user", "content": prompt}] 26 | 27 | chatbot = pipeline( 28 | model="Qwen/Qwen3-1.7B-FP8", device_map="cuda", max_new_tokens=1024 29 | ) 30 | result = chatbot(context) 31 | print(result[0]["generated_text"][-1]["content"]) 32 | 33 | return result 34 | -------------------------------------------------------------------------------- /.github/actions/setup/action.yml: -------------------------------------------------------------------------------- 1 | name: setup 2 | 3 | description: Set up a Python environment for the examples. 4 | 5 | inputs: 6 | version: 7 | description: Which Python version to install 8 | required: false 9 | default: "3.11" 10 | devDependencies: 11 | description: Whether to skip dependencies 12 | required: false 13 | default: "no-skip" 14 | 15 | runs: 16 | using: composite 17 | steps: 18 | - name: Install Python 19 | uses: actions/setup-python@8d9ed9ac5c53483de85588cdf95a591a75ab9f55 # v5 20 | with: 21 | python-version: ${{ inputs.version }} 22 | 23 | - name: Install base packages 24 | shell: bash 25 | run: | 26 | pip install uv 27 | uv pip install --system setuptools wheel 28 | 29 | - name: Install development Python packages 30 | if: ${{ inputs.devDependencies != 'skip' }} 31 | shell: bash 32 | run: uv pip install --system -r internal/requirements.txt 33 | 34 | - name: Install the modal client 35 | shell: bash 36 | run: uv pip install --system modal 37 | -------------------------------------------------------------------------------- /07_web_endpoints/fasthtml_app.py: -------------------------------------------------------------------------------- 1 | # --- 2 | # cmd: ["modal", "serve", "07_web_endpoints/fasthtml_app.py"] 3 | # --- 4 | 5 | # # Deploy a FastHTML app with Modal 6 | 7 | # This example shows how you can deploy a FastHTML app with Modal. 8 | # [FastHTML](https://www.fastht.ml/) is a Python library built on top of [HTMX](https://htmx.org/) 9 | # which allows you to create entire web applications using only Python. 10 | 11 | # The integration is pretty simple, thanks to the ASGI standard. 12 | # You just need to define a function returns your FastHTML app 13 | # and is decorated with `app.function` and `modal.asgi_app`. 
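# (Aside, not part of the original file: the same `modal.asgi_app` pattern works
# for any ASGI framework, not just FastHTML. A minimal sketch, assuming FastAPI
# is installed in the image and with the function name chosen for illustration:
#
#     @app.function(image=modal.Image.debian_slim().uv_pip_install("fastapi[standard]"))
#     @modal.asgi_app()
#     def serve_fastapi():
#         from fastapi import FastAPI
#         return FastAPI()
# )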
14 | 15 | import modal 16 | 17 | app = modal.App("example-fasthtml-app") 18 | 19 | 20 | @app.function( 21 | image=modal.Image.debian_slim(python_version="3.12").uv_pip_install( 22 | "python-fasthtml==0.5.2" 23 | ) 24 | ) 25 | @modal.asgi_app() 26 | def serve(): 27 | import fasthtml.common as fh 28 | 29 | app = fh.FastHTML() 30 | 31 | @app.get("/") 32 | def home(): 33 | return fh.Div(fh.P("Hello World!"), hx_get="/change") 34 | 35 | return app 36 | -------------------------------------------------------------------------------- /06_gpu_and_ml/llm-serving/openai_compatible/locustfile.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import random 3 | 4 | import locust 5 | 6 | messages = [ 7 | { 8 | "role": "system", 9 | "content": "You are a salesman for Modal, the cloud-native serverless Python computing platform.", 10 | }, 11 | { 12 | "role": "user", 13 | "content": "Give me two fun date ideas.", 14 | }, 15 | ] 16 | 17 | 18 | class WebsiteUser(locust.HttpUser): 19 | wait_time = locust.between(1, 5) 20 | headers = { 21 | "Authorization": "Bearer super-secret-key", 22 | "Accept": "application/json", 23 | } 24 | 25 | @locust.task 26 | def chat_completion(self): 27 | payload = { 28 | "model": "Qwen/Qwen3-8B-FP8", 29 | "messages": messages, 30 | } 31 | 32 | response = self.client.request( 33 | "POST", "/v1/chat/completions", json=payload, headers=self.headers 34 | ) 35 | response.raise_for_status() 36 | if random.random() < 0.01: 37 | logging.info(response.json()["choices"][0]["message"]["content"]) 38 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Modal Labs 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /13_sandboxes/codelangchain/src/common.py: -------------------------------------------------------------------------------- 1 | """Shared information: image definitions and common utilities.""" 2 | 3 | import os 4 | from typing import Any, Dict, TypedDict 5 | 6 | import modal 7 | 8 | PYTHON_VERSION = "3.11" 9 | 10 | image = ( 11 | modal.Image.debian_slim(python_version=PYTHON_VERSION) 12 | .uv_pip_install( 13 | "beautifulsoup4~=4.12.3", 14 | "langchain==0.3.4", 15 | "langchain-core==0.3.12", 16 | "langgraph==0.2.39", 17 | "langchain-community==0.3.3", 18 | "langchain-openai==0.2.3", 19 | "pydantic==2.9.1", 20 | ) 21 | .env({"LANGCHAIN_TRACING_V2": "true"}) 22 | ) 23 | 24 | 25 | class GraphState(TypedDict): 26 | """ 27 | Represents the state of our graph. 28 | 29 | Attributes: 30 | keys: A dictionary where each key is a string. 31 | """ 32 | 33 | keys: Dict[str, Any] 34 | 35 | 36 | os.environ["LANGCHAIN_PROJECT"] = "codelangchain" 37 | os.environ["LANGCHAIN_TRACING"] = "true" 38 | 39 | COLOR = { 40 | "HEADER": "\033[95m", 41 | "BLUE": "\033[94m", 42 | "GREEN": "\033[92m", 43 | "RED": "\033[91m", 44 | "ENDC": "\033[0m", 45 | } 46 | -------------------------------------------------------------------------------- /misc/hello_shebang.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # # Syntax for making modal scripts executable 3 | 4 | # This example shows how you can add a shebang to a script that is meant to be invoked with `modal run`. 5 | 6 | import sys 7 | 8 | import modal 9 | 10 | app = modal.App("example-hello-world") 11 | 12 | 13 | @app.function() 14 | def f(i): 15 | if i % 2 == 0: 16 | print("hello", i) 17 | else: 18 | print("world", i, file=sys.stderr) 19 | 20 | return i * i 21 | 22 | 23 | @app.local_entrypoint() 24 | def main(): 25 | # run the function locally 26 | print(f.local(1000)) 27 | 28 | # run the function remotely on Modal 29 | print(f.remote(1002)) 30 | 31 | # run the function in parallel and remotely on Modal 32 | total = 0 33 | for ret in f.map(range(200)): 34 | total += ret 35 | 36 | print(total) 37 | 38 | 39 | if __name__ == "__main__": 40 | # Use `modal.enable_output()` to print the Sandbox's image build logs to the console, just like `modal run` does. 41 | # Use `app.run()` to substitute the `modal run` CLI invocation. 
42 | with modal.enable_output(), app.run(): 43 | main() 44 | -------------------------------------------------------------------------------- /06_gpu_and_ml/nsight-profiling/toy.cu: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | __device__ void nanosleep_cuda(unsigned int nanoseconds) { 4 | asm volatile ("nanosleep.u32 %0;" :: "r"(nanoseconds)); 5 | } 6 | 7 | __global__ void toyKernel(int *d_arr) { 8 | int idx = threadIdx.x + blockIdx.x * blockDim.x; 9 | d_arr[idx] *= 2; 10 | nanosleep_cuda(1000000000); // Sleep for ~1 second 11 | } 12 | 13 | 14 | 15 | int main() { 16 | const int size = 2 << 16; 17 | const int ct = 2 << 10; 18 | int h_arr[size], *d_arr; 19 | 20 | for (int i = 0; i < size; i++) h_arr[i] = i; 21 | 22 | cudaMalloc((void **)&d_arr, size * sizeof(int)); 23 | cudaMemcpy(d_arr, h_arr, size * sizeof(int), cudaMemcpyHostToDevice); 24 | 25 | for (int i = 0; i < ct; i++) { 26 | if (i > 0 && (i & (i - 1)) == 0) { 27 | std::cout << i << std::endl; 28 | } 29 | toyKernel<<<4, 64>>>(d_arr); 30 | cudaDeviceSynchronize(); 31 | } 32 | 33 | cudaMemcpy(h_arr, d_arr, size * sizeof(int), cudaMemcpyDeviceToHost); 34 | cudaFree(d_arr); 35 | 36 | std::cout << "Computation done!" << std::endl; 37 | return 0; 38 | } 39 | -------------------------------------------------------------------------------- /06_gpu_and_ml/embeddings/qdrant.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | import modal 4 | 5 | app = modal.App("example-qdrant") 6 | 7 | image = modal.Image.debian_slim(python_version="3.11").uv_pip_install( 8 | "qdrant-client[fastembed-gpu]==1.13.3" 9 | ) 10 | 11 | 12 | @app.function(image=image, gpu="any") 13 | def query(inpt): 14 | from qdrant_client import QdrantClient 15 | 16 | client = QdrantClient(":memory:") 17 | 18 | docs = [ 19 | "Qdrant has Langchain integrations", 20 | "Qdrant also has Llama Index integrations", 21 | ] 22 | 23 | print("querying documents:", *docs, sep="\n\t") 24 | 25 | client.add(collection_name="demo_collection", documents=docs) 26 | 27 | print("query:", inpt, sep="\n\t") 28 | 29 | search_results = client.query( 30 | collection_name="demo_collection", 31 | query_text=inpt, 32 | limit=1, 33 | ) 34 | 35 | print("result:", search_results[0], sep="\n\t") 36 | 37 | return search_results[0].document 38 | 39 | 40 | @app.local_entrypoint() 41 | def main(inpt: Optional[str] = None): 42 | if not inpt: 43 | inpt = "alpaca" 44 | 45 | print(query.remote(inpt)) 46 | -------------------------------------------------------------------------------- /06_gpu_and_ml/embeddings/wikipedia/download.py: -------------------------------------------------------------------------------- 1 | import modal 2 | 3 | # We first set out configuration variables for our script. 
4 | DATASET_DIR = "/data" 5 | DATASET_NAME = "wikipedia" 6 | DATASET_CONFIG = "20220301.en" 7 | 8 | 9 | # We define our Modal Resources that we'll need 10 | volume = modal.Volume.from_name("embedding-wikipedia", create_if_missing=True) 11 | image = modal.Image.debian_slim(python_version="3.9").uv_pip_install( 12 | "datasets==2.16.1", "apache_beam==2.53.0" 13 | ) 14 | app = modal.App(image=image) 15 | 16 | 17 | # The default timeout is 5 minutes re: https://modal.com/docs/guide/timeouts#handling-timeouts 18 | # but we override this to 19 | # 3000s to avoid any potential timeout issues 20 | @app.function(volumes={DATASET_DIR: volume}, timeout=3000) 21 | def download_dataset(): 22 | # Redownload the dataset 23 | import time 24 | 25 | from datasets import load_dataset 26 | 27 | start = time.time() 28 | dataset = load_dataset(DATASET_NAME, DATASET_CONFIG, num_proc=6) 29 | end = time.time() 30 | print(f"Download complete - downloaded files in {end - start}s") 31 | 32 | dataset.save_to_disk(f"{DATASET_DIR}/{DATASET_NAME}") 33 | volume.commit() 34 | 35 | 36 | @app.local_entrypoint() 37 | def main(): 38 | download_dataset.remote() 39 | -------------------------------------------------------------------------------- /01_getting_started/inference_map.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import modal 4 | 5 | app = modal.App("example-inference-map") 6 | image = modal.Image.debian_slim().uv_pip_install("transformers[torch]") 7 | 8 | 9 | @app.function(gpu="h100", image=image) 10 | def chat(prompt: str | None = None) -> list[dict]: 11 | from transformers import pipeline 12 | 13 | if prompt is None: 14 | prompt = f"/no_think Read this code.\n\n{Path(__file__).read_text()}\nIn one paragraph, what does the code do?" 15 | 16 | print(prompt) 17 | context = [{"role": "user", "content": prompt}] 18 | 19 | chatbot = pipeline( 20 | model="Qwen/Qwen3-1.7B-FP8", device_map="cuda", max_new_tokens=1024 21 | ) 22 | result = chatbot(context) 23 | print(result[0]["generated_text"][-1]["content"]) 24 | 25 | return result 26 | 27 | 28 | @app.local_entrypoint() 29 | def main(): 30 | import glob 31 | 32 | root_dir, examples = Path(__file__).parent.parent, [] 33 | for path in glob.glob("**/*.py", root_dir=root_dir): 34 | examples.append( 35 | f"/no_think Read this code.\n\n{(root_dir / path).read_text()}\nIn one paragraph, what does the code do?" 
36 | ) 37 | 38 | for result in chat.map(examples): 39 | print(result[0]["generated_text"][-1]["content"]) 40 | -------------------------------------------------------------------------------- /.github/workflows/check.yml: -------------------------------------------------------------------------------- 1 | name: Check 2 | on: 3 | push: 4 | branches: 5 | - main 6 | pull_request: 7 | workflow_dispatch: 8 | 9 | jobs: 10 | ruff: 11 | name: Ruff 12 | runs-on: ubuntu-24.04 13 | 14 | steps: 15 | - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # v3 16 | with: 17 | fetch-depth: 1 18 | - uses: ./.github/actions/setup 19 | 20 | - run: ruff check 21 | 22 | - run: ruff format --check 23 | 24 | nbconvert: 25 | name: NbConvert 26 | runs-on: ubuntu-24.04 27 | 28 | steps: 29 | - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # v3 30 | with: 31 | fetch-depth: 1 32 | - uses: ./.github/actions/setup 33 | 34 | - name: Check notebooks are cleaned 35 | run: | 36 | jupyter nbconvert --clear-output --inplace 11_notebooks/*.ipynb 37 | git diff --quiet 11_notebooks/*.ipynb && git diff --cached --quiet 11_notebooks/*.ipynb || exit 1 38 | 39 | pytest: 40 | name: Pytest 41 | runs-on: ubuntu-24.04 42 | 43 | steps: 44 | - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # v3 45 | with: 46 | fetch-depth: 1 47 | - uses: ./.github/actions/setup 48 | 49 | - name: Run 50 | run: pytest -v . 51 | -------------------------------------------------------------------------------- /.github/workflows/stale.yml: -------------------------------------------------------------------------------- 1 | name: Stale 2 | on: 3 | workflow_dispatch: 4 | schedule: 5 | - cron: "30 15 * * *" 6 | 7 | permissions: 8 | contents: write 9 | issues: write 10 | pull-requests: write 11 | 12 | jobs: 13 | stale-prs: 14 | name: Close stale PRs 15 | runs-on: ubuntu-latest 16 | steps: 17 | - uses: actions/stale@5bef64f19d7facfb25b37b414482c7164d639639 # v9 18 | with: 19 | stale-pr-message: | 20 | This PR is stale because it has been open 30 days with no activity. 21 | If the stale label remains and there are no comments, this will be closed in 5 days. 22 | close-pr-message: | 23 | This PR was closed because it has been stalled for 5 days with no activity. 24 | days-before-stale: 30 25 | days-before-close: 5 26 | days-before-issue-stale: -1 27 | delete-branch: true 28 | operations-per-run: 200 29 | 30 | stale-branches: 31 | name: Remove stale branches 32 | runs-on: ubuntu-latest 33 | steps: 34 | - uses: fpicalausa/remove-stale-branches@bfaf2b7f95cfd85485960c9d2d98a0702c84a74c # v1.6.0 35 | with: 36 | operations-per-run: 500 37 | days-before-branch-stale: 30 38 | ignore-unknown-authors: true 39 | default-recipient: "(Unknown author)" 40 | -------------------------------------------------------------------------------- /10_integrations/tailscale/entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # Custom entrypoint [1] used to login into Tailscale and start both SOCKS5 and HTTP 4 | # proxies. This requires the env var `TAILSCALE_AUTHKEY` to be populated with a 5 | # Tailscale auth key. 
[2] 6 | # 7 | # [1] https://modal.com/docs/guide/custom-container#entrypoint 8 | # [2] https://tailscale.com/kb/1111/ephemeral-nodes 9 | 10 | set -e 11 | 12 | tailscaled --tun=userspace-networking --socks5-server=localhost:1080 --outbound-http-proxy-listen=localhost:1080 & 13 | tailscale up --authkey=${TAILSCALE_AUTHKEY} --hostname=${MODAL_TASK_ID} 14 | 15 | # Loop until the maximum number of retries is reached 16 | retry_count=0 17 | while [ $retry_count -lt 5 ]; do 18 | http_status=$(curl -x socks5://localhost:1080 -o /dev/null -L -s -w '%{http_code}' https://www.google.com) 19 | 20 | # Check if the HTTP status code is 200 (OK) 21 | if [ $http_status -eq 200 ]; then 22 | echo "Successfully started SOCKS5 proxy, HTTP proxy, and connected to Tailscale." 23 | exec "$@" # Runs the command passed to the entrypoint script. 24 | exit 0 25 | else 26 | echo "Attempt $((retry_count+1))/$MAX_RETRIES failed: SOCKS5 proxy returned HTTP $http_status" 27 | fi 28 | 29 | retry_count=$((retry_count+1)) 30 | sleep 1 31 | done 32 | 33 | echo "Failed to start Tailscale." 34 | exit 1 -------------------------------------------------------------------------------- /08_advanced/hello_world_async.py: -------------------------------------------------------------------------------- 1 | # # Async functions 2 | # 3 | # Modal natively supports async/await syntax using asyncio. 4 | 5 | # First, let's import some global stuff. 6 | 7 | import sys 8 | 9 | import modal 10 | 11 | app = modal.App("example-hello-world-async") 12 | 13 | 14 | # ## Defining a function 15 | # 16 | # Now, let's define a function. The wrapped function can be synchronous or 17 | # asynchronous, but calling it in either context will still work. 18 | # Let's stick to a normal synchronous function 19 | 20 | 21 | @app.function() 22 | def f(i): 23 | if i % 2 == 0: 24 | print("hello", i) 25 | else: 26 | print("world", i, file=sys.stderr) 27 | 28 | return i * i 29 | 30 | 31 | # ## Running the app with asyncio 32 | # 33 | # Let's make the main entrypoint asynchronous. In async contexts, we should 34 | # call the function using `await` or iterate over the map using `async for`. 35 | # Otherwise we would block the event loop while our call is being run. 36 | 37 | 38 | @app.local_entrypoint() 39 | async def run_async(): 40 | # Call the function using .remote.aio() in order to run it asynchronously 41 | print(await f.remote.aio(1000)) 42 | 43 | # Parallel map. 44 | total = 0 45 | # Call .map asynchronously using using f.map.aio(...) 
46 | async for ret in f.map.aio(range(20)): 47 | total += ret 48 | 49 | print(total) 50 | -------------------------------------------------------------------------------- /10_integrations/dbt/sample_proj_duckdb_s3/models/orders.sql: -------------------------------------------------------------------------------- 1 | {% set payment_methods = ['credit_card', 'coupon', 'bank_transfer', 'gift_card'] %} 2 | 3 | with orders as ( 4 | 5 | select * from {{ ref('stg_orders') }} 6 | 7 | ), 8 | 9 | payments as ( 10 | 11 | select * from {{ ref('stg_payments') }} 12 | 13 | ), 14 | 15 | order_payments as ( 16 | 17 | select 18 | order_id, 19 | 20 | {% for payment_method in payment_methods -%} 21 | sum(case when payment_method = '{{ payment_method }}' then amount else 0 end) as {{ payment_method }}_amount, 22 | {% endfor -%} 23 | 24 | sum(amount) as total_amount 25 | 26 | from payments 27 | 28 | group by order_id 29 | 30 | ), 31 | 32 | final as ( 33 | 34 | select 35 | orders.order_id, 36 | orders.customer_id, 37 | orders.order_date, 38 | orders.status, 39 | 40 | {% for payment_method in payment_methods -%} 41 | 42 | order_payments.{{ payment_method }}_amount, 43 | 44 | {% endfor -%} 45 | 46 | order_payments.total_amount as amount 47 | 48 | from orders 49 | 50 | 51 | left join order_payments 52 | on orders.order_id = order_payments.order_id 53 | 54 | ) 55 | 56 | {{ config(materialized='external', format='parquet', location='s3://modal-example-dbt-duckdb-s3/out/orders.parquet') }} 57 | select * from final 58 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

Modal Examples

7 | 8 | This is a collection of examples for [Modal](https://modal.com/). Use these examples to learn Modal and build your own robust and scalable applications. 9 | 10 | ## Usage 11 | 12 | First, sign up for a free account at [modal.com](https://modal.com/) and follow 13 | the setup instructions to install the `modal` package and set your API key. 14 | 15 | The examples are organized into several folders based on their category. You can 16 | generally run the files in any folder much like you run ordinary Python programs, with a 17 | command like: 18 | 19 | ```bash 20 | modal run 01_getting_started/hello_world.py 21 | ``` 22 | 23 | Although these scripts are run on your local machine, they'll communicate with 24 | Modal and run in our cloud, spawning serverless containers on demand. 25 | 26 | ## Examples 27 | 28 | - [**`01_getting_started/`**](01_getting_started) through [**`14_clusters/`**](14_clusters) provide a guided tour through Modal's concepts and capabilities. 29 | - [**`misc/`**](/misc) contains uncategorized, miscellaneous examples. 30 | 31 | _These examples are continuously tested for correctness against Python **3.11**._ 32 | 33 | ## License 34 | 35 | The [MIT license](LICENSE). 36 | -------------------------------------------------------------------------------- /06_gpu_and_ml/comfyui/kjnodes/kjnodes_example.py: -------------------------------------------------------------------------------- 1 | # --- 2 | # cmd: ["modal", "serve", "06_gpu_and_ml/comfyui/kjnodes/kjnodes_example.py"] 3 | # --- 4 | 5 | import subprocess 6 | 7 | import modal 8 | 9 | image = ( # build up a Modal Image to run ComfyUI, step by step 10 | modal.Image.debian_slim( # start from basic Linux with Python 11 | python_version="3.11" 12 | ) 13 | .apt_install("git") # install git to clone ComfyUI 14 | .uv_pip_install("comfy-cli==1.2.7") # install comfy-cli 15 | .run_commands( # use comfy-cli to install the ComfyUI repo and its dependencies 16 | "comfy --skip-prompt install --nvidia" 17 | ) 18 | .run_commands( # download the ComfyUI Essentials custom node pack 19 | "comfy node install comfyui-kjnodes" 20 | ) 21 | .run_commands( 22 | "comfy --skip-prompt model download --url https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5/resolve/main/v1-5-pruned.safetensors --relative-path models/checkpoints" 23 | ) 24 | ) 25 | 26 | app = modal.App(name="example-kjnodes", image=image) 27 | 28 | 29 | # Run ComfyUI as an interactive web server 30 | @app.function( 31 | max_containers=1, 32 | scaledown_window=30, 33 | timeout=1800, 34 | gpu="A10G", 35 | ) 36 | @modal.concurrent(max_inputs=10) 37 | @modal.web_server(8000, startup_timeout=60) 38 | def ui(): 39 | subprocess.Popen("comfy launch -- --listen 0.0.0.0 --port 8000", shell=True) 40 | -------------------------------------------------------------------------------- /06_gpu_and_ml/comfyui/essentials/essentials_example.py: -------------------------------------------------------------------------------- 1 | # --- 2 | # cmd: ["modal", "serve", "06_gpu_and_ml/comfyui/essentials/essentials_example.py"] 3 | # --- 4 | 5 | import subprocess 6 | 7 | import modal 8 | 9 | image = ( # build up a Modal Image to run ComfyUI, step by step 10 | modal.Image.debian_slim( # start from basic Linux with Python 11 | python_version="3.11" 12 | ) 13 | .apt_install("git") # install git to clone ComfyUI 14 | .uv_pip_install("comfy-cli==1.2.7") # install comfy-cli 15 | .run_commands( # use comfy-cli to install the ComfyUI repo and its dependencies 16 | "comfy --skip-prompt install 
--nvidia" 17 | ) 18 | .run_commands( # download the ComfyUI Essentials custom node pack 19 | "comfy node install comfyui_essentials" 20 | ) 21 | .run_commands( 22 | "comfy --skip-prompt model download --url https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5/resolve/main/v1-5-pruned.safetensors --relative-path models/checkpoints" 23 | ) 24 | ) 25 | 26 | app = modal.App(name="example-essentials", image=image) 27 | 28 | 29 | # Run ComfyUI as an interactive web server 30 | @app.function( 31 | max_containers=1, 32 | scaledown_window=30, 33 | timeout=1800, 34 | gpu="A10G", 35 | ) 36 | @modal.concurrent(max_inputs=10) 37 | @modal.web_server(8000, startup_timeout=60) 38 | def ui(): 39 | subprocess.Popen("comfy launch -- --listen 0.0.0.0 --port 8000", shell=True) 40 | -------------------------------------------------------------------------------- /06_gpu_and_ml/comfyui/was_node_suite/was_node_example.py: -------------------------------------------------------------------------------- 1 | # --- 2 | # cmd: ["modal", "serve", "06_gpu_and_ml/comfyui/was_node_suite/was_node_example.py"] 3 | # --- 4 | 5 | import subprocess 6 | 7 | import modal 8 | 9 | image = ( 10 | modal.Image.debian_slim( # start from basic Linux with Python 11 | python_version="3.11" 12 | ) 13 | .apt_install("git") # install git to clone ComfyUI 14 | .uv_pip_install("comfy-cli==1.2.7") # install comfy-cli 15 | .run_commands( # use comfy-cli to install the ComfyUI repo and its dependencies 16 | "comfy --skip-prompt install --nvidia" 17 | ) 18 | .run_commands( # install default stable diffusion model for example purposes 19 | "comfy --skip-prompt model download --url https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5/resolve/main/v1-5-pruned.safetensors --relative-path models/checkpoints" 20 | ) 21 | .run_commands( # download the WAS Node Suite custom node pack 22 | "comfy node install was-node-suite-comfyui" 23 | ) 24 | ) 25 | 26 | app = modal.App(name="example-was-node", image=image) 27 | 28 | 29 | # Run ComfyUI as an interactive web server 30 | @app.function( 31 | max_containers=1, 32 | scaledown_window=30, 33 | timeout=1800, 34 | gpu="A10G", 35 | ) 36 | @modal.concurrent(max_inputs=10) 37 | @modal.web_server(8000, startup_timeout=60) 38 | def ui(): 39 | subprocess.Popen("comfy launch -- --listen 0.0.0.0 --port 8000", shell=True) 40 | -------------------------------------------------------------------------------- /06_gpu_and_ml/hyperparameter-sweep/src/dataset.py: -------------------------------------------------------------------------------- 1 | # --- 2 | # pytest: false 3 | # --- 4 | 5 | import torch 6 | 7 | 8 | class Dataset: 9 | """Manage text dataset and batching.""" 10 | 11 | def __init__( 12 | self, 13 | encoded_text, 14 | train_percent, 15 | batch_size, 16 | context_size, 17 | device, 18 | ): 19 | self.device = device 20 | self.batch_size = batch_size 21 | self.context_size = context_size 22 | assert (train_percent > 0.0) and (train_percent < 1.0), ( 23 | "train_percent must be in (0,1)" 24 | ) 25 | 26 | # Train/Validation split. 
27 | data = torch.tensor(encoded_text, dtype=torch.long) 28 | n = len(data) 29 | self.train_data = data[: int(train_percent * n)] 30 | self.val_data = data[int(train_percent * n) :] 31 | 32 | def get_batch(self, split): 33 | """Get a batch of train or validation data.""" 34 | data = self.train_data if split == "train" else self.val_data 35 | 36 | starts = torch.randint(len(data) - self.context_size, (self.batch_size,)) 37 | 38 | x = torch.stack([data[start : start + self.context_size] for start in starts]) 39 | 40 | # +1 because we want to predict the next token. 41 | y = torch.stack( 42 | [data[start + 1 : start + self.context_size + 1] for start in starts] 43 | ) 44 | return x.to(self.device), y.to(self.device) 45 | -------------------------------------------------------------------------------- /05_scheduling/schedule_simple.py: -------------------------------------------------------------------------------- 1 | # --- 2 | # cmd: ["python", "-m", "05_scheduling.schedule_simple"] 3 | # --- 4 | 5 | # # Scheduling remote jobs 6 | 7 | # This example shows how you can schedule remote jobs on Modal. 8 | # You can do this either with: 9 | # 10 | # - [`modal.Period`](https://modal.com/docs/reference/modal.Period) - a time interval between function calls. 11 | # - [`modal.Cron`](https://modal.com/docs/reference/modal.Cron) - a cron expression to specify the schedule. 12 | 13 | # In the code below, the first function runs every 14 | # 5 seconds, and the second function runs every minute. We use the `schedule` 15 | # argument to specify the schedule for each function. The `schedule` argument can 16 | # take a `modal.Period` object to specify a time interval or a `modal.Cron` object 17 | # to specify a cron expression. 18 | 19 | import time 20 | from datetime import datetime 21 | 22 | import modal 23 | 24 | app = modal.App("example-schedule-simple") 25 | 26 | 27 | @app.function(schedule=modal.Period(seconds=5)) 28 | def print_time_1(): 29 | print( 30 | f"Printing with period 5 seconds: {datetime.now().strftime('%m/%d/%Y, %H:%M:%S')}" 31 | ) 32 | 33 | 34 | @app.function(schedule=modal.Cron("* * * * *")) 35 | def print_time_2(): 36 | print( 37 | f"Printing with cron every minute: {datetime.now().strftime('%m/%d/%Y, %H:%M:%S')}" 38 | ) 39 | 40 | 41 | if __name__ == "__main__": 42 | with modal.enable_output(): 43 | with app.run(): 44 | time.sleep(60) 45 | -------------------------------------------------------------------------------- /06_gpu_and_ml/speech-to-text/streaming-parakeet-frontend/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | Audio Transcription 7 | 37 | 38 | 39 |

Audio Transcription
Tip: Turn your microphone volume up for better transcription quality.
45 | 46 | 47 | 48 | -------------------------------------------------------------------------------- /06_gpu_and_ml/hyperparameter-sweep/src/logs_manager.py: -------------------------------------------------------------------------------- 1 | # --- 2 | # pytest: false 3 | # --- 4 | 5 | from torch.utils.tensorboard import SummaryWriter 6 | 7 | 8 | class LogsManager: 9 | def __init__(self, experiment_name, hparams, num_parameters, tb_log_path): 10 | self.model_name = ( 11 | f"{experiment_name}" 12 | f"_context_size={hparams.context_size}_n_heads={hparams.n_heads}" 13 | f"_dropout={hparams.dropout}" 14 | ) 15 | 16 | model_log_dir = tb_log_path / f"{experiment_name}/{self.model_name}" 17 | model_log_dir.mkdir(parents=True, exist_ok=True) 18 | self.train_writer = SummaryWriter(log_dir=f"{model_log_dir}/train") 19 | self.val_writer = SummaryWriter(log_dir=f"{model_log_dir}/val") 20 | 21 | # save hyperparameters to TensorBoard for easy reference 22 | pretty_hparams_str = "\n".join(f"{k}: {v}" for k, v in hparams.__dict__.items()) 23 | pretty_hparams_str += f"\nNum parameters: {num_parameters}" 24 | self.train_writer.add_text("Hyperparameters", pretty_hparams_str) 25 | 26 | def add_train_scalar(self, name, value, step): 27 | self.train_writer.add_scalar(name, value, step) 28 | 29 | def add_val_scalar(self, name, value, step): 30 | self.val_writer.add_scalar(name, value, step) 31 | 32 | def add_val_text(self, name, text, step): 33 | self.val_writer.add_text(name, text, step) 34 | 35 | def flush(self): 36 | self.train_writer.flush() 37 | self.val_writer.flush() 38 | -------------------------------------------------------------------------------- /06_gpu_and_ml/speech-to-text/multitalker-frontend/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | Audio Transcription 7 | 37 | 38 | 39 |

Parakeet Streaming Transcription
Tip: Turn your microphone volume up for better transcription quality.
45 | 46 | 47 | 48 | -------------------------------------------------------------------------------- /08_advanced/parallel_execution.py: -------------------------------------------------------------------------------- 1 | # # Parallel execution on Modal with `spawn` and `gather` 2 | 3 | # This example shows how you can run multiple functions in parallel on Modal. 4 | # We use the `spawn` method to start a function and return a handle to its result. 5 | # The `get` method is used to retrieve the result of the function call. 6 | 7 | import time 8 | 9 | import modal 10 | 11 | app = modal.App("example-parallel-execution") 12 | 13 | 14 | @app.function() 15 | def step1(word): 16 | time.sleep(2) 17 | print("step1 done") 18 | return word 19 | 20 | 21 | @app.function() 22 | def step2(number): 23 | time.sleep(1) 24 | print("step2 done") 25 | if number == 0: 26 | raise ValueError("custom error") 27 | return number 28 | 29 | 30 | @app.local_entrypoint() 31 | def main(): 32 | # Start running a function and return a handle to its result. 33 | word_call = step1.spawn("foo") 34 | number_call = step2.spawn(2) 35 | 36 | # Print "foofoo" after 2 seconds. 37 | print(word_call.get() * number_call.get()) 38 | 39 | # Alternatively, use `modal.FunctionCall.gather(...)` as a convenience wrapper, 40 | # which returns an error if either call fails. 41 | results = modal.FunctionCall.gather(step1.spawn("bar"), step2.spawn(4)) 42 | assert results == ["bar", 4] 43 | 44 | # Raise exception after 2 seconds. 45 | try: 46 | modal.FunctionCall.gather(step1.spawn("bar"), step2.spawn(0)) 47 | except ValueError as exc: 48 | assert str(exc) == "custom error" 49 | -------------------------------------------------------------------------------- /06_gpu_and_ml/comfyui/impact/impact_example.py: -------------------------------------------------------------------------------- 1 | # --- 2 | # cmd: ["modal", "serve", "06_gpu_and_ml/comfyui/impact/impact_example.py"] 3 | # --- 4 | 5 | import subprocess 6 | 7 | import modal 8 | 9 | image = ( 10 | modal.Image.debian_slim( # start from basic Linux with Python 11 | python_version="3.11" 12 | ) 13 | .apt_install("git") # install git to clone ComfyUI 14 | .uv_pip_install("comfy-cli==1.2.7") # install comfy-cli 15 | .run_commands( # use comfy-cli to install the ComfyUI repo and its dependencies 16 | "comfy --skip-prompt install --nvidia" 17 | ) 18 | .run_commands( # download the Impact pack 19 | "comfy node install comfyui-impact-pack" 20 | ) 21 | .uv_pip_install("ultralytics==8.3.26") # object detection models 22 | .apt_install( # opengl dependencies 23 | "libgl1-mesa-glx", "libglib2.0-0" 24 | ) 25 | .run_commands( 26 | "comfy --skip-prompt model download --url https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5/resolve/main/v1-5-pruned.safetensors --relative-path models/checkpoints", 27 | ) 28 | ) 29 | 30 | app = modal.App(name="example-impact", image=image) 31 | 32 | 33 | # Run ComfyUI as an interactive web server 34 | @app.function( 35 | max_containers=1, 36 | scaledown_window=30, 37 | timeout=1800, 38 | gpu="A10G", 39 | ) 40 | @modal.concurrent(max_inputs=10) 41 | @modal.web_server(8000, startup_timeout=60) 42 | def ui(): 43 | subprocess.Popen("comfy launch -- --listen 0.0.0.0 --port 8000", shell=True) 44 | -------------------------------------------------------------------------------- /07_web_endpoints/fastapi_app.py: -------------------------------------------------------------------------------- 1 | # --- 2 | # cmd: ["modal", "serve", "07_web_endpoints/fastapi_app.py"] 3 
| # --- 4 | 5 | # # Deploy FastAPI app with Modal 6 | 7 | # This example shows how you can deploy a [FastAPI](https://fastapi.tiangolo.com/) app with Modal. 8 | # You can serve any app written in an ASGI-compatible web framework (like FastAPI) using this pattern or you can server WSGI-compatible frameworks like Flask with [`wsgi_app`](https://modal.com/docs/guide/webhooks#wsgi). 9 | 10 | from typing import Optional 11 | 12 | import modal 13 | from fastapi import FastAPI, Header 14 | from pydantic import BaseModel 15 | 16 | image = modal.Image.debian_slim().uv_pip_install("fastapi[standard]", "pydantic") 17 | app = modal.App("example-fastapi-app", image=image) 18 | web_app = FastAPI() 19 | 20 | 21 | class Item(BaseModel): 22 | name: str 23 | 24 | 25 | @web_app.get("/") 26 | async def handle_root(user_agent: Optional[str] = Header(None)): 27 | print(f"GET / - received user_agent={user_agent}") 28 | return "Hello World" 29 | 30 | 31 | @web_app.post("/foo") 32 | async def handle_foo(item: Item, user_agent: Optional[str] = Header(None)): 33 | print(f"POST /foo - received user_agent={user_agent}, item.name={item.name}") 34 | return item 35 | 36 | 37 | @app.function() 38 | @modal.asgi_app() 39 | def fastapi_app(): 40 | return web_app 41 | 42 | 43 | @app.function() 44 | @modal.fastapi_endpoint(method="POST") 45 | def f(item: Item): 46 | return "Hello " + item.name 47 | 48 | 49 | if __name__ == "__main__": 50 | app.deploy("webapp") 51 | -------------------------------------------------------------------------------- /misc/queue_simple.py: -------------------------------------------------------------------------------- 1 | # --- 2 | # cmd: ["python", "misc/queue_simple.py"] 3 | # runtimes: ["runc", "gvisor"] 4 | # --- 5 | # 6 | # # Using a queue to send/receive data 7 | # 8 | # This is an example of how to use queues to send/receive data. 9 | # We don't do it here, but you could imagine doing this _between_ two functions. 
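# (Illustrative aside, not part of the original script: to share a queue
# *between* two Modal functions you would typically look it up by name from both.
# A minimal sketch; the app, queue, and function names below are assumptions:
#
#     app = modal.App("example-queue-between-functions")
#     work_queue = modal.Queue.from_name("example-shared-queue", create_if_missing=True)
#
#     @app.function()
#     def producer(n: int):
#         for i in range(n):
#             work_queue.put(i)  # hand work items to the consumer
#
#     @app.function()
#     def consumer(n: int) -> int:
#         return sum(work_queue.get() for _ in range(n))  # .get() blocks until an item arrives
# )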
10 | 11 | 12 | import asyncio 13 | 14 | import modal 15 | import modal.queue 16 | 17 | 18 | async def run_async(q: modal.Queue) -> None: 19 | await q.put.aio(42) 20 | r = await q.get.aio() 21 | assert r == 42 22 | await q.put_many.aio([42, 43, 44, 45, 46]) 23 | await q.put_many.aio([47, 48, 49, 50, 51]) 24 | r = await q.get_many.aio(3) 25 | assert r == [42, 43, 44] 26 | r = await q.get_many.aio(99) 27 | assert r == [45, 46, 47, 48, 49, 50, 51] 28 | 29 | 30 | async def many_consumers(q: modal.Queue) -> None: 31 | print("Creating getters") 32 | tasks = [asyncio.create_task(q.get.aio()) for i in range(20)] 33 | print("Putting values") 34 | await q.put_many.aio(list(range(10))) 35 | await asyncio.sleep(1) 36 | # About 10 tasks should now be done 37 | n_done_tasks = sum(1 for t in tasks if t.done()) 38 | assert n_done_tasks == 10 39 | # Finish remaining ones 40 | await q.put_many.aio(list(range(10))) 41 | await asyncio.sleep(1) 42 | assert all(t.done() for t in tasks) 43 | 44 | 45 | async def main(): 46 | with modal.Queue.ephemeral() as q: 47 | await run_async(q) 48 | await many_consumers(q) 49 | 50 | 51 | if __name__ == "__main__": 52 | asyncio.run(main()) 53 | -------------------------------------------------------------------------------- /internal/examples_test.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | import json 3 | import pathlib 4 | import sys 5 | 6 | import pytest 7 | from utils import ( 8 | EXAMPLES_ROOT, 9 | ExampleType, 10 | get_examples, 11 | get_examples_json, 12 | render_example_md, 13 | ) 14 | 15 | examples = [ex for ex in get_examples() if ex.type == ExampleType.MODULE] 16 | examples = [ex for ex in examples if ex.metadata.get("pytest", True)] 17 | example_ids = [ex.module for ex in examples] 18 | 19 | 20 | @pytest.fixture(autouse=True) 21 | def disable_auto_mount(monkeypatch): 22 | monkeypatch.setenv("MODAL_AUTOMOUNT", "0") 23 | yield 24 | 25 | 26 | @pytest.fixture(autouse=False) 27 | def add_root_to_syspath(monkeypatch): 28 | sys.path.append(str(EXAMPLES_ROOT)) 29 | yield 30 | sys.path.pop() 31 | 32 | 33 | @pytest.mark.parametrize("example", examples, ids=example_ids) 34 | def test_filename(example): 35 | assert not example.repo_filename.startswith("/") 36 | assert pathlib.Path(example.repo_filename).exists() 37 | 38 | 39 | @pytest.mark.parametrize("example", examples, ids=example_ids) 40 | def test_import(example, add_root_to_syspath): 41 | importlib.import_module(example.module) 42 | 43 | 44 | @pytest.mark.parametrize("example", examples, ids=example_ids) 45 | def test_render(example): 46 | md = render_example_md(example) 47 | assert isinstance(md, str) 48 | assert len(md) > 0 49 | 50 | 51 | def test_json(): 52 | data = get_examples_json() 53 | examples = json.loads(data) 54 | assert isinstance(examples, list) 55 | assert len(examples) > 0 56 | -------------------------------------------------------------------------------- /06_gpu_and_ml/gpu_fallbacks.py: -------------------------------------------------------------------------------- 1 | # # Set "fallback" GPUs 2 | # 3 | # GPU availabilities on Modal can fluctuate, especially for 4 | # tightly-constrained requests, like for eight co-located GPUs 5 | # in a specific region. 6 | # 7 | # If your code can run on multiple different GPUs, you can specify 8 | # your GPU request as a list, in order of preference, and whenever 9 | # your Function scales up, we will try to schedule it on each requested GPU type in order. 
10 | # 11 | # The code below demonstrates the usage of the `gpu` parameter with a list of GPUs. 12 | 13 | import subprocess 14 | 15 | import modal 16 | 17 | app = modal.App("example-gpu-fallbacks") 18 | 19 | 20 | @app.function( 21 | gpu=["h100", "a100", "any"], # "any" means any of L4, A10, or T4 22 | max_inputs=1, # new container each input, so we re-roll the GPU dice every time 23 | ) 24 | async def remote(_idx): 25 | gpu = subprocess.run( 26 | ["nvidia-smi", "--query-gpu=name", "--format=csv,noheader"], 27 | check=True, 28 | text=True, 29 | stdout=subprocess.PIPE, 30 | ).stdout.strip() 31 | print(gpu) 32 | return gpu 33 | 34 | 35 | @app.local_entrypoint() 36 | def local(count: int = 32): 37 | from collections import Counter 38 | 39 | gpu_counter = Counter(remote.map([i for i in range(count)], order_outputs=False)) 40 | print(f"ran {gpu_counter.total()} times") 41 | print(f"on the following {len(gpu_counter.keys())} GPUs:", end="\n") 42 | print( 43 | *[f"{gpu.rjust(32)}: {'🔥' * ct}" for gpu, ct in gpu_counter.items()], 44 | sep="\n", 45 | ) 46 | -------------------------------------------------------------------------------- /06_gpu_and_ml/openai_whisper/pod_transcriber/app/config.py: -------------------------------------------------------------------------------- 1 | import dataclasses 2 | import logging 3 | import pathlib 4 | 5 | 6 | @dataclasses.dataclass 7 | class ModelSpec: 8 | name: str 9 | params: str 10 | 11 | 12 | def get_logger(name, level=logging.INFO): 13 | logger = logging.getLogger(name) 14 | handler = logging.StreamHandler() 15 | handler.setFormatter( 16 | logging.Formatter("%(levelname)s: %(asctime)s: %(name)s %(message)s") 17 | ) 18 | logger.addHandler(handler) 19 | logger.setLevel(level) 20 | return logger 21 | 22 | 23 | CACHE_DIR = "/cache" 24 | # Where downloaded podcasts are stored, by guid hash. 25 | # Mostly .mp3 files 50-100MiB. 26 | RAW_AUDIO_DIR = pathlib.Path(CACHE_DIR, "raw_audio") 27 | # Stores metadata of individual podcast episodes as JSON. 28 | PODCAST_METADATA_DIR = pathlib.Path(CACHE_DIR, "podcast_metadata") 29 | # Completed episode transcriptions. Stored as flat files with 30 | # files structured as '{guid_hash}-{model_slug}.json'. 31 | TRANSCRIPTIONS_DIR = pathlib.Path(CACHE_DIR, "transcriptions") 32 | # Searching indexing files, refreshed by scheduled functions. 33 | SEARCH_DIR = pathlib.Path(CACHE_DIR, "search") 34 | # Location of modal checkpoint. 35 | MODEL_DIR = pathlib.Path(CACHE_DIR, "model") 36 | # Location of web frontend assets. 
37 | ASSETS_PATH = pathlib.Path(__file__).parent / "frontend" / "dist" 38 | 39 | transcripts_per_podcast_limit = 2 40 | 41 | supported_parakeet_models = { 42 | "parakeet-tdt-0.6b-v2": ModelSpec( 43 | name="nvidia/parakeet-tdt-0.6b-v2", params="600M" 44 | ), 45 | } 46 | 47 | DEFAULT_MODEL = supported_parakeet_models["parakeet-tdt-0.6b-v2"] 48 | -------------------------------------------------------------------------------- /10_integrations/dbt/sample_proj_duckdb_s3/models/customers.sql: -------------------------------------------------------------------------------- 1 | with customers as ( 2 | 3 | select * from {{ ref('stg_customers') }} 4 | 5 | ), 6 | 7 | orders as ( 8 | 9 | select * from {{ ref('stg_orders') }} 10 | 11 | ), 12 | 13 | payments as ( 14 | 15 | select * from {{ ref('stg_payments') }} 16 | 17 | ), 18 | 19 | customer_orders as ( 20 | 21 | select 22 | customer_id, 23 | 24 | min(order_date) as first_order, 25 | max(order_date) as most_recent_order, 26 | count(order_id) as number_of_orders 27 | from orders 28 | 29 | group by customer_id 30 | 31 | ), 32 | 33 | customer_payments as ( 34 | 35 | select 36 | orders.customer_id, 37 | sum(amount) as total_amount 38 | 39 | from payments 40 | 41 | left join orders on 42 | payments.order_id = orders.order_id 43 | 44 | group by orders.customer_id 45 | 46 | ), 47 | 48 | final as ( 49 | 50 | select 51 | customers.customer_id, 52 | customers.first_name, 53 | customers.last_name, 54 | customer_orders.first_order, 55 | customer_orders.most_recent_order, 56 | customer_orders.number_of_orders, 57 | customer_payments.total_amount as customer_lifetime_value 58 | 59 | from customers 60 | 61 | left join customer_orders 62 | on customers.customer_id = customer_orders.customer_id 63 | 64 | left join customer_payments 65 | on customers.customer_id = customer_payments.customer_id 66 | 67 | ) 68 | 69 | {{ config(materialized='external', format='parquet', location='s3://modal-example-dbt-duckdb-s3/out/customers.parquet') }} 70 | select * from final 71 | -------------------------------------------------------------------------------- /13_sandboxes/codelangchain/src/retrieval.py: -------------------------------------------------------------------------------- 1 | """Just as a constant function is _technically_ a polynomial, so too is injecting the same information every time _technically_ RAG.""" 2 | 3 | from .common import COLOR 4 | 5 | docs_url = "https://huggingface.co/docs/transformers/index" 6 | 7 | 8 | def retrieve_docs(url: str = docs_url, debug=False): 9 | from bs4 import BeautifulSoup as Soup 10 | from langchain_community.document_loaders.recursive_url_loader import ( 11 | RecursiveUrlLoader, 12 | ) 13 | 14 | print(f"{COLOR['HEADER']}📜: Retrieving documents from {url}{COLOR['ENDC']}") 15 | loader = RecursiveUrlLoader( 16 | url=docs_url, 17 | max_depth=2 // (int(debug) + 1), # retrieve fewer docs in debug mode 18 | extractor=lambda x: Soup(x, "html.parser").text, 19 | ) 20 | docs = loader.load() 21 | 22 | # sort the list based on the URLs 23 | d_sorted = sorted(docs, key=lambda x: x.metadata["source"], reverse=True) 24 | 25 | # combine them all together 26 | concatenated_content = "\n\n\n --- \n\n\n".join( 27 | [ 28 | "## " + doc.metadata["source"] + "\n\n" + doc.page_content.strip() 29 | for doc in d_sorted 30 | ] 31 | ) 32 | 33 | print( 34 | f"{COLOR['HEADER']}📜: Retrieved {len(docs)} documents{COLOR['ENDC']}", 35 | f"{COLOR['GREEN']}{concatenated_content[:100].strip()}{COLOR['ENDC']}", 36 | sep="\n", 37 | ) 38 | 39 | if debug: 40 | print( 41 | 
f"{COLOR['HEADER']}📜: Restricting to at most 30,000 characters{COLOR['ENDC']}" 42 | ) 43 | concatenated_content = concatenated_content[:30_000] 44 | 45 | return concatenated_content 46 | -------------------------------------------------------------------------------- /03_scaling_out/basic_grid_search.py: -------------------------------------------------------------------------------- 1 | # # Hyperparameter search 2 | # 3 | # This example showcases a simple grid search in one dimension, where we try different 4 | # parameters for a model and pick the one with the best results on a holdout set. 5 | # 6 | # ## Defining the image 7 | # 8 | # First, let's build a custom image and install scikit-learn in it. 9 | 10 | import modal 11 | 12 | app = modal.App( 13 | "example-basic-grid-search", 14 | image=modal.Image.debian_slim().uv_pip_install("scikit-learn~=1.5.0"), 15 | ) 16 | 17 | # ## The Modal function 18 | # 19 | # Next, define the function. Note that we use the custom image with scikit-learn in it. 20 | # We also take the hyperparameter `k`, which is how many nearest neighbors we use. 21 | 22 | 23 | @app.function() 24 | def fit_knn(k): 25 | from sklearn.datasets import load_digits 26 | from sklearn.model_selection import train_test_split 27 | from sklearn.neighbors import KNeighborsClassifier 28 | 29 | X, y = load_digits(return_X_y=True) 30 | X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42) 31 | 32 | clf = KNeighborsClassifier(k) 33 | clf.fit(X_train, y_train) 34 | score = float(clf.score(X_test, y_test)) 35 | print("k = %3d, score = %.4f" % (k, score)) 36 | return score, k 37 | 38 | 39 | # ## Parallel search 40 | # 41 | # To do a hyperparameter search, let's map over this function with different values 42 | # for `k`, and then select for the best score on the holdout set: 43 | 44 | 45 | @app.local_entrypoint() 46 | def main(): 47 | # Do a basic hyperparameter search 48 | best_score, best_k = max(fit_knn.map(range(1, 100))) 49 | print("Best k = %3d, score = %.4f" % (best_k, best_score)) 50 | -------------------------------------------------------------------------------- /02_building_containers/install_flash_attn.py: -------------------------------------------------------------------------------- 1 | # # Install Flash Attention on Modal 2 | 3 | # FlashAttention is an optimized CUDA library for Transformer 4 | # scaled-dot-product attention. Dao AI Lab now publishes pre-compiled 5 | # wheels, which makes installation quick. This script shows how to 6 | # 1. Pin an exact wheel that matches CUDA 12 / PyTorch 2.6 / Python 3.13. 7 | # 2. Build a Modal image that installs torch, numpy, and FlashAttention. 8 | # 3. Launch a GPU function to confirm the kernel runs on a GPU. 9 | 10 | import modal 11 | 12 | app = modal.App("example-install-flash-attn") 13 | 14 | # You need to specify an exact release wheel. You can find 15 | # [more on their github](https://github.com/Dao-AILab/flash-attention/releases). 
16 | 17 | flash_attn_release = ( 18 | "https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/" 19 | "flash_attn-2.7.4.post1+cu12torch2.6cxx11abiFALSE-cp313-cp313-linux_x86_64.whl" 20 | ) 21 | 22 | image = modal.Image.debian_slim(python_version="3.13").uv_pip_install( 23 | "torch==2.6.0", "numpy==2.2.4", flash_attn_release 24 | ) 25 | 26 | 27 | # And here is a demo verifying that it works: 28 | 29 | 30 | @app.function(gpu="L40S", image=image) 31 | def run_flash_attn(): 32 | import torch 33 | from flash_attn import flash_attn_func 34 | 35 | batch_size, seqlen, nheads, headdim, nheads_k = 2, 4, 3, 16, 3 36 | 37 | q = torch.randn(batch_size, seqlen, nheads, headdim, dtype=torch.float16).to("cuda") 38 | k = torch.randn(batch_size, seqlen, nheads_k, headdim, dtype=torch.float16).to( 39 | "cuda" 40 | ) 41 | v = torch.randn(batch_size, seqlen, nheads_k, headdim, dtype=torch.float16).to( 42 | "cuda" 43 | ) 44 | 45 | out = flash_attn_func(q, k, v) 46 | assert out.shape == (batch_size, seqlen, nheads, headdim) 47 | -------------------------------------------------------------------------------- /07_web_endpoints/flask_streaming.py: -------------------------------------------------------------------------------- 1 | # --- 2 | # cmd: ["modal", "serve", "07_web_endpoints/flask_streaming.py"] 3 | # --- 4 | 5 | # # Deploy Flask app with streaming results with Modal 6 | 7 | # This example shows how you can deploy a [Flask](https://flask.palletsprojects.com/en/3.0.x/) app with Modal that streams results back to the client. 8 | 9 | import modal 10 | 11 | app = modal.App( 12 | "example-flask-streaming", 13 | image=modal.Image.debian_slim().uv_pip_install("flask"), 14 | ) 15 | 16 | 17 | @app.function() 18 | def generate_rows(): 19 | """ 20 | This creates a large CSV file, about 10MB, which will be streaming downloaded 21 | by a web client. 22 | """ 23 | for i in range(10_000): 24 | line = ",".join(str((j + i) * i) for j in range(128)) 25 | yield f"{line}\n" 26 | 27 | 28 | @app.function() 29 | @modal.wsgi_app() 30 | def flask_app(): 31 | from flask import Flask 32 | 33 | web_app = Flask(__name__) 34 | 35 | # These web handlers follow the example from 36 | # https://flask.palletsprojects.com/en/2.2.x/patterns/streaming/ 37 | 38 | @web_app.route("/") 39 | def generate_large_csv(): 40 | # Run the function locally in the web app's container. 41 | return generate_rows.local(), {"Content-Type": "text/csv"} 42 | 43 | @web_app.route("/remote") 44 | def generate_large_csv_in_container(): 45 | # Run the function remotely in a separate container, 46 | # which will stream back results to the web app container, 47 | # which will stream back to the web client. 48 | # 49 | # This is less efficient, but demonstrates how web serving 50 | # containers can be separated from and cooperate with other 51 | # containers. 52 | return generate_rows.remote(), {"Content-Type": "text/csv"} 53 | 54 | return web_app 55 | -------------------------------------------------------------------------------- /10_integrations/streamlit/app.py: -------------------------------------------------------------------------------- 1 | # --- 2 | # lambda-test: false # auxiliary-file 3 | # --- 4 | # ## Demo Streamlit application. 5 | # 6 | # This application is the example from https://docs.streamlit.io/library/get-started/create-an-app. 7 | # 8 | # Streamlit is designed to run its apps as Python scripts, not functions, so we separate the Streamlit 9 | # code into this module, away from the Modal application code. 
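# (Aside, not part of this script: the Modal side lives in a separate serving
# module. A minimal sketch of what that module might look like; the image
# contents, port, and container path below are assumptions:
#
#     import shlex, subprocess
#     import modal
#
#     app = modal.App("example-serve-streamlit")
#     image = (
#         modal.Image.debian_slim()
#         .uv_pip_install("streamlit", "numpy", "pandas")
#         .add_local_file("app.py", "/root/app.py")
#     )
#
#     @app.function(image=image)
#     @modal.web_server(8000)
#     def run():
#         subprocess.Popen(
#             shlex.split("streamlit run /root/app.py --server.port 8000 --server.address 0.0.0.0")
#         )
# )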
10 | 11 | 12 | def main(): 13 | import numpy as np 14 | import pandas as pd 15 | import streamlit as st 16 | 17 | st.title("Uber pickups in NYC!") 18 | 19 | DATE_COLUMN = "date/time" 20 | DATA_URL = ( 21 | "https://s3-us-west-2.amazonaws.com/" 22 | "streamlit-demo-data/uber-raw-data-sep14.csv.gz" 23 | ) 24 | 25 | @st.cache_data 26 | def load_data(nrows): 27 | data = pd.read_csv(DATA_URL, nrows=nrows) 28 | 29 | def lowercase(x): 30 | return str(x).lower() 31 | 32 | data.rename(lowercase, axis="columns", inplace=True) 33 | data[DATE_COLUMN] = pd.to_datetime(data[DATE_COLUMN]) 34 | return data 35 | 36 | data_load_state = st.text("Loading data...") 37 | data = load_data(10000) 38 | data_load_state.text("Done! (using st.cache_data)") 39 | 40 | if st.checkbox("Show raw data"): 41 | st.subheader("Raw data") 42 | st.write(data) 43 | 44 | st.subheader("Number of pickups by hour") 45 | hist_values = np.histogram(data[DATE_COLUMN].dt.hour, bins=24, range=(0, 24))[0] 46 | st.bar_chart(hist_values) 47 | 48 | # Some number in the range 0-23 49 | hour_to_filter = st.slider("hour", 0, 23, 17) 50 | filtered_data = data[data[DATE_COLUMN].dt.hour == hour_to_filter] 51 | 52 | st.subheader("Map of all pickups at %s:00" % hour_to_filter) 53 | st.map(filtered_data) 54 | 55 | 56 | if __name__ == "__main__": 57 | main() 58 | -------------------------------------------------------------------------------- /06_gpu_and_ml/speech-to-text/multitalker-frontend/audio-processor.js: -------------------------------------------------------------------------------- 1 | class AudioProcessor extends AudioWorkletProcessor { 2 | constructor(options) { 3 | super(); 4 | 5 | const processorOptions = options.processorOptions || {}; 6 | this.sourceSampleRate = processorOptions.sourceSampleRate || 48000; 7 | this.targetSampleRate = processorOptions.targetSampleRate || 16000; 8 | this.downsampleRatio = this.sourceSampleRate / this.targetSampleRate; 9 | 10 | this.bufferSize = 512; // 1 second of audio at 16kHz 11 | this.buffer = new Float32Array(this.bufferSize); 12 | this.bufferIndex = 0; 13 | 14 | this.sampleAccumulator = 0; 15 | this.sampleCounter = 0; 16 | } 17 | 18 | process(inputs, outputs) { 19 | const input = inputs[0]; 20 | const channel = input[0]; 21 | 22 | if (!channel) return true; 23 | 24 | // simple downsampling by averaging 25 | for (let i = 0; i < channel.length; i++) { 26 | this.sampleAccumulator += channel[i]; 27 | this.sampleCounter++; 28 | 29 | if (this.sampleCounter >= this.downsampleRatio) { 30 | const downsampledValue = this.sampleAccumulator / this.sampleCounter; 31 | this.buffer[this.bufferIndex++] = downsampledValue; 32 | 33 | this.sampleAccumulator = 0; 34 | this.sampleCounter = 0; 35 | 36 | if (this.bufferIndex >= this.bufferSize) { 37 | const pcmData = new Int16Array(this.bufferSize); 38 | for (let j = 0; j < this.bufferSize; j++) { 39 | pcmData[j] = Math.max( 40 | -32768, 41 | Math.min(32767, Math.round(this.buffer[j] * 32768)) 42 | ); 43 | } 44 | 45 | this.port.postMessage(pcmData.buffer); 46 | 47 | this.bufferIndex = 0; 48 | } 49 | } 50 | } 51 | 52 | return true; 53 | } 54 | } 55 | 56 | registerProcessor("audio-processor", AudioProcessor); 57 | -------------------------------------------------------------------------------- /07_web_endpoints/fasthtml-checkboxes/cbx_locustfile.py: -------------------------------------------------------------------------------- 1 | # --- 2 | # lambda-test: false # auxiliary-file 3 | # pytest: false 4 | # --- 5 | import random 6 | 7 | from bs4 import BeautifulSoup 8 | from 
constants import N_CHECKBOXES 9 | from locust import HttpUser, between, task 10 | 11 | 12 | class CheckboxesUser(HttpUser): 13 | wait_time = between(0.01, 0.1) # Simulates a wait time between requests 14 | 15 | def load_homepage(self): 16 | """ 17 | Simulates a user loading the homepage and fetching the state of the checkboxes. 18 | """ 19 | response = self.client.get("/") 20 | soup = BeautifulSoup(response.text, "lxml") 21 | main_div = soup.find("main") 22 | self.id = main_div["hx-get"].split("/")[-1] 23 | 24 | @task(10) 25 | def toggle_random_checkboxes(self): 26 | """ 27 | Simulates a user toggling a random checkbox. 28 | """ 29 | n_checkboxes = random.binomialvariate( # approximately poisson at 10 30 | n=100, 31 | p=0.1, 32 | ) 33 | for _ in range(min(n_checkboxes, 1)): 34 | checkbox_id = int( 35 | N_CHECKBOXES * random.random() ** 2 36 | ) # Choose a random checkbox between 0 and 9999, more likely to be closer to 0 37 | self.client.post( 38 | f"/checkbox/toggle/{checkbox_id}", 39 | name="/checkbox/toggle", 40 | ) 41 | 42 | @task(1) 43 | def poll_for_diffs(self): 44 | """ 45 | Simulates a user polling for any outstanding diffs. 46 | """ 47 | self.client.get(f"/diffs/{self.id}", name="/diffs") 48 | 49 | def on_start(self): 50 | """ 51 | Called when a simulated user starts, typically used to initialize or login a user. 52 | """ 53 | self.id = str(random.randint(1, 9999)) 54 | self.load_homepage() 55 | -------------------------------------------------------------------------------- /06_gpu_and_ml/speech-to-text/streaming-diarization-frontend/audio-processor.js: -------------------------------------------------------------------------------- 1 | class AudioProcessor extends AudioWorkletProcessor { 2 | constructor(options) { 3 | super(); 4 | 5 | const processorOptions = options.processorOptions || {}; 6 | this.sourceSampleRate = processorOptions.sourceSampleRate || 48000; 7 | this.targetSampleRate = processorOptions.targetSampleRate || 16000; 8 | this.downsampleRatio = this.sourceSampleRate / this.targetSampleRate; 9 | 10 | this.bufferSize = 512; // 1 second of audio at 16kHz 11 | this.buffer = new Float32Array(this.bufferSize); 12 | this.bufferIndex = 0; 13 | 14 | this.sampleAccumulator = 0; 15 | this.sampleCounter = 0; 16 | } 17 | 18 | process(inputs, outputs) { 19 | const input = inputs[0]; 20 | const channel = input[0]; 21 | 22 | if (!channel) return true; 23 | 24 | // simple downsampling by averaging 25 | for (let i = 0; i < channel.length; i++) { 26 | this.sampleAccumulator += channel[i]; 27 | this.sampleCounter++; 28 | 29 | if (this.sampleCounter >= this.downsampleRatio) { 30 | const downsampledValue = this.sampleAccumulator / this.sampleCounter; 31 | this.buffer[this.bufferIndex++] = downsampledValue; 32 | 33 | this.sampleAccumulator = 0; 34 | this.sampleCounter = 0; 35 | 36 | if (this.bufferIndex >= this.bufferSize) { 37 | const pcmData = new Int16Array(this.bufferSize); 38 | for (let j = 0; j < this.bufferSize; j++) { 39 | pcmData[j] = Math.max( 40 | -32768, 41 | Math.min(32767, Math.round(this.buffer[j] * 32768)) 42 | ); 43 | } 44 | 45 | this.port.postMessage(pcmData.buffer); 46 | 47 | this.bufferIndex = 0; 48 | } 49 | } 50 | } 51 | 52 | return true; 53 | } 54 | } 55 | 56 | registerProcessor("audio-processor", AudioProcessor); 57 | -------------------------------------------------------------------------------- /06_gpu_and_ml/speech-to-text/streaming-parakeet-frontend/audio-processor.js: -------------------------------------------------------------------------------- 1 | class 
AudioProcessor extends AudioWorkletProcessor { 2 | constructor(options) { 3 | super(); 4 | 5 | const processorOptions = options.processorOptions || {}; 6 | this.sourceSampleRate = processorOptions.sourceSampleRate || 48000; 7 | this.targetSampleRate = processorOptions.targetSampleRate || 16000; 8 | this.downsampleRatio = this.sourceSampleRate / this.targetSampleRate; 9 | 10 | this.bufferSize = 16000; // 1 second of audio at 16kHz 11 | this.buffer = new Float32Array(this.bufferSize); 12 | this.bufferIndex = 0; 13 | 14 | this.sampleAccumulator = 0; 15 | this.sampleCounter = 0; 16 | } 17 | 18 | process(inputs, outputs) { 19 | const input = inputs[0]; 20 | const channel = input[0]; 21 | 22 | if (!channel) return true; 23 | 24 | // simple downsampling by averaging 25 | for (let i = 0; i < channel.length; i++) { 26 | this.sampleAccumulator += channel[i]; 27 | this.sampleCounter++; 28 | 29 | if (this.sampleCounter >= this.downsampleRatio) { 30 | const downsampledValue = this.sampleAccumulator / this.sampleCounter; 31 | this.buffer[this.bufferIndex++] = downsampledValue; 32 | 33 | this.sampleAccumulator = 0; 34 | this.sampleCounter = 0; 35 | 36 | if (this.bufferIndex >= this.bufferSize) { 37 | const pcmData = new Int16Array(this.bufferSize); 38 | for (let j = 0; j < this.bufferSize; j++) { 39 | pcmData[j] = Math.max( 40 | -32768, 41 | Math.min(32767, Math.round(this.buffer[j] * 32768)) 42 | ); 43 | } 44 | 45 | this.port.postMessage(pcmData.buffer); 46 | 47 | this.bufferIndex = 0; 48 | } 49 | } 50 | } 51 | 52 | return true; 53 | } 54 | } 55 | 56 | registerProcessor("audio-processor", AudioProcessor); 57 | -------------------------------------------------------------------------------- /10_integrations/dbt/sample_proj_duckdb_s3/seeds/raw_customers.csv: -------------------------------------------------------------------------------- 1 | id,first_name,last_name 2 | 1,Michael,P. 3 | 2,Shawn,M. 4 | 3,Kathleen,P. 5 | 4,Jimmy,C. 6 | 5,Katherine,R. 7 | 6,Sarah,R. 8 | 7,Martin,M. 9 | 8,Frank,R. 10 | 9,Jennifer,F. 11 | 10,Henry,W. 12 | 11,Fred,S. 13 | 12,Amy,D. 14 | 13,Kathleen,M. 15 | 14,Steve,F. 16 | 15,Teresa,H. 17 | 16,Amanda,H. 18 | 17,Kimberly,R. 19 | 18,Johnny,K. 20 | 19,Virginia,F. 21 | 20,Anna,A. 22 | 21,Willie,H. 23 | 22,Sean,H. 24 | 23,Mildred,A. 25 | 24,David,G. 26 | 25,Victor,H. 27 | 26,Aaron,R. 28 | 27,Benjamin,B. 29 | 28,Lisa,W. 30 | 29,Benjamin,K. 31 | 30,Christina,W. 32 | 31,Jane,G. 33 | 32,Thomas,O. 34 | 33,Katherine,M. 35 | 34,Jennifer,S. 36 | 35,Sara,T. 37 | 36,Harold,O. 38 | 37,Shirley,J. 39 | 38,Dennis,J. 40 | 39,Louise,W. 41 | 40,Maria,A. 42 | 41,Gloria,C. 43 | 42,Diana,S. 44 | 43,Kelly,N. 45 | 44,Jane,R. 46 | 45,Scott,B. 47 | 46,Norma,C. 48 | 47,Marie,P. 49 | 48,Lillian,C. 50 | 49,Judy,N. 51 | 50,Billy,L. 52 | 51,Howard,R. 53 | 52,Laura,F. 54 | 53,Anne,B. 55 | 54,Rose,M. 56 | 55,Nicholas,R. 57 | 56,Joshua,K. 58 | 57,Paul,W. 59 | 58,Kathryn,K. 60 | 59,Adam,A. 61 | 60,Norma,W. 62 | 61,Timothy,R. 63 | 62,Elizabeth,P. 64 | 63,Edward,G. 65 | 64,David,C. 66 | 65,Brenda,W. 67 | 66,Adam,W. 68 | 67,Michael,H. 69 | 68,Jesse,E. 70 | 69,Janet,P. 71 | 70,Helen,F. 72 | 71,Gerald,C. 73 | 72,Kathryn,O. 74 | 73,Alan,B. 75 | 74,Harry,A. 76 | 75,Andrea,H. 77 | 76,Barbara,W. 78 | 77,Anne,W. 79 | 78,Harry,H. 80 | 79,Jack,R. 81 | 80,Phillip,H. 82 | 81,Shirley,H. 83 | 82,Arthur,D. 84 | 83,Virginia,R. 85 | 84,Christina,R. 86 | 85,Theresa,M. 87 | 86,Jason,C. 88 | 87,Phillip,B. 89 | 88,Adam,T. 90 | 89,Margaret,J. 91 | 90,Paul,P. 92 | 91,Todd,W. 93 | 92,Willie,O. 94 | 93,Frances,R. 95 | 94,Gregory,H. 
96 | 95,Lisa,P. 97 | 96,Jacqueline,A. 98 | 97,Shirley,D. 99 | 98,Nicole,M. 100 | 99,Mary,G. 101 | 100,Jean,M. 102 | -------------------------------------------------------------------------------- /02_building_containers/import_sklearn.py: -------------------------------------------------------------------------------- 1 | # # Install scikit-learn in a custom image 2 | # 3 | # This builds a custom image which installs the sklearn (scikit-learn) Python package in it. 4 | # It's an example of how you can use packages, even if you don't have them installed locally. 5 | # 6 | # First, the imports 7 | 8 | import time 9 | 10 | import modal 11 | 12 | # Next, define an app, with a custom image that installs `sklearn`. 13 | 14 | app = modal.App( 15 | "example-import-sklearn", 16 | image=modal.Image.debian_slim() 17 | .apt_install("libgomp1") 18 | .uv_pip_install("scikit-learn"), 19 | ) 20 | 21 | # The `app.image.imports()` lets us conditionally import in the global scope. 22 | # This is needed because we might not have sklearn and numpy installed locally, 23 | # but we know they are installed inside the custom image. 24 | 25 | with app.image.imports(): 26 | import numpy as np 27 | from sklearn import datasets, linear_model 28 | 29 | # Now, let's define a function that uses one of scikit-learn's built-in datasets 30 | # and fits a very simple model (linear regression) to it 31 | 32 | 33 | @app.function() 34 | def fit(): 35 | print("Inside run!") 36 | t0 = time.time() 37 | diabetes_X, diabetes_y = datasets.load_diabetes(return_X_y=True) 38 | diabetes_X = diabetes_X[:, np.newaxis, 2] 39 | regr = linear_model.LinearRegression() 40 | regr.fit(diabetes_X, diabetes_y) 41 | return time.time() - t0 42 | 43 | 44 | # Finally, let's trigger the run locally. We also time this. Note that the first time we run this, 45 | # it will build the image. This might take 1-2 min. When we run this subsequent times, the image 46 | # is already build, and it will run much much faster. 47 | 48 | 49 | if __name__ == "__main__": 50 | t0 = time.time() 51 | with app.run(): 52 | t = fit.remote() 53 | print("Function time spent:", t) 54 | print("Full time spent:", time.time() - t0) 55 | -------------------------------------------------------------------------------- /13_sandboxes/sandbox_agent.py: -------------------------------------------------------------------------------- 1 | # --- 2 | # cmd: ["python", "13_sandboxes/sandbox_agent.py"] 3 | # pytest: false 4 | # --- 5 | 6 | # # Run Claude Code in a Modal Sandbox 7 | 8 | # This example demonstrates how to run Claude Code in a Modal 9 | # [Sandbox](https://modal.com/docs/guide/sandbox) to analyze a GitHub repository. 10 | # The Sandbox provides an isolated environment where the agent can safely execute code 11 | # and examine files. 12 | 13 | import modal 14 | 15 | app = modal.App.lookup("example-sandbox-agent", create_if_missing=True) 16 | 17 | # First we create a custom Image that has Claude Code installed. 18 | image = ( 19 | modal.Image.debian_slim(python_version="3.12") 20 | .apt_install("curl", "git") 21 | .env({"PATH": "/root/.local/bin:$PATH"}) # add claude to path 22 | .run_commands( 23 | "curl -fsSL https://claude.ai/install.sh | bash", 24 | ) 25 | ) 26 | 27 | with modal.enable_output(): 28 | sandbox = modal.Sandbox.create(app=app, image=image) 29 | print(f"Sandbox ID: {sandbox.object_id}") 30 | 31 | # Next we'll clone a repository that Claude Code will work on. 
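# We clone with `--depth 1` so only the latest commit is copied into the sandbox,
# keeping the download small. The URL below is just an example target: you can point
# it at any public repository you'd like the agent to analyze. Private repositories
# would additionally need credentials made available inside the sandbox, for example
# via a Modal Secret.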
32 | repo_url = "https://github.com/modal-labs/modal-examples" 33 | git_ps = sandbox.exec("git", "clone", "--depth", "1", repo_url, "/repo") 34 | git_ps.wait() 35 | print(f"Cloned '{repo_url}' into /repo.") 36 | 37 | # Finally we'll run Claude Code to analyze the repository. 38 | claude_cmd = ["claude", "-p", "What is in this repository?"] 39 | 40 | print("\nRunning command:", claude_cmd) 41 | 42 | claude_ps = sandbox.exec( 43 | *claude_cmd, 44 | pty=True, # Adding a PTY is important, since Claude requires it 45 | secrets=[ 46 | modal.Secret.from_name("anthropic-secret", required_keys=["ANTHROPIC_API_KEY"]) 47 | ], 48 | workdir="/repo", 49 | ) 50 | claude_ps.wait() 51 | 52 | print("\nAgent stdout:\n") 53 | print(claude_ps.stdout.read()) 54 | 55 | stderr = claude_ps.stderr.read() 56 | if stderr != "": 57 | print("Agent stderr:", stderr) 58 | -------------------------------------------------------------------------------- /02_building_containers/install_cuda.py: -------------------------------------------------------------------------------- 1 | # # Installing the CUDA Toolkit on Modal 2 | 3 | # This code sample is intended to quickly show how different layers of the CUDA stack are used on Modal. 4 | # For greater detail, see our [guide to using CUDA on Modal](https://modal.com/docs/guide/cuda). 5 | 6 | # All Modal Functions with GPUs already have the NVIDIA CUDA drivers, 7 | # NVIDIA System Management Interface, and CUDA Driver API installed. 8 | 9 | import modal 10 | 11 | app = modal.App("example-install-cuda") 12 | 13 | 14 | @app.function(gpu="T4") 15 | def nvidia_smi(): 16 | import subprocess 17 | 18 | subprocess.run(["nvidia-smi"], check=True) 19 | 20 | 21 | # This is enough to install and use many CUDA-dependent libraries, like PyTorch. 22 | 23 | 24 | @app.function(gpu="T4", image=modal.Image.debian_slim().uv_pip_install("torch")) 25 | def torch_cuda(): 26 | import torch 27 | 28 | print(torch.cuda.get_device_properties("cuda:0")) 29 | 30 | 31 | # If your application or its dependencies need components of the CUDA toolkit, 32 | # like the `nvcc` compiler driver, installed as system libraries or command-line tools, 33 | # you'll need to install those manually. 34 | 35 | # We recommend the official NVIDIA CUDA Docker images from Docker Hub. 36 | # You'll need to add Python 3 and pip with the `add_python` option because the image 37 | # doesn't have these by default. 38 | 39 | 40 | ctk_image = modal.Image.from_registry( 41 | "nvidia/cuda:12.4.0-devel-ubuntu22.04", add_python="3.11" 42 | ).entrypoint([]) # removes chatty prints on entry 43 | 44 | 45 | @app.function(gpu="T4", image=ctk_image) 46 | def nvcc_version(): 47 | import subprocess 48 | 49 | return subprocess.run(["nvcc", "--version"], check=True) 50 | 51 | 52 | # You can check that all these functions run by invoking this script with `modal run`. 53 | 54 | 55 | @app.local_entrypoint() 56 | def main(): 57 | nvidia_smi.remote() 58 | torch_cuda.remote() 59 | nvcc_version.remote() 60 | -------------------------------------------------------------------------------- /08_advanced/poll_delayed_result.py: -------------------------------------------------------------------------------- 1 | # --- 2 | # cmd: ["modal", "serve", "08_advanced/poll_delayed_result.py"] 3 | # --- 4 | 5 | # # Polling for a delayed result on Modal 6 | 7 | # This example shows how you can poll for a delayed result on Modal. 8 | 9 | # The function `factor_number` takes a number as input and returns the prime factors of the number. 
The function could take a long time to run, so we don't want to wait for the result in the web server. 10 | # Instead, we return a URL that the client can poll to get the result. 11 | 12 | import fastapi 13 | import modal 14 | from modal.functions import FunctionCall 15 | from starlette.responses import HTMLResponse, RedirectResponse 16 | 17 | app = modal.App("example-poll-delayed-result") 18 | 19 | web_app = fastapi.FastAPI() 20 | 21 | 22 | @app.function(image=modal.Image.debian_slim().uv_pip_install("primefac")) 23 | def factor_number(number): 24 | import primefac 25 | 26 | return list(primefac.primefac(number)) # could take a long time 27 | 28 | 29 | @web_app.get("/") 30 | async def index(): 31 | return HTMLResponse( 32 | """ 33 |
34 | <form method="get" action="/factors"> Enter a number: <input name="number" /> 35 | <input type="submit" value="Submit" /> 36 | </form>
37 | """ 38 | ) 39 | 40 | 41 | @web_app.get("/factors") 42 | async def web_submit(request: fastapi.Request, number: int): 43 | call = factor_number.spawn( 44 | number 45 | ) # returns a FunctionCall without waiting for result 46 | polling_url = request.url.replace( 47 | path="/result", query=f"function_id={call.object_id}" 48 | ) 49 | return RedirectResponse(polling_url) 50 | 51 | 52 | @web_app.get("/result") 53 | async def web_poll(function_id: str): 54 | function_call = FunctionCall.from_id(function_id) 55 | try: 56 | result = function_call.get(timeout=0) 57 | except TimeoutError: 58 | result = "not ready" 59 | 60 | return result 61 | 62 | 63 | @app.function() 64 | @modal.asgi_app() 65 | def fastapi_app(): 66 | return web_app 67 | -------------------------------------------------------------------------------- /06_gpu_and_ml/comfyui/memory_snapshot/memory_snapshot_helper/prestartup_script.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | from pathlib import Path 4 | 5 | comfy_dir = Path(__file__).parent.parent.parent / "comfy" 6 | 7 | model_management_path = str(comfy_dir / "model_management.py") 8 | original_model_management_path = str(comfy_dir / "model_management_original.py") 9 | is_patched = os.path.exists(original_model_management_path) 10 | 11 | 12 | def _apply_cuda_safe_patch(): 13 | """Apply a permanent patch that avoid torch cuda init during snapshots""" 14 | 15 | shutil.copy(model_management_path, original_model_management_path) 16 | print( 17 | "[memory_snapshot_helper] ==> Applying CUDA-safe patch for model_management.py" 18 | ) 19 | 20 | with open(model_management_path, "r") as f: 21 | content = f.read() 22 | 23 | # Find the get_torch_device function and modify the CUDA device access 24 | # The original line uses: return torch.device(torch.cuda.current_device()) 25 | # We'll replace it with a check if CUDA is available 26 | 27 | # Define the patched content as a constant 28 | CUDA_SAFE_PATCH = """import os 29 | if torch.cuda.is_available(): 30 | return torch.device(torch.cuda.current_device()) 31 | else: 32 | logging.info("[memory_snapshot_helper] CUDA is not available, defaulting to cpu") 33 | return torch.device('cpu') # Safe fallback during snapshot""" 34 | 35 | if "return torch.device(torch.cuda.current_device())" in content: 36 | patched_content = content.replace( 37 | "return torch.device(torch.cuda.current_device())", CUDA_SAFE_PATCH 38 | ) 39 | 40 | # Save the patched version 41 | with open(model_management_path, "w") as f: 42 | f.write(patched_content) 43 | 44 | print("[memory_snapshot_helper] ==> Successfully patched model_management.py") 45 | else: 46 | raise Exception( 47 | "[memory_snapshot_helper] ==> Failed to patch model_management.py" 48 | ) 49 | 50 | 51 | if not is_patched: 52 | _apply_cuda_safe_patch() 53 | -------------------------------------------------------------------------------- /06_gpu_and_ml/openai_whisper/pod_transcriber/README.md: -------------------------------------------------------------------------------- 1 | # Modal Podcast Transcriber 2 | 3 | This is a complete application that uses [NVIDIA Parakeet ASR](https://docs.nvidia.com/nemo-framework/user-guide/latest/nemotoolkit/asr/models.html#parakeet) to transcribe podcasts. Modal spins up multiple containers for a single transcription run, so hours of audio can be transcribed on-demand in a few minutes. 
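The speedup comes from fan-out: the episode's audio is chunked and a transcription function is mapped over the chunks, so Modal can process many of them in parallel. A minimal sketch of that pattern (the names and signatures below are illustrative, not this app's actual API):

```python
import modal

app = modal.App("pod-transcriber-sketch")


@app.function(gpu="a10g")
def transcribe_segment(segment: bytes) -> str:
    # placeholder for loading the ASR model and transcribing one chunk of audio
    return "..."


@app.function()
def transcribe_episode(segments: list[bytes]) -> str:
    # .map() fans the segments out across parallel containers and gathers the results
    return " ".join(transcribe_segment.map(segments))


@app.local_entrypoint()
def main():
    fake_segments = [b"...", b"...", b"..."]  # stand-ins for real audio chunks
    print(transcribe_episode.remote(fake_segments))
```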
4 | 5 | You can find our deployment of the app [here](https://modal-labs-examples--parakeet-pod-transcriber-fastapi-app.modal.run/). 6 | 7 | ## Architecture 8 | 9 | The entire application is hosted serverlessly on Modal and consists of 3 components: 10 | 11 | 1. React + Vite SPA ([`app/frontend/`](./app/frontend/)) 12 | 2. FastAPI server ([`app/api.py`](./app/api.py)) 13 | 3. Modal async job queue ([`app/main.py`](./app/main.py)) 14 | 15 | ## Developing locally 16 | 17 | ### Requirements 18 | 19 | - `npm` 20 | - `modal` installed in your current Python virtual environment 21 | 22 | ### Podchaser Secret 23 | 24 | To run this on your own Modal account, you'll need to [create a Podchaser account and create an API key](https://api-docs.podchaser.com/docs/guides/guide-first-podchaser-query/#getting-your-access-token). 25 | 26 | Then, create a [Modal Secret](https://modal.com/secrets/) with the following keys: 27 | 28 | - `PODCHASER_CLIENT_SECRET` 29 | - `PODCHASER_CLIENT_ID` 30 | 31 | You can find both on [their API page](https://www.podchaser.com/profile/settings/api). 32 | 33 | ### Vite build 34 | 35 | `cd` into the `app/frontend` directory, and run: 36 | 37 | - `npm install` 38 | - `npx vite build --watch` 39 | 40 | The last command will start a watcher process that will rebuild your static frontend files whenever you make changes to the frontend code. 41 | 42 | ### Serve on Modal 43 | 44 | Once you have `vite build` running, in a separate shell run this to start an ephemeral app on Modal: 45 | 46 | ```shell 47 | modal serve -m app.main 48 | ``` 49 | 50 | Pressing `Ctrl+C` will stop your app. 51 | 52 | ### Deploy to Modal 53 | 54 | Once your happy with your changes, run `modal deploy -m app.main` to deploy your app to Modal. 55 | -------------------------------------------------------------------------------- /.github/workflows/run-examples.yml: -------------------------------------------------------------------------------- 1 | name: Run changed examples 2 | 3 | on: 4 | pull_request: 5 | branches: 6 | - main 7 | paths: 8 | - "**.py" 9 | push: 10 | branches: 11 | - main 12 | paths: 13 | - "**.py" 14 | workflow_dispatch: 15 | 16 | # Cancel previous runs of the same PR but do not cancel previous runs on main 17 | concurrency: 18 | group: ${{ github.workflow }}-${{ github.ref }} 19 | cancel-in-progress: ${{ github.ref != 'refs/heads/main' }} 20 | 21 | env: 22 | TERM: linux 23 | TERMINFO: /etc/terminfo 24 | MODAL_ENVIRONMENT: examples 25 | 26 | jobs: 27 | # Output all changed files in a JSON format compatible with GitHub Actions job matrices 28 | diff-matrix: 29 | name: Generate matrix of changed examples 30 | runs-on: ubuntu-24.04 31 | outputs: 32 | matrix: ${{ steps.diff.outputs.all_changed_files }} 33 | 34 | steps: 35 | - uses: actions/checkout@v3 36 | with: 37 | fetch-depth: 0 38 | 39 | - name: Find changed examples 40 | id: diff 41 | run: python3 -m internal.generate_diff_matrix 42 | 43 | # Run each changed example, using the output of the previous step as a job matrix 44 | run-changed: 45 | name: Run changed example 46 | needs: [diff-matrix] 47 | if: ${{ needs.diff-matrix.outputs.matrix != '[]' && 48 | needs.diff-matrix.outputs.matrix != '' }} 49 | runs-on: ubuntu-24.04 50 | strategy: 51 | matrix: 52 | file: ${{ fromJson(needs.diff-matrix.outputs.matrix) }} 53 | fail-fast: false 54 | 55 | steps: 56 | - name: Checkout Repository 57 | uses: actions/checkout@v3 58 | with: 59 | fetch-depth: 1 60 | - uses: ./.github/actions/setup 61 | 62 | - name: Run example 63 | run: | 64 | echo "Running ${{ 
matrix.file }}" 65 | stem=$(basename "${{ matrix.file }}" .py) 66 | python3 -m internal.run_example $stem || exit $? 67 | env: 68 | MODAL_TOKEN_ID: ${{ secrets.MODAL_MODAL_LABS_TOKEN_ID }} 69 | MODAL_TOKEN_SECRET: ${{ secrets.MODAL_MODAL_LABS_TOKEN_SECRET }} 70 | -------------------------------------------------------------------------------- /01_getting_started/inference_perf.py: -------------------------------------------------------------------------------- 1 | # --- 2 | # cmd: ["python", "01_getting_started/inference_perf.py"] 3 | # deploy: true 4 | # mypy: ignore-errors 5 | # --- 6 | from pathlib import Path 7 | 8 | import modal 9 | 10 | app = modal.App("example-inference-perf") 11 | image = ( 12 | modal.Image.debian_slim() 13 | .uv_pip_install("transformers[torch]") 14 | .uv_pip_install("fastapi") 15 | ) 16 | 17 | with image.imports(): 18 | from transformers import pipeline 19 | 20 | weights_cache = { 21 | "/root/.cache/huggingface": modal.Volume.from_name( 22 | "example-inference", create_if_missing=True 23 | ) 24 | } 25 | 26 | 27 | @app.cls(gpu="h100", image=image, volumes=weights_cache, enable_memory_snapshot=True) 28 | class Chat: 29 | @modal.enter() 30 | def init(self): 31 | self.chatbot = pipeline( 32 | model="Qwen/Qwen3-1.7B-FP8", device_map="cuda", max_new_tokens=1024 33 | ) 34 | 35 | @modal.fastapi_endpoint(docs=True) 36 | def web(self, prompt: str | None = None) -> list[dict]: 37 | result = self.run.local(prompt) 38 | return result 39 | 40 | @modal.method() 41 | def run(self, prompt: str | None = None) -> list[dict]: 42 | if prompt is None: 43 | prompt = f"/no_think Read this code.\n\n{Path(__file__).read_text()}\nIn one paragraph, what does the code do?" 44 | 45 | print(prompt) 46 | context = [{"role": "user", "content": prompt}] 47 | 48 | result = self.chatbot(context) 49 | print(result[0]["generated_text"][-1]["content"]) 50 | 51 | return result 52 | 53 | 54 | if __name__ == "__main__": 55 | import json 56 | import urllib.request 57 | from datetime import datetime 58 | 59 | ChatCls = modal.Cls.from_name(app.name, "Chat") 60 | chat = ChatCls() 61 | print(datetime.now(), "making .remote call to Chat.run") 62 | print(chat.run.remote()) 63 | print(datetime.now(), "making web request to", url := chat.web.get_web_url()) 64 | 65 | with urllib.request.urlopen(url) as response: 66 | print(datetime.now()) 67 | print(json.loads(response.read().decode("utf-8"))) 68 | -------------------------------------------------------------------------------- /06_gpu_and_ml/openai_whisper/finetuning/train/transcribe.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import TYPE_CHECKING 3 | 4 | from .logs import get_logger 5 | 6 | if TYPE_CHECKING: 7 | from numpy import ndarray 8 | 9 | logger = get_logger(__name__) 10 | 11 | 12 | def whisper_transcribe_local_file( 13 | model_dir: os.PathLike, 14 | language: str, 15 | filepath: os.PathLike, 16 | sample_rate_hz: int, 17 | ) -> str: 18 | """Convenience function for transcribing a single local audio file with a Whisper model already saved to disk.""" 19 | from datasets import Audio, Dataset 20 | 21 | audio_dataset = Dataset.from_dict({"audio": [str(filepath)]}).cast_column( 22 | "audio", Audio(sampling_rate=sample_rate_hz) 23 | ) 24 | row = next(iter(audio_dataset)) 25 | return whisper_transcribe_audio( 26 | model_dir, 27 | language, 28 | data=row["audio"]["array"], 29 | sample_rate_hz=row["audio"]["sampling_rate"], 30 | ) 31 | 32 | 33 | def whisper_transcribe_audio( 34 | model_dir: 
os.PathLike, 35 | language: str, 36 | data: "ndarray", 37 | sample_rate_hz: int, 38 | ) -> str: 39 | """Transcribes a single audio sample with a Whisper model, for demonstration purposes.""" 40 | from transformers import ( 41 | WhisperForConditionalGeneration, 42 | WhisperProcessor, 43 | ) 44 | 45 | # load model and processor 46 | processor = WhisperProcessor.from_pretrained(model_dir) 47 | model = WhisperForConditionalGeneration.from_pretrained(model_dir) 48 | forced_decoder_ids = processor.get_decoder_prompt_ids( 49 | language=language, task="transcribe" 50 | ) 51 | input_features = processor( 52 | data, 53 | sampling_rate=sample_rate_hz, 54 | return_tensors="pt", 55 | ).input_features 56 | 57 | # generate token ids 58 | predicted_ids = model.generate( 59 | input_features, forced_decoder_ids=forced_decoder_ids 60 | ) 61 | # decode token ids to text 62 | predicted_transcription = processor.batch_decode( 63 | predicted_ids, skip_special_tokens=True 64 | )[0] 65 | return predicted_transcription 66 | -------------------------------------------------------------------------------- /09_job_queues/doc_ocr_frontend/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | Receipt Parser 8 | 9 | 10 | 11 | 12 | 13 | 17 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 44 | 45 | 46 | 64 | 65 | 66 | 67 |
68 | 69 | 70 | 71 | -------------------------------------------------------------------------------- /07_web_endpoints/webrtc/frontend/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | WebRTC YOLO Demo 5 | 48 | 49 | 50 |
51 | 54 | 57 |
58 |
59 | 60 | 61 |
62 |
63 | 64 | 65 | 66 |
67 |
68 | 69 | 70 | -------------------------------------------------------------------------------- /06_gpu_and_ml/dreambooth/assets/background.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /06_gpu_and_ml/hyperparameter-sweep/assets/background.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /06_gpu_and_ml/protein-folding/frontend/background.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /06_gpu_and_ml/comfyui/ip_adapter/ip_adapter_example.py: -------------------------------------------------------------------------------- 1 | # --- 2 | # cmd: ["modal", "serve", "06_gpu_and_ml/comfyui/ip_adapter/ip_adapter_example.py"] 3 | # --- 4 | 5 | import subprocess 6 | 7 | import modal 8 | 9 | image = ( # build up a Modal Image to run ComfyUI, step by step 10 | modal.Image.debian_slim( # start from basic Linux with Python 11 | python_version="3.11" 12 | ) 13 | .apt_install("git") # install git to clone ComfyUI 14 | .uv_pip_install("comfy-cli==1.2.7") # install comfy-cli 15 | .run_commands( # use comfy-cli to install the ComfyUI repo and its dependencies 16 | "comfy --skip-prompt install --nvidia" 17 | ) 18 | .run_commands( # download the WAS Node Suite custom node pack 19 | "comfy node install comfyui_ipadapter_plus" 20 | ) 21 | .run_commands("apt install -y wget") 22 | .run_commands( # the Unified Model Loader node requires these two models to be named a specific way, so we use wget instead of the usual comfy model download command 23 | "wget -q -O /root/comfy/ComfyUI/models/clip_vision/CLIP-ViT-H-14-laion2B-s32B-b79K.safetensors https://huggingface.co/h94/IP-Adapter/resolve/main/models/image_encoder/model.safetensors", 24 | ) 25 | .run_commands( 26 | "wget -q -O /root/comfy/ComfyUI/models/clip_vision/CLIP-ViT-bigG-14-laion2B-39B-b160k.safetensors, https://huggingface.co/h94/IP-Adapter/resolve/main/sdxl_models/image_encoder/model.safetensors", 27 | ) 28 | .run_commands( # download the IP-Adapter model 29 | "comfy --skip-prompt model download --url https://huggingface.co/h94/IP-Adapter/resolve/main/models/ip-adapter_sd15.safetensors --relative-path models/ipadapter" 30 | ) 31 | .run_commands( 32 | "comfy --skip-prompt model download --url https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5/resolve/main/v1-5-pruned.safetensors --relative-path models/checkpoints", 33 | ) 34 | ) 35 | 36 | app = modal.App(name="example-ip-adapter", image=image) 37 | 38 | 39 | # Run ComfyUI as an interactive web server 40 | @app.function( 41 | max_containers=1, 42 | scaledown_window=30, 43 | timeout=1800, 44 | gpu="A10G", 45 | ) 46 | @modal.concurrent(max_inputs=10) 47 | @modal.web_server(8000, startup_timeout=60) 48 | def ui(): 49 | subprocess.Popen("comfy launch -- --listen 0.0.0.0 --port 8000", shell=True) 50 | -------------------------------------------------------------------------------- 
/06_gpu_and_ml/comfyui/workflow_api.json: -------------------------------------------------------------------------------- 1 | { 2 | "6": { 3 | "inputs": { 4 | "text": "Surreal dreamscape with floating islands, upside-down waterfalls, and impossible geometric structures, all bathed in a soft, ethereal light", 5 | "clip": ["30", 1] 6 | }, 7 | "class_type": "CLIPTextEncode", 8 | "_meta": { 9 | "title": "CLIP Text Encode (Positive Prompt)" 10 | } 11 | }, 12 | "8": { 13 | "inputs": { 14 | "samples": ["31", 0], 15 | "vae": ["30", 2] 16 | }, 17 | "class_type": "VAEDecode", 18 | "_meta": { 19 | "title": "VAE Decode" 20 | } 21 | }, 22 | "9": { 23 | "inputs": { 24 | "filename_prefix": "ComfyUI", 25 | "images": ["37", 0] 26 | }, 27 | "class_type": "SaveImage", 28 | "_meta": { 29 | "title": "Save Image" 30 | } 31 | }, 32 | "27": { 33 | "inputs": { 34 | "width": 1024, 35 | "height": 1024, 36 | "batch_size": 1 37 | }, 38 | "class_type": "EmptySD3LatentImage", 39 | "_meta": { 40 | "title": "EmptySD3LatentImage" 41 | } 42 | }, 43 | "30": { 44 | "inputs": { 45 | "ckpt_name": "flux1-schnell-fp8.safetensors" 46 | }, 47 | "class_type": "CheckpointLoaderSimple", 48 | "_meta": { 49 | "title": "Load Checkpoint" 50 | } 51 | }, 52 | "31": { 53 | "inputs": { 54 | "seed": 74618958969040, 55 | "steps": 4, 56 | "cfg": 1, 57 | "sampler_name": "euler", 58 | "scheduler": "simple", 59 | "denoise": 1, 60 | "model": ["30", 0], 61 | "positive": ["6", 0], 62 | "negative": ["33", 0], 63 | "latent_image": ["27", 0] 64 | }, 65 | "class_type": "KSampler", 66 | "_meta": { 67 | "title": "KSampler" 68 | } 69 | }, 70 | "33": { 71 | "inputs": { 72 | "text": "", 73 | "clip": ["30", 1] 74 | }, 75 | "class_type": "CLIPTextEncode", 76 | "_meta": { 77 | "title": "CLIP Text Encode (Negative Prompt)" 78 | } 79 | }, 80 | "37": { 81 | "inputs": { 82 | "mode": "rescale", 83 | "supersample": "true", 84 | "resampling": "lanczos", 85 | "rescale_factor": 2, 86 | "resize_width": 1024, 87 | "resize_height": 1536, 88 | "image": ["8", 0] 89 | }, 90 | "class_type": "Image Resize", 91 | "_meta": { 92 | "title": "Image Resize" 93 | } 94 | } 95 | } 96 | -------------------------------------------------------------------------------- /internal/run_example.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import random 4 | import subprocess 5 | import sys 6 | import time 7 | 8 | from . 
import utils 9 | 10 | MINUTES = 60 11 | DEFAULT_TIMEOUT = 12 * MINUTES 12 | 13 | 14 | def run_script(example, timeout=DEFAULT_TIMEOUT): 15 | t0 = time.time() 16 | 17 | print(f"Running example {example.stem} with timeout {timeout}s") 18 | 19 | try: 20 | print(f"cli args: {example.cli_args}") 21 | if "runc" in example.runtimes: 22 | example.env |= {"MODAL_FUNCTION_RUNTIME": "runc"} 23 | process = subprocess.run( 24 | [str(x) for x in example.cli_args], 25 | env=os.environ | example.env | {"MODAL_SERVE_TIMEOUT": "5.0"}, 26 | timeout=timeout, 27 | ) 28 | total_time = time.time() - t0 29 | if process.returncode == 0: 30 | print(f"Success after {total_time:.2f}s :)") 31 | else: 32 | print( 33 | f"Failed after {total_time:.2f}s with return code {process.returncode} :(" 34 | ) 35 | 36 | returncode = process.returncode 37 | 38 | except subprocess.TimeoutExpired: 39 | print(f"Past timeout of {timeout}s :(") 40 | returncode = 999 41 | 42 | return returncode 43 | 44 | 45 | def run_single_example(stem, timeout=DEFAULT_TIMEOUT): 46 | examples = utils.get_examples() 47 | for example in examples: 48 | if stem == example.stem and example.metadata.get("lambda-test", True): 49 | return run_script(example, timeout=timeout) 50 | else: 51 | print(f"Could not find example name {stem}") 52 | return 0 53 | 54 | 55 | def run_random_example(timeout=DEFAULT_TIMEOUT): 56 | examples = filter( 57 | lambda ex: ex.metadata and ex.metadata.get("lambda-test", True), 58 | utils.get_examples(), 59 | ) 60 | return run_script(random.choice(list(examples)), timeout=timeout) 61 | 62 | 63 | if __name__ == "__main__": 64 | parser = argparse.ArgumentParser() 65 | parser.add_argument("example", nargs="?", default=None) 66 | parser.add_argument("--timeout", type=int, default=DEFAULT_TIMEOUT) 67 | args = parser.parse_args() 68 | print(args) 69 | if args.example: 70 | sys.exit(run_single_example(args.example, timeout=args.timeout)) 71 | else: 72 | sys.exit(run_random_example(timeout=args.timeout)) 73 | -------------------------------------------------------------------------------- /10_integrations/webscraper_old.py: -------------------------------------------------------------------------------- 1 | # # Web Scraping on Modal 2 | 3 | # This example shows how you can scrape links from a website and post them to a Slack channel using Modal. 
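#
# The Slack half of the example expects a bot token stored in a Modal Secret named
# `scraper-slack-secret` (see `bot_token_msg` below). If you haven't created it yet,
# one way to do so from the CLI, substituting your own bot token, is:
#
#     modal secret create scraper-slack-secret SLACK_BOT_TOKEN=xoxb-...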
4 | 5 | import os 6 | 7 | import modal 8 | 9 | app = modal.App("example-webscraper") 10 | 11 | 12 | playwright_image = modal.Image.debian_slim( 13 | python_version="3.10" 14 | ).run_commands( # Doesn't work with 3.11 yet 15 | "apt-get update", 16 | "apt-get install -y software-properties-common", 17 | "apt-add-repository non-free", 18 | "apt-add-repository contrib", 19 | "pip install playwright==1.42.0", 20 | "playwright install-deps chromium", 21 | "playwright install chromium", 22 | ) 23 | 24 | 25 | @app.function(image=playwright_image) 26 | async def get_links(url: str) -> set[str]: 27 | from playwright.async_api import async_playwright 28 | 29 | async with async_playwright() as p: 30 | browser = await p.chromium.launch() 31 | page = await browser.new_page() 32 | await page.goto(url) 33 | links = await page.eval_on_selector_all( 34 | "a[href]", "elements => elements.map(element => element.href)" 35 | ) 36 | await browser.close() 37 | 38 | return set(links) 39 | 40 | 41 | slack_sdk_image = modal.Image.debian_slim(python_version="3.10").uv_pip_install( 42 | "slack-sdk==3.27.1" 43 | ) 44 | 45 | 46 | @app.function( 47 | image=slack_sdk_image, 48 | secrets=[ 49 | modal.Secret.from_name( 50 | "scraper-slack-secret", required_keys=["SLACK_BOT_TOKEN"] 51 | ) 52 | ], 53 | ) 54 | def bot_token_msg(channel, message): 55 | import slack_sdk 56 | from slack_sdk.http_retry.builtin_handlers import RateLimitErrorRetryHandler 57 | 58 | client = slack_sdk.WebClient(token=os.environ["SLACK_BOT_TOKEN"]) 59 | rate_limit_handler = RateLimitErrorRetryHandler(max_retry_count=3) 60 | client.retry_handlers.append(rate_limit_handler) 61 | 62 | print(f"Posting {message} to #{channel}") 63 | client.chat_postMessage(channel=channel, text=message) 64 | 65 | 66 | @app.function() 67 | def scrape(): 68 | links_of_interest = ["http://modal.com"] 69 | 70 | for links in get_links.map(links_of_interest): 71 | for link in links: 72 | bot_token_msg.remote("scraped-links", link) 73 | 74 | 75 | @app.function(schedule=modal.Period(days=1)) 76 | def daily_scrape(): 77 | scrape.remote() 78 | 79 | 80 | @app.local_entrypoint() 81 | def run(): 82 | scrape.remote() 83 | -------------------------------------------------------------------------------- /06_gpu_and_ml/comfyui/comfyclient.py: -------------------------------------------------------------------------------- 1 | # --- 2 | # cmd: ["python", "06_gpu_and_ml/comfyui/comfyclient.py", "--modal-workspace", "modal-labs-examples", "--prompt", "Spider-Man visits Yosemite, rendered by Blender, trending on artstation"] 3 | # output-directory: "/tmp/comfyui" 4 | # --- 5 | 6 | import argparse 7 | import json 8 | import pathlib 9 | import sys 10 | import time 11 | import urllib.request 12 | 13 | OUTPUT_DIR = pathlib.Path("/tmp/comfyui") 14 | OUTPUT_DIR.mkdir(exist_ok=True, parents=True) 15 | 16 | 17 | def main(args: argparse.Namespace): 18 | url = f"https://{args.modal_workspace}--example-comfyapp-comfyui-api{'-dev' if args.dev else ''}.modal.run/" 19 | data = json.dumps({"prompt": args.prompt}).encode("utf-8") 20 | print(f"Sending request to {url} with prompt: {args.prompt}") 21 | print("Waiting for response...") 22 | start_time = time.time() 23 | req = urllib.request.Request( 24 | url, data=data, headers={"Content-Type": "application/json"} 25 | ) 26 | try: 27 | with urllib.request.urlopen(req) as response: 28 | assert response.status == 200, response.status 29 | elapsed = round(time.time() - start_time, 1) 30 | print(f"Image finished generating in {elapsed} seconds!") 31 | filename = 
OUTPUT_DIR / f"{slugify(args.prompt)}.png" 32 | filename.write_bytes(response.read()) 33 | print(f"Saved to '{filename}'") 34 | except urllib.error.HTTPError as e: 35 | if e.code == 404: 36 | print(f"Workflow API not found at {url}") 37 | 38 | 39 | def parse_args(arglist: list[str]) -> argparse.Namespace: 40 | parser = argparse.ArgumentParser() 41 | 42 | parser.add_argument( 43 | "--modal-workspace", 44 | type=str, 45 | required=True, 46 | help="Name of the Modal workspace with the deployed app. Run `modal profile current` to check.", 47 | ) 48 | parser.add_argument( 49 | "--prompt", 50 | type=str, 51 | required=True, 52 | help="Prompt for the image generation model.", 53 | ) 54 | parser.add_argument( 55 | "--dev", 56 | action="store_true", 57 | help="use this flag when running the ComfyUI server in development mode with `modal serve`", 58 | ) 59 | 60 | return parser.parse_args(arglist[1:]) 61 | 62 | 63 | def slugify(s: str) -> str: 64 | return s.lower().replace(" ", "-").replace(".", "-").replace("/", "-")[:32] 65 | 66 | 67 | if __name__ == "__main__": 68 | args = parse_args(sys.argv) 69 | main(args) 70 | -------------------------------------------------------------------------------- /06_gpu_and_ml/gpu_snapshot.py: -------------------------------------------------------------------------------- 1 | # --- 2 | # deploy: true 3 | # cmd: ["python", "06_gpu_and_ml/gpu_snapshot.py"] 4 | # mypy: ignore-errors 5 | # --- 6 | 7 | # # Snapshot GPU memory to speed up cold starts 8 | 9 | # This example demonstrates how to use GPU memory snapshots to speed up model loading. 10 | # Note that GPU memory snapshotting is an experimental feature, 11 | # so test carefully before using in production! 12 | # You can read more about GPU memory snapshotting, and its caveats, 13 | # [here](https://modal.com/docs/guide/memory-snapshot). 
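#
# Concretely, the snapshot is taken after the methods decorated with
# `@modal.enter(snap=True)` have run. In this example that means the embedding model
# has already been loaded onto the GPU, so later cold starts restore that state
# instead of repeating the load.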
14 | 15 | # GPU snapshots can only be used with deployed Functions, so first deploy the App: 16 | 17 | # ```bash 18 | # modal deploy -m 06_gpu_and_ml.gpu_snapshot 19 | # ``` 20 | 21 | # Next, invoke the Function: 22 | 23 | # ```bash 24 | # python -m 06_gpu_and_ml.gpu_snapshot 25 | # ``` 26 | 27 | # The full code is below: 28 | 29 | import modal 30 | 31 | image = modal.Image.debian_slim().uv_pip_install("sentence-transformers<6") 32 | app_name = "example-gpu-snapshot" 33 | app = modal.App(app_name, image=image) 34 | 35 | snapshot_key = "v1" # change this to invalidate the snapshot cache 36 | 37 | with image.imports(): # import in the global scope so imports can be snapshot 38 | from sentence_transformers import SentenceTransformer 39 | 40 | 41 | @app.cls( 42 | gpu="a10", 43 | enable_memory_snapshot=True, 44 | experimental_options={"enable_gpu_snapshot": True}, 45 | ) 46 | class SnapshotEmbedder: 47 | @modal.enter(snap=True) 48 | def load(self): 49 | # during enter phase of container lifecycle, 50 | # load the model onto the GPU so it can be snapshot 51 | print("loading model") 52 | self.model = SentenceTransformer("BAAI/bge-small-en-v1.5", device="cuda") 53 | print(f"snapshotting {snapshot_key}") 54 | 55 | @modal.method() 56 | def run(self, sentences: list[str]) -> list[list[float]]: 57 | # later invocations of the Function will start here 58 | embeddings = self.model.encode(sentences, normalize_embeddings=True) 59 | return embeddings.tolist() 60 | 61 | 62 | if __name__ == "__main__": 63 | # after deployment, we can use the class from anywhere 64 | SnapshotEmbedder = modal.Cls.from_name(app_name, "SnapshotEmbedder") 65 | embedder = SnapshotEmbedder() 66 | try: 67 | print("calling Modal Function") 68 | print(embedder.run.remote(sentences=["what is the meaning of life?"])) 69 | except modal.exception.NotFoundError: 70 | raise Exception( 71 | f"To take advantage of GPU snapshots, deploy first with modal deploy {__file__}" 72 | ) 73 | -------------------------------------------------------------------------------- /06_gpu_and_ml/openai_whisper/pod_transcriber/app/frontend/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 9 | 10 | Modal Podcast Transcriber 11 | 12 | 13 | 14 | 17 | 24 | 25 | Fork on GitHub 26 | 27 | 33 | 38 | 39 | 66 | 67 |
68 | 69 | 70 | 71 | -------------------------------------------------------------------------------- /06_gpu_and_ml/openai_whisper/pod_transcriber/app/frontend/src/routes/podcast.tsx: -------------------------------------------------------------------------------- 1 | import useSWR from "swr"; 2 | import { useParams } from "react-router-dom"; 3 | import { Link } from "react-router-dom"; 4 | import HomeButton from "../components/HomeButton"; 5 | import Spinner from "../components/Spinner"; 6 | 7 | function Episode({ 8 | guidHash, 9 | title, 10 | transcribed, 11 | publishDate, 12 | podcastId, 13 | }: { 14 | guidHash: string; 15 | title: string; 16 | transcribed: boolean; 17 | publishDate: string; 18 | podcastId: string; 19 | }) { 20 | return ( 21 |
  • 25 | {transcribed ? "📃 " : " "} 26 | 30 | {title} 31 | {" "} 32 | | {publishDate} 33 |
  • 34 | ); 35 | } 36 | 37 | export default function Podcast() { 38 | let params = useParams(); 39 | 40 | async function fetchData() { 41 | const response = await fetch(`/api/podcast/${params.podcastId}`); 42 | const data = await response.json(); 43 | return data; 44 | } 45 | 46 | const { data } = useSWR(`/api/podcast/${params.podcastId}`, fetchData); 47 | 48 | if (!data) { 49 | return ( 50 |
    51 | 52 |
    53 | ); 54 | } 55 | 56 | return ( 57 |
    58 |
    59 | 60 |
    61 |
    62 |
    {data.pod_metadata.title}
    63 |
    64 | {data.pod_metadata.description} 65 |
    66 |
    67 |
    68 | 69 |
    70 |
      71 | {data.episodes.map((ep) => ( 72 | 80 | ))} 81 |
    82 |
    83 |
    84 |
    85 | ); 86 | } 87 | -------------------------------------------------------------------------------- /01_getting_started/inference_full.py: -------------------------------------------------------------------------------- 1 | # --- 2 | # cmd: ["python", "01_getting_started/inference_full.py"] 3 | # deploy: true 4 | # mypy: ignore-errors 5 | # --- 6 | from pathlib import Path 7 | 8 | import modal 9 | 10 | app = modal.App("example-inference-full") 11 | image = ( 12 | modal.Image.debian_slim() 13 | .uv_pip_install("transformers[torch]") 14 | .uv_pip_install("fastapi") 15 | ) 16 | 17 | with image.imports(): 18 | from transformers import pipeline 19 | 20 | weights_cache = { 21 | "/root/.cache/huggingface": modal.Volume.from_name( 22 | "example-inference", create_if_missing=True 23 | ) 24 | } 25 | 26 | 27 | @app.cls(gpu="h100", image=image, volumes=weights_cache, enable_memory_snapshot=True) 28 | class Chat: 29 | @modal.enter() 30 | def init(self): 31 | self.chatbot = pipeline( 32 | model="Qwen/Qwen3-1.7B-FP8", device_map="cuda", max_new_tokens=1024 33 | ) 34 | 35 | @modal.fastapi_endpoint(docs=True) 36 | def web(self, prompt: str | None = None) -> list[dict]: 37 | result = self.run.local(prompt) 38 | return result 39 | 40 | @modal.method() 41 | def run(self, prompt: str | None = None) -> list[dict]: 42 | if prompt is None: 43 | prompt = f"/no_think Read this code.\n\n{Path(__file__).read_text()}\nIn one paragraph, what does the code do?" 44 | 45 | print(prompt) 46 | context = [{"role": "user", "content": prompt}] 47 | 48 | result = self.chatbot(context) 49 | print(result[0]["generated_text"][-1]["content"]) 50 | 51 | return result 52 | 53 | 54 | @app.local_entrypoint() 55 | def main(): 56 | import glob 57 | 58 | chat = Chat() 59 | root_dir, examples = Path(__file__).parent.parent, [] 60 | for path in glob.glob("**/*.py", root_dir=root_dir): 61 | examples.append( 62 | f"/no_think Read this code.\n\n{(root_dir / path).read_text()}\nIn one paragraph, what does the code do?" 63 | ) 64 | 65 | for result in chat.run.map(examples): 66 | print(result[0]["generated_text"][-1]["content"]) 67 | 68 | 69 | if __name__ == "__main__": 70 | import json 71 | import urllib.request 72 | from datetime import datetime 73 | 74 | ChatCls = modal.Cls.from_name(app.name, "Chat") 75 | chat = ChatCls() 76 | print(datetime.now(), "making .remote call to Chat.run") 77 | print(chat.run.remote()) 78 | print(datetime.now(), "making web request to", url := chat.web.get_web_url()) 79 | 80 | with urllib.request.urlopen(url) as response: 81 | print(datetime.now()) 82 | print(json.loads(response.read().decode("utf-8"))) 83 | -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | 4 | 5 | ## Type of Change 6 | 7 | 10 | 11 | - [ ] New example for the GitHub repo 12 | - [ ] New example for the documentation site (Linked from a discoverable page, e.g. via the sidebar in `/docs/examples`) 13 | - [ ] Example updates (Bug fixes, new features, etc.) 14 | - [ ] Other (Changes to the codebase, but not to examples) 15 | 16 | ## Monitoring Checklist 17 | 18 | 24 | 25 | - [ ] Example is configured for testing in the synthetic monitoring system, or `lambda-test: false` is provided in the example frontmatter and I have gotten approval from a maintainer 26 | - [ ] Example is tested by executing with `modal run`, or an alternative `cmd` is provided in the example frontmatter (e.g. 
`cmd: ["modal", "serve"]`) 27 | - [ ] Example is tested by running the `cmd` with no arguments, or the `args` are provided in the example frontmatter (e.g. `args: ["--prompt", "Formula for room temperature superconductor:"]` 28 | - [ ] Example does _not_ require third-party dependencies besides `fastapi` to be installed locally (e.g. does not import `requests` or `torch` in the global scope or other code executed locally) 29 | 30 | ## Documentation Site Checklist 31 | 32 | 36 | 37 | ### Content 38 | - [ ] Example is documented with comments throughout, in a [_Literate Programming_](https://en.wikipedia.org/wiki/Literate_programming) style 39 | - [ ] All media assets for the example that are rendered in the documentation site page are retrieved from `modal-cdn.com` 40 | 41 | ### Build Stability 42 | - [ ] Example pins all dependencies in container images 43 | - [ ] Example pins container images to a stable tag like `v1`, not a dynamic tag like `latest` 44 | - [ ] Example specifies a `python_version` for the base image, if it is used 45 | - [ ] Example pins all dependencies to at least [SemVer](https://semver.org/) minor version, `~=x.y.z` or `==x.y`, or we expect this example to work across major versions of the dependency and are committed to maintenance across those versions 46 | - [ ] Example dependencies with `version < 1` are pinned to patch version, `==0.y.z` 47 | 48 | ## Outside Contributors 49 | 50 | You're great! Thanks for your contribution. 51 | -------------------------------------------------------------------------------- /misc/lmdeploy_oai_compatible.py: -------------------------------------------------------------------------------- 1 | # # Deploy a model with `lmdeploy` 2 | # 3 | # This script is used to deploy a model using [lmdeploy](https://github.com/InternLM/lmdeploy) with OpenAI compatible API. 4 | 5 | import subprocess 6 | 7 | import modal 8 | from modal import App, Image, Secret, gpu 9 | 10 | ########## CONSTANTS ########## 11 | 12 | 13 | # define model for serving and path to store in modal container 14 | MODEL_NAME = "meta-llama/Llama-2-7b-hf" 15 | MODEL_DIR = f"/models/{MODEL_NAME}" 16 | SERVE_MODEL_NAME = "meta--llama-2-7b" 17 | HF_SECRET = Secret.from_name("huggingface-secret") 18 | SECONDS = 60 # for timeout 19 | 20 | 21 | ########## UTILS FUNCTIONS ########## 22 | 23 | 24 | def download_hf_model(model_dir: str, model_name: str): 25 | """Retrieve model from HuggingFace Hub and save into 26 | specified path within the modal container. 27 | 28 | Args: 29 | model_dir (str): Path to save model weights in container. 30 | model_name (str): HuggingFace Model ID. 
31 | """ 32 | import os 33 | 34 | from huggingface_hub import snapshot_download # type: ignore 35 | from transformers.utils import move_cache # type: ignore 36 | 37 | os.makedirs(model_dir, exist_ok=True) 38 | 39 | snapshot_download( 40 | model_name, 41 | local_dir=model_dir, 42 | # consolidated.safetensors is prevent error here: https://github.com/vllm-project/vllm/pull/5005 43 | ignore_patterns=["*.pt", "*.bin", "consolidated.safetensors"], 44 | token=os.environ["HF_TOKEN"], 45 | ) 46 | move_cache() 47 | 48 | 49 | ########## IMAGE DEFINITION ########## 50 | 51 | # define image for modal environment 52 | lmdeploy_image = ( 53 | Image.from_registry( 54 | "openmmlab/lmdeploy:v0.4.2", 55 | ) 56 | .pip_install(["lmdeploy[all]", "huggingface_hub", "hf-transfer"]) 57 | .env({"HF_HUB_ENABLE_HF_TRANSFER": "1"}) 58 | .run_function( 59 | download_hf_model, 60 | timeout=60 * SECONDS, 61 | kwargs={"model_dir": MODEL_DIR, "model_name": MODEL_NAME}, 62 | secrets=[HF_SECRET], 63 | ) 64 | ) 65 | 66 | ########## APP SETUP ########## 67 | 68 | 69 | app = App(f"lmdeploy-{SERVE_MODEL_NAME}") 70 | 71 | NO_GPU = 1 72 | TOKEN = "secret12345" 73 | 74 | 75 | @app.function( 76 | image=lmdeploy_image, 77 | gpu=gpu.A10G(count=NO_GPU), 78 | scaledown_window=20 * SECONDS, 79 | ) 80 | @modal.concurrent(max_inputs=256) # https://modal.com/docs/guide/concurrent-inputs 81 | @modal.web_server(port=23333, startup_timeout=60 * SECONDS) 82 | def serve(): 83 | cmd = f""" 84 | lmdeploy serve api_server {MODEL_DIR} \ 85 | --model-name {SERVE_MODEL_NAME} \ 86 | --server-port 23333 \ 87 | --session-len 4092 88 | """ 89 | subprocess.Popen(cmd, shell=True) 90 | -------------------------------------------------------------------------------- /misc/tgi_oai_compatible.py: -------------------------------------------------------------------------------- 1 | # # Run TGI on Modal 2 | 3 | # This example shows how you can run LLMs with the [Text Generation Inference (TGI)](https://huggingface.co/docs/text-generation-inference/en/index) inference framework on Modal. 4 | 5 | import subprocess 6 | 7 | import modal 8 | from modal import App, Image, Secret, gpu 9 | 10 | # define model for serving and path to store in modal container 11 | MODEL_NAME = "meta-llama/Llama-2-7b-hf" 12 | MODEL_DIR = f"/models/{MODEL_NAME}" 13 | SERVE_MODEL_NAME = "meta--llama-2-7b" 14 | HF_SECRET = Secret.from_name("huggingface-secret") 15 | SECONDS = 60 # for timeout 16 | 17 | ########## UTILS FUNCTIONS ########## 18 | 19 | 20 | def download_hf_model(model_dir: str, model_name: str): 21 | """Retrieve model from HuggingFace Hub and save into 22 | specified path within the modal container. 23 | 24 | Args: 25 | model_dir (str): Path to save model weights in container. 26 | model_name (str): HuggingFace Model ID. 
27 | """ 28 | import os 29 | 30 | from huggingface_hub import snapshot_download # type: ignore 31 | from transformers.utils import move_cache # type: ignore 32 | 33 | os.makedirs(model_dir, exist_ok=True) 34 | 35 | snapshot_download( 36 | model_name, 37 | local_dir=model_dir, 38 | # consolidated.safetensors is prevent error here: https://github.com/vllm-project/vllm/pull/5005 39 | ignore_patterns=["*.pt", "*.bin", "consolidated.safetensors"], 40 | token=os.environ["HF_TOKEN"], 41 | ) 42 | move_cache() 43 | 44 | 45 | ########## IMAGE DEFINITION ########## 46 | 47 | 48 | # define image for modal environment 49 | tgi_image = ( 50 | Image.from_registry( 51 | "ghcr.io/huggingface/text-generation-inference", add_python="3.10" 52 | ) 53 | .dockerfile_commands("ENTRYPOINT []") 54 | .pip_install(["huggingface_hub", "hf-transfer"]) 55 | .env({"HF_HUB_ENABLE_HF_TRANSFER": "1"}) 56 | .run_function( 57 | download_hf_model, 58 | timeout=20 * SECONDS, 59 | kwargs={"model_dir": MODEL_DIR, "model_name": MODEL_NAME}, 60 | secrets=[HF_SECRET], 61 | ) 62 | ) 63 | 64 | 65 | ########## APP SETUP ########## 66 | 67 | 68 | app = App(f"tgi-{SERVE_MODEL_NAME}") 69 | 70 | 71 | NO_GPU = 1 72 | TOKEN = "secret12345" 73 | 74 | 75 | @app.function( 76 | image=tgi_image, 77 | gpu=gpu.A10G(count=NO_GPU), 78 | scaledown_window=20 * SECONDS, 79 | ) 80 | @modal.concurrent(max_inputs=256) # https://modal.com/docs/guide/concurrent-inputs 81 | @modal.web_server(port=3000, startup_timeout=60 * SECONDS) 82 | def serve(): 83 | cmd = f""" 84 | text-generation-launcher --model-id {MODEL_DIR} \ 85 | --hostname 0.0.0.0 \ 86 | --port 3000 87 | """ 88 | subprocess.Popen(cmd, shell=True) 89 | -------------------------------------------------------------------------------- /13_sandboxes/codelangchain/README.md: -------------------------------------------------------------------------------- 1 | # Deploying code agents without all the agonizing pain 2 | 3 | This example deploys a "code agent": a language model that can write and execute 4 | code in a flexible control flow aimed at completing a task or goal. 5 | 6 | It is implemented in LangChain, using the LangGraph library to structure the 7 | agent and the LangServe framework to turn it into a FastAPI app. 8 | 9 | We use Modal to turn that app into a web endpoint. We also use Modal to 10 | "sandbox" the agent's code execution, so that it can't accidentally (or when 11 | prompt injected!) damage the application by executing some inadvisable code. 12 | 13 | Modal's Charles Frye and LangChain's Lance Martin did a 14 | [walkthrough webinar](https://www.youtube.com/watch?v=X3yzWtAkaeo) explaining 15 | the project's context and implementation. Check it out if you're curious! 16 | 17 | ## How to run 18 | 19 | To run this app, you need to `pip install modal` and then create the following 20 | [secrets](https://modal.com/docs/guide/secrets): 21 | 22 | - `openai-secret` with an OpenAI API key, so that we can query OpenAI's models 23 | to power the agent, 24 | - and `langsmith-secret` with a LangSmith API key, so that we can monitor the 25 | agent's behavior with LangSmith. 26 | 27 | Head to the [secret creation dashboard](https://modal.com/secrets/) and follow 28 | the instructions for each secret type. 
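If you prefer the CLI to the dashboard, you can also create the Secrets with
`modal secret create`. A sketch, assuming the standard environment variable name for
each service (substitute your own keys):

```bash
modal secret create openai-secret OPENAI_API_KEY=sk-...
modal secret create langsmith-secret LANGCHAIN_API_KEY=...
```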
29 | 30 | Then, you can deploy the app with: 31 | 32 | ```bash 33 | modal deploy codelangchain.py 34 | ``` 35 | 36 | Navigate to the URL that appears in the output and you'll be dropped into an 37 | interactive "playground" interface where you can send queries to the agent and 38 | receive responses. You should expect it to take about a minute to respond. 39 | 40 | You can also navigate to the `/docs` path to see the OpenAPI/Swagger docs, which 41 | cover everything you need to incorporate the agent into your downstream 42 | applications via API requests. 43 | 44 | When developing the app, use `modal serve codelangchain.py` to get a 45 | hot-reloading server. 46 | 47 | ## Repo structure 48 | 49 | The web application is defined in `codelangchain.py`. 50 | 51 | It wraps the `agent.py` module, which contains the LangChain agent's definition. 52 | To test the agent in isolation, run `modal run agent.py` in the terminal and 53 | provide a `--question` about Python programming as input. 54 | 55 | Because the agent is a graph, it is defined by specifying nodes and edges, which 56 | are found in `nodes.py` and `edges.py`, respectively. 57 | 58 | The retrieval logic is very simple: all of the data from the relevant docs is 59 | retrieved and put at the beginning of the language model's prompt. You can find 60 | it in `retrieval.py`. 61 | 62 | The definition of the Modal container images and a few other shared utilities 63 | can be found in `common.py`. 64 | -------------------------------------------------------------------------------- /10_integrations/streamlit/serve_streamlit.py: -------------------------------------------------------------------------------- 1 | # --- 2 | # deploy: true 3 | # cmd: ["modal", "serve", "10_integrations/streamlit/serve_streamlit.py"] 4 | # --- 5 | 6 | # # Run and share Streamlit apps 7 | 8 | # This example shows you how to run a Streamlit app with `modal serve`, and then deploy it as a serverless web app. 9 | 10 | # ![example streamlit app](./streamlit.png) 11 | 12 | # This example is structured as two files: 13 | 14 | # 1. This module, which defines the Modal objects (name the script `serve_streamlit.py` locally). 15 | 16 | # 2. `app.py`, which is any Streamlit script to be mounted into the Modal 17 | # function ([download script](https://github.com/modal-labs/modal-examples/blob/main/10_integrations/streamlit/app.py)). 18 | 19 | import shlex 20 | import subprocess 21 | from pathlib import Path 22 | 23 | import modal 24 | 25 | # ## Define container dependencies 26 | 27 | # The `app.py` script imports three third-party packages, so we include these in the example's 28 | # image definition and then add the `app.py` file itself to the image. 29 | 30 | streamlit_script_local_path = Path(__file__).parent / "app.py" 31 | streamlit_script_remote_path = "/root/app.py" 32 | 33 | image = ( 34 | modal.Image.debian_slim(python_version="3.11") 35 | .uv_pip_install("streamlit~=1.35.0", "numpy~=1.26.4", "pandas~=2.2.2") 36 | .add_local_file( 37 | streamlit_script_local_path, 38 | streamlit_script_remote_path, 39 | ) 40 | ) 41 | 42 | app = modal.App(name="example-serve-streamlit", image=image) 43 | 44 | if not streamlit_script_local_path.exists(): 45 | raise RuntimeError( 46 | "app.py not found! Place the script with your streamlit app in the same directory." 47 | ) 48 | 49 | # ## Spawning the Streamlit server 50 | 51 | # Inside the container, we will run the Streamlit server in a background subprocess using 52 | # `subprocess.Popen`.
We also expose port 8000 using the `@web_server` decorator. 53 | 54 | 55 | @app.function() 56 | @modal.concurrent(max_inputs=100) 57 | @modal.web_server(8000) 58 | def run(): 59 | target = shlex.quote(streamlit_script_remote_path) 60 | cmd = f"streamlit run {target} --server.port 8000 --server.enableCORS=false --server.enableXsrfProtection=false" 61 | subprocess.Popen(cmd, shell=True) 62 | 63 | 64 | # ## Iterate and Deploy 65 | 66 | # While you're iterating on your Streamlit app, you can run it "ephemerally" with `modal serve`. This will 67 | # run a local process that watches your files and updates the app if anything changes. 68 | 69 | # ```shell 70 | # modal serve serve_streamlit.py 71 | # ``` 72 | 73 | # Once you're happy with your changes, you can deploy your application with 74 | 75 | # ```shell 76 | # modal deploy serve_streamlit.py 77 | # ``` 78 | 79 | # If successful, this will print a URL for your app that you can navigate to from 80 | # your browser 🎉 . 81 | -------------------------------------------------------------------------------- /13_sandboxes/codelangchain/src/edges.py: -------------------------------------------------------------------------------- 1 | """Defines functions that transition our agent from one state to another.""" 2 | 3 | from typing import Callable 4 | 5 | from .common import GraphState 6 | 7 | EXPECTED_NODES = [ 8 | "generate", 9 | "check_code_imports", 10 | "check_code_execution", 11 | "finish", 12 | ] 13 | 14 | 15 | def enrich(graph): 16 | """Adds transition edges to the graph.""" 17 | 18 | for node_name in set(EXPECTED_NODES): 19 | assert node_name in graph.nodes, f"Node {node_name} not found in graph" 20 | 21 | graph.add_edge("generate", "check_code_imports") 22 | graph.add_conditional_edges( 23 | "check_code_imports", 24 | EDGE_MAP["decide_to_check_code_exec"], 25 | { 26 | "check_code_execution": "check_code_execution", 27 | "generate": "generate", 28 | }, 29 | ) 30 | graph.add_edge("check_code_execution", "evaluate_execution") 31 | graph.add_conditional_edges( 32 | "evaluate_execution", 33 | EDGE_MAP["decide_to_finish"], 34 | { 35 | "finish": "finish", 36 | "generate": "generate", 37 | }, 38 | ) 39 | return graph 40 | 41 | 42 | def decide_to_check_code_exec(state: GraphState) -> str: 43 | """ 44 | Determines whether to test code execution, or re-try answer generation. 45 | 46 | Args: 47 | state (dict): The current graph state 48 | 49 | Returns: 50 | str: Next node to call 51 | """ 52 | 53 | print("---DECIDE TO TEST CODE EXECUTION---") 54 | state_dict = state["keys"] 55 | error = state_dict["error"] 56 | 57 | if error == "None": 58 | # The import check recorded no error, 59 | # so move on to testing code execution 60 | print("---DECISION: TEST CODE EXECUTION---") 61 | return "check_code_execution" 62 | else: 63 | # The import check failed, so re-try generating a solution 64 | print("---DECISION: RE-TRY SOLUTION---") 65 | return "generate" 66 | 67 | 68 | def decide_to_finish(state: GraphState) -> str: 69 | """ 70 | Determines whether to finish (re-try code 3 times).
71 | 72 | Args: 73 | state (dict): The current graph state 74 | 75 | Returns: 76 | str: Next node to call 77 | """ 78 | 79 | print("---DECIDE TO FINISH---") 80 | state_dict = state["keys"] 81 | evaluation = state_dict["evaluation"] 82 | iter = state_dict["iterations"] 83 | 84 | if evaluation.decision == "finish" or iter >= 3: 85 | print("---DECISION: FINISH---") 86 | return "finish" 87 | else: 88 | print("---DECISION: RE-TRY SOLUTION---") 89 | return "generate" 90 | 91 | 92 | EDGE_MAP: dict[str, Callable] = { 93 | "decide_to_check_code_exec": decide_to_check_code_exec, 94 | "decide_to_finish": decide_to_finish, 95 | } 96 | -------------------------------------------------------------------------------- /07_web_endpoints/streaming.py: -------------------------------------------------------------------------------- 1 | # --- 2 | # cmd: ["modal", "serve", "07_web_endpoints/streaming.py"] 3 | # --- 4 | 5 | # # Deploy a FastAPI app with streaming responses 6 | 7 | # This example shows how you can deploy a [FastAPI](https://fastapi.tiangolo.com/) app with Modal that streams results back to the client. 8 | 9 | import asyncio 10 | import time 11 | 12 | import modal 13 | from fastapi import FastAPI 14 | from fastapi.responses import StreamingResponse 15 | 16 | image = modal.Image.debian_slim().uv_pip_install("fastapi[standard]") 17 | app = modal.App("example-streaming", image=image) 18 | 19 | web_app = FastAPI() 20 | 21 | # This asynchronous generator function simulates 22 | # progressively returning data to the client. The `asyncio.sleep` 23 | # is not necessary, but makes it easier to see the iterative behavior 24 | # of the response. 25 | 26 | 27 | async def fake_video_streamer(): 28 | for i in range(10): 29 | yield f"frame {i}: hello world!".encode() 30 | await asyncio.sleep(1.0) 31 | 32 | 33 | # ASGI app with streaming handler. 34 | 35 | # This `fastapi_app` also uses the fake video streamer async generator, 36 | # passing it directly into `StreamingResponse`. 37 | 38 | 39 | @web_app.get("/") 40 | async def main(): 41 | return StreamingResponse(fake_video_streamer(), media_type="text/event-stream") 42 | 43 | 44 | @app.function() 45 | @modal.asgi_app() 46 | def fastapi_app(): 47 | return web_app 48 | 49 | 50 | # This `hook` web endpoint Modal function calls *another* Modal function, 51 | # and it just works! 52 | 53 | 54 | @app.function() 55 | def sync_fake_video_streamer(): 56 | for i in range(10): 57 | yield f"frame {i}: some data\n".encode() 58 | time.sleep(1) 59 | 60 | 61 | @app.function() 62 | @modal.fastapi_endpoint() 63 | def hook(): 64 | return StreamingResponse( 65 | sync_fake_video_streamer.remote_gen(), media_type="text/event-stream" 66 | ) 67 | 68 | 69 | # This `mapped` web endpoint Modal function does a parallel `.map` on a simple 70 | # Modal function. Using `.starmap` also would work in the same fashion. 71 | 72 | 73 | @app.function() 74 | def map_me(i): 75 | time.sleep(i) # stagger the results for demo purposes 76 | return f"hello from {i}\n" 77 | 78 | 79 | @app.function() 80 | @modal.fastapi_endpoint() 81 | def mapped(): 82 | return StreamingResponse(map_me.map(range(10)), media_type="text/event-stream") 83 | 84 | 85 | # To try for yourself, run 86 | 87 | # ```shell 88 | # modal serve streaming.py 89 | # ``` 90 | 91 | # and then send requests to the URLs that appear in the terminal output. 92 | 93 | # Make sure that your client is not buffering the server response 94 | # until it gets newline (\n) characters. 
By default browsers and `curl` are buffering, 95 | # though modern browsers should respect the "text/event-stream" content type header being set. 96 | -------------------------------------------------------------------------------- /06_gpu_and_ml/llm-serving/openai_compatible/load_test.py: -------------------------------------------------------------------------------- 1 | import os 2 | from datetime import datetime, timezone 3 | from pathlib import Path, PosixPath 4 | 5 | import modal 6 | 7 | if modal.is_local(): 8 | workspace = modal.config._profile 9 | environment = modal.config.config.get("environment") or "" 10 | else: 11 | workspace = os.environ["MODAL_WORKSPACE"] 12 | environment = os.environ["MODAL_ENVIRONMENT"] 13 | 14 | 15 | image = ( 16 | modal.Image.debian_slim(python_version="3.11") 17 | .uv_pip_install("locust~=2.36.2", "openai~=1.37.1") 18 | .env({"MODAL_WORKSPACE": workspace, "MODAL_ENVIRONMENT": environment}) 19 | .add_local_file( 20 | Path(__file__).parent / "locustfile.py", 21 | remote_path="/root/locustfile.py", 22 | ) 23 | ) 24 | 25 | volume = modal.Volume.from_name("loadtest-vllm-oai-results", create_if_missing=True) 26 | remote_path = Path("/root") / "loadtests" 27 | OUT_DIRECTORY = ( 28 | remote_path / datetime.now(timezone.utc).replace(microsecond=0).isoformat() 29 | ) 30 | 31 | app = modal.App("loadtest-vllm-oai", image=image, volumes={remote_path: volume}) 32 | 33 | workers = 8 34 | 35 | prefix = workspace + (f"-{environment}" if environment else "") 36 | host = f"https://{prefix}--example-vllm-inference-serve.modal.run" 37 | 38 | csv_file = OUT_DIRECTORY / "stats.csv" 39 | default_args = [ 40 | "-H", 41 | host, 42 | "--processes", 43 | str(workers), 44 | "--csv", 45 | str(csv_file), 46 | ] 47 | 48 | MINUTES = 60 # seconds 49 | 50 | 51 | @app.function(cpu=workers) 52 | @modal.concurrent(max_inputs=100) 53 | @modal.web_server(port=8089) 54 | def serve(): 55 | run_locust.local(default_args) 56 | 57 | 58 | @app.function(cpu=workers, timeout=60 * MINUTES) 59 | def run_locust(args: list, wait=False): 60 | import subprocess 61 | 62 | process = subprocess.Popen(["locust"] + args) 63 | if wait: 64 | process.wait() 65 | return process.returncode 66 | 67 | 68 | @app.local_entrypoint() 69 | def main( 70 | r: float = 1.0, 71 | u: int = 36, 72 | t: str = "1m", # no more than the timeout of run_locust, one hour 73 | ): 74 | args = default_args + [ 75 | "--spawn-rate", 76 | str(r), 77 | "--users", 78 | str(u), 79 | "--run-time", 80 | t, 81 | ] 82 | 83 | html_report_file = str(PosixPath(OUT_DIRECTORY / "report.html")) 84 | args += [ 85 | "--headless", # run without browser UI 86 | "--autostart", # start test immediately 87 | "--autoquit", # stop once finished... 88 | "10", # ...but wait ten seconds 89 | "--html", # output an HTML-formatted report 90 | html_report_file, # to this location 91 | ] 92 | 93 | if exit_code := run_locust.remote(args, wait=True): 94 | SystemExit(exit_code) 95 | else: 96 | print("finished successfully") 97 | -------------------------------------------------------------------------------- /06_gpu_and_ml/speech-to-text/streaming-diarization-frontend/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | Audio Transcription 7 | 37 | 38 | 39 |

[The HTML markup of the page body was lost in extraction (original lines 39-65). The recoverable content: a heading reading "Streaming Speaker Diarization with nvidia/diar_streaming_sortformer_4spk-v2_1", a tip reading "Turn your microphone volume up for better transcription quality.", and four speaker panels labeled "Speaker 0" through "Speaker 3", each with a speaking-time counter initialized to 0.00.]
    66 | 67 | 68 | 69 | 70 | -------------------------------------------------------------------------------- /07_web_endpoints/fasthtml-checkboxes/cbx_load_test.py: -------------------------------------------------------------------------------- 1 | import os 2 | from datetime import datetime 3 | from pathlib import Path 4 | 5 | import modal 6 | 7 | if modal.is_local(): 8 | workspace = modal.config._profile or "" 9 | environment = modal.config.config["environment"] or "" 10 | else: 11 | workspace = os.environ["MODAL_WORKSPACE"] or "" 12 | environment = os.environ["MODAL_ENVIRONMENT"] or "" 13 | 14 | 15 | image = ( 16 | modal.Image.debian_slim(python_version="3.12") 17 | .uv_pip_install("locust~=2.29.1", "beautifulsoup4~=4.12.3", "lxml~=5.3.0") 18 | .env({"MODAL_WORKSPACE": workspace, "MODAL_ENVIRONMENT": environment}) 19 | .add_local_file( 20 | Path(__file__).parent / "cbx_locustfile.py", 21 | remote_path="/root/locustfile.py", 22 | ) 23 | .add_local_file( 24 | Path(__file__).parent / "constants.py", 25 | remote_path="/root/constants.py", 26 | ) 27 | ) 28 | volume = modal.Volume.from_name("example-cbx-load-test-results", create_if_missing=True) 29 | remote_path = Path("/root") / "loadtests" 30 | OUT_DIRECTORY = remote_path / datetime.utcnow().replace(microsecond=0).isoformat() 31 | 32 | app = modal.App("example-cbx-load-test", image=image, volumes={remote_path: volume}) 33 | 34 | workers = 8 35 | host = f"https://{workspace}{'-' + environment if environment else ''}--example-fasthtml-checkboxes-web.modal.run" 36 | csv_file = OUT_DIRECTORY / "stats.csv" 37 | default_args = [ 38 | "-H", 39 | host, 40 | "--processes", 41 | str(workers), 42 | "--csv", 43 | csv_file, 44 | ] 45 | 46 | MINUTES = 60 # seconds 47 | 48 | 49 | @app.function(cpu=workers) 50 | @modal.concurrent(max_inputs=100) 51 | @modal.web_server(port=8089) 52 | def serve(): 53 | run_locust.local(default_args) 54 | 55 | 56 | @app.function(cpu=workers, timeout=60 * MINUTES) 57 | def run_locust(args: list, wait=False): 58 | import subprocess 59 | 60 | process = subprocess.Popen(["locust"] + args) 61 | if wait: 62 | process.wait() 63 | return process.returncode 64 | 65 | 66 | @app.local_entrypoint() 67 | def main( 68 | r: float = 1.0, 69 | u: int = 36, 70 | t: str = "1m", # no more than the timeout of run_locust, one hour 71 | ): 72 | args = default_args + [ 73 | "--spawn-rate", 74 | str(r), 75 | "--users", 76 | str(u), 77 | "--run-time", 78 | t, 79 | ] 80 | 81 | html_report_file = OUT_DIRECTORY / "report.html" 82 | args += [ 83 | "--headless", # run without browser UI 84 | "--autostart", # start test immediately 85 | "--autoquit", # stop once finished... 86 | "10", # ...but wait ten seconds 87 | "--html", # output an HTML-formatted report 88 | html_report_file, # to this location 89 | ] 90 | 91 | if exit_code := run_locust.remote(args, wait=True): 92 | SystemExit(exit_code) 93 | else: 94 | print("finished successfully") 95 | -------------------------------------------------------------------------------- /10_integrations/tailscale/modal_tailscale.py: -------------------------------------------------------------------------------- 1 | # --- 2 | # lambda-test: false # missing-secret 3 | # --- 4 | 5 | # # Add Modal Apps to Tailscale 6 | 7 | # This example demonstrates how to integrate Modal with Tailscale (https://tailscale.com). 8 | # It outlines the steps to configure Modal containers so that they join the Tailscale network. 9 | 10 | # We use a custom entrypoint to automatically add containers to a Tailscale network (tailnet). 
11 | # This configuration enables the containers to interact with one another and with 12 | # additional applications within the same tailnet. 13 | 14 | 15 | import modal 16 | 17 | # Install Tailscale and copy custom entrypoint script ([entrypoint.sh](https://github.com/modal-labs/modal-examples/blob/main/10_integrations/tailscale/entrypoint.sh)). The script must be 18 | # executable. 19 | image = ( 20 | modal.Image.debian_slim(python_version="3.11") 21 | .apt_install("curl") 22 | .run_commands("curl -fsSL https://tailscale.com/install.sh | sh") 23 | .uv_pip_install("requests==2.32.3", "PySocks==1.7.1") 24 | .add_local_file("./entrypoint.sh", "/root/entrypoint.sh", copy=True) 25 | .run_commands("chmod a+x /root/entrypoint.sh") 26 | .entrypoint(["/root/entrypoint.sh"]) 27 | ) 28 | app = modal.App("example-modal-tailscale", image=image) 29 | 30 | # Packages might not be installed locally. This catches import errors and 31 | # only attempts imports in the container. 32 | with image.imports(): 33 | import socket 34 | 35 | import socks 36 | 37 | # Configure Python to use the SOCKS5 proxy globally. 38 | if not modal.is_local(): 39 | socks.set_default_proxy(socks.SOCKS5, "0.0.0.0", 1080) 40 | socket.socket = socks.socksocket 41 | 42 | 43 | # Run your function, adding a Tailscale secret. We suggest creating a [reusable and ephemeral key](https://tailscale.com/kb/1111/ephemeral-nodes). 44 | @app.function( 45 | secrets=[ 46 | modal.Secret.from_name("tailscale-auth", required_keys=["TAILSCALE_AUTHKEY"]), 47 | modal.Secret.from_dict( 48 | { 49 | "ALL_PROXY": "socks5://localhost:1080/", 50 | "HTTP_PROXY": "http://localhost:1080/", 51 | "http_proxy": "http://localhost:1080/", 52 | } 53 | ), 54 | ], 55 | ) 56 | def connect_to_machine(): 57 | import requests 58 | 59 | # Connect to other machines in your tailnet. 60 | resp = requests.get("http://my-tailscale-machine:5000") 61 | print(resp.content) 62 | 63 | 64 | # Run this script with `modal run modal_tailscale.py`. You will see Tailscale logs 65 | # when the container starts, indicating that you were able to log in successfully and 66 | # that the proxies (SOCKS5 and HTTP) have been created successfully. You will also 67 | # be able to see Modal containers in your Tailscale dashboard in the "Machines" tab. 68 | # Every new container launched will show up as a new "machine". Containers are 69 | # individually addressable using their Tailscale name or IP address. 70 | -------------------------------------------------------------------------------- /07_web_endpoints/badges.py: -------------------------------------------------------------------------------- 1 | # --- 2 | # cmd: ["modal", "serve", "07_web_endpoints/badges.py"] 3 | # --- 4 | 5 | # # Serve a dynamic SVG badge 6 | 7 | # In this example, we use Modal's [webhook](https://modal.com/docs/guide/webhooks) capability to host a dynamic SVG badge that shows 8 | # you the current number of downloads for a Python package.
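# Once deployed, a badge like this can be embedded anywhere Markdown images are accepted,
# for example in a project README. A sketch, assuming a hypothetical deployed endpoint URL
# (yours will differ; the deployment steps are covered at the end of this example):

# ```markdown
# ![synchronicity downloads](https://YOUR_SUBDOMAIN.modal.run/?package_name=synchronicity)
# ```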
9 | 10 | # First let's start off by creating a Modal app, and defining an image with the Python packages we're going to be using: 11 | 12 | import modal 13 | 14 | image = modal.Image.debian_slim().uv_pip_install( 15 | "fastapi[standard]", "pybadges", "pypistats" 16 | ) 17 | 18 | app = modal.App("example-badges", image=image) 19 | 20 | # ## Defining the web endpoint 21 | 22 | # In addition to using `@app.function()` to decorate our function, we use the 23 | # [`@modal.fastapi_endpoint` decorator](https://modal.com/docs/guide/webhooks) 24 | # which instructs Modal to create a REST endpoint that serves this function. 25 | # Note that the default method is `GET`, but this can be overridden using the `method` argument. 26 | 27 | 28 | @app.function() 29 | @modal.fastapi_endpoint() 30 | async def package_downloads(package_name: str): 31 | import json 32 | 33 | import pypistats 34 | from fastapi import Response 35 | from pybadges import badge 36 | 37 | stats = json.loads(pypistats.recent(package_name, format="json")) 38 | svg = badge( 39 | left_text=f"{package_name} downloads", 40 | right_text=str(stats["data"]["last_month"]), 41 | right_color="blue", 42 | ) 43 | 44 | return Response(content=svg, media_type="image/svg+xml") 45 | 46 | 47 | # In this function, we use `pypistats` to query the most recent stats for our package, and then 48 | # use that as the text for an SVG badge, rendered using `pybadges`. 49 | # Since Modal web endpoints are FastAPI functions under the hood, we return this SVG wrapped in a FastAPI response with the correct media type. 50 | # Also note that FastAPI automatically interprets `package_name` as a [query param](https://fastapi.tiangolo.com/tutorial/query-params/). 51 | 52 | # ## Running and deploying 53 | 54 | # We can now run an ephemeral app on the command line using: 55 | 56 | # ```shell 57 | # modal serve badges.py 58 | # ``` 59 | 60 | # This will create a short-lived web URL that exists until you terminate the script. 61 | # It will also hot-reload the code if you make changes to it. 62 | 63 | # If you want to create a persistent URL, you have to deploy the script. 64 | # To deploy, run `modal deploy badges.py` with the Modal CLI. 65 | 66 | # Either way, as soon as we run this command, Modal gives us the link to our brand new 67 | # web endpoint in the output: 68 | 69 | # ![web badge deployment](./badges_deploy.png) 70 | 71 | # We can now visit the link using a web browser, using a `package_name` of our choice in the URL query params. 72 | # For example: 73 | # - `https://YOUR_SUBDOMAIN.modal.run/?package_name=synchronicity` 74 | # - `https://YOUR_SUBDOMAIN.modal.run/?package_name=torch` 75 | -------------------------------------------------------------------------------- /13_sandboxes/codelangchain/langserve.py: -------------------------------------------------------------------------------- 1 | # --- 2 | # pytest: false 3 | # cmd: ["modal", "serve", "-m", "13_sandboxes.codelangchain.langserve"] 4 | # --- 5 | 6 | # # Deploy LangChain and LangGraph applications with LangServe 7 | 8 | # This code demonstrates how to deploy a 9 | # [LangServe](https://python.langchain.com/docs/langserve/) application on Modal. 10 | # LangServe makes it easy to wrap LangChain and LangGraph applications in a FastAPI server, 11 | # and Modal makes it easy to deploy FastAPI servers. 12 | 13 | # The LangGraph application that it serves is from our [sandboxed LLM coding agent example](https://modal.com/docs/examples/agent).
14 | 15 | # You can find the code for the agent and several other code files associated with this example in the 16 | # [`codelangchain` directory of our examples repo](https://github.com/modal-labs/modal-examples/tree/main/13_sandboxes/codelangchain). 17 | 18 | import modal 19 | 20 | from .agent import construct_graph, create_sandbox 21 | from .src.common import image 22 | 23 | app = modal.App("example-codelangchain-langserve") 24 | 25 | image = image.uv_pip_install("langserve[all]==0.3.0") 26 | 27 | 28 | @app.function( 29 | image=image, 30 | secrets=[ # see the agent.py file for more information on Secrets 31 | modal.Secret.from_name("openai-secret", required_keys=["OPENAI_API_KEY"]), 32 | modal.Secret.from_name("langsmith-secret", required_keys=["LANGCHAIN_API_KEY"]), 33 | ], 34 | ) 35 | @modal.asgi_app() 36 | def serve(): 37 | from fastapi import FastAPI, responses 38 | from fastapi.middleware.cors import CORSMiddleware 39 | from langchain_core.runnables import RunnableLambda 40 | from langserve import add_routes 41 | 42 | # create a FastAPI app 43 | web_app = FastAPI( 44 | title="CodeLangChain Server", 45 | version="1.0", 46 | description="Writes code and checks if it runs.", 47 | ) 48 | 49 | # set all CORS enabled origins 50 | web_app.add_middleware( 51 | CORSMiddleware, 52 | allow_origins=["*"], 53 | allow_credentials=True, 54 | allow_methods=["*"], 55 | allow_headers=["*"], 56 | expose_headers=["*"], 57 | ) 58 | 59 | def inp(question: str) -> dict: 60 | return {"keys": {"question": question, "iterations": 0}} 61 | 62 | def out(state: dict) -> str: 63 | if "finish" in state: 64 | return state["finish"]["keys"]["response"] 65 | elif len(state) > 0 and "finish" in state[-1]: 66 | return state[-1]["finish"]["keys"]["response"] 67 | else: 68 | return str(state) 69 | 70 | graph = construct_graph(create_sandbox(app), debug=False).compile() 71 | 72 | chain = RunnableLambda(inp) | graph | RunnableLambda(out) 73 | 74 | add_routes( 75 | web_app, 76 | chain, 77 | path="/codelangchain", 78 | ) 79 | 80 | # redirect the root to the interactive playground 81 | @web_app.get("/") 82 | def redirect(): 83 | return responses.RedirectResponse(url="/codelangchain/playground") 84 | 85 | # return the FastAPI app and Modal will deploy it for us 86 | return web_app 87 | -------------------------------------------------------------------------------- /10_integrations/dbt/sample_proj_duckdb_s3/seeds/raw_payments.csv: -------------------------------------------------------------------------------- 1 | id,order_id,payment_method,amount 2 | 1,1,credit_card,1000 3 | 2,2,credit_card,2000 4 | 3,3,coupon,100 5 | 4,4,coupon,2500 6 | 5,5,bank_transfer,1700 7 | 6,6,credit_card,600 8 | 7,7,credit_card,1600 9 | 8,8,credit_card,2300 10 | 9,9,gift_card,2300 11 | 10,9,bank_transfer,0 12 | 11,10,bank_transfer,2600 13 | 12,11,credit_card,2700 14 | 13,12,credit_card,100 15 | 14,13,credit_card,500 16 | 15,13,bank_transfer,1400 17 | 16,14,bank_transfer,300 18 | 17,15,coupon,2200 19 | 18,16,credit_card,1000 20 | 19,17,bank_transfer,200 21 | 20,18,credit_card,500 22 | 21,18,credit_card,800 23 | 22,19,gift_card,600 24 | 23,20,bank_transfer,1500 25 | 24,21,credit_card,1200 26 | 25,22,bank_transfer,800 27 | 26,23,gift_card,2300 28 | 27,24,coupon,2600 29 | 28,25,bank_transfer,2000 30 | 29,25,credit_card,2200 31 | 30,25,coupon,1600 32 | 31,26,credit_card,3000 33 | 32,27,credit_card,2300 34 | 33,28,bank_transfer,1900 35 | 34,29,bank_transfer,1200 36 | 35,30,credit_card,1300 37 | 36,31,credit_card,1200 38 | 37,32,credit_card,300 39 | 
38,33,credit_card,2200 40 | 39,34,bank_transfer,1500 41 | 40,35,credit_card,2900 42 | 41,36,bank_transfer,900 43 | 42,37,credit_card,2300 44 | 43,38,credit_card,1500 45 | 44,39,bank_transfer,800 46 | 45,40,credit_card,1400 47 | 46,41,credit_card,1700 48 | 47,42,coupon,1700 49 | 48,43,gift_card,1800 50 | 49,44,gift_card,1100 51 | 50,45,bank_transfer,500 52 | 51,46,bank_transfer,800 53 | 52,47,credit_card,2200 54 | 53,48,bank_transfer,300 55 | 54,49,credit_card,600 56 | 55,49,credit_card,900 57 | 56,50,credit_card,2600 58 | 57,51,credit_card,2900 59 | 58,51,credit_card,100 60 | 59,52,bank_transfer,1500 61 | 60,53,credit_card,300 62 | 61,54,credit_card,1800 63 | 62,54,bank_transfer,1100 64 | 63,55,credit_card,2900 65 | 64,56,credit_card,400 66 | 65,57,bank_transfer,200 67 | 66,58,coupon,1800 68 | 67,58,gift_card,600 69 | 68,59,gift_card,2800 70 | 69,60,credit_card,400 71 | 70,61,bank_transfer,1600 72 | 71,62,gift_card,1400 73 | 72,63,credit_card,2900 74 | 73,64,bank_transfer,2600 75 | 74,65,credit_card,0 76 | 75,66,credit_card,2800 77 | 76,67,bank_transfer,400 78 | 77,67,credit_card,1900 79 | 78,68,credit_card,1600 80 | 79,69,credit_card,1900 81 | 80,70,credit_card,2600 82 | 81,71,credit_card,500 83 | 82,72,credit_card,2900 84 | 83,73,bank_transfer,300 85 | 84,74,credit_card,3000 86 | 85,75,credit_card,1900 87 | 86,76,coupon,200 88 | 87,77,credit_card,0 89 | 88,77,bank_transfer,1900 90 | 89,78,bank_transfer,2600 91 | 90,79,credit_card,1800 92 | 91,79,credit_card,900 93 | 92,80,gift_card,300 94 | 93,81,coupon,200 95 | 94,82,credit_card,800 96 | 95,83,credit_card,100 97 | 96,84,bank_transfer,2500 98 | 97,85,bank_transfer,1700 99 | 98,86,coupon,2300 100 | 99,87,gift_card,3000 101 | 100,87,credit_card,2600 102 | 101,88,credit_card,2900 103 | 102,89,bank_transfer,2200 104 | 103,90,bank_transfer,200 105 | 104,91,credit_card,1900 106 | 105,92,bank_transfer,1500 107 | 106,92,coupon,200 108 | 107,93,gift_card,2600 109 | 108,94,coupon,700 110 | 109,95,coupon,2400 111 | 110,96,gift_card,1700 112 | 111,97,bank_transfer,1400 113 | 112,98,bank_transfer,1000 114 | 113,99,credit_card,2400 115 | -------------------------------------------------------------------------------- /03_scaling_out/dynamic_batching.py: -------------------------------------------------------------------------------- 1 | # # Dynamic batching for ASCII and character conversion 2 | 3 | # This example demonstrates how to dynamically batch a simple 4 | # application that converts ASCII codes to characters and vice versa. 5 | 6 | # For more details about using dynamic batching and optimizing 7 | # the batching configurations for your application, see 8 | # the [dynamic batching guide](https://modal.com/docs/guide/dynamic-batching). 9 | 10 | # ## Setup 11 | 12 | # Let's start by defining the image for the application. 13 | 14 | import modal 15 | 16 | app = modal.App( 17 | "example-dynamic-batching", 18 | image=modal.Image.debian_slim(python_version="3.11"), 19 | ) 20 | 21 | 22 | # ## Defining a Batched Function 23 | 24 | # Now, let's define a function that converts ASCII codes to characters. This 25 | # async Batched Function allows us to convert up to four ASCII codes at once. 
26 | 27 | 28 | @app.function() 29 | @modal.batched(max_batch_size=4, wait_ms=1000) 30 | async def asciis_to_chars(asciis: list[int]) -> list[str]: 31 | return [chr(ascii) for ascii in asciis] 32 | 33 | 34 | # If there are fewer than four ASCII codes in the batch, the Function will wait 35 | # for one second, as specified by `wait_ms`, to allow more inputs to arrive before 36 | # returning the result. 37 | 38 | # The input `asciis` to the Function is a list of integers, and the 39 | # output is a list of strings. To allow batching, the input list `asciis` 40 | # and the output list must have the same length. 41 | 42 | # You must invoke the Function with an individual ASCII input, and a single 43 | # character will be returned in response. 44 | 45 | # ## Defining a class with a Batched Method 46 | 47 | # Next, let's define a class that converts characters to ASCII codes. This 48 | # class has an async Batched Method `chars_to_asciis` that converts characters 49 | # to ASCII codes. 50 | 51 | # Note that if a class has a Batched Method, it cannot have other Batched Methods 52 | # or Methods. 53 | 54 | 55 | @app.cls() 56 | class AsciiConverter: 57 | @modal.batched(max_batch_size=4, wait_ms=1000) 58 | async def chars_to_asciis(self, chars: list[str]) -> list[int]: 59 | asciis = [ord(char) for char in chars] 60 | return asciis 61 | 62 | 63 | # ## ASCII and character conversion 64 | 65 | # Finally, let's define the `local_entrypoint` that uses the Batched Function 66 | # and Class Method to convert ASCII codes to characters and 67 | # vice versa. 68 | 69 | # We use [`map.aio`](https://modal.com/docs/reference/modal.Function#map) to asynchronously map 70 | # over the ASCII codes and characters. This allows us to invoke the Batched 71 | # Function and the Batched Method over a range of ASCII codes and characters 72 | # in parallel. 73 | # 74 | # Run this script to see which characters correspond to ASCII codes 33 through 38! 
75 | 76 | 77 | @app.local_entrypoint() 78 | async def main(): 79 | ascii_converter = AsciiConverter() 80 | chars = [] 81 | async for char in asciis_to_chars.map.aio(range(33, 39)): 82 | chars.append(char) 83 | 84 | print("Characters:", chars) 85 | 86 | asciis = [] 87 | async for ascii in ascii_converter.chars_to_asciis.map.aio(chars): 88 | asciis.append(ascii) 89 | 90 | print("ASCII codes:", asciis) 91 | -------------------------------------------------------------------------------- /10_integrations/dbt/sample_proj_duckdb_s3/seeds/raw_orders.csv: -------------------------------------------------------------------------------- 1 | id,user_id,order_date,status 2 | 1,1,2018-01-01,returned 3 | 2,3,2018-01-02,completed 4 | 3,94,2018-01-04,completed 5 | 4,50,2018-01-05,completed 6 | 5,64,2018-01-05,completed 7 | 6,54,2018-01-07,completed 8 | 7,88,2018-01-09,completed 9 | 8,2,2018-01-11,returned 10 | 9,53,2018-01-12,completed 11 | 10,7,2018-01-14,completed 12 | 11,99,2018-01-14,completed 13 | 12,59,2018-01-15,completed 14 | 13,84,2018-01-17,completed 15 | 14,40,2018-01-17,returned 16 | 15,25,2018-01-17,completed 17 | 16,39,2018-01-18,completed 18 | 17,71,2018-01-18,completed 19 | 18,64,2018-01-20,returned 20 | 19,54,2018-01-22,completed 21 | 20,20,2018-01-23,completed 22 | 21,71,2018-01-23,completed 23 | 22,86,2018-01-24,completed 24 | 23,22,2018-01-26,return_pending 25 | 24,3,2018-01-27,completed 26 | 25,51,2018-01-28,completed 27 | 26,32,2018-01-28,completed 28 | 27,94,2018-01-29,completed 29 | 28,8,2018-01-29,completed 30 | 29,57,2018-01-31,completed 31 | 30,69,2018-02-02,completed 32 | 31,16,2018-02-02,completed 33 | 32,28,2018-02-04,completed 34 | 33,42,2018-02-04,completed 35 | 34,38,2018-02-06,completed 36 | 35,80,2018-02-08,completed 37 | 36,85,2018-02-10,completed 38 | 37,1,2018-02-10,completed 39 | 38,51,2018-02-10,completed 40 | 39,26,2018-02-11,completed 41 | 40,33,2018-02-13,completed 42 | 41,99,2018-02-14,completed 43 | 42,92,2018-02-16,completed 44 | 43,31,2018-02-17,completed 45 | 44,66,2018-02-17,completed 46 | 45,22,2018-02-17,completed 47 | 46,6,2018-02-19,completed 48 | 47,50,2018-02-20,completed 49 | 48,27,2018-02-21,completed 50 | 49,35,2018-02-21,completed 51 | 50,51,2018-02-23,completed 52 | 51,71,2018-02-24,completed 53 | 52,54,2018-02-25,return_pending 54 | 53,34,2018-02-26,completed 55 | 54,54,2018-02-26,completed 56 | 55,18,2018-02-27,completed 57 | 56,79,2018-02-28,completed 58 | 57,93,2018-03-01,completed 59 | 58,22,2018-03-01,completed 60 | 59,30,2018-03-02,completed 61 | 60,12,2018-03-03,completed 62 | 61,63,2018-03-03,completed 63 | 62,57,2018-03-05,completed 64 | 63,70,2018-03-06,completed 65 | 64,13,2018-03-07,completed 66 | 65,26,2018-03-08,completed 67 | 66,36,2018-03-10,completed 68 | 67,79,2018-03-11,completed 69 | 68,53,2018-03-11,completed 70 | 69,3,2018-03-11,completed 71 | 70,8,2018-03-12,completed 72 | 71,42,2018-03-12,shipped 73 | 72,30,2018-03-14,shipped 74 | 73,19,2018-03-16,completed 75 | 74,9,2018-03-17,shipped 76 | 75,69,2018-03-18,completed 77 | 76,25,2018-03-20,completed 78 | 77,35,2018-03-21,shipped 79 | 78,90,2018-03-23,shipped 80 | 79,52,2018-03-23,shipped 81 | 80,11,2018-03-23,shipped 82 | 81,76,2018-03-23,shipped 83 | 82,46,2018-03-24,shipped 84 | 83,54,2018-03-24,shipped 85 | 84,70,2018-03-26,placed 86 | 85,47,2018-03-26,shipped 87 | 86,68,2018-03-26,placed 88 | 87,46,2018-03-27,placed 89 | 88,91,2018-03-27,shipped 90 | 89,21,2018-03-28,placed 91 | 90,66,2018-03-30,shipped 92 | 91,47,2018-03-31,placed 93 | 92,84,2018-04-02,placed 94 | 
93,66,2018-04-03,placed 95 | 94,63,2018-04-03,placed 96 | 95,27,2018-04-04,placed 97 | 96,90,2018-04-06,placed 98 | 97,89,2018-04-07,placed 99 | 98,41,2018-04-07,placed 100 | 99,85,2018-04-09,placed 101 | -------------------------------------------------------------------------------- /06_gpu_and_ml/openai_whisper/finetuning/train/end_to_end_check.py: -------------------------------------------------------------------------------- 1 | """ 2 | A full fine-tuning run on GPUs takes multiple hours, but we 3 | want to be able to validate changes quickly while coding. 4 | 5 | This module contains an end-to-end test that runs only 1 step of training, 6 | before testing that the partially trained model can be serialized, saved to 7 | persistent storage, and then downloaded locally for inference. 8 | """ 9 | 10 | import pathlib 11 | 12 | from .config import app_config 13 | from .logs import get_logger 14 | from .train import app, persistent_volume, train 15 | from .transcribe import whisper_transcribe_audio 16 | 17 | logger = get_logger(__name__) 18 | 19 | 20 | # Test model serialization and persistence by starting a new remote 21 | # function that reads back the model files from the temporary network file system disk 22 | # and does a single sentence of translation. 23 | # 24 | # When doing full training runs, the saved model will be loaded in the same way 25 | # but from a *persisted* network file system, which keeps data around even after the Modal 26 | # ephemeral app that ran the training has stopped. 27 | 28 | 29 | @app.function(volumes={app_config.model_dir: persistent_volume}) 30 | def test_download_and_tryout_model(run_id: str): 31 | from datasets import Audio, load_dataset 32 | from evaluate import load 33 | 34 | lang, lang_short = ( 35 | "french", 36 | "fr", 37 | ) # the language doesn't matter for this test. 38 | model_dir = pathlib.Path(app_config.model_dir, run_id) 39 | 40 | # load streaming dataset and read first audio sample 41 | ds = load_dataset( 42 | app_config.dataset, 43 | lang_short, 44 | split="test", 45 | streaming=True, 46 | trust_remote_code=True, 47 | ) 48 | ds = ds.cast_column("audio", Audio(sampling_rate=16_000)) 49 | test_row = next(iter(ds)) 50 | input_speech = test_row["audio"] 51 | 52 | predicted_transcription = whisper_transcribe_audio( 53 | model_dir=model_dir, 54 | language=lang, 55 | data=input_speech["array"], 56 | sample_rate_hz=input_speech["sampling_rate"], 57 | ) 58 | expected_transcription = test_row["sentence"] 59 | wer = load("wer") 60 | wer_score = wer.compute( 61 | predictions=[predicted_transcription], 62 | references=[expected_transcription], 63 | ) 64 | logger.info( 65 | f"{expected_transcription=}\n{predicted_transcription=}\n" 66 | f"Word Error Rate (WER): {wer_score}" 67 | ) 68 | assert wer_score < 1.0, ( 69 | f"Even without finetuning, a WER score of {wer_score} is far too high." 70 | ) 71 | 72 | 73 | # This simple entrypoint function just starts an ephemeral app run and calls 74 | # the two test functions in sequence. 75 | # 76 | # Any runtime errors or assertion errors will fail the app and exit non-zero. 77 | 78 | 79 | @app.local_entrypoint() 80 | def run_test(): 81 | # Test the `main.train` function by passing in test-specific configuration 82 | # that does only a minimal amount of training steps and saves the model 83 | # to the temporary (ie. ephemeral) network file system disk. 84 | # 85 | # This should take only ~1 min to run. 
86 | train.remote(num_train_epochs=1.0, warmup_steps=0, max_steps=1) 87 | test_download_and_tryout_model.remote(run_id=app.app_id) 88 | --------------------------------------------------------------------------------