├── .github
│   └── workflows
│       └── lint.yaml
├── .gitignore
├── LICENSE
├── README.md
├── advanced
│   ├── README.md
│   ├── clip-interrogator
│   │   ├── README.md
│   │   ├── assets
│   │   │   └── two-cats.jpg
│   │   ├── photon.py
│   │   └── requirements.txt
│   ├── codellama
│   │   ├── README.md
│   │   └── photon.py
│   ├── comfyui
│   │   ├── README.md
│   │   ├── assets
│   │   │   ├── browser.png
│   │   │   └── deployment.png
│   │   └── photon.py
│   ├── deepfloyd-if
│   │   ├── README.md
│   │   ├── assets
│   │   │   └── if_result.png
│   │   ├── deepfloyd_if.py
│   │   └── requirements.txt
│   ├── earning-sage
│   │   ├── AAPL-89728-report.tsv
│   │   ├── README.md
│   │   ├── earning-sage-data.json
│   │   └── main.py
│   ├── embedding
│   │   └── baai_bge
│   │       ├── example_usage.py
│   │       └── main.py
│   ├── flamingo
│   │   ├── README.md
│   │   └── photon.py
│   ├── hf-stream-llm
│   │   └── photon.py
│   ├── idefics
│   │   ├── README.md
│   │   ├── photon.py
│   │   └── requirements.txt
│   ├── lavis
│   │   ├── README.md
│   │   ├── caption.py
│   │   ├── extract-features.py
│   │   ├── requirements.txt
│   │   └── vqa.py
│   ├── layout-parser
│   │   └── main.py
│   ├── llama2
│   │   ├── README.md
│   │   └── llama2-api.ipynb
│   ├── nougat
│   │   ├── README.md
│   │   ├── photon.py
│   │   └── requirements.txt
│   ├── open-clip
│   │   ├── README.md
│   │   ├── open-clip.py
│   │   └── requirements.txt
│   ├── pytorch-example
│   │   ├── main.py
│   │   └── requirements.txt
│   ├── sdxl
│   │   ├── README.md
│   │   ├── assets
│   │   │   ├── image.png
│   │   │   ├── inpaint.png
│   │   │   ├── mask.png
│   │   │   └── txt2img.png
│   │   ├── requirements.txt
│   │   └── sdxl.py
│   ├── segment-anything
│   │   ├── README.md
│   │   ├── assets
│   │   │   ├── koala.jpeg
│   │   │   └── koala_segmented.jpg
│   │   ├── requirements.txt
│   │   ├── sam.py
│   │   └── segment-anything.ipynb
│   ├── segment-something
│   │   ├── README.md
│   │   └── SegmentSomething-LanceDB.ipynb
│   ├── stable-diffusion-webui
│   │   ├── README.md
│   │   ├── assets
│   │   │   ├── browser.png
│   │   │   └── deployment.png
│   │   └── photon.py
│   ├── tabbyml
│   │   ├── README.md
│   │   ├── assets
│   │   │   ├── coding.png
│   │   │   ├── deployment.png
│   │   │   └── vscode.png
│   │   └── photon.py
│   ├── tts
│   │   ├── README.md
│   │   ├── assets
│   │   │   └── thequickbrownfox.mp3
│   │   ├── requirements.txt
│   │   ├── tts.ipynb
│   │   ├── tts_main.py
│   │   └── xtts_main.py
│   ├── whisper-jax
│   │   ├── README.md
│   │   ├── assets
│   │   │   ├── bot_token.png
│   │   │   ├── create_slack_app.png
│   │   │   ├── event_subscription.png
│   │   │   ├── permissions.png
│   │   │   ├── test_japanese.wav
│   │   │   └── whisper_result.jpg
│   │   ├── requirements.txt
│   │   └── whisper-jax.py
│   └── whisperx
│       ├── README.md
│       ├── assets
│       │   ├── negative_example.png
│       │   ├── positive_example.png
│       │   ├── pyannote.png
│       │   ├── silent.m4a
│       │   └── thequickbrownfox.mp3
│       ├── audio_analysis.ipynb
│       ├── main.py
│       └── requirements.txt
├── assets
│   └── logo.svg
├── getting-started
│   ├── README.md
│   ├── counter
│   │   ├── counter.py
│   │   └── safe_counter.py
│   ├── custom-image
│   │   ├── README.md
│   │   └── custom-image.py
│   ├── extra_files
│   │   ├── README.md
│   │   ├── content.txt
│   │   ├── dependency.py
│   │   └── main.py
│   └── shell
│       ├── README.md
│       └── shell.py
└── pyproject.toml
/.github/workflows/lint.yaml:
--------------------------------------------------------------------------------
1 | name: lint
2 |
3 | on:
4 | push:
5 | branches: ["main"]
6 | pull_request:
7 | branches: ["**"]
8 |
9 | permissions:
10 | contents: read
11 |
12 | jobs:
13 | lint:
14 | runs-on: ubuntu-latest
15 |
16 | steps:
17 | - uses: actions/checkout@v3
18 | - name: Set up Python
19 | uses: actions/setup-python@v4
20 | with:
21 | python-version: "3.10"
22 | - name: Install tools
23 | run: |
24 | pip install -U pip setuptools
25 | pip install black ruff
26 | - name: Lint
27 | run: |
28 | ruff check .
29 | black --check .
30 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | share/python-wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 | MANIFEST
28 |
29 | # PyInstaller
30 | # Usually these files are written by a python script from a template
31 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
32 | *.manifest
33 | *.spec
34 |
35 | # Installer logs
36 | pip-log.txt
37 | pip-delete-this-directory.txt
38 |
39 | # Unit test / coverage reports
40 | htmlcov/
41 | .tox/
42 | .nox/
43 | .coverage
44 | .coverage.*
45 | .cache
46 | nosetests.xml
47 | coverage.xml
48 | *.cover
49 | *.py,cover
50 | .hypothesis/
51 | .pytest_cache/
52 | cover/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | .pybuilder/
76 | target/
77 |
78 | # Jupyter Notebook
79 | .ipynb_checkpoints
80 |
81 | # IPython
82 | profile_default/
83 | ipython_config.py
84 |
85 | # pyenv
86 | # For a library or package, you might want to ignore these files since the code is
87 | # intended to run in multiple environments; otherwise, check them in:
88 | # .python-version
89 |
90 | # pipenv
91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
94 | # install all needed dependencies.
95 | #Pipfile.lock
96 |
97 | # poetry
98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99 | # This is especially recommended for binary packages to ensure reproducibility, and is more
100 | # commonly ignored for libraries.
101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102 | #poetry.lock
103 |
104 | # pdm
105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106 | #pdm.lock
107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108 | # in version control.
109 | # https://pdm.fming.dev/#use-with-ide
110 | .pdm.toml
111 |
112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113 | __pypackages__/
114 |
115 | # Celery stuff
116 | celerybeat-schedule
117 | celerybeat.pid
118 |
119 | # SageMath parsed files
120 | *.sage.py
121 |
122 | # Environments
123 | .env
124 | .venv
125 | env/
126 | venv/
127 | ENV/
128 | env.bak/
129 | venv.bak/
130 |
131 | # Spyder project settings
132 | .spyderproject
133 | .spyproject
134 |
135 | # Rope project settings
136 | .ropeproject
137 |
138 | # mkdocs documentation
139 | /site
140 |
141 | # mypy
142 | .mypy_cache/
143 | .dmypy.json
144 | dmypy.json
145 |
146 | # Pyre type checker
147 | .pyre/
148 |
149 | # pytype static type analyzer
150 | .pytype/
151 |
152 | # Cython debug symbols
153 | cython_debug/
154 |
155 | # PyCharm
156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158 | # and can be added to the global gitignore or merged into this file. For a more nuclear
159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
160 | #.idea/
161 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | # Lepton Examples
4 |
5 | This repository contains a collection of example photons built with the [Lepton AI sdk](https://github.com/leptonai/leptonai-sdk/).
6 |
7 | Feel free to modify and use these examples as a starting point for your own applications.
8 |
9 | The example repo is organized into the following directories:
10 | - [getting-started](https://github.com/leptonai/examples/tree/main/getting-started): A collection of simple examples that demonstrate the basics of building and running photons, which are lightweight, single-file applications.
11 | - [advanced](https://github.com/leptonai/examples/tree/main/advanced): A collection of more complex examples that demonstrate how to build and run real-world applications, such as LLMs, image search, object segmentation, and more.
12 |
13 | For the full documentation, please visit [https://lepton.ai/docs](https://lepton.ai/docs).
14 |
15 | ## Prerequisites
16 |
17 | Note: we are currently in closed beta. All examples in this folder are runnable locally. To run on the Lepton cloud, join the waiting list at [https://lepton.ai/](https://lepton.ai/).
18 |
19 | Install the Lepton SDK via pip (the `-U` option ensures the most recent version is installed):
20 | ```shell
21 | pip install -U leptonai
22 | ```
23 |
24 | For many examples in the [advanced](https://github.com/leptonai/examples/tree/main/advanced) folder, there are additional dependencies needed by the specific algorithms. We recommend using a virtual environment so these do not pollute your day-to-day setup. For example, if you use conda, you can do:
25 | ```shell
26 | conda create -n myenvironment python=3.10
27 | conda activate myenvironment
28 | ```
29 |
30 | ## Running examples
31 |
32 | There are usually three ways to run the examples in the respective folders:
33 | - Directly invoking the python code to run things locally, for example:
34 | ```bash
35 | python getting-started/counter/counter.py
36 | # runs on local server at port 8080 if not occupied
37 | ```
38 | - Creating a photon and then running it locally with the `lep` CLI, for example:
39 | ```bash
40 | lep photon create -n sam -m advanced/segment-anything/sam.py
41 | lep photon runlocal -n sam
42 | ```
43 | - Creating a photon like the one above, and running it on the cloud:
44 | ```bash
45 | lep login # logs into the lepton cloud
46 | lep photon push -n sam # pushes the photon to the cloud
47 | lep photon run -n sam --resource-shape gpu.a10 # run it
48 | ```
49 | For individual examples, refer to their source files for self-explanatory comments.
50 |
51 | ## Using clients
52 |
53 | In all three cases, you can use the python client to access the deployment via:
54 | ```python
55 | from leptonai.client import Client, local
56 | c = Client(local(port=8080))
57 | ```
58 | or
59 | ```python
60 | from leptonai.client import Client
61 | c = Client("myworkspaceid", "sam", token="**mytoken**")
62 | ```
63 |
64 | For example, for the `counter` example running locally, you can interact with the photon in python:
65 | ```python
66 | >>> from leptonai.client import Client, local
67 | >>> c = Client(local(port=8080))
68 | >>> print(c.add.__doc__)
69 | Add
70 |
71 | Automatically inferred parameters from openapi:
72 |
73 | Input Schema (*=required):
74 | x*: integer
75 |
76 | Output Schema:
77 | output: integer
78 | >>> c.add(x=10)
79 | 10
80 | >>> c.add(x=2)
81 | 12
82 | ```
83 |
84 | For more details, check out the [Quickstart](https://www.lepton.ai/docs/overview/quickstart), [Walkthrough](https://www.lepton.ai/docs/walkthrough/anatomy_of_a_photon), and the [client documentation](https://www.lepton.ai/docs/walkthrough/clients).
85 |
86 |
87 | ## Notes on huggingface access
88 |
89 | Sometimes, you might encounter errors accessing huggingface models, such as the following message when accessing `llama2`:
90 | ```text
91 | Failed to create photon: 401 Client Error. (Request ID: Root=xxxxxxx)
92 |
93 | Cannot access gated repo for url https://huggingface.co/api/models/meta-llama/Llama-2-7b-hf.
94 | Repo model meta-llama/Llama-2-7b-hf is gated. You must be authenticated to access it.
95 | ```
96 | This means that you do not have access to the repo, or you have not set up a huggingface access token. We'll detail how to do both below.
97 |
98 | ### Get access to the huggingface repo
99 | Go to the corresponding huggingface repo, and accept the terms and conditions of the corresponding repository. For example, for llama2, the corresponding link is [https://huggingface.co/meta-llama/Llama-2-7b-hf](https://huggingface.co/meta-llama/Llama-2-7b-hf).
100 |
101 | ### Set up credentials to access huggingface
102 | Your credential to access huggingface can be found online at [https://huggingface.co/settings/tokens](https://huggingface.co/settings/tokens).
103 |
104 | If you are running photons locally, you can do one of the following:
105 | - set the token as an environment variable, with `export HUGGING_FACE_HUB_TOKEN=your_token_here`.
106 | - or, in your python environment, run the following command and login. Huggingface will store the credential in the local cache, usually `.huggingface/token`, for repeated usage:
107 | ```python
108 | import huggingface_hub
109 | huggingface_hub.login()
110 | ```
111 |
112 | If you are running on the Lepton cloud remotely, the easiest approach is to use the `secret` feature of Lepton. You can safely store the huggingface token as a secret via CLI:
113 | ```shell
114 | lep secret create -n HUGGING_FACE_HUB_TOKEN -v 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
115 | ```
116 | (Don't worry, the above token is only an example and isn't active.)
117 |
118 | You can verify the secret exists with `lep secret list`:
119 | ```shell
120 | >> lep secret list
121 | Secrets
122 | ┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━┓
123 | ┃ ID ┃ Value ┃
124 | ┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━┩
125 | │ HUGGING_FACE_HUB_TOKEN │ (hidden) │
126 | └────────────────────────┴──────────┘
127 | ```
128 |
129 | And when you launch a photon, add `--secret`:
130 | ```shell
131 | lep photon run -n myphoton --secret HUGGING_FACE_HUB_TOKEN
132 | ```
133 |
134 | For more detailed information, check out the following resources:
135 | - [Huggingface's login reference](https://huggingface.co/docs/huggingface_hub/package_reference/login)
136 | - [Lepton documentation on secrets](https://www.lepton.ai/docs/advanced/env_n_secrets)
137 | - [An example showing huggingface access using the deepfloyd-if model](https://github.com/leptonai/examples/tree/main/advanced/deepfloyd-if)
138 |
139 | ## Contributing
140 |
141 | We love your feedback! If you would like to suggest example use cases, please [open an issue](https://github.com/leptonai/examples/issues/new). If you would like to contribute an example, kindly create a subfolder under `getting-started` or `advanced`, and submit a pull request.
142 |
--------------------------------------------------------------------------------
/advanced/README.md:
--------------------------------------------------------------------------------
1 | # Advanced topics
2 |
3 | This folder contains typical AI applications built with Lepton.
4 |
5 | ## Example list
6 | - [deepfloyd-if](https://github.com/leptonai/examples/tree/main/advanced/deepfloyd-if): running the Deepfloyd IF model for AIGC.
7 | - [earning-sage](https://github.com/leptonai/examples/tree/main/advanced/earning-sage): talking to an LLM that behaves like a CFO.
8 | - [open-clip](https://github.com/leptonai/examples/tree/main/advanced/open-clip): running the [Open-Clip](https://github.com/mlfoundations/open_clip) model.
9 | - [sdxl](https://github.com/leptonai/examples/tree/main/advanced/sdxl): the Stable Diffusion XL model.
10 | - [segment-anything](https://github.com/leptonai/examples/tree/main/advanced/segment-anything): running Meta's [Segment Anything](https://github.com/facebookresearch/segment-anything) model.
11 | - [tts](https://github.com/leptonai/examples/tree/main/advanced/tts): running the [Coqui AI TTS](https://github.com/coqui-ai/TTS/) text-to-speech model.
12 | - [whisper-jax](https://github.com/leptonai/examples/tree/main/advanced/whisper-jax): running the Whisper ASR model, as well as a slack bot that automatically transcribes all slack channel voice messages.
13 | - [whisperx](https://github.com/leptonai/examples/tree/main/advanced/whisperx): similar to the whisper model, but does transcription, alignment, and diarization for the input.
14 |
--------------------------------------------------------------------------------
/advanced/clip-interrogator/README.md:
--------------------------------------------------------------------------------
1 | # clip-interrogator
2 |
3 | [clip-interrogator](https://github.com/pharmapsychotic/clip-interrogator) is a prompt engineering tool that combines OpenAI's CLIP and Salesforce's BLIP to optimize text prompts to match a given image. Users can use the resulting prompts with text-to-image models like Stable Diffusion on DreamStudio to create cool art. In this example we are going to demonstrate how to run clip-interrogator on Lepton.
4 |
5 | ## Install Lepton sdk
6 | ```shell
7 | pip install leptonai
8 | ```
9 |
10 | ## Launch inference service locally
11 |
12 | To run locally, first install dependencies:
13 | ```shell
14 | pip install -r requirements.txt
15 | ```
16 |
17 | After installing the dependencies, you can launch the inference service locally by running:
18 |
19 | ```shell
20 | lep photon run -n clip-interrogator -m photon.py --local
21 | ```
22 |
23 | ## Launch inference service in the cloud
24 |
25 | Similar to other examples, you can run services on Lepton Cloud Platform easily, e.g.:
26 |
27 | ```shell
28 | lep photon run -n clip-interrogator -m photon.py --resource-shape gpu.a10
29 | ```
30 |
31 | You can visit [dashboard.lepton.ai](https://dashboard.lepton.ai/) to try out the model.
32 |
33 | Note: by default, the server is protected via a token, so you won't be able to access the gradio UI. This is by design to provide adequate security. If you want to make the UI public, you can either add the `--public` argument to `lep photon run`, or update the deployment with:
34 |
35 | ```shell
36 | lep deployment update -n clip-interrogator --public
37 | ```
38 |
39 | ## Client
40 |
41 | Once the inference service is up (either locally or in the cloud), you can use the client to access it programmatically:
42 |
43 | ```python
44 | from leptonai.client import Client, local, current
45 |
46 | # Use this if you are running locally
47 | client = Client(local())
48 | # Or, if you are logged in to your workspace via `lep login` already
49 | # and have launched it:
50 | # client = Client(current(), "clip-interrogator", token=YOUR_WORKSPACE_TOKEN)
51 | ```
52 |
53 | ```python
54 | image = "http://images.cocodataset.org/val2017/000000039769.jpg"
55 | prompt = client.run(image=image)
56 |
57 | print(prompt)
58 | ```
59 |
60 |
61 | Image:
62 |
63 | ![Two cats](assets/two-cats.jpg)
64 |
65 | Prompt:
66 |
67 | ```
68 | two cats laying on a couch with remote controls on the back, on flickr in 2007, ;open mouth, vhs artifacts, inspired by Frédéric Bazille, long - haired siberian cat, inflateble shapes, on a hot australian day, circa 2 0 0 8, at midday, size difference, aliasing visible
69 | ```
70 |
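71 | If your image is a local file rather than a URL, you can also upload it with the `FileParam` helper (a minimal sketch; the file path is just an example and assumes the client was created as above):
72 |
73 | ```python
74 | from leptonai.photon import FileParam
75 |
76 | # Upload a local image instead of passing a URL; the handler accepts both.
77 | with open("assets/two-cats.jpg", "rb") as f:
78 |     prompt = client.run(image=FileParam(f))
79 |
80 | print(prompt)
81 | ```
82 |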
--------------------------------------------------------------------------------
/advanced/clip-interrogator/assets/two-cats.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/leptonai/examples/033f7c0d04c57c8c24b9e82894c3b330c5c74993/advanced/clip-interrogator/assets/two-cats.jpg
--------------------------------------------------------------------------------
/advanced/clip-interrogator/photon.py:
--------------------------------------------------------------------------------
1 | from io import BytesIO
2 | import os
3 | from typing import Union
4 |
5 | from leptonai.photon import Photon, FileParam, get_file_content
6 |
7 |
8 | class ClipInterrogator(Photon):
9 | requirement_dependency = [
10 | "clip-interrogator==0.6.0",
11 | "Pillow",
12 | "numpy<2"
13 | ]
14 |
15 | def init(self):
16 | from clip_interrogator import (
17 | Config,
18 | Interrogator,
19 | list_caption_models,
20 | list_clip_models,
21 | )
22 |
23 | caption_model_name = os.environ.get("CAPTION_MODEL_NAME", "blip-large")
24 | if caption_model_name not in list_caption_models():
25 | raise ValueError(
26 | f"caption_model_name must be one of {list_caption_models()}"
27 | )
28 |
29 | clip_model_name = os.environ.get("CLIP_MODEL_NAME", "ViT-L-14/openai")
30 | if clip_model_name not in list_clip_models():
31 | raise ValueError(f"clip_model_name must be one of {list_clip_models()}")
32 |
33 | self.ci = Interrogator(
34 | Config(
35 | caption_model_name=caption_model_name, clip_model_name=clip_model_name
36 | )
37 | )
38 |
39 | @Photon.handler
40 | def run(self, image: Union[FileParam, str]) -> str:
41 | from PIL import Image
42 |
43 | content = get_file_content(image)
44 | image = Image.open(BytesIO(content)).convert("RGB")
45 | return self.ci.interrogate(image)
46 |
--------------------------------------------------------------------------------
/advanced/clip-interrogator/requirements.txt:
--------------------------------------------------------------------------------
1 | clip-interrogator==0.6.0
2 | Pillow
3 |
--------------------------------------------------------------------------------
/advanced/codellama/README.md:
--------------------------------------------------------------------------------
1 | # CodeLlama
2 |
3 | [CodeLlama](https://ai.meta.com/research/publications/code-llama-open-foundation-models-for-code/) released by Meta is a family of LLMs based on Llama 2, providing code completion, infilling, and zero-shot instruction-following capabilities for programming tasks. In this example we are going to demonstrate how to run the CodeLlama-7b model on Lepton.
4 |
5 | At the time of writing, running CodeLlama models relies on some relatively [new changes](https://github.com/huggingface/transformers/pull/25740) in HuggingFace Transformers that have not been released yet, so please make sure to install transformers from source until the next version is released:
6 |
7 | `pip install git+https://github.com/huggingface/transformers.git@015f8e1 accelerate`
8 |
9 | ## Launch CodeLlama inference service locally
10 |
11 | Ensure that you have installed the required dependencies. Then, run:
12 | ```shell
13 | lep photon run -n codellama -m photon.py
14 | ```
15 | Note that you will need to have a relatively large GPU (>=16GB memory).
16 |
17 | Use the `MODEL` environment variable to switch to a different model in the CodeLlama family, e.g.
18 |
19 | ```shell
20 | MODEL=codellama/CodeLlama-7b-Instruct-hf lep photon run -n codellama -m photon.py
21 | ```
22 |
23 | ## Launch CodeLlama inference service in the cloud
24 |
25 | Similar to other examples, you can run CodeLlama with the following command. Use a reasonably sized GPU like `gpu.a10` to ensure that things run.
26 |
27 | ```shell
28 | lep photon create -n codellama -m photon.py
29 | lep photon push -n codellama
30 | lep photon run \
31 | -n codellama \
32 | --resource-shape gpu.a10
33 | ```
34 |
35 | Use the `MODEL` environment variable to switch to a different model in the CodeLlama family, e.g.
36 |
37 | ```shell
38 | lep photon run \
39 | -n codellama \
40 | --env MODEL=codellama/CodeLlama-7b-Instruct-hf \
41 | --resource-shape gpu.a10
42 | ```
43 |
44 | And visit [dashboard.lepton.ai](https://dashboard.lepton.ai/) to try out the model.
45 |
46 | Note: by default, the server is protected via a token, so you won't be able to access the gradio UI. This is by design to provide adequate security. If you want to make the UI public, you can either add the `--public` argument to `lep photon run`, or update the deployment with:
47 |
48 | ```shell
49 | lep deployment update -n codellama --public
50 | ```
51 |
52 | ### Client
53 |
54 | Once the inference service is up (either locally or in the cloud), you can use the client to access it programmatically:
55 |
56 | Create client
57 | ```python
58 | >>> from leptonai.client import Client, local
59 |
60 | >>> client = Client(local(port=8080)) # If the inference service was launched in the cloud, change the parameters to create the client, see https://github.com/leptonai/examples#using-clients
61 | ```
62 |
63 | Code completion:
64 | ```python
65 |
66 | >>> prompt = '''\
67 | import socket
68 |
69 | def ping_exponential_backoff(host: str):
70 | '''
71 |
72 | >>> print(client.run(inputs=prompt, max_new_tokens=256))
73 | '''
74 | import socket
75 |
76 | def ping_exponential_backoff(host: str):
77 | """Repeatedly try until ping succeeds"""
78 | for i in range(1,11):
79 | print('Ping attempt '+str(i))
80 | ...
81 | '''
82 | ```
83 |
84 | If you have chosen to use the "Instruct" models (e.g. the "codellama/CodeLlama-7b-Instruct-hf" one mentioned above), you can instruct/chat with the model:
85 |
86 | Instructions/Chat:
87 | ````python
88 | >>> user = 'In Bash, how do I list all text files in the current directory (excluding subdirectories) that have been modified in the last month?'
89 |
90 | >>> prompt = f"[INST] {user.strip()} [/INST]"
91 |
92 | >>> print(client.run(inputs=prompt, max_new_tokens=256)[len(prompt):])
93 | '''
94 | You can use the `find` command in Bash to list all text files in the current directory that have been modified in the last month. Here's an example command:
95 | ```
96 | find. -type f -name "*.txt" -mtime -30
97 | ```
98 | Here's how the command works:
99 |
100 | * `.` is the current directory.
101 | * `-type f` specifies that we want to find files (not directories).
102 | * `-name "*.txt"` specifies that we want to find files with the `.txt` extension.
103 | * `-mtime -30` specifies that we want to find files that have been modified in the last 30 days.
104 |
105 | The `-mtime` option takes a number of days as its argument, and the `-30` argument means "modified in the last 30 days".
106 | ...
107 | '''
108 | ````
109 |
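110 | Batching prompts:
111 |
112 | The `run` handler accepts either a single string or a list of strings for `inputs` (see `photon.py`), in which case it returns a list of completions; extra keyword arguments are forwarded to the underlying text-generation pipeline. A minimal sketch, reusing the client created above:
113 |
114 | ```python
115 | >>> prompts = [
116 | ...     "def fibonacci(n: int) -> int:",
117 | ...     "def remove_non_ascii(s: str) -> str:",
118 | ... ]
119 |
120 | >>> completions = client.run(inputs=prompts, max_new_tokens=128)
121 |
122 | >>> for completion in completions:
123 | ...     print(completion)
124 | ```
125 |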
--------------------------------------------------------------------------------
/advanced/codellama/photon.py:
--------------------------------------------------------------------------------
1 | import os
2 | from leptonai.photon import Photon
3 | from typing import List, Optional, Union
4 |
5 | import torch
6 | from transformers import pipeline
7 |
8 |
9 | class CodeLlama(Photon):
10 | requirement_dependency = [
11 | "git+https://github.com/huggingface/transformers.git@015f8e1",
12 | "accelerate",
13 | ]
14 |
15 | def init(self):
16 | if torch.cuda.is_available():
17 | device = 0
18 | else:
19 | device = -1
20 |
21 | self.pipeline = pipeline(
22 | "text-generation",
23 | model=os.environ.get("MODEL", "codellama/CodeLlama-7b-hf"),
24 | torch_dtype=torch.float16,
25 | device=device,
26 | )
27 |
28 | def _get_generated_text(self, res):
29 | if isinstance(res, str):
30 | return res
31 | elif isinstance(res, dict):
32 | return res["generated_text"]
33 | elif isinstance(res, list):
34 | if len(res) == 1:
35 | return self._get_generated_text(res[0])
36 | else:
37 | return [self._get_generated_text(r) for r in res]
38 | else:
39 | raise ValueError(
40 | f"Unsupported result type in _get_generated_text: {type(res)}"
41 | )
42 |
43 | @Photon.handler(
44 | "run",
45 | example={
46 | "inputs": "import socket\n\ndef ping_exponential_backoff(host: str):",
47 | "do_sample": True,
48 | "top_k": 10,
49 | "top_p": 0.95,
50 | "temperature": 0.1,
51 | "max_new_tokens": 256,
52 | },
53 | )
54 | def run_handler(
55 | self,
56 | inputs: Union[str, List[str]],
57 | do_sample: bool = True,
58 | top_k: int = 10,
59 | top_p: float = 0.95,
60 | temperature: Optional[float] = 0.1,
61 | max_new_tokens: int = 256,
62 | **kwargs,
63 | ) -> Union[str, List[str]]:
64 | res = self.pipeline(
65 | inputs,
66 | do_sample=do_sample,
67 | top_k=top_k,
68 | top_p=top_p,
69 | temperature=temperature,
70 | max_new_tokens=max_new_tokens,
71 | **kwargs,
72 | )
73 | return self._get_generated_text(res)
74 |
75 |
76 | if __name__ == "__main__":
77 | p = CodeLlama()
78 | p.launch()
79 |
--------------------------------------------------------------------------------
/advanced/comfyui/README.md:
--------------------------------------------------------------------------------
1 | # ComfyUI
2 |
3 | [ComfyUI](https://github.com/comfyanonymous/ComfyUI.git) is a powerful and modular stable diffusion GUI and backend that lets you design and execute advanced stable diffusion pipelines using a graph/nodes/flowchart based interface.
4 |
5 | ## Install Lepton sdk
6 | ```shell
7 | pip install leptonai
8 | ```
9 |
10 | ## Launch ComfyUI in the cloud
11 |
12 | Similar to other examples, you can run ComfyUI on Lepton Cloud Platform easily, e.g.:
13 |
14 | ```shell
15 | lep photon create -n comfyui -m photon.py
16 | lep photon push -n comfyui
17 | lep photon run \
18 | -n comfyui \
19 | --resource-shape gpu.a10
20 | --public
21 | ```
22 |
23 | You can visit [dashboard.lepton.ai](https://dashboard.lepton.ai/) to use the web Dashboard, or use the `lep` CLI to manage the launched deployment:
24 | ```shell
25 | lep deployment status -n comfyui
26 | ```
27 |
28 | ## Client
29 |
30 | Once the ComfyUI server is up, you can copy the deployment URL shown on the Lepton Dashboard (or in the `lep` CLI output)
31 |
32 | ![ComfyUI deployment](assets/deployment.png)
33 |
34 | and visit it in your web browser, e.g. running the SDXL + SVD models to do text-to-video:
35 |
36 | ![ComfyUI in the browser](assets/browser.png)
37 |
--------------------------------------------------------------------------------
/advanced/comfyui/assets/browser.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/leptonai/examples/033f7c0d04c57c8c24b9e82894c3b330c5c74993/advanced/comfyui/assets/browser.png
--------------------------------------------------------------------------------
/advanced/comfyui/assets/deployment.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/leptonai/examples/033f7c0d04c57c8c24b9e82894c3b330c5c74993/advanced/comfyui/assets/deployment.png
--------------------------------------------------------------------------------
/advanced/comfyui/photon.py:
--------------------------------------------------------------------------------
1 | from leptonai.photon import Photon
2 |
3 |
4 | class ComfyUI(Photon):
5 | comfyui_version = "329c571"
6 | cmd = [
7 | "bash",
8 | "-c",
9 | (
10 | "pip install aiohttp einops torchsde &&"
11 | "git clone --recursive https://github.com/comfyanonymous/ComfyUI.git && cd"
12 | f" ComfyUI && git checkout {comfyui_version} && python main.py --listen"
13 | " 0.0.0.0 --port 8080"
14 | ),
15 | ]
16 | deployment_template = {
17 | "resource_shape": "gpu.a10",
18 | }
19 |
--------------------------------------------------------------------------------
/advanced/deepfloyd-if/README.md:
--------------------------------------------------------------------------------
1 | # Deepfloyd If
2 |
3 | This folder shows an end-to-end AI example, with the [Deepfloyd IF model](https://github.com/deep-floyd/IF). The demo also shows how to run a photon with environment variables (or secrets) to pass in necessary credentials.
4 |
5 | With this demo, you will be able to run deepfloyd and get results like the following:
6 |
7 | ![Deepfloyd IF result](assets/if_result.png)
8 |
9 | ## Obtain access to the deepfloyd model
10 | Deepfloyd hosts models on huggingface. You should obtain access as follows:
11 | - On the [model info page](https://huggingface.co/DeepFloyd/IF-I-XL-v1.0), sign in and accept the agreement.
12 | - Visit the [tokens](https://huggingface.co/settings/tokens) page to generate a token.
13 |
14 | ## Use Lepton's secret management
15 |
16 | As you may use the token multiple times, we recommend storing it in Lepton's secret store. Simply run the following, remembering to replace the token with your own:
17 | ```shell
18 | lep secret create -n HUGGING_FACE_HUB_TOKEN -v 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
19 | ```
20 | (Don't worry, the above token is only an example and isn't active.)
21 |
22 | You can verify the secret exists with `lep secret list`:
23 | ```shell
24 | >> lep secret list
25 | Secrets
26 | ┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━┓
27 | ┃ ID ┃ Value ┃
28 | ┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━┩
29 | │ HUGGING_FACE_HUB_TOKEN │ (hidden) │
30 | └────────────────────────┴──────────┘
31 | ```
32 |
33 | ## Implementation note: mounting a gradio server
34 |
35 | In the deepfloyd example, we will not only expose a standard API, but also incorporate a UI implemented by gradio. This is done easily via the `mount` capability as follows:
36 | ```python
37 | @Photon.handler(mount=True)
38 | def ui(self) -> gr.Blocks:
39 | blocks = gr.Blocks()
40 | # Actual blocks creation code here - see deepfloyd_if.py for details.
41 | ...
42 | return blocks
43 | ```
44 | The UI will then be available at the `/ui/` address. For example, if you are running locally, it would be something like `http://0.0.0.0:8080/ui/`.
45 |
46 | ## Run deepfloyd locally
47 |
48 | Ensure that you have installed the required dependencies. Then, run:
49 | ```shell
50 | python deepfloyd_if.py
51 | ```
52 | Note that you will need to have a relatively large GPU (>20GB memory). When the program runs, visit `http://0.0.0.0:8080/ui/` for the web UI, or use the client to access it programmatically.
53 |
54 | ## Run deepfloyd in the cloud
55 |
56 | Similar to other examples, you can run deepfloyd with the following command. Remember to pass in the huggingface access token, and also, use a reasonably sized GPU like `gpu.a10` to ensure that things run.
57 |
58 | ```shell
59 | lep photon create -n deepfloyd -m deepfloyd_if.py
60 | lep photon push -n deepfloyd
61 | lep photon run \
62 | -n deepfloyd \
63 | --secret HUGGING_FACE_HUB_TOKEN \
64 | --resource-shape gpu.a10
65 | ```
66 |
67 | And visit [dashboard.lepton.ai](https://dashboard.lepton.ai/) to try out the model.
68 |
69 | Note: by default, the server is protected via a token, so you won't be able to access the gradio UI. This is by design to provide adequate security. If you want to make the UI public, you can either add the `--public` argument to `lep photon run`, or update the deployment with:
70 |
71 | ```shell
72 | lep deployment update -n deepfloyd --public
73 | ```
74 |
75 | You can now use deepfloyd either via the UI or via the client. Enjoy!
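76 |
77 | For instance, a minimal client sketch (assuming you are logged into your workspace and the deployment is named `deepfloyd` as above) that saves the generated image to disk:
78 |
79 | ```python
80 | from leptonai.client import Client, current
81 |
82 | # For a local run, use leptonai.client.local(port=8080) instead of current().
83 | c = Client(current(), "deepfloyd", token=YOUR_WORKSPACE_TOKEN)
84 |
85 | # The run handler returns a PNG image; the client returns the raw response bytes.
86 | img = c.run(prompt="A photo of a kangaroo wearing an orange hoodie")
87 | with open("result.png", "wb") as f:
88 |     f.write(img)
89 | ```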
--------------------------------------------------------------------------------
/advanced/deepfloyd-if/assets/if_result.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/leptonai/examples/033f7c0d04c57c8c24b9e82894c3b330c5c74993/advanced/deepfloyd-if/assets/if_result.png
--------------------------------------------------------------------------------
/advanced/deepfloyd-if/deepfloyd_if.py:
--------------------------------------------------------------------------------
1 | from io import BytesIO
2 | import os
3 |
4 | from diffusers import DiffusionPipeline
5 | from diffusers.pipelines.deepfloyd_if import fast27_timesteps, smart27_timesteps
6 | from diffusers.utils import pt_to_pil
7 | from loguru import logger
8 | import gradio
9 | import torch
10 |
11 | from leptonai.photon import Photon, PNGResponse
12 |
13 |
14 | class If(Photon):
15 | requirement_dependency = [
16 | "diffusers==0.16.1",
17 | "torch==2.0.0",
18 | "torchvision==0.15.1",
19 | "gradio",
20 | ]
21 |
22 | def init(self):
23 | # Checks if the user has logged into huggingface, or have provided the huggingface token.
24 | logger.info("Checking huggingface credentials...")
25 | if not os.environ.get("HUGGING_FACE_HUB_TOKEN", None) and not os.path.exists(
26 | os.path.expanduser("~/.huggingface/token")
27 | ):
28 | logger.warning(
29 | "Environment variable HUGGING_FACE_HUB_TOKEN not set, and it seems that"
30 | " you have not logged into huggingface using its CLI. This may stop us"
31 | " from accessing some models. Refer to the deepfloyd page, e.g."
32 | " https://huggingface.co/DeepFloyd/IF-I-XL-v1.0, for information about"
33 | " access."
34 | )
35 | logger.info("Loading models...")
36 | s1_model_size = os.environ.get("STAGE1_MODEL_SIZE", "M")
37 | if s1_model_size not in ["M", "L", "XL"]:
38 | raise ValueError(
39 | "STAGE1_MODEL_SIZE must be one of 'M', 'L', or 'XL', but got"
40 | f" {s1_model_size}"
41 | )
42 | s1_model = f"DeepFloyd/IF-I-{s1_model_size}-v1.0"
43 | logger.info(f"Using stage 1 model: {s1_model}")
44 | s2_model_size = os.environ.get("STAGE2_MODEL_SIZE", "M")
45 | if s2_model_size not in ["M", "L"]:
46 | raise ValueError(
47 | f"STAGE2_MODEL_SIZE must be one of 'M' or 'L', but got {s2_model_size}"
48 | )
49 | s2_model = f"DeepFloyd/IF-II-{s2_model_size}-v1.0"
50 | logger.info(f"Using stage 2 model: {s2_model}")
51 |
52 | enable_cpu_offload = os.environ.get("ENABLE_CPU_OFFLOAD", "1").lower() in [
53 | "true",
54 | "1",
55 | ]
56 | logger.info(f"Enable CPU offload: {enable_cpu_offload}")
57 |
58 | # stage 1
59 | self.stage_1 = DiffusionPipeline.from_pretrained(
60 | s1_model, variant="fp16", torch_dtype=torch.float16
61 | )
62 | if enable_cpu_offload:
63 | self.stage_1.enable_model_cpu_offload()
64 | else:
65 | self.stage_1.to("cuda")
66 |
67 | # stage 2
68 | self.stage_2 = DiffusionPipeline.from_pretrained(
69 | s2_model,
70 | text_encoder=None,
71 | variant="fp16",
72 | torch_dtype=torch.float16,
73 | )
74 | if enable_cpu_offload:
75 | self.stage_2.enable_model_cpu_offload()
76 | else:
77 | self.stage_2.to("cuda")
78 | # stage 3
79 | safety_modules = {
80 | "feature_extractor": self.stage_1.feature_extractor,
81 | "safety_checker": self.stage_1.safety_checker,
82 | "watermarker": self.stage_1.watermarker,
83 | }
84 | self.stage_3 = DiffusionPipeline.from_pretrained(
85 | "stabilityai/stable-diffusion-x4-upscaler",
86 | **safety_modules,
87 | torch_dtype=torch.float16,
88 | )
89 | if enable_cpu_offload:
90 | self.stage_3.enable_model_cpu_offload()
91 | else:
92 | self.stage_3.to("cuda")
93 | logger.info("Models loaded.")
94 |
95 | def _run(self, prompt: str, seed: int = 0):
96 | """
97 | Runs the Deepfloyd IF model with the given prompt, and returns the resulting PIL images.
98 | """
99 | res = []
100 | generator = torch.manual_seed(seed)
101 |
102 | # text embeds
103 | prompt_embeds, negative_embeds = self.stage_1.encode_prompt(prompt)
104 | # stage 1
105 | images = self.stage_1(
106 | prompt_embeds=prompt_embeds,
107 | negative_prompt_embeds=negative_embeds,
108 | generator=generator,
109 | output_type="pt",
110 | timesteps=fast27_timesteps,
111 | ).images # type: ignore
112 | res.append(pt_to_pil(images)[0])
113 | # stage 2
114 | images = self.stage_2(
115 | image=images,
116 | prompt_embeds=prompt_embeds,
117 | negative_prompt_embeds=negative_embeds,
118 | generator=generator,
119 | output_type="pt",
120 | timesteps=smart27_timesteps,
121 | ).images # type: ignore
122 | res.append(pt_to_pil(images)[0])
123 | # stage 3
124 | images = self.stage_3(
125 | prompt=prompt,
126 | image=images,
127 | generator=generator,
128 | noise_level=100,
129 | num_inference_steps=30,
130 | ).images # type: ignore
131 | res.append(images[0])
132 |
133 | return res
134 |
135 | # The main path for the Deepfloyd IF photon.
136 | @Photon.handler(
137 | example={
138 | "prompt": (
139 | "A photo of a kangaroo wearing an orange hoodie and blue sunglasses"
140 | ' standing in front of the eiffel tower holding a sign that says "very'
141 | ' deep learning"'
142 | )
143 | },
144 | )
145 | def run(self, prompt: str) -> PNGResponse:
146 | """
147 | Runs the Deepfloyd IF model with the given prompt, and returns the resulting PNG image.
148 | """
149 | images = self._run(prompt=prompt)
150 |
151 | img_io = BytesIO()
152 | images[-1].save(img_io, format="PNG", quality="keep")
153 | img_io.seek(0)
154 | return PNGResponse(img_io)
155 |
156 | # This shows how to use gradio to create a UI, and use Photon handler's `mount`
157 | # feature to mount the UI to the Photon.
158 | # With this, you can host both a Photon API and a Gradio UI on the same server.
159 | # Of course, make sure that you do not have a conflicting name for the handler.
160 | @Photon.handler(mount=True)
161 | def ui(self) -> gradio.Blocks:
162 | blocks = gradio.Blocks()
163 |
164 | with blocks:
165 | with gradio.Group():
166 | with gradio.Box():
167 | with gradio.Row().style(mobile_collapse=False, equal_height=True):
168 | text = gradio.Textbox(
169 | label="Enter your prompt",
170 | show_label=False,
171 | max_lines=1,
172 | placeholder="Enter your prompt",
173 | ).style(
174 | border=(True, False, True, True),
175 | rounded=(True, False, False, True),
176 | container=False,
177 | )
178 | btn = gradio.Button("Generate image").style(
179 | margin=False,
180 | rounded=(False, True, True, False),
181 | )
182 | gallery = gradio.Gallery(
183 | label="Generated images", show_label=False, elem_id="gallery"
184 | ).style(grid=(3,), height="auto")
185 |
186 | with gradio.Row(elem_id="advanced-options"):
187 | seed = gradio.Slider(
188 | label="Seed",
189 | minimum=0,
190 | maximum=2147483647,
191 | step=1,
192 | randomize=True,
193 | )
194 | btn.click(self._run, inputs=[text, seed], outputs=gallery)
195 | return blocks
196 |
197 |
198 | if __name__ == "__main__":
199 | p = If()
200 | p.launch()
201 |
--------------------------------------------------------------------------------
/advanced/deepfloyd-if/requirements.txt:
--------------------------------------------------------------------------------
1 | diffusers==0.16.1
2 | gradio
3 | leptonai
4 | torch==2.0.0
5 | torchvision==0.15.1
6 |
--------------------------------------------------------------------------------
/advanced/earning-sage/README.md:
--------------------------------------------------------------------------------
1 | # 🧙 Earning-Sage
2 | Have you ever thought about joining an earnings call and asking questions to these CFOs? That used to be a privilege held by investors from high-end investment banks such as JP Morgan, Goldman Sachs, and Morgan Stanley.
3 |
4 | Yet with the capabilities of LLMs and the proper techniques around them, not anymore. And if you don't feel like reading the whole post, feel free to try out a demo [here](https://earningsage.lepton.run/). This demo is created based on the Apple Q2 2023 earnings call.
5 |
6 | The full documentation can be found [here](https://www.lepton.ai/docs/examples/earning_sage).
7 |
8 | ## Getting Started
9 |
10 | ### Step 1 : Setup env
11 | In `main.py`, replace the `OPENAI_API_BASE` and `OPENAI_API_KEY` placeholder values in `init()` with your corresponding URL and token.
12 |
13 |
14 | ### Step 2 : Create a photon
15 | ```shell
16 | lep photon create -n earning-sage -m py:main.py
17 | ```
18 |
19 | ### Step 3 : Run the photon
20 | ```shell
21 | # Running locally
22 | lep photon run -n earning-sage --local
23 | # Running remotely, this requires login to lepton.ai
24 | lep photon push -n earning-sage
25 | lep photon run -n earning-sage
26 | ```
27 |
28 |
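29 | ### Step 4 : Ask questions with the client
30 |
31 | Once the photon is running, you can call the `chat` handler programmatically; a minimal sketch for a local run (the question is just an example):
32 |
33 | ```python
34 | from leptonai.client import Client, local
35 |
36 | c = Client(local(port=8080))
37 | # The photon exposes a "chat" handler that takes a question string.
38 | print(c.chat(message="How did the services business perform this quarter?"))
39 | ```
40 |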
--------------------------------------------------------------------------------
/advanced/earning-sage/main.py:
--------------------------------------------------------------------------------
1 | from leptonai.photon import Photon
2 |
3 | from langchain.llms import OpenAI
4 | from langchain.document_loaders.csv_loader import CSVLoader
5 | from langchain.embeddings.openai import OpenAIEmbeddings
6 | from langchain.vectorstores.chroma import Chroma
7 | from langchain.text_splitter import RecursiveCharacterTextSplitter
8 | from langchain.chains import RetrievalQA
9 |
10 | import openai
11 |
12 | import os
13 | import gradio as gr
14 |
15 |
16 | def create_retriever(target_file):
17 | loader = CSVLoader(target_file, csv_args={"delimiter": "\t"})
18 | text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
19 | chunk_size=256, chunk_overlap=0
20 | )
21 | docs = loader.load_and_split(text_splitter=text_splitter)
22 | embeddings = OpenAIEmbeddings()
23 | db = Chroma.from_documents(docs, embeddings)
24 | return db.as_retriever()
25 |
26 |
27 | def create_qa_retrival_chain(target_file):
28 | foo_retriever = create_retriever(target_file)
29 | llm = OpenAI(temperature=0)
30 | qa = RetrievalQA.from_chain_type(
31 | llm=llm, chain_type="stuff", retriever=foo_retriever
32 | )
33 | return qa
34 |
35 |
36 | class EarningSage_Retriver(Photon):
37 | extra_files = {"AAPL-89728-report.tsv": "AAPL-89728-report.tsv"}
38 |
39 | requirement_dependency = ["tiktoken", "openai", "langchain", "chromadb", "gradio"]
40 |
41 | def init(self):
42 | os.environ["OPENAI_API_BASE"] = "API_BASE_FROM_TUNA"
43 | os.environ["OPENAI_API_KEY"] = "LEPTONAI_API_KEY"
44 |
45 | openai.api_base = os.environ["OPENAI_API_BASE"]
46 | openai.api_key = os.environ["OPENAI_API_KEY"]
47 |
48 | target_file = "AAPL-89728-report.tsv"
49 |
50 | print("Loading LLM from", openai.api_base)
51 | self.retrival_chain = create_qa_retrival_chain(target_file)
52 | print("Ready to serve!")
53 |
54 | @Photon.handler("chat")
55 | def chat(self, message):
56 | return self.retrival_chain.run(message)
57 |
58 | @Photon.handler(mount=True)
59 | def ui(self):
60 | blocks = gr.Blocks(title="🧙🏼 Earning Report Assistant")
61 |
62 | with blocks:
63 | gr.Markdown("# 🧙🏼 Earning Report Assistant")
64 | gr.Markdown("""
65 | This is an earnings report assistant built for investors who can't make the earnings call on time. This sample uses Apple's 2023 Q2 report. Feel free to reach out to uz@lepton.ai for more advanced features.
66 | """)
67 | with gr.Row():
68 | chatbot = gr.Chatbot(label="Model")
69 | with gr.Row():
70 | msg = gr.Textbox(
71 | value=(
72 | "What do you think of the relationship between Apple and it's"
73 | " customers?"
74 | ),
75 | label="Questions you would like to ask",
76 | )
77 |
78 | with gr.Row():
79 | send = gr.Button("Send")
80 | clear = gr.Button("Clear")
81 |
82 | def respond_message(message, chat_history):
83 | bot_message = self.retrival_chain.run(message)
84 | chat_history.append((message, bot_message))
85 | return "", chat_history
86 |
87 | msg.submit(respond_message, [msg, chatbot], [msg, chatbot])
88 | send.click(respond_message, [msg, chatbot], [msg, chatbot])
89 |
90 | button1 = gr.Button(
91 | "Can you discuss the potential for further growth in the number of"
92 | " Apple devices per iPhone user?"
93 | )
94 | button2 = gr.Button("How is Apple ecosystem helping driving the revenue?")
95 | button3 = gr.Button("How is the feedback on Apple Pay Later?")
96 |
97 | def send_button_clicked(x):
98 | return gr.update(
99 | value="""Can you discuss the potential for further growth in the number of Apple devices per iPhone user? Additionally, could you elaborate on how the monetization per user might vary between highly engaged "super users" and those who are not as deeply integrated into the Apple ecosystem?"""
100 | )
101 |
102 | def ask_ai_strategy(x):
103 | question = """What do you think of the relationship between Apple and it's customers? Could you give few examples on Apple trying to improve the customer relationship?"""
104 | return gr.update(value=question)
105 |
106 | def ask_pay_later(x):
107 | question = """Maybe as a quick follow-up, you talked about Apple Pay Later, how has the feedback been so far and how do you expect the adoption of our debt service over the next few quarters? Thank you."""
108 | return gr.update(value=question)
109 |
110 | button1.click(send_button_clicked, msg, msg)
111 | button2.click(ask_ai_strategy, msg, msg)
112 | button3.click(ask_pay_later, msg, msg)
113 |
114 | clear.click(lambda: None, None, chatbot, queue=False)
115 |
116 | return blocks
117 |
--------------------------------------------------------------------------------
/advanced/embedding/baai_bge/example_usage.py:
--------------------------------------------------------------------------------
1 | import subprocess
2 | import time
3 | import socket
4 |
5 | from leptonai.client import Client, local, current # noqa: F401
6 |
7 |
8 | def is_port_open(host, port):
9 | """Check if a port is open on a given host."""
10 | with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
11 | s.settimeout(1)
12 | try:
13 | s.connect((host, port))
14 | return True
15 | except socket.error:
16 | return False
17 |
18 |
19 | def wait_for_port(host, port, interval=5):
20 | """Wait for a port to be connectable."""
21 | while True:
22 | if is_port_open(host, port):
23 | print(f"Port {port} on {host} is now connectable!")
24 | break
25 | else:
26 | print(
27 | f"Port {port} on {host} is not ready yet. Retrying in"
28 | f" {interval} seconds..."
29 | )
30 | time.sleep(interval)
31 |
32 |
33 | def main():
34 | # launches "python main.py" in a subprocess so we can use the client
35 | # to test it.
36 | #
37 | print("Launching the photon in a subprocess on port 8080...")
38 | p = subprocess.Popen(
39 | ["python", "main.py"], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL
40 | )
41 | wait_for_port("localhost", 8080)
42 |
43 | # Note: this is not necessary if you are running the photon in the lepton
44 | # server. To run it in the server, you can do
45 | # lep photon run -n bge -m main.py --resource-shape gpu.a10
46 | # and then instead of using local, you can use the client as
47 | # c = Client(current(), "bge")
48 | # where current() is a helper function to get the current workspace.
49 |
50 | c = Client(local())
51 | # c = Client(current(), "bge")
52 | print("\nThe client has the following endpoints:")
53 | print(c.paths())
54 | print("For the encode endpoint, the docstring is as follows:")
55 | print("***begin docstring***")
56 | print(c.encode.__doc__)
57 | print("***end docstring***")
58 |
59 | print("\n\nRunning the encode endpoint...")
60 | query = "The quick brown fox jumps over the lazy dog."
61 | ret = c.encode(sentences=query)
62 | print("The result is (truncated, showing first 5):")
63 | print(ret[:5])
64 | print(f"(the full result is a list of {len(ret)} floats)")
65 |
66 | print("\n\nRunning the rank endpoint...")
67 | sentences = [
68 | "the fox jumps over the dog",
69 | "the photon is a particle and a wave",
70 | "let the record show that the shipment has arrived",
71 | "the cat jumps on the fox",
72 | ]
73 | rank, score = c.rank(query=query, sentences=sentences)
74 | print("The rank and score are respectively:")
75 | print([(r, s) for r, s in zip(rank, score)])
76 | print(f"The query is: {query}")
77 | print("The sentences, ordered from closest to furthest, are:")
78 | print([sentences[i] for i in rank])
79 |
80 | print("Finished. Closing everything.")
81 | # Closes the subprocess
82 | p.terminate()
83 |
84 |
85 | if __name__ == "__main__":
86 | main()
87 |
--------------------------------------------------------------------------------
/advanced/embedding/baai_bge/main.py:
--------------------------------------------------------------------------------
1 | import os
2 | from typing import List, Union, Tuple
3 |
4 | from leptonai.photon import Photon, HTTPException
5 |
6 |
7 | # Transcribed from https://github.com/FlagOpen/FlagEmbedding/tree/master#model-list
8 | AVAILABLE_MODELS_AND_INSTRUCTIONS = {
9 | "BAAI/llm-embedder": None,
10 | "BAAI/bge-reranker-large": None,
11 | "BAAI/bge-reranker-base": None,
12 | "BAAI/bge-large-en-v1.5": (
13 | "Represent this sentence for searching relevant passages: "
14 | ),
15 | "BAAI/bge-base-en-v1.5": (
16 | "Represent this sentence for searching relevant passages: "
17 | ),
18 | "BAAI/bge-small-en-v1.5": (
19 | "Represent this sentence for searching relevant passages: "
20 | ),
21 | "BAAI/bge-large-zh-v1.5": "为这个句子生成表示以用于检索相关文章:",
22 | "BAAI/bge-base-zh-v1.5": "为这个句子生成表示以用于检索相关文章:",
23 | "BAAI/bge-small-zh-v1.5": "为这个句子生成表示以用于检索相关文章:",
24 | "BAAI/bge-large-en": "Represent this sentence for searching relevant passages: ",
25 | "BAAI/bge-base-en": "Represent this sentence for searching relevant passages: ",
26 | "BAAI/bge-small-en": "Represent this sentence for searching relevant passages: ",
27 | "BAAI/bge-large-zh": "为这个句子生成表示以用于检索相关文章:",
28 | "BAAI/bge-base-zh": "为这个句子生成表示以用于检索相关文章:",
29 | "BAAI/bge-small-zh": "为这个句子生成表示以用于检索相关文章:",
30 | }
31 |
32 |
33 | class BGEEmbedding(Photon):
34 | """
35 | The BGE embedding model from BAAI.
36 | """
37 |
38 | requirement_dependency = [
39 | "FlagEmbedding",
40 | ]
41 |
42 | # manage the max concurrency of the photon. This is the number of requests
43 | # that can be handled at the same time.
44 | handler_max_concurrency = 4
45 |
46 | DEFAULT_MODEL_NAME = "BAAI/bge-large-en-v1.5"
47 | DEFAULT_QUERY_INSTRUCTION = AVAILABLE_MODELS_AND_INSTRUCTIONS[DEFAULT_MODEL_NAME]
48 | DEFAULT_USE_FP16 = True
49 | DEFAULT_NORMALIZE_EMBEDDINGS = True
50 |
51 | def init(self):
52 | from FlagEmbedding import FlagModel
53 |
54 | model_name = os.environ.get("MODEL_NAME", self.DEFAULT_MODEL_NAME)
55 | if model_name not in AVAILABLE_MODELS_AND_INSTRUCTIONS:
56 | raise ValueError(
57 | f"Model name {model_name} not found. Available models:"
58 | f" {AVAILABLE_MODELS_AND_INSTRUCTIONS.keys()}"
59 | )
60 | query_instruction = os.environ.get(
61 | "QUERY_INSTRUCTION", self.DEFAULT_QUERY_INSTRUCTION
62 | )
63 |         use_fp16 = os.environ.get("USE_FP16", str(self.DEFAULT_USE_FP16)).lower() in ("true", "1")
64 |         normalize_embeddings = os.environ.get(
65 |             "NORMALIZE_EMBEDDINGS", str(self.DEFAULT_NORMALIZE_EMBEDDINGS)
66 |         ).lower() in ("true", "1")
67 | self._model = FlagModel(
68 | model_name,
69 | query_instruction_for_retrieval=query_instruction,
70 | use_fp16=use_fp16,
71 | normalize_embeddings=normalize_embeddings,
72 | )
73 |
74 | @Photon.handler
75 | def encode(self, sentences: Union[str, List[str]]) -> List[float]:
76 | """
77 | Encodes the current sentences into embeddings.
78 | """
79 | embeddings = self._model.encode(sentences)
80 | return embeddings.tolist()
81 |
82 | @Photon.handler
83 | def rank(self, query: str, sentences: List[str]) -> Tuple[List[int], List[float]]:
84 | """
85 | Returns a ranked list of indices of the most relevant sentences. This uses
86 | the inner product of the embeddings to rank the sentences. If the model is
87 | not initialized as normalize_embeddings=True, this will raise an error. The
88 | relative similarity scores are also returned.
89 | """
90 | if not self._model.normalize_embeddings:
91 | raise HTTPException(
92 | status_code=500,
93 | detail="Model must have normalize_embeddings=True to use rank.",
94 | )
95 | embeddings = self._model.encode([query] + sentences)
96 | query_embedding = embeddings[0]
97 | sentence_embeddings = embeddings[1:]
98 | inner_product = query_embedding @ sentence_embeddings.T
99 | sorted_indices = inner_product.argsort()[::-1]
100 | return sorted_indices.tolist(), inner_product[sorted_indices].tolist()
101 |
102 |
103 | if __name__ == "__main__":
104 |     # Launch the photon, serving it locally on port 8080.
105 | ph = BGEEmbedding()
106 | ph.launch(port=8080)
107 |
--------------------------------------------------------------------------------
/advanced/flamingo/README.md:
--------------------------------------------------------------------------------
1 | # Flamingo
2 |
3 | [Flamingo](https://www.deepmind.com/blog/tackling-multiple-tasks-with-a-single-visual-language-model) is an effective and efficient general-purpose family of models that can be applied to image and video understanding tasks with minimal task-specific examples. In this example we are going to run Flamingo with [open-flamingo](https://github.com/mlfoundations/open_flamingo) on Lepton.
4 |
5 | ## Install Lepton sdk
6 | ```shell
7 | pip install leptonai
8 | ```
9 |
10 | ## Launch Flamingo inference service locally
11 |
12 | Run:
13 | ```shell
14 | lep photon run -n flamingo -m photon.py
15 | ```
16 | Although it is runnable on CPU, we recommend using a GPU to run the vision model for better performance.
17 |
18 | ## Launch Flamingo inference service in the cloud
19 |
20 | Similar to other examples, you can run Flamingo with the following command.
21 |
22 | ```shell
23 | lep photon create -n flamingo -m photon.py
24 | lep photon push -n flamingo
25 | lep photon run \
26 | -n flamingo \
27 | --resource-shape gpu.a10
28 | ```
29 |
30 | Optionally, add e.g. `--env OPEN_FLAMINGO_MODEL=openflamingo/OpenFlamingo-4B-vitl-rpj3b` to specify the model you would like to run. The supported model names can be found in the open-flamingo repository's README file.
31 |
32 | You can visit [dashboard.lepton.ai](https://dashboard.lepton.ai/) to try out the model.
33 |
34 | Note: by default, the server is protected via a token, so you won't be able to access the gradio UI. This is by design to provide adequate security. If you want to make the UI public, you can either add the `--public` argument to `lep photon run`, or update the deployment with:
35 |
36 | ```shell
37 | lep deployment update -n flamingo --public
38 | ```
39 |
40 | ### Client
41 |
42 | Once the inference service is up (either locally or in the cloud), you can use the client to access it programmatically:
43 |
44 | ```python
45 | from leptonai.client import Client, local, current
46 |
47 | # Use this if you are running locally
48 | client = Client(local())
49 | # Or, if you are logged in to your workspace via `lep login` already
50 | # and have launched it:
51 | # client = Client(current(), "flamingo")
52 |
53 | inputs = {
54 | "demo_images": [
55 | "http://images.cocodataset.org/val2017/000000039769.jpg",
56 | "http://images.cocodataset.org/test-stuff2017/000000028137.jpg"
57 | ],
58 | "demo_texts": [
59 | "An image of two cats.",
60 | "An image of a bathroom sink."
61 | ],
62 | "query_image": "http://images.cocodataset.org/test-stuff2017/000000028352.jpg",
63 | "query_text": "An image of"
64 | }
65 | res = client.run(**inputs)
66 |
67 | print(inputs["query_text"] + res)
68 | ```
69 |
70 | ```
71 | An image of a buffet table.
72 | ```
73 |
--------------------------------------------------------------------------------
/advanced/flamingo/photon.py:
--------------------------------------------------------------------------------
1 | import base64
2 | from io import BytesIO
3 | import os
4 |
5 | from typing import List, Union
6 |
7 | from leptonai.photon import Photon, FileParam, HTTPException
8 |
9 |
10 | # Pretrained models are obtained from https://github.com/mlfoundations/open_flamingo
11 | # and transcribed to the following dictionary.
12 | pretrained_models = {
13 | "openflamingo/OpenFlamingo-3B-vitl-mpt1b": [
14 | "ViT-L-14",
15 | "openai",
16 | "mosaicml/mpt-1b-redpajama-200b",
17 | "mosaicml/mpt-1b-redpajama-200b",
18 | 1,
19 | ],
20 | "OpenFlamingo-3B-vitl-mpt1b-langinstruct": [
21 | "ViT-L-14",
22 | "openai",
23 | "mosaicml/mpt-1b-redpajama-200b-dolly",
24 | "mosaicml/mpt-1b-redpajama-200b-dolly",
25 | 1,
26 | ],
27 | "openflamingo/OpenFlamingo-4B-vitl-rpj3b": [
28 | "ViT-L-14",
29 | "openai",
30 | "togethercomputer/RedPajama-INCITE-Base-3B-v1",
31 | "togethercomputer/RedPajama-INCITE-Base-3B-v1",
32 | 2,
33 | ],
34 | "openflamingo/OpenFlamingo-4B-vitl-rpj3b-langinstruct": [
35 | "ViT-L-14",
36 | "openai",
37 | "togethercomputer/RedPajama-INCITE-Instruct-3B-v1",
38 | "togethercomputer/RedPajama-INCITE-Instruct-3B-v1",
39 | 2,
40 | ],
41 | "openflamingo/OpenFlamingo-9B-vitl-mpt7b": [
42 | "ViT-L-14",
43 | "openai",
44 | "mosaicml/mpt-7b",
45 | "mosaicml/mpt-7b",
46 | 4,
47 | ],
48 | }
49 |
50 |
51 | class Flamingo(Photon):
52 | requirement_dependency = ["open-flamingo", "huggingface-hub", "Pillow", "requests"]
53 |
54 |     IMAGE_TOKEN = "<image>"
55 | END_OF_TEXT_TOKEN = "<|endofchunk|>"
56 | DEFAULT_MODEL = "openflamingo/OpenFlamingo-3B-vitl-mpt1b"
57 |
58 | def init(self):
59 | from open_flamingo import create_model_and_transforms
60 | from huggingface_hub import hf_hub_download
61 | import torch
62 |
63 | if torch.cuda.is_available():
64 | self.device = "cuda"
65 | else:
66 | self.device = "cpu"
67 |
68 | model_name = os.environ.get("OPEN_FLAMINGO_MODEL", self.DEFAULT_MODEL)
69 | try:
70 | model_spec = pretrained_models[model_name]
71 | except KeyError:
72 | raise KeyError(
73 | f"Model {model_name} not found in pretrained_models. Available models:"
74 | f" {pretrained_models.keys()}"
75 | )
76 |
77 | self.model, self.image_processor, self.tokenizer = create_model_and_transforms(
78 | clip_vision_encoder_path=model_spec[0],
79 | clip_vision_encoder_pretrained=model_spec[1],
80 | lang_encoder_path=model_spec[2],
81 | tokenizer_path=model_spec[3],
82 | cross_attn_every_n_layers=model_spec[4],
83 | )
84 |
85 |         # Download the checkpoint that matches the selected model rather than
86 |         # always the default 3B checkpoint.
87 |         checkpoint_path = hf_hub_download(model_name, "checkpoint.pt")
88 | self.model.load_state_dict(torch.load(checkpoint_path), strict=False)
89 | self.model = self.model.to(self.device)
90 |
91 | self.tokenizer.padding_side = "left"
92 |
93 | def _img_param_to_img(self, param):
94 | from PIL import Image
95 | import requests
96 |
97 | if isinstance(param, FileParam):
98 | content = param.file.read()
99 | elif isinstance(param, str):
100 | if param.startswith("http://") or param.startswith("https://"):
101 | content = requests.get(param).content
102 | else:
103 |                 content = base64.b64decode(param)
104 | else:
105 | raise TypeError(f"Invalid image type: {type(param)}")
106 |
107 | return Image.open(BytesIO(content))
108 |
109 | @Photon.handler(
110 | example={
111 | "demo_images": [
112 | "http://images.cocodataset.org/val2017/000000039769.jpg",
113 | "http://images.cocodataset.org/test-stuff2017/000000028137.jpg",
114 | ],
115 | "demo_texts": ["An image of two cats.", "An image of a bathroom sink."],
116 | "query_image": (
117 | "http://images.cocodataset.org/test-stuff2017/000000028352.jpg"
118 | ),
119 | "query_text": "An image of",
120 | },
121 | )
122 | def run(
123 | self,
124 | demo_images: List[Union[FileParam, str]],
125 | demo_texts: List[str],
126 | query_image: Union[FileParam, str],
127 | query_text: str,
128 | max_new_tokens: int = 32,
129 | num_beams: int = 3,
130 | ) -> str:
131 | import torch
132 |
133 | if len(demo_images) != len(demo_texts):
134 | raise HTTPException(
135 | status_code=400,
136 | detail="The number of demo images and demo texts must be the same.",
137 | )
138 |
139 | demo_images = [self._img_param_to_img(img) for img in demo_images]
140 | query_image = self._img_param_to_img(query_image)
141 |
142 | vision_x = [
143 | self.image_processor(img).unsqueeze(0).to(self.device)
144 | for img in (demo_images + [query_image])
145 | ]
146 | vision_x = torch.cat(vision_x, dim=0)
147 | vision_x = vision_x.unsqueeze(1).unsqueeze(0)
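        # vision_x now has shape (batch=1, num_media, num_frames=1, C, H, W),
        # which is the layout open_flamingo's generate() expects.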
148 |
149 | lang_x_text = self.END_OF_TEXT_TOKEN.join(
150 | f"{self.IMAGE_TOKEN}{text}" for text in (demo_texts + [query_text])
151 | )
152 | lang_x = self.tokenizer(
153 | lang_x_text,
154 | return_tensors="pt",
155 | )
156 |
157 | generated_text = self.model.generate(
158 | vision_x=vision_x,
159 | lang_x=lang_x["input_ids"].to(self.device),
160 | attention_mask=lang_x["attention_mask"].to(self.device),
161 | max_new_tokens=max_new_tokens,
162 | num_beams=num_beams,
163 | )
164 | generated_text = self.tokenizer.decode(generated_text[0])
165 |
166 | if generated_text.startswith(lang_x_text):
167 | generated_text = generated_text[len(lang_x_text) :]
168 | if generated_text.endswith(self.END_OF_TEXT_TOKEN):
169 | generated_text = generated_text[: -len(self.END_OF_TEXT_TOKEN)]
170 |
171 | return generated_text
172 |
--------------------------------------------------------------------------------
/advanced/hf-stream-llm/photon.py:
--------------------------------------------------------------------------------
1 | import os
2 | from threading import Thread
3 | from queue import Queue
4 |
5 | from loguru import logger
6 | from leptonai.photon import Photon, StreamingResponse
7 |
8 |
9 | class HfStreamLLM(Photon):
10 |
11 | deployment_template = {
12 | "resource_shape": "gpu.a10.6xlarge",
13 | "env": {
14 | "MODEL_PATH": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
15 | },
16 | "secret": [
17 | "HUGGING_FACE_HUB_TOKEN",
18 | ],
19 | }
20 |
21 | requirement_dependency = [
22 | "transformers",
23 | ]
24 |
25 | handler_max_concurrency = 4
26 |
27 | def init(self):
28 | from transformers import AutoModelForCausalLM, AutoTokenizer
29 |
30 | model_path = os.environ["MODEL_PATH"]
31 |
32 | self._tok = AutoTokenizer.from_pretrained(model_path)
33 | self._model = AutoModelForCausalLM.from_pretrained(model_path).to("cuda")
34 |
35 | self._generation_queue = Queue()
36 |
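        # Spin up background worker threads: each handler thread consumes tokens from
        # its TextIteratorStreamer while a worker runs model.generate() and feeds it.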
37 | for _ in range(self.handler_max_concurrency):
38 | Thread(target=self._generate, daemon=True).start()
39 |
40 | def _generate(self):
41 | while True:
42 | streamer, args, kwargs = self._generation_queue.get()
43 | try:
44 | self._model.generate(*args, **kwargs)
45 | except Exception as e:
46 | logger.error(f"Error in generation: {e}")
47 | streamer.text_queue.put(streamer.stop_signal)
48 |
49 | @Photon.handler
50 | def run(self, text: str, max_new_tokens: int = 100) -> StreamingResponse:
51 | from transformers import TextIteratorStreamer
52 |
53 | streamer = TextIteratorStreamer(self._tok, skip_prompt=True, timeout=60)
54 | inputs = self._tok(text, return_tensors="pt").to("cuda")
55 | self._generation_queue.put_nowait((
56 | streamer,
57 | (),
58 | dict(inputs, streamer=streamer, max_new_tokens=max_new_tokens),
59 | ))
60 | return streamer
61 |
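# A minimal streaming client sketch (assuming the photon is running locally), in the
# same style as the other examples in this repo:
#
#   from leptonai.client import Client, local
#   c = Client(local(), stream=True)
#   for chunk in c.run(text="Once upon a time", max_new_tokens=50):
#       print(chunk.decode("utf-8"), end="", flush=True)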
--------------------------------------------------------------------------------
/advanced/idefics/README.md:
--------------------------------------------------------------------------------
1 | # IDEFICS
2 |
3 | [IDEFICS](https://huggingface.co/blog/idefics) is a multimodal model that accepts sequences of images and texts as input and generates coherent text as output. It can answer questions about images, describe visual content, create stories grounded in multiple images, etc. IDEFICS is an open-access reproduction of Flamingo and is comparable in performance with the original closed-source model across various image-text understanding benchmarks. It comes in two variants - 80 billion parameters and 9 billion parameters. In this example, we are going to use the 9-billion-parameter version of the model to demonstrate how to do multimodal text generation on Lepton.
4 |
5 | ## Install Lepton sdk
6 | ```shell
7 | pip install leptonai
8 | ```
9 |
10 | ## Launch inference service locally
11 |
12 | To run locally, first install dependencies:
13 | ```shell
14 | pip install -r requirements.txt
15 | ```
16 |
17 | After installing dependencies, you can launch inference service locally by running:
18 |
19 | ```shell
20 | lep photon run -n idefics -m photon.py
21 | ```
22 |
23 | By default, the service runs the [9b-instruct](https://huggingface.co/HuggingFaceM4/idefics-9b-instruct) version of the model. You can use the `MODEL` environment variable to select a different variant of the model to run, e.g.:
24 |
25 | ```
26 | MODEL=HuggingFaceM4/idefics-9b lep photon run -n idefics -m photon.py
27 | ```
28 |
29 | ## Launch inference service in the cloud
30 |
31 | Similar to other examples, you can run services on Lepton Cloud Platform easily, e.g.:
32 |
33 | ```shell
34 | lep photon create -n idefics -m photon.py
35 | lep photon push -n idefics
36 | lep photon run \
37 | -n idefics \
38 | --resource-shape gpu.a10
39 | ```
40 |
41 | By default, the service runs the [9b-instruct](https://huggingface.co/HuggingFaceM4/idefics-9b-instruct) version of the model. You can use the `MODEL` environment variable to select a different variant of the model to run, e.g.:
42 |
43 | ```shell
44 | lep photon create -n idefics -m photon.py
45 | lep photon push -n idefics
46 | lep photon run \
47 | -n idefics \
48 | --env MODEL="HuggingFaceM4/idefics-9b" \
49 | --resource-shape gpu.a10
50 | ```
51 |
52 | You can visit [dashboard.lepton.ai](https://dashboard.lepton.ai/) to try out the model.
53 |
54 | If you want to make the API public, you can either add the `--public` argument to `lep photon run`, or update the deployment with:
55 |
56 | ```shell
57 | lep deployment update -n idefics --public
58 | ```
59 |
60 | ## Client
61 |
62 | Once the inference service is up (either locally or in the cloud), you can use the client to access it programmatically:
63 |
64 | ```python
65 | from leptonai.client import Client, local, current
66 |
67 | # Use this if you are running locally
68 | client = Client(local())
69 | # Or, if you are logged in to your workspace via `lep login` already
70 | # and have launched it:
71 | # client = Client(current(), "idefics", stream=True)
72 | ```
73 |
74 | ```python
75 | image = "https://huggingfacem4-idefics-playground.hf.space/file=/home/user/app/example_images/obama-harry-potter.jpg"
76 | question = "Which famous person does the person in the image look like? Could you craft an engaging narrative featuring this character from the image as the main protagonist?"
77 | eos_token = "<end_of_utterance>"
78 | prompts = [
79 | f"User: {question}",
80 | image,
81 | eos_token,
82 | "\nAssistant:",
83 | ]
84 | res = client.run(prompts=prompts)
85 | print(res)
86 | ```
87 |
88 | ```
89 | User: Which famous person does the person in the image look like? Could you craft an engaging narrative featuring this character from the image as the main protagonist?
90 | Assistant: The person in the image looks like Harry Potter, the famous wizard from the Harry Potter book series. As the main protagonist, Harry Potter embarks on a thrilling adventure to defeat the evil Lord Voldemort and save the wizarding world from his grasp. Along the way, he makes new friends, learns powerful spells, and discovers the true extent of his own magical abilities. With the help of his loyal companions Hermione Granger and Ron Weasley, Harry Potter faces countless challenges and obstacles, ultimately emerging victorious and becoming a legend in the wizarding world.
91 | ```
92 |
--------------------------------------------------------------------------------
/advanced/idefics/photon.py:
--------------------------------------------------------------------------------
1 | from io import BytesIO
2 | import os
3 | from typing import Union, List
4 |
5 | from leptonai.photon import Photon, FileParam
6 |
7 |
8 | class IDEFICS(Photon):
9 | requirement_dependency = [
10 | "accelerate",
11 | "Pillow",
12 | "torch",
13 | "transformers",
14 | "protobuf",
15 | ]
16 |
17 | def init(self):
18 | import torch
19 | from transformers import IdeficsForVisionText2Text, AutoProcessor
20 |
21 | self.device = "cuda" if torch.cuda.is_available() else "cpu"
22 |
23 | checkpoint = os.environ.get("MODEL", "HuggingFaceM4/idefics-9b-instruct")
24 | self.model = IdeficsForVisionText2Text.from_pretrained(
25 | checkpoint, torch_dtype=torch.bfloat16, low_cpu_mem_usage=True
26 | ).to(self.device)
27 | self.processor = AutoProcessor.from_pretrained(checkpoint)
28 |
29 | @Photon.handler(
30 | example={
31 | "prompts": [
32 | (
33 | "User: Which famous person does the person in the image look like?"
34 | " Could you craft an engaging narrative featuring this character"
35 | " from the image as the main protagonist?"
36 | ),
37 | "https://huggingfacem4-idefics-playground.hf.space/file=/home/user/app/example_images/obama-harry-potter.jpg",
38 |             "<end_of_utterance>",
39 | "\nAssistant:",
40 | ]
41 | }
42 | )
43 | def run(
44 | self,
45 | prompts: Union[List[Union[str, FileParam]], List[List[Union[str, FileParam]]]],
46 |         eos_token: str = "<end_of_utterance>",
47 |         bad_words: List[str] = ["<image>", "<fake_token_around_image>"],
48 | max_length: int = 256,
49 | **kwargs,
50 | ) -> Union[str, List[str]]:
51 | from PIL import Image
52 |
53 | if not prompts:
54 | return []
55 |
56 | input_is_batch = isinstance(prompts[0], list)
57 | if not input_is_batch:
58 | prompts = [prompts]
59 |
60 | for prompt in prompts:
61 | for i, p in enumerate(prompt):
62 | if isinstance(p, FileParam):
63 | prompt[i] = Image.open(BytesIO(p.read())).convert("RGB")
64 |
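        # The IDEFICS processor accepts interleaved lists of text strings and PIL
        # images and builds one multimodal prompt per inner list.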
65 | inputs = self.processor(
66 | prompts, add_end_of_utterance_token=False, return_tensors="pt"
67 | ).to(self.device)
68 |
69 | # Generation args
70 | exit_condition = self.processor.tokenizer(
71 | eos_token, add_special_tokens=False
72 | ).input_ids
73 | bad_words_ids = self.processor.tokenizer(
74 | bad_words, add_special_tokens=False
75 | ).input_ids
76 |
77 | generated_ids = self.model.generate(
78 | **inputs,
79 | eos_token_id=exit_condition,
80 | bad_words_ids=bad_words_ids,
81 | max_length=max_length,
82 | **kwargs,
83 | )
84 | generated_text = self.processor.batch_decode(
85 | generated_ids, skip_special_tokens=True
86 | )
87 |
88 | if not input_is_batch:
89 | return generated_text[0]
90 | else:
91 | return generated_text
92 |
--------------------------------------------------------------------------------
/advanced/idefics/requirements.txt:
--------------------------------------------------------------------------------
1 | accelerate
2 | Pillow
3 | torch
4 | transformers
5 | protobuf
6 |
--------------------------------------------------------------------------------
/advanced/lavis/README.md:
--------------------------------------------------------------------------------
1 | # LAVIS
2 |
3 | [LAVIS](https://github.com/salesforce/LAVIS) is a Python deep learning library for LAnguage-and-VISion intelligence research and applications that supports 10+ tasks such as retrieval, captioning, visual question answering (VQA), and multimodal classification. In this example we are going to show how to use LAVIS to do image captioning, VQA, and feature extraction on Lepton.
4 |
5 | ## Install Lepton sdk
6 | ```shell
7 | pip install leptonai
8 | ```
9 |
10 | ## Launch inference service locally
11 |
12 | To run locally, first install dependencies:
13 | ```shell
14 | pip install -r requirements.txt
15 | ```
16 |
17 | After installing dependencies, you can launch inference service locally by running:
18 |
19 | ### Image Captioning
20 |
21 | ```shell
22 | lep photon run -n caption -m caption.py
23 | ```
24 |
25 | ### Visual Question Answering (VQA)
26 |
27 | ```shell
28 | lep photon run -n vqa -m vqa.py
29 | ```
30 |
31 | ### Feature Extraction
32 |
33 | ```shell
34 | lep photon run -n extract-features -m extract-features.py
35 | ```
36 |
37 | ## Launch inference service in the cloud
38 |
39 | Similar to other examples, you can run services on Lepton Cloud Platform easily, e.g.:
40 |
41 | ```shell
42 | lep photon create -n extract-features -m extract-features.py
43 | lep photon push -n extract-features
44 | lep photon run \
45 | -n extract-features \
46 | --resource-shape gpu.a10
47 | ```
48 |
49 | You can visit [dashboard.lepton.ai](https://dashboard.lepton.ai/) to try out the model.
50 |
51 | Note: by default, the server is protected via a token, so you won't be able to access the Gradio UI. This is by design to provide adequate security. If you want to make the UI public, you can either add the `--public` argument to `lep photon run`, or update the deployment with:
52 |
53 | ```shell
54 | lep deployment update -n extract-features --public
55 | ```
56 |
57 | ## Client
58 |
59 | Once the inference service is up (either locally or in the cloud), you can use the client to access it programmatically:
60 |
61 | ```python
62 | from leptonai.client import Client, local, current
63 |
64 | # Use this if you are running locally
65 | client = Client(local())
66 | # Or, if you are logged in to your workspace via `lep login` already
67 | # and have launched it:
68 | # client = Client(current(), "extract-features") # or "caption" for Image Captioning, or "vqa" for VQA
69 | ```
70 |
71 | ### Image Captioning
72 | ```python
73 | image = "http://images.cocodataset.org/val2017/000000039769.jpg"
74 | caption = client.run(image=image)
75 |
76 | print(caption)
77 | ```
78 |
79 | ```
80 | a couple of cats laying on top of a pink couch
81 | ```
82 |
83 | ### Visual Question Answering (VQA)
84 |
85 | ```python
86 | image = "http://images.cocodataset.org/val2017/000000039769.jpg"
87 | question = "How many cats?"
88 | answer = client.run(image=image, question=question)
89 |
90 | print(answer)
91 | ```
92 |
93 | ```
94 | 2
95 | ```
96 |
97 | ### Feature Extraction
98 |
99 | ```python
100 | # image embedding
101 | image = "http://images.cocodataset.org/val2017/000000039769.jpg"
102 | features = client.run(image=image)
103 |
104 | print(f"embedding dimensions: {len(features)} x {len(features[0])}")
105 | ```
106 |
107 | ```
108 | embedding dimensions: 32 x 768
109 | ```
110 |
111 | ```python
112 | # text embedding
113 | text = "a large fountain spewing water into the air"
114 | features = client.run(text=text)
115 |
116 | print(f"embedding dimensions: {len(features)} x {len(features[0])}")
117 | ```
118 |
119 | ```
120 | embedding dimensions: 12 x 768
121 | ```
122 |
123 | ```python
124 | # multimodal embedding
125 | image = "http://images.cocodataset.org/val2017/000000039769.jpg"
126 | text = "two cats"
127 | features = client.run(image=image, text=text)
128 |
129 | print(f"embedding dimensions: {len(features)} x {len(features[0])}")
130 | ```
131 |
132 | ```
133 | embedding dimensions: 32 x 768
134 | ```
135 |
--------------------------------------------------------------------------------
/advanced/lavis/caption.py:
--------------------------------------------------------------------------------
1 | from io import BytesIO
2 | from typing import Union
3 |
4 | from leptonai.photon import Photon, FileParam, get_file_content
5 |
6 |
7 | class CaptionPhoton(Photon):
8 | requirement_dependency = [
9 | "salesforce-lavis",
10 | "Pillow",
11 | "opencv-python!=4.8.0.76",
12 | "opencv-contrib-python!=4.8.0.76",
13 | ]
14 |
15 | def _get_img(self, param):
16 | from PIL import Image
17 |
18 | content = get_file_content(param)
19 | return Image.open(BytesIO(content)).convert("RGB")
20 |
21 | def init(self):
22 | import torch
23 | from lavis.models import load_model_and_preprocess
24 |
25 | if torch.cuda.is_available():
26 | self.device = torch.device("cuda")
27 | else:
28 | self.device = torch.device("cpu")
29 |
30 | # Here we choose blip model, for other available models, please refer to:
31 | #
32 | # from lavis.models import model_zoo
33 | # print(model_zoo)
34 | #
35 | self.model_and_preprocess = load_model_and_preprocess(
36 | name="blip_caption",
37 | model_type="large_coco",
38 | is_eval=True,
39 | device=self.device,
40 | )
41 |
42 | @Photon.handler(
43 | example={"image": "http://images.cocodataset.org/val2017/000000039769.jpg"}
44 | )
45 | def run(self, image: Union[FileParam, str]) -> str:
46 | model, vis_processors, _ = self.model_and_preprocess
47 |
48 | image = self._get_img(image)
49 | image = vis_processors["eval"](image).unsqueeze(0).to(self.device)
50 | captions = model.generate({"image": image})
51 | return captions[0]
52 |
--------------------------------------------------------------------------------
/advanced/lavis/extract-features.py:
--------------------------------------------------------------------------------
1 | from io import BytesIO
2 | from typing import Union, Optional, List
3 |
4 | from leptonai.photon import Photon, FileParam, get_file_content, HTTPException
5 |
6 |
7 | class ExtractFeaturesPhoton(Photon):
8 | requirement_dependency = [
9 | "salesforce-lavis",
10 | "Pillow",
11 | "opencv-python!=4.8.0.76",
12 | "opencv-contrib-python!=4.8.0.76",
13 | ]
14 |
15 | def _get_img(self, param):
16 | from PIL import Image
17 |
18 | content = get_file_content(param)
19 | return Image.open(BytesIO(content)).convert("RGB")
20 |
21 | def init(self):
22 | import torch
23 | from lavis.models import load_model_and_preprocess
24 |
25 | if torch.cuda.is_available():
26 | self.device = torch.device("cuda")
27 | else:
28 | self.device = torch.device("cpu")
29 |
30 | # Here we choose blip2 model, for other available models, please refer to:
31 | #
32 | # from lavis.models import model_zoo
33 | # print(model_zoo)
34 | #
35 | self.model_and_preprocess = load_model_and_preprocess(
36 | name="blip2_feature_extractor",
37 | model_type="pretrain",
38 | is_eval=True,
39 | device=self.device,
40 | )
41 |
42 | @Photon.handler(
43 | examples=[
44 | {"image": "http://images.cocodataset.org/val2017/000000039769.jpg"},
45 | {"text": "a large fountain spewing water into the air"},
46 | {
47 | "image": "http://images.cocodataset.org/val2017/000000039769.jpg",
48 | "text": "two cats",
49 | },
50 | ]
51 | )
52 | def run(
53 | self, image: Optional[Union[FileParam, str]] = None, text: Optional[str] = None
54 | ) -> List[float]:
55 | model, vis_processors, txt_processors = self.model_and_preprocess
56 |
57 | if image is None and text is None:
58 | raise HTTPException(
59 | status_code=400, detail="Either image or text should be provided."
60 | )
61 |
62 | if image is not None:
63 | image = self._get_img(image)
64 | image = vis_processors["eval"](image).unsqueeze(0).to(self.device)
65 | if text is not None:
66 | text = txt_processors["eval"](text)
67 |
68 | if image is not None and text is None:
69 | # image embedding
70 | features = model.extract_features({"image": image}, mode="image")
71 | return features.image_embeds[0].tolist()
72 | elif image is None and text is not None:
73 | # text embedding
74 | features = model.extract_features({"text_input": [text]}, mode="text")
75 | return features.text_embeds[0].tolist()
76 | else:
77 | # multimodal embedding
78 | features = model.extract_features({"image": image, "text_input": [text]})
79 | return features.multimodal_embeds[0].tolist()
80 |
--------------------------------------------------------------------------------
/advanced/lavis/requirements.txt:
--------------------------------------------------------------------------------
1 | salesforce-lavis
2 | Pillow
3 | opencv-python!=4.8.0.76
4 | opencv-contrib-python!=4.8.0.76
5 |
--------------------------------------------------------------------------------
/advanced/lavis/vqa.py:
--------------------------------------------------------------------------------
1 | from io import BytesIO
2 | from typing import Union
3 |
4 | from leptonai.photon import Photon, FileParam, get_file_content
5 |
6 |
7 | class VQAPhoton(Photon):
8 | requirement_dependency = [
9 | "salesforce-lavis",
10 | "Pillow",
11 | "opencv-python!=4.8.0.76",
12 | "opencv-contrib-python!=4.8.0.76",
13 | ]
14 |
15 | def _get_img(self, param):
16 | from PIL import Image
17 |
18 | content = get_file_content(param)
19 | return Image.open(BytesIO(content)).convert("RGB")
20 |
21 | def init(self):
22 | import torch
23 | from lavis.models import load_model_and_preprocess
24 |
25 | if torch.cuda.is_available():
26 | self.device = torch.device("cuda")
27 | else:
28 | self.device = torch.device("cpu")
29 |
30 | # Here we choose blip model, for other available models, please refer to:
31 | #
32 | # from lavis.models import model_zoo
33 | # print(model_zoo)
34 | #
35 | self.model_and_preprocess = load_model_and_preprocess(
36 | name="blip_vqa", model_type="vqav2", is_eval=True, device=self.device
37 | )
38 |
39 | @Photon.handler(
40 | example={
41 | "image": "http://images.cocodataset.org/val2017/000000039769.jpg",
42 | "question": "How many cats?",
43 | }
44 | )
45 | def run(self, image: Union[FileParam, str], question: str) -> str:
46 | model, vis_processors, txt_processors = self.model_and_preprocess
47 | image = self._get_img(image)
48 | image = vis_processors["eval"](image).unsqueeze(0).to(self.device)
49 | question = txt_processors["eval"](question)
50 | answers = model.predict_answers(
51 | samples={"image": image, "text_input": question},
52 | inference_method="generate",
53 | )
54 | return answers[0]
55 |
--------------------------------------------------------------------------------
/advanced/layout-parser/main.py:
--------------------------------------------------------------------------------
1 | import copy
2 | import os
3 | import requests
4 | from threading import Lock
5 | from typing import Union, Any, Dict
6 |
7 | from loguru import logger
8 |
9 | import layoutparser as lp
10 | from layoutparser.models.detectron2 import catalog
11 | import cv2
12 |
13 | from leptonai.photon import (
14 | Photon,
15 | FileParam,
16 | get_file_content,
17 | PNGResponse,
18 | HTTPException,
19 | make_png_response,
20 | )
21 |
22 |
23 | class LayoutParser(Photon):
24 | requirement_dependency = [
25 | "layoutparser",
26 | "git+https://github.com/facebookresearch/detectron2.git",
27 | "pytesseract",
28 | ]
29 |
30 | system_dependency = [
31 | "tesseract-ocr",
32 | ]
33 |
34 | # Layout parser ocr right now seems to be thread safe, so we can turn on
35 | # multithreading to avoid blocking and improve overall IO time.
36 | handler_max_concurrency = 4
37 |
38 | # The default model config. Specify "MODEL_CONFIG" env variable to
39 | # override this.
40 | DEFAULT_MODEL_CONFIG = "lp://PubLayNet/faster_rcnn_R_50_FPN_3x/config"
41 |
42 | # The path to save the model.
43 | MODEL_SAVE_PATH = "/tmp/layoutparser_lepton_cache"
44 |
45 | # You can specify the language code(s) of the documents to detect to improve
46 | # accuracy. The supported language and their code can be found at:
47 | # https://github.com/tesseract-ocr/langdata
48 | # The supported format is `+` connected string like `"eng+fra"`
49 | TESSERACT_LANGUAGE = "eng"
50 | TESSERACT_CONFIGS = {}
51 |
52 | def init(self):
53 | logger.debug("Loading model...")
54 | self.model = LayoutParser.safe_load_model(
55 | os.environ.get("MODEL_CONFIG", self.DEFAULT_MODEL_CONFIG)
56 | )
57 | # We are not sure if the underlying layout parser model is thread safe, so we will
58 | # consider it a black box and use a lock to prevent concurrent access.
59 | self.model_lock = Lock()
60 | self.ocr_agent = lp.TesseractAgent(
61 | languages=os.environ.get("TESSERACT_LANGUAGE", self.TESSERACT_LANGUAGE),
62 | **self.TESSERACT_CONFIGS,
63 | )
64 | logger.debug("Model loaded successfully.")
65 |
66 | @Photon.handler
67 | def detect(self, image: Union[str, FileParam]) -> Dict[str, Any]:
68 | """
69 | Detects the layout of the image, and returns the layout in a dictionary. On the client
70 | side, if you want to recover the Layout object, you can use the `layoutparser.load_dict`
71 | functionality.
72 | """
73 | cv_image = self._load_image(image)
74 | with self.model_lock:
75 | layout = self.model.detect(cv_image)
76 | return layout.to_dict()
77 |
78 | @Photon.handler
79 | def draw_detection_box(
80 | self, image: Union[str, FileParam], box_width: int = 3
81 | ) -> PNGResponse:
82 | """
83 | Returns the detection box of the input image as a PNG image.
84 | """
85 | cv_image = self._load_image(image)
86 | with self.model_lock:
87 | layout = self.model.detect(cv_image)
88 | img = lp.draw_box(cv_image, layout, box_width=box_width)
89 | return make_png_response(img)
90 |
91 | @Photon.handler
92 | def ocr(
93 | self,
94 | image: Union[str, FileParam],
95 | return_response: bool = False,
96 | return_only_text: bool = False,
97 | ) -> Union[str, Dict[str, Any]]:
98 | """
99 | Carries out Tesseract ocr for the input image. If return_response=True, the full response
100 | is returned as a dictionary with two keys: `text` containing the text, and `data` containing
101 | the full response from Tesseract, as a DataFrame converted to a dict. If you want to recover
102 | the original DataFrame, you can use `pandas.DataFrame.from_dict(result["data"])`.
103 | """
104 | cv_image = self._load_image(image)
105 | res = self.ocr_agent.detect(
106 | cv_image, return_response=return_response, return_only_text=return_only_text
107 | )
108 |         logger.debug(f"OCR result type: {type(res)}")
110 | if return_response:
111 | # The result is a dict with two keys: "text" being the text, and "data" being a DataFrame.
112 | # We will convert it to a dict with data converted to a dict.
113 | return {"text": res["text"], "data": res["data"].to_dict()}
114 | else:
115 | # The returned result is a string, so we will simply return it.
116 | return res
117 |
118 | @Photon.handler
119 | def draw_ocr_result(
120 | self,
121 | image: Union[str, FileParam],
122 | agg_level: int = 4,
123 | font_size: int = 12,
124 | with_box_on_text: bool = True,
125 | text_box_width: int = 1,
126 | ) -> PNGResponse:
127 | """
128 | Returns the OCR result of the input image as a PNG image. Optionally, specify agg_level to
129 | aggregate the text into blocks. The default agg_level is 4, which means that the text will
130 | be aggregated in words. Options are 3 (LINE), 2 (PARA), 1 (BLOCK), and 0 (PAGE).
131 | """
132 | try:
133 | agg_level_enum = lp.TesseractFeatureType(agg_level)
134 | except ValueError:
135 | raise HTTPException(
136 | status_code=400,
137 | detail=(
138 | f"agg_level should be an integer between 0 and 4. Got {agg_level}."
139 | ),
140 | )
141 | cv_image = self._load_image(image)
142 | res = self.ocr_agent.detect(cv_image, return_response=True)
143 | layout = self.ocr_agent.gather_data(res, agg_level_enum)
144 | img = lp.draw_text(
145 | cv_image,
146 | layout,
147 | font_size=font_size,
148 | with_box_on_text=with_box_on_text,
149 | text_box_width=text_box_width,
150 | )
151 | return make_png_response(img)
152 |
153 | @classmethod
154 | def safe_load_model(cls, config_path: str):
155 | """
156 | A helper function to safely load the model to bypass the bug here:
157 | https://github.com/Layout-Parser/layout-parser/issues/168
158 | """
159 | # override storage path
160 | if not os.path.exists(cls.MODEL_SAVE_PATH):
161 | os.mkdir(cls.MODEL_SAVE_PATH)
162 | config_path_split = config_path.split("/")
163 | dataset_name = config_path_split[-3]
164 | model_name = config_path_split[-2]
165 | # get the URLs from the MODEL_CATALOG and the CONFIG_CATALOG
166 | # (global variables .../layoutparser/models/detectron2/catalog.py)
167 | model_url = catalog.MODEL_CATALOG[dataset_name][model_name]
168 | config_url = catalog.CONFIG_CATALOG[dataset_name][model_name]
169 |
170 | config_file_path, model_file_path = None, None
171 |
172 | for url in [model_url, config_url]:
173 | filename = url.split("/")[-1].split("?")[0]
174 | save_to_path = f"{cls.MODEL_SAVE_PATH}/" + filename
175 | if "config" in filename:
176 | config_file_path = copy.deepcopy(save_to_path)
177 | if "model_final" in filename:
178 | model_file_path = copy.deepcopy(save_to_path)
179 |
180 | # skip if file exist in path
181 | if filename in os.listdir(f"{cls.MODEL_SAVE_PATH}/"):
182 | continue
183 | # Download file from URL
184 | r = requests.get(
185 | url, stream=True, headers={"user-agent": "Wget/1.16 (linux-gnu)"}
186 | )
187 | with open(save_to_path, "wb") as f:
188 | for chunk in r.iter_content(chunk_size=4096):
189 | if chunk:
190 | f.write(chunk)
191 |
192 | # load the label map
193 | label_map = catalog.LABEL_MAP_CATALOG[dataset_name]
194 |
195 | return lp.models.Detectron2LayoutModel(
196 | config_path=config_file_path,
197 | model_path=model_file_path,
198 | label_map=label_map,
199 | )
200 |
201 | def _load_image(self, image: Union[str, FileParam]):
202 | """
203 | Loads the image, and returns the cv.Image object. Throws HTTPError if the image
204 | cannot be loaded.
205 | """
206 | try:
207 | file_content = get_file_content(
208 | image, return_file=True, allow_local_file=True
209 | )
210 | except Exception as e:
211 | raise HTTPException(
212 | status_code=400,
213 | detail=(
214 | f"Cannot open image with source: {image}. Detailed error message:"
215 | f" {str(e)}"
216 | ),
217 | )
218 | try:
219 | cv_image = cv2.imread(file_content.name)
220 | cv_image = cv_image[..., ::-1]
221 | except Exception as e:
222 | raise HTTPException(
223 | status_code=400,
224 | detail=(
225 | f"Cannot load image with source: {image}. Detailed error message:"
226 | f" {str(e)}"
227 | ),
228 | )
229 | return cv_image
230 |
231 |
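# A minimal client sketch (assuming the photon is running locally; replace the URL
# with your own document image):
#
#   from leptonai.client import Client, local
#   c = Client(local())
#   layout_dict = c.detect(image="https://example.com/scanned-page.png")
#   text = c.ocr(image="https://example.com/scanned-page.png")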
232 | if __name__ == "__main__":
233 | ph = LayoutParser()
234 | ph.launch()
235 |
--------------------------------------------------------------------------------
/advanced/llama2/README.md:
--------------------------------------------------------------------------------
1 | # Llama2
2 |
3 | [Llama2](https://ai.meta.com/llama/) is the latest collection of pretrained and fine-tuned generative text models released by Meta, ranging in scale from 7 billion to 70 billion parameters. In this example we are going to use the Llama2-7B model to demonstrate how to get state-of-the-art LLMs running on Lepton within just seconds.
4 |
5 | There are two ways to access Llama2 models on Lepton:
6 |
7 | ## Fully managed Llama2 inference api
8 |
9 | Lepton provides the standard Llama2 models as fully managed API endpoints at https://llama2.lepton.run. This API endpoint is fully compatible with OpenAI's ChatGPT API, so users can directly use OpenAI's SDK, or any tool built on the ChatGPT API, and seamlessly switch to the Llama2 model service. For example, if you are using OpenAI's Python SDK, you can simply switch to Lepton's Llama2 inference API with
10 |
11 | ```python
12 | import openai
13 |
14 | openai.api_base = "https://llama2.lepton.run/api/v1"
15 | openai.api_key = "sk-" + "a" * 48
16 | ```
17 |
18 | After setting the `api_base` (and `api_key`) configuration, all existing code is compatible with Lepton's Llama2 inference API. For example, the following typical Python code that uses OpenAI's ChatGPT API simply works without any modifications:
19 |
20 | ```python
21 | sys_prompt = """
22 | The following is a conversation with an AI assistant. The assistant is helpful, creative, clever, and very friendly.
23 | """
24 | # Create a completion
25 | completion = openai.ChatCompletion.create(
26 | model="gpt-3.5-turbo",
27 | messages=[
28 | {"role": "system", "content": sys_prompt},
29 | {"role": "user", "content": "tell me a short story"},
30 | ],
31 | stream=True,
32 | max_tokens=64,
33 | )
34 | for chunk in completion:
35 | content = chunk["choices"][0]["delta"].get("content")
36 | if content:
37 | print(content, end="")
38 | print()
39 | ```
40 |
41 | ## Dedicated Llama2 inference service
42 |
43 | If fully managed api does not fit your use case, you can also easily launch a dedicated Llama2 model inference service on Lepton platform.
44 |
45 | Note:
46 | Meta hosts the Llama2 model weights on Hugging Face. You should obtain access to these weights by going to the corresponding model page (e.g. [llama-2-7b-chat-hf](https://huggingface.co/meta-llama/Llama-2-7b-chat-hf)) and requesting access. Once you have access, go to Hugging Face's [token management page](https://huggingface.co/settings/tokens) to generate a token.
47 |
48 | ### Use Lepton's secret management
49 |
50 | As you may use the token multiple times, we recommend storing it in Lepton's secret store. Simply do this and remember to replace the token with your own.
51 | ```shell
52 | lep secret create -n HUGGING_FACE_HUB_TOKEN -v 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
53 | ```
54 | (Don't worry, the above token is only an example and isn't active.)
55 |
56 | You can verify the secret exists with `lep secret list`:
57 | ```shell
58 | >> lep secret list
59 | Secrets
60 | ┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━┓
61 | ┃ ID ┃ Value ┃
62 | ┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━┩
63 | │ HUGGING_FACE_HUB_TOKEN │ (hidden) │
64 | └────────────────────────┴──────────┘
65 | ```
66 |
67 | ### Launch llama2 inference service locally
68 |
69 | Ensure that you have installed the required dependencies. Then, run:
70 | ```shell
71 | lep photon run -n llama2 -m hf:meta-llama/Llama-2-7b-hf
72 | ```
73 | Note that you will need to have a relatively large GPU (>20GB memory).
74 |
75 | ### Launch llama2 inference service in the cloud
76 |
77 | Similar to other examples, you can run llama2 with the following command. Remember to pass in the Hugging Face access token, and use a reasonably sized GPU like `gpu.a10` to ensure that things run.
78 |
79 | ```shell
80 | lep photon create -n llama2 -m hf:meta-llama/Llama-2-7b-hf
81 | lep photon push -n llama2
82 | lep photon run \
83 | -n llama2 \
84 | --secret HUGGING_FACE_HUB_TOKEN \
85 | --resource-shape gpu.a10
86 | ```
87 |
88 | And visit [dashboard.lepton.ai](https://dashboard.lepton.ai/) to try out the model.
89 |
90 | Note: by default, the server is protected via a token, so you won't be able to access the Gradio UI. This is by design to provide adequate security. If you want to make the UI public, you can either add the `--public` argument to `lep photon run`, or update the deployment with:
91 |
92 | ```shell
93 | lep deployment update -n llama2 --public
94 | ```
95 |
96 | ### Client
97 |
98 | Once the inference service is up (either locally or in the cloud), you can use the client to access it programmatically:
99 |
100 | ```python
101 | >>> from leptonai.client import Client
102 |
103 | >>> client = Client(...)
104 |
105 | >>> client.run(inputs=["what is 2 + 3"], max_new_tokens=128)
106 | "what is 2 + 3.\nThis is quite common in mathematics: variable height means variable growth and variable foot (puz- ulating, pus, pulsating), variable width for a three dimensional thing. Variable has an incorrect connotation for us. It would be better to say that the statistic is unsatisfactory in all conditions.\nBut...since he _says_ he's a 90th percentile man, and since the classification is as it is, and since those who classify him for that percentile have based it on other empirical evidence, you still have either an error in the percentile, or"
107 | ```
108 |
--------------------------------------------------------------------------------
/advanced/llama2/llama2-api.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {
7 | "id": "lIYdn1woOS1n"
8 | },
9 | "outputs": [],
10 | "source": [
11 | "!pip install -qqq openai"
12 | ]
13 | },
14 | {
15 | "cell_type": "code",
16 | "source": [
17 | "import openai\n",
18 | "\n",
19 | "openai.api_base = \"https://llama2.lepton.run/api/v1\"\n",
20 | "openai.api_key = \"sk-\" + \"a\" * 48"
21 | ],
22 | "metadata": {
23 | "id": "UCOfN-VEsy5m"
24 | },
25 | "execution_count": 2,
26 | "outputs": []
27 | },
28 | {
29 | "cell_type": "code",
30 | "source": [
31 | "sys_prompt = \"\"\"\n",
32 | "The following is a conversation with an AI assistant. The assistant is helpful, creative, clever, and very friendly.\n",
33 | "\"\"\"\n",
34 | "# Create a completion\n",
35 | "completion = openai.ChatCompletion.create(\n",
36 | " model=\"gpt-3.5-turbo\",\n",
37 | " messages=[\n",
38 | " {\"role\": \"system\", \"content\": sys_prompt},\n",
39 | " {\"role\": \"user\", \"content\": \"tell me a short story\"},\n",
40 | " ],\n",
41 | " stream=True,\n",
42 | " max_tokens=64,\n",
43 | ")\n",
44 | "for chunk in completion:\n",
45 | " content = chunk[\"choices\"][0][\"delta\"].get(\"content\")\n",
46 | " if content:\n",
47 | " print(content, end=\"\")\n",
48 | "print()"
49 | ],
50 | "metadata": {
51 | "colab": {
52 | "base_uri": "https://localhost:8080/"
53 | },
54 | "id": "y7eV3R87sz6Y",
55 | "outputId": "75896f74-408c-4946-8bbd-d392b1a4178b"
56 | },
57 | "execution_count": 3,
58 | "outputs": [
59 | {
60 | "output_type": "stream",
61 | "name": "stdout",
62 | "text": [
63 | "Of course! I'd be happy to tell you a short story. Here is one I came up with on the spot:\n",
64 | "\n",
65 | "Once upon a time, in a far-off land, there was a magical forest filled with towering trees, sparkling streams, and a variety of enchanting cre\n"
66 | ]
67 | }
68 | ]
69 | }
70 | ],
71 | "metadata": {
72 | "colab": {
73 | "name": "scratchpad",
74 | "provenance": []
75 | },
76 | "kernelspec": {
77 | "display_name": "Python 3",
78 | "name": "python3"
79 | }
80 | },
81 | "nbformat": 4,
82 | "nbformat_minor": 0
83 | }
--------------------------------------------------------------------------------
/advanced/nougat/README.md:
--------------------------------------------------------------------------------
1 | # Nougat
2 |
3 | [Nougat](https://github.com/facebookresearch/nougat) (Neural Optical Understanding for Academic Documents) is a Visual Transformer model that performs an Optical Character Recognition (OCR) task for processing scientific documents into a markup language. In this example, we are going to show how to use Nougat to turn scanned PDF files (human-readable documents) into markup (machine-readable text).
4 |
5 | ## Install Lepton sdk
6 | ```shell
7 | pip install leptonai
8 | ```
9 |
10 | ## Launch inference service locally
11 |
12 | To run locally, first install dependencies:
13 | ```shell
14 | pip install -r requirements.txt
15 | ```
16 |
17 | Nougat uses `pdfinfo` to extract the "Info" section from PDF files, so you need to install `poppler-utils`:
18 |
19 | ```shell
20 | sudo apt-get update
21 | sudo apt-get install poppler-utils
22 | ```
23 |
24 | After installing dependencies, you can launch inference service locally by running:
25 |
26 | ```shell
27 | lep photon run -n nougat -m photon.py
28 | ```
29 |
30 | ## Launch inference service in the cloud
31 |
32 | Similar to other examples, you can run services on Lepton Cloud Platform easily, e.g.:
33 |
34 | ```shell
35 | lep photon create -n nougat -m photon.py
36 | lep photon push -n nougat
37 | lep photon run \
38 | -n nougat \
39 | --resource-shape gpu.a10
40 | ```
41 |
42 | You can visit [dashboard.lepton.ai](https://dashboard.lepton.ai/) to try out the model.
43 |
44 | If you want to make the API public, you can either add the `--public` argument to `lep photon run`, or update the deployment with:
45 |
46 | ```shell
47 | lep deployment update -n nougat --public
48 | ```
49 |
50 | ## Client
51 |
52 | Once the inference service is up (either locally or in the cloud), you can use the client to access it programmatically:
53 |
54 | ```python
55 | from leptonai.client import Client, local, current
56 |
57 | # Use this if you are running locally
58 | client = Client(local(), stream=True)
59 | # Or, if you are logged in to your workspace via `lep login` already
60 | # and have launched it:
61 | # client = Client(current(), "nougat", stream=True)
62 | ```
63 |
64 | ```python
65 | PDF_FILE = "https://www.gcpsk12.org/site/handlers/filedownload.ashx?moduleinstanceid=74914&dataid=140852&FileName=Sample%20Scanned%20PDF.pdf"
66 | content_iter = client.run(file=PDF_FILE)
67 | for chunk in content_iter:
68 | print(chunk.decode("utf-8"))
69 | ```
70 |
71 | ```
72 | Document Title (Heading Style 1)
73 |
74 | Topic 1 (Heading Style 2)
75 |
76 | Normal Paragraph Style: Lorentz ipsum dolor sit amet, consecetetur adipiscing elit, sed do
77 |
78 | elusmod temper incididunt ut labore et dolore magna aliquua. Dapibus uttrices in iaculis
79 |
80 | nunc sed augue. Fusce ut placerat orci nulla pellentesque dignissim enim sit. Nunc
81 |
82 | congue nisi vitae suscipitt tellus. Tristique et egestas quis ipsum suspendisse uttrices.
83 |
84 | Nunc aliquet bibendum enim facilis gravida neque.
85 |
86 | Topic 2 (Heading Style 2)
87 |
88 | Subtopic A (Heading Style 3)
89 | ...
90 | ```
91 |
--------------------------------------------------------------------------------
/advanced/nougat/photon.py:
--------------------------------------------------------------------------------
1 | from io import BytesIO
2 | import os
3 | import traceback
4 | from typing import Union, Optional
5 |
6 | from loguru import logger
7 | import torch
8 | from leptonai.photon import (
9 | Photon,
10 | FileParam,
11 | get_file_content,
12 | HTTPException,
13 | StreamingResponse,
14 | )
15 |
16 |
17 | class Nougat(Photon):
18 | requirement_dependency = [
19 | "git+https://github.com/facebookresearch/nougat.git@84b3ae1",
20 | "torch",
21 | "pypdf",
22 | "loguru",
23 | "opencv-python!=4.8.0.76",
24 | ]
25 |
26 | system_dependency = ["poppler-utils"]
27 |
28 | def init(self):
29 | from nougat import NougatModel
30 | from nougat.utils.checkpoint import get_checkpoint
31 |
32 | model_tag = os.environ.get(
33 | "MODEL_TAG", "0.1.0-small"
34 | ) # 0.1.0-small or 0.1.0-base
35 | checkpoint = get_checkpoint(model_tag=model_tag)
36 | model = NougatModel.from_pretrained(checkpoint)
37 | if torch.cuda.is_available():
38 | model = model.to("cuda")
39 | self.model = model.to(torch.bfloat16).eval()
40 |         self.batch_size = int(os.environ.get("BATCH_SIZE", 4))
41 |
42 | def iter_batch(self, iterable, batch_size):
43 | for start in range(0, len(iterable), batch_size):
44 | yield iterable[start : min(start + batch_size, len(iterable))]
45 |
46 | def gen_pages(self, pdf, start, end):
47 | from nougat.dataset.rasterize import rasterize_paper
48 | from PIL import Image
49 | from nougat.postprocessing import markdown_compatible
50 |
51 | pages = list(range(start - 1, end))
52 | for batch_pages in self.iter_batch(pages, self.batch_size):
53 | image_bytes_list = rasterize_paper(pdf, pages=batch_pages, return_pil=True)
54 | images = [
55 | self.model.encoder.prepare_input(
56 | Image.open(image_bytes), random_padding=False
57 | )
58 | for image_bytes in image_bytes_list
59 | ]
60 | model_output = self.model.inference(image_tensors=torch.stack(images))
61 | logger.info(
62 | f"#input pages: {len(batch_pages)}, #output pages:"
63 | f" {len(model_output['predictions'])}"
64 | )
65 | for page_prediction in model_output["predictions"]:
66 | content = markdown_compatible(page_prediction)
67 | yield content
68 |
69 | @Photon.handler
70 | def run(
71 | self,
72 | file: Union[FileParam, str],
73 | start: Optional[int] = None,
74 | end: Optional[int] = None,
75 | ) -> StreamingResponse:
76 | import pypdf
77 |
78 | try:
79 | content = get_file_content(file)
80 | pdf = pypdf.PdfReader(BytesIO(content))
81 | except Exception:
82 | logger.error(traceback.format_exc())
83 | raise HTTPException(status_code=400, detail="Failed to read PDF file.")
84 |
85 | total_pages = len(pdf.pages)
86 | start = start or 1
87 | end = end or total_pages
88 | logger.info(f"Total pages: {total_pages}, start: {start}, end: {end}")
89 | if start < 1 or end > total_pages:
90 | raise HTTPException(
91 | status_code=400,
92 | detail=f"Page number should be in range [1, {total_pages}]",
93 | )
94 | if start > end:
95 | raise HTTPException(
96 | status_code=400, detail="Start page number should be less than end."
97 | )
98 |
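        # gen_pages is a generator that yields markdown text page by page, so the
        # client can consume the output incrementally (see the stream=True client
        # example in the README).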
99 | return self.gen_pages(pdf, start, end)
100 |
--------------------------------------------------------------------------------
/advanced/nougat/requirements.txt:
--------------------------------------------------------------------------------
1 | git+https://github.com/facebookresearch/nougat.git@84b3ae1
2 | torch
3 | pypdf
4 | loguru
5 | opencv-python!=4.8.0.76
6 |
--------------------------------------------------------------------------------
/advanced/open-clip/README.md:
--------------------------------------------------------------------------------
1 | # OpenCLIP Example
2 |
3 | This is a simple example of how to use the [OpenCLIP](https://github.com/mlfoundations/open_clip) to generate the embeddings of text and images. OpenCLIP is an open source implementation of OpenAI's [CLIP](https://github.com/openai/CLIP) (Contrastive Language-Image Pre-training). It is a neural network trained on a variety of (image, text) pairs. It can be instructed in natural language to predict the most relevant text snippet, given an image, without directly optimizing for the task.
4 |
5 |
6 | ## Install dependencies
7 |
8 | Within this example, we will use `conda` to manage the environment. You can install `conda` by following the instructions [here](https://docs.conda.io/projects/conda/en/latest/user-guide/install/).
9 |
10 | ```bash
11 | # Create a new environment
12 | conda create -n clip python=3.10
13 | conda activate clip
14 |
15 | # Install leptonai, if you've done this already, you can skip this step
16 | pip install leptonai
17 |
18 | # Install the dependencies
19 | pip install -r requirements.txt
20 | ```
21 |
22 | > During the closed beta stage, you may install the latest package [here](https://www.lepton.ai/docs/overview/quickstart#1-installation)
23 |
24 |
25 | ## Create photon and run locally
26 |
27 | ```bash
28 | # Create a photon
29 | lep photon create -n clip -m open-clip.py
30 | # Run the photon locally
31 | lep photon run -n clip --local
32 | ```
33 |
34 | ## Make a prediction
35 |
36 | ```python
37 | from leptonai.client import Client, local
38 | c = Client(local())
39 |
40 | # Embed a text
41 | c.embed_text(query='cat')
42 |
43 | # Embed an image by url
44 | c.embed_image(url='https://i.natgeofe.com/n/548467d8-c5f1-4551-9f58-6817a8d2c45e/NationalGeographic_2572187_square.jpg')
45 |
46 | ```
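
Since the photon L2-normalizes each embedding before returning it, the dot product of a text embedding and an image embedding is their cosine similarity. A minimal sketch reusing the client `c` from above (numpy is an extra assumption here, not listed in `requirements.txt`):

```python
import numpy as np

# The embeddings are already unit-length, so the dot product equals cosine similarity.
text_vec = np.array(c.embed_text(query='cat'))
image_vec = np.array(c.embed_image(url='https://i.natgeofe.com/n/548467d8-c5f1-4551-9f58-6817a8d2c45e/NationalGeographic_2572187_square.jpg'))

print(float(text_vec @ image_vec))  # closer to 1.0 means a better text-image match
```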
47 |
48 | ## Run the photon remotely
49 |
50 | ```bash
51 | lep login # logs into the lepton cloud
52 | lep photon push -n clip # pushes the photon to the cloud
53 | lep photon run -n clip --resource-shape gpu.a10 # run it
54 | ```
55 |
56 | ```python
57 | from leptonai.client import Client
58 | LEPTON_API_TOKEN = "YOUR_LEPTON_API_TOKEN"
59 |
60 | client = Client("YOUR_WORKSPACE_ID", "clip", token=LEPTON_API_TOKEN)
61 |
62 | # Eg. Embed a text
63 | result = client.embed_text(
64 | query="string"
65 | )
66 |
67 | print(result)
68 | ```
--------------------------------------------------------------------------------
/advanced/open-clip/open-clip.py:
--------------------------------------------------------------------------------
1 | """
2 | This is a simple class that shows how to use the Photon SDK to create a
3 | common embedding service for text and image (assuming image urls), using the
4 | CLIP model. Note that for the sake of simplicity, the model is downloaded from
5 | the internet every time the photon is run. This is not recommended for
6 | production use, but is fine if you are running prototypes.
7 |
8 | By default, this uses the ViT-B-32-quickgelu model with the laion400m_e32 pretrained weights.
9 | You can change the model and pretrained weights by passing in the MODEL_NAME and PRETRAINED
10 | environment variables when running the photon. However, we do not proactively sanity
11 | check the validity of the specified model name and pretrained weights name, so please
12 | make sure they are valid.
13 |
14 | To build the photon, do:
15 |
16 | lep photon create -n clip -m open-clip.py:Clip
17 |
18 | To run the photon locally, simply do
19 |
20 | lep photon run -n clip --local
21 |
22 | For other models, you can try adding --env arguments like:
23 |
24 |     --env MODEL_NAME=ViT-B-32-quickgelu --env PRETRAINED=laion400m_e32
25 |
26 | and the list of models can be found at
27 | https://github.com/mlfoundations/open_clip/blob/main/src/open_clip/pretrained.py
28 |
29 | To deploy the photon, do
30 |
31 | lep photon push -n clip
32 | lep photon run -n clip -dn clip
33 |
34 | Or choose your own deployment name like "-dn my-clip-deployment".
35 |
36 | To test the photon, you can either use the API explorer in the UI, or use
37 | the photon client class in python, e.g.
38 |
39 | from leptonai.client import Client
40 | # If you are running the photon remotely with workspace id "myworkspace"
41 | # and deployment name "clip"
42 | client = Client("myworkspace", "clip")
43 | # Or if you are running the photon locally at port 8080
44 | client = Client("http://localhost:8080")
45 | # Do NOT run the above two commands at the same time! Choose only one.
46 |
47 | # Now you can call the endpoints
48 | vec = client.embed(query="people running by the sea")
49 | # Or call explicit functions:
50 | vec = client.embed_text(query="people running by the sea")
51 | vec = client.embed_image(url="https://upload.wikimedia.org/wikipedia/commons/thumb/3/3f/Fermilab.jpg/800px-Fermilab.jpg")
52 | """
53 |
54 | import io
55 | import os
56 | import urllib
57 | from typing import List
58 |
59 | import open_clip
60 | from PIL import Image
61 | import torch
62 | import validators
63 |
64 | from leptonai.photon import Photon, handler, HTTPException
65 | from leptonai.photon.types import lepton_unpickle, is_pickled, LeptonPickled
66 |
67 |
68 | DEFAULT_MODEL_NAME = "ViT-B-32-quickgelu"
69 | DEFAULT_PRETRAINED = "laion400m_e32"
70 |
71 |
72 | class Clip(Photon):
73 | """
74 | This photon is used to embed text and image into a vector space using CLIP.
75 | """
76 |
77 | # Python dependency
78 | requirement_dependency = [
79 | "open_clip_torch",
80 | "Pillow",
81 | "torch",
82 | "transformers",
83 | "validators",
84 | ]
85 |
86 | def init(self):
87 | if torch.cuda.is_available():
88 | self.DEVICE = "cuda"
89 | else:
90 | self.DEVICE = "cpu"
91 | MODEL_NAME = (
92 | os.environ["MODEL_NAME"]
93 | if "MODEL_NAME" in os.environ
94 | else DEFAULT_MODEL_NAME
95 | )
96 | PRETRAINED = (
97 | os.environ["PRETRAINED"]
98 | if "PRETRAINED" in os.environ
99 | else DEFAULT_PRETRAINED
100 | )
101 | (
102 | self.CLIP_MODEL,
103 | _,
104 | self.CLIP_IMG_PREPROCESS,
105 | ) = open_clip.create_model_and_transforms(
106 | model_name=MODEL_NAME, pretrained=PRETRAINED, device=self.DEVICE
107 | )
108 | self.TOKENIZER = open_clip.get_tokenizer(MODEL_NAME)
109 |
110 | @handler("embed")
111 | def embed(self, query: str) -> List[float]:
112 | if validators.url(query):
113 | return self.embed_image(query)
114 | else:
115 | return self.embed_text(query)
116 |
117 | @handler("embed_text")
118 | def embed_text(self, query: str) -> List[float]:
119 | query = self.TOKENIZER([query])
120 | with torch.no_grad():
121 | text_features = self.CLIP_MODEL.encode_text(query.to(self.DEVICE))
122 | text_features /= text_features.norm(dim=-1, keepdim=True)
123 | return list(text_features.cpu().numpy()[0].astype(float))
124 |
125 |     def embed_image_local(self, image: Image.Image):
126 | image = self.CLIP_IMG_PREPROCESS(image).unsqueeze(0).to(self.DEVICE)
127 | with torch.no_grad():
128 | image_features = self.CLIP_MODEL.encode_image(image)
129 | image_features /= image_features.norm(dim=-1, keepdim=True)
130 | return list(image_features.cpu().numpy()[0].astype(float))
131 |
132 | @handler("embed_image")
133 | def embed_image(self, url: str) -> List[float]:
134 | # open the imageurl and then read the content into a buffer
135 | try:
136 | raw_img = Image.open(io.BytesIO(urllib.request.urlopen(url).read()))
137 | except Exception as e:
138 | raise HTTPException(
139 | status_code=400,
140 | detail=(
141 | f"Cannot open image at url {url}. Detailed error message: {str(e)}"
142 | ),
143 | )
144 | return self.embed_image_local(raw_img)
145 |
146 | @handler("embed_pickle_image")
147 | def embed_pickle_image(self, image: LeptonPickled) -> List[float]:
148 | print("Is the image passed in pickled ? :", is_pickled(image))
149 | try:
150 | raw_img = lepton_unpickle(image)
151 | except Exception:
152 | raise HTTPException(status_code=400, detail="Cannot read image from bytes.")
153 | return self.embed_image_local(raw_img)
154 |
--------------------------------------------------------------------------------
/advanced/open-clip/requirements.txt:
--------------------------------------------------------------------------------
1 | open_clip_torch
2 | Pillow
3 | torch
4 | transformers
5 | validators
6 |
--------------------------------------------------------------------------------
/advanced/pytorch-example/main.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.optim as optim
4 | import torch.nn.functional as F
5 | import torch.distributed as dist
6 | import os
7 | from torchvision import transforms
8 | from torch.nn.parallel import DistributedDataParallel as DDP
9 | from torch.utils.data import DataLoader, DistributedSampler
10 |
11 | from datasets import load_dataset
12 |
13 |
14 | class MNISTModel(nn.Module):
15 | def __init__(self):
16 | super(MNISTModel, self).__init__()
17 | self.conv1 = nn.Conv2d(1, 32, 3, 1)
18 | self.conv2 = nn.Conv2d(32, 64, 3, 1)
19 | self.dropout1 = nn.Dropout(0.25)
20 | self.dropout2 = nn.Dropout(0.5)
21 | self.fc1 = nn.Linear(9216, 128)
22 | self.fc2 = nn.Linear(128, 10)
23 |
24 | def forward(self, x):
25 | x = self.conv1(x)
26 | x = F.relu(x)
27 | x = self.conv2(x)
28 | x = F.relu(x)
29 | x = F.max_pool2d(x, 2)
30 | x = self.dropout1(x)
31 | x = torch.flatten(x, 1)
32 | x = self.fc1(x)
33 | x = F.relu(x)
34 | x = self.dropout2(x)
35 | x = self.fc2(x)
36 | return F.log_softmax(x, dim=1)
37 |
38 | def train():
39 | # Initialize process group
40 | dist.init_process_group(backend="nccl")
41 |
42 | # Get local rank from environment variable
43 | local_rank = int(os.environ["LOCAL_RANK"])
44 | rank = int(os.environ["RANK"])
45 | world_size = int(os.environ["WORLD_SIZE"])
46 |
47 | # Set device
48 | torch.cuda.set_device(local_rank)
49 | device = torch.device("cuda", local_rank)
50 |
51 | print(f"Running on rank {rank} (local_rank: {local_rank})")
52 |
53 | def transform(example):
54 | imgs = [transforms.ToTensor()(img) for img in example["image"]]
55 | imgs = [transforms.Normalize((0.1307,), (0.3081,))(img) for img in imgs]
56 | example["image"] = torch.stack(imgs)
57 | example["label"] = torch.tensor(example["label"])
58 | return example
59 |
60 | dataset = load_dataset("mnist", split="train")
61 | dataset = dataset.with_transform(transform)
62 | sampler = DistributedSampler(dataset, num_replicas=world_size, rank=rank)
63 | train_loader = DataLoader(dataset, batch_size=64, sampler=sampler)
64 |
65 | model = MNISTModel().to(device)
66 | model = DDP(model, device_ids=[local_rank])
67 | optimizer = optim.Adam(model.parameters(), lr=0.001)
68 |
69 | model.train()
70 | for epoch in range(1, 11):
71 | sampler.set_epoch(epoch)
72 | for batch_idx, batch_data in enumerate(train_loader):
73 | data, target = batch_data["image"].to(device), batch_data["label"].to(device)
74 | optimizer.zero_grad()
75 | output = model(data)
76 | loss = F.nll_loss(output, target)
77 | loss.backward()
78 | optimizer.step()
79 |
80 | if batch_idx % 10 == 0:
81 | print(f"Train Epoch: {epoch} [{batch_idx * len(data)}/{len(train_loader.dataset)} ({100. * batch_idx / len(train_loader):.0f}%)]\tLoss: {loss.item():.6f}")
82 |
83 | if rank == 0:
84 | torch.save(model.module.state_dict(), "mnist_model.pth")
85 | print("Model saved as mnist_model.pth")
86 |
87 | dist.destroy_process_group()
88 |
89 | if __name__ == "__main__":
90 | train()
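91 |
92 | # Launch sketch (assuming a single node with two GPUs): this script reads RANK,
93 | # LOCAL_RANK and WORLD_SIZE from the environment, so it is typically started with
94 | # torchrun, e.g.:
95 | #   torchrun --nproc_per_node=2 main.py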
--------------------------------------------------------------------------------
/advanced/pytorch-example/requirements.txt:
--------------------------------------------------------------------------------
1 | datasets
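2 | # Note: main.py also imports torch and torchvision; they are assumed to be provided
3 | # by the base environment (or install them separately).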
--------------------------------------------------------------------------------
/advanced/sdxl/README.md:
--------------------------------------------------------------------------------
1 | # Stable Diffusion XL model
2 |
3 | [Stable Diffusion XL](https://stability.ai/stablediffusion) (SDXL) is the latest open source image generation model developed by Stability AI, focusing on delivering photorealistic outputs that boast intricate details and sophisticated compositions. In this example, we demonstrate how to run an SDXL model inference service on Lepton.
4 |
5 | There are two ways to access the SDXL model:
6 |
7 | ## Fully managed SDXL inference API
8 |
9 | Lepton provides the SDXL model as a fully managed API endpoint at https://sdxl.lepton.run. Users can use the Lepton Python client or any existing HTTP request tool to generate high-resolution, realistic images right away.
10 |
11 | Creating the client:
12 | ```python
13 | from leptonai.client import Client
14 |
15 | API_URL = "https://sdxl.lepton.run"
16 | TOKEN = "YOUR_TOKEN_HERE"
17 |
18 | c = Client(API_URL, token=TOKEN)
19 | ```
20 |
21 | Text to Image:
22 | ```python
23 | prompt = "A cat launching rocket"
24 | seed = 1234
25 | image_bytes = c.txt2img(prompt=prompt, seed=seed)
26 | with open("txt2img_prompt.png", "wb") as f:
27 | f.write(image_bytes)
28 | ```
29 |
30 | Text to Image (with refiner):
31 | ```python
32 | prompt = "A cat launching rocket"
33 | seed = 1234
34 | image_bytes = c.txt2img(prompt=prompt, seed=seed, use_refiner=True)
35 | with open("txt2img_prompt_refiner.png", "wb") as f:
36 | f.write(image_bytes)
37 | ```
38 |
39 |
40 | Inpaint:
41 | ```python
42 | import base64
43 | import requests
44 |
45 | from leptonai.photon import FileParam
46 |
47 |
48 | img_url = "https://raw.githubusercontent.com/CompVis/latent-diffusion/main/data/inpainting_examples/overture-creations-5sI6fQgYIuo.png"
49 | mask_url = "https://raw.githubusercontent.com/CompVis/latent-diffusion/main/data/inpainting_examples/overture-creations-5sI6fQgYIuo_mask.png"
50 | prompt = "A border collie sitting on a bench"
51 | seed = 2236
52 |
53 |
54 | # Directly using urls to pass images
55 | image_bytes = c.inpaint(image=img_url, mask_image=mask_url, prompt=prompt, seed=seed)
56 | with open("inpaint_url.png", "wb") as f:
57 | f.write(image_bytes)
58 |
59 | # Or use FileParam to send image files:
60 | img_content = requests.get(img_url).content
61 | mask_content = requests.get(mask_url).content
62 | image_bytes = c.inpaint(
63 | image=FileParam(img_content),
64 | mask_image=FileParam(mask_content),
65 | prompt=prompt,
66 | seed=seed,
67 | )
68 | with open("inpaint_file_param.png", "wb") as f:
69 | f.write(image_bytes)
70 |
71 | # Or use base64 to encode image files:
72 | img_content = requests.get(img_url).content
73 | mask_content = requests.get(mask_url).content
74 | image_bytes = c.inpaint(
75 | image=base64.b64encode(img_content).decode("ascii"),
76 | mask_image=base64.b64encode(mask_content).decode("ascii"),
77 | prompt=prompt,
78 | seed=seed,
79 | )
80 | with open("inpaint_base64.png", "wb") as f:
81 | f.write(image_bytes)
82 | ```
83 | Image:
84 |
85 |
86 |
87 | Mask:
88 |
89 |
90 |
91 | Result:
92 |
93 |
94 |
95 | ## Dedicated SDXL inference service
96 |
97 | If the fully managed API does not fit your use case, you can also easily launch a dedicated SDXL model inference service on the Lepton platform.
98 |
99 | ### Launch SDXL inference service locally
100 |
101 | Ensure that you have installed the required dependencies. Then, run:
102 | ```shell
103 | lep photon create -n sdxl -m ./sdxl.py
104 | lep photon run -n sdxl
105 | ```
106 | Once the service is up, its url will be printed on the terminal screen (e.g. http://localhost:8080).
107 |
108 | ### Launch SDXL inference service in the cloud
109 |
110 | Similar to other examples, after you have finished iterating with the local service, you can launch it on the Lepton cloud platform, which handles autoscaling, monitoring, etc. for your production use case.
111 |
112 | ```shell
113 | lep photon push -n sdxl
114 | lep photon run \
115 | -n sdxl \
116 | --resource-shape gpu.a10
117 | ```
118 |
119 | And visit [dashboard.lepton.ai](https://dashboard.lepton.ai/) to find the corresponding service url.
120 |
121 | Note: by default, the server is protected via a token, so you won't be able to access the gradio UI. This is by design to provide adequate security. If you want to make the UI public, you can either add the `--public` argument to `lep photon run`, or update the deployment with:
122 |
123 | ```shell
124 | lep deployment update -n sdxl --public
125 | ```
126 |
127 | ### Client
128 |
129 | Once the inference service is up (either locally or in the cloud), you can use the client to access it programmatically:
130 |
131 | ```python
132 | from leptonai.client import Client
133 |
134 | SERVICE_URL = "http://localhost:8080" # if run locally
135 | # SERVICE_URL = "DEPLOYMENT URL shown on Lepton Cloud Platform" # if run on the Lepton Cloud Platform
136 |
137 | c = Client(SERVICE_URL)
138 |
139 | img_content = c.run(prompt="a cat launching rocket", seed=1234)
140 | with open("cat.png", "wb") as fid:
141 | fid.write(img_content)
142 | ```
143 |
--------------------------------------------------------------------------------
/advanced/sdxl/assets/image.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/leptonai/examples/033f7c0d04c57c8c24b9e82894c3b330c5c74993/advanced/sdxl/assets/image.png
--------------------------------------------------------------------------------
/advanced/sdxl/assets/inpaint.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/leptonai/examples/033f7c0d04c57c8c24b9e82894c3b330c5c74993/advanced/sdxl/assets/inpaint.png
--------------------------------------------------------------------------------
/advanced/sdxl/assets/mask.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/leptonai/examples/033f7c0d04c57c8c24b9e82894c3b330c5c74993/advanced/sdxl/assets/mask.png
--------------------------------------------------------------------------------
/advanced/sdxl/assets/txt2img.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/leptonai/examples/033f7c0d04c57c8c24b9e82894c3b330c5c74993/advanced/sdxl/assets/txt2img.png
--------------------------------------------------------------------------------
/advanced/sdxl/requirements.txt:
--------------------------------------------------------------------------------
1 | diffusers>=0.19.0
2 | gradio
3 | invisible-watermark
4 | leptonai
5 | torch
6 |
7 |
--------------------------------------------------------------------------------
/advanced/sdxl/sdxl.py:
--------------------------------------------------------------------------------
1 | from io import BytesIO
2 | from typing import Optional
3 |
4 | from diffusers import DiffusionPipeline
5 | import gradio as gr
6 | import torch
7 |
8 | from leptonai.photon import Photon, PNGResponse
9 |
10 |
11 | class SDXL(Photon):
12 | requirement_dependency = [
13 | "gradio",
14 | "torch",
15 | "diffusers>=0.19.0",
16 | "invisible-watermark",
17 | ]
18 |
19 | def init(self):
20 | cuda_available = torch.cuda.is_available()
21 |
22 | if cuda_available:
23 | self.device = torch.device("cuda")
24 | else:
25 | self.device = torch.device("cpu")
26 |
27 | # load both base & refiner
28 | self.base = DiffusionPipeline.from_pretrained(
29 | "stabilityai/stable-diffusion-xl-base-1.0",
30 | torch_dtype=torch.float16,
31 | variant="fp16",
32 | use_safetensors=True,
33 | )
34 | if cuda_available:
35 | self.base.to("cuda")
36 | # torch.compile is affected by the following issue. If you encounter problems,
37 | # comment the torch.compile line.
38 | # https://github.com/huggingface/diffusers/issues/4370
39 | # self.base.unet = torch.compile(
40 | # self.base.unet, mode="reduce-overhead", fullgraph=True
41 | # )
42 |
43 | self._refiner = None
44 |
45 | @property
46 | def refiner(self):
47 | if self._refiner is None:
48 | pipe = DiffusionPipeline.from_pretrained(
49 | "stabilityai/stable-diffusion-xl-refiner-1.0",
50 | text_encoder_2=self.base.text_encoder_2,
51 | vae=self.base.vae,
52 | torch_dtype=torch.float16,
53 | use_safetensors=True,
54 | variant="fp16",
55 | )
56 | if torch.cuda.is_available():
57 | pipe.to("cuda")
58 |
59 | # torch.compile is affected by the following issue. If you encounter problems,
60 | # comment the torch.compile line.
61 | # pipe.unet = torch.compile(
62 | # pipe.unet, mode="reduce-overhead", fullgraph=True
63 | # )
64 | self._refiner = pipe
65 | return self._refiner
66 |
67 | @Photon.handler(
68 | "run",
69 | example={
70 | "prompt": "A majestic lion jumping from a big stone at night",
71 | "n_steps": 40,
72 | "high_noise_frac": 0.8,
73 | },
74 | )
75 | def run(
76 | self,
77 | prompt: str,
78 | negative_prompt: Optional[str] = None,
79 | width: Optional[int] = None,
80 | height: Optional[int] = None,
81 | guidance_scale: Optional[float] = 5.0,
82 | seed: Optional[int] = None,
83 | num_inference_steps: Optional[int] = 50,
84 | high_noise_frac: Optional[float] = 0.8,
85 | use_refiner: Optional[bool] = True,
86 | ) -> PNGResponse:
87 | images = self._run(
88 | prompt=prompt,
89 | negative_prompt=negative_prompt,
90 | width=width,
91 | height=height,
92 | guidance_scale=guidance_scale,
93 | samples=1,
94 | seed=seed,
95 | num_inference_steps=num_inference_steps,
96 | high_noise_frac=high_noise_frac,
97 | use_refiner=use_refiner,
98 | )
99 |
100 | img_io = BytesIO()
101 | images[0].save(img_io, format="PNG", quality="keep")
102 | img_io.seek(0)
103 | return PNGResponse(img_io)
104 |
105 | def _run(
106 | self,
107 | prompt,
108 | negative_prompt,
109 | width,
110 | height,
111 | guidance_scale,
112 | samples,
113 | seed,
114 | num_inference_steps,
115 | high_noise_frac,
116 | use_refiner,
117 | ):
118 | if seed is not None:
119 | generator = torch.Generator(device=self.device).manual_seed(seed)
120 | else:
121 | generator = None
122 |
123 | if samples > 1:
124 | prompt = [prompt] * samples
125 | if negative_prompt is not None:
126 | negative_prompt = [negative_prompt] * samples
127 | generator = [generator] * samples
128 |
129 | base_extra_kwargs = {}
130 | if use_refiner:
131 | base_extra_kwargs["output_type"] = "latent"
132 | base_extra_kwargs["denoising_end"] = high_noise_frac
133 | # run both experts
134 | images = self.base(
135 | prompt=prompt,
136 | negative_prompt=negative_prompt,
137 | width=width,
138 | height=height,
139 | guidance_scale=guidance_scale,
140 | generator=generator,
141 | num_inference_steps=num_inference_steps,
142 | **base_extra_kwargs,
143 | ).images
144 | if use_refiner:
145 | images = self.refiner(
146 | prompt=prompt,
147 | negative_prompt=negative_prompt,
148 | guidance_scale=guidance_scale,
149 | num_inference_steps=num_inference_steps,
150 | generator=generator,
151 | denoising_start=high_noise_frac,
152 | image=images,
153 | ).images
154 | return images
155 |
156 | @Photon.handler(mount=True)
157 | def ui(self):
158 | blocks = gr.Blocks()
159 |
160 | with blocks:
161 | with gr.Group():
162 | with gr.Box():
163 | with gr.Column(scale=3):
164 | with gr.Row():
165 | prompt = gr.Textbox(
166 | label="Enter your prompt",
167 | show_label=False,
168 | max_lines=1,
169 | placeholder="Enter your prompt",
170 | ).style(
171 | border=(True, False, True, True),
172 | rounded=(True, False, False, True),
173 | container=False,
174 | )
175 | with gr.Row():
176 | negative_prompt = gr.Textbox(
177 | label="Enter your negative prompt",
178 | show_label=False,
179 | max_lines=1,
180 | placeholder="Enter your negative prompt",
181 | ).style(
182 | border=(True, False, True, True),
183 | rounded=(True, False, False, True),
184 | container=False,
185 | )
186 | with gr.Column(scale=1):
187 | btn = gr.Button("Generate image").style(
188 | margin=False,
189 | rounded=(False, True, True, False),
190 | )
191 | gallery = gr.Gallery(
192 | label="Generated images", show_label=False, elem_id="gallery"
193 | ).style(grid=[2], height="auto")
194 |
195 | with gr.Row(elem_id="advanced-options-1"):
196 | samples = gr.Slider(
197 | label="Images", minimum=1, maximum=4, value=1, step=1
198 | )
199 | width = gr.Slider(
200 | label="Width",
201 | minimum=64,
202 | maximum=1024,
203 | value=512,
204 | step=8,
205 | )
206 | height = gr.Slider(
207 | label="Height",
208 | minimum=64,
209 | maximum=1024,
210 | value=512,
211 | step=8,
212 | )
213 | steps = gr.Slider(
214 | label="Steps", minimum=1, maximum=50, value=25, step=1
215 | )
216 | with gr.Row(elem_id="advanced-options-2"):
217 | scale = gr.Slider(
218 | label="Guidance Scale", minimum=0, maximum=50, value=7.5, step=0.1
219 | )
220 | high_noise_frac = gr.Slider(
221 | label="Denoising fraction",
222 | minimum=0,
223 | maximum=1,
224 | value=0.8,
225 | step=0.1,
226 | )
227 | seed = gr.Slider(
228 | label="Seed",
229 | minimum=0,
230 | maximum=2147483647,
231 | value=142857,
232 | step=1,
233 | )
234 | use_refiner = gr.Checkbox(label="Use refiner", value=True)
235 | btn.click(
236 | self._run,
237 | inputs=[
238 | prompt,
239 | negative_prompt,
240 | width,
241 | height,
242 | scale,
243 | samples,
244 | seed,
245 | steps,
246 | high_noise_frac,
247 | use_refiner,
248 | ],
249 | outputs=gallery,
250 | )
251 |
252 | return blocks
253 |
254 |
255 | if __name__ == "__main__":
256 | p = SDXL()
257 | p.launch()
258 |
--------------------------------------------------------------------------------
/advanced/segment-anything/README.md:
--------------------------------------------------------------------------------
1 | # Segment Anything Model
2 |
3 | This folder shows an end-to-end AI example, with Meta's most recent [Segment Anything](https://github.com/facebookresearch/segment-anything) model. Specifically, we will implement the functionality that takes an image and an optional prompt, and produces a segmentation mask, either as a list of structured boolean masks, or as a single overlayed image for display.
4 |
5 | A quick example is shown below with input image and output mask:
6 |
7 |
8 |
9 | Technically, this demo shows how to:
10 | - specify dependencies for a photon, including dependencies that are github repositories,
11 | - use the `@Photon.handler` decorator to define handlers for a photon, and annotate the arguments and return values for better user experience,
12 | - return different types of outputs from a photon deployment,
13 | - use the python client to connect and interact with the deployment in nontrivial ways.
14 |
15 | Check out `sam.py` for the actual implementation, and `segment-anything.ipynb` for a notebook demonstration.
16 |
17 | To run it on Lepton AI platform, you can use the following command:
18 |
19 | ```bash
20 | # Create a photon
21 | lep photon create -n sam -m py:github.com/leptonai/examples.git:advanced/segment-anything/sam.py
22 | # Push the photon to the platform
23 | lep photon push -n sam
24 | # Run the SAM remotely
25 | lep photon run -n sam --resource-shape gpu.a10
26 | ```
27 |
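28 | Once the deployment is up, you can connect to it with the Python client. Below is a
29 | minimal sketch; the deployment URL, token, and the handler name `run` used here are
30 | placeholders, so check `sam.py` (or the deployment's OpenAPI doc) for the actual
31 | endpoint names and arguments:
32 |
33 | ```python
34 | from leptonai.client import Client
35 |
36 | c = Client("YOUR_DEPLOYMENT_URL", token="YOUR_TOKEN")
37 | # Hypothetical call - the real handler names and signatures are defined in sam.py.
38 | result = c.run(url="https://example.com/koala.jpeg")
39 | ```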
--------------------------------------------------------------------------------
/advanced/segment-anything/assets/koala.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/leptonai/examples/033f7c0d04c57c8c24b9e82894c3b330c5c74993/advanced/segment-anything/assets/koala.jpeg
--------------------------------------------------------------------------------
/advanced/segment-anything/assets/koala_segmented.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/leptonai/examples/033f7c0d04c57c8c24b9e82894c3b330c5c74993/advanced/segment-anything/assets/koala_segmented.jpg
--------------------------------------------------------------------------------
/advanced/segment-anything/requirements.txt:
--------------------------------------------------------------------------------
1 | git+https://github.com/facebookresearch/segment-anything.git
2 | leptonai
3 | Pillow
--------------------------------------------------------------------------------
/advanced/segment-something/README.md:
--------------------------------------------------------------------------------
1 | # 👀Segment Something
2 |
3 | Entity extraction with CLIP and SAM model. For more detailed instructions, please refer to this [link](https://www.lepton.ai/docs/examples/segment_something).
--------------------------------------------------------------------------------
/advanced/stable-diffusion-webui/README.md:
--------------------------------------------------------------------------------
1 | # Stable Diffusion web UI
2 |
3 | [Stable Diffusion web UI](https://github.com/AUTOMATIC1111/stable-diffusion-webui.git) is a browser interface based on Gradio library for Stable Diffusion.
4 |
5 | ## Install Lepton sdk
6 | ```shell
7 | pip install leptonai
8 | ```
9 |
10 | ## Launch Stable Diffusion web UI in the cloud
11 |
12 | Similar to other examples, you can run Stable Diffusion web UI on Lepton Cloud Platform easily, e.g.:
13 |
14 | ```shell
15 | lep photon create -n stable-diffusion-webui -m photon.py
16 | lep photon push -n stable-diffusion-webui
17 | lep photon run \
18 | -n stable-diffusion-webui \
19 | --resource-shape gpu.a10 \
20 | --public
21 | ```
22 |
23 | You can visit [dashboard.lepton.ai](https://dashboard.lepton.ai/) to use the web Dashboard, or use `lep` cli to manage the launched deployment:
24 | ```shell
25 | lep deployment status -n stable-diffusion-webui
26 | ```
27 |
28 | ## Client
29 |
30 | Once the Stable Diffusion web UI server is up, you can copy the deployment url shown on the Lepton Dashboard (or in the `lep` cli output)
31 |
32 |
33 |
34 | and visit it in the web browser
35 |
36 |
37 |
--------------------------------------------------------------------------------
/advanced/stable-diffusion-webui/assets/browser.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/leptonai/examples/033f7c0d04c57c8c24b9e82894c3b330c5c74993/advanced/stable-diffusion-webui/assets/browser.png
--------------------------------------------------------------------------------
/advanced/stable-diffusion-webui/assets/deployment.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/leptonai/examples/033f7c0d04c57c8c24b9e82894c3b330c5c74993/advanced/stable-diffusion-webui/assets/deployment.png
--------------------------------------------------------------------------------
/advanced/stable-diffusion-webui/photon.py:
--------------------------------------------------------------------------------
1 | from leptonai.photon import Photon
2 |
3 |
4 | class WebUI(Photon):
5 | webui_version = "v1.6.0"
6 | cmd = [
7 | "bash",
8 | "-c",
9 | (
10 | "apt-get update && apt-get install -y wget libgoogle-perftools-dev && wget"
11 | f" -q https://raw.githubusercontent.com/AUTOMATIC1111/stable-diffusion-webui/{webui_version}/webui.sh"
12 | " && chmod +x ./webui.sh && ACCELERATE=True ./webui.sh -f --listen --port"
13 | " 8080"
14 | ),
15 | ]
16 | deployment_template = {
17 | "resource_shape": "gpu.a10",
18 | }
19 |
--------------------------------------------------------------------------------
/advanced/tabbyml/README.md:
--------------------------------------------------------------------------------
1 | # Tabby
2 |
3 | [Tabby](https://github.com/TabbyML/tabby) is an AI coding assistant, offering an open-source and on-premises alternative to GitHub Copilot.
4 |
5 | ## Install Lepton sdk
6 | ```shell
7 | pip install leptonai
8 | ```
9 |
10 | ## Launch Tabby in the cloud
11 |
12 | Similar to other examples, you can run Tabby on Lepton Cloud Platform easily, e.g.:
13 |
14 | ```shell
15 | lep photon create -n tabby -m photon.py
16 | lep photon push -n tabby
17 | lep photon run \
18 | -n tabby \
19 | --resource-shape gpu.a10 \
20 | --public
21 | ```
22 |
23 | You can visit [dashboard.lepton.ai](https://dashboard.lepton.ai/) to use the web Dashboard, or use `lep` cli to manage the launched deployment:
24 | ```shell
25 | lep deployment status -n tabby
26 | ```
27 |
28 | You can configure the underlying model (default `TabbyML/StarCoder-1B`) with the `MODEL` environment variable, e.g. to switch to `TabbyML/DeepseekCoder-1.3B`:
29 |
30 | ```shell
31 | lep photon run \
32 | -n tabby \
33 | -e MODEL=TabbyML/DeepseekCoder-1.3B \
34 | --resource-shape gpu.a10 \
35 | --public
36 | ```
37 |
38 | ## Client
39 |
40 | Once the Tabby server is up, you can use the deployment url shown on the Lepton Dashboard (or in the `lep` cli output)
41 |
42 |
43 |
44 | as the API endpoint of Tabby to configure the supported [IDE extensions](https://tabby.tabbyml.com/docs/extensions), e.g. in VSCode:
45 |
46 |
47 |
48 | and start coding with the power of AI!
49 |
50 |
51 |
--------------------------------------------------------------------------------
/advanced/tabbyml/assets/coding.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/leptonai/examples/033f7c0d04c57c8c24b9e82894c3b330c5c74993/advanced/tabbyml/assets/coding.png
--------------------------------------------------------------------------------
/advanced/tabbyml/assets/deployment.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/leptonai/examples/033f7c0d04c57c8c24b9e82894c3b330c5c74993/advanced/tabbyml/assets/deployment.png
--------------------------------------------------------------------------------
/advanced/tabbyml/assets/vscode.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/leptonai/examples/033f7c0d04c57c8c24b9e82894c3b330c5c74993/advanced/tabbyml/assets/vscode.png
--------------------------------------------------------------------------------
/advanced/tabbyml/photon.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | from leptonai.photon import Photon
4 |
5 |
6 | class TabbyML(Photon):
7 | image: str = "tabbyml/tabby"
8 | cmd = [
9 | "/opt/tabby/bin/tabby",
10 | "serve",
11 | "--model",
12 | os.environ.get("MODEL", "TabbyML/StarCoder-1B"),
13 | "--port",
14 | "8080",
15 | "--device",
16 | os.environ.get("DEVICE", "cuda"),
17 | ]
18 |
19 | deployment_template = {
20 | "resource_shape": "gpu.a10",
21 | "env": {
22 | "MODEL": "TabbyML/StarCoder-1B",
23 | },
24 | "secret": [
25 | "HUGGING_FACE_HUB_TOKEN",
26 | ],
27 | }
28 |
--------------------------------------------------------------------------------
/advanced/tts/README.md:
--------------------------------------------------------------------------------
1 | # TTS
2 |
3 | This folder shows an end-to-end AI example, with the [Coqui AI TTS](https://github.com/coqui-ai/TTS/) text-to-speech library. The demo also shows how to run a photon with multimedia outputs (in this case, a WAV response).
4 |
5 | With this demo, you will be able to run TTS and get results like the following:
6 |
7 |
8 |
9 | and you can check out more details in the `tts.ipynb` notebook.
10 |
11 | ## Run tts locally
12 |
13 | Ensure that you have installed the required dependencies via `pip install -r requirements.txt`. Then, run:
14 | ```shell
15 | python tts_main.py
16 | ```
17 | Note that if you have a GPU, things will run much faster. When the program runs, visit `http://0.0.0.0:8080/doc/` for the OpenAPI doc, or use the client to access it programmatically (see the client sketch at the end of this README).
18 |
19 | ## Run tts in the cloud
20 |
21 | Similar to other examples, you can run tts with the following command:
22 |
23 | ```shell
24 | lep photon run -n tts -m tts_main.py --resource-shape gpu.a10
25 | ```
26 |
27 | And visit [dashboard.lepton.ai](https://dashboard.lepton.ai/) to try out the model.
28 |
29 | Note: by default, the server is protected via a token, so you won't be able to access the gradio UI. This is by design to provide adequate security. If you want to make the UI public, you can either add the `--public` argument to `lep photon run`, or update the deployment with:
30 |
31 | ```shell
32 | lep deployment update -n tts --public
33 | ```
34 |
35 | You can then use tts either via the UI or via the client. See the notebook example for more details.
36 |
37 | ## XTTS
38 |
39 | We also include an XTTS example that can be used to do voice cloning. More details to be written.
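40 |
41 | ## Client sketch
42 |
43 | For reference, here is a minimal client sketch for the `tts` handler defined in
44 | `tts_main.py`. Writing the returned bytes to `output.wav` is just an illustration of
45 | how you might consume the WAV response:
46 |
47 | ```python
48 | from leptonai.client import Client, local
49 |
50 | # Use local() for a locally running photon, or the deployment URL (plus token)
51 | # for a deployment on the Lepton cloud platform.
52 | c = Client(local())
53 | audio = c.tts(text="The quick brown fox jumps over the lazy dog.")
54 | with open("output.wav", "wb") as f:
55 |     f.write(audio)
56 | ```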
--------------------------------------------------------------------------------
/advanced/tts/assets/thequickbrownfox.mp3:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/leptonai/examples/033f7c0d04c57c8c24b9e82894c3b330c5c74993/advanced/tts/assets/thequickbrownfox.mp3
--------------------------------------------------------------------------------
/advanced/tts/requirements.txt:
--------------------------------------------------------------------------------
1 | leptonai
2 | TTS
3 | deepspeed
--------------------------------------------------------------------------------
/advanced/tts/tts_main.py:
--------------------------------------------------------------------------------
1 | from io import BytesIO
2 | import os
3 | from threading import Lock
4 | from typing import List, Optional, Union, Dict
5 |
6 | from loguru import logger
7 | import torch
8 |
9 | from leptonai.photon import (
10 | Photon,
11 | WAVResponse,
12 | HTTPException,
13 | FileParam,
14 | get_file_content,
15 | )
16 |
17 |
18 | class Speaker(Photon):
19 | """
20 | A TTS service that supports multiple models provided by coqui and others.
21 |
22 | To launch this photon and specify the model to use, you can pass in env
23 | variables during photon launch:
24 | --env MODEL_NAME=tts_models/en/vctk/vits
25 | And if you want to preload multiple models, you can pass in a comma-separated
26 | list of models:
27 | --env PRELOAD_MODELS=tts_models/en/vctk/vits,tts_models/multilingual/multi-dataset/xtts_v1
28 | """
29 |
30 | requirement_dependency = ["TTS"]
31 |
32 | system_dependency = ["espeak-ng", "libsndfile1-dev"]
33 |
34 | handler_max_concurrency = 4
35 |
36 | MODEL_NAME = "tts_models/en/vctk/vits"
37 | # Or, you can choose some other models
38 | # MODEL_NAME = "tts_models/multilingual/multi-dataset/xtts_v1"
39 |
40 | # If you want to load multiple models at the same time, you can put it here
41 | # as a comma-separated string. For example:
42 | # PRELOAD_MODELS = "tts_models/en/vctk/vits,tts_models/multilingual/multi-dataset/xtts_v1"
43 | # Note that the default model will always be loaded.
44 | # Note that this might involve some extra memory - use at your own risk.
45 | PRELOAD_MODELS = ""
46 |
47 | def init(self):
48 | """
49 | Initialize a default model.
50 | """
51 |
52 | # By using XTTS you agree to CPML license https://coqui.ai/cpml
53 | os.environ["COQUI_TOS_AGREED"] = "1"
54 |
55 | from TTS.api import TTS
56 |
57 | self._models: Dict[Union[str, None], TTS] = {}
58 | self._model_lock: Dict[Union[str, None], Lock] = {}
59 |
60 | self.MODEL_NAME = os.environ.get("MODEL_NAME", self.MODEL_NAME).strip()
61 |
62 | self.PRELOAD_MODELS = [
63 | m
64 | for m in os.environ.get("PRELOAD_MODELS", self.PRELOAD_MODELS).split(",")
65 | if m
66 | ]
67 | if self.MODEL_NAME not in self.PRELOAD_MODELS:
68 | self.PRELOAD_MODELS.append(self.MODEL_NAME)
69 |
70 | logger.info("Loading the model...")
71 | for model_name in self.PRELOAD_MODELS:
72 | self._models[model_name] = self._load_model(model_name)
73 | self._model_lock[model_name] = Lock()
74 | self._models[None] = self._models[self.MODEL_NAME]
75 | self._model_lock[None] = self._model_lock[self.MODEL_NAME]
76 | logger.debug("Model loaded.")
77 |
78 | def _load_model(self, model_name: str):
79 | """
80 | Internal function to load a model. We will assume that the model name
81 | is already sanity checked.
82 | """
83 | from TTS.api import TTS
84 |
85 | use_gpu = torch.cuda.is_available()
86 | logger.debug(f"Loading model {model_name}... use_gpu: {use_gpu} ")
87 | try:
88 | model = TTS(model_name, progress_bar=False, gpu=use_gpu)
89 | except Exception as e:
90 | raise RuntimeError(f"Failed to load model {model_name}.") from e
91 | logger.debug(f"Loaded model {model_name}")
92 | logger.debug(f"Model {model_name} is_multilingual: {model.is_multi_lingual}")
93 | logger.debug(f"Model {model_name} is_multi_speaker: {model.is_multi_speaker}")
94 | try:
95 | # The below one seems to not always work with xtts models.
96 | if model.is_multi_lingual:
97 | logger.debug(f"Model {model_name} languages: {model.languages}")
98 | except AttributeError:
99 | try:
100 | # xtts models have a different way of accessing languages.
101 | logger.debug(
102 | f"Model {model_name} languages:"
103 | f" {model.synthesizer.tts_model.config.languages}"
104 | )
105 | except Exception:
106 | # If neither of above works, we will just ignore it and not print
107 | # anything.
108 | pass
109 | if model.is_multi_speaker:
110 | logger.debug(f"Model {model_name} speakers: {model.speakers}")
111 |
112 | return model
113 |
114 | def _tts(
115 | self,
116 | text: str,
117 | model: Optional[str] = None,
118 | language: Optional[str] = None,
119 | speaker: Optional[str] = None,
120 | speaker_wav: Optional[str] = None,
121 | ) -> BytesIO:
122 | if model not in self._models:
123 | raise HTTPException(
124 | status_code=404,
125 | detail=f"Model {model} not loaded.",
126 | )
127 | logger.info(
128 | f"Synthesizing '{text}' with language '{language}' and speaker '{speaker}'"
129 | )
130 | # Many of the models might not be python thread safe, so we lock it.
131 | with self._model_lock[model]:
132 | wav = self._models[model].tts(
133 | text=text,
134 | language=language, # type: ignore
135 | speaker=speaker, # type: ignore
136 | speaker_wav=speaker_wav,
137 | )
138 | return wav
139 |
140 | ##########################################################################
141 | # Photon handlers that are exposed to the external clients.
142 | ##########################################################################
143 |
144 | @Photon.handler(method="GET")
145 | def languages(self, model: Optional[str] = None) -> List[str]:
146 | """
147 | Returns a list of languages supported by the current model. Empty list
148 | if no model is loaded, or the model does not support multiple languages.
149 | """
150 | if model not in self._models:
151 | raise HTTPException(
152 | status_code=404,
153 | detail=f"Model {model} not loaded.",
154 | )
155 | if not self._models[model].is_multi_lingual:
156 | return []
157 | try:
158 | return self._models[model].languages
159 | except AttributeError:
160 | # xtts models have a different way of accessing languages.
161 | # if there are further errors, we don't handle them.
162 | return self._models[model].synthesizer.tts_model.config.languages
163 |
164 | @Photon.handler(method="GET")
165 | def speakers(self, model: Optional[str] = None) -> List[str]:
166 | """
167 | Returns a list of speakers supported by the model. If the model is an
168 | XTTS model, this will return empty as you will need to use speaker_wav
169 | to synthesize speech.
170 | """
171 | if model not in self._models:
172 | raise HTTPException(
173 | status_code=404,
174 | detail=f"Model {model} not loaded.",
175 | )
176 | elif not self._models[model].is_multi_speaker:
177 | return []
178 | else:
179 | return self._models[model].speakers
180 |
181 | @Photon.handler(method="GET")
182 | def models(self) -> List[str]:
183 | """
184 | Returns a list of available models.
185 | """
186 | return [k for k in self._models.keys() if k]
187 |
188 | @Photon.handler(
189 | example={
190 | "text": "The quick brown fox jumps over the lazy dog.",
191 | }
192 | )
193 | def tts(
194 | self,
195 | text: str,
196 | model: Optional[str] = None,
197 | language: Optional[str] = None,
198 | speaker: Optional[str] = None,
199 | speaker_wav: Union[None, str, FileParam] = None,
200 | ) -> WAVResponse:
201 | """
202 | Synthesizes speech from text. Returns the synthesized speech as a WAV
203 | response.
204 |
205 | Pass in language if the model is multilingual. Pass in speaker if the model
206 | is multi-speaker. Pass in speaker_wav if the model is XTTS. The endpoint
207 | tries its best to return the correct error message if the parameters are
208 | not correct, but it may not be perfect.
209 | """
210 | if model not in self._models:
211 | raise HTTPException(
212 | status_code=404,
213 | detail=f"Model {model} not loaded.",
214 | )
215 | tts_model = self._models[model]
216 | if not tts_model.is_multi_lingual and language is not None:
217 | raise HTTPException(
218 | status_code=400,
219 | detail="Model is not multi-lingual, you should not pass in language.",
220 | )
221 | if not tts_model.is_multi_speaker and speaker is not None:
222 | raise HTTPException(
223 | status_code=400,
224 | detail="Model is not multi-speaker, you should not pass in speaker.",
225 | )
226 | if tts_model.is_multi_lingual and language is None:
227 | raise HTTPException(
228 | status_code=400,
229 | detail=(
230 | "Model is multi-lingual, you should pass in language. "
231 | " Use GET /languages to get available languages and pass in "
232 | " as optional parameters"
233 | ),
234 | )
235 | if tts_model.is_multi_speaker and speaker is None:
236 | raise HTTPException(
237 | status_code=400,
238 | detail=(
239 | "Model is multi-speaker, you should pass in speaker. "
240 | " Use GET /speakers to get available speakers and pass in as "
241 | " optional parameters"
242 | ),
243 | )
244 |
245 | try:
246 | if speaker_wav is not None:
247 | speaker_wav_file = get_file_content(
248 | speaker_wav, allow_local_file=False, return_file=True
249 | )
250 | speaker_wav_file_name = speaker_wav_file.name
251 | else:
252 | speaker_wav_file_name = None
253 | wav = self._tts(
254 | text=text,
255 | language=language,
256 | speaker=speaker,
257 | speaker_wav=speaker_wav_file_name,
258 | )
259 | wav_io = BytesIO()
260 | tts_model.synthesizer.save_wav(wav, wav_io) # type: ignore
261 | wav_io.seek(0)
262 | return WAVResponse(wav_io)
263 | except HTTPException:
264 | raise
265 | except TypeError as e:
266 | if "expected str, bytes or os.PathLike object, not NoneType" in str(e):
267 | raise HTTPException(
268 | status_code=400,
269 | detail=(
270 | "Speaker wav file is not provided. This is necessary when"
271 | " running an XTTS model to do voice cloning."
272 | ),
273 | ) from e
274 | except Exception as e:
275 | raise HTTPException(
276 | status_code=500,
277 | detail=f"Failed to synthesize speech. Details: {e}",
278 | ) from e
279 |
280 |
281 | if __name__ == "__main__":
282 | p = Speaker()
283 | p.launch()
284 |
--------------------------------------------------------------------------------
/advanced/tts/xtts_main.py:
--------------------------------------------------------------------------------
1 | from io import BytesIO
2 | import os
3 | import subprocess
4 | from tempfile import NamedTemporaryFile
5 | from threading import Lock
6 | import time
7 | from typing import Optional, Union
8 |
9 | from loguru import logger
10 |
11 | from leptonai.photon import (
12 | Photon,
13 | WAVResponse,
14 | HTTPException,
15 | FileParam,
16 | get_file_content,
17 | )
18 |
19 |
20 | class XTTSSpeaker(Photon):
21 | """
22 | An XTTS service that supports multiple models provided by coqui and others.
23 |
24 | To launch this photon and specify the model to use, you can pass in env
25 | variables during photon launch:
26 | --env MODEL_NAME=tts_models/multilingual/multi-dataset/xtts_v1.1
27 | """
28 |
29 | requirement_dependency = ["TTS", "deepspeed"]
30 |
31 | system_dependency = ["ffmpeg", "espeak-ng", "libsndfile1-dev"]
32 |
33 | handler_max_concurrency = 4
34 |
35 | MODEL_NAME = "tts_models/multilingual/multi-dataset/xtts_v1.1"
36 | DEFAULT_DECODER = "ne_hifigan"
37 |
38 | def init(self):
39 | """
40 | Initialize a default model.
41 | """
42 |
43 | # By using XTTS you agree to CPML license https://coqui.ai/cpml
44 | os.environ["COQUI_TOS_AGREED"] = "1"
45 |
46 | import torch
47 | from TTS.tts.configs.xtts_config import XttsConfig
48 | from TTS.tts.models.xtts import Xtts
49 | from TTS.utils.generic_utils import get_user_data_dir
50 | from TTS.utils.manage import ModelManager
51 |
52 | logger.info("Loading the xtts model...")
53 | try:
54 | self.MODEL_NAME = os.environ.get("MODEL_NAME", self.MODEL_NAME).strip()
55 | ModelManager().download_model(self.MODEL_NAME)
56 | model_path = os.path.join(
57 | get_user_data_dir("tts"), self.MODEL_NAME.replace("/", "--")
58 | )
59 | config = XttsConfig()
60 | config.load_json(os.path.join(model_path, "config.json"))
61 | self._model = Xtts.init_from_config(config)
62 | self._model.load_checkpoint(
63 | config,
64 | checkpoint_path=os.path.join(model_path, "model.pth"),
65 | vocab_path=os.path.join(model_path, "vocab.json"),
66 | eval=True,
67 | use_deepspeed=torch.cuda.is_available(),
68 | )
69 | # The xtts model's main chunk cannot be run in parallel, so we will need
70 | # to lock protect it.
71 | self._model_lock = Lock()
72 | self._supported_languages = self._model.config.languages
73 | if torch.cuda.is_available():
74 | self._model.cuda()
75 | self._languages = config.languages
76 | except Exception as e:
77 | raise RuntimeError(f"Cannot load XTTS model {self.MODEL_NAME}") from e
78 |
79 | logger.debug("Model loaded.")
80 |
81 | def _tts(
82 | self,
83 | text: str,
84 | language: str,
85 | speaker_wav: Optional[str] = None,
86 | voice_cleanup: Optional[bool] = False,
87 | ):
88 | import torch
89 |
90 | if voice_cleanup:
91 | with NamedTemporaryFile(suffix=".wav", delete=False) as filtered_file:
92 | lowpass_highpass = "lowpass=8000,highpass=75,"
93 | trim_silence = "areverse,silenceremove=start_periods=1:start_silence=0:start_threshold=0.02,areverse,silenceremove=start_periods=1:start_silence=0:start_threshold=0.02"
94 | shell_command = (
95 | f"ffmpeg -y -i {speaker_wav} -af"
96 | f" {lowpass_highpass}{trim_silence} {filtered_file.name}".split(" ")
97 | )
98 | logger.debug("Running ffmpeg command: " + " ".join(shell_command))
99 | try:
100 | subprocess.run(
101 | shell_command,
102 | capture_output=False,
103 | text=True,
104 | check=True,
105 | stdout=subprocess.DEVNULL,
106 | stderr=subprocess.DEVNULL,
107 | )
108 | except subprocess.CalledProcessError as e:
109 | logger.debug("Failed to run ffmpeg command: " + str(e))
110 | logger.debug("Use original file")
111 | else:
112 | # filter succeeded - use filtered file.
113 | speaker_wav = filtered_file.name
114 | # critical part: cannot run in parallel threads.
115 | with self._model_lock:
116 | # learn from speaker_wav
117 | start = time.time()
118 | logger.debug("Learning from speaker wav...")
119 | try:
120 | gpt_cond_latent, diffusion_conditioning, speaker_embedding = (
121 | self._model.get_conditioning_latents(audio_path=speaker_wav)
122 | )
123 | except Exception as e:
124 | raise HTTPException(
125 | status_code=400,
126 | detail="Failed to learn from speaker wav.",
127 | ) from e
128 | learned_time = time.time()
129 | logger.debug(f"Learned from speaker wav in {learned_time - start} seconds.")
130 | out = self._model.inference(
131 | text,
132 | language,
133 | gpt_cond_latent,
134 | speaker_embedding,
135 | diffusion_conditioning,
136 | decoder=self.DEFAULT_DECODER,
137 | )
138 | logger.debug(f"Synthesized speech in {time.time() - learned_time} seconds.")
139 | if voice_cleanup:
140 | os.remove(filtered_file.name) # type: ignore
141 | return torch.tensor(out["wav"]).unsqueeze(0)
142 |
143 | ##########################################################################
144 | # Photon handlers that are exposed to the external clients.
145 | ##########################################################################
146 | @Photon.handler(
147 | example={
148 | "text": "The quick brown fox jumps over the lazy dog.",
149 | }
150 | )
151 | def tts(
152 | self,
153 | text: str,
154 | language: str,
155 | speaker_wav: Union[str, FileParam],
156 | voice_cleanup: bool = False,
157 | ) -> WAVResponse:
158 | """
159 | Synthesizes speech from text. Returns the synthesized speech as a WAV
160 | response. The XTTS model is multi-lingual, so you need to specify the
161 | language - use language() to show a list of languages available. The
162 | model carries out voice transfer from the speaker wav file, so you need
163 | to specify the speaker wav file. The endpoint tries its best to return
164 | the correct error message if the parameters are not correct, but it may
165 | not be perfect.
166 | """
167 | import torchaudio
168 |
169 | if language not in self._supported_languages:
170 | raise HTTPException(
171 | status_code=400,
172 | detail=(
173 | f"Language {language} not supported. Supported languages are:"
174 | f" {self._supported_languages}"
175 | ),
176 | )
177 |
178 | try:
179 | speaker_wav_file = get_file_content(
180 | speaker_wav, allow_local_file=False, return_file=True
181 | )
182 | except Exception:
183 | raise HTTPException(
184 | status_code=400,
185 | detail=f"Failed to read speaker wav file {speaker_wav}.",
186 | )
187 |
188 | speaker_wav_file_name = speaker_wav_file.name
189 | wav = self._tts(
190 | text,
191 | language,
192 | speaker_wav=speaker_wav_file_name,
193 | voice_cleanup=voice_cleanup,
194 | )
195 | wav_io = BytesIO()
196 | torchaudio.save(wav_io, wav, 24000, format="wav")
197 | wav_io.seek(0)
198 | return WAVResponse(wav_io)
199 |
200 |
201 | if __name__ == "__main__":
202 | p = XTTSSpeaker()
203 | p.launch()
204 |
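205 | # Client sketch for the tts handler above (assuming a local run via
206 | # `python xtts_main.py`; the sample wav path and the language are placeholders):
207 | #
208 | #   from leptonai.client import Client, local
209 | #   from leptonai.photon import FileParam
210 | #
211 | #   c = Client(local())
212 | #   wav_bytes = open("my_voice_sample.wav", "rb").read()
213 | #   audio = c.tts(
214 | #       text="Hello there!", language="en", speaker_wav=FileParam(wav_bytes)
215 | #   )
216 | #   open("cloned.wav", "wb").write(audio)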
--------------------------------------------------------------------------------
/advanced/whisper-jax/README.md:
--------------------------------------------------------------------------------
1 | # Whisper
2 |
3 | This example demonstrates how to run an optimized Whisper model on Lepton, and also how to integrate it with other application frameworks, in this case Slack.
4 |
5 | [whisper-jax](https://github.com/sanchit-gandhi/whisper-jax.git) is a JAX (optimized) port of the openai whisper model. It chunks audio data into segments and then performs batch inference to gain speedup.
6 |
7 |
8 | ## Note on custom environment
9 |
10 | Usually, when you run different AI models, they require specific dependencies that sometimes conflict with each other. This is particularly true in the whisper case - from `requirements.txt`, you may notice that there are quite a few specific version requirements.
11 |
12 | This is where having a separate service like Lepton becomes super useful: we can create a Python environment (using e.g. conda or virtualenv), install the required dependencies, run the photon as a web service, and then, from the regular Python environment, simply call the web service as if we were calling a regular Python function. Compared to some apparent alternatives:
13 | - unlike a single Python environment, we don't need to resolve version conflicts between different algorithms;
14 | - unlike packing everything into a separate opaque container image, this approach is much more lightweight: only a Python environment and its dependencies are needed.
15 |
16 | Here we provide a combination of jax + jaxlib + cuda/cudnn pip versions that can work together inside Lepton's default image, so you can pull it up with ease.
17 |
18 | ## Running with a custom environment
19 |
20 | We recommend you use conda or virtualenv to start a whisper-specific environment. For example, if you use conda, it's easy to do:
21 |
22 | ```shell
23 | # pick a python version of your favorite
24 | conda create -n whisper python=3.10
25 | conda activate whisper
26 | ```
27 |
28 | After that, install lepton [per the installation instruction](https://www.lepton.ai/docs/overview/quickstart#1-installation), and install the required dependencies of this demo via:
29 | ```shell
30 | pip install -r requirements.txt
31 | ```
32 |
33 | Note that `torch` sometimes has a dependency conflict with the current jax gpu libraries. If you installed torch after the above pip install and encountered errors, re-run the above installation instruction.
34 |
35 | After this, you can launch whisper like:
36 | ```shell
37 | # Set the model id, or omit it to use the default large-v2 model.
38 | # Note that the large-v2 model may need ~10GB GPU memory to run.
39 | export WHISPER_MODEL_ID="openai/whisper-medium"
40 | python whisper-jax.py
41 | ```
42 |
43 | It will download the parameters and start the server. After that, use the regular Python client to access the model:
44 | ```python
45 | from leptonai.client import Client, local
46 | c = Client(local())
47 | ```
48 |
49 | and invoke transcription or translation as follows:
50 | ```python
51 | >> c.run(inputs="assets/test_japanese.wav")
52 | '私たちはAIの株式会社であります'
53 | >> c.run(inputs="assets/test_japanese.wav", task="translate")
54 | 'We are an AI company.'
55 | ```
56 |
57 |
58 | Do note that for the Whisper demo, the first call will be very slow. This is because JAX needs to do a bit of compilation and initialization - after that, subsequent calls will be much faster. You may find it surprising, but for many AI deployments the first run is usually slower due to such initialization overheads. As a good practice, if your model has such overheads, you can always do a "warm-up" call before serving actual inference traffic.
59 |
60 | ## Running a slack translation bot
61 |
62 | The whisper-jax example also demonstrates how to use Slack bot to trigger inference. To use this feature, you need to create a slack app, and set the following environment variables:
63 | - `SLACK_VERIFICATION_TOKEN`: The verification token of your Slack app
64 | - `SLACK_BOT_TOKEN`: The bot token of your Slack app
65 |
66 | Let's go through the process one by one.
67 |
68 | ### Creating a slack app
69 |
70 | First you will need to create a slack app. Go to [https://api.slack.com/apps](https://api.slack.com/apps), and click "Create an App". Choose "From scratch", and select the app name and workspace you want to add the app to, like:
71 |
72 |
73 |
74 | In "OAuth & Permisions", add the following permissions to the app:
75 | - app_methions:read
76 | - chat:write
77 | - files:read
78 |
79 | Which looks like the follows:
80 |
81 |
82 |
83 | And install it to your workspace. After that, you should be able to see the Bot User OAuth Token like the following:
84 |
85 |
86 |
87 | Locate the verification token in the app's "Basic Information"-"App Credentials" section. Then, launch the service as follows:
88 | ```shell
89 | export SLACK_BOT_TOKEN="xoxb-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
90 | export SLACK_VERIFICATION_TOKEN="xxxxxxxxxxxxxxxxxxxxxxxxxxx"
91 | python whisper-jax.py
92 | ```
93 |
94 | You will need a public address for slack to connect. For example, if the whisper-jax bot tells you `Uvicorn running on http://0.0.0.0:8080`, and your public IP is `1.2.3.4`, you can go to the "Event Subscriptions" page of the slack app, and enable events like the following (make sure that the request URL says "Verified"):
95 |
96 |
97 |
98 | You should also be able to see the verification request from the logging info like
99 | ```
100 | 2023-08-09 16:35:16,119 - INFO: 34.203.249.192:47706 - "POST /slack HTTP/1.1" 200 OK
101 | ```
102 |
103 | Now, in the "Event Subscriptions"-"Subscribe to bot events" page, add the bot user event "file_shared", and save. If everything goes well, you should be able to see the bot responding to voice inputs as follows:
104 |
105 |
106 |
107 | The whisper model is fairly versatile - in this case we are showing the medium sized model, and it is already able to recognize multiple languages with a fairly good accuracy.
108 |
109 | ## Running with Lepton
110 |
111 | The above example runs on the local machine. If your machine does not have a public facing IP, or more commonly, you want a stable server environment to host your model - then running on the Lepton cloud platform is the best option. To run it on Lepton, you can simply create a photon and push it to the cloud:
112 |
113 | ```shell
114 | lep login
115 | lep photon create -n whisper -m whisper-jax.py
116 | lep photon push -n whisper
117 | # An A10 machine is usually big enough to run the large-v2 model.
118 | lep photon run -n whisper \
119 | --resource-shape gpu.a10 \
120 | --public \
121 | --env WHISPER_MODEL_ID="openai/whisper-large-v2" \
122 | --env SLACK_BOT_TOKEN="xoxb-xxxxxxxxxxxxxxxxxxxxxxxxxx" \
123 | --env SLACK_VERIFICATION_TOKEN="xxxxxxxxxxxxxxxxxxxxxxxxxxx"
124 | ```
125 |
126 | Note that because we want Slack to be able to call the API, we made it a public deployment. After that, you can use `lep deployment status` to obtain the public address of the photon, and use the same slack app to connect to it:
127 | ```shell
128 | >> lep deployment status -n whisper
129 | Created at: 2023-08-09 20:24:48
130 | Photon ID: whisper-6t01ptsf
131 | State: Running
132 | Endpoint: https://latest-whisper.cloud.lepton.ai
133 | Is Public: Yes
134 | Replicas List:
135 | ┏━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━━━┓
136 | ┃ replica id ┃ status ┃ message ┃
137 | ┡━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━━━┩
138 | │ whisper-f9dbd6757-l8hms │ Ready │ (empty) │
139 | └─────────────────────────┴────────┴─────────┘
140 | ```
141 | When you obtain the endpoint URL above, simply change the event subscription URL to the new endpoint, such as in this case, `https://latest-whisper.cloud.lepton.ai/slack`, and you should be able to use the same slack app.
142 |
143 | Unlike local deployment, running on the Lepton cloud platform comes with a series of advantages, especially in the whisper case:
144 | - You do not need to worry about reproducible software environment. The photon is guaranteed to run on the same environment as you created it.
145 | - Scaling is easier - you can simply increase the number of replicas if you need more capacity.
146 | - Automatic fault tolerance - if the photon crashes, it will be automatically restarted.
147 |
148 | Happy building!
149 |
--------------------------------------------------------------------------------
/advanced/whisper-jax/assets/bot_token.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/leptonai/examples/033f7c0d04c57c8c24b9e82894c3b330c5c74993/advanced/whisper-jax/assets/bot_token.png
--------------------------------------------------------------------------------
/advanced/whisper-jax/assets/create_slack_app.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/leptonai/examples/033f7c0d04c57c8c24b9e82894c3b330c5c74993/advanced/whisper-jax/assets/create_slack_app.png
--------------------------------------------------------------------------------
/advanced/whisper-jax/assets/event_subscription.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/leptonai/examples/033f7c0d04c57c8c24b9e82894c3b330c5c74993/advanced/whisper-jax/assets/event_subscription.png
--------------------------------------------------------------------------------
/advanced/whisper-jax/assets/permissions.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/leptonai/examples/033f7c0d04c57c8c24b9e82894c3b330c5c74993/advanced/whisper-jax/assets/permissions.png
--------------------------------------------------------------------------------
/advanced/whisper-jax/assets/test_japanese.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/leptonai/examples/033f7c0d04c57c8c24b9e82894c3b330c5c74993/advanced/whisper-jax/assets/test_japanese.wav
--------------------------------------------------------------------------------
/advanced/whisper-jax/assets/whisper_result.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/leptonai/examples/033f7c0d04c57c8c24b9e82894c3b330c5c74993/advanced/whisper-jax/assets/whisper_result.jpg
--------------------------------------------------------------------------------
/advanced/whisper-jax/requirements.txt:
--------------------------------------------------------------------------------
1 | git+https://github.com/sanchit-gandhi/whisper-jax.git@0d3bc54
2 | cached_property
3 | nvidia-cudnn-cu11==8.6.0.163
4 | -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html
5 | jax==0.4.13
6 | jaxlib==0.4.13+cuda11.cudnn86
7 | slack_sdk
--------------------------------------------------------------------------------
/advanced/whisper-jax/whisper-jax.py:
--------------------------------------------------------------------------------
1 | """This example demonstrates how to run optimized Whisper model on
2 | Lepton.
3 |
4 | [whisper-jax](https://github.com/sanchit-gandhi/whisper-jax.git) is a
5 | JAX (optimized) port of the openai whisper model. It chunks audio data
6 | into segments and then performs batch inference to gain speedup.
7 |
8 | Installing JAX is a bit tricky, so here we provide a combination of
9 | jax + jaxlib + cuda/cudnn pip versions that can work together inside
10 | Lepton's default image.
11 |
12 | Whisper has a set of model ids that you can use. This is specified by an
13 | environment variable "WHISPER_MODEL_ID". By default, it uses "openai/whisper-large-v2".
14 | The available model ids are "openai/whisper-{size}", where size can be one of
15 | the following:
16 | tiny, base, small, medium, large, large-v2
17 | See https://github.com/sanchit-gandhi/whisper-jax for more details.
18 |
19 | Optionally, you can also set the environment variable "BATCH_SIZE" to
20 | change the batch size of the inference. By default, it is 4.
21 |
22 | In addition, this example also demonstrates how to use Slack bot to
23 | trigger inference. To use this feature, you need to set the following
24 | environment variables:
25 | - `SLACK_VERIFICATION_TOKEN`: The verification token of your Slack app
26 | - `SLACK_BOT_TOKEN`: The bot token of your Slack app
27 | """
28 |
29 | from datetime import datetime, timedelta
30 | import os
31 | import tempfile
32 | from typing import Optional, Dict, Any
33 |
34 | from loguru import logger
35 | import requests
36 |
37 | from leptonai.photon import Photon, HTTPException
38 |
39 |
40 | class Whisper(Photon):
41 | """
42 |     A photon implementation of the whisper-jax model.
43 | """
44 |
45 |     # note: requirement_dependency specifies what should be installed via `pip install`
46 | requirement_dependency = [
47 | "git+https://github.com/sanchit-gandhi/whisper-jax.git@0d3bc54",
48 | "cached_property",
49 | "nvidia-cudnn-cu11==8.6.0.163",
50 | "-f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html",
51 | "jax==0.4.13",
52 | "jaxlib==0.4.13+cuda11.cudnn86",
53 | "slack_sdk",
54 | ]
55 |
56 | # note: system_dependency specifies what should be installed via `apt install`
57 | system_dependency = [
58 | "ffmpeg",
59 | ]
60 |
61 | def init(self):
62 |         # Implementation note: strictly speaking, this is not recommended by Python
63 |         # style, as all imports should be placed at the top of the file. However, this
64 |         # shows a small trick for when a local installation isn't really possible, such
65 |         # as installing all the jax and cuda dependencies on a mac machine: we can defer
66 |         # the import to inside the actual Photon class.
67 |         # Of course, this pushes debugging to remote execution time, which is a bit
68 |         # harder. It is a conscious tradeoff between development speed and debugging
69 |         # speed.
70 | logger.info("Initializing Whisper model. This might take a while...")
71 | from whisper_jax import FlaxWhisperPipline
72 | import jax.numpy as jnp
73 |
74 | model_id = os.environ.get("WHISPER_MODEL_ID", "openai/whisper-large-v2")
75 |         batch_size = int(os.environ.get("BATCH_SIZE", 4))
76 | logger.info(f"Using model id: {model_id} and batch size: {batch_size}")
77 | self.pipeline = FlaxWhisperPipline(
78 | model_id, dtype=jnp.float16, batch_size=batch_size
79 | )
80 | logger.info("Initialized Whisper model.")
81 | logger.info("Initializing slack bot...")
82 | self._init_slack_bot()
83 |
84 | def _init_slack_bot(self):
85 | """
86 | Initializes the slack bot client.
87 | """
88 | from slack_sdk import WebClient as SlackClient
89 |
90 | self._verification_token = os.environ.get("SLACK_VERIFICATION_TOKEN", None)
91 | self._slack_bot_token = os.environ.get("SLACK_BOT_TOKEN", None)
92 | if self._slack_bot_token:
93 | self._slack_bot_client = SlackClient(token=self._slack_bot_token)
94 | else:
95 | logger.warning("Slack bot token not configured. Slack bot will not work.")
96 | self._processed_slack_tasks = {}
97 |
98 | @Photon.handler(
99 | "run",
100 | example={
101 | "inputs": (
102 | "https://huggingface.co/datasets/Narsil/asr_dummy/resolve/main/1.flac"
103 | )
104 | },
105 | )
106 | def run(self, inputs: str, task: Optional[str] = None) -> str:
107 | """
108 | Transcribe or translate an audio input file to a text transcription.
109 |
110 | Args:
111 | inputs: the filename or url of the audio file.
112 | task (optional): either `"transcribe"` or `"translate"`. Defaults to `"transcribe"`.
113 |
114 | Returns:
115 | text: the transcription of the audio file.
116 | """
117 | return self.pipeline(inputs, task=task)["text"]
118 |
119 | async def _slack_process_task(
120 | self, channel: str, thread_ts: Optional[str], url: str
121 | ):
122 | """
123 | Internal method to process a slack task. This is called by the `slack` handler.
124 | """
125 | last_processed_time = self._processed_slack_tasks.get((channel, url))
126 | if last_processed_time and datetime.now() - last_processed_time < timedelta(
127 | seconds=20
128 | ):
129 | logger.info(
130 | f"Skip processing slack task: ({channel}, {url}) since it was processed"
131 | f" recently: {last_processed_time}"
132 | )
133 | return
134 |
135 | logger.info(f"Processing audio file: {url}")
136 | with tempfile.NamedTemporaryFile("wb", suffix="." + url.split(".")[-1]) as f:
137 | logger.info(f"Start downloading audio file to: {f.name}")
138 | res = requests.get(
139 | url,
140 | allow_redirects=True,
141 | headers={"Authorization": f"Bearer {self._slack_bot_token}"},
142 | )
143 | res.raise_for_status()
144 | logger.info(f"Downloaded audio file (total bytes: {len(res.content)})")
145 | f.write(res.content)
146 | f.flush()
147 | f.seek(0)
148 | logger.info(f"Saved audio file to: {f.name}")
149 | logger.info(f"Running inference on audio file: {f.name}")
150 | try:
151 | text = self.run(f.name)
152 | except Exception:
153 | logger.error(f"Failed to run inference on audio file: {f.name}")
154 | return
155 | logger.info(f"Finished inference on audio file: {f.name}")
156 | self._slack_bot_client.chat_postMessage(
157 | channel=channel,
158 | thread_ts=thread_ts,
159 | text=text,
160 | )
161 | if len(self._processed_slack_tasks) > 100:
162 | # clean up task histories that are too old.
163 | self._processed_slack_tasks = {
164 | k: v
165 | for k, v in self._processed_slack_tasks.items()
166 | if datetime.now() - v < timedelta(seconds=20)
167 | }
168 | self._processed_slack_tasks[(channel, url)] = datetime.now()
169 |
170 | # This is a handler that receives slack events. It is triggered by the
171 | # slack server side - see the slack event api for details:
172 | # https://api.slack.com/apis/connections/events-api#receiving-events
173 | # and more specs here:
174 | # https://github.com/slackapi/slack-api-specs
175 | @Photon.handler
176 | def slack(
177 | self,
178 | token: str,
179 | type: str,
180 | event: Optional[Dict[str, Any]] = None,
181 | challenge: Optional[str] = None,
182 |         **extra,
183 | ) -> str:
184 | # Checks if the slack bot is configured. Note that we are still using the
185 |         # now deprecated verification token, and supporting the new signing secrets
186 | # is a todo item.
187 | if not self._verification_token or not self._slack_bot_token:
188 | raise HTTPException(401, "Slack bot not configured.")
189 | # If token is not correct, we return 401.
190 | if token != self._verification_token:
191 | raise HTTPException(401, "Invalid token.")
192 | # We will respond to the challenge request if it is a url_verification event,
193 | # so that slack can verify our endpoint.
194 | if type == "url_verification":
195 | if challenge:
196 | return challenge
197 | else:
198 | raise HTTPException(400, "Missing challenge")
199 |
200 | # If event is not present, we will ignore it.
201 | if not event:
202 | raise HTTPException(400, "Missing event type")
203 |
204 | # Actually handle the slack event. We will only handle file_shared events.
205 | event_type = event["type"]
206 | logger.info(f"Received slack event: {event_type}")
207 | if event_type == "file_shared":
208 | channel = event["channel_id"]
209 | thread_ts = event.get("thread_ts")
210 | file_id = event["file_id"]
211 | file_info = self._slack_bot_client.files_info(file=file_id)
212 | if not file_info["ok"]:
213 | raise HTTPException(500, "Failed to get file info from slack")
214 | self.add_background_task(
215 | self._slack_process_task,
216 | channel,
217 | thread_ts,
218 | file_info["file"]["url_private"],
219 | )
220 | return "ok"
221 | else:
222 | logger.info(f"Ignored slack event type: {event_type}")
223 | return "ok"
224 |
225 |
226 | if __name__ == "__main__":
227 | w = Whisper()
228 | w.launch()
229 |
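# For reference, once this photon is running (locally or on Lepton), it can be
# called with the python client. The workspace and deployment names below are
# placeholders and not part of this example:
#
#   from leptonai.client import Client
#   c = Client("YOUR_WORKSPACE_NAME", "whisper-jax", token="YOUR_TOKEN")
#   print(c.run(inputs="https://huggingface.co/datasets/Narsil/asr_dummy/resolve/main/1.flac"))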
--------------------------------------------------------------------------------
/advanced/whisperx/README.md:
--------------------------------------------------------------------------------
1 | # WhisperX
2 |
3 | This example demonstrates how to run the WhisperX model on Lepton. [WhisperX](https://github.com/m-bain/whisperX) is similar to the whisper example, but does transcription, alignment, and diarization for the input.
4 |
5 | The following example shows the WhisperX demo, which lets one quickly recognize short audio clips. For long audio like podcasts, instead of keeping an API call waiting, it is easier to build a service that does the actual recognition as a background task - see the notebook [What does a great podcast sound like?](audio_analysis.ipynb) for an end-to-end example.
6 |
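For reference, a minimal sketch of that background-task pattern is shown below. The class and handler names (`PodcastTranscriber`, `submit`, `result`) and the placeholder transcription call are illustrative assumptions rather than part of this repo; the sketch only relies on `Photon.add_background_task`, the same mechanism used in the whisper-jax Slack example, and the real end-to-end flow lives in the notebook above.

```python
import uuid
from typing import Dict

from leptonai.photon import Photon


class PodcastTranscriber(Photon):
    def init(self):
        # In-memory job store; a real service would persist results elsewhere.
        self._results: Dict[str, str] = {}

    async def _transcribe_and_store(self, job_id: str, url: str):
        # Placeholder for the actual WhisperX pipeline call (see main.py).
        self._results[job_id] = f"(transcription of {url})"

    @Photon.handler
    def submit(self, url: str) -> str:
        # Return a job id immediately; the heavy lifting runs in the background.
        job_id = uuid.uuid4().hex
        self.add_background_task(self._transcribe_and_store, job_id, url)
        return job_id

    @Photon.handler
    def result(self, job_id: str) -> str:
        return self._results.get(job_id, "pending")
```
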
7 | ## Note on custom environment
8 |
9 | Similar to the Whisper JAX example, if you are running locally, we recommend using a custom environment such as `conda` or `virtualenv`.
10 |
11 | Usually, when you run different AI models, they require specific dependencies that sometimes conflict with each other. This is particularly true in the whisperx case - from `requirements.txt`, you may notice that there are quite a few specific version requirements.
12 |
13 | This is where having a separate service like Lepton becomes super useful: we can create a python environment (using e.g. conda or virtualenv), install the required dependencies, run the photon as a web service, and then, in the regular python environment, simply call the web service as if it were a regular python function. Compared to some apparent alternatives:
14 | 
15 | - unlike a single python environment, we don't need to resolve version conflicts between different algorithms.
16 |
17 | ## Prerequisite
18 |
19 | Note that one of the dependencies relies on 3 Hugging Face Hub models that require you to accept their terms of use beforehand; otherwise it will throw an error. Simply proceed to the pages for [Segmentation](https://huggingface.co/pyannote/segmentation), [Voice Activity Detection (VAD)](https://huggingface.co/pyannote/voice-activity-detection), and [Speaker Diarization](https://huggingface.co/pyannote/speaker-diarization) and accept the terms.
20 |
21 | 
22 |
23 | You will also need a Hugging Face access token at hand. Simply follow the steps in the [official guide](https://huggingface.co/docs/hub/security-tokens).
24 |
25 | ## Running with a custom environment
26 |
27 | We recommend using conda or virtualenv to create a whisperx-specific environment. For example, if you use conda, it's easy to do:
28 |
29 | ```shell
30 | # pick a python version of your choice
31 | conda create -n whisperx python=3.10
32 | conda activate whisperx
33 | ```
34 |
35 | After that, install lepton [per the installation instruction](https://www.lepton.ai/docs/overview/quickstart#1-installation), and install the required dependencies of this demo via:
36 |
37 | ```shell
38 | pip install -r requirements.txt
39 | ```
40 |
41 | After this, you can launch whisperx like:
42 |
43 | ```shell
44 | # Set your huggingface token. This is required to obtain the respective models.
45 | export HUGGING_FACE_HUB_TOKEN="replace-with-your-own-token"
46 | python main.py
47 | ```
48 |
49 | It will download the model parameters and start the server. After that, use the regular python client to access the model:
50 |
51 | ```python
52 | from leptonai.client import Client, local
from leptonai.photon import FileParam
53 | c = Client(local())
54 | ```
55 |
56 | and invoke transcription or translation as follows:
57 |
58 | ```python
59 | >> c.run(input=FileParam(open("assets/thequickbrownfox.mp3", "rb")))
60 | [{'start': 0.028,
61 | 'end': 2.06,
62 | 'text': ' A quick brown fox jumps over the lazy dog.',
63 | 'words': [{'word': 'A', 'start': 0.028, 'end': 0.068, 'score': 0.5},
64 | {'word': 'quick', 'start': 0.109, 'end': 0.31, 'score': 0.995},
65 | {'word': 'brown',
66 | 'start': 0.35,
67 | 'end': 0.571,
68 | 'score': 0.849,
69 | 'speaker': 'SPEAKER_00'},
70 | {'word': 'fox',
71 | 'start': 0.612,
72 | 'end': 0.853,
73 | 'score': 0.897,
74 | 'speaker': 'SPEAKER_00'},
75 | {'word': 'jumps',
76 | 'start': 0.893,
77 | 'end': 1.175,
78 | 'score': 0.867,
79 | 'speaker': 'SPEAKER_00'},
80 | {'word': 'over',
81 | 'start': 1.255,
82 | 'end': 1.416,
83 | 'score': 0.648,
84 | 'speaker': 'SPEAKER_00'},
85 | {'word': 'the',
86 | 'start': 1.456,
87 | 'end': 1.517,
88 | 'score': 0.998,
89 | 'speaker': 'SPEAKER_00'},
90 | {'word': 'lazy',
91 | 'start': 1.557,
92 | 'end': 1.839,
93 | 'score': 0.922,
94 | 'speaker': 'SPEAKER_00'},
95 | {'word': 'dog.',
96 | 'start': 1.859,
97 | 'end': 2.06,
98 | 'score': 0.998,
99 | 'speaker': 'SPEAKER_00'}],
100 | 'speaker': 'SPEAKER_00'}]
101 | ```
102 |
103 | ## Running with Lepton
104 |
105 | The above example runs on the local machine. If your machine does not have a public-facing IP, or, more commonly, if you want a stable server environment to host your model, then running on the Lepton cloud platform is the best option. To run it on Lepton, you can simply create a photon and push it to the cloud.
106 |
107 | For Hugging Face Hub API access to function properly, we also need the token to be available as an environment variable in the cloud. To do so, simply run the following command to store it as a [secret](https://www.lepton.ai/docs/advanced/env_n_secrets):
108 |
109 | ```shell
110 | lep secret create -n HUGGING_FACE_HUB_TOKEN -v VALUE_OF_YOUR_TOKEN
111 | ```
112 |
113 | You can run the following command to confirm that the secret is stored properly:
114 |
115 | ```shell
116 | lep secret list
117 | ```
118 |
119 | which should return something like the following:
120 |
121 | ```txt
122 | Secrets
123 | ┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━┓
124 | ┃ ID ┃ Value ┃
125 | ┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━┩
126 | │ HUGGING_FACE_HUB_TOKEN │ (hidden) │
127 | └────────────────────────┴──────────┘
128 | ```
129 |
130 | Now you can proceed to photon creation and deployment by running the following commands:
131 |
132 | ```shell
133 | lep login
134 | lep photon create -n whisperx -m main.py
135 | lep photon push -n whisperx
136 | # An A10 machine is usually big enough to run the large-v2 model.
137 | # note you need to specify the secret that needs to be available in the run
138 | lep photon run -n whisperx --resource-shape gpu.a10 --secret HUGGING_FACE_HUB_TOKEN
139 | ```
140 |
141 | After that, you can use `lep deployment status` to obtain the endpoint of the deployment, and use the python client to connect to it:
142 |
143 | ```shell
144 | >> lep deployment status -n whisperx
146 | Created at: 2023-08-16 11:08:56
147 | Photon ID: whisperx-bsip0d8q
148 | State: Running
149 | Endpoint: https://latest-whisperx.cloud.lepton.ai
150 | Is Public: No
151 | Replicas List:
152 | ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━━━┓
153 | ┃ replica id ┃ status ┃ message ┃
154 | ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━━━┩
155 | │ whisperx-5ddc79f555-l59cj │ Ready │ (empty) │
156 | └───────────────────────────┴────────┴─────────┘
157 | 1 out of 1 replicas ready.
158 | ```
159 |
161 | To access the model, we can create a client similar to the local case; simply replace `local()` with the workspace name, deployment name, and token. Also, since we are running remotely now, we need to upload the audio file. This is done by wrapping it in a `FileParam` when calling the `run` path:
161 |
162 | ```python
163 | >> from leptonai.client import Client
164 | >> from leptonai.photon import FileParam
165 | >> c = Client("YOUR_WORKSPACE_NAME", "whisperx", token="YOUR_TOKEN")
166 | >> c.run(input=FileParam(open("assets/thequickbrownfox.mp3", "rb")))
167 | [{'start': 0.028,
168 | 'end': 2.06,
169 | 'text': ' A quick brown fox jumps over the lazy dog.',
170 | 'words': [{'word': 'A', 'start': 0.028, 'end': 0.068, 'score': 0.5},
171 | {'word': 'quick', 'start': 0.109, 'end': 0.31, 'score': 0.995},
172 | {'word': 'brown',
173 | 'start': 0.35,
174 | 'end': 0.571,
175 | 'score': 0.849,
176 | 'speaker': 'SPEAKER_00'},
177 | {'word': 'fox',
178 | 'start': 0.612,
179 | 'end': 0.853,
180 | 'score': 0.897,
181 | 'speaker': 'SPEAKER_00'},
182 | {'word': 'jumps',
183 | 'start': 0.893,
184 | 'end': 1.175,
185 | 'score': 0.867,
186 | 'speaker': 'SPEAKER_00'},
187 | {'word': 'over',
188 | 'start': 1.255,
189 | 'end': 1.416,
190 | 'score': 0.648,
191 | 'speaker': 'SPEAKER_00'},
192 | {'word': 'the',
193 | 'start': 1.456,
194 | 'end': 1.517,
195 | 'score': 0.998,
196 | 'speaker': 'SPEAKER_00'},
197 | {'word': 'lazy',
198 | 'start': 1.557,
199 | 'end': 1.839,
200 | 'score': 0.922,
201 | 'speaker': 'SPEAKER_00'},
202 | {'word': 'dog.',
203 | 'start': 1.859,
204 | 'end': 2.06,
205 | 'score': 0.998,
206 | 'speaker': 'SPEAKER_00'}],
207 | 'speaker': 'SPEAKER_00'}]
208 | ```
209 |
210 | Unlike local deployment, running on the Lepton cloud platform comes with a series of advantages, especially in the whisperx case:
211 |
212 | - You do not need to worry about a reproducible software environment: the photon is guaranteed to run in the same environment it was created in.
213 | - Scaling is easier - you can simply increase the number of replicas if you need more capacity.
214 | - Automatic fault tolerance - if the photon crashes, it will be automatically restarted.
215 |
216 | Happy building!
217 |
--------------------------------------------------------------------------------
/advanced/whisperx/assets/negative_example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/leptonai/examples/033f7c0d04c57c8c24b9e82894c3b330c5c74993/advanced/whisperx/assets/negative_example.png
--------------------------------------------------------------------------------
/advanced/whisperx/assets/positive_example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/leptonai/examples/033f7c0d04c57c8c24b9e82894c3b330c5c74993/advanced/whisperx/assets/positive_example.png
--------------------------------------------------------------------------------
/advanced/whisperx/assets/pyannote.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/leptonai/examples/033f7c0d04c57c8c24b9e82894c3b330c5c74993/advanced/whisperx/assets/pyannote.png
--------------------------------------------------------------------------------
/advanced/whisperx/assets/silent.m4a:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/leptonai/examples/033f7c0d04c57c8c24b9e82894c3b330c5c74993/advanced/whisperx/assets/silent.m4a
--------------------------------------------------------------------------------
/advanced/whisperx/assets/thequickbrownfox.mp3:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/leptonai/examples/033f7c0d04c57c8c24b9e82894c3b330c5c74993/advanced/whisperx/assets/thequickbrownfox.mp3
--------------------------------------------------------------------------------
/advanced/whisperx/requirements.txt:
--------------------------------------------------------------------------------
1 | torch
2 | torchaudio
3 | leptonai
4 | pyannote.audio
5 | git+https://github.com/m-bain/whisperx.git@e9c507ce5dea0f93318746411c03fed0926b70be
6 |
--------------------------------------------------------------------------------
/assets/logo.svg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/leptonai/examples/033f7c0d04c57c8c24b9e82894c3b330c5c74993/assets/logo.svg
--------------------------------------------------------------------------------
/getting-started/README.md:
--------------------------------------------------------------------------------
1 | # Getting Started
2 |
3 | This folder contains simple examples to show the concepts of Lepton photons. Please refer to the subdirectories for more detailed instructions on how to run them - the source code usually contains self-explanatory comments.
4 |
5 | ## Example list
6 | - counter: simple demonstrative photons that implement a counter, and a safe counter using a file as state storage. More related reads: [Anatomy of a photon](https://www.lepton.ai/docs/walkthrough/anatomy_of_a_photon), [Storage](https://www.lepton.ai/docs/advanced/storage)
7 | - extra_files: an example to show how one can include additional files in the photon.
8 | - shell: a photon that runs a simple shell command, and returns stdout and stderr.
9 | - custom-image: an example that uses a custom Docker image in your Photon.
10 |
--------------------------------------------------------------------------------
/getting-started/counter/counter.py:
--------------------------------------------------------------------------------
1 | """
2 | A simple example to show a minimal example of a photon: a counter that keeps
3 | states in memory. Note that this is for illustrative purpose only - read the
4 | fine prints in the class comments.
5 |
6 | To launch a counter, run:
7 | lep photon create -n counter -m counter.py:Counter
8 | # run locally
9 | lep photon run -n counter
10 | # or if you want to run things remote, first push the photon
11 | lep photon push -n counter
12 | lep photon run -n counter -dn counter
13 |
14 | To test the photon, you can either use the API explorer in the UI, or use
15 | the photon client class in python, e.g.
16 | from leptonai.client import Client
17 |     # If you are running the photon remotely with workspace id "myworkspace"
18 | # and deployment name "counter"
19 | client = Client("myworkspace", "counter")
20 | # Or if you are running the photon locally at port 8080
21 | client = Client("http://localhost:8080")
22 | # Do NOT run the above two commands at the same time! Choose only one.
23 | print(client.add(x=3))
24 | print(client.sub(x=5))
25 | """
26 |
27 | from leptonai.photon import Photon
28 |
29 |
30 | class Counter(Photon):
31 | """
32 | A simple example showing a counter. The counter is initialized to 0 and
33 | can be incremented or decremented by calling the ``add`` or ``sub`` methods.
34 |
35 | Note that this is not a safe counter: when there are multiple replicas,
36 | every replica will have its own counter. Also, when the deployment restarts,
37 |     the counter will be reset to 0. It is an example showing that you should not
38 |     assume that deployments are automatically stateful. Remember, deployments are
39 | stateless by default unless you use a stateful storage like Lepton storage,
40 | or a database.
41 |
42 | An example to implement a minimal stateful counter is shown in the
43 | separate safe_counter example.
44 | """
45 |
46 | def init(self):
47 | self.counter = 0
48 |
49 | @Photon.handler("add")
50 | def add(self, x: int) -> int:
51 | self.counter += x
52 | return self.counter
53 |
54 | @Photon.handler("sub")
55 | def sub(self, x: int) -> int:
56 | return self.add(-x)
57 |
58 |
59 | if __name__ == "__main__":
60 | Counter().launch()
61 |
--------------------------------------------------------------------------------
/getting-started/counter/safe_counter.py:
--------------------------------------------------------------------------------
1 | """
2 | A simple example of a safe counter that utilizes Lepton storage to keep
3 | states persistent. For the details, please refer to the class comments.
4 |
5 | To launch a safe counter, you need to have a Lepton storage attached. Run:
6 | lep photon create -n safe-counter -m safe_counter.py:SafeCounter
7 | # run locally to debug
8 | sudo mkdir /mnt/leptonstore
9 | sudo chown $USER /mnt/leptonstore
10 | lep photon run -n safe-counter --local
11 | # or if you want to run things remote, first push the photon
12 | lep photon push -n safe-counter
13 | lep photon run -n safe-counter -dn safe-counter --mount /:/mnt/leptonstore
14 |
15 | To test the photon, you can either use the API explorer in the UI, or use
16 | the photon client class in python, e.g.
17 | from leptonai.client import Client
18 |     # If you are running the photon remotely with workspace id "myworkspace"
19 | # and deployment name "safe-counter"
20 | client = Client("myworkspace", "safe-counter")
21 | # Or if you are running the photon locally at port 8080
22 | client = Client("http://localhost:8080")
23 |     # Do NOT run the above two commands at the same time! Choose only one.
24 | print(client.add(x=3))
25 | print(client.sub(x=5))
26 | etc. You can try to stop and restart safe counter and see that the counter
27 | is persistent.
28 | """
29 |
30 | import errno
31 | import fcntl
32 | import os
33 | import time
34 |
35 | from fastapi import HTTPException
36 |
37 | from leptonai.photon import Photon
38 |
39 |
40 | class SafeCounter(Photon):
41 | """
42 | An example showing a safe counter using Lepton storage. Note that in actual
43 | production, you should probably use a locking mechanism better than files,
44 | such as a database.
45 |
46 | This deployment is stateful, and will be automatically recovered when the
47 | deployment restarts. It also keeps the counter consistent across replicas.
48 |     It is not "perfectly safe" - if a replica dies before it can write to and
49 |     close the file, the counter may be left in an inconsistent state.
50 |
51 | To run this example, you need to have a Lepton storage attached to the
52 | deployment. You can do this by adding the following to the run command:
53 | --mount [storage path you want to use]:/mnt/leptonstore
54 | The simplest option for [storage path you want to use] is to use the root
55 | path of the storage, aka ``--mount /:/mnt/leptonstore``.
56 | """
57 |
58 | PATH = "/mnt/leptonstore/safe_counter.txt"
59 |
60 | def init(self):
61 | # checks if the folder containing the file exists
62 | if not os.path.exists(os.path.dirname(self.PATH)):
63 | raise RuntimeError(
64 | "SafeCounter requires a Lepton storage to be attached to the deployment"
65 |                 " at /mnt/leptonstore."
66 | )
67 | # checks if the file exists
68 | if not os.path.exists(self.PATH):
69 | # if not, create the file and write 0 to it. Strictly speaking, this
70 | # may have a race condition, but it is unlikely to happen in practice
71 | # and the worst that can happen is that the file is created twice,
72 | # unless a request comes in right in between two deployments creating
73 | # the file.
74 | with open(self.PATH, "w") as file:
75 | file.write("0")
76 |
77 | @Photon.handler("add")
78 | def add(self, x: int) -> int:
79 | # Open the file in read mode
80 | with open(self.PATH, "r+") as file:
81 | # Attempt to acquire a non-blocking exclusive lock on the file
82 |             retry = 0
83 |             while True:
84 |                 try:
85 |                     fcntl.flock(file, fcntl.LOCK_EX | fcntl.LOCK_NB)
86 |                     break
87 |                 except IOError as e:
88 |                     # If the lock cannot be acquired, sleep for a short interval and
89 |                     # try again; give up after 10 retries (or on any other error).
90 |                     if e.errno != errno.EAGAIN or retry >= 10:
91 |                         raise HTTPException(
92 |                             status_code=500,
93 |                             detail=(
94 |                                 "Internal server error: failed to acquire lock on file"
95 |                                 " after repeated attempts."
96 |                             ),
97 |                         )
98 |                     retry += 1
99 |                     time.sleep(0.1)
100 |
101 | # Read the current value from the file
102 | current_value = int(file.read())
103 | # Increment the value
104 | new_value = current_value + x
105 | file.seek(0)
106 | file.write(str(new_value))
107 | file.truncate()
108 | fcntl.flock(file, fcntl.LOCK_UN)
109 | return new_value
110 |
111 | @Photon.handler("sub")
112 | def sub(self, x: int) -> int:
113 | return self.add(-x)
114 |
115 |
116 | if __name__ == "__main__":
117 | p = SafeCounter()
118 | if not os.path.exists(os.path.dirname(p.PATH)):
119 | raise RuntimeError(
120 |             "SafeCounter requires a Lepton storage to be attached to the deployment at"
121 | f" {os.path.dirname(p.PATH)}, or if you are running locally, create the"
122 | " folder first."
123 | )
124 | p.launch()
125 |
--------------------------------------------------------------------------------
/getting-started/custom-image/README.md:
--------------------------------------------------------------------------------
1 | # Custom Image
2 |
3 | Lepton supports the usage of custom images with your own software environment,
4 | given the following conditions:
5 | - the image is a relatively standard Linux image, and
6 | - it contains `python` (>3.7) and `pip`, and
7 | - optionally, to install system dependencies, it should support `apt`.
8 |
9 | Note: despite the fact that custom images are very flexible, you should use
10 | the default image if possible, and use `requirement_dependency` and
11 | `system_dependency` to install dependencies. This is because in the cloud
12 | environment, we do a lot to minimize the loading time of the default image,
13 | and a custom image may take much longer (at the scale of minutes) to load.
14 |
15 | Specifying a custom image is simple: in your Photon class, simply specify
16 | ```python
17 | class MyPhoton(Photon):
18 | image="your_custom_image_location"
19 | ```
20 |
21 | To build the example, simply do:
22 |
23 | lep photon create -n custom-image -m custom-image.py
24 |
25 | To run the photon, simply do
26 |
27 | lep photon push -n custom-image
28 | lep photon run -n custom-image [optional arguments]
29 |
30 |
--------------------------------------------------------------------------------
/getting-started/custom-image/custom-image.py:
--------------------------------------------------------------------------------
1 | import sys
2 | from leptonai.photon import Photon
3 |
4 |
5 | class Counter(Photon):
6 | """
7 | A counter photon that uses a custom Docker image.
8 | """
9 |
10 | # Note that, the image should be accessible publicly. It can be a URL, or
11 | # an image in docker hub that you can normally `docker pull` with.
12 | # In this case, we are using the python slim images as an example.
13 | image = f"python:{sys.version_info.major}.{sys.version_info.minor}-slim"
14 |
15 | def init(self):
16 | self.counter = 0
17 |
18 | @Photon.handler("add")
19 | def add(self, x: int) -> int:
20 | self.counter += x
21 | return self.counter
22 |
23 | @Photon.handler("sub")
24 | def sub(self, x: int) -> int:
25 | self.counter -= x
26 | return self.counter
27 |
--------------------------------------------------------------------------------
/getting-started/extra_files/README.md:
--------------------------------------------------------------------------------
1 | # Handling Extra Files
2 |
3 | If your photon / deployment requires a few extra files that are not part of the
4 | main python file, we provide a lightweight way to add these files to your photon
5 | by specifying the `extra_files` field in the Photon.
6 |
7 | In this example, the main photon class is defined in `main.py`, and we want to include
8 | two files: a `content.txt` file that can be read by the photon, and a `dependency.py`
9 | file that we want to import as a submodule. The `extra_files` field is a list that
10 | specifies these two files.
11 |
12 | During deployment time, these files will be unarchived and then placed in the current
13 | working directory of the photon. You can use `os.getcwd()` to get the current working
14 | directory.
15 |
16 | To run the example, simply do:
17 |
18 | lep photon run -n extra_files_example -m main.py
19 |
20 | See the source files for more detailed explanation of the example.
21 |
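Once it is running, the handlers can be called with the python client in the usual way. A minimal sketch, assuming a local run at the default port:

    from leptonai.client import Client

    client = Client("http://localhost:8080")
    print(client.get_content_txt())          # contents of content.txt
    print(client.get_dependency_content())   # string returned by dependency.py
    print(client.cwd())                      # working directory of the photon
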
--------------------------------------------------------------------------------
/getting-started/extra_files/content.txt:
--------------------------------------------------------------------------------
1 | Hello world from content.txt!
--------------------------------------------------------------------------------
/getting-started/extra_files/dependency.py:
--------------------------------------------------------------------------------
1 | """
2 | A simple module to demonstrate how to use a dependency in a photon.
3 | """
4 |
5 |
6 | def content() -> str:
7 | """
8 | A simple function to return a string for demo purpose.
9 | """
10 | return "Hello world from dependency.py!"
11 |
--------------------------------------------------------------------------------
/getting-started/extra_files/main.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 |
4 | from leptonai import Photon
5 |
6 |
7 | class Main(Photon):
8 | # extra_files defines files that will be included in the Photon package.
9 | extra_files = ["dependency.py", "content.txt"]
10 |
11 | def init(self):
12 | # If you want to use the extra_files field to store python files / modules that
13 | # you then import, you will need to add the current working directory to the
14 | # python path.
15 | #
16 | # Note that you should NOT use "__file__" to get the current working directory,
17 | # as the underlying cloudpickle class implicitly replaces the __file__ variable,
18 | # making local and remote environment inconsistent.
19 | sys.path.append(os.getcwd())
20 |
21 | @Photon.handler
22 | def get_content_txt(self) -> str:
23 | """
24 | A simple function to return the content of content.txt.
25 | """
26 | with open(os.path.join(os.getcwd(), "content.txt"), "r") as f:
27 | return f.read()
28 |
29 | @Photon.handler
30 | def get_dependency_content(self) -> str:
31 | """
32 | A simple function to return the content defined inside dependency.py.
33 | """
34 | # As long as you have added cwd in the system path, you can import it without
35 | # problem.
36 | import dependency
37 |
38 | return dependency.content()
39 |
40 | @Photon.handler
41 | def cwd(self) -> str:
42 | """
43 | A simple function to return the current working directory.
44 | """
45 | return os.getcwd()
46 |
--------------------------------------------------------------------------------
/getting-started/shell/README.md:
--------------------------------------------------------------------------------
1 | This is a simple class that uses the `/run` api to run a shell command on the
2 | deployment. Note: since deployments are considered stateless, any command you
3 | run that has a non-ephemeral effect, such as creating a file, will not be
4 | persistent unless it writes to a persistent storage such as the Lepton storage
5 | or a mounted S3 bucket.
6 |
7 | To build the photon, do:
8 |
9 | lep photon create -n shell -m shell.py:Shell
10 |
11 | To run the photon, simply do
12 |
13 | lep photon run -n shell [optional arguments]
14 |
15 |
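Once the photon is running, it can be called with the python client, the same
way as the other getting-started examples. A minimal sketch, assuming a local
run at the default port (the `uname -a` query is just an example command):

    from leptonai.client import Client

    client = Client("http://localhost:8080")
    stdout, stderr = client.run(query="uname -a")
    print(stdout or stderr)
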
--------------------------------------------------------------------------------
/getting-started/shell/shell.py:
--------------------------------------------------------------------------------
1 | import subprocess
2 | from typing import Tuple
3 |
4 | # This is what you should do to load the Photon class and write your code.
5 | from leptonai.photon import Photon, handler
6 |
7 |
8 | class Shell(Photon):
9 | def init(self):
10 | pass
11 |
12 | @handler("run", example={"query": "pwd"})
13 | def run(self, query: str) -> Tuple[str, str]:
14 | """Run the shell. Don't do rm -rf though."""
15 | output = subprocess.run(
16 | query, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True
17 | )
18 | stdout_output = output.stdout.strip()
19 | stderr_output = output.stderr.strip()
20 |
21 | return stdout_output, stderr_output
22 |
23 |
24 | if __name__ == "__main__":
25 | shell = Shell()
26 | shell.launch()
27 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [tool.black]
2 | preview = true
3 |
4 | [tool.ruff]
5 | # Enable pycodestyle (`E`) and Pyflakes (`F`) codes by default.
6 | select = ["E", "F"]
7 | ignore = ["E402", "E501"]
8 |
10 | # Allow autofix for all enabled rules (when `--fix` is provided).
10 | fixable = ["A", "B", "C", "D", "E", "F", "G", "I", "N", "Q", "S", "T", "W", "ANN", "ARG", "BLE", "COM", "DJ", "DTZ", "EM", "ERA", "EXE", "FBT", "ICN", "INP", "ISC", "NPY", "PD", "PGH", "PIE", "PL", "PT", "PTH", "PYI", "RET", "RSE", "RUF", "SIM", "SLF", "TCH", "TID", "TRY", "UP", "YTT"]
11 | unfixable = []
12 |
13 | # Exclude a variety of commonly ignored directories.
14 | exclude = [
15 | ".bzr",
16 | ".direnv",
17 | ".eggs",
18 | ".git",
19 | ".git-rewrite",
20 | ".hg",
21 | ".mypy_cache",
22 | ".nox",
23 | ".pants.d",
24 | ".pytype",
25 | ".ruff_cache",
26 | ".svn",
27 | ".tox",
28 | ".venv",
29 | "__pypackages__",
30 | "_build",
31 | "buck-out",
32 | "build",
33 | "dist",
34 | "node_modules",
35 | "venv",
36 | ]
37 |
--------------------------------------------------------------------------------