├── .github └── workflows │ └── lint.yaml ├── .gitignore ├── LICENSE ├── README.md ├── advanced ├── README.md ├── clip-interrogator │ ├── README.md │ ├── assets │ │ └── two-cats.jpg │ ├── photon.py │ └── requirements.txt ├── codellama │ ├── README.md │ └── photon.py ├── comfyui │ ├── README.md │ ├── assets │ │ ├── browser.png │ │ └── deployment.png │ └── photon.py ├── deepfloyd-if │ ├── README.md │ ├── assets │ │ └── if_result.png │ ├── deepfloyd_if.py │ └── requirements.txt ├── earning-sage │ ├── AAPL-89728-report.tsv │ ├── README.md │ ├── earning-sage-data.json │ └── main.py ├── embedding │ └── baai_bge │ │ ├── example_usage.py │ │ └── main.py ├── flamingo │ ├── README.md │ └── photon.py ├── hf-stream-llm │ └── photon.py ├── idefics │ ├── README.md │ ├── photon.py │ └── requirements.txt ├── lavis │ ├── README.md │ ├── caption.py │ ├── extract-features.py │ ├── requirements.txt │ └── vqa.py ├── layout-parser │ └── main.py ├── llama2 │ ├── README.md │ └── llama2-api.ipynb ├── nougat │ ├── README.md │ ├── photon.py │ └── requirements.txt ├── open-clip │ ├── README.md │ ├── open-clip.py │ └── requirements.txt ├── pytorch-example │ ├── main.py │ └── requirements.txt ├── sdxl │ ├── README.md │ ├── assets │ │ ├── image.png │ │ ├── inpaint.png │ │ ├── mask.png │ │ └── txt2img.png │ ├── requirements.txt │ └── sdxl.py ├── segment-anything │ ├── README.md │ ├── assets │ │ ├── koala.jpeg │ │ └── koala_segmented.jpg │ ├── requirements.txt │ ├── sam.py │ └── segment-anything.ipynb ├── segment-something │ ├── README.md │ └── SegmentSomething-LanceDB.ipynb ├── stable-diffusion-webui │ ├── README.md │ ├── assets │ │ ├── browser.png │ │ └── deployment.png │ └── photon.py ├── tabbyml │ ├── README.md │ ├── assets │ │ ├── coding.png │ │ ├── deployment.png │ │ └── vscode.png │ └── photon.py ├── tts │ ├── README.md │ ├── assets │ │ └── thequickbrownfox.mp3 │ ├── requirements.txt │ ├── tts.ipynb │ ├── tts_main.py │ └── xtts_main.py ├── whisper-jax │ ├── README.md │ ├── assets │ │ ├── bot_token.png │ │ ├── create_slack_app.png │ │ ├── event_subscription.png │ │ ├── permissions.png │ │ ├── test_japanese.wav │ │ └── whisper_result.jpg │ ├── requirements.txt │ └── whisper-jax.py └── whisperx │ ├── README.md │ ├── assets │ ├── negative_example.png │ ├── positive_example.png │ ├── pyannote.png │ ├── silent.m4a │ └── thequickbrownfox.mp3 │ ├── audio_analysis.ipynb │ ├── main.py │ └── requirements.txt ├── assets └── logo.svg ├── getting-started ├── README.md ├── counter │ ├── counter.py │ └── safe_counter.py ├── custom-image │ ├── README.md │ └── custom-image.py ├── extra_files │ ├── README.md │ ├── content.txt │ ├── dependency.py │ └── main.py └── shell │ ├── README.md │ └── shell.py └── pyproject.toml /.github/workflows/lint.yaml: -------------------------------------------------------------------------------- 1 | name: lint 2 | 3 | on: 4 | push: 5 | branches: ["main"] 6 | pull_request: 7 | branches: ["**"] 8 | 9 | permissions: 10 | contents: read 11 | 12 | jobs: 13 | lint: 14 | runs-on: ubuntu-latest 15 | 16 | steps: 17 | - uses: actions/checkout@v3 18 | - name: Set up Python 19 | uses: actions/setup-python@v4 20 | with: 21 | python-version: "3.10" 22 | - name: Install tools 23 | run: | 24 | pip install -U pip setuptools 25 | pip install black ruff 26 | - name: Lint 27 | run: | 28 | ruff check . 29 | black --check . 
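        # These are the same checks contributors can reproduce locally before opening a PR:
        #   pip install black ruff
        #   ruff check .
        #   black --check .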
30 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 160 | #.idea/ 161 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | # Lepton Examples 4 | 5 | This repository contains a collection of example photons built with the [Lepton AI sdk](https://github.com/leptonai/leptonai-sdk/). 6 | 7 | Feel free to modify and use these examples as a starting point for your own applications. 8 | 9 | The example repo is organized into the following directories: 10 | - [getting-started](https://github.com/leptonai/examples/tree/main/getting-started): A collection of simple examples that demonstrate the basics of building and running simple photons, which are light-weight, single-file applications. 11 | - [advanced](https://github.com/leptonai/examples/tree/main/advanced): A collection of more complex examples that demonstrate how to build and run real-world applications, such as LLMs, image search, object segmentation, and more. 12 | 13 | For the full documentation, please visit [https://lepton.ai/docs](https://lepton.ai/docs). 14 | 15 | ## Prerequisite 16 | 17 | Note: we are currently in closed beta. All examples in this folder are runnable locally. To run on the Lepton cloud, join the waiting list at [https://lepton.ai/](https://lepton.ai/). 18 | 19 | Install the lepton sdk via (the `-U` option ensures the most recent version is installed): 20 | ```python 21 | pip install -U leptonai 22 | ``` 23 | 24 | For many examples in the [advanced](https://github.com/leptonai/examples/tree/main/advanced) folder, there are dependencies needed by the specific algorithms. It is recommended that you use virtual environments to not pollute your daily environment. 
For example, if you use conda, you can do: 25 | ```shell 26 | conda create -n myenvironment python=3.10 27 | conda activate myenvironment 28 | ``` 29 | 30 | ## Running examples 31 | 32 | To run the examples in the respective folders, there are usually three ways: 33 | - Directly invoking the python code to run things locally, for example: 34 | ```bash 35 | python getting-started/counter/counter.py 36 | # runs on local server at port 8080 if not occupied 37 | ``` 38 | - Create a photon and then run it locally with the `lep` CLI command, for example: 39 | ```bash 40 | lep photon create -n sam -m advanced/segment-anything/sam.py 41 | lep photon runlocal -n sam 42 | ``` 43 | - Create a photon like the one above, and run it on the cloud: 44 | ```bash 45 | lep login # logs into the lepton cloud 46 | lep photon push -n sam # pushes the photon to the cloud 47 | lep photon run -n sam --resource-shape gpu.a10 # run it 48 | ``` 49 | For individual examples, refer to their source files for self-explanatory comments. 50 | 51 | ## Using clients 52 | 53 | In all three cases, you can use the python client to access the deployment via: 54 | ```python 55 | from leptonai.client import Client, local 56 | c = Client(local(port=8080)) 57 | ``` 58 | or 59 | ```python 60 | from leptonai.client import Client 61 | c = Client("myworkspaceid", "sam", token="**mytoken**") 62 | ``` 63 | 64 | For example, for the `counter` example running locally, you can interact with the photon in python: 65 | ```python 66 | >> from leptonai.client import Client, local 67 | >> c = Client(local(port=8080)) 68 | >> print(c.add.__doc__) 69 | Add 70 | 71 | Automatically inferred parameters from openapi: 72 | 73 | Input Schema (*=required): 74 | x*: integer 75 | 76 | Output Schema: 77 | output: integer 78 | >> c.add(x=10) 79 | 10 80 | >> c.add(x=2) 81 | 12 82 | ``` 83 | 84 | For more details, check out the [Quickstart](https://www.lepton.ai/docs/overview/quickstart), [Walkthrough](https://www.lepton.ai/docs/walkthrough/anatomy_of_a_photon), and the [client documentation](https://www.lepton.ai/docs/walkthrough/clients). 85 | 86 | 87 | ## Notes on huggingface access 88 | 89 | Sometimes, you might encounter errors accessing huggingface models, such as the following message when accessing `llama2`: 90 | ```text 91 | Failed to create photon: 401 Client Error. (Request ID: Root=xxxxxxx) 92 | 93 | Cannot access gated repo for url https://huggingface.co/api/models/meta-llama/Llama-2-7b-hf. 94 | Repo model meta-llama/Llama-2-7b-hf is gated. You must be authenticated to access it. 95 | ``` 96 | This means that you did not have access to the repo, or you did not set up huggingface access tokens. We'll detail how to do so below. 97 | 98 | ### Get access to the huggingface repo. 99 | Go to the corresponding huggingface repo, and accept the terms and conditions of the corresponding repository. For example, for llama2, the corresponding link is [https://huggingface.co/meta-llama/Llama-2-7b-hf](https://huggingface.co/meta-llama/Llama-2-7b-hf). 100 | 101 | ### Set up credentials to access huggingface 102 | Your credential to access huggingface can be found online at [https://huggingface.co/settings/tokens](https://huggingface.co/settings/tokens). 103 | 104 | If you are running photons locally, you can do one of the following: 105 | - set the token as an environmental variable, with `export HUGGING_FACE_HUB_TOKEN=your_token_here`. 106 | - or, in your python environment, run the following command and login. 
Huggingface will store the credential in the local cache, usually `.huggingface/token`, for repeated usage: 107 | ```python 108 | import huggingface_hub 109 | huggingface_hub.login() 110 | ``` 111 | 112 | If you are running on the Lepton cloud remotely, the easiest approach is to use the `secret` feature of Lepton. You can safely store the huggingface token as a secret via CLI: 113 | ```shell 114 | lep secret create -n HUGGING_FACE_HUB_TOKEN -v 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 115 | ``` 116 | (Don't worry, the above token is only an example and isn't active.) 117 | 118 | You can verify the secret exists with `lep secret list`: 119 | ```shell 120 | >> lep secret list 121 | Secrets 122 | ┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━┓ 123 | ┃ ID ┃ Value ┃ 124 | ┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━┩ 125 | │ HUGGING_FACE_HUB_TOKEN │ (hidden) │ 126 | └────────────────────────┴──────────┘ 127 | ``` 128 | 129 | And when you launch a photon, add `--secret`: 130 | ```shell 131 | lep photon run -n myphoton --secret HUGGING_FACE_HUB_TOKEN 132 | ``` 133 | 134 | For more detailed information, check out the following resources: 135 | - [Huggingface's login reference](https://huggingface.co/docs/huggingface_hub/package_reference/login) 136 | - [Lepton documentation on secrets](https://www.lepton.ai/docs/advanced/env_n_secrets) 137 | - [An example showing huggingface access using the deepfloyd-if model](https://github.com/leptonai/examples/tree/main/advanced/deepfloyd-if) 138 | 139 | ## Contributing 140 | 141 | We love your feedback! If you would like to suggest example use cases, please [open an issue](https://github.com/leptonai/examples/issues/new). If you would like to contribute an example, kindly create a subfolder under `getting-started` or `advanced`, and submit a pull request. 142 | -------------------------------------------------------------------------------- /advanced/README.md: -------------------------------------------------------------------------------- 1 | # Advanced topics 2 | 3 | This folder contains typical AI applications using lepton. 4 | 5 | ## Example list 6 | - [deepfloyd-if](https://github.com/leptonai/examples/tree/main/advanced/deepfloyd-if): running the Deepfloyd IF model for AIGC. 7 | - [earning-sage](https://github.com/leptonai/examples/tree/main/advanced/earning-sage) : talking to a LLM behaves like a CFO. 8 | - [open-clip](https://github.com/leptonai/examples/tree/main/advanced/open-clip) : running the [Open-Clip](https://github.com/mlfoundations/open_clip) model. 9 | - [sdxl](https://github.com/leptonai/examples/tree/main/advanced/sdxl) : Stable Diffusion XL model 10 | - [segment-anything](https://github.com/leptonai/examples/tree/main/advanced/segment-anything): running Meta's [Segment Anything](https://github.com/facebookresearch/segment-anything) model. 11 | - [tts](https://github.com/leptonai/examples/tree/main/advanced/tts): running the [Coqui AI TTS](https://github.com/coqui-ai/TTS/) text-to-speech model. 12 | - [whisper-jax](https://github.com/leptonai/examples/tree/main/advanced/whisper-jax): running the Whisper ASR model, as well as a slack bot that automatically transcribes all slack channel voice messages. 13 | - [whisperx](https://github.com/leptonai/examples/tree/main/advanced/whisperx): similar to the whisper model, but does transcription, alignment, and diarization for the input. 
14 | -------------------------------------------------------------------------------- /advanced/clip-interrogator/README.md: -------------------------------------------------------------------------------- 1 | # clip-interrogator 2 | 3 | [clip-interrogator](https://github.com/pharmapsychotic/clip-interrogator) is a prompt engineering tool that combines OpenAI's CLIP and Salesforce's BLIP to optimize text prompts to match a given image. Users can use the resulting prompts with text-to-image models like Stable Diffusion on DreamStudio to create cool art. In this example we are going to demonstrate how to run clip-interrogator on Lepton. 4 | 5 | ## Install Lepton sdk 6 | ```shell 7 | pip install leptonai 8 | ``` 9 | 10 | ## Launch inference service locally 11 | 12 | To run locally, first install dependencies: 13 | ```shell 14 | pip install -r requirements.txt 15 | ``` 16 | 17 | After installing dependencies, you can launch the inference service locally by running: 18 | 19 | ```shell 20 | lep photon run -n clip-interrogator -m photon.py --local 21 | ``` 22 | 23 | ## Launch inference service in the cloud 24 | 25 | Similar to other examples, you can run services on Lepton Cloud Platform easily, e.g.: 26 | 27 | ```shell 28 | lep photon run -n clip-interrogator -m photon.py --resource-shape gpu.a10 29 | ``` 30 | 31 | You can visit [dashboard.lepton.ai](https://dashboard.lepton.ai/) to try out the model. 32 | 33 | Note: by default, the server is protected via a token, so you won't be able to access the gradio UI. This is by design to provide adequate security. If you want to make the UI public, you can either add the `--public` argument to `lep photon run`, or update the deployment with: 34 | 35 | ```shell 36 | lep deployment update -n clip-interrogator --public 37 | ``` 38 | 39 | ## Client 40 | 41 | Once the inference service is up (either locally or in the cloud), you can use the client to access it in a programmatic way: 42 | 43 | ```python 44 | from leptonai.client import Client, local, current 45 | 46 | # Use this if you are running locally 47 | client = Client(local()) 48 | # Or, if you are logged in to your workspace via `lep login` already 49 | # and have launched it: 50 | # client = Client(current(), "clip-interrogator", token=YOUR_WORKSPACE_TOKEN) 51 | ``` 52 | 53 | ```python 54 | image = "http://images.cocodataset.org/val2017/000000039769.jpg" 55 | prompt = client.run(image=image) 56 | 57 | print(prompt) 58 | ``` 59 | 60 | 61 | Image: 62 | 63 | ![two-cats](assets/two-cats.jpg) 64 | 65 | Prompt: 66 | 67 | ``` 68 | two cats laying on a couch with remote controls on the back, on flickr in 2007, ;open mouth, vhs artifacts, inspired by Frédéric Bazille, long - haired siberian cat, inflateble shapes, on a hot australian day, circa 2 0 0 8, at midday, size difference, aliasing visible 69 | ``` 70 | -------------------------------------------------------------------------------- /advanced/clip-interrogator/assets/two-cats.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leptonai/examples/033f7c0d04c57c8c24b9e82894c3b330c5c74993/advanced/clip-interrogator/assets/two-cats.jpg -------------------------------------------------------------------------------- /advanced/clip-interrogator/photon.py: -------------------------------------------------------------------------------- 1 | from io import BytesIO 2 | import os 3 | from typing import Union 4 | 5 | from leptonai.photon import Photon, FileParam, get_file_content 6 | 7 | 8 | class
ClipInterrogator(Photon): 9 | requirement_dependency = [ 10 | "clip-interrogator==0.6.0", 11 | "Pillow", 12 | "numpy<2" 13 | ] 14 | 15 | def init(self): 16 | from clip_interrogator import ( 17 | Config, 18 | Interrogator, 19 | list_caption_models, 20 | list_clip_models, 21 | ) 22 | 23 | caption_model_name = os.environ.get("CAPTION_MODEL_NAME", "blip-large") 24 | if caption_model_name not in list_caption_models(): 25 | raise ValueError( 26 | f"caption_model_name must be one of {list_caption_models()}" 27 | ) 28 | 29 | clip_model_name = os.environ.get("CLIP_MODEL_NAME", "ViT-L-14/openai") 30 | if clip_model_name not in list_clip_models(): 31 | raise ValueError(f"clip_model_name must be one of {list_clip_models()}") 32 | 33 | self.ci = Interrogator( 34 | Config( 35 | caption_model_name=caption_model_name, clip_model_name=clip_model_name 36 | ) 37 | ) 38 | 39 | @Photon.handler 40 | def run(self, image: Union[FileParam, str]) -> str: 41 | from PIL import Image 42 | 43 | content = get_file_content(image) 44 | image = Image.open(BytesIO(content)).convert("RGB") 45 | return self.ci.interrogate(image) 46 | -------------------------------------------------------------------------------- /advanced/clip-interrogator/requirements.txt: -------------------------------------------------------------------------------- 1 | clip-interrogator==0.6.0 2 | Pillow 3 | -------------------------------------------------------------------------------- /advanced/codellama/README.md: -------------------------------------------------------------------------------- 1 | # CodeLlama 2 | 3 | [CodeLlama](https://ai.meta.com/research/publications/code-llama-open-foundation-models-for-code/) released by Meta is a family of LLM based on Llama 2, providing code completion, infilling capabilities and zero-shot instruction following ability for programming tasks. In this example we are going to demonstrate how to run CodeLlama-7b model on Lepton. 4 | 5 | At the point of writing, running CodeLlama models relies on some relatively [new changes](https://github.com/huggingface/transformers/pull/25740) in HuggingFace Transformers that are not released yet, so please make sure to install transformers from source until the next version is released: 6 | 7 | `pip install git+https://github.com/huggingface/transformers.git@015f8e1 accelerate` 8 | 9 | ## Launch CodeLlama inference service locally 10 | 11 | Ensure that you have installed the required dependencies. Then, run: 12 | ```shell 13 | lep photon run -n codellama -m photon.py 14 | ``` 15 | Note that you will need to have a relatively large GPU (>=16GB memory). 16 | 17 | Use `MODEL` environment variable to switch to a different model in the CodeLlama family, e.g. 18 | 19 | ```shell 20 | MODEL=codellama/CodeLlama-7b-Instruct-hf lep photon run -n codellama -m photon.py 21 | ``` 22 | 23 | ## Launch CodeLlama inference service in the cloud 24 | 25 | Similar to other examples, you can run CodeLlama with the following command. Use a reasonably sized GPU like `gpu.a10` to ensure that things run. 26 | 27 | ```shell 28 | lep photon create -n codellama -m photon.py 29 | lep photon push -n codellama 30 | lep photon run \ 31 | -n codellama \ 32 | --resource-shape gpu.a10 33 | ``` 34 | 35 | Use `MODEL` environment variable to switch to a different model in the CodeLlama family, e.g. 
36 | 37 | ```shell 38 | lep photon run \ 39 | -n codellama \ 40 | --env MODEL=codellama/CodeLlama-7b-Instruct-hf \ 41 | --resource-shape gpu.a10 42 | ``` 43 | 44 | And visit [dashboard.lepton.ai](https://dashboard.lepton.ai/) to try out the model. 45 | 46 | Note: in default, the server is protected via a token, so you won't be able to access the gradio UI. This is by design to provide adequate security. If you want to make the UI public, you can either add the `--public` argument to `lep photon run`, or update the deployment with: 47 | 48 | ```shell 49 | lep deployment update -n codellama --public 50 | ``` 51 | 52 | ### Client 53 | 54 | Once the inference service is up (either locally or in the cloud), you can use the client to access it in a programmatical way: 55 | 56 | Create client 57 | ```python 58 | >>> from leptonai.client import Client, local 59 | 60 | >>> client = Client(local(port=8080)) # If the inference service was launched in the cloud, change the parameters to create the client, see https://github.com/leptonai/examples#using-clients 61 | ``` 62 | 63 | Code completion: 64 | ```python 65 | 66 | >>> prompt = '''\ 67 | import socket 68 | 69 | def ping_exponential_backoff(host: str): 70 | ''' 71 | 72 | >>> print(client.run(inputs=prompt, max_new_tokens=256)) 73 | ''' 74 | import socket 75 | 76 | def ping_exponential_backoff(host: str): 77 | """Repeatedly try until ping succeeds""" 78 | for i in range(1,11): 79 | print('Ping attempt '+str(i)) 80 | ... 81 | ''' 82 | ``` 83 | 84 | If you have chosen to use the "Instruct" models (e.g. the "codellama/CodeLlama-7b-Instruct-hf" one mentioned above), you can instruct/chat with the model: 85 | 86 | Instructions/Chat: 87 | ````python 88 | >>> user = 'In Bash, how do I list all text files in the current directory (excluding subdirectories) that have been modified in the last month?' 89 | 90 | >>> prompt = f"[INST] {user.strip()} [/INST]" 91 | 92 | >>> print(client.run(inputs=prompt, max_new_tokens=256)[len(prompt):]) 93 | ''' 94 | You can use the `find` command in Bash to list all text files in the current directory that have been modified in the last month. Here's an example command: 95 | ``` 96 | find. -type f -name "*.txt" -mtime -30 97 | ``` 98 | Here's how the command works: 99 | 100 | * `.` is the current directory. 101 | * `-type f` specifies that we want to find files (not directories). 102 | * `-name "*.txt"` specifies that we want to find files with the `.txt` extension. 103 | * `-mtime -30` specifies that we want to find files that have been modified in the last 30 days. 104 | 105 | The `-mtime` option takes a number of days as its argument, and the `-30` argument means "modified in the last 30 days". 106 | ... 
107 | ''' 108 | ```` 109 | -------------------------------------------------------------------------------- /advanced/codellama/photon.py: -------------------------------------------------------------------------------- 1 | import os 2 | from leptonai.photon import Photon 3 | from typing import List, Optional, Union 4 | 5 | import torch 6 | from transformers import pipeline 7 | 8 | 9 | class CodeLlama(Photon): 10 | requirement_dependency = [ 11 | "git+https://github.com/huggingface/transformers.git@015f8e1", 12 | "accelerate", 13 | ] 14 | 15 | def init(self): 16 | if torch.cuda.is_available(): 17 | device = 0 18 | else: 19 | device = -1 20 | 21 | self.pipeline = pipeline( 22 | "text-generation", 23 | model=os.environ.get("MODEL", "codellama/CodeLlama-7b-hf"), 24 | torch_dtype=torch.float16, 25 | device=device, 26 | ) 27 | 28 | def _get_generated_text(self, res): 29 | if isinstance(res, str): 30 | return res 31 | elif isinstance(res, dict): 32 | return res["generated_text"] 33 | elif isinstance(res, list): 34 | if len(res) == 1: 35 | return self._get_generated_text(res[0]) 36 | else: 37 | return [self._get_generated_text(r) for r in res] 38 | else: 39 | raise ValueError( 40 | f"Unsupported result type in _get_generated_text: {type(res)}" 41 | ) 42 | 43 | @Photon.handler( 44 | "run", 45 | example={ 46 | "inputs": "import socket\n\ndef ping_exponential_backoff(host: str):", 47 | "do_sample": True, 48 | "top_k": 10, 49 | "top_p": 0.95, 50 | "temperature": 0.1, 51 | "max_new_tokens": 256, 52 | }, 53 | ) 54 | def run_handler( 55 | self, 56 | inputs: Union[str, List[str]], 57 | do_sample: bool = True, 58 | top_k: int = 10, 59 | top_p: float = 0.95, 60 | temperature: Optional[float] = 0.1, 61 | max_new_tokens: int = 256, 62 | **kwargs, 63 | ) -> Union[str, List[str]]: 64 | res = self.pipeline( 65 | inputs, 66 | do_sample=do_sample, 67 | top_k=top_k, 68 | top_p=top_p, 69 | temperature=temperature, 70 | max_new_tokens=max_new_tokens, 71 | **kwargs, 72 | ) 73 | return self._get_generated_text(res) 74 | 75 | 76 | if __name__ == "__main__": 77 | p = CodeLlama() 78 | p.launch() 79 | -------------------------------------------------------------------------------- /advanced/comfyui/README.md: -------------------------------------------------------------------------------- 1 | # ComfyUI 2 | 3 | [ComfyUI](https://github.com/comfyanonymous/ComfyUI.git) is a powerful and modular stable diffusion GUI and backend that let you design and execute advanced stable diffusion pipelines using a graph/nodes/flowchart based interface. 4 | 5 | ## Install Lepton sdk 6 | ```shell 7 | pip install leptonai 8 | ``` 9 | 10 | ## Launch Stable Diffusion web UI in the cloud 11 | 12 | Similar to other examples, you can run ComfyUI on Lepton Cloud Platform easily, e.g.: 13 | 14 | ```shell 15 | lep photon create -n comfyui -m photon.py 16 | lep photon push -n comfyui 17 | lep photon run \ 18 | -n comfyui \ 19 | --resource-shape gpu.a10 20 | --public 21 | ``` 22 | 23 | You can visit [dashboard.lepton.ai](https://dashboard.lepton.ai/) to use the web Dashboard, or use `lep` cli to manage the launched deployment: 24 | ```shell 25 | lep deployment status -n comfyui 26 | ``` 27 | 28 | ## Client 29 | 30 | Once the ComfyUI server is up, you can copy the deployment url shown on the Lepton Dashboard (or in the `lep` cli output) 31 | 32 | 33 | 34 | and visit it in the web browser. e.g. 
running the sdxl + svd models to do text to video: 35 | 36 | 37 | -------------------------------------------------------------------------------- /advanced/comfyui/assets/browser.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leptonai/examples/033f7c0d04c57c8c24b9e82894c3b330c5c74993/advanced/comfyui/assets/browser.png -------------------------------------------------------------------------------- /advanced/comfyui/assets/deployment.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leptonai/examples/033f7c0d04c57c8c24b9e82894c3b330c5c74993/advanced/comfyui/assets/deployment.png -------------------------------------------------------------------------------- /advanced/comfyui/photon.py: -------------------------------------------------------------------------------- 1 | from leptonai.photon import Photon 2 | 3 | 4 | class ComfyUI(Photon): 5 | comfyui_version = "329c571" 6 | cmd = [ 7 | "bash", 8 | "-c", 9 | ( 10 | "pip install aiohttp einops torchsde &&" 11 | "git clone --recursive https://github.com/comfyanonymous/ComfyUI.git && cd" 12 | f" ComfyUI && git checkout {comfyui_version} && python main.py --listen" 13 | " 0.0.0.0 --port 8080" 14 | ), 15 | ] 16 | deployment_template = { 17 | "resource_shape": "gpu.a10", 18 | } 19 | -------------------------------------------------------------------------------- /advanced/deepfloyd-if/README.md: -------------------------------------------------------------------------------- 1 | # Deepfloyd If 2 | 3 | This folder shows an end-to-end AI example, with the [Deepfloyd IF model](https://github.com/deep-floyd/IF). The demo also shows how to run a photon with environmental variables (or secrets) to pass in necessary credentials. 4 | 5 | With this demo, you will be able to run deepfloyd and get results like follows: 6 | 7 | 8 | 9 | ## Obtain access to the deepfloyd model. 10 | Deepfloyd hosts models on huggingface. You should obtain access as follows: 11 | - On the [model info page](https://huggingface.co/DeepFloyd/IF-I-XL-v1.0), sign in and agree with the agreement 12 | - Visit [tokens](https://huggingface.co/settings/tokens) page to generate the token. 13 | 14 | ## Use Lepton's secret management 15 | 16 | As you may use the token multiple times, we recommend storing it in Lepton's secret store. Simply do this and remember to replace the token with your own. 17 | ```shell 18 | lep secret create -n HUGGING_FACE_HUB_TOKEN -v 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 19 | ``` 20 | (Don't worry, the above token is only an example and isn't active.) 21 | 22 | You can verify the secret exists with `lep secret list`: 23 | ```shell 24 | >> lep secret list 25 | Secrets 26 | ┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━┓ 27 | ┃ ID ┃ Value ┃ 28 | ┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━┩ 29 | │ HUGGING_FACE_HUB_TOKEN │ (hidden) │ 30 | └────────────────────────┴──────────┘ 31 | ``` 32 | 33 | ## Implementation note: mounting a gradio server 34 | 35 | In the deepfloyd example, we will not only expose a standard API, but also incorporate a UI implemented by gradio. This is done easily via the `mount` capability as follows: 36 | ```python 37 | @Photon.handler(mount=True) 38 | def ui(self) -> gr.Blocks: 39 | blocks = gr.Blocks() 40 | # Actual blocks creation code here - see deepfloyd_if.py for details. 41 | ... 42 | return blocks 43 | ``` 44 | The UI will then be available at the `/ui/` address. 
For example, if you are running locally, it would be something like `http://0.0.0.0:8080/ui/`. 45 | 46 | ## Run deepfloyd locally 47 | 48 | Ensure that you have installed the required dependencies. Then, run: 49 | ```shell 50 | python deepfloyd_if.py 51 | ``` 52 | Note that you will need to have a relatively large GPU (>20GB memory). When the program runs, visit `http://0.0.0.0:8080/ui/` for the web UI, or use the client to access it in a programmatical way. 53 | 54 | ## Run deepfloyd in the cloud 55 | 56 | Similar to other examples, you can run deepfloyd with the following command. Remember to pass in the huggingface access token, and also, use a reasonably sized GPU like `gpu.a10` to ensure that things run. 57 | 58 | ```shell 59 | lep photon create -n deepfloyd -m deepfloyd_if.py 60 | lep photon push -n deepfloyd 61 | lep photon run \ 62 | -n deepfloyd \ 63 | --secret HUGGING_FACE_HUB_TOKEN \ 64 | --resource-shape gpu.a10 65 | ``` 66 | 67 | And visit [dashboard.lepton.ai](https://dashboard.lepton.ai/) to try out the model. 68 | 69 | Note: in default, the server is protected via a token, so you won't be able to access the gradio UI. This is by design to provide adequate security. If you want to make the UI public, you can either add the `--public` argument to `lep photon run`, or update the deployment with: 70 | 71 | ```shell 72 | lep deployment update -n deepfloyd --public 73 | ``` 74 | 75 | You can now use deepfloyd either via the UI or via the client. Enjoy! -------------------------------------------------------------------------------- /advanced/deepfloyd-if/assets/if_result.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leptonai/examples/033f7c0d04c57c8c24b9e82894c3b330c5c74993/advanced/deepfloyd-if/assets/if_result.png -------------------------------------------------------------------------------- /advanced/deepfloyd-if/deepfloyd_if.py: -------------------------------------------------------------------------------- 1 | from io import BytesIO 2 | import os 3 | 4 | from diffusers import DiffusionPipeline 5 | from diffusers.pipelines.deepfloyd_if import fast27_timesteps, smart27_timesteps 6 | from diffusers.utils import pt_to_pil 7 | from loguru import logger 8 | import gradio 9 | import torch 10 | 11 | from leptonai.photon import Photon, PNGResponse 12 | 13 | 14 | class If(Photon): 15 | requirement_dependency = [ 16 | "diffusers==0.16.1", 17 | "torch==2.0.0", 18 | "torchvision==0.15.1", 19 | "gradio", 20 | ] 21 | 22 | def init(self): 23 | # Checks if the user has logged into huggingface, or have provided the huggingface token. 24 | logger.info("Checking huggingface credentials...") 25 | if not os.environ.get("HUGGING_FACE_HUB_TOKEN", None) and not os.path.exists( 26 | os.path.expanduser("~/.huggingface/token") 27 | ): 28 | logger.warning( 29 | "Environment variable HUGGING_FACE_HUB_TOKEN not set, and it seems that" 30 | " you have not logged into huggingface using its CLI. This may stop us" 31 | " from accessing some models. Refer to the deepfloyd page, e.g." 32 | " https://huggingface.co/DeepFloyd/IF-I-XL-v1.0, for information about" 33 | " access." 
34 | ) 35 | logger.info("Loading models...") 36 | s1_model_size = os.environ.get("STAGE1_MODEL_SIZE", "M") 37 | if s1_model_size not in ["M", "L", "XL"]: 38 | raise ValueError( 39 | "STAGE1_MODEL_SIZE must be one of 'M', 'L', or 'XL', but got" 40 | f" {s1_model_size}" 41 | ) 42 | s1_model = f"DeepFloyd/IF-I-{s1_model_size}-v1.0" 43 | logger.info(f"Using stage 1 model: {s1_model}") 44 | s2_model_size = os.environ.get("STAGE2_MODEL_SIZE", "M") 45 | if s2_model_size not in ["M", "L"]: 46 | raise ValueError( 47 | f"STAGE2_MODEL_SIZE must be one of 'M' or 'L', but got {s2_model_size}" 48 | ) 49 | s2_model = f"DeepFloyd/IF-II-{s2_model_size}-v1.0" 50 | logger.info(f"Using stage 2 model: {s2_model}") 51 | 52 | enable_cpu_offload = os.environ.get("ENABLE_CPU_OFFLOAD", "1").lower() in [ 53 | "true", 54 | "1", 55 | ] 56 | logger.info(f"Enable CPU offload: {enable_cpu_offload}") 57 | 58 | # stage 1 59 | self.stage_1 = DiffusionPipeline.from_pretrained( 60 | s1_model, variant="fp16", torch_dtype=torch.float16 61 | ) 62 | if enable_cpu_offload: 63 | self.stage_1.enable_model_cpu_offload() 64 | else: 65 | self.stage_1.to("cuda") 66 | 67 | # stage 2 68 | self.stage_2 = DiffusionPipeline.from_pretrained( 69 | s2_model, 70 | text_encoder=None, 71 | variant="fp16", 72 | torch_dtype=torch.float16, 73 | ) 74 | if enable_cpu_offload: 75 | self.stage_2.enable_model_cpu_offload() 76 | else: 77 | self.stage_2.to("cuda") 78 | # stage 3 79 | safety_modules = { 80 | "feature_extractor": self.stage_1.feature_extractor, 81 | "safety_checker": self.stage_1.safety_checker, 82 | "watermarker": self.stage_1.watermarker, 83 | } 84 | self.stage_3 = DiffusionPipeline.from_pretrained( 85 | "stabilityai/stable-diffusion-x4-upscaler", 86 | **safety_modules, 87 | torch_dtype=torch.float16, 88 | ) 89 | if enable_cpu_offload: 90 | self.stage_3.enable_model_cpu_offload() 91 | else: 92 | self.stage_3.to("cuda") 93 | logger.info("Models loaded.") 94 | 95 | def _run(self, prompt: str, seed: int = 0): 96 | """ 97 | Runs the Deepfloyd IF model with the given prompt, and returns the resulting PIL images. 98 | """ 99 | res = [] 100 | generator = torch.manual_seed(seed) 101 | 102 | # text embeds 103 | prompt_embeds, negative_embeds = self.stage_1.encode_prompt(prompt) 104 | # stage 1 105 | images = self.stage_1( 106 | prompt_embeds=prompt_embeds, 107 | negative_prompt_embeds=negative_embeds, 108 | generator=generator, 109 | output_type="pt", 110 | timesteps=fast27_timesteps, 111 | ).images # type: ignore 112 | res.append(pt_to_pil(images)[0]) 113 | # stage 2 114 | images = self.stage_2( 115 | image=images, 116 | prompt_embeds=prompt_embeds, 117 | negative_prompt_embeds=negative_embeds, 118 | generator=generator, 119 | output_type="pt", 120 | timesteps=smart27_timesteps, 121 | ).images # type: ignore 122 | res.append(pt_to_pil(images)[0]) 123 | # stage 3 124 | images = self.stage_3( 125 | prompt=prompt, 126 | image=images, 127 | generator=generator, 128 | noise_level=100, 129 | num_inference_steps=30, 130 | ).images # type: ignore 131 | res.append(images[0]) 132 | 133 | return res 134 | 135 | # The main path for the Deepfloyd IF photon. 136 | @Photon.handler( 137 | example={ 138 | "prompt": ( 139 | "A photo of a kangaroo wearing an orange hoodie and blue sunglasses" 140 | ' standing in front of the eiffel tower holding a sign that says "very' 141 | ' deep learning"' 142 | ) 143 | }, 144 | ) 145 | def run(self, prompt: str) -> PNGResponse: 146 | """ 147 | Runs the Deepfloyd IF model with the given prompt, and returns the resulting PNG image. 
148 | """ 149 | images = self._run(prompt=prompt) 150 | 151 | img_io = BytesIO() 152 | images[-1].save(img_io, format="PNG", quality="keep") 153 | img_io.seek(0) 154 | return PNGResponse(img_io) 155 | 156 | # This shows how to use gradio to create a UI, and use Photon handler's `mount` 157 | # feature to mount the UI to the Photon. 158 | # With this, you can host both a Photon API and a Gradio UI on the same server. 159 | # Of course, make sure that you do not have a conflicting name for the handler. 160 | @Photon.handler(mount=True) 161 | def ui(self) -> gradio.Blocks: 162 | blocks = gradio.Blocks() 163 | 164 | with blocks: 165 | with gradio.Group(): 166 | with gradio.Box(): 167 | with gradio.Row().style(mobile_collapse=False, equal_height=True): 168 | text = gradio.Textbox( 169 | label="Enter your prompt", 170 | show_label=False, 171 | max_lines=1, 172 | placeholder="Enter your prompt", 173 | ).style( 174 | border=(True, False, True, True), 175 | rounded=(True, False, False, True), 176 | container=False, 177 | ) 178 | btn = gradio.Button("Generate image").style( 179 | margin=False, 180 | rounded=(False, True, True, False), 181 | ) 182 | gallery = gradio.Gallery( 183 | label="Generated images", show_label=False, elem_id="gallery" 184 | ).style(grid=(3,), height="auto") 185 | 186 | with gradio.Row(elem_id="advanced-options"): 187 | seed = gradio.Slider( 188 | label="Seed", 189 | minimum=0, 190 | maximum=2147483647, 191 | step=1, 192 | randomize=True, 193 | ) 194 | btn.click(self._run, inputs=[text, seed], outputs=gallery) 195 | return blocks 196 | 197 | 198 | if __name__ == "__main__": 199 | p = If() 200 | p.launch() 201 | -------------------------------------------------------------------------------- /advanced/deepfloyd-if/requirements.txt: -------------------------------------------------------------------------------- 1 | diffusers==0.16.1 2 | gradio 3 | leptonai 4 | torch==2.0.0 5 | torchvision==0.15.1 6 | -------------------------------------------------------------------------------- /advanced/earning-sage/README.md: -------------------------------------------------------------------------------- 1 | # 🧙 Earning-Sage 2 | Have you ever thought about joining an earning call and asking questions to these CFOs? That used to be the privilege held by the investors from high-end investment banks such as JP Morgan, Goldman Sachs and Morgan Stanley. 3 | 4 | Yet with the capability of LLM and proper techniques around it, not anymore. And if you don’t feel like reading the whole post, feel free to try out a demo [here](https://earningsage.lepton.run/). This demo is created based on the Apple Q2 2023 earning call. 5 | 6 | The full documentation can be found [here](https://www.lepton.ai/docs/examples/earning_sage). 7 | 8 | ## Getting Started 9 | 10 | ### Step 1 : Setup env 11 | In `main.py` , change line 48 and 49 to the URL with corresponding URL and token. 
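The lines in question are the two `os.environ` assignments inside `EarningSage_Retriver.init()` in `main.py`, which point the OpenAI-compatible client at your LLM endpoint and token. A minimal sketch of the edit is shown below; the URL and key are placeholders that you must replace with your own values:

```python
import os  # already imported at the top of main.py

# Inside EarningSage_Retriver.init() -- both values below are placeholders:
os.environ["OPENAI_API_BASE"] = "https://your-llm-endpoint.example.com/v1"  # your API base URL
os.environ["OPENAI_API_KEY"] = "your-api-token"                             # your API token
```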
12 | 13 | 14 | ### Step 2 : Create a photon 15 | ```shell 16 | lep photon create -n earning-sage -m py:main.py 17 | ``` 18 | 19 | ### Step 3 : Run the photon 20 | ```shell 21 | # Running locally 22 | lep photon run -n earning-sage --local 23 | # Running remotely, this requies login to lepton.ai 24 | lep photon push -n earning-sage 25 | lep photon run -n earning-sage 26 | ``` 27 | 28 | -------------------------------------------------------------------------------- /advanced/earning-sage/main.py: -------------------------------------------------------------------------------- 1 | from leptonai.photon import Photon 2 | 3 | from langchain.llms import OpenAI 4 | from langchain.document_loaders.csv_loader import CSVLoader 5 | from langchain.embeddings.openai import OpenAIEmbeddings 6 | from langchain.vectorstores.chroma import Chroma 7 | from langchain.text_splitter import RecursiveCharacterTextSplitter 8 | from langchain.chains import RetrievalQA 9 | 10 | import openai 11 | 12 | import os 13 | import gradio as gr 14 | 15 | 16 | def create_retriever(target_file): 17 | loader = CSVLoader(target_file, csv_args={"delimiter": "\t"}) 18 | text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder( 19 | chunk_size=256, chunk_overlap=0 20 | ) 21 | docs = loader.load_and_split(text_splitter=text_splitter) 22 | embeddings = OpenAIEmbeddings() 23 | db = Chroma.from_documents(docs, embeddings) 24 | return db.as_retriever() 25 | 26 | 27 | def create_qa_retrival_chain(target_file): 28 | foo_retriever = create_retriever(target_file) 29 | llm = OpenAI(temperature=0) 30 | qa = RetrievalQA.from_chain_type( 31 | llm=llm, chain_type="stuff", retriever=foo_retriever 32 | ) 33 | return qa 34 | 35 | 36 | class EarningSage_Retriver(Photon): 37 | extra_files = {"AAPL-89728-report.tsv": "AAPL-89728-report.tsv"} 38 | 39 | requirement_dependency = ["tiktoken", "openai", "langchain", "chromadb", "gradio"] 40 | 41 | def init(self): 42 | os.environ["OPENAI_API_BASE"] = "API_BASE_FROM_TUNA" 43 | os.environ["OPENAI_API_KEY"] = "LEPTONAI_API_KEY" 44 | 45 | openai.api_base = os.environ["OPENAI_API_BASE"] 46 | openai.api_key = os.environ["OPENAI_API_KEY"] 47 | 48 | target_file = "AAPL-89728-report.tsv" 49 | 50 | print("Loading LLM from", openai.api_base) 51 | self.retrival_chain = create_qa_retrival_chain(target_file) 52 | print("Ready to serve!") 53 | 54 | @Photon.handler("chat") 55 | def chat(self, message): 56 | return self.retrival_chain.run(message) 57 | 58 | @Photon.handler(mount=True) 59 | def ui(self): 60 | blocks = gr.Blocks(title="🧙🏼 Earning Report Assistant") 61 | 62 | with blocks: 63 | gr.Markdown("# 🧙🏼 Earning Report Assistant") 64 | gr.Markdown(""" 65 | This is an earning report assistant built for investors can't make the earning call on time. This sample is using Apple 2023 Q2 report. Feel free to reach out to uz@lepton.ai for more advanced features. 66 | """) 67 | with gr.Row(): 68 | chatbot = gr.Chatbot(label="Model") 69 | with gr.Row(): 70 | msg = gr.Textbox( 71 | value=( 72 | "What do you think of the relationship between Apple and it's" 73 | " customers?" 
74 | ), 75 | label="Questions you would like to ask", 76 | ) 77 | 78 | with gr.Row(): 79 | send = gr.Button("Send") 80 | clear = gr.Button("Clear") 81 | 82 | def respond_message(message, chat_history): 83 | bot_message = self.retrival_chain.run(message) 84 | chat_history.append((message, bot_message)) 85 | return "", chat_history 86 | 87 | msg.submit(respond_message, [msg, chatbot], [msg, chatbot]) 88 | send.click(respond_message, [msg, chatbot], [msg, chatbot]) 89 | 90 | button1 = gr.Button( 91 | "Can you discuss the potential for further growth in the number of" 92 | " Apple devices per iPhone user?" 93 | ) 94 | button2 = gr.Button("How is Apple ecosystem helping driving the revenue?") 95 | button3 = gr.Button("How is the feedback on Apple Pay Later?") 96 | 97 | def send_button_clicked(x): 98 | return gr.update( 99 | value="""Can you discuss the potential for further growth in the number of Apple devices per iPhone user? Additionally, could you elaborate on how the monetization per user might vary between highly engaged "super users" and those who are not as deeply integrated into the Apple ecosystem?""" 100 | ) 101 | 102 | def ask_ai_strategy(x): 103 | question = """What do you think of the relationship between Apple and it's customers? Could you give few examples on Apple trying to improve the customer relationship?""" 104 | return gr.update(value=question) 105 | 106 | def ask_pay_later(x): 107 | question = """Maybe as a quick follow-up, you talked about Apple Pay Later, how has the feedback been so far and how do you expect the adoption of our debt service over the next few quarters? Thank you.""" 108 | return gr.update(value=question) 109 | 110 | button1.click(send_button_clicked, msg, msg) 111 | button2.click(ask_ai_strategy, msg, msg) 112 | button3.click(ask_pay_later, msg, msg) 113 | 114 | clear.click(lambda: None, None, chatbot, queue=False) 115 | 116 | return blocks 117 | -------------------------------------------------------------------------------- /advanced/embedding/baai_bge/example_usage.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | import time 3 | import socket 4 | 5 | from leptonai.client import Client, local, current # noqa: F401 6 | 7 | 8 | def is_port_open(host, port): 9 | """Check if a port is open on a given host.""" 10 | with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: 11 | s.settimeout(1) 12 | try: 13 | s.connect((host, port)) 14 | return True 15 | except socket.error: 16 | return False 17 | 18 | 19 | def wait_for_port(host, port, interval=5): 20 | """Wait for a port to be connectable.""" 21 | while True: 22 | if is_port_open(host, port): 23 | print(f"Port {port} on {host} is now connectable!") 24 | break 25 | else: 26 | print( 27 | f"Port {port} on {host} is not ready yet. Retrying in" 28 | f" {interval} seconds..." 29 | ) 30 | time.sleep(interval) 31 | 32 | 33 | def main(): 34 | # launches "python main.py" in a subprocess so we can use the client 35 | # to test it. 36 | # 37 | print("Launching the photon in a subprocess on port 8080...") 38 | p = subprocess.Popen( 39 | ["python", "main.py"], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL 40 | ) 41 | wait_for_port("localhost", 8080) 42 | 43 | # Note: this is not necessary if you are running the photon in the lepton 44 | # server. 
To run it in the server, you can do 45 | # lep photon run -n bge -m main.py --resource-shape gpu.a10 46 | # and then instead of using local, you can use the client as 47 | # c = Client(current(), "bge") 48 | # where current() is a helper function to get the current workspace. 49 | 50 | c = Client(local()) 51 | # c = Client(current(), "bge") 52 | print("\nThe client has the following endpoints:") 53 | print(c.paths()) 54 | print("For the encode endpoint, the docstring is as follows:") 55 | print("***begin docstring***") 56 | print(c.encode.__doc__) 57 | print("***end docstring***") 58 | 59 | print("\n\nRunning the encode endpoint...") 60 | query = "The quick brown fox jumps over the lazy dog." 61 | ret = c.encode(sentences=query) 62 | print("The result is (truncated, showing first 5):") 63 | print(ret[:5]) 64 | print(f"(the full result is a list of {len(ret)} floats)") 65 | 66 | print("\n\nRunning the rank endpoint...") 67 | sentences = [ 68 | "the fox jumps over the dog", 69 | "the photon is a particle and a wave", 70 | "let the record show that the shipment has arrived", 71 | "the cat jumps on the fox", 72 | ] 73 | rank, score = c.rank(query=query, sentences=sentences) 74 | print("The rank and score are respectively:") 75 | print([(r, s) for r, s in zip(rank, score)]) 76 | print(f"The query is: {query}") 77 | print("The sentences, ordered from closest to furthest, are:") 78 | print([sentences[i] for i in rank]) 79 | 80 | print("Finished. Closing everything.") 81 | # Closes the subprocess 82 | p.terminate() 83 | 84 | 85 | if __name__ == "__main__": 86 | main() 87 | -------------------------------------------------------------------------------- /advanced/embedding/baai_bge/main.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import List, Union, Tuple 3 | 4 | from leptonai.photon import Photon, HTTPException 5 | 6 | 7 | # Transcribed from https://github.com/FlagOpen/FlagEmbedding/tree/master#model-list 8 | AVAILABLE_MODELS_AND_INSTRUCTIONS = { 9 | "BAAI/llm-embedder": None, 10 | "BAAI/bge-reranker-large": None, 11 | "BAAI/bge-reranker-base": None, 12 | "BAAI/bge-large-en-v1.5": ( 13 | "Represent this sentence for searching relevant passages: " 14 | ), 15 | "BAAI/bge-base-en-v1.5": ( 16 | "Represent this sentence for searching relevant passages: " 17 | ), 18 | "BAAI/bge-small-en-v1.5": ( 19 | "Represent this sentence for searching relevant passages: " 20 | ), 21 | "BAAI/bge-large-zh-v1.5": "为这个句子生成表示以用于检索相关文章:", 22 | "BAAI/bge-base-zh-v1.5": "为这个句子生成表示以用于检索相关文章:", 23 | "BAAI/bge-small-zh-v1.5": "为这个句子生成表示以用于检索相关文章:", 24 | "BAAI/bge-large-en": "Represent this sentence for searching relevant passages: ", 25 | "BAAI/bge-base-en": "Represent this sentence for searching relevant passages: ", 26 | "BAAI/bge-small-en": "Represent this sentence for searching relevant passages: ", 27 | "BAAI/bge-large-zh": "为这个句子生成表示以用于检索相关文章:", 28 | "BAAI/bge-base-zh": "为这个句子生成表示以用于检索相关文章:", 29 | "BAAI/bge-small-zh": "为这个句子生成表示以用于检索相关文章:", 30 | } 31 | 32 | 33 | class BGEEmbedding(Photon): 34 | """ 35 | The BGE embedding model from BAAI. 36 | """ 37 | 38 | requirement_dependency = [ 39 | "FlagEmbedding", 40 | ] 41 | 42 | # manage the max concurrency of the photon. This is the number of requests 43 | # that can be handled at the same time. 
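    # (The value 4 is just this example's default; the best setting depends on model size,
    # GPU memory, and your latency requirements.)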
44 | handler_max_concurrency = 4 45 | 46 | DEFAULT_MODEL_NAME = "BAAI/bge-large-en-v1.5" 47 | DEFAULT_QUERY_INSTRUCTION = AVAILABLE_MODELS_AND_INSTRUCTIONS[DEFAULT_MODEL_NAME] 48 | DEFAULT_USE_FP16 = True 49 | DEFAULT_NORMALIZE_EMBEDDINGS = True 50 | 51 | def init(self): 52 | from FlagEmbedding import FlagModel 53 | 54 | model_name = os.environ.get("MODEL_NAME", self.DEFAULT_MODEL_NAME) 55 | if model_name not in AVAILABLE_MODELS_AND_INSTRUCTIONS: 56 | raise ValueError( 57 | f"Model name {model_name} not found. Available models:" 58 | f" {AVAILABLE_MODELS_AND_INSTRUCTIONS.keys()}" 59 | ) 60 | query_instruction = os.environ.get( 61 | "QUERY_INSTRUCTION", self.DEFAULT_QUERY_INSTRUCTION 62 | ) 63 | use_fp16 = os.environ.get("USE_FP16", self.DEFAULT_USE_FP16) 64 | normalize_embeddings = os.environ.get( 65 | "NORMALIZE_EMBEDDINGS", self.DEFAULT_NORMALIZE_EMBEDDINGS 66 | ) 67 | self._model = FlagModel( 68 | model_name, 69 | query_instruction_for_retrieval=query_instruction, 70 | use_fp16=use_fp16, 71 | normalize_embeddings=normalize_embeddings, 72 | ) 73 | 74 | @Photon.handler 75 | def encode(self, sentences: Union[str, List[str]]) -> List[float]: 76 | """ 77 | Encodes the current sentences into embeddings. 78 | """ 79 | embeddings = self._model.encode(sentences) 80 | return embeddings.tolist() 81 | 82 | @Photon.handler 83 | def rank(self, query: str, sentences: List[str]) -> Tuple[List[int], List[float]]: 84 | """ 85 | Returns a ranked list of indices of the most relevant sentences. This uses 86 | the inner product of the embeddings to rank the sentences. If the model is 87 | not initialized as normalize_embeddings=True, this will raise an error. The 88 | relative similarity scores are also returned. 89 | """ 90 | if not self._model.normalize_embeddings: 91 | raise HTTPException( 92 | status_code=500, 93 | detail="Model must have normalize_embeddings=True to use rank.", 94 | ) 95 | embeddings = self._model.encode([query] + sentences) 96 | query_embedding = embeddings[0] 97 | sentence_embeddings = embeddings[1:] 98 | inner_product = query_embedding @ sentence_embeddings.T 99 | sorted_indices = inner_product.argsort()[::-1] 100 | return sorted_indices.tolist(), inner_product[sorted_indices].tolist() 101 | 102 | 103 | if __name__ == "__main__": 104 | # TODO: change the name of the class "MyPhoton" to the name of your photon 105 | ph = BGEEmbedding() 106 | ph.launch(port=8080) 107 | -------------------------------------------------------------------------------- /advanced/flamingo/README.md: -------------------------------------------------------------------------------- 1 | # Flamingo 2 | 3 | [Flamingo](https://www.deepmind.com/blog/tackling-multiple-tasks-with-a-single-visual-language-model) is an effective and efficient general-purpose family of models that can be applied to image and video understanding tasks with minimal task-specific examples. In this example we are going to run Flamingo with [open-flamingo](https://github.com/mlfoundations/open_flamingo) on Lepton. 4 | 5 | ## Install Lepton sdk 6 | ```shell 7 | pip install leptonai 8 | ``` 9 | 10 | ## Launch Flamingo inference service locally 11 | 12 | Run: 13 | ```shell 14 | lep photon run -n flamingo -m photon.py 15 | ``` 16 | Although it's runnable on cpu, we recommend you to use a gpu to run vision model to get more satisfying performance. 17 | 18 | ## Launch Flamingo inference service in the cloud 19 | 20 | Similar to other examples, you can run Flamingo with the following command. 
21 | 22 | ```shell 23 | lep photon create -n flamingo -m photon.py 24 | lep photon push -n flamingo 25 | lep photon run \ 26 | -n flamingo \ 27 | --resource-shape gpu.a10 28 | ``` 29 | 30 | Optionally, add e.g. `--env OPEN_FLAMINGO_MODEL:openflamingo/OpenFlamingo-4B-vitl-rpj3b` to specify the model you would like to run. The supported model names can be found in the open-flamingo repository's README file. 31 | 32 | You can visit [dashboard.lepton.ai](https://dashboard.lepton.ai/) to try out the model. 33 | 34 | Note: in default, the server is protected via a token, so you won't be able to access the gradio UI. This is by design to provide adequate security. If you want to make the UI public, you can either add the `--public` argument to `lep photon run`, or update the deployment with: 35 | 36 | ```shell 37 | lep deployment update -n flamingo --public 38 | ``` 39 | 40 | ### Client 41 | 42 | Once the inference service is up (either locally or in the cloud), you can use the client to access it in a programmatical way: 43 | 44 | ```python 45 | from leptonai.client import Client, local, current 46 | 47 | # Use this if you are running locally 48 | client = Client(local()) 49 | # Or, if you are logged in to your workspace via `lep login` already 50 | # and have launched it: 51 | # client = Client(current(), "flamingo") 52 | 53 | inputs = { 54 | "demo_images": [ 55 | "http://images.cocodataset.org/val2017/000000039769.jpg", 56 | "http://images.cocodataset.org/test-stuff2017/000000028137.jpg" 57 | ], 58 | "demo_texts": [ 59 | "An image of two cats.", 60 | "An image of a bathroom sink." 61 | ], 62 | "query_image": "http://images.cocodataset.org/test-stuff2017/000000028352.jpg", 63 | "query_text": "An image of" 64 | } 65 | res = client.run(**inputs) 66 | 67 | print(inputs["query_text"] + res) 68 | ``` 69 | 70 | ``` 71 | An image of a buffet table. 72 | ``` 73 | -------------------------------------------------------------------------------- /advanced/flamingo/photon.py: -------------------------------------------------------------------------------- 1 | import base64 2 | from io import BytesIO 3 | import os 4 | 5 | from typing import List, Union 6 | 7 | from leptonai.photon import Photon, FileParam, HTTPException 8 | 9 | 10 | # Pretrained models are obtained from https://github.com/mlfoundations/open_flamingo 11 | # and transcribed to the following dictionary. 
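# Each entry lists, in order: [clip_vision_encoder_path, clip_vision_encoder_pretrained,
# lang_encoder_path, tokenizer_path, cross_attn_every_n_layers], matching how init()
# below unpacks model_spec.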
12 | pretrained_models = { 13 | "openflamingo/OpenFlamingo-3B-vitl-mpt1b": [ 14 | "ViT-L-14", 15 | "openai", 16 | "mosaicml/mpt-1b-redpajama-200b", 17 | "mosaicml/mpt-1b-redpajama-200b", 18 | 1, 19 | ], 20 | "OpenFlamingo-3B-vitl-mpt1b-langinstruct": [ 21 | "ViT-L-14", 22 | "openai", 23 | "mosaicml/mpt-1b-redpajama-200b-dolly", 24 | "mosaicml/mpt-1b-redpajama-200b-dolly", 25 | 1, 26 | ], 27 | "openflamingo/OpenFlamingo-4B-vitl-rpj3b": [ 28 | "ViT-L-14", 29 | "openai", 30 | "togethercomputer/RedPajama-INCITE-Base-3B-v1", 31 | "togethercomputer/RedPajama-INCITE-Base-3B-v1", 32 | 2, 33 | ], 34 | "openflamingo/OpenFlamingo-4B-vitl-rpj3b-langinstruct": [ 35 | "ViT-L-14", 36 | "openai", 37 | "togethercomputer/RedPajama-INCITE-Instruct-3B-v1", 38 | "togethercomputer/RedPajama-INCITE-Instruct-3B-v1", 39 | 2, 40 | ], 41 | "openflamingo/OpenFlamingo-9B-vitl-mpt7b": [ 42 | "ViT-L-14", 43 | "openai", 44 | "mosaicml/mpt-7b", 45 | "mosaicml/mpt-7b", 46 | 4, 47 | ], 48 | } 49 | 50 | 51 | class Flamingo(Photon): 52 | requirement_dependency = ["open-flamingo", "huggingface-hub", "Pillow", "requests"] 53 | 54 | IMAGE_TOKEN = "" 55 | END_OF_TEXT_TOKEN = "<|endofchunk|>" 56 | DEFAULT_MODEL = "openflamingo/OpenFlamingo-3B-vitl-mpt1b" 57 | 58 | def init(self): 59 | from open_flamingo import create_model_and_transforms 60 | from huggingface_hub import hf_hub_download 61 | import torch 62 | 63 | if torch.cuda.is_available(): 64 | self.device = "cuda" 65 | else: 66 | self.device = "cpu" 67 | 68 | model_name = os.environ.get("OPEN_FLAMINGO_MODEL", self.DEFAULT_MODEL) 69 | try: 70 | model_spec = pretrained_models[model_name] 71 | except KeyError: 72 | raise KeyError( 73 | f"Model {model_name} not found in pretrained_models. Available models:" 74 | f" {pretrained_models.keys()}" 75 | ) 76 | 77 | self.model, self.image_processor, self.tokenizer = create_model_and_transforms( 78 | clip_vision_encoder_path=model_spec[0], 79 | clip_vision_encoder_pretrained=model_spec[1], 80 | lang_encoder_path=model_spec[2], 81 | tokenizer_path=model_spec[3], 82 | cross_attn_every_n_layers=model_spec[4], 83 | ) 84 | 85 | checkpoint_path = hf_hub_download( 86 | "openflamingo/OpenFlamingo-3B-vitl-mpt1b", "checkpoint.pt" 87 | ) 88 | self.model.load_state_dict(torch.load(checkpoint_path), strict=False) 89 | self.model = self.model.to(self.device) 90 | 91 | self.tokenizer.padding_side = "left" 92 | 93 | def _img_param_to_img(self, param): 94 | from PIL import Image 95 | import requests 96 | 97 | if isinstance(param, FileParam): 98 | content = param.file.read() 99 | elif isinstance(param, str): 100 | if param.startswith("http://") or param.startswith("https://"): 101 | content = requests.get(param).content 102 | else: 103 | content = base64.b64decode(param).decode("utf-8") 104 | else: 105 | raise TypeError(f"Invalid image type: {type(param)}") 106 | 107 | return Image.open(BytesIO(content)) 108 | 109 | @Photon.handler( 110 | example={ 111 | "demo_images": [ 112 | "http://images.cocodataset.org/val2017/000000039769.jpg", 113 | "http://images.cocodataset.org/test-stuff2017/000000028137.jpg", 114 | ], 115 | "demo_texts": ["An image of two cats.", "An image of a bathroom sink."], 116 | "query_image": ( 117 | "http://images.cocodataset.org/test-stuff2017/000000028352.jpg" 118 | ), 119 | "query_text": "An image of", 120 | }, 121 | ) 122 | def run( 123 | self, 124 | demo_images: List[Union[FileParam, str]], 125 | demo_texts: List[str], 126 | query_image: Union[FileParam, str], 127 | query_text: str, 128 | max_new_tokens: int = 32, 129 | num_beams: 
int = 3, 130 | ) -> str: 131 | import torch 132 | 133 | if len(demo_images) != len(demo_texts): 134 | raise HTTPException( 135 | status_code=400, 136 | detail="The number of demo images and demo texts must be the same.", 137 | ) 138 | 139 | demo_images = [self._img_param_to_img(img) for img in demo_images] 140 | query_image = self._img_param_to_img(query_image) 141 | 142 | vision_x = [ 143 | self.image_processor(img).unsqueeze(0).to(self.device) 144 | for img in (demo_images + [query_image]) 145 | ] 146 | vision_x = torch.cat(vision_x, dim=0) 147 | vision_x = vision_x.unsqueeze(1).unsqueeze(0) 148 | 149 | lang_x_text = self.END_OF_TEXT_TOKEN.join( 150 | f"{self.IMAGE_TOKEN}{text}" for text in (demo_texts + [query_text]) 151 | ) 152 | lang_x = self.tokenizer( 153 | lang_x_text, 154 | return_tensors="pt", 155 | ) 156 | 157 | generated_text = self.model.generate( 158 | vision_x=vision_x, 159 | lang_x=lang_x["input_ids"].to(self.device), 160 | attention_mask=lang_x["attention_mask"].to(self.device), 161 | max_new_tokens=max_new_tokens, 162 | num_beams=num_beams, 163 | ) 164 | generated_text = self.tokenizer.decode(generated_text[0]) 165 | 166 | if generated_text.startswith(lang_x_text): 167 | generated_text = generated_text[len(lang_x_text) :] 168 | if generated_text.endswith(self.END_OF_TEXT_TOKEN): 169 | generated_text = generated_text[: -len(self.END_OF_TEXT_TOKEN)] 170 | 171 | return generated_text 172 | -------------------------------------------------------------------------------- /advanced/hf-stream-llm/photon.py: -------------------------------------------------------------------------------- 1 | import os 2 | from threading import Thread 3 | from queue import Queue 4 | 5 | from loguru import logger 6 | from leptonai.photon import Photon, StreamingResponse 7 | 8 | 9 | class HfStreamLLM(Photon): 10 | 11 | deployment_template = { 12 | "resource_shape": "gpu.a10.6xlarge", 13 | "env": { 14 | "MODEL_PATH": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", 15 | }, 16 | "secret": [ 17 | "HUGGING_FACE_HUB_TOKEN", 18 | ], 19 | } 20 | 21 | requirement_dependency = [ 22 | "transformers", 23 | ] 24 | 25 | handler_max_concurrency = 4 26 | 27 | def init(self): 28 | from transformers import AutoModelForCausalLM, AutoTokenizer 29 | 30 | model_path = os.environ["MODEL_PATH"] 31 | 32 | self._tok = AutoTokenizer.from_pretrained(model_path) 33 | self._model = AutoModelForCausalLM.from_pretrained(model_path).to("cuda") 34 | 35 | self._generation_queue = Queue() 36 | 37 | for _ in range(self.handler_max_concurrency): 38 | Thread(target=self._generate, daemon=True).start() 39 | 40 | def _generate(self): 41 | while True: 42 | streamer, args, kwargs = self._generation_queue.get() 43 | try: 44 | self._model.generate(*args, **kwargs) 45 | except Exception as e: 46 | logger.error(f"Error in generation: {e}") 47 | streamer.text_queue.put(streamer.stop_signal) 48 | 49 | @Photon.handler 50 | def run(self, text: str, max_new_tokens: int = 100) -> StreamingResponse: 51 | from transformers import TextIteratorStreamer 52 | 53 | streamer = TextIteratorStreamer(self._tok, skip_prompt=True, timeout=60) 54 | inputs = self._tok(text, return_tensors="pt").to("cuda") 55 | self._generation_queue.put_nowait(( 56 | streamer, 57 | (), 58 | dict(inputs, streamer=streamer, max_new_tokens=max_new_tokens), 59 | )) 60 | return streamer 61 | -------------------------------------------------------------------------------- /advanced/idefics/README.md: -------------------------------------------------------------------------------- 1 | # IDEFICS 2 | 
3 | [IDEFICS](https://huggingface.co/blog/idefics) is a multimodal model that accepts sequences of images and texts as input and generates coherent text as output. It can answer questions about images, describe visual content, create stories grounded in multiple images, etc. IDEFICS is an open-access reproduction of Flamingo and is comparable in performance with the original closed-source model across various image-text understanding benchmarks. It comes in two variants - 80 billion parameters and 9 billion parameters. In this example, we are going to use the 9 billion parameters version of the model to demonstrate how to do multimodal text generation on Lepton. 4 | 5 | ## Install Lepton sdk 6 | ```shell 7 | pip install leptonai 8 | ``` 9 | 10 | ## Launch inference service locally 11 | 12 | To run locally, first install dependencies: 13 | ```shell 14 | pip install -r requirements.txt 15 | ``` 16 | 17 | After installing dependencies, you can launch inference service locally by running: 18 | 19 | ```shell 20 | lep photon run -n idefics -m photon.py 21 | ``` 22 | 23 | By default, the service runs [9b-instruct](HuggingFaceM4/idefics-9b-instruct) version of the model. You can use `MODEL` environment variable to select a different variant of the model to run, e.g.: 24 | 25 | ``` 26 | MODEL=HuggingFaceM4/idefics-9b lep photon run -n idefics -m photon.py 27 | ``` 28 | 29 | ## Launch inference service in the cloud 30 | 31 | Similar to other examples, you can run services on Lepton Cloud Platform easily, e.g.: 32 | 33 | ```shell 34 | lep photon create -n idefics -m photon.py 35 | lep photon push -n idefics 36 | lep photon run \ 37 | -n idefics \ 38 | --resource-shape gpu.a10 39 | ``` 40 | 41 | By default, the service runs [9b-instruct](HuggingFaceM4/idefics-9b-instruct) version of the model. You can use `MODEL` environment variable to select a different variant of the model to run, e.g.: 42 | 43 | ```shell 44 | lep photon create -n idefics -m photon.py 45 | lep photon push -n idefics 46 | lep photon run \ 47 | -n idefics \ 48 | --env MODEL="HuggingFaceM4/idefics-9b" \ 49 | --resource-shape gpu.a10 50 | ``` 51 | 52 | You can visit [dashboard.lepton.ai](https://dashboard.lepton.ai/) to try out the model. 53 | 54 | If you want to make the api public, you can either add the `--public` argument to `lep photon run`, or update the deployment with: 55 | 56 | ```shell 57 | lep deployment update -n idefics --public 58 | ``` 59 | 60 | ## Client 61 | 62 | Once the inference service is up (either locally or in the cloud), you can use the client to access it in a programmatical way: 63 | 64 | ```python 65 | from leptonai.client import Client, local, current 66 | 67 | # Use this if you are running locally 68 | client = Client(local()) 69 | # Or, if you are logged in to your workspace via `lep login` already 70 | # and have launched it: 71 | # client = Client(current(), "idefics", stream=True) 72 | ``` 73 | 74 | ```python 75 | image = "https://huggingfacem4-idefics-playground.hf.space/file=/home/user/app/example_images/obama-harry-potter.jpg" 76 | question = "Which famous person does the person in the image look like? Could you craft an engaging narrative featuring this character from the image as the main protagonist?" 77 | eos_token = "" 78 | prompts = [ 79 | f"User: {question}", 80 | image, 81 | eos_token, 82 | "\nAssistant:", 83 | ] 84 | res = client.run(prompts=prompts) 85 | print(res) 86 | ``` 87 | 88 | ``` 89 | User: Which famous person does the person in the image look like? 
Could you craft an engaging narrative featuring this character from the image as the main protagonist? 90 | Assistant: The person in the image looks like Harry Potter, the famous wizard from the Harry Potter book series. As the main protagonist, Harry Potter embarks on a thrilling adventure to defeat the evil Lord Voldemort and save the wizarding world from his grasp. Along the way, he makes new friends, learns powerful spells, and discovers the true extent of his own magical abilities. With the help of his loyal companions Hermione Granger and Ron Weasley, Harry Potter faces countless challenges and obstacles, ultimately emerging victorious and becoming a legend in the wizarding world. 91 | ``` 92 | -------------------------------------------------------------------------------- /advanced/idefics/photon.py: -------------------------------------------------------------------------------- 1 | from io import BytesIO 2 | import os 3 | from typing import Union, List 4 | 5 | from leptonai.photon import Photon, FileParam 6 | 7 | 8 | class IDEFICS(Photon): 9 | requirement_dependency = [ 10 | "accelerate", 11 | "Pillow", 12 | "torch", 13 | "transformers", 14 | "protobuf", 15 | ] 16 | 17 | def init(self): 18 | import torch 19 | from transformers import IdeficsForVisionText2Text, AutoProcessor 20 | 21 | self.device = "cuda" if torch.cuda.is_available() else "cpu" 22 | 23 | checkpoint = os.environ.get("MODEL", "HuggingFaceM4/idefics-9b-instruct") 24 | self.model = IdeficsForVisionText2Text.from_pretrained( 25 | checkpoint, torch_dtype=torch.bfloat16, low_cpu_mem_usage=True 26 | ).to(self.device) 27 | self.processor = AutoProcessor.from_pretrained(checkpoint) 28 | 29 | @Photon.handler( 30 | example={ 31 | "prompts": [ 32 | ( 33 | "User: Which famous person does the person in the image look like?" 34 | " Could you craft an engaging narrative featuring this character" 35 | " from the image as the main protagonist?" 
36 | ), 37 | "https://huggingfacem4-idefics-playground.hf.space/file=/home/user/app/example_images/obama-harry-potter.jpg", 38 | "", 39 | "\nAssistant:", 40 | ] 41 | } 42 | ) 43 | def run( 44 | self, 45 | prompts: Union[List[Union[str, FileParam]], List[List[Union[str, FileParam]]]], 46 | eos_token: str = "", 47 | bad_words: List[str] = ["", ""], 48 | max_length: int = 256, 49 | **kwargs, 50 | ) -> Union[str, List[str]]: 51 | from PIL import Image 52 | 53 | if not prompts: 54 | return [] 55 | 56 | input_is_batch = isinstance(prompts[0], list) 57 | if not input_is_batch: 58 | prompts = [prompts] 59 | 60 | for prompt in prompts: 61 | for i, p in enumerate(prompt): 62 | if isinstance(p, FileParam): 63 | prompt[i] = Image.open(BytesIO(p.read())).convert("RGB") 64 | 65 | inputs = self.processor( 66 | prompts, add_end_of_utterance_token=False, return_tensors="pt" 67 | ).to(self.device) 68 | 69 | # Generation args 70 | exit_condition = self.processor.tokenizer( 71 | eos_token, add_special_tokens=False 72 | ).input_ids 73 | bad_words_ids = self.processor.tokenizer( 74 | bad_words, add_special_tokens=False 75 | ).input_ids 76 | 77 | generated_ids = self.model.generate( 78 | **inputs, 79 | eos_token_id=exit_condition, 80 | bad_words_ids=bad_words_ids, 81 | max_length=max_length, 82 | **kwargs, 83 | ) 84 | generated_text = self.processor.batch_decode( 85 | generated_ids, skip_special_tokens=True 86 | ) 87 | 88 | if not input_is_batch: 89 | return generated_text[0] 90 | else: 91 | return generated_text 92 | -------------------------------------------------------------------------------- /advanced/idefics/requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate 2 | Pillow 3 | torch 4 | transformers 5 | protobuf 6 | -------------------------------------------------------------------------------- /advanced/lavis/README.md: -------------------------------------------------------------------------------- 1 | # LAVIS 2 | 3 | [LAVIS](https://github.com/salesforce/LAVIS) is a Python deep learning library for LAnguage-and-VISion intelligence research and applications that supports 10+ tasks like retrieval, captioning, visual question answering (vqa), multimodal classification. In this example we are going to show how to use LAVIS to do image captioning, vqa and features extraction on Lepton. 4 | 5 | ## Install Lepton sdk 6 | ```shell 7 | pip install leptonai 8 | ``` 9 | 10 | ## Launch inference service locally 11 | 12 | To run locally, first install dependencies: 13 | ```shell 14 | pip install -r requirements.txt 15 | ``` 16 | 17 | After installing dependencies, you can launch inference service locally by running: 18 | 19 | ### Image Captioning 20 | 21 | ```shell 22 | lep photon run -n caption -m caption.py 23 | ``` 24 | 25 | ### Visual Question Answering (VQA) 26 | 27 | ```shell 28 | lep photon run -n vqa -m vqa.py 29 | ``` 30 | 31 | ### Features Extraction 32 | 33 | ```shell 34 | lep photon run -n extract-features -m extract-features.py 35 | ``` 36 | 37 | ## Launch inference service in the cloud 38 | 39 | Similar to other examples, you can run services on Lepton Cloud Platform easily, e.g.: 40 | 41 | ```shell 42 | lep photon create -n extract-features -m extract-features.py 43 | lep photon push -n extract-features 44 | lep photon run \ 45 | -n extract-features \ 46 | --resource-shape gpu.a10 47 | ``` 48 | 49 | You can visit [dashboard.lepton.ai](https://dashboard.lepton.ai/) to try out the model. 
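The captioning and VQA photons from the local section can be deployed to the cloud in the same way; swapping in the photon name and module should be all that is needed, e.g. for the captioning photon:

```shell
lep photon create -n caption -m caption.py
lep photon push -n caption
lep photon run \
  -n caption \
  --resource-shape gpu.a10
```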
50 | 51 | Note: in default, the server is protected via a token, so you won't be able to access the gradio UI. This is by design to provide adequate security. If you want to make the UI public, you can either add the `--public` argument to `lep photon run`, or update the deployment with: 52 | 53 | ```shell 54 | lep deployment update -n extract-features --public 55 | ``` 56 | 57 | ## Client 58 | 59 | Once the inference service is up (either locally or in the cloud), you can use the client to access it in a programmatical way: 60 | 61 | ```python 62 | from leptonai.client import Client, local, current 63 | 64 | # Use this if you are running locally 65 | client = Client(local()) 66 | # Or, if you are logged in to your workspace via `lep login` already 67 | # and have launched it: 68 | # client = Client(current(), "extract-features") # or "caption" for Image Captioning, or "vqa" for VQA 69 | ``` 70 | 71 | ### Image Captioning 72 | ```python 73 | image = "http://images.cocodataset.org/val2017/000000039769.jpg" 74 | caption = client.run(image=image) 75 | 76 | print(caption) 77 | ``` 78 | 79 | ``` 80 | a couple of cats laying on top of a pink couch 81 | ``` 82 | 83 | ### Visual Question Answering (VQA) 84 | 85 | ```python 86 | image = "http://images.cocodataset.org/val2017/000000039769.jpg" 87 | question = "How many cats?" 88 | answer = client.run(image=image, question=question) 89 | 90 | print(answer) 91 | ``` 92 | 93 | ``` 94 | 2 95 | ``` 96 | 97 | ### Features Extraction 98 | 99 | ```python 100 | # image embedding 101 | image = "http://images.cocodataset.org/val2017/000000039769.jpg" 102 | features = client.run(image=image) 103 | 104 | print(f"embedding dimensions: {len(features)} x {len(features[0])}") 105 | ``` 106 | 107 | ``` 108 | embedding dimensions: 32 x 768 109 | ``` 110 | 111 | ```python 112 | # text embedding 113 | text = "a large fountain spewing water into the air" 114 | features = client.run(text=text) 115 | 116 | print(f"embedding dimensions: {len(features)} x {len(features[0])}") 117 | ``` 118 | 119 | ``` 120 | embedding dimensions: 12 x 768 121 | ``` 122 | 123 | ```python 124 | # multimodal embedding 125 | image = "http://images.cocodataset.org/val2017/000000039769.jpg" 126 | text = "two cats" 127 | features = client.run(image=image, text=text) 128 | 129 | print(f"embedding dimensions: {len(features)} x {len(features[0])}") 130 | ``` 131 | 132 | ``` 133 | embedding dimensions: 32 x 768 134 | ``` 135 | -------------------------------------------------------------------------------- /advanced/lavis/caption.py: -------------------------------------------------------------------------------- 1 | from io import BytesIO 2 | from typing import Union 3 | 4 | from leptonai.photon import Photon, FileParam, get_file_content 5 | 6 | 7 | class CaptionPhoton(Photon): 8 | requirement_dependency = [ 9 | "salesforce-lavis", 10 | "Pillow", 11 | "opencv-python!=4.8.0.76", 12 | "opencv-contrib-python!=4.8.0.76", 13 | ] 14 | 15 | def _get_img(self, param): 16 | from PIL import Image 17 | 18 | content = get_file_content(param) 19 | return Image.open(BytesIO(content)).convert("RGB") 20 | 21 | def init(self): 22 | import torch 23 | from lavis.models import load_model_and_preprocess 24 | 25 | if torch.cuda.is_available(): 26 | self.device = torch.device("cuda") 27 | else: 28 | self.device = torch.device("cpu") 29 | 30 | # Here we choose blip model, for other available models, please refer to: 31 | # 32 | # from lavis.models import model_zoo 33 | # print(model_zoo) 34 | # 35 | self.model_and_preprocess 
= load_model_and_preprocess( 36 | name="blip_caption", 37 | model_type="large_coco", 38 | is_eval=True, 39 | device=self.device, 40 | ) 41 | 42 | @Photon.handler( 43 | example={"image": "http://images.cocodataset.org/val2017/000000039769.jpg"} 44 | ) 45 | def run(self, image: Union[FileParam, str]) -> str: 46 | model, vis_processors, _ = self.model_and_preprocess 47 | 48 | image = self._get_img(image) 49 | image = vis_processors["eval"](image).unsqueeze(0).to(self.device) 50 | captions = model.generate({"image": image}) 51 | return captions[0] 52 | -------------------------------------------------------------------------------- /advanced/lavis/extract-features.py: -------------------------------------------------------------------------------- 1 | from io import BytesIO 2 | from typing import Union, Optional, List 3 | 4 | from leptonai.photon import Photon, FileParam, get_file_content, HTTPException 5 | 6 | 7 | class ExtractFeaturesPhoton(Photon): 8 | requirement_dependency = [ 9 | "salesforce-lavis", 10 | "Pillow", 11 | "opencv-python!=4.8.0.76", 12 | "opencv-contrib-python!=4.8.0.76", 13 | ] 14 | 15 | def _get_img(self, param): 16 | from PIL import Image 17 | 18 | content = get_file_content(param) 19 | return Image.open(BytesIO(content)).convert("RGB") 20 | 21 | def init(self): 22 | import torch 23 | from lavis.models import load_model_and_preprocess 24 | 25 | if torch.cuda.is_available(): 26 | self.device = torch.device("cuda") 27 | else: 28 | self.device = torch.device("cpu") 29 | 30 | # Here we choose blip2 model, for other available models, please refer to: 31 | # 32 | # from lavis.models import model_zoo 33 | # print(model_zoo) 34 | # 35 | self.model_and_preprocess = load_model_and_preprocess( 36 | name="blip2_feature_extractor", 37 | model_type="pretrain", 38 | is_eval=True, 39 | device=self.device, 40 | ) 41 | 42 | @Photon.handler( 43 | examples=[ 44 | {"image": "http://images.cocodataset.org/val2017/000000039769.jpg"}, 45 | {"text": "a large fountain spewing water into the air"}, 46 | { 47 | "image": "http://images.cocodataset.org/val2017/000000039769.jpg", 48 | "text": "two cats", 49 | }, 50 | ] 51 | ) 52 | def run( 53 | self, image: Optional[Union[FileParam, str]] = None, text: Optional[str] = None 54 | ) -> List[float]: 55 | model, vis_processors, txt_processors = self.model_and_preprocess 56 | 57 | if image is None and text is None: 58 | raise HTTPException( 59 | status_code=400, detail="Either image or text should be provided." 
60 | ) 61 | 62 | if image is not None: 63 | image = self._get_img(image) 64 | image = vis_processors["eval"](image).unsqueeze(0).to(self.device) 65 | if text is not None: 66 | text = txt_processors["eval"](text) 67 | 68 | if image is not None and text is None: 69 | # image embedding 70 | features = model.extract_features({"image": image}, mode="image") 71 | return features.image_embeds[0].tolist() 72 | elif image is None and text is not None: 73 | # text embedding 74 | features = model.extract_features({"text_input": [text]}, mode="text") 75 | return features.text_embeds[0].tolist() 76 | else: 77 | # multimodal embedding 78 | features = model.extract_features({"image": image, "text_input": [text]}) 79 | return features.multimodal_embeds[0].tolist() 80 | -------------------------------------------------------------------------------- /advanced/lavis/requirements.txt: -------------------------------------------------------------------------------- 1 | salesforce-lavis 2 | Pillow 3 | opencv-python!=4.8.0.76 4 | opencv-contrib-python!=4.8.0.76 5 | -------------------------------------------------------------------------------- /advanced/lavis/vqa.py: -------------------------------------------------------------------------------- 1 | from io import BytesIO 2 | from typing import Union 3 | 4 | from leptonai.photon import Photon, FileParam, get_file_content 5 | 6 | 7 | class VQAPhoton(Photon): 8 | requirement_dependency = [ 9 | "salesforce-lavis", 10 | "Pillow", 11 | "opencv-python!=4.8.0.76", 12 | "opencv-contrib-python!=4.8.0.76", 13 | ] 14 | 15 | def _get_img(self, param): 16 | from PIL import Image 17 | 18 | content = get_file_content(param) 19 | return Image.open(BytesIO(content)).convert("RGB") 20 | 21 | def init(self): 22 | import torch 23 | from lavis.models import load_model_and_preprocess 24 | 25 | if torch.cuda.is_available(): 26 | self.device = torch.device("cuda") 27 | else: 28 | self.device = torch.device("cpu") 29 | 30 | # Here we choose blip model, for other available models, please refer to: 31 | # 32 | # from lavis.models import model_zoo 33 | # print(model_zoo) 34 | # 35 | self.model_and_preprocess = load_model_and_preprocess( 36 | name="blip_vqa", model_type="vqav2", is_eval=True, device=self.device 37 | ) 38 | 39 | @Photon.handler( 40 | example={ 41 | "image": "http://images.cocodataset.org/val2017/000000039769.jpg", 42 | "question": "How many cats?", 43 | } 44 | ) 45 | def run(self, image: Union[FileParam, str], question: str) -> str: 46 | model, vis_processors, txt_processors = self.model_and_preprocess 47 | image = self._get_img(image) 48 | image = vis_processors["eval"](image).unsqueeze(0).to(self.device) 49 | question = txt_processors["eval"](question) 50 | answers = model.predict_answers( 51 | samples={"image": image, "text_input": question}, 52 | inference_method="generate", 53 | ) 54 | return answers[0] 55 | -------------------------------------------------------------------------------- /advanced/layout-parser/main.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import os 3 | import requests 4 | from threading import Lock 5 | from typing import Union, Any, Dict 6 | 7 | from loguru import logger 8 | 9 | import layoutparser as lp 10 | from layoutparser.models.detectron2 import catalog 11 | import cv2 12 | 13 | from leptonai.photon import ( 14 | Photon, 15 | FileParam, 16 | get_file_content, 17 | PNGResponse, 18 | HTTPException, 19 | make_png_response, 20 | ) 21 | 22 | 23 | class LayoutParser(Photon): 24 | 
requirement_dependency = [ 25 | "layoutparser", 26 | "git+https://github.com/facebookresearch/detectron2.git", 27 | "pytesseract", 28 | ] 29 | 30 | system_dependency = [ 31 | "tesseract-ocr", 32 | ] 33 | 34 | # Layout parser ocr right now seems to be thread safe, so we can turn on 35 | # multithreading to avoid blocking and improve overall IO time. 36 | handler_max_concurrency = 4 37 | 38 | # The default model config. Specify "MODEL_CONFIG" env variable to 39 | # override this. 40 | DEFAULT_MODEL_CONFIG = "lp://PubLayNet/faster_rcnn_R_50_FPN_3x/config" 41 | 42 | # The path to save the model. 43 | MODEL_SAVE_PATH = "/tmp/layoutparser_lepton_cache" 44 | 45 | # You can specify the language code(s) of the documents to detect to improve 46 | # accuracy. The supported language and their code can be found at: 47 | # https://github.com/tesseract-ocr/langdata 48 | # The supported format is `+` connected string like `"eng+fra"` 49 | TESSERACT_LANGUAGE = "eng" 50 | TESSERACT_CONFIGS = {} 51 | 52 | def init(self): 53 | logger.debug("Loading model...") 54 | self.model = LayoutParser.safe_load_model( 55 | os.environ.get("MODEL_CONFIG", self.DEFAULT_MODEL_CONFIG) 56 | ) 57 | # We are not sure if the underlying layout parser model is thread safe, so we will 58 | # consider it a black box and use a lock to prevent concurrent access. 59 | self.model_lock = Lock() 60 | self.ocr_agent = lp.TesseractAgent( 61 | languages=os.environ.get("TESSERACT_LANGUAGE", self.TESSERACT_LANGUAGE), 62 | **self.TESSERACT_CONFIGS, 63 | ) 64 | logger.debug("Model loaded successfully.") 65 | 66 | @Photon.handler 67 | def detect(self, image: Union[str, FileParam]) -> Dict[str, Any]: 68 | """ 69 | Detects the layout of the image, and returns the layout in a dictionary. On the client 70 | side, if you want to recover the Layout object, you can use the `layoutparser.load_dict` 71 | functionality. 72 | """ 73 | cv_image = self._load_image(image) 74 | with self.model_lock: 75 | layout = self.model.detect(cv_image) 76 | return layout.to_dict() 77 | 78 | @Photon.handler 79 | def draw_detection_box( 80 | self, image: Union[str, FileParam], box_width: int = 3 81 | ) -> PNGResponse: 82 | """ 83 | Returns the detection box of the input image as a PNG image. 84 | """ 85 | cv_image = self._load_image(image) 86 | with self.model_lock: 87 | layout = self.model.detect(cv_image) 88 | img = lp.draw_box(cv_image, layout, box_width=box_width) 89 | return make_png_response(img) 90 | 91 | @Photon.handler 92 | def ocr( 93 | self, 94 | image: Union[str, FileParam], 95 | return_response: bool = False, 96 | return_only_text: bool = False, 97 | ) -> Union[str, Dict[str, Any]]: 98 | """ 99 | Carries out Tesseract ocr for the input image. If return_response=True, the full response 100 | is returned as a dictionary with two keys: `text` containing the text, and `data` containing 101 | the full response from Tesseract, as a DataFrame converted to a dict. If you want to recover 102 | the original DataFrame, you can use `pandas.DataFrame.from_dict(result["data"])`. 103 | """ 104 | cv_image = self._load_image(image) 105 | res = self.ocr_agent.detect( 106 | cv_image, return_response=return_response, return_only_text=return_only_text 107 | ) 108 | print(type(res)) 109 | print(str(res)) 110 | if return_response: 111 | # The result is a dict with two keys: "text" being the text, and "data" being a DataFrame. 112 | # We will convert it to a dict with data converted to a dict. 
113 | return {"text": res["text"], "data": res["data"].to_dict()} 114 | else: 115 | # The returned result is a string, so we will simply return it. 116 | return res 117 | 118 | @Photon.handler 119 | def draw_ocr_result( 120 | self, 121 | image: Union[str, FileParam], 122 | agg_level: int = 4, 123 | font_size: int = 12, 124 | with_box_on_text: bool = True, 125 | text_box_width: int = 1, 126 | ) -> PNGResponse: 127 | """ 128 | Returns the OCR result of the input image as a PNG image. Optionally, specify agg_level to 129 | aggregate the text into blocks. The default agg_level is 4, which means that the text will 130 | be aggregated in words. Options are 3 (LINE), 2 (PARA), 1 (BLOCK), and 0 (PAGE). 131 | """ 132 | try: 133 | agg_level_enum = lp.TesseractFeatureType(agg_level) 134 | except ValueError: 135 | raise HTTPException( 136 | status_code=400, 137 | detail=( 138 | f"agg_level should be an integer between 0 and 4. Got {agg_level}." 139 | ), 140 | ) 141 | cv_image = self._load_image(image) 142 | res = self.ocr_agent.detect(cv_image, return_response=True) 143 | layout = self.ocr_agent.gather_data(res, agg_level_enum) 144 | img = lp.draw_text( 145 | cv_image, 146 | layout, 147 | font_size=font_size, 148 | with_box_on_text=with_box_on_text, 149 | text_box_width=text_box_width, 150 | ) 151 | return make_png_response(img) 152 | 153 | @classmethod 154 | def safe_load_model(cls, config_path: str): 155 | """ 156 | A helper function to safely load the model to bypass the bug here: 157 | https://github.com/Layout-Parser/layout-parser/issues/168 158 | """ 159 | # override storage path 160 | if not os.path.exists(cls.MODEL_SAVE_PATH): 161 | os.mkdir(cls.MODEL_SAVE_PATH) 162 | config_path_split = config_path.split("/") 163 | dataset_name = config_path_split[-3] 164 | model_name = config_path_split[-2] 165 | # get the URLs from the MODEL_CATALOG and the CONFIG_CATALOG 166 | # (global variables .../layoutparser/models/detectron2/catalog.py) 167 | model_url = catalog.MODEL_CATALOG[dataset_name][model_name] 168 | config_url = catalog.CONFIG_CATALOG[dataset_name][model_name] 169 | 170 | config_file_path, model_file_path = None, None 171 | 172 | for url in [model_url, config_url]: 173 | filename = url.split("/")[-1].split("?")[0] 174 | save_to_path = f"{cls.MODEL_SAVE_PATH}/" + filename 175 | if "config" in filename: 176 | config_file_path = copy.deepcopy(save_to_path) 177 | if "model_final" in filename: 178 | model_file_path = copy.deepcopy(save_to_path) 179 | 180 | # skip if file exist in path 181 | if filename in os.listdir(f"{cls.MODEL_SAVE_PATH}/"): 182 | continue 183 | # Download file from URL 184 | r = requests.get( 185 | url, stream=True, headers={"user-agent": "Wget/1.16 (linux-gnu)"} 186 | ) 187 | with open(save_to_path, "wb") as f: 188 | for chunk in r.iter_content(chunk_size=4096): 189 | if chunk: 190 | f.write(chunk) 191 | 192 | # load the label map 193 | label_map = catalog.LABEL_MAP_CATALOG[dataset_name] 194 | 195 | return lp.models.Detectron2LayoutModel( 196 | config_path=config_file_path, 197 | model_path=model_file_path, 198 | label_map=label_map, 199 | ) 200 | 201 | def _load_image(self, image: Union[str, FileParam]): 202 | """ 203 | Loads the image, and returns the cv.Image object. Throws HTTPError if the image 204 | cannot be loaded. 
205 |         """
206 |         try:
207 |             file_content = get_file_content(
208 |                 image, return_file=True, allow_local_file=True
209 |             )
210 |         except Exception as e:
211 |             raise HTTPException(
212 |                 status_code=400,
213 |                 detail=(
214 |                     f"Cannot open image with source: {image}. Detailed error message:"
215 |                     f" {str(e)}"
216 |                 ),
217 |             )
218 |         try:
219 |             cv_image = cv2.imread(file_content.name)
220 |             cv_image = cv_image[..., ::-1]
221 |         except Exception as e:
222 |             raise HTTPException(
223 |                 status_code=400,
224 |                 detail=(
225 |                     f"Cannot load image with source: {image}. Detailed error message:"
226 |                     f" {str(e)}"
227 |                 ),
228 |             )
229 |         return cv_image
230 | 
231 | 
232 | if __name__ == "__main__":
233 |     ph = LayoutParser()
234 |     ph.launch()
235 | 
-------------------------------------------------------------------------------- /advanced/llama2/README.md: --------------------------------------------------------------------------------
1 | # Llama2
2 | 
3 | [Llama2](https://ai.meta.com/llama/) is the latest collection of pretrained and fine-tuned generative text models released by Meta, ranging in scale from 7 billion to 70 billion parameters. In this example we are going to use the Llama2-7B model to demonstrate how to get state-of-the-art LLMs running on Lepton within just seconds.
4 | 
5 | There are two ways to access Llama2 models on Lepton:
6 | 
7 | ## Fully managed Llama2 inference api
8 | 
9 | Lepton provides the standard Llama2 models as fully managed API endpoints at https://llama2.lepton.run. This API endpoint is fully compatible with OpenAI's ChatGPT API, so users can directly use OpenAI's sdk, or any tool built on the ChatGPT API, to seamlessly switch to the Llama2 model service. For example, if you are using OpenAI's Python sdk, you can simply switch to Lepton's Llama2 inference API with
10 | 
11 | ```python
12 | import openai
13 | 
14 | openai.api_base = "https://llama2.lepton.run/api/v1"
15 | openai.api_key = "sk-" + "a" * 48
16 | ```
17 | 
18 | After setting the `api_base` (and `api_key`) configuration, all existing code is compatible with Lepton's Llama2 inference API; e.g. the following typical Python code that uses OpenAI's ChatGPT API simply works without any modifications:
19 | 
20 | ```python
21 | sys_prompt = """
22 | The following is a conversation with an AI assistant. The assistant is helpful, creative, clever, and very friendly.
23 | """
24 | # Create a completion
25 | completion = openai.ChatCompletion.create(
26 |     model="gpt-3.5-turbo",
27 |     messages=[
28 |         {"role": "system", "content": sys_prompt},
29 |         {"role": "user", "content": "tell me a short story"},
30 |     ],
31 |     stream=True,
32 |     max_tokens=64,
33 | )
34 | for chunk in completion:
35 |     content = chunk["choices"][0]["delta"].get("content")
36 |     if content:
37 |         print(content, end="")
38 | print()
39 | ```
40 | 
41 | ## Dedicated Llama2 inference service
42 | 
43 | If the fully managed api does not fit your use case, you can also easily launch a dedicated Llama2 model inference service on the Lepton platform.
44 | 
45 | Note:
46 | Meta hosts the Llama2 model weights on Huggingface. You should obtain access to these weights by going to the corresponding model page (e.g. [llama-2-7b-chat-hf](https://huggingface.co/meta-llama/Llama-2-7b-chat-hf)) and requesting access. Once you have access, go to Huggingface's [token management page](https://huggingface.co/settings/tokens) to generate a token.
47 | 
48 | ### Use Lepton's secret management
49 | 
50 | As you may use the token multiple times, we recommend storing it in Lepton's secret store.
Simply do this and remember to replace the token with your own. 51 | ```shell 52 | lep secret create -n HUGGING_FACE_HUB_TOKEN -v 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 53 | ``` 54 | (Don't worry, the above token is only an example and isn't active.) 55 | 56 | You can verify the secret exists with `lep secret list`: 57 | ```shell 58 | >> lep secret list 59 | Secrets 60 | ┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━┓ 61 | ┃ ID ┃ Value ┃ 62 | ┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━┩ 63 | │ HUGGING_FACE_HUB_TOKEN │ (hidden) │ 64 | └────────────────────────┴──────────┘ 65 | ``` 66 | 67 | ### Launch llama2 inference service locally 68 | 69 | Ensure that you have installed the required dependencies. Then, run: 70 | ```shell 71 | lep photon run -n llama2 -m hf:meta-llama/Llama-2-7b-hf 72 | ``` 73 | Note that you will need to have a relatively large GPU (>20GB memory). 74 | 75 | ### Launch llama2 inference service in the cloud 76 | 77 | Similar to other examples, you can run llama2 with the following command. Remember to pass in the huggingface access token, and also, use a reasonably sized GPU like `gpu.a10` to ensure that things run. 78 | 79 | ```shell 80 | lep photon create -n llama2 -m hf:meta-llama/Llama-2-7b-hf 81 | lep photon push -n llama2 82 | lep photon run \ 83 | -n llama2 \ 84 | --secret HUGGING_FACE_HUB_TOKEN \ 85 | --resource-shape gpu.a10 86 | ``` 87 | 88 | And visit [dashboard.lepton.ai](https://dashboard.lepton.ai/) to try out the model. 89 | 90 | Note: in default, the server is protected via a token, so you won't be able to access the gradio UI. This is by design to provide adequate security. If you want to make the UI public, you can either add the `--public` argument to `lep photon run`, or update the deployment with: 91 | 92 | ```shell 93 | lep deployment update -n llama2 --public 94 | ``` 95 | 96 | ### Client 97 | 98 | Once the inference service is up (either locally or in the cloud), you can use the client to access it in a programmatical way: 99 | 100 | ```python 101 | >>> from leptonai.client import Client 102 | 103 | >>> client = Client(...) 104 | 105 | >>> client.run(inputs=["what is 2 + 3"], max_new_tokens=128) 106 | "what is 2 + 3.\nThis is quite common in mathematics: variable height means variable growth and variable foot (puz- ulating, pus, pulsating), variable width for a three dimensional thing. Variable has an incorrect connotation for us. 
It would be better to say that the statistic is unsatisfactory in all conditions.\nBut...since he _says_ he's a 90th percentile man, and since the classification is as it is, and since those who classify him for that percentile have based it on other empirical evidence, you still have either an error in the percentile, or" 107 | ``` 108 | -------------------------------------------------------------------------------- /advanced/llama2/llama2-api.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "id": "lIYdn1woOS1n" 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "!pip install -qqq openai" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "source": [ 17 | "import openai\n", 18 | "\n", 19 | "openai.api_base = \"https://llama2.lepton.run/api/v1\"\n", 20 | "openai.api_key = \"sk-\" + \"a\" * 48" 21 | ], 22 | "metadata": { 23 | "id": "UCOfN-VEsy5m" 24 | }, 25 | "execution_count": 2, 26 | "outputs": [] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "source": [ 31 | "sys_prompt = \"\"\"\n", 32 | "The following is a conversation with an AI assistant. The assistant is helpful, creative, clever, and very friendly.\n", 33 | "\"\"\"\n", 34 | "# Create a completion\n", 35 | "completion = openai.ChatCompletion.create(\n", 36 | " model=\"gpt-3.5-turbo\",\n", 37 | " messages=[\n", 38 | " {\"role\": \"system\", \"content\": sys_prompt},\n", 39 | " {\"role\": \"user\", \"content\": \"tell me a short story\"},\n", 40 | " ],\n", 41 | " stream=True,\n", 42 | " max_tokens=64,\n", 43 | ")\n", 44 | "for chunk in completion:\n", 45 | " content = chunk[\"choices\"][0][\"delta\"].get(\"content\")\n", 46 | " if content:\n", 47 | " print(content, end=\"\")\n", 48 | "print()" 49 | ], 50 | "metadata": { 51 | "colab": { 52 | "base_uri": "https://localhost:8080/" 53 | }, 54 | "id": "y7eV3R87sz6Y", 55 | "outputId": "75896f74-408c-4946-8bbd-d392b1a4178b" 56 | }, 57 | "execution_count": 3, 58 | "outputs": [ 59 | { 60 | "output_type": "stream", 61 | "name": "stdout", 62 | "text": [ 63 | "Of course! I'd be happy to tell you a short story. Here is one I came up with on the spot:\n", 64 | "\n", 65 | "Once upon a time, in a far-off land, there was a magical forest filled with towering trees, sparkling streams, and a variety of enchanting cre\n" 66 | ] 67 | } 68 | ] 69 | } 70 | ], 71 | "metadata": { 72 | "colab": { 73 | "name": "scratchpad", 74 | "provenance": [] 75 | }, 76 | "kernelspec": { 77 | "display_name": "Python 3", 78 | "name": "python3" 79 | } 80 | }, 81 | "nbformat": 4, 82 | "nbformat_minor": 0 83 | } -------------------------------------------------------------------------------- /advanced/nougat/README.md: -------------------------------------------------------------------------------- 1 | # Nougat 2 | 3 | [Nougat](https://github.com/facebookresearch/nougat) (Neural Optical Understanding for Academic Documents) is a Visual Transformer model that performs an Optical Character Recognition (OCR) task for processing scientific documents into a markup language. In this example, we are going to show how to use Nougat to turn scanned PDF files (human readable documents) to markups (machine-readable text). 
4 | 5 | ## Install Lepton sdk 6 | ```shell 7 | pip install leptonai 8 | ``` 9 | 10 | ## Launch inference service locally 11 | 12 | To run locally, first install dependencies: 13 | ```shell 14 | pip install -r requirements.txt 15 | ``` 16 | 17 | Nougat uses `pdfinfo` to extract the "Info" section from PDF files, thus need to install `poppler-utils`: 18 | 19 | ```shell 20 | sudo apt-get update 21 | sudo apt-get install poppler-utils 22 | ``` 23 | 24 | After installing dependencies, you can launch inference service locally by running: 25 | 26 | ```shell 27 | lep photon run -n nougat -m photon.py 28 | ``` 29 | 30 | ## Launch inference service in the cloud 31 | 32 | Similar to other examples, you can run services on Lepton Cloud Platform easily, e.g.: 33 | 34 | ```shell 35 | lep photon create -n nougat -m photon.py 36 | lep photon push -n nougat 37 | lep photon run \ 38 | -n nougat \ 39 | --resource-shape gpu.a10 40 | ``` 41 | 42 | You can visit [dashboard.lepton.ai](https://dashboard.lepton.ai/) to try out the model. 43 | 44 | If you want to make the api public, you can either add the `--public` argument to `lep photon run`, or update the deployment with: 45 | 46 | ```shell 47 | lep deployment update -n nougat --public 48 | ``` 49 | 50 | ## Client 51 | 52 | Once the inference service is up (either locally or in the cloud), you can use the client to access it in a programmatical way: 53 | 54 | ```python 55 | from leptonai.client import Client, local, current 56 | 57 | # Use this if you are running locally 58 | client = Client(local(), stream=True) 59 | # Or, if you are logged in to your workspace via `lep login` already 60 | # and have launched it: 61 | # client = Client(current(), "nougat", stream=True) 62 | ``` 63 | 64 | ```python 65 | PDF_FILE = "https://www.gcpsk12.org/site/handlers/filedownload.ashx?moduleinstanceid=74914&dataid=140852&FileName=Sample%20Scanned%20PDF.pdf" 66 | content_iter = client.run(file=PDF_FILE) 67 | for chunk in content_iter: 68 | print(chunk.decode("utf-8")) 69 | ``` 70 | 71 | ``` 72 | Document Title (Heading Style 1) 73 | 74 | Topic 1 (Heading Style 2) 75 | 76 | Normal Paragraph Style: Lorentz ipsum dolor sit amet, consecetetur adipiscing elit, sed do 77 | 78 | elusmod temper incididunt ut labore et dolore magna aliquua. Dapibus uttrices in iaculis 79 | 80 | nunc sed augue. Fusce ut placerat orci nulla pellentesque dignissim enim sit. Nunc 81 | 82 | congue nisi vitae suscipitt tellus. Tristique et egestas quis ipsum suspendisse uttrices. 83 | 84 | Nunc aliquet bibendum enim facilis gravida neque. 85 | 86 | Topic 2 (Heading Style 2) 87 | 88 | Subtopic A (Heading Style 3) 89 | ... 
90 | ``` 91 | -------------------------------------------------------------------------------- /advanced/nougat/photon.py: -------------------------------------------------------------------------------- 1 | from io import BytesIO 2 | import os 3 | import traceback 4 | from typing import Union, Optional 5 | 6 | from loguru import logger 7 | import torch 8 | from leptonai.photon import ( 9 | Photon, 10 | FileParam, 11 | get_file_content, 12 | HTTPException, 13 | StreamingResponse, 14 | ) 15 | 16 | 17 | class Nougat(Photon): 18 | requirement_dependency = [ 19 | "git+https://github.com/facebookresearch/nougat.git@84b3ae1", 20 | "torch", 21 | "pypdf", 22 | "loguru", 23 | "opencv-python!=4.8.0.76", 24 | ] 25 | 26 | system_dependency = ["poppler-utils"] 27 | 28 | def init(self): 29 | from nougat import NougatModel 30 | from nougat.utils.checkpoint import get_checkpoint 31 | 32 | model_tag = os.environ.get( 33 | "MODEL_TAG", "0.1.0-small" 34 | ) # 0.1.0-small or 0.1.0-base 35 | checkpoint = get_checkpoint(model_tag=model_tag) 36 | model = NougatModel.from_pretrained(checkpoint) 37 | if torch.cuda.is_available(): 38 | model = model.to("cuda") 39 | self.model = model.to(torch.bfloat16).eval() 40 | self.batch_size = os.environ.get("BATCH_SIZE", 4) 41 | 42 | def iter_batch(self, iterable, batch_size): 43 | for start in range(0, len(iterable), batch_size): 44 | yield iterable[start : min(start + batch_size, len(iterable))] 45 | 46 | def gen_pages(self, pdf, start, end): 47 | from nougat.dataset.rasterize import rasterize_paper 48 | from PIL import Image 49 | from nougat.postprocessing import markdown_compatible 50 | 51 | pages = list(range(start - 1, end)) 52 | for batch_pages in self.iter_batch(pages, self.batch_size): 53 | image_bytes_list = rasterize_paper(pdf, pages=batch_pages, return_pil=True) 54 | images = [ 55 | self.model.encoder.prepare_input( 56 | Image.open(image_bytes), random_padding=False 57 | ) 58 | for image_bytes in image_bytes_list 59 | ] 60 | model_output = self.model.inference(image_tensors=torch.stack(images)) 61 | logger.info( 62 | f"#input pages: {len(batch_pages)}, #output pages:" 63 | f" {len(model_output['predictions'])}" 64 | ) 65 | for page_prediction in model_output["predictions"]: 66 | content = markdown_compatible(page_prediction) 67 | yield content 68 | 69 | @Photon.handler 70 | def run( 71 | self, 72 | file: Union[FileParam, str], 73 | start: Optional[int] = None, 74 | end: Optional[int] = None, 75 | ) -> StreamingResponse: 76 | import pypdf 77 | 78 | try: 79 | content = get_file_content(file) 80 | pdf = pypdf.PdfReader(BytesIO(content)) 81 | except Exception: 82 | logger.error(traceback.format_exc()) 83 | raise HTTPException(status_code=400, detail="Failed to read PDF file.") 84 | 85 | total_pages = len(pdf.pages) 86 | start = start or 1 87 | end = end or total_pages 88 | logger.info(f"Total pages: {total_pages}, start: {start}, end: {end}") 89 | if start < 1 or end > total_pages: 90 | raise HTTPException( 91 | status_code=400, 92 | detail=f"Page number should be in range [1, {total_pages}]", 93 | ) 94 | if start > end: 95 | raise HTTPException( 96 | status_code=400, detail="Start page number should be less than end." 
97 |             )
98 | 
99 |         return self.gen_pages(pdf, start, end)
100 | 
-------------------------------------------------------------------------------- /advanced/nougat/requirements.txt: --------------------------------------------------------------------------------
1 | git+https://github.com/facebookresearch/nougat.git@84b3ae1
2 | torch
3 | pypdf
4 | loguru
5 | opencv-python!=4.8.0.76
6 | 
-------------------------------------------------------------------------------- /advanced/open-clip/README.md: --------------------------------------------------------------------------------
1 | # OpenCLIP Example
2 | 
3 | This is a simple example of how to use [OpenCLIP](https://github.com/mlfoundations/open_clip) to generate embeddings of text and images. OpenCLIP is an open source implementation of OpenAI's [CLIP](https://github.com/openai/CLIP) (Contrastive Language-Image Pre-training). It is a neural network trained on a variety of (image, text) pairs. It can be instructed in natural language to predict the most relevant text snippet, given an image, without directly optimizing for the task.
4 | 
5 | 
6 | ## Install dependencies
7 | 
8 | Within this example, we will use `conda` to manage the environment. You can install `conda` by following the instructions [here](https://docs.conda.io/projects/conda/en/latest/user-guide/install/).
9 | 
10 | ```bash
11 | # Create a new environment
12 | conda create -n clip python=3.10
13 | conda activate clip
14 | 
15 | # Install leptonai (if you've done this already, you can skip this step)
16 | pip install leptonai
17 | 
18 | # Install the dependencies
19 | pip install -r requirements.txt
20 | ```
21 | 
22 | > During the closed beta stage, you may install the latest package [here](https://www.lepton.ai/docs/overview/quickstart#1-installation)
23 | 
24 | 
25 | ## Create photon and run locally
26 | 
27 | ```bash
28 | # Create a photon
29 | lep photon create -n clip -m open-clip.py
30 | # Run the photon locally
31 | lep photon run -n clip --local
32 | ```
33 | 
34 | ## Make a prediction
35 | 
36 | ```python
37 | from leptonai.client import Client, local
38 | c = Client(local())
39 | 
40 | # Embed a text
41 | c.embed_text(query='cat')
42 | 
43 | # Embed an image by url
44 | c.embed_image(url='https://i.natgeofe.com/n/548467d8-c5f1-4551-9f58-6817a8d2c45e/NationalGeographic_2572187_square.jpg')
45 | 
46 | ```
47 | 
48 | ## Run the photon remotely
49 | 
50 | ```bash
51 | lep login # logs into the lepton cloud
52 | lep photon push -n clip # pushes the photon to the cloud
53 | lep photon run -n clip --resource-shape gpu.a10 # run it
54 | ```
55 | 
56 | ```python
57 | from leptonai.client import Client
58 | LEPTON_API_TOKEN = "YOUR_LEPTON_API_TOKEN"
59 | 
60 | client = Client("YOUR_WORKSPACE_ID", "clip", token=LEPTON_API_TOKEN)
61 | 
62 | # E.g. embed a text
63 | result = client.embed_text(
64 |     query="string"
65 | )
66 | 
67 | print(result)
68 | ```
-------------------------------------------------------------------------------- /advanced/open-clip/open-clip.py: --------------------------------------------------------------------------------
1 | """
2 | This is a simple class that shows how to use the Photon SDK to create a
3 | common embedding service for text and image (assuming image urls), using the
4 | CLIP model. Note that for the sake of simplicity, the model is downloaded from
5 | the internet every time the photon is run. This is not recommended for
6 | production use, but is fine if you are running prototypes.
7 | 
8 | By default, this uses the ViT-B-32-quickgelu model with the laion400m_e32 pretrained weights.
9 | You can change the model and pretrained weights by passing in the MODEL_NAME and PRETRAINED
10 | environment variables when running the photon. However, we do not proactively sanity
11 | check the validity of the specified model name and pretrained weights name, so please
12 | make sure they are valid.
13 | 
14 | To build the photon, do:
15 | 
16 |     lep photon create -n clip -m open-clip.py:Clip
17 | 
18 | To run the photon locally, simply do
19 | 
20 |     lep photon run -n clip --local
21 | 
22 | For other models, you can try adding --env arguments like:
23 | 
24 |     --env MODEL_NAME=ViT-B-32-quickgelu --env PRETRAINED=laion400m_e32
25 | 
26 | and the list of models can be found at
27 | https://github.com/mlfoundations/open_clip/blob/main/src/open_clip/pretrained.py
28 | 
29 | To deploy the photon, do
30 | 
31 |     lep photon push -n clip
32 |     lep photon run -n clip -dn clip
33 | 
34 | Or choose your own deployment name like "-dn my-clip-deployment".
35 | 
36 | To test the photon, you can either use the API explorer in the UI, or use
37 | the photon client class in Python, e.g.
38 | 
39 |     from leptonai.client import Client
40 |     # If you are running the photon remotely with workspace id "myworkspace"
41 |     # and deployment name "clip"
42 |     client = Client("myworkspace", "clip")
43 |     # Or if you are running the photon locally at port 8080
44 |     client = Client("http://localhost:8080")
45 |     # Do NOT run the above two commands at the same time! Choose only one.
46 | 
47 |     # Now you can call the endpoints
48 |     vec = client.embed(query="people running by the sea")
49 |     # Or call explicit functions:
50 |     vec = client.embed_text(query="people running by the sea")
51 |     vec = client.embed_image(url="https://upload.wikimedia.org/wikipedia/commons/thumb/3/3f/Fermilab.jpg/800px-Fermilab.jpg")
52 | """
53 | 
54 | import io
55 | import os
56 | import urllib
57 | from typing import List
58 | 
59 | import open_clip
60 | from PIL import Image
61 | import torch
62 | import validators
63 | 
64 | from leptonai.photon import Photon, handler, HTTPException
65 | from leptonai.photon.types import lepton_unpickle, is_pickled, LeptonPickled
66 | 
67 | 
68 | DEFAULT_MODEL_NAME = "ViT-B-32-quickgelu"
69 | DEFAULT_PRETRAINED = "laion400m_e32"
70 | 
71 | 
72 | class Clip(Photon):
73 |     """
74 |     This photon is used to embed text and image into a vector space using CLIP.
75 | """ 76 | 77 | # Python dependency 78 | requirement_dependency = [ 79 | "open_clip_torch", 80 | "Pillow", 81 | "torch", 82 | "transformers", 83 | "validators", 84 | ] 85 | 86 | def init(self): 87 | if torch.cuda.is_available(): 88 | self.DEVICE = "cuda" 89 | else: 90 | self.DEVICE = "cpu" 91 | MODEL_NAME = ( 92 | os.environ["MODEL_NAME"] 93 | if "MODEL_NAME" in os.environ 94 | else DEFAULT_MODEL_NAME 95 | ) 96 | PRETRAINED = ( 97 | os.environ["PRETRAINED"] 98 | if "PRETRAINED" in os.environ 99 | else DEFAULT_PRETRAINED 100 | ) 101 | ( 102 | self.CLIP_MODEL, 103 | _, 104 | self.CLIP_IMG_PREPROCESS, 105 | ) = open_clip.create_model_and_transforms( 106 | model_name=MODEL_NAME, pretrained=PRETRAINED, device=self.DEVICE 107 | ) 108 | self.TOKENIZER = open_clip.get_tokenizer(MODEL_NAME) 109 | 110 | @handler("embed") 111 | def embed(self, query: str) -> List[float]: 112 | if validators.url(query): 113 | return self.embed_image(query) 114 | else: 115 | return self.embed_text(query) 116 | 117 | @handler("embed_text") 118 | def embed_text(self, query: str) -> List[float]: 119 | query = self.TOKENIZER([query]) 120 | with torch.no_grad(): 121 | text_features = self.CLIP_MODEL.encode_text(query.to(self.DEVICE)) 122 | text_features /= text_features.norm(dim=-1, keepdim=True) 123 | return list(text_features.cpu().numpy()[0].astype(float)) 124 | 125 | def embed_image_local(self, image: Image): 126 | image = self.CLIP_IMG_PREPROCESS(image).unsqueeze(0).to(self.DEVICE) 127 | with torch.no_grad(): 128 | image_features = self.CLIP_MODEL.encode_image(image) 129 | image_features /= image_features.norm(dim=-1, keepdim=True) 130 | return list(image_features.cpu().numpy()[0].astype(float)) 131 | 132 | @handler("embed_image") 133 | def embed_image(self, url: str) -> List[float]: 134 | # open the imageurl and then read the content into a buffer 135 | try: 136 | raw_img = Image.open(io.BytesIO(urllib.request.urlopen(url).read())) 137 | except Exception as e: 138 | raise HTTPException( 139 | status_code=400, 140 | detail=( 141 | f"Cannot open image at url {url}. Detailed error message: {str(e)}" 142 | ), 143 | ) 144 | return self.embed_image_local(raw_img) 145 | 146 | @handler("embed_pickle_image") 147 | def embed_pickle_image(self, image: LeptonPickled) -> List[float]: 148 | print("Is the image passed in pickled ? 
:", is_pickled(image)) 149 | try: 150 | raw_img = lepton_unpickle(image) 151 | except Exception: 152 | raise HTTPException(status_code=400, detail="Cannot read image from bytes.") 153 | return self.embed_image_local(raw_img) 154 | -------------------------------------------------------------------------------- /advanced/open-clip/requirements.txt: -------------------------------------------------------------------------------- 1 | open_clip_torch 2 | Pillow 3 | torch 4 | transformers 5 | validators 6 | -------------------------------------------------------------------------------- /advanced/pytorch-example/main.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.optim as optim 4 | import torch.nn.functional as F 5 | import torch.distributed as dist 6 | import os 7 | from torchvision import datasets, transforms 8 | from torch.nn.parallel import DistributedDataParallel as DDP 9 | from torch.utils.data import DataLoader, DistributedSampler 10 | 11 | from datasets import load_dataset 12 | 13 | 14 | class MNISTModel(nn.Module): 15 | def __init__(self): 16 | super(MNISTModel, self).__init__() 17 | self.conv1 = nn.Conv2d(1, 32, 3, 1) 18 | self.conv2 = nn.Conv2d(32, 64, 3, 1) 19 | self.dropout1 = nn.Dropout(0.25) 20 | self.dropout2 = nn.Dropout(0.5) 21 | self.fc1 = nn.Linear(9216, 128) 22 | self.fc2 = nn.Linear(128, 10) 23 | 24 | def forward(self, x): 25 | x = self.conv1(x) 26 | x = F.relu(x) 27 | x = self.conv2(x) 28 | x = F.relu(x) 29 | x = F.max_pool2d(x, 2) 30 | x = self.dropout1(x) 31 | x = torch.flatten(x, 1) 32 | x = self.fc1(x) 33 | x = F.relu(x) 34 | x = self.dropout2(x) 35 | x = self.fc2(x) 36 | return F.log_softmax(x, dim=1) 37 | 38 | def train(): 39 | # Initialize process group 40 | dist.init_process_group(backend="nccl") 41 | 42 | # Get local rank from environment variable 43 | local_rank = int(os.environ["LOCAL_RANK"]) 44 | rank = int(os.environ["RANK"]) 45 | world_size = int(os.environ["WORLD_SIZE"]) 46 | 47 | # Set device 48 | torch.cuda.set_device(local_rank) 49 | device = torch.device("cuda", local_rank) 50 | 51 | print(f"Running on rank {rank} (local_rank: {local_rank})") 52 | 53 | def transform(example): 54 | imgs = [transforms.ToTensor()(img) for img in example["image"]] 55 | imgs = [transforms.Normalize((0.1307,), (0.3081,))(img) for img in imgs] 56 | example["image"] = torch.stack(imgs) 57 | example["label"] = torch.tensor(example["label"]) 58 | return example 59 | 60 | dataset = load_dataset("mnist", split="train") 61 | dataset = dataset.with_transform(transform) 62 | sampler = DistributedSampler(dataset, num_replicas=world_size, rank=rank) 63 | train_loader = DataLoader(dataset, batch_size=64, sampler=sampler) 64 | 65 | model = MNISTModel().to(device) 66 | model = DDP(model, device_ids=[local_rank]) 67 | optimizer = optim.Adam(model.parameters(), lr=0.001) 68 | 69 | model.train() 70 | for epoch in range(1, 11): 71 | sampler.set_epoch(epoch) 72 | for batch_idx, batch_data in enumerate(train_loader): 73 | data, target = batch_data["image"].to(device), batch_data["label"].to(device) 74 | optimizer.zero_grad() 75 | output = model(data) 76 | loss = F.nll_loss(output, target) 77 | loss.backward() 78 | optimizer.step() 79 | 80 | if batch_idx % 10 == 0: 81 | print(f"Train Epoch: {epoch} [{batch_idx * len(data)}/{len(train_loader.dataset)} ({100. 
* batch_idx / len(train_loader):.0f}%)]\tLoss: {loss.item():.6f}") 82 | 83 | if rank == 0: 84 | torch.save(model.module.state_dict(), "mnist_model.pth") 85 | print("Model saved as mnist_model.pth") 86 | 87 | dist.destroy_process_group() 88 | 89 | if __name__ == "__main__": 90 | train() -------------------------------------------------------------------------------- /advanced/pytorch-example/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets -------------------------------------------------------------------------------- /advanced/sdxl/README.md: -------------------------------------------------------------------------------- 1 | # Stable Diffusion XL model 2 | 3 | [Stable Diffusion XL](https://stability.ai/stablediffusion) (SDXL) is the latest open source image generation model developed by Stability AI, focusing on delivering photorealistic outputs that boast intricate details and sophisticated compositions. In this example we demonstrate how to run an SDXL model inference service on Lepton. 4 | 5 | There are two ways to access the SDXL model: 6 | 7 | ## Fully managed SDXL inference API 8 | 9 | Lepton provides the SDXL model as a fully managed API endpoint at https://sdxl.lepton.run. Users can easily use the Lepton Python client or any existing HTTP request tool to generate high-resolution, realistic images right away. 10 | 11 | Creating the client: 12 | ```python 13 | from leptonai.client import Client 14 | 15 | API_URL = "https://sdxl.lepton.run" 16 | TOKEN = "YOUR_TOKEN_HERE" 17 | 18 | c = Client(API_URL, token=TOKEN) 19 | ``` 20 | 21 | Text to Image: 22 | ```python 23 | prompt = "A cat launching rocket" 24 | seed = 1234 25 | image_bytes = c.txt2img(prompt=prompt, seed=seed) 26 | with open("txt2img_prompt.png", "wb") as f: 27 | f.write(image_bytes) 28 | ``` 29 | 30 | Text to Image (with refiner): 31 | ```python 32 | prompt = "A cat launching rocket" 33 | seed = 1234 34 | image_bytes = c.txt2img(prompt=prompt, seed=seed, use_refiner=True) 35 | with open("txt2img_prompt_refiner.png", "wb") as f: 36 | f.write(image_bytes) 37 | ``` 38 | 39 | 40 | Inpaint: 41 | ```python 42 | import base64 43 | import requests 44 | 45 | from leptonai.photon import FileParam 46 | 47 | 48 | img_url = "https://raw.githubusercontent.com/CompVis/latent-diffusion/main/data/inpainting_examples/overture-creations-5sI6fQgYIuo.png" 49 | mask_url = "https://raw.githubusercontent.com/CompVis/latent-diffusion/main/data/inpainting_examples/overture-creations-5sI6fQgYIuo_mask.png" 50 | prompt = "A border collie sitting on a bench" 51 | seed = 2236 52 | 53 | 54 | # Directly using urls to pass images 55 | image_bytes = c.inpaint(image=img_url, mask_image=mask_url, prompt=prompt, seed=seed) 56 | with open("inpaint_url.png", "wb") as f: 57 | f.write(image_bytes) 58 | 59 | # Or use FileParam to send image files: 60 | img_content = requests.get(img_url).content 61 | mask_content = requests.get(mask_url).content 62 | image_bytes = c.inpaint( 63 | image=FileParam(img_content), 64 | mask_image=FileParam(mask_content), 65 | prompt=prompt, 66 | seed=seed, 67 | ) 68 | with open("inpaint_file_param.png", "wb") as f: 69 | f.write(image_bytes) 70 | 71 | # Or use base64 to encode image files: 72 | img_content = requests.get(img_url).content 73 | mask_content = requests.get(mask_url).content 74 | image_bytes = c.inpaint( 75 | image=base64.b64encode(img_content).decode("ascii"), 76 | mask_image=base64.b64encode(mask_content).decode("ascii"), 77 | prompt=prompt, 78 | seed=seed, 79 | )
80 | with open("inpaint_base64.png", "wb") as f: 81 | f.write(image_bytes) 82 | ``` 83 | Image: 84 | 85 | 86 | 87 | Mask: 88 | 89 | 90 | 91 | Result: 92 | 93 | 94 | 95 | ## Dedicated SDXL inference service 96 | 97 | If fully managed api does not fit your use case, you can also easily launch a dedicated SDXL model inference service on Lepton platform. 98 | 99 | ### Launch SDXL inference service locally 100 | 101 | Ensure that you have installed the required dependencies. Then, run: 102 | ```shell 103 | lep photon create -n sdxl -m ./sdxl.py 104 | lep photon run -n sdxl 105 | ``` 106 | Once the service is up, its url will be printed on the terminal screen (e.g. http://localhost:8080). 107 | 108 | ### Launch SDXL inference service in the cloud 109 | 110 | Similar to other examples, after you have finished iterating with local service, you can launch it on Lepton cloud platform, which handles autoscaling, monitoring etc. for your production use case. 111 | 112 | ```shell 113 | lep photon push -n sdxl 114 | lep photon run \ 115 | -n sdxl \ 116 | --resource-shape gpu.a10 117 | ``` 118 | 119 | And visit [dashboard.lepton.ai](https://dashboard.lepton.ai/) to find the corresponding service url. 120 | 121 | Note: in default, the server is protected via a token, so you won't be able to access the gradio UI. This is by design to provide adequate security. If you want to make the UI public, you can either add the `--public` argument to `lep photon run`, or update the deployment with: 122 | 123 | ```shell 124 | lep deployment update -n sdxl --public 125 | ``` 126 | 127 | ### Client 128 | 129 | Once the inference service is up (either locally or in the cloud), you can use the client to access it in a programmatical way: 130 | 131 | ```python 132 | from leptonai.client import Client 133 | 134 | SERVICE_URL = "http://localhost:8080" # if run locally 135 | # SERVICE_URL = "DEPLOYMENT URL shown on Lepton Cloud Platform" # if run on the Lepton Cloud Platform 136 | 137 | c = Client(SERVICE_URL) 138 | 139 | img_content = c.run(prompt="a cat launching rocket", seed=1234) 140 | with open("cat.png", "wb") as fid: 141 | fid.write(img_content) 142 | ``` 143 | -------------------------------------------------------------------------------- /advanced/sdxl/assets/image.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leptonai/examples/033f7c0d04c57c8c24b9e82894c3b330c5c74993/advanced/sdxl/assets/image.png -------------------------------------------------------------------------------- /advanced/sdxl/assets/inpaint.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leptonai/examples/033f7c0d04c57c8c24b9e82894c3b330c5c74993/advanced/sdxl/assets/inpaint.png -------------------------------------------------------------------------------- /advanced/sdxl/assets/mask.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leptonai/examples/033f7c0d04c57c8c24b9e82894c3b330c5c74993/advanced/sdxl/assets/mask.png -------------------------------------------------------------------------------- /advanced/sdxl/assets/txt2img.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leptonai/examples/033f7c0d04c57c8c24b9e82894c3b330c5c74993/advanced/sdxl/assets/txt2img.png -------------------------------------------------------------------------------- 
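A note supplementing the `Client` section of the SDXL README above: the dedicated service's `run` handler (defined in `sdxl.py` further below) accepts several optional generation parameters beyond `prompt` and `seed`. The following is a minimal sketch of a more fully specified request; the keyword names mirror the handler signature, while the concrete values are only illustrative:

```python
from leptonai.client import Client

# Local deployment; for a cloud deployment, use the URL from the dashboard and pass token=...
c = Client("http://localhost:8080")

img_content = c.run(
    prompt="a cat launching rocket",
    negative_prompt="blurry, low quality",  # optional
    width=1024,
    height=1024,
    guidance_scale=5.0,
    seed=1234,
    num_inference_steps=40,
    high_noise_frac=0.8,  # only takes effect when use_refiner=True
    use_refiner=True,
)
with open("cat_refined.png", "wb") as f:
    f.write(img_content)
```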
/advanced/sdxl/requirements.txt: -------------------------------------------------------------------------------- 1 | diffusers>=0.19.0 2 | gradio 3 | invisible-watermark 4 | leptonai 5 | torch 6 | 7 | -------------------------------------------------------------------------------- /advanced/sdxl/sdxl.py: -------------------------------------------------------------------------------- 1 | from io import BytesIO 2 | from typing import Optional 3 | 4 | from diffusers import DiffusionPipeline 5 | import gradio as gr 6 | import torch 7 | 8 | from leptonai.photon import Photon, PNGResponse 9 | 10 | 11 | class SDXL(Photon): 12 | requirement_dependency = [ 13 | "gradio", 14 | "torch", 15 | "diffusers>=0.19.0", 16 | "invisible-watermark", 17 | ] 18 | 19 | def init(self): 20 | cuda_available = torch.cuda.is_available() 21 | 22 | if cuda_available: 23 | self.device = torch.device("cuda") 24 | else: 25 | self.device = torch.device("cpu") 26 | 27 | # load both base & refiner 28 | self.base = DiffusionPipeline.from_pretrained( 29 | "stabilityai/stable-diffusion-xl-base-1.0", 30 | torch_dtype=torch.float16, 31 | variant="fp16", 32 | use_safetensors=True, 33 | ) 34 | if cuda_available: 35 | self.base.to("cuda") 36 | # torch.compile is affected by the following issue. If you encounter problems, 37 | # comment the torch.compile line. 38 | # https://github.com/huggingface/diffusers/issues/4370 39 | # self.base.unet = torch.compile( 40 | # self.base.unet, mode="reduce-overhead", fullgraph=True 41 | # ) 42 | 43 | self._refiner = None 44 | 45 | @property 46 | def refiner(self): 47 | if self._refiner is None: 48 | pipe = DiffusionPipeline.from_pretrained( 49 | "stabilityai/stable-diffusion-xl-refiner-1.0", 50 | text_encoder_2=self.base.text_encoder_2, 51 | vae=self.base.vae, 52 | torch_dtype=torch.float16, 53 | use_safetensors=True, 54 | variant="fp16", 55 | ) 56 | if torch.cuda.is_available(): 57 | pipe.to("cuda") 58 | 59 | # torch.compile is affected by the following issue. If you encounter problems, 60 | # comment the torch.compile line. 
61 | # pipe.unet = torch.compile( 62 | # pipe.unet, mode="reduce-overhead", fullgraph=True 63 | # ) 64 | self._refiner = pipe 65 | return self._refiner 66 | 67 | @Photon.handler( 68 | "run", 69 | example={ 70 | "prompt": "A majestic lion jumping from a big stone at night", 71 | "n_steps": 40, 72 | "high_noise_frac": 0.8, 73 | }, 74 | ) 75 | def run( 76 | self, 77 | prompt: str, 78 | negative_prompt: Optional[str] = None, 79 | width: Optional[int] = None, 80 | height: Optional[int] = None, 81 | guidance_scale: Optional[float] = 5.0, 82 | seed: Optional[int] = None, 83 | num_inference_steps: Optional[int] = 50, 84 | high_noise_frac: Optional[float] = 0.8, 85 | use_refiner: Optional[bool] = True, 86 | ) -> PNGResponse: 87 | images = self._run( 88 | prompt=prompt, 89 | negative_prompt=negative_prompt, 90 | width=width, 91 | height=height, 92 | guidance_scale=guidance_scale, 93 | samples=1, 94 | seed=seed, 95 | num_inference_steps=num_inference_steps, 96 | high_noise_frac=high_noise_frac, 97 | use_refiner=use_refiner, 98 | ) 99 | 100 | img_io = BytesIO() 101 | images[0].save(img_io, format="PNG", quality="keep") 102 | img_io.seek(0) 103 | return PNGResponse(img_io) 104 | 105 | def _run( 106 | self, 107 | prompt, 108 | negative_prompt, 109 | width, 110 | height, 111 | guidance_scale, 112 | samples, 113 | seed, 114 | num_inference_steps, 115 | high_noise_frac, 116 | use_refiner, 117 | ): 118 | if seed is not None: 119 | generator = torch.Generator(device=self.device).manual_seed(seed) 120 | else: 121 | generator = None 122 | 123 | if samples > 1: 124 | prompt = [prompt] * samples 125 | if negative_prompt is not None: 126 | negative_prompt = [negative_prompt] * samples 127 | generator = [generator] * samples 128 | 129 | base_extra_kwargs = {} 130 | if use_refiner: 131 | base_extra_kwargs["output_type"] = "latent" 132 | base_extra_kwargs["denoising_end"] = high_noise_frac 133 | # run both experts 134 | images = self.base( 135 | prompt=prompt, 136 | negative_prompt=negative_prompt, 137 | width=width, 138 | height=height, 139 | guidance_scale=guidance_scale, 140 | generator=generator, 141 | num_inference_steps=num_inference_steps, 142 | **base_extra_kwargs, 143 | ).images 144 | if use_refiner: 145 | images = self.refiner( 146 | prompt=prompt, 147 | negative_prompt=negative_prompt, 148 | guidance_scale=guidance_scale, 149 | num_inference_steps=num_inference_steps, 150 | generator=generator, 151 | denoising_start=high_noise_frac, 152 | image=images, 153 | ).images 154 | return images 155 | 156 | @Photon.handler(mount=True) 157 | def ui(self): 158 | blocks = gr.Blocks() 159 | 160 | with blocks: 161 | with gr.Group(): 162 | with gr.Box(): 163 | with gr.Column(scale=3): 164 | with gr.Row(): 165 | prompt = gr.Textbox( 166 | label="Enter your prompt", 167 | show_label=False, 168 | max_lines=1, 169 | placeholder="Enter your prompt", 170 | ).style( 171 | border=(True, False, True, True), 172 | rounded=(True, False, False, True), 173 | container=False, 174 | ) 175 | with gr.Row(): 176 | negative_prompt = gr.Textbox( 177 | label="Enter your negative prompt", 178 | show_label=False, 179 | max_lines=1, 180 | placeholder="Enter your negative prompt", 181 | ).style( 182 | border=(True, False, True, True), 183 | rounded=(True, False, False, True), 184 | container=False, 185 | ) 186 | with gr.Column(scale=1): 187 | btn = gr.Button("Generate image").style( 188 | margin=False, 189 | rounded=(False, True, True, False), 190 | ) 191 | gallery = gr.Gallery( 192 | label="Generated images", show_label=False, 
elem_id="gallery" 193 | ).style(grid=[2], height="auto") 194 | 195 | with gr.Row(elem_id="advanced-options-1"): 196 | samples = gr.Slider( 197 | label="Images", minimum=1, maximum=4, value=1, step=1 198 | ) 199 | width = gr.Slider( 200 | label="Width", 201 | minimum=64, 202 | maximum=1024, 203 | value=512, 204 | step=8, 205 | ) 206 | height = gr.Slider( 207 | label="Height", 208 | minimum=64, 209 | maximum=1024, 210 | value=512, 211 | step=8, 212 | ) 213 | steps = gr.Slider( 214 | label="Steps", minimum=1, maximum=50, value=25, step=1 215 | ) 216 | with gr.Row(elem_id="advanced-options-2"): 217 | scale = gr.Slider( 218 | label="Guidance Scale", minimum=0, maximum=50, value=7.5, step=0.1 219 | ) 220 | high_noise_frac = gr.Slider( 221 | label="Denoising fraction", 222 | minimum=0, 223 | maximum=1, 224 | value=0.8, 225 | step=0.1, 226 | ) 227 | seed = gr.Slider( 228 | label="Seed", 229 | minimum=0, 230 | maximum=2147483647, 231 | value=142857, 232 | step=1, 233 | ) 234 | use_refiner = gr.Checkbox(label="Use refiner", value=True) 235 | btn.click( 236 | self._run, 237 | inputs=[ 238 | prompt, 239 | negative_prompt, 240 | width, 241 | height, 242 | scale, 243 | samples, 244 | seed, 245 | steps, 246 | high_noise_frac, 247 | use_refiner, 248 | ], 249 | outputs=gallery, 250 | ) 251 | 252 | return blocks 253 | 254 | 255 | if __name__ == "__main__": 256 | p = SDXL() 257 | p.launch() 258 | -------------------------------------------------------------------------------- /advanced/segment-anything/README.md: -------------------------------------------------------------------------------- 1 | # Segment Anything Model 2 | 3 | This folder shows an end-to-end AI example, with Meta's most recent [Segment Anything](https://github.com/facebookresearch/segment-anything) model. Specifically, we will implement the functionality that takes an image and an optional prompt, and produces a segmentation mask, either as a list of structured boolean masks, or as a single overlayed image for display. 4 | 5 | A quick example is shown below with input image and output mask: 6 | 7 | 8 | 9 | Technically, this demo shows how to: 10 | - specify dependencies for a photon, including dependencies that are github repositories, 11 | - use the `@Photon.handler` decorator to define handlers for a photon, and annotate the arguments and return values for better user experience, 12 | - return different types of outputs from a photon deployment, 13 | - use the python client to connect and interact with the deployment in nontrivial ways. 14 | 15 | Check out `sam.py` for the actual implementation, and `segment-anything.ipynb` for a notebook demonstration. 
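As an illustration of the first point above (GitHub repositories as photon dependencies), the gist is to list a `git+` URL in `requirement_dependency`, exactly as a PyPI package name would be listed. The snippet below is a trimmed-down, hypothetical skeleton for orientation only — see `sam.py` for the real class, dependency list, and handlers:

```python
from leptonai.photon import Photon


class SegmentAnything(Photon):
    # pip-installable requirements; a git+ URL is handled like any other requirement line.
    requirement_dependency = [
        "git+https://github.com/facebookresearch/segment-anything.git",
        "Pillow",
    ]

    def init(self):
        # Checkpoint download and model setup would go here; omitted in this sketch.
        pass
```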
16 | 17 | To run it on Lepton AI platform, you can use the following command: 18 | 19 | ```bash 20 | # Create a photon 21 | lep photon create -n sam -m py:github.com/leptonai/examples.git:advanced/segment-anything/sam.py 22 | # Push the photon to the platform 23 | lep photon push -n sam 24 | # Run the SAM remotely 25 | lep photon run -n sam --resource-shape gpu.a10 26 | ``` 27 | -------------------------------------------------------------------------------- /advanced/segment-anything/assets/koala.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leptonai/examples/033f7c0d04c57c8c24b9e82894c3b330c5c74993/advanced/segment-anything/assets/koala.jpeg -------------------------------------------------------------------------------- /advanced/segment-anything/assets/koala_segmented.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leptonai/examples/033f7c0d04c57c8c24b9e82894c3b330c5c74993/advanced/segment-anything/assets/koala_segmented.jpg -------------------------------------------------------------------------------- /advanced/segment-anything/requirements.txt: -------------------------------------------------------------------------------- 1 | git+https://github.com/facebookresearch/segment-anything.git 2 | leptonai 3 | Pillow -------------------------------------------------------------------------------- /advanced/segment-something/README.md: -------------------------------------------------------------------------------- 1 | # 👀Segment Something 2 | 3 | Entity extraction with CLIP and SAM model. For more detailed instructions, please refer to this [link](https://www.lepton.ai/docs/examples/segment_something). -------------------------------------------------------------------------------- /advanced/stable-diffusion-webui/README.md: -------------------------------------------------------------------------------- 1 | # Stable Diffusion web UI 2 | 3 | [Stable Diffusion web UI](https://github.com/AUTOMATIC1111/stable-diffusion-webui.git) is a browser interface based on Gradio library for Stable Diffusion. 
4 | 5 | ## Install Lepton SDK 6 | ```shell 7 | pip install leptonai 8 | ``` 9 | 10 | ## Launch Stable Diffusion web UI in the cloud 11 | 12 | Similar to other examples, you can run Stable Diffusion web UI on Lepton Cloud Platform easily, e.g.: 13 | 14 | ```shell 15 | lep photon create -n stable-diffusion-webui -m photon.py 16 | lep photon push -n stable-diffusion-webui 17 | lep photon run \ 18 | -n stable-diffusion-webui \ 19 | --resource-shape gpu.a10 \ 20 | --public 21 | ``` 22 | 23 | You can visit [dashboard.lepton.ai](https://dashboard.lepton.ai/) to use the web Dashboard, or use the `lep` cli to manage the launched deployment: 24 | ```shell 25 | lep deployment status -n stable-diffusion-webui 26 | ``` 27 | 28 | ## Client 29 | 30 | Once the Stable Diffusion web UI server is up, you can copy the deployment URL shown on the Lepton Dashboard (or in the `lep` cli output) 31 | 32 | 33 | 34 | and visit it in the web browser 35 | 36 | 37 | -------------------------------------------------------------------------------- /advanced/stable-diffusion-webui/assets/browser.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leptonai/examples/033f7c0d04c57c8c24b9e82894c3b330c5c74993/advanced/stable-diffusion-webui/assets/browser.png -------------------------------------------------------------------------------- /advanced/stable-diffusion-webui/assets/deployment.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leptonai/examples/033f7c0d04c57c8c24b9e82894c3b330c5c74993/advanced/stable-diffusion-webui/assets/deployment.png -------------------------------------------------------------------------------- /advanced/stable-diffusion-webui/photon.py: -------------------------------------------------------------------------------- 1 | from leptonai.photon import Photon 2 | 3 | 4 | class WebUI(Photon): 5 | webui_version = "v1.6.0" 6 | cmd = [ 7 | "bash", 8 | "-c", 9 | ( 10 | "apt-get update && apt-get install -y wget libgoogle-perftools-dev && wget" 11 | f" -q https://raw.githubusercontent.com/AUTOMATIC1111/stable-diffusion-webui/{webui_version}/webui.sh" 12 | " && chmod +x ./webui.sh && ACCELERATE=True ./webui.sh -f --listen --port" 13 | " 8080" 14 | ), 15 | ] 16 | deployment_template = { 17 | "resource_shape": "gpu.a10", 18 | } 19 | -------------------------------------------------------------------------------- /advanced/tabbyml/README.md: -------------------------------------------------------------------------------- 1 | # Tabby 2 | 3 | [Tabby](https://github.com/TabbyML/tabby) is an AI coding assistant, offering an open-source and on-premises alternative to GitHub Copilot. 4 | 5 | ## Install Lepton SDK 6 | ```shell 7 | pip install leptonai 8 | ``` 9 | 10 | ## Launch Tabby in the cloud 11 | 12 | Similar to other examples, you can run Tabby on Lepton Cloud Platform easily, e.g.: 13 | 14 | ```shell 15 | lep photon create -n tabby -m photon.py 16 | lep photon push -n tabby 17 | lep photon run \ 18 | -n tabby \ 19 | --resource-shape gpu.a10 \ 20 | --public 21 | ``` 22 | 23 | You can visit [dashboard.lepton.ai](https://dashboard.lepton.ai/) to use the web Dashboard, or use the `lep` cli to manage the launched deployment: 24 | ```shell 25 | lep deployment status -n tabby 26 | ``` 27 | 28 | You can configure the underlying model (default `TabbyML/StarCoder-1B`) with the `MODEL` environment variable, e.g.
to switch to use `TabbyML/DeepseekCoder-1.3B`: 29 | 30 | ```shell 31 | lep photon run \ 32 | -n tabby \ 33 | -e MODEL=TabbyML/DeepseekCoder-1.3B \ 34 | --resource-shape gpu.a10 \ 35 | --public 36 | ``` 37 | 38 | ## Client 39 | 40 | Once the Tabby server is up, you can use the deployment URL shown on the Lepton Dashboard (or in the `lep` cli output) 41 | 42 | 43 | 44 | as the API Endpoint of Tabby, to configure the supported [IDE extensions](https://tabby.tabbyml.com/docs/extensions). e.g. in VSCode: 45 | 46 | 47 | 48 | and start coding with the power of AI! 49 | 50 | 51 | -------------------------------------------------------------------------------- /advanced/tabbyml/assets/coding.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leptonai/examples/033f7c0d04c57c8c24b9e82894c3b330c5c74993/advanced/tabbyml/assets/coding.png -------------------------------------------------------------------------------- /advanced/tabbyml/assets/deployment.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leptonai/examples/033f7c0d04c57c8c24b9e82894c3b330c5c74993/advanced/tabbyml/assets/deployment.png -------------------------------------------------------------------------------- /advanced/tabbyml/assets/vscode.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leptonai/examples/033f7c0d04c57c8c24b9e82894c3b330c5c74993/advanced/tabbyml/assets/vscode.png -------------------------------------------------------------------------------- /advanced/tabbyml/photon.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from leptonai.photon import Photon 4 | 5 | 6 | class TabbyML(Photon): 7 | image: str = "tabbyml/tabby" 8 | cmd = [ 9 | "/opt/tabby/bin/tabby", 10 | "serve", 11 | "--model", 12 | os.environ.get("MODEL", "TabbyML/StarCoder-1B"), 13 | "--port", 14 | "8080", 15 | "--device", 16 | os.environ.get("DEVICE", "cuda"), 17 | ] 18 | 19 | deployment_template = { 20 | "resource_shape": "gpu.a10", 21 | "env": { 22 | "MODEL": "TabbyML/StarCoder-1B", 23 | }, 24 | "secret": [ 25 | "HUGGING_FACE_HUB_TOKEN", 26 | ], 27 | } 28 | -------------------------------------------------------------------------------- /advanced/tts/README.md: -------------------------------------------------------------------------------- 1 | # TTS 2 | 3 | This folder shows an end-to-end AI example, with the [Coqui AI TTS](https://github.com/coqui-ai/TTS/) text-to-speech library. The demo also shows how to run a photon with multimedia outputs (in this case a WAV response.) 4 | 5 | With this demo, you will be able to run TTS and get results like the following: 6 | 7 | 8 | 9 | and you can check out more details in the `tts.ipynb` notebook. 10 | 11 | ## Run tts locally 12 | 13 | Ensure that you have installed the required dependencies via `pip install -r requirements.txt`. Then, run: 14 | ```shell 15 | python tts_main.py 16 | ``` 17 | Note that if you have a GPU, things will run much faster. When the program runs, visit `http://0.0.0.0:8080/doc/` for the openapi doc, or use the client to access it in a programmatic way.
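For example, a client call against the local server could look like the sketch below. It assumes the default `tts_models/en/vctk/vits` model (which is multi-speaker, so a speaker is selected first); the handler and parameter names mirror `tts_main.py`:

```python
from leptonai.client import Client, local

c = Client(local())  # or Client("DEPLOYMENT_URL", token="...") for a cloud deployment

# The default VCTK model is multi-speaker: query the available speakers first.
speakers = c.speakers()

audio_bytes = c.tts(
    text="The quick brown fox jumps over the lazy dog.",
    speaker=speakers[0] if speakers else None,
)
with open("thequickbrownfox.wav", "wb") as f:
    f.write(audio_bytes)
```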
18 | 19 | ## Run tts in the cloud 20 | 21 | Similar to other examples, you can run tts with the following command: 22 | 23 | ```shell 24 | lep photon run -n tts -m tts_main.py --resource-shape gpu.a10 25 | ``` 26 | 27 | And visit [dashboard.lepton.ai](https://dashboard.lepton.ai/) to try out the model. 28 | 29 | Note: in default, the server is protected via a token, so you won't be able to access the gradio UI. This is by design to provide adequate security. If you want to make the UI public, you can either add the `--public` argument to `lep photon run`, or update the deployment with: 30 | 31 | ```shell 32 | lep deployment update -n tts --public 33 | ``` 34 | 35 | You can then use tts either via the UI or via the client. See the notebook example for more details. 36 | 37 | ## XTTS 38 | 39 | We also include an XTTS example that can be used to do voice cloning. More details to be written. -------------------------------------------------------------------------------- /advanced/tts/assets/thequickbrownfox.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leptonai/examples/033f7c0d04c57c8c24b9e82894c3b330c5c74993/advanced/tts/assets/thequickbrownfox.mp3 -------------------------------------------------------------------------------- /advanced/tts/requirements.txt: -------------------------------------------------------------------------------- 1 | leptonai 2 | TTS 3 | deepspeed -------------------------------------------------------------------------------- /advanced/tts/tts_main.py: -------------------------------------------------------------------------------- 1 | from io import BytesIO 2 | import os 3 | from threading import Lock 4 | from typing import List, Optional, Union, Dict 5 | 6 | from loguru import logger 7 | import torch 8 | 9 | from leptonai.photon import ( 10 | Photon, 11 | WAVResponse, 12 | HTTPException, 13 | FileParam, 14 | get_file_content, 15 | ) 16 | 17 | 18 | class Speaker(Photon): 19 | """ 20 | A TTS service that supports multiple models provided by coqui and others. 21 | 22 | To launch this photon and specify the model to use, you can pass in env 23 | variables during photon launch: 24 | --env MODEL_NAME=tts_models/en/vctk/vits 25 | And if you want to preload multiple models, you can pass in a comma-separated 26 | list of models: 27 | --env PRELOAD_MODELS=tts_models/en/vctk/vits,tts_models/multilingual/multi-dataset/xtts_v1 28 | """ 29 | 30 | requirement_dependency = ["TTS"] 31 | 32 | system_dependency = ["espeak-ng", "libsndfile1-dev"] 33 | 34 | handler_max_concurrency = 4 35 | 36 | MODEL_NAME = "tts_models/en/vctk/vits" 37 | # Or, you can choose some other models 38 | # MODEL_NAME = "tts_models/multilingual/multi-dataset/xtts_v1" 39 | 40 | # If you want to load multiple models at the same time, you can put it here 41 | # as a comma-separated string. For example: 42 | # PRELAOD_MODELS = "tts_models/en/vctk/vits,tts_models/multilingual/multi-dataset/xtts_v1" 43 | # Note that the default model will always be loaded. 44 | # Note that this might involve some extra memory - use at your own risk. 45 | PRELOAD_MODELS = "" 46 | 47 | def init(self): 48 | """ 49 | Initialize a default model. 
50 | """ 51 | 52 | # By using XTTS you agree to CPML license https://coqui.ai/cpml 53 | os.environ["COQUI_TOS_AGREED"] = "1" 54 | 55 | from TTS.api import TTS 56 | 57 | self._models: Dict[Union[str, None], TTS] = {} 58 | self._model_lock: Dict[Union[str, None], Lock] = {} 59 | 60 | self.MODEL_NAME = os.environ.get("MODEL_NAME", self.MODEL_NAME).strip() 61 | 62 | self.PRELOAD_MODELS = [ 63 | m 64 | for m in os.environ.get("PRELOAD_MODELS", self.PRELOAD_MODELS).split(",") 65 | if m 66 | ] 67 | if self.MODEL_NAME not in self.PRELOAD_MODELS: 68 | self.PRELOAD_MODELS.append(self.MODEL_NAME) 69 | 70 | logger.info("Loading the model...") 71 | for model_name in self.PRELOAD_MODELS: 72 | self._models[model_name] = self._load_model(model_name) 73 | self._model_lock[model_name] = Lock() 74 | self._models[None] = self._models[self.MODEL_NAME] 75 | self._model_lock[None] = self._model_lock[self.MODEL_NAME] 76 | logger.debug("Model loaded.") 77 | 78 | def _load_model(self, model_name: str): 79 | """ 80 | Internal function to load a model. We will assume that the model name 81 | is already sanity checked. 82 | """ 83 | from TTS.api import TTS 84 | 85 | use_gpu = torch.cuda.is_available() 86 | logger.debug(f"Loading model {model_name}... use_gpu: {use_gpu} ") 87 | try: 88 | model = TTS(model_name, progress_bar=False, gpu=use_gpu) 89 | except Exception as e: 90 | raise RuntimeError(f"Failed to load model {model_name}.") from e 91 | logger.debug(f"Loaded model {model_name}") 92 | logger.debug(f"Model {model_name} is_multilingual: {model.is_multi_lingual}") 93 | logger.debug(f"Model {model_name} is_multi_speaker: {model.is_multi_speaker}") 94 | try: 95 | # The below one seems to not always work with xtts models. 96 | if model.is_multi_lingual: 97 | logger.debug(f"Model {model_name} languages: {model.languages}") 98 | except AttributeError: 99 | try: 100 | # xtts models have a different way of accessing languages. 101 | logger.debug( 102 | f"Model {model_name} languages:" 103 | f" {model.synthesizer.tts_model.config.languages}" 104 | ) 105 | except Exception: 106 | # If neither of above works, we will just ignore it and not print 107 | # anything. 108 | pass 109 | if model.is_multi_speaker: 110 | logger.debug(f"Model {model_name} speakers: {model.speakers}") 111 | 112 | return model 113 | 114 | def _tts( 115 | self, 116 | text: str, 117 | model: Optional[str] = None, 118 | language: Optional[str] = None, 119 | speaker: Optional[str] = None, 120 | speaker_wav: Optional[str] = None, 121 | ) -> BytesIO: 122 | if model not in self._models: 123 | raise HTTPException( 124 | status_code=404, 125 | detail=f"Model {model} not loaded.", 126 | ) 127 | logger.info( 128 | f"Synthesizing '{text}' with language '{language}' and speaker '{speaker}'" 129 | ) 130 | # Many of the models might not be python thread safe, so we lock it. 131 | with self._model_lock[model]: 132 | wav = self._models[model].tts( 133 | text=text, 134 | language=language, # type: ignore 135 | speaker=speaker, # type: ignore 136 | speaker_wav=speaker_wav, 137 | ) 138 | return wav 139 | 140 | ########################################################################## 141 | # Photon handlers that are exposed to the external clients. 142 | ########################################################################## 143 | 144 | @Photon.handler(method="GET") 145 | def languages(self, model: Optional[str] = None) -> List[str]: 146 | """ 147 | Returns a list of languages supported by the current model. 
Empty list 148 | if no model is loaded, or the model does not support multiple languages. 149 | """ 150 | if model not in self._models: 151 | raise HTTPException( 152 | status_code=404, 153 | detail=f"Model {model} not loaded.", 154 | ) 155 | if not self._models[model].is_multi_lingual: 156 | return [] 157 | try: 158 | return self._models[model].languages 159 | except AttributeError: 160 | # xtts models have a different way of accessing languages. 161 | # if there are further errors, we don't handle them. 162 | return self._models[model].synthesizer.tts_model.config.languages 163 | 164 | @Photon.handler(method="GET") 165 | def speakers(self, model: Optional[str] = None) -> List[str]: 166 | """ 167 | Returns a list of speakers supported by the model. If the model is an 168 | XTTS model, this will return empty as you will need to use speaker_wav 169 | to synthesize speech. 170 | """ 171 | if model not in self._models: 172 | raise HTTPException( 173 | status_code=404, 174 | detail=f"Model {model} not loaded.", 175 | ) 176 | elif not self._models[model].is_multi_speaker: 177 | return [] 178 | else: 179 | return self._models[model].speakers 180 | 181 | @Photon.handler(method="GET") 182 | def models(self) -> List[str]: 183 | """ 184 | Returns a list of available models. 185 | """ 186 | return [k for k in self._models.keys() if k] 187 | 188 | @Photon.handler( 189 | example={ 190 | "text": "The quick brown fox jumps over the lazy dog.", 191 | } 192 | ) 193 | def tts( 194 | self, 195 | text: str, 196 | model: Optional[str] = None, 197 | language: Optional[str] = None, 198 | speaker: Optional[str] = None, 199 | speaker_wav: Union[None, str, FileParam] = None, 200 | ) -> WAVResponse: 201 | """ 202 | Synthesizes speech from text. Returns the synthesized speech as a WAV 203 | response. 204 | 205 | Pass in language if the model is multilingual. Pass in speaker if the model 206 | is multi-speaker. Pass in speaker_wav if the model is XTTS. The endpoint 207 | tries its best to return the correct error message if the parameters are 208 | not correct, but it may not be perfect. 209 | """ 210 | if model not in self._models: 211 | raise HTTPException( 212 | status_code=404, 213 | detail=f"Model {model} not loaded.", 214 | ) 215 | tts_model = self._models[model] 216 | if not tts_model.is_multi_lingual and language is not None: 217 | raise HTTPException( 218 | status_code=400, 219 | detail="Model is not multi-lingual, you should not pass in language.", 220 | ) 221 | if not tts_model.is_multi_speaker and speaker is not None: 222 | raise HTTPException( 223 | status_code=400, 224 | detail="Model is not multi-speaker, you should not pass in speaker.", 225 | ) 226 | if tts_model.is_multi_lingual and language is None: 227 | raise HTTPException( 228 | status_code=400, 229 | detail=( 230 | "Model is multi-lingual, you should pass in language. " 231 | " Use GET /languages to get available languages and pass in " 232 | " as optional parameters" 233 | ), 234 | ) 235 | if tts_model.is_multi_speaker and speaker is None: 236 | raise HTTPException( 237 | status_code=400, 238 | detail=( 239 | "Model is multi-speaker, you should pass in speaker. 
" 240 | " Use GET /speakers to get available speakers and pass in as " 241 | " optional parameters" 242 | ), 243 | ) 244 | 245 | try: 246 | if speaker_wav is not None: 247 | speaker_wav_file = get_file_content( 248 | speaker_wav, allow_local_file=False, return_file=True 249 | ) 250 | speaker_wav_file_name = speaker_wav_file.name 251 | else: 252 | speaker_wav_file_name = None 253 | wav = self._tts( 254 | text=text, 255 | language=language, 256 | speaker=speaker, 257 | speaker_wav=speaker_wav_file_name, 258 | ) 259 | wav_io = BytesIO() 260 | tts_model.synthesizer.save_wav(wav, wav_io) # type: ignore 261 | wav_io.seek(0) 262 | return WAVResponse(wav_io) 263 | except HTTPException: 264 | raise 265 | except TypeError as e: 266 | if "expected str, bytes or os.PathLike object, not NoneType" in str(e): 267 | raise HTTPException( 268 | status_code=400, 269 | detail=( 270 | "Speaker wav file is not provided. This is necessary when" 271 | " running an XTTS model to do voice cloning." 272 | ), 273 | ) from e 274 | except Exception as e: 275 | raise HTTPException( 276 | status_code=500, 277 | detail=f"Failed to synthesize speech. Details: {e}", 278 | ) from e 279 | 280 | 281 | if __name__ == "__main__": 282 | p = Speaker() 283 | p.launch() 284 | -------------------------------------------------------------------------------- /advanced/tts/xtts_main.py: -------------------------------------------------------------------------------- 1 | from io import BytesIO 2 | import os 3 | import subprocess 4 | from tempfile import NamedTemporaryFile 5 | from threading import Lock 6 | import time 7 | from typing import Optional, Union 8 | 9 | from loguru import logger 10 | 11 | from leptonai.photon import ( 12 | Photon, 13 | WAVResponse, 14 | HTTPException, 15 | FileParam, 16 | get_file_content, 17 | ) 18 | 19 | 20 | class XTTSSpeaker(Photon): 21 | """ 22 | A XTTS service that supports multiple models provided by coqui and others. 23 | 24 | To launch this photon and specify the model to use, you can pass in env 25 | variables during photon launch: 26 | --env MODEL_NAME=tts_models/multilingual/multi-dataset/xtts_v1.1 27 | """ 28 | 29 | requirement_dependency = ["TTS", "deepspeed"] 30 | 31 | system_dependency = ["ffmpeg", "espeak-ng", "libsndfile1-dev"] 32 | 33 | handler_max_concurrency = 4 34 | 35 | MODEL_NAME = "tts_models/multilingual/multi-dataset/xtts_v1.1" 36 | DEFAULT_DECODER = "ne_hifigan" 37 | 38 | def init(self): 39 | """ 40 | Initialize a default model. 
41 | """ 42 | 43 | # By using XTTS you agree to CPML license https://coqui.ai/cpml 44 | os.environ["COQUI_TOS_AGREED"] = "1" 45 | 46 | import torch 47 | from TTS.tts.configs.xtts_config import XttsConfig 48 | from TTS.tts.models.xtts import Xtts 49 | from TTS.utils.generic_utils import get_user_data_dir 50 | from TTS.utils.manage import ModelManager 51 | 52 | logger.info("Loading the xtts model...") 53 | try: 54 | self.MODEL_NAME = os.environ.get("MODEL_NAME", self.MODEL_NAME).strip() 55 | ModelManager().download_model(self.MODEL_NAME) 56 | model_path = os.path.join( 57 | get_user_data_dir("tts"), self.MODEL_NAME.replace("/", "--") 58 | ) 59 | config = XttsConfig() 60 | config.load_json(os.path.join(model_path, "config.json")) 61 | self._model = Xtts.init_from_config(config) 62 | self._model.load_checkpoint( 63 | config, 64 | checkpoint_path=os.path.join(model_path, "model.pth"), 65 | vocab_path=os.path.join(model_path, "vocab.json"), 66 | eval=True, 67 | use_deepspeed=torch.cuda.is_available(), 68 | ) 69 | # The xtts model's main chunk cannot be run in parallel, so we will need 70 | # to lock protect it. 71 | self._model_lock = Lock() 72 | self._supported_languages = self._model.config.languages 73 | if torch.cuda.is_available(): 74 | self._model.cuda() 75 | self._languages = config.languages 76 | except Exception as e: 77 | raise RuntimeError(f"Cannot load XTTS model {self.MODEL_NAME}") from e 78 | 79 | logger.debug("Model loaded.") 80 | 81 | def _tts( 82 | self, 83 | text: str, 84 | language: str, 85 | speaker_wav: Optional[str] = None, 86 | voice_cleanup: Optional[bool] = False, 87 | ): 88 | import torch 89 | 90 | if voice_cleanup: 91 | with NamedTemporaryFile(suffix=".wav", delete=False) as filtered_file: 92 | lowpass_highpass = "lowpass=8000,highpass=75," 93 | trim_silence = "areverse,silenceremove=start_periods=1:start_silence=0:start_threshold=0.02,areverse,silenceremove=start_periods=1:start_silence=0:start_threshold=0.02" 94 | shell_command = ( 95 | f"ffmpeg -y -i {speaker_wav} -af" 96 | f" {lowpass_highpass}{trim_silence} {filtered_file.name}".split(" ") 97 | ) 98 | logger.debug("Running ffmpeg command: " + " ".join(shell_command)) 99 | try: 100 | subprocess.run( 101 | shell_command, 102 | capture_output=False, 103 | text=True, 104 | check=True, 105 | stdout=subprocess.DEVNULL, 106 | stderr=subprocess.DEVNULL, 107 | ) 108 | except subprocess.CalledProcessError as e: 109 | logger.debug("Failed to run ffmpeg command: " + str(e)) 110 | logger.debug("Use original file") 111 | else: 112 | # filter succeeded - use filtered file. 113 | speaker_wav = filtered_file.name 114 | # critical part: cannot run in parallel threads. 
115 | with self._model_lock: 116 | # learn from speaker_wav 117 | start = time.time() 118 | logger.debug("Learning from speaker wav...") 119 | try: 120 | gpt_cond_latent, diffusion_conditioning, speaker_embedding = ( 121 | self._model.get_conditioning_latents(audio_path=speaker_wav) 122 | ) 123 | except Exception as e: 124 | raise HTTPException( 125 | status_code=400, 126 | detail="Failed to learn from speaker wav.", 127 | ) from e 128 | learned_time = time.time() 129 | logger.debug(f"Learned from speaker wav in {learned_time - start} seconds.") 130 | out = self._model.inference( 131 | text, 132 | language, 133 | gpt_cond_latent, 134 | speaker_embedding, 135 | diffusion_conditioning, 136 | decoder=self.DEFAULT_DECODER, 137 | ) 138 | logger.debug(f"Synthesized speech in {time.time() - learned_time} seconds.") 139 | if voice_cleanup: 140 | os.remove(filtered_file.name) # type: ignore 141 | return torch.tensor(out["wav"]).unsqueeze(0) 142 | 143 | ########################################################################## 144 | # Photon handlers that are exposed to the external clients. 145 | ########################################################################## 146 | @Photon.handler( 147 | example={ 148 | "text": "The quick brown fox jumps over the lazy dog.", 149 | } 150 | ) 151 | def tts( 152 | self, 153 | text: str, 154 | language: str, 155 | speaker_wav: Union[str, FileParam], 156 | voice_cleanup: bool = False, 157 | ) -> WAVResponse: 158 | """ 159 | Synthesizes speech from text. Returns the synthesized speech as a WAV 160 | response. The XTTS model is multi-lingual, so you need to specify the 161 | language - use language() to show a list of languages available. The 162 | model carries out voice transfer from the speaker wav file, so you need 163 | to specify the speaker wav file. The endpoint tries its best to return 164 | the correct error message if the parameters are not correct, but it may 165 | not be perfect. 166 | """ 167 | import torchaudio 168 | 169 | if language not in self._supported_languages: 170 | raise HTTPException( 171 | status_code=400, 172 | detail=( 173 | f"Language {language} not supported. Supported languages are:" 174 | f" {self._supported_languages}" 175 | ), 176 | ) 177 | 178 | try: 179 | speaker_wav_file = get_file_content( 180 | speaker_wav, allow_local_file=False, return_file=True 181 | ) 182 | except Exception: 183 | raise HTTPException( 184 | status_code=400, 185 | detail=f"Failed to read speaker wav file {speaker_wav}.", 186 | ) 187 | 188 | speaker_wav_file_name = speaker_wav_file.name 189 | wav = self._tts( 190 | text, 191 | language, 192 | speaker_wav=speaker_wav_file_name, 193 | voice_cleanup=voice_cleanup, 194 | ) 195 | wav_io = BytesIO() 196 | torchaudio.save(wav_io, wav, 24000, format="wav") 197 | wav_io.seek(0) 198 | return WAVResponse(wav_io) 199 | 200 | 201 | if __name__ == "__main__": 202 | p = XTTSSpeaker() 203 | p.launch() 204 | -------------------------------------------------------------------------------- /advanced/whisper-jax/README.md: -------------------------------------------------------------------------------- 1 | # Whisper 2 | 3 | This example demonstrates how to run optimized Whisper model on Lepton, and also how to integrate it with other application frameworks, in this case Slack. 4 | 5 | [whisper-jax](https://github.com/sanchit-gandhi/whisper-jax.git) is a JAX (optimized) port of the openai whisper model. It chunks audio data into segments and then performs batch inference to gain speedup. 
6 | 7 | 8 | ## Note on custom environment 9 | 10 | Usually, when you run different AI models, they require specific dependencies that sometimes conflict with each other. This is particularly true in the whisper case - from `requirements.txt`, you may notice that there are quite a bit of specific version requirements. 11 | 12 | This is where having a separate service like Lepton becomes super useful: we can create a python environment (using e.g. conda or virtualenv), installed the required dependencies, run the photon as a web service, and then in the regular python environment, simply call the web service as if we were using a regular python function. Comparing to some apparent choices: 13 | - unlike a single python environment, we don't need to resolve version conflicts of different algorithms; 14 | - unlike packing everything in a separate opaque container image, we are much more lightweighted: only a python environment and dependencies are needed. 15 | 16 | Here we provide a combination of jax + jaxlib + cuda/cudnn pip versions that can work together inside Lepton's default image, so you can pull it up with ease. 17 | 18 | ## Running with a custom environment. 19 | 20 | We recommend you use conda or virtualenv to start a whisper-specific environment. For example, if you use conda, it's easy to do: 21 | 22 | ```shell 23 | # pick a python version of your favorite 24 | conda create -n whisper python=3.10 25 | conda activate whisper 26 | ``` 27 | 28 | After that, install lepton [per the installation instruction](https://www.lepton.ai/docs/overview/quickstart#1-installation), and install the required dependencies of this demo via: 29 | ```shell 30 | pip install -r requirements.txt 31 | ``` 32 | 33 | Note that `torch` sometimes has a dependency conflict with the current jax gpu libraries. If you installed torch after the above pip install and encountered errors, re-run the above installation instruction. 34 | 35 | After this, you can launch whisper like: 36 | ```shell 37 | # Set the model id, or omit it to use the default large-v2 model. 38 | # Note that the large-v2 model may need ~10GB GPU memory to run. 39 | export WHISPER_MODEL_ID="openai/whisper-medium" 40 | python whisper-jax.py 41 | ``` 42 | 43 | It will download the paramaters and start the server. After that, use the regular python client to access the model: 44 | ```python 45 | from leptonai.client import Client, local 46 | c = Client(local()) 47 | ``` 48 | 49 | and invoke transcription or translation as follows: 50 | ```python 51 | >> c.run(inputs="assets/test_japanese.wav") 52 | '私たちはAIの株式会社であります' 53 | >> c.run(inputs="assets/test_japanese.wav", task="translate") 54 | 'We are an AI company.' 55 | ``` 56 | 57 | 58 | WE DO NOTE that for the Whisper demo, the first call will be very slow. This is because jax needs to do a bit of compilation and initialization - after that, the subsequent calls will be much faster. You may find it surprising - but for many AI deployments, the first run is usually slower due to such initialization overheads. As a good practice, if your model has such overheads, you can always do a "warm-up" call before the actual inference traffic. 59 | 60 | ## Running a slack translation bot 61 | 62 | The whisper-jax example also demonstrates how to use Slack bot to trigger inference. 
To use this feature, you need to create a slack app, and set the following environment variables: 63 | - `SLACK_VERIFICATION_TOKEN`: The verification token of your Slack app 64 | - `SLACK_BOT_TOKEN`: The bot token of your Slack app 65 | 66 | Let's go through the process one by one. 67 | 68 | ### Creating a slack app 69 | 70 | First you will need to create a slack app. Go to [https://api.slack.com/apps](https://api.slack.com/apps), and click "Create an App". Choose "From scratch", and select the app name and workspace you want to add the app to, like: 71 | 72 | 73 | 74 | In "OAuth & Permissions", add the following permissions to the app: 75 | - app_mentions:read 76 | - chat:write 77 | - files:read 78 | 79 | Which looks like the following: 80 | 81 | 82 | 83 | And install it to your workspace. After that, you should be able to see the Bot User OAuth Token like the following: 84 | 85 | 86 | 87 | Locate the verification token in the app's "Basic Information"-"App Credentials" section. Then, launch the service as follows: 88 | ```shell 89 | export SLACK_BOT_TOKEN="xoxb-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" 90 | export SLACK_VERIFICATION_TOKEN="xxxxxxxxxxxxxxxxxxxxxxxxxxx" 91 | python whisper-jax.py 92 | ``` 93 | 94 | You will need a public address for slack to connect. For example, if the whisper-jax bot tells you `Uvicorn running on http://0.0.0.0:8080`, and your public IP is `1.2.3.4`, you can go to the "Event Subscriptions" page of the slack app, and enable events like the following (make sure that the request URL says "Verified"): 95 | 96 | 97 | 98 | You should also be able to see the verification request in the logging info, like 99 | ``` 100 | 2023-08-09 16:35:16,119 - INFO: 34.203.249.192:47706 - "POST /slack HTTP/1.1" 200 OK 101 | ``` 102 | 103 | Now, in the "Event Subscriptions"-"Subscribe to bot events" page, add the bot user event "file_shared", and save. If everything goes well, you should be able to see the bot responding to voice inputs as follows: 104 | 105 | 106 | 107 | The whisper model is fairly versatile - in this case we are showing the medium-sized model, and it is already able to recognize multiple languages with fairly good accuracy. 108 | 109 | ## Running with Lepton 110 | 111 | The above example runs on the local machine. If your machine does not have a public facing IP, or more commonly, you want a stable server environment to host your model - then running on the Lepton cloud platform is the best option. To run it on Lepton, you can simply create a photon and push it to the cloud: 112 | 113 | ```shell 114 | lep login 115 | lep photon create -n whisper -m whisper-jax.py 116 | lep photon push -n whisper 117 | # An A10 machine is usually big enough to run the large-v2 model. 118 | lep photon run -n whisper \ 119 | --resource-shape gpu.a10 \ 120 | --public \ 121 | --env WHISPER_MODEL_ID="openai/whisper-large-v2" \ 122 | --env SLACK_BOT_TOKEN="xoxb-xxxxxxxxxxxxxxxxxxxxxxxxxx" \ 123 | --env SLACK_VERIFICATION_TOKEN="xxxxxxxxxxxxxxxxxxxxxxxxxxx" 124 | ``` 125 | 126 | Note that because we want Slack to be able to call the API, we made it a public deployment.
After that, you can use the `lep deployment status` to obtain the public address of the photon, and use the same slack app to connect to it: 127 | ```shell 128 | >> lep deployment status -n whisper 129 | Created at: 2023-08-09 20:24:48 130 | Photon ID: whisper-6t01ptsf 131 | State: Running 132 | Endpoint: https://latest-whisper.cloud.lepton.ai 133 | Is Public: Yes 134 | Replicas List: 135 | ┏━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━━━┓ 136 | ┃ replica id ┃ status ┃ message ┃ 137 | ┡━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━━━┩ 138 | │ whisper-f9dbd6757-l8hms │ Ready │ (empty) │ 139 | └─────────────────────────┴────────┴─────────┘ 140 | ``` 141 | When you obtain the endpoint URL above, simply change the event subscription URL to the new endpoint, such as in this case, `https://latest-whisper.cloud.lepton.ai/slack`, and you should be able to use the same slack app. 142 | 143 | Unlike local deployment, running on the Lepton cloud platform comes with a series of advantages, especially in the whisper case: 144 | - You do not need to worry about reproducible software environment. The photon is guaranteed to run on the same environment as you created it. 145 | - Scaling is easier - you can simply increase the number of replicas if you need more capacity. 146 | - Automatic fault tolerance - if the photon crashes, it will be automatically restarted. 147 | 148 | Happy building! 149 | -------------------------------------------------------------------------------- /advanced/whisper-jax/assets/bot_token.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leptonai/examples/033f7c0d04c57c8c24b9e82894c3b330c5c74993/advanced/whisper-jax/assets/bot_token.png -------------------------------------------------------------------------------- /advanced/whisper-jax/assets/create_slack_app.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leptonai/examples/033f7c0d04c57c8c24b9e82894c3b330c5c74993/advanced/whisper-jax/assets/create_slack_app.png -------------------------------------------------------------------------------- /advanced/whisper-jax/assets/event_subscription.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leptonai/examples/033f7c0d04c57c8c24b9e82894c3b330c5c74993/advanced/whisper-jax/assets/event_subscription.png -------------------------------------------------------------------------------- /advanced/whisper-jax/assets/permissions.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leptonai/examples/033f7c0d04c57c8c24b9e82894c3b330c5c74993/advanced/whisper-jax/assets/permissions.png -------------------------------------------------------------------------------- /advanced/whisper-jax/assets/test_japanese.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leptonai/examples/033f7c0d04c57c8c24b9e82894c3b330c5c74993/advanced/whisper-jax/assets/test_japanese.wav -------------------------------------------------------------------------------- /advanced/whisper-jax/assets/whisper_result.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leptonai/examples/033f7c0d04c57c8c24b9e82894c3b330c5c74993/advanced/whisper-jax/assets/whisper_result.jpg 
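Besides the Slack integration, the cloud deployment can also be called programmatically with the same client pattern used locally. A minimal sketch, assuming the endpoint shown in the `lep deployment status` output above and a workspace token from the dashboard:

```python
from leptonai.client import Client

# Replace the URL and token with the values for your own deployment.
c = Client("https://latest-whisper.cloud.lepton.ai", token="YOUR_TOKEN_HERE")

text = c.run(
    inputs="https://huggingface.co/datasets/Narsil/asr_dummy/resolve/main/1.flac",
    task="transcribe",
)
print(text)
```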
-------------------------------------------------------------------------------- /advanced/whisper-jax/requirements.txt: -------------------------------------------------------------------------------- 1 | git+https://github.com/sanchit-gandhi/whisper-jax.git@0d3bc54 2 | cached_property 3 | nvidia-cudnn-cu11==8.6.0.163 4 | -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html 5 | jax==0.4.13 6 | jaxlib==0.4.13+cuda11.cudnn86 7 | slack_sdk -------------------------------------------------------------------------------- /advanced/whisper-jax/whisper-jax.py: -------------------------------------------------------------------------------- 1 | """This example demonstrates how to run optimized Whisper model on 2 | Lepton. 3 | 4 | [whisper-jax](https://github.com/sanchit-gandhi/whisper-jax.git) is a 5 | JAX (optimized) port of the openai whisper model. It chunks audio data 6 | into segments and then performs batch inference to gain speedup. 7 | 8 | Installing JAX is a bit tricky, so here we provide a combination of 9 | jax + jaxlib + cuda/cudnn pip versions that can work together inside 10 | Lepton's default image. 11 | 12 | Whisper has a set of model ids that you can use. This is specified by an 13 | environment variable "WHISPER_MODEL_ID". By default, it uses "openai/whisper-large-v2". 14 | The list of available models are "openai/whisper-{size}" where size can be one of 15 | the following: 16 | tiny, base, small, medium, large, large-v2 17 | See https://github.com/sanchit-gandhi/whisper-jax for more details. 18 | 19 | Optionally, you can also set the environment variable "BATCH_SIZE" to 20 | change the batch size of the inference. By default, it is 4. 21 | 22 | In addition, this example also demonstrates how to use Slack bot to 23 | trigger inference. To use this feature, you need to set the following 24 | environment variables: 25 | - `SLACK_VERIFICATION_TOKEN`: The verification token of your Slack app 26 | - `SLACK_BOT_TOKEN`: The bot token of your Slack app 27 | """ 28 | 29 | from datetime import datetime, timedelta 30 | import os 31 | import tempfile 32 | from typing import Optional, Dict, Any 33 | 34 | from loguru import logger 35 | import requests 36 | 37 | from leptonai.photon import Photon, HTTPException 38 | 39 | 40 | class Whisper(Photon): 41 | """ 42 | A photon implementatio 43 | """ 44 | 45 | # note: 46 | requirement_dependency = [ 47 | "git+https://github.com/sanchit-gandhi/whisper-jax.git@0d3bc54", 48 | "cached_property", 49 | "nvidia-cudnn-cu11==8.6.0.163", 50 | "-f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html", 51 | "jax==0.4.13", 52 | "jaxlib==0.4.13+cuda11.cudnn86", 53 | "slack_sdk", 54 | ] 55 | 56 | # note: system_dependency specifies what should be installed via `apt install` 57 | system_dependency = [ 58 | "ffmpeg", 59 | ] 60 | 61 | def init(self): 62 | # Implementation note: strictly speaking, this is not recommended by Python 63 | # as all imports should be places on the top of the file. However, this shows 64 | # a small trick when a local installation isn't really possible, such as 65 | # installing all the jax and cuda dependencies on a mac machine. We can defer 66 | # the import inside the actual Photon class. 67 | # Of course, this makes the debugging duty to the remote execution time, and 68 | # is going to be a bit harder. This is a conscious tradeoff between development 69 | # speed and debugging speed. 70 | logger.info("Initializing Whisper model. 
This might take a while...") 71 | from whisper_jax import FlaxWhisperPipline 72 | import jax.numpy as jnp 73 | 74 | model_id = os.environ.get("WHISPER_MODEL_ID", "openai/whisper-large-v2") 75 | batch_size = os.environ.get("BATCH_SIZE", 4) 76 | logger.info(f"Using model id: {model_id} and batch size: {batch_size}") 77 | self.pipeline = FlaxWhisperPipline( 78 | model_id, dtype=jnp.float16, batch_size=batch_size 79 | ) 80 | logger.info("Initialized Whisper model.") 81 | logger.info("Initializing slack bot...") 82 | self._init_slack_bot() 83 | 84 | def _init_slack_bot(self): 85 | """ 86 | Initializes the slack bot client. 87 | """ 88 | from slack_sdk import WebClient as SlackClient 89 | 90 | self._verification_token = os.environ.get("SLACK_VERIFICATION_TOKEN", None) 91 | self._slack_bot_token = os.environ.get("SLACK_BOT_TOKEN", None) 92 | if self._slack_bot_token: 93 | self._slack_bot_client = SlackClient(token=self._slack_bot_token) 94 | else: 95 | logger.warning("Slack bot token not configured. Slack bot will not work.") 96 | self._processed_slack_tasks = {} 97 | 98 | @Photon.handler( 99 | "run", 100 | example={ 101 | "inputs": ( 102 | "https://huggingface.co/datasets/Narsil/asr_dummy/resolve/main/1.flac" 103 | ) 104 | }, 105 | ) 106 | def run(self, inputs: str, task: Optional[str] = None) -> str: 107 | """ 108 | Transcribe or translate an audio input file to a text transcription. 109 | 110 | Args: 111 | inputs: the filename or url of the audio file. 112 | task (optional): either `"transcribe"` or `"translate"`. Defaults to `"transcribe"`. 113 | 114 | Returns: 115 | text: the transcription of the audio file. 116 | """ 117 | return self.pipeline(inputs, task=task)["text"] 118 | 119 | async def _slack_process_task( 120 | self, channel: str, thread_ts: Optional[str], url: str 121 | ): 122 | """ 123 | Internal method to process a slack task. This is called by the `slack` handler. 124 | """ 125 | last_processed_time = self._processed_slack_tasks.get((channel, url)) 126 | if last_processed_time and datetime.now() - last_processed_time < timedelta( 127 | seconds=20 128 | ): 129 | logger.info( 130 | f"Skip processing slack task: ({channel}, {url}) since it was processed" 131 | f" recently: {last_processed_time}" 132 | ) 133 | return 134 | 135 | logger.info(f"Processing audio file: {url}") 136 | with tempfile.NamedTemporaryFile("wb", suffix="." + url.split(".")[-1]) as f: 137 | logger.info(f"Start downloading audio file to: {f.name}") 138 | res = requests.get( 139 | url, 140 | allow_redirects=True, 141 | headers={"Authorization": f"Bearer {self._slack_bot_token}"}, 142 | ) 143 | res.raise_for_status() 144 | logger.info(f"Downloaded audio file (total bytes: {len(res.content)})") 145 | f.write(res.content) 146 | f.flush() 147 | f.seek(0) 148 | logger.info(f"Saved audio file to: {f.name}") 149 | logger.info(f"Running inference on audio file: {f.name}") 150 | try: 151 | text = self.run(f.name) 152 | except Exception: 153 | logger.error(f"Failed to run inference on audio file: {f.name}") 154 | return 155 | logger.info(f"Finished inference on audio file: {f.name}") 156 | self._slack_bot_client.chat_postMessage( 157 | channel=channel, 158 | thread_ts=thread_ts, 159 | text=text, 160 | ) 161 | if len(self._processed_slack_tasks) > 100: 162 | # clean up task histories that are too old. 
163 | self._processed_slack_tasks = { 164 | k: v 165 | for k, v in self._processed_slack_tasks.items() 166 | if datetime.now() - v < timedelta(seconds=20) 167 | } 168 | self._processed_slack_tasks[(channel, url)] = datetime.now() 169 | 170 | # This is a handler that receives slack events. It is triggered by the 171 | # slack server side - see the Slack Events API for details: 172 | # https://api.slack.com/apis/connections/events-api#receiving-events 173 | # and more specs here: 174 | # https://github.com/slackapi/slack-api-specs 175 | @Photon.handler 176 | def slack( 177 | self, 178 | token: str, 179 | type: str, 180 | event: Optional[Dict[str, Any]] = None, 181 | challenge: Optional[str] = None, 182 | **extra, 183 | ) -> str: 184 | # Checks if the slack bot is configured. Note that we are still using the 185 | # now deprecated verification token, and supporting the new signing secrets 186 | # is a todo item. 187 | if not self._verification_token or not self._slack_bot_token: 188 | raise HTTPException(401, "Slack bot not configured.") 189 | # If the token is not correct, we return 401. 190 | if token != self._verification_token: 191 | raise HTTPException(401, "Invalid token.") 192 | # We will respond to the challenge request if it is a url_verification event, 193 | # so that slack can verify our endpoint. 194 | if type == "url_verification": 195 | if challenge: 196 | return challenge 197 | else: 198 | raise HTTPException(400, "Missing challenge") 199 | 200 | # If event is not present, the request is malformed and we reject it. 201 | if not event: 202 | raise HTTPException(400, "Missing event") 203 | 204 | # Actually handle the slack event. We will only handle file_shared events. 205 | event_type = event["type"] 206 | logger.info(f"Received slack event: {event_type}") 207 | if event_type == "file_shared": 208 | channel = event["channel_id"] 209 | thread_ts = event.get("thread_ts") 210 | file_id = event["file_id"] 211 | file_info = self._slack_bot_client.files_info(file=file_id) 212 | if not file_info["ok"]: 213 | raise HTTPException(500, "Failed to get file info from slack") 214 | self.add_background_task( 215 | self._slack_process_task, 216 | channel, 217 | thread_ts, 218 | file_info["file"]["url_private"], 219 | ) 220 | return "ok" 221 | else: 222 | logger.info(f"Ignored slack event type: {event_type}") 223 | return "ok" 224 | 225 | 226 | if __name__ == "__main__": 227 | w = Whisper() 228 | w.launch() 229 | -------------------------------------------------------------------------------- /advanced/whisperx/README.md: -------------------------------------------------------------------------------- 1 | # WhisperX 2 | 3 | This example demonstrates how to run the WhisperX model on Lepton. [WhisperX](https://github.com/m-bain/whisperX) is similar to the whisper example, but does transcription, alignment, and diarization for the input. 4 | 5 | The following example shows the WhisperX demo that allows one to very quickly recognize short audio clips. For long audio such as podcasts, instead of a long-running API call, it is easier to build a service that does the actual recognition as a background task - see the notebook [What does a great podcast sound like?](audio_analysis.ipynb) as an end-to-end example. 6 | 7 | ## Note on custom environment 8 | 9 | Similar to the Whisper JAX example, if you are running locally, we recommend that you use a custom environment like `conda` or `virtualenv`. 10 | 11 | Usually, when you run different AI models, they require specific dependencies that sometimes conflict with each other. 
This is particularly true in the whisper case - from `requirements.txt`, you may notice that there are quite a few specific version requirements. 12 | 13 | This is where having a separate service like Lepton becomes super useful: we can create a python environment (using e.g. conda or virtualenv), install the required dependencies, run the photon as a web service, and then, in the regular python environment, simply call the web service as if we were using a regular python function. Compared to the apparent alternatives: 14 | 15 | - unlike a single python environment, we don't need to resolve version conflicts between different algorithms. 16 | 17 | ## Prerequisite 18 | 19 | Note that one of the dependencies relies on 3 Hugging Face Hub models that require you to sign some terms of usage beforehand; otherwise it will throw an error. Simply proceed to the pages for [Segmentation](https://huggingface.co/pyannote/segmentation), [Voice Activity Detection (VAD)](https://huggingface.co/pyannote/voice-activity-detection), and [Speaker Diarization](https://huggingface.co/pyannote/speaker-diarization) and sign the terms. 20 | 21 | ![Pyannote Model Term Agreement](assets/pyannote.png) 22 | 23 | You would also need a Hugging Face Access Token at hand. Simply follow the steps in the [official guide](https://huggingface.co/docs/hub/security-tokens). 24 | 25 | ## Running with a custom environment 26 | 27 | We recommend you use conda or virtualenv to create a whisperx-specific environment. For example, if you use conda, it's easy to do: 28 | 29 | ```shell 30 | # pick your favorite python version 31 | conda create -n whisperx python=3.10 32 | conda activate whisperx 33 | ``` 34 | 35 | After that, install lepton [per the installation instructions](https://www.lepton.ai/docs/overview/quickstart#1-installation), and install the required dependencies of this demo via: 36 | 37 | ```shell 38 | pip install -r requirements.txt 39 | ``` 40 | 41 | After this, you can launch whisperx like: 42 | 43 | ```shell 44 | # Set your huggingface token. This is required to obtain the respective models. 45 | export HUGGING_FACE_HUB_TOKEN="replace-with-your-own-token" 46 | python main.py 47 | ``` 48 | 49 | It will download the parameters and start the server. 
After that, use the regular python client to access the model: 50 | 51 | ```python 52 | from leptonai.client import Client, local 53 | c = Client(local()) 54 | ``` 55 | 56 | and invoke transcription or translation as follows: 57 | 58 | ```python 59 | >> c.run(input=FileParam(open("assets/thequickbrownfox.mp3", "rb"))) 60 | [{'start': 0.028, 61 | 'end': 2.06, 62 | 'text': ' A quick brown fox jumps over the lazy dog.', 63 | 'words': [{'word': 'A', 'start': 0.028, 'end': 0.068, 'score': 0.5}, 64 | {'word': 'quick', 'start': 0.109, 'end': 0.31, 'score': 0.995}, 65 | {'word': 'brown', 66 | 'start': 0.35, 67 | 'end': 0.571, 68 | 'score': 0.849, 69 | 'speaker': 'SPEAKER_00'}, 70 | {'word': 'fox', 71 | 'start': 0.612, 72 | 'end': 0.853, 73 | 'score': 0.897, 74 | 'speaker': 'SPEAKER_00'}, 75 | {'word': 'jumps', 76 | 'start': 0.893, 77 | 'end': 1.175, 78 | 'score': 0.867, 79 | 'speaker': 'SPEAKER_00'}, 80 | {'word': 'over', 81 | 'start': 1.255, 82 | 'end': 1.416, 83 | 'score': 0.648, 84 | 'speaker': 'SPEAKER_00'}, 85 | {'word': 'the', 86 | 'start': 1.456, 87 | 'end': 1.517, 88 | 'score': 0.998, 89 | 'speaker': 'SPEAKER_00'}, 90 | {'word': 'lazy', 91 | 'start': 1.557, 92 | 'end': 1.839, 93 | 'score': 0.922, 94 | 'speaker': 'SPEAKER_00'}, 95 | {'word': 'dog.', 96 | 'start': 1.859, 97 | 'end': 2.06, 98 | 'score': 0.998, 99 | 'speaker': 'SPEAKER_00'}], 100 | 'speaker': 'SPEAKER_00'}] 101 | ``` 102 | 103 | ## Running with Lepton 104 | 105 | The above example runs on the local machine. If your machine does not have a public facing IP, or more commonly, you want a stable server environment to host your model - then running on the Lepton cloud platform is the best option. To run it on Lepton, you can simply create a photon and push it to the cloud. 106 | 107 | To have HuggingFace Hub API access function properly, we would also need it set as an available environment variable in the cloud. To do so, simply run the following command to store it as a [secret](https://www.lepton.ai/docs/advanced/env_n_secrets): 108 | 109 | ```shell 110 | lep secret create -n HUGGING_FACE_HUB_TOKEN -v VALUE_OF_YOUR_TOKEN 111 | ``` 112 | 113 | You can run the following command to confirm that the secret is stored properly: 114 | 115 | ```shell 116 | lep secret list 117 | ``` 118 | 119 | which should return something like below 120 | 121 | ```txt 122 | Secrets 123 | ┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━┓ 124 | ┃ ID ┃ Value ┃ 125 | ┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━┩ 126 | │ HUGGING_FACE_HUB_TOKEN │ (hidden) │ 127 | └────────────────────────┴──────────┘ 128 | ``` 129 | 130 | Now you can proceed to photo creation and deployment by running the following command: 131 | 132 | ```shell 133 | lep login 134 | lep photon create -n whisperx -m main.py 135 | lep photon push -n whisperx 136 | # An A10 machine is usually big enough to run the large-v2 model. 
137 | # note you need to specify the secret that needs to be available in the run 138 | lep photon run -n whisperx --resource-shape gpu.a10 --secret HUGGING_FACE_HUB_TOKEN 139 | ``` 140 | 141 | After that, you can use `lep deployment status` to obtain the endpoint of the deployment, and use the Python client to connect to it: 142 | 143 | ```shell 144 | >> lep deployment status -n whisperx 145 | Created at: 2023-08-09 20:24:48 146 | Created at: 2023-08-16 11:08:56 147 | Photon ID: whisperx-bsip0d8q 148 | State: Running 149 | Endpoint: https://latest-whisperx.cloud.lepton.ai 150 | Is Public: No 151 | Replicas List: 152 | ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━━━┓ 153 | ┃ replica id ┃ status ┃ message ┃ 154 | ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━━━┩ 155 | │ whisperx-5ddc79f555-l59cj │ Ready │ (empty) │ 156 | └───────────────────────────┴────────┴─────────┘ 157 | 1 out of 1 replicas ready. 158 | ``` 159 | 160 | To access the model, we can create a client similar to the local case, simply replacing `local()` with the workspace name, deployment name, and token. Also, since we are running remotely now, we will need to upload the audio file; this is done by wrapping it in a `FileParam`: 161 | 162 | ```python 163 | >> from leptonai.client import Client 164 | >> from leptonai.photon import FileParam 165 | >> c = Client("YOUR_WORKSPACE_NAME", "whisperx", token="YOUR_TOKEN") 166 | >> c.run(input=FileParam(open("assets/thequickbrownfox.mp3", "rb"))) 167 | [{'start': 0.028, 168 | 'end': 2.06, 169 | 'text': ' A quick brown fox jumps over the lazy dog.', 170 | 'words': [{'word': 'A', 'start': 0.028, 'end': 0.068, 'score': 0.5}, 171 | {'word': 'quick', 'start': 0.109, 'end': 0.31, 'score': 0.995}, 172 | {'word': 'brown', 173 | 'start': 0.35, 174 | 'end': 0.571, 175 | 'score': 0.849, 176 | 'speaker': 'SPEAKER_00'}, 177 | {'word': 'fox', 178 | 'start': 0.612, 179 | 'end': 0.853, 180 | 'score': 0.897, 181 | 'speaker': 'SPEAKER_00'}, 182 | {'word': 'jumps', 183 | 'start': 0.893, 184 | 'end': 1.175, 185 | 'score': 0.867, 186 | 'speaker': 'SPEAKER_00'}, 187 | {'word': 'over', 188 | 'start': 1.255, 189 | 'end': 1.416, 190 | 'score': 0.648, 191 | 'speaker': 'SPEAKER_00'}, 192 | {'word': 'the', 193 | 'start': 1.456, 194 | 'end': 1.517, 195 | 'score': 0.998, 196 | 'speaker': 'SPEAKER_00'}, 197 | {'word': 'lazy', 198 | 'start': 1.557, 199 | 'end': 1.839, 200 | 'score': 0.922, 201 | 'speaker': 'SPEAKER_00'}, 202 | {'word': 'dog.', 203 | 'start': 1.859, 204 | 'end': 2.06, 205 | 'score': 0.998, 206 | 'speaker': 'SPEAKER_00'}], 207 | 'speaker': 'SPEAKER_00'}] 208 | ``` 209 | 210 | Unlike local deployment, running on the Lepton cloud platform comes with a series of advantages, especially in the whisperx case: 211 | 212 | - You do not need to worry about a reproducible software environment. The photon is guaranteed to run in the same environment as the one it was created in. 213 | - Scaling is easier - you can simply increase the number of replicas if you need more capacity. 214 | - Automatic fault tolerance - if the photon crashes, it will be automatically restarted. 215 | 216 | Happy building! 
217 | -------------------------------------------------------------------------------- /advanced/whisperx/assets/negative_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leptonai/examples/033f7c0d04c57c8c24b9e82894c3b330c5c74993/advanced/whisperx/assets/negative_example.png -------------------------------------------------------------------------------- /advanced/whisperx/assets/positive_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leptonai/examples/033f7c0d04c57c8c24b9e82894c3b330c5c74993/advanced/whisperx/assets/positive_example.png -------------------------------------------------------------------------------- /advanced/whisperx/assets/pyannote.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leptonai/examples/033f7c0d04c57c8c24b9e82894c3b330c5c74993/advanced/whisperx/assets/pyannote.png -------------------------------------------------------------------------------- /advanced/whisperx/assets/silent.m4a: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leptonai/examples/033f7c0d04c57c8c24b9e82894c3b330c5c74993/advanced/whisperx/assets/silent.m4a -------------------------------------------------------------------------------- /advanced/whisperx/assets/thequickbrownfox.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leptonai/examples/033f7c0d04c57c8c24b9e82894c3b330c5c74993/advanced/whisperx/assets/thequickbrownfox.mp3 -------------------------------------------------------------------------------- /advanced/whisperx/requirements.txt: -------------------------------------------------------------------------------- 1 | torch 2 | torchaudio 3 | leptonai 4 | pyannote.audio 5 | git+https://github.com/m-bain/whisperx.git@e9c507ce5dea0f93318746411c03fed0926b70be 6 | -------------------------------------------------------------------------------- /assets/logo.svg: -------------------------------------------------------------------------------- 1 | 2 | 4 | 10 | 29 | 40 | 42 | 44 | 45 | -------------------------------------------------------------------------------- /getting-started/README.md: -------------------------------------------------------------------------------- 1 | # Getting Started 2 | 3 | This folder contains simple examples to show the concepts of Lepton photons. Please refer to the subdirectories for more detailed instruction for how to run them - the source code usually contains self-explanatory comments. 4 | 5 | ## Example list 6 | - counter: simple demonstrative photons to implement a counter, and a safe counter using file as state storage. More related reads: [Anatomy of a photon](https://www.lepton.ai/docs/walkthrough/anatomy_of_a_photon), [Storage](https://www.lepton.ai/docs/advanced/storage) 7 | - extra_files: an example to show how one can include additional files to the photon. 8 | - shell: a photon that runs a simple shell, and returns stdout and stderr. 9 | - custom-image: an example that uses custom Docker image in your Photon. 
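
If you just want a feel for the overall shape of a photon before opening the subdirectories, below is a minimal sketch that combines the common ingredients the examples above demonstrate: a `Photon` subclass, one-time setup in `init`, an HTTP handler, and a local launch. The `Greeter` class and `greet` handler are made-up names for illustration only; see the individual examples for runnable, documented versions.

```python
from leptonai.photon import Photon


class Greeter(Photon):
    # init() runs once when the deployment starts; put one-time setup
    # (e.g. loading a model) here.
    def init(self):
        self.prefix = "Hello"

    # Each handler becomes an HTTP endpoint of the deployment (here: /greet).
    @Photon.handler("greet")
    def greet(self, name: str) -> str:
        return f"{self.prefix}, {name}!"


if __name__ == "__main__":
    # Runs a local server; `lep photon create/push/run` deploys the same
    # class remotely.
    Greeter().launch()
```

Calling it mirrors the counter example: create a `Client` (either `Client("http://localhost:8080")` locally, or with your workspace id and deployment name remotely) and invoke `client.greet(name="Lepton")`.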
10 | -------------------------------------------------------------------------------- /getting-started/counter/counter.py: -------------------------------------------------------------------------------- 1 | """ 2 | A simple example to show a minimal example of a photon: a counter that keeps 3 | states in memory. Note that this is for illustrative purpose only - read the 4 | fine prints in the class comments. 5 | 6 | To launch a counter, run: 7 | lep photon create -n counter -m counter.py:Counter 8 | # run locally 9 | lep photon run -n counter 10 | # or if you want to run things remote, first push the photon 11 | lep photon push -n counter 12 | lep photon run -n counter -dn counter 13 | 14 | To test the photon, you can either use the API explorer in the UI, or use 15 | the photon client class in python, e.g. 16 | from leptonai.client import Client 17 | # If you are runnnig the photon remotely with workspace id "myworkspace" 18 | # and deployment name "counter" 19 | client = Client("myworkspace", "counter") 20 | # Or if you are running the photon locally at port 8080 21 | client = Client("http://localhost:8080") 22 | # Do NOT run the above two commands at the same time! Choose only one. 23 | print(client.add(x=3)) 24 | print(client.sub(x=5)) 25 | """ 26 | 27 | from leptonai.photon import Photon 28 | 29 | 30 | class Counter(Photon): 31 | """ 32 | A simple example showing a counter. The counter is initialized to 0 and 33 | can be incremented or decremented by calling the ``add`` or ``sub`` methods. 34 | 35 | Note that this is not a safe counter: when there are multiple replicas, 36 | every replica will have its own counter. Also, when the deployment restarts, 37 | the counter will be reset to 0. It is an example to show how not to assume 38 | that the deployments are automatically stateful. Remember, deployments are 39 | stateless by default unless you use a stateful storage like Lepton storage, 40 | or a database. 41 | 42 | An example to implement a minimal stateful counter is shown in the 43 | separate safe_counter example. 44 | """ 45 | 46 | def init(self): 47 | self.counter = 0 48 | 49 | @Photon.handler("add") 50 | def add(self, x: int) -> int: 51 | self.counter += x 52 | return self.counter 53 | 54 | @Photon.handler("sub") 55 | def sub(self, x: int) -> int: 56 | return self.add(-x) 57 | 58 | 59 | if __name__ == "__main__": 60 | Counter().launch() 61 | -------------------------------------------------------------------------------- /getting-started/counter/safe_counter.py: -------------------------------------------------------------------------------- 1 | """ 2 | A simple example of a safe counter that utilizes Lepton storage to keep 3 | states persistent. For the details, please refer to the class comments 4 | 5 | To launch a safe counter, you need to have a Lepton storage attached. Run: 6 | lep photon create -n safe-counter -m safe_counter.py:SafeCounter 7 | # run locally to debug 8 | sudo mkdir /mnt/leptonstore 9 | sudo chown $USER /mnt/leptonstore 10 | lep photon run -n safe-counter --local 11 | # or if you want to run things remote, first push the photon 12 | lep photon push -n safe-counter 13 | lep photon run -n safe-counter -dn safe-counter --mount /:/mnt/leptonstore 14 | 15 | To test the photon, you can either use the API explorer in the UI, or use 16 | the photon client class in python, e.g. 
17 | from leptonai.client import Client 18 | # If you are running the photon remotely with workspace id "myworkspace" 19 | # and deployment name "safe-counter" 20 | client = Client("myworkspace", "safe-counter") 21 | # Or if you are running the photon locally at port 8080 22 | client = Client("http://localhost:8080") 23 | # Do NOT run the above two commands at the same time! Choose only one. 24 | print(client.add(x=3)) 25 | print(client.sub(x=5)) 26 | etc. You can try to stop and restart the safe counter and see that the counter 27 | is persistent. 28 | """ 29 | 30 | import errno 31 | import fcntl 32 | import os 33 | import time 34 | 35 | from fastapi import HTTPException 36 | 37 | from leptonai.photon import Photon 38 | 39 | 40 | class SafeCounter(Photon): 41 | """ 42 | An example showing a safe counter using Lepton storage. Note that in actual 43 | production, you should probably use a locking mechanism better than files, 44 | such as a database. 45 | 46 | This deployment is stateful, and will be automatically recovered when the 47 | deployment restarts. It also keeps the counter consistent across replicas. 48 | It is not "perfectly safe" - if a replica dies before it can finish writing 49 | and closing the file, the update may be lost or left in an inconsistent state. 50 | 51 | To run this example, you need to have a Lepton storage attached to the 52 | deployment. You can do this by adding the following to the run command: 53 | --mount [storage path you want to use]:/mnt/leptonstore 54 | The simplest option for [storage path you want to use] is to use the root 55 | path of the storage, aka ``--mount /:/mnt/leptonstore``. 56 | """ 57 | 58 | PATH = "/mnt/leptonstore/safe_counter.txt" 59 | 60 | def init(self): 61 | # checks if the folder containing the file exists 62 | if not os.path.exists(os.path.dirname(self.PATH)): 63 | raise RuntimeError( 64 | "SafeCounter requires a Lepton storage to be attached to the deployment" 65 | " at /mnt/leptonstore." 66 | ) 67 | # checks if the file exists 68 | if not os.path.exists(self.PATH): 69 | # if not, create the file and write 0 to it. Strictly speaking, this 70 | # may have a race condition, but it is unlikely to happen in practice 71 | # and the worst that can happen is that the file is created twice, 72 | # unless a request comes in right in between two deployments creating 73 | # the file. 74 | with open(self.PATH, "w") as file: 75 | file.write("0") 76 | 77 | @Photon.handler("add") 78 | def add(self, x: int) -> int: 79 | # Open the file in read/write mode 80 | with open(self.PATH, "r+") as file: 81 | # Attempt to acquire a non-blocking exclusive lock on the file 82 | retry = 0 83 | while True: 84 | try: 85 | fcntl.flock(file, fcntl.LOCK_EX | fcntl.LOCK_NB) 86 | break 87 | except IOError as e: 88 | # If the lock cannot be acquired, sleep for a short interval 89 | # and try again; give up after 10 attempts. 90 | if e.errno != errno.EAGAIN or retry >= 10: 91 | raise HTTPException( 92 | status_code=500, 93 | detail=( 94 | "Internal server error: failed to acquire lock on file" 95 | " after repeated attempts." 
96 | ), 97 | ) 98 | retry += 1 99 | time.sleep(0.1) 100 | 101 | # Read the current value from the file 102 | current_value = int(file.read()) 103 | # Increment the value 104 | new_value = current_value + x 105 | file.seek(0) 106 | file.write(str(new_value)) 107 | file.truncate() 108 | fcntl.flock(file, fcntl.LOCK_UN) 109 | return new_value 110 | 111 | @Photon.handler("sub") 112 | def sub(self, x: int) -> int: 113 | return self.add(-x) 114 | 115 | 116 | if __name__ == "__main__": 117 | p = SafeCounter() 118 | if not os.path.exists(os.path.dirname(p.PATH)): 119 | raise RuntimeError( 120 | "SafeCounter requires a Lepton storage to be attached to the deployment at" 121 | f" {os.path.dirname(p.PATH)}, or if you are running locally, create the" 122 | " folder first." 123 | ) 124 | p.launch() 125 | -------------------------------------------------------------------------------- /getting-started/custom-image/README.md: -------------------------------------------------------------------------------- 1 | # Custom Image 2 | 3 | Lepton supports using custom images with your own software environment, 4 | given the following conditions: 5 | - the image is a relatively standard Linux image, and 6 | - it contains `python` (>3.7) and `pip`, and 7 | - optionally, to install system dependencies, it should support `apt`. 8 | 9 | Note: despite the fact that custom images are very flexible, you should use 10 | the default image if possible, and use `requirement_dependency` and 11 | `system_dependency` to install dependencies. This is because in the cloud 12 | environment, we do a lot to minimize the loading time of the default image, 13 | and a custom image may take much longer (on the order of minutes) to load. 14 | 15 | Specifying a custom image is simple: in your Photon class, simply specify 16 | ```python 17 | class MyPhoton(Photon): 18 | image = "your_custom_image_location" 19 | ``` 20 | 21 | To build the example, simply do: 22 | 23 | lep photon create -n custom-image -m custom-image.py 24 | 25 | To run the photon, simply do: 26 | 27 | lep photon push -n custom-image 28 | lep photon run -n custom-image [optional arguments] 29 | 30 | -------------------------------------------------------------------------------- /getting-started/custom-image/custom-image.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from leptonai.photon import Photon 3 | 4 | 5 | class Counter(Photon): 6 | """ 7 | A counter photon that uses a custom Docker image. 8 | """ 9 | 10 | # Note that the image should be publicly accessible. It can be a URL, or 11 | # an image on Docker Hub that you can normally `docker pull`. 12 | # In this case, we are using the python slim image as an example. 
13 | image = f"python:{sys.version_info.major}.{sys.version_info.minor}-slim" 14 | 15 | def init(self): 16 | self.counter = 0 17 | 18 | @Photon.handler("add") 19 | def add(self, x: int) -> int: 20 | self.counter += x 21 | return self.counter 22 | 23 | @Photon.handler("sub") 24 | def sub(self, x: int) -> int: 25 | self.counter -= x 26 | return self.counter 27 | -------------------------------------------------------------------------------- /getting-started/extra_files/README.md: -------------------------------------------------------------------------------- 1 | # Handling Extra Files 2 | 3 | If your photon / deployment requires a few extra files that are not part of the 4 | main python file, we provide a lightweighted way to add these files in your photon 5 | by specifying the `extra_files` field in the Photon. 6 | 7 | In this example, the main photon class is defined in `main.py`, and we want to include 8 | two files: a `content.txt` file that can be read by the photon, and a `dependency.py` 9 | file that we want to import as a submodule. The `extra_files` field is a list that 10 | specifies these two files. 11 | 12 | During deployment time, these files will be unarchived and then placed in the current 13 | working directory of the photon. You can use `os.getcwd()` to get the current working 14 | directory. 15 | 16 | To run the example, simply do: 17 | 18 | lep photon run -n extra_files_example -m main.py 19 | 20 | See the source files for more detailed explanation of the example. 21 | -------------------------------------------------------------------------------- /getting-started/extra_files/content.txt: -------------------------------------------------------------------------------- 1 | Hello world from content.txt! -------------------------------------------------------------------------------- /getting-started/extra_files/dependency.py: -------------------------------------------------------------------------------- 1 | """ 2 | A simple module to demonstrate how to use a dependency in a photon. 3 | """ 4 | 5 | 6 | def content() -> str: 7 | """ 8 | A simple function to return a string for demo purpose. 9 | """ 10 | return "Hello world from dependency.py!" 11 | -------------------------------------------------------------------------------- /getting-started/extra_files/main.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | from leptonai import Photon 5 | 6 | 7 | class Main(Photon): 8 | # extra_files defines files that will be included in the Photon package. 9 | extra_files = ["dependency.py", "content.txt"] 10 | 11 | def init(self): 12 | # If you want to use the extra_files field to store python files / modules that 13 | # you then import, you will need to add the current working directory to the 14 | # python path. 15 | # 16 | # Note that you should NOT use "__file__" to get the current working directory, 17 | # as the underlying cloudpickle class implicitly replaces the __file__ variable, 18 | # making local and remote environment inconsistent. 19 | sys.path.append(os.getcwd()) 20 | 21 | @Photon.handler 22 | def get_content_txt(self) -> str: 23 | """ 24 | A simple function to return the content of content.txt. 25 | """ 26 | with open(os.path.join(os.getcwd(), "content.txt"), "r") as f: 27 | return f.read() 28 | 29 | @Photon.handler 30 | def get_dependency_content(self) -> str: 31 | """ 32 | A simple function to return the content defined inside dependency.py. 
33 | """ 34 | # As long as you have added cwd in the system path, you can import it without 35 | # problem. 36 | import dependency 37 | 38 | return dependency.content() 39 | 40 | @Photon.handler 41 | def cwd(self) -> str: 42 | """ 43 | A simple function to return the current working directory. 44 | """ 45 | return os.getcwd() 46 | -------------------------------------------------------------------------------- /getting-started/shell/README.md: -------------------------------------------------------------------------------- 1 | This is a simple class that uses the `/run` api to run a shell command on the 2 | local deployment. Note: since the deployments are considered stateless, any 3 | command you run that may have a non-ephemeral effect, such as creating a file 4 | or so, will not be persistent, unless it is written to a persistent storage 5 | such as the Lepton storage or a mounted S3. 6 | 7 | To build the photon, do: 8 | 9 | lep photon create -n shell -m shell.py:Shell 10 | 11 | To run the photon, simply do 12 | 13 | lep photon run -n shell [optional arguments] 14 | 15 | -------------------------------------------------------------------------------- /getting-started/shell/shell.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | from typing import Tuple 3 | 4 | # This is what you should do to load the Photon class and write your code. 5 | from leptonai.photon import Photon, handler 6 | 7 | 8 | class Shell(Photon): 9 | def init(self): 10 | pass 11 | 12 | @handler("run", example={"query": "pwd"}) 13 | def run(self, query: str) -> Tuple[str, str]: 14 | """Run the shell. Don't do rm -rf though.""" 15 | output = subprocess.run( 16 | query, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True 17 | ) 18 | stdout_output = output.stdout.strip() 19 | stderr_output = output.stderr.strip() 20 | 21 | return stdout_output, stderr_output 22 | 23 | 24 | if __name__ == "__main__": 25 | shell = Shell() 26 | shell.launch() 27 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.black] 2 | preview = true 3 | 4 | [tool.ruff] 5 | # Enable pycodestyle (`E`) and Pyflakes (`F`) codes by default. 6 | select = ["E", "F"] 7 | ignore = ["E402", "E501"] 8 | 9 | # Allow autofix for all enabled rules (when `--fix`) is provided. 10 | fixable = ["A", "B", "C", "D", "E", "F", "G", "I", "N", "Q", "S", "T", "W", "ANN", "ARG", "BLE", "COM", "DJ", "DTZ", "EM", "ERA", "EXE", "FBT", "ICN", "INP", "ISC", "NPY", "PD", "PGH", "PIE", "PL", "PT", "PTH", "PYI", "RET", "RSE", "RUF", "SIM", "SLF", "TCH", "TID", "TRY", "UP", "YTT"] 11 | unfixable = [] 12 | 13 | # Exclude a variety of commonly ignored directories. 14 | exclude = [ 15 | ".bzr", 16 | ".direnv", 17 | ".eggs", 18 | ".git", 19 | ".git-rewrite", 20 | ".hg", 21 | ".mypy_cache", 22 | ".nox", 23 | ".pants.d", 24 | ".pytype", 25 | ".ruff_cache", 26 | ".svn", 27 | ".tox", 28 | ".venv", 29 | "__pypackages__", 30 | "_build", 31 | "buck-out", 32 | "build", 33 | "dist", 34 | "node_modules", 35 | "venv", 36 | ] 37 | --------------------------------------------------------------------------------