├── .droid.yaml ├── .github ├── actions │ └── setup-python │ │ └── action.yml └── workflows │ ├── pr.yml │ ├── test-examples.yml │ ├── truss_deploy.yml │ └── warm-chains.yml ├── .gitignore ├── .isort.cfg ├── .pre-commit-config.yaml ├── .tool-versions ├── 01-getting-started-bert ├── config.yaml ├── doc.yaml └── model │ ├── __init__.py │ └── model.py ├── 02-llm ├── config.yaml ├── doc.yaml └── model │ ├── __init__.py │ └── model.py ├── 03-llm-with-streaming ├── config.yaml ├── doc.yaml └── model │ ├── __init__.py │ └── model.py ├── 04-image-generation ├── config.yaml ├── doc.yaml └── model │ ├── __init__.py │ └── model.py ├── 05-speech-to-text ├── config.yaml └── model │ ├── __init__.py │ └── model.py ├── 06-high-performance-cached-weights ├── config.yaml ├── doc.yaml └── model │ ├── __init__.py │ └── model.py ├── 07-high-performance-dynamic-batching ├── .gitignore ├── .truss_ignore ├── README.md ├── config.yaml ├── model │ ├── __init__.py │ └── model.py ├── packages │ ├── __init__.py │ ├── run.py │ ├── tokenizer.py │ └── whisper_utils.py └── test.py ├── 09-private-huggingface ├── config.yaml ├── doc.yaml └── model │ ├── __init__.py │ └── model.py ├── 10-using-system-packages ├── config.yaml ├── doc.yaml └── model │ ├── __init__.py │ └── model.py ├── 11-embeddings-reranker-classification-tensorrt ├── BEI-allenai-llama-3.1-tulu-3-8b-reward-model-fp8 │ ├── README.md │ └── config.yaml ├── BEI-baai-bge-en-icl-embedding-fp8 │ ├── README.md │ └── config.yaml ├── BEI-baai-bge-large-en-v1.5-embedding │ ├── README.md │ └── config.yaml ├── BEI-baai-bge-m3-embedding-dense │ ├── README.md │ └── config.yaml ├── BEI-baai-bge-multilingual-gemma2-multilingual-embedding │ ├── README.md │ └── config.yaml ├── BEI-baai-bge-reranker-large │ ├── README.md │ └── config.yaml ├── BEI-baai-bge-reranker-v2-m3-multilingual │ ├── README.md │ └── config.yaml ├── BEI-baseten-example-meta-llama-3-70b-instructforsequenceclassification-fp8 │ ├── README.md │ └── config.yaml ├── BEI-intfloat-e5-mistral-7b-instruct-embedding-fp8 │ ├── README.md │ └── config.yaml ├── BEI-mixedbread-ai-mxbai-embed-large-v1-embedding │ ├── README.md │ └── config.yaml ├── BEI-mixedbread-ai-mxbai-rerank-base-v2-reranker-fp8 │ ├── README.md │ └── config.yaml ├── BEI-mixedbread-ai-mxbai-rerank-large-v2-reranker-fp8 │ ├── README.md │ └── config.yaml ├── BEI-ncbi-medcpt-cross-encoder-reranker │ ├── README.md │ └── config.yaml ├── BEI-nomic-ai-nomic-embed-code-fp8 │ ├── README.md │ └── config.yaml ├── BEI-papluca-xlm-roberta-base-language-detection-classification │ ├── README.md │ └── config.yaml ├── BEI-qwen-qwen3-embedding-0.6b-fp8 │ ├── README.md │ └── config.yaml ├── BEI-qwen-qwen3-embedding-4b-fp8 │ ├── README.md │ └── config.yaml ├── BEI-qwen-qwen3-embedding-8b-fp8 │ ├── README.md │ └── config.yaml ├── BEI-qwen-qwen3-reranker-0.6b-fp8 │ ├── README.md │ └── config.yaml ├── BEI-qwen-qwen3-reranker-4b-fp8 │ ├── README.md │ └── config.yaml ├── BEI-qwen-qwen3-reranker-8b-fp8 │ ├── README.md │ └── config.yaml ├── BEI-salesforce-sfr-embedding-mistral-fp8 │ ├── README.md │ └── config.yaml ├── BEI-samlowe-roberta-base-go_emotions-classification │ ├── README.md │ └── config.yaml ├── BEI-skywork-skywork-reward-llama-3.1-8b-v0.2-reward-model-fp8 │ ├── README.md │ ├── chat_template_deployment.py │ └── config.yaml ├── BEI-snowflake-snowflake-arctic-embed-l-v2.0 │ ├── README.md │ └── config.yaml ├── BEI-whereisai-uae-large-v1-embedding │ ├── README.md │ └── config.yaml ├── Briton-deepseek-ai-deepseek-r1-distill-llama-70b-fp8 │ ├── README.md │ └── config.yaml ├── 
Briton-deepseek-ai-deepseek-r1-distill-qwen-32b-fp8 │ ├── README.md │ └── config.yaml ├── Briton-meta-llama-llama-3.1-405b-fp8 │ ├── README.md │ └── config.yaml ├── Briton-meta-llama-llama-3.1-8b-instruct-with-speculative-lookahead-decoding-fp8 │ ├── README.md │ └── config.yaml ├── Briton-meta-llama-llama-3.2-1b-instruct-fp8 │ ├── README.md │ └── config.yaml ├── Briton-meta-llama-llama-3.2-3b-instruct │ ├── README.md │ └── config.yaml ├── Briton-meta-llama-llama-3.3-70b-instruct-fp8 │ ├── README.md │ └── config.yaml ├── Briton-meta-llama-llama-3.3-70b-instruct-speculative-with-1b-external-draft-fp8 │ ├── README.md │ └── config.yaml ├── Briton-meta-llama-llama-3.3-70b-instruct-tp4-fp8 │ ├── README.md │ └── config.yaml ├── Briton-microsoft-phi-4-fp8 │ ├── README.md │ └── config.yaml ├── Briton-mistralai-mistral-7b-instruct-v0.3 │ ├── README.md │ └── config.yaml ├── Briton-mistralai-mistral-small-24b-instruct-2501-fp8 │ ├── README.md │ └── config.yaml ├── Briton-qwen-qwen2-57b-a14b-moe-int4 │ ├── README.md │ └── config.yaml ├── Briton-qwen-qwen2.5-72b-instruct-tp2-fp8 │ ├── README.md │ └── config.yaml ├── Briton-qwen-qwen2.5-7b-instruct-with-speculative-lookahead-decoding-fp8 │ ├── README.md │ └── config.yaml ├── Briton-qwen-qwen3-32b-fp8 │ ├── README.md │ └── config.yaml ├── Briton-qwen-qwq-32b-reasoning-fp8 │ ├── README.md │ └── config.yaml ├── Briton-qwen-qwq-32b-reasoning-with-speculative-fp8 │ ├── README.md │ └── config.yaml ├── Briton-tiiuae-falcon3-10b-instruct-fp8 │ ├── README.md │ └── config.yaml ├── README.md ├── TEI-alibaba-nlp-gte-modernbert-base-embedding │ ├── README.md │ └── config.yaml ├── TEI-alibaba-nlp-gte-qwen2-1.5b-instruct-embedding │ ├── README.md │ └── config.yaml ├── TEI-alibaba-nlp-gte-qwen2-7b-instruct-embedding │ ├── README.md │ └── config.yaml ├── TEI-alibaba-nlp-gte-reranker-modernbert-base │ ├── README.md │ └── config.yaml ├── TEI-intfloat-multilingual-e5-large-instruct │ ├── README.md │ └── config.yaml ├── TEI-jina-ai-jina-embeddings-v2-base-en │ ├── README.md │ └── config.yaml ├── TEI-jinaai-jina-embeddings-v2-base-code │ ├── README.md │ └── config.yaml ├── TEI-mixedbread-ai-mxbai-embed-large-v1-embedding │ ├── README.md │ └── config.yaml ├── TEI-nomic-ai-nomic-embed-text-v1.5 │ ├── README.md │ └── config.yaml ├── TEI-nomic-ai-nomic-embed-text-v2-moe │ ├── README.md │ └── config.yaml ├── TEI-sentence-transformers-all-minilm-l6-v2-embedding │ ├── README.md │ └── config.yaml ├── TEI-taylorai-bge-micro-v2 │ ├── README.md │ └── config.yaml └── templating │ ├── .internal_tei │ ├── Dockerfile │ └── roll_out_docker.sh │ ├── README.md │ ├── deploy_all.py │ └── generate_templates.py ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── assets ├── comfyui-screenshot-1.png ├── comfyui-screenshot-2.png ├── comfyui-screenshot-3.png └── sdxl-controlnet-workflow.json ├── audiogen-medium ├── README.md ├── config.yaml └── model │ ├── __init__.py │ └── model.py ├── bin ├── image.txt ├── test_example.py ├── test_truss_deploy.py └── validate_ci.py ├── binocular ├── config.yaml ├── model │ ├── __init__.py │ └── model.py └── packages │ └── config.py ├── chains-examples └── docs │ ├── audio-transcription │ ├── README.md │ ├── data_types.py │ ├── helpers.py │ ├── transcribe.py │ └── whisper_chainlet.py │ └── poems │ └── poems.py ├── chatterbox-tts ├── README.md ├── config.yaml ├── docker │ ├── Dockerfile │ └── docker_build.sh ├── input │ └── obama_8s.wav ├── model │ ├── __init__.py │ └── model.py └── run_tts.py ├── ci.yaml ├── clip ├── config.yaml └── model │ ├── __init__.py │ └── 
model.py ├── cogvlm ├── README.md ├── config.yaml └── model │ ├── __init__.py │ └── model.py ├── comfyui-truss ├── README.md ├── config.yaml ├── data │ ├── comfy_ui_workflow.json │ └── model.json ├── examples │ ├── animate-diff │ │ ├── model.json │ │ └── workflow.json │ ├── anime-style-transfer │ │ ├── config.yaml │ │ └── workflow.json │ ├── sdxl-controlnet │ │ ├── model.json │ │ └── workflow.json │ └── sdxl-with-refiner │ │ ├── model.json │ │ └── workflow.json └── model │ ├── __init__.py │ ├── helpers.py │ └── model.py ├── control-net-qrcode ├── README.md ├── config.yaml ├── controlnet_qr_code_results.gif ├── model │ ├── __init__.py │ └── model.py ├── twitter_mask.jpeg └── twitter_output.jpg ├── custom-server ├── README.md ├── deepseek-v2-5-instruct-sglang │ └── config.yaml ├── infinity-embedding-server │ ├── README.md │ └── config.yaml ├── llama3-70b-instruct-lmdeploy │ └── config.yaml ├── llama3-70b-instruct-sglang │ └── config.yaml ├── llama3-8b-instruct-lmdeploy │ └── config.yaml ├── llama3-8b-instruct-sglang │ └── config.yaml ├── llama3_eval.py ├── pixtral-12b │ ├── README.md │ ├── config.yaml │ └── data │ │ └── pixtral12b.jinja └── ultravox-0.4 │ ├── README.md │ └── config.yaml ├── deepfloyd-xl ├── README.md ├── config.yaml └── model │ ├── __init__.py │ └── model.py ├── deepseek ├── engine-deepseek-r1-distill-llama-70b │ ├── README.md │ └── config.yaml ├── engine-deepseek-r1-distill-llama-8b │ ├── README.md │ └── config.yaml ├── engine-deepseek-r1-distill-qwen-14b │ ├── README.md │ └── config.yaml ├── engine-deepseek-r1-distill-qwen-32b │ ├── README.md │ └── config.yaml └── engine-deepseek-r1-distill-qwen-7b │ ├── README.md │ └── config.yaml ├── deepspeed-mii ├── README.md ├── config.yaml └── model │ ├── __init__.py │ └── model.py ├── dis-segmentation ├── README.md ├── config.yaml └── model │ ├── __init__.py │ ├── clone_repo_helper.py │ ├── helpers.py │ └── model.py ├── dockerfiles └── ComfyUI.dockerfile ├── falcon ├── falcon3-10B-trt-llm-spec-dec │ ├── README.md │ └── config.yaml └── falcon3-3B-trt-llm-engine-high-throughput │ ├── README.md │ └── config.yaml ├── flux ├── README.md ├── dev │ ├── config.yaml │ └── model │ │ ├── __init__.py │ │ └── model.py └── schnell │ ├── config.yaml │ └── model │ ├── __init__.py │ └── model.py ├── fotographer └── zenctrl │ ├── README.md │ ├── call.py │ ├── config.yaml │ ├── images │ ├── banner_1.png │ ├── camera.png │ ├── speaker-input.png │ └── speaker-output.png │ └── requirements.txt ├── gemma ├── gemma-2-27b-it-vllm │ ├── README.md │ ├── config.yaml │ └── model │ │ ├── __init__.py │ │ └── model.py ├── gemma-2-9b-it-vllm │ ├── README.md │ ├── config.yaml │ └── model │ │ ├── __init__.py │ │ └── model.py └── gemma-3-27b-it │ └── config.yaml ├── gfp-gan ├── LICENSE ├── README.md ├── config.yaml ├── data │ └── .gitkeep ├── input.json └── model │ ├── __init__.py │ └── model.py ├── image-segmentation ├── config.yaml └── model │ ├── __init__.py │ └── model.py ├── internal └── config.yaml ├── ip-adapter ├── README.md ├── config.yaml └── model │ ├── __init__.py │ └── model.py ├── jsonformatter ├── config.yaml ├── data │ └── schema.json └── model │ ├── __init__.py │ └── model.py ├── kokoro ├── README.md ├── call.py ├── config.yaml └── model │ ├── __init__.py │ └── model.py ├── layoutlm-document-qa ├── README.md ├── config.yaml └── model │ ├── __init__.py │ └── model.py ├── llama-cpp-server ├── README.md ├── config.yaml ├── cuda.Dockerfile └── llama_server_help ├── llama ├── engine-llama-3-1-70b-instruct │ ├── README.md │ └── config.yaml ├── 
engine-llama-3-1-8b-instruct │ ├── README.md │ └── config.yaml ├── engine-llama-3-3-70b-instruct │ ├── README.md │ └── config.yaml ├── engine-llama-3.1-405b-instruct │ ├── README.md │ └── config.yaml ├── llama-2-13b-chat │ ├── README.md │ ├── config.yaml │ └── model │ │ ├── __init__.py │ │ └── model.py ├── llama-2-13b │ ├── README.md │ ├── config.yaml │ └── model │ │ ├── __init__.py │ │ └── model.py ├── llama-2-70b-chat │ ├── README.md │ ├── config.yaml │ └── model │ │ ├── __init__.py │ │ └── model.py ├── llama-2-70b │ ├── README.md │ ├── config.yaml │ └── model │ │ ├── __init__.py │ │ └── model.py ├── llama-2-7b-chat │ ├── README.md │ ├── config.yaml │ └── model │ │ ├── __init__.py │ │ └── model.py ├── llama-2-7b │ ├── README.md │ ├── config.yaml │ └── model │ │ ├── __init__.py │ │ └── model.py ├── llama-3-70b-instruct │ ├── README.md │ ├── config.yaml │ └── model │ │ ├── __init__.py │ │ └── model.py ├── llama-3-8b-instruct │ ├── README.md │ ├── config.yaml │ └── model │ │ ├── __init__.py │ │ └── model.py ├── llama-3_1-405b-instruct │ ├── README.md │ ├── config.yaml │ └── model │ │ ├── __init__.py │ │ ├── model.py │ │ └── sighelper.py ├── llama-3_1-8b-instruct-sglang │ ├── config.yaml │ └── model │ │ ├── __init__.py │ │ └── model.py ├── llama-3_1-8b-instruct │ ├── config.yaml │ └── model │ │ ├── __init__.py │ │ └── model.py ├── llama-3_1_70b-instruct │ ├── config.yaml │ └── model │ │ ├── __init__.py │ │ ├── model.py │ │ └── sighelper.py ├── llama-3_2-11b-vision-instruct │ ├── README.md │ └── config.yaml ├── llama-4-maverick-17b-128e-instruct-fp8-vllm │ ├── config.yaml │ └── data │ │ └── do.sh ├── llama-4-scout-17b-16e-instruct-bf16-vllm │ ├── config.yaml │ └── data │ │ └── do.sh ├── llama-7b-exllama-streaming │ ├── README.md │ ├── config.yaml │ └── model │ │ ├── __init__.py │ │ └── model.py ├── llama-7b-exllama │ ├── README.md │ ├── config.yaml │ └── model │ │ ├── __init__.py │ │ └── model.py ├── llama-7b-vllm │ ├── config.yaml │ └── model │ │ ├── __init__.py │ │ └── model.py ├── llama-7b │ ├── README.md │ ├── config.yaml │ ├── data │ │ ├── config.json │ │ ├── generation_config.json │ │ └── pytorch_model.bin.index.json │ └── model │ │ ├── __init__.py │ │ └── model.py └── tinyllama-1.1B-chat-v1.0 │ └── config.yaml ├── llava ├── llava-1.6-sgl │ ├── README.md │ ├── config.yaml │ ├── model │ │ ├── __init__.py │ │ └── model.py │ └── requirements.txt ├── llava-v1.5-7b │ ├── README.md │ ├── config.yaml │ ├── model │ │ ├── __init__.py │ │ └── model.py │ └── packages │ │ └── llava │ │ ├── __init__.py │ │ ├── constants.py │ │ ├── conversation.py │ │ ├── eval │ │ ├── eval_gpt_review.py │ │ ├── eval_gpt_review_bench.py │ │ ├── eval_gpt_review_visual.py │ │ ├── eval_pope.py │ │ ├── eval_science_qa.py │ │ ├── eval_science_qa_gpt4.py │ │ ├── eval_science_qa_gpt4_requery.py │ │ ├── eval_textvqa.py │ │ ├── generate_webpage_data_from_table.py │ │ ├── m4c_evaluator.py │ │ ├── model_qa.py │ │ ├── model_vqa.py │ │ ├── model_vqa_loader.py │ │ ├── model_vqa_mmbench.py │ │ ├── model_vqa_qbench.py │ │ ├── model_vqa_science.py │ │ ├── qa_baseline_gpt35.py │ │ ├── run_llava.py │ │ ├── summarize_gpt_review.py │ │ ├── table │ │ │ ├── answer │ │ │ │ ├── answer_alpaca-13b.jsonl │ │ │ │ ├── answer_bard.jsonl │ │ │ │ ├── answer_gpt35.jsonl │ │ │ │ ├── answer_llama-13b.jsonl │ │ │ │ └── answer_vicuna-13b.jsonl │ │ │ ├── caps_boxes_coco2014_val_80.jsonl │ │ │ ├── model.jsonl │ │ │ ├── prompt.jsonl │ │ │ ├── question.jsonl │ │ │ ├── results │ │ │ │ ├── test_sqa_llava_13b_v0.json │ │ │ │ └── 
test_sqa_llava_lcs_558k_sqa_12e_vicuna_v1_3_13b.json │ │ │ ├── review │ │ │ │ ├── review_alpaca-13b_vicuna-13b.jsonl │ │ │ │ ├── review_bard_vicuna-13b.jsonl │ │ │ │ ├── review_gpt35_vicuna-13b.jsonl │ │ │ │ └── review_llama-13b_vicuna-13b.jsonl │ │ │ ├── reviewer.jsonl │ │ │ └── rule.json │ │ └── webpage │ │ │ ├── figures │ │ │ ├── alpaca.png │ │ │ ├── bard.jpg │ │ │ ├── chatgpt.svg │ │ │ ├── llama.jpg │ │ │ ├── swords_FILL0_wght300_GRAD0_opsz48.svg │ │ │ └── vicuna.jpeg │ │ │ ├── index.html │ │ │ ├── script.js │ │ │ └── styles.css │ │ ├── mm_utils.py │ │ ├── model │ │ ├── __init__.py │ │ ├── apply_delta.py │ │ ├── builder.py │ │ ├── consolidate.py │ │ ├── language_model │ │ │ ├── llava_llama.py │ │ │ ├── llava_mpt.py │ │ │ └── mpt │ │ │ │ ├── adapt_tokenizer.py │ │ │ │ ├── attention.py │ │ │ │ ├── blocks.py │ │ │ │ ├── configuration_mpt.py │ │ │ │ ├── custom_embedding.py │ │ │ │ ├── flash_attn_triton.py │ │ │ │ ├── hf_prefixlm_converter.py │ │ │ │ ├── meta_init_context.py │ │ │ │ ├── modeling_mpt.py │ │ │ │ ├── norm.py │ │ │ │ └── param_init_fns.py │ │ ├── llava_arch.py │ │ ├── make_delta.py │ │ ├── multimodal_encoder │ │ │ ├── builder.py │ │ │ └── clip_encoder.py │ │ ├── multimodal_projector │ │ │ └── builder.py │ │ └── utils.py │ │ ├── serve │ │ ├── __init__.py │ │ ├── cli.py │ │ ├── controller.py │ │ ├── examples │ │ │ ├── extreme_ironing.jpg │ │ │ └── waterview.jpg │ │ ├── gradio_web_server.py │ │ ├── model_worker.py │ │ ├── register_worker.py │ │ └── test_message.py │ │ ├── train │ │ ├── llama_flash_attn_monkey_patch.py │ │ ├── llama_xformers_attn_monkey_patch.py │ │ ├── llava_trainer.py │ │ ├── train.py │ │ ├── train_mem.py │ │ └── train_xformers.py │ │ └── utils.py └── llava-v1.6-34b │ ├── README.md │ ├── config.yaml │ ├── input.json │ └── model │ ├── __init__.py │ └── model.py ├── magic-animate ├── README.md ├── config.yaml └── model │ ├── __init__.py │ ├── configs │ ├── inference │ │ └── inference.yaml │ └── prompts │ │ └── animation.yaml │ ├── demo │ └── animate.py │ ├── magicanimate │ ├── models │ │ ├── appearance_encoder.py │ │ ├── attention.py │ │ ├── controlnet.py │ │ ├── embeddings.py │ │ ├── motion_module.py │ │ ├── mutual_self_attention.py │ │ ├── orig_attention.py │ │ ├── resnet.py │ │ ├── stable_diffusion_controlnet_reference.py │ │ ├── unet.py │ │ ├── unet_3d_blocks.py │ │ └── unet_controlnet.py │ ├── pipelines │ │ ├── animation.py │ │ ├── context.py │ │ └── pipeline_animation.py │ └── utils │ │ ├── dist_tools.py │ │ ├── util.py │ │ └── videoreader.py │ └── model.py ├── metavoice-1b ├── README.md ├── config.yaml ├── data │ └── bria.mp3 ├── model │ ├── __init__.py │ └── model.py ├── process.py └── requirements.txt ├── mistral ├── engine-devstral │ └── config.yaml ├── engine-mistral-7b-instruct │ ├── README.md │ └── config.yaml ├── engine-mistral-small-3 │ ├── README.md │ └── config.yaml ├── engine-mixtral-8x22b-instruct │ ├── README.md │ └── config.yaml ├── engine-mixtral-8x7b-instruct │ ├── README.md │ └── config.yaml ├── mistral-7b-chat │ ├── README.md │ ├── config.yaml │ └── model │ │ ├── __init__.py │ │ └── model.py ├── mistral-7b-instruct-vllm │ ├── README.md │ ├── config.yaml │ └── model │ │ ├── __init__.py │ │ └── model.py ├── mistral-7b-instruct │ ├── README.md │ ├── config.yaml │ └── model │ │ ├── __init__.py │ │ └── model.py ├── mistral-7b │ ├── README.md │ ├── config.yaml │ └── model │ │ ├── __init__.py │ │ └── model.py ├── mistral-small-3.1 │ └── config.yaml ├── mixtral-8x22b-trt-int8-weights-only │ ├── README.md │ ├── TRT-LLM-README.md │ ├── config.yaml │ 
├── data │ │ └── .gitattributes │ ├── model │ │ ├── __init__.py │ │ └── model.py │ └── packages │ │ ├── client.py │ │ ├── inflight_batcher_llm │ │ ├── ensemble │ │ │ └── config.pbtxt │ │ ├── postprocessing │ │ │ ├── 1 │ │ │ │ └── model.py │ │ │ └── config.pbtxt │ │ ├── preprocessing │ │ │ ├── 1 │ │ │ │ └── model.py │ │ │ └── config.pbtxt │ │ └── tensorrt_llm │ │ │ └── config.pbtxt │ │ └── utils.py ├── mixtral-8x22b │ ├── README.md │ ├── config.yaml │ └── model │ │ ├── __init__.py │ │ └── model.py ├── mixtral-8x7b-instruct-trt-llm-h100 │ ├── README.md │ ├── TRT-LLM-README.md │ ├── config.yaml │ ├── data │ │ └── .gitattributes │ ├── model │ │ ├── __init__.py │ │ └── model.py │ └── packages │ │ ├── client.py │ │ ├── inflight_batcher_llm │ │ ├── ensemble │ │ │ └── config.pbtxt │ │ ├── postprocessing │ │ │ ├── 1 │ │ │ │ └── model.py │ │ │ └── config.pbtxt │ │ ├── preprocessing │ │ │ ├── 1 │ │ │ │ └── model.py │ │ │ └── config.pbtxt │ │ └── tensorrt_llm │ │ │ └── config.pbtxt │ │ └── utils.py ├── mixtral-8x7b-instruct-trt-llm-weights-only-quant-h100 │ ├── README.md │ ├── TRT-LLM-README.md │ ├── config.yaml │ ├── data │ │ └── .gitattributes │ ├── model │ │ ├── __init__.py │ │ └── model.py │ └── packages │ │ ├── client.py │ │ ├── inflight_batcher_llm │ │ ├── ensemble │ │ │ └── config.pbtxt │ │ ├── postprocessing │ │ │ ├── 1 │ │ │ │ └── model.py │ │ │ └── config.pbtxt │ │ ├── preprocessing │ │ │ ├── 1 │ │ │ │ └── model.py │ │ │ └── config.pbtxt │ │ └── tensorrt_llm │ │ │ └── config.pbtxt │ │ └── utils.py ├── mixtral-8x7b-instruct-trt-llm-weights-only-quant │ ├── README.md │ ├── TRT-LLM-README.md │ ├── config.yaml │ ├── data │ │ └── .gitattributes │ ├── model │ │ ├── __init__.py │ │ └── model.py │ └── packages │ │ ├── client.py │ │ ├── inflight_batcher_llm │ │ ├── ensemble │ │ │ └── config.pbtxt │ │ ├── postprocessing │ │ │ ├── 1 │ │ │ │ └── model.py │ │ │ └── config.pbtxt │ │ ├── preprocessing │ │ │ ├── 1 │ │ │ │ └── model.py │ │ │ └── config.pbtxt │ │ └── tensorrt_llm │ │ │ └── config.pbtxt │ │ └── utils.py ├── mixtral-8x7b-instruct-trt-llm │ ├── README.md │ ├── TRT-LLM-README.md │ ├── config.yaml │ ├── data │ │ └── .gitattributes │ ├── model │ │ ├── __init__.py │ │ └── model.py │ └── packages │ │ ├── client.py │ │ ├── inflight_batcher_llm │ │ ├── ensemble │ │ │ └── config.pbtxt │ │ ├── postprocessing │ │ │ ├── 1 │ │ │ │ └── model.py │ │ │ └── config.pbtxt │ │ ├── preprocessing │ │ │ ├── 1 │ │ │ │ └── model.py │ │ │ └── config.pbtxt │ │ └── tensorrt_llm │ │ │ └── config.pbtxt │ │ └── utils.py ├── mixtral-8x7b-instruct-vllm-a100-t-tp2 │ ├── README.md │ ├── config.yaml │ └── model │ │ ├── __init__.py │ │ └── model.py ├── mixtral-8x7b-instruct-vllm │ ├── README.md │ ├── config.yaml │ └── model │ │ ├── __init__.py │ │ └── model.py └── pixtral-12b │ ├── README.md │ ├── config.yaml │ └── model │ ├── __init__.py │ └── model.py ├── multiprocessing ├── README.md ├── config.yaml └── model │ ├── __init__.py │ ├── model.py │ └── test.py ├── musicgen-large ├── README.md ├── config.yaml └── model │ ├── __init__.py │ └── model.py ├── musicgen-melody ├── README.md ├── config.yaml └── model │ ├── __init__.py │ └── model.py ├── nemotron ├── llama-3-1-nemotron-70b-instruct │ ├── README.md │ └── config.yaml └── nemotron-ultra-253b │ └── config.yaml ├── ngram-speculator ├── truss │ ├── config.yaml │ └── model │ │ ├── __init__.py │ │ └── model.py └── trussless │ └── config.yaml ├── nous-capybara ├── nous-capybara-34b-openai │ ├── README.md │ ├── config.yaml │ └── model │ │ ├── __init__.py │ │ └── model.py └── 
nous-capybara-34b │ ├── README.md │ ├── config.yaml │ └── model │ ├── __init__.py │ └── model.py ├── nsql ├── README.md ├── config.yaml └── model │ ├── __init__.py │ └── model.py ├── orpheus-best-performance ├── call.py ├── config.yaml ├── model │ └── model.py └── snac_batching_quantization_dev.py ├── phi ├── phi-3-mini-128k-instruct │ ├── README.md │ ├── config.yaml │ └── model │ │ ├── __init__.py │ │ └── model.py ├── phi-3-mini-4k-instruct │ ├── README.md │ ├── config.yaml │ └── model │ │ ├── __init__.py │ │ └── model.py └── phi-3.5-mini │ ├── README.md │ ├── config.yaml │ └── model │ ├── __init__.py │ ├── helper.py │ └── model.py ├── piper-tts ├── README.md ├── config.yaml └── model │ ├── __init__.py │ └── model.py ├── playground-v2-aesthetic ├── README.md ├── config.yaml ├── model │ ├── __init__.py │ └── model.py └── show.py ├── poetry.lock ├── pyproject.toml ├── qwen ├── BEI-qwen-qwen3-embedding-0.6b-fp8 │ ├── README.md │ └── config.yaml ├── BEI-qwen-qwen3-embedding-4b-fp8 │ ├── README.md │ └── config.yaml ├── BEI-qwen-qwen3-embedding-8b-fp8 │ ├── README.md │ └── config.yaml ├── BEI-qwen-qwen3-reranker-0.6b-fp8 │ ├── README.md │ └── config.yaml ├── BEI-qwen-qwen3-reranker-4b-fp8 │ ├── README.md │ └── config.yaml ├── BEI-qwen-qwen3-reranker-8b-fp8 │ ├── README.md │ └── config.yaml ├── engine-qwen-2-5-14b-coder-instruct │ ├── README.md │ └── config.yaml ├── engine-qwen-2-5-14b-instruct │ ├── README.md │ └── config.yaml ├── engine-qwen-2-5-32b-coder-instruct │ ├── README.md │ └── config.yaml ├── engine-qwen-2-5-32b-instruct │ ├── README.md │ └── config.yaml ├── engine-qwen-2-5-3b-instruct │ ├── README.md │ └── config.yaml ├── engine-qwen-2-5-72b-instruct │ ├── README.md │ └── config.yaml ├── engine-qwen-2-5-72b-math-instruct │ ├── README.md │ └── config.yaml ├── engine-qwen-2-5-7b-coder-instruct │ ├── README.md │ └── config.yaml ├── engine-qwen-2-5-7b-instruct │ ├── README.md │ └── config.yaml ├── engine-qwen-2-5-7b-math-instruct │ ├── README.md │ └── config.yaml ├── engine-qwen-3-06b │ └── config.yaml ├── engine-qwen-3-32b │ └── config.yaml ├── engine-qwen-3-4b │ └── config.yaml ├── model_auto.py ├── qwen-3-235B-sglang │ └── config.yaml ├── qwen-3-30B-A3-sglang │ └── config.yaml ├── qwen-3-30B-A3-vllm │ └── config.yaml ├── qwen-3-32B-sglang │ └── config.yaml ├── qwen-7b-chat │ ├── README.md │ ├── config.yaml │ └── model │ │ ├── __init__.py │ │ └── model.py └── qwen-vl │ ├── README.md │ ├── config.yaml │ └── model │ ├── __init__.py │ └── model.py ├── sana ├── sana_1600M │ ├── config.yaml │ ├── model │ │ ├── __init__.py │ │ └── model.py │ └── packages │ │ └── Sana │ │ ├── CITATION.bib │ │ ├── CIs │ │ └── add_license_all.sh │ │ ├── Dockerfile │ │ ├── LICENSE │ │ ├── README.md │ │ ├── app │ │ ├── app_sana.py │ │ ├── app_sana_multithread.py │ │ ├── safety_check.py │ │ └── sana_pipeline.py │ │ ├── asset │ │ ├── Sana.jpg │ │ ├── docs │ │ │ └── metrics_toolkit.md │ │ ├── example_data │ │ │ ├── 00000000.png │ │ │ ├── 00000000.txt │ │ │ ├── 00000000_InternVL2-26B.json │ │ │ ├── 00000000_InternVL2-26B_clip_score.json │ │ │ ├── 00000000_VILA1-5-13B.json │ │ │ ├── 00000000_VILA1-5-13B_clip_score.json │ │ │ ├── 00000000_prompt_clip_score.json │ │ │ └── meta_data.json │ │ ├── examples.py │ │ ├── logo.png │ │ ├── model-incremental.jpg │ │ ├── model_paths.txt │ │ ├── samples.txt │ │ └── samples_mini.txt │ │ ├── configs │ │ ├── sana_app_config │ │ │ ├── Sana_1600M_app.yaml │ │ │ └── Sana_600M_app.yaml │ │ ├── sana_base.yaml │ │ └── sana_config │ │ │ ├── 1024ms │ │ │ ├── Sana_1600M_img1024.yaml │ │ │ ├── 
Sana_1600M_img1024_AdamW.yaml │ │ │ └── Sana_600M_img1024.yaml │ │ │ └── 512ms │ │ │ ├── Sana_1600M_img512.yaml │ │ │ ├── Sana_600M_img512.yaml │ │ │ ├── ci_Sana_600M_img512.yaml │ │ │ └── sample_dataset.yaml │ │ ├── diffusion │ │ ├── __init__.py │ │ ├── data │ │ │ ├── __init__.py │ │ │ ├── builder.py │ │ │ ├── datasets │ │ │ │ ├── __init__.py │ │ │ │ ├── sana_data.py │ │ │ │ ├── sana_data_multi_scale.py │ │ │ │ └── utils.py │ │ │ ├── transforms.py │ │ │ └── wids │ │ │ │ ├── __init__.py │ │ │ │ ├── wids.py │ │ │ │ ├── wids_dl.py │ │ │ │ ├── wids_lru.py │ │ │ │ ├── wids_mmtar.py │ │ │ │ ├── wids_specs.py │ │ │ │ └── wids_tar.py │ │ ├── dpm_solver.py │ │ ├── flow_euler_sampler.py │ │ ├── iddpm.py │ │ ├── lcm_scheduler.py │ │ ├── model │ │ │ ├── __init__.py │ │ │ ├── act.py │ │ │ ├── builder.py │ │ │ ├── dc_ae │ │ │ │ └── efficientvit │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── ae_model_zoo.py │ │ │ │ │ ├── apps │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── setup.py │ │ │ │ │ ├── trainer │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ └── run_config.py │ │ │ │ │ └── utils │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ ├── dist.py │ │ │ │ │ │ ├── ema.py │ │ │ │ │ │ ├── export.py │ │ │ │ │ │ ├── image.py │ │ │ │ │ │ ├── init.py │ │ │ │ │ │ ├── lr.py │ │ │ │ │ │ ├── metric.py │ │ │ │ │ │ ├── misc.py │ │ │ │ │ │ └── opt.py │ │ │ │ │ └── models │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── efficientvit │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── dc_ae.py │ │ │ │ │ ├── nn │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── act.py │ │ │ │ │ ├── drop.py │ │ │ │ │ ├── norm.py │ │ │ │ │ ├── ops.py │ │ │ │ │ └── triton_rms_norm.py │ │ │ │ │ └── utils │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── list.py │ │ │ │ │ ├── network.py │ │ │ │ │ └── random.py │ │ │ ├── diffusion_utils.py │ │ │ ├── dpm_solver.py │ │ │ ├── edm_sample.py │ │ │ ├── gaussian_diffusion.py │ │ │ ├── nets │ │ │ │ ├── __init__.py │ │ │ │ ├── basic_modules.py │ │ │ │ ├── fastlinear │ │ │ │ │ ├── develop_triton_ffn.py │ │ │ │ │ ├── develop_triton_litemla.py │ │ │ │ │ ├── modules │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ ├── flash_attn.py │ │ │ │ │ │ ├── lite_mla.py │ │ │ │ │ │ ├── mb_conv_pre_glu.py │ │ │ │ │ │ ├── nn │ │ │ │ │ │ │ ├── act.py │ │ │ │ │ │ │ ├── conv.py │ │ │ │ │ │ │ └── norm.py │ │ │ │ │ │ ├── triton_lite_mla.py │ │ │ │ │ │ ├── triton_lite_mla_fwd.py │ │ │ │ │ │ ├── triton_lite_mla_kernels │ │ │ │ │ │ │ ├── custom_autotune.py │ │ │ │ │ │ │ ├── linear_relu_fwd.py │ │ │ │ │ │ │ ├── mm.py │ │ │ │ │ │ │ ├── pad_vk_mm_fwd.py │ │ │ │ │ │ │ ├── proj_divide_bwd.py │ │ │ │ │ │ │ ├── vk_mm_relu_bwd.py │ │ │ │ │ │ │ ├── vk_q_mm_divide_fwd.py │ │ │ │ │ │ │ └── vk_q_mm_relu_bwd.py │ │ │ │ │ │ ├── triton_mb_conv_pre_glu.py │ │ │ │ │ │ ├── triton_mb_conv_pre_glu_kernels │ │ │ │ │ │ │ ├── depthwise_conv_fwd.py │ │ │ │ │ │ │ └── linear_glu_fwd.py │ │ │ │ │ │ └── utils │ │ │ │ │ │ │ ├── compare_results.py │ │ │ │ │ │ │ ├── custom_autotune.py │ │ │ │ │ │ │ ├── dtype.py │ │ │ │ │ │ │ ├── export_onnx.py │ │ │ │ │ │ │ └── model.py │ │ │ │ │ └── readme.md │ │ │ │ ├── sana.py │ │ │ │ ├── sana_U_shape.py │ │ │ │ ├── sana_U_shape_multi_scale.py │ │ │ │ ├── sana_blocks.py │ │ │ │ ├── sana_multi_scale.py │ │ │ │ ├── sana_multi_scale_adaln.py │ │ │ │ └── sana_others.py │ │ │ ├── norms.py │ │ │ ├── respace.py │ │ │ ├── sa_solver.py │ │ │ ├── timestep_sampler.py │ │ │ └── utils.py │ │ ├── sa_sampler.py │ │ ├── sa_solver_diffusers.py │ │ └── utils │ │ │ ├── __init__.py │ │ │ ├── checkpoint.py │ │ │ ├── config.py │ │ │ ├── data_sampler.py │ │ │ ├── dist_utils.py │ │ │ ├── import_utils.py │ │ │ ├── logger.py │ 
│ │ ├── lr_scheduler.py │ │ │ ├── misc.py │ │ │ └── optimizer.py │ │ ├── environment_setup.sh │ │ ├── pyproject.toml │ │ ├── sana │ │ ├── cli │ │ │ ├── run.py │ │ │ └── upload2hf.py │ │ └── tools │ │ │ ├── __init__.py │ │ │ ├── download.py │ │ │ └── hf_utils.py │ │ ├── scripts │ │ ├── bash_run_inference_metric.sh │ │ ├── bash_run_inference_metric_dpg.sh │ │ ├── bash_run_inference_metric_geneval.sh │ │ ├── bash_run_inference_metric_imagereward.sh │ │ ├── infer_metric_run_inference_metric.sh │ │ ├── infer_metric_run_inference_metric_geneval.sh │ │ ├── infer_run_inference.sh │ │ ├── infer_run_inference_geneval.sh │ │ ├── infer_run_inference_geneval_diffusers.sh │ │ ├── inference.py │ │ ├── inference_dpg.py │ │ ├── inference_geneval.py │ │ ├── inference_geneval_diffusers.py │ │ ├── inference_image_reward.py │ │ ├── interface.py │ │ └── style.css │ │ ├── tests │ │ └── bash │ │ │ ├── entry.sh │ │ │ ├── test_inference.sh │ │ │ └── test_training_1epoch.sh │ │ ├── tools │ │ ├── __init__.py │ │ ├── convert_py_to_yaml.py │ │ ├── convert_sana_pag_to_diffusers.py │ │ ├── convert_sana_to_diffusers.py │ │ ├── download.py │ │ └── metrics │ │ │ ├── clip-score │ │ │ ├── .gitignore │ │ │ ├── LICENSE │ │ │ ├── README.md │ │ │ ├── clip_score.py │ │ │ ├── setup.py │ │ │ └── src │ │ │ │ └── clip_score │ │ │ │ ├── __init__.py │ │ │ │ ├── __main__.py │ │ │ │ └── clip_score.py │ │ │ ├── compute_clipscore.sh │ │ │ ├── compute_dpg.sh │ │ │ ├── compute_fid_embedding.sh │ │ │ ├── compute_geneval.sh │ │ │ ├── compute_imagereward.sh │ │ │ ├── dpg_bench │ │ │ ├── compute_dpg_bench.py │ │ │ ├── dpg_bench.csv │ │ │ └── requirements.txt │ │ │ ├── geneval │ │ │ ├── LICENSE │ │ │ ├── README.md │ │ │ ├── annotations │ │ │ │ ├── annotations_clip.csv │ │ │ │ ├── annotations_if-xl.csv │ │ │ │ ├── annotations_sdv2.csv │ │ │ │ └── mturk_hit_template.html │ │ │ ├── environment.yml │ │ │ ├── evaluation │ │ │ │ ├── download_models.sh │ │ │ │ ├── evaluate_images.py │ │ │ │ ├── object_names.txt │ │ │ │ └── summary_scores.py │ │ │ ├── generation │ │ │ │ └── diffusers_generate.py │ │ │ ├── images │ │ │ │ └── geneval_figure_1.png │ │ │ └── prompts │ │ │ │ ├── create_prompts.py │ │ │ │ ├── evaluation_metadata.jsonl │ │ │ │ ├── generation_prompts.txt │ │ │ │ └── object_names.txt │ │ │ ├── image_reward │ │ │ ├── benchmark-prompts-dict.json │ │ │ └── compute_image_reward.py │ │ │ ├── pytorch-fid │ │ │ ├── .gitignore │ │ │ ├── CHANGELOG.md │ │ │ ├── LICENSE │ │ │ ├── README.md │ │ │ ├── compute_fid.py │ │ │ ├── noxfile.py │ │ │ ├── setup.cfg │ │ │ ├── setup.py │ │ │ ├── src │ │ │ │ └── pytorch_fid │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── __main__.py │ │ │ │ │ ├── fid_score.py │ │ │ │ │ └── inception.py │ │ │ └── tests │ │ │ │ └── test_fid_score.py │ │ │ └── utils.py │ │ └── train_scripts │ │ ├── train.py │ │ └── train.sh └── sana_600M │ ├── config.yaml │ ├── model │ ├── __init__.py │ └── model.py │ └── packages │ └── Sana │ ├── CITATION.bib │ ├── CIs │ └── add_license_all.sh │ ├── Dockerfile │ ├── LICENSE │ ├── README.md │ ├── app │ ├── app_sana.py │ ├── app_sana_multithread.py │ ├── safety_check.py │ └── sana_pipeline.py │ ├── asset │ ├── Sana.jpg │ ├── docs │ │ └── metrics_toolkit.md │ ├── example_data │ │ ├── 00000000.png │ │ ├── 00000000.txt │ │ ├── 00000000_InternVL2-26B.json │ │ ├── 00000000_InternVL2-26B_clip_score.json │ │ ├── 00000000_VILA1-5-13B.json │ │ ├── 00000000_VILA1-5-13B_clip_score.json │ │ ├── 00000000_prompt_clip_score.json │ │ └── meta_data.json │ ├── examples.py │ ├── logo.png │ ├── model-incremental.jpg │ ├── 
model_paths.txt │ ├── samples.txt │ └── samples_mini.txt │ ├── configs │ ├── sana_app_config │ │ ├── Sana_1600M_app.yaml │ │ └── Sana_600M_app.yaml │ ├── sana_base.yaml │ └── sana_config │ │ ├── 1024ms │ │ ├── Sana_1600M_img1024.yaml │ │ ├── Sana_1600M_img1024_AdamW.yaml │ │ └── Sana_600M_img1024.yaml │ │ └── 512ms │ │ ├── Sana_1600M_img512.yaml │ │ ├── Sana_600M_img512.yaml │ │ ├── ci_Sana_600M_img512.yaml │ │ └── sample_dataset.yaml │ ├── diffusion │ ├── __init__.py │ ├── data │ │ ├── __init__.py │ │ ├── builder.py │ │ ├── datasets │ │ │ ├── __init__.py │ │ │ ├── sana_data.py │ │ │ ├── sana_data_multi_scale.py │ │ │ └── utils.py │ │ ├── transforms.py │ │ └── wids │ │ │ ├── __init__.py │ │ │ ├── wids.py │ │ │ ├── wids_dl.py │ │ │ ├── wids_lru.py │ │ │ ├── wids_mmtar.py │ │ │ ├── wids_specs.py │ │ │ └── wids_tar.py │ ├── dpm_solver.py │ ├── flow_euler_sampler.py │ ├── iddpm.py │ ├── lcm_scheduler.py │ ├── model │ │ ├── __init__.py │ │ ├── act.py │ │ ├── builder.py │ │ ├── dc_ae │ │ │ └── efficientvit │ │ │ │ ├── __init__.py │ │ │ │ ├── ae_model_zoo.py │ │ │ │ ├── apps │ │ │ │ ├── __init__.py │ │ │ │ ├── setup.py │ │ │ │ ├── trainer │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── run_config.py │ │ │ │ └── utils │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── dist.py │ │ │ │ │ ├── ema.py │ │ │ │ │ ├── export.py │ │ │ │ │ ├── image.py │ │ │ │ │ ├── init.py │ │ │ │ │ ├── lr.py │ │ │ │ │ ├── metric.py │ │ │ │ │ ├── misc.py │ │ │ │ │ └── opt.py │ │ │ │ └── models │ │ │ │ ├── __init__.py │ │ │ │ ├── efficientvit │ │ │ │ ├── __init__.py │ │ │ │ └── dc_ae.py │ │ │ │ ├── nn │ │ │ │ ├── __init__.py │ │ │ │ ├── act.py │ │ │ │ ├── drop.py │ │ │ │ ├── norm.py │ │ │ │ ├── ops.py │ │ │ │ └── triton_rms_norm.py │ │ │ │ └── utils │ │ │ │ ├── __init__.py │ │ │ │ ├── list.py │ │ │ │ ├── network.py │ │ │ │ └── random.py │ │ ├── diffusion_utils.py │ │ ├── dpm_solver.py │ │ ├── edm_sample.py │ │ ├── gaussian_diffusion.py │ │ ├── nets │ │ │ ├── __init__.py │ │ │ ├── basic_modules.py │ │ │ ├── fastlinear │ │ │ │ ├── develop_triton_ffn.py │ │ │ │ ├── develop_triton_litemla.py │ │ │ │ ├── modules │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── flash_attn.py │ │ │ │ │ ├── lite_mla.py │ │ │ │ │ ├── mb_conv_pre_glu.py │ │ │ │ │ ├── nn │ │ │ │ │ │ ├── act.py │ │ │ │ │ │ ├── conv.py │ │ │ │ │ │ └── norm.py │ │ │ │ │ ├── triton_lite_mla.py │ │ │ │ │ ├── triton_lite_mla_fwd.py │ │ │ │ │ ├── triton_lite_mla_kernels │ │ │ │ │ │ ├── custom_autotune.py │ │ │ │ │ │ ├── linear_relu_fwd.py │ │ │ │ │ │ ├── mm.py │ │ │ │ │ │ ├── pad_vk_mm_fwd.py │ │ │ │ │ │ ├── proj_divide_bwd.py │ │ │ │ │ │ ├── vk_mm_relu_bwd.py │ │ │ │ │ │ ├── vk_q_mm_divide_fwd.py │ │ │ │ │ │ └── vk_q_mm_relu_bwd.py │ │ │ │ │ ├── triton_mb_conv_pre_glu.py │ │ │ │ │ ├── triton_mb_conv_pre_glu_kernels │ │ │ │ │ │ ├── depthwise_conv_fwd.py │ │ │ │ │ │ └── linear_glu_fwd.py │ │ │ │ │ └── utils │ │ │ │ │ │ ├── compare_results.py │ │ │ │ │ │ ├── custom_autotune.py │ │ │ │ │ │ ├── dtype.py │ │ │ │ │ │ ├── export_onnx.py │ │ │ │ │ │ └── model.py │ │ │ │ └── readme.md │ │ │ ├── sana.py │ │ │ ├── sana_U_shape.py │ │ │ ├── sana_U_shape_multi_scale.py │ │ │ ├── sana_blocks.py │ │ │ ├── sana_multi_scale.py │ │ │ ├── sana_multi_scale_adaln.py │ │ │ └── sana_others.py │ │ ├── norms.py │ │ ├── respace.py │ │ ├── sa_solver.py │ │ ├── timestep_sampler.py │ │ └── utils.py │ ├── sa_sampler.py │ ├── sa_solver_diffusers.py │ └── utils │ │ ├── __init__.py │ │ ├── checkpoint.py │ │ ├── config.py │ │ ├── data_sampler.py │ │ ├── dist_utils.py │ │ ├── import_utils.py │ │ ├── logger.py │ │ ├── lr_scheduler.py │ │ 
├── misc.py │ │ └── optimizer.py │ ├── environment_setup.sh │ ├── pyproject.toml │ ├── sana │ ├── cli │ │ ├── run.py │ │ └── upload2hf.py │ └── tools │ │ ├── __init__.py │ │ ├── download.py │ │ └── hf_utils.py │ ├── scripts │ ├── bash_run_inference_metric.sh │ ├── bash_run_inference_metric_dpg.sh │ ├── bash_run_inference_metric_geneval.sh │ ├── bash_run_inference_metric_imagereward.sh │ ├── infer_metric_run_inference_metric.sh │ ├── infer_metric_run_inference_metric_geneval.sh │ ├── infer_run_inference.sh │ ├── infer_run_inference_geneval.sh │ ├── infer_run_inference_geneval_diffusers.sh │ ├── inference.py │ ├── inference_dpg.py │ ├── inference_geneval.py │ ├── inference_geneval_diffusers.py │ ├── inference_image_reward.py │ ├── interface.py │ └── style.css │ ├── tests │ └── bash │ │ ├── entry.sh │ │ ├── test_inference.sh │ │ └── test_training_1epoch.sh │ ├── tools │ ├── __init__.py │ ├── convert_py_to_yaml.py │ ├── convert_sana_pag_to_diffusers.py │ ├── convert_sana_to_diffusers.py │ ├── download.py │ └── metrics │ │ ├── clip-score │ │ ├── .gitignore │ │ ├── LICENSE │ │ ├── README.md │ │ ├── clip_score.py │ │ ├── setup.py │ │ └── src │ │ │ └── clip_score │ │ │ ├── __init__.py │ │ │ ├── __main__.py │ │ │ └── clip_score.py │ │ ├── compute_clipscore.sh │ │ ├── compute_dpg.sh │ │ ├── compute_fid_embedding.sh │ │ ├── compute_geneval.sh │ │ ├── compute_imagereward.sh │ │ ├── dpg_bench │ │ ├── compute_dpg_bench.py │ │ ├── dpg_bench.csv │ │ └── requirements.txt │ │ ├── geneval │ │ ├── LICENSE │ │ ├── README.md │ │ ├── annotations │ │ │ ├── annotations_clip.csv │ │ │ ├── annotations_if-xl.csv │ │ │ ├── annotations_sdv2.csv │ │ │ └── mturk_hit_template.html │ │ ├── environment.yml │ │ ├── evaluation │ │ │ ├── download_models.sh │ │ │ ├── evaluate_images.py │ │ │ ├── object_names.txt │ │ │ └── summary_scores.py │ │ ├── generation │ │ │ └── diffusers_generate.py │ │ ├── images │ │ │ └── geneval_figure_1.png │ │ └── prompts │ │ │ ├── create_prompts.py │ │ │ ├── evaluation_metadata.jsonl │ │ │ ├── generation_prompts.txt │ │ │ └── object_names.txt │ │ ├── image_reward │ │ ├── benchmark-prompts-dict.json │ │ └── compute_image_reward.py │ │ ├── pytorch-fid │ │ ├── .gitignore │ │ ├── CHANGELOG.md │ │ ├── LICENSE │ │ ├── README.md │ │ ├── compute_fid.py │ │ ├── noxfile.py │ │ ├── setup.cfg │ │ ├── setup.py │ │ ├── src │ │ │ └── pytorch_fid │ │ │ │ ├── __init__.py │ │ │ │ ├── __main__.py │ │ │ │ ├── fid_score.py │ │ │ │ └── inception.py │ │ └── tests │ │ │ └── test_fid_score.py │ │ └── utils.py │ └── train_scripts │ ├── train.py │ └── train.sh ├── segment-anything ├── README.md ├── config.yaml └── model │ ├── __init__.py │ └── model.py ├── sesame-csm-1b ├── config.yaml └── model │ ├── __init__.py │ └── model.py ├── stable-diffusion ├── dreamshaper-lcm │ ├── README.md │ ├── config.yaml │ └── model │ │ ├── __init__.py │ │ └── model.py ├── playground-v2-trt │ ├── README.md │ ├── config.yaml │ ├── model │ │ ├── __init__.py │ │ └── model.py │ ├── packages │ │ └── diffusion │ │ │ ├── trtclip.py │ │ │ └── trtunet.py │ └── show.py ├── sd-textual-inversion │ ├── README.md │ ├── config.yaml │ ├── data │ │ └── LulaCipher.bin │ └── model │ │ ├── __init__.py │ │ └── model.py ├── sd-turbo │ ├── README.md │ ├── config.yaml │ └── model │ │ ├── __init__.py │ │ └── model.py ├── sdxl-controlnet-canny │ ├── README.md │ ├── baseten-logo.gif │ ├── config.yaml │ └── model │ │ ├── __init__.py │ │ └── model.py ├── sdxl-controlnet-depth │ ├── README.md │ ├── config.yaml │ └── model │ │ ├── __init__.py │ │ └── model.py ├── 
sdxl-controlnet │ ├── README.md │ ├── baseten-logo.gif │ ├── config.yaml │ └── model │ │ ├── __init__.py │ │ └── model.py ├── sdxl-lightning │ ├── README.md │ ├── config.yaml │ └── model │ │ ├── __init__.py │ │ └── model.py ├── sdxl-lora-swapping │ ├── README.md │ ├── config.yaml │ └── model │ │ ├── __init__.py │ │ └── model.py ├── sdxl-lora │ ├── README.md │ ├── config.yaml │ └── model │ │ ├── __init__.py │ │ └── model.py ├── sdxl-turbo │ ├── README.md │ ├── config.yaml │ └── model │ │ ├── __init__.py │ │ └── model.py ├── stable-diffusion-3-medium │ ├── README.md │ ├── config.yaml │ └── model │ │ ├── __init__.py │ │ └── model.py ├── stable-diffusion-inpainting-trt │ ├── README.md │ ├── config.yaml │ ├── model │ │ ├── __init__.py │ │ └── model.py │ ├── packages │ │ └── helpers │ │ │ ├── inpaint_pipeline.py │ │ │ ├── models.py │ │ │ ├── stable_diffusion_pipeline.py │ │ │ └── utilities.py │ └── requirements.txt ├── stable-diffusion-xl-1.0-trt-h100 │ ├── README.md │ ├── config.yaml │ ├── model │ │ ├── __init__.py │ │ └── model.py │ ├── packages │ │ └── diffusion │ │ │ ├── trtclip.py │ │ │ └── trtunet.py │ └── show.py ├── stable-diffusion-xl-1.0-trt │ ├── README.md │ ├── config.yaml │ ├── model │ │ ├── __init__.py │ │ └── model.py │ ├── packages │ │ └── diffusion │ │ │ ├── trtclip.py │ │ │ └── trtunet.py │ └── show.py ├── stable-diffusion-xl-1.0 │ ├── README.md │ ├── config.yaml │ ├── model │ │ ├── __init__.py │ │ └── model.py │ └── show.py ├── stable-diffusion │ ├── README.md │ ├── config.yaml │ ├── data │ │ ├── model_index.json │ │ ├── scheduler │ │ │ └── scheduler_config.json │ │ ├── text_encoder │ │ │ └── config.json │ │ ├── tokenizer │ │ │ ├── merges.txt │ │ │ ├── special_tokens_map.json │ │ │ ├── tokenizer_config.json │ │ │ └── vocab.json │ │ ├── unet │ │ │ └── config.json │ │ └── vae │ │ │ └── config.json │ ├── model │ │ ├── __init__.py │ │ └── model.py │ └── show.py └── stable-video-diffusion │ ├── README.md │ ├── config.yaml │ ├── model │ ├── __init__.py │ ├── helper.py │ ├── model.py │ └── scripts │ │ ├── __init__.py │ │ ├── sampling │ │ ├── configs │ │ │ ├── svd.yaml │ │ │ ├── svd_image_decoder.yaml │ │ │ ├── svd_xt.yaml │ │ │ └── svd_xt_image_decoder.yaml │ │ └── simple_video_sample.py │ │ ├── tests │ │ └── attention.py │ │ └── util │ │ ├── __init__.py │ │ └── detection │ │ ├── __init__.py │ │ ├── nsfw_and_watermark_dectection.py │ │ ├── p_head_v1.npz │ │ └── w_head_v1.npz │ └── sample_images │ ├── cheetah.jpeg │ └── racecar.jpeg ├── templates ├── README.md ├── faster-whisper-truss │ ├── config.yaml │ └── model │ │ ├── __init__.py │ │ └── model.py ├── generate.py ├── generate.yaml ├── transformers-openai-compatible │ ├── config.yaml │ └── model │ │ ├── __init__.py │ │ └── model.py └── trt-llm │ ├── TRT-LLM-README.md │ ├── config.yaml │ ├── data │ └── .gitattributes │ ├── model │ ├── __init__.py │ └── model.py │ └── packages │ ├── client.py │ ├── inflight_batcher_llm │ ├── ensemble │ │ └── config.pbtxt.jinja │ ├── postprocessing │ │ ├── 1 │ │ │ └── model.py │ │ └── config.pbtxt.jinja │ ├── preprocessing │ │ ├── 1 │ │ │ └── model.py │ │ └── config.pbtxt.jinja │ └── tensorrt_llm │ │ └── config.pbtxt │ └── utils.py ├── text-embeddings-inference ├── README.md └── config.yaml ├── trt-llm-engine-builder-templates ├── llama-3_1-70b-instruct │ ├── high_throughput │ │ ├── README.md │ │ ├── config.yaml │ │ └── model │ │ │ └── __init__.py │ ├── large_context │ │ ├── README.md │ │ ├── config.yaml │ │ └── model │ │ │ └── __init__.py │ └── low_ttft │ │ ├── README.md │ │ ├── config.yaml │ │ └── 
model │ │ └── __init__.py └── llama-3_1-8b-instruct │ ├── high_throughput │ ├── README.md │ ├── config.yaml │ └── model │ │ └── __init__.py │ ├── large_context │ ├── README.md │ ├── config.yaml │ └── model │ │ └── __init__.py │ └── low_ttft │ ├── README.md │ ├── config.yaml │ └── model │ └── __init__.py ├── ultravox ├── README.md ├── config.yaml └── model │ ├── __init__.py │ └── model.py ├── vllm ├── README.md ├── config.yaml └── model │ ├── __init__.py │ ├── helper.py │ └── model.py ├── whisper ├── faster-whisper-small │ ├── README.md │ ├── config.yaml │ └── model │ │ ├── __init__.py │ │ └── model.py ├── faster-whisper-v2 │ ├── config.yaml │ └── model │ │ ├── __init__.py │ │ └── model.py ├── faster-whisper-v3 │ ├── config.yaml │ └── model │ │ ├── __init__.py │ │ └── model.py ├── whisper-streaming │ ├── README.md │ ├── config.yaml │ ├── model │ │ ├── __init__.py │ │ └── model.py │ ├── packages │ │ └── whisper_streaming │ │ │ ├── line_packet.py │ │ │ ├── whisper_online.py │ │ │ └── whisper_online_server.py │ └── requirements.txt ├── whisper-torchserve │ ├── README.md │ ├── config.yaml │ ├── data │ │ └── config.properties │ └── model │ │ ├── __init__.py │ │ └── model.py ├── whisper-truss │ ├── LICENSE │ ├── README.md │ ├── config.yaml │ ├── data │ │ └── .gitkeep │ └── model │ │ ├── __init__.py │ │ └── model.py ├── whisper-v3-truss-base64 │ ├── README.md │ ├── config.yaml │ └── model │ │ ├── __init__.py │ │ └── model.py ├── whisper-v3-truss │ ├── README.md │ ├── config.yaml │ └── model │ │ ├── __init__.py │ │ └── model.py ├── whisper-v3-turbo │ ├── README.md │ └── config.yaml └── whisperx-truss │ ├── README.md │ ├── config.yaml │ └── model │ ├── __init__.py │ └── model.py ├── xtts-streaming ├── README.md ├── config.yaml ├── model │ ├── __init__.py │ └── model.py └── requirements.txt └── xtts-v2-truss ├── README.md ├── config.yaml └── model ├── __init__.py └── model.py

/.github/workflows/pr.yml:
--------------------------------------------------------------------------------
name: PR

on:
  pull_request:

concurrency:
  group: pr-${{ github.ref_name }}
  cancel-in-progress: true

jobs:
  lint:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: ./.github/actions/setup-python/
      - run: poetry install
      - run: poetry run pre-commit run --all-files

--------------------------------------------------------------------------------
/.isort.cfg:
--------------------------------------------------------------------------------
[settings]
profile=black

--------------------------------------------------------------------------------
/.tool-versions:
--------------------------------------------------------------------------------
python 3.11.11
poetry 1.8.4

--------------------------------------------------------------------------------
/01-getting-started-bert/doc.yaml:
--------------------------------------------------------------------------------
title: "Getting Started"
description: "Building your first Truss"
files:
  - model/model.py
  - config.yaml

--------------------------------------------------------------------------------
/01-getting-started-bert/model/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/01-getting-started-bert/model/__init__.py

--------------------------------------------------------------------------------
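Once a Truss like the getting-started example above is deployed (for instance with `truss push`), it is served behind an HTTP predict endpoint. A minimal sketch of calling it, assuming a Baseten deployment; MODEL_ID, the API key, and the input payload are placeholders, not values from this repo:

```python
# Minimal sketch: call a deployed Truss model's predict endpoint.
# MODEL_ID and YOUR_API_KEY are placeholders (assumptions), and the
# input schema depends on the model's predict() implementation.
import requests

resp = requests.post(
    "https://model-MODEL_ID.api.baseten.co/production/predict",
    headers={"Authorization": "Api-Key YOUR_API_KEY"},
    json={"text": "Truss is a framework for packaging ML models."},
)
print(resp.json())
```

--------------------------------------------------------------------------------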
/02-llm/doc.yaml:
--------------------------------------------------------------------------------
title: "LLM"
description: "Building an LLM"
files:
  - model/model.py
  - config.yaml

--------------------------------------------------------------------------------
/02-llm/model/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/02-llm/model/__init__.py

--------------------------------------------------------------------------------
/03-llm-with-streaming/config.yaml:
--------------------------------------------------------------------------------
# # Setting up the config.yaml
#
# Running Falcon 7B requires torch, transformers,
# and a few other related libraries.
model_name: "LLM with Streaming"
model_metadata:
  example_model_input: {"prompt": "what is the meaning of life"}
requirements:
  - torch==2.0.1
  - peft==0.4.0
  - scipy==1.11.1
  - sentencepiece==0.1.99
  - accelerate==0.21.0
  - bitsandbytes==0.41.1
  - einops==0.6.1
  - transformers==4.31.0
  - numpy==1.26.4
# ## Configure resources for Falcon
#
# Note that we need an A10G to run this model.
resources:
  cpu: "3"
  memory: 14Gi
  use_gpu: true
  accelerator: A10G

--------------------------------------------------------------------------------
/03-llm-with-streaming/doc.yaml:
--------------------------------------------------------------------------------
title: "LLM with Streaming"
description: "Building an LLM with streaming output"
files:
  - model/model.py
  - config.yaml

--------------------------------------------------------------------------------
/03-llm-with-streaming/model/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/03-llm-with-streaming/model/__init__.py

--------------------------------------------------------------------------------
/04-image-generation/doc.yaml:
--------------------------------------------------------------------------------
title: "Text-to-image"
description: "Building a text-to-image model with SDXL"
files:
  - model/model.py
  - config.yaml

--------------------------------------------------------------------------------
/04-image-generation/model/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/04-image-generation/model/__init__.py

--------------------------------------------------------------------------------
/05-speech-to-text/config.yaml:
--------------------------------------------------------------------------------
environment_variables: {}
model_metadata:
  example_model_input: {"url": "https://cdn.baseten.co/docs/production/Gettysburg.mp3"}
model_name: Whisper
python_version: py39
requirements:
  - openai-whisper==20230918
  - torch==2.0.1
  - numpy==1.26.4
resources:
  cpu: "4"
  memory: 16Gi
  use_gpu: true
  accelerator: A10G
secrets: {}
system_packages:
  - ffmpeg
external_data:
  - url: https://baseten-public.s3.us-west-2.amazonaws.com/models/whisper/small.pt
    local_data_path: models/small.pt
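The `external_data` block in the Whisper config above downloads the checkpoint to `models/small.pt` under the Truss data directory at build time, so `load()` can read it from local disk instead of fetching it at startup. A rough sketch of that pattern, assuming the standard Truss model interface; the actual 05-speech-to-text/model/model.py may differ:

```python
# Sketch of loading weights placed by `external_data`; assumes the
# standard Truss Model interface (data_dir is passed to __init__).
import tempfile

import requests
import whisper


class Model:
    def __init__(self, **kwargs):
        self._data_dir = kwargs["data_dir"]
        self._model = None

    def load(self):
        # external_data maps the S3 object to <data_dir>/models/small.pt
        self._model = whisper.load_model(f"{self._data_dir}/models/small.pt")

    def predict(self, model_input):
        # Fetch the audio referenced by `url`, then transcribe it.
        audio = requests.get(model_input["url"]).content
        with tempfile.NamedTemporaryFile(suffix=".mp3") as f:
            f.write(audio)
            f.flush()
            return self._model.transcribe(f.name)
```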
--------------------------------------------------------------------------------
/05-speech-to-text/model/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/05-speech-to-text/model/__init__.py

--------------------------------------------------------------------------------
/06-high-performance-cached-weights/doc.yaml:
--------------------------------------------------------------------------------
title: "Fast Cold Starts with Cached Weights"
description: "Deploy a language model, with the model weights cached at build time"
files:
  - model/model.py
  - config.yaml

--------------------------------------------------------------------------------
/06-high-performance-cached-weights/model/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/06-high-performance-cached-weights/model/__init__.py

--------------------------------------------------------------------------------
/07-high-performance-dynamic-batching/.gitignore:
--------------------------------------------------------------------------------
.venv/
payload.json
.vscode

--------------------------------------------------------------------------------
/07-high-performance-dynamic-batching/.truss_ignore:
--------------------------------------------------------------------------------
.venv/
payload.json
.vscode

--------------------------------------------------------------------------------
/07-high-performance-dynamic-batching/model/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/07-high-performance-dynamic-batching/model/__init__.py

--------------------------------------------------------------------------------
/07-high-performance-dynamic-batching/packages/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/07-high-performance-dynamic-batching/packages/__init__.py

--------------------------------------------------------------------------------
/09-private-huggingface/doc.yaml:
--------------------------------------------------------------------------------
title: "Private Hugging Face Model"
description: "Load a model that requires authentication with Hugging Face"
files:
  - model/model.py
  - config.yaml

--------------------------------------------------------------------------------
/09-private-huggingface/model/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/09-private-huggingface/model/__init__.py

--------------------------------------------------------------------------------
/10-using-system-packages/doc.yaml:
--------------------------------------------------------------------------------
title: "Model with system packages"
description: "Deploy a model with both Python and system dependencies"
files:
  - model/model.py
  - config.yaml

--------------------------------------------------------------------------------
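The point of the 09-private-huggingface example above is that the Hugging Face token comes from Truss secrets rather than being hard-coded. A hedged sketch of how a model class can consume such a secret; the secret name `hf_access_token` is the usual Baseten convention, the repo ID is hypothetical, and the real model.py may differ:

```python
# Hedged sketch: authenticate to Hugging Face with a Truss secret.
# "your-org/private-model" is a hypothetical gated repo ID.
from transformers import pipeline


class Model:
    def __init__(self, **kwargs):
        self._secrets = kwargs["secrets"]
        self._pipeline = None

    def load(self):
        self._pipeline = pipeline(
            "text-generation",
            model="your-org/private-model",
            token=self._secrets["hf_access_token"],
        )

    def predict(self, model_input):
        return self._pipeline(model_input["prompt"])
```

--------------------------------------------------------------------------------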
/10-using-system-packages/model/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/10-using-system-packages/model/__init__.py

--------------------------------------------------------------------------------
/11-embeddings-reranker-classification-tensorrt/BEI-baai-bge-en-icl-embedding-fp8/config.yaml:
--------------------------------------------------------------------------------
# this file was autogenerated by `generate_templates.py` - please do change via template only
model_metadata:
  example_model_input:
    encoding_format: float
    input: text string
    model: model
model_name: BEI-baai-bge-en-icl-embedding-fp8-truss-example
python_version: py39
resources:
  accelerator: H100
  cpu: '1'
  memory: 10Gi
  use_gpu: true
trt_llm:
  build:
    base_model: encoder
    checkpoint_repository:
      repo: BAAI/bge-en-icl
      revision: main
      source: HF
    max_num_tokens: 32768
    num_builder_gpus: 2
    quantization_type: fp8
  runtime:
    webserver_default_route: /v1/embeddings

--------------------------------------------------------------------------------
/11-embeddings-reranker-classification-tensorrt/BEI-baai-bge-large-en-v1.5-embedding/config.yaml:
--------------------------------------------------------------------------------
# this file was autogenerated by `generate_templates.py` - please do change via template only
model_metadata:
  example_model_input:
    encoding_format: float
    input: text string
    model: model
model_name: BEI-baai-bge-large-en-v1.5-embedding-truss-example
python_version: py39
resources:
  accelerator: L4
  cpu: '1'
  memory: 10Gi
  use_gpu: true
trt_llm:
  build:
    base_model: encoder
    checkpoint_repository:
      repo: BAAI/bge-large-en-v1.5
      revision: main
      source: HF
    max_num_tokens: 16384
  runtime:
    webserver_default_route: /v1/embeddings

--------------------------------------------------------------------------------
/11-embeddings-reranker-classification-tensorrt/BEI-baai-bge-m3-embedding-dense/config.yaml:
--------------------------------------------------------------------------------
# this file was autogenerated by `generate_templates.py` - please do change via template only
model_metadata:
  example_model_input:
    encoding_format: float
    input: text string
    model: model
model_name: BEI-baai-bge-m3-embedding-dense-truss-example
python_version: py39
resources:
  accelerator: H100
  cpu: '1'
  memory: 10Gi
  use_gpu: true
trt_llm:
  build:
    base_model: encoder
    checkpoint_repository:
      repo: BAAI/bge-m3
      revision: main
      source: HF
    max_num_tokens: 16384
  runtime:
    webserver_default_route: /v1/embeddings

--------------------------------------------------------------------------------
/11-embeddings-reranker-classification-tensorrt/BEI-baai-bge-multilingual-gemma2-multilingual-embedding/config.yaml:
--------------------------------------------------------------------------------
# this file was autogenerated by `generate_templates.py` - please do change via template only
model_metadata:
  example_model_input:
    encoding_format: float
    input: text string
    model: model
model_name: BEI-baai-bge-multilingual-gemma2-multilingual-embedding-truss-example
python_version: py39
resources:
  accelerator: H100_40GB
  cpu: '1'
  memory: 10Gi
  use_gpu: true
trt_llm:
  build:
    base_model: encoder
    checkpoint_repository:
      repo: BAAI/bge-multilingual-gemma2
      revision: main
      source: HF
    max_num_tokens: 16384
  runtime:
    webserver_default_route: /v1/embeddings

--------------------------------------------------------------------------------
/11-embeddings-reranker-classification-tensorrt/BEI-baai-bge-reranker-large/config.yaml:
--------------------------------------------------------------------------------
# this file was autogenerated by `generate_templates.py` - please do change via template only
model_metadata:
  example_model_input:
    query: What is Baseten?
    raw_scores: true
    return_text: true
    texts:
      - Deep Learning is ...
      - Baseten is a fast inference provider
    truncate: true
    truncation_direction: Right
model_name: BEI-baai-bge-reranker-large-truss-example
python_version: py39
resources:
  accelerator: L4
  cpu: '1'
  memory: 10Gi
  use_gpu: true
trt_llm:
  build:
    base_model: encoder
    checkpoint_repository:
      repo: BAAI/bge-reranker-large
      revision: main
      source: HF
    max_num_tokens: 16384
  runtime:
    webserver_default_route: /rerank

--------------------------------------------------------------------------------
/11-embeddings-reranker-classification-tensorrt/BEI-intfloat-e5-mistral-7b-instruct-embedding-fp8/config.yaml:
--------------------------------------------------------------------------------
# this file was autogenerated by `generate_templates.py` - please do change via template only
model_metadata:
  example_model_input:
    encoding_format: float
    input: text string
    model: model
model_name: BEI-intfloat-e5-mistral-7b-instruct-embedding-fp8-truss-example
python_version: py39
resources:
  accelerator: H100
  cpu: '1'
  memory: 10Gi
  use_gpu: true
trt_llm:
  build:
    base_model: encoder
    checkpoint_repository:
      repo: intfloat/e5-mistral-7b-instruct
      revision: main
      source: HF
    max_num_tokens: 32768
    num_builder_gpus: 2
    quantization_type: fp8
  runtime:
    webserver_default_route: /v1/embeddings

--------------------------------------------------------------------------------
/11-embeddings-reranker-classification-tensorrt/BEI-mixedbread-ai-mxbai-embed-large-v1-embedding/config.yaml:
--------------------------------------------------------------------------------
# this file was autogenerated by `generate_templates.py` - please do change via template only
model_metadata:
  example_model_input:
    encoding_format: float
    input: text string
    model: model
model_name: BEI-mixedbread-ai-mxbai-embed-large-v1-embedding-truss-example
python_version: py39
resources:
  accelerator: L4
  cpu: '1'
  memory: 10Gi
  use_gpu: true
trt_llm:
  build:
    base_model: encoder
    checkpoint_repository:
      repo: mixedbread-ai/mxbai-embed-large-v1
      revision: main
      source: HF
    max_num_tokens: 16384
  runtime:
    webserver_default_route: /v1/embeddings

--------------------------------------------------------------------------------
/11-embeddings-reranker-classification-tensorrt/BEI-nomic-ai-nomic-embed-code-fp8/config.yaml:
--------------------------------------------------------------------------------
# this file was autogenerated by `generate_templates.py` - please do change via template only
autogenerated by `generate_templates.py` - please do change via template only 2 | model_metadata: 3 | example_model_input: 4 | encoding_format: float 5 | input: text string 6 | model: model 7 | model_name: BEI-nomic-ai-nomic-embed-code-fp8-truss-example 8 | python_version: py39 9 | resources: 10 | accelerator: H100_40GB 11 | cpu: '1' 12 | memory: 10Gi 13 | use_gpu: true 14 | trt_llm: 15 | build: 16 | base_model: encoder 17 | checkpoint_repository: 18 | repo: nomic-ai/nomic-embed-code 19 | revision: main 20 | source: HF 21 | max_num_tokens: 32768 22 | num_builder_gpus: 1 23 | quantization_type: fp8 24 | runtime: 25 | webserver_default_route: /v1/embeddings 26 | -------------------------------------------------------------------------------- /11-embeddings-reranker-classification-tensorrt/BEI-qwen-qwen3-embedding-0.6b-fp8/config.yaml: -------------------------------------------------------------------------------- 1 | # this file was autogenerated by `generate_templates.py` - please do change via template only 2 | model_metadata: 3 | example_model_input: 4 | encoding_format: float 5 | input: text string 6 | model: model 7 | model_name: BEI-qwen-qwen3-embedding-0.6b-fp8-truss-example 8 | python_version: py39 9 | resources: 10 | accelerator: L4 11 | cpu: '1' 12 | memory: 10Gi 13 | use_gpu: true 14 | trt_llm: 15 | build: 16 | base_model: encoder 17 | checkpoint_repository: 18 | repo: michaelfeil/Qwen3-Embedding-0.6B-auto 19 | revision: main 20 | source: HF 21 | max_num_tokens: 32768 22 | num_builder_gpus: 4 23 | quantization_type: fp8 24 | runtime: 25 | webserver_default_route: /v1/embeddings 26 | -------------------------------------------------------------------------------- /11-embeddings-reranker-classification-tensorrt/BEI-qwen-qwen3-embedding-4b-fp8/config.yaml: -------------------------------------------------------------------------------- 1 | # this file was autogenerated by `generate_templates.py` - please do change via template only 2 | model_metadata: 3 | example_model_input: 4 | encoding_format: float 5 | input: text string 6 | model: model 7 | model_name: BEI-qwen-qwen3-embedding-4b-fp8-truss-example 8 | python_version: py39 9 | resources: 10 | accelerator: H100_40GB 11 | cpu: '1' 12 | memory: 10Gi 13 | use_gpu: true 14 | trt_llm: 15 | build: 16 | base_model: encoder 17 | checkpoint_repository: 18 | repo: michaelfeil/Qwen3-Embedding-4B-auto 19 | revision: main 20 | source: HF 21 | max_num_tokens: 40960 22 | num_builder_gpus: 1 23 | quantization_type: fp8 24 | runtime: 25 | webserver_default_route: /v1/embeddings 26 | -------------------------------------------------------------------------------- /11-embeddings-reranker-classification-tensorrt/BEI-qwen-qwen3-embedding-8b-fp8/config.yaml: -------------------------------------------------------------------------------- 1 | # this file was autogenerated by `generate_templates.py` - please do change via template only 2 | model_metadata: 3 | example_model_input: 4 | encoding_format: float 5 | input: text string 6 | model: model 7 | model_name: BEI-qwen-qwen3-embedding-8b-fp8-truss-example 8 | python_version: py39 9 | resources: 10 | accelerator: H100_40GB 11 | cpu: '1' 12 | memory: 10Gi 13 | use_gpu: true 14 | trt_llm: 15 | build: 16 | base_model: encoder 17 | checkpoint_repository: 18 | repo: michaelfeil/Qwen3-Embedding-8B-auto 19 | revision: main 20 | source: HF 21 | max_num_tokens: 40960 22 | num_builder_gpus: 1 23 | quantization_type: fp8 24 | runtime: 25 | webserver_default_route: /v1/embeddings 26 | 
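The BEI embedding configs above all expose an OpenAI-compatible `/v1/embeddings` route (`webserver_default_route`), and each `model_metadata.example_model_input` documents the expected request body. Below is a minimal sketch of calling one of these models once deployed; the deployment URL and API key are placeholders to substitute from your own deployment, and the exact endpoint path depends on how the model is exposed.

```python
# Minimal sketch, assuming a deployed BEI embedding truss from one of the
# configs above. DEPLOYMENT_URL and API_KEY are placeholders, not real values.
import requests

DEPLOYMENT_URL = ""  # your model's /v1/embeddings endpoint
API_KEY = ""  # your Baseten API key

resp = requests.post(
    DEPLOYMENT_URL,
    headers={"Authorization": f"Api-Key {API_KEY}"},
    # Mirrors `example_model_input` from the configs above.
    json={"input": "text string", "model": "model", "encoding_format": "float"},
)
resp.raise_for_status()
# OpenAI-compatible embedding responses carry vectors under data[i].embedding.
print(resp.json()["data"][0]["embedding"][:8])
```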
-------------------------------------------------------------------------------- /11-embeddings-reranker-classification-tensorrt/BEI-qwen-qwen3-reranker-0.6b-fp8/config.yaml: -------------------------------------------------------------------------------- 1 | # this file was autogenerated by `generate_templates.py` - please do change via template only 2 | model_metadata: 3 | example_model_input: 4 | encoding_format: float 5 | input: text string 6 | model: model 7 | model_name: BEI-qwen-qwen3-reranker-0.6b-fp8-truss-example 8 | python_version: py39 9 | resources: 10 | accelerator: L4 11 | cpu: '1' 12 | memory: 10Gi 13 | use_gpu: true 14 | trt_llm: 15 | build: 16 | base_model: encoder 17 | checkpoint_repository: 18 | repo: michaelfeil/Qwen3-Reranker-0.6B-seq 19 | revision: main 20 | source: HF 21 | max_num_tokens: 40960 22 | num_builder_gpus: 4 23 | quantization_type: fp8 24 | runtime: 25 | webserver_default_route: /v1/embeddings 26 | -------------------------------------------------------------------------------- /11-embeddings-reranker-classification-tensorrt/BEI-qwen-qwen3-reranker-4b-fp8/config.yaml: -------------------------------------------------------------------------------- 1 | # this file was autogenerated by `generate_templates.py` - please do change via template only 2 | model_metadata: 3 | example_model_input: 4 | encoding_format: float 5 | input: text string 6 | model: model 7 | model_name: BEI-qwen-qwen3-reranker-4b-fp8-truss-example 8 | python_version: py39 9 | resources: 10 | accelerator: H100_40GB 11 | cpu: '1' 12 | memory: 10Gi 13 | use_gpu: true 14 | trt_llm: 15 | build: 16 | base_model: encoder 17 | checkpoint_repository: 18 | repo: michaelfeil/Qwen3-Reranker-4B-seq 19 | revision: main 20 | source: HF 21 | max_num_tokens: 40960 22 | num_builder_gpus: 1 23 | quantization_type: fp8 24 | runtime: 25 | webserver_default_route: /v1/embeddings 26 | -------------------------------------------------------------------------------- /11-embeddings-reranker-classification-tensorrt/BEI-qwen-qwen3-reranker-8b-fp8/config.yaml: -------------------------------------------------------------------------------- 1 | # this file was autogenerated by `generate_templates.py` - please do change via template only 2 | model_metadata: 3 | example_model_input: 4 | encoding_format: float 5 | input: text string 6 | model: model 7 | model_name: BEI-qwen-qwen3-reranker-8b-fp8-truss-example 8 | python_version: py39 9 | resources: 10 | accelerator: H100_40GB 11 | cpu: '1' 12 | memory: 10Gi 13 | use_gpu: true 14 | trt_llm: 15 | build: 16 | base_model: encoder 17 | checkpoint_repository: 18 | repo: michaelfeil/Qwen3-Reranker-8B-seq 19 | revision: main 20 | source: HF 21 | max_num_tokens: 40960 22 | num_builder_gpus: 1 23 | quantization_type: fp8 24 | runtime: 25 | webserver_default_route: /v1/embeddings 26 | -------------------------------------------------------------------------------- /11-embeddings-reranker-classification-tensorrt/BEI-salesforce-sfr-embedding-mistral-fp8/config.yaml: -------------------------------------------------------------------------------- 1 | # this file was autogenerated by `generate_templates.py` - please do change via template only 2 | model_metadata: 3 | example_model_input: 4 | encoding_format: float 5 | input: text string 6 | model: model 7 | model_name: BEI-salesforce-sfr-embedding-mistral-fp8-truss-example 8 | python_version: py39 9 | resources: 10 | accelerator: H100_40GB 11 | cpu: '1' 12 | memory: 10Gi 13 | use_gpu: true 14 | trt_llm: 15 | build: 16 | base_model: 
encoder 17 | checkpoint_repository: 18 | repo: Salesforce/SFR-Embedding-Mistral 19 | revision: main 20 | source: HF 21 | max_num_tokens: 32768 22 | num_builder_gpus: 1 23 | quantization_type: fp8 24 | runtime: 25 | webserver_default_route: /v1/embeddings 26 | -------------------------------------------------------------------------------- /11-embeddings-reranker-classification-tensorrt/BEI-samlowe-roberta-base-go_emotions-classification/config.yaml: -------------------------------------------------------------------------------- 1 | # this file was autogenerated by `generate_templates.py` - please do change via template only 2 | model_metadata: 3 | example_model_input: 4 | inputs: 5 | - - Baseten is a fast inference provider 6 | - - Classify this separately. 7 | raw_scores: true 8 | truncate: true 9 | truncation_direction: Right 10 | model_name: BEI-samlowe-roberta-base-go_emotions-classification-truss-example 11 | python_version: py39 12 | resources: 13 | accelerator: L4 14 | cpu: '1' 15 | memory: 10Gi 16 | use_gpu: true 17 | trt_llm: 18 | build: 19 | base_model: encoder 20 | checkpoint_repository: 21 | repo: SamLowe/roberta-base-go_emotions 22 | revision: main 23 | source: HF 24 | max_num_tokens: 16384 25 | runtime: 26 | webserver_default_route: /predict 27 | -------------------------------------------------------------------------------- /11-embeddings-reranker-classification-tensorrt/BEI-snowflake-snowflake-arctic-embed-l-v2.0/config.yaml: -------------------------------------------------------------------------------- 1 | # this file was autogenerated by `generate_templates.py` - please do change via template only 2 | model_metadata: 3 | example_model_input: 4 | encoding_format: float 5 | input: text string 6 | model: model 7 | model_name: BEI-snowflake-snowflake-arctic-embed-l-v2.0-truss-example 8 | python_version: py39 9 | resources: 10 | accelerator: H100 11 | cpu: '1' 12 | memory: 10Gi 13 | use_gpu: true 14 | trt_llm: 15 | build: 16 | base_model: encoder 17 | checkpoint_repository: 18 | repo: Snowflake/snowflake-arctic-embed-l-v2.0 19 | revision: main 20 | source: HF 21 | max_num_tokens: 16384 22 | runtime: 23 | webserver_default_route: /v1/embeddings 24 | -------------------------------------------------------------------------------- /11-embeddings-reranker-classification-tensorrt/BEI-whereisai-uae-large-v1-embedding/config.yaml: -------------------------------------------------------------------------------- 1 | # this file was autogenerated by `generate_templates.py` - please do change via template only 2 | model_metadata: 3 | example_model_input: 4 | encoding_format: float 5 | input: text string 6 | model: model 7 | model_name: BEI-whereisai-uae-large-v1-embedding-truss-example 8 | python_version: py39 9 | resources: 10 | accelerator: L4 11 | cpu: '1' 12 | memory: 10Gi 13 | use_gpu: true 14 | trt_llm: 15 | build: 16 | base_model: encoder 17 | checkpoint_repository: 18 | repo: WhereIsAI/UAE-Large-V1 19 | revision: main 20 | source: HF 21 | max_num_tokens: 16384 22 | runtime: 23 | webserver_default_route: /v1/embeddings 24 | -------------------------------------------------------------------------------- /11-embeddings-reranker-classification-tensorrt/templating/.internal_tei/Dockerfile: -------------------------------------------------------------------------------- 1 | ARG TAG=1.7.1 2 | # this image builds a truss-compatible image with the text-embeddings-inference image as base 3 | # it mainly requires python3 4 | # optional, git and git-lfs are installed to allow for easy 
cloning of the huggingface model repos. 5 | FROM ghcr.io/huggingface/text-embeddings-inference:${TAG} 6 | RUN apt-get update && apt-get install -y python3 python3-pip git git-lfs 7 | RUN git lfs install 8 | ENTRYPOINT ["text-embeddings-router"] 9 | CMD ["--json-output"] 10 | -------------------------------------------------------------------------------- /11-embeddings-reranker-classification-tensorrt/templating/.internal_tei/roll_out_docker.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | # Map architectures to prefixes 5 | declare -A ARCHES=( 6 | ["cpu"]="cpu-" 7 | ["turing"]="turing-" 8 | ["ampere80"]="" 9 | ["ampere86"]="86-" 10 | ["adalovelace"]="89-" 11 | ["hopper"]="hopper-" 12 | ) 13 | 14 | # Define version and target 15 | VERSION="1.7.1" 16 | TARGET="baseten/text-embeddings-inference-mirror" 17 | 18 | # Build and push images 19 | for ARCH in "${!ARCHES[@]}"; do 20 | ARCH_PREFIX=${ARCHES[$ARCH]} 21 | TAG="${TARGET}:${ARCH_PREFIX}${VERSION}" 22 | 23 | echo "Building and pushing image for $ARCH: $TAG" 24 | 25 | docker buildx build -t "$TAG" --build-arg TAG="${ARCH_PREFIX}${VERSION}" --push . 26 | done 27 | 28 | echo "All images have been built and pushed." 29 | -------------------------------------------------------------------------------- /11-embeddings-reranker-classification-tensorrt/templating/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/11-embeddings-reranker-classification-tensorrt/templating/README.md -------------------------------------------------------------------------------- /assets/comfyui-screenshot-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/assets/comfyui-screenshot-1.png -------------------------------------------------------------------------------- /assets/comfyui-screenshot-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/assets/comfyui-screenshot-2.png -------------------------------------------------------------------------------- /assets/comfyui-screenshot-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/assets/comfyui-screenshot-3.png -------------------------------------------------------------------------------- /audiogen-medium/config.yaml: -------------------------------------------------------------------------------- 1 | description: AudioGen is a simple and controllable model for audio generation developed 2 | by Facebook AI Research. 
3 | environment_variables: {} 4 | external_package_dirs: [] 5 | model_metadata: 6 | avatar_url: https://cdn.baseten.co/production/static/explore/meta.png 7 | cover_image_url: https://cdn.baseten.co/production/static/explore/musicgen-cover.png 8 | example_model_input: 9 | duration: 8 10 | prompts: 11 | - dog barking 12 | - siren of an emergency vehicle 13 | - footsteps in a corridor 14 | tags: 15 | - text-to-audio 16 | model_name: AudioGen medium 17 | python_version: py39 18 | requirements: 19 | - torch>=2 20 | - git+https://github.com/facebookresearch/audiocraft.git 21 | - torchaudio 22 | resources: 23 | accelerator: A10G 24 | cpu: '3' 25 | memory: 14Gi 26 | use_gpu: true 27 | secrets: {} 28 | system_packages: 29 | - ffmpeg 30 | -------------------------------------------------------------------------------- /audiogen-medium/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/audiogen-medium/model/__init__.py -------------------------------------------------------------------------------- /bin/validate_ci.py: -------------------------------------------------------------------------------- 1 | import truss 2 | import yaml 3 | 4 | with open("ci.yaml", "r") as file: 5 | paths = yaml.safe_load(file) 6 | 7 | for path in paths["tests"]: 8 | _ = truss.load(path) 9 | -------------------------------------------------------------------------------- /binocular/config.yaml: -------------------------------------------------------------------------------- 1 | environment_variables: {} 2 | external_package_dirs: [] 3 | model_cache: 4 | - allow_patterns: 5 | - '*.bin' 6 | ignore_patterns: 7 | - coreml/* 8 | repo_id: tiiuae/falcon-7b 9 | - allow_patterns: 10 | - '*.bin' 11 | ignore_patterns: 12 | - coreml/* 13 | repo_id: tiiuae/falcon-7b-instruct 14 | model_name: Binoculars 15 | python_version: py311 16 | requirements: 17 | - git+https://github.com/ahans30/Binoculars.git 18 | resources: 19 | accelerator: A10G:2 20 | secrets: {} 21 | system_packages: [] 22 | -------------------------------------------------------------------------------- /binocular/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/binocular/model/__init__.py -------------------------------------------------------------------------------- /binocular/model/model.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from binoculars import Binoculars 4 | 5 | MINIMUM_TOKENS = 64 6 | 7 | 8 | class Model: 9 | def __init__(self, **kwargs): 10 | self._model = None 11 | 12 | def load(self): 13 | # Load model here and assign to self._model.
14 | self._model = Binoculars() 15 | self._tokenizer = self._model.tokenizer 16 | 17 | def count_tokens(self, text): 18 | return len(self._tokenizer(text).input_ids) 19 | 20 | def predict(self, model_input: dict): 21 | input_text = model_input.pop("text") 22 | if self.count_tokens(input_text) < MINIMUM_TOKENS: 23 | logging.warning("Insufficient content length") 24 | return {} 25 | 26 | return { 27 | "score": self._model.compute_score(input_text), 28 | "label": self._model.predict(input_text), 29 | } 30 | -------------------------------------------------------------------------------- /binocular/packages/config.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | huggingface_config = { 4 | # Only required for private models from Huggingface (e.g. LLaMA models) 5 | "TOKEN": os.environ.get("HF_TOKEN", None) 6 | } 7 | -------------------------------------------------------------------------------- /chatterbox-tts/config.yaml: -------------------------------------------------------------------------------- 1 | model_name: Chatterbox TTS 2 | base_image: 3 | image: jojobaseten/truss-numpy-1.26.0-gpu:0.4 4 | python_executable_path: /usr/bin/python3 5 | python_version: py312 6 | requirements: 7 | - chatterbox-tts 8 | resources: 9 | accelerator: H100 10 | cpu: '1' 11 | memory: 40Gi 12 | use_gpu: true 13 | secrets: 14 | hf_access_token: null -------------------------------------------------------------------------------- /chatterbox-tts/docker/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM baseten/truss-server-base:3.12-gpu-v0.9.0 2 | 3 | # Fix the urllib3/six dependency issue first 4 | RUN pip install --upgrade --force-reinstall urllib3 --no-cache-dir 5 | 6 | RUN pip uninstall numpy -y || true 7 | RUN pip install numpy==1.26.0 --no-cache-dir -------------------------------------------------------------------------------- /chatterbox-tts/docker/docker_build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | DOCKER_USERNAME="YOUR_DOCKER_USERNAME" 4 | IMAGE_NAME="truss-numpy-1.26.0-gpu" 5 | VERSION="0.1" 6 | 7 | docker buildx build --platform linux/amd64 -t $IMAGE_NAME:$VERSION --load .
8 | docker tag $IMAGE_NAME:$VERSION $DOCKER_USERNAME/$IMAGE_NAME:$VERSION 9 | docker push $DOCKER_USERNAME/$IMAGE_NAME:$VERSION -------------------------------------------------------------------------------- /chatterbox-tts/input/obama_8s.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/chatterbox-tts/input/obama_8s.wav -------------------------------------------------------------------------------- /chatterbox-tts/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/chatterbox-tts/model/__init__.py -------------------------------------------------------------------------------- /ci.yaml: -------------------------------------------------------------------------------- 1 | tests: 2 | - 01-getting-started-bert 3 | - 02-llm 4 | - 03-llm-with-streaming 5 | - 04-image-generation 6 | - 05-speech-to-text 7 | - 06-high-performance-cached-weights 8 | - 10-using-system-packages 9 | - mistral/mistral-7b 10 | - mistral/mistral-7b-instruct 11 | - mistral/mistral-7b-chat 12 | - whisper/whisper-v3-truss 13 | - gfp-gan 14 | - stable-diffusion/stable-diffusion-xl-1.0 15 | - whisper/faster-whisper-v2 16 | - whisper/faster-whisper-v3 17 | - llama/llama-2-7b-chat 18 | - playground-v2-aesthetic 19 | - llama/tinyllama-1.1B-chat-v1.0 20 | -------------------------------------------------------------------------------- /clip/config.yaml: -------------------------------------------------------------------------------- 1 | environment_variables: {} 2 | external_package_dirs: [] 3 | model_metadata: 4 | example_model_input: 5 | url: https://images.pexels.com/photos/1170986/pexels-photo-1170986.jpeg?auto=compress&cs=tinysrgb&w=1600 6 | model_name: clip-example 7 | python_version: py311 8 | requirements: 9 | - transformers==4.47.1 10 | - pillow 11 | - torch 12 | resources: 13 | accelerator: A10G 14 | cpu: '3' 15 | memory: 14Gi 16 | use_gpu: true 17 | secrets: {} 18 | system_packages: [] 19 | -------------------------------------------------------------------------------- /clip/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/clip/model/__init__.py -------------------------------------------------------------------------------- /cogvlm/config.yaml: -------------------------------------------------------------------------------- 1 | environment_variables: {} 2 | external_package_dirs: [] 3 | model_name: CogVLM 4 | python_version: py311 5 | requirements: 6 | - torch==2.0.1 7 | - sentencepiece==0.1.99 8 | - protobuf==4.25.1 9 | - transformers==4.35.2 10 | - einops==0.7.0 11 | - torchvision==0.15.2 12 | - Pillow==10.1.0 13 | - xformers==0.0.22 14 | - accelerate==0.25.0 15 | resources: 16 | accelerator: A100 17 | cpu: '3' 18 | memory: 15Gi 19 | use_gpu: true 20 | secrets: {} 21 | system_packages: [] 22 | -------------------------------------------------------------------------------- /cogvlm/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/cogvlm/model/__init__.py 
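`ci.yaml` above enumerates the example directories exercised by CI, and `bin/validate_ci.py` simply loads each one with `truss.load` to catch malformed trusses. Below is a small illustrative extension of that script (hypothetical, not part of the repo) that additionally warns when an example omits `model_metadata.example_model_input`, which most configs here rely on:

```python
# Illustrative sketch extending bin/validate_ci.py (hypothetical, not in the
# repo): load each truss as CI does, then inspect its config.yaml directly.
import pathlib

import truss
import yaml

with open("ci.yaml", "r") as file:
    paths = yaml.safe_load(file)

for path in paths["tests"]:
    _ = truss.load(path)  # raises if the truss is malformed
    config = yaml.safe_load(pathlib.Path(path, "config.yaml").read_text())
    metadata = config.get("model_metadata") or {}
    if "example_model_input" not in metadata:
        print(f"warning: {path} has no model_metadata.example_model_input")
```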
-------------------------------------------------------------------------------- /comfyui-truss/config.yaml: -------------------------------------------------------------------------------- 1 | base_image: 2 | image: bolabaseten/comfyui-truss-base:6a7bc35 3 | python_executable_path: /usr/bin/python3 4 | description: Deploy a ComfyUI workflow as a Truss 5 | environment_variables: {} 6 | external_package_dirs: [] 7 | model_metadata: 8 | example_model_input: 9 | workflow_values: 10 | controlnet_image: https://storage.googleapis.com/logos-bucket-01/baseten_logo.png 11 | negative_prompt: blurry, text, low quality 12 | positive_prompt: An igloo on a snowy day, 4k, hd 13 | model_name: ComfyUI Workflow 14 | python_version: py39 15 | requirements: 16 | - websocket-client==1.6.4 17 | - accelerate==0.23.0 18 | - opencv-python 19 | resources: 20 | accelerator: A10G 21 | cpu: '3' 22 | memory: 14Gi 23 | use_gpu: true 24 | secrets: {} 25 | system_packages: 26 | - ffmpeg 27 | - libgl1-mesa-glx 28 | -------------------------------------------------------------------------------- /comfyui-truss/data/model.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "url": "https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0/resolve/main/sd_xl_base_1.0.safetensors", 4 | "path": "models/checkpoints/sd_xl_base_1.0.safetensors" 5 | }, 6 | { 7 | "url": "https://huggingface.co/diffusers/controlnet-canny-sdxl-1.0/resolve/main/diffusion_pytorch_model.fp16.safetensors", 8 | "path": "models/controlnet/diffusers_xl_canny_full.safetensors" 9 | } 10 | ] 11 | -------------------------------------------------------------------------------- /comfyui-truss/examples/sdxl-controlnet/model.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "url": "https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0/resolve/main/sd_xl_base_1.0.safetensors", 4 | "path": "models/checkpoints/sd_xl_base_1.0.safetensors" 5 | }, 6 | { 7 | "url": "https://huggingface.co/diffusers/controlnet-canny-sdxl-1.0/resolve/main/diffusion_pytorch_model.fp16.safetensors", 8 | "path": "models/controlnet/diffusers_xl_canny_full.safetensors" 9 | } 10 | ] 11 | -------------------------------------------------------------------------------- /comfyui-truss/examples/sdxl-with-refiner/model.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "url": "https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0/resolve/main/sd_xl_base_1.0.safetensors", 4 | "path": "models/checkpoints/sd_xl_base_1.0.safetensors" 5 | }, 6 | { 7 | "url": "https://huggingface.co/stabilityai/stable-diffusion-xl-refiner-1.0/resolve/main/sd_xl_refiner_1.0.safetensors", 8 | "path": "models/checkpoints/sd_xl_refiner_1.0.safetensors" 9 | } 10 | ] 11 | -------------------------------------------------------------------------------- /comfyui-truss/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/comfyui-truss/model/__init__.py -------------------------------------------------------------------------------- /control-net-qrcode/config.yaml: -------------------------------------------------------------------------------- 1 | environment_variables: {} 2 | external_package_dirs: [] 3 | model_metadata: 4 | example_model_input: 5 | prompt: A cubism painting of the Garden of Eden with
animals walking around, 6 | Andreas Rocha, matte painting concept art, a detailed matte painting 7 | qr_code_content: https://www.baseten.co 8 | model_name: control-net-qrcode 9 | python_version: py310 10 | requirements: 11 | - diffusers==0.21.1 12 | - torch==2.0.1 13 | - ftfy==6.1.1 14 | - scipy==1.9.3 15 | - transformers==4.25.1 16 | - accelerate==0.20.3 17 | - qrcode==7.4.2 18 | - xformers==0.0.21 19 | resources: 20 | accelerator: T4 21 | cpu: '3' 22 | memory: 14Gi 23 | use_gpu: true 24 | secrets: {} 25 | system_packages: [] 26 | -------------------------------------------------------------------------------- /control-net-qrcode/controlnet_qr_code_results.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/control-net-qrcode/controlnet_qr_code_results.gif -------------------------------------------------------------------------------- /control-net-qrcode/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/control-net-qrcode/model/__init__.py -------------------------------------------------------------------------------- /control-net-qrcode/twitter_mask.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/control-net-qrcode/twitter_mask.jpeg -------------------------------------------------------------------------------- /control-net-qrcode/twitter_output.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/control-net-qrcode/twitter_output.jpg -------------------------------------------------------------------------------- /custom-server/deepseek-v2-5-instruct-sglang/config.yaml: -------------------------------------------------------------------------------- 1 | base_image: 2 | image: lmsysorg/sglang:v0.4.0.post1-cu124 3 | model_metadata: 4 | repo_id: deepseek-ai/DeepSeek-V2.5-1210 5 | docker_server: 6 | start_command: sh -c "HF_TOKEN=$(cat /secrets/hf_access_token) python3 -m sglang.launch_server --model-path deepseek-ai/DeepSeek-V2.5-1210 --port 8000 --tp 8 --trust-remote-code" 7 | readiness_endpoint: /health 8 | liveness_endpoint: /health 9 | predict_endpoint: /v1/completions 10 | server_port: 8000 11 | resources: 12 | accelerator: H100:8 13 | use_gpu: true 14 | runtime: 15 | predict_concurrency : 32 16 | model_name: DeepSeek V2.5 1210 SGLang 17 | environment_variables: 18 | hf_access_token: null 19 | -------------------------------------------------------------------------------- /custom-server/llama3-70b-instruct-lmdeploy/config.yaml: -------------------------------------------------------------------------------- 1 | base_image: 2 | image: openmmlab/lmdeploy:v0.6.4-cu12 3 | model_metadata: 4 | repo_id: meta-llama/Llama-3.1-70B-Instruct 5 | docker_server: 6 | start_command: sh -c "HF_TOKEN=$(cat /secrets/hf_access_token) python3 -m lmdeploy serve api_server meta-llama/Llama-3.1-70B-Instruct --server-port 8000 --tp 4" 7 | readiness_endpoint: /health 8 | liveness_endpoint: /health 9 | predict_endpoint: /v1/completions 10 | server_port: 8000 11 | resources: 12 | accelerator: H100:4 13 | use_gpu: true 14 | runtime: 15 | 
predict_concurrency : 32 16 | model_name: Llama 3.1 70B Instruct LMDeploy 17 | environment_variables: 18 | hf_access_token: null 19 | -------------------------------------------------------------------------------- /custom-server/llama3-70b-instruct-sglang/config.yaml: -------------------------------------------------------------------------------- 1 | base_image: 2 | image: lmsysorg/sglang:v0.4.0.post1-cu124 3 | model_metadata: 4 | repo_id: meta-llama/Llama-3.1-70B-Instruct 5 | docker_server: 6 | start_command: sh -c "HF_TOKEN=$(cat /secrets/hf_access_token) python3 -m sglang.launch_server --model-path meta-llama/Llama-3.1-70B-Instruct --port 8000 --tp 4" 7 | readiness_endpoint: /health 8 | liveness_endpoint: /health 9 | predict_endpoint: /v1/completions 10 | server_port: 8000 11 | resources: 12 | accelerator: H100:4 13 | use_gpu: true 14 | runtime: 15 | predict_concurrency : 32 16 | model_name: Llama 3.1 70B Instruct SGLang 17 | environment_variables: 18 | hf_access_token: null 19 | -------------------------------------------------------------------------------- /custom-server/llama3-8b-instruct-lmdeploy/config.yaml: -------------------------------------------------------------------------------- 1 | base_image: 2 | image: openmmlab/lmdeploy:v0.6.4-cu12 3 | model_metadata: 4 | repo_id: meta-llama/Llama-3.1-8B-Instruct 5 | docker_server: 6 | start_command: sh -c "HF_TOKEN=$(cat /secrets/hf_access_token) python3 -m lmdeploy serve api_server meta-llama/Llama-3.1-8B-Instruct --server-port 8000" 7 | readiness_endpoint: /health 8 | liveness_endpoint: /health 9 | predict_endpoint: /v1/completions 10 | server_port: 8000 11 | resources: 12 | accelerator: H100 13 | use_gpu: true 14 | runtime: 15 | predict_concurrency : 32 16 | model_name: Llama 3.1 8B Instruct LMDeploy 17 | environment_variables: 18 | hf_access_token: null 19 | -------------------------------------------------------------------------------- /custom-server/llama3-8b-instruct-sglang/config.yaml: -------------------------------------------------------------------------------- 1 | base_image: 2 | image: lmsysorg/sglang:v0.4.0.post1-cu124 3 | model_metadata: 4 | repo_id: meta-llama/Llama-3.1-8B-Instruct 5 | docker_server: 6 | start_command: sh -c "HF_TOKEN=$(cat /secrets/hf_access_token) python3 -m sglang.launch_server --model-path meta-llama/Llama-3.1-8B-Instruct --port 8000" 7 | readiness_endpoint: /health 8 | liveness_endpoint: /health 9 | predict_endpoint: /v1/completions 10 | server_port: 8000 11 | resources: 12 | accelerator: H100 13 | use_gpu: true 14 | runtime: 15 | predict_concurrency : 32 16 | model_name: Llama 3.1 8B Instruct SGLang 17 | environment_variables: 18 | hf_access_token: null 19 | -------------------------------------------------------------------------------- /deepfloyd-xl/config.yaml: -------------------------------------------------------------------------------- 1 | description: Generate original images from text prompts. 
2 | environment_variables: {} 3 | external_package_dirs: [] 4 | model_metadata: 5 | avatar_url: https://cdn.baseten.co/production/static/explore/deep-floyd.png 6 | cover_image_url: https://cdn.baseten.co/production/static/explore/deepfloyd-cover.png 7 | tags: 8 | - image-generation 9 | model_name: Deepfloyd XL 10 | python_version: py39 11 | requirements: 12 | - diffusers 13 | - transformers 14 | - torch 15 | - scipy 16 | - accelerate 17 | - pillow 18 | - bitsandbytes 19 | - sentencepiece 20 | - huggingface_hub 21 | resources: 22 | accelerator: A10G 23 | cpu: '3' 24 | memory: 14Gi 25 | use_gpu: true 26 | secrets: 27 | hf_access_token: ENTER HF API KEY HERE 28 | spec_version: 2.0 29 | system_packages: [] 30 | -------------------------------------------------------------------------------- /deepfloyd-xl/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/deepfloyd-xl/model/__init__.py -------------------------------------------------------------------------------- /deepseek/engine-deepseek-r1-distill-llama-70b/README.md: -------------------------------------------------------------------------------- 1 | # DeepSeek-R1 Distill Llama 70B 2 | -------------------------------------------------------------------------------- /deepseek/engine-deepseek-r1-distill-llama-8b/README.md: -------------------------------------------------------------------------------- 1 | # DeepSeek-R1 Distill Llama 8B 2 | -------------------------------------------------------------------------------- /deepseek/engine-deepseek-r1-distill-qwen-14b/README.md: -------------------------------------------------------------------------------- 1 | # DeepSeek-R1 Distill Qwen 14B 2 | -------------------------------------------------------------------------------- /deepseek/engine-deepseek-r1-distill-qwen-32b/README.md: -------------------------------------------------------------------------------- 1 | # DeepSeek-R1 Distill Qwen 32B 2 | -------------------------------------------------------------------------------- /deepseek/engine-deepseek-r1-distill-qwen-7b/README.md: -------------------------------------------------------------------------------- 1 | # DeepSeek-R1 Distill Qwen 7B 2 | -------------------------------------------------------------------------------- /deepspeed-mii/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/deepspeed-mii/model/__init__.py -------------------------------------------------------------------------------- /dis-segmentation/config.yaml: -------------------------------------------------------------------------------- 1 | environment_variables: {} 2 | external_package_dirs: [] 3 | model_metadata: 4 | example_model_input: 5 | input_image: 6 | model_name: DIS Segmentation 7 | python_version: py310 8 | requirements: 9 | - torch==2.1.0 10 | - Pillow==9.4.0 11 | - numpy==1.23.5 12 | - gdown==4.7.3 13 | - torchvision==0.16.0 14 | - torchaudio==2.1.0 15 | - scikit-image==0.19.3 16 | resources: 17 | accelerator: T4 18 | memory: 2Gi 19 | use_gpu: true 20 | secrets: {} 21 | system_packages: [] 22 | -------------------------------------------------------------------------------- /dis-segmentation/model/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/dis-segmentation/model/__init__.py -------------------------------------------------------------------------------- /dis-segmentation/model/clone_repo_helper.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | 4 | 5 | def clone_repo(): 6 | git_repo_url = "https://github.com/xuebinqin/DIS" 7 | commit_hash = "ec4a4f4f8d967f744bf857149d5ee343b59766b0" 8 | git_clone_command = ["git", "clone", git_repo_url] 9 | 10 | # clone the repo 11 | subprocess.run(git_clone_command, check=True) 12 | print("Git repository cloned successfully!") 13 | 14 | os.chdir(os.path.join(os.getcwd(), "DIS", "IS-Net")) 15 | 16 | # Pin repo to a specific commit 17 | checkout_command = ["git", "checkout", commit_hash] 18 | subprocess.run(checkout_command, check=True) 19 | -------------------------------------------------------------------------------- /dockerfiles/ComfyUI.dockerfile: -------------------------------------------------------------------------------- 1 | FROM baseten/truss-server-base:3.11-gpu-v0.7.17 2 | 3 | ARG COMMIT_HASH=6a7bc35db845179a26e62534f3d4b789151e52fe 4 | 5 | RUN git clone https://github.com/comfyanonymous/ComfyUI.git /app/ComfyUI 6 | 7 | RUN cd /app/ComfyUI; git checkout $COMMIT_HASH; pip install -r requirements.txt 8 | -------------------------------------------------------------------------------- /flux/dev/config.yaml: -------------------------------------------------------------------------------- 1 | external_package_dirs: [] 2 | model_metadata: 3 | example_model_input: {"prompt": 'black forest gateau cake spelling out the words "FLUX DEV", tasty, food photography, dynamic shot'} 4 | repo_id: black-forest-labs/FLUX.1-dev 5 | model_name: Flux.1-dev 6 | python_version: py311 7 | requirements: 8 | - git+https://github.com/huggingface/diffusers.git@fc6a91e3834c35e57b398ad1c0d99f6f83557e04 9 | - transformers 10 | - accelerate 11 | - sentencepiece 12 | - protobuf 13 | resources: 14 | accelerator: H100_40GB 15 | use_gpu: true 16 | secrets: 17 | hf_access_token: null 18 | system_packages: 19 | - ffmpeg 20 | - libsm6 21 | - libxext6 22 | -------------------------------------------------------------------------------- /flux/dev/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/flux/dev/model/__init__.py -------------------------------------------------------------------------------- /flux/schnell/config.yaml: -------------------------------------------------------------------------------- 1 | external_package_dirs: [] 2 | model_metadata: 3 | example_model_input: {"prompt": 'black forest gateau cake spelling out the words "FLUX SCHNELL", tasty, food photography, dynamic shot'} 4 | repo_id: black-forest-labs/FLUX.1-schnell 5 | model_name: Flux.1-schnell 6 | python_version: py311 7 | requirements: 8 | - git+https://github.com/huggingface/diffusers.git@fc6a91e3834c35e57b398ad1c0d99f6f83557e04 9 | - transformers 10 | - accelerate 11 | - sentencepiece 12 | - protobuf 13 | resources: 14 | accelerator: H100_40GB 15 | use_gpu: true 16 | secrets: 17 | hf_access_token: null 18 | system_packages: 19 | - ffmpeg 20 | - libsm6 21 | - libxext6 22 | -------------------------------------------------------------------------------- /flux/schnell/model/__init__.py:
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/flux/schnell/model/__init__.py -------------------------------------------------------------------------------- /fotographer/zenctrl/README.md: -------------------------------------------------------------------------------- 1 | ![Header Image](images/banner_1.png) 2 | 3 | # Fotographer AI ZenCtrl 4 | 5 | Image-to-image model for generating in-context product photography. 6 | 7 | Deploy with `truss push --promote` 8 | 9 | Call with `python call.py` after providing the `model_id` from the deployed model. 10 | 11 | ### Example input image 12 | 13 | ![Speaker Input](images/speaker-input.png) 14 | 15 | ### Example output image 16 | 17 | ![Speaker Output](images/speaker-output.png) 18 | -------------------------------------------------------------------------------- /fotographer/zenctrl/config.yaml: -------------------------------------------------------------------------------- 1 | base_image: 2 | image: fotographerai/zenctrlstage:latest 3 | model_metadata: {} 4 | docker_server: 5 | start_command: sh -c "HF_TOKEN=$(cat /secrets/hf_access_token) python -m uvicorn app:app --host 0.0.0.0 --port 8000 --log-level debug" 6 | readiness_endpoint: /health 7 | liveness_endpoint: /health 8 | predict_endpoint: /generate 9 | server_port: 8000 10 | resources: 11 | accelerator: H100 12 | use_gpu: true 13 | model_name: ZenCtrl 14 | environment_variables: 15 | PORT: 8000 16 | HF_TOKEN: null 17 | runtime: 18 | predict_concurrency: 8 19 | secrets: 20 | hf_access_token: null 21 | -------------------------------------------------------------------------------- /fotographer/zenctrl/images/banner_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/fotographer/zenctrl/images/banner_1.png -------------------------------------------------------------------------------- /fotographer/zenctrl/images/camera.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/fotographer/zenctrl/images/camera.png -------------------------------------------------------------------------------- /fotographer/zenctrl/images/speaker-input.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/fotographer/zenctrl/images/speaker-input.png -------------------------------------------------------------------------------- /fotographer/zenctrl/images/speaker-output.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/fotographer/zenctrl/images/speaker-output.png -------------------------------------------------------------------------------- /gemma/gemma-2-27b-it-vllm/config.yaml: -------------------------------------------------------------------------------- 1 | model_name: "Gemma 2 27B Instruct VLLM" 2 | python_version: py311 3 | model_metadata: 4 | example_model_input: {"prompt": "what is the meaning of life"} 5 | repo_id: google/gemma-2-27b-it 6 | tensor_parallel: 1 7 | max_num_seqs: 16 8 | requirements: 9 | - vllm==0.5.1 10 | -
https://github.com/flashinfer-ai/flashinfer/releases/download/v0.0.8/flashinfer-0.0.8+cu121torch2.3-cp311-cp311-linux_x86_64.whl 11 | resources: 12 | accelerator: A100 13 | use_gpu: true 14 | runtime: 15 | predict_concurrency: 128 16 | secrets: 17 | hf_access_token: null 18 | -------------------------------------------------------------------------------- /gemma/gemma-2-27b-it-vllm/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/gemma/gemma-2-27b-it-vllm/model/__init__.py -------------------------------------------------------------------------------- /gemma/gemma-2-9b-it-vllm/config.yaml: -------------------------------------------------------------------------------- 1 | model_name: "Gemma 2 9B Instruct VLLM" 2 | python_version: py311 3 | model_metadata: 4 | example_model_input: {"prompt": "what is the meaning of life"} 5 | repo_id: google/gemma-2-9b-it 6 | tensor_parallel: 1 7 | requirements: 8 | - vllm==0.5.1 9 | - https://github.com/flashinfer-ai/flashinfer/releases/download/v0.0.8/flashinfer-0.0.8+cu121torch2.3-cp311-cp311-linux_x86_64.whl 10 | resources: 11 | accelerator: A100 12 | use_gpu: true 13 | runtime: 14 | predict_concurrency: 128 15 | secrets: 16 | hf_access_token: null 17 | -------------------------------------------------------------------------------- /gemma/gemma-2-9b-it-vllm/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/gemma/gemma-2-9b-it-vllm/model/__init__.py -------------------------------------------------------------------------------- /gfp-gan/data/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/gfp-gan/data/.gitkeep -------------------------------------------------------------------------------- /gfp-gan/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/gfp-gan/model/__init__.py -------------------------------------------------------------------------------- /image-segmentation/config.yaml: -------------------------------------------------------------------------------- 1 | environment_variables: {} 2 | external_package_dirs: [] 3 | model_name: Image segmentation 4 | python_version: py39 5 | requirements: 6 | - torchvision==0.9.1 7 | resources: 8 | cpu: 3000m 9 | memory: 8Gi 10 | use_gpu: false 11 | secrets: {} 12 | spec_version: 2.0 13 | system_packages: [] 14 | -------------------------------------------------------------------------------- /image-segmentation/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/image-segmentation/model/__init__.py -------------------------------------------------------------------------------- /ip-adapter/config.yaml: -------------------------------------------------------------------------------- 1 | environment_variables: {} 2 | external_package_dirs: [] 3 | model_name: IP Adapter 4 | python_version: py311 5 | requirements: 6 | - torch==2.1.1 7 | - diffusers==0.24.0 8 | - 
transformers==4.35.2 9 | resources: 10 | accelerator: A10G 11 | cpu: '3' 12 | memory: 15Gi 13 | use_gpu: true 14 | secrets: {} 15 | system_packages: [] 16 | -------------------------------------------------------------------------------- /ip-adapter/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/ip-adapter/model/__init__.py -------------------------------------------------------------------------------- /jsonformatter/config.yaml: -------------------------------------------------------------------------------- 1 | environment_variables: {} 2 | external_package_dirs: [] 3 | model_metadata: 4 | llm_model: databricks/dolly-v2-3b 5 | model_name: JsonFormatter 6 | python_version: py311 7 | requirements: 8 | - jsonformer 9 | - transformers 10 | - accelerate 11 | resources: 12 | accelerator: A10G 13 | secrets: {} 14 | system_packages: [] 15 | -------------------------------------------------------------------------------- /jsonformatter/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/jsonformatter/model/__init__.py -------------------------------------------------------------------------------- /kokoro/README.md: -------------------------------------------------------------------------------- 1 | Kokoro is a frontier TTS model for its size of 82 million parameters (text in/audio out). 2 | API: 3 | ```bash 4 | request: 5 | {"text": "Hello", "voice": "af", "speed": 1.0} 6 | 7 | text: str = defaults to "Hi, I'm kokoro" 8 | voice: str = defaults to "af", available options: "af", "af_bella", "af_sarah", "am_adam", "am_michael", "bf_emma", "bf_isabella", "bm_george", "bm_lewis", "af_nicole", "af_sky" 9 | speed: float = defaults to 1.0. 
The speed of the audio generated 10 | 11 | response: 12 | {"base64": "base64 encoded bytestring"} 13 | ``` 14 | -------------------------------------------------------------------------------- /kokoro/call.py: -------------------------------------------------------------------------------- 1 | import base64 2 | 3 | import httpx 4 | 5 | DEPLOYMENT_URL = "" 6 | API_KEY = "" 7 | # Create client for connection reuse 8 | with httpx.Client() as client: 9 | # Make the API request 10 | resp = client.post( 11 | DEPLOYMENT_URL, 12 | headers={"Authorization": f"Api-Key {API_KEY}"}, 13 | json={"text": "Hello world", "voice": "af", "speed": 1.0}, 14 | timeout=None, 15 | ) 16 | 17 | # Get the base64 encoded audio 18 | response_data = resp.json() 19 | audio_base64 = response_data["base64"] 20 | 21 | # Decode the base64 string 22 | audio_bytes = base64.b64decode(audio_base64) 23 | 24 | # Write to a WAV file 25 | with open("output.wav", "wb") as f: 26 | f.write(audio_bytes) 27 | 28 | print("Audio saved to output.wav") 29 | -------------------------------------------------------------------------------- /kokoro/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/kokoro/model/__init__.py -------------------------------------------------------------------------------- /layoutlm-document-qa/config.yaml: -------------------------------------------------------------------------------- 1 | description: Extract information from images of invoices 2 | environment_variables: {} 3 | external_package_dirs: [] 4 | model_metadata: 5 | avatar_url: https://cdn.baseten.co/production/static/explore/impira-logo.png 6 | cover_image_url: https://cdn.baseten.co/production/static/explore/document-qa.png 7 | example_model_input: 8 | prompt: What is the invoice number? 9 | url: https://templates.invoicehome.com/invoice-template-us-neat-750px.png 10 | tags: 11 | - text-generation 12 | model_name: LayoutLM Document QA 13 | python_version: py39 14 | requirements: 15 | - Pillow==10.0.0 16 | - pytesseract==0.3.10 17 | - torch==2.0.1 18 | - transformers==4.30.2 19 | resources: 20 | accelerator: null 21 | cpu: '4' 22 | memory: 16Gi 23 | use_gpu: false 24 | secrets: {} 25 | system_packages: 26 | - tesseract-ocr 27 | -------------------------------------------------------------------------------- /layoutlm-document-qa/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/layoutlm-document-qa/model/__init__.py -------------------------------------------------------------------------------- /layoutlm-document-qa/model/model.py: -------------------------------------------------------------------------------- 1 | from transformers import pipeline 2 | 3 | 4 | class Model: 5 | def __init__(self, **kwargs) -> None: 6 | self._data_dir = kwargs["data_dir"] 7 | self._config = kwargs["config"] 8 | self._secrets = kwargs["secrets"] 9 | self._model = None 10 | 11 | def load(self): 12 | self._model = pipeline( 13 | "document-question-answering", 14 | model="impira/layoutlm-document-qa", 15 | ) 16 | 17 | def predict(self, model_input): 18 | return self._model( 19 | model_input[ 20 | "url" 21 | ], # e.g. "https://templates.invoicehome.com/invoice-template-us-neat-750px.png" 22 | model_input["prompt"], # e.g. "What is the invoice number?"
23 | ) 24 | -------------------------------------------------------------------------------- /llama/engine-llama-3-1-70b-instruct/README.md: -------------------------------------------------------------------------------- 1 | # Llama 3.1 70B Instruct 2 | 3 | This deployment of Llama 3.1 70B Instruct uses the TensorRT-LLM Engine Builder. 4 | 5 | For details, see: https://docs.baseten.co/performance/examples/llama-trt 6 | -------------------------------------------------------------------------------- /llama/engine-llama-3-1-8b-instruct/README.md: -------------------------------------------------------------------------------- 1 | # Llama 3.1 8B Instruct 2 | 3 | This deployment of Llama 3.1 8B Instruct uses the TensorRT-LLM Engine Builder. 4 | 5 | For details, see: https://docs.baseten.co/performance/examples/llama-trt 6 | -------------------------------------------------------------------------------- /llama/engine-llama-3-3-70b-instruct/README.md: -------------------------------------------------------------------------------- 1 | # Llama 3.3 70B Instruct 2 | 3 | This deployment of Llama 3.3 70B Instruct uses the TensorRT-LLM Engine Builder. 4 | 5 | For details, see: https://docs.baseten.co/performance/examples/llama-trt 6 | -------------------------------------------------------------------------------- /llama/llama-2-13b-chat/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/llama/llama-2-13b-chat/model/__init__.py -------------------------------------------------------------------------------- /llama/llama-2-13b/config.yaml: -------------------------------------------------------------------------------- 1 | environment_variables: {} 2 | external_package_dirs: [] 3 | model_cache: 4 | - allow_patterns: 5 | - '*.json' 6 | - '*.safetensors' 7 | - '*.model' 8 | repo_id: meta-llama/Llama-2-13b-hf 9 | model_metadata: 10 | repo_id: meta-llama/Llama-2-13b-hf 11 | model_name: Llama 13B 12 | python_version: py39 13 | requirements: 14 | - accelerate==0.22.0 15 | - bitsandbytes==0.41.1 16 | - einops==0.6.1 17 | - faker==19.3.1 18 | - peft==0.5.0 19 | - safetensors==0.3.3 20 | - scipy==1.10.1 21 | - sentencepiece==0.1.99 22 | - torch==2.0.1 23 | - transformers==4.32.1 24 | resources: 25 | accelerator: A100:1 26 | cpu: '3' 27 | memory: 14Gi 28 | use_gpu: true 29 | secrets: 30 | hf_access_token: null 31 | system_packages: [] 32 | -------------------------------------------------------------------------------- /llama/llama-2-13b/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/llama/llama-2-13b/model/__init__.py -------------------------------------------------------------------------------- /llama/llama-2-70b-chat/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/llama/llama-2-70b-chat/model/__init__.py -------------------------------------------------------------------------------- /llama/llama-2-70b/config.yaml: -------------------------------------------------------------------------------- 1 | environment_variables: {} 2 | external_package_dirs: [] 3 | model_cache: 4 | - allow_patterns: 5 | - '*.json' 6 | - '*.safetensors' 7 | - '*.model' 8 | 
repo_id: meta-llama/Llama-2-70b-hf 9 | model_metadata: 10 | repo_id: meta-llama/Llama-2-70b-hf 11 | model_name: Llama 70B 12 | python_version: py39 13 | requirements: 14 | - accelerate==0.22.0 15 | - bitsandbytes==0.41.1 16 | - einops==0.6.1 17 | - faker==19.3.1 18 | - peft==0.5.0 19 | - safetensors==0.3.3 20 | - scipy==1.10.1 21 | - sentencepiece==0.1.99 22 | - torch==2.0.1 23 | - transformers==4.32.1 24 | resources: 25 | accelerator: A100:2 26 | cpu: '3' 27 | memory: 14Gi 28 | use_gpu: true 29 | secrets: 30 | hf_token: ENTER HF API KEY HERE 31 | system_packages: [] 32 | -------------------------------------------------------------------------------- /llama/llama-2-70b/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/llama/llama-2-70b/model/__init__.py -------------------------------------------------------------------------------- /llama/llama-2-7b-chat/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/llama/llama-2-7b-chat/model/__init__.py -------------------------------------------------------------------------------- /llama/llama-2-7b/config.yaml: -------------------------------------------------------------------------------- 1 | environment_variables: {} 2 | external_package_dirs: [] 3 | model_cache: 4 | - allow_patterns: 5 | - '*.json' 6 | - '*.safetensors' 7 | - '*.model' 8 | repo_id: meta-llama/Llama-2-7b-hf 9 | model_metadata: 10 | repo_id: meta-llama/Llama-2-7b-hf 11 | model_name: Llama 7B 12 | python_version: py39 13 | requirements: 14 | - accelerate==0.22.0 15 | - bitsandbytes==0.41.1 16 | - einops==0.6.1 17 | - faker==19.3.1 18 | - peft==0.5.0 19 | - safetensors==0.3.3 20 | - scipy==1.10.1 21 | - sentencepiece==0.1.99 22 | - torch==2.0.1 23 | - transformers==4.32.1 24 | resources: 25 | accelerator: A10G 26 | cpu: '3' 27 | memory: 14Gi 28 | use_gpu: true 29 | secrets: 30 | hf_access_token: ENTER HF ACCESS TOKEN HERE 31 | system_packages: [] 32 | -------------------------------------------------------------------------------- /llama/llama-2-7b/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/llama/llama-2-7b/model/__init__.py -------------------------------------------------------------------------------- /llama/llama-3-70b-instruct/config.yaml: -------------------------------------------------------------------------------- 1 | environment_variables: {} 2 | external_package_dirs: [] 3 | model_metadata: 4 | avatar_url: https://cdn.baseten.co/production/static/explore/meta.png 5 | cover_image_url: https://cdn.baseten.co/production/static/explore/llama.png 6 | repo_id: meta-llama/Meta-Llama-3-70B-Instruct 7 | tags: 8 | - text-generation 9 | model_name: Llama 3 70B Instruct 10 | python_version: py310 11 | requirements: 12 | - accelerate 13 | - einops 14 | - transformers 15 | - torch 16 | resources: 17 | accelerator: H100:2 18 | use_gpu: true 19 | secrets: 20 | hf_access_token: "your api key" 21 | system_packages: [] 22 | -------------------------------------------------------------------------------- /llama/llama-3-70b-instruct/model/__init__.py: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/llama/llama-3-70b-instruct/model/__init__.py -------------------------------------------------------------------------------- /llama/llama-3-8b-instruct/config.yaml: -------------------------------------------------------------------------------- 1 | environment_variables: {} 2 | external_package_dirs: [] 3 | model_metadata: 4 | avatar_url: https://cdn.baseten.co/production/static/explore/meta.png 5 | cover_image_url: https://cdn.baseten.co/production/static/explore/llama.png 6 | repo_id: meta-llama/Meta-Llama-3-8B-Instruct 7 | tags: 8 | - text-generation 9 | model_name: Llama 3 8B Instruct 10 | python_version: py310 11 | model_cache: 12 | - repo_id: meta-llama/Meta-Llama-3-8B-Instruct 13 | requirements: 14 | - accelerate 15 | - einops 16 | - transformers 17 | - torch 18 | resources: 19 | accelerator: A100 20 | use_gpu: true 21 | secrets: 22 | hf_access_token: "your-hf-access-token" 23 | system_packages: [] 24 | -------------------------------------------------------------------------------- /llama/llama-3-8b-instruct/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/llama/llama-3-8b-instruct/model/__init__.py -------------------------------------------------------------------------------- /llama/llama-3_1-405b-instruct/README.md: -------------------------------------------------------------------------------- 1 | # Llama 3.1 405B Instruct 2 | 3 | This is an implementation of Llama 3.1 405B for deployment on Baseten. 4 | 5 | - vLLM for faster inference 6 | - FP8 model weights 7 | - Runs on an 8xH100 instance 8 | 9 | Baseten offers private, secure deployments for LLMs like Llama 3.1 405B, including deployments to your own VPC. 10 | To deploy this model on Baseten, contact us at [support@baseten.co](mailto:support@baseten.co).
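Once deployed, the model is served behind Baseten's standard predict endpoint. A minimal sketch of calling it from Python follows; the model ID `abcd1234` and the `BASETEN_API_KEY` environment variable are placeholders for your own deployment's values, not part of this repository.

```python
# Minimal sketch: call a deployed Baseten model's predict endpoint.
# The model ID below is hypothetical; use the ID from your deployment.
import os

import requests

model_id = "abcd1234"
resp = requests.post(
    f"https://model-{model_id}.api.baseten.co/production/predict",
    headers={"Authorization": f"Api-Key {os.environ['BASETEN_API_KEY']}"},
    json={"prompt": "What is the meaning of life?"},
)
resp.raise_for_status()
print(resp.json())
```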
11 | -------------------------------------------------------------------------------- /llama/llama-3_1-405b-instruct/config.yaml: -------------------------------------------------------------------------------- 1 | model_name: "Llama 3.1 405B Instruct VLLM" 2 | python_version: py311 3 | model_metadata: 4 | example_model_input: {"prompt": "what is the meaning of life"} 5 | repo_id: meta-llama/Llama-3.1-405B-Instruct-FP8 6 | tensor_parallel: 8 7 | requirements: 8 | - vllm==0.5.3post1 9 | - transformers==4.43.1 10 | resources: 11 | accelerator: H100:8 12 | use_gpu: true 13 | runtime: 14 | predict_concurrency: 128 15 | secrets: 16 | hf_access_token: null 17 | -------------------------------------------------------------------------------- /llama/llama-3_1-405b-instruct/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/llama/llama-3_1-405b-instruct/model/__init__.py -------------------------------------------------------------------------------- /llama/llama-3_1-405b-instruct/model/sighelper.py: -------------------------------------------------------------------------------- 1 | import fileinput 2 | import sys 3 | 4 | MODULE_FILE_PATH = ( 5 | "/usr/local/lib/python3.11/dist-packages/vllm/executor/multiproc_gpu_executor.py" 6 | ) 7 | 8 | 9 | def patch(): 10 | # This is for SIGINT 11 | search_text = "signal.signal(signal.SIGINT, shutdown)" 12 | 13 | with fileinput.FileInput(MODULE_FILE_PATH, inplace=True) as file: 14 | for line in file: 15 | if search_text in line: 16 | line = " # " + line.lstrip() 17 | sys.stdout.write(line) 18 | 19 | # This is for SIGTERM 20 | search_text = "signal.signal(signal.SIGTERM, shutdown)" 21 | 22 | with fileinput.FileInput(MODULE_FILE_PATH, inplace=True) as file: 23 | for line in file: 24 | if search_text in line: 25 | line = " # " + line.lstrip() 26 | sys.stdout.write(line) 27 | -------------------------------------------------------------------------------- /llama/llama-3_1-8b-instruct-sglang/config.yaml: -------------------------------------------------------------------------------- 1 | model_name: "Llama 3.1 8B Instruct SGLang" 2 | python_version: py311 3 | model_metadata: 4 | example_model_input: {"prompt": "what is the meaning of life"} 5 | repo_id: meta-llama/Llama-3.1-8B-Instruct 6 | tensor_parallel: 1 7 | requirements: 8 | - sglang[all]==0.3.0 9 | - https://github.com/flashinfer-ai/flashinfer/releases/download/v0.1.6/flashinfer-0.1.6+cu121torch2.4-cp311-cp311-linux_x86_64.whl 10 | model_cache: 11 | - repo_id: meta-llama/Llama-3.1-8B-Instruct 12 | ignore_patterns: 13 | - "original/*" 14 | - "*.pth" 15 | resources: 16 | accelerator: H100 17 | use_gpu: true 18 | runtime: 19 | predict_concurrency: 128 20 | secrets: 21 | hf_access_token: null 22 | -------------------------------------------------------------------------------- /llama/llama-3_1-8b-instruct-sglang/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/llama/llama-3_1-8b-instruct-sglang/model/__init__.py -------------------------------------------------------------------------------- /llama/llama-3_1-8b-instruct/config.yaml: -------------------------------------------------------------------------------- 1 | model_name: "Llama 3.1 8B Instruct VLLM" 2 | python_version: py311 3 | model_metadata: 4 | 
example_model_input: {"prompt": "what is the meaning of life"} 5 | repo_id: meta-llama/Llama-3.1-8B-Instruct 6 | tensor_parallel: 1 7 | requirements: 8 | - vllm==0.5.3post1 9 | model_cache: 10 | - repo_id: meta-llama/Llama-3.1-8B-Instruct 11 | ignore_patterns: 12 | - "original/*" 13 | - "*.pth" 14 | resources: 15 | accelerator: H100_40GB 16 | use_gpu: true 17 | runtime: 18 | predict_concurrency: 128 19 | secrets: 20 | hf_access_token: null 21 | -------------------------------------------------------------------------------- /llama/llama-3_1-8b-instruct/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/llama/llama-3_1-8b-instruct/model/__init__.py -------------------------------------------------------------------------------- /llama/llama-3_1_70b-instruct/config.yaml: -------------------------------------------------------------------------------- 1 | environment_variables: {} 2 | external_package_dirs: [] 3 | model_metadata: {} 4 | model_name: Llama 3.1 70B vLLM 5 | python_version: py310 6 | requirements: 7 | - vllm==0.5.3post1 8 | - accelerate 9 | resources: 10 | accelerator: A100:4 11 | use_gpu: true 12 | runtime: 13 | predict_concurrency: 128 14 | secrets: 15 | hf_access_token: "" 16 | system_packages: 17 | - python3.10-venv 18 | -------------------------------------------------------------------------------- /llama/llama-3_1_70b-instruct/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/llama/llama-3_1_70b-instruct/model/__init__.py -------------------------------------------------------------------------------- /llama/llama-3_1_70b-instruct/model/sighelper.py: -------------------------------------------------------------------------------- 1 | import fileinput 2 | import sys 3 | 4 | MODULE_FILE_PATH = ( 5 | "/usr/local/lib/python3.10/dist-packages/vllm/executor/multiproc_gpu_executor.py" 6 | ) 7 | 8 | 9 | def patch(): 10 | # This is for SIGINT 11 | search_text = "signal.signal(signal.SIGINT, shutdown)" 12 | 13 | with fileinput.FileInput(MODULE_FILE_PATH, inplace=True) as file: 14 | for line in file: 15 | if search_text in line: 16 | line = " # " + line.lstrip() 17 | sys.stdout.write(line) 18 | 19 | # This is for SIGTERM 20 | search_text = "signal.signal(signal.SIGTERM, shutdown)" 21 | 22 | with fileinput.FileInput(MODULE_FILE_PATH, inplace=True) as file: 23 | for line in file: 24 | if search_text in line: 25 | line = " # " + line.lstrip() 26 | sys.stdout.write(line) 27 | -------------------------------------------------------------------------------- /llama/llama-4-maverick-17b-128e-instruct-fp8-vllm/data/do.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | HF_TOKEN=$(cat /secrets/hf_access_token) vllm serve meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 --served-model-name llama --max-model-len 340000 --tensor-parallel-size 8 --distributed-executor-backend mp --gpu-memory-utilization 0.95 --kv-cache-dtype fp8 --limit-mm-per-prompt image=10 --override-generation-config='{"attn_temperature_tuning": true}' 3 | -------------------------------------------------------------------------------- /llama/llama-4-scout-17b-16e-instruct-bf16-vllm/data/do.sh: -------------------------------------------------------------------------------- 1 
| #!/bin/bash 2 | HF_TOKEN=$(cat /secrets/hf_access_token) vllm serve meta-llama/Llama-4-Scout-17B-16E-Instruct --served-model-name llama --max-model-len 131072 --tensor-parallel-size 4 --distributed-executor-backend mp --gpu-memory-utilization 0.95 --kv-cache-dtype fp8 --limit-mm-per-prompt image=10 --override-generation-config='{"attn_temperature_tuning": true}' 3 | -------------------------------------------------------------------------------- /llama/llama-7b-exllama-streaming/config.yaml: -------------------------------------------------------------------------------- 1 | base_image: 2 | image: runpod/pytorch:2.0.1-py3.10-cuda11.8.0-devel 3 | python_executable_path: /usr/bin/python 4 | environment_variables: {} 5 | external_package_dirs: [] 6 | model_name: exllama-streaming 7 | python_version: py311 8 | requirements: 9 | - exllamav2==0.0.5 10 | resources: 11 | accelerator: A100 12 | cpu: '1' 13 | memory: 2Gi 14 | use_gpu: true 15 | secrets: {} 16 | system_packages: [] 17 | -------------------------------------------------------------------------------- /llama/llama-7b-exllama-streaming/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/llama/llama-7b-exllama-streaming/model/__init__.py -------------------------------------------------------------------------------- /llama/llama-7b-exllama/config.yaml: -------------------------------------------------------------------------------- 1 | base_image: 2 | image: runpod/pytorch:2.0.1-py3.10-cuda11.8.0-devel 3 | python_executable_path: /usr/bin/python 4 | environment_variables: {} 5 | external_package_dirs: [] 6 | model_name: exllama 7 | python_version: py311 8 | requirements: 9 | - exllamav2==0.0.5 10 | resources: 11 | accelerator: A100 12 | cpu: '1' 13 | memory: 2Gi 14 | use_gpu: true 15 | secrets: {} 16 | system_packages: [] 17 | -------------------------------------------------------------------------------- /llama/llama-7b-exllama/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/llama/llama-7b-exllama/model/__init__.py -------------------------------------------------------------------------------- /llama/llama-7b-vllm/config.yaml: -------------------------------------------------------------------------------- 1 | model_metadata: 2 | engine_args: 3 | model: TheBloke/Llama-2-7B-Chat-fp16 4 | example_model_input: 5 | prompt: Where do Llamas come from? 
6 | pretty_name: Llama 2 7B 7 | prompt_format: "[INST] {prompt} [/INST]" 8 | tags: 9 | - text-generation 10 | model_name: Llama 7B Instruct vLLM 11 | python_version: py311 12 | requirements: 13 | - vllm==0.2.1.post1 14 | resources: 15 | accelerator: A10G 16 | memory: 25Gi 17 | use_gpu: true 18 | runtime: 19 | predict_concurrency: 256 20 | system_packages: [] 21 | -------------------------------------------------------------------------------- /llama/llama-7b-vllm/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/llama/llama-7b-vllm/model/__init__.py -------------------------------------------------------------------------------- /llama/llama-7b/data/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "_name_or_path": "decapoda-research/llama-7b-hf", 3 | "architectures": [ 4 | "LlamaForCausalLM" 5 | ], 6 | "bos_token_id": 0, 7 | "eos_token_id": 1, 8 | "hidden_act": "silu", 9 | "hidden_size": 4096, 10 | "initializer_range": 0.02, 11 | "intermediate_size": 11008, 12 | "max_position_embeddings": 2048, 13 | "max_sequence_length": 2048, 14 | "model_type": "llama", 15 | "num_attention_heads": 32, 16 | "num_hidden_layers": 32, 17 | "pad_token_id": -1, 18 | "rms_norm_eps": 1e-06, 19 | "tie_word_embeddings": false, 20 | "torch_dtype": "float16", 21 | "transformers_version": "4.29.0.dev0", 22 | "use_cache": true, 23 | "vocab_size": 32000 24 | } 25 | -------------------------------------------------------------------------------- /llama/llama-7b/data/generation_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "_from_model_config": true, 3 | "bos_token_id": 0, 4 | "eos_token_id": 1, 5 | "pad_token_id": 0, 6 | "transformers_version": "4.29.0.dev0" 7 | } 8 | -------------------------------------------------------------------------------- /llama/llama-7b/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/llama/llama-7b/model/__init__.py -------------------------------------------------------------------------------- /llama/tinyllama-1.1B-chat-v1.0/config.yaml: -------------------------------------------------------------------------------- 1 | model_metadata: 2 | tags: 3 | - openai-compatible 4 | example_model_input: 5 | prompt: How tall is a tiny llama?
6 | model_name: tinyllama-trt 7 | python_version: py310 8 | resources: 9 | accelerator: A10G 10 | memory: 24Gi 11 | use_gpu: true 12 | trt_llm: 13 | build: 14 | max_seq_len: 2048 15 | base_model: llama 16 | quantization_type: no_quant 17 | checkpoint_repository: 18 | repo: TinyLlama/TinyLlama-1.1B-Chat-v1.0 19 | source: HF 20 | -------------------------------------------------------------------------------- /llava/llava-1.6-sgl/config.yaml: -------------------------------------------------------------------------------- 1 | environment_variables: {} 2 | external_package_dirs: [] 3 | model_name: llava 1.6 SGL 4 | python_version: py310 5 | requirements: [] 6 | requirements_file: ./requirements.txt 7 | resources: 8 | accelerator: A100 9 | use_gpu: true 10 | runtime: 11 | predict_concurrency: 128 12 | secrets: {} 13 | system_packages: [] 14 | -------------------------------------------------------------------------------- /llava/llava-1.6-sgl/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/llava/llava-1.6-sgl/model/__init__.py -------------------------------------------------------------------------------- /llava/llava-1.6-sgl/requirements.txt: -------------------------------------------------------------------------------- 1 | sglang==0.1.12 2 | triton==2.1.0 3 | tqdm==4.66.2 4 | aiohttp==3.9.3 5 | psutil==5.9.4 6 | rpyc==5.3.1 7 | torch==2.1.2 8 | vllm==0.3.0 9 | zmq==0.0.0 10 | interegular==0.3.3 11 | lark==1.1.9 12 | numba==0.59.0 13 | referencing 14 | diskcache==5.6.3 15 | cloudpickle==3.0.0 16 | pillow==10.2.0 17 | outlines==0.0.30 18 | -------------------------------------------------------------------------------- /llava/llava-v1.5-7b/config.yaml: -------------------------------------------------------------------------------- 1 | environment_variables: {} 2 | external_package_dirs: [] 3 | model_name: llava-v1.5-7b 4 | python_version: py311 5 | requirements: 6 | - torch==2.0.1 7 | - torchvision==0.15.2 8 | - transformers==4.31.0 9 | - tokenizers>=0.12.1,<0.14 10 | - sentencepiece==0.1.99 11 | - shortuuid==1.0.11 12 | - scipy==1.11.4 13 | - accelerate==0.21.0 14 | - peft==0.4.0 15 | - bitsandbytes==0.41.0 16 | - einops==0.6.1 17 | - einops-exts==0.0.4 18 | - timm==0.6.13 19 | resources: 20 | accelerator: A10G 21 | cpu: '3' 22 | memory: 15Gi 23 | use_gpu: true 24 | secrets: {} 25 | system_packages: [] 26 | -------------------------------------------------------------------------------- /llava/llava-v1.5-7b/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/llava/llava-v1.5-7b/model/__init__.py -------------------------------------------------------------------------------- /llava/llava-v1.5-7b/packages/llava/__init__.py: -------------------------------------------------------------------------------- 1 | from .model import LlavaLlamaForCausalLM 2 | -------------------------------------------------------------------------------- /llava/llava-v1.5-7b/packages/llava/constants.py: -------------------------------------------------------------------------------- 1 | CONTROLLER_HEART_BEAT_EXPIRATION = 30 2 | WORKER_HEART_BEAT_INTERVAL = 15 3 | 4 | LOGDIR = "." 
5 | 6 | # Model Constants 7 | IGNORE_INDEX = -100 8 | IMAGE_TOKEN_INDEX = -200 9 | DEFAULT_IMAGE_TOKEN = "<image>" 10 | DEFAULT_IMAGE_PATCH_TOKEN = "<im_patch>" 11 | DEFAULT_IM_START_TOKEN = "<im_start>" 12 | DEFAULT_IM_END_TOKEN = "<im_end>" 13 | IMAGE_PLACEHOLDER = "<image-placeholder>" 14 | -------------------------------------------------------------------------------- /llava/llava-v1.5-7b/packages/llava/eval/table/model.jsonl: -------------------------------------------------------------------------------- 1 | {"model_id": "vicuna-13b:20230322-clean-lang", "model_name": "vicuna-13b", "model_version": "20230322-clean-lang", "model_metadata": "vicuna-13b-20230322-clean-lang"} 2 | {"model_id": "alpaca-13b:v1", "model_name": "alpaca-13b", "model_version": "v1", "model_metadata": "alpaca-13b"} 3 | {"model_id": "llama-13b:v1", "model_name": "llama-13b", "model_version": "v1", "model_metadata": "hf-llama-13b"} 4 | {"model_id": "bard:20230327", "model_name": "bard", "model_version": "20230327", "model_metadata": "Google Bard 20230327"} 5 | {"model_id": "gpt-3.5-turbo:20230327", "model_name": "gpt-3.5-turbo", "model_version": "20230327", "model_metadata": "OpenAI ChatGPT gpt-3.5-turbo Chat Completion"} 6 | -------------------------------------------------------------------------------- /llava/llava-v1.5-7b/packages/llava/eval/table/reviewer.jsonl: -------------------------------------------------------------------------------- 1 | {"reviewer_id": "gpt-4-0328-default", "prompt_id": 1, "metadata": {"temperature": 0.2, "max_tokens": 1024}, "description": "GPT-4 for general questions"} 2 | {"reviewer_id": "gpt-4-0328-coding", "prompt_id": 2, "metadata": {"temperature": 0.2, "max_tokens": 1024}, "description": "GPT-4 for coding questions"} 3 | {"reviewer_id": "gpt-4-0328-math", "prompt_id": 3, "metadata": {"temperature": 0.2, "max_tokens": 1024}, "description": "GPT-4 for math questions"} 4 | {"reviewer_id": "gpt-4-0417-visual", "prompt_id": 4, "metadata": {"temperature": 0.2, "max_tokens": 1024}, "description": "GPT-4 for visual questions"} 5 | -------------------------------------------------------------------------------- /llava/llava-v1.5-7b/packages/llava/eval/webpage/figures/alpaca.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/llava/llava-v1.5-7b/packages/llava/eval/webpage/figures/alpaca.png -------------------------------------------------------------------------------- /llava/llava-v1.5-7b/packages/llava/eval/webpage/figures/bard.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/llava/llava-v1.5-7b/packages/llava/eval/webpage/figures/bard.jpg -------------------------------------------------------------------------------- /llava/llava-v1.5-7b/packages/llava/eval/webpage/figures/llama.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/llava/llava-v1.5-7b/packages/llava/eval/webpage/figures/llama.jpg -------------------------------------------------------------------------------- /llava/llava-v1.5-7b/packages/llava/eval/webpage/figures/vicuna.jpeg: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/llava/llava-v1.5-7b/packages/llava/eval/webpage/figures/vicuna.jpeg -------------------------------------------------------------------------------- /llava/llava-v1.5-7b/packages/llava/model/__init__.py: -------------------------------------------------------------------------------- 1 | from .language_model.llava_llama import LlavaConfig, LlavaLlamaForCausalLM 2 | from .language_model.llava_mpt import LlavaMPTConfig, LlavaMPTForCausalLM 3 | -------------------------------------------------------------------------------- /llava/llava-v1.5-7b/packages/llava/model/language_model/mpt/custom_embedding.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | from torch import Tensor 4 | 5 | 6 | class SharedEmbedding(nn.Embedding): 7 | def forward(self, input: Tensor, unembed: bool = False) -> Tensor: 8 | if unembed: 9 | return F.linear(input, self.weight) 10 | return super().forward(input) 11 | -------------------------------------------------------------------------------- /llava/llava-v1.5-7b/packages/llava/model/multimodal_encoder/builder.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from .clip_encoder import CLIPVisionTower 4 | 5 | 6 | def build_vision_tower(vision_tower_cfg, **kwargs): 7 | vision_tower = getattr( 8 | vision_tower_cfg, 9 | "mm_vision_tower", 10 | getattr(vision_tower_cfg, "vision_tower", None), 11 | ) 12 | is_absolute_path_exists = os.path.exists(vision_tower) 13 | if ( 14 | is_absolute_path_exists 15 | or vision_tower.startswith("openai") 16 | or vision_tower.startswith("laion") 17 | ): 18 | return CLIPVisionTower(vision_tower, args=vision_tower_cfg, **kwargs) 19 | 20 | raise ValueError(f"Unknown vision tower: {vision_tower}") 21 | -------------------------------------------------------------------------------- /llava/llava-v1.5-7b/packages/llava/serve/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/llava/llava-v1.5-7b/packages/llava/serve/__init__.py -------------------------------------------------------------------------------- /llava/llava-v1.5-7b/packages/llava/serve/examples/extreme_ironing.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/llava/llava-v1.5-7b/packages/llava/serve/examples/extreme_ironing.jpg -------------------------------------------------------------------------------- /llava/llava-v1.5-7b/packages/llava/serve/examples/waterview.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/llava/llava-v1.5-7b/packages/llava/serve/examples/waterview.jpg -------------------------------------------------------------------------------- /llava/llava-v1.5-7b/packages/llava/serve/register_worker.py: -------------------------------------------------------------------------------- 1 | """ 2 | Manually register workers. 
3 | 4 | Usage: 5 | python3 -m fastchat.serve.register_worker --controller http://localhost:21001 --worker-name http://localhost:21002 6 | """ 7 | 8 | import argparse 9 | 10 | import requests 11 | 12 | if __name__ == "__main__": 13 | parser = argparse.ArgumentParser() 14 | parser.add_argument("--controller-address", type=str) 15 | parser.add_argument("--worker-name", type=str) 16 | parser.add_argument("--check-heart-beat", action="store_true") 17 | args = parser.parse_args() 18 | 19 | url = args.controller_address + "/register_worker" 20 | data = { 21 | "worker_name": args.worker_name, 22 | "check_heart_beat": args.check_heart_beat, 23 | "worker_status": None, 24 | } 25 | r = requests.post(url, json=data) 26 | assert r.status_code == 200 27 | -------------------------------------------------------------------------------- /llava/llava-v1.5-7b/packages/llava/train/train_mem.py: -------------------------------------------------------------------------------- 1 | # Adopted from https://github.com/lm-sys/FastChat. Below is the original copyright: 2 | # Adopted from tatsu-lab@stanford_alpaca. Below is the original copyright: 3 | # Make it more memory efficient by monkey patching the LLaMA model with FlashAttn. 4 | 5 | # Need to call this before importing transformers. 6 | from llava.train.llama_flash_attn_monkey_patch import replace_llama_attn_with_flash_attn 7 | 8 | replace_llama_attn_with_flash_attn() 9 | 10 | from llava.train.train import train 11 | 12 | if __name__ == "__main__": 13 | train() 14 | -------------------------------------------------------------------------------- /llava/llava-v1.5-7b/packages/llava/train/train_xformers.py: -------------------------------------------------------------------------------- 1 | # Make it more memory efficient by monkey patching the LLaMA model with xformers attention. 2 | 3 | # Need to call this before importing transformers. 
4 | from llava.train.llama_xformers_attn_monkey_patch import ( 5 | replace_llama_attn_with_xformers_attn, 6 | ) 7 | 8 | replace_llama_attn_with_xformers_attn() 9 | 10 | from llava.train.train import train 11 | 12 | if __name__ == "__main__": 13 | train() 14 | -------------------------------------------------------------------------------- /llava/llava-v1.6-34b/config.yaml: -------------------------------------------------------------------------------- 1 | environment_variables: {} 2 | external_package_dirs: [] 3 | model_name: llava-v1.6-34b 4 | python_version: py311 5 | requirements: 6 | - git+https://github.com/haotian-liu/LLaVA.git 7 | resources: 8 | accelerator: A100 9 | use_gpu: true 10 | secrets: {} 11 | system_packages: [] 12 | -------------------------------------------------------------------------------- /llava/llava-v1.6-34b/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/llava/llava-v1.6-34b/model/__init__.py -------------------------------------------------------------------------------- /magic-animate/config.yaml: -------------------------------------------------------------------------------- 1 | environment_variables: {} 2 | external_package_dirs: [] 3 | model_metadata: 4 | example_model_input: 5 | guidance_scale: 7.5 6 | motion_sequence: 7 | reference_image: 8 | seed: 1 9 | steps: 10 10 | model_name: Magic Animate 11 | python_version: py310 12 | requirements: 13 | - torch==2.0.1 14 | - torchvision==0.15.2 15 | - xformers==0.0.22 16 | - diffusers==0.21.4 17 | - pillow==9.5.0 18 | - numpy==1.24.4 19 | - omegaconf==2.3.0 20 | - transformers==4.32.0 21 | - einops==0.6.1 22 | - imageio==2.9.0 23 | - imageio-ffmpeg==0.4.3 24 | - tqdm==4.66.1 25 | - websockets==11.0.3 26 | - accelerate==0.22.0 27 | - huggingface-hub==0.16.4 28 | - av==11.0.0 29 | resources: 30 | accelerator: A10G 31 | cpu: '3' 32 | memory: 15Gi 33 | use_gpu: true 34 | secrets: {} 35 | system_packages: 36 | - ffmpeg 37 | -------------------------------------------------------------------------------- /magic-animate/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/magic-animate/model/__init__.py -------------------------------------------------------------------------------- /magic-animate/model/configs/inference/inference.yaml: -------------------------------------------------------------------------------- 1 | unet_additional_kwargs: 2 | unet_use_cross_frame_attention: false 3 | unet_use_temporal_attention: false 4 | use_motion_module: true 5 | motion_module_resolutions: 6 | - 1 7 | - 2 8 | - 4 9 | - 8 10 | motion_module_mid_block: false 11 | motion_module_decoder_only: false 12 | motion_module_type: Vanilla 13 | motion_module_kwargs: 14 | num_attention_heads: 8 15 | num_transformer_block: 1 16 | attention_block_types: 17 | - Temporal_Self 18 | - Temporal_Self 19 | temporal_position_encoding: true 20 | temporal_position_encoding_max_len: 24 21 | temporal_attention_dim_div: 1 22 | 23 | noise_scheduler_kwargs: 24 | beta_start: 0.00085 25 | beta_end: 0.012 26 | beta_schedule: "linear" 27 | -------------------------------------------------------------------------------- /metavoice-1b/config.yaml: -------------------------------------------------------------------------------- 1 | model_name: MetaVoice 1B 2 | 
| description: MetaVoice is a transformer-based model for TTS 3 | environment_variables: {} 4 | external_package_dirs: [] 5 | model_metadata: 6 | example_model_input: '"text to speech models are cool"' 7 | python_version: py311 8 | data_dir: data 9 | model_cache: 10 | - repo_id: metavoiceio/metavoice-1B-v0.1 11 | allow_patterns: 12 | - "*.pt" 13 | - repo_id: facebook/multiband-diffusion 14 | allow_patterns: 15 | - mbd_comp_8.pt 16 | - repo_id: facebook/encodec_24khz 17 | allow_patterns: 18 | - "*.safetensors" 19 | 20 | requirements_file: ./requirements.txt 21 | resources: 22 | accelerator: "A10G" 23 | use_gpu: true 24 | secrets: 25 | hf_access_token: "ENTER HF ACCESS TOKEN HERE" 26 | system_packages: 27 | - ffmpeg 28 | -------------------------------------------------------------------------------- /metavoice-1b/data/bria.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/metavoice-1b/data/bria.mp3 -------------------------------------------------------------------------------- /metavoice-1b/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/metavoice-1b/model/__init__.py -------------------------------------------------------------------------------- /metavoice-1b/process.py: -------------------------------------------------------------------------------- 1 | import base64 2 | import sys 3 | 4 | b64_audio = sys.stdin.read() 5 | 6 | # b64 data is surrounded by info messages and quotes if piped in from a truss command 7 | b64_audio = b64_audio.split('"')[1] 8 | 9 | try: 10 | decoded_audio = base64.b64decode(b64_audio) 11 | except Exception: 12 | print("Response was not a valid base64 string. Exiting.") 13 | print(b64_audio) 14 | sys.exit(1) 15 | 16 | # Write the decoded bytes only after decoding succeeds. 17 | with open("output.wav", "wb") as wav_file: 18 | wav_file.write(decoded_audio) -------------------------------------------------------------------------------- /metavoice-1b/requirements.txt: -------------------------------------------------------------------------------- 1 | torch==2.1.0 2 | transformers==4.37.2 3 | librosa==0.10.1 4 | tqdm==4.66.2 5 | tiktoken==0.5.1 6 | audiocraft==1.2.0 7 | numpy==1.24.4 8 | tyro==0.7.3 9 | DeepFilterNet==0.5.6 10 | pydub==0.25.1 11 | soundfile==0.12.1 12 | huggingface-hub==0.20.3 13 | scipy==1.12.0 14 | https://github.com/Dao-AILab/flash-attention/releases/download/v2.5.3/flash_attn-2.5.3+cu122torch2.1cxx11abiFALSE-cp311-cp311-linux_x86_64.whl 15 | git+https://github.com/metavoiceio/metavoice-src.git@182ec712a10b42440bd9e9346a17381e8664256e -------------------------------------------------------------------------------- /mistral/engine-mistral-7b-instruct/README.md: -------------------------------------------------------------------------------- 1 | # Mistral 7B Instruct 2 | 3 | This deployment of Mistral 7B Instruct uses the TensorRT-LLM Engine Builder.
4 | 5 | For details, see: https://docs.baseten.co/performance/examples/mistral-trt 6 | -------------------------------------------------------------------------------- /mistral/engine-mistral-small-3/README.md: -------------------------------------------------------------------------------- 1 | # Mistral Small 3 (2501) 2 | -------------------------------------------------------------------------------- /mistral/engine-mixtral-8x22b-instruct/README.md: -------------------------------------------------------------------------------- 1 | # Mixtral 8x22B Instruct 2 | 3 | This deployment of Mixtral 8x22B Instruct uses the TensorRT-LLM Engine Builder. 4 | 5 | For details, see: https://docs.baseten.co/performance/examples/mistral-trt 6 | -------------------------------------------------------------------------------- /mistral/engine-mixtral-8x7b-instruct/README.md: -------------------------------------------------------------------------------- 1 | # Mixtral 8x7B Instruct 2 | 3 | This deployment of Mixtral 8x7B Instruct uses the TensorRT-LLM Engine Builder. 4 | 5 | For details, see: https://docs.baseten.co/performance/examples/mistral-trt 6 | -------------------------------------------------------------------------------- /mistral/mistral-7b-chat/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/mistral/mistral-7b-chat/model/__init__.py -------------------------------------------------------------------------------- /mistral/mistral-7b-instruct-vllm/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/mistral/mistral-7b-instruct-vllm/model/__init__.py -------------------------------------------------------------------------------- /mistral/mistral-7b-instruct/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/mistral/mistral-7b-instruct/model/__init__.py -------------------------------------------------------------------------------- /mistral/mistral-7b/config.yaml: -------------------------------------------------------------------------------- 1 | environment_variables: {} 2 | external_package_dirs: [] 3 | model_metadata: 4 | avatar_url: https://cdn.baseten.co/production/static/explore/mistral_logo.png 5 | cover_image_url: https://cdn.baseten.co/production/static/explore/mistral.png 6 | example_model_input: 7 | prompt: What is the Mistral wind?
8 | pretty_name: Mistral 7B 9 | tags: 10 | - text-generation 11 | model_name: mistral-7b 12 | python_version: py311 13 | requirements: 14 | - transformers==4.42.3 15 | - sentencepiece 16 | - accelerate 17 | - torch==2.0.1 18 | - numpy==1.26.4 19 | resources: 20 | accelerator: A10G 21 | memory: 25Gi 22 | use_gpu: true 23 | secrets: 24 | hf_access_token: "ENTER HF ACCESS TOKEN HERE" 25 | system_packages: [] 26 | -------------------------------------------------------------------------------- /mistral/mistral-7b/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/mistral/mistral-7b/model/__init__.py -------------------------------------------------------------------------------- /mistral/mixtral-8x22b-trt-int8-weights-only/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/mistral/mixtral-8x22b-trt-int8-weights-only/model/__init__.py -------------------------------------------------------------------------------- /mistral/mixtral-8x22b/config.yaml: -------------------------------------------------------------------------------- 1 | environment_variables: {} 2 | external_package_dirs: [] 3 | model_metadata: 4 | repo_id: mistralai/Mixtral-8x22B-Instruct-v0.1 5 | avatar_url: https://cdn.baseten.co/production/static/explore/mistral_logo.png 6 | cover_image_url: https://cdn.baseten.co/production/static/explore/mistral.png 7 | example_model_input: 8 | prompt: What is the Mistral wind? 9 | pretty_name: Mixtral 8x22B 10 | tags: 11 | - text-generation 12 | model_name: Mixtral 8x22B 13 | python_version: py310 14 | requirements: 15 | - accelerate 16 | - transformers==4.42.3 17 | - torch==2.2.0 18 | resources: 19 | accelerator: A100:4 20 | use_gpu: true 21 | secrets: 22 | hf_access_token: "ENTER HF ACCESS TOKEN HERE" 23 | system_packages: [] 24 | -------------------------------------------------------------------------------- /mistral/mixtral-8x22b/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/mistral/mixtral-8x22b/model/__init__.py -------------------------------------------------------------------------------- /mistral/mixtral-8x7b-instruct-trt-llm-h100/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/mistral/mixtral-8x7b-instruct-trt-llm-h100/model/__init__.py -------------------------------------------------------------------------------- /mistral/mixtral-8x7b-instruct-trt-llm-weights-only-quant-h100/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/mistral/mixtral-8x7b-instruct-trt-llm-weights-only-quant-h100/model/__init__.py -------------------------------------------------------------------------------- /mistral/mixtral-8x7b-instruct-trt-llm-weights-only-quant/model/__init__.py: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/mistral/mixtral-8x7b-instruct-trt-llm-weights-only-quant/model/__init__.py -------------------------------------------------------------------------------- /mistral/mixtral-8x7b-instruct-trt-llm/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/mistral/mixtral-8x7b-instruct-trt-llm/model/__init__.py -------------------------------------------------------------------------------- /mistral/mixtral-8x7b-instruct-vllm-a100-t-tp2/config.yaml: -------------------------------------------------------------------------------- 1 | environment_variables: {} 2 | external_package_dirs: [] 3 | model_name: Mixtral 8x7B — VLLM TP2 — A100:2 4 | python_version: py310 5 | requirements: 6 | - vllm 7 | resources: 8 | accelerator: A100:2 9 | use_gpu: true 10 | runtime: 11 | predict_concurrency: 128 12 | secrets: {} 13 | system_packages: [] 14 | -------------------------------------------------------------------------------- /mistral/mixtral-8x7b-instruct-vllm-a100-t-tp2/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/mistral/mixtral-8x7b-instruct-vllm-a100-t-tp2/model/__init__.py -------------------------------------------------------------------------------- /mistral/mixtral-8x7b-instruct-vllm/config.yaml: -------------------------------------------------------------------------------- 1 | environment_variables: {} 2 | external_package_dirs: [] 3 | model_name: Mixtral 8x7B 4 | python_version: py310 5 | requirements: 6 | - vllm==0.2.5 7 | resources: 8 | accelerator: A100:2 9 | use_gpu: true 10 | runtime: 11 | predict_concurrency: 128 12 | secrets: {} 13 | system_packages: [] 14 | -------------------------------------------------------------------------------- /mistral/mixtral-8x7b-instruct-vllm/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/mistral/mixtral-8x7b-instruct-vllm/model/__init__.py -------------------------------------------------------------------------------- /mistral/pixtral-12b/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/mistral/pixtral-12b/model/__init__.py -------------------------------------------------------------------------------- /multiprocessing/config.yaml: -------------------------------------------------------------------------------- 1 | environment_variables: {} 2 | external_package_dirs: [] 3 | model_name: Model with multiprocessing pre/post-process 4 | python_version: py310 5 | requirements: 6 | - torch 7 | resources: 8 | accelerator: A10G 9 | cpu: '8' 10 | memory: 8Gi 11 | use_gpu: true 12 | secrets: {} 13 | system_packages: [] 14 | -------------------------------------------------------------------------------- /multiprocessing/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/multiprocessing/model/__init__.py 
-------------------------------------------------------------------------------- /multiprocessing/model/test.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from typing import Any 3 | 4 | import model 5 | 6 | 7 | async def call_fn(fn, payload: Any) -> Any: 8 | return await fn(payload) 9 | 10 | 11 | async def test(): 12 | m = model.Model(data_dir="", config="", secrets="") 13 | body = {"n": 100} 14 | 15 | payload = await call_fn(m.preprocess, body) 16 | response = await call_fn(m.predict, payload) 17 | 18 | return response 19 | 20 | 21 | if __name__ == "__main__": 22 | x = asyncio.run(test()) 23 | print(x) 24 | -------------------------------------------------------------------------------- /musicgen-large/config.yaml: -------------------------------------------------------------------------------- 1 | description: MusicGen is a simple and controllable model for music generation developed 2 | by Facebook AI Research. 3 | environment_variables: {} 4 | external_package_dirs: [] 5 | model_metadata: 6 | avatar_url: https://cdn.baseten.co/production/static/explore/meta.png 7 | cover_image_url: https://cdn.baseten.co/production/static/explore/musicgen-cover.png 8 | example_model_input: 9 | duration: 8 10 | prompts: 11 | - happy rock 12 | - energetic EDM 13 | - sad jazz 14 | tags: 15 | - text-to-music 16 | model_name: MusicGen large 17 | python_version: py39 18 | requirements: 19 | - torch>=2 20 | - audiocraft 21 | resources: 22 | accelerator: A10G 23 | cpu: '3' 24 | memory: 14Gi 25 | use_gpu: true 26 | secrets: {} 27 | system_packages: 28 | - ffmpeg 29 | -------------------------------------------------------------------------------- /musicgen-large/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/musicgen-large/model/__init__.py -------------------------------------------------------------------------------- /musicgen-melody/config.yaml: -------------------------------------------------------------------------------- 1 | description: MusicGen Melody is a simple and controllable model for music generation 2 | conditioned on text and audio. It is developed by Facebook AI Research. 
3 | environment_variables: {} 4 | external_package_dirs: [] 5 | model_metadata: 6 | avatar_url: https://cdn.baseten.co/production/static/explore/meta.png 7 | cover_image_url: https://cdn.baseten.co/production/static/explore/musicgen-cover.png 8 | example_model_input: 9 | duration: 8 10 | prompts: 11 | - happy rock 12 | - energetic EDM 13 | - sad jazz 14 | tags: 15 | - text-to-music 16 | model_name: MusicGen Melody 17 | python_version: py39 18 | requirements: 19 | - torch>=2 20 | - audiocraft 21 | - protobuf 22 | resources: 23 | accelerator: A10G 24 | cpu: '3' 25 | memory: 14Gi 26 | use_gpu: true 27 | secrets: {} 28 | system_packages: 29 | - ffmpeg 30 | -------------------------------------------------------------------------------- /musicgen-melody/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/musicgen-melody/model/__init__.py -------------------------------------------------------------------------------- /nemotron/llama-3-1-nemotron-70b-instruct/README.md: -------------------------------------------------------------------------------- 1 | 2 | # Llama-3.1-Nemotron-70B-Instruct 3 | 4 | This deployment of Llama-3.1-Nemotron-70B-Instruct uses the TensorRT-LLM Engine Builder. 5 | 6 | For details, see: https://docs.baseten.co/performance/examples/llama-trt 7 | -------------------------------------------------------------------------------- /ngram-speculator/truss/config.yaml: -------------------------------------------------------------------------------- 1 | build_commands: [] 2 | environment_variables: {} 3 | external_package_dirs: [] 4 | model_metadata: {} 5 | model_name: ngram-speculator 6 | python_version: py310 7 | requirements: 8 | - vllm==0.6.5 9 | - transformers==4.47.1 10 | resources: 11 | accelerator: H100 12 | use_gpu: True 13 | secrets: {} 14 | system_packages: [] 15 | -------------------------------------------------------------------------------- /ngram-speculator/truss/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/ngram-speculator/truss/model/__init__.py -------------------------------------------------------------------------------- /nous-capybara/nous-capybara-34b-openai/config.yaml: -------------------------------------------------------------------------------- 1 | environment_variables: {} 2 | external_package_dirs: [] 3 | model_cache: 4 | - allow_patterns: 5 | - '*.json' 6 | - '*.bin' 7 | repo_id: NousResearch/Nous-Capybara-34B 8 | model_name: Nous Capybara 34B OpenAI 9 | python_version: py310 10 | requirements: 11 | - accelerate==0.25.0 12 | - transformers==4.35.2 13 | - torch==2.1.0 14 | - bitsandbytes==0.41.3 15 | - scipy==1.11.4 16 | - sentencepiece==0.1.99 17 | resources: 18 | accelerator: A100 19 | cpu: '3' 20 | memory: 20Gi 21 | use_gpu: true 22 | secrets: {} 23 | system_packages: [] 24 | -------------------------------------------------------------------------------- /nous-capybara/nous-capybara-34b-openai/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/nous-capybara/nous-capybara-34b-openai/model/__init__.py -------------------------------------------------------------------------------- 
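The `model_cache` block in the Nous Capybara config above pre-fetches only the repository files that match `allow_patterns`, so the image build skips weights and artifacts it will never load. As a conceptual sketch (Truss performs this caching itself at build time; the call below is only the equivalent filter expressed with `huggingface_hub`):

```python
# Illustrative sketch of what the allow_patterns filter above selects.
# Truss caches these files at image build time; this call is a stand-in.
from huggingface_hub import snapshot_download

snapshot_download(
    repo_id="NousResearch/Nous-Capybara-34B",
    allow_patterns=["*.json", "*.bin"],  # configs plus PyTorch weight shards
)
```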
/nous-capybara/nous-capybara-34b/config.yaml: -------------------------------------------------------------------------------- 1 | environment_variables: {} 2 | external_package_dirs: [] 3 | model_metadata: 4 | example_model_input: 5 | prompt: What happens if I go to the top of the tallest mountain in California 6 | with a bucket of water and tip it over the highest cliff? 7 | model_name: Nous Capybara 34B 8 | python_version: py310 9 | requirements: 10 | - accelerate==0.25.0 11 | - transformers==4.35.2 12 | - torch==2.1.0 13 | - bitsandbytes==0.41.3 14 | - scipy==1.11.4 15 | - sentencepiece==0.1.99 16 | resources: 17 | accelerator: A100 18 | cpu: '3' 19 | memory: 20Gi 20 | use_gpu: true 21 | secrets: {} 22 | system_packages: [] 23 | -------------------------------------------------------------------------------- /nous-capybara/nous-capybara-34b/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/nous-capybara/nous-capybara-34b/model/__init__.py -------------------------------------------------------------------------------- /nsql/config.yaml: -------------------------------------------------------------------------------- 1 | description: NSQL is an open-source text-to-SQL AI model developed by Numbers Station. 2 | environment_variables: {} 3 | external_package_dirs: [] 4 | model_metadata: 5 | avatar_url: https://aeiljuispo.cloudimg.io/v7/https://cdn-uploads.huggingface.co/production/uploads/649c7ee8f97bd6fd710a9eb5/nBg1Fyo22RrqRJrkz9IYB.png 6 | cover_image_url: https://global-uploads.webflow.com/6348b2d49808811e3f7a0fff/640690727b722a05771960ec_graphic-data-p-800.png 7 | tags: 8 | - code-generation 9 | model_name: NSQL 350M 10 | python_version: py39 11 | requirements: 12 | - torch 13 | - transformers>=4.29.0 14 | resources: 15 | accelerator: A10G 16 | cpu: '8' 17 | memory: 30Gi 18 | use_gpu: true 19 | secrets: {} 20 | system_packages: [] 21 | -------------------------------------------------------------------------------- /nsql/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/nsql/model/__init__.py -------------------------------------------------------------------------------- /phi/phi-3-mini-128k-instruct/config.yaml: -------------------------------------------------------------------------------- 1 | environment_variables: {} 2 | external_package_dirs: [] 3 | model_metadata: {} 4 | model_name: Phi-3-Mini-128K-Instruct 5 | python_version: py39 6 | requirements: 7 | - accelerate 8 | - einops 9 | - transformers==4.40.1 10 | - torch==2.3.0 11 | resources: 12 | accelerator: T4 13 | use_gpu: true 14 | secrets: {} 15 | system_packages: [] 16 | -------------------------------------------------------------------------------- /phi/phi-3-mini-128k-instruct/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/phi/phi-3-mini-128k-instruct/model/__init__.py -------------------------------------------------------------------------------- /phi/phi-3-mini-4k-instruct/config.yaml: -------------------------------------------------------------------------------- 1 | environment_variables: {} 2 | external_package_dirs: [] 3 | model_metadata: {} 4 | model_name:
Phi-3-Mini-4K-Instruct 5 | python_version: py39 6 | requirements: 7 | - accelerate 8 | - einops 9 | - transformers==4.40.1 10 | - torch==2.3.0 11 | resources: 12 | accelerator: T4 13 | use_gpu: true 14 | secrets: {} 15 | system_packages: [] 16 | -------------------------------------------------------------------------------- /phi/phi-3-mini-4k-instruct/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/phi/phi-3-mini-4k-instruct/model/__init__.py -------------------------------------------------------------------------------- /phi/phi-3.5-mini/config.yaml: -------------------------------------------------------------------------------- 1 | model_name: "Phi 3.5 Mini Instruct VLLM openai compatible" 2 | python_version: py311 3 | model_metadata: 4 | example_model_input: {"messages": [{"role": "user", "content": "what is the meaning of life"}]} 5 | repo_id: microsoft/Phi-3.5-mini-instruct 6 | openai_compatible: true 7 | vllm_config: 8 | tensor_parallel_size: 1 9 | max_model_len: 10000 10 | requirements: 11 | - vllm==0.5.4 12 | resources: 13 | accelerator: A10G 14 | use_gpu: true 15 | runtime: 16 | predict_concurrency: 128 17 | secrets: 18 | hf_access_token: null 19 | -------------------------------------------------------------------------------- /phi/phi-3.5-mini/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/phi/phi-3.5-mini/model/__init__.py -------------------------------------------------------------------------------- /piper-tts/config.yaml: -------------------------------------------------------------------------------- 1 | environment_variables: {} 2 | external_data: 3 | - local_data_path: models/model.onnx 4 | url: https://huggingface.co/rhasspy/piper-voices/resolve/v1.0.0/en/en_US/lessac/medium/en_US-lessac-medium.onnx 5 | - local_data_path: models/model.onnx.json 6 | url: https://huggingface.co/rhasspy/piper-voices/raw/v1.0.0/en/en_US/lessac/medium/en_US-lessac-medium.onnx.json 7 | external_package_dirs: [] 8 | model_metadata: 9 | example_model_input: 10 | text: I love robots. Robots are cool! 
11 | tags: 12 | - text-to-speech 13 | model_name: Piper TTS 14 | python_version: py310 15 | requirements: 16 | - piper-tts==1.2.0 17 | resources: 18 | accelerator: T4 19 | cpu: '3' 20 | memory: 14Gi 21 | use_gpu: true 22 | secrets: {} 23 | system_packages: [] 24 | -------------------------------------------------------------------------------- /piper-tts/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/piper-tts/model/__init__.py -------------------------------------------------------------------------------- /playground-v2-aesthetic/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/playground-v2-aesthetic/model/__init__.py -------------------------------------------------------------------------------- /playground-v2-aesthetic/show.py: -------------------------------------------------------------------------------- 1 | """ 2 | truss predict -d '{"prompt": "A heavily constructed solarpunk bridge over a canyon at sunset", "steps": 50}' | python show.py 3 | """ 4 | 5 | import base64 6 | import json 7 | import os 8 | import sys 9 | 10 | resp = sys.stdin.read() 11 | image = json.loads(resp)["output"] 12 | img = base64.b64decode(image) 13 | 14 | img_file = open("playground.png", "wb") 15 | img_file.write(img) 16 | img_file.close() 17 | os.system("open playground.png") 18 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "truss-examples" 3 | version = "0.1.0" 4 | description = "" 5 | authors = ["Truss Maintainers "] 6 | license = "MIT" 7 | readme = "README.md" 8 | package-mode = false 9 | 10 | [tool.poetry.dependencies] 11 | python = ">=3.9,<3.13" 12 | 13 | [tool.poetry.group.dev.dependencies] 14 | black = "^23.7.0" 15 | ipython = "^8.14.0" 16 | isort = "^5.12.0" 17 | pre-commit = "^3.5.0" 18 | 19 | [build-system] 20 | requires = ["poetry-core"] 21 | build-backend = "poetry.core.masonry.api" 22 | -------------------------------------------------------------------------------- /qwen/engine-qwen-2-5-14b-coder-instruct/README.md: -------------------------------------------------------------------------------- 1 | # Qwen Coder 2.5 14B Instruct Engine 2 | 3 | This example uses the [TensorRT-LLM Engine Builder for Qwen](https://docs.baseten.co/performance/examples/qwen-trt) to build and deploy an optimized inference engine for Qwen Coder 2.5 14B Instruct. 4 | 5 | For advanced control over the engine building process, see [engine control in Python](https://docs.baseten.co/performance/engine-builder-customization) and [engine builder configuration](https://docs.baseten.co/performance/engine-builder-config) docs. 6 | -------------------------------------------------------------------------------- /qwen/engine-qwen-2-5-14b-instruct/README.md: -------------------------------------------------------------------------------- 1 | # Qwen 2.5 14B Instruct Engine 2 | 3 | This example uses the [TensorRT-LLM Engine Builder for Qwen](https://docs.baseten.co/performance/examples/qwen-trt) to build and deploy an optimized inference engine for Qwen 2.5 14B Instruct. 
4 | 5 | For advanced control over the engine building process, see [engine control in Python](https://docs.baseten.co/performance/engine-builder-customization) and [engine builder configuration](https://docs.baseten.co/performance/engine-builder-config) docs. 6 | -------------------------------------------------------------------------------- /qwen/engine-qwen-2-5-32b-coder-instruct/README.md: -------------------------------------------------------------------------------- 1 | # Qwen Coder 2.5 32B Instruct Engine 2 | 3 | This example uses the [TensorRT-LLM Engine Builder for Qwen](https://docs.baseten.co/performance/examples/qwen-trt) to build and deploy an optimized inference engine for Qwen Coder 2.5 32B Instruct. 4 | 5 | For advanced control over the engine building process, see [engine control in Python](https://docs.baseten.co/performance/engine-builder-customization) and [engine builder configuration](https://docs.baseten.co/performance/engine-builder-config) docs. 6 | -------------------------------------------------------------------------------- /qwen/engine-qwen-2-5-32b-instruct/README.md: -------------------------------------------------------------------------------- 1 | # Qwen 2.5 32B Instruct Engine 2 | 3 | This example uses the [TensorRT-LLM Engine Builder for Qwen](https://docs.baseten.co/performance/examples/qwen-trt) to build and deploy an optimized inference engine for Qwen 2.5 32B Instruct. 4 | 5 | For advanced control over the engine building process, see [engine control in Python](https://docs.baseten.co/performance/engine-builder-customization) and [engine builder configuration](https://docs.baseten.co/performance/engine-builder-config) docs. 6 | -------------------------------------------------------------------------------- /qwen/engine-qwen-2-5-3b-instruct/README.md: -------------------------------------------------------------------------------- 1 | # Qwen 2.5 3B Instruct Engine 2 | 3 | This example uses the [TensorRT-LLM Engine Builder for Qwen](https://docs.baseten.co/performance/examples/qwen-trt) to build and deploy an optimized inference engine for Qwen 2.5 3B Instruct. 4 | 5 | For advanced control over the engine building process, see [engine control in Python](https://docs.baseten.co/performance/engine-builder-customization) and [engine builder configuration](https://docs.baseten.co/performance/engine-builder-config) docs. 6 | -------------------------------------------------------------------------------- /qwen/engine-qwen-2-5-72b-instruct/README.md: -------------------------------------------------------------------------------- 1 | # Qwen 2.5 72B Instruct Engine 2 | 3 | This example uses the [TensorRT-LLM Engine Builder for Qwen](https://docs.baseten.co/performance/examples/qwen-trt) to build and deploy an optimized inference engine for Qwen 2.5 72B Instruct. 4 | 5 | Note that while other sizes of Qwen 2.5 are licensed as Apache 2.0, 72B sizes use the [qwen license](https://huggingface.co/Qwen/Qwen2.5-72B-Instruct/blob/main/LICENSE). 6 | 7 | For advanced control over the engine building process, see [engine control in Python](https://docs.baseten.co/performance/engine-builder-customization) and [engine builder configuration](https://docs.baseten.co/performance/engine-builder-config) docs. 
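(Editor's note, hedged: the following invocation sketch is not taken from the linked docs. Once this Truss is deployed with `truss push`, the engine can be called through Baseten's standard predict endpoint; the model ID, API key, and the exact request/response schema below are placeholder assumptions that depend on the engine configuration.)

```python
# Hypothetical client call for a deployed Qwen engine -- a sketch, not the
# documented API. MODEL_ID and BASETEN_API_KEY are placeholders.
import os

import requests

MODEL_ID = "abcd1234"  # placeholder: the ID Baseten assigns on deploy

resp = requests.post(
    f"https://model-{MODEL_ID}.api.baseten.co/production/predict",
    headers={"Authorization": f"Api-Key {os.environ['BASETEN_API_KEY']}"},
    json={
        "messages": [{"role": "user", "content": "Write a haiku about GPUs."}],
        "max_tokens": 256,
    },
    timeout=60,
)
print(resp.json())
```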
8 | -------------------------------------------------------------------------------- /qwen/engine-qwen-2-5-72b-math-instruct/README.md: -------------------------------------------------------------------------------- 1 | # Qwen Math 2.5 72B Instruct Engine 2 | 3 | This example uses the [TensorRT-LLM Engine Builder for Qwen](https://docs.baseten.co/performance/examples/qwen-trt) to build and deploy an optimized inference engine for Qwen Math 2.5 72B Instruct. 4 | 5 | Note that while other sizes of Qwen 2.5 are licensed as Apache 2.0, 72B sizes use the [qwen license](https://huggingface.co/Qwen/Qwen2.5-72B-Instruct/blob/main/LICENSE). 6 | 7 | For advanced control over the engine building process, see [engine control in Python](https://docs.baseten.co/performance/engine-builder-customization) and [engine builder configuration](https://docs.baseten.co/performance/engine-builder-config) docs. 8 | -------------------------------------------------------------------------------- /qwen/engine-qwen-2-5-7b-coder-instruct/README.md: -------------------------------------------------------------------------------- 1 | # Qwen Coder 2.5 7B Instruct Engine 2 | 3 | This example uses the [TensorRT-LLM Engine Builder for Qwen](https://docs.baseten.co/performance/examples/qwen-trt) to build and deploy an optimized inference engine for Qwen Coder 2.5 7B Instruct. 4 | 5 | For advanced control over the engine building process, see [engine control in Python](https://docs.baseten.co/performance/engine-builder-customization) and [engine builder configuration](https://docs.baseten.co/performance/engine-builder-config) docs. 6 | -------------------------------------------------------------------------------- /qwen/engine-qwen-2-5-7b-instruct/README.md: -------------------------------------------------------------------------------- 1 | # Qwen 2.5 7B Instruct Engine 2 | 3 | This example uses the [TensorRT-LLM Engine Builder for Qwen](https://docs.baseten.co/performance/examples/qwen-trt) to build and deploy an optimized inference engine for Qwen 2.5 7B Instruct. 4 | 5 | For advanced control over the engine building process, see [engine control in Python](https://docs.baseten.co/performance/engine-builder-customization) and [engine builder configuration](https://docs.baseten.co/performance/engine-builder-config) docs. 6 | -------------------------------------------------------------------------------- /qwen/engine-qwen-2-5-7b-math-instruct/README.md: -------------------------------------------------------------------------------- 1 | # Qwen Math 2.5 7B Instruct Engine 2 | 3 | This example uses the [TensorRT-LLM Engine Builder for Qwen](https://docs.baseten.co/performance/examples/qwen-trt) to build and deploy an optimized inference engine for Qwen Math 2.5 7B Instruct. 4 | 5 | For advanced control over the engine building process, see [engine control in Python](https://docs.baseten.co/performance/engine-builder-customization) and [engine builder configuration](https://docs.baseten.co/performance/engine-builder-config) docs. 6 | -------------------------------------------------------------------------------- /qwen/qwen-7b-chat/config.yaml: -------------------------------------------------------------------------------- 1 | environment_variables: {} 2 | external_package_dirs: [] 3 | model_metadata: 4 | example_model_input: 5 | prompt: What is the meaning of life? 
6 | model_name: qwen-7b-chat 7 | python_version: py310 8 | requirements: 9 | - accelerate==0.23.0 10 | - tiktoken==0.5.1 11 | - einops==0.6.1 12 | - scipy==1.11.3 13 | - transformers_stream_generator==0.0.4 14 | - peft==0.5.0 15 | - deepspeed==0.11.1 16 | - torch==2.0.1 17 | - transformers==4.32.0 18 | resources: 19 | accelerator: A10G 20 | cpu: '3' 21 | memory: 14Gi 22 | use_gpu: true 23 | secrets: {} 24 | system_packages: [] 25 | -------------------------------------------------------------------------------- /qwen/qwen-7b-chat/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/qwen/qwen-7b-chat/model/__init__.py -------------------------------------------------------------------------------- /qwen/qwen-vl/config.yaml: -------------------------------------------------------------------------------- 1 | environment_variables: {} 2 | external_package_dirs: [] 3 | model_cache: 4 | - allow_patterns: 5 | - '*.json' 6 | - '*.fp16.safetensors' 7 | - '*.bin' 8 | - '*.tiktoken' 9 | - '*.py' 10 | repo_id: Qwen/Qwen-VL 11 | model_name: Qwen VL 12 | python_version: py310 13 | requirements: 14 | - torch==2.0.1 15 | - accelerate==0.24.0 16 | - transformers==4.35.0 17 | - einops==0.7.0 18 | - torchvision==0.15.2 19 | - matplotlib==3.8.2 20 | - tiktoken==0.5.2 21 | - transformers_stream_generator==0.0.4 22 | resources: 23 | accelerator: A10G 24 | use_gpu: true 25 | secrets: {} 26 | system_packages: [] 27 | -------------------------------------------------------------------------------- /qwen/qwen-vl/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/qwen/qwen-vl/model/__init__.py -------------------------------------------------------------------------------- /sana/sana_1600M/config.yaml: -------------------------------------------------------------------------------- 1 | build_commands: [] 2 | base_image: 3 | image: alphatozeta/cuda-python:12.1.1-cudnn8-devel-ubuntu22.04 4 | environment_variables: {} 5 | external_package_dirs: [] 6 | model_metadata: 7 | example_model_input: { 8 | "prompt": "a photo of an astronaut riding a horse on mars", 9 | "height": 1024, 10 | "width": 1024, 11 | "guidance_scale": 5.0, 12 | "pag_guidance_scale": 2.0, 13 | "num_inference_steps": 18, 14 | "seed": 4096, 15 | } 16 | model_name: Sana 1600M 17 | python_version: py311 18 | requirements: 19 | - git+https://github.com/NVlabs/Sana.git@d7945026d8d85008aca1d1e6db5717a1069f5c84 20 | - huggingface-hub==0.26.3 21 | - hf-transfer==0.1.8 22 | resources: 23 | accelerator: H100_40GB 24 | use_gpu: true 25 | secrets: 26 | hf_access_token: "null" 27 | system_packages: 28 | - ffmpeg 29 | - libsm6 30 | - libxext6 31 | - python3.10-venv 32 | -------------------------------------------------------------------------------- /sana/sana_1600M/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/sana/sana_1600M/model/__init__.py -------------------------------------------------------------------------------- /sana/sana_1600M/packages/Sana/CITATION.bib: -------------------------------------------------------------------------------- 1 | @misc{xie2024sana, 2 | title={Sana: Efficient High-Resolution Image 
Synthesis with Linear Diffusion Transformer}, 3 | author={Enze Xie and Junsong Chen and Junyu Chen and Han Cai and Haotian Tang and Yujun Lin and Zhekai Zhang and Muyang Li and Ligeng Zhu and Yao Lu and Song Han}, 4 | year={2024}, 5 | eprint={2410.10629}, 6 | archivePrefix={arXiv}, 7 | primaryClass={cs.CV}, 8 | url={https://arxiv.org/abs/2410.10629}, 9 | } 10 | -------------------------------------------------------------------------------- /sana/sana_1600M/packages/Sana/CIs/add_license_all.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | addlicense -s -c 'NVIDIA CORPORATION & AFFILIATES' -ignore "**/*__init__.py" **/*.py 3 | -------------------------------------------------------------------------------- /sana/sana_1600M/packages/Sana/asset/Sana.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/sana/sana_1600M/packages/Sana/asset/Sana.jpg -------------------------------------------------------------------------------- /sana/sana_1600M/packages/Sana/asset/example_data/00000000.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/sana/sana_1600M/packages/Sana/asset/example_data/00000000.png -------------------------------------------------------------------------------- /sana/sana_1600M/packages/Sana/asset/example_data/00000000.txt: -------------------------------------------------------------------------------- 1 | a cyberpunk cat with a neon sign that says "Sana". 2 | -------------------------------------------------------------------------------- /sana/sana_1600M/packages/Sana/asset/example_data/00000000_InternVL2-26B.json: -------------------------------------------------------------------------------- 1 | { 2 | "00000000": { 3 | "InternVL2-26B": "a cyberpunk cat with a neon sign that says 'Sana'" 4 | } 5 | } 6 | -------------------------------------------------------------------------------- /sana/sana_1600M/packages/Sana/asset/example_data/00000000_InternVL2-26B_clip_score.json: -------------------------------------------------------------------------------- 1 | { 2 | "00000000": { 3 | "InternVL2-26B": "27.1037" 4 | } 5 | } 6 | -------------------------------------------------------------------------------- /sana/sana_1600M/packages/Sana/asset/example_data/00000000_VILA1-5-13B.json: -------------------------------------------------------------------------------- 1 | { 2 | "00000000": { 3 | "VILA1-5-13B": "a cyberpunk cat with a neon sign that says 'Sana'" 4 | } 5 | } 6 | -------------------------------------------------------------------------------- /sana/sana_1600M/packages/Sana/asset/example_data/00000000_VILA1-5-13B_clip_score.json: -------------------------------------------------------------------------------- 1 | { 2 | "00000000": { 3 | "VILA1-5-13B": "27.2321" 4 | } 5 | } 6 | -------------------------------------------------------------------------------- /sana/sana_1600M/packages/Sana/asset/example_data/00000000_prompt_clip_score.json: -------------------------------------------------------------------------------- 1 | { 2 | "00000000": { 3 | "prompt": "26.7331" 4 | } 5 | } 6 | -------------------------------------------------------------------------------- /sana/sana_1600M/packages/Sana/asset/example_data/meta_data.json:
-------------------------------------------------------------------------------- 1 | { 2 | "name": "sana-dev", 3 | "__kind__": "Sana-ImgDataset", 4 | "img_names": [ 5 | "00000000", "00000000" 6 | ] 7 | } 8 | -------------------------------------------------------------------------------- /sana/sana_1600M/packages/Sana/asset/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/sana/sana_1600M/packages/Sana/asset/logo.png -------------------------------------------------------------------------------- /sana/sana_1600M/packages/Sana/asset/model-incremental.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/sana/sana_1600M/packages/Sana/asset/model-incremental.jpg -------------------------------------------------------------------------------- /sana/sana_1600M/packages/Sana/asset/model_paths.txt: -------------------------------------------------------------------------------- 1 | output/Sana_1600M_1024px/checkpoints/Sana_1600M_1024px.pth 2 | output/Sana_1600M_1024px/checkpoints/Sana_1600M_1024px.pth 3 | -------------------------------------------------------------------------------- /sana/sana_1600M/packages/Sana/diffusion/__init__.py: -------------------------------------------------------------------------------- 1 | # Modified from OpenAI's diffusion repos 2 | # GLIDE: https://github.com/openai/glide-text2im/blob/main/glide_text2im/gaussian_diffusion.py 3 | # ADM: https://github.com/openai/guided-diffusion/blob/main/guided_diffusion 4 | # IDDPM: https://github.com/openai/improved-diffusion/blob/main/improved_diffusion/gaussian_diffusion.py 5 | 6 | from .dpm_solver import DPMS 7 | from .flow_euler_sampler import FlowEuler 8 | from .iddpm import Scheduler 9 | from .sa_sampler import SASolverSampler 10 | -------------------------------------------------------------------------------- /sana/sana_1600M/packages/Sana/diffusion/data/__init__.py: -------------------------------------------------------------------------------- 1 | from .datasets import * 2 | from .transforms import get_transform 3 | -------------------------------------------------------------------------------- /sana/sana_1600M/packages/Sana/diffusion/data/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .sana_data import SanaImgDataset, SanaWebDataset 2 | from .sana_data_multi_scale import DummyDatasetMS, SanaWebDatasetMS 3 | from .utils import * 4 | -------------------------------------------------------------------------------- /sana/sana_1600M/packages/Sana/diffusion/data/wids/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-2019 NVIDIA CORPORATION. All rights reserved. 2 | # This file is part of the WebDataset library. 3 | # See the LICENSE file for licensing terms (BSD-style). 
4 | # 5 | # flake8: noqa 6 | 7 | from .wids import ( 8 | ChunkedSampler, 9 | DistributedChunkedSampler, 10 | DistributedLocalSampler, 11 | DistributedRangedSampler, 12 | ShardedSampler, 13 | ShardListDataset, 14 | ShardListDatasetMulti, 15 | lru_json_load, 16 | ) 17 | -------------------------------------------------------------------------------- /sana/sana_1600M/packages/Sana/diffusion/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/sana/sana_1600M/packages/Sana/diffusion/model/__init__.py -------------------------------------------------------------------------------- /sana/sana_1600M/packages/Sana/diffusion/model/dc_ae/efficientvit/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/sana/sana_1600M/packages/Sana/diffusion/model/dc_ae/efficientvit/__init__.py -------------------------------------------------------------------------------- /sana/sana_1600M/packages/Sana/diffusion/model/dc_ae/efficientvit/apps/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/sana/sana_1600M/packages/Sana/diffusion/model/dc_ae/efficientvit/apps/__init__.py -------------------------------------------------------------------------------- /sana/sana_1600M/packages/Sana/diffusion/model/dc_ae/efficientvit/apps/trainer/__init__.py: -------------------------------------------------------------------------------- 1 | from .run_config import * 2 | -------------------------------------------------------------------------------- /sana/sana_1600M/packages/Sana/diffusion/model/dc_ae/efficientvit/apps/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .dist import * 2 | from .ema import * 3 | 4 | # from .export import * 5 | from .image import * 6 | from .init import * 7 | from .lr import * 8 | from .metric import * 9 | from .misc import * 10 | from .opt import * 11 | -------------------------------------------------------------------------------- /sana/sana_1600M/packages/Sana/diffusion/model/dc_ae/efficientvit/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/sana/sana_1600M/packages/Sana/diffusion/model/dc_ae/efficientvit/models/__init__.py -------------------------------------------------------------------------------- /sana/sana_1600M/packages/Sana/diffusion/model/dc_ae/efficientvit/models/efficientvit/__init__.py: -------------------------------------------------------------------------------- 1 | from .dc_ae import * 2 | -------------------------------------------------------------------------------- /sana/sana_1600M/packages/Sana/diffusion/model/dc_ae/efficientvit/models/nn/__init__.py: -------------------------------------------------------------------------------- 1 | from .act import * 2 | from .drop import * 3 | from .norm import * 4 | from .ops import * 5 | from .triton_rms_norm import * 6 | -------------------------------------------------------------------------------- /sana/sana_1600M/packages/Sana/diffusion/model/dc_ae/efficientvit/models/utils/__init__.py: 
-------------------------------------------------------------------------------- 1 | from .list import * 2 | from .network import * 3 | from .random import * 4 | -------------------------------------------------------------------------------- /sana/sana_1600M/packages/Sana/diffusion/model/nets/fastlinear/modules/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 MIT Han Lab 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # 15 | # SPDX-License-Identifier: Apache-2.0 16 | 17 | from .triton_lite_mla import * 18 | from .triton_lite_mla_fwd import * 19 | from .triton_mb_conv_pre_glu import * 20 | 21 | # from .flash_attn import * 22 | -------------------------------------------------------------------------------- /sana/sana_1600M/packages/Sana/diffusion/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/sana/sana_1600M/packages/Sana/diffusion/utils/__init__.py -------------------------------------------------------------------------------- /sana/sana_1600M/packages/Sana/sana/tools/__init__.py: -------------------------------------------------------------------------------- 1 | from .download import download_model 2 | from .hf_utils import hf_download_or_fpath 3 | -------------------------------------------------------------------------------- /sana/sana_1600M/packages/Sana/scripts/style.css: -------------------------------------------------------------------------------- 1 | /*.gradio-container{width:680px!important}*/ 2 | /* style.css */ 3 | .gradio_group, .gradio_row, .gradio_column { 4 | display: flex; 5 | flex-direction: row; 6 | justify-content: flex-start; 7 | align-items: flex-start; 8 | flex-wrap: wrap; 9 | } 10 | -------------------------------------------------------------------------------- /sana/sana_1600M/packages/Sana/tests/bash/entry.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | for t in tests/bash/test_*.sh; do 5 | echo "========================== Testing $t ==================================" 6 | bash $t; 7 | done 8 | -------------------------------------------------------------------------------- /sana/sana_1600M/packages/Sana/tests/bash/test_inference.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | python scripts/inference.py \ 5 | --config=configs/sana_config/1024ms/Sana_600M_img1024.yaml \ 6 | --model_path=hf://Efficient-Large-Model/Sana_600M_1024px/checkpoints/Sana_600M_1024px_MultiLing.pth 7 | 8 | 9 | python scripts/inference.py \ 10 | --config=configs/sana_config/1024ms/Sana_1600M_img1024.yaml \ 11 | --model_path=hf://Efficient-Large-Model/Sana_1600M_1024px/checkpoints/Sana_1600M_1024px.pth 12 | --------------------------------------------------------------------------------
/sana/sana_1600M/packages/Sana/tests/bash/test_training_1epoch.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | mkdir -p data/data_public 5 | huggingface-cli download Efficient-Large-Model/sana_data_public --repo-type dataset --local-dir ./data/data_public --local-dir-use-symlinks False 6 | 7 | bash train_scripts/train.sh configs/sana_config/512ms/ci_Sana_600M_img512.yaml --data.load_vae_feat=true 8 | 9 | bash train_scripts/train.sh configs/sana_config/512ms/ci_Sana_600M_img512.yaml --data.data_dir="[asset/example_data]" --data.type=SanaImgDataset --model.multi_scale=false 10 | -------------------------------------------------------------------------------- /sana/sana_1600M/packages/Sana/tools/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/sana/sana_1600M/packages/Sana/tools/__init__.py -------------------------------------------------------------------------------- /sana/sana_1600M/packages/Sana/tools/metrics/clip-score/src/clip_score/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.1.1" 2 | -------------------------------------------------------------------------------- /sana/sana_1600M/packages/Sana/tools/metrics/clip-score/src/clip_score/__main__.py: -------------------------------------------------------------------------------- 1 | import clip_score.clip_score 2 | 3 | clip_score.clip_score.main() 4 | -------------------------------------------------------------------------------- /sana/sana_1600M/packages/Sana/tools/metrics/geneval/evaluation/download_models.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Download Mask2Former object detection config and weights 4 | 5 | if [ !
-z "$1" ] 6 | then 7 | mkdir -p "$1" 8 | echo "Downloading mask2former for GenEval" 9 | wget https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco_20220504_001756-743b7d99.pth -O "$1/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco.pth" 10 | fi 11 | -------------------------------------------------------------------------------- /sana/sana_1600M/packages/Sana/tools/metrics/geneval/images/geneval_figure_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/sana/sana_1600M/packages/Sana/tools/metrics/geneval/images/geneval_figure_1.png -------------------------------------------------------------------------------- /sana/sana_1600M/packages/Sana/tools/metrics/pytorch-fid/noxfile.py: -------------------------------------------------------------------------------- 1 | import nox 2 | 3 | LOCATIONS = ("src/", "tests/", "noxfile.py", "setup.py") 4 | 5 | 6 | @nox.session 7 | def lint(session): 8 | session.install("flake8") 9 | session.install("flake8-bugbear") 10 | session.install("flake8-isort") 11 | 12 | args = session.posargs or LOCATIONS 13 | session.run("flake8", *args) 14 | 15 | 16 | @nox.session 17 | def tests(session): 18 | session.install(".") 19 | session.install("pytest") 20 | session.install("pytest-mock") 21 | session.run("pytest", *session.posargs) 22 | -------------------------------------------------------------------------------- /sana/sana_1600M/packages/Sana/tools/metrics/pytorch-fid/setup.cfg: -------------------------------------------------------------------------------- 1 | [flake8] 2 | select=F,W,E,I,B,B9 3 | ignore=W503,B950 4 | max-line-length=79 5 | 6 | [isort] 7 | multi_line_output=1 8 | line_length=79 9 | -------------------------------------------------------------------------------- /sana/sana_1600M/packages/Sana/tools/metrics/pytorch-fid/src/pytorch_fid/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.3.0" 2 | -------------------------------------------------------------------------------- /sana/sana_1600M/packages/Sana/tools/metrics/pytorch-fid/src/pytorch_fid/__main__.py: -------------------------------------------------------------------------------- 1 | import pytorch_fid.fid_score 2 | 3 | pytorch_fid.fid_score.main() 4 | -------------------------------------------------------------------------------- /sana/sana_1600M/packages/Sana/train_scripts/train.sh: -------------------------------------------------------------------------------- 1 | #/bin/bash 2 | set -e 3 | 4 | work_dir=output/debug 5 | np=8 6 | 7 | 8 | if [[ $1 == *.yaml ]]; then 9 | config=$1 10 | shift 11 | else 12 | config="configs/sana_config/512ms/sample_dataset.yaml" 13 | echo "Only support .yaml files, but get $1. 
Falling back to --config_path=$config" 14 | fi 15 | 16 | TRITON_PRINT_AUTOTUNING=1 \ 17 | torchrun --nproc_per_node=$np --master_port=15432 \ 18 | train_scripts/train.py \ 19 | --config_path=$config \ 20 | --work_dir=$work_dir \ 21 | --name=tmp \ 22 | --resume_from=latest \ 23 | --report_to=tensorboard \ 24 | --debug=true \ 25 | "$@" 26 | -------------------------------------------------------------------------------- /sana/sana_600M/config.yaml: -------------------------------------------------------------------------------- 1 | build_commands: [] 2 | base_image: 3 | image: alphatozeta/cuda-python:12.1.1-cudnn8-devel-ubuntu22.04 4 | environment_variables: {} 5 | external_package_dirs: [] 6 | model_metadata: 7 | example_model_input: { 8 | "prompt": "a photo of an astronaut riding a horse on mars", 9 | "height": 1024, 10 | "width": 1024, 11 | "guidance_scale": 5.0, 12 | "pag_guidance_scale": 2.0, 13 | "num_inference_steps": 18, 14 | "seed": 4096, 15 | } 16 | model_name: Sana 600M 17 | python_version: py311 18 | requirements: 19 | - git+https://github.com/NVlabs/Sana.git@d7945026d8d85008aca1d1e6db5717a1069f5c84 20 | - huggingface-hub==0.26.3 21 | - hf-transfer==0.1.8 22 | resources: 23 | accelerator: H100_40GB 24 | use_gpu: true 25 | secrets: 26 | hf_access_token: "null" 27 | system_packages: 28 | - ffmpeg 29 | - libsm6 30 | - libxext6 31 | - python3.10-venv 32 | -------------------------------------------------------------------------------- /sana/sana_600M/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/sana/sana_600M/model/__init__.py -------------------------------------------------------------------------------- /sana/sana_600M/packages/Sana/CITATION.bib: -------------------------------------------------------------------------------- 1 | @misc{xie2024sana, 2 | title={Sana: Efficient High-Resolution Image Synthesis with Linear Diffusion Transformer}, 3 | author={Enze Xie and Junsong Chen and Junyu Chen and Han Cai and Haotian Tang and Yujun Lin and Zhekai Zhang and Muyang Li and Ligeng Zhu and Yao Lu and Song Han}, 4 | year={2024}, 5 | eprint={2410.10629}, 6 | archivePrefix={arXiv}, 7 | primaryClass={cs.CV}, 8 | url={https://arxiv.org/abs/2410.10629}, 9 | } 10 | -------------------------------------------------------------------------------- /sana/sana_600M/packages/Sana/CIs/add_license_all.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | addlicense -s -c 'NVIDIA CORPORATION & AFFILIATES' -ignore "**/*__init__.py" **/*.py 3 | -------------------------------------------------------------------------------- /sana/sana_600M/packages/Sana/asset/Sana.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/sana/sana_600M/packages/Sana/asset/Sana.jpg -------------------------------------------------------------------------------- /sana/sana_600M/packages/Sana/asset/example_data/00000000.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/sana/sana_600M/packages/Sana/asset/example_data/00000000.png --------------------------------------------------------------------------------
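(Editor's note, hedged: the Sana configs above ship an `example_model_input`, so a minimal client sketch fits here. `MODEL_ID`, the `BASETEN_API_KEY` environment variable, and the name of the base64 image field in the response are assumptions — other examples in this repo return it under different keys, e.g. `"output"` vs. `"data"`.)

```python
# Minimal sketch: call a deployed Sana truss with the example input from its
# config.yaml. MODEL_ID / BASETEN_API_KEY are placeholders, and the response
# field holding the base64 image is an assumption.
import base64
import os

import requests

MODEL_ID = "abcd1234"  # placeholder

payload = {
    "prompt": "a photo of an astronaut riding a horse on mars",
    "height": 1024,
    "width": 1024,
    "guidance_scale": 5.0,
    "pag_guidance_scale": 2.0,
    "num_inference_steps": 18,
    "seed": 4096,
}

resp = requests.post(
    f"https://model-{MODEL_ID}.api.baseten.co/production/predict",
    headers={"Authorization": f"Api-Key {os.environ['BASETEN_API_KEY']}"},
    json=payload,
    timeout=300,
).json()

# Accept either of the base64 keys used elsewhere in this repo.
b64 = resp.get("data") or resp.get("output")
if b64 is None:
    raise KeyError(f"no image field in response; keys: {list(resp)}")

with open("sana.png", "wb") as f:
    f.write(base64.b64decode(b64))
```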
/sana/sana_600M/packages/Sana/asset/example_data/00000000.txt: -------------------------------------------------------------------------------- 1 | a cyberpunk cat with a neon sign that says "Sana". 2 | -------------------------------------------------------------------------------- /sana/sana_600M/packages/Sana/asset/example_data/00000000_InternVL2-26B.json: -------------------------------------------------------------------------------- 1 | { 2 | "00000000": { 3 | "InternVL2-26B": "a cyberpunk cat with a neon sign that says 'Sana'" 4 | } 5 | } 6 | -------------------------------------------------------------------------------- /sana/sana_600M/packages/Sana/asset/example_data/00000000_InternVL2-26B_clip_score.json: -------------------------------------------------------------------------------- 1 | { 2 | "00000000": { 3 | "InternVL2-26B": "27.1037" 4 | } 5 | } 6 | -------------------------------------------------------------------------------- /sana/sana_600M/packages/Sana/asset/example_data/00000000_VILA1-5-13B.json: -------------------------------------------------------------------------------- 1 | { 2 | "00000000": { 3 | "VILA1-5-13B": "a cyberpunk cat with a neon sign that says 'Sana'" 4 | } 5 | } 6 | -------------------------------------------------------------------------------- /sana/sana_600M/packages/Sana/asset/example_data/00000000_VILA1-5-13B_clip_score.json: -------------------------------------------------------------------------------- 1 | { 2 | "00000000": { 3 | "VILA1-5-13B": "27.2321" 4 | } 5 | } 6 | -------------------------------------------------------------------------------- /sana/sana_600M/packages/Sana/asset/example_data/00000000_prompt_clip_score.json: -------------------------------------------------------------------------------- 1 | { 2 | "00000000": { 3 | "prompt": "26.7331" 4 | } 5 | } 6 | -------------------------------------------------------------------------------- /sana/sana_600M/packages/Sana/asset/example_data/meta_data.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "sana-dev", 3 | "__kind__": "Sana-ImgDataset", 4 | "img_names": [ 5 | "00000000", "00000000" 6 | ] 7 | } 8 | -------------------------------------------------------------------------------- /sana/sana_600M/packages/Sana/asset/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/sana/sana_600M/packages/Sana/asset/logo.png -------------------------------------------------------------------------------- /sana/sana_600M/packages/Sana/asset/model-incremental.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/sana/sana_600M/packages/Sana/asset/model-incremental.jpg -------------------------------------------------------------------------------- /sana/sana_600M/packages/Sana/asset/model_paths.txt: -------------------------------------------------------------------------------- 1 | output/Sana_1600M_1024px/checkpoints/Sana_1600M_1024px.pth 2 | output/Sana_1600M_1024px/checkpoints/Sana_1600M_1024px.pth 3 | -------------------------------------------------------------------------------- /sana/sana_600M/packages/Sana/diffusion/__init__.py: -------------------------------------------------------------------------------- 1 | # Modified from OpenAI's diffusion repos 2 | # 
GLIDE: https://github.com/openai/glide-text2im/blob/main/glide_text2im/gaussian_diffusion.py 3 | # ADM: https://github.com/openai/guided-diffusion/blob/main/guided_diffusion 4 | # IDDPM: https://github.com/openai/improved-diffusion/blob/main/improved_diffusion/gaussian_diffusion.py 5 | 6 | from .dpm_solver import DPMS 7 | from .flow_euler_sampler import FlowEuler 8 | from .iddpm import Scheduler 9 | from .sa_sampler import SASolverSampler 10 | -------------------------------------------------------------------------------- /sana/sana_600M/packages/Sana/diffusion/data/__init__.py: -------------------------------------------------------------------------------- 1 | from .datasets import * 2 | from .transforms import get_transform 3 | -------------------------------------------------------------------------------- /sana/sana_600M/packages/Sana/diffusion/data/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .sana_data import SanaImgDataset, SanaWebDataset 2 | from .sana_data_multi_scale import DummyDatasetMS, SanaWebDatasetMS 3 | from .utils import * 4 | -------------------------------------------------------------------------------- /sana/sana_600M/packages/Sana/diffusion/data/wids/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-2019 NVIDIA CORPORATION. All rights reserved. 2 | # This file is part of the WebDataset library. 3 | # See the LICENSE file for licensing terms (BSD-style). 4 | # 5 | # flake8: noqa 6 | 7 | from .wids import ( 8 | ChunkedSampler, 9 | DistributedChunkedSampler, 10 | DistributedLocalSampler, 11 | DistributedRangedSampler, 12 | ShardedSampler, 13 | ShardListDataset, 14 | ShardListDatasetMulti, 15 | lru_json_load, 16 | ) 17 | -------------------------------------------------------------------------------- /sana/sana_600M/packages/Sana/diffusion/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/sana/sana_600M/packages/Sana/diffusion/model/__init__.py -------------------------------------------------------------------------------- /sana/sana_600M/packages/Sana/diffusion/model/dc_ae/efficientvit/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/sana/sana_600M/packages/Sana/diffusion/model/dc_ae/efficientvit/__init__.py -------------------------------------------------------------------------------- /sana/sana_600M/packages/Sana/diffusion/model/dc_ae/efficientvit/apps/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/sana/sana_600M/packages/Sana/diffusion/model/dc_ae/efficientvit/apps/__init__.py -------------------------------------------------------------------------------- /sana/sana_600M/packages/Sana/diffusion/model/dc_ae/efficientvit/apps/trainer/__init__.py: -------------------------------------------------------------------------------- 1 | from .run_config import * 2 | -------------------------------------------------------------------------------- /sana/sana_600M/packages/Sana/diffusion/model/dc_ae/efficientvit/apps/utils/__init__.py: 
-------------------------------------------------------------------------------- 1 | from .dist import * 2 | from .ema import * 3 | 4 | # from .export import * 5 | from .image import * 6 | from .init import * 7 | from .lr import * 8 | from .metric import * 9 | from .misc import * 10 | from .opt import * 11 | -------------------------------------------------------------------------------- /sana/sana_600M/packages/Sana/diffusion/model/dc_ae/efficientvit/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/sana/sana_600M/packages/Sana/diffusion/model/dc_ae/efficientvit/models/__init__.py -------------------------------------------------------------------------------- /sana/sana_600M/packages/Sana/diffusion/model/dc_ae/efficientvit/models/efficientvit/__init__.py: -------------------------------------------------------------------------------- 1 | from .dc_ae import * 2 | -------------------------------------------------------------------------------- /sana/sana_600M/packages/Sana/diffusion/model/dc_ae/efficientvit/models/nn/__init__.py: -------------------------------------------------------------------------------- 1 | from .act import * 2 | from .drop import * 3 | from .norm import * 4 | from .ops import * 5 | from .triton_rms_norm import * 6 | -------------------------------------------------------------------------------- /sana/sana_600M/packages/Sana/diffusion/model/dc_ae/efficientvit/models/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .list import * 2 | from .network import * 3 | from .random import * 4 | -------------------------------------------------------------------------------- /sana/sana_600M/packages/Sana/diffusion/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/sana/sana_600M/packages/Sana/diffusion/utils/__init__.py -------------------------------------------------------------------------------- /sana/sana_600M/packages/Sana/sana/tools/__init__.py: -------------------------------------------------------------------------------- 1 | from .download import download_model 2 | from .hf_utils import hf_download_or_fpath 3 | -------------------------------------------------------------------------------- /sana/sana_600M/packages/Sana/scripts/style.css: -------------------------------------------------------------------------------- 1 | /*.gradio-container{width:680px!important}*/ 2 | /* style.css */ 3 | .gradio_group, .gradio_row, .gradio_column { 4 | display: flex; 5 | flex-direction: row; 6 | justify-content: flex-start; 7 | align-items: flex-start; 8 | flex-wrap: wrap; 9 | } 10 | -------------------------------------------------------------------------------- /sana/sana_600M/packages/Sana/tests/bash/entry.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | for t in tests/bash/test_*.sh; do 5 | echo "========================== Testing $t ==================================" 6 | bash $t; 7 | done 8 | -------------------------------------------------------------------------------- /sana/sana_600M/packages/Sana/tests/bash/test_inference.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 |
python scripts/inference.py \ 5 | --config=configs/sana_config/1024ms/Sana_600M_img1024.yaml \ 6 | --model_path=hf://Efficient-Large-Model/Sana_600M_1024px/checkpoints/Sana_600M_1024px_MultiLing.pth 7 | 8 | 9 | python scripts/inference.py \ 10 | --config=configs/sana_config/1024ms/Sana_1600M_img1024.yaml \ 11 | --model_path=hf://Efficient-Large-Model/Sana_1600M_1024px/checkpoints/Sana_1600M_1024px.pth 12 | -------------------------------------------------------------------------------- /sana/sana_600M/packages/Sana/tests/bash/test_training_1epoch.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | mkdir -p data/data_public 5 | huggingface-cli download Efficient-Large-Model/sana_data_public --repo-type dataset --local-dir ./data/data_public --local-dir-use-symlinks False 6 | 7 | bash train_scripts/train.sh configs/sana_config/512ms/ci_Sana_600M_img512.yaml --data.load_vae_feat=true 8 | 9 | bash train_scripts/train.sh configs/sana_config/512ms/ci_Sana_600M_img512.yaml --data.data_dir="[asset/example_data]" --data.type=SanaImgDataset --model.multi_scale=false 10 | -------------------------------------------------------------------------------- /sana/sana_600M/packages/Sana/tools/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/sana/sana_600M/packages/Sana/tools/__init__.py -------------------------------------------------------------------------------- /sana/sana_600M/packages/Sana/tools/metrics/clip-score/src/clip_score/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.1.1" 2 | -------------------------------------------------------------------------------- /sana/sana_600M/packages/Sana/tools/metrics/clip-score/src/clip_score/__main__.py: -------------------------------------------------------------------------------- 1 | import clip_score.clip_score 2 | 3 | clip_score.clip_score.main() 4 | -------------------------------------------------------------------------------- /sana/sana_600M/packages/Sana/tools/metrics/geneval/evaluation/download_models.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Download Mask2Former object detection config and weights 4 | 5 | if [ !
-z "$1" ] 6 | then 7 | mkdir -p "$1" 8 | echo "Downloading mask2former for GenEval" 9 | wget https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco_20220504_001756-743b7d99.pth -O "$1/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco.pth" 10 | fi 11 | -------------------------------------------------------------------------------- /sana/sana_600M/packages/Sana/tools/metrics/geneval/images/geneval_figure_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/sana/sana_600M/packages/Sana/tools/metrics/geneval/images/geneval_figure_1.png -------------------------------------------------------------------------------- /sana/sana_600M/packages/Sana/tools/metrics/pytorch-fid/noxfile.py: -------------------------------------------------------------------------------- 1 | import nox 2 | 3 | LOCATIONS = ("src/", "tests/", "noxfile.py", "setup.py") 4 | 5 | 6 | @nox.session 7 | def lint(session): 8 | session.install("flake8") 9 | session.install("flake8-bugbear") 10 | session.install("flake8-isort") 11 | 12 | args = session.posargs or LOCATIONS 13 | session.run("flake8", *args) 14 | 15 | 16 | @nox.session 17 | def tests(session): 18 | session.install(".") 19 | session.install("pytest") 20 | session.install("pytest-mock") 21 | session.run("pytest", *session.posargs) 22 | -------------------------------------------------------------------------------- /sana/sana_600M/packages/Sana/tools/metrics/pytorch-fid/setup.cfg: -------------------------------------------------------------------------------- 1 | [flake8] 2 | select=F,W,E,I,B,B9 3 | ignore=W503,B950 4 | max-line-length=79 5 | 6 | [isort] 7 | multi_line_output=1 8 | line_length=79 9 | -------------------------------------------------------------------------------- /sana/sana_600M/packages/Sana/tools/metrics/pytorch-fid/src/pytorch_fid/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.3.0" 2 | -------------------------------------------------------------------------------- /sana/sana_600M/packages/Sana/tools/metrics/pytorch-fid/src/pytorch_fid/__main__.py: -------------------------------------------------------------------------------- 1 | import pytorch_fid.fid_score 2 | 3 | pytorch_fid.fid_score.main() 4 | -------------------------------------------------------------------------------- /sana/sana_600M/packages/Sana/train_scripts/train.sh: -------------------------------------------------------------------------------- 1 | #/bin/bash 2 | set -e 3 | 4 | work_dir=output/debug 5 | np=8 6 | 7 | 8 | if [[ $1 == *.yaml ]]; then 9 | config=$1 10 | shift 11 | else 12 | config="configs/sana_config/512ms/sample_dataset.yaml" 13 | echo "Only support .yaml files, but get $1. 
Falling back to --config_path=$config" 14 | fi 15 | 16 | TRITON_PRINT_AUTOTUNING=1 \ 17 | torchrun --nproc_per_node=$np --master_port=15432 \ 18 | train_scripts/train.py \ 19 | --config_path=$config \ 20 | --work_dir=$work_dir \ 21 | --name=tmp \ 22 | --resume_from=latest \ 23 | --report_to=tensorboard \ 24 | --debug=true \ 25 | "$@" 26 | -------------------------------------------------------------------------------- /segment-anything/README.md: -------------------------------------------------------------------------------- 1 | # Segment Anything Model 2 | 3 | This example deploys the Segment Anything Model (SAM) with its weights preloaded via Truss external data. 4 | 5 | ## Deploy to Baseten 6 | To deploy the model, run the following from the root of this directory: 7 | 8 | ``` 9 | truss push --publish 10 | ``` 11 | 12 | ## Predict 13 | Example prediction: 14 | 15 | ``` 16 | truss predict --published -d '{"image_url": "https://as2.ftcdn.net/v2/jpg/00/66/26/87/1000_F_66268784_jccdcfdpf2vmq5X8raYA8JQT0sziZ1H9.jpg"}' 17 | ``` 18 | -------------------------------------------------------------------------------- /segment-anything/config.yaml: -------------------------------------------------------------------------------- 1 | environment_variables: {} 2 | external_data: 3 | - local_data_path: sam_vit_h_4b8939.pth 4 | url: https://dl.fbaipublicfiles.com/segment_anything/sam_vit_h_4b8939.pth 5 | external_package_dirs: [] 6 | model_metadata: 7 | example_model_input: 8 | image_url: https://as2.ftcdn.net/v2/jpg/00/66/26/87/1000_F_66268784_jccdcfdpf2vmq5X8raYA8JQT0sziZ1H9.jpg 9 | model_name: Segment Anything 10 | python_version: py310 11 | requirements: 12 | - git+https://github.com/facebookresearch/segment-anything.git@6fdee8f2727f4506cfbbe553e23b895e27956588 13 | - opencv-python==4.8.1.78 14 | - torch==2.1.0 15 | - torchvision==0.16.0 16 | - pycocotools==2.0.7 17 | resources: 18 | accelerator: A10G 19 | cpu: 1000m 20 | memory: 10Gi 21 | use_gpu: true 22 | secrets: {} 23 | system_packages: 24 | - python3-opencv 25 | -------------------------------------------------------------------------------- /segment-anything/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/segment-anything/model/__init__.py -------------------------------------------------------------------------------- /sesame-csm-1b/config.yaml: -------------------------------------------------------------------------------- 1 | model_name: sesame-csm-1b 2 | python_version: py310 3 | model_metadata: 4 | example_model_input: 5 | text: "Hello from Sesame."
6 | speaker: 0 7 | requirements: 8 | - torch==2.4.0 9 | - torchaudio==2.4.0 10 | - tokenizers==0.21.0 11 | - transformers==4.49.0 12 | - huggingface_hub==0.28.1 13 | - moshi==0.2.2 14 | - torchtune==0.4.0 15 | - torchao==0.9.0 16 | - silentcipher @ git+https://github.com/SesameAILabs/silentcipher@master 17 | - ffmpeg 18 | - git+https://github.com/veerbia/csm.git 19 | resources: 20 | accelerator: T4 21 | cpu: '1' 22 | memory: 10Gi 23 | use_gpu: true 24 | secrets: 25 | hf_access_token: null 26 | system_packages: [] 27 | environment_variables: {} 28 | external_package_dirs: [] 29 | -------------------------------------------------------------------------------- /sesame-csm-1b/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/sesame-csm-1b/model/__init__.py -------------------------------------------------------------------------------- /stable-diffusion/dreamshaper-lcm/README.md: -------------------------------------------------------------------------------- 1 | # Dreamshaper Latent Consistency Model 2 | 3 | A Truss for [Dreamshaper LCM](https://huggingface.co/spaces/SimianLuo/Latent_Consistency_Model), a distillation of Dreamshaper (a Stable Diffusion 1.5 fine-tune), which achieves similar quality in ~1-8 steps. Generate high-quality 768 x 768 images in under a second. 4 | -------------------------------------------------------------------------------- /stable-diffusion/dreamshaper-lcm/config.yaml: -------------------------------------------------------------------------------- 1 | environment_variables: {} 2 | external_package_dirs: [] 3 | model_name: Dreamshaper Latent Consistency Model 4 | python_version: py311 5 | requirements: 6 | - diffusers==0.21.4 7 | - transformers==4.34.1 8 | - accelerate==0.23.0 9 | - torch==2.1.0 10 | resources: 11 | accelerator: A10G 12 | cpu: '1' 13 | memory: 2Gi 14 | use_gpu: true 15 | secrets: {} 16 | system_packages: [] 17 | -------------------------------------------------------------------------------- /stable-diffusion/dreamshaper-lcm/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/stable-diffusion/dreamshaper-lcm/model/__init__.py -------------------------------------------------------------------------------- /stable-diffusion/playground-v2-trt/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/stable-diffusion/playground-v2-trt/model/__init__.py -------------------------------------------------------------------------------- /stable-diffusion/playground-v2-trt/show.py: -------------------------------------------------------------------------------- 1 | """ 2 | truss predict -d '{"prompt": "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k"}' | python show.py 3 | """ 4 | 5 | import base64 6 | import json 7 | import os 8 | import sys 9 | 10 | resp = sys.stdin.read() 11 | image = json.loads(resp)["data"] 12 | img = base64.b64decode(image) 13 | 14 | file_name = f"{image[-10:].replace('/', '')}.jpeg" 15 | img_file = open(file_name, "wb") 16 | img_file.write(img) 17 | img_file.close() 18 | os.system(f"open {file_name}") 19 |
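(Editor's note, hedged: the two `show.py` helpers in this repo read different response keys — `"output"` for playground-v2-aesthetic and `"data"` for playground-v2-trt — and both shell out to the macOS-only `open` command. The sketch below is a more defensive, cross-platform variant; the filename `show_image.py` is illustrative, not a file in this repo.)

```python
# show_image.py -- hedged sketch of a portable alternative to the show.py
# scripts above: accepts either base64 key and skips the macOS-only `open`.
import base64
import json
import sys

resp = json.loads(sys.stdin.read())
image_b64 = resp.get("data") or resp.get("output")
if image_b64 is None:
    sys.exit(f"No image field found; response keys: {list(resp)}")

with open("image.png", "wb") as f:
    f.write(base64.b64decode(image_b64))
print("Wrote image.png")
```

Usage mirrors the originals: `truss predict -d '{"prompt": "..."}' | python show_image.py`.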
-------------------------------------------------------------------------------- /stable-diffusion/sd-textual-inversion/README.md: -------------------------------------------------------------------------------- 1 | # Textual Inversion with Stable Diffusion 2 | 3 | The following example demonstrates how to use Stable Diffusion with 4 | textual inversion embeddings. 5 | 6 | This truss combines concepts from: 7 | 1. [This colab](https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers/stable_conceptualizer_inference.ipynb#scrollTo=JkIeuLEfqi-g) which demonstrates how to load textual inversion embeddings from Hugging Face repos 8 | 2. [This diffusers issue](https://github.com/huggingface/diffusers/issues/3097#issuecomment-1516138396) which demonstrates how to load an embedding directly. 9 | -------------------------------------------------------------------------------- /stable-diffusion/sd-textual-inversion/config.yaml: -------------------------------------------------------------------------------- 1 | environment_variables: {} 2 | external_package_dirs: [] 3 | model_metadata: 4 | pretty_name: Stable Diffusion - Textual Inversion 5 | tags: 6 | - image-generation 7 | model_name: SD_Textual_Inversion 8 | python_version: py311 9 | requirements: 10 | - diffusers==0.16.1 11 | - transformers 12 | - ftfy 13 | - accelerate 14 | - torch 15 | - pillow 16 | resources: 17 | accelerator: T4 18 | cpu: 500m 19 | memory: 512Mi 20 | use_gpu: true 21 | secrets: {} 22 | system_packages: [] 23 | -------------------------------------------------------------------------------- /stable-diffusion/sd-textual-inversion/data/LulaCipher.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/stable-diffusion/sd-textual-inversion/data/LulaCipher.bin -------------------------------------------------------------------------------- /stable-diffusion/sd-textual-inversion/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/stable-diffusion/sd-textual-inversion/model/__init__.py -------------------------------------------------------------------------------- /stable-diffusion/sd-turbo/config.yaml: -------------------------------------------------------------------------------- 1 | environment_variables: {} 2 | external_package_dirs: [] 3 | model_cache: 4 | - allow_patterns: 5 | - '*.json' 6 | - '*.fp16.safetensors' 7 | - '*.txt' 8 | repo_id: stabilityai/sdxl-turbo 9 | model_metadata: 10 | avatar_url: https://cdn.baseten.co/production/static/stability.png 11 | cover_image_url: https://cdn.baseten.co/production/static/sd.png 12 | example_model_input: 13 | prompt: A tree in a field under the night sky 14 | pretty_name: SD Turbo 15 | tags: 16 | - image-generation 17 | model_name: SD Turbo 18 | python_version: py311 19 | requirements: 20 | - torch==2.0.1 21 | - transformers==4.35.2 22 | - diffusers==0.23.1 23 | - accelerate==0.24.1 24 | resources: 25 | accelerator: T4 26 | use_gpu: true 27 | secrets: {} 28 | system_packages: [] 29 | -------------------------------------------------------------------------------- /stable-diffusion/sd-turbo/model/__init__.py: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/stable-diffusion/sd-turbo/model/__init__.py -------------------------------------------------------------------------------- /stable-diffusion/sdxl-controlnet-canny/baseten-logo.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/stable-diffusion/sdxl-controlnet-canny/baseten-logo.gif -------------------------------------------------------------------------------- /stable-diffusion/sdxl-controlnet-canny/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/stable-diffusion/sdxl-controlnet-canny/model/__init__.py -------------------------------------------------------------------------------- /stable-diffusion/sdxl-controlnet-depth/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/stable-diffusion/sdxl-controlnet-depth/model/__init__.py -------------------------------------------------------------------------------- /stable-diffusion/sdxl-controlnet/baseten-logo.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/stable-diffusion/sdxl-controlnet/baseten-logo.gif -------------------------------------------------------------------------------- /stable-diffusion/sdxl-controlnet/config.yaml: -------------------------------------------------------------------------------- 1 | environment_variables: {} 2 | external_package_dirs: [] 3 | model_metadata: 4 | avatar_url: https://cdn.baseten.co/production/static/stability.png 5 | cover_image_url: https://cdn.baseten.co/production/static/sd.png 6 | example_model_input: 7 | prompt: aerial view, a futuristic research complex in a bright foggy jungle, hard 8 | lighting 9 | pretty_name: Stable Diffusion ControlNet 10 | tags: 11 | - image-generation 12 | model_name: SDXL ControlNet 13 | python_version: py39 14 | requirements: 15 | - accelerate 16 | - transformers 17 | - safetensors 18 | - opencv-python 19 | - diffusers 20 | resources: 21 | accelerator: A10G 22 | cpu: 3500m 23 | memory: 20Gi 24 | use_gpu: true 25 | secrets: {} 26 | system_packages: 27 | - ffmpeg 28 | - libsm6 29 | - libxext6 30 | -------------------------------------------------------------------------------- /stable-diffusion/sdxl-controlnet/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/stable-diffusion/sdxl-controlnet/model/__init__.py -------------------------------------------------------------------------------- /stable-diffusion/sdxl-lightning/config.yaml: -------------------------------------------------------------------------------- 1 | environment_variables: {} 2 | external_package_dirs: [] 3 | model_metadata: 4 | avatar_url: https://cdn.baseten.co/production/static/stability.png 5 | cover_image_url: https://cdn.baseten.co/production/static/sd.png 6 | example_model_input: 7 | prompt: A tree in a field under the night sky 8 | pretty_name: SDXL Lightning 9 | tags: 10 | 
- image-generation 11 | model_name: SDXL Lightning 12 | python_version: py310 13 | requirements: 14 | - torch==2.0.1 15 | - transformers==4.35.2 16 | - diffusers==0.23.1 17 | - hf_transfer==0.1.4 18 | - xformers==0.0.22 19 | - accelerate==0.24.1 20 | resources: 21 | accelerator: A100 22 | use_gpu: true 23 | secrets: {} 24 | system_packages: [] 25 | -------------------------------------------------------------------------------- /stable-diffusion/sdxl-lightning/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/stable-diffusion/sdxl-lightning/model/__init__.py -------------------------------------------------------------------------------- /stable-diffusion/sdxl-lora-swapping/config.yaml: -------------------------------------------------------------------------------- 1 | environment_variables: {} 2 | external_package_dirs: [] 3 | model_metadata: 4 | example_model_input: 5 | lora: 6 | repo_id: nerijs/pixel-art-xl 7 | weights: pixel-art-xl.safetensors 8 | prompt: pixel art, a baby giraffe 9 | model_name: Stable Diffusion XL with LoRA Swapping 10 | python_version: py311 11 | requirements: 12 | - accelerate==0.23.0 13 | - transformers==4.33.2 14 | - safetensors==0.3.3 15 | - opencv-python==4.8.0.76 16 | - diffusers==0.21.2 17 | resources: 18 | accelerator: A100 19 | cpu: 3500m 20 | memory: 20Gi 21 | use_gpu: true 22 | secrets: {} 23 | system_packages: 24 | - ffmpeg 25 | - libsm6 26 | - libxext6 27 | -------------------------------------------------------------------------------- /stable-diffusion/sdxl-lora-swapping/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/stable-diffusion/sdxl-lora-swapping/model/__init__.py -------------------------------------------------------------------------------- /stable-diffusion/sdxl-lora/config.yaml: -------------------------------------------------------------------------------- 1 | environment_variables: {} 2 | external_package_dirs: [] 3 | model_name: Stable Diffusion XL with LoRA 4 | python_version: py311 5 | requirements: 6 | - accelerate 7 | - transformers 8 | - safetensors 9 | - opencv-python 10 | - diffusers 11 | resources: 12 | accelerator: A10G 13 | cpu: 3500m 14 | memory: 20Gi 15 | use_gpu: true 16 | secrets: {} 17 | system_packages: 18 | - ffmpeg 19 | - libsm6 20 | - libxext6 21 | -------------------------------------------------------------------------------- /stable-diffusion/sdxl-lora/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/stable-diffusion/sdxl-lora/model/__init__.py -------------------------------------------------------------------------------- /stable-diffusion/sdxl-turbo/config.yaml: -------------------------------------------------------------------------------- 1 | environment_variables: {} 2 | external_package_dirs: [] 3 | model_cache: 4 | - allow_patterns: 5 | - '*.json' 6 | - '*.fp16.safetensors' 7 | - '*.txt' 8 | repo_id: stabilityai/sdxl-turbo 9 | model_metadata: 10 | avatar_url: https://cdn.baseten.co/production/static/stability.png 11 | cover_image_url: https://cdn.baseten.co/production/static/sd.png 12 | example_model_input: 13 | prompt: A tree in a field under the night 
sky 14 | pretty_name: SDXL Turbo 15 | tags: 16 | - image-generation 17 | model_name: SDXL Turbo 18 | python_version: py310 19 | requirements: 20 | - torch==2.0.1 21 | - transformers==4.35.2 22 | - diffusers==0.23.1 23 | - hf_transfer==0.1.4 24 | - xformers==0.0.22 25 | - accelerate==0.24.1 26 | resources: 27 | accelerator: T4 28 | cpu: '3' 29 | memory: 20Gi 30 | use_gpu: true 31 | secrets: {} 32 | system_packages: [] 33 | -------------------------------------------------------------------------------- /stable-diffusion/sdxl-turbo/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/stable-diffusion/sdxl-turbo/model/__init__.py -------------------------------------------------------------------------------- /stable-diffusion/stable-diffusion-3-medium/config.yaml: -------------------------------------------------------------------------------- 1 | environment_variables: 2 | HF_HUB_OFFLINE: 1 3 | external_package_dirs: [] 4 | model_metadata: {} 5 | model_cache: 6 | - repo_id: stabilityai/stable-diffusion-3-medium-diffusers 7 | model_name: Stable Diffusion 3 Medium 8 | python_version: py310 9 | requirements: 10 | - diffusers==0.29.0 11 | - transformers 12 | - accelerate 13 | - sentencepiece 14 | - protobuf 15 | resources: 16 | accelerator: A100 17 | use_gpu: true 18 | secrets: 19 | hf_access_token: "" 20 | system_packages: 21 | - ffmpeg 22 | - libsm6 23 | - libxext6 24 | -------------------------------------------------------------------------------- /stable-diffusion/stable-diffusion-3-medium/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/stable-diffusion/stable-diffusion-3-medium/model/__init__.py -------------------------------------------------------------------------------- /stable-diffusion/stable-diffusion-inpainting-trt/config.yaml: -------------------------------------------------------------------------------- 1 | environment_variables: {} 2 | external_package_dirs: [] 3 | model_name: Stable Diffusion Inpainting TRT 4 | python_version: py310 5 | requirements: [] 6 | requirements_file: requirements.txt 7 | resources: 8 | accelerator: A10G 9 | use_gpu: true 10 | secrets: {} 11 | system_packages: [] 12 | -------------------------------------------------------------------------------- /stable-diffusion/stable-diffusion-inpainting-trt/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/stable-diffusion/stable-diffusion-inpainting-trt/model/__init__.py -------------------------------------------------------------------------------- /stable-diffusion/stable-diffusion-inpainting-trt/requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate==0.26.1 2 | colored==2.2.4 3 | cuda-python==12.3.0 4 | diffusers==0.14.0 5 | ftfy==6.1.3 6 | matplotlib==3.8.2 7 | nvtx==0.2.8 8 | onnx==1.13.1 9 | onnxruntime==1.14.1 10 | --extra-index-url https://pypi.ngc.nvidia.com 11 | onnx-graphsurgeon==0.3.26 12 | polygraphy==0.47.1 13 | scipy==1.12.0 14 | torch==2.2.0 15 | tensorrt==8.6.1.post1 16 | transformers==4.26.1 17 | -------------------------------------------------------------------------------- 
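The sdxl-lora-swapping config above passes a `lora` repo and weights file as part of each request. Here is a rough sketch of how such per-request adapter swapping is typically done with diffusers; the base checkpoint choice is an assumption, and `load_lora_weights`/`unload_lora_weights` are assumed available in the pinned diffusers version:

import torch
from diffusers import StableDiffusionXLPipeline

pipe = StableDiffusionXLPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16
).to("cuda")

def predict(request: dict):
    lora = request.get("lora")
    if lora:
        # Pull the requested adapter from the Hub and apply it for this call only.
        pipe.load_lora_weights(lora["repo_id"], weight_name=lora["weights"])
    image = pipe(prompt=request["prompt"]).images[0]
    if lora:
        # Restore the base weights so the next request starts clean.
        pipe.unload_lora_weights()
    return image

With the config's own example_model_input, this would load nerijs/pixel-art-xl with pixel-art-xl.safetensors before generating.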
/stable-diffusion/stable-diffusion-xl-1.0-trt-h100/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/stable-diffusion/stable-diffusion-xl-1.0-trt-h100/model/__init__.py -------------------------------------------------------------------------------- /stable-diffusion/stable-diffusion-xl-1.0-trt-h100/show.py: -------------------------------------------------------------------------------- 1 | """ 2 | truss predict -d '{"prompt": "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k"}' | python show.py 3 | """ 4 | 5 | import base64 6 | import json 7 | import os 8 | import sys 9 | 10 | resp = sys.stdin.read() 11 | image = json.loads(resp)["data"] 12 | img = base64.b64decode(image) 13 | 14 | file_name = f"{image[-10:].replace('/', '')}.jpeg" 15 | img_file = open(file_name, "wb") 16 | img_file.write(img) 17 | img_file.close() 18 | os.system(f"open {file_name}") 19 | -------------------------------------------------------------------------------- /stable-diffusion/stable-diffusion-xl-1.0-trt/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/stable-diffusion/stable-diffusion-xl-1.0-trt/model/__init__.py -------------------------------------------------------------------------------- /stable-diffusion/stable-diffusion-xl-1.0-trt/show.py: -------------------------------------------------------------------------------- 1 | """ 2 | truss predict -d '{"prompt": "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k"}' | python show.py 3 | """ 4 | 5 | import base64 6 | import json 7 | import os 8 | import sys 9 | 10 | resp = sys.stdin.read() 11 | image = json.loads(resp)["data"] 12 | img = base64.b64decode(image) 13 | 14 | file_name = f"{image[-10:].replace('/', '')}.jpeg" 15 | img_file = open(file_name, "wb") 16 | img_file.write(img) 17 | img_file.close() 18 | os.system(f"open {file_name}") 19 | -------------------------------------------------------------------------------- /stable-diffusion/stable-diffusion-xl-1.0/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/stable-diffusion/stable-diffusion-xl-1.0/model/__init__.py -------------------------------------------------------------------------------- /stable-diffusion/stable-diffusion-xl-1.0/show.py: -------------------------------------------------------------------------------- 1 | """ 2 | truss predict -d '{"prompt": "A heavily constructed solarpunk bridge over a canyon at sunset"}' | python show.py 3 | """ 4 | 5 | import base64 6 | import json 7 | import os 8 | import sys 9 | 10 | resp = sys.stdin.read() 11 | image = json.loads(resp)["data"] 12 | img = base64.b64decode(image) 13 | 14 | file_name = f"{image[-10:].replace('/', '')}.jpeg" 15 | img_file = open(file_name, "wb") 16 | img_file.write(img) 17 | img_file.close() 18 | os.system(f"open {file_name}") 19 | -------------------------------------------------------------------------------- /stable-diffusion/stable-diffusion/data/model_index.json: -------------------------------------------------------------------------------- 1 | { 2 | "_class_name": "StableDiffusionPipeline", 3 | "_diffusers_version": "0.15.1", 4 | 
"feature_extractor": [ 5 | null, 6 | null 7 | ], 8 | "requires_safety_checker": false, 9 | "safety_checker": [ 10 | null, 11 | null 12 | ], 13 | "scheduler": [ 14 | "diffusers", 15 | "PNDMScheduler" 16 | ], 17 | "text_encoder": [ 18 | "transformers", 19 | "CLIPTextModel" 20 | ], 21 | "tokenizer": [ 22 | "transformers", 23 | "CLIPTokenizer" 24 | ], 25 | "unet": [ 26 | "diffusers", 27 | "UNet2DConditionModel" 28 | ], 29 | "vae": [ 30 | "diffusers", 31 | "AutoencoderKL" 32 | ] 33 | } 34 | -------------------------------------------------------------------------------- /stable-diffusion/stable-diffusion/data/scheduler/scheduler_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "_class_name": "PNDMScheduler", 3 | "_diffusers_version": "0.15.1", 4 | "beta_end": 0.012, 5 | "beta_schedule": "scaled_linear", 6 | "beta_start": 0.00085, 7 | "clip_sample": false, 8 | "num_train_timesteps": 1000, 9 | "prediction_type": "epsilon", 10 | "set_alpha_to_one": false, 11 | "skip_prk_steps": true, 12 | "steps_offset": 1, 13 | "trained_betas": null 14 | } 15 | -------------------------------------------------------------------------------- /stable-diffusion/stable-diffusion/data/text_encoder/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "_name_or_path": "/root/.cache/huggingface/hub/models--stabilityai--stable-diffusion-2-1-base/snapshots/1f758383196d38df1dfe523ddb1030f2bfab7741/text_encoder", 3 | "architectures": [ 4 | "CLIPTextModel" 5 | ], 6 | "attention_dropout": 0.0, 7 | "bos_token_id": 0, 8 | "dropout": 0.0, 9 | "eos_token_id": 2, 10 | "hidden_act": "gelu", 11 | "hidden_size": 1024, 12 | "initializer_factor": 1.0, 13 | "initializer_range": 0.02, 14 | "intermediate_size": 4096, 15 | "layer_norm_eps": 1e-05, 16 | "max_position_embeddings": 77, 17 | "model_type": "clip_text_model", 18 | "num_attention_heads": 16, 19 | "num_hidden_layers": 23, 20 | "pad_token_id": 1, 21 | "projection_dim": 512, 22 | "torch_dtype": "float16", 23 | "transformers_version": "4.28.1", 24 | "vocab_size": 49408 25 | } 26 | -------------------------------------------------------------------------------- /stable-diffusion/stable-diffusion/data/tokenizer/special_tokens_map.json: -------------------------------------------------------------------------------- 1 | { 2 | "bos_token": { 3 | "content": "<|startoftext|>", 4 | "lstrip": false, 5 | "normalized": true, 6 | "rstrip": false, 7 | "single_word": false 8 | }, 9 | "eos_token": { 10 | "content": "<|endoftext|>", 11 | "lstrip": false, 12 | "normalized": true, 13 | "rstrip": false, 14 | "single_word": false 15 | }, 16 | "pad_token": "!", 17 | "unk_token": { 18 | "content": "<|endoftext|>", 19 | "lstrip": false, 20 | "normalized": true, 21 | "rstrip": false, 22 | "single_word": false 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /stable-diffusion/stable-diffusion/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/stable-diffusion/stable-diffusion/model/__init__.py -------------------------------------------------------------------------------- /stable-diffusion/stable-diffusion/show.py: -------------------------------------------------------------------------------- 1 | """ 2 | truss predict -d '{"prompt": "A heavily constructed solarpunk bridge over a canyon at sunset"}' | 
python show.py 3 | """ 4 | 5 | import base64 6 | import json 7 | import os 8 | import sys 9 | 10 | resp = sys.stdin.read() 11 | image = json.loads(resp)["data"] 12 | img = base64.b64decode(image) 13 | 14 | file_name = f"{image[-10:].replace('/', '')}.jpeg" 15 | img_file = open(file_name, "wb") 16 | img_file.write(img) 17 | img_file.close() 18 | os.system(f"open {file_name}") 19 | -------------------------------------------------------------------------------- /stable-diffusion/stable-video-diffusion/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/stable-diffusion/stable-video-diffusion/model/__init__.py -------------------------------------------------------------------------------- /stable-diffusion/stable-video-diffusion/model/scripts/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/stable-diffusion/stable-video-diffusion/model/scripts/__init__.py -------------------------------------------------------------------------------- /stable-diffusion/stable-video-diffusion/model/scripts/util/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/stable-diffusion/stable-video-diffusion/model/scripts/util/__init__.py -------------------------------------------------------------------------------- /stable-diffusion/stable-video-diffusion/model/scripts/util/detection/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/stable-diffusion/stable-video-diffusion/model/scripts/util/detection/__init__.py -------------------------------------------------------------------------------- /stable-diffusion/stable-video-diffusion/model/scripts/util/detection/p_head_v1.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/stable-diffusion/stable-video-diffusion/model/scripts/util/detection/p_head_v1.npz -------------------------------------------------------------------------------- /stable-diffusion/stable-video-diffusion/model/scripts/util/detection/w_head_v1.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/stable-diffusion/stable-video-diffusion/model/scripts/util/detection/w_head_v1.npz -------------------------------------------------------------------------------- /stable-diffusion/stable-video-diffusion/sample_images/cheetah.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/stable-diffusion/stable-video-diffusion/sample_images/cheetah.jpeg -------------------------------------------------------------------------------- /stable-diffusion/stable-video-diffusion/sample_images/racecar.jpeg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/stable-diffusion/stable-video-diffusion/sample_images/racecar.jpeg -------------------------------------------------------------------------------- /templates/README.md: -------------------------------------------------------------------------------- 1 | # Truss templates for different model backends 2 | `generate.yaml` contains configurations for different models with different backends / engines. `generate.py` generates the described models by copying the contents of the template and overriding the config with the provided values. 3 | 4 | `generate.py` accepts the following arguments: 5 | - `--only_check`: if passed, no files are generated; instead, the run fails if the existing files differ from the ones that would be generated 6 | - `--root`: path to the root of `truss-examples`; models are generated under this path 7 | - `--templates`: path to the templates; the generator reads `based_on` models from it 8 | - `--config`: path to the generation config 9 | -------------------------------------------------------------------------------- /templates/faster-whisper-truss/config.yaml: -------------------------------------------------------------------------------- 1 | base_image: 2 | image: baseten/truss-server-base:3.10-gpu-v0.4.9 3 | python_executable_path: /usr/bin/python3 4 | environment_variables: {} 5 | external_package_dirs: [] 6 | model_metadata: 7 | avatar_url: https://cdn.baseten.co/production/static/openai.png 8 | cover_image_url: https://cdn.baseten.co/production/static/whisper.png 9 | example_model_input: 10 | url: https://cdn.baseten.co/docs/production/Gettysburg.mp3 11 | model_id: large-v2 12 | pretty_name: Whisper 13 | tags: 14 | - speech-recognition 15 | model_name: Faster Whisper 16 | python_version: py39 17 | requirements: 18 | - faster-whisper==0.10.0 19 | resources: 20 | accelerator: T4 21 | cpu: 500m 22 | memory: 512Mi 23 | use_gpu: true 24 | secrets: {} 25 | system_packages: [] 26 | -------------------------------------------------------------------------------- /templates/faster-whisper-truss/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/templates/faster-whisper-truss/model/__init__.py -------------------------------------------------------------------------------- /templates/transformers-openai-compatible/config.yaml: -------------------------------------------------------------------------------- 1 | environment_variables: {} 2 | external_package_dirs: [] 3 | model_metadata: 4 | tags: 5 | - text-generation 6 | - openai-compatible 7 | python_version: py311 8 | requirements: 9 | - sentencepiece 10 | - accelerate 11 | - transformers==4.34.0 12 | - torch==2.0.1 13 | - hf_transfer==0.1.4 14 | resources: 15 | accelerator: A10G 16 | memory: 25Gi 17 | use_gpu: true 18 | secrets: {} 19 | system_packages: [] 20 | -------------------------------------------------------------------------------- /templates/transformers-openai-compatible/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/templates/transformers-openai-compatible/model/__init__.py -------------------------------------------------------------------------------- /templates/trt-llm/config.yaml: 
-------------------------------------------------------------------------------- 1 | base_image: 2 | image: nvcr.io/nvidia/tritonserver:23.10-trtllm-python-py3 3 | python_executable_path: /usr/bin/python3 4 | environment_variables: {} 5 | external_package_dirs: [] 6 | model_metadata: 7 | example_model_input: {} 8 | tags: 9 | - text-generation 10 | tensor_parallelism: 1 11 | python_version: py311 12 | requirements: 13 | - tritonclient[all] 14 | resources: 15 | accelerator: A100 16 | use_gpu: true 17 | runtime: 18 | predict_concurrency: 256 19 | secrets: {} 20 | system_packages: [] 21 | -------------------------------------------------------------------------------- /templates/trt-llm/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/templates/trt-llm/model/__init__.py -------------------------------------------------------------------------------- /trt-llm-engine-builder-templates/llama-3_1-70b-instruct/high_throughput/config.yaml: -------------------------------------------------------------------------------- 1 | build_commands: [] 2 | environment_variables: {} 3 | external_package_dirs: [] 4 | model_metadata: {} 5 | model_name: trtllm-llama3_1-70b-instruct-high_throughput 6 | python_version: py311 7 | requirements: [] 8 | resources: 9 | accelerator: H100:2 10 | cpu: '1' 11 | memory: 24Gi 12 | use_gpu: true 13 | secrets: 14 | hf_access_token: set token in baseten workspace 15 | system_packages: [] 16 | trt_llm: 17 | build: 18 | base_model: llama 19 | checkpoint_repository: 20 | repo: meta-llama/Llama-3.1-70B-Instruct 21 | source: HF 22 | max_batch_size: 16 23 | max_beam_width: 1 24 | max_seq_len: 4096 25 | num_builder_gpus: 4 # Need 4 builder GPUs for fp8 26 | quantization_type: fp8_kv 27 | tensor_parallel_count: 2 28 | -------------------------------------------------------------------------------- /trt-llm-engine-builder-templates/llama-3_1-70b-instruct/high_throughput/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/trt-llm-engine-builder-templates/llama-3_1-70b-instruct/high_throughput/model/__init__.py -------------------------------------------------------------------------------- /trt-llm-engine-builder-templates/llama-3_1-70b-instruct/large_context/config.yaml: -------------------------------------------------------------------------------- 1 | build_commands: [] 2 | environment_variables: {} 3 | external_package_dirs: [] 4 | model_metadata: {} 5 | model_name: trtllm-llama3.1_70b-instruct-large_context 6 | python_version: py311 7 | requirements: [] 8 | resources: 9 | accelerator: H100:2 10 | cpu: "1" 11 | memory: 24Gi 12 | use_gpu: true 13 | secrets: 14 | hf_access_token: set token in baseten workspace 15 | system_packages: [] 16 | trt_llm: 17 | build: 18 | base_model: llama 19 | checkpoint_repository: 20 | repo: meta-llama/Meta-Llama-3-70B-Instruct 21 | source: HF 22 | max_batch_size: 8 23 | max_beam_width: 1 24 | max_seq_len: 8192 25 | num_builder_gpus: 4 26 | quantization_type: no_quant 27 | tensor_parallel_count: 2 28 | -------------------------------------------------------------------------------- /trt-llm-engine-builder-templates/llama-3_1-70b-instruct/large_context/model/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/trt-llm-engine-builder-templates/llama-3_1-70b-instruct/large_context/model/__init__.py -------------------------------------------------------------------------------- /trt-llm-engine-builder-templates/llama-3_1-70b-instruct/low_ttft/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/trt-llm-engine-builder-templates/llama-3_1-70b-instruct/low_ttft/model/__init__.py -------------------------------------------------------------------------------- /trt-llm-engine-builder-templates/llama-3_1-8b-instruct/high_throughput/config.yaml: -------------------------------------------------------------------------------- 1 | build_commands: [] 2 | environment_variables: {} 3 | external_package_dirs: [] 4 | model_metadata: {} 5 | model_name: trtllm-llama-3_1-8b-instruct-high_throughput 6 | python_version: py311 7 | requirements: [] 8 | resources: 9 | accelerator: H100 10 | cpu: "1" 11 | memory: 24Gi 12 | use_gpu: true 13 | secrets: 14 | hf_access_token: set token in baseten workspace 15 | system_packages: [] 16 | trt_llm: 17 | build: 18 | base_model: llama 19 | checkpoint_repository: 20 | repo: meta-llama/Llama-3.1-8B-Instruct 21 | source: HF 22 | max_batch_size: 32 23 | quantization_type: fp8_kv 24 | max_beam_width: 1 25 | max_seq_len: 4096 26 | num_builder_gpus: 1 27 | -------------------------------------------------------------------------------- /trt-llm-engine-builder-templates/llama-3_1-8b-instruct/high_throughput/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/trt-llm-engine-builder-templates/llama-3_1-8b-instruct/high_throughput/model/__init__.py -------------------------------------------------------------------------------- /trt-llm-engine-builder-templates/llama-3_1-8b-instruct/large_context/config.yaml: -------------------------------------------------------------------------------- 1 | build_commands: [] 2 | environment_variables: {} 3 | external_package_dirs: [] 4 | model_metadata: {} 5 | model_name: trtllm-llama3.1_8b-instruct-large_context 6 | python_version: py311 7 | requirements: [] 8 | resources: 9 | accelerator: H100 10 | cpu: "1" 11 | memory: 24Gi 12 | use_gpu: true 13 | secrets: 14 | hf_access_token: set token in baseten workspace 15 | system_packages: [] 16 | trt_llm: 17 | build: 18 | base_model: llama 19 | checkpoint_repository: 20 | repo: meta-llama/Llama-3.1-8B-Instruct 21 | source: HF 22 | max_batch_size: 16 23 | max_beam_width: 1 24 | max_seq_len: 8192 25 | quantization_type: no_quant 26 | tensor_parallel_count: 1 27 | num_builder_gpus: 1 28 | -------------------------------------------------------------------------------- /trt-llm-engine-builder-templates/llama-3_1-8b-instruct/large_context/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/trt-llm-engine-builder-templates/llama-3_1-8b-instruct/large_context/model/__init__.py -------------------------------------------------------------------------------- /trt-llm-engine-builder-templates/llama-3_1-8b-instruct/low_ttft/config.yaml: 
-------------------------------------------------------------------------------- 1 | build_commands: [] 2 | environment_variables: {} 3 | external_package_dirs: [] 4 | model_metadata: {} 5 | model_name: trtllm-llama3_1_8b-instruct-low_ttft 6 | python_version: py311 7 | requirements: [] 8 | resources: 9 | accelerator: H100 10 | cpu: "1" 11 | memory: 24Gi 12 | use_gpu: true 13 | secrets: 14 | hf_access_token: set token in baseten workspace 15 | system_packages: [] 16 | trt_llm: 17 | build: 18 | base_model: llama 19 | checkpoint_repository: 20 | repo: meta-llama/Llama-3.1-8B-Instruct 21 | source: HF 22 | max_batch_size: 8 23 | max_beam_width: 1 24 | max_seq_len: 4096 25 | num_builder_gpus: 1 26 | quantization_type: fp8_kv 27 | tensor_parallel_count: 1 28 | plugin_configuration: 29 | use_paged_context_fmha: True 30 | use_fp8_context_fmha: True 31 | -------------------------------------------------------------------------------- /trt-llm-engine-builder-templates/llama-3_1-8b-instruct/low_ttft/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/trt-llm-engine-builder-templates/llama-3_1-8b-instruct/low_ttft/model/__init__.py -------------------------------------------------------------------------------- /ultravox/config.yaml: -------------------------------------------------------------------------------- 1 | base_image: 2 | image: vshulman/vllm-openai-fixie:latest 3 | python_executable_path: /usr/bin/python3 4 | model_metadata: 5 | arguments: 6 | model: fixie-ai/ultravox-v0.2 7 | audio_token_id: 128002 8 | environment_variables: {} 9 | external_package_dirs: [] 10 | model_name: Ultravox v0.2 11 | python_version: py310 12 | runtime: 13 | predict_concurrency: 512 14 | requirements: 15 | - httpx 16 | resources: 17 | accelerator: A100 18 | use_gpu: true 19 | secrets: {} 20 | system_packages: 21 | - python3.10-venv 22 | -------------------------------------------------------------------------------- /ultravox/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/ultravox/model/__init__.py -------------------------------------------------------------------------------- /vllm/config.yaml: -------------------------------------------------------------------------------- 1 | model_name: "Llama 3.1 8B Instruct VLLM openai compatible" 2 | python_version: py311 3 | model_metadata: 4 | example_model_input: {"prompt": "what is the meaning of life"} 5 | repo_id: meta-llama/Llama-3.1-8B-Instruct 6 | openai_compatible: true 7 | vllm_config: 8 | tensor_parallel_size: 1 9 | max_model_len: 4096 10 | enable_prefix_caching: true 11 | requirements: 12 | - vllm==0.5.4 13 | resources: 14 | accelerator: A100 15 | use_gpu: true 16 | runtime: 17 | predict_concurrency: 128 18 | secrets: 19 | hf_access_token: null 20 | -------------------------------------------------------------------------------- /vllm/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/vllm/model/__init__.py -------------------------------------------------------------------------------- /whisper/faster-whisper-small/config.yaml: -------------------------------------------------------------------------------- 1 | 
description: A small speech-to-text model for multi-lingual audio transcription. 2 | model_cache: 3 | - repo_id: Systran/faster-whisper-small 4 | model_metadata: 5 | avatar_url: https://cdn.baseten.co/production/static/openai.png 6 | cover_image_url: https://cdn.baseten.co/production/static/whisper.png 7 | example_model_input: 8 | url: https://cdn.baseten.co/docs/production/Gettysburg.mp3 9 | model_id: small 10 | pretty_name: Whisper 11 | tags: 12 | - speech-recognition 13 | model_name: Faster Whisper Small 14 | python_version: py39 15 | requirements: 16 | - torch==2.1.0 17 | - faster-whisper==1.0.3 18 | resources: 19 | accelerator: T4 20 | use_gpu: true 21 | -------------------------------------------------------------------------------- /whisper/faster-whisper-small/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/whisper/faster-whisper-small/model/__init__.py -------------------------------------------------------------------------------- /whisper/faster-whisper-v2/config.yaml: -------------------------------------------------------------------------------- 1 | description: Faster Whisper v2 2 | environment_variables: {} 3 | external_package_dirs: [] 4 | model_cache: 5 | - repo_id: Systran/faster-whisper-large-v2 6 | model_metadata: 7 | avatar_url: https://cdn.baseten.co/production/static/openai.png 8 | cover_image_url: https://cdn.baseten.co/production/static/whisper.png 9 | example_model_input: 10 | url: https://cdn.baseten.co/docs/production/Gettysburg.mp3 11 | model_id: large-v2 12 | pretty_name: Whisper 13 | tags: 14 | - speech-recognition 15 | model_name: Faster Whisper v2 16 | python_version: py39 17 | requirements: 18 | - torch==2.1.1 19 | - faster-whisper==1.0.3 20 | - ctranslate2==4.4.0 21 | - numpy==1.26.4 22 | resources: 23 | accelerator: A10G 24 | cpu: 500m 25 | memory: 512Mi 26 | use_gpu: true 27 | secrets: {} 28 | system_packages: [] 29 | -------------------------------------------------------------------------------- /whisper/faster-whisper-v2/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/whisper/faster-whisper-v2/model/__init__.py -------------------------------------------------------------------------------- /whisper/faster-whisper-v3/config.yaml: -------------------------------------------------------------------------------- 1 | description: Faster Whisper v3 2 | environment_variables: {} 3 | external_package_dirs: [] 4 | model_cache: 5 | - repo_id: Systran/faster-whisper-large-v3 6 | model_metadata: 7 | avatar_url: https://cdn.baseten.co/production/static/openai.png 8 | cover_image_url: https://cdn.baseten.co/production/static/whisper.png 9 | example_model_input: 10 | url: https://cdn.baseten.co/docs/production/Gettysburg.mp3 11 | model_id: large-v3 12 | pretty_name: Whisper 13 | tags: 14 | - speech-recognition 15 | model_name: Faster Whisper v3 16 | python_version: py39 17 | requirements: 18 | - torch==2.1.1 19 | - faster-whisper==1.0.3 20 | - ctranslate2==4.4.0 21 | - numpy==1.26.4 22 | resources: 23 | accelerator: A10G 24 | cpu: 500m 25 | memory: 512Mi 26 | use_gpu: true 27 | secrets: {} 28 | system_packages: [] 29 | -------------------------------------------------------------------------------- /whisper/faster-whisper-v3/model/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/whisper/faster-whisper-v3/model/__init__.py -------------------------------------------------------------------------------- /whisper/whisper-streaming/config.yaml: -------------------------------------------------------------------------------- 1 | base_image: 2 | image: baseten/truss-server-base:3.10-gpu-v0.4.9 3 | python_executable_path: /usr/bin/python3 4 | environment_variables: {} 5 | external_package_dirs: [] 6 | model_metadata: 7 | whisper_model: medium 8 | model_name: Whisper Streaming 9 | python_version: py310 10 | requirements: [] 11 | requirements_file: ./requirements.txt 12 | resources: 13 | accelerator: T4 14 | use_gpu: true 15 | secrets: {} 16 | system_packages: 17 | - ffmpeg 18 | -------------------------------------------------------------------------------- /whisper/whisper-streaming/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/whisper/whisper-streaming/model/__init__.py -------------------------------------------------------------------------------- /whisper/whisper-streaming/requirements.txt: -------------------------------------------------------------------------------- 1 | torch==2.1.0 2 | faster-whisper==0.10.0 3 | librosa==0.10.1 4 | opus-fast-mosestokenizer==0.0.8.5 5 | -------------------------------------------------------------------------------- /whisper/whisper-torchserve/config.yaml: -------------------------------------------------------------------------------- 1 | environment_variables: {} 2 | external_package_dirs: [] 3 | model_metadata: {} 4 | model_name: Whisper Torchserve 5 | python_version: py310 6 | requirements: 7 | - torch==2.1.0 8 | - torchserve==0.9.0 9 | - ffmpeg-python==0.2.0 10 | - transformers==4.37.2 11 | - nvgpu==0.10.0 12 | - httpx==0.27.0 13 | resources: 14 | accelerator: T4 15 | use_gpu: true 16 | model_cache: 17 | - repo_id: htrivedi99/whisper-torchserve 18 | secrets: {} 19 | system_packages: 20 | - ffmpeg 21 | - openjdk-11-jdk 22 | runtime: 23 | predict_concurrency: 128 24 | -------------------------------------------------------------------------------- /whisper/whisper-torchserve/data/config.properties: -------------------------------------------------------------------------------- 1 | inference_address=http://0.0.0.0:8888 2 | batch_size=16 3 | ipex_enable=true 4 | async_logging=true 5 | 6 | models={\ 7 | "whisper_base": {\ 8 | "1.0": {\ 9 | "defaultVersion": true,\ 10 | "marName": "whisper_base.mar",\ 11 | "minWorkers": 1,\ 12 | "maxWorkers": 4,\ 13 | "batchSize": 16,\ 14 | "maxBatchDelay": 250,\ 15 | "responseTimeout": 120\ 16 | }\ 17 | }\ 18 | } 19 | 20 | # maxBatchDelay is the amount of time to wait for the batch size to fill up. Default is 250 ms. 
21 | # default_workers_per_model=2 22 | -------------------------------------------------------------------------------- /whisper/whisper-torchserve/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/whisper/whisper-torchserve/model/__init__.py -------------------------------------------------------------------------------- /whisper/whisper-truss/config.yaml: -------------------------------------------------------------------------------- 1 | description: Transcribe audio files across multiple languages. 2 | environment_variables: {} 3 | external_data: 4 | - local_data_path: models/small.pt 5 | url: https://baseten-public.s3.us-west-2.amazonaws.com/models/whisper/small.pt 6 | external_package_dirs: [] 7 | model_metadata: 8 | avatar_url: https://cdn.baseten.co/production/static/openai.png 9 | cover_image_url: https://cdn.baseten.co/production/static/whisper.png 10 | example_model_input: 11 | url: https://cdn.baseten.co/docs/production/Gettysburg.mp3 12 | pretty_name: Whisper 13 | tags: 14 | - speech-recognition 15 | model_name: Whisper 16 | python_version: py39 17 | requirements: 18 | - openai-whisper==20230314 19 | - torch==2.0.1 20 | resources: 21 | accelerator: A10G 22 | cpu: '4' 23 | memory: 16Gi 24 | use_gpu: true 25 | secrets: {} 26 | system_packages: 27 | - ffmpeg 28 | -------------------------------------------------------------------------------- /whisper/whisper-truss/data/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/whisper/whisper-truss/data/.gitkeep -------------------------------------------------------------------------------- /whisper/whisper-truss/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/whisper/whisper-truss/model/__init__.py -------------------------------------------------------------------------------- /whisper/whisper-v3-truss-base64/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/whisper/whisper-v3-truss-base64/model/__init__.py -------------------------------------------------------------------------------- /whisper/whisper-v3-truss/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/whisper/whisper-v3-truss/model/__init__.py -------------------------------------------------------------------------------- /whisper/whisper-v3-turbo/config.yaml: -------------------------------------------------------------------------------- 1 | build_commands: [] 2 | environment_variables: {} 3 | external_package_dirs: [] 4 | model_metadata: {} 5 | model_name: Whisper 3 Turbo Engine 6 | python_version: py39 7 | requirements: [] 8 | resources: 9 | accelerator: A10G 10 | cpu: '1' 11 | memory: 24Gi 12 | use_gpu: true 13 | secrets: {} 14 | system_packages: [] 15 | trt_llm: 16 | build: 17 | base_model: whisper 18 | checkpoint_repository: 19 | repo: 
https://openaipublic.azureedge.net/main/whisper/models/aff26ae408abcba5fbf8813c21e62b0941638c5f6eebfb145be0c9839262a19a/large-v3-turbo.pt 20 | source: REMOTE_URL 21 | max_batch_size: 8 22 | max_beam_width: 1 23 | max_seq_len: 512 24 | num_builder_gpus: 1 25 | quantization_type: no_quant 26 | tensor_parallel_count: 1 27 | -------------------------------------------------------------------------------- /whisper/whisperx-truss/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/whisper/whisperx-truss/model/__init__.py -------------------------------------------------------------------------------- /xtts-streaming/config.yaml: -------------------------------------------------------------------------------- 1 | base_image: 2 | image: htrivedi05/xtts-streaming 3 | python_executable_path: /opt/conda/bin/python 4 | environment_variables: 5 | COQUI_TOS_AGREED: '1' 6 | external_package_dirs: [] 7 | model_metadata: {} 8 | model_name: XTTS Streaming - High Performance 9 | resources: 10 | accelerator: H100 11 | cpu: '3' 12 | memory: 10Gi 13 | use_gpu: true 14 | secrets: {} 15 | -------------------------------------------------------------------------------- /xtts-streaming/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/xtts-streaming/model/__init__.py -------------------------------------------------------------------------------- /xtts-streaming/requirements.txt: -------------------------------------------------------------------------------- 1 | git+https://github.com/coqui-ai/TTS@fa28f99f1508b5b5366539b2149963edcb80ba62 2 | deepspeed==0.10.3 3 | -------------------------------------------------------------------------------- /xtts-v2-truss/config.yaml: -------------------------------------------------------------------------------- 1 | environment_variables: 2 | COQUI_TOS_AGREED: "1" 3 | external_package_dirs: [] 4 | model_metadata: 5 | example_model_input: 6 | language: en 7 | speaker_voice: Claribel Dervla 8 | text: Kurt watched the incoming Pelicans. The blocky jet-powered craft were so distant they were only specks against the setting sun. He hit the magnification on his faceplate and saw lines of fire tracing their reentry vectors. They would touch down in three minutes. 9 | tags: 10 | - text-to-speech 11 | model_name: XTTS V2 12 | python_version: py310 13 | requirements: 14 | - git+https://github.com/htrivedi99/TTS.git 15 | resources: 16 | accelerator: T4 17 | cpu: '3' 18 | memory: 10Gi 19 | use_gpu: true 20 | secrets: {} 21 | system_packages: [] 22 | -------------------------------------------------------------------------------- /xtts-v2-truss/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/xtts-v2-truss/model/__init__.py --------------------------------------------------------------------------------
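Closing with a usage sketch for the XTTS V2 Truss above, in the same style as the earlier client example. The model ID is a hypothetical placeholder, and the response key ("output") and base64 WAV encoding are assumptions, since only the model's `__init__.py` link is shown here:

import base64
import os

import requests

model_id = "YOUR_MODEL_ID"  # hypothetical placeholder for your deployment
resp = requests.post(
    f"https://model-{model_id}.api.baseten.co/production/predict",
    headers={"Authorization": f"Api-Key {os.environ['BASETEN_API_KEY']}"},
    json={
        "language": "en",
        "speaker_voice": "Claribel Dervla",
        "text": "Kurt watched the incoming Pelicans.",
    },
)
resp.raise_for_status()

# Assumption: the model returns base64-encoded WAV bytes under an "output" key.
with open("speech.wav", "wb") as f:
    f.write(base64.b64decode(resp.json()["output"]))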