├── .droid.yaml ├── .github ├── actions │ └── setup-python │ │ └── action.yml └── workflows │ ├── pr.yml │ ├── test-examples.yml │ ├── truss_deploy.yml │ └── warm-chains.yml ├── .gitignore ├── .isort.cfg ├── .pre-commit-config.yaml ├── .tool-versions ├── 01-getting-started-bert ├── config.yaml ├── doc.yaml └── model │ ├── __init__.py │ └── model.py ├── 02-llm ├── config.yaml ├── doc.yaml └── model │ ├── __init__.py │ └── model.py ├── 03-llm-with-streaming ├── config.yaml ├── doc.yaml └── model │ ├── __init__.py │ └── model.py ├── 04-image-generation ├── config.yaml ├── doc.yaml └── model │ ├── __init__.py │ └── model.py ├── 05-speech-to-text ├── config.yaml └── model │ ├── __init__.py │ └── model.py ├── 06-high-performance-cached-weights ├── config.yaml ├── doc.yaml └── model │ ├── __init__.py │ └── model.py ├── 07-high-performance-dynamic-batching ├── .gitignore ├── .truss_ignore ├── README.md ├── config.yaml ├── model │ ├── __init__.py │ └── model.py ├── packages │ ├── __init__.py │ ├── run.py │ ├── tokenizer.py │ └── whisper_utils.py └── test.py ├── 09-private-huggingface ├── config.yaml ├── doc.yaml └── model │ ├── __init__.py │ └── model.py ├── 10-using-system-packages ├── config.yaml ├── doc.yaml └── model │ ├── __init__.py │ └── model.py ├── 11-embeddings-reranker-classification-tensorrt ├── BEI-allenai-llama-3.1-tulu-3-8b-reward-model-fp8 │ ├── README.md │ └── config.yaml ├── BEI-baai-bge-en-icl-embedding-fp8 │ ├── README.md │ └── config.yaml ├── BEI-baai-bge-large-en-v1.5-embedding │ ├── README.md │ └── config.yaml ├── BEI-baai-bge-m3-embedding-dense │ ├── README.md │ └── config.yaml ├── BEI-baai-bge-multilingual-gemma2-multilingual-embedding │ ├── README.md │ └── config.yaml ├── BEI-baai-bge-reranker-large │ ├── README.md │ └── config.yaml ├── BEI-baai-bge-reranker-v2-m3-multilingual │ ├── README.md │ └── config.yaml ├── BEI-baseten-example-meta-llama-3-70b-instructforsequenceclassification-fp8 │ ├── README.md │ └── config.yaml ├── BEI-intfloat-e5-mistral-7b-instruct-embedding-fp8 │ ├── README.md │ └── config.yaml ├── BEI-mixedbread-ai-mxbai-embed-large-v1-embedding │ ├── README.md │ └── config.yaml ├── BEI-mixedbread-ai-mxbai-rerank-base-v2-reranker-fp8 │ ├── README.md │ └── config.yaml ├── BEI-mixedbread-ai-mxbai-rerank-large-v2-reranker-fp8 │ ├── README.md │ └── config.yaml ├── BEI-ncbi-medcpt-cross-encoder-reranker │ ├── README.md │ └── config.yaml ├── BEI-nomic-ai-nomic-embed-code-fp8 │ ├── README.md │ └── config.yaml ├── BEI-papluca-xlm-roberta-base-language-detection-classification │ ├── README.md │ └── config.yaml ├── BEI-qwen-qwen3-embedding-0.6b-fp8 │ ├── README.md │ └── config.yaml ├── BEI-qwen-qwen3-embedding-4b-fp8 │ ├── README.md │ └── config.yaml ├── BEI-qwen-qwen3-embedding-8b-fp8 │ ├── README.md │ └── config.yaml ├── BEI-qwen-qwen3-reranker-0.6b-fp8 │ ├── README.md │ └── config.yaml ├── BEI-qwen-qwen3-reranker-4b-fp8 │ ├── README.md │ └── config.yaml ├── BEI-qwen-qwen3-reranker-8b-fp8 │ ├── README.md │ └── config.yaml ├── BEI-salesforce-sfr-embedding-mistral-fp8 │ ├── README.md │ └── config.yaml ├── BEI-samlowe-roberta-base-go_emotions-classification │ ├── README.md │ └── config.yaml ├── BEI-skywork-skywork-reward-llama-3.1-8b-v0.2-reward-model-fp8 │ ├── README.md │ ├── chat_template_deployment.py │ └── config.yaml ├── BEI-snowflake-snowflake-arctic-embed-l-v2.0 │ ├── README.md │ └── config.yaml ├── BEI-whereisai-uae-large-v1-embedding │ ├── README.md │ └── config.yaml ├── Briton-deepseek-ai-deepseek-r1-distill-llama-70b-fp8 │ ├── README.md │ └── config.yaml ├── 
Briton-deepseek-ai-deepseek-r1-distill-qwen-32b-fp8 │ ├── README.md │ └── config.yaml ├── Briton-meta-llama-llama-3.1-405b-fp8 │ ├── README.md │ └── config.yaml ├── Briton-meta-llama-llama-3.1-8b-instruct-with-speculative-lookahead-decoding-fp8 │ ├── README.md │ └── config.yaml ├── Briton-meta-llama-llama-3.2-1b-instruct-fp8 │ ├── README.md │ └── config.yaml ├── Briton-meta-llama-llama-3.2-3b-instruct │ ├── README.md │ └── config.yaml ├── Briton-meta-llama-llama-3.3-70b-instruct-fp8 │ ├── README.md │ └── config.yaml ├── Briton-meta-llama-llama-3.3-70b-instruct-speculative-with-1b-external-draft-fp8 │ ├── README.md │ └── config.yaml ├── Briton-meta-llama-llama-3.3-70b-instruct-tp4-fp8 │ ├── README.md │ └── config.yaml ├── Briton-microsoft-phi-4-fp8 │ ├── README.md │ └── config.yaml ├── Briton-mistralai-mistral-7b-instruct-v0.3 │ ├── README.md │ └── config.yaml ├── Briton-mistralai-mistral-small-24b-instruct-2501-fp8 │ ├── README.md │ └── config.yaml ├── Briton-qwen-qwen2-57b-a14b-moe-int4 │ ├── README.md │ └── config.yaml ├── Briton-qwen-qwen2.5-72b-instruct-tp2-fp8 │ ├── README.md │ └── config.yaml ├── Briton-qwen-qwen2.5-7b-instruct-with-speculative-lookahead-decoding-fp8 │ ├── README.md │ └── config.yaml ├── Briton-qwen-qwen3-32b-fp8 │ ├── README.md │ └── config.yaml ├── Briton-qwen-qwq-32b-reasoning-fp8 │ ├── README.md │ └── config.yaml ├── Briton-qwen-qwq-32b-reasoning-with-speculative-fp8 │ ├── README.md │ └── config.yaml ├── Briton-tiiuae-falcon3-10b-instruct-fp8 │ ├── README.md │ └── config.yaml ├── README.md ├── TEI-alibaba-nlp-gte-modernbert-base-embedding │ ├── README.md │ └── config.yaml ├── TEI-alibaba-nlp-gte-qwen2-1.5b-instruct-embedding │ ├── README.md │ └── config.yaml ├── TEI-alibaba-nlp-gte-qwen2-7b-instruct-embedding │ ├── README.md │ └── config.yaml ├── TEI-alibaba-nlp-gte-reranker-modernbert-base │ ├── README.md │ └── config.yaml ├── TEI-intfloat-multilingual-e5-large-instruct │ ├── README.md │ └── config.yaml ├── TEI-jina-ai-jina-embeddings-v2-base-en │ ├── README.md │ └── config.yaml ├── TEI-jinaai-jina-embeddings-v2-base-code │ ├── README.md │ └── config.yaml ├── TEI-mixedbread-ai-mxbai-embed-large-v1-embedding │ ├── README.md │ └── config.yaml ├── TEI-nomic-ai-nomic-embed-text-v1.5 │ ├── README.md │ └── config.yaml ├── TEI-nomic-ai-nomic-embed-text-v2-moe │ ├── README.md │ └── config.yaml ├── TEI-sentence-transformers-all-minilm-l6-v2-embedding │ ├── README.md │ └── config.yaml ├── TEI-taylorai-bge-micro-v2 │ ├── README.md │ └── config.yaml └── templating │ ├── .internal_tei │ ├── Dockerfile │ └── roll_out_docker.sh │ ├── README.md │ ├── deploy_all.py │ └── generate_templates.py ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── assets ├── comfyui-screenshot-1.png ├── comfyui-screenshot-2.png ├── comfyui-screenshot-3.png └── sdxl-controlnet-workflow.json ├── audiogen-medium ├── README.md ├── config.yaml └── model │ ├── __init__.py │ └── model.py ├── bin ├── image.txt ├── test_example.py ├── test_truss_deploy.py └── validate_ci.py ├── binocular ├── config.yaml ├── model │ ├── __init__.py │ └── model.py └── packages │ └── config.py ├── chains-examples └── docs │ ├── audio-transcription │ ├── README.md │ ├── data_types.py │ ├── helpers.py │ ├── transcribe.py │ └── whisper_chainlet.py │ └── poems │ └── poems.py ├── chatterbox-tts ├── README.md ├── config.yaml ├── docker │ ├── Dockerfile │ └── docker_build.sh ├── input │ └── obama_8s.wav ├── model │ ├── __init__.py │ └── model.py └── run_tts.py ├── ci.yaml ├── clip ├── config.yaml └── model │ ├── __init__.py │ └── 
model.py ├── cogvlm ├── README.md ├── config.yaml └── model │ ├── __init__.py │ └── model.py ├── comfyui-truss ├── README.md ├── config.yaml ├── data │ ├── comfy_ui_workflow.json │ └── model.json ├── examples │ ├── animate-diff │ │ ├── model.json │ │ └── workflow.json │ ├── anime-style-transfer │ │ ├── config.yaml │ │ └── workflow.json │ ├── sdxl-controlnet │ │ ├── model.json │ │ └── workflow.json │ └── sdxl-with-refiner │ │ ├── model.json │ │ └── workflow.json └── model │ ├── __init__.py │ ├── helpers.py │ └── model.py ├── control-net-qrcode ├── README.md ├── config.yaml ├── controlnet_qr_code_results.gif ├── model │ ├── __init__.py │ └── model.py ├── twitter_mask.jpeg └── twitter_output.jpg ├── custom-server ├── README.md ├── deepseek-v2-5-instruct-sglang │ └── config.yaml ├── infinity-embedding-server │ ├── README.md │ └── config.yaml ├── llama3-70b-instruct-lmdeploy │ └── config.yaml ├── llama3-70b-instruct-sglang │ └── config.yaml ├── llama3-8b-instruct-lmdeploy │ └── config.yaml ├── llama3-8b-instruct-sglang │ └── config.yaml ├── llama3_eval.py ├── pixtral-12b │ ├── README.md │ ├── config.yaml │ └── data │ │ └── pixtral12b.jinja └── ultravox-0.4 │ ├── README.md │ └── config.yaml ├── deepfloyd-xl ├── README.md ├── config.yaml └── model │ ├── __init__.py │ └── model.py ├── deepseek ├── engine-deepseek-r1-distill-llama-70b │ ├── README.md │ └── config.yaml ├── engine-deepseek-r1-distill-llama-8b │ ├── README.md │ └── config.yaml ├── engine-deepseek-r1-distill-qwen-14b │ ├── README.md │ └── config.yaml ├── engine-deepseek-r1-distill-qwen-32b │ ├── README.md │ └── config.yaml └── engine-deepseek-r1-distill-qwen-7b │ ├── README.md │ └── config.yaml ├── deepspeed-mii ├── README.md ├── config.yaml └── model │ ├── __init__.py │ └── model.py ├── dis-segmentation ├── README.md ├── config.yaml └── model │ ├── __init__.py │ ├── clone_repo_helper.py │ ├── helpers.py │ └── model.py ├── dockerfiles └── ComfyUI.dockerfile ├── falcon ├── falcon3-10B-trt-llm-spec-dec │ ├── README.md │ └── config.yaml └── falcon3-3B-trt-llm-engine-high-throughput │ ├── README.md │ └── config.yaml ├── flux ├── README.md ├── dev │ ├── config.yaml │ └── model │ │ ├── __init__.py │ │ └── model.py └── schnell │ ├── config.yaml │ └── model │ ├── __init__.py │ └── model.py ├── fotographer └── zenctrl │ ├── README.md │ ├── call.py │ ├── config.yaml │ ├── images │ ├── banner_1.png │ ├── camera.png │ ├── speaker-input.png │ └── speaker-output.png │ └── requirements.txt ├── gemma ├── gemma-2-27b-it-vllm │ ├── README.md │ ├── config.yaml │ └── model │ │ ├── __init__.py │ │ └── model.py ├── gemma-2-9b-it-vllm │ ├── README.md │ ├── config.yaml │ └── model │ │ ├── __init__.py │ │ └── model.py └── gemma-3-27b-it │ └── config.yaml ├── gfp-gan ├── LICENSE ├── README.md ├── config.yaml ├── data │ └── .gitkeep ├── input.json └── model │ ├── __init__.py │ └── model.py ├── image-segmentation ├── config.yaml └── model │ ├── __init__.py │ └── model.py ├── internal └── config.yaml ├── ip-adapter ├── README.md ├── config.yaml └── model │ ├── __init__.py │ └── model.py ├── jsonformatter ├── config.yaml ├── data │ └── schema.json └── model │ ├── __init__.py │ └── model.py ├── kokoro ├── README.md ├── call.py ├── config.yaml └── model │ ├── __init__.py │ └── model.py ├── layoutlm-document-qa ├── README.md ├── config.yaml └── model │ ├── __init__.py │ └── model.py ├── llama-cpp-server ├── README.md ├── config.yaml ├── cuda.Dockerfile └── llama_server_help ├── llama ├── engine-llama-3-1-70b-instruct │ ├── README.md │ └── config.yaml ├── 
engine-llama-3-1-8b-instruct │ ├── README.md │ └── config.yaml ├── engine-llama-3-3-70b-instruct │ ├── README.md │ └── config.yaml ├── engine-llama-3.1-405b-instruct │ ├── README.md │ └── config.yaml ├── llama-2-13b-chat │ ├── README.md │ ├── config.yaml │ └── model │ │ ├── __init__.py │ │ └── model.py ├── llama-2-13b │ ├── README.md │ ├── config.yaml │ └── model │ │ ├── __init__.py │ │ └── model.py ├── llama-2-70b-chat │ ├── README.md │ ├── config.yaml │ └── model │ │ ├── __init__.py │ │ └── model.py ├── llama-2-70b │ ├── README.md │ ├── config.yaml │ └── model │ │ ├── __init__.py │ │ └── model.py ├── llama-2-7b-chat │ ├── README.md │ ├── config.yaml │ └── model │ │ ├── __init__.py │ │ └── model.py ├── llama-2-7b │ ├── README.md │ ├── config.yaml │ └── model │ │ ├── __init__.py │ │ └── model.py ├── llama-3-70b-instruct │ ├── README.md │ ├── config.yaml │ └── model │ │ ├── __init__.py │ │ └── model.py ├── llama-3-8b-instruct │ ├── README.md │ ├── config.yaml │ └── model │ │ ├── __init__.py │ │ └── model.py ├── llama-3_1-405b-instruct │ ├── README.md │ ├── config.yaml │ └── model │ │ ├── __init__.py │ │ ├── model.py │ │ └── sighelper.py ├── llama-3_1-8b-instruct-sglang │ ├── config.yaml │ └── model │ │ ├── __init__.py │ │ └── model.py ├── llama-3_1-8b-instruct │ ├── config.yaml │ └── model │ │ ├── __init__.py │ │ └── model.py ├── llama-3_1_70b-instruct │ ├── config.yaml │ └── model │ │ ├── __init__.py │ │ ├── model.py │ │ └── sighelper.py ├── llama-3_2-11b-vision-instruct │ ├── README.md │ └── config.yaml ├── llama-4-maverick-17b-128e-instruct-fp8-vllm │ ├── config.yaml │ └── data │ │ └── do.sh ├── llama-4-scout-17b-16e-instruct-bf16-vllm │ ├── config.yaml │ └── data │ │ └── do.sh ├── llama-7b-exllama-streaming │ ├── README.md │ ├── config.yaml │ └── model │ │ ├── __init__.py │ │ └── model.py ├── llama-7b-exllama │ ├── README.md │ ├── config.yaml │ └── model │ │ ├── __init__.py │ │ └── model.py ├── llama-7b-vllm │ ├── config.yaml │ └── model │ │ ├── __init__.py │ │ └── model.py ├── llama-7b │ ├── README.md │ ├── config.yaml │ ├── data │ │ ├── config.json │ │ ├── generation_config.json │ │ └── pytorch_model.bin.index.json │ └── model │ │ ├── __init__.py │ │ └── model.py └── tinyllama-1.1B-chat-v1.0 │ └── config.yaml ├── llava ├── llava-1.6-sgl │ ├── README.md │ ├── config.yaml │ ├── model │ │ ├── __init__.py │ │ └── model.py │ └── requirements.txt ├── llava-v1.5-7b │ ├── README.md │ ├── config.yaml │ ├── model │ │ ├── __init__.py │ │ └── model.py │ └── packages │ │ └── llava │ │ ├── __init__.py │ │ ├── constants.py │ │ ├── conversation.py │ │ ├── eval │ │ ├── eval_gpt_review.py │ │ ├── eval_gpt_review_bench.py │ │ ├── eval_gpt_review_visual.py │ │ ├── eval_pope.py │ │ ├── eval_science_qa.py │ │ ├── eval_science_qa_gpt4.py │ │ ├── eval_science_qa_gpt4_requery.py │ │ ├── eval_textvqa.py │ │ ├── generate_webpage_data_from_table.py │ │ ├── m4c_evaluator.py │ │ ├── model_qa.py │ │ ├── model_vqa.py │ │ ├── model_vqa_loader.py │ │ ├── model_vqa_mmbench.py │ │ ├── model_vqa_qbench.py │ │ ├── model_vqa_science.py │ │ ├── qa_baseline_gpt35.py │ │ ├── run_llava.py │ │ ├── summarize_gpt_review.py │ │ ├── table │ │ │ ├── answer │ │ │ │ ├── answer_alpaca-13b.jsonl │ │ │ │ ├── answer_bard.jsonl │ │ │ │ ├── answer_gpt35.jsonl │ │ │ │ ├── answer_llama-13b.jsonl │ │ │ │ └── answer_vicuna-13b.jsonl │ │ │ ├── caps_boxes_coco2014_val_80.jsonl │ │ │ ├── model.jsonl │ │ │ ├── prompt.jsonl │ │ │ ├── question.jsonl │ │ │ ├── results │ │ │ │ ├── test_sqa_llava_13b_v0.json │ │ │ │ └── 
test_sqa_llava_lcs_558k_sqa_12e_vicuna_v1_3_13b.json │ │ │ ├── review │ │ │ │ ├── review_alpaca-13b_vicuna-13b.jsonl │ │ │ │ ├── review_bard_vicuna-13b.jsonl │ │ │ │ ├── review_gpt35_vicuna-13b.jsonl │ │ │ │ └── review_llama-13b_vicuna-13b.jsonl │ │ │ ├── reviewer.jsonl │ │ │ └── rule.json │ │ └── webpage │ │ │ ├── figures │ │ │ ├── alpaca.png │ │ │ ├── bard.jpg │ │ │ ├── chatgpt.svg │ │ │ ├── llama.jpg │ │ │ ├── swords_FILL0_wght300_GRAD0_opsz48.svg │ │ │ └── vicuna.jpeg │ │ │ ├── index.html │ │ │ ├── script.js │ │ │ └── styles.css │ │ ├── mm_utils.py │ │ ├── model │ │ ├── __init__.py │ │ ├── apply_delta.py │ │ ├── builder.py │ │ ├── consolidate.py │ │ ├── language_model │ │ │ ├── llava_llama.py │ │ │ ├── llava_mpt.py │ │ │ └── mpt │ │ │ │ ├── adapt_tokenizer.py │ │ │ │ ├── attention.py │ │ │ │ ├── blocks.py │ │ │ │ ├── configuration_mpt.py │ │ │ │ ├── custom_embedding.py │ │ │ │ ├── flash_attn_triton.py │ │ │ │ ├── hf_prefixlm_converter.py │ │ │ │ ├── meta_init_context.py │ │ │ │ ├── modeling_mpt.py │ │ │ │ ├── norm.py │ │ │ │ └── param_init_fns.py │ │ ├── llava_arch.py │ │ ├── make_delta.py │ │ ├── multimodal_encoder │ │ │ ├── builder.py │ │ │ └── clip_encoder.py │ │ ├── multimodal_projector │ │ │ └── builder.py │ │ └── utils.py │ │ ├── serve │ │ ├── __init__.py │ │ ├── cli.py │ │ ├── controller.py │ │ ├── examples │ │ │ ├── extreme_ironing.jpg │ │ │ └── waterview.jpg │ │ ├── gradio_web_server.py │ │ ├── model_worker.py │ │ ├── register_worker.py │ │ └── test_message.py │ │ ├── train │ │ ├── llama_flash_attn_monkey_patch.py │ │ ├── llama_xformers_attn_monkey_patch.py │ │ ├── llava_trainer.py │ │ ├── train.py │ │ ├── train_mem.py │ │ └── train_xformers.py │ │ └── utils.py └── llava-v1.6-34b │ ├── README.md │ ├── config.yaml │ ├── input.json │ └── model │ ├── __init__.py │ └── model.py ├── magic-animate ├── README.md ├── config.yaml └── model │ ├── __init__.py │ ├── configs │ ├── inference │ │ └── inference.yaml │ └── prompts │ │ └── animation.yaml │ ├── demo │ └── animate.py │ ├── magicanimate │ ├── models │ │ ├── appearance_encoder.py │ │ ├── attention.py │ │ ├── controlnet.py │ │ ├── embeddings.py │ │ ├── motion_module.py │ │ ├── mutual_self_attention.py │ │ ├── orig_attention.py │ │ ├── resnet.py │ │ ├── stable_diffusion_controlnet_reference.py │ │ ├── unet.py │ │ ├── unet_3d_blocks.py │ │ └── unet_controlnet.py │ ├── pipelines │ │ ├── animation.py │ │ ├── context.py │ │ └── pipeline_animation.py │ └── utils │ │ ├── dist_tools.py │ │ ├── util.py │ │ └── videoreader.py │ └── model.py ├── metavoice-1b ├── README.md ├── config.yaml ├── data │ └── bria.mp3 ├── model │ ├── __init__.py │ └── model.py ├── process.py └── requirements.txt ├── mistral ├── engine-devstral │ └── config.yaml ├── engine-mistral-7b-instruct │ ├── README.md │ └── config.yaml ├── engine-mistral-small-3 │ ├── README.md │ └── config.yaml ├── engine-mixtral-8x22b-instruct │ ├── README.md │ └── config.yaml ├── engine-mixtral-8x7b-instruct │ ├── README.md │ └── config.yaml ├── mistral-7b-chat │ ├── README.md │ ├── config.yaml │ └── model │ │ ├── __init__.py │ │ └── model.py ├── mistral-7b-instruct-vllm │ ├── README.md │ ├── config.yaml │ └── model │ │ ├── __init__.py │ │ └── model.py ├── mistral-7b-instruct │ ├── README.md │ ├── config.yaml │ └── model │ │ ├── __init__.py │ │ └── model.py ├── mistral-7b │ ├── README.md │ ├── config.yaml │ └── model │ │ ├── __init__.py │ │ └── model.py ├── mistral-small-3.1 │ └── config.yaml ├── mixtral-8x22b-trt-int8-weights-only │ ├── README.md │ ├── TRT-LLM-README.md │ ├── config.yaml │ 
├── data │ │ └── .gitattributes │ ├── model │ │ ├── __init__.py │ │ └── model.py │ └── packages │ │ ├── client.py │ │ ├── inflight_batcher_llm │ │ ├── ensemble │ │ │ └── config.pbtxt │ │ ├── postprocessing │ │ │ ├── 1 │ │ │ │ └── model.py │ │ │ └── config.pbtxt │ │ ├── preprocessing │ │ │ ├── 1 │ │ │ │ └── model.py │ │ │ └── config.pbtxt │ │ └── tensorrt_llm │ │ │ └── config.pbtxt │ │ └── utils.py ├── mixtral-8x22b │ ├── README.md │ ├── config.yaml │ └── model │ │ ├── __init__.py │ │ └── model.py ├── mixtral-8x7b-instruct-trt-llm-h100 │ ├── README.md │ ├── TRT-LLM-README.md │ ├── config.yaml │ ├── data │ │ └── .gitattributes │ ├── model │ │ ├── __init__.py │ │ └── model.py │ └── packages │ │ ├── client.py │ │ ├── inflight_batcher_llm │ │ ├── ensemble │ │ │ └── config.pbtxt │ │ ├── postprocessing │ │ │ ├── 1 │ │ │ │ └── model.py │ │ │ └── config.pbtxt │ │ ├── preprocessing │ │ │ ├── 1 │ │ │ │ └── model.py │ │ │ └── config.pbtxt │ │ └── tensorrt_llm │ │ │ └── config.pbtxt │ │ └── utils.py ├── mixtral-8x7b-instruct-trt-llm-weights-only-quant-h100 │ ├── README.md │ ├── TRT-LLM-README.md │ ├── config.yaml │ ├── data │ │ └── .gitattributes │ ├── model │ │ ├── __init__.py │ │ └── model.py │ └── packages │ │ ├── client.py │ │ ├── inflight_batcher_llm │ │ ├── ensemble │ │ │ └── config.pbtxt │ │ ├── postprocessing │ │ │ ├── 1 │ │ │ │ └── model.py │ │ │ └── config.pbtxt │ │ ├── preprocessing │ │ │ ├── 1 │ │ │ │ └── model.py │ │ │ └── config.pbtxt │ │ └── tensorrt_llm │ │ │ └── config.pbtxt │ │ └── utils.py ├── mixtral-8x7b-instruct-trt-llm-weights-only-quant │ ├── README.md │ ├── TRT-LLM-README.md │ ├── config.yaml │ ├── data │ │ └── .gitattributes │ ├── model │ │ ├── __init__.py │ │ └── model.py │ └── packages │ │ ├── client.py │ │ ├── inflight_batcher_llm │ │ ├── ensemble │ │ │ └── config.pbtxt │ │ ├── postprocessing │ │ │ ├── 1 │ │ │ │ └── model.py │ │ │ └── config.pbtxt │ │ ├── preprocessing │ │ │ ├── 1 │ │ │ │ └── model.py │ │ │ └── config.pbtxt │ │ └── tensorrt_llm │ │ │ └── config.pbtxt │ │ └── utils.py ├── mixtral-8x7b-instruct-trt-llm │ ├── README.md │ ├── TRT-LLM-README.md │ ├── config.yaml │ ├── data │ │ └── .gitattributes │ ├── model │ │ ├── __init__.py │ │ └── model.py │ └── packages │ │ ├── client.py │ │ ├── inflight_batcher_llm │ │ ├── ensemble │ │ │ └── config.pbtxt │ │ ├── postprocessing │ │ │ ├── 1 │ │ │ │ └── model.py │ │ │ └── config.pbtxt │ │ ├── preprocessing │ │ │ ├── 1 │ │ │ │ └── model.py │ │ │ └── config.pbtxt │ │ └── tensorrt_llm │ │ │ └── config.pbtxt │ │ └── utils.py ├── mixtral-8x7b-instruct-vllm-a100-t-tp2 │ ├── README.md │ ├── config.yaml │ └── model │ │ ├── __init__.py │ │ └── model.py ├── mixtral-8x7b-instruct-vllm │ ├── README.md │ ├── config.yaml │ └── model │ │ ├── __init__.py │ │ └── model.py └── pixtral-12b │ ├── README.md │ ├── config.yaml │ └── model │ ├── __init__.py │ └── model.py ├── multiprocessing ├── README.md ├── config.yaml └── model │ ├── __init__.py │ ├── model.py │ └── test.py ├── musicgen-large ├── README.md ├── config.yaml └── model │ ├── __init__.py │ └── model.py ├── musicgen-melody ├── README.md ├── config.yaml └── model │ ├── __init__.py │ └── model.py ├── nemotron ├── llama-3-1-nemotron-70b-instruct │ ├── README.md │ └── config.yaml └── nemotron-ultra-253b │ └── config.yaml ├── ngram-speculator ├── truss │ ├── config.yaml │ └── model │ │ ├── __init__.py │ │ └── model.py └── trussless │ └── config.yaml ├── nous-capybara ├── nous-capybara-34b-openai │ ├── README.md │ ├── config.yaml │ └── model │ │ ├── __init__.py │ │ └── model.py └── 
nous-capybara-34b │ ├── README.md │ ├── config.yaml │ └── model │ ├── __init__.py │ └── model.py ├── nsql ├── README.md ├── config.yaml └── model │ ├── __init__.py │ └── model.py ├── orpheus-best-performance ├── call.py ├── config.yaml ├── model │ └── model.py └── snac_batching_quantization_dev.py ├── phi ├── phi-3-mini-128k-instruct │ ├── README.md │ ├── config.yaml │ └── model │ │ ├── __init__.py │ │ └── model.py ├── phi-3-mini-4k-instruct │ ├── README.md │ ├── config.yaml │ └── model │ │ ├── __init__.py │ │ └── model.py └── phi-3.5-mini │ ├── README.md │ ├── config.yaml │ └── model │ ├── __init__.py │ ├── helper.py │ └── model.py ├── piper-tts ├── README.md ├── config.yaml └── model │ ├── __init__.py │ └── model.py ├── playground-v2-aesthetic ├── README.md ├── config.yaml ├── model │ ├── __init__.py │ └── model.py └── show.py ├── poetry.lock ├── pyproject.toml ├── qwen ├── BEI-qwen-qwen3-embedding-0.6b-fp8 │ ├── README.md │ └── config.yaml ├── BEI-qwen-qwen3-embedding-4b-fp8 │ ├── README.md │ └── config.yaml ├── BEI-qwen-qwen3-embedding-8b-fp8 │ ├── README.md │ └── config.yaml ├── BEI-qwen-qwen3-reranker-0.6b-fp8 │ ├── README.md │ └── config.yaml ├── BEI-qwen-qwen3-reranker-4b-fp8 │ ├── README.md │ └── config.yaml ├── BEI-qwen-qwen3-reranker-8b-fp8 │ ├── README.md │ └── config.yaml ├── engine-qwen-2-5-14b-coder-instruct │ ├── README.md │ └── config.yaml ├── engine-qwen-2-5-14b-instruct │ ├── README.md │ └── config.yaml ├── engine-qwen-2-5-32b-coder-instruct │ ├── README.md │ └── config.yaml ├── engine-qwen-2-5-32b-instruct │ ├── README.md │ └── config.yaml ├── engine-qwen-2-5-3b-instruct │ ├── README.md │ └── config.yaml ├── engine-qwen-2-5-72b-instruct │ ├── README.md │ └── config.yaml ├── engine-qwen-2-5-72b-math-instruct │ ├── README.md │ └── config.yaml ├── engine-qwen-2-5-7b-coder-instruct │ ├── README.md │ └── config.yaml ├── engine-qwen-2-5-7b-instruct │ ├── README.md │ └── config.yaml ├── engine-qwen-2-5-7b-math-instruct │ ├── README.md │ └── config.yaml ├── engine-qwen-3-06b │ └── config.yaml ├── engine-qwen-3-32b │ └── config.yaml ├── engine-qwen-3-4b │ └── config.yaml ├── model_auto.py ├── qwen-3-235B-sglang │ └── config.yaml ├── qwen-3-30B-A3-sglang │ └── config.yaml ├── qwen-3-30B-A3-vllm │ └── config.yaml ├── qwen-3-32B-sglang │ └── config.yaml ├── qwen-7b-chat │ ├── README.md │ ├── config.yaml │ └── model │ │ ├── __init__.py │ │ └── model.py └── qwen-vl │ ├── README.md │ ├── config.yaml │ └── model │ ├── __init__.py │ └── model.py ├── sana ├── sana_1600M │ ├── config.yaml │ ├── model │ │ ├── __init__.py │ │ └── model.py │ └── packages │ │ └── Sana │ │ ├── CITATION.bib │ │ ├── CIs │ │ └── add_license_all.sh │ │ ├── Dockerfile │ │ ├── LICENSE │ │ ├── README.md │ │ ├── app │ │ ├── app_sana.py │ │ ├── app_sana_multithread.py │ │ ├── safety_check.py │ │ └── sana_pipeline.py │ │ ├── asset │ │ ├── Sana.jpg │ │ ├── docs │ │ │ └── metrics_toolkit.md │ │ ├── example_data │ │ │ ├── 00000000.png │ │ │ ├── 00000000.txt │ │ │ ├── 00000000_InternVL2-26B.json │ │ │ ├── 00000000_InternVL2-26B_clip_score.json │ │ │ ├── 00000000_VILA1-5-13B.json │ │ │ ├── 00000000_VILA1-5-13B_clip_score.json │ │ │ ├── 00000000_prompt_clip_score.json │ │ │ └── meta_data.json │ │ ├── examples.py │ │ ├── logo.png │ │ ├── model-incremental.jpg │ │ ├── model_paths.txt │ │ ├── samples.txt │ │ └── samples_mini.txt │ │ ├── configs │ │ ├── sana_app_config │ │ │ ├── Sana_1600M_app.yaml │ │ │ └── Sana_600M_app.yaml │ │ ├── sana_base.yaml │ │ └── sana_config │ │ │ ├── 1024ms │ │ │ ├── Sana_1600M_img1024.yaml │ │ │ ├── 
Sana_1600M_img1024_AdamW.yaml │ │ │ └── Sana_600M_img1024.yaml │ │ │ └── 512ms │ │ │ ├── Sana_1600M_img512.yaml │ │ │ ├── Sana_600M_img512.yaml │ │ │ ├── ci_Sana_600M_img512.yaml │ │ │ └── sample_dataset.yaml │ │ ├── diffusion │ │ ├── __init__.py │ │ ├── data │ │ │ ├── __init__.py │ │ │ ├── builder.py │ │ │ ├── datasets │ │ │ │ ├── __init__.py │ │ │ │ ├── sana_data.py │ │ │ │ ├── sana_data_multi_scale.py │ │ │ │ └── utils.py │ │ │ ├── transforms.py │ │ │ └── wids │ │ │ │ ├── __init__.py │ │ │ │ ├── wids.py │ │ │ │ ├── wids_dl.py │ │ │ │ ├── wids_lru.py │ │ │ │ ├── wids_mmtar.py │ │ │ │ ├── wids_specs.py │ │ │ │ └── wids_tar.py │ │ ├── dpm_solver.py │ │ ├── flow_euler_sampler.py │ │ ├── iddpm.py │ │ ├── lcm_scheduler.py │ │ ├── model │ │ │ ├── __init__.py │ │ │ ├── act.py │ │ │ ├── builder.py │ │ │ ├── dc_ae │ │ │ │ └── efficientvit │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── ae_model_zoo.py │ │ │ │ │ ├── apps │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── setup.py │ │ │ │ │ ├── trainer │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ └── run_config.py │ │ │ │ │ └── utils │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ ├── dist.py │ │ │ │ │ │ ├── ema.py │ │ │ │ │ │ ├── export.py │ │ │ │ │ │ ├── image.py │ │ │ │ │ │ ├── init.py │ │ │ │ │ │ ├── lr.py │ │ │ │ │ │ ├── metric.py │ │ │ │ │ │ ├── misc.py │ │ │ │ │ │ └── opt.py │ │ │ │ │ └── models │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── efficientvit │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── dc_ae.py │ │ │ │ │ ├── nn │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── act.py │ │ │ │ │ ├── drop.py │ │ │ │ │ ├── norm.py │ │ │ │ │ ├── ops.py │ │ │ │ │ └── triton_rms_norm.py │ │ │ │ │ └── utils │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── list.py │ │ │ │ │ ├── network.py │ │ │ │ │ └── random.py │ │ │ ├── diffusion_utils.py │ │ │ ├── dpm_solver.py │ │ │ ├── edm_sample.py │ │ │ ├── gaussian_diffusion.py │ │ │ ├── nets │ │ │ │ ├── __init__.py │ │ │ │ ├── basic_modules.py │ │ │ │ ├── fastlinear │ │ │ │ │ ├── develop_triton_ffn.py │ │ │ │ │ ├── develop_triton_litemla.py │ │ │ │ │ ├── modules │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ ├── flash_attn.py │ │ │ │ │ │ ├── lite_mla.py │ │ │ │ │ │ ├── mb_conv_pre_glu.py │ │ │ │ │ │ ├── nn │ │ │ │ │ │ │ ├── act.py │ │ │ │ │ │ │ ├── conv.py │ │ │ │ │ │ │ └── norm.py │ │ │ │ │ │ ├── triton_lite_mla.py │ │ │ │ │ │ ├── triton_lite_mla_fwd.py │ │ │ │ │ │ ├── triton_lite_mla_kernels │ │ │ │ │ │ │ ├── custom_autotune.py │ │ │ │ │ │ │ ├── linear_relu_fwd.py │ │ │ │ │ │ │ ├── mm.py │ │ │ │ │ │ │ ├── pad_vk_mm_fwd.py │ │ │ │ │ │ │ ├── proj_divide_bwd.py │ │ │ │ │ │ │ ├── vk_mm_relu_bwd.py │ │ │ │ │ │ │ ├── vk_q_mm_divide_fwd.py │ │ │ │ │ │ │ └── vk_q_mm_relu_bwd.py │ │ │ │ │ │ ├── triton_mb_conv_pre_glu.py │ │ │ │ │ │ ├── triton_mb_conv_pre_glu_kernels │ │ │ │ │ │ │ ├── depthwise_conv_fwd.py │ │ │ │ │ │ │ └── linear_glu_fwd.py │ │ │ │ │ │ └── utils │ │ │ │ │ │ │ ├── compare_results.py │ │ │ │ │ │ │ ├── custom_autotune.py │ │ │ │ │ │ │ ├── dtype.py │ │ │ │ │ │ │ ├── export_onnx.py │ │ │ │ │ │ │ └── model.py │ │ │ │ │ └── readme.md │ │ │ │ ├── sana.py │ │ │ │ ├── sana_U_shape.py │ │ │ │ ├── sana_U_shape_multi_scale.py │ │ │ │ ├── sana_blocks.py │ │ │ │ ├── sana_multi_scale.py │ │ │ │ ├── sana_multi_scale_adaln.py │ │ │ │ └── sana_others.py │ │ │ ├── norms.py │ │ │ ├── respace.py │ │ │ ├── sa_solver.py │ │ │ ├── timestep_sampler.py │ │ │ └── utils.py │ │ ├── sa_sampler.py │ │ ├── sa_solver_diffusers.py │ │ └── utils │ │ │ ├── __init__.py │ │ │ ├── checkpoint.py │ │ │ ├── config.py │ │ │ ├── data_sampler.py │ │ │ ├── dist_utils.py │ │ │ ├── import_utils.py │ │ │ ├── logger.py │ 
│ │ ├── lr_scheduler.py │ │ │ ├── misc.py │ │ │ └── optimizer.py │ │ ├── environment_setup.sh │ │ ├── pyproject.toml │ │ ├── sana │ │ ├── cli │ │ │ ├── run.py │ │ │ └── upload2hf.py │ │ └── tools │ │ │ ├── __init__.py │ │ │ ├── download.py │ │ │ └── hf_utils.py │ │ ├── scripts │ │ ├── bash_run_inference_metric.sh │ │ ├── bash_run_inference_metric_dpg.sh │ │ ├── bash_run_inference_metric_geneval.sh │ │ ├── bash_run_inference_metric_imagereward.sh │ │ ├── infer_metric_run_inference_metric.sh │ │ ├── infer_metric_run_inference_metric_geneval.sh │ │ ├── infer_run_inference.sh │ │ ├── infer_run_inference_geneval.sh │ │ ├── infer_run_inference_geneval_diffusers.sh │ │ ├── inference.py │ │ ├── inference_dpg.py │ │ ├── inference_geneval.py │ │ ├── inference_geneval_diffusers.py │ │ ├── inference_image_reward.py │ │ ├── interface.py │ │ └── style.css │ │ ├── tests │ │ └── bash │ │ │ ├── entry.sh │ │ │ ├── test_inference.sh │ │ │ └── test_training_1epoch.sh │ │ ├── tools │ │ ├── __init__.py │ │ ├── convert_py_to_yaml.py │ │ ├── convert_sana_pag_to_diffusers.py │ │ ├── convert_sana_to_diffusers.py │ │ ├── download.py │ │ └── metrics │ │ │ ├── clip-score │ │ │ ├── .gitignore │ │ │ ├── LICENSE │ │ │ ├── README.md │ │ │ ├── clip_score.py │ │ │ ├── setup.py │ │ │ └── src │ │ │ │ └── clip_score │ │ │ │ ├── __init__.py │ │ │ │ ├── __main__.py │ │ │ │ └── clip_score.py │ │ │ ├── compute_clipscore.sh │ │ │ ├── compute_dpg.sh │ │ │ ├── compute_fid_embedding.sh │ │ │ ├── compute_geneval.sh │ │ │ ├── compute_imagereward.sh │ │ │ ├── dpg_bench │ │ │ ├── compute_dpg_bench.py │ │ │ ├── dpg_bench.csv │ │ │ └── requirements.txt │ │ │ ├── geneval │ │ │ ├── LICENSE │ │ │ ├── README.md │ │ │ ├── annotations │ │ │ │ ├── annotations_clip.csv │ │ │ │ ├── annotations_if-xl.csv │ │ │ │ ├── annotations_sdv2.csv │ │ │ │ └── mturk_hit_template.html │ │ │ ├── environment.yml │ │ │ ├── evaluation │ │ │ │ ├── download_models.sh │ │ │ │ ├── evaluate_images.py │ │ │ │ ├── object_names.txt │ │ │ │ └── summary_scores.py │ │ │ ├── generation │ │ │ │ └── diffusers_generate.py │ │ │ ├── images │ │ │ │ └── geneval_figure_1.png │ │ │ └── prompts │ │ │ │ ├── create_prompts.py │ │ │ │ ├── evaluation_metadata.jsonl │ │ │ │ ├── generation_prompts.txt │ │ │ │ └── object_names.txt │ │ │ ├── image_reward │ │ │ ├── benchmark-prompts-dict.json │ │ │ └── compute_image_reward.py │ │ │ ├── pytorch-fid │ │ │ ├── .gitignore │ │ │ ├── CHANGELOG.md │ │ │ ├── LICENSE │ │ │ ├── README.md │ │ │ ├── compute_fid.py │ │ │ ├── noxfile.py │ │ │ ├── setup.cfg │ │ │ ├── setup.py │ │ │ ├── src │ │ │ │ └── pytorch_fid │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── __main__.py │ │ │ │ │ ├── fid_score.py │ │ │ │ │ └── inception.py │ │ │ └── tests │ │ │ │ └── test_fid_score.py │ │ │ └── utils.py │ │ └── train_scripts │ │ ├── train.py │ │ └── train.sh └── sana_600M │ ├── config.yaml │ ├── model │ ├── __init__.py │ └── model.py │ └── packages │ └── Sana │ ├── CITATION.bib │ ├── CIs │ └── add_license_all.sh │ ├── Dockerfile │ ├── LICENSE │ ├── README.md │ ├── app │ ├── app_sana.py │ ├── app_sana_multithread.py │ ├── safety_check.py │ └── sana_pipeline.py │ ├── asset │ ├── Sana.jpg │ ├── docs │ │ └── metrics_toolkit.md │ ├── example_data │ │ ├── 00000000.png │ │ ├── 00000000.txt │ │ ├── 00000000_InternVL2-26B.json │ │ ├── 00000000_InternVL2-26B_clip_score.json │ │ ├── 00000000_VILA1-5-13B.json │ │ ├── 00000000_VILA1-5-13B_clip_score.json │ │ ├── 00000000_prompt_clip_score.json │ │ └── meta_data.json │ ├── examples.py │ ├── logo.png │ ├── model-incremental.jpg │ ├── 
model_paths.txt │ ├── samples.txt │ └── samples_mini.txt │ ├── configs │ ├── sana_app_config │ │ ├── Sana_1600M_app.yaml │ │ └── Sana_600M_app.yaml │ ├── sana_base.yaml │ └── sana_config │ │ ├── 1024ms │ │ ├── Sana_1600M_img1024.yaml │ │ ├── Sana_1600M_img1024_AdamW.yaml │ │ └── Sana_600M_img1024.yaml │ │ └── 512ms │ │ ├── Sana_1600M_img512.yaml │ │ ├── Sana_600M_img512.yaml │ │ ├── ci_Sana_600M_img512.yaml │ │ └── sample_dataset.yaml │ ├── diffusion │ ├── __init__.py │ ├── data │ │ ├── __init__.py │ │ ├── builder.py │ │ ├── datasets │ │ │ ├── __init__.py │ │ │ ├── sana_data.py │ │ │ ├── sana_data_multi_scale.py │ │ │ └── utils.py │ │ ├── transforms.py │ │ └── wids │ │ │ ├── __init__.py │ │ │ ├── wids.py │ │ │ ├── wids_dl.py │ │ │ ├── wids_lru.py │ │ │ ├── wids_mmtar.py │ │ │ ├── wids_specs.py │ │ │ └── wids_tar.py │ ├── dpm_solver.py │ ├── flow_euler_sampler.py │ ├── iddpm.py │ ├── lcm_scheduler.py │ ├── model │ │ ├── __init__.py │ │ ├── act.py │ │ ├── builder.py │ │ ├── dc_ae │ │ │ └── efficientvit │ │ │ │ ├── __init__.py │ │ │ │ ├── ae_model_zoo.py │ │ │ │ ├── apps │ │ │ │ ├── __init__.py │ │ │ │ ├── setup.py │ │ │ │ ├── trainer │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── run_config.py │ │ │ │ └── utils │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── dist.py │ │ │ │ │ ├── ema.py │ │ │ │ │ ├── export.py │ │ │ │ │ ├── image.py │ │ │ │ │ ├── init.py │ │ │ │ │ ├── lr.py │ │ │ │ │ ├── metric.py │ │ │ │ │ ├── misc.py │ │ │ │ │ └── opt.py │ │ │ │ └── models │ │ │ │ ├── __init__.py │ │ │ │ ├── efficientvit │ │ │ │ ├── __init__.py │ │ │ │ └── dc_ae.py │ │ │ │ ├── nn │ │ │ │ ├── __init__.py │ │ │ │ ├── act.py │ │ │ │ ├── drop.py │ │ │ │ ├── norm.py │ │ │ │ ├── ops.py │ │ │ │ └── triton_rms_norm.py │ │ │ │ └── utils │ │ │ │ ├── __init__.py │ │ │ │ ├── list.py │ │ │ │ ├── network.py │ │ │ │ └── random.py │ │ ├── diffusion_utils.py │ │ ├── dpm_solver.py │ │ ├── edm_sample.py │ │ ├── gaussian_diffusion.py │ │ ├── nets │ │ │ ├── __init__.py │ │ │ ├── basic_modules.py │ │ │ ├── fastlinear │ │ │ │ ├── develop_triton_ffn.py │ │ │ │ ├── develop_triton_litemla.py │ │ │ │ ├── modules │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── flash_attn.py │ │ │ │ │ ├── lite_mla.py │ │ │ │ │ ├── mb_conv_pre_glu.py │ │ │ │ │ ├── nn │ │ │ │ │ │ ├── act.py │ │ │ │ │ │ ├── conv.py │ │ │ │ │ │ └── norm.py │ │ │ │ │ ├── triton_lite_mla.py │ │ │ │ │ ├── triton_lite_mla_fwd.py │ │ │ │ │ ├── triton_lite_mla_kernels │ │ │ │ │ │ ├── custom_autotune.py │ │ │ │ │ │ ├── linear_relu_fwd.py │ │ │ │ │ │ ├── mm.py │ │ │ │ │ │ ├── pad_vk_mm_fwd.py │ │ │ │ │ │ ├── proj_divide_bwd.py │ │ │ │ │ │ ├── vk_mm_relu_bwd.py │ │ │ │ │ │ ├── vk_q_mm_divide_fwd.py │ │ │ │ │ │ └── vk_q_mm_relu_bwd.py │ │ │ │ │ ├── triton_mb_conv_pre_glu.py │ │ │ │ │ ├── triton_mb_conv_pre_glu_kernels │ │ │ │ │ │ ├── depthwise_conv_fwd.py │ │ │ │ │ │ └── linear_glu_fwd.py │ │ │ │ │ └── utils │ │ │ │ │ │ ├── compare_results.py │ │ │ │ │ │ ├── custom_autotune.py │ │ │ │ │ │ ├── dtype.py │ │ │ │ │ │ ├── export_onnx.py │ │ │ │ │ │ └── model.py │ │ │ │ └── readme.md │ │ │ ├── sana.py │ │ │ ├── sana_U_shape.py │ │ │ ├── sana_U_shape_multi_scale.py │ │ │ ├── sana_blocks.py │ │ │ ├── sana_multi_scale.py │ │ │ ├── sana_multi_scale_adaln.py │ │ │ └── sana_others.py │ │ ├── norms.py │ │ ├── respace.py │ │ ├── sa_solver.py │ │ ├── timestep_sampler.py │ │ └── utils.py │ ├── sa_sampler.py │ ├── sa_solver_diffusers.py │ └── utils │ │ ├── __init__.py │ │ ├── checkpoint.py │ │ ├── config.py │ │ ├── data_sampler.py │ │ ├── dist_utils.py │ │ ├── import_utils.py │ │ ├── logger.py │ │ ├── lr_scheduler.py │ │ 
├── misc.py │ │ └── optimizer.py │ ├── environment_setup.sh │ ├── pyproject.toml │ ├── sana │ ├── cli │ │ ├── run.py │ │ └── upload2hf.py │ └── tools │ │ ├── __init__.py │ │ ├── download.py │ │ └── hf_utils.py │ ├── scripts │ ├── bash_run_inference_metric.sh │ ├── bash_run_inference_metric_dpg.sh │ ├── bash_run_inference_metric_geneval.sh │ ├── bash_run_inference_metric_imagereward.sh │ ├── infer_metric_run_inference_metric.sh │ ├── infer_metric_run_inference_metric_geneval.sh │ ├── infer_run_inference.sh │ ├── infer_run_inference_geneval.sh │ ├── infer_run_inference_geneval_diffusers.sh │ ├── inference.py │ ├── inference_dpg.py │ ├── inference_geneval.py │ ├── inference_geneval_diffusers.py │ ├── inference_image_reward.py │ ├── interface.py │ └── style.css │ ├── tests │ └── bash │ │ ├── entry.sh │ │ ├── test_inference.sh │ │ └── test_training_1epoch.sh │ ├── tools │ ├── __init__.py │ ├── convert_py_to_yaml.py │ ├── convert_sana_pag_to_diffusers.py │ ├── convert_sana_to_diffusers.py │ ├── download.py │ └── metrics │ │ ├── clip-score │ │ ├── .gitignore │ │ ├── LICENSE │ │ ├── README.md │ │ ├── clip_score.py │ │ ├── setup.py │ │ └── src │ │ │ └── clip_score │ │ │ ├── __init__.py │ │ │ ├── __main__.py │ │ │ └── clip_score.py │ │ ├── compute_clipscore.sh │ │ ├── compute_dpg.sh │ │ ├── compute_fid_embedding.sh │ │ ├── compute_geneval.sh │ │ ├── compute_imagereward.sh │ │ ├── dpg_bench │ │ ├── compute_dpg_bench.py │ │ ├── dpg_bench.csv │ │ └── requirements.txt │ │ ├── geneval │ │ ├── LICENSE │ │ ├── README.md │ │ ├── annotations │ │ │ ├── annotations_clip.csv │ │ │ ├── annotations_if-xl.csv │ │ │ ├── annotations_sdv2.csv │ │ │ └── mturk_hit_template.html │ │ ├── environment.yml │ │ ├── evaluation │ │ │ ├── download_models.sh │ │ │ ├── evaluate_images.py │ │ │ ├── object_names.txt │ │ │ └── summary_scores.py │ │ ├── generation │ │ │ └── diffusers_generate.py │ │ ├── images │ │ │ └── geneval_figure_1.png │ │ └── prompts │ │ │ ├── create_prompts.py │ │ │ ├── evaluation_metadata.jsonl │ │ │ ├── generation_prompts.txt │ │ │ └── object_names.txt │ │ ├── image_reward │ │ ├── benchmark-prompts-dict.json │ │ └── compute_image_reward.py │ │ ├── pytorch-fid │ │ ├── .gitignore │ │ ├── CHANGELOG.md │ │ ├── LICENSE │ │ ├── README.md │ │ ├── compute_fid.py │ │ ├── noxfile.py │ │ ├── setup.cfg │ │ ├── setup.py │ │ ├── src │ │ │ └── pytorch_fid │ │ │ │ ├── __init__.py │ │ │ │ ├── __main__.py │ │ │ │ ├── fid_score.py │ │ │ │ └── inception.py │ │ └── tests │ │ │ └── test_fid_score.py │ │ └── utils.py │ └── train_scripts │ ├── train.py │ └── train.sh ├── segment-anything ├── README.md ├── config.yaml └── model │ ├── __init__.py │ └── model.py ├── sesame-csm-1b ├── config.yaml └── model │ ├── __init__.py │ └── model.py ├── stable-diffusion ├── dreamshaper-lcm │ ├── README.md │ ├── config.yaml │ └── model │ │ ├── __init__.py │ │ └── model.py ├── playground-v2-trt │ ├── README.md │ ├── config.yaml │ ├── model │ │ ├── __init__.py │ │ └── model.py │ ├── packages │ │ └── diffusion │ │ │ ├── trtclip.py │ │ │ └── trtunet.py │ └── show.py ├── sd-textual-inversion │ ├── README.md │ ├── config.yaml │ ├── data │ │ └── LulaCipher.bin │ └── model │ │ ├── __init__.py │ │ └── model.py ├── sd-turbo │ ├── README.md │ ├── config.yaml │ └── model │ │ ├── __init__.py │ │ └── model.py ├── sdxl-controlnet-canny │ ├── README.md │ ├── baseten-logo.gif │ ├── config.yaml │ └── model │ │ ├── __init__.py │ │ └── model.py ├── sdxl-controlnet-depth │ ├── README.md │ ├── config.yaml │ └── model │ │ ├── __init__.py │ │ └── model.py ├── 
sdxl-controlnet │ ├── README.md │ ├── baseten-logo.gif │ ├── config.yaml │ └── model │ │ ├── __init__.py │ │ └── model.py ├── sdxl-lightning │ ├── README.md │ ├── config.yaml │ └── model │ │ ├── __init__.py │ │ └── model.py ├── sdxl-lora-swapping │ ├── README.md │ ├── config.yaml │ └── model │ │ ├── __init__.py │ │ └── model.py ├── sdxl-lora │ ├── README.md │ ├── config.yaml │ └── model │ │ ├── __init__.py │ │ └── model.py ├── sdxl-turbo │ ├── README.md │ ├── config.yaml │ └── model │ │ ├── __init__.py │ │ └── model.py ├── stable-diffusion-3-medium │ ├── README.md │ ├── config.yaml │ └── model │ │ ├── __init__.py │ │ └── model.py ├── stable-diffusion-inpainting-trt │ ├── README.md │ ├── config.yaml │ ├── model │ │ ├── __init__.py │ │ └── model.py │ ├── packages │ │ └── helpers │ │ │ ├── inpaint_pipeline.py │ │ │ ├── models.py │ │ │ ├── stable_diffusion_pipeline.py │ │ │ └── utilities.py │ └── requirements.txt ├── stable-diffusion-xl-1.0-trt-h100 │ ├── README.md │ ├── config.yaml │ ├── model │ │ ├── __init__.py │ │ └── model.py │ ├── packages │ │ └── diffusion │ │ │ ├── trtclip.py │ │ │ └── trtunet.py │ └── show.py ├── stable-diffusion-xl-1.0-trt │ ├── README.md │ ├── config.yaml │ ├── model │ │ ├── __init__.py │ │ └── model.py │ ├── packages │ │ └── diffusion │ │ │ ├── trtclip.py │ │ │ └── trtunet.py │ └── show.py ├── stable-diffusion-xl-1.0 │ ├── README.md │ ├── config.yaml │ ├── model │ │ ├── __init__.py │ │ └── model.py │ └── show.py ├── stable-diffusion │ ├── README.md │ ├── config.yaml │ ├── data │ │ ├── model_index.json │ │ ├── scheduler │ │ │ └── scheduler_config.json │ │ ├── text_encoder │ │ │ └── config.json │ │ ├── tokenizer │ │ │ ├── merges.txt │ │ │ ├── special_tokens_map.json │ │ │ ├── tokenizer_config.json │ │ │ └── vocab.json │ │ ├── unet │ │ │ └── config.json │ │ └── vae │ │ │ └── config.json │ ├── model │ │ ├── __init__.py │ │ └── model.py │ └── show.py └── stable-video-diffusion │ ├── README.md │ ├── config.yaml │ ├── model │ ├── __init__.py │ ├── helper.py │ ├── model.py │ └── scripts │ │ ├── __init__.py │ │ ├── sampling │ │ ├── configs │ │ │ ├── svd.yaml │ │ │ ├── svd_image_decoder.yaml │ │ │ ├── svd_xt.yaml │ │ │ └── svd_xt_image_decoder.yaml │ │ └── simple_video_sample.py │ │ ├── tests │ │ └── attention.py │ │ └── util │ │ ├── __init__.py │ │ └── detection │ │ ├── __init__.py │ │ ├── nsfw_and_watermark_dectection.py │ │ ├── p_head_v1.npz │ │ └── w_head_v1.npz │ └── sample_images │ ├── cheetah.jpeg │ └── racecar.jpeg ├── templates ├── README.md ├── faster-whisper-truss │ ├── config.yaml │ └── model │ │ ├── __init__.py │ │ └── model.py ├── generate.py ├── generate.yaml ├── transformers-openai-compatible │ ├── config.yaml │ └── model │ │ ├── __init__.py │ │ └── model.py └── trt-llm │ ├── TRT-LLM-README.md │ ├── config.yaml │ ├── data │ └── .gitattributes │ ├── model │ ├── __init__.py │ └── model.py │ └── packages │ ├── client.py │ ├── inflight_batcher_llm │ ├── ensemble │ │ └── config.pbtxt.jinja │ ├── postprocessing │ │ ├── 1 │ │ │ └── model.py │ │ └── config.pbtxt.jinja │ ├── preprocessing │ │ ├── 1 │ │ │ └── model.py │ │ └── config.pbtxt.jinja │ └── tensorrt_llm │ │ └── config.pbtxt │ └── utils.py ├── text-embeddings-inference ├── README.md └── config.yaml ├── trt-llm-engine-builder-templates ├── llama-3_1-70b-instruct │ ├── high_throughput │ │ ├── README.md │ │ ├── config.yaml │ │ └── model │ │ │ └── __init__.py │ ├── large_context │ │ ├── README.md │ │ ├── config.yaml │ │ └── model │ │ │ └── __init__.py │ └── low_ttft │ │ ├── README.md │ │ ├── config.yaml │ │ └── 
model │ │ └── __init__.py └── llama-3_1-8b-instruct │ ├── high_throughput │ ├── README.md │ ├── config.yaml │ └── model │ │ └── __init__.py │ ├── large_context │ ├── README.md │ ├── config.yaml │ └── model │ │ └── __init__.py │ └── low_ttft │ ├── README.md │ ├── config.yaml │ └── model │ └── __init__.py ├── ultravox ├── README.md ├── config.yaml └── model │ ├── __init__.py │ └── model.py ├── vllm ├── README.md ├── config.yaml └── model │ ├── __init__.py │ ├── helper.py │ └── model.py ├── whisper ├── faster-whisper-small │ ├── README.md │ ├── config.yaml │ └── model │ │ ├── __init__.py │ │ └── model.py ├── faster-whisper-v2 │ ├── config.yaml │ └── model │ │ ├── __init__.py │ │ └── model.py ├── faster-whisper-v3 │ ├── config.yaml │ └── model │ │ ├── __init__.py │ │ └── model.py ├── whisper-streaming │ ├── README.md │ ├── config.yaml │ ├── model │ │ ├── __init__.py │ │ └── model.py │ ├── packages │ │ └── whisper_streaming │ │ │ ├── line_packet.py │ │ │ ├── whisper_online.py │ │ │ └── whisper_online_server.py │ └── requirements.txt ├── whisper-torchserve │ ├── README.md │ ├── config.yaml │ ├── data │ │ └── config.properties │ └── model │ │ ├── __init__.py │ │ └── model.py ├── whisper-truss │ ├── LICENSE │ ├── README.md │ ├── config.yaml │ ├── data │ │ └── .gitkeep │ └── model │ │ ├── __init__.py │ │ └── model.py ├── whisper-v3-truss-base64 │ ├── README.md │ ├── config.yaml │ └── model │ │ ├── __init__.py │ │ └── model.py ├── whisper-v3-truss │ ├── README.md │ ├── config.yaml │ └── model │ │ ├── __init__.py │ │ └── model.py ├── whisper-v3-turbo │ ├── README.md │ └── config.yaml └── whisperx-truss │ ├── README.md │ ├── config.yaml │ └── model │ ├── __init__.py │ └── model.py ├── xtts-streaming ├── README.md ├── config.yaml ├── model │ ├── __init__.py │ └── model.py └── requirements.txt └── xtts-v2-truss ├── README.md ├── config.yaml └── model ├── __init__.py └── model.py

/.github/workflows/pr.yml:
--------------------------------------------------------------------------------
name: PR

on:
  pull_request:

concurrency:
  group: pr-${{ github.ref_name }}
  cancel-in-progress: true

jobs:
  lint:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: ./.github/actions/setup-python/
      - run: poetry install
      - run: poetry run pre-commit run --all-files

--------------------------------------------------------------------------------
/.isort.cfg:
--------------------------------------------------------------------------------
[settings]
profile=black

--------------------------------------------------------------------------------
/.tool-versions:
--------------------------------------------------------------------------------
python 3.11.11
poetry 1.8.4

--------------------------------------------------------------------------------
/01-getting-started-bert/doc.yaml:
--------------------------------------------------------------------------------
title: "Getting Started"
description: "Building your first Truss"
files:
  - model/model.py
  - config.yaml

--------------------------------------------------------------------------------
/01-getting-started-bert/model/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/01-getting-started-bert/model/__init__.py

--------------------------------------------------------------------------------
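Once a Truss like the getting-started example above is deployed (for instance with `truss push`), it is served behind an HTTP predict endpoint. A minimal sketch of calling it, assuming a Baseten deployment; MODEL_ID, the API key, and the input payload are placeholders, not values from this repo:

```python
# Minimal sketch: call a deployed Truss model's predict endpoint.
# MODEL_ID and YOUR_API_KEY are placeholders (assumptions), and the
# input schema depends on the model's predict() implementation.
import requests

resp = requests.post(
    "https://model-MODEL_ID.api.baseten.co/production/predict",
    headers={"Authorization": "Api-Key YOUR_API_KEY"},
    json={"text": "Truss is a framework for packaging ML models."},
)
print(resp.json())
```

--------------------------------------------------------------------------------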
/02-llm/doc.yaml:
--------------------------------------------------------------------------------
title: "LLM"
description: "Building an LLM"
files:
  - model/model.py
  - config.yaml

--------------------------------------------------------------------------------
/02-llm/model/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/02-llm/model/__init__.py

--------------------------------------------------------------------------------
/03-llm-with-streaming/config.yaml:
--------------------------------------------------------------------------------
# # Setting up the config.yaml
#
# Running Falcon 7B requires torch, transformers,
# and a few other related libraries.
model_name: "LLM with Streaming"
model_metadata:
  example_model_input: {"prompt": "what is the meaning of life"}
requirements:
  - torch==2.0.1
  - peft==0.4.0
  - scipy==1.11.1
  - sentencepiece==0.1.99
  - accelerate==0.21.0
  - bitsandbytes==0.41.1
  - einops==0.6.1
  - transformers==4.31.0
  - numpy==1.26.4
# ## Configure resources for Falcon
#
# Note that we need an A10G to run this model.
resources:
  cpu: "3"
  memory: 14Gi
  use_gpu: true
  accelerator: A10G

--------------------------------------------------------------------------------
/03-llm-with-streaming/doc.yaml:
--------------------------------------------------------------------------------
title: "LLM with Streaming"
description: "Building an LLM with streaming output"
files:
  - model/model.py
  - config.yaml

--------------------------------------------------------------------------------
/03-llm-with-streaming/model/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/03-llm-with-streaming/model/__init__.py

--------------------------------------------------------------------------------
/04-image-generation/doc.yaml:
--------------------------------------------------------------------------------
title: "Text-to-image"
description: "Building a text-to-image model with SDXL"
files:
  - model/model.py
  - config.yaml

--------------------------------------------------------------------------------
/04-image-generation/model/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/04-image-generation/model/__init__.py

--------------------------------------------------------------------------------
/05-speech-to-text/config.yaml:
--------------------------------------------------------------------------------
environment_variables: {}
model_metadata:
  example_model_input: {"url": "https://cdn.baseten.co/docs/production/Gettysburg.mp3"}
model_name: Whisper
python_version: py39
requirements:
  - openai-whisper==20230918
  - torch==2.0.1
  - numpy==1.26.4
resources:
  cpu: "4"
  memory: 16Gi
  use_gpu: true
  accelerator: A10G
secrets: {}
system_packages:
  - ffmpeg
external_data:
  - url: https://baseten-public.s3.us-west-2.amazonaws.com/models/whisper/small.pt
    local_data_path: models/small.pt
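The `external_data` block in the Whisper config above downloads the checkpoint to `models/small.pt` under the Truss data directory at build time, so `load()` can read it from local disk instead of fetching it at startup. A rough sketch of that pattern, assuming the standard Truss model interface; the actual 05-speech-to-text/model/model.py may differ:

```python
# Sketch of loading weights placed by `external_data`; assumes the
# standard Truss Model interface (data_dir is passed to __init__).
import tempfile

import requests
import whisper


class Model:
    def __init__(self, **kwargs):
        self._data_dir = kwargs["data_dir"]
        self._model = None

    def load(self):
        # external_data maps the S3 object to <data_dir>/models/small.pt
        self._model = whisper.load_model(f"{self._data_dir}/models/small.pt")

    def predict(self, model_input):
        # Fetch the audio referenced by `url`, then transcribe it.
        audio = requests.get(model_input["url"]).content
        with tempfile.NamedTemporaryFile(suffix=".mp3") as f:
            f.write(audio)
            f.flush()
            return self._model.transcribe(f.name)
```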
--------------------------------------------------------------------------------
/05-speech-to-text/model/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/05-speech-to-text/model/__init__.py

--------------------------------------------------------------------------------
/06-high-performance-cached-weights/doc.yaml:
--------------------------------------------------------------------------------
title: "Fast Cold Starts with Cached Weights"
description: "Deploy a language model, with the model weights cached at build time"
files:
  - model/model.py
  - config.yaml

--------------------------------------------------------------------------------
/06-high-performance-cached-weights/model/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/06-high-performance-cached-weights/model/__init__.py

--------------------------------------------------------------------------------
/07-high-performance-dynamic-batching/.gitignore:
--------------------------------------------------------------------------------
.venv/
payload.json
.vscode

--------------------------------------------------------------------------------
/07-high-performance-dynamic-batching/.truss_ignore:
--------------------------------------------------------------------------------
.venv/
payload.json
.vscode

--------------------------------------------------------------------------------
/07-high-performance-dynamic-batching/model/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/07-high-performance-dynamic-batching/model/__init__.py

--------------------------------------------------------------------------------
/07-high-performance-dynamic-batching/packages/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/07-high-performance-dynamic-batching/packages/__init__.py

--------------------------------------------------------------------------------
/09-private-huggingface/doc.yaml:
--------------------------------------------------------------------------------
title: "Private Hugging Face Model"
description: "Load a model that requires authentication with Hugging Face"
files:
  - model/model.py
  - config.yaml

--------------------------------------------------------------------------------
/09-private-huggingface/model/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/09-private-huggingface/model/__init__.py

--------------------------------------------------------------------------------
/10-using-system-packages/doc.yaml:
--------------------------------------------------------------------------------
title: "Model with system packages"
description: "Deploy a model with both Python and system dependencies"
files:
  - model/model.py
  - config.yaml

--------------------------------------------------------------------------------
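The point of the 09-private-huggingface example above is that the Hugging Face token comes from Truss secrets rather than being hard-coded. A hedged sketch of how a model class can consume such a secret; the secret name `hf_access_token` is the usual Baseten convention, the repo ID is hypothetical, and the real model.py may differ:

```python
# Hedged sketch: authenticate to Hugging Face with a Truss secret.
# "your-org/private-model" is a hypothetical gated repo ID.
from transformers import pipeline


class Model:
    def __init__(self, **kwargs):
        self._secrets = kwargs["secrets"]
        self._pipeline = None

    def load(self):
        self._pipeline = pipeline(
            "text-generation",
            model="your-org/private-model",
            token=self._secrets["hf_access_token"],
        )

    def predict(self, model_input):
        return self._pipeline(model_input["prompt"])
```

--------------------------------------------------------------------------------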
/10-using-system-packages/model/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/10-using-system-packages/model/__init__.py

--------------------------------------------------------------------------------
/11-embeddings-reranker-classification-tensorrt/BEI-baai-bge-en-icl-embedding-fp8/config.yaml:
--------------------------------------------------------------------------------
# this file was autogenerated by `generate_templates.py` - please do change via template only
model_metadata:
  example_model_input:
    encoding_format: float
    input: text string
    model: model
model_name: BEI-baai-bge-en-icl-embedding-fp8-truss-example
python_version: py39
resources:
  accelerator: H100
  cpu: '1'
  memory: 10Gi
  use_gpu: true
trt_llm:
  build:
    base_model: encoder
    checkpoint_repository:
      repo: BAAI/bge-en-icl
      revision: main
      source: HF
    max_num_tokens: 32768
    num_builder_gpus: 2
    quantization_type: fp8
  runtime:
    webserver_default_route: /v1/embeddings

--------------------------------------------------------------------------------
/11-embeddings-reranker-classification-tensorrt/BEI-baai-bge-large-en-v1.5-embedding/config.yaml:
--------------------------------------------------------------------------------
# this file was autogenerated by `generate_templates.py` - please do change via template only
model_metadata:
  example_model_input:
    encoding_format: float
    input: text string
    model: model
model_name: BEI-baai-bge-large-en-v1.5-embedding-truss-example
python_version: py39
resources:
  accelerator: L4
  cpu: '1'
  memory: 10Gi
  use_gpu: true
trt_llm:
  build:
    base_model: encoder
    checkpoint_repository:
      repo: BAAI/bge-large-en-v1.5
      revision: main
      source: HF
    max_num_tokens: 16384
  runtime:
    webserver_default_route: /v1/embeddings

--------------------------------------------------------------------------------
/11-embeddings-reranker-classification-tensorrt/BEI-baai-bge-m3-embedding-dense/config.yaml:
--------------------------------------------------------------------------------
# this file was autogenerated by `generate_templates.py` - please do change via template only
model_metadata:
  example_model_input:
    encoding_format: float
    input: text string
    model: model
model_name: BEI-baai-bge-m3-embedding-dense-truss-example
python_version: py39
resources:
  accelerator: H100
  cpu: '1'
  memory: 10Gi
  use_gpu: true
trt_llm:
  build:
    base_model: encoder
    checkpoint_repository:
      repo: BAAI/bge-m3
      revision: main
      source: HF
    max_num_tokens: 16384
  runtime:
    webserver_default_route: /v1/embeddings

--------------------------------------------------------------------------------
/11-embeddings-reranker-classification-tensorrt/BEI-baai-bge-multilingual-gemma2-multilingual-embedding/config.yaml:
--------------------------------------------------------------------------------
# this file was autogenerated by `generate_templates.py` - please do change via template only
model_metadata:
  example_model_input:
    encoding_format: float
    input: text string
    model: model
model_name: BEI-baai-bge-multilingual-gemma2-multilingual-embedding-truss-example
python_version: py39
resources:
  accelerator: H100_40GB
  cpu: '1'
  memory: 10Gi
  use_gpu: true
trt_llm:
  build:
    base_model: encoder
    checkpoint_repository:
      repo: BAAI/bge-multilingual-gemma2
      revision: main
      source: HF
    max_num_tokens: 16384
  runtime:
    webserver_default_route: /v1/embeddings

--------------------------------------------------------------------------------
/11-embeddings-reranker-classification-tensorrt/BEI-baai-bge-reranker-large/config.yaml:
--------------------------------------------------------------------------------
# this file was autogenerated by `generate_templates.py` - please do change via template only
model_metadata:
  example_model_input:
    query: What is Baseten?
    raw_scores: true
    return_text: true
    texts:
      - Deep Learning is ...
      - Baseten is a fast inference provider
    truncate: true
    truncation_direction: Right
model_name: BEI-baai-bge-reranker-large-truss-example
python_version: py39
resources:
  accelerator: L4
  cpu: '1'
  memory: 10Gi
  use_gpu: true
trt_llm:
  build:
    base_model: encoder
    checkpoint_repository:
      repo: BAAI/bge-reranker-large
      revision: main
      source: HF
    max_num_tokens: 16384
  runtime:
    webserver_default_route: /rerank

--------------------------------------------------------------------------------
/11-embeddings-reranker-classification-tensorrt/BEI-intfloat-e5-mistral-7b-instruct-embedding-fp8/config.yaml:
--------------------------------------------------------------------------------
# this file was autogenerated by `generate_templates.py` - please do change via template only
model_metadata:
  example_model_input:
    encoding_format: float
    input: text string
    model: model
model_name: BEI-intfloat-e5-mistral-7b-instruct-embedding-fp8-truss-example
python_version: py39
resources:
  accelerator: H100
  cpu: '1'
  memory: 10Gi
  use_gpu: true
trt_llm:
  build:
    base_model: encoder
    checkpoint_repository:
      repo: intfloat/e5-mistral-7b-instruct
      revision: main
      source: HF
    max_num_tokens: 32768
    num_builder_gpus: 2
    quantization_type: fp8
  runtime:
    webserver_default_route: /v1/embeddings

--------------------------------------------------------------------------------
/11-embeddings-reranker-classification-tensorrt/BEI-mixedbread-ai-mxbai-embed-large-v1-embedding/config.yaml:
--------------------------------------------------------------------------------
# this file was autogenerated by `generate_templates.py` - please do change via template only
model_metadata:
  example_model_input:
    encoding_format: float
    input: text string
    model: model
model_name: BEI-mixedbread-ai-mxbai-embed-large-v1-embedding-truss-example
python_version: py39
resources:
  accelerator: L4
  cpu: '1'
  memory: 10Gi
  use_gpu: true
trt_llm:
  build:
    base_model: encoder
    checkpoint_repository:
      repo: mixedbread-ai/mxbai-embed-large-v1
      revision: main
      source: HF
    max_num_tokens: 16384
  runtime:
    webserver_default_route: /v1/embeddings

--------------------------------------------------------------------------------
/11-embeddings-reranker-classification-tensorrt/BEI-nomic-ai-nomic-embed-code-fp8/config.yaml:
--------------------------------------------------------------------------------
# this file was autogenerated by `generate_templates.py` - please do change via template only
autogenerated by `generate_templates.py` - please do change via template only 2 | model_metadata: 3 | example_model_input: 4 | encoding_format: float 5 | input: text string 6 | model: model 7 | model_name: BEI-nomic-ai-nomic-embed-code-fp8-truss-example 8 | python_version: py39 9 | resources: 10 | accelerator: H100_40GB 11 | cpu: '1' 12 | memory: 10Gi 13 | use_gpu: true 14 | trt_llm: 15 | build: 16 | base_model: encoder 17 | checkpoint_repository: 18 | repo: nomic-ai/nomic-embed-code 19 | revision: main 20 | source: HF 21 | max_num_tokens: 32768 22 | num_builder_gpus: 1 23 | quantization_type: fp8 24 | runtime: 25 | webserver_default_route: /v1/embeddings 26 | -------------------------------------------------------------------------------- /11-embeddings-reranker-classification-tensorrt/BEI-qwen-qwen3-embedding-0.6b-fp8/config.yaml: -------------------------------------------------------------------------------- 1 | # this file was autogenerated by `generate_templates.py` - please do change via template only 2 | model_metadata: 3 | example_model_input: 4 | encoding_format: float 5 | input: text string 6 | model: model 7 | model_name: BEI-qwen-qwen3-embedding-0.6b-fp8-truss-example 8 | python_version: py39 9 | resources: 10 | accelerator: L4 11 | cpu: '1' 12 | memory: 10Gi 13 | use_gpu: true 14 | trt_llm: 15 | build: 16 | base_model: encoder 17 | checkpoint_repository: 18 | repo: michaelfeil/Qwen3-Embedding-0.6B-auto 19 | revision: main 20 | source: HF 21 | max_num_tokens: 32768 22 | num_builder_gpus: 4 23 | quantization_type: fp8 24 | runtime: 25 | webserver_default_route: /v1/embeddings 26 | -------------------------------------------------------------------------------- /11-embeddings-reranker-classification-tensorrt/BEI-qwen-qwen3-embedding-4b-fp8/config.yaml: -------------------------------------------------------------------------------- 1 | # this file was autogenerated by `generate_templates.py` - please do change via template only 2 | model_metadata: 3 | example_model_input: 4 | encoding_format: float 5 | input: text string 6 | model: model 7 | model_name: BEI-qwen-qwen3-embedding-4b-fp8-truss-example 8 | python_version: py39 9 | resources: 10 | accelerator: H100_40GB 11 | cpu: '1' 12 | memory: 10Gi 13 | use_gpu: true 14 | trt_llm: 15 | build: 16 | base_model: encoder 17 | checkpoint_repository: 18 | repo: michaelfeil/Qwen3-Embedding-4B-auto 19 | revision: main 20 | source: HF 21 | max_num_tokens: 40960 22 | num_builder_gpus: 1 23 | quantization_type: fp8 24 | runtime: 25 | webserver_default_route: /v1/embeddings 26 | -------------------------------------------------------------------------------- /11-embeddings-reranker-classification-tensorrt/BEI-qwen-qwen3-embedding-8b-fp8/config.yaml: -------------------------------------------------------------------------------- 1 | # this file was autogenerated by `generate_templates.py` - please do change via template only 2 | model_metadata: 3 | example_model_input: 4 | encoding_format: float 5 | input: text string 6 | model: model 7 | model_name: BEI-qwen-qwen3-embedding-8b-fp8-truss-example 8 | python_version: py39 9 | resources: 10 | accelerator: H100_40GB 11 | cpu: '1' 12 | memory: 10Gi 13 | use_gpu: true 14 | trt_llm: 15 | build: 16 | base_model: encoder 17 | checkpoint_repository: 18 | repo: michaelfeil/Qwen3-Embedding-8B-auto 19 | revision: main 20 | source: HF 21 | max_num_tokens: 40960 22 | num_builder_gpus: 1 23 | quantization_type: fp8 24 | runtime: 25 | webserver_default_route: /v1/embeddings 26 | 
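The BEI embedding configs above all expose an OpenAI-compatible `/v1/embeddings` route (`webserver_default_route`), and each `model_metadata.example_model_input` documents the expected request body. Below is a minimal sketch of calling one of these models once deployed; the deployment URL and API key are placeholders to substitute from your own deployment, and the exact endpoint path depends on how the model is exposed.

```python
# Minimal sketch, assuming a deployed BEI embedding truss from one of the
# configs above. DEPLOYMENT_URL and API_KEY are placeholders, not real values.
import requests

DEPLOYMENT_URL = ""  # your model's /v1/embeddings endpoint
API_KEY = ""  # your Baseten API key

resp = requests.post(
    DEPLOYMENT_URL,
    headers={"Authorization": f"Api-Key {API_KEY}"},
    # Mirrors `example_model_input` from the configs above.
    json={"input": "text string", "model": "model", "encoding_format": "float"},
)
resp.raise_for_status()
# OpenAI-compatible embedding responses carry vectors under data[i].embedding.
print(resp.json()["data"][0]["embedding"][:8])
```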
-------------------------------------------------------------------------------- /11-embeddings-reranker-classification-tensorrt/BEI-qwen-qwen3-reranker-0.6b-fp8/config.yaml: -------------------------------------------------------------------------------- 1 | # this file was autogenerated by `generate_templates.py` - please do change via template only 2 | model_metadata: 3 | example_model_input: 4 | encoding_format: float 5 | input: text string 6 | model: model 7 | model_name: BEI-qwen-qwen3-reranker-0.6b-fp8-truss-example 8 | python_version: py39 9 | resources: 10 | accelerator: L4 11 | cpu: '1' 12 | memory: 10Gi 13 | use_gpu: true 14 | trt_llm: 15 | build: 16 | base_model: encoder 17 | checkpoint_repository: 18 | repo: michaelfeil/Qwen3-Reranker-0.6B-seq 19 | revision: main 20 | source: HF 21 | max_num_tokens: 40960 22 | num_builder_gpus: 4 23 | quantization_type: fp8 24 | runtime: 25 | webserver_default_route: /v1/embeddings 26 | -------------------------------------------------------------------------------- /11-embeddings-reranker-classification-tensorrt/BEI-qwen-qwen3-reranker-4b-fp8/config.yaml: -------------------------------------------------------------------------------- 1 | # this file was autogenerated by `generate_templates.py` - please do change via template only 2 | model_metadata: 3 | example_model_input: 4 | encoding_format: float 5 | input: text string 6 | model: model 7 | model_name: BEI-qwen-qwen3-reranker-4b-fp8-truss-example 8 | python_version: py39 9 | resources: 10 | accelerator: H100_40GB 11 | cpu: '1' 12 | memory: 10Gi 13 | use_gpu: true 14 | trt_llm: 15 | build: 16 | base_model: encoder 17 | checkpoint_repository: 18 | repo: michaelfeil/Qwen3-Reranker-4B-seq 19 | revision: main 20 | source: HF 21 | max_num_tokens: 40960 22 | num_builder_gpus: 1 23 | quantization_type: fp8 24 | runtime: 25 | webserver_default_route: /v1/embeddings 26 | -------------------------------------------------------------------------------- /11-embeddings-reranker-classification-tensorrt/BEI-qwen-qwen3-reranker-8b-fp8/config.yaml: -------------------------------------------------------------------------------- 1 | # this file was autogenerated by `generate_templates.py` - please do change via template only 2 | model_metadata: 3 | example_model_input: 4 | encoding_format: float 5 | input: text string 6 | model: model 7 | model_name: BEI-qwen-qwen3-reranker-8b-fp8-truss-example 8 | python_version: py39 9 | resources: 10 | accelerator: H100_40GB 11 | cpu: '1' 12 | memory: 10Gi 13 | use_gpu: true 14 | trt_llm: 15 | build: 16 | base_model: encoder 17 | checkpoint_repository: 18 | repo: michaelfeil/Qwen3-Reranker-8B-seq 19 | revision: main 20 | source: HF 21 | max_num_tokens: 40960 22 | num_builder_gpus: 1 23 | quantization_type: fp8 24 | runtime: 25 | webserver_default_route: /v1/embeddings 26 | -------------------------------------------------------------------------------- /11-embeddings-reranker-classification-tensorrt/BEI-salesforce-sfr-embedding-mistral-fp8/config.yaml: -------------------------------------------------------------------------------- 1 | # this file was autogenerated by `generate_templates.py` - please do change via template only 2 | model_metadata: 3 | example_model_input: 4 | encoding_format: float 5 | input: text string 6 | model: model 7 | model_name: BEI-salesforce-sfr-embedding-mistral-fp8-truss-example 8 | python_version: py39 9 | resources: 10 | accelerator: H100_40GB 11 | cpu: '1' 12 | memory: 10Gi 13 | use_gpu: true 14 | trt_llm: 15 | build: 16 | base_model: 
encoder 17 | checkpoint_repository: 18 | repo: Salesforce/SFR-Embedding-Mistral 19 | revision: main 20 | source: HF 21 | max_num_tokens: 32768 22 | num_builder_gpus: 1 23 | quantization_type: fp8 24 | runtime: 25 | webserver_default_route: /v1/embeddings 26 | -------------------------------------------------------------------------------- /11-embeddings-reranker-classification-tensorrt/BEI-samlowe-roberta-base-go_emotions-classification/config.yaml: -------------------------------------------------------------------------------- 1 | # this file was autogenerated by `generate_templates.py` - please do change via template only 2 | model_metadata: 3 | example_model_input: 4 | inputs: 5 | - - Baseten is a fast inference provider 6 | - - Classify this separately. 7 | raw_scores: true 8 | truncate: true 9 | truncation_direction: Right 10 | model_name: BEI-samlowe-roberta-base-go_emotions-classification-truss-example 11 | python_version: py39 12 | resources: 13 | accelerator: L4 14 | cpu: '1' 15 | memory: 10Gi 16 | use_gpu: true 17 | trt_llm: 18 | build: 19 | base_model: encoder 20 | checkpoint_repository: 21 | repo: SamLowe/roberta-base-go_emotions 22 | revision: main 23 | source: HF 24 | max_num_tokens: 16384 25 | runtime: 26 | webserver_default_route: /predict 27 | -------------------------------------------------------------------------------- /11-embeddings-reranker-classification-tensorrt/BEI-snowflake-snowflake-arctic-embed-l-v2.0/config.yaml: -------------------------------------------------------------------------------- 1 | # this file was autogenerated by `generate_templates.py` - please do change via template only 2 | model_metadata: 3 | example_model_input: 4 | encoding_format: float 5 | input: text string 6 | model: model 7 | model_name: BEI-snowflake-snowflake-arctic-embed-l-v2.0-truss-example 8 | python_version: py39 9 | resources: 10 | accelerator: H100 11 | cpu: '1' 12 | memory: 10Gi 13 | use_gpu: true 14 | trt_llm: 15 | build: 16 | base_model: encoder 17 | checkpoint_repository: 18 | repo: Snowflake/snowflake-arctic-embed-l-v2.0 19 | revision: main 20 | source: HF 21 | max_num_tokens: 16384 22 | runtime: 23 | webserver_default_route: /v1/embeddings 24 | -------------------------------------------------------------------------------- /11-embeddings-reranker-classification-tensorrt/BEI-whereisai-uae-large-v1-embedding/config.yaml: -------------------------------------------------------------------------------- 1 | # this file was autogenerated by `generate_templates.py` - please do change via template only 2 | model_metadata: 3 | example_model_input: 4 | encoding_format: float 5 | input: text string 6 | model: model 7 | model_name: BEI-whereisai-uae-large-v1-embedding-truss-example 8 | python_version: py39 9 | resources: 10 | accelerator: L4 11 | cpu: '1' 12 | memory: 10Gi 13 | use_gpu: true 14 | trt_llm: 15 | build: 16 | base_model: encoder 17 | checkpoint_repository: 18 | repo: WhereIsAI/UAE-Large-V1 19 | revision: main 20 | source: HF 21 | max_num_tokens: 16384 22 | runtime: 23 | webserver_default_route: /v1/embeddings 24 | -------------------------------------------------------------------------------- /11-embeddings-reranker-classification-tensorrt/templating/.internal_tei/Dockerfile: -------------------------------------------------------------------------------- 1 | ARG TAG=1.7.1 2 | # this image builds a truss-compatible image with the text-embeddings-inference image as base 3 | # it mainly requires python3 4 | # optional, git and git-lfs are installed to allow for easy 
cloning of the huggingface model repos. 5 | FROM ghcr.io/huggingface/text-embeddings-inference:${TAG} 6 | RUN apt-get update && apt-get install -y python3 python3-pip git git-lfs 7 | RUN git lfs install 8 | ENTRYPOINT ["text-embeddings-router"] 9 | CMD ["--json-output"] 10 | -------------------------------------------------------------------------------- /11-embeddings-reranker-classification-tensorrt/templating/.internal_tei/roll_out_docker.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | # Map architectures to prefixes 5 | declare -A ARCHES=( 6 | ["cpu"]="cpu-" 7 | ["turing"]="turing-" 8 | ["ampere80"]="" 9 | ["ampere86"]="86-" 10 | ["adalovelace"]="89-" 11 | ["hopper"]="hopper-" 12 | ) 13 | 14 | # Define version and target 15 | VERSION="1.7.1" 16 | TARGET="baseten/text-embeddings-inference-mirror" 17 | 18 | # Build and push images 19 | for ARCH in "${!ARCHES[@]}"; do 20 | ARCH_PREFIX=${ARCHES[$ARCH]} 21 | TAG="${TARGET}:${ARCH_PREFIX}${VERSION}" 22 | 23 | echo "Building and pushing image for $ARCH: $TAG" 24 | 25 | docker buildx build -t "$TAG" --build-arg TAG="${ARCH_PREFIX}${VERSION}" --push . 26 | done 27 | 28 | echo "All images have been built and pushed." 29 | -------------------------------------------------------------------------------- /11-embeddings-reranker-classification-tensorrt/templating/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/11-embeddings-reranker-classification-tensorrt/templating/README.md -------------------------------------------------------------------------------- /assets/comfyui-screenshot-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/assets/comfyui-screenshot-1.png -------------------------------------------------------------------------------- /assets/comfyui-screenshot-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/assets/comfyui-screenshot-2.png -------------------------------------------------------------------------------- /assets/comfyui-screenshot-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/assets/comfyui-screenshot-3.png -------------------------------------------------------------------------------- /audiogen-medium/config.yaml: -------------------------------------------------------------------------------- 1 | description: AudioGen is a simple and controllable model for audio generation developed 2 | by Facebook AI Research. 
3 | environment_variables: {} 4 | external_package_dirs: [] 5 | model_metadata: 6 | avatar_url: https://cdn.baseten.co/production/static/explore/meta.png 7 | cover_image_url: https://cdn.baseten.co/production/static/explore/musicgen-cover.png 8 | example_model_input: 9 | duration: 8 10 | prompts: 11 | - dog barking 12 | - siren of an emergency vehicle 13 | - footsteps in a corridor 14 | tags: 15 | - text-to-audio 16 | model_name: AudioGen medium 17 | python_version: py39 18 | requirements: 19 | - torch>=2 20 | - git+https://github.com/facebookresearch/audiocraft.git 21 | - torchaudio 22 | resources: 23 | accelerator: A10G 24 | cpu: '3' 25 | memory: 14Gi 26 | use_gpu: true 27 | secrets: {} 28 | system_packages: 29 | - ffmpeg 30 | -------------------------------------------------------------------------------- /audiogen-medium/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/audiogen-medium/model/__init__.py -------------------------------------------------------------------------------- /bin/validate_ci.py: -------------------------------------------------------------------------------- 1 | import truss 2 | import yaml 3 | 4 | with open("ci.yaml", "r") as file: 5 | paths = yaml.safe_load(file) 6 | 7 | for path in paths["tests"]: 8 | _ = truss.load(path) 9 | -------------------------------------------------------------------------------- /binocular/config.yaml: -------------------------------------------------------------------------------- 1 | environment_variables: {} 2 | external_package_dirs: [] 3 | model_cache: 4 | - allow_patterns: 5 | - '*.bin' 6 | ignore_patterns: 7 | - coreml/* 8 | repo_id: tiiuae/falcon-7b 9 | - allow_patterns: 10 | - '*.bin' 11 | ignore_patterns: 12 | - coreml/* 13 | repo_id: tiiuae/falcon-7b-instruct 14 | model_name: Binoculars 15 | python_version: py311 16 | requirements: 17 | - git+https://github.com/ahans30/Binoculars.git 18 | resources: 19 | accelerator: A10G:2 20 | secrets: {} 21 | system_packages: [] 22 | -------------------------------------------------------------------------------- /binocular/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/binocular/model/__init__.py -------------------------------------------------------------------------------- /binocular/model/model.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from binoculars import Binoculars 4 | 5 | MINIMUM_TOKENS = 64 6 | 7 | 8 | class Model: 9 | def __init__(self, **kwargs): 10 | self._model = None 11 | 12 | def load(self): 13 | # Load model here and assign to self._model.
14 | self._model = Binoculars() 15 | self._tokenizer = self._model.tokenizer 16 | 17 | def count_tokens(self, text): 18 | return len(self._tokenizer(text).input_ids) 19 | 20 | def predict(self, model_input: dict): 21 | input_text = model_input.pop("text") 22 | if self.count_tokens(input_text) < MINIMUM_TOKENS: 23 | logging.warning("Insufficient content length") 24 | return {} 25 | 26 | return { 27 | "score": self._model.compute_score(input_text), 28 | "label": self._model.predict(input_text), 29 | } 30 | -------------------------------------------------------------------------------- /binocular/packages/config.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | huggingface_config = { 4 | # Only required for private models from Huggingface (e.g. LLaMA models) 5 | "TOKEN": os.environ.get("HF_TOKEN", None) 6 | } 7 | -------------------------------------------------------------------------------- /chatterbox-tts/config.yaml: -------------------------------------------------------------------------------- 1 | model_name: Chatterbox TTS 2 | base_image: 3 | image: jojobaseten/truss-numpy-1.26.0-gpu:0.4 4 | python_executable_path: /usr/bin/python3 5 | python_version: py312 6 | requirements: 7 | - chatterbox-tts 8 | resources: 9 | accelerator: H100 10 | cpu: '1' 11 | memory: 40Gi 12 | use_gpu: true 13 | secrets: 14 | hf_access_token: null -------------------------------------------------------------------------------- /chatterbox-tts/docker/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM baseten/truss-server-base:3.12-gpu-v0.9.0 2 | 3 | # Fix the urllib3/six dependency issue first 4 | RUN pip install --upgrade --force-reinstall urllib3 --no-cache-dir 5 | 6 | RUN pip uninstall numpy -y || true 7 | RUN pip install numpy==1.26.0 --no-cache-dir -------------------------------------------------------------------------------- /chatterbox-tts/docker/docker_build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | DOCKER_USERNAME="YOUR_DOCKER_USERNAME" 4 | IMAGE_NAME="truss-numpy-1.26.0-gpu" 5 | VERSION="0.1" 6 | 7 | docker buildx build --platform linux/amd64 -t $IMAGE_NAME:$VERSION --load .
8 | docker tag $IMAGE_NAME:$VERSION $DOCKER_USERNAME/$IMAGE_NAME:$VERSION 9 | docker push $DOCKER_USERNAME/$IMAGE_NAME:$VERSION -------------------------------------------------------------------------------- /chatterbox-tts/input/obama_8s.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/chatterbox-tts/input/obama_8s.wav -------------------------------------------------------------------------------- /chatterbox-tts/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/chatterbox-tts/model/__init__.py -------------------------------------------------------------------------------- /ci.yaml: -------------------------------------------------------------------------------- 1 | tests: 2 | - 01-getting-started-bert 3 | - 02-llm 4 | - 03-llm-with-streaming 5 | - 04-image-generation 6 | - 05-speech-to-text 7 | - 06-high-performance-cached-weights 8 | - 10-using-system-packages 9 | - mistral/mistral-7b 10 | - mistral/mistral-7b-instruct 11 | - mistral/mistral-7b-chat 12 | - whisper/whisper-v3-truss 13 | - gfp-gan 14 | - stable-diffusion/stable-diffusion-xl-1.0 15 | - whisper/faster-whisper-v2 16 | - whisper/faster-whisper-v3 17 | - llama/llama-2-7b-chat 18 | - playground-v2-aesthetic 19 | - llama/tinyllama-1.1B-chat-v1.0 20 | -------------------------------------------------------------------------------- /clip/config.yaml: -------------------------------------------------------------------------------- 1 | environment_variables: {} 2 | external_package_dirs: [] 3 | model_metadata: 4 | example_model_input: 5 | url: https://images.pexels.com/photos/1170986/pexels-photo-1170986.jpeg?auto=compress&cs=tinysrgb&w=1600 6 | model_name: clip-example 7 | python_version: py311 8 | requirements: 9 | - transformers==4.47.1 10 | - pillow 11 | - torch 12 | resources: 13 | accelerator: A10G 14 | cpu: '3' 15 | memory: 14Gi 16 | use_gpu: true 17 | secrets: {} 18 | system_packages: [] 19 | -------------------------------------------------------------------------------- /clip/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/clip/model/__init__.py -------------------------------------------------------------------------------- /cogvlm/config.yaml: -------------------------------------------------------------------------------- 1 | environment_variables: {} 2 | external_package_dirs: [] 3 | model_name: CogVLM 4 | python_version: py311 5 | requirements: 6 | - torch==2.0.1 7 | - sentencepiece==0.1.99 8 | - protobuf==4.25.1 9 | - transformers==4.35.2 10 | - einops==0.7.0 11 | - torchvision==0.15.2 12 | - Pillow==10.1.0 13 | - xformers==0.0.22 14 | - accelerate==0.25.0 15 | resources: 16 | accelerator: A100 17 | cpu: '3' 18 | memory: 15Gi 19 | use_gpu: true 20 | secrets: {} 21 | system_packages: [] 22 | -------------------------------------------------------------------------------- /cogvlm/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/cogvlm/model/__init__.py 
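`ci.yaml` above enumerates the example directories exercised by CI, and `bin/validate_ci.py` simply loads each one with `truss.load` to catch malformed trusses. Below is a small illustrative extension of that script (hypothetical, not part of the repo) that additionally warns when an example omits `model_metadata.example_model_input`, which most configs here rely on:

```python
# Illustrative sketch extending bin/validate_ci.py (hypothetical, not in the
# repo): load each truss as CI does, then inspect its config.yaml directly.
import pathlib

import truss
import yaml

with open("ci.yaml", "r") as file:
    paths = yaml.safe_load(file)

for path in paths["tests"]:
    _ = truss.load(path)  # raises if the truss is malformed
    config = yaml.safe_load(pathlib.Path(path, "config.yaml").read_text())
    metadata = config.get("model_metadata") or {}
    if "example_model_input" not in metadata:
        print(f"warning: {path} has no model_metadata.example_model_input")
```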
-------------------------------------------------------------------------------- /comfyui-truss/config.yaml: -------------------------------------------------------------------------------- 1 | base_image: 2 | image: bolabaseten/comfyui-truss-base:6a7bc35 3 | python_executable_path: /usr/bin/python3 4 | description: Deploy a ComfyUI workflow as a Truss 5 | environment_variables: {} 6 | external_package_dirs: [] 7 | model_metadata: 8 | example_model_input: 9 | workflow_values: 10 | controlnet_image: https://storage.googleapis.com/logos-bucket-01/baseten_logo.png 11 | negative_prompt: blurry, text, low quality 12 | positive_prompt: An igloo on a snowy day, 4k, hd 13 | model_name: ComfyUI Workflow 14 | python_version: py39 15 | requirements: 16 | - websocket-client==1.6.4 17 | - accelerate==0.23.0 18 | - opencv-python 19 | resources: 20 | accelerator: A10G 21 | cpu: '3' 22 | memory: 14Gi 23 | use_gpu: true 24 | secrets: {} 25 | system_packages: 26 | - ffmpeg 27 | - libgl1-mesa-glx 28 | -------------------------------------------------------------------------------- /comfyui-truss/data/model.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "url": "https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0/resolve/main/sd_xl_base_1.0.safetensors", 4 | "path": "models/checkpoints/sd_xl_base_1.0.safetensors" 5 | }, 6 | { 7 | "url": "https://huggingface.co/diffusers/controlnet-canny-sdxl-1.0/resolve/main/diffusion_pytorch_model.fp16.safetensors", 8 | "path": "models/controlnet/diffusers_xl_canny_full.safetensors" 9 | } 10 | ] 11 | -------------------------------------------------------------------------------- /comfyui-truss/examples/sdxl-controlnet/model.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "url": "https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0/resolve/main/sd_xl_base_1.0.safetensors", 4 | "path": "models/checkpoints/sd_xl_base_1.0.safetensors" 5 | }, 6 | { 7 | "url": "https://huggingface.co/diffusers/controlnet-canny-sdxl-1.0/resolve/main/diffusion_pytorch_model.fp16.safetensors", 8 | "path": "models/controlnet/diffusers_xl_canny_full.safetensors" 9 | } 10 | ] 11 | -------------------------------------------------------------------------------- /comfyui-truss/examples/sdxl-with-refiner/model.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "url": "https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0/resolve/main/sd_xl_base_1.0.safetensors", 4 | "path": "models/checkpoints/sd_xl_base_1.0.safetensors" 5 | }, 6 | { 7 | "url": "https://huggingface.co/stabilityai/stable-diffusion-xl-refiner-1.0/resolve/main/sd_xl_refiner_1.0.safetensors", 8 | "path": "models/checkpoints/sd_xl_refiner_1.0.safetensors" 9 | } 10 | ] 11 | -------------------------------------------------------------------------------- /comfyui-truss/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/comfyui-truss/model/__init__.py -------------------------------------------------------------------------------- /control-net-qrcode/config.yaml: -------------------------------------------------------------------------------- 1 | environment_variables: {} 2 | external_package_dirs: [] 3 | model_metadata: 4 | example_model_input: 5 | prompt: A cubism painting of the Garden of Eden with
animals walking around, 6 | Andreas Rocha, matte painting concept art, a detailed matte painting 7 | qr_code_content: https://www.baseten.co 8 | model_name: control-net-qrcode 9 | python_version: py310 10 | requirements: 11 | - diffusers==0.21.1 12 | - torch==2.0.1 13 | - ftfy==6.1.1 14 | - scipy==1.9.3 15 | - transformers==4.25.1 16 | - accelerate==0.20.3 17 | - qrcode==7.4.2 18 | - xformers==0.0.21 19 | resources: 20 | accelerator: T4 21 | cpu: '3' 22 | memory: 14Gi 23 | use_gpu: true 24 | secrets: {} 25 | system_packages: [] 26 | -------------------------------------------------------------------------------- /control-net-qrcode/controlnet_qr_code_results.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/control-net-qrcode/controlnet_qr_code_results.gif -------------------------------------------------------------------------------- /control-net-qrcode/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/control-net-qrcode/model/__init__.py -------------------------------------------------------------------------------- /control-net-qrcode/twitter_mask.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/control-net-qrcode/twitter_mask.jpeg -------------------------------------------------------------------------------- /control-net-qrcode/twitter_output.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/control-net-qrcode/twitter_output.jpg -------------------------------------------------------------------------------- /custom-server/deepseek-v2-5-instruct-sglang/config.yaml: -------------------------------------------------------------------------------- 1 | base_image: 2 | image: lmsysorg/sglang:v0.4.0.post1-cu124 3 | model_metadata: 4 | repo_id: deepseek-ai/DeepSeek-V2.5-1210 5 | docker_server: 6 | start_command: sh -c "HF_TOKEN=$(cat /secrets/hf_access_token) python3 -m sglang.launch_server --model-path deepseek-ai/DeepSeek-V2.5-1210 --port 8000 --tp 8 --trust-remote-code" 7 | readiness_endpoint: /health 8 | liveness_endpoint: /health 9 | predict_endpoint: /v1/completions 10 | server_port: 8000 11 | resources: 12 | accelerator: H100:8 13 | use_gpu: true 14 | runtime: 15 | predict_concurrency : 32 16 | model_name: DeepSeek V2.5 1210 SGLang 17 | environment_variables: 18 | hf_access_token: null 19 | -------------------------------------------------------------------------------- /custom-server/llama3-70b-instruct-lmdeploy/config.yaml: -------------------------------------------------------------------------------- 1 | base_image: 2 | image: openmmlab/lmdeploy:v0.6.4-cu12 3 | model_metadata: 4 | repo_id: meta-llama/Llama-3.1-70B-Instruct 5 | docker_server: 6 | start_command: sh -c "HF_TOKEN=$(cat /secrets/hf_access_token) python3 -m lmdeploy serve api_server meta-llama/Llama-3.1-70B-Instruct --server-port 8000 --tp 4" 7 | readiness_endpoint: /health 8 | liveness_endpoint: /health 9 | predict_endpoint: /v1/completions 10 | server_port: 8000 11 | resources: 12 | accelerator: H100:4 13 | use_gpu: true 14 | runtime: 15 | 
predict_concurrency : 32 16 | model_name: Llama 3.1 70B Instruct LMDeploy 17 | environment_variables: 18 | hf_access_token: null 19 | -------------------------------------------------------------------------------- /custom-server/llama3-70b-instruct-sglang/config.yaml: -------------------------------------------------------------------------------- 1 | base_image: 2 | image: lmsysorg/sglang:v0.4.0.post1-cu124 3 | model_metadata: 4 | repo_id: meta-llama/Llama-3.1-70B-Instruct 5 | docker_server: 6 | start_command: sh -c "HF_TOKEN=$(cat /secrets/hf_access_token) python3 -m sglang.launch_server --model-path meta-llama/Llama-3.1-70B-Instruct --port 8000 --tp 4" 7 | readiness_endpoint: /health 8 | liveness_endpoint: /health 9 | predict_endpoint: /v1/completions 10 | server_port: 8000 11 | resources: 12 | accelerator: H100:4 13 | use_gpu: true 14 | runtime: 15 | predict_concurrency : 32 16 | model_name: Llama 3.1 70B Instruct SGLang 17 | environment_variables: 18 | hf_access_token: null 19 | -------------------------------------------------------------------------------- /custom-server/llama3-8b-instruct-lmdeploy/config.yaml: -------------------------------------------------------------------------------- 1 | base_image: 2 | image: openmmlab/lmdeploy:v0.6.4-cu12 3 | model_metadata: 4 | repo_id: meta-llama/Llama-3.1-8B-Instruct 5 | docker_server: 6 | start_command: sh -c "HF_TOKEN=$(cat /secrets/hf_access_token) python3 -m lmdeploy serve api_server meta-llama/Llama-3.1-8B-Instruct --server-port 8000" 7 | readiness_endpoint: /health 8 | liveness_endpoint: /health 9 | predict_endpoint: /v1/completions 10 | server_port: 8000 11 | resources: 12 | accelerator: H100 13 | use_gpu: true 14 | runtime: 15 | predict_concurrency : 32 16 | model_name: Llama 3.1 8B Instruct LMDeploy 17 | environment_variables: 18 | hf_access_token: null 19 | -------------------------------------------------------------------------------- /custom-server/llama3-8b-instruct-sglang/config.yaml: -------------------------------------------------------------------------------- 1 | base_image: 2 | image: lmsysorg/sglang:v0.4.0.post1-cu124 3 | model_metadata: 4 | repo_id: meta-llama/Llama-3.1-8B-Instruct 5 | docker_server: 6 | start_command: sh -c "HF_TOKEN=$(cat /secrets/hf_access_token) python3 -m sglang.launch_server --model-path meta-llama/Llama-3.1-8B-Instruct --port 8000" 7 | readiness_endpoint: /health 8 | liveness_endpoint: /health 9 | predict_endpoint: /v1/completions 10 | server_port: 8000 11 | resources: 12 | accelerator: H100 13 | use_gpu: true 14 | runtime: 15 | predict_concurrency : 32 16 | model_name: Llama 3.1 8B Instruct SGLang 17 | environment_variables: 18 | hf_access_token: null 19 | -------------------------------------------------------------------------------- /deepfloyd-xl/config.yaml: -------------------------------------------------------------------------------- 1 | description: Generate original images from text prompts. 
2 | environment_variables: {} 3 | external_package_dirs: [] 4 | model_metadata: 5 | avatar_url: https://cdn.baseten.co/production/static/explore/deep-floyd.png 6 | cover_image_url: https://cdn.baseten.co/production/static/explore/deepfloyd-cover.png 7 | tags: 8 | - image-generation 9 | model_name: Deepfloyd XL 10 | python_version: py39 11 | requirements: 12 | - diffusers 13 | - transformers 14 | - torch 15 | - scipy 16 | - accelerate 17 | - pillow 18 | - bitsandbytes 19 | - sentencepiece 20 | - huggingface_hub 21 | resources: 22 | accelerator: A10G 23 | cpu: '3' 24 | memory: 14Gi 25 | use_gpu: true 26 | secrets: 27 | hf_access_token: ENTER HF API KEY HERE 28 | spec_version: 2.0 29 | system_packages: [] 30 | -------------------------------------------------------------------------------- /deepfloyd-xl/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/deepfloyd-xl/model/__init__.py -------------------------------------------------------------------------------- /deepseek/engine-deepseek-r1-distill-llama-70b/README.md: -------------------------------------------------------------------------------- 1 | # DeepSeek-R1 Distill Llama 70B 2 | -------------------------------------------------------------------------------- /deepseek/engine-deepseek-r1-distill-llama-8b/README.md: -------------------------------------------------------------------------------- 1 | # DeepSeek-R1 Distill Llama 8B 2 | -------------------------------------------------------------------------------- /deepseek/engine-deepseek-r1-distill-qwen-14b/README.md: -------------------------------------------------------------------------------- 1 | # DeepSeek-R1 Distill Qwen 14B 2 | -------------------------------------------------------------------------------- /deepseek/engine-deepseek-r1-distill-qwen-32b/README.md: -------------------------------------------------------------------------------- 1 | # DeepSeek-R1 Distill Qwen 32B 2 | -------------------------------------------------------------------------------- /deepseek/engine-deepseek-r1-distill-qwen-7b/README.md: -------------------------------------------------------------------------------- 1 | # DeepSeek-R1 Distill Qwen 7B 2 | -------------------------------------------------------------------------------- /deepspeed-mii/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/deepspeed-mii/model/__init__.py -------------------------------------------------------------------------------- /dis-segmentation/config.yaml: -------------------------------------------------------------------------------- 1 | environment_variables: {} 2 | external_package_dirs: [] 3 | model_metadata: 4 | example_model_input: 5 | input_image: 6 | model_name: DIS Segmentation 7 | python_version: py310 8 | requirements: 9 | - torch==2.1.0 10 | - Pillow==9.4.0 11 | - numpy==1.23.5 12 | - gdown==4.7.3 13 | - torchvision==0.16.0 14 | - torchaudio==2.1.0 15 | - scikit-image==0.19.3 16 | resources: 17 | accelerator: T4 18 | memory: 2Gi 19 | use_gpu: true 20 | secrets: {} 21 | system_packages: [] 22 | -------------------------------------------------------------------------------- /dis-segmentation/model/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/dis-segmentation/model/__init__.py -------------------------------------------------------------------------------- /dis-segmentation/model/clone_repo_helper.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | 4 | 5 | def clone_repo(): 6 | git_repo_url = "https://github.com/xuebinqin/DIS" 7 | commit_hash = "ec4a4f4f8d967f744bf857149d5ee343b59766b0" 8 | git_clone_command = ["git", "clone", git_repo_url] 9 | 10 | # clone the repo 11 | subprocess.run(git_clone_command, check=True) 12 | print("Git repository cloned successfully!") 13 | 14 | os.chdir(os.path.join(os.getcwd(), "DIS", "IS-Net")) 15 | 16 | # Pin repo to a specific commit 17 | checkout_command = ["git", "checkout", commit_hash] 18 | subprocess.run(checkout_command, check=True) 19 | -------------------------------------------------------------------------------- /dockerfiles/ComfyUI.dockerfile: -------------------------------------------------------------------------------- 1 | FROM baseten/truss-server-base:3.11-gpu-v0.7.17 2 | 3 | ARG COMMIT_HASH=6a7bc35db845179a26e62534f3d4b789151e52fe 4 | 5 | RUN git clone https://github.com/comfyanonymous/ComfyUI.git /app/ComfyUI 6 | 7 | RUN cd /app/ComfyUI; git checkout $COMMIT_HASH; pip install -r requirements.txt 8 | -------------------------------------------------------------------------------- /flux/dev/config.yaml: -------------------------------------------------------------------------------- 1 | external_package_dirs: [] 2 | model_metadata: 3 | example_model_input: {"prompt": 'black forest gateau cake spelling out the words "FLUX DEV", tasty, food photography, dynamic shot'} 4 | repo_id: black-forest-labs/FLUX.1-dev 5 | model_name: Flux.1-dev 6 | python_version: py311 7 | requirements: 8 | - git+https://github.com/huggingface/diffusers.git@fc6a91e3834c35e57b398ad1c0d99f6f83557e04 9 | - transformers 10 | - accelerate 11 | - sentencepiece 12 | - protobuf 13 | resources: 14 | accelerator: H100_40GB 15 | use_gpu: true 16 | secrets: 17 | hf_access_token: null 18 | system_packages: 19 | - ffmpeg 20 | - libsm6 21 | - libxext6 22 | -------------------------------------------------------------------------------- /flux/dev/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/flux/dev/model/__init__.py -------------------------------------------------------------------------------- /flux/schnell/config.yaml: -------------------------------------------------------------------------------- 1 | external_package_dirs: [] 2 | model_metadata: 3 | example_model_input: {"prompt": 'black forest gateau cake spelling out the words "FLUX SCHNELL", tasty, food photography, dynamic shot'} 4 | repo_id: black-forest-labs/FLUX.1-schnell 5 | model_name: Flux.1-schnell 6 | python_version: py311 7 | requirements: 8 | - git+https://github.com/huggingface/diffusers.git@fc6a91e3834c35e57b398ad1c0d99f6f83557e04 9 | - transformers 10 | - accelerate 11 | - sentencepiece 12 | - protobuf 13 | resources: 14 | accelerator: H100_40GB 15 | use_gpu: true 16 | secrets: 17 | hf_access_token: null 18 | system_packages: 19 | - ffmpeg 20 | - libsm6 21 | - libxext6 22 | -------------------------------------------------------------------------------- /flux/schnell/model/__init__.py:
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/flux/schnell/model/__init__.py -------------------------------------------------------------------------------- /fotographer/zenctrl/README.md: -------------------------------------------------------------------------------- 1 | ![Header Image](images/banner_1.png) 2 | 3 | # Fotographer AI ZenCtrl 4 | 5 | Image-to-image model for generating in-context product photography. 6 | 7 | Deploy with `truss push --promote` 8 | 9 | Call with `python call.py` after providing the `model_id` from the deployed model. 10 | 11 | ### Example input image 12 | 13 | ![Speaker Input](images/speaker-input.png) 14 | 15 | ### Example output image 16 | 17 | ![Speaker Output](images/speaker-output.png) 18 | -------------------------------------------------------------------------------- /fotographer/zenctrl/config.yaml: -------------------------------------------------------------------------------- 1 | base_image: 2 | image: fotographerai/zenctrlstage:latest 3 | model_metadata: {} 4 | docker_server: 5 | start_command: sh -c "HF_TOKEN=$(cat /secrets/hf_access_token) python -m uvicorn app:app --host 0.0.0.0 --port 8000 --log-level debug" 6 | readiness_endpoint: /health 7 | liveness_endpoint: /health 8 | predict_endpoint: /generate 9 | server_port: 8000 10 | resources: 11 | accelerator: H100 12 | use_gpu: true 13 | model_name: ZenCtrl 14 | environment_variables: 15 | PORT: 8000 16 | HF_TOKEN: null 17 | runtime: 18 | predict_concurrency: 8 19 | secrets: 20 | hf_access_token: null 21 | -------------------------------------------------------------------------------- /fotographer/zenctrl/images/banner_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/fotographer/zenctrl/images/banner_1.png -------------------------------------------------------------------------------- /fotographer/zenctrl/images/camera.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/fotographer/zenctrl/images/camera.png -------------------------------------------------------------------------------- /fotographer/zenctrl/images/speaker-input.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/fotographer/zenctrl/images/speaker-input.png -------------------------------------------------------------------------------- /fotographer/zenctrl/images/speaker-output.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/fotographer/zenctrl/images/speaker-output.png -------------------------------------------------------------------------------- /gemma/gemma-2-27b-it-vllm/config.yaml: -------------------------------------------------------------------------------- 1 | model_name: "Gemma 2 27B Instruct VLLM" 2 | python_version: py311 3 | model_metadata: 4 | example_model_input: {"prompt": "what is the meaning of life"} 5 | repo_id: google/gemma-2-27b-it 6 | tensor_parallel: 1 7 | max_num_seqs: 16 8 | requirements: 9 | - vllm==0.5.1 10 | -
https://github.com/flashinfer-ai/flashinfer/releases/download/v0.0.8/flashinfer-0.0.8+cu121torch2.3-cp311-cp311-linux_x86_64.whl 11 | resources: 12 | accelerator: A100 13 | use_gpu: true 14 | runtime: 15 | predict_concurrency: 128 16 | secrets: 17 | hf_access_token: null 18 | -------------------------------------------------------------------------------- /gemma/gemma-2-27b-it-vllm/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/gemma/gemma-2-27b-it-vllm/model/__init__.py -------------------------------------------------------------------------------- /gemma/gemma-2-9b-it-vllm/config.yaml: -------------------------------------------------------------------------------- 1 | model_name: "Gemma 2 9B Instruct VLLM" 2 | python_version: py311 3 | model_metadata: 4 | example_model_input: {"prompt": "what is the meaning of life"} 5 | repo_id: google/gemma-2-9b-it 6 | tensor_parallel: 1 7 | requirements: 8 | - vllm==0.5.1 9 | - https://github.com/flashinfer-ai/flashinfer/releases/download/v0.0.8/flashinfer-0.0.8+cu121torch2.3-cp311-cp311-linux_x86_64.whl 10 | resources: 11 | accelerator: A100 12 | use_gpu: true 13 | runtime: 14 | predict_concurrency: 128 15 | secrets: 16 | hf_access_token: null 17 | -------------------------------------------------------------------------------- /gemma/gemma-2-9b-it-vllm/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/gemma/gemma-2-9b-it-vllm/model/__init__.py -------------------------------------------------------------------------------- /gfp-gan/data/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/gfp-gan/data/.gitkeep -------------------------------------------------------------------------------- /gfp-gan/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/gfp-gan/model/__init__.py -------------------------------------------------------------------------------- /image-segmentation/config.yaml: -------------------------------------------------------------------------------- 1 | environment_variables: {} 2 | external_package_dirs: [] 3 | model_name: Image segmentation 4 | python_version: py39 5 | requirements: 6 | - torchvision==0.9.1 7 | resources: 8 | cpu: 3000m 9 | memory: 8Gi 10 | use_gpu: false 11 | secrets: {} 12 | spec_version: 2.0 13 | system_packages: [] 14 | -------------------------------------------------------------------------------- /image-segmentation/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/image-segmentation/model/__init__.py -------------------------------------------------------------------------------- /ip-adapter/config.yaml: -------------------------------------------------------------------------------- 1 | environment_variables: {} 2 | external_package_dirs: [] 3 | model_name: IP Adapter 4 | python_version: py311 5 | requirements: 6 | - torch==2.1.1 7 | - diffusers==0.24.0 8 | - 
transformers==4.35.2 9 | resources: 10 | accelerator: A10G 11 | cpu: '3' 12 | memory: 15Gi 13 | use_gpu: true 14 | secrets: {} 15 | system_packages: [] 16 | -------------------------------------------------------------------------------- /ip-adapter/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/ip-adapter/model/__init__.py -------------------------------------------------------------------------------- /jsonformatter/config.yaml: -------------------------------------------------------------------------------- 1 | environment_variables: {} 2 | external_package_dirs: [] 3 | model_metadata: 4 | llm_model: databricks/dolly-v2-3b 5 | model_name: JsonFormatter 6 | python_version: py311 7 | requirements: 8 | - jsonformer 9 | - transformers 10 | - accelerate 11 | resources: 12 | accelerator: A10G 13 | secrets: {} 14 | system_packages: [] 15 | -------------------------------------------------------------------------------- /jsonformatter/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/jsonformatter/model/__init__.py -------------------------------------------------------------------------------- /kokoro/README.md: -------------------------------------------------------------------------------- 1 | Kokoro is a frontier TTS model for its size of 82 million parameters (text in/audio out). 2 | API: 3 | ```bash 4 | request: 5 | {"text": "Hello", "voice": "af", "speed": 1.0} 6 | 7 | text: str = defaults to "Hi, I'm kokoro" 8 | voice: str = defaults to "af", available options: "af", "af_bella", "af_sarah", "am_adam", "am_michael", "bf_emma", "bf_isabella", "bm_george", "bm_lewis", "af_nicole", "af_sky" 9 | speed: float = defaults to 1.0. 
The speed of the audio generated 10 | 11 | response: 12 | {"base64": "base64 encoded bytestring"} 13 | ``` 14 | -------------------------------------------------------------------------------- /kokoro/call.py: -------------------------------------------------------------------------------- 1 | import base64 2 | 3 | import httpx 4 | 5 | DEPLOYMENT_URL = "" 6 | API_KEY = "" 7 | # Create client for connection reuse 8 | with httpx.Client() as client: 9 | # Make the API request 10 | resp = client.post( 11 | DEPLOYMENT_URL, 12 | headers={"Authorization": f"Api-Key {API_KEY}"}, 13 | json={"text": "Hello world", "voice": "af", "speed": 1.0}, 14 | timeout=None, 15 | ) 16 | 17 | # Get the base64 encoded audio 18 | response_data = resp.json() 19 | audio_base64 = response_data["base64"] 20 | 21 | # Decode the base64 string 22 | audio_bytes = base64.b64decode(audio_base64) 23 | 24 | # Write to a WAV file 25 | with open("output.wav", "wb") as f: 26 | f.write(audio_bytes) 27 | 28 | print("Audio saved to output.wav") 29 | -------------------------------------------------------------------------------- /kokoro/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/kokoro/model/__init__.py -------------------------------------------------------------------------------- /layoutlm-document-qa/config.yaml: -------------------------------------------------------------------------------- 1 | description: Extract information from images of invoices 2 | environment_variables: {} 3 | external_package_dirs: [] 4 | model_metadata: 5 | avatar_url: https://cdn.baseten.co/production/static/explore/impira-logo.png 6 | cover_image_url: https://cdn.baseten.co/production/static/explore/document-qa.png 7 | example_model_input: 8 | prompt: What is the invoice number? 9 | url: https://templates.invoicehome.com/invoice-template-us-neat-750px.png 10 | tags: 11 | - text-generation 12 | model_name: LayoutLM Document QA 13 | python_version: py39 14 | requirements: 15 | - Pillow==10.0.0 16 | - pytesseract==0.3.10 17 | - torch==2.0.1 18 | - transformers==4.30.2 19 | resources: 20 | accelerator: null 21 | cpu: '4' 22 | memory: 16Gi 23 | use_gpu: false 24 | secrets: {} 25 | system_packages: 26 | - tesseract-ocr 27 | -------------------------------------------------------------------------------- /layoutlm-document-qa/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/layoutlm-document-qa/model/__init__.py -------------------------------------------------------------------------------- /layoutlm-document-qa/model/model.py: -------------------------------------------------------------------------------- 1 | from transformers import pipeline 2 | 3 | 4 | class Model: 5 | def __init__(self, **kwargs) -> None: 6 | self._data_dir = kwargs["data_dir"] 7 | self._config = kwargs["config"] 8 | self._secrets = kwargs["secrets"] 9 | self._model = None 10 | 11 | def load(self): 12 | self._model = pipeline( 13 | "document-question-answering", 14 | model="impira/layoutlm-document-qa", 15 | ) 16 | 17 | def predict(self, model_input): 18 | return self._model( 19 | model_input[ 20 | "url" 21 | ], # e.g. "https://templates.invoicehome.com/invoice-template-us-neat-750px.png" 22 | model_input["prompt"], # e.g. "What is the invoice number?"
23 | ) 24 | -------------------------------------------------------------------------------- /llama/engine-llama-3-1-70b-instruct/README.md: -------------------------------------------------------------------------------- 1 | # Llama 3.1 70B Instruct 2 | 3 | This deployment of Llama 3.1 70B Instruct uses the TensorRT-LLM Engine Builder. 4 | 5 | For details, see: https://docs.baseten.co/performance/examples/llama-trt 6 | -------------------------------------------------------------------------------- /llama/engine-llama-3-1-8b-instruct/README.md: -------------------------------------------------------------------------------- 1 | # Llama 3.1 8B Instruct 2 | 3 | This deployment of Llama 3.1 8B Instruct uses the TensorRT-LLM Engine Builder. 4 | 5 | For details, see: https://docs.baseten.co/performance/examples/llama-trt 6 | -------------------------------------------------------------------------------- /llama/engine-llama-3-3-70b-instruct/README.md: -------------------------------------------------------------------------------- 1 | # Llama 3.3 70B Instruct 2 | 3 | This deployment of Llama 3.3 70B Instruct uses the TensorRT-LLM Engine Builder. 4 | 5 | For details, see: https://docs.baseten.co/performance/examples/llama-trt 6 | -------------------------------------------------------------------------------- /llama/llama-2-13b-chat/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/llama/llama-2-13b-chat/model/__init__.py -------------------------------------------------------------------------------- /llama/llama-2-13b/config.yaml: -------------------------------------------------------------------------------- 1 | environment_variables: {} 2 | external_package_dirs: [] 3 | model_cache: 4 | - allow_patterns: 5 | - '*.json' 6 | - '*.safetensors' 7 | - '*.model' 8 | repo_id: meta-llama/Llama-2-13b-hf 9 | model_metadata: 10 | repo_id: meta-llama/Llama-2-13b-hf 11 | model_name: Llama 13B 12 | python_version: py39 13 | requirements: 14 | - accelerate==0.22.0 15 | - bitsandbytes==0.41.1 16 | - einops==0.6.1 17 | - faker==19.3.1 18 | - peft==0.5.0 19 | - safetensors==0.3.3 20 | - scipy==1.10.1 21 | - sentencepiece==0.1.99 22 | - torch==2.0.1 23 | - transformers==4.32.1 24 | resources: 25 | accelerator: A100:1 26 | cpu: '3' 27 | memory: 14Gi 28 | use_gpu: true 29 | secrets: 30 | hf_access_token: null 31 | system_packages: [] 32 | -------------------------------------------------------------------------------- /llama/llama-2-13b/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/llama/llama-2-13b/model/__init__.py -------------------------------------------------------------------------------- /llama/llama-2-70b-chat/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/llama/llama-2-70b-chat/model/__init__.py -------------------------------------------------------------------------------- /llama/llama-2-70b/config.yaml: -------------------------------------------------------------------------------- 1 | environment_variables: {} 2 | external_package_dirs: [] 3 | model_cache: 4 | - allow_patterns: 5 | - '*.json' 6 | - '*.safetensors' 7 | - '*.model' 8 | 
repo_id: meta-llama/Llama-2-70b-hf 9 | model_metadata: 10 | repo_id: meta-llama/Llama-2-70b-hf 11 | model_name: Llama 70B 12 | python_version: py39 13 | requirements: 14 | - accelerate==0.22.0 15 | - bitsandbytes==0.41.1 16 | - einops==0.6.1 17 | - faker==19.3.1 18 | - peft==0.5.0 19 | - safetensors==0.3.3 20 | - scipy==1.10.1 21 | - sentencepiece==0.1.99 22 | - torch==2.0.1 23 | - transformers==4.32.1 24 | resources: 25 | accelerator: A100:2 26 | cpu: '3' 27 | memory: 14Gi 28 | use_gpu: true 29 | secrets: 30 | hf_token: ENTER HF API KEY HERE 31 | system_packages: [] 32 | -------------------------------------------------------------------------------- /llama/llama-2-70b/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/llama/llama-2-70b/model/__init__.py -------------------------------------------------------------------------------- /llama/llama-2-7b-chat/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/llama/llama-2-7b-chat/model/__init__.py -------------------------------------------------------------------------------- /llama/llama-2-7b/config.yaml: -------------------------------------------------------------------------------- 1 | environment_variables: {} 2 | external_package_dirs: [] 3 | model_cache: 4 | - allow_patterns: 5 | - '*.json' 6 | - '*.safetensors' 7 | - '*.model' 8 | repo_id: meta-llama/Llama-2-7b-hf 9 | model_metadata: 10 | repo_id: meta-llama/Llama-2-7b-hf 11 | model_name: Llama 7B 12 | python_version: py39 13 | requirements: 14 | - accelerate==0.22.0 15 | - bitsandbytes==0.41.1 16 | - einops==0.6.1 17 | - faker==19.3.1 18 | - peft==0.5.0 19 | - safetensors==0.3.3 20 | - scipy==1.10.1 21 | - sentencepiece==0.1.99 22 | - torch==2.0.1 23 | - transformers==4.32.1 24 | resources: 25 | accelerator: A10G 26 | cpu: '3' 27 | memory: 14Gi 28 | use_gpu: true 29 | secrets: 30 | hf_access_token: ENTER HF ACCESS TOKEN HERE 31 | system_packages: [] 32 | -------------------------------------------------------------------------------- /llama/llama-2-7b/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/llama/llama-2-7b/model/__init__.py -------------------------------------------------------------------------------- /llama/llama-3-70b-instruct/config.yaml: -------------------------------------------------------------------------------- 1 | environment_variables: {} 2 | external_package_dirs: [] 3 | model_metadata: 4 | avatar_url: https://cdn.baseten.co/production/static/explore/meta.png 5 | cover_image_url: https://cdn.baseten.co/production/static/explore/llama.png 6 | repo_id: meta-llama/Meta-Llama-3-70B-Instruct 7 | tags: 8 | - text-generation 9 | model_name: Llama 3 70B Instruct 10 | python_version: py310 11 | requirements: 12 | - accelerate 13 | - einops 14 | - transformers 15 | - torch 16 | resources: 17 | accelerator: H100:2 18 | use_gpu: true 19 | secrets: 20 | hf_access_token: "your api key" 21 | system_packages: [] 22 | -------------------------------------------------------------------------------- /llama/llama-3-70b-instruct/model/__init__.py: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/llama/llama-3-70b-instruct/model/__init__.py -------------------------------------------------------------------------------- /llama/llama-3-8b-instruct/config.yaml: -------------------------------------------------------------------------------- 1 | environment_variables: {} 2 | external_package_dirs: [] 3 | model_metadata: 4 | avatar_url: https://cdn.baseten.co/production/static/explore/meta.png 5 | cover_image_url: https://cdn.baseten.co/production/static/explore/llama.png 6 | repo_id: meta-llama/Meta-Llama-3-8B-Instruct 7 | tags: 8 | - text-generation 9 | model_name: Llama 3 8B Instruct 10 | python_version: py310 11 | model_cache: 12 | - repo_id: meta-llama/Meta-Llama-3-8B-Instruct 13 | requirements: 14 | - accelerate 15 | - einops 16 | - transformers 17 | - torch 18 | resources: 19 | accelerator: A100 20 | use_gpu: true 21 | secrets: 22 | hf_access_token: "your-hf-access-token" 23 | system_packages: [] 24 | -------------------------------------------------------------------------------- /llama/llama-3-8b-instruct/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/llama/llama-3-8b-instruct/model/__init__.py -------------------------------------------------------------------------------- /llama/llama-3_1-405b-instruct/README.md: -------------------------------------------------------------------------------- 1 | # Llama 3.1 405B Instruct 2 | 3 | This is an implementation of Llama 3.1 405B for deployment on Baseten. 4 | 5 | - vLLM for faster inference 6 | - FP8 model weights 7 | - Runs on an 8xH100 instance 8 | 9 | Baseten offers private, secure deployments for LLMs like Llama 3.1 405B, including deployments to your own VPC. 10 | To deploy this model on Baseten, contact us at [support@baseten.co](mailto:support@baseten.co).
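Once deployed, the model is served behind Baseten's standard predict endpoint. A minimal sketch of calling it from Python follows; the model ID `abcd1234` and the `BASETEN_API_KEY` environment variable are placeholders for your own deployment's values, not part of this repository.

```python
# Minimal sketch: call a deployed Baseten model's predict endpoint.
# The model ID below is hypothetical; use the ID from your deployment.
import os

import requests

model_id = "abcd1234"
resp = requests.post(
    f"https://model-{model_id}.api.baseten.co/production/predict",
    headers={"Authorization": f"Api-Key {os.environ['BASETEN_API_KEY']}"},
    json={"prompt": "What is the meaning of life?"},
)
resp.raise_for_status()
print(resp.json())
```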
11 | -------------------------------------------------------------------------------- /llama/llama-3_1-405b-instruct/config.yaml: -------------------------------------------------------------------------------- 1 | model_name: "Llama 3.1 405B Instruct VLLM" 2 | python_version: py311 3 | model_metadata: 4 | example_model_input: {"prompt": "what is the meaning of life"} 5 | repo_id: meta-llama/Llama-3.1-405B-Instruct-FP8 6 | tensor_parallel: 8 7 | requirements: 8 | - vllm==0.5.3post1 9 | - transformers==4.43.1 10 | resources: 11 | accelerator: H100:8 12 | use_gpu: true 13 | runtime: 14 | predict_concurrency: 128 15 | secrets: 16 | hf_access_token: null 17 | -------------------------------------------------------------------------------- /llama/llama-3_1-405b-instruct/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/llama/llama-3_1-405b-instruct/model/__init__.py -------------------------------------------------------------------------------- /llama/llama-3_1-405b-instruct/model/sighelper.py: -------------------------------------------------------------------------------- 1 | import fileinput 2 | import sys 3 | 4 | MODULE_FILE_PATH = ( 5 | "/usr/local/lib/python3.11/dist-packages/vllm/executor/multiproc_gpu_executor.py" 6 | ) 7 | 8 | 9 | def patch(): 10 | # This is for SIGINT 11 | search_text = "signal.signal(signal.SIGINT, shutdown)" 12 | 13 | with fileinput.FileInput(MODULE_FILE_PATH, inplace=True) as file: 14 | for line in file: 15 | if search_text in line: 16 | line = " # " + line.lstrip() 17 | sys.stdout.write(line) 18 | 19 | # This is for SIGTERM 20 | search_text = "signal.signal(signal.SIGTERM, shutdown)" 21 | 22 | with fileinput.FileInput(MODULE_FILE_PATH, inplace=True) as file: 23 | for line in file: 24 | if search_text in line: 25 | line = " # " + line.lstrip() 26 | sys.stdout.write(line) 27 | -------------------------------------------------------------------------------- /llama/llama-3_1-8b-instruct-sglang/config.yaml: -------------------------------------------------------------------------------- 1 | model_name: "Llama 3.1 8B Instruct SGLang" 2 | python_version: py311 3 | model_metadata: 4 | example_model_input: {"prompt": "what is the meaning of life"} 5 | repo_id: meta-llama/Llama-3.1-8B-Instruct 6 | tensor_parallel: 1 7 | requirements: 8 | - sglang[all]==0.3.0 9 | - https://github.com/flashinfer-ai/flashinfer/releases/download/v0.1.6/flashinfer-0.1.6+cu121torch2.4-cp311-cp311-linux_x86_64.whl 10 | model_cache: 11 | - repo_id: meta-llama/Llama-3.1-8B-Instruct 12 | ignore_patterns: 13 | - "original/*" 14 | - "*.pth" 15 | resources: 16 | accelerator: H100 17 | use_gpu: true 18 | runtime: 19 | predict_concurrency: 128 20 | secrets: 21 | hf_access_token: null 22 | -------------------------------------------------------------------------------- /llama/llama-3_1-8b-instruct-sglang/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/llama/llama-3_1-8b-instruct-sglang/model/__init__.py -------------------------------------------------------------------------------- /llama/llama-3_1-8b-instruct/config.yaml: -------------------------------------------------------------------------------- 1 | model_name: "Llama 3.1 8B Instruct VLLM" 2 | python_version: py311 3 | model_metadata: 4 | 
example_model_input: {"prompt": "what is the meaning of life"} 5 | repo_id: meta-llama/Llama-3.1-8B-Instruct 6 | tensor_parallel: 1 7 | requirements: 8 | - vllm==0.5.3post1 9 | model_cache: 10 | - repo_id: meta-llama/Llama-3.1-8B-Instruct 11 | ignore_patterns: 12 | - "original/*" 13 | - "*.pth" 14 | resources: 15 | accelerator: H100_40GB 16 | use_gpu: true 17 | runtime: 18 | predict_concurrency: 128 19 | secrets: 20 | hf_access_token: null 21 | -------------------------------------------------------------------------------- /llama/llama-3_1-8b-instruct/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/llama/llama-3_1-8b-instruct/model/__init__.py -------------------------------------------------------------------------------- /llama/llama-3_1_70b-instruct/config.yaml: -------------------------------------------------------------------------------- 1 | environment_variables: {} 2 | external_package_dirs: [] 3 | model_metadata: {} 4 | model_name: Llama 3.1 70B vLLM 5 | python_version: py310 6 | requirements: 7 | - vllm==0.5.3post1 8 | - accelerate 9 | resources: 10 | accelerator: A100:4 11 | use_gpu: true 12 | runtime: 13 | predict_concurrency: 128 14 | secrets: 15 | hf_access_token: "" 16 | system_packages: 17 | - python3.10-venv 18 | -------------------------------------------------------------------------------- /llama/llama-3_1_70b-instruct/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/llama/llama-3_1_70b-instruct/model/__init__.py -------------------------------------------------------------------------------- /llama/llama-3_1_70b-instruct/model/sighelper.py: -------------------------------------------------------------------------------- 1 | import fileinput 2 | import sys 3 | 4 | MODULE_FILE_PATH = ( 5 | "/usr/local/lib/python3.10/dist-packages/vllm/executor/multiproc_gpu_executor.py" 6 | ) 7 | 8 | 9 | def patch(): 10 | # This is for SIGINT 11 | search_text = "signal.signal(signal.SIGINT, shutdown)" 12 | 13 | with fileinput.FileInput(MODULE_FILE_PATH, inplace=True) as file: 14 | for line in file: 15 | if search_text in line: 16 | line = " # " + line.lstrip() 17 | sys.stdout.write(line) 18 | 19 | # This is for SIGTERM 20 | search_text = "signal.signal(signal.SIGTERM, shutdown)" 21 | 22 | with fileinput.FileInput(MODULE_FILE_PATH, inplace=True) as file: 23 | for line in file: 24 | if search_text in line: 25 | line = " # " + line.lstrip() 26 | sys.stdout.write(line) 27 | -------------------------------------------------------------------------------- /llama/llama-4-maverick-17b-128e-instruct-fp8-vllm/data/do.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | HF_TOKEN=$(cat /secrets/hf_access_token) vllm serve meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 --served-model-name llama --max-model-len 340000 --tensor-parallel-size 8 --distributed-executor-backend mp --gpu-memory-utilization 0.95 --kv-cache-dtype fp8 --limit-mm-per-prompt image=10 --override-generation-config='{"attn_temperature_tuning": true}' 3 | -------------------------------------------------------------------------------- /llama/llama-4-scout-17b-16e-instruct-bf16-vllm/data/do.sh: -------------------------------------------------------------------------------- 1 
| #!/bin/bash 2 | HF_TOKEN=$(cat /secrets/hf_access_token) vllm serve meta-llama/Llama-4-Scout-17B-16E-Instruct --served-model-name llama --max-model-len 131072 --tensor-parallel-size 4 --distributed-executor-backend mp --gpu-memory-utilization 0.95 --kv-cache-dtype fp8 --limit-mm-per-prompt image=10 --override-generation-config='{"attn_temperature_tuning": true}' 3 | -------------------------------------------------------------------------------- /llama/llama-7b-exllama-streaming/config.yaml: -------------------------------------------------------------------------------- 1 | base_image: 2 | image: runpod/pytorch:2.0.1-py3.10-cuda11.8.0-devel 3 | python_executable_path: /usr/bin/python 4 | environment_variables: {} 5 | external_package_dirs: [] 6 | model_name: exllama-streaming 7 | python_version: py311 8 | requirements: 9 | - exllamav2==0.0.5 10 | resources: 11 | accelerator: A100 12 | cpu: '1' 13 | memory: 2Gi 14 | use_gpu: true 15 | secrets: {} 16 | system_packages: [] 17 | -------------------------------------------------------------------------------- /llama/llama-7b-exllama-streaming/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/llama/llama-7b-exllama-streaming/model/__init__.py -------------------------------------------------------------------------------- /llama/llama-7b-exllama/config.yaml: -------------------------------------------------------------------------------- 1 | base_image: 2 | image: runpod/pytorch:2.0.1-py3.10-cuda11.8.0-devel 3 | python_executable_path: /usr/bin/python 4 | environment_variables: {} 5 | external_package_dirs: [] 6 | model_name: exllama 7 | python_version: py311 8 | requirements: 9 | - exllamav2==0.0.5 10 | resources: 11 | accelerator: A100 12 | cpu: '1' 13 | memory: 2Gi 14 | use_gpu: true 15 | secrets: {} 16 | system_packages: [] 17 | -------------------------------------------------------------------------------- /llama/llama-7b-exllama/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/llama/llama-7b-exllama/model/__init__.py -------------------------------------------------------------------------------- /llama/llama-7b-vllm/config.yaml: -------------------------------------------------------------------------------- 1 | model_metadata: 2 | engine_args: 3 | model: TheBloke/Llama-2-7B-Chat-fp16 4 | example_model_input: 5 | prompt: Where do Llamas come from? 
6 | pretty_name: Llama 2 7B 7 | prompt_format: "[INST] {prompt} [/INST]" 8 | tags: 9 | - text-generation 10 | model_name: Llama 7B Instruct vLLM 11 | python_version: py311 12 | requirements: 13 | - vllm==0.2.1.post1 14 | resources: 15 | accelerator: A10G 16 | memory: 25Gi 17 | use_gpu: true 18 | runtime: 19 | predict_concurrency: 256 20 | system_packages: [] 21 | -------------------------------------------------------------------------------- /llama/llama-7b-vllm/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/llama/llama-7b-vllm/model/__init__.py -------------------------------------------------------------------------------- /llama/llama-7b/data/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "_name_or_path": "decapoda-research/llama-7b-hf", 3 | "architectures": [ 4 | "LlamaForCausalLM" 5 | ], 6 | "bos_token_id": 0, 7 | "eos_token_id": 1, 8 | "hidden_act": "silu", 9 | "hidden_size": 4096, 10 | "initializer_range": 0.02, 11 | "intermediate_size": 11008, 12 | "max_position_embeddings": 2048, 13 | "max_sequence_length": 2048, 14 | "model_type": "llama", 15 | "num_attention_heads": 32, 16 | "num_hidden_layers": 32, 17 | "pad_token_id": -1, 18 | "rms_norm_eps": 1e-06, 19 | "tie_word_embeddings": false, 20 | "torch_dtype": "float16", 21 | "transformers_version": "4.29.0.dev0", 22 | "use_cache": true, 23 | "vocab_size": 32000 24 | } 25 | -------------------------------------------------------------------------------- /llama/llama-7b/data/generation_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "_from_model_config": true, 3 | "bos_token_id": 0, 4 | "eos_token_id": 1, 5 | "pad_token_id": 0, 6 | "transformers_version": "4.29.0.dev0" 7 | } 8 | -------------------------------------------------------------------------------- /llama/llama-7b/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/llama/llama-7b/model/__init__.py -------------------------------------------------------------------------------- /llama/tinyllama-1.1B-chat-v1.0/config.yaml: -------------------------------------------------------------------------------- 1 | model_metadata: 2 | tags: 3 | - openai-compatible 4 | example_model_input: 5 | prompt: How tall is a tiny llama?
6 | model_name: tinyllama-trt 7 | python_version: py310 8 | resources: 9 | accelerator: A10G 10 | memory: 24Gi 11 | use_gpu: true 12 | trt_llm: 13 | build: 14 | max_seq_len: 2048 15 | base_model: llama 16 | quantization_type: no_quant 17 | checkpoint_repository: 18 | repo: TinyLlama/TinyLlama-1.1B-Chat-v1.0 19 | source: HF 20 | -------------------------------------------------------------------------------- /llava/llava-1.6-sgl/config.yaml: -------------------------------------------------------------------------------- 1 | environment_variables: {} 2 | external_package_dirs: [] 3 | model_name: llava 1.6 SGL 4 | python_version: py310 5 | requirements: [] 6 | requirements_file: ./requirements.txt 7 | resources: 8 | accelerator: A100 9 | use_gpu: true 10 | runtime: 11 | predict_concurrency: 128 12 | secrets: {} 13 | system_packages: [] 14 | -------------------------------------------------------------------------------- /llava/llava-1.6-sgl/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/llava/llava-1.6-sgl/model/__init__.py -------------------------------------------------------------------------------- /llava/llava-1.6-sgl/requirements.txt: -------------------------------------------------------------------------------- 1 | sglang==0.1.12 2 | triton==2.1.0 3 | tqdm==4.66.2 4 | aiohttp==3.9.3 5 | psutil==5.9.4 6 | rpyc==5.3.1 7 | torch==2.1.2 8 | vllm==0.3.0 9 | zmq==0.0.0 10 | interegular==0.3.3 11 | lark==1.1.9 12 | numba==0.59.0 13 | referencing 14 | diskcache==5.6.3 15 | cloudpickle==3.0.0 16 | pillow==10.2.0 17 | outlines==0.0.30 18 | -------------------------------------------------------------------------------- /llava/llava-v1.5-7b/config.yaml: -------------------------------------------------------------------------------- 1 | environment_variables: {} 2 | external_package_dirs: [] 3 | model_name: llava-v1.5-7b 4 | python_version: py311 5 | requirements: 6 | - torch==2.0.1 7 | - torchvision==0.15.2 8 | - transformers==4.31.0 9 | - tokenizers>=0.12.1,<0.14 10 | - sentencepiece==0.1.99 11 | - shortuuid==1.0.11 12 | - scipy==1.11.4 13 | - accelerate==0.21.0 14 | - peft==0.4.0 15 | - bitsandbytes==0.41.0 16 | - einops==0.6.1 17 | - einops-exts==0.0.4 18 | - timm==0.6.13 19 | resources: 20 | accelerator: A10G 21 | cpu: '3' 22 | memory: 15Gi 23 | use_gpu: true 24 | secrets: {} 25 | system_packages: [] 26 | -------------------------------------------------------------------------------- /llava/llava-v1.5-7b/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/llava/llava-v1.5-7b/model/__init__.py -------------------------------------------------------------------------------- /llava/llava-v1.5-7b/packages/llava/__init__.py: -------------------------------------------------------------------------------- 1 | from .model import LlavaLlamaForCausalLM 2 | -------------------------------------------------------------------------------- /llava/llava-v1.5-7b/packages/llava/constants.py: -------------------------------------------------------------------------------- 1 | CONTROLLER_HEART_BEAT_EXPIRATION = 30 2 | WORKER_HEART_BEAT_INTERVAL = 15 3 | 4 | LOGDIR = "." 
5 | 6 | # Model Constants 7 | IGNORE_INDEX = -100 8 | IMAGE_TOKEN_INDEX = -200 9 | DEFAULT_IMAGE_TOKEN = "<image>" 10 | DEFAULT_IMAGE_PATCH_TOKEN = "<im_patch>" 11 | DEFAULT_IM_START_TOKEN = "<im_start>" 12 | DEFAULT_IM_END_TOKEN = "<im_end>" 13 | IMAGE_PLACEHOLDER = "<image-placeholder>" 14 | -------------------------------------------------------------------------------- /llava/llava-v1.5-7b/packages/llava/eval/table/model.jsonl: -------------------------------------------------------------------------------- 1 | {"model_id": "vicuna-13b:20230322-clean-lang", "model_name": "vicuna-13b", "model_version": "20230322-clean-lang", "model_metadata": "vicuna-13b-20230322-clean-lang"} 2 | {"model_id": "alpaca-13b:v1", "model_name": "alpaca-13b", "model_version": "v1", "model_metadata": "alpaca-13b"} 3 | {"model_id": "llama-13b:v1", "model_name": "llama-13b", "model_version": "v1", "model_metadata": "hf-llama-13b"} 4 | {"model_id": "bard:20230327", "model_name": "bard", "model_version": "20230327", "model_metadata": "Google Bard 20230327"} 5 | {"model_id": "gpt-3.5-turbo:20230327", "model_name": "gpt-3.5-turbo", "model_version": "20230327", "model_metadata": "OpenAI ChatGPT gpt-3.5-turbo Chat Completion"} 6 | -------------------------------------------------------------------------------- /llava/llava-v1.5-7b/packages/llava/eval/table/reviewer.jsonl: -------------------------------------------------------------------------------- 1 | {"reviewer_id": "gpt-4-0328-default", "prompt_id": 1, "metadata": {"temperature": 0.2, "max_tokens": 1024}, "description": "GPT-4 for general questions"} 2 | {"reviewer_id": "gpt-4-0328-coding", "prompt_id": 2, "metadata": {"temperature": 0.2, "max_tokens": 1024}, "description": "GPT-4 for coding questions"} 3 | {"reviewer_id": "gpt-4-0328-math", "prompt_id": 3, "metadata": {"temperature": 0.2, "max_tokens": 1024}, "description": "GPT-4 for math questions"} 4 | {"reviewer_id": "gpt-4-0417-visual", "prompt_id": 4, "metadata": {"temperature": 0.2, "max_tokens": 1024}, "description": "GPT-4 for visual questions"} 5 | -------------------------------------------------------------------------------- /llava/llava-v1.5-7b/packages/llava/eval/webpage/figures/alpaca.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/llava/llava-v1.5-7b/packages/llava/eval/webpage/figures/alpaca.png -------------------------------------------------------------------------------- /llava/llava-v1.5-7b/packages/llava/eval/webpage/figures/bard.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/llava/llava-v1.5-7b/packages/llava/eval/webpage/figures/bard.jpg -------------------------------------------------------------------------------- /llava/llava-v1.5-7b/packages/llava/eval/webpage/figures/llama.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/llava/llava-v1.5-7b/packages/llava/eval/webpage/figures/llama.jpg -------------------------------------------------------------------------------- /llava/llava-v1.5-7b/packages/llava/eval/webpage/figures/vicuna.jpeg: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/llava/llava-v1.5-7b/packages/llava/eval/webpage/figures/vicuna.jpeg -------------------------------------------------------------------------------- /llava/llava-v1.5-7b/packages/llava/model/__init__.py: -------------------------------------------------------------------------------- 1 | from .language_model.llava_llama import LlavaConfig, LlavaLlamaForCausalLM 2 | from .language_model.llava_mpt import LlavaMPTConfig, LlavaMPTForCausalLM 3 | -------------------------------------------------------------------------------- /llava/llava-v1.5-7b/packages/llava/model/language_model/mpt/custom_embedding.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | from torch import Tensor 4 | 5 | 6 | class SharedEmbedding(nn.Embedding): 7 | def forward(self, input: Tensor, unembed: bool = False) -> Tensor: 8 | if unembed: 9 | return F.linear(input, self.weight) 10 | return super().forward(input) 11 | -------------------------------------------------------------------------------- /llava/llava-v1.5-7b/packages/llava/model/multimodal_encoder/builder.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from .clip_encoder import CLIPVisionTower 4 | 5 | 6 | def build_vision_tower(vision_tower_cfg, **kwargs): 7 | vision_tower = getattr( 8 | vision_tower_cfg, 9 | "mm_vision_tower", 10 | getattr(vision_tower_cfg, "vision_tower", None), 11 | ) 12 | is_absolute_path_exists = os.path.exists(vision_tower) 13 | if ( 14 | is_absolute_path_exists 15 | or vision_tower.startswith("openai") 16 | or vision_tower.startswith("laion") 17 | ): 18 | return CLIPVisionTower(vision_tower, args=vision_tower_cfg, **kwargs) 19 | 20 | raise ValueError(f"Unknown vision tower: {vision_tower}") 21 | -------------------------------------------------------------------------------- /llava/llava-v1.5-7b/packages/llava/serve/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/llava/llava-v1.5-7b/packages/llava/serve/__init__.py -------------------------------------------------------------------------------- /llava/llava-v1.5-7b/packages/llava/serve/examples/extreme_ironing.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/llava/llava-v1.5-7b/packages/llava/serve/examples/extreme_ironing.jpg -------------------------------------------------------------------------------- /llava/llava-v1.5-7b/packages/llava/serve/examples/waterview.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/llava/llava-v1.5-7b/packages/llava/serve/examples/waterview.jpg -------------------------------------------------------------------------------- /llava/llava-v1.5-7b/packages/llava/serve/register_worker.py: -------------------------------------------------------------------------------- 1 | """ 2 | Manually register workers. 
3 | 4 | Usage: 5 | python3 -m fastchat.serve.register_worker --controller http://localhost:21001 --worker-name http://localhost:21002 6 | """ 7 | 8 | import argparse 9 | 10 | import requests 11 | 12 | if __name__ == "__main__": 13 | parser = argparse.ArgumentParser() 14 | parser.add_argument("--controller-address", type=str) 15 | parser.add_argument("--worker-name", type=str) 16 | parser.add_argument("--check-heart-beat", action="store_true") 17 | args = parser.parse_args() 18 | 19 | url = args.controller_address + "/register_worker" 20 | data = { 21 | "worker_name": args.worker_name, 22 | "check_heart_beat": args.check_heart_beat, 23 | "worker_status": None, 24 | } 25 | r = requests.post(url, json=data) 26 | assert r.status_code == 200 27 | -------------------------------------------------------------------------------- /llava/llava-v1.5-7b/packages/llava/train/train_mem.py: -------------------------------------------------------------------------------- 1 | # Adopted from https://github.com/lm-sys/FastChat. Below is the original copyright: 2 | # Adopted from tatsu-lab@stanford_alpaca. Below is the original copyright: 3 | # Make it more memory efficient by monkey patching the LLaMA model with FlashAttn. 4 | 5 | # Need to call this before importing transformers. 6 | from llava.train.llama_flash_attn_monkey_patch import replace_llama_attn_with_flash_attn 7 | 8 | replace_llama_attn_with_flash_attn() 9 | 10 | from llava.train.train import train 11 | 12 | if __name__ == "__main__": 13 | train() 14 | -------------------------------------------------------------------------------- /llava/llava-v1.5-7b/packages/llava/train/train_xformers.py: -------------------------------------------------------------------------------- 1 | # Make it more memory efficient by monkey patching the LLaMA model with xformers attention. 2 | 3 | # Need to call this before importing transformers. 
4 | from llava.train.llama_xformers_attn_monkey_patch import ( 5 | replace_llama_attn_with_xformers_attn, 6 | ) 7 | 8 | replace_llama_attn_with_xformers_attn() 9 | 10 | from llava.train.train import train 11 | 12 | if __name__ == "__main__": 13 | train() 14 | -------------------------------------------------------------------------------- /llava/llava-v1.6-34b/config.yaml: -------------------------------------------------------------------------------- 1 | environment_variables: {} 2 | external_package_dirs: [] 3 | model_name: llava-v1.6-34b 4 | python_version: py311 5 | requirements: 6 | - git+https://github.com/haotian-liu/LLaVA.git 7 | resources: 8 | accelerator: A100 9 | use_gpu: true 10 | secrets: {} 11 | system_packages: [] 12 | -------------------------------------------------------------------------------- /llava/llava-v1.6-34b/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/llava/llava-v1.6-34b/model/__init__.py -------------------------------------------------------------------------------- /magic-animate/config.yaml: -------------------------------------------------------------------------------- 1 | environment_variables: {} 2 | external_package_dirs: [] 3 | model_metadata: 4 | example_model_input: 5 | guidance_scale: 7.5 6 | motion_sequence: 7 | reference_image: 8 | seed: 1 9 | steps: 10 10 | model_name: Magic Animate 11 | python_version: py310 12 | requirements: 13 | - torch==2.0.1 14 | - torchvision==0.15.2 15 | - xformers==0.0.22 16 | - diffusers==0.21.4 17 | - pillow==9.5.0 18 | - numpy==1.24.4 19 | - omegaconf==2.3.0 20 | - transformers==4.32.0 21 | - einops==0.6.1 22 | - imageio==2.9.0 23 | - imageio-ffmpeg==0.4.3 24 | - tqdm==4.66.1 25 | - websockets==11.0.3 26 | - accelerate==0.22.0 27 | - huggingface-hub==0.16.4 28 | - av==11.0.0 29 | resources: 30 | accelerator: A10G 31 | cpu: '3' 32 | memory: 15Gi 33 | use_gpu: true 34 | secrets: {} 35 | system_packages: 36 | - ffmpeg 37 | -------------------------------------------------------------------------------- /magic-animate/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/magic-animate/model/__init__.py -------------------------------------------------------------------------------- /magic-animate/model/configs/inference/inference.yaml: -------------------------------------------------------------------------------- 1 | unet_additional_kwargs: 2 | unet_use_cross_frame_attention: false 3 | unet_use_temporal_attention: false 4 | use_motion_module: true 5 | motion_module_resolutions: 6 | - 1 7 | - 2 8 | - 4 9 | - 8 10 | motion_module_mid_block: false 11 | motion_module_decoder_only: false 12 | motion_module_type: Vanilla 13 | motion_module_kwargs: 14 | num_attention_heads: 8 15 | num_transformer_block: 1 16 | attention_block_types: 17 | - Temporal_Self 18 | - Temporal_Self 19 | temporal_position_encoding: true 20 | temporal_position_encoding_max_len: 24 21 | temporal_attention_dim_div: 1 22 | 23 | noise_scheduler_kwargs: 24 | beta_start: 0.00085 25 | beta_end: 0.012 26 | beta_schedule: "linear" 27 | -------------------------------------------------------------------------------- /metavoice-1b/config.yaml: -------------------------------------------------------------------------------- 1 | model_name: MetaVoice 1B 2 | 
| description: MetaVoice is a transformer-based model for TTS 3 | environment_variables: {} 4 | external_package_dirs: [] 5 | model_metadata: 6 | example_model_input: '"text to speech models are cool"' 7 | python_version: py311 8 | data_dir: data 9 | model_cache: 10 | - repo_id: metavoiceio/metavoice-1B-v0.1 11 | allow_patterns: 12 | - "*.pt" 13 | - repo_id: facebook/multiband-diffusion 14 | allow_patterns: 15 | - mbd_comp_8.pt 16 | - repo_id: facebook/encodec_24khz 17 | allow_patterns: 18 | - "*.safetensors" 19 | 20 | requirements_file: ./requirements.txt 21 | resources: 22 | accelerator: "A10G" 23 | use_gpu: true 24 | secrets: 25 | hf_access_token: "ENTER HF ACCESS TOKEN HERE" 26 | system_packages: 27 | - ffmpeg 28 | -------------------------------------------------------------------------------- /metavoice-1b/data/bria.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/metavoice-1b/data/bria.mp3 -------------------------------------------------------------------------------- /metavoice-1b/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/metavoice-1b/model/__init__.py -------------------------------------------------------------------------------- /metavoice-1b/process.py: -------------------------------------------------------------------------------- 1 | import base64 2 | import sys 3 | 4 | b64_audio = sys.stdin.read() 5 | 6 | # b64 data is surrounded by info messages and quotes if piped in from a truss command 7 | b64_audio = b64_audio.split('"')[1] 8 | 9 | try: 10 | decoded_audio = base64.b64decode(b64_audio) 11 | except Exception: 12 | print("Response was not a valid base64 string. Exiting.") 13 | print(b64_audio) 14 | sys.exit(1) 15 | 16 | # Write the decoded bytes only after decoding succeeds. 17 | with open("output.wav", "wb") as wav_file: 18 | wav_file.write(decoded_audio) -------------------------------------------------------------------------------- /metavoice-1b/requirements.txt: -------------------------------------------------------------------------------- 1 | torch==2.1.0 2 | transformers==4.37.2 3 | librosa==0.10.1 4 | tqdm==4.66.2 5 | tiktoken==0.5.1 6 | audiocraft==1.2.0 7 | numpy==1.24.4 8 | tyro==0.7.3 9 | DeepFilterNet==0.5.6 10 | pydub==0.25.1 11 | soundfile==0.12.1 12 | huggingface-hub==0.20.3 13 | scipy==1.12.0 14 | https://github.com/Dao-AILab/flash-attention/releases/download/v2.5.3/flash_attn-2.5.3+cu122torch2.1cxx11abiFALSE-cp311-cp311-linux_x86_64.whl 15 | git+https://github.com/metavoiceio/metavoice-src.git@182ec712a10b42440bd9e9346a17381e8664256e -------------------------------------------------------------------------------- /mistral/engine-mistral-7b-instruct/README.md: -------------------------------------------------------------------------------- 1 | # Mistral 7B Instruct 2 | 3 | This deployment of Mistral 7B Instruct uses the TensorRT-LLM Engine Builder.
4 | 5 | For details, see: https://docs.baseten.co/performance/examples/mistral-trt 6 | -------------------------------------------------------------------------------- /mistral/engine-mistral-small-3/README.md: -------------------------------------------------------------------------------- 1 | # Mistral Small 3 (2501) 2 | -------------------------------------------------------------------------------- /mistral/engine-mixtral-8x22b-instruct/README.md: -------------------------------------------------------------------------------- 1 | # Mixtral 8x22B Instruct 2 | 3 | This deployment of Mixtral 8x22B Instruct uses the TensorRT-LLM Engine Builder. 4 | 5 | For details, see: https://docs.baseten.co/performance/examples/mistral-trt 6 | -------------------------------------------------------------------------------- /mistral/engine-mixtral-8x7b-instruct/README.md: -------------------------------------------------------------------------------- 1 | # Mixtral 8x7B Instruct 2 | 3 | This deployment of Mixtral 8x7B Instruct uses the TensorRT-LLM Engine Builder. 4 | 5 | For details, see: https://docs.baseten.co/performance/examples/mistral-trt 6 | -------------------------------------------------------------------------------- /mistral/mistral-7b-chat/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/mistral/mistral-7b-chat/model/__init__.py -------------------------------------------------------------------------------- /mistral/mistral-7b-instruct-vllm/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/mistral/mistral-7b-instruct-vllm/model/__init__.py -------------------------------------------------------------------------------- /mistral/mistral-7b-instruct/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/mistral/mistral-7b-instruct/model/__init__.py -------------------------------------------------------------------------------- /mistral/mistral-7b/config.yaml: -------------------------------------------------------------------------------- 1 | environment_variables: {} 2 | external_package_dirs: [] 3 | model_metadata: 4 | avatar_url: https://cdn.baseten.co/production/static/explore/mistral_logo.png 5 | cover_image_url: https://cdn.baseten.co/production/static/explore/mistral.png 6 | example_model_input: 7 | prompt: What is the Mistral wind?
8 | pretty_name: Mistral 7B 9 | tags: 10 | - text-generation 11 | model_name: mistral-7b 12 | python_version: py311 13 | requirements: 14 | - transformers==4.42.3 15 | - sentencepiece 16 | - accelerate 17 | - torch==2.0.1 18 | - numpy==1.26.4 19 | resources: 20 | accelerator: A10G 21 | memory: 25Gi 22 | use_gpu: true 23 | secrets: 24 | hf_access_token: "ENTER HF ACCESS TOKEN HERE" 25 | system_packages: [] 26 | -------------------------------------------------------------------------------- /mistral/mistral-7b/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/mistral/mistral-7b/model/__init__.py -------------------------------------------------------------------------------- /mistral/mixtral-8x22b-trt-int8-weights-only/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/mistral/mixtral-8x22b-trt-int8-weights-only/model/__init__.py -------------------------------------------------------------------------------- /mistral/mixtral-8x22b/config.yaml: -------------------------------------------------------------------------------- 1 | environment_variables: {} 2 | external_package_dirs: [] 3 | model_metadata: 4 | repo_id: mistralai/Mixtral-8x22B-Instruct-v0.1 5 | avatar_url: https://cdn.baseten.co/production/static/explore/mistral_logo.png 6 | cover_image_url: https://cdn.baseten.co/production/static/explore/mistral.png 7 | example_model_input: 8 | prompt: What is the Mistral wind? 9 | pretty_name: Mixtral 8x22B 10 | tags: 11 | - text-generation 12 | model_name: Mixtral 8x22B 13 | python_version: py310 14 | requirements: 15 | - accelerate 16 | - transformers==4.42.3 17 | - torch==2.2.0 18 | resources: 19 | accelerator: A100:4 20 | use_gpu: true 21 | secrets: 22 | hf_access_token: "ENTER HF ACCESS TOKEN HERE" 23 | system_packages: [] 24 | -------------------------------------------------------------------------------- /mistral/mixtral-8x22b/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/mistral/mixtral-8x22b/model/__init__.py -------------------------------------------------------------------------------- /mistral/mixtral-8x7b-instruct-trt-llm-h100/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/mistral/mixtral-8x7b-instruct-trt-llm-h100/model/__init__.py -------------------------------------------------------------------------------- /mistral/mixtral-8x7b-instruct-trt-llm-weights-only-quant-h100/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/mistral/mixtral-8x7b-instruct-trt-llm-weights-only-quant-h100/model/__init__.py -------------------------------------------------------------------------------- /mistral/mixtral-8x7b-instruct-trt-llm-weights-only-quant/model/__init__.py: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/mistral/mixtral-8x7b-instruct-trt-llm-weights-only-quant/model/__init__.py -------------------------------------------------------------------------------- /mistral/mixtral-8x7b-instruct-trt-llm/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/mistral/mixtral-8x7b-instruct-trt-llm/model/__init__.py -------------------------------------------------------------------------------- /mistral/mixtral-8x7b-instruct-vllm-a100-t-tp2/config.yaml: -------------------------------------------------------------------------------- 1 | environment_variables: {} 2 | external_package_dirs: [] 3 | model_name: Mixtral 8x7B — VLLM TP2 — A100:2 4 | python_version: py310 5 | requirements: 6 | - vllm 7 | resources: 8 | accelerator: A100:2 9 | use_gpu: true 10 | runtime: 11 | predict_concurrency: 128 12 | secrets: {} 13 | system_packages: [] 14 | -------------------------------------------------------------------------------- /mistral/mixtral-8x7b-instruct-vllm-a100-t-tp2/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/mistral/mixtral-8x7b-instruct-vllm-a100-t-tp2/model/__init__.py -------------------------------------------------------------------------------- /mistral/mixtral-8x7b-instruct-vllm/config.yaml: -------------------------------------------------------------------------------- 1 | environment_variables: {} 2 | external_package_dirs: [] 3 | model_name: Mixtral 8x7B 4 | python_version: py310 5 | requirements: 6 | - vllm==0.2.5 7 | resources: 8 | accelerator: A100:2 9 | use_gpu: true 10 | runtime: 11 | predict_concurrency: 128 12 | secrets: {} 13 | system_packages: [] 14 | -------------------------------------------------------------------------------- /mistral/mixtral-8x7b-instruct-vllm/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/mistral/mixtral-8x7b-instruct-vllm/model/__init__.py -------------------------------------------------------------------------------- /mistral/pixtral-12b/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/mistral/pixtral-12b/model/__init__.py -------------------------------------------------------------------------------- /multiprocessing/config.yaml: -------------------------------------------------------------------------------- 1 | environment_variables: {} 2 | external_package_dirs: [] 3 | model_name: Model with multiprocessing pre/post-process 4 | python_version: py310 5 | requirements: 6 | - torch 7 | resources: 8 | accelerator: A10G 9 | cpu: '8' 10 | memory: 8Gi 11 | use_gpu: true 12 | secrets: {} 13 | system_packages: [] 14 | -------------------------------------------------------------------------------- /multiprocessing/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/multiprocessing/model/__init__.py 
-------------------------------------------------------------------------------- /multiprocessing/model/test.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from typing import Any 3 | 4 | import model 5 | 6 | 7 | async def call_fn(fn, payload: Any) -> Any: 8 | return await fn(payload) 9 | 10 | 11 | async def test(): 12 | m = model.Model(data_dir="", config="", secrets="") 13 | body = {"n": 100} 14 | 15 | payload = await call_fn(m.preprocess, body) 16 | response = await call_fn(m.predict, payload) 17 | 18 | return response 19 | 20 | 21 | if __name__ == "__main__": 22 | x = asyncio.run(test()) 23 | print(x) 24 | -------------------------------------------------------------------------------- /musicgen-large/config.yaml: -------------------------------------------------------------------------------- 1 | description: MusicGen is a simple and controllable model for music generation developed 2 | by Facebook AI Research. 3 | environment_variables: {} 4 | external_package_dirs: [] 5 | model_metadata: 6 | avatar_url: https://cdn.baseten.co/production/static/explore/meta.png 7 | cover_image_url: https://cdn.baseten.co/production/static/explore/musicgen-cover.png 8 | example_model_input: 9 | duration: 8 10 | prompts: 11 | - happy rock 12 | - energetic EDM 13 | - sad jazz 14 | tags: 15 | - text-to-music 16 | model_name: MusicGen large 17 | python_version: py39 18 | requirements: 19 | - torch>=2 20 | - audiocraft 21 | resources: 22 | accelerator: A10G 23 | cpu: '3' 24 | memory: 14Gi 25 | use_gpu: true 26 | secrets: {} 27 | system_packages: 28 | - ffmpeg 29 | -------------------------------------------------------------------------------- /musicgen-large/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/musicgen-large/model/__init__.py -------------------------------------------------------------------------------- /musicgen-melody/config.yaml: -------------------------------------------------------------------------------- 1 | description: MusicGen Melody is a simple and controllable model for music generation 2 | conditioned on text and audio. It is developed by Facebook AI Research. 
3 | environment_variables: {} 4 | external_package_dirs: [] 5 | model_metadata: 6 | avatar_url: https://cdn.baseten.co/production/static/explore/meta.png 7 | cover_image_url: https://cdn.baseten.co/production/static/explore/musicgen-cover.png 8 | example_model_input: 9 | duration: 8 10 | prompts: 11 | - happy rock 12 | - energetic EDM 13 | - sad jazz 14 | tags: 15 | - text-to-music 16 | model_name: MusicGen Melody 17 | python_version: py39 18 | requirements: 19 | - torch>=2 20 | - audiocraft 21 | - protobuf 22 | resources: 23 | accelerator: A10G 24 | cpu: '3' 25 | memory: 14Gi 26 | use_gpu: true 27 | secrets: {} 28 | system_packages: 29 | - ffmpeg 30 | -------------------------------------------------------------------------------- /musicgen-melody/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/musicgen-melody/model/__init__.py -------------------------------------------------------------------------------- /nemotron/llama-3-1-nemotron-70b-instruct/README.md: -------------------------------------------------------------------------------- 1 | 2 | # Llama-3.1-Nemotron-70B-Instruct 3 | 4 | This deployment of Llama-3.1-Nemotron-70B-Instruct uses the TensorRT-LLM Engine Builder. 5 | 6 | For details, see: https://docs.baseten.co/performance/examples/llama-trt 7 | -------------------------------------------------------------------------------- /ngram-speculator/truss/config.yaml: -------------------------------------------------------------------------------- 1 | build_commands: [] 2 | environment_variables: {} 3 | external_package_dirs: [] 4 | model_metadata: {} 5 | model_name: ngram-speculator 6 | python_version: py310 7 | requirements: 8 | - vllm==0.6.5 9 | - transformers==4.47.1 10 | resources: 11 | accelerator: H100 12 | use_gpu: True 13 | secrets: {} 14 | system_packages: [] 15 | -------------------------------------------------------------------------------- /ngram-speculator/truss/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/ngram-speculator/truss/model/__init__.py -------------------------------------------------------------------------------- /nous-capybara/nous-capybara-34b-openai/config.yaml: -------------------------------------------------------------------------------- 1 | environment_variables: {} 2 | external_package_dirs: [] 3 | model_cache: 4 | - allow_patterns: 5 | - '*.json' 6 | - '*.bin' 7 | repo_id: NousResearch/Nous-Capybara-34B 8 | model_name: Nous Capybara 34B OpenAI 9 | python_version: py310 10 | requirements: 11 | - accelerate==0.25.0 12 | - transformers==4.35.2 13 | - torch==2.1.0 14 | - bitsandbytes==0.41.3 15 | - scipy==1.11.4 16 | - sentencepiece==0.1.99 17 | resources: 18 | accelerator: A100 19 | cpu: '3' 20 | memory: 20Gi 21 | use_gpu: true 22 | secrets: {} 23 | system_packages: [] 24 | -------------------------------------------------------------------------------- /nous-capybara/nous-capybara-34b-openai/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/nous-capybara/nous-capybara-34b-openai/model/__init__.py -------------------------------------------------------------------------------- 
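The `model_cache` block in the Nous Capybara config above pre-fetches only the repository files that match `allow_patterns`, so the image build skips weights and artifacts it will never load. As a conceptual sketch (Truss performs this caching itself at build time; the call below is only the equivalent filter expressed with `huggingface_hub`):

```python
# Illustrative sketch of what the allow_patterns filter above selects.
# Truss caches these files at image build time; this call is a stand-in.
from huggingface_hub import snapshot_download

snapshot_download(
    repo_id="NousResearch/Nous-Capybara-34B",
    allow_patterns=["*.json", "*.bin"],  # configs plus PyTorch weight shards
)
```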
/nous-capybara/nous-capybara-34b/config.yaml: -------------------------------------------------------------------------------- 1 | environment_variables: {} 2 | external_package_dirs: [] 3 | model_metadata: 4 | example_model_input: 5 | prompt: What happens if I go to the top of the tallest mountain in California 6 | with a bucket of water and tip it over the highest cliff? 7 | model_name: Nous Capybara 34B 8 | python_version: py310 9 | requirements: 10 | - accelerate==0.25.0 11 | - transformers==4.35.2 12 | - torch==2.1.0 13 | - bitsandbytes==0.41.3 14 | - scipy==1.11.4 15 | - sentencepiece==0.1.99 16 | resources: 17 | accelerator: A100 18 | cpu: '3' 19 | memory: 20Gi 20 | use_gpu: true 21 | secrets: {} 22 | system_packages: [] 23 | -------------------------------------------------------------------------------- /nous-capybara/nous-capybara-34b/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/nous-capybara/nous-capybara-34b/model/__init__.py -------------------------------------------------------------------------------- /nsql/config.yaml: -------------------------------------------------------------------------------- 1 | description: NSQL is an open-source text-to-SQL AI model developed by Numbers Station. 2 | environment_variables: {} 3 | external_package_dirs: [] 4 | model_metadata: 5 | avatar_url: https://aeiljuispo.cloudimg.io/v7/https://cdn-uploads.huggingface.co/production/uploads/649c7ee8f97bd6fd710a9eb5/nBg1Fyo22RrqRJrkz9IYB.png 6 | cover_image_url: https://global-uploads.webflow.com/6348b2d49808811e3f7a0fff/640690727b722a05771960ec_graphic-data-p-800.png 7 | tags: 8 | - code-generation 9 | model_name: NSQL 350M 10 | python_version: py39 11 | requirements: 12 | - torch 13 | - transformers>=4.29.0 14 | resources: 15 | accelerator: A10G 16 | cpu: '8' 17 | memory: 30Gi 18 | use_gpu: true 19 | secrets: {} 20 | system_packages: [] 21 | -------------------------------------------------------------------------------- /nsql/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/nsql/model/__init__.py -------------------------------------------------------------------------------- /phi/phi-3-mini-128k-instruct/config.yaml: -------------------------------------------------------------------------------- 1 | environment_variables: {} 2 | external_package_dirs: [] 3 | model_metadata: {} 4 | model_name: Phi-3-Mini-128K-Instruct 5 | python_version: py39 6 | requirements: 7 | - accelerate 8 | - einops 9 | - transformers==4.40.1 10 | - torch==2.3.0 11 | resources: 12 | accelerator: T4 13 | use_gpu: true 14 | secrets: {} 15 | system_packages: [] 16 | -------------------------------------------------------------------------------- /phi/phi-3-mini-128k-instruct/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/phi/phi-3-mini-128k-instruct/model/__init__.py -------------------------------------------------------------------------------- /phi/phi-3-mini-4k-instruct/config.yaml: -------------------------------------------------------------------------------- 1 | environment_variables: {} 2 | external_package_dirs: [] 3 | model_metadata: {} 4 | model_name:
Phi-3-Mini-4K-Instruct 5 | python_version: py39 6 | requirements: 7 | - accelerate 8 | - einops 9 | - transformers==4.40.1 10 | - torch==2.3.0 11 | resources: 12 | accelerator: T4 13 | use_gpu: true 14 | secrets: {} 15 | system_packages: [] 16 | -------------------------------------------------------------------------------- /phi/phi-3-mini-4k-instruct/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/phi/phi-3-mini-4k-instruct/model/__init__.py -------------------------------------------------------------------------------- /phi/phi-3.5-mini/config.yaml: -------------------------------------------------------------------------------- 1 | model_name: "Phi 3.5 Mini Instruct VLLM openai compatible" 2 | python_version: py311 3 | model_metadata: 4 | example_model_input: {"messages": [{"role": "user", "content": "what is the meaning of life"}]} 5 | repo_id: microsoft/Phi-3.5-mini-instruct 6 | openai_compatible: true 7 | vllm_config: 8 | tensor_parallel_size: 1 9 | max_model_len: 10000 10 | requirements: 11 | - vllm==0.5.4 12 | resources: 13 | accelerator: A10G 14 | use_gpu: true 15 | runtime: 16 | predict_concurrency: 128 17 | secrets: 18 | hf_access_token: null 19 | -------------------------------------------------------------------------------- /phi/phi-3.5-mini/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/phi/phi-3.5-mini/model/__init__.py -------------------------------------------------------------------------------- /piper-tts/config.yaml: -------------------------------------------------------------------------------- 1 | environment_variables: {} 2 | external_data: 3 | - local_data_path: models/model.onnx 4 | url: https://huggingface.co/rhasspy/piper-voices/resolve/v1.0.0/en/en_US/lessac/medium/en_US-lessac-medium.onnx 5 | - local_data_path: models/model.onnx.json 6 | url: https://huggingface.co/rhasspy/piper-voices/raw/v1.0.0/en/en_US/lessac/medium/en_US-lessac-medium.onnx.json 7 | external_package_dirs: [] 8 | model_metadata: 9 | example_model_input: 10 | text: I love robots. Robots are cool! 
11 | tags: 12 | - text-to-speech 13 | model_name: Piper TTS 14 | python_version: py310 15 | requirements: 16 | - piper-tts==1.2.0 17 | resources: 18 | accelerator: T4 19 | cpu: '3' 20 | memory: 14Gi 21 | use_gpu: true 22 | secrets: {} 23 | system_packages: [] 24 | -------------------------------------------------------------------------------- /piper-tts/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/piper-tts/model/__init__.py -------------------------------------------------------------------------------- /playground-v2-aesthetic/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/playground-v2-aesthetic/model/__init__.py -------------------------------------------------------------------------------- /playground-v2-aesthetic/show.py: -------------------------------------------------------------------------------- 1 | """ 2 | truss predict -d '{"prompt": "A heavily constructed solarpunk bridge over a canyon at sunset", "steps": 50}' | python show.py 3 | """ 4 | 5 | import base64 6 | import json 7 | import os 8 | import sys 9 | 10 | resp = sys.stdin.read() 11 | image = json.loads(resp)["output"] 12 | img = base64.b64decode(image) 13 | 14 | img_file = open("playground.png", "wb") 15 | img_file.write(img) 16 | img_file.close() 17 | os.system("open playground.png") 18 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "truss-examples" 3 | version = "0.1.0" 4 | description = "" 5 | authors = ["Truss Maintainers "] 6 | license = "MIT" 7 | readme = "README.md" 8 | package-mode = false 9 | 10 | [tool.poetry.dependencies] 11 | python = ">=3.9,<3.13" 12 | 13 | [tool.poetry.group.dev.dependencies] 14 | black = "^23.7.0" 15 | ipython = "^8.14.0" 16 | isort = "^5.12.0" 17 | pre-commit = "^3.5.0" 18 | 19 | [build-system] 20 | requires = ["poetry-core"] 21 | build-backend = "poetry.core.masonry.api" 22 | -------------------------------------------------------------------------------- /qwen/engine-qwen-2-5-14b-coder-instruct/README.md: -------------------------------------------------------------------------------- 1 | # Qwen Coder 2.5 14B Instruct Engine 2 | 3 | This example uses the [TensorRT-LLM Engine Builder for Qwen](https://docs.baseten.co/performance/examples/qwen-trt) to build and deploy an optimized inference engine for Qwen Coder 2.5 14B Instruct. 4 | 5 | For advanced control over the engine building process, see [engine control in Python](https://docs.baseten.co/performance/engine-builder-customization) and [engine builder configuration](https://docs.baseten.co/performance/engine-builder-config) docs. 6 | -------------------------------------------------------------------------------- /qwen/engine-qwen-2-5-14b-instruct/README.md: -------------------------------------------------------------------------------- 1 | # Qwen 2.5 14B Instruct Engine 2 | 3 | This example uses the [TensorRT-LLM Engine Builder for Qwen](https://docs.baseten.co/performance/examples/qwen-trt) to build and deploy an optimized inference engine for Qwen 2.5 14B Instruct. 
4 | 5 | For advanced control over the engine building process, see [engine control in Python](https://docs.baseten.co/performance/engine-builder-customization) and [engine builder configuration](https://docs.baseten.co/performance/engine-builder-config) docs. 6 | -------------------------------------------------------------------------------- /qwen/engine-qwen-2-5-32b-coder-instruct/README.md: -------------------------------------------------------------------------------- 1 | # Qwen Coder 2.5 32B Instruct Engine 2 | 3 | This example uses the [TensorRT-LLM Engine Builder for Qwen](https://docs.baseten.co/performance/examples/qwen-trt) to build and deploy an optimized inference engine for Qwen Coder 2.5 32B Instruct. 4 | 5 | For advanced control over the engine building process, see [engine control in Python](https://docs.baseten.co/performance/engine-builder-customization) and [engine builder configuration](https://docs.baseten.co/performance/engine-builder-config) docs. 6 | -------------------------------------------------------------------------------- /qwen/engine-qwen-2-5-32b-instruct/README.md: -------------------------------------------------------------------------------- 1 | # Qwen 2.5 32B Instruct Engine 2 | 3 | This example uses the [TensorRT-LLM Engine Builder for Qwen](https://docs.baseten.co/performance/examples/qwen-trt) to build and deploy an optimized inference engine for Qwen 2.5 32B Instruct. 4 | 5 | For advanced control over the engine building process, see [engine control in Python](https://docs.baseten.co/performance/engine-builder-customization) and [engine builder configuration](https://docs.baseten.co/performance/engine-builder-config) docs. 6 | -------------------------------------------------------------------------------- /qwen/engine-qwen-2-5-3b-instruct/README.md: -------------------------------------------------------------------------------- 1 | # Qwen 2.5 3B Instruct Engine 2 | 3 | This example uses the [TensorRT-LLM Engine Builder for Qwen](https://docs.baseten.co/performance/examples/qwen-trt) to build and deploy an optimized inference engine for Qwen 2.5 3B Instruct. 4 | 5 | For advanced control over the engine building process, see [engine control in Python](https://docs.baseten.co/performance/engine-builder-customization) and [engine builder configuration](https://docs.baseten.co/performance/engine-builder-config) docs. 6 | -------------------------------------------------------------------------------- /qwen/engine-qwen-2-5-72b-instruct/README.md: -------------------------------------------------------------------------------- 1 | # Qwen 2.5 72B Instruct Engine 2 | 3 | This example uses the [TensorRT-LLM Engine Builder for Qwen](https://docs.baseten.co/performance/examples/qwen-trt) to build and deploy an optimized inference engine for Qwen 2.5 72B Instruct. 4 | 5 | Note that while other sizes of Qwen 2.5 are licensed as Apache 2.0, 72B sizes use the [qwen license](https://huggingface.co/Qwen/Qwen2.5-72B-Instruct/blob/main/LICENSE). 6 | 7 | For advanced control over the engine building process, see [engine control in Python](https://docs.baseten.co/performance/engine-builder-customization) and [engine builder configuration](https://docs.baseten.co/performance/engine-builder-config) docs. 
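(Editor's note, hedged: the following invocation sketch is not taken from the linked docs. Once this Truss is deployed with `truss push`, the engine can be called through Baseten's standard predict endpoint; the model ID, API key, and the exact request/response schema below are placeholder assumptions that depend on the engine configuration.)

```python
# Hypothetical client call for a deployed Qwen engine -- a sketch, not the
# documented API. MODEL_ID and BASETEN_API_KEY are placeholders.
import os

import requests

MODEL_ID = "abcd1234"  # placeholder: the ID Baseten assigns on deploy

resp = requests.post(
    f"https://model-{MODEL_ID}.api.baseten.co/production/predict",
    headers={"Authorization": f"Api-Key {os.environ['BASETEN_API_KEY']}"},
    json={
        "messages": [{"role": "user", "content": "Write a haiku about GPUs."}],
        "max_tokens": 256,
    },
    timeout=60,
)
print(resp.json())
```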
8 | -------------------------------------------------------------------------------- /qwen/engine-qwen-2-5-72b-math-instruct/README.md: -------------------------------------------------------------------------------- 1 | # Qwen Math 2.5 72B Instruct Engine 2 | 3 | This example uses the [TensorRT-LLM Engine Builder for Qwen](https://docs.baseten.co/performance/examples/qwen-trt) to build and deploy an optimized inference engine for Qwen Math 2.5 72B Instruct. 4 | 5 | Note that while other sizes of Qwen 2.5 are licensed as Apache 2.0, 72B sizes use the [qwen license](https://huggingface.co/Qwen/Qwen2.5-72B-Instruct/blob/main/LICENSE). 6 | 7 | For advanced control over the engine building process, see [engine control in Python](https://docs.baseten.co/performance/engine-builder-customization) and [engine builder configuration](https://docs.baseten.co/performance/engine-builder-config) docs. 8 | -------------------------------------------------------------------------------- /qwen/engine-qwen-2-5-7b-coder-instruct/README.md: -------------------------------------------------------------------------------- 1 | # Qwen Coder 2.5 7B Instruct Engine 2 | 3 | This example uses the [TensorRT-LLM Engine Builder for Qwen](https://docs.baseten.co/performance/examples/qwen-trt) to build and deploy an optimized inference engine for Qwen Coder 2.5 7B Instruct. 4 | 5 | For advanced control over the engine building process, see [engine control in Python](https://docs.baseten.co/performance/engine-builder-customization) and [engine builder configuration](https://docs.baseten.co/performance/engine-builder-config) docs. 6 | -------------------------------------------------------------------------------- /qwen/engine-qwen-2-5-7b-instruct/README.md: -------------------------------------------------------------------------------- 1 | # Qwen 2.5 7B Instruct Engine 2 | 3 | This example uses the [TensorRT-LLM Engine Builder for Qwen](https://docs.baseten.co/performance/examples/qwen-trt) to build and deploy an optimized inference engine for Qwen 2.5 7B Instruct. 4 | 5 | For advanced control over the engine building process, see [engine control in Python](https://docs.baseten.co/performance/engine-builder-customization) and [engine builder configuration](https://docs.baseten.co/performance/engine-builder-config) docs. 6 | -------------------------------------------------------------------------------- /qwen/engine-qwen-2-5-7b-math-instruct/README.md: -------------------------------------------------------------------------------- 1 | # Qwen Math 2.5 7B Instruct Engine 2 | 3 | This example uses the [TensorRT-LLM Engine Builder for Qwen](https://docs.baseten.co/performance/examples/qwen-trt) to build and deploy an optimized inference engine for Qwen Math 2.5 7B Instruct. 4 | 5 | For advanced control over the engine building process, see [engine control in Python](https://docs.baseten.co/performance/engine-builder-customization) and [engine builder configuration](https://docs.baseten.co/performance/engine-builder-config) docs. 6 | -------------------------------------------------------------------------------- /qwen/qwen-7b-chat/config.yaml: -------------------------------------------------------------------------------- 1 | environment_variables: {} 2 | external_package_dirs: [] 3 | model_metadata: 4 | example_model_input: 5 | prompt: What is the meaning of life? 
6 | model_name: qwen-7b-chat 7 | python_version: py310 8 | requirements: 9 | - accelerate==0.23.0 10 | - tiktoken==0.5.1 11 | - einops==0.6.1 12 | - scipy==1.11.3 13 | - transformers_stream_generator==0.0.4 14 | - peft==0.5.0 15 | - deepspeed==0.11.1 16 | - torch==2.0.1 17 | - transformers==4.32.0 18 | resources: 19 | accelerator: A10G 20 | cpu: '3' 21 | memory: 14Gi 22 | use_gpu: true 23 | secrets: {} 24 | system_packages: [] 25 | -------------------------------------------------------------------------------- /qwen/qwen-7b-chat/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/qwen/qwen-7b-chat/model/__init__.py -------------------------------------------------------------------------------- /qwen/qwen-vl/config.yaml: -------------------------------------------------------------------------------- 1 | environment_variables: {} 2 | external_package_dirs: [] 3 | model_cache: 4 | - allow_patterns: 5 | - '*.json' 6 | - '*.fp16.safetensors' 7 | - '*.bin' 8 | - '*.tiktoken' 9 | - '*.py' 10 | repo_id: Qwen/Qwen-VL 11 | model_name: Qwen VL 12 | python_version: py310 13 | requirements: 14 | - torch==2.0.1 15 | - accelerate==0.24.0 16 | - transformers==4.35.0 17 | - einops==0.7.0 18 | - torchvision==0.15.2 19 | - matplotlib==3.8.2 20 | - tiktoken==0.5.2 21 | - transformers_stream_generator==0.0.4 22 | resources: 23 | accelerator: A10G 24 | use_gpu: true 25 | secrets: {} 26 | system_packages: [] 27 | -------------------------------------------------------------------------------- /qwen/qwen-vl/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/qwen/qwen-vl/model/__init__.py -------------------------------------------------------------------------------- /sana/sana_1600M/config.yaml: -------------------------------------------------------------------------------- 1 | build_commands: [] 2 | base_image: 3 | image: alphatozeta/cuda-python:12.1.1-cudnn8-devel-ubuntu22.04 4 | environment_variables: {} 5 | external_package_dirs: [] 6 | model_metadata: 7 | example_model_input: { 8 | "prompt": "a photo of an astronaut riding a horse on mars", 9 | "height": 1024, 10 | "width": 1024, 11 | "guidance_scale": 5.0, 12 | "pag_guidance_scale": 2.0, 13 | "num_inference_steps": 18, 14 | "seed": 4096, 15 | } 16 | model_name: Sana 1600M 17 | python_version: py311 18 | requirements: 19 | - git+https://github.com/NVlabs/Sana.git@d7945026d8d85008aca1d1e6db5717a1069f5c84 20 | - huggingface-hub==0.26.3 21 | - hf-transfer==0.1.8 22 | resources: 23 | accelerator: H100_40GB 24 | use_gpu: true 25 | secrets: 26 | hf_access_token: "null" 27 | system_packages: 28 | - ffmpeg 29 | - libsm6 30 | - libxext6 31 | - python3.10-venv 32 | -------------------------------------------------------------------------------- /sana/sana_1600M/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/sana/sana_1600M/model/__init__.py -------------------------------------------------------------------------------- /sana/sana_1600M/packages/Sana/CITATION.bib: -------------------------------------------------------------------------------- 1 | @misc{xie2024sana, 2 | title={Sana: Efficient High-Resolution Image 
Synthesis with Linear Diffusion Transformer}, 3 | author={Enze Xie and Junsong Chen and Junyu Chen and Han Cai and Haotian Tang and Yujun Lin and Zhekai Zhang and Muyang Li and Ligeng Zhu and Yao Lu and Song Han}, 4 | year={2024}, 5 | eprint={2410.10629}, 6 | archivePrefix={arXiv}, 7 | primaryClass={cs.CV}, 8 | url={https://arxiv.org/abs/2410.10629}, 9 | } 10 | -------------------------------------------------------------------------------- /sana/sana_1600M/packages/Sana/CIs/add_license_all.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | addlicense -s -c 'NVIDIA CORPORATION & AFFILIATES' -ignore "**/*__init__.py" **/*.py 3 | -------------------------------------------------------------------------------- /sana/sana_1600M/packages/Sana/asset/Sana.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/sana/sana_1600M/packages/Sana/asset/Sana.jpg -------------------------------------------------------------------------------- /sana/sana_1600M/packages/Sana/asset/example_data/00000000.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/sana/sana_1600M/packages/Sana/asset/example_data/00000000.png -------------------------------------------------------------------------------- /sana/sana_1600M/packages/Sana/asset/example_data/00000000.txt: -------------------------------------------------------------------------------- 1 | a cyberpunk cat with a neon sign that says "Sana". 2 | -------------------------------------------------------------------------------- /sana/sana_1600M/packages/Sana/asset/example_data/00000000_InternVL2-26B.json: -------------------------------------------------------------------------------- 1 | { 2 | "00000000": { 3 | "InternVL2-26B": "a cyberpunk cat with a neon sign that says 'Sana'" 4 | } 5 | } 6 | -------------------------------------------------------------------------------- /sana/sana_1600M/packages/Sana/asset/example_data/00000000_InternVL2-26B_clip_score.json: -------------------------------------------------------------------------------- 1 | { 2 | "00000000": { 3 | "InternVL2-26B": "27.1037" 4 | } 5 | } 6 | -------------------------------------------------------------------------------- /sana/sana_1600M/packages/Sana/asset/example_data/00000000_VILA1-5-13B.json: -------------------------------------------------------------------------------- 1 | { 2 | "00000000": { 3 | "VILA1-5-13B": "a cyberpunk cat with a neon sign that says 'Sana'" 4 | } 5 | } 6 | -------------------------------------------------------------------------------- /sana/sana_1600M/packages/Sana/asset/example_data/00000000_VILA1-5-13B_clip_score.json: -------------------------------------------------------------------------------- 1 | { 2 | "00000000": { 3 | "VILA1-5-13B": "27.2321" 4 | } 5 | } 6 | -------------------------------------------------------------------------------- /sana/sana_1600M/packages/Sana/asset/example_data/00000000_prompt_clip_score.json: -------------------------------------------------------------------------------- 1 | { 2 | "00000000": { 3 | "prompt": "26.7331" 4 | } 5 | } 6 | -------------------------------------------------------------------------------- /sana/sana_1600M/packages/Sana/asset/example_data/meta_data.json:
-------------------------------------------------------------------------------- 1 | { 2 | "name": "sana-dev", 3 | "__kind__": "Sana-ImgDataset", 4 | "img_names": [ 5 | "00000000", "00000000" 6 | ] 7 | } 8 | -------------------------------------------------------------------------------- /sana/sana_1600M/packages/Sana/asset/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/sana/sana_1600M/packages/Sana/asset/logo.png -------------------------------------------------------------------------------- /sana/sana_1600M/packages/Sana/asset/model-incremental.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/sana/sana_1600M/packages/Sana/asset/model-incremental.jpg -------------------------------------------------------------------------------- /sana/sana_1600M/packages/Sana/asset/model_paths.txt: -------------------------------------------------------------------------------- 1 | output/Sana_1600M_1024px/checkpoints/Sana_1600M_1024px.pth 2 | output/Sana_1600M_1024px/checkpoints/Sana_1600M_1024px.pth 3 | -------------------------------------------------------------------------------- /sana/sana_1600M/packages/Sana/diffusion/__init__.py: -------------------------------------------------------------------------------- 1 | # Modified from OpenAI's diffusion repos 2 | # GLIDE: https://github.com/openai/glide-text2im/blob/main/glide_text2im/gaussian_diffusion.py 3 | # ADM: https://github.com/openai/guided-diffusion/blob/main/guided_diffusion 4 | # IDDPM: https://github.com/openai/improved-diffusion/blob/main/improved_diffusion/gaussian_diffusion.py 5 | 6 | from .dpm_solver import DPMS 7 | from .flow_euler_sampler import FlowEuler 8 | from .iddpm import Scheduler 9 | from .sa_sampler import SASolverSampler 10 | -------------------------------------------------------------------------------- /sana/sana_1600M/packages/Sana/diffusion/data/__init__.py: -------------------------------------------------------------------------------- 1 | from .datasets import * 2 | from .transforms import get_transform 3 | -------------------------------------------------------------------------------- /sana/sana_1600M/packages/Sana/diffusion/data/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .sana_data import SanaImgDataset, SanaWebDataset 2 | from .sana_data_multi_scale import DummyDatasetMS, SanaWebDatasetMS 3 | from .utils import * 4 | -------------------------------------------------------------------------------- /sana/sana_1600M/packages/Sana/diffusion/data/wids/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-2019 NVIDIA CORPORATION. All rights reserved. 2 | # This file is part of the WebDataset library. 3 | # See the LICENSE file for licensing terms (BSD-style). 
4 | # 5 | # flake8: noqa 6 | 7 | from .wids import ( 8 | ChunkedSampler, 9 | DistributedChunkedSampler, 10 | DistributedLocalSampler, 11 | DistributedRangedSampler, 12 | ShardedSampler, 13 | ShardListDataset, 14 | ShardListDatasetMulti, 15 | lru_json_load, 16 | ) 17 | -------------------------------------------------------------------------------- /sana/sana_1600M/packages/Sana/diffusion/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/sana/sana_1600M/packages/Sana/diffusion/model/__init__.py -------------------------------------------------------------------------------- /sana/sana_1600M/packages/Sana/diffusion/model/dc_ae/efficientvit/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/sana/sana_1600M/packages/Sana/diffusion/model/dc_ae/efficientvit/__init__.py -------------------------------------------------------------------------------- /sana/sana_1600M/packages/Sana/diffusion/model/dc_ae/efficientvit/apps/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/sana/sana_1600M/packages/Sana/diffusion/model/dc_ae/efficientvit/apps/__init__.py -------------------------------------------------------------------------------- /sana/sana_1600M/packages/Sana/diffusion/model/dc_ae/efficientvit/apps/trainer/__init__.py: -------------------------------------------------------------------------------- 1 | from .run_config import * 2 | -------------------------------------------------------------------------------- /sana/sana_1600M/packages/Sana/diffusion/model/dc_ae/efficientvit/apps/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .dist import * 2 | from .ema import * 3 | 4 | # from .export import * 5 | from .image import * 6 | from .init import * 7 | from .lr import * 8 | from .metric import * 9 | from .misc import * 10 | from .opt import * 11 | -------------------------------------------------------------------------------- /sana/sana_1600M/packages/Sana/diffusion/model/dc_ae/efficientvit/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/sana/sana_1600M/packages/Sana/diffusion/model/dc_ae/efficientvit/models/__init__.py -------------------------------------------------------------------------------- /sana/sana_1600M/packages/Sana/diffusion/model/dc_ae/efficientvit/models/efficientvit/__init__.py: -------------------------------------------------------------------------------- 1 | from .dc_ae import * 2 | -------------------------------------------------------------------------------- /sana/sana_1600M/packages/Sana/diffusion/model/dc_ae/efficientvit/models/nn/__init__.py: -------------------------------------------------------------------------------- 1 | from .act import * 2 | from .drop import * 3 | from .norm import * 4 | from .ops import * 5 | from .triton_rms_norm import * 6 | -------------------------------------------------------------------------------- /sana/sana_1600M/packages/Sana/diffusion/model/dc_ae/efficientvit/models/utils/__init__.py: 
-------------------------------------------------------------------------------- 1 | from .list import * 2 | from .network import * 3 | from .random import * 4 | -------------------------------------------------------------------------------- /sana/sana_1600M/packages/Sana/diffusion/model/nets/fastlinear/modules/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 MIT Han Lab 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # 15 | # SPDX-License-Identifier: Apache-2.0 16 | 17 | from .triton_lite_mla import * 18 | from .triton_lite_mla_fwd import * 19 | from .triton_mb_conv_pre_glu import * 20 | 21 | # from .flash_attn import * 22 | -------------------------------------------------------------------------------- /sana/sana_1600M/packages/Sana/diffusion/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/sana/sana_1600M/packages/Sana/diffusion/utils/__init__.py -------------------------------------------------------------------------------- /sana/sana_1600M/packages/Sana/sana/tools/__init__.py: -------------------------------------------------------------------------------- 1 | from .download import download_model 2 | from .hf_utils import hf_download_or_fpath 3 | -------------------------------------------------------------------------------- /sana/sana_1600M/packages/Sana/scripts/style.css: -------------------------------------------------------------------------------- 1 | /*.gradio-container{width:680px!important}*/ 2 | /* style.css */ 3 | .gradio_group, .gradio_row, .gradio_column { 4 | display: flex; 5 | flex-direction: row; 6 | justify-content: flex-start; 7 | align-items: flex-start; 8 | flex-wrap: wrap; 9 | } 10 | -------------------------------------------------------------------------------- /sana/sana_1600M/packages/Sana/tests/bash/entry.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | for t in tests/bash/test_*.sh; do 5 | echo "========================== Testing $t ==================================" 6 | bash $t; 7 | done 8 | -------------------------------------------------------------------------------- /sana/sana_1600M/packages/Sana/tests/bash/test_inference.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | python scripts/inference.py \ 5 | --config=configs/sana_config/1024ms/Sana_600M_img1024.yaml \ 6 | --model_path=hf://Efficient-Large-Model/Sana_600M_1024px/checkpoints/Sana_600M_1024px_MultiLing.pth 7 | 8 | 9 | python scripts/inference.py \ 10 | --config=configs/sana_config/1024ms/Sana_1600M_img1024.yaml \ 11 | --model_path=hf://Efficient-Large-Model/Sana_1600M_1024px/checkpoints/Sana_1600M_1024px.pth 12 | --------------------------------------------------------------------------------
/sana/sana_1600M/packages/Sana/tests/bash/test_training_1epoch.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | mkdir -p data/data_public 5 | huggingface-cli download Efficient-Large-Model/sana_data_public --repo-type dataset --local-dir ./data/data_public --local-dir-use-symlinks False 6 | 7 | bash train_scripts/train.sh configs/sana_config/512ms/ci_Sana_600M_img512.yaml --data.load_vae_feat=true 8 | 9 | bash train_scripts/train.sh configs/sana_config/512ms/ci_Sana_600M_img512.yaml --data.data_dir="[asset/example_data]" --data.type=SanaImgDataset --model.multi_scale=false 10 | -------------------------------------------------------------------------------- /sana/sana_1600M/packages/Sana/tools/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/sana/sana_1600M/packages/Sana/tools/__init__.py -------------------------------------------------------------------------------- /sana/sana_1600M/packages/Sana/tools/metrics/clip-score/src/clip_score/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.1.1" 2 | -------------------------------------------------------------------------------- /sana/sana_1600M/packages/Sana/tools/metrics/clip-score/src/clip_score/__main__.py: -------------------------------------------------------------------------------- 1 | import clip_score.clip_score 2 | 3 | clip_score.clip_score.main() 4 | -------------------------------------------------------------------------------- /sana/sana_1600M/packages/Sana/tools/metrics/geneval/evaluation/download_models.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Download Mask2Former object detection config and weights 4 | 5 | if [ !
-z "$1" ] 6 | then 7 | mkdir -p "$1" 8 | echo "Downloading mask2former for GenEval" 9 | wget https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco_20220504_001756-743b7d99.pth -O "$1/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco.pth" 10 | fi 11 | -------------------------------------------------------------------------------- /sana/sana_1600M/packages/Sana/tools/metrics/geneval/images/geneval_figure_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/sana/sana_1600M/packages/Sana/tools/metrics/geneval/images/geneval_figure_1.png -------------------------------------------------------------------------------- /sana/sana_1600M/packages/Sana/tools/metrics/pytorch-fid/noxfile.py: -------------------------------------------------------------------------------- 1 | import nox 2 | 3 | LOCATIONS = ("src/", "tests/", "noxfile.py", "setup.py") 4 | 5 | 6 | @nox.session 7 | def lint(session): 8 | session.install("flake8") 9 | session.install("flake8-bugbear") 10 | session.install("flake8-isort") 11 | 12 | args = session.posargs or LOCATIONS 13 | session.run("flake8", *args) 14 | 15 | 16 | @nox.session 17 | def tests(session): 18 | session.install(".") 19 | session.install("pytest") 20 | session.install("pytest-mock") 21 | session.run("pytest", *session.posargs) 22 | -------------------------------------------------------------------------------- /sana/sana_1600M/packages/Sana/tools/metrics/pytorch-fid/setup.cfg: -------------------------------------------------------------------------------- 1 | [flake8] 2 | select=F,W,E,I,B,B9 3 | ignore=W503,B950 4 | max-line-length=79 5 | 6 | [isort] 7 | multi_line_output=1 8 | line_length=79 9 | -------------------------------------------------------------------------------- /sana/sana_1600M/packages/Sana/tools/metrics/pytorch-fid/src/pytorch_fid/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.3.0" 2 | -------------------------------------------------------------------------------- /sana/sana_1600M/packages/Sana/tools/metrics/pytorch-fid/src/pytorch_fid/__main__.py: -------------------------------------------------------------------------------- 1 | import pytorch_fid.fid_score 2 | 3 | pytorch_fid.fid_score.main() 4 | -------------------------------------------------------------------------------- /sana/sana_1600M/packages/Sana/train_scripts/train.sh: -------------------------------------------------------------------------------- 1 | #/bin/bash 2 | set -e 3 | 4 | work_dir=output/debug 5 | np=8 6 | 7 | 8 | if [[ $1 == *.yaml ]]; then 9 | config=$1 10 | shift 11 | else 12 | config="configs/sana_config/512ms/sample_dataset.yaml" 13 | echo "Only support .yaml files, but get $1. 
Falling back to --config_path=$config" 14 | fi 15 | 16 | TRITON_PRINT_AUTOTUNING=1 \ 17 | torchrun --nproc_per_node=$np --master_port=15432 \ 18 | train_scripts/train.py \ 19 | --config_path=$config \ 20 | --work_dir=$work_dir \ 21 | --name=tmp \ 22 | --resume_from=latest \ 23 | --report_to=tensorboard \ 24 | --debug=true \ 25 | "$@" 26 | -------------------------------------------------------------------------------- /sana/sana_600M/config.yaml: -------------------------------------------------------------------------------- 1 | build_commands: [] 2 | base_image: 3 | image: alphatozeta/cuda-python:12.1.1-cudnn8-devel-ubuntu22.04 4 | environment_variables: {} 5 | external_package_dirs: [] 6 | model_metadata: 7 | example_model_input: { 8 | "prompt": "a photo of an astronaut riding a horse on mars", 9 | "height": 1024, 10 | "width": 1024, 11 | "guidance_scale": 5.0, 12 | "pag_guidance_scale": 2.0, 13 | "num_inference_steps": 18, 14 | "seed": 4096, 15 | } 16 | model_name: Sana 600M 17 | python_version: py311 18 | requirements: 19 | - git+https://github.com/NVlabs/Sana.git@d7945026d8d85008aca1d1e6db5717a1069f5c84 20 | - huggingface-hub==0.26.3 21 | - hf-transfer==0.1.8 22 | resources: 23 | accelerator: H100_40GB 24 | use_gpu: true 25 | secrets: 26 | hf_access_token: "null" 27 | system_packages: 28 | - ffmpeg 29 | - libsm6 30 | - libxext6 31 | - python3.10-venv 32 | -------------------------------------------------------------------------------- /sana/sana_600M/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/sana/sana_600M/model/__init__.py -------------------------------------------------------------------------------- /sana/sana_600M/packages/Sana/CITATION.bib: -------------------------------------------------------------------------------- 1 | @misc{xie2024sana, 2 | title={Sana: Efficient High-Resolution Image Synthesis with Linear Diffusion Transformer}, 3 | author={Enze Xie and Junsong Chen and Junyu Chen and Han Cai and Haotian Tang and Yujun Lin and Zhekai Zhang and Muyang Li and Ligeng Zhu and Yao Lu and Song Han}, 4 | year={2024}, 5 | eprint={2410.10629}, 6 | archivePrefix={arXiv}, 7 | primaryClass={cs.CV}, 8 | url={https://arxiv.org/abs/2410.10629}, 9 | } 10 | -------------------------------------------------------------------------------- /sana/sana_600M/packages/Sana/CIs/add_license_all.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | addlicense -s -c 'NVIDIA CORPORATION & AFFILIATES' -ignore "**/*__init__.py" **/*.py 3 | -------------------------------------------------------------------------------- /sana/sana_600M/packages/Sana/asset/Sana.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/sana/sana_600M/packages/Sana/asset/Sana.jpg -------------------------------------------------------------------------------- /sana/sana_600M/packages/Sana/asset/example_data/00000000.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/sana/sana_600M/packages/Sana/asset/example_data/00000000.png --------------------------------------------------------------------------------
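(Editor's note, hedged: the Sana configs above ship an `example_model_input`, so a minimal client sketch fits here. `MODEL_ID`, the `BASETEN_API_KEY` environment variable, and the name of the base64 image field in the response are assumptions — other examples in this repo return it under different keys, e.g. `"output"` vs. `"data"`.)

```python
# Minimal sketch: call a deployed Sana truss with the example input from its
# config.yaml. MODEL_ID / BASETEN_API_KEY are placeholders, and the response
# field holding the base64 image is an assumption.
import base64
import os

import requests

MODEL_ID = "abcd1234"  # placeholder

payload = {
    "prompt": "a photo of an astronaut riding a horse on mars",
    "height": 1024,
    "width": 1024,
    "guidance_scale": 5.0,
    "pag_guidance_scale": 2.0,
    "num_inference_steps": 18,
    "seed": 4096,
}

resp = requests.post(
    f"https://model-{MODEL_ID}.api.baseten.co/production/predict",
    headers={"Authorization": f"Api-Key {os.environ['BASETEN_API_KEY']}"},
    json=payload,
    timeout=300,
).json()

# Accept either of the base64 keys used elsewhere in this repo.
b64 = resp.get("data") or resp.get("output")
if b64 is None:
    raise KeyError(f"no image field in response; keys: {list(resp)}")

with open("sana.png", "wb") as f:
    f.write(base64.b64decode(b64))
```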
/sana/sana_600M/packages/Sana/asset/example_data/00000000.txt: -------------------------------------------------------------------------------- 1 | a cyberpunk cat with a neon sign that says "Sana". 2 | -------------------------------------------------------------------------------- /sana/sana_600M/packages/Sana/asset/example_data/00000000_InternVL2-26B.json: -------------------------------------------------------------------------------- 1 | { 2 | "00000000": { 3 | "InternVL2-26B": "a cyberpunk cat with a neon sign that says 'Sana'" 4 | } 5 | } 6 | -------------------------------------------------------------------------------- /sana/sana_600M/packages/Sana/asset/example_data/00000000_InternVL2-26B_clip_score.json: -------------------------------------------------------------------------------- 1 | { 2 | "00000000": { 3 | "InternVL2-26B": "27.1037" 4 | } 5 | } 6 | -------------------------------------------------------------------------------- /sana/sana_600M/packages/Sana/asset/example_data/00000000_VILA1-5-13B.json: -------------------------------------------------------------------------------- 1 | { 2 | "00000000": { 3 | "VILA1-5-13B": "a cyberpunk cat with a neon sign that says 'Sana'" 4 | } 5 | } 6 | -------------------------------------------------------------------------------- /sana/sana_600M/packages/Sana/asset/example_data/00000000_VILA1-5-13B_clip_score.json: -------------------------------------------------------------------------------- 1 | { 2 | "00000000": { 3 | "VILA1-5-13B": "27.2321" 4 | } 5 | } 6 | -------------------------------------------------------------------------------- /sana/sana_600M/packages/Sana/asset/example_data/00000000_prompt_clip_score.json: -------------------------------------------------------------------------------- 1 | { 2 | "00000000": { 3 | "prompt": "26.7331" 4 | } 5 | } 6 | -------------------------------------------------------------------------------- /sana/sana_600M/packages/Sana/asset/example_data/meta_data.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "sana-dev", 3 | "__kind__": "Sana-ImgDataset", 4 | "img_names": [ 5 | "00000000", "00000000" 6 | ] 7 | } 8 | -------------------------------------------------------------------------------- /sana/sana_600M/packages/Sana/asset/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/sana/sana_600M/packages/Sana/asset/logo.png -------------------------------------------------------------------------------- /sana/sana_600M/packages/Sana/asset/model-incremental.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/sana/sana_600M/packages/Sana/asset/model-incremental.jpg -------------------------------------------------------------------------------- /sana/sana_600M/packages/Sana/asset/model_paths.txt: -------------------------------------------------------------------------------- 1 | output/Sana_1600M_1024px/checkpoints/Sana_1600M_1024px.pth 2 | output/Sana_1600M_1024px/checkpoints/Sana_1600M_1024px.pth 3 | -------------------------------------------------------------------------------- /sana/sana_600M/packages/Sana/diffusion/__init__.py: -------------------------------------------------------------------------------- 1 | # Modified from OpenAI's diffusion repos 2 | # 
GLIDE: https://github.com/openai/glide-text2im/blob/main/glide_text2im/gaussian_diffusion.py 3 | # ADM: https://github.com/openai/guided-diffusion/blob/main/guided_diffusion 4 | # IDDPM: https://github.com/openai/improved-diffusion/blob/main/improved_diffusion/gaussian_diffusion.py 5 | 6 | from .dpm_solver import DPMS 7 | from .flow_euler_sampler import FlowEuler 8 | from .iddpm import Scheduler 9 | from .sa_sampler import SASolverSampler 10 | -------------------------------------------------------------------------------- /sana/sana_600M/packages/Sana/diffusion/data/__init__.py: -------------------------------------------------------------------------------- 1 | from .datasets import * 2 | from .transforms import get_transform 3 | -------------------------------------------------------------------------------- /sana/sana_600M/packages/Sana/diffusion/data/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .sana_data import SanaImgDataset, SanaWebDataset 2 | from .sana_data_multi_scale import DummyDatasetMS, SanaWebDatasetMS 3 | from .utils import * 4 | -------------------------------------------------------------------------------- /sana/sana_600M/packages/Sana/diffusion/data/wids/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017-2019 NVIDIA CORPORATION. All rights reserved. 2 | # This file is part of the WebDataset library. 3 | # See the LICENSE file for licensing terms (BSD-style). 4 | # 5 | # flake8: noqa 6 | 7 | from .wids import ( 8 | ChunkedSampler, 9 | DistributedChunkedSampler, 10 | DistributedLocalSampler, 11 | DistributedRangedSampler, 12 | ShardedSampler, 13 | ShardListDataset, 14 | ShardListDatasetMulti, 15 | lru_json_load, 16 | ) 17 | -------------------------------------------------------------------------------- /sana/sana_600M/packages/Sana/diffusion/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/sana/sana_600M/packages/Sana/diffusion/model/__init__.py -------------------------------------------------------------------------------- /sana/sana_600M/packages/Sana/diffusion/model/dc_ae/efficientvit/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/sana/sana_600M/packages/Sana/diffusion/model/dc_ae/efficientvit/__init__.py -------------------------------------------------------------------------------- /sana/sana_600M/packages/Sana/diffusion/model/dc_ae/efficientvit/apps/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/sana/sana_600M/packages/Sana/diffusion/model/dc_ae/efficientvit/apps/__init__.py -------------------------------------------------------------------------------- /sana/sana_600M/packages/Sana/diffusion/model/dc_ae/efficientvit/apps/trainer/__init__.py: -------------------------------------------------------------------------------- 1 | from .run_config import * 2 | -------------------------------------------------------------------------------- /sana/sana_600M/packages/Sana/diffusion/model/dc_ae/efficientvit/apps/utils/__init__.py: 
-------------------------------------------------------------------------------- 1 | from .dist import * 2 | from .ema import * 3 | 4 | # from .export import * 5 | from .image import * 6 | from .init import * 7 | from .lr import * 8 | from .metric import * 9 | from .misc import * 10 | from .opt import * 11 | -------------------------------------------------------------------------------- /sana/sana_600M/packages/Sana/diffusion/model/dc_ae/efficientvit/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/sana/sana_600M/packages/Sana/diffusion/model/dc_ae/efficientvit/models/__init__.py -------------------------------------------------------------------------------- /sana/sana_600M/packages/Sana/diffusion/model/dc_ae/efficientvit/models/efficientvit/__init__.py: -------------------------------------------------------------------------------- 1 | from .dc_ae import * 2 | -------------------------------------------------------------------------------- /sana/sana_600M/packages/Sana/diffusion/model/dc_ae/efficientvit/models/nn/__init__.py: -------------------------------------------------------------------------------- 1 | from .act import * 2 | from .drop import * 3 | from .norm import * 4 | from .ops import * 5 | from .triton_rms_norm import * 6 | -------------------------------------------------------------------------------- /sana/sana_600M/packages/Sana/diffusion/model/dc_ae/efficientvit/models/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .list import * 2 | from .network import * 3 | from .random import * 4 | -------------------------------------------------------------------------------- /sana/sana_600M/packages/Sana/diffusion/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/sana/sana_600M/packages/Sana/diffusion/utils/__init__.py -------------------------------------------------------------------------------- /sana/sana_600M/packages/Sana/sana/tools/__init__.py: -------------------------------------------------------------------------------- 1 | from .download import download_model 2 | from .hf_utils import hf_download_or_fpath 3 | -------------------------------------------------------------------------------- /sana/sana_600M/packages/Sana/scripts/style.css: -------------------------------------------------------------------------------- 1 | /*.gradio-container{width:680px!important}*/ 2 | /* style.css */ 3 | .gradio_group, .gradio_row, .gradio_column { 4 | display: flex; 5 | flex-direction: row; 6 | justify-content: flex-start; 7 | align-items: flex-start; 8 | flex-wrap: wrap; 9 | } 10 | -------------------------------------------------------------------------------- /sana/sana_600M/packages/Sana/tests/bash/entry.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | for t in tests/bash/test_*.sh; do 5 | echo "========================== Testing $t ==================================" 6 | bash $t; 7 | done 8 | -------------------------------------------------------------------------------- /sana/sana_600M/packages/Sana/tests/bash/test_inference.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 |
python scripts/inference.py \ 5 | --config=configs/sana_config/1024ms/Sana_600M_img1024.yaml \ 6 | --model_path=hf://Efficient-Large-Model/Sana_600M_1024px/checkpoints/Sana_600M_1024px_MultiLing.pth 7 | 8 | 9 | python scripts/inference.py \ 10 | --config=configs/sana_config/1024ms/Sana_1600M_img1024.yaml \ 11 | --model_path=hf://Efficient-Large-Model/Sana_1600M_1024px/checkpoints/Sana_1600M_1024px.pth 12 | -------------------------------------------------------------------------------- /sana/sana_600M/packages/Sana/tests/bash/test_training_1epoch.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | mkdir -p data/data_public 5 | huggingface-cli download Efficient-Large-Model/sana_data_public --repo-type dataset --local-dir ./data/data_public --local-dir-use-symlinks False 6 | 7 | bash train_scripts/train.sh configs/sana_config/512ms/ci_Sana_600M_img512.yaml --data.load_vae_feat=true 8 | 9 | bash train_scripts/train.sh configs/sana_config/512ms/ci_Sana_600M_img512.yaml --data.data_dir="[asset/example_data]" --data.type=SanaImgDataset --model.multi_scale=false 10 | -------------------------------------------------------------------------------- /sana/sana_600M/packages/Sana/tools/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/sana/sana_600M/packages/Sana/tools/__init__.py -------------------------------------------------------------------------------- /sana/sana_600M/packages/Sana/tools/metrics/clip-score/src/clip_score/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.1.1" 2 | -------------------------------------------------------------------------------- /sana/sana_600M/packages/Sana/tools/metrics/clip-score/src/clip_score/__main__.py: -------------------------------------------------------------------------------- 1 | import clip_score.clip_score 2 | 3 | clip_score.clip_score.main() 4 | -------------------------------------------------------------------------------- /sana/sana_600M/packages/Sana/tools/metrics/geneval/evaluation/download_models.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Download Mask2Former object detection config and weights 4 | 5 | if [ !
-z "$1" ] 6 | then 7 | mkdir -p "$1" 8 | echo "Downloading mask2former for GenEval" 9 | wget https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco_20220504_001756-743b7d99.pth -O "$1/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco.pth" 10 | fi 11 | -------------------------------------------------------------------------------- /sana/sana_600M/packages/Sana/tools/metrics/geneval/images/geneval_figure_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/sana/sana_600M/packages/Sana/tools/metrics/geneval/images/geneval_figure_1.png -------------------------------------------------------------------------------- /sana/sana_600M/packages/Sana/tools/metrics/pytorch-fid/noxfile.py: -------------------------------------------------------------------------------- 1 | import nox 2 | 3 | LOCATIONS = ("src/", "tests/", "noxfile.py", "setup.py") 4 | 5 | 6 | @nox.session 7 | def lint(session): 8 | session.install("flake8") 9 | session.install("flake8-bugbear") 10 | session.install("flake8-isort") 11 | 12 | args = session.posargs or LOCATIONS 13 | session.run("flake8", *args) 14 | 15 | 16 | @nox.session 17 | def tests(session): 18 | session.install(".") 19 | session.install("pytest") 20 | session.install("pytest-mock") 21 | session.run("pytest", *session.posargs) 22 | -------------------------------------------------------------------------------- /sana/sana_600M/packages/Sana/tools/metrics/pytorch-fid/setup.cfg: -------------------------------------------------------------------------------- 1 | [flake8] 2 | select=F,W,E,I,B,B9 3 | ignore=W503,B950 4 | max-line-length=79 5 | 6 | [isort] 7 | multi_line_output=1 8 | line_length=79 9 | -------------------------------------------------------------------------------- /sana/sana_600M/packages/Sana/tools/metrics/pytorch-fid/src/pytorch_fid/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.3.0" 2 | -------------------------------------------------------------------------------- /sana/sana_600M/packages/Sana/tools/metrics/pytorch-fid/src/pytorch_fid/__main__.py: -------------------------------------------------------------------------------- 1 | import pytorch_fid.fid_score 2 | 3 | pytorch_fid.fid_score.main() 4 | -------------------------------------------------------------------------------- /sana/sana_600M/packages/Sana/train_scripts/train.sh: -------------------------------------------------------------------------------- 1 | #/bin/bash 2 | set -e 3 | 4 | work_dir=output/debug 5 | np=8 6 | 7 | 8 | if [[ $1 == *.yaml ]]; then 9 | config=$1 10 | shift 11 | else 12 | config="configs/sana_config/512ms/sample_dataset.yaml" 13 | echo "Only support .yaml files, but get $1. 
Falling back to --config_path=$config" 14 | fi 15 | 16 | TRITON_PRINT_AUTOTUNING=1 \ 17 | torchrun --nproc_per_node=$np --master_port=15432 \ 18 | train_scripts/train.py \ 19 | --config_path=$config \ 20 | --work_dir=$work_dir \ 21 | --name=tmp \ 22 | --resume_from=latest \ 23 | --report_to=tensorboard \ 24 | --debug=true \ 25 | "$@" 26 | -------------------------------------------------------------------------------- /segment-anything/README.md: -------------------------------------------------------------------------------- 1 | # Segment Anything Model 2 | 3 | This example deploys the Segment Anything Model (SAM) with its weights preloaded via Truss external data. 4 | 5 | ## Deploy to Baseten 6 | To deploy the model, run the following from the root of this directory: 7 | 8 | ``` 9 | truss push --publish 10 | ``` 11 | 12 | ## Predict 13 | Example prediction: 14 | 15 | ``` 16 | truss predict --published -d '{"image_url": "https://as2.ftcdn.net/v2/jpg/00/66/26/87/1000_F_66268784_jccdcfdpf2vmq5X8raYA8JQT0sziZ1H9.jpg"}' 17 | ``` 18 | -------------------------------------------------------------------------------- /segment-anything/config.yaml: -------------------------------------------------------------------------------- 1 | environment_variables: {} 2 | external_data: 3 | - local_data_path: sam_vit_h_4b8939.pth 4 | url: https://dl.fbaipublicfiles.com/segment_anything/sam_vit_h_4b8939.pth 5 | external_package_dirs: [] 6 | model_metadata: 7 | example_model_input: 8 | image_url: https://as2.ftcdn.net/v2/jpg/00/66/26/87/1000_F_66268784_jccdcfdpf2vmq5X8raYA8JQT0sziZ1H9.jpg 9 | model_name: Segment Anything 10 | python_version: py310 11 | requirements: 12 | - git+https://github.com/facebookresearch/segment-anything.git@6fdee8f2727f4506cfbbe553e23b895e27956588 13 | - opencv-python==4.8.1.78 14 | - torch==2.1.0 15 | - torchvision==0.16.0 16 | - pycocotools==2.0.7 17 | resources: 18 | accelerator: A10G 19 | cpu: 1000m 20 | memory: 10Gi 21 | use_gpu: true 22 | secrets: {} 23 | system_packages: 24 | - python3-opencv 25 | -------------------------------------------------------------------------------- /segment-anything/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/segment-anything/model/__init__.py -------------------------------------------------------------------------------- /sesame-csm-1b/config.yaml: -------------------------------------------------------------------------------- 1 | model_name: sesame-csm-1b 2 | python_version: py310 3 | model_metadata: 4 | example_model_input: 5 | text: "Hello from Sesame."
6 | speaker: 0 7 | requirements: 8 | - torch==2.4.0 9 | - torchaudio==2.4.0 10 | - tokenizers==0.21.0 11 | - transformers==4.49.0 12 | - huggingface_hub==0.28.1 13 | - moshi==0.2.2 14 | - torchtune==0.4.0 15 | - torchao==0.9.0 16 | - silentcipher @ git+https://github.com/SesameAILabs/silentcipher@master 17 | - ffmpeg 18 | - git+https://github.com/veerbia/csm.git 19 | resources: 20 | accelerator: T4 21 | cpu: '1' 22 | memory: 10Gi 23 | use_gpu: true 24 | secrets: 25 | hf_access_token: null 26 | system_packages: [] 27 | environment_variables: {} 28 | external_package_dirs: [] 29 | -------------------------------------------------------------------------------- /sesame-csm-1b/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/sesame-csm-1b/model/__init__.py -------------------------------------------------------------------------------- /stable-diffusion/dreamshaper-lcm/README.md: -------------------------------------------------------------------------------- 1 | # Dreamshaper Latent Consistency Model 2 | 3 | A Truss for [Dreamshaper LCM](https://huggingface.co/spaces/SimianLuo/Latent_Consistency_Model), a distillation of Dreamshaper (a Stable Diffusion 1.5 fine-tune), which achieves similar quality in ~1-8 steps. Generate high-quality 768 x 768 images in under a second. 4 | -------------------------------------------------------------------------------- /stable-diffusion/dreamshaper-lcm/config.yaml: -------------------------------------------------------------------------------- 1 | environment_variables: {} 2 | external_package_dirs: [] 3 | model_name: Dreamshaper Latent Consistency Model 4 | python_version: py311 5 | requirements: 6 | - diffusers==0.21.4 7 | - transformers==4.34.1 8 | - accelerate==0.23.0 9 | - torch==2.1.0 10 | resources: 11 | accelerator: A10G 12 | cpu: '1' 13 | memory: 2Gi 14 | use_gpu: true 15 | secrets: {} 16 | system_packages: [] 17 | -------------------------------------------------------------------------------- /stable-diffusion/dreamshaper-lcm/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/stable-diffusion/dreamshaper-lcm/model/__init__.py -------------------------------------------------------------------------------- /stable-diffusion/playground-v2-trt/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/stable-diffusion/playground-v2-trt/model/__init__.py -------------------------------------------------------------------------------- /stable-diffusion/playground-v2-trt/show.py: -------------------------------------------------------------------------------- 1 | """ 2 | truss predict -d '{"prompt": "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k"}' | python show.py 3 | """ 4 | 5 | import base64 6 | import json 7 | import os 8 | import sys 9 | 10 | resp = sys.stdin.read() 11 | image = json.loads(resp)["data"] 12 | img = base64.b64decode(image) 13 | 14 | file_name = f"{image[-10:].replace('/', '')}.jpeg" 15 | img_file = open(file_name, "wb") 16 | img_file.write(img) 17 | img_file.close() 18 | os.system(f"open {file_name}") 19 |
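(Editor's note, hedged: the two `show.py` helpers in this repo read different response keys — `"output"` for playground-v2-aesthetic and `"data"` for playground-v2-trt — and both shell out to the macOS-only `open` command. The sketch below is a more defensive, cross-platform variant; the filename `show_image.py` is illustrative, not a file in this repo.)

```python
# show_image.py -- hedged sketch of a portable alternative to the show.py
# scripts above: accepts either base64 key and skips the macOS-only `open`.
import base64
import json
import sys

resp = json.loads(sys.stdin.read())
image_b64 = resp.get("data") or resp.get("output")
if image_b64 is None:
    sys.exit(f"No image field found; response keys: {list(resp)}")

with open("image.png", "wb") as f:
    f.write(base64.b64decode(image_b64))
print("Wrote image.png")
```

Usage mirrors the originals: `truss predict -d '{"prompt": "..."}' | python show_image.py`.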
-------------------------------------------------------------------------------- /stable-diffusion/sd-textual-inversion/README.md: -------------------------------------------------------------------------------- 1 | # Textual Inversion with Stable Diffusion 2 | 3 | The following example demonstrates how to use Stable Diffusion with 4 | textual inversion embeddings. 5 | 6 | This truss combines concepts from: 7 | 1. [This colab](https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers/stable_conceptualizer_inference.ipynb#scrollTo=JkIeuLEfqi-g) which demonstrates how to load textual inversion embeddings from Hugging Face repos 8 | 2. [This diffusers issue](https://github.com/huggingface/diffusers/issues/3097#issuecomment-1516138396) which demonstrates how to load an embedding directly. 9 | -------------------------------------------------------------------------------- /stable-diffusion/sd-textual-inversion/config.yaml: -------------------------------------------------------------------------------- 1 | environment_variables: {} 2 | external_package_dirs: [] 3 | model_metadata: 4 | pretty_name: Stable Diffusion - Textual Inversion 5 | tags: 6 | - image-generation 7 | model_name: SD_Textual_Inversion 8 | python_version: py311 9 | requirements: 10 | - diffusers==0.16.1 11 | - transformers 12 | - ftfy 13 | - accelerate 14 | - torch 15 | - pillow 16 | resources: 17 | accelerator: T4 18 | cpu: 500m 19 | memory: 512Mi 20 | use_gpu: true 21 | secrets: {} 22 | system_packages: [] 23 | -------------------------------------------------------------------------------- /stable-diffusion/sd-textual-inversion/data/LulaCipher.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/stable-diffusion/sd-textual-inversion/data/LulaCipher.bin -------------------------------------------------------------------------------- /stable-diffusion/sd-textual-inversion/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/stable-diffusion/sd-textual-inversion/model/__init__.py -------------------------------------------------------------------------------- /stable-diffusion/sd-turbo/config.yaml: -------------------------------------------------------------------------------- 1 | environment_variables: {} 2 | external_package_dirs: [] 3 | model_cache: 4 | - allow_patterns: 5 | - '*.json' 6 | - '*.fp16.safetensors' 7 | - '*.txt' 8 | repo_id: stabilityai/sdxl-turbo 9 | model_metadata: 10 | avatar_url: https://cdn.baseten.co/production/static/stability.png 11 | cover_image_url: https://cdn.baseten.co/production/static/sd.png 12 | example_model_input: 13 | prompt: A tree in a field under the night sky 14 | pretty_name: SD Turbo 15 | tags: 16 | - image-generation 17 | model_name: SD Turbo 18 | python_version: py311 19 | requirements: 20 | - torch==2.0.1 21 | - transformers==4.35.2 22 | - diffusers==0.23.1 23 | - accelerate==0.24.1 24 | resources: 25 | accelerator: T4 26 | use_gpu: true 27 | secrets: {} 28 | system_packages: [] 29 | -------------------------------------------------------------------------------- /stable-diffusion/sd-turbo/model/__init__.py: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/stable-diffusion/sd-turbo/model/__init__.py -------------------------------------------------------------------------------- /stable-diffusion/sdxl-controlnet-canny/baseten-logo.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/stable-diffusion/sdxl-controlnet-canny/baseten-logo.gif -------------------------------------------------------------------------------- /stable-diffusion/sdxl-controlnet-canny/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/stable-diffusion/sdxl-controlnet-canny/model/__init__.py -------------------------------------------------------------------------------- /stable-diffusion/sdxl-controlnet-depth/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/stable-diffusion/sdxl-controlnet-depth/model/__init__.py -------------------------------------------------------------------------------- /stable-diffusion/sdxl-controlnet/baseten-logo.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/stable-diffusion/sdxl-controlnet/baseten-logo.gif -------------------------------------------------------------------------------- /stable-diffusion/sdxl-controlnet/config.yaml: -------------------------------------------------------------------------------- 1 | environment_variables: {} 2 | external_package_dirs: [] 3 | model_metadata: 4 | avatar_url: https://cdn.baseten.co/production/static/stability.png 5 | cover_image_url: https://cdn.baseten.co/production/static/sd.png 6 | example_model_input: 7 | prompt: aerial view, a futuristic research complex in a bright foggy jungle, hard 8 | lighting 9 | pretty_name: Stable Diffusion ControlNet 10 | tags: 11 | - image-generation 12 | model_name: SDXL ControlNet 13 | python_version: py39 14 | requirements: 15 | - accelerate 16 | - transformers 17 | - safetensors 18 | - opencv-python 19 | - diffusers 20 | resources: 21 | accelerator: A10G 22 | cpu: 3500m 23 | memory: 20Gi 24 | use_gpu: true 25 | secrets: {} 26 | system_packages: 27 | - ffmpeg 28 | - libsm6 29 | - libxext6 30 | -------------------------------------------------------------------------------- /stable-diffusion/sdxl-controlnet/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/stable-diffusion/sdxl-controlnet/model/__init__.py -------------------------------------------------------------------------------- /stable-diffusion/sdxl-lightning/config.yaml: -------------------------------------------------------------------------------- 1 | environment_variables: {} 2 | external_package_dirs: [] 3 | model_metadata: 4 | avatar_url: https://cdn.baseten.co/production/static/stability.png 5 | cover_image_url: https://cdn.baseten.co/production/static/sd.png 6 | example_model_input: 7 | prompt: A tree in a field under the night sky 8 | pretty_name: SDXL Lightning 9 | tags: 10 | 
- image-generation 11 | model_name: SDXL Lightning 12 | python_version: py310 13 | requirements: 14 | - torch==2.0.1 15 | - transformers==4.35.2 16 | - diffusers==0.23.1 17 | - hf_transfer==0.1.4 18 | - xformers==0.0.22 19 | - accelerate==0.24.1 20 | resources: 21 | accelerator: A100 22 | use_gpu: true 23 | secrets: {} 24 | system_packages: [] 25 | -------------------------------------------------------------------------------- /stable-diffusion/sdxl-lightning/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/stable-diffusion/sdxl-lightning/model/__init__.py -------------------------------------------------------------------------------- /stable-diffusion/sdxl-lora-swapping/config.yaml: -------------------------------------------------------------------------------- 1 | environment_variables: {} 2 | external_package_dirs: [] 3 | model_metadata: 4 | example_model_input: 5 | lora: 6 | repo_id: nerijs/pixel-art-xl 7 | weights: pixel-art-xl.safetensors 8 | prompt: pixel art, a baby giraffe 9 | model_name: Stable Diffusion XL with LoRA Swapping 10 | python_version: py311 11 | requirements: 12 | - accelerate==0.23.0 13 | - transformers==4.33.2 14 | - safetensors==0.3.3 15 | - opencv-python==4.8.0.76 16 | - diffusers==0.21.2 17 | resources: 18 | accelerator: A100 19 | cpu: 3500m 20 | memory: 20Gi 21 | use_gpu: true 22 | secrets: {} 23 | system_packages: 24 | - ffmpeg 25 | - libsm6 26 | - libxext6 27 | -------------------------------------------------------------------------------- /stable-diffusion/sdxl-lora-swapping/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/stable-diffusion/sdxl-lora-swapping/model/__init__.py -------------------------------------------------------------------------------- /stable-diffusion/sdxl-lora/config.yaml: -------------------------------------------------------------------------------- 1 | environment_variables: {} 2 | external_package_dirs: [] 3 | model_name: Stable Diffusion XL with LoRA 4 | python_version: py311 5 | requirements: 6 | - accelerate 7 | - transformers 8 | - safetensors 9 | - opencv-python 10 | - diffusers 11 | resources: 12 | accelerator: A10G 13 | cpu: 3500m 14 | memory: 20Gi 15 | use_gpu: true 16 | secrets: {} 17 | system_packages: 18 | - ffmpeg 19 | - libsm6 20 | - libxext6 21 | -------------------------------------------------------------------------------- /stable-diffusion/sdxl-lora/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/stable-diffusion/sdxl-lora/model/__init__.py -------------------------------------------------------------------------------- /stable-diffusion/sdxl-turbo/config.yaml: -------------------------------------------------------------------------------- 1 | environment_variables: {} 2 | external_package_dirs: [] 3 | model_cache: 4 | - allow_patterns: 5 | - '*.json' 6 | - '*.fp16.safetensors' 7 | - '*.txt' 8 | repo_id: stabilityai/sdxl-turbo 9 | model_metadata: 10 | avatar_url: https://cdn.baseten.co/production/static/stability.png 11 | cover_image_url: https://cdn.baseten.co/production/static/sd.png 12 | example_model_input: 13 | prompt: A tree in a field under the night 
sky 14 | pretty_name: SDXL Turbo 15 | tags: 16 | - image-generation 17 | model_name: SDXL Turbo 18 | python_version: py310 19 | requirements: 20 | - torch==2.0.1 21 | - transformers==4.35.2 22 | - diffusers==0.23.1 23 | - hf_transfer==0.1.4 24 | - xformers==0.0.22 25 | - accelerate==0.24.1 26 | resources: 27 | accelerator: T4 28 | cpu: '3' 29 | memory: 20Gi 30 | use_gpu: true 31 | secrets: {} 32 | system_packages: [] 33 | -------------------------------------------------------------------------------- /stable-diffusion/sdxl-turbo/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/stable-diffusion/sdxl-turbo/model/__init__.py -------------------------------------------------------------------------------- /stable-diffusion/stable-diffusion-3-medium/config.yaml: -------------------------------------------------------------------------------- 1 | environment_variables: 2 | HF_HUB_OFFLINE: 1 3 | external_package_dirs: [] 4 | model_metadata: {} 5 | model_cache: 6 | - repo_id: stabilityai/stable-diffusion-3-medium-diffusers 7 | model_name: Stable Diffusion 3 Medium 8 | python_version: py310 9 | requirements: 10 | - diffusers==0.29.0 11 | - transformers 12 | - accelerate 13 | - sentencepiece 14 | - protobuf 15 | resources: 16 | accelerator: A100 17 | use_gpu: true 18 | secrets: 19 | hf_access_token: "" 20 | system_packages: 21 | - ffmpeg 22 | - libsm6 23 | - libxext6 24 | -------------------------------------------------------------------------------- /stable-diffusion/stable-diffusion-3-medium/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/stable-diffusion/stable-diffusion-3-medium/model/__init__.py -------------------------------------------------------------------------------- /stable-diffusion/stable-diffusion-inpainting-trt/config.yaml: -------------------------------------------------------------------------------- 1 | environment_variables: {} 2 | external_package_dirs: [] 3 | model_name: Stable Diffusion Inpainting TRT 4 | python_version: py310 5 | requirements: [] 6 | requirements_file: requirements.txt 7 | resources: 8 | accelerator: A10G 9 | use_gpu: true 10 | secrets: {} 11 | system_packages: [] 12 | -------------------------------------------------------------------------------- /stable-diffusion/stable-diffusion-inpainting-trt/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/stable-diffusion/stable-diffusion-inpainting-trt/model/__init__.py -------------------------------------------------------------------------------- /stable-diffusion/stable-diffusion-inpainting-trt/requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate==0.26.1 2 | colored==2.2.4 3 | cuda-python==12.3.0 4 | diffusers==0.14.0 5 | ftfy==6.1.3 6 | matplotlib==3.8.2 7 | nvtx==0.2.8 8 | onnx==1.13.1 9 | onnxruntime==1.14.1 10 | --extra-index-url https://pypi.ngc.nvidia.com 11 | onnx-graphsurgeon==0.3.26 12 | polygraphy==0.47.1 13 | scipy==1.12.0 14 | torch==2.2.0 15 | tensorrt==8.6.1.post1 16 | transformers==4.26.1 17 | -------------------------------------------------------------------------------- 
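The sdxl-lora-swapping config above passes a `lora` repo and weights file as part of each request. Here is a rough sketch of how such per-request adapter swapping is typically done with diffusers; the base checkpoint choice is an assumption, and `load_lora_weights`/`unload_lora_weights` are assumed available in the pinned diffusers version:

import torch
from diffusers import StableDiffusionXLPipeline

pipe = StableDiffusionXLPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16
).to("cuda")

def predict(request: dict):
    lora = request.get("lora")
    if lora:
        # Pull the requested adapter from the Hub and apply it for this call only.
        pipe.load_lora_weights(lora["repo_id"], weight_name=lora["weights"])
    image = pipe(prompt=request["prompt"]).images[0]
    if lora:
        # Restore the base weights so the next request starts clean.
        pipe.unload_lora_weights()
    return image

With the config's own example_model_input, this would load nerijs/pixel-art-xl with pixel-art-xl.safetensors before generating.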
/stable-diffusion/stable-diffusion-xl-1.0-trt-h100/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/stable-diffusion/stable-diffusion-xl-1.0-trt-h100/model/__init__.py -------------------------------------------------------------------------------- /stable-diffusion/stable-diffusion-xl-1.0-trt-h100/show.py: -------------------------------------------------------------------------------- 1 | """ 2 | truss predict -d '{"prompt": "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k"}' | python show.py 3 | """ 4 | 5 | import base64 6 | import json 7 | import os 8 | import sys 9 | 10 | resp = sys.stdin.read() 11 | image = json.loads(resp)["data"] 12 | img = base64.b64decode(image) 13 | 14 | file_name = f"{image[-10:].replace('/', '')}.jpeg" 15 | img_file = open(file_name, "wb") 16 | img_file.write(img) 17 | img_file.close() 18 | os.system(f"open {file_name}") 19 | -------------------------------------------------------------------------------- /stable-diffusion/stable-diffusion-xl-1.0-trt/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/stable-diffusion/stable-diffusion-xl-1.0-trt/model/__init__.py -------------------------------------------------------------------------------- /stable-diffusion/stable-diffusion-xl-1.0-trt/show.py: -------------------------------------------------------------------------------- 1 | """ 2 | truss predict -d '{"prompt": "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k"}' | python show.py 3 | """ 4 | 5 | import base64 6 | import json 7 | import os 8 | import sys 9 | 10 | resp = sys.stdin.read() 11 | image = json.loads(resp)["data"] 12 | img = base64.b64decode(image) 13 | 14 | file_name = f"{image[-10:].replace('/', '')}.jpeg" 15 | img_file = open(file_name, "wb") 16 | img_file.write(img) 17 | img_file.close() 18 | os.system(f"open {file_name}") 19 | -------------------------------------------------------------------------------- /stable-diffusion/stable-diffusion-xl-1.0/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/stable-diffusion/stable-diffusion-xl-1.0/model/__init__.py -------------------------------------------------------------------------------- /stable-diffusion/stable-diffusion-xl-1.0/show.py: -------------------------------------------------------------------------------- 1 | """ 2 | truss predict -d '{"prompt": "A heavily constructed solarpunk bridge over a canyon at sunset"}' | python show.py 3 | """ 4 | 5 | import base64 6 | import json 7 | import os 8 | import sys 9 | 10 | resp = sys.stdin.read() 11 | image = json.loads(resp)["data"] 12 | img = base64.b64decode(image) 13 | 14 | file_name = f"{image[-10:].replace('/', '')}.jpeg" 15 | img_file = open(file_name, "wb") 16 | img_file.write(img) 17 | img_file.close() 18 | os.system(f"open {file_name}") 19 | -------------------------------------------------------------------------------- /stable-diffusion/stable-diffusion/data/model_index.json: -------------------------------------------------------------------------------- 1 | { 2 | "_class_name": "StableDiffusionPipeline", 3 | "_diffusers_version": "0.15.1", 4 | 
"feature_extractor": [ 5 | null, 6 | null 7 | ], 8 | "requires_safety_checker": false, 9 | "safety_checker": [ 10 | null, 11 | null 12 | ], 13 | "scheduler": [ 14 | "diffusers", 15 | "PNDMScheduler" 16 | ], 17 | "text_encoder": [ 18 | "transformers", 19 | "CLIPTextModel" 20 | ], 21 | "tokenizer": [ 22 | "transformers", 23 | "CLIPTokenizer" 24 | ], 25 | "unet": [ 26 | "diffusers", 27 | "UNet2DConditionModel" 28 | ], 29 | "vae": [ 30 | "diffusers", 31 | "AutoencoderKL" 32 | ] 33 | } 34 | -------------------------------------------------------------------------------- /stable-diffusion/stable-diffusion/data/scheduler/scheduler_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "_class_name": "PNDMScheduler", 3 | "_diffusers_version": "0.15.1", 4 | "beta_end": 0.012, 5 | "beta_schedule": "scaled_linear", 6 | "beta_start": 0.00085, 7 | "clip_sample": false, 8 | "num_train_timesteps": 1000, 9 | "prediction_type": "epsilon", 10 | "set_alpha_to_one": false, 11 | "skip_prk_steps": true, 12 | "steps_offset": 1, 13 | "trained_betas": null 14 | } 15 | -------------------------------------------------------------------------------- /stable-diffusion/stable-diffusion/data/text_encoder/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "_name_or_path": "/root/.cache/huggingface/hub/models--stabilityai--stable-diffusion-2-1-base/snapshots/1f758383196d38df1dfe523ddb1030f2bfab7741/text_encoder", 3 | "architectures": [ 4 | "CLIPTextModel" 5 | ], 6 | "attention_dropout": 0.0, 7 | "bos_token_id": 0, 8 | "dropout": 0.0, 9 | "eos_token_id": 2, 10 | "hidden_act": "gelu", 11 | "hidden_size": 1024, 12 | "initializer_factor": 1.0, 13 | "initializer_range": 0.02, 14 | "intermediate_size": 4096, 15 | "layer_norm_eps": 1e-05, 16 | "max_position_embeddings": 77, 17 | "model_type": "clip_text_model", 18 | "num_attention_heads": 16, 19 | "num_hidden_layers": 23, 20 | "pad_token_id": 1, 21 | "projection_dim": 512, 22 | "torch_dtype": "float16", 23 | "transformers_version": "4.28.1", 24 | "vocab_size": 49408 25 | } 26 | -------------------------------------------------------------------------------- /stable-diffusion/stable-diffusion/data/tokenizer/special_tokens_map.json: -------------------------------------------------------------------------------- 1 | { 2 | "bos_token": { 3 | "content": "<|startoftext|>", 4 | "lstrip": false, 5 | "normalized": true, 6 | "rstrip": false, 7 | "single_word": false 8 | }, 9 | "eos_token": { 10 | "content": "<|endoftext|>", 11 | "lstrip": false, 12 | "normalized": true, 13 | "rstrip": false, 14 | "single_word": false 15 | }, 16 | "pad_token": "!", 17 | "unk_token": { 18 | "content": "<|endoftext|>", 19 | "lstrip": false, 20 | "normalized": true, 21 | "rstrip": false, 22 | "single_word": false 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /stable-diffusion/stable-diffusion/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/stable-diffusion/stable-diffusion/model/__init__.py -------------------------------------------------------------------------------- /stable-diffusion/stable-diffusion/show.py: -------------------------------------------------------------------------------- 1 | """ 2 | truss predict -d '{"prompt": "A heavily constructed solarpunk bridge over a canyon at sunset"}' | 
python show.py 3 | """ 4 | 5 | import base64 6 | import json 7 | import os 8 | import sys 9 | 10 | resp = sys.stdin.read() 11 | image = json.loads(resp)["data"] 12 | img = base64.b64decode(image) 13 | 14 | file_name = f"{image[-10:].replace('/', '')}.jpeg" 15 | img_file = open(file_name, "wb") 16 | img_file.write(img) 17 | img_file.close() 18 | os.system(f"open {file_name}") 19 | -------------------------------------------------------------------------------- /stable-diffusion/stable-video-diffusion/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/stable-diffusion/stable-video-diffusion/model/__init__.py -------------------------------------------------------------------------------- /stable-diffusion/stable-video-diffusion/model/scripts/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/stable-diffusion/stable-video-diffusion/model/scripts/__init__.py -------------------------------------------------------------------------------- /stable-diffusion/stable-video-diffusion/model/scripts/util/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/stable-diffusion/stable-video-diffusion/model/scripts/util/__init__.py -------------------------------------------------------------------------------- /stable-diffusion/stable-video-diffusion/model/scripts/util/detection/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/stable-diffusion/stable-video-diffusion/model/scripts/util/detection/__init__.py -------------------------------------------------------------------------------- /stable-diffusion/stable-video-diffusion/model/scripts/util/detection/p_head_v1.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/stable-diffusion/stable-video-diffusion/model/scripts/util/detection/p_head_v1.npz -------------------------------------------------------------------------------- /stable-diffusion/stable-video-diffusion/model/scripts/util/detection/w_head_v1.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/stable-diffusion/stable-video-diffusion/model/scripts/util/detection/w_head_v1.npz -------------------------------------------------------------------------------- /stable-diffusion/stable-video-diffusion/sample_images/cheetah.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/stable-diffusion/stable-video-diffusion/sample_images/cheetah.jpeg -------------------------------------------------------------------------------- /stable-diffusion/stable-video-diffusion/sample_images/racecar.jpeg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/stable-diffusion/stable-video-diffusion/sample_images/racecar.jpeg -------------------------------------------------------------------------------- /templates/README.md: -------------------------------------------------------------------------------- 1 | # Truss templates for different model backends 2 | `generate.yaml` contains configurations for different models with different backends / engines. `generate.py` generates the described models by copying the contents of the template and overriding the config with the provided values. 3 | 4 | `generate.py` accepts the following arguments: 5 | - `--only_check`: if passed, no files are generated; instead, the run fails if the existing files differ from the ones that would be generated 6 | - `--root`: path to the root of `truss-examples`; models are generated under this path 7 | - `--templates`: path to the templates; the generator reads `based_on` models from it 8 | - `--config`: path to the generation config 9 | -------------------------------------------------------------------------------- /templates/faster-whisper-truss/config.yaml: -------------------------------------------------------------------------------- 1 | base_image: 2 | image: baseten/truss-server-base:3.10-gpu-v0.4.9 3 | python_executable_path: /usr/bin/python3 4 | environment_variables: {} 5 | external_package_dirs: [] 6 | model_metadata: 7 | avatar_url: https://cdn.baseten.co/production/static/openai.png 8 | cover_image_url: https://cdn.baseten.co/production/static/whisper.png 9 | example_model_input: 10 | url: https://cdn.baseten.co/docs/production/Gettysburg.mp3 11 | model_id: large-v2 12 | pretty_name: Whisper 13 | tags: 14 | - speech-recognition 15 | model_name: Faster Whisper 16 | python_version: py39 17 | requirements: 18 | - faster-whisper==0.10.0 19 | resources: 20 | accelerator: T4 21 | cpu: 500m 22 | memory: 512Mi 23 | use_gpu: true 24 | secrets: {} 25 | system_packages: [] 26 | -------------------------------------------------------------------------------- /templates/faster-whisper-truss/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/templates/faster-whisper-truss/model/__init__.py -------------------------------------------------------------------------------- /templates/transformers-openai-compatible/config.yaml: -------------------------------------------------------------------------------- 1 | environment_variables: {} 2 | external_package_dirs: [] 3 | model_metadata: 4 | tags: 5 | - text-generation 6 | - openai-compatible 7 | python_version: py311 8 | requirements: 9 | - sentencepiece 10 | - accelerate 11 | - transformers==4.34.0 12 | - torch==2.0.1 13 | - hf_transfer==0.1.4 14 | resources: 15 | accelerator: A10G 16 | memory: 25Gi 17 | use_gpu: true 18 | secrets: {} 19 | system_packages: [] 20 | -------------------------------------------------------------------------------- /templates/transformers-openai-compatible/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/templates/transformers-openai-compatible/model/__init__.py -------------------------------------------------------------------------------- /templates/trt-llm/config.yaml: 
-------------------------------------------------------------------------------- 1 | base_image: 2 | image: nvcr.io/nvidia/tritonserver:23.10-trtllm-python-py3 3 | python_executable_path: /usr/bin/python3 4 | environment_variables: {} 5 | external_package_dirs: [] 6 | model_metadata: 7 | example_model_input: {} 8 | tags: 9 | - text-generation 10 | tensor_parallelism: 1 11 | python_version: py311 12 | requirements: 13 | - tritonclient[all] 14 | resources: 15 | accelerator: A100 16 | use_gpu: true 17 | runtime: 18 | predict_concurrency: 256 19 | secrets: {} 20 | system_packages: [] 21 | -------------------------------------------------------------------------------- /templates/trt-llm/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/templates/trt-llm/model/__init__.py -------------------------------------------------------------------------------- /trt-llm-engine-builder-templates/llama-3_1-70b-instruct/high_throughput/config.yaml: -------------------------------------------------------------------------------- 1 | build_commands: [] 2 | environment_variables: {} 3 | external_package_dirs: [] 4 | model_metadata: {} 5 | model_name: trtllm-llama3_1-70b-instruct-high_throughput 6 | python_version: py311 7 | requirements: [] 8 | resources: 9 | accelerator: H100:2 10 | cpu: '1' 11 | memory: 24Gi 12 | use_gpu: true 13 | secrets: 14 | hf_access_token: set token in baseten workspace 15 | system_packages: [] 16 | trt_llm: 17 | build: 18 | base_model: llama 19 | checkpoint_repository: 20 | repo: meta-llama/Llama-3.1-70B-Instruct 21 | source: HF 22 | max_batch_size: 16 23 | max_beam_width: 1 24 | max_seq_len: 4096 25 | num_builder_gpus: 4 # Need 4 builder GPUs for fp8 26 | quantization_type: fp8_kv 27 | tensor_parallel_count: 2 28 | -------------------------------------------------------------------------------- /trt-llm-engine-builder-templates/llama-3_1-70b-instruct/high_throughput/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/trt-llm-engine-builder-templates/llama-3_1-70b-instruct/high_throughput/model/__init__.py -------------------------------------------------------------------------------- /trt-llm-engine-builder-templates/llama-3_1-70b-instruct/large_context/config.yaml: -------------------------------------------------------------------------------- 1 | build_commands: [] 2 | environment_variables: {} 3 | external_package_dirs: [] 4 | model_metadata: {} 5 | model_name: trtllm-llama3.1_70b-instruct-large_context 6 | python_version: py311 7 | requirements: [] 8 | resources: 9 | accelerator: H100:2 10 | cpu: "1" 11 | memory: 24Gi 12 | use_gpu: true 13 | secrets: 14 | hf_access_token: set token in baseten workspace 15 | system_packages: [] 16 | trt_llm: 17 | build: 18 | base_model: llama 19 | checkpoint_repository: 20 | repo: meta-llama/Meta-Llama-3-70B-Instruct 21 | source: HF 22 | max_batch_size: 8 23 | max_beam_width: 1 24 | max_seq_len: 8192 25 | num_builder_gpus: 4 26 | quantization_type: no_quant 27 | tensor_parallel_count: 2 28 | -------------------------------------------------------------------------------- /trt-llm-engine-builder-templates/llama-3_1-70b-instruct/large_context/model/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/trt-llm-engine-builder-templates/llama-3_1-70b-instruct/large_context/model/__init__.py -------------------------------------------------------------------------------- /trt-llm-engine-builder-templates/llama-3_1-70b-instruct/low_ttft/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/trt-llm-engine-builder-templates/llama-3_1-70b-instruct/low_ttft/model/__init__.py -------------------------------------------------------------------------------- /trt-llm-engine-builder-templates/llama-3_1-8b-instruct/high_throughput/config.yaml: -------------------------------------------------------------------------------- 1 | build_commands: [] 2 | environment_variables: {} 3 | external_package_dirs: [] 4 | model_metadata: {} 5 | model_name: trtllm-llama-3_1-8b-instruct-high_throughput 6 | python_version: py311 7 | requirements: [] 8 | resources: 9 | accelerator: H100 10 | cpu: "1" 11 | memory: 24Gi 12 | use_gpu: true 13 | secrets: 14 | hf_access_token: set token in baseten workspace 15 | system_packages: [] 16 | trt_llm: 17 | build: 18 | base_model: llama 19 | checkpoint_repository: 20 | repo: meta-llama/Llama-3.1-8B-Instruct 21 | source: HF 22 | max_batch_size: 32 23 | quantization_type: fp8_kv 24 | max_beam_width: 1 25 | max_seq_len: 4096 26 | num_builder_gpus: 1 27 | -------------------------------------------------------------------------------- /trt-llm-engine-builder-templates/llama-3_1-8b-instruct/high_throughput/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/trt-llm-engine-builder-templates/llama-3_1-8b-instruct/high_throughput/model/__init__.py -------------------------------------------------------------------------------- /trt-llm-engine-builder-templates/llama-3_1-8b-instruct/large_context/config.yaml: -------------------------------------------------------------------------------- 1 | build_commands: [] 2 | environment_variables: {} 3 | external_package_dirs: [] 4 | model_metadata: {} 5 | model_name: trtllm-llama3.1_8b-instruct-large_context 6 | python_version: py311 7 | requirements: [] 8 | resources: 9 | accelerator: H100 10 | cpu: "1" 11 | memory: 24Gi 12 | use_gpu: true 13 | secrets: 14 | hf_access_token: set token in baseten workspace 15 | system_packages: [] 16 | trt_llm: 17 | build: 18 | base_model: llama 19 | checkpoint_repository: 20 | repo: meta-llama/Llama-3.1-8B-Instruct 21 | source: HF 22 | max_batch_size: 16 23 | max_beam_width: 1 24 | max_seq_len: 8192 25 | quantization_type: no_quant 26 | tensor_parallel_count: 1 27 | num_builder_gpus: 1 28 | -------------------------------------------------------------------------------- /trt-llm-engine-builder-templates/llama-3_1-8b-instruct/large_context/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/trt-llm-engine-builder-templates/llama-3_1-8b-instruct/large_context/model/__init__.py -------------------------------------------------------------------------------- /trt-llm-engine-builder-templates/llama-3_1-8b-instruct/low_ttft/config.yaml: 
-------------------------------------------------------------------------------- 1 | build_commands: [] 2 | environment_variables: {} 3 | external_package_dirs: [] 4 | model_metadata: {} 5 | model_name: trtllm-llama3_1_8b-instruct-low_ttft 6 | python_version: py311 7 | requirements: [] 8 | resources: 9 | accelerator: H100 10 | cpu: "1" 11 | memory: 24Gi 12 | use_gpu: true 13 | secrets: 14 | hf_access_token: set token in baseten workspace 15 | system_packages: [] 16 | trt_llm: 17 | build: 18 | base_model: llama 19 | checkpoint_repository: 20 | repo: meta-llama/Llama-3.1-8B-Instruct 21 | source: HF 22 | max_batch_size: 8 23 | max_beam_width: 1 24 | max_seq_len: 4096 25 | num_builder_gpus: 1 26 | quantization_type: fp8_kv 27 | tensor_parallel_count: 1 28 | plugin_configuration: 29 | use_paged_context_fmha: True 30 | use_fp8_context_fmha: True 31 | -------------------------------------------------------------------------------- /trt-llm-engine-builder-templates/llama-3_1-8b-instruct/low_ttft/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/trt-llm-engine-builder-templates/llama-3_1-8b-instruct/low_ttft/model/__init__.py -------------------------------------------------------------------------------- /ultravox/config.yaml: -------------------------------------------------------------------------------- 1 | base_image: 2 | image: vshulman/vllm-openai-fixie:latest 3 | python_executable_path: /usr/bin/python3 4 | model_metadata: 5 | arguments: 6 | model: fixie-ai/ultravox-v0.2 7 | audio_token_id: 128002 8 | environment_variables: {} 9 | external_package_dirs: [] 10 | model_name: Ultravox v0.2 11 | python_version: py310 12 | runtime: 13 | predict_concurrency: 512 14 | requirements: 15 | - httpx 16 | resources: 17 | accelerator: A100 18 | use_gpu: true 19 | secrets: {} 20 | system_packages: 21 | - python3.10-venv 22 | -------------------------------------------------------------------------------- /ultravox/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/ultravox/model/__init__.py -------------------------------------------------------------------------------- /vllm/config.yaml: -------------------------------------------------------------------------------- 1 | model_name: "Llama 3.1 8B Instruct VLLM openai compatible" 2 | python_version: py311 3 | model_metadata: 4 | example_model_input: {"prompt": "what is the meaning of life"} 5 | repo_id: meta-llama/Llama-3.1-8B-Instruct 6 | openai_compatible: true 7 | vllm_config: 8 | tensor_parallel_size: 1 9 | max_model_len: 4096 10 | enable_prefix_caching: true 11 | requirements: 12 | - vllm==0.5.4 13 | resources: 14 | accelerator: A100 15 | use_gpu: true 16 | runtime: 17 | predict_concurrency: 128 18 | secrets: 19 | hf_access_token: null 20 | -------------------------------------------------------------------------------- /vllm/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/vllm/model/__init__.py -------------------------------------------------------------------------------- /whisper/faster-whisper-small/config.yaml: -------------------------------------------------------------------------------- 1 | 
description: A small speech-to-text model for multi-lingual audio transcription. 2 | model_cache: 3 | - repo_id: Systran/faster-whisper-small 4 | model_metadata: 5 | avatar_url: https://cdn.baseten.co/production/static/openai.png 6 | cover_image_url: https://cdn.baseten.co/production/static/whisper.png 7 | example_model_input: 8 | url: https://cdn.baseten.co/docs/production/Gettysburg.mp3 9 | model_id: small 10 | pretty_name: Whisper 11 | tags: 12 | - speech-recognition 13 | model_name: Faster Whisper Small 14 | python_version: py39 15 | requirements: 16 | - torch==2.1.0 17 | - faster-whisper==1.0.3 18 | resources: 19 | accelerator: T4 20 | use_gpu: true 21 | -------------------------------------------------------------------------------- /whisper/faster-whisper-small/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/whisper/faster-whisper-small/model/__init__.py -------------------------------------------------------------------------------- /whisper/faster-whisper-v2/config.yaml: -------------------------------------------------------------------------------- 1 | description: Faster Whisper v2 2 | environment_variables: {} 3 | external_package_dirs: [] 4 | model_cache: 5 | - repo_id: Systran/faster-whisper-large-v2 6 | model_metadata: 7 | avatar_url: https://cdn.baseten.co/production/static/openai.png 8 | cover_image_url: https://cdn.baseten.co/production/static/whisper.png 9 | example_model_input: 10 | url: https://cdn.baseten.co/docs/production/Gettysburg.mp3 11 | model_id: large-v2 12 | pretty_name: Whisper 13 | tags: 14 | - speech-recognition 15 | model_name: Faster Whisper v2 16 | python_version: py39 17 | requirements: 18 | - torch==2.1.1 19 | - faster-whisper==1.0.3 20 | - ctranslate2==4.4.0 21 | - numpy==1.26.4 22 | resources: 23 | accelerator: A10G 24 | cpu: 500m 25 | memory: 512Mi 26 | use_gpu: true 27 | secrets: {} 28 | system_packages: [] 29 | -------------------------------------------------------------------------------- /whisper/faster-whisper-v2/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/whisper/faster-whisper-v2/model/__init__.py -------------------------------------------------------------------------------- /whisper/faster-whisper-v3/config.yaml: -------------------------------------------------------------------------------- 1 | description: Faster Whisper v3 2 | environment_variables: {} 3 | external_package_dirs: [] 4 | model_cache: 5 | - repo_id: Systran/faster-whisper-large-v3 6 | model_metadata: 7 | avatar_url: https://cdn.baseten.co/production/static/openai.png 8 | cover_image_url: https://cdn.baseten.co/production/static/whisper.png 9 | example_model_input: 10 | url: https://cdn.baseten.co/docs/production/Gettysburg.mp3 11 | model_id: large-v3 12 | pretty_name: Whisper 13 | tags: 14 | - speech-recognition 15 | model_name: Faster Whisper v3 16 | python_version: py39 17 | requirements: 18 | - torch==2.1.1 19 | - faster-whisper==1.0.3 20 | - ctranslate2==4.4.0 21 | - numpy==1.26.4 22 | resources: 23 | accelerator: A10G 24 | cpu: 500m 25 | memory: 512Mi 26 | use_gpu: true 27 | secrets: {} 28 | system_packages: [] 29 | -------------------------------------------------------------------------------- /whisper/faster-whisper-v3/model/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/whisper/faster-whisper-v3/model/__init__.py -------------------------------------------------------------------------------- /whisper/whisper-streaming/config.yaml: -------------------------------------------------------------------------------- 1 | base_image: 2 | image: baseten/truss-server-base:3.10-gpu-v0.4.9 3 | python_executable_path: /usr/bin/python3 4 | environment_variables: {} 5 | external_package_dirs: [] 6 | model_metadata: 7 | whisper_model: medium 8 | model_name: Whisper Streaming 9 | python_version: py310 10 | requirements: [] 11 | requirements_file: ./requirements.txt 12 | resources: 13 | accelerator: T4 14 | use_gpu: true 15 | secrets: {} 16 | system_packages: 17 | - ffmpeg 18 | -------------------------------------------------------------------------------- /whisper/whisper-streaming/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/whisper/whisper-streaming/model/__init__.py -------------------------------------------------------------------------------- /whisper/whisper-streaming/requirements.txt: -------------------------------------------------------------------------------- 1 | torch==2.1.0 2 | faster-whisper==0.10.0 3 | librosa==0.10.1 4 | opus-fast-mosestokenizer==0.0.8.5 5 | -------------------------------------------------------------------------------- /whisper/whisper-torchserve/config.yaml: -------------------------------------------------------------------------------- 1 | environment_variables: {} 2 | external_package_dirs: [] 3 | model_metadata: {} 4 | model_name: Whisper Torchserve 5 | python_version: py310 6 | requirements: 7 | - torch==2.1.0 8 | - torchserve==0.9.0 9 | - ffmpeg-python==0.2.0 10 | - transformers==4.37.2 11 | - nvgpu==0.10.0 12 | - httpx==0.27.0 13 | resources: 14 | accelerator: T4 15 | use_gpu: true 16 | model_cache: 17 | - repo_id: htrivedi99/whisper-torchserve 18 | secrets: {} 19 | system_packages: 20 | - ffmpeg 21 | - openjdk-11-jdk 22 | runtime: 23 | predict_concurrency: 128 24 | -------------------------------------------------------------------------------- /whisper/whisper-torchserve/data/config.properties: -------------------------------------------------------------------------------- 1 | inference_address=http://0.0.0.0:8888 2 | batch_size=16 3 | ipex_enable=true 4 | async_logging=true 5 | 6 | models={\ 7 | "whisper_base": {\ 8 | "1.0": {\ 9 | "defaultVersion": true,\ 10 | "marName": "whisper_base.mar",\ 11 | "minWorkers": 1,\ 12 | "maxWorkers": 4,\ 13 | "batchSize": 16,\ 14 | "maxBatchDelay": 250,\ 15 | "responseTimeout": 120\ 16 | }\ 17 | }\ 18 | } 19 | 20 | # maxBatchDelay is the amount of time to wait for the batch size to fill up. Default is 250 ms. 
21 | # default_workers_per_model=2 22 | -------------------------------------------------------------------------------- /whisper/whisper-torchserve/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/whisper/whisper-torchserve/model/__init__.py -------------------------------------------------------------------------------- /whisper/whisper-truss/config.yaml: -------------------------------------------------------------------------------- 1 | description: Transcribe audio files across multiple languages. 2 | environment_variables: {} 3 | external_data: 4 | - local_data_path: models/small.pt 5 | url: https://baseten-public.s3.us-west-2.amazonaws.com/models/whisper/small.pt 6 | external_package_dirs: [] 7 | model_metadata: 8 | avatar_url: https://cdn.baseten.co/production/static/openai.png 9 | cover_image_url: https://cdn.baseten.co/production/static/whisper.png 10 | example_model_input: 11 | url: https://cdn.baseten.co/docs/production/Gettysburg.mp3 12 | pretty_name: Whisper 13 | tags: 14 | - speech-recognition 15 | model_name: Whisper 16 | python_version: py39 17 | requirements: 18 | - openai-whisper==20230314 19 | - torch==2.0.1 20 | resources: 21 | accelerator: A10G 22 | cpu: '4' 23 | memory: 16Gi 24 | use_gpu: true 25 | secrets: {} 26 | system_packages: 27 | - ffmpeg 28 | -------------------------------------------------------------------------------- /whisper/whisper-truss/data/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/whisper/whisper-truss/data/.gitkeep -------------------------------------------------------------------------------- /whisper/whisper-truss/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/whisper/whisper-truss/model/__init__.py -------------------------------------------------------------------------------- /whisper/whisper-v3-truss-base64/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/whisper/whisper-v3-truss-base64/model/__init__.py -------------------------------------------------------------------------------- /whisper/whisper-v3-truss/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/whisper/whisper-v3-truss/model/__init__.py -------------------------------------------------------------------------------- /whisper/whisper-v3-turbo/config.yaml: -------------------------------------------------------------------------------- 1 | build_commands: [] 2 | environment_variables: {} 3 | external_package_dirs: [] 4 | model_metadata: {} 5 | model_name: Whisper 3 Turbo Engine 6 | python_version: py39 7 | requirements: [] 8 | resources: 9 | accelerator: A10G 10 | cpu: '1' 11 | memory: 24Gi 12 | use_gpu: true 13 | secrets: {} 14 | system_packages: [] 15 | trt_llm: 16 | build: 17 | base_model: whisper 18 | checkpoint_repository: 19 | repo: 
https://openaipublic.azureedge.net/main/whisper/models/aff26ae408abcba5fbf8813c21e62b0941638c5f6eebfb145be0c9839262a19a/large-v3-turbo.pt 20 | source: REMOTE_URL 21 | max_batch_size: 8 22 | max_beam_width: 1 23 | max_seq_len: 512 24 | num_builder_gpus: 1 25 | quantization_type: no_quant 26 | tensor_parallel_count: 1 27 | -------------------------------------------------------------------------------- /whisper/whisperx-truss/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/whisper/whisperx-truss/model/__init__.py -------------------------------------------------------------------------------- /xtts-streaming/config.yaml: -------------------------------------------------------------------------------- 1 | base_image: 2 | image: htrivedi05/xtts-streaming 3 | python_executable_path: /opt/conda/bin/python 4 | environment_variables: 5 | COQUI_TOS_AGREED: '1' 6 | external_package_dirs: [] 7 | model_metadata: {} 8 | model_name: XTTS Streaming - High Performance 9 | resources: 10 | accelerator: H100 11 | cpu: '3' 12 | memory: 10Gi 13 | use_gpu: true 14 | secrets: {} 15 | -------------------------------------------------------------------------------- /xtts-streaming/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/xtts-streaming/model/__init__.py -------------------------------------------------------------------------------- /xtts-streaming/requirements.txt: -------------------------------------------------------------------------------- 1 | git+https://github.com/coqui-ai/TTS@fa28f99f1508b5b5366539b2149963edcb80ba62 2 | deepspeed==0.10.3 3 | -------------------------------------------------------------------------------- /xtts-v2-truss/config.yaml: -------------------------------------------------------------------------------- 1 | environment_variables: 2 | COQUI_TOS_AGREED: "1" 3 | external_package_dirs: [] 4 | model_metadata: 5 | example_model_input: 6 | language: en 7 | speaker_voice: Claribel Dervla 8 | text: Kurt watched the incoming Pelicans. The blocky jet-powered craft were so distant they were only specks against the setting sun. He hit the magnification on his faceplate and saw lines of fire tracing their reentry vectors. They would touch down in three minutes. 9 | tags: 10 | - text-to-speech 11 | model_name: XTTS V2 12 | python_version: py310 13 | requirements: 14 | - git+https://github.com/htrivedi99/TTS.git 15 | resources: 16 | accelerator: T4 17 | cpu: '3' 18 | memory: 10Gi 19 | use_gpu: true 20 | secrets: {} 21 | system_packages: [] 22 | -------------------------------------------------------------------------------- /xtts-v2-truss/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basetenlabs/truss-examples/09e3729f36e6bb87df17d5bc53b8c1ef9031bbb8/xtts-v2-truss/model/__init__.py --------------------------------------------------------------------------------
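Closing with a usage sketch for the XTTS V2 Truss above, in the same style as the earlier client example. The model ID is a hypothetical placeholder, and the response key ("output") and base64 WAV encoding are assumptions, since only the model's `__init__.py` link is shown here:

import base64
import os

import requests

model_id = "YOUR_MODEL_ID"  # hypothetical placeholder for your deployment
resp = requests.post(
    f"https://model-{model_id}.api.baseten.co/production/predict",
    headers={"Authorization": f"Api-Key {os.environ['BASETEN_API_KEY']}"},
    json={
        "language": "en",
        "speaker_voice": "Claribel Dervla",
        "text": "Kurt watched the incoming Pelicans.",
    },
)
resp.raise_for_status()

# Assumption: the model returns base64-encoded WAV bytes under an "output" key.
with open("speech.wav", "wb") as f:
    f.write(base64.b64decode(resp.json()["output"]))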