├── .env.template ├── .gitignore ├── .gitlab └── issue_templates │ ├── Default.md │ ├── Documentation.md │ └── Enhancement.md ├── .gitleaksignore ├── ATTRIBUTION.md ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── Dockerfile ├── LICENSE ├── README.md ├── config ├── default-config-ap-northeast-1.yaml ├── default-config-ap-northeast-2.yaml ├── default-config-ap-northeast-3.yaml ├── default-config-ap-south-1.yaml ├── default-config-ap-southeast-1.yaml ├── default-config-ap-southeast-2.yaml ├── default-config-base.yaml ├── default-config-ca-central-1.yaml ├── default-config-eu-central-1.yaml ├── default-config-eu-north-1.yaml ├── default-config-eu-west-1.yaml ├── default-config-eu-west-2.yaml ├── default-config-eu-west-3.yaml ├── default-config-sa-east-1.yaml ├── default-config-us-east-1.yaml ├── default-config-us-east-2.yaml ├── default-config-us-west-1.yaml ├── default-config-us-west-2.yaml └── details-for-models-that-need-additional-manual-config.yaml ├── create-ec2-to-access-private-load-balancer.sh ├── create-fake-llm-load-testing-server.sh ├── delete-fake-llm-load-testing-server.sh ├── deploy.sh ├── docker-build-and-deploy.sh ├── install-cloud9-prerequisites.sh ├── litellm-fake-llm-load-testing-server-terraform ├── docker │ ├── Dockerfile │ ├── docker-build-and-deploy.sh │ ├── fake_llm_server.py │ └── requirements.txt ├── main.tf ├── outputs.tf ├── providers.tf └── variables.tf ├── litellm-private-load-balancer-ec2-terraform ├── main.tf ├── outputs.tf ├── providers.tf └── variables.tf ├── litellm-s3-log-bucket-terraform ├── outputs.tf ├── provider.tf ├── s3.tf └── variables.tf ├── litellm-terraform-stack ├── main.tf ├── modules │ ├── base │ │ ├── ecr.tf │ │ ├── iam.tf │ │ ├── locals.tf │ │ ├── network.tf │ │ ├── outputs.tf │ │ ├── rds.tf │ │ ├── redis.tf │ │ ├── route53.tf │ │ ├── s3.tf │ │ ├── secrets-manager.tf │ │ ├── variables.tf │ │ ├── vpc-endpoints.tf │ │ └── waf.tf │ ├── ecs │ │ ├── alb.tf │ │ ├── cloudfront.tf │ │ ├── cloudwatch.tf │ │ ├── ecs.tf │ │ ├── 
iam.tf │ │ ├── outputs.tf │ │ ├── route53.tf │ │ ├── s3.tf │ │ ├── secrets-manager.tf │ │ ├── security-groups.tf │ │ ├── variables.tf │ │ └── waf.tf │ └── eks │ │ ├── eks.tf │ │ ├── iam.tf │ │ ├── kms.tf │ │ ├── main.tf │ │ ├── outputs.tf │ │ ├── route53.tf │ │ ├── variables.tf │ │ └── versions.tf ├── outputs.tf ├── providers.tf └── variables.tf ├── media ├── Gateway latest architecture with CloudFront.pptx ├── Gateway-Architecture-with-CloudFront.png ├── Reference_architecture_ECS_EKS_platform_combined.jpg ├── Required-EKS-Add-ons.png ├── Tested-Bring-Your-Own-EKS-Cluster-Configuration.png └── architecture.png ├── middleware ├── Dockerfile ├── app.py ├── docker-build-and-deploy.sh └── requirements.txt ├── scripts ├── .env.template ├── benchmark.py └── requirements.txt ├── test-middleware-streaming.py ├── test-middleware-synchronous.py ├── tests ├── .env.template ├── bedrock_chat_test_file.py ├── locust_load_test.py ├── management_apis_test_file.py ├── openai_chat_test_file.py └── requirements.txt ├── undeploy.sh └── update-litellm-config.sh /.env.template: -------------------------------------------------------------------------------- 1 | # LITELLM_VERSION eg: main-v1.56.5 2 | # Get it from https://github.com/berriai/litellm/pkgs/container/litellm/versions?filters%5Bversion_type%5D=tagged 3 | LITELLM_VERSION="litellm_stable_release_branch-v1.63.2-stable" 4 | TERRAFORM_S3_BUCKET_NAME="" #Must be globally unique 5 | BUILD_FROM_SOURCE="false" 6 | HOSTED_ZONE_NAME="" 7 | CREATE_PRIVATE_HOSTED_ZONE_IN_EXISTING_VPC="false" 8 | RECORD_NAME="" 9 | CERTIFICATE_ARN="" 10 | OKTA_ISSUER="" 11 | OKTA_AUDIENCE="api://default" 12 | OPENAI_API_KEY="placeholder" 13 | AZURE_OPENAI_API_KEY="placeholder" 14 | AZURE_API_KEY="placeholder" 15 | ANTHROPIC_API_KEY="placeholder" 16 | GROQ_API_KEY="placeholder" 17 | COHERE_API_KEY="placeholder" 18 | CO_API_KEY="placeholder" 19 | HF_TOKEN="placeholder" 20 | HUGGINGFACE_API_KEY="placeholder" 21 | DATABRICKS_API_KEY="placeholder" 22 | 
GEMINI_API_KEY="placeholder" 23 | CODESTRAL_API_KEY="placeholder" 24 | MISTRAL_API_KEY="placeholder" 25 | AZURE_AI_API_KEY="placeholder" 26 | NVIDIA_NIM_API_KEY="placeholder" 27 | XAI_API_KEY="placeholder" 28 | PERPLEXITYAI_API_KEY="placeholder" 29 | GITHUB_API_KEY="placeholder" 30 | DEEPSEEK_API_KEY="placeholder" 31 | AI21_API_KEY="placeholder" 32 | LANGSMITH_API_KEY="" 33 | LANGSMITH_PROJECT="" 34 | LANGSMITH_DEFAULT_RUN_NAME="" 35 | DEPLOYMENT_PLATFORM="ECS" 36 | EXISTING_VPC_ID="" 37 | EXISTING_EKS_CLUSTER_NAME="" 38 | DISABLE_OUTBOUND_NETWORK_ACCESS="false" 39 | CREATE_VPC_ENDPOINTS_IN_EXISTING_VPC="false" 40 | INSTALL_ADD_ONS_IN_EXISTING_EKS_CLUSTER="false" 41 | DESIRED_CAPACITY="2" #Number of ECS or EKS instances to run by default (for horizontal scaling) 42 | MIN_CAPACITY="2" 43 | MAX_CAPACITY="4" 44 | ECS_CPU_TARGET_UTILIZATION_PERCENTAGE="50" 45 | ECS_MEMORY_TARGET_UTILIZATION_PERCENTAGE="40" 46 | ECS_VCPUS="2" 47 | EKS_ARM_INSTANCE_TYPE="t4g.medium" 48 | EKS_X86_INSTANCE_TYPE="t3.medium" 49 | EKS_ARM_AMI_TYPE="AL2_ARM_64" 50 | EKS_X86_AMI_TYPE="AL2_x86_64" 51 | CPU_ARCHITECTURE="" #If empty, defaults to the architecture of your deployment machine "x86" or "arm" 52 | PUBLIC_LOAD_BALANCER="true" 53 | RDS_INSTANCE_CLASS="db.t3.small" 54 | RDS_ALLOCATED_STORAGE_GB="20" 55 | REDIS_NODE_TYPE="cache.t3.micro" 56 | REDIS_NUM_CACHE_CLUSTERS="2" #Number of cache clusters (primary and replicas) the replication group will have 57 | EC2_KEY_PAIR_NAME="" 58 | DISABLE_SWAGGER_PAGE="false" 59 | DISABLE_ADMIN_UI="false" 60 | LANGFUSE_PUBLIC_KEY="" 61 | LANGFUSE_SECRET_KEY="" 62 | LANGFUSE_HOST="" # Optional, defaults to https://cloud.langfuse.com 63 | FAKE_LLM_LOAD_TESTING_ENDPOINT_CERTIFICATE_ARN="" 64 | FAKE_LLM_LOAD_TESTING_ENDPOINT_HOSTED_ZONE_NAME="" 65 | FAKE_LLM_LOAD_TESTING_ENDPOINT_RECORD_NAME="" 66 | 67 | # CloudFront and Route53 Configuration 68 | USE_ROUTE53="false" 69 | USE_CLOUDFRONT="true" 70 | CLOUDFRONT_PRICE_CLASS="PriceClass_100" 71 | 
-------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .env 2 | config/config.yaml 3 | config/local-config.yaml 4 | 5 | .*.sw? 6 | **/.DS_Store 7 | .idea/ 8 | .aws-sam/ 9 | .vscode/settings.json 10 | **/.vim/ 11 | 12 | *.js 13 | !jest.config.js 14 | *.d.ts 15 | node_modules 16 | 17 | cdk.out 18 | cdk.context.json 19 | litellm-source 20 | 21 | litellm-cdk/resources.txt 22 | 23 | **/outputs.json 24 | 25 | **/myenv/ 26 | **/fresh_venv/ 27 | *.pyc 28 | .terraform.lock.hcl 29 | .terraform 30 | terraform.tfstate 31 | terraform.tfstate.backup 32 | .terraform.tfstate.lock.info 33 | terraform.tfstate.* 34 | 35 | **.env.testing 36 | **error.txt 37 | **.env 38 | sessionmanager-bundle.zip 39 | sessionmanager-bundle 40 | backend.hcl 41 | resources.txt 42 | errored.tfstate -------------------------------------------------------------------------------- /.gitlab/issue_templates/Default.md: -------------------------------------------------------------------------------- 1 | ## Summary 2 | 3 | (Summarize the bug encountered concisely) 4 | 5 | ## Steps to reproduce 6 | 7 | (How one can reproduce the issue - this is very important) 8 | 9 | ## Example Project 10 | 11 | (If possible, create an example project here on GitLab.com that exhibits the problematic 12 | behavior, and link to it here in the bug report. 13 | If you are using an older version of GitLab, this will also determine whether the bug has been fixed 14 | in a more recent version) 15 | 16 | ## What is the current bug behavior? 17 | 18 | (What actually happens) 19 | 20 | ## What is the expected correct behavior? 21 | 22 | (What you should see instead) 23 | 24 | ## Relevant logs and/or screenshots 25 | 26 | (Paste any relevant logs - use code blocks (```) to format console output, logs, and code, as 27 | it's very hard to read otherwise.) 
28 | 29 | ## Possible fixes 30 | 31 | (If you can, link to the line of code that might be responsible for the problem) -------------------------------------------------------------------------------- /.gitlab/issue_templates/Documentation.md: -------------------------------------------------------------------------------- 1 | ## Summary 2 | (Summarize the issue and why it is wrong, confusing or misleading) 3 | 4 | ## Link to Document 5 | (The document should be public facing such as the README.md) 6 | 7 | ## Suggested Change 8 | (Please suggest a change) -------------------------------------------------------------------------------- /.gitlab/issue_templates/Enhancement.md: -------------------------------------------------------------------------------- 1 | Please complete as many of the following sections as possible. 2 | 3 | ## Title 4 | [Concise title of the enhancement] 5 | 6 | ## Author(s) 7 | [Name(s) of the author(s) proposing the enhancement] 8 | 9 | ## Status 10 | [Draft/In Review/Approved/Rejected/Implemented] 11 | 12 | ## Summary 13 | [Brief overview of the proposed enhancement (1-2 sentences)] 14 | 15 | ## Motivation 16 | [Explain why this enhancement is needed and what problems it solves] 17 | 18 | ## Proposal 19 | [Detailed description of the proposed enhancement] 20 | 21 | ### User Experience 22 | [Describe how this enhancement will affect the user experience] 23 | 24 | ### Technical Implementation 25 | [Provide technical details on how this enhancement could be implemented] 26 | 27 | ## Alternatives Considered 28 | [List any alternative solutions or features you've considered] 29 | 30 | ## Benefits 31 | [Outline the benefits of implementing this enhancement] 32 | 33 | ## Drawbacks 34 | [Discuss any potential drawbacks or challenges] 35 | 36 | ## Required Resources 37 | [Estimate the resources (time, personnel, etc.) 
required to implement this enhancement] 38 | 39 | ## Dependencies 40 | [List any dependencies or prerequisites for this enhancement] 41 | 42 | ## Testing Plan 43 | [Describe how this enhancement will be tested] 44 | 45 | ## Rollout Plan 46 | [Explain how this enhancement will be rolled out to users] 47 | 48 | ## Documentation 49 | [Outline any documentation updates required for this enhancement] 50 | 51 | ## Open Questions 52 | [List any unresolved questions or areas that need further discussion] 53 | 54 | ## References 55 | [Include any relevant links, issues, or external resources] 56 | 57 | /label enhancement -------------------------------------------------------------------------------- /.gitleaksignore: -------------------------------------------------------------------------------- 1 | 0cb0c568dc995ce4a6220278a0cf5d76828e0ae9:README.md:generic-api-key:840 2 | dc45f9723bd40324f1bb95568b71948e685dcb10:litellm-cdk/lib/litellm-cdk-stack.ts:generic-api-key:213 3 | 15bd509e996cd7c79e8dc717d1e19a0c66b070ca:tests/openai_chat_test_file.py:generic-api-key:236 4 | 78062c1f9dcc9b68243bbcaf5302676583b760cb:README.md:generic-api-key:347 5 | 1d4f9615f50affba757f297c13360ebc590d88cb:README.md:generic-api-key:677 -------------------------------------------------------------------------------- /ATTRIBUTION.md: -------------------------------------------------------------------------------- 1 | This software uses the following open source libraries, codes and fonts: 2 | 3 | ## LiteLLM 4 | - License: [MIT License](https://github.com/BerriAI/litellm/blob/main/LICENSE) 5 | - Homepage: https://www.litellm.ai/ -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | ## Code of Conduct 2 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 
3 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 4 | opensource-codeofconduct@amazon.com with any additional questions or comments. 5 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing Guidelines 2 | 3 | Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional 4 | documentation, we greatly value feedback and contributions from our community. 5 | 6 | Please read through this document before submitting any issues or pull requests to ensure we have all the necessary 7 | information to effectively respond to your bug report or contribution. 8 | 9 | 10 | ## Reporting Bugs/Feature Requests 11 | 12 | We welcome you to use the GitHub issue tracker to report bugs or suggest features. 13 | 14 | When filing an issue, please check existing open, or recently closed, issues to make sure somebody else hasn't already 15 | reported the issue. Please try to include as much information as you can. Details like these are incredibly useful: 16 | 17 | * A reproducible test case or series of steps 18 | * The version of our code being used 19 | * Any modifications you've made relevant to the bug 20 | * Anything unusual about your environment or deployment 21 | 22 | 23 | ## Contributing via Pull Requests 24 | Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that: 25 | 26 | 1. You are working against the latest source on the *main* branch. 27 | 2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already. 28 | 3. You open an issue to discuss any significant work - we would hate for your time to be wasted. 29 | 30 | To send us a pull request, please: 31 | 32 | 1. Fork the repository. 33 | 2. 
Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change. 34 | 3. Ensure local tests pass. 35 | 4. Commit to your fork using clear commit messages. 36 | 5. Send us a pull request, answering any default questions in the pull request interface. 37 | 6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation. 38 | 39 | GitHub provides additional documentation on [forking a repository](https://help.github.com/articles/fork-a-repo/) and 40 | [creating a pull request](https://help.github.com/articles/creating-a-pull-request/). 41 | 42 | 43 | ## Finding contributions to work on 44 | Looking at the existing issues is a great way to find something to contribute on. As our projects, by default, use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any 'help wanted' issues is a great place to start. 45 | 46 | 47 | ## Code of Conduct 48 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 49 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 50 | opensource-codeofconduct@amazon.com with any additional questions or comments. 51 | 52 | 53 | ## Security issue notifications 54 | If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public GitHub issue. 55 | 56 | 57 | ## Licensing 58 | 59 | See the [LICENSE](LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution. 
60 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | ARG LITELLM_VERSION=latest 2 | FROM ghcr.io/berriai/litellm:${LITELLM_VERSION} 3 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT No Attribution 2 | 3 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | this software and associated documentation files (the "Software"), to deal in 7 | the Software without restriction, including without limitation the rights to 8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software is furnished to do so. 10 | 11 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 12 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 13 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 14 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 15 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 16 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-------------------------------------------------------------------------------- /config/default-config-ap-northeast-1.yaml: -------------------------------------------------------------------------------- 1 | model_list: 2 | #Bedrock Models 3 | - model_name: amazon.titan-text-express-v1 4 | litellm_params: 5 | model: bedrock/amazon.titan-text-express-v1 6 | 7 | - model_name: amazon.titan-embed-text-v1 8 | litellm_params: 9 | model: bedrock/amazon.titan-embed-text-v1 10 | drop_params: true #Needed to avoid errors when encoding_format is passed in by openai python client 11 | 12 | - model_name: amazon.titan-embed-text-v2:0 13 | litellm_params: 14 | model: bedrock/amazon.titan-embed-text-v2:0 15 | drop_params: true #Needed to avoid errors when encoding_format is passed in by openai python client 16 | 17 | - model_name: amazon.rerank-v1:0 18 | litellm_params: 19 | model: bedrock/amazon.rerank-v1:0 20 | 21 | - model_name: anthropic.claude-3-haiku-20240307-v1:0 22 | litellm_params: 23 | model: bedrock/anthropic.claude-3-haiku-20240307-v1:0 24 | 25 | - model_name: anthropic.claude-3-5-sonnet-20240620-v1:0 26 | litellm_params: 27 | model: bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0 28 | 29 | - model_name: cohere.embed-english-v3 30 | litellm_params: 31 | model: bedrock/cohere.embed-english-v3 32 | drop_params: true #Needed to avoid errors when encoding_format is passed in by openai python client 33 | 34 | - model_name: cohere.embed-multilingual-v3 35 | litellm_params: 36 | model: bedrock/cohere.embed-multilingual-v3 37 | 38 | - model_name: cohere.rerank-v3-5:0 39 | litellm_params: 40 | model: bedrock/cohere.rerank-v3-5:0 41 | 42 | - model_name: apac.anthropic.claude-3-sonnet-20240229-v1:0 43 | litellm_params: 44 | model: bedrock/apac.anthropic.claude-3-sonnet-20240229-v1:0 45 | 46 | - model_name: apac.anthropic.claude-3-5-sonnet-20240620-v1:0 47 | litellm_params: 48 | model: bedrock/apac.anthropic.claude-3-5-sonnet-20240620-v1:0 49 | 50 | - model_name: 
apac.anthropic.claude-3-haiku-20240307-v1:0 51 | litellm_params: 52 | model: bedrock/apac.anthropic.claude-3-haiku-20240307-v1:0 53 | 54 | - model_name: apac.anthropic.claude-3-5-sonnet-20241022-v2:0 55 | litellm_params: 56 | model: bedrock/apac.anthropic.claude-3-5-sonnet-20241022-v2:0 57 | 58 | -------------------------------------------------------------------------------- /config/default-config-ap-northeast-2.yaml: -------------------------------------------------------------------------------- 1 | model_list: 2 | #Bedrock Models 3 | - model_name: amazon.titan-embed-text-v2:0 4 | litellm_params: 5 | model: bedrock/amazon.titan-embed-text-v2:0 6 | drop_params: true #Needed to avoid errors when encoding_format is passed in by openai python client 7 | 8 | - model_name: anthropic.claude-3-5-sonnet-20240620-v1:0 9 | litellm_params: 10 | model: bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0 11 | 12 | - model_name: anthropic.claude-3-haiku-20240307-v1:0 13 | litellm_params: 14 | model: bedrock/anthropic.claude-3-haiku-20240307-v1:0 15 | 16 | - model_name: apac.anthropic.claude-3-sonnet-20240229-v1:0 17 | litellm_params: 18 | model: bedrock/apac.anthropic.claude-3-sonnet-20240229-v1:0 19 | 20 | - model_name: apac.anthropic.claude-3-5-sonnet-20240620-v1:0 21 | litellm_params: 22 | model: bedrock/apac.anthropic.claude-3-5-sonnet-20240620-v1:0 23 | 24 | - model_name: apac.anthropic.claude-3-haiku-20240307-v1:0 25 | litellm_params: 26 | model: bedrock/apac.anthropic.claude-3-haiku-20240307-v1:0 27 | 28 | - model_name: apac.anthropic.claude-3-5-sonnet-20241022-v2:0 29 | litellm_params: 30 | model: bedrock/apac.anthropic.claude-3-5-sonnet-20241022-v2:0 31 | 32 | -------------------------------------------------------------------------------- /config/default-config-ap-northeast-3.yaml: -------------------------------------------------------------------------------- 1 | model_list: 2 | #Bedrock Models 3 | - model_name: apac.anthropic.claude-3-5-sonnet-20241022-v2:0 4 | 
litellm_params: 5 | model: bedrock/apac.anthropic.claude-3-5-sonnet-20241022-v2:0 6 | 7 | -------------------------------------------------------------------------------- /config/default-config-ap-south-1.yaml: -------------------------------------------------------------------------------- 1 | model_list: 2 | #Bedrock Models 3 | - model_name: amazon.titan-text-lite-v1 4 | litellm_params: 5 | model: bedrock/amazon.titan-text-lite-v1 6 | 7 | - model_name: amazon.titan-text-express-v1 8 | litellm_params: 9 | model: bedrock/amazon.titan-text-express-v1 10 | 11 | - model_name: amazon.titan-embed-image-v1 12 | litellm_params: 13 | model: bedrock/amazon.titan-embed-image-v1 14 | drop_params: true #Needed to avoid errors when encoding_format is passed in by openai python client 15 | 16 | - model_name: amazon.titan-image-generator-v1 17 | litellm_params: 18 | model: bedrock/amazon.titan-image-generator-v1 19 | 20 | - model_name: amazon.titan-embed-text-v2:0 21 | litellm_params: 22 | model: bedrock/amazon.titan-embed-text-v2:0 23 | drop_params: true #Needed to avoid errors when encoding_format is passed in by openai python client 24 | 25 | - model_name: anthropic.claude-3-sonnet-20240229-v1:0 26 | litellm_params: 27 | model: bedrock/anthropic.claude-3-sonnet-20240229-v1:0 28 | 29 | - model_name: anthropic.claude-3-haiku-20240307-v1:0 30 | litellm_params: 31 | model: bedrock/anthropic.claude-3-haiku-20240307-v1:0 32 | 33 | - model_name: cohere.embed-english-v3 34 | litellm_params: 35 | model: bedrock/cohere.embed-english-v3 36 | drop_params: true #Needed to avoid errors when encoding_format is passed in by openai python client 37 | 38 | - model_name: cohere.embed-multilingual-v3 39 | litellm_params: 40 | model: bedrock/cohere.embed-multilingual-v3 41 | 42 | - model_name: meta.llama3-8b-instruct-v1:0 43 | litellm_params: 44 | model: bedrock/meta.llama3-8b-instruct-v1:0 45 | 46 | - model_name: meta.llama3-70b-instruct-v1:0 47 | litellm_params: 48 | model: 
bedrock/meta.llama3-70b-instruct-v1:0 49 | 50 | - model_name: mistral.mistral-7b-instruct-v0:2 51 | litellm_params: 52 | model: bedrock/mistral.mistral-7b-instruct-v0:2 53 | 54 | - model_name: mistral.mixtral-8x7b-instruct-v0:1 55 | litellm_params: 56 | model: bedrock/mistral.mixtral-8x7b-instruct-v0:1 57 | 58 | - model_name: mistral.mistral-large-2402-v1:0 59 | litellm_params: 60 | model: bedrock/mistral.mistral-large-2402-v1:0 61 | 62 | - model_name: apac.anthropic.claude-3-5-sonnet-20240620-v1:0 63 | litellm_params: 64 | model: bedrock/apac.anthropic.claude-3-5-sonnet-20240620-v1:0 65 | 66 | - model_name: apac.anthropic.claude-3-sonnet-20240229-v1:0 67 | litellm_params: 68 | model: bedrock/apac.anthropic.claude-3-sonnet-20240229-v1:0 69 | 70 | - model_name: apac.anthropic.claude-3-haiku-20240307-v1:0 71 | litellm_params: 72 | model: bedrock/apac.anthropic.claude-3-haiku-20240307-v1:0 73 | 74 | - model_name: apac.anthropic.claude-3-5-sonnet-20241022-v2:0 75 | litellm_params: 76 | model: bedrock/apac.anthropic.claude-3-5-sonnet-20241022-v2:0 77 | 78 | -------------------------------------------------------------------------------- /config/default-config-ap-southeast-1.yaml: -------------------------------------------------------------------------------- 1 | model_list: 2 | #Bedrock Models 3 | - model_name: anthropic.claude-3-haiku-20240307-v1:0 4 | litellm_params: 5 | model: bedrock/anthropic.claude-3-haiku-20240307-v1:0 6 | 7 | - model_name: anthropic.claude-3-5-sonnet-20240620-v1:0 8 | litellm_params: 9 | model: bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0 10 | 11 | - model_name: cohere.embed-english-v3 12 | litellm_params: 13 | model: bedrock/cohere.embed-english-v3 14 | drop_params: true #Needed to avoid errors when encoding_format is passed in by openai python client 15 | 16 | - model_name: cohere.embed-multilingual-v3 17 | litellm_params: 18 | model: bedrock/cohere.embed-multilingual-v3 19 | 20 | - model_name: 
apac.anthropic.claude-3-5-sonnet-20240620-v1:0 21 | litellm_params: 22 | model: bedrock/apac.anthropic.claude-3-5-sonnet-20240620-v1:0 23 | 24 | - model_name: apac.anthropic.claude-3-sonnet-20240229-v1:0 25 | litellm_params: 26 | model: bedrock/apac.anthropic.claude-3-sonnet-20240229-v1:0 27 | 28 | - model_name: apac.anthropic.claude-3-haiku-20240307-v1:0 29 | litellm_params: 30 | model: bedrock/apac.anthropic.claude-3-haiku-20240307-v1:0 31 | 32 | - model_name: apac.anthropic.claude-3-5-sonnet-20241022-v2:0 33 | litellm_params: 34 | model: bedrock/apac.anthropic.claude-3-5-sonnet-20241022-v2:0 35 | 36 | -------------------------------------------------------------------------------- /config/default-config-ap-southeast-2.yaml: -------------------------------------------------------------------------------- 1 | model_list: 2 | #Bedrock Models 3 | - model_name: amazon.titan-text-lite-v1 4 | litellm_params: 5 | model: bedrock/amazon.titan-text-lite-v1 6 | 7 | - model_name: amazon.titan-text-express-v1 8 | litellm_params: 9 | model: bedrock/amazon.titan-text-express-v1 10 | 11 | - model_name: amazon.titan-embed-image-v1 12 | litellm_params: 13 | model: bedrock/amazon.titan-embed-image-v1 14 | drop_params: true #Needed to avoid errors when encoding_format is passed in by openai python client 15 | 16 | - model_name: amazon.titan-embed-text-v2:0 17 | litellm_params: 18 | model: bedrock/amazon.titan-embed-text-v2:0 19 | drop_params: true #Needed to avoid errors when encoding_format is passed in by openai python client 20 | 21 | - model_name: anthropic.claude-3-sonnet-20240229-v1:0 22 | litellm_params: 23 | model: bedrock/anthropic.claude-3-sonnet-20240229-v1:0 24 | 25 | - model_name: anthropic.claude-3-haiku-20240307-v1:0 26 | litellm_params: 27 | model: bedrock/anthropic.claude-3-haiku-20240307-v1:0 28 | 29 | - model_name: anthropic.claude-3-5-sonnet-20241022-v2:0 30 | litellm_params: 31 | model: bedrock/anthropic.claude-3-5-sonnet-20241022-v2:0 32 | 33 | - model_name: 
cohere.embed-english-v3 34 | litellm_params: 35 | model: bedrock/cohere.embed-english-v3 36 | drop_params: true #Needed to avoid errors when encoding_format is passed in by openai python client 37 | 38 | - model_name: cohere.embed-multilingual-v3 39 | litellm_params: 40 | model: bedrock/cohere.embed-multilingual-v3 41 | 42 | - model_name: mistral.mistral-7b-instruct-v0:2 43 | litellm_params: 44 | model: bedrock/mistral.mistral-7b-instruct-v0:2 45 | 46 | - model_name: mistral.mixtral-8x7b-instruct-v0:1 47 | litellm_params: 48 | model: bedrock/mistral.mixtral-8x7b-instruct-v0:1 49 | 50 | - model_name: mistral.mistral-large-2402-v1:0 51 | litellm_params: 52 | model: bedrock/mistral.mistral-large-2402-v1:0 53 | 54 | - model_name: apac.anthropic.claude-3-sonnet-20240229-v1:0 55 | litellm_params: 56 | model: bedrock/apac.anthropic.claude-3-sonnet-20240229-v1:0 57 | 58 | - model_name: apac.anthropic.claude-3-5-sonnet-20240620-v1:0 59 | litellm_params: 60 | model: bedrock/apac.anthropic.claude-3-5-sonnet-20240620-v1:0 61 | 62 | - model_name: apac.anthropic.claude-3-haiku-20240307-v1:0 63 | litellm_params: 64 | model: bedrock/apac.anthropic.claude-3-haiku-20240307-v1:0 65 | 66 | - model_name: apac.anthropic.claude-3-5-sonnet-20241022-v2:0 67 | litellm_params: 68 | model: bedrock/apac.anthropic.claude-3-5-sonnet-20241022-v2:0 69 | 70 | -------------------------------------------------------------------------------- /config/default-config-ca-central-1.yaml: -------------------------------------------------------------------------------- 1 | model_list: 2 | #Bedrock Models 3 | - model_name: amazon.titan-text-lite-v1 4 | litellm_params: 5 | model: bedrock/amazon.titan-text-lite-v1 6 | 7 | - model_name: amazon.titan-text-express-v1 8 | litellm_params: 9 | model: bedrock/amazon.titan-text-express-v1 10 | 11 | - model_name: amazon.titan-embed-image-v1 12 | litellm_params: 13 | model: bedrock/amazon.titan-embed-image-v1 14 | drop_params: true #Needed to avoid errors when 
encoding_format is passed in by openai python client 15 | 16 | - model_name: amazon.titan-embed-text-v2:0 17 | litellm_params: 18 | model: bedrock/amazon.titan-embed-text-v2:0 19 | drop_params: true #Needed to avoid errors when encoding_format is passed in by openai python client 20 | 21 | - model_name: amazon.rerank-v1:0 22 | litellm_params: 23 | model: bedrock/amazon.rerank-v1:0 24 | 25 | - model_name: anthropic.claude-3-sonnet-20240229-v1:0 26 | litellm_params: 27 | model: bedrock/anthropic.claude-3-sonnet-20240229-v1:0 28 | 29 | - model_name: anthropic.claude-3-haiku-20240307-v1:0 30 | litellm_params: 31 | model: bedrock/anthropic.claude-3-haiku-20240307-v1:0 32 | 33 | - model_name: cohere.embed-english-v3 34 | litellm_params: 35 | model: bedrock/cohere.embed-english-v3 36 | drop_params: true #Needed to avoid errors when encoding_format is passed in by openai python client 37 | 38 | - model_name: cohere.embed-multilingual-v3 39 | litellm_params: 40 | model: bedrock/cohere.embed-multilingual-v3 41 | 42 | - model_name: cohere.rerank-v3-5:0 43 | litellm_params: 44 | model: bedrock/cohere.rerank-v3-5:0 45 | 46 | - model_name: meta.llama3-8b-instruct-v1:0 47 | litellm_params: 48 | model: bedrock/meta.llama3-8b-instruct-v1:0 49 | 50 | - model_name: meta.llama3-70b-instruct-v1:0 51 | litellm_params: 52 | model: bedrock/meta.llama3-70b-instruct-v1:0 53 | 54 | - model_name: mistral.mistral-7b-instruct-v0:2 55 | litellm_params: 56 | model: bedrock/mistral.mistral-7b-instruct-v0:2 57 | 58 | - model_name: mistral.mixtral-8x7b-instruct-v0:1 59 | litellm_params: 60 | model: bedrock/mistral.mixtral-8x7b-instruct-v0:1 61 | 62 | - model_name: mistral.mistral-large-2402-v1:0 63 | litellm_params: 64 | model: bedrock/mistral.mistral-large-2402-v1:0 65 | 66 | -------------------------------------------------------------------------------- /config/default-config-eu-central-1.yaml: -------------------------------------------------------------------------------- 1 | model_list: 2 | 
#Bedrock Models 3 | - model_name: amazon.titan-text-express-v1 4 | litellm_params: 5 | model: bedrock/amazon.titan-text-express-v1 6 | 7 | - model_name: amazon.titan-text-lite-v1 8 | litellm_params: 9 | model: bedrock/amazon.titan-text-lite-v1 10 | 11 | - model_name: amazon.titan-embed-text-v1 12 | litellm_params: 13 | model: bedrock/amazon.titan-embed-text-v1 14 | drop_params: true #Needed to avoid errors when encoding_format is passed in by openai python client 15 | 16 | - model_name: amazon.titan-embed-image-v1 17 | litellm_params: 18 | model: bedrock/amazon.titan-embed-image-v1 19 | drop_params: true #Needed to avoid errors when encoding_format is passed in by openai python client 20 | 21 | - model_name: amazon.titan-embed-text-v2:0 22 | litellm_params: 23 | model: bedrock/amazon.titan-embed-text-v2:0 24 | drop_params: true #Needed to avoid errors when encoding_format is passed in by openai python client 25 | 26 | - model_name: amazon.rerank-v1:0 27 | litellm_params: 28 | model: bedrock/amazon.rerank-v1:0 29 | 30 | - model_name: anthropic.claude-3-haiku-20240307-v1:0 31 | litellm_params: 32 | model: bedrock/anthropic.claude-3-haiku-20240307-v1:0 33 | 34 | - model_name: anthropic.claude-3-5-sonnet-20240620-v1:0 35 | litellm_params: 36 | model: bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0 37 | 38 | - model_name: cohere.embed-english-v3 39 | litellm_params: 40 | model: bedrock/cohere.embed-english-v3 41 | drop_params: true #Needed to avoid errors when encoding_format is passed in by openai python client 42 | 43 | - model_name: cohere.embed-multilingual-v3 44 | litellm_params: 45 | model: bedrock/cohere.embed-multilingual-v3 46 | 47 | - model_name: cohere.rerank-v3-5:0 48 | litellm_params: 49 | model: bedrock/cohere.rerank-v3-5:0 50 | 51 | - model_name: eu.anthropic.claude-3-sonnet-20240229-v1:0 52 | litellm_params: 53 | model: bedrock/eu.anthropic.claude-3-sonnet-20240229-v1:0 54 | 55 | - model_name: eu.anthropic.claude-3-5-sonnet-20240620-v1:0 56 | 
litellm_params: 57 | model: bedrock/eu.anthropic.claude-3-5-sonnet-20240620-v1:0 58 | 59 | - model_name: eu.anthropic.claude-3-haiku-20240307-v1:0 60 | litellm_params: 61 | model: bedrock/eu.anthropic.claude-3-haiku-20240307-v1:0 62 | 63 | - model_name: eu.meta.llama3-2-3b-instruct-v1:0 64 | litellm_params: 65 | model: bedrock/eu.meta.llama3-2-3b-instruct-v1:0 66 | 67 | - model_name: eu.meta.llama3-2-1b-instruct-v1:0 68 | litellm_params: 69 | model: bedrock/eu.meta.llama3-2-1b-instruct-v1:0 70 | 71 | -------------------------------------------------------------------------------- /config/default-config-eu-north-1.yaml: -------------------------------------------------------------------------------- 1 | model_list: 2 | #Bedrock Models 3 | - model_name: amazon.titan-embed-text-v2:0 4 | litellm_params: 5 | model: bedrock/amazon.titan-embed-text-v2:0 6 | drop_params: true #Needed to avoid errors when encoding_format is passed in by openai python client 7 | 8 | -------------------------------------------------------------------------------- /config/default-config-eu-west-1.yaml: -------------------------------------------------------------------------------- 1 | model_list: 2 | #Bedrock Models 3 | - model_name: amazon.titan-text-lite-v1 4 | litellm_params: 5 | model: bedrock/amazon.titan-text-lite-v1 6 | 7 | - model_name: amazon.titan-text-express-v1 8 | litellm_params: 9 | model: bedrock/amazon.titan-text-express-v1 10 | 11 | - model_name: amazon.titan-embed-image-v1 12 | litellm_params: 13 | model: bedrock/amazon.titan-embed-image-v1 14 | drop_params: true #Needed to avoid errors when encoding_format is passed in by openai python client 15 | 16 | - model_name: amazon.titan-embed-text-v2:0 17 | litellm_params: 18 | model: bedrock/amazon.titan-embed-text-v2:0 19 | drop_params: true #Needed to avoid errors when encoding_format is passed in by openai python client 20 | 21 | - model_name: amazon.titan-image-generator-v1 22 | litellm_params: 23 | model: 
bedrock/amazon.titan-image-generator-v1 24 | 25 | - model_name: anthropic.claude-3-haiku-20240307-v1:0 26 | litellm_params: 27 | model: bedrock/anthropic.claude-3-haiku-20240307-v1:0 28 | 29 | - model_name: cohere.embed-english-v3 30 | litellm_params: 31 | model: bedrock/cohere.embed-english-v3 32 | drop_params: true #Needed to avoid errors when encoding_format is passed in by openai python client 33 | 34 | - model_name: cohere.embed-multilingual-v3 35 | litellm_params: 36 | model: bedrock/cohere.embed-multilingual-v3 37 | 38 | - model_name: mistral.mistral-7b-instruct-v0:2 39 | litellm_params: 40 | model: bedrock/mistral.mistral-7b-instruct-v0:2 41 | 42 | - model_name: mistral.mixtral-8x7b-instruct-v0:1 43 | litellm_params: 44 | model: bedrock/mistral.mixtral-8x7b-instruct-v0:1 45 | 46 | - model_name: mistral.mistral-large-2402-v1:0 47 | litellm_params: 48 | model: bedrock/mistral.mistral-large-2402-v1:0 49 | 50 | - model_name: eu.anthropic.claude-3-sonnet-20240229-v1:0 51 | litellm_params: 52 | model: bedrock/eu.anthropic.claude-3-sonnet-20240229-v1:0 53 | 54 | - model_name: eu.anthropic.claude-3-haiku-20240307-v1:0 55 | litellm_params: 56 | model: bedrock/eu.anthropic.claude-3-haiku-20240307-v1:0 57 | 58 | - model_name: eu.anthropic.claude-3-5-sonnet-20240620-v1:0 59 | litellm_params: 60 | model: bedrock/eu.anthropic.claude-3-5-sonnet-20240620-v1:0 61 | 62 | - model_name: eu.meta.llama3-2-3b-instruct-v1:0 63 | litellm_params: 64 | model: bedrock/eu.meta.llama3-2-3b-instruct-v1:0 65 | 66 | - model_name: eu.meta.llama3-2-1b-instruct-v1:0 67 | litellm_params: 68 | model: bedrock/eu.meta.llama3-2-1b-instruct-v1:0 69 | 70 | -------------------------------------------------------------------------------- /config/default-config-eu-west-2.yaml: -------------------------------------------------------------------------------- 1 | model_list: 2 | #Bedrock Models 3 | - model_name: amazon.titan-text-lite-v1 4 | litellm_params: 5 | model: bedrock/amazon.titan-text-lite-v1 6 | 
7 | - model_name: amazon.titan-text-express-v1 8 | litellm_params: 9 | model: bedrock/amazon.titan-text-express-v1 10 | 11 | - model_name: amazon.titan-embed-image-v1 12 | litellm_params: 13 | model: bedrock/amazon.titan-embed-image-v1 14 | drop_params: true #Needed to avoid errors when encoding_format is passed in by openai python client 15 | 16 | - model_name: amazon.titan-image-generator-v1 17 | litellm_params: 18 | model: bedrock/amazon.titan-image-generator-v1 19 | 20 | - model_name: amazon.titan-embed-text-v2:0 21 | litellm_params: 22 | model: bedrock/amazon.titan-embed-text-v2:0 23 | drop_params: true #Needed to avoid errors when encoding_format is passed in by openai python client 24 | 25 | - model_name: anthropic.claude-3-sonnet-20240229-v1:0 26 | litellm_params: 27 | model: bedrock/anthropic.claude-3-sonnet-20240229-v1:0 28 | 29 | - model_name: anthropic.claude-3-haiku-20240307-v1:0 30 | litellm_params: 31 | model: bedrock/anthropic.claude-3-haiku-20240307-v1:0 32 | 33 | - model_name: cohere.embed-english-v3 34 | litellm_params: 35 | model: bedrock/cohere.embed-english-v3 36 | drop_params: true #Needed to avoid errors when encoding_format is passed in by openai python client 37 | 38 | - model_name: cohere.embed-multilingual-v3 39 | litellm_params: 40 | model: bedrock/cohere.embed-multilingual-v3 41 | 42 | - model_name: meta.llama3-8b-instruct-v1:0 43 | litellm_params: 44 | model: bedrock/meta.llama3-8b-instruct-v1:0 45 | 46 | - model_name: meta.llama3-70b-instruct-v1:0 47 | litellm_params: 48 | model: bedrock/meta.llama3-70b-instruct-v1:0 49 | 50 | - model_name: mistral.mistral-7b-instruct-v0:2 51 | litellm_params: 52 | model: bedrock/mistral.mistral-7b-instruct-v0:2 53 | 54 | - model_name: mistral.mixtral-8x7b-instruct-v0:1 55 | litellm_params: 56 | model: bedrock/mistral.mixtral-8x7b-instruct-v0:1 57 | 58 | - model_name: mistral.mistral-large-2402-v1:0 59 | litellm_params: 60 | model: bedrock/mistral.mistral-large-2402-v1:0 61 | 62 | 
-------------------------------------------------------------------------------- /config/default-config-eu-west-3.yaml: -------------------------------------------------------------------------------- 1 | model_list: 2 | #Bedrock Models 3 | - model_name: amazon.titan-text-lite-v1 4 | litellm_params: 5 | model: bedrock/amazon.titan-text-lite-v1 6 | 7 | - model_name: amazon.titan-text-express-v1 8 | litellm_params: 9 | model: bedrock/amazon.titan-text-express-v1 10 | 11 | - model_name: amazon.titan-embed-image-v1 12 | litellm_params: 13 | model: bedrock/amazon.titan-embed-image-v1 14 | drop_params: true #Needed to avoid errors when encoding_format is passed in by openai python client 15 | 16 | - model_name: amazon.titan-embed-text-v2:0 17 | litellm_params: 18 | model: bedrock/amazon.titan-embed-text-v2:0 19 | drop_params: true #Needed to avoid errors when encoding_format is passed in by openai python client 20 | 21 | - model_name: anthropic.claude-3-haiku-20240307-v1:0 22 | litellm_params: 23 | model: bedrock/anthropic.claude-3-haiku-20240307-v1:0 24 | 25 | - model_name: cohere.embed-english-v3 26 | litellm_params: 27 | model: bedrock/cohere.embed-english-v3 28 | drop_params: true #Needed to avoid errors when encoding_format is passed in by openai python client 29 | 30 | - model_name: cohere.embed-multilingual-v3 31 | litellm_params: 32 | model: bedrock/cohere.embed-multilingual-v3 33 | 34 | - model_name: mistral.mistral-7b-instruct-v0:2 35 | litellm_params: 36 | model: bedrock/mistral.mistral-7b-instruct-v0:2 37 | 38 | - model_name: mistral.mixtral-8x7b-instruct-v0:1 39 | litellm_params: 40 | model: bedrock/mistral.mixtral-8x7b-instruct-v0:1 41 | 42 | - model_name: mistral.mistral-large-2402-v1:0 43 | litellm_params: 44 | model: bedrock/mistral.mistral-large-2402-v1:0 45 | 46 | - model_name: eu.anthropic.claude-3-5-sonnet-20240620-v1:0 47 | litellm_params: 48 | model: bedrock/eu.anthropic.claude-3-5-sonnet-20240620-v1:0 49 | 50 | - model_name: 
eu.anthropic.claude-3-sonnet-20240229-v1:0 51 | litellm_params: 52 | model: bedrock/eu.anthropic.claude-3-sonnet-20240229-v1:0 53 | 54 | - model_name: eu.anthropic.claude-3-haiku-20240307-v1:0 55 | litellm_params: 56 | model: bedrock/eu.anthropic.claude-3-haiku-20240307-v1:0 57 | 58 | - model_name: eu.meta.llama3-2-1b-instruct-v1:0 59 | litellm_params: 60 | model: bedrock/eu.meta.llama3-2-1b-instruct-v1:0 61 | 62 | - model_name: eu.meta.llama3-2-3b-instruct-v1:0 63 | litellm_params: 64 | model: bedrock/eu.meta.llama3-2-3b-instruct-v1:0 65 | 66 | -------------------------------------------------------------------------------- /config/default-config-sa-east-1.yaml: -------------------------------------------------------------------------------- 1 | model_list: 2 | #Bedrock Models 3 | - model_name: amazon.titan-text-lite-v1 4 | litellm_params: 5 | model: bedrock/amazon.titan-text-lite-v1 6 | 7 | - model_name: amazon.titan-text-express-v1 8 | litellm_params: 9 | model: bedrock/amazon.titan-text-express-v1 10 | 11 | - model_name: amazon.titan-embed-image-v1 12 | litellm_params: 13 | model: bedrock/amazon.titan-embed-image-v1 14 | drop_params: true #Needed to avoid errors when encoding_format is passed in by openai python client 15 | 16 | - model_name: amazon.titan-embed-text-v2:0 17 | litellm_params: 18 | model: bedrock/amazon.titan-embed-text-v2:0 19 | drop_params: true #Needed to avoid errors when encoding_format is passed in by openai python client 20 | 21 | - model_name: anthropic.claude-3-sonnet-20240229-v1:0 22 | litellm_params: 23 | model: bedrock/anthropic.claude-3-sonnet-20240229-v1:0 24 | 25 | - model_name: anthropic.claude-3-haiku-20240307-v1:0 26 | litellm_params: 27 | model: bedrock/anthropic.claude-3-haiku-20240307-v1:0 28 | 29 | - model_name: cohere.embed-english-v3 30 | litellm_params: 31 | model: bedrock/cohere.embed-english-v3 32 | drop_params: true #Needed to avoid errors when encoding_format is passed in by openai python client 33 | 34 | - 
model_name: cohere.embed-multilingual-v3 35 | litellm_params: 36 | model: bedrock/cohere.embed-multilingual-v3 37 | 38 | - model_name: mistral.mistral-7b-instruct-v0:2 39 | litellm_params: 40 | model: bedrock/mistral.mistral-7b-instruct-v0:2 41 | 42 | - model_name: mistral.mixtral-8x7b-instruct-v0:1 43 | litellm_params: 44 | model: bedrock/mistral.mixtral-8x7b-instruct-v0:1 45 | 46 | - model_name: mistral.mistral-large-2402-v1:0 47 | litellm_params: 48 | model: bedrock/mistral.mistral-large-2402-v1:0 49 | 50 | -------------------------------------------------------------------------------- /config/default-config-us-east-1.yaml: -------------------------------------------------------------------------------- 1 | model_list: 2 | #Bedrock Models 3 | - model_name: amazon.titan-tg1-large 4 | litellm_params: 5 | model: bedrock/amazon.titan-tg1-large 6 | 7 | - model_name: amazon.titan-image-generator-v1 8 | litellm_params: 9 | model: bedrock/amazon.titan-image-generator-v1 10 | 11 | - model_name: amazon.titan-image-generator-v2:0 12 | litellm_params: 13 | model: bedrock/amazon.titan-image-generator-v2:0 14 | 15 | - model_name: amazon.titan-text-premier-v1:0 16 | litellm_params: 17 | model: bedrock/amazon.titan-text-premier-v1:0 18 | 19 | - model_name: amazon.nova-pro-v1:0 20 | litellm_params: 21 | model: bedrock/amazon.nova-pro-v1:0 22 | 23 | - model_name: amazon.nova-lite-v1:0 24 | litellm_params: 25 | model: bedrock/amazon.nova-lite-v1:0 26 | 27 | - model_name: amazon.nova-canvas-v1:0 28 | litellm_params: 29 | model: bedrock/amazon.nova-canvas-v1:0 30 | 31 | - model_name: amazon.nova-reel-v1:0 32 | litellm_params: 33 | model: bedrock/amazon.nova-reel-v1:0 34 | 35 | - model_name: amazon.nova-micro-v1:0 36 | litellm_params: 37 | model: bedrock/amazon.nova-micro-v1:0 38 | 39 | - model_name: amazon.titan-embed-g1-text-02 40 | litellm_params: 41 | model: bedrock/amazon.titan-embed-g1-text-02 42 | 43 | - model_name: amazon.titan-text-lite-v1 44 | litellm_params: 45 | model: 
bedrock/amazon.titan-text-lite-v1 46 | 47 | - model_name: amazon.titan-text-express-v1 48 | litellm_params: 49 | model: bedrock/amazon.titan-text-express-v1 50 | 51 | - model_name: amazon.titan-embed-text-v1 52 | litellm_params: 53 | model: bedrock/amazon.titan-embed-text-v1 54 | drop_params: true #Needed to avoid errors when encoding_format is passed in by openai python client 55 | 56 | - model_name: amazon.titan-embed-text-v2:0 57 | litellm_params: 58 | model: bedrock/amazon.titan-embed-text-v2:0 59 | drop_params: true #Needed to avoid errors when encoding_format is passed in by openai python client 60 | 61 | - model_name: amazon.titan-embed-image-v1 62 | litellm_params: 63 | model: bedrock/amazon.titan-embed-image-v1 64 | drop_params: true #Needed to avoid errors when encoding_format is passed in by openai python client 65 | 66 | - model_name: ai21.jamba-1-5-large-v1:0 67 | litellm_params: 68 | model: bedrock/converse/ai21.jamba-1-5-large-v1:0 69 | 70 | - model_name: ai21.jamba-1-5-mini-v1:0 71 | litellm_params: 72 | model: bedrock/converse/ai21.jamba-1-5-mini-v1:0 73 | 74 | - model_name: anthropic.claude-3-haiku-20240307-v1:0 75 | litellm_params: 76 | model: bedrock/anthropic.claude-3-haiku-20240307-v1:0 77 | 78 | - model_name: anthropic.claude-3-5-sonnet-20240620-v1:0 79 | litellm_params: 80 | model: bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0 81 | 82 | - model_name: cohere.command-text-v14 83 | litellm_params: 84 | model: bedrock/cohere.command-text-v14 85 | 86 | - model_name: cohere.command-r-v1:0 87 | litellm_params: 88 | model: bedrock/cohere.command-r-v1:0 89 | 90 | - model_name: cohere.command-r-plus-v1:0 91 | litellm_params: 92 | model: bedrock/cohere.command-r-plus-v1:0 93 | 94 | - model_name: cohere.command-light-text-v14 95 | litellm_params: 96 | model: bedrock/cohere.command-light-text-v14 97 | 98 | - model_name: cohere.embed-english-v3 99 | litellm_params: 100 | model: bedrock/cohere.embed-english-v3 101 | drop_params: true #Needed to avoid 
errors when encoding_format is passed in by openai python client 102 | 103 | - model_name: cohere.embed-multilingual-v3 104 | litellm_params: 105 | model: bedrock/cohere.embed-multilingual-v3 106 | 107 | - model_name: meta.llama3-8b-instruct-v1:0 108 | litellm_params: 109 | model: bedrock/meta.llama3-8b-instruct-v1:0 110 | 111 | - model_name: meta.llama3-70b-instruct-v1:0 112 | litellm_params: 113 | model: bedrock/meta.llama3-70b-instruct-v1:0 114 | 115 | - model_name: mistral.mistral-7b-instruct-v0:2 116 | litellm_params: 117 | model: bedrock/mistral.mistral-7b-instruct-v0:2 118 | 119 | - model_name: mistral.mixtral-8x7b-instruct-v0:1 120 | litellm_params: 121 | model: bedrock/mistral.mixtral-8x7b-instruct-v0:1 122 | 123 | - model_name: mistral.mistral-large-2402-v1:0 124 | litellm_params: 125 | model: bedrock/mistral.mistral-large-2402-v1:0 126 | 127 | - model_name: mistral.mistral-small-2402-v1:0 128 | litellm_params: 129 | model: bedrock/mistral.mistral-small-2402-v1:0 130 | 131 | - model_name: us.anthropic.claude-3-sonnet-20240229-v1:0 132 | litellm_params: 133 | model: bedrock/us.anthropic.claude-3-sonnet-20240229-v1:0 134 | 135 | - model_name: us.anthropic.claude-3-opus-20240229-v1:0 136 | litellm_params: 137 | model: bedrock/us.anthropic.claude-3-opus-20240229-v1:0 138 | 139 | - model_name: us.anthropic.claude-3-haiku-20240307-v1:0 140 | litellm_params: 141 | model: bedrock/us.anthropic.claude-3-haiku-20240307-v1:0 142 | 143 | - model_name: us.meta.llama3-2-11b-instruct-v1:0 144 | litellm_params: 145 | model: bedrock/us.meta.llama3-2-11b-instruct-v1:0 146 | 147 | - model_name: us.meta.llama3-2-3b-instruct-v1:0 148 | litellm_params: 149 | model: bedrock/us.meta.llama3-2-3b-instruct-v1:0 150 | 151 | - model_name: us.meta.llama3-2-90b-instruct-v1:0 152 | litellm_params: 153 | model: bedrock/us.meta.llama3-2-90b-instruct-v1:0 154 | 155 | - model_name: us.meta.llama3-2-1b-instruct-v1:0 156 | litellm_params: 157 | model: bedrock/us.meta.llama3-2-1b-instruct-v1:0 
158 | 159 | - model_name: us.anthropic.claude-3-5-sonnet-20240620-v1:0 160 | litellm_params: 161 | model: bedrock/us.anthropic.claude-3-5-sonnet-20240620-v1:0 162 | 163 | - model_name: us.anthropic.claude-3-5-haiku-20241022-v1:0 164 | litellm_params: 165 | model: bedrock/us.anthropic.claude-3-5-haiku-20241022-v1:0 166 | 167 | - model_name: us.meta.llama3-1-8b-instruct-v1:0 168 | litellm_params: 169 | model: bedrock/us.meta.llama3-1-8b-instruct-v1:0 170 | 171 | - model_name: us.meta.llama3-1-70b-instruct-v1:0 172 | litellm_params: 173 | model: bedrock/us.meta.llama3-1-70b-instruct-v1:0 174 | 175 | - model_name: us.amazon.nova-lite-v1:0 176 | litellm_params: 177 | model: bedrock/us.amazon.nova-lite-v1:0 178 | 179 | - model_name: us.amazon.nova-pro-v1:0 180 | litellm_params: 181 | model: bedrock/us.amazon.nova-pro-v1:0 182 | 183 | - model_name: us.amazon.nova-micro-v1:0 184 | litellm_params: 185 | model: bedrock/us.amazon.nova-micro-v1:0 186 | 187 | - model_name: us.meta.llama3-3-70b-instruct-v1:0 188 | litellm_params: 189 | model: bedrock/us.meta.llama3-3-70b-instruct-v1:0 190 | 191 | - model_name: us.anthropic.claude-3-5-sonnet-20241022-v2:0 192 | litellm_params: 193 | model: bedrock/us.anthropic.claude-3-5-sonnet-20241022-v2:0 194 | 195 | - model_name: us.anthropic.claude-3-7-sonnet-20250219-v1:0 196 | litellm_params: 197 | model: bedrock/converse/us.anthropic.claude-3-7-sonnet-20250219-v1:0 198 | -------------------------------------------------------------------------------- /config/default-config-us-east-2.yaml: -------------------------------------------------------------------------------- 1 | model_list: 2 | #Bedrock Models 3 | - model_name: amazon.titan-embed-text-v2:0 4 | litellm_params: 5 | model: bedrock/amazon.titan-embed-text-v2:0 6 | drop_params: true #Needed to avoid errors when encoding_format is passed in by openai python client 7 | 8 | - model_name: meta.llama3-3-70b-instruct-v1:0 9 | litellm_params: 10 | model: 
bedrock/meta.llama3-3-70b-instruct-v1:0 11 | 12 | - model_name: us.anthropic.claude-3-haiku-20240307-v1:0 13 | litellm_params: 14 | model: bedrock/us.anthropic.claude-3-haiku-20240307-v1:0 15 | 16 | - model_name: us.meta.llama3-2-1b-instruct-v1:0 17 | litellm_params: 18 | model: bedrock/us.meta.llama3-2-1b-instruct-v1:0 19 | 20 | - model_name: us.meta.llama3-2-11b-instruct-v1:0 21 | litellm_params: 22 | model: bedrock/us.meta.llama3-2-11b-instruct-v1:0 23 | 24 | - model_name: us.meta.llama3-2-3b-instruct-v1:0 25 | litellm_params: 26 | model: bedrock/us.meta.llama3-2-3b-instruct-v1:0 27 | 28 | - model_name: us.meta.llama3-2-90b-instruct-v1:0 29 | litellm_params: 30 | model: bedrock/us.meta.llama3-2-90b-instruct-v1:0 31 | 32 | - model_name: us.meta.llama3-1-8b-instruct-v1:0 33 | litellm_params: 34 | model: bedrock/us.meta.llama3-1-8b-instruct-v1:0 35 | 36 | - model_name: us.meta.llama3-1-70b-instruct-v1:0 37 | litellm_params: 38 | model: bedrock/us.meta.llama3-1-70b-instruct-v1:0 39 | 40 | - model_name: us.amazon.nova-micro-v1:0 41 | litellm_params: 42 | model: bedrock/us.amazon.nova-micro-v1:0 43 | 44 | - model_name: us.amazon.nova-lite-v1:0 45 | litellm_params: 46 | model: bedrock/us.amazon.nova-lite-v1:0 47 | 48 | - model_name: us.amazon.nova-pro-v1:0 49 | litellm_params: 50 | model: bedrock/us.amazon.nova-pro-v1:0 51 | 52 | - model_name: us.anthropic.claude-3-5-haiku-20241022-v1:0 53 | litellm_params: 54 | model: bedrock/us.anthropic.claude-3-5-haiku-20241022-v1:0 55 | 56 | - model_name: us.meta.llama3-1-405b-instruct-v1:0 57 | litellm_params: 58 | model: bedrock/us.meta.llama3-1-405b-instruct-v1:0 59 | 60 | - model_name: us.meta.llama3-3-70b-instruct-v1:0 61 | litellm_params: 62 | model: bedrock/us.meta.llama3-3-70b-instruct-v1:0 63 | 64 | - model_name: us.anthropic.claude-3-5-sonnet-20240620-v1:0 65 | litellm_params: 66 | model: bedrock/us.anthropic.claude-3-5-sonnet-20240620-v1:0 67 | 68 | - model_name: us.anthropic.claude-3-5-sonnet-20241022-v2:0 69 | 
litellm_params: 70 | model: bedrock/us.anthropic.claude-3-5-sonnet-20241022-v2:0 71 | 72 | - model_name: us.anthropic.claude-3-7-sonnet-20250219-v1:0 73 | litellm_params: 74 | model: bedrock/converse/us.anthropic.claude-3-7-sonnet-20250219-v1:0 75 | 76 | -------------------------------------------------------------------------------- /config/default-config-us-west-1.yaml: -------------------------------------------------------------------------------- 1 | model_list: 2 | #Bedrock Models 3 | - model_name: amazon.titan-embed-text-v2:0 4 | litellm_params: 5 | model: bedrock/amazon.titan-embed-text-v2:0 6 | drop_params: true #Needed to avoid errors when encoding_format is passed in by openai python client 7 | 8 | -------------------------------------------------------------------------------- /config/default-config-us-west-2.yaml: -------------------------------------------------------------------------------- 1 | model_list: 2 | #Bedrock Models 3 | - model_name: amazon.titan-tg1-large 4 | litellm_params: 5 | model: bedrock/amazon.titan-tg1-large 6 | 7 | - model_name: amazon.titan-embed-g1-text-02 8 | litellm_params: 9 | model: bedrock/amazon.titan-embed-g1-text-02 10 | 11 | - model_name: amazon.titan-text-lite-v1 12 | litellm_params: 13 | model: bedrock/amazon.titan-text-lite-v1 14 | 15 | - model_name: amazon.titan-text-express-v1 16 | litellm_params: 17 | model: bedrock/amazon.titan-text-express-v1 18 | 19 | - model_name: amazon.titan-embed-text-v1 20 | litellm_params: 21 | model: bedrock/amazon.titan-embed-text-v1 22 | drop_params: true #Needed to avoid errors when encoding_format is passed in by openai python client 23 | 24 | - model_name: amazon.titan-embed-text-v2:0 25 | litellm_params: 26 | model: bedrock/amazon.titan-embed-text-v2:0 27 | drop_params: true #Needed to avoid errors when encoding_format is passed in by openai python client 28 | 29 | - model_name: amazon.titan-embed-image-v1 30 | litellm_params: 31 | model: bedrock/amazon.titan-embed-image-v1 32 | 
drop_params: true #Needed to avoid errors when encoding_format is passed in by openai python client 33 | 34 | - model_name: amazon.titan-image-generator-v1 35 | litellm_params: 36 | model: bedrock/amazon.titan-image-generator-v1 37 | 38 | - model_name: amazon.titan-image-generator-v2:0 39 | litellm_params: 40 | model: bedrock/amazon.titan-image-generator-v2:0 41 | 42 | - model_name: amazon.rerank-v1:0 43 | litellm_params: 44 | model: bedrock/amazon.rerank-v1:0 45 | 46 | - model_name: stability.sd3-5-large-v1:0 47 | litellm_params: 48 | model: bedrock/stability.sd3-5-large-v1:0 49 | 50 | - model_name: anthropic.claude-3-5-sonnet-20241022-v2:0 51 | litellm_params: 52 | model: bedrock/anthropic.claude-3-5-sonnet-20241022-v2:0 53 | 54 | - model_name: anthropic.claude-3-5-haiku-20241022-v1:0 55 | litellm_params: 56 | model: bedrock/anthropic.claude-3-5-haiku-20241022-v1:0 57 | 58 | - model_name: anthropic.claude-3-haiku-20240307-v1:0 59 | litellm_params: 60 | model: bedrock/anthropic.claude-3-haiku-20240307-v1:0 61 | 62 | - model_name: anthropic.claude-3-opus-20240229-v1:0 63 | litellm_params: 64 | model: bedrock/anthropic.claude-3-opus-20240229-v1:0 65 | 66 | - model_name: anthropic.claude-3-5-sonnet-20240620-v1:0 67 | litellm_params: 68 | model: bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0 69 | 70 | - model_name: cohere.command-text-v14 71 | litellm_params: 72 | model: bedrock/cohere.command-text-v14 73 | 74 | - model_name: cohere.command-r-v1:0 75 | litellm_params: 76 | model: bedrock/cohere.command-r-v1:0 77 | 78 | - model_name: cohere.command-r-plus-v1:0 79 | litellm_params: 80 | model: bedrock/cohere.command-r-plus-v1:0 81 | 82 | - model_name: cohere.command-light-text-v14 83 | litellm_params: 84 | model: bedrock/cohere.command-light-text-v14 85 | 86 | - model_name: cohere.embed-english-v3 87 | litellm_params: 88 | model: bedrock/cohere.embed-english-v3 89 | drop_params: true #Needed to avoid errors when encoding_format is passed in by openai python client 90 
| 91 | - model_name: cohere.embed-multilingual-v3 92 | litellm_params: 93 | model: bedrock/cohere.embed-multilingual-v3 94 | 95 | - model_name: cohere.rerank-v3-5:0 96 | litellm_params: 97 | model: bedrock/cohere.rerank-v3-5:0 98 | 99 | - model_name: meta.llama3-8b-instruct-v1:0 100 | litellm_params: 101 | model: bedrock/meta.llama3-8b-instruct-v1:0 102 | 103 | - model_name: meta.llama3-70b-instruct-v1:0 104 | litellm_params: 105 | model: bedrock/meta.llama3-70b-instruct-v1:0 106 | 107 | - model_name: meta.llama3-1-8b-instruct-v1:0 108 | litellm_params: 109 | model: bedrock/meta.llama3-1-8b-instruct-v1:0 110 | 111 | - model_name: meta.llama3-1-70b-instruct-v1:0 112 | litellm_params: 113 | model: bedrock/meta.llama3-1-70b-instruct-v1:0 114 | 115 | - model_name: meta.llama3-1-405b-instruct-v1:0 116 | litellm_params: 117 | model: bedrock/meta.llama3-1-405b-instruct-v1:0 118 | 119 | - model_name: mistral.mistral-7b-instruct-v0:2 120 | litellm_params: 121 | model: bedrock/mistral.mistral-7b-instruct-v0:2 122 | 123 | - model_name: mistral.mixtral-8x7b-instruct-v0:1 124 | litellm_params: 125 | model: bedrock/mistral.mixtral-8x7b-instruct-v0:1 126 | 127 | - model_name: mistral.mistral-large-2402-v1:0 128 | litellm_params: 129 | model: bedrock/mistral.mistral-large-2402-v1:0 130 | 131 | - model_name: mistral.mistral-large-2407-v1:0 132 | litellm_params: 133 | model: bedrock/mistral.mistral-large-2407-v1:0 134 | 135 | - model_name: luma.ray-v2:0 136 | litellm_params: 137 | model: bedrock/luma.ray-v2:0 138 | 139 | - model_name: us.anthropic.claude-3-haiku-20240307-v1:0 140 | litellm_params: 141 | model: bedrock/us.anthropic.claude-3-haiku-20240307-v1:0 142 | 143 | - model_name: us.anthropic.claude-3-5-sonnet-20240620-v1:0 144 | litellm_params: 145 | model: bedrock/us.anthropic.claude-3-5-sonnet-20240620-v1:0 146 | 147 | - model_name: us.anthropic.claude-3-sonnet-20240229-v1:0 148 | litellm_params: 149 | model: bedrock/us.anthropic.claude-3-sonnet-20240229-v1:0 150 | 151 | - 
model_name: us.anthropic.claude-3-opus-20240229-v1:0 152 | litellm_params: 153 | model: bedrock/us.anthropic.claude-3-opus-20240229-v1:0 154 | 155 | - model_name: us.meta.llama3-2-11b-instruct-v1:0 156 | litellm_params: 157 | model: bedrock/us.meta.llama3-2-11b-instruct-v1:0 158 | 159 | - model_name: us.meta.llama3-2-90b-instruct-v1:0 160 | litellm_params: 161 | model: bedrock/us.meta.llama3-2-90b-instruct-v1:0 162 | 163 | - model_name: us.meta.llama3-2-3b-instruct-v1:0 164 | litellm_params: 165 | model: bedrock/us.meta.llama3-2-3b-instruct-v1:0 166 | 167 | - model_name: us.meta.llama3-2-1b-instruct-v1:0 168 | litellm_params: 169 | model: bedrock/us.meta.llama3-2-1b-instruct-v1:0 170 | 171 | - model_name: us.anthropic.claude-3-5-haiku-20241022-v1:0 172 | litellm_params: 173 | model: bedrock/us.anthropic.claude-3-5-haiku-20241022-v1:0 174 | 175 | - model_name: us.meta.llama3-1-8b-instruct-v1:0 176 | litellm_params: 177 | model: bedrock/us.meta.llama3-1-8b-instruct-v1:0 178 | 179 | - model_name: us.meta.llama3-1-70b-instruct-v1:0 180 | litellm_params: 181 | model: bedrock/us.meta.llama3-1-70b-instruct-v1:0 182 | 183 | - model_name: us.amazon.nova-pro-v1:0 184 | litellm_params: 185 | model: bedrock/us.amazon.nova-pro-v1:0 186 | 187 | - model_name: us.amazon.nova-lite-v1:0 188 | litellm_params: 189 | model: bedrock/us.amazon.nova-lite-v1:0 190 | 191 | - model_name: us.amazon.nova-micro-v1:0 192 | litellm_params: 193 | model: bedrock/us.amazon.nova-micro-v1:0 194 | 195 | - model_name: us.meta.llama3-3-70b-instruct-v1:0 196 | litellm_params: 197 | model: bedrock/us.meta.llama3-3-70b-instruct-v1:0 198 | 199 | - model_name: us.anthropic.claude-3-5-sonnet-20241022-v2:0 200 | litellm_params: 201 | model: bedrock/us.anthropic.claude-3-5-sonnet-20241022-v2:0 202 | 203 | - model_name: us.anthropic.claude-3-7-sonnet-20250219-v1:0 204 | litellm_params: 205 | model: bedrock/converse/us.anthropic.claude-3-7-sonnet-20250219-v1:0 
-------------------------------------------------------------------------------- /config/details-for-models-that-need-additional-manual-config.yaml: -------------------------------------------------------------------------------- 1 | model_list: 2 | #Databricks Models (Commented out because the api_base value will vary by user and needs to be manually updated) 3 | # - model_name: databricks-meta-llama-3-1-70b-instruct 4 | # litellm_params: 5 | # model: databricks/databricks-meta-llama-3-1-70b-instruct 6 | # api_base: "" # e.g.: https://adb-3064715882934586.6.azuredatabricks.net/serving-endpoints 7 | 8 | # - model_name: databricks-meta-llama-3-1-405b-instruct 9 | # litellm_params: 10 | # model: databricks/databricks-meta-llama-3-1-405b-instruct 11 | # api_base: "" # e.g.: https://adb-3064715882934586.6.azuredatabricks.net/serving-endpoints 12 | 13 | # - model_name: databricks-dbrx-instruct 14 | # litellm_params: 15 | # model: databricks/databricks-dbrx-instruct 16 | # api_base: "" # e.g.: https://adb-3064715882934586.6.azuredatabricks.net/serving-endpoints 17 | 18 | # - model_name: databricks-meta-llama-3-70b-instruct 19 | # litellm_params: 20 | # model: databricks/databricks-meta-llama-3-70b-instruct 21 | # api_base: "" # e.g.: https://adb-3064715882934586.6.azuredatabricks.net/serving-endpoints 22 | 23 | # - model_name: databricks-llama-2-70b-chat 24 | # litellm_params: 25 | # model: databricks/databricks-llama-2-70b-chat 26 | # api_base: "" # e.g.: https://adb-3064715882934586.6.azuredatabricks.net/serving-endpoints 27 | 28 | # - model_name: databricks-mixtral-8x7b-instruct 29 | # litellm_params: 30 | # model: databricks/databricks-mixtral-8x7b-instruct 31 | # api_base: "" # e.g.: https://adb-3064715882934586.6.azuredatabricks.net/serving-endpoints 32 | 33 | # - model_name: databricks-mpt-30b-instruct 34 | # litellm_params: 35 | # model: databricks/databricks-mpt-30b-instruct 36 | # api_base: "" # e.g.: 
https://adb-3064715882934586.6.azuredatabricks.net/serving-endpoints 37 | 38 | # - model_name: databricks-mpt-7b-instruct 39 | # litellm_params: 40 | # model: databricks/databricks-mpt-7b-instruct 41 | # api_base: "" # e.g.: https://adb-3064715882934586.6.azuredatabricks.net/serving-endpoints 42 | 43 | #Sagemaker Models (Commented out because your config will vary dramatically based on the specific model you are using. Refer to the docs: https://docs.litellm.ai/docs/providers/aws_sagemaker) 44 | # - model_name: jumpstart-model 45 | # litellm_params: 46 | # model: sagemaker/jumpstart-dft-hf-textgeneration1-mp-20240815-185614 47 | 48 | #Azure OpenAI Models (Commented out because the api_base and api_version values will vary by user and need to be manually updated) 49 | # - model_name: azure/gpt-4o-realtime-preview-2024-10-01 50 | # litellm_params: 51 | # model: azure/gpt-4o-realtime-preview-2024-10-01 52 | # api_base: https://.openai.azure.com/ 53 | # api_version: "" 54 | 55 | # - model_name: azure/o1-mini 56 | # litellm_params: 57 | # model: azure/o1-mini 58 | # api_base: https://.openai.azure.com/ 59 | # api_version: "" 60 | 61 | # - model_name: azure/o1-preview 62 | # litellm_params: 63 | # model: azure/o1-preview 64 | # api_base: https://.openai.azure.com/ 65 | # api_version: "" 66 | 67 | # - model_name: azure/gpt-4o-mini 68 | # litellm_params: 69 | # model: azure/gpt-4o-mini 70 | # api_base: https://.openai.azure.com/ 71 | # api_version: "" 72 | 73 | # - model_name: azure/gpt-4o-mini-2024-07-18 74 | # litellm_params: 75 | # model: azure/gpt-4o-mini-2024-07-18 76 | # api_base: https://.openai.azure.com/ 77 | # api_version: "" 78 | 79 | # - model_name: azure/gpt-4o 80 | # litellm_params: 81 | # model: azure/gpt-4o 82 | # api_base: https://.openai.azure.com/ 83 | # api_version: "" 84 | 85 | # - model_name: azure/gpt-4o-2024-08-06 86 | # litellm_params: 87 | # model: azure/gpt-4o-2024-08-06 88 | # api_base: https://.openai.azure.com/ 89 | # api_version: "" 90 | 91 | 
# - model_name: azure/gpt-4o-2024-05-13 92 | # litellm_params: 93 | # model: azure/gpt-4o-2024-05-13 94 | # api_base: https://.openai.azure.com/ 95 | # api_version: "" 96 | 97 | # - model_name: azure/gpt-4-turbo 98 | # litellm_params: 99 | # model: azure/gpt-4-turbo 100 | # api_base: https://.openai.azure.com/ 101 | # api_version: "" 102 | 103 | # - model_name: azure/gpt-4-turbo-preview 104 | # litellm_params: 105 | # model: azure/gpt-4-0125-preview 106 | # api_base: https://.openai.azure.com/ 107 | # api_version: "" 108 | 109 | # - model_name: azure/gpt-4-0125-preview 110 | # litellm_params: 111 | # model: azure/gpt-4-0125-preview 112 | # api_base: https://.openai.azure.com/ 113 | # api_version: "" 114 | 115 | # - model_name: azure/gpt-4-1106-preview 116 | # litellm_params: 117 | # model: azure/gpt-4-1106-preview 118 | # api_base: https://.openai.azure.com/ 119 | # api_version: "" 120 | 121 | # - model_name: azure/gpt-3.5-turbo 122 | # litellm_params: 123 | # model: azure/gpt-3.5-turbo 124 | # api_base: https://.openai.azure.com/ 125 | # api_version: "" 126 | 127 | # - model_name: azure/gpt-3.5-turbo-1106 128 | # litellm_params: 129 | # model: azure/gpt-3.5-turbo-1106 130 | # api_base: https://.openai.azure.com/ 131 | # api_version: "" 132 | 133 | # - model_name: azure/gpt-3.5-turbo-0301 134 | # litellm_params: 135 | # model: azure/gpt-3.5-turbo-0301 136 | # api_base: https://.openai.azure.com/ 137 | # api_version: "" 138 | 139 | # - model_name: azure/gpt-3.5-turbo-0613 140 | # litellm_params: 141 | # model: azure/gpt-3.5-turbo-0613 142 | # api_base: https://.openai.azure.com/ 143 | # api_version: "" 144 | 145 | # - model_name: azure/gpt-3.5-turbo-16k 146 | # litellm_params: 147 | # model: azure/gpt-3.5-turbo-16k 148 | # api_base: https://.openai.azure.com/ 149 | # api_version: "" 150 | 151 | # - model_name: azure/gpt-3.5-turbo-16k-0613 152 | # litellm_params: 153 | # model: azure/gpt-3.5-turbo-16k-0613 154 | # api_base: https://.openai.azure.com/ 155 | # 
#!/bin/bash
# Creates a bastion EC2 instance (via Terraform) that can reach the private
# load balancer inside the VPC deployed by litellm-terraform-stack.
# Requires: .env providing TERRAFORM_S3_BUCKET_NAME and EC2_KEY_PAIR_NAME,
# AWS credentials, and an already-deployed litellm-terraform-stack.
set -aeuo pipefail

# Derive the region from the caller's configured AWS account/credentials.
aws_region=$(aws ec2 describe-availability-zones --output text --query 'AvailabilityZones[0].[RegionName]')
echo "$aws_region"

# Load environment variables from .env file (set -a exports everything sourced).
source .env

echo "EC2_KEY_PAIR_NAME: $EC2_KEY_PAIR_NAME"

# Fail fast with a clear message if the state bucket name was left blank in .env.
: "${TERRAFORM_S3_BUCKET_NAME:?TERRAFORM_S3_BUCKET_NAME must be set in .env}"

# Create the Terraform state bucket only if it does not already exist.
if aws s3api head-bucket --bucket "$TERRAFORM_S3_BUCKET_NAME" 2>/dev/null; then
  echo "Terraform Bucket $TERRAFORM_S3_BUCKET_NAME already exists, skipping creation"
else
  echo "Creating bucket $TERRAFORM_S3_BUCKET_NAME..."
  aws s3 mb "s3://$TERRAFORM_S3_BUCKET_NAME" --region "$aws_region"
  echo "Terraform Bucket created successfully"
fi

# Read the VPC id from the main stack's Terraform outputs.
cd litellm-terraform-stack
VPC_ID=$(terraform output -raw vpc_id)
cd ..

cd litellm-private-load-balancer-ec2-terraform

echo "about to deploy"

# Generate the S3 backend configuration for this stack's remote state.
cat > backend.hcl << EOF
bucket  = "${TERRAFORM_S3_BUCKET_NAME}"
key     = "terraform-ec2.tfstate"
region  = "${aws_region}"
encrypt = true
EOF
echo "Generated backend.hcl configuration"

# -reconfigure keeps this consistent with the sibling create/delete scripts
# and avoids stale-backend errors when the bucket or key changes.
terraform init -backend-config=backend.hcl -reconfigure

export TF_VAR_vpc_id=$VPC_ID
export TF_VAR_key_pair_name=$EC2_KEY_PAIR_NAME

terraform apply -auto-approve
echo "deployed"
14 | 15 | cd litellm-fake-llm-load-testing-server-terraform 16 | 17 | if [ -n "$CPU_ARCHITECTURE" ]; then 18 | # Check if CPU_ARCHITECTURE is either "x86" or "arm" 19 | case "$CPU_ARCHITECTURE" in 20 | "x86"|"arm") 21 | ARCH="$CPU_ARCHITECTURE" 22 | ;; 23 | *) 24 | echo "Error: CPU_ARCHITECTURE must be either 'x86' or 'arm'" 25 | exit 1 26 | ;; 27 | esac 28 | else 29 | # Determine architecture from system 30 | ARCH=$(uname -m) 31 | case $ARCH in 32 | x86_64) 33 | ARCH="x86" 34 | ;; 35 | arm64) 36 | ARCH="arm" 37 | ;; 38 | *) 39 | echo "Unsupported architecture: $ARCH" 40 | exit 1 41 | ;; 42 | esac 43 | fi 44 | 45 | echo $ARCH 46 | 47 | echo "about to build and push image" 48 | cd docker 49 | ./docker-build-and-deploy.sh $APP_NAME $ARCH 50 | cd .. 51 | 52 | echo "about to deploy" 53 | 54 | export TF_VAR_vpc_id=$VPC_ID 55 | export TF_VAR_ecr_fake_server_repository=$APP_NAME 56 | export TF_VAR_architecture=$ARCH 57 | export TF_VAR_fake_llm_load_testing_endpoint_certifiacte_arn=$FAKE_LLM_LOAD_TESTING_ENDPOINT_CERTIFICATE_ARN 58 | export TF_VAR_fake_llm_load_testing_endpoint_hosted_zone_name=$FAKE_LLM_LOAD_TESTING_ENDPOINT_HOSTED_ZONE_NAME 59 | export TF_VAR_fake_llm_load_testing_endpoint_record_name=$FAKE_LLM_LOAD_TESTING_ENDPOINT_RECORD_NAME 60 | 61 | 62 | cat > backend.hcl << EOF 63 | bucket = "${TERRAFORM_S3_BUCKET_NAME}" 64 | key = "terraform-fake-llm-server.tfstate" 65 | region = "${aws_region}" 66 | encrypt = true 67 | EOF 68 | echo "Generated backend.hcl configuration" 69 | 70 | terraform init -backend-config=backend.hcl -reconfigure 71 | terraform apply -auto-approve 72 | 73 | echo "deployed" 74 | 75 | if [ $? 
-eq 0 ]; then 76 | LITELLM_ECS_CLUSTER=$(terraform output -raw fake_server_ecs_cluster) 77 | LITELLM_ECS_TASK=$(terraform output -raw fake_server_ecs_task) 78 | 79 | aws ecs update-service \ 80 | --cluster $LITELLM_ECS_CLUSTER \ 81 | --service $LITELLM_ECS_TASK \ 82 | --force-new-deployment \ 83 | --desired-count 3 \ 84 | --no-cli-pager 85 | else 86 | echo "Deployment failed" 87 | fi -------------------------------------------------------------------------------- /delete-fake-llm-load-testing-server.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -aeuo pipefail 3 | 4 | aws_region=$(aws ec2 describe-availability-zones --output text --query 'AvailabilityZones[0].[RegionName]') 5 | echo $aws_region 6 | 7 | APP_NAME=fakeserver 8 | 9 | source .env 10 | 11 | cd litellm-terraform-stack 12 | VPC_ID=$(terraform output -raw vpc_id) 13 | cd .. 14 | 15 | cd litellm-fake-llm-load-testing-server-terraform 16 | 17 | if [ -n "$CPU_ARCHITECTURE" ]; then 18 | # Check if CPU_ARCHITECTURE is either "x86" or "arm" 19 | case "$CPU_ARCHITECTURE" in 20 | "x86"|"arm") 21 | ARCH="$CPU_ARCHITECTURE" 22 | ;; 23 | *) 24 | echo "Error: CPU_ARCHITECTURE must be either 'x86' or 'arm'" 25 | exit 1 26 | ;; 27 | esac 28 | else 29 | # Determine architecture from system 30 | ARCH=$(uname -m) 31 | case $ARCH in 32 | x86_64) 33 | ARCH="x86" 34 | ;; 35 | arm64) 36 | ARCH="arm" 37 | ;; 38 | *) 39 | echo "Unsupported architecture: $ARCH" 40 | exit 1 41 | ;; 42 | esac 43 | fi 44 | 45 | echo $ARCH 46 | 47 | echo "about to destroy" 48 | 49 | export TF_VAR_vpc_id="vpc-02b681fa786fa8292" 50 | export TF_VAR_ecr_fake_server_repository=$APP_NAME 51 | export TF_VAR_architecture=$ARCH 52 | export TF_VAR_fake_llm_load_testing_endpoint_certifiacte_arn=$FAKE_LLM_LOAD_TESTING_ENDPOINT_CERTIFICATE_ARN 53 | export TF_VAR_fake_llm_load_testing_endpoint_hosted_zone_name=$FAKE_LLM_LOAD_TESTING_ENDPOINT_HOSTED_ZONE_NAME 54 | export 
#!/bin/bash
# Builds the LiteLLM container image (optionally from a source tarball) and
# pushes it to ECR tagged with the LiteLLM version.
# Usage: ./docker-build-and-deploy.sh <app_name> <build_from_source> <arch>
#   app_name           ECR repository / image name
#   build_from_source  "true" to build from the LiteLLM release tarball
#   arch               "x86" or "arm"

if [ $# -ne 3 ]; then
    # BUG FIX: the original message printed no argument names at all
    # ('Usage: $0 '), leaving the caller guessing.
    echo "Usage: $0 <app_name> <build_from_source(true|false)> <arch(x86|arm)>"
    exit 1
fi

APP_NAME=$1
BUILD_FROM_SOURCE=$(echo "$2" | tr '[:upper:]' '[:lower:]')
ARCH=$3

# check again if LITELLM_VERSION is set if script is used standalone
source .env
if [[ (-z "$LITELLM_VERSION") || ("$LITELLM_VERSION" == "placeholder") ]]; then
    echo "LITELLM_VERSION must be set in .env file"
    exit 1
fi

if [ "$BUILD_FROM_SOURCE" = "true" ]; then
    echo "Building from source..."
    if [ ! -d "litellm-source" ]; then
        echo "Fetching source for LiteLLM version ${LITELLM_VERSION}"
        mkdir litellm-source
        curl -L "https://github.com/BerriAI/litellm/archive/refs/tags/${LITELLM_VERSION}.tar.gz" | tar -xz -C litellm-source --strip-components=1
    else
        LITELLM_SOURCE_VERSION=$(yq '.tool.poetry.version' litellm-source/pyproject.toml)
        # NOTE(review): this only matches tags of the exact form
        # "v<semver>"; tags like "main-v1.56.5" can never compare equal to
        # v"$LITELLM_SOURCE_VERSION" -- confirm the intended tag format.
        if [ v"$LITELLM_SOURCE_VERSION" != "$LITELLM_VERSION" ]; then
            echo "Your specified version ${LITELLM_VERSION} does not match the source version ${LITELLM_SOURCE_VERSION}"
            echo "Please remove the litellm-source directory manually and re-run this script when you change the version number"
            exit 1
        else
            # Fixed: the trailing period used to sit outside the quotes and
            # was echoed as a stray extra argument.
            echo "Source version ${LITELLM_VERSION} already exists, skipping fetching."
        fi
    fi

    cd litellm-source
fi

AWS_REGION=$(aws ec2 describe-availability-zones --output text --query 'AvailabilityZones[0].[RegionName]')
export AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query "Account" --output text)

# Check if the repository already exists
REPO_EXISTS=$(aws ecr describe-repositories --repository-names "$APP_NAME" 2>/dev/null)

if [ -z "$REPO_EXISTS" ]; then
    # Repository does not exist, create it with tag
    aws ecr create-repository --repository-name "$APP_NAME" --tags Key=project,Value=llmgateway
else
    echo "Repository $APP_NAME already exists, checking tags..."

    # Get current tags for the repository
    CURRENT_TAGS=$(aws ecr list-tags-for-resource --resource-arn "arn:aws:ecr:${AWS_REGION}:${AWS_ACCOUNT_ID}:repository/${APP_NAME}")

    # Check if project=llmgateway tag exists
    if ! echo "$CURRENT_TAGS" | grep -q '"Key": "project".*"Value": "llmgateway"'; then
        echo "Adding project=llmgateway tag..."
        aws ecr tag-resource \
            --resource-arn "arn:aws:ecr:${AWS_REGION}:${AWS_ACCOUNT_ID}:repository/${APP_NAME}" \
            --tags Key=project,Value=llmgateway
    else
        echo "Tag project=llmgateway already exists."
    fi
fi

echo "$ARCH"

# Map the friendly architecture name to a Docker platform string.
case $ARCH in
    "x86")
        DOCKER_ARCH="linux/amd64"
        ;;
    "arm")
        DOCKER_ARCH="linux/arm64"
        ;;
    *)
        echo "Unsupported architecture: $ARCH"
        exit 1
        ;;
esac

echo "$DOCKER_ARCH"

# Authenticate Docker against the account's ECR registry, then build/tag/push.
aws ecr get-login-password --region "$AWS_REGION" | docker login --username AWS --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_REGION.amazonaws.com"
docker build --platform "$DOCKER_ARCH" --build-arg LITELLM_VERSION="${LITELLM_VERSION}" -t "$APP_NAME:${LITELLM_VERSION}" .
echo "Tagging image with ${APP_NAME}:${LITELLM_VERSION}"
docker tag "$APP_NAME:${LITELLM_VERSION}" "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_REGION.amazonaws.com/$APP_NAME:${LITELLM_VERSION}"
docker push "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_REGION.amazonaws.com/$APP_NAME:${LITELLM_VERSION}"
84 | echo "Tagging image with ${APP_NAME}:${LITELLM_VERSION}" 85 | docker tag $APP_NAME\:${LITELLM_VERSION} $AWS_ACCOUNT_ID.dkr.ecr.$AWS_REGION.amazonaws.com/$APP_NAME\:${LITELLM_VERSION} 86 | docker push $AWS_ACCOUNT_ID.dkr.ecr.$AWS_REGION.amazonaws.com/$APP_NAME\:${LITELLM_VERSION} 87 | -------------------------------------------------------------------------------- /install-cloud9-prerequisites.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Function to check if a command exists 4 | command_exists() { 5 | command -v "$1" >/dev/null 2>&1 6 | } 7 | 8 | # Check if yq is installed 9 | if command_exists yq; then 10 | echo "yq is already installed" 11 | yq --version 12 | else 13 | echo "yq is not installed. Installing now..." 14 | 15 | # Set the version 16 | VERSION="v4.40.5" 17 | BINARY="yq_linux_amd64" 18 | 19 | # Check if script is run with sudo 20 | if [ "$EUID" -ne 0 ]; then 21 | echo "Please run with sudo privileges" 22 | exit 1 23 | fi 24 | 25 | # Download yq 26 | if wget https://github.com/mikefarah/yq/releases/download/${VERSION}/${BINARY} -O /usr/bin/yq; then 27 | # Make it executable 28 | chmod +x /usr/bin/yq 29 | 30 | echo "yq has been successfully installed" 31 | yq --version 32 | else 33 | echo "Failed to download yq" 34 | exit 1 35 | fi 36 | fi 37 | 38 | sudo yum update -y 39 | 40 | # Install required dependencies 41 | sudo yum install -y yum-utils unzip wget 42 | 43 | # Download the signing key 44 | wget -O- https://apt.releases.hashicorp.com/gpg | sudo gpg --import - 45 | 46 | # Add the HashiCorp repository 47 | sudo yum-config-manager --add-repo https://rpm.releases.hashicorp.com/AmazonLinux/hashicorp.repo 48 | 49 | # Install Terraform 50 | sudo yum install -y terraform 51 | 52 | # Verify installation 53 | terraform version 54 | 55 | curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl" 56 | 57 | sudo install -o root -g root -m 
#!/bin/bash
# Builds the fake LLM load-testing server image and pushes it to ECR as
# <app_name>:latest.
# Usage: ./docker-build-and-deploy.sh <app_name> <arch>

if [ $# -ne 2 ]; then
    # BUG FIX: the original message printed no argument names ('Usage: $0 ').
    echo "Usage: $0 <app_name> <arch(x86|arm)>"
    exit 1
fi

APP_NAME=$1
ARCH=$2

AWS_REGION=$(aws ec2 describe-availability-zones --output text --query 'AvailabilityZones[0].[RegionName]')
export AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query "Account" --output text)

# Check if the repository already exists
REPO_EXISTS=$(aws ecr describe-repositories --repository-names "$APP_NAME" 2>/dev/null)

if [ -z "$REPO_EXISTS" ]; then
    # Repository does not exist, create it with tag
    aws ecr create-repository --repository-name "$APP_NAME" --tags Key=project,Value=llmgateway
else
    echo "Repository $APP_NAME already exists, checking tags..."

    # Get current tags for the repository
    CURRENT_TAGS=$(aws ecr list-tags-for-resource --resource-arn "arn:aws:ecr:${AWS_REGION}:${AWS_ACCOUNT_ID}:repository/${APP_NAME}")

    # Check if project=llmgateway tag exists
    if ! echo "$CURRENT_TAGS" | grep -q '"Key": "project".*"Value": "llmgateway"'; then
        echo "Adding project=llmgateway tag..."
        aws ecr tag-resource \
            --resource-arn "arn:aws:ecr:${AWS_REGION}:${AWS_ACCOUNT_ID}:repository/${APP_NAME}" \
            --tags Key=project,Value=llmgateway
    else
        echo "Tag project=llmgateway already exists."
    fi
fi

echo "$ARCH"

# Map the friendly architecture name to a Docker platform string.
case $ARCH in
    "x86")
        DOCKER_ARCH="linux/amd64"
        ;;
    "arm")
        DOCKER_ARCH="linux/arm64"
        ;;
    *)
        echo "Unsupported architecture: $ARCH"
        exit 1
        ;;
esac

echo "$DOCKER_ARCH"

# Authenticate Docker against the account's ECR registry, then build/tag/push.
aws ecr get-login-password --region "$AWS_REGION" | docker login --username AWS --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_REGION.amazonaws.com"
docker build --platform "$DOCKER_ARCH" -t "$APP_NAME" .
docker tag "$APP_NAME:latest" "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_REGION.amazonaws.com/$APP_NAME:latest"
docker push "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_REGION.amazonaws.com/$APP_NAME:latest"
code: Optional[int], 26 | ): 27 | self.message = message 28 | self.type = type 29 | self.param = param 30 | self.code = code 31 | 32 | def to_dict(self) -> dict: 33 | return { 34 | "message": self.message, 35 | "type": self.type, 36 | "param": self.param, 37 | "code": self.code, 38 | } 39 | 40 | 41 | limiter = Limiter(key_func=get_remote_address) 42 | app = FastAPI() 43 | app.state.limiter = limiter 44 | 45 | 46 | @app.exception_handler(RateLimitExceeded) 47 | async def _rate_limit_exceeded_handler(request: Request, exc: RateLimitExceeded): 48 | return JSONResponse(status_code=429, content={"detail": "Rate Limited!"}) 49 | 50 | 51 | app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler) 52 | 53 | app.add_middleware( 54 | CORSMiddleware, 55 | allow_origins=["*"], 56 | allow_credentials=True, 57 | allow_methods=["*"], 58 | allow_headers=["*"], 59 | ) 60 | 61 | 62 | @app.get("/") 63 | async def health_check(): 64 | return {"status": "healthy"} 65 | 66 | @app.post("/model/{model_id}/converse") 67 | async def converse(model_id: str, request: Request): 68 | """ 69 | Fake Bedrock 'converse' endpoint. 70 | Returns a single JSON response according to the Bedrock response schema. 71 | """ 72 | body = await request.json() 73 | 74 | # Simulate random processing delay (optional) 75 | # await asyncio.sleep(random.uniform(1.0, 3.0)) 76 | 77 | # You could inspect 'body' here to see the user's messages or parameters. 78 | # For example: messages = body.get("messages", []) 79 | # Then craft a response. Here we just hard-code a sample. 80 | 81 | # A minimal valid Bedrock-like response 82 | response_data = { 83 | "output": { 84 | "message": { 85 | "role": "assistant", 86 | "content": [ 87 | { 88 | "text": "Hello there! This is a fake response from the Bedrock model." 
@app.post("/chat/completions")
@app.post("/v1/chat/completions")
async def completion(request: Request):
    """
    Fake OpenAI chat-completions endpoint.

    Returns a fixed non-streaming completion, or — when the request body
    contains "stream": true — an SSE stream of chunks mimicking OpenAI's
    streaming wire format.
    """
    payload = await request.json()

    if not payload.get("stream", False):
        # Non-streaming: one canned completion object.
        # (A random 1–3 second delay can be simulated by un-commenting this.)
        # await asyncio.sleep(random.uniform(1.0, 3.0))
        return {
            "id": "chatcmpl-123",
            "object": "chat.completion",
            "created": 1677652288,
            "model": "gpt-3.5-turbo-0301",
            "system_fingerprint": "fp_44709d6fcb",
            "choices": [
                {
                    "index": 0,
                    "message": {
                        "role": "assistant",
                        "content": "Hello there, how may I assist you today?",
                    },
                    "logprobs": None,
                    "finish_reason": "stop",
                }
            ],
            "usage": {"prompt_tokens": 9, "completion_tokens": 12, "total_tokens": 21},
        }

    # Streaming branch.
    # (A small initial delay can be simulated by un-commenting this.)
    # await asyncio.sleep(random.uniform(0.8, 1.5))

    async def stream_generator():
        # Pseudo "tokens" that together form the canned answer.
        content_parts = [
            "Hello",
            " there,",
            " how ",
            "can ",
            "I ",
            "assist ",
            "you ",
            "today?",
        ]

        for idx, token in enumerate(content_parts):
            # Chunk shaped like OpenAI's streaming format; only the first
            # chunk carries the "role" key in its delta.
            chunk_data = {
                "id": "chatcmpl-123",
                "object": "chat.completion.chunk",
                "created": 1677652288,
                "model": "gpt-3.5-turbo-0301",
                "choices": [
                    {
                        "delta": {
                            **({"role": "assistant"} if idx == 0 else {}),
                            "content": token,
                        },
                        "index": 0,
                        "finish_reason": None,
                    }
                ],
            }
            yield f"data: {json.dumps(chunk_data)}\n\n"
            # await asyncio.sleep(random.uniform(0.2, 0.8))

        # Terminal chunk with finish_reason "stop", then the [DONE] marker.
        final_chunk = {
            "id": "chatcmpl-123",
            "object": "chat.completion.chunk",
            "created": 1677652288,
            "model": "gpt-3.5-turbo-0301",
            "choices": [
                {
                    "delta": {},
                    "index": 0,
                    "finish_reason": "stop",
                }
            ],
        }
        yield f"data: {json.dumps(final_chunk)}\n\n"
        yield "data: [DONE]\n\n"

    return StreamingResponse(stream_generator(), media_type="text/event-stream")
== "__main__": 205 | port = 8080 206 | while True: 207 | sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 208 | result = sock.connect_ex(("0.0.0.0", port)) 209 | if result != 0: 210 | print(f"Port {port} is available, starting server on {port}...") 211 | break 212 | else: 213 | port += 1 214 | 215 | uvicorn.run(app, host="0.0.0.0", port=port) 216 | -------------------------------------------------------------------------------- /litellm-fake-llm-load-testing-server-terraform/docker/requirements.txt: -------------------------------------------------------------------------------- 1 | fastapi 2 | uvicorn 3 | slowapi 4 | httpx 5 | openai -------------------------------------------------------------------------------- /litellm-fake-llm-load-testing-server-terraform/main.tf: -------------------------------------------------------------------------------- 1 | # Data sources 2 | data "aws_subnets" "public" { 3 | filter { 4 | name = "vpc-id" 5 | values = [var.vpc_id] 6 | } 7 | filter { 8 | name = "map-public-ip-on-launch" 9 | values = ["true"] 10 | } 11 | } 12 | 13 | data "aws_subnets" "private" { 14 | filter { 15 | name = "vpc-id" 16 | values = [var.vpc_id] 17 | } 18 | filter { 19 | name = "map-public-ip-on-launch" 20 | values = ["false"] 21 | } 22 | } 23 | 24 | data "aws_route53_zone" "hosted_zone" { 25 | name = var.fake_llm_load_testing_endpoint_hosted_zone_name 26 | } 27 | 28 | data "aws_ecr_repository" "fake_server_repo" { 29 | name = var.ecr_fake_server_repository 30 | } 31 | 32 | # ECS Cluster 33 | resource "aws_ecs_cluster" "fake_llm_cluster" { 34 | name = "FakeLlmCluster" 35 | } 36 | 37 | # ECS Task Definition 38 | resource "aws_ecs_task_definition" "fake_server_task_def" { 39 | family = "FakeServerTaskDef" 40 | requires_compatibilities = ["FARGATE"] 41 | network_mode = "awsvpc" 42 | cpu = "512" 43 | memory = "1024" 44 | execution_role_arn = aws_iam_role.ecs_task_execution_role.arn 45 | task_role_arn = aws_iam_role.ecs_task_role.arn 46 | 47 | 
# CloudWatch Log Group
# Destination for the FakeServer container's awslogs log driver (30-day retention).
resource "aws_cloudwatch_log_group" "fake_server_logs" {
  name              = "/ecs/FakeServer"
  retention_in_days = 30
}

# IAM Roles
# Execution role: used by the ECS agent to pull the image and write logs
# (AmazonECSTaskExecutionRolePolicy attached below).
resource "aws_iam_role" "ecs_task_execution_role" {
  name = "FakeServerEcsTaskExecutionRole"

  assume_role_policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Action = "sts:AssumeRole"
        Effect = "Allow"
        Principal = {
          Service = "ecs-tasks.amazonaws.com"
        }
      }
    ]
  })
}

resource "aws_iam_role_policy_attachment" "ecs_task_execution_role_policy" {
  role       = aws_iam_role.ecs_task_execution_role.name
  policy_arn = "arn:aws:iam::aws:policy/service-role/AmazonECSTaskExecutionRolePolicy"
}

# Task role: assumed by the application container itself. No policies are
# attached here — the fake server needs no AWS API access.
resource "aws_iam_role" "ecs_task_role" {
  name = "FakeServerEcsTaskRole"

  assume_role_policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Action = "sts:AssumeRole"
        Effect = "Allow"
        Principal = {
          Service = "ecs-tasks.amazonaws.com"
        }
      }
    ]
  })
}

# Application Load Balancer
# Internet-facing ALB in the VPC's public subnets; TLS terminates here.
resource "aws_lb" "fake_server_alb" {
  name               = "FakeServer-ALB"
  internal           = false
  load_balancer_type = "application"
  security_groups    = [aws_security_group.alb_sg.id]
  subnets            = data.aws_subnets.public.ids

  enable_deletion_protection = false
}

# ALB HTTPS Listener
# Terminates TLS with the user-supplied certificate and forwards everything
# to the target group.
resource "aws_lb_listener" "fake_server_listener" {
  load_balancer_arn = aws_lb.fake_server_alb.arn
  port              = "443"
  protocol          = "HTTPS"
  ssl_policy        = "ELBSecurityPolicy-TLS13-1-2-2021-06"
  certificate_arn   = var.fake_llm_load_testing_endpoint_certifiacte_arn

  default_action {
    type             = "forward"
    target_group_arn = aws_lb_target_group.fake_server_tg.arn
  }
}

# Target Group
# IP-mode targets (required for Fargate awsvpc tasks) on the container port;
# the health check hits the server's "/" endpoint.
resource "aws_lb_target_group" "fake_server_tg" {
  name        = "FakeServer-TG"
  port        = 8080
  protocol    = "HTTP"
  vpc_id      = var.vpc_id
  target_type = "ip"

  health_check {
    enabled             = true
    interval            = 30
    path                = "/"
    port                = "traffic-port"
    healthy_threshold   = 3
    unhealthy_threshold = 3
    timeout             = 5
    protocol            = "HTTP"
    matcher             = "200-399"
  }
}

# Security Groups
# ALB: HTTPS open to the internet (intentional — this is a public
# load-testing endpoint).
resource "aws_security_group" "alb_sg" {
  name        = "fake-server-alb-sg"
  description = "Allow HTTPS inbound traffic"
  vpc_id      = var.vpc_id

  ingress {
    description = "HTTPS from internet"
    from_port   = 443
    to_port     = 443
    protocol    = "tcp"
    cidr_blocks = ["0.0.0.0/0"]
  }

  egress {
    from_port   = 0
    to_port     = 0
    protocol    = "-1"
    cidr_blocks = ["0.0.0.0/0"]
  }
}

# ECS tasks: accept port 8080 only from the ALB's security group.
resource "aws_security_group" "ecs_sg" {
  name        = "fake-server-ecs-sg"
  description = "Allow inbound traffic from ALB"
  vpc_id      = var.vpc_id

  ingress {
    description     = "HTTP from ALB"
    from_port       = 8080
    to_port         = 8080
    protocol        = "tcp"
    security_groups = [aws_security_group.alb_sg.id]
  }

  egress {
    from_port   = 0
    to_port     = 0
    protocol    = "-1"
    cidr_blocks = ["0.0.0.0/0"]
  }
}
= 0 208 | protocol = "-1" 209 | cidr_blocks = ["0.0.0.0/0"] 210 | } 211 | } 212 | 213 | # ECS Service 214 | resource "aws_ecs_service" "fake_server_service" { 215 | name = "FakeServer" 216 | cluster = aws_ecs_cluster.fake_llm_cluster.id 217 | task_definition = aws_ecs_task_definition.fake_server_task_def.arn 218 | desired_count = 3 219 | launch_type = "FARGATE" 220 | health_check_grace_period_seconds = 300 221 | 222 | network_configuration { 223 | subnets = data.aws_subnets.private.ids 224 | security_groups = [aws_security_group.ecs_sg.id] 225 | assign_public_ip = false 226 | } 227 | 228 | load_balancer { 229 | target_group_arn = aws_lb_target_group.fake_server_tg.arn 230 | container_name = "FakeServerContainer" 231 | container_port = 8080 232 | } 233 | } 234 | 235 | # Route 53 Record 236 | resource "aws_route53_record" "fake_server_dns" { 237 | zone_id = data.aws_route53_zone.hosted_zone.zone_id 238 | name = var.fake_llm_load_testing_endpoint_record_name 239 | type = "A" 240 | 241 | alias { 242 | name = aws_lb.fake_server_alb.dns_name 243 | zone_id = aws_lb.fake_server_alb.zone_id 244 | evaluate_target_health = true 245 | } 246 | } 247 | 248 | -------------------------------------------------------------------------------- /litellm-fake-llm-load-testing-server-terraform/outputs.tf: -------------------------------------------------------------------------------- 1 | # Outputs 2 | output "fake_server_ecs_cluster" { 3 | value = aws_ecs_cluster.fake_llm_cluster.name 4 | description = "Name of the ECS Cluster" 5 | } 6 | 7 | output "fake_server_ecs_task" { 8 | value = aws_ecs_service.fake_server_service.name 9 | description = "Name of the task service" 10 | } 11 | 12 | output "fake_server_service_url" { 13 | value = "https://${var.fake_llm_load_testing_endpoint_record_name}" 14 | description = "URL of the deployed service" 15 | } 16 | -------------------------------------------------------------------------------- 
# ---------- litellm-fake-llm-load-testing-server-terraform/providers.tf ----------
provider "aws" {
}

terraform {
  backend "s3" {}
}

data "aws_region" "current" {}

# ---------- litellm-fake-llm-load-testing-server-terraform/variables.tf ----------
# Inputs for the fake-LLM load-testing stack.
variable "vpc_id" {
  description = "ID of the VPC"
  type        = string
}

# NOTE(review): "certifiacte" is a typo, but this name is part of the module's
# public interface (set by callers in tfvars), so it is kept for compatibility.
variable "fake_llm_load_testing_endpoint_certifiacte_arn" {
  description = "ARN of the SSL certificate"
  type        = string
}

variable "fake_llm_load_testing_endpoint_hosted_zone_name" {
  description = "Name of the hosted zone"
  type        = string
}

variable "fake_llm_load_testing_endpoint_record_name" {
  description = "Route53 A record name for the service"
  type        = string
}

variable "ecr_fake_server_repository" {
  description = "Name of the ECR repository"
  type        = string
}

variable "architecture" {
  description = "CPU architecture (x86 or arm)"
  type        = string
}

# ---------- litellm-private-load-balancer-ec2-terraform/main.tf ----------
# Bastion EC2 host used to reach the private load balancer.

# Existing VPC to deploy into.
data "aws_vpc" "imported_vpc" {
  id = var.vpc_id
}

# "Public" subnets are detected as subnets that auto-assign a public IP.
data "aws_subnets" "public_subnets" {
  filter {
    name   = "vpc-id"
    values = [var.vpc_id]
  }

  filter {
    name   = "map-public-ip-on-launch"
    values = ["true"]
  }
}

# Most recent Amazon Linux 2023 AMI (x86_64, kernel 6.1).
data "aws_ami" "amazon_linux" {
  most_recent = true
  owners      = ["amazon"]

  filter {
    name   = "name"
    values = ["al2023-ami-*-kernel-6.1-x86_64"]
  }

  filter {
    name   = "virtualization-type"
    values = ["hvm"]
  }
}

# Security group for the bastion instance.
resource "aws_security_group" "linux_sg" {
  name        = "LinuxInstanceSG"
  description = "Security group for Linux EC2 instance"
  vpc_id      = data.aws_vpc.imported_vpc.id

  # SSH from anywhere — consider restricting to specific IPs in production.
  ingress {
    from_port   = 22
    to_port     = 22
    protocol    = "tcp"
    cidr_blocks = ["0.0.0.0/0"]
  }

  # Unrestricted egress.
  egress {
    from_port   = 0
    to_port     = 0
    protocol    = "-1"
    cidr_blocks = ["0.0.0.0/0"]
  }

  tags = {
    Name = "LinuxInstanceSG"
  }
}

# IAM role the instance assumes so Systems Manager can manage it.
resource "aws_iam_role" "ec2_ssm_role" {
  name = "Ec2SsmRole"

  assume_role_policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Action = "sts:AssumeRole"
        Effect = "Allow"
        Principal = {
          Service = "ec2.amazonaws.com"
        }
      }
    ]
  })
}

# Managed policy that enables SSM Session Manager access.
resource "aws_iam_role_policy_attachment" "ssm_policy_attachment" {
  role       = aws_iam_role.ec2_ssm_role.name
  policy_arn = "arn:aws:iam::aws:policy/AmazonSSMManagedInstanceCore"
}

resource "aws_iam_instance_profile" "ec2_instance_profile" {
  name = "ec2-instance-profile"
  role = aws_iam_role.ec2_ssm_role.name
}

# Bastion instance, placed in the first discovered public subnet.
resource "aws_instance" "linux_instance" {
  ami           = data.aws_ami.amazon_linux.id
  instance_type = "t3.small"
  subnet_id     = length(data.aws_subnets.public_subnets.ids) > 0 ?
data.aws_subnets.public_subnets.ids[0] : null 99 | vpc_security_group_ids = [aws_security_group.linux_sg.id] 100 | key_name = var.key_pair_name 101 | iam_instance_profile = aws_iam_instance_profile.ec2_instance_profile.name 102 | associate_public_ip_address = true 103 | 104 | metadata_options { 105 | http_endpoint = "enabled" 106 | http_tokens = "required" 107 | } 108 | 109 | tags = { 110 | Name = "LinuxInstance" 111 | } 112 | 113 | lifecycle { 114 | precondition { 115 | condition = length(data.aws_subnets.public_subnets.ids) > 0 116 | error_message = "No subnets with auto-assign public IP enabled were found in the VPC. Please enable auto-assign public IP on at least one subnet." 117 | } 118 | } 119 | } 120 | -------------------------------------------------------------------------------- /litellm-private-load-balancer-ec2-terraform/outputs.tf: -------------------------------------------------------------------------------- 1 | # Output the instance ID 2 | output "linux_instance_id" { 3 | value = aws_instance.linux_instance.id 4 | description = "Linux EC2 Instance ID" 5 | } 6 | 7 | # Output the public IP address 8 | output "bastion_host_public_ip" { 9 | value = aws_instance.linux_instance.public_ip 10 | description = "Public IP address of the Linux EC2 Instance" 11 | } 12 | -------------------------------------------------------------------------------- /litellm-private-load-balancer-ec2-terraform/providers.tf: -------------------------------------------------------------------------------- 1 | provider "aws" { 2 | } 3 | 4 | terraform { 5 | backend "s3" {} 6 | } -------------------------------------------------------------------------------- /litellm-private-load-balancer-ec2-terraform/variables.tf: -------------------------------------------------------------------------------- 1 | # Variables 2 | variable "vpc_id" { 3 | description = "The ID of the VPC" 4 | type = string 5 | } 6 | 7 | variable "key_pair_name" { 8 | description = "The name of the key pair to use 
for SSH access" 9 | type = string 10 | } -------------------------------------------------------------------------------- /litellm-s3-log-bucket-terraform/outputs.tf: -------------------------------------------------------------------------------- 1 | # Outputs to match the CDK stack's CfnOutput 2 | output "LogBucketName" { 3 | description = "The name of the Log S3 bucket" 4 | value = aws_s3_bucket.log_bucket.bucket 5 | } 6 | 7 | output "LogBucketArn" { 8 | description = "The ARN of the Log S3 bucket" 9 | value = aws_s3_bucket.log_bucket.arn 10 | } -------------------------------------------------------------------------------- /litellm-s3-log-bucket-terraform/provider.tf: -------------------------------------------------------------------------------- 1 | provider "aws" { 2 | default_tags { 3 | tags = { 4 | "stack-id" = var.name 5 | "project" = "llmgateway" 6 | } 7 | } 8 | } 9 | 10 | terraform { 11 | backend "s3" {} 12 | } -------------------------------------------------------------------------------- /litellm-s3-log-bucket-terraform/s3.tf: -------------------------------------------------------------------------------- 1 | resource "aws_s3_bucket" "log_bucket" { 2 | bucket_prefix = "litellm-logs-" 3 | force_destroy = true 4 | } 5 | 6 | resource "aws_s3_bucket_server_side_encryption_configuration" "log_bucket" { 7 | bucket = aws_s3_bucket.log_bucket.id 8 | 9 | rule { 10 | apply_server_side_encryption_by_default { 11 | sse_algorithm = "AES256" 12 | } 13 | } 14 | } 15 | 16 | resource "aws_s3_bucket_policy" "log_bucket" { 17 | bucket = aws_s3_bucket.log_bucket.id 18 | 19 | policy = jsonencode({ 20 | Version = "2012-10-17" 21 | Statement = [ 22 | { 23 | Sid = "EnforceSSLOnly" 24 | Effect = "Deny" 25 | Principal = "*" 26 | Action = "s3:*" 27 | Resource = [ 28 | aws_s3_bucket.log_bucket.arn, 29 | "${aws_s3_bucket.log_bucket.arn}/*" 30 | ] 31 | Condition = { 32 | Bool = { 33 | "aws:SecureTransport" = "false" 34 | } 35 | } 36 | } 37 | ] 38 | }) 39 | } 40 | 41 | resource 
"aws_s3_bucket_public_access_block" "log_bucket" { 42 | bucket = aws_s3_bucket.log_bucket.id 43 | block_public_acls = true 44 | block_public_policy = true 45 | } 46 | -------------------------------------------------------------------------------- /litellm-s3-log-bucket-terraform/variables.tf: -------------------------------------------------------------------------------- 1 | variable "name" { 2 | type = string 3 | description = "Name or ID of the stack (used as a tag)" 4 | } -------------------------------------------------------------------------------- /litellm-terraform-stack/main.tf: -------------------------------------------------------------------------------- 1 | #-------------------------------------------------------------- 2 | # Adding guidance solution ID via AWS CloudFormation resource 3 | #-------------------------------------------------------------- 4 | resource "aws_cloudformation_stack" "guidance_deployment_metrics" { 5 | name = "tracking-stack" 6 | template_body = < 0 75 | ] 76 | 77 | existing_private_subnet_ids = [ 78 | for subnet_id, rt in data.aws_route_table.private_subnet_route_tables : subnet_id 79 | if length([ 80 | for route in rt.routes : route 81 | if route.gateway_id != null && 82 | can(regex("^igw-", route.gateway_id)) && 83 | route.cidr_block == "0.0.0.0/0" 84 | ]) == 0 85 | ] 86 | 87 | # The final chosen subnets for "private_with_egress" or "private_isolated" usage. 88 | # If existing VPC => data subnets (you must do your own filtering in real usage). 89 | # If new VPC => the private subnets we created. 90 | chosen_subnet_ids = local.creating_new_vpc ? local.new_private_subnet_ids : local.existing_private_subnet_ids 91 | } 92 | 93 | locals { 94 | create_endpoints = (local.creating_new_vpc || var.create_vpc_endpoints_in_existing_vpc) 95 | } 96 | 97 | data "aws_route_tables" "existing_vpc_all" { 98 | # only do the lookup if var.vpc_id is set 99 | count = local.creating_new_vpc ? 
0 : 1 100 | 101 | filter { 102 | name = "vpc-id" 103 | values = [var.vpc_id] 104 | } 105 | } 106 | 107 | locals { 108 | # If we’re using an existing VPC, fetch ALL route table IDs. 109 | # Otherwise, just pick the new route tables from our resources. 110 | s3_gateway_route_table_ids = local.creating_new_vpc ? [aws_route_table.public[0].id, local.private_route_table.id] : data.aws_route_tables.existing_vpc_all[0].ids 111 | private_route_table = local.creating_new_vpc ? (local.nat_gateway_count == 1 ? aws_route_table.private_with_nat[0] : aws_route_table.private_isolated[0]) : (null) 112 | 113 | } 114 | 115 | data "aws_vpc_endpoint_service" "bedrock_agent" { 116 | # This service name must match exactly what you used in the resource 117 | service_name = "com.amazonaws.${data.aws_region.current.name}.bedrock-agent" 118 | } 119 | 120 | data "aws_subnet" "chosen_subnets" { 121 | count = length(local.chosen_subnet_ids) 122 | id = local.chosen_subnet_ids[count.index] 123 | } 124 | 125 | locals { 126 | # A map from subnet_id => availability_zone 127 | subnet_az_map = { 128 | for idx, s in data.aws_subnet.chosen_subnets : 129 | s.id => s.availability_zone 130 | } 131 | } 132 | 133 | locals { 134 | # Suppose local.chosen_subnet_ids is the list of subnets you want to use 135 | # for endpoints in general. We filter them down to only those whose AZ 136 | # is in the service's list of availability_zones. 137 | bedrock_agent_compatible_subnets = [ 138 | for subnet_id in local.chosen_subnet_ids : subnet_id 139 | if contains(data.aws_vpc_endpoint_service.bedrock_agent.availability_zones, local.subnet_az_map[subnet_id]) 140 | ] 141 | } 142 | -------------------------------------------------------------------------------- /litellm-terraform-stack/modules/base/network.tf: -------------------------------------------------------------------------------- 1 | data "aws_vpc" "existing" { 2 | count = local.creating_new_vpc ? 
0 : 1 3 | id = var.vpc_id 4 | } 5 | 6 | # We'll expose a local reference to either the existing VPC or a newly created one: 7 | resource "aws_vpc" "new" { 8 | count = local.creating_new_vpc ? 1 : 0 9 | cidr_block = "10.0.0.0/16" 10 | enable_dns_hostnames = true 11 | enable_dns_support = true 12 | } 13 | 14 | # We create an Internet Gateway only if we're creating the new VPC 15 | resource "aws_internet_gateway" "this" { 16 | count = local.creating_new_vpc ? 1 : 0 17 | vpc_id = aws_vpc.new[0].id 18 | } 19 | 20 | # Create the NAT gateway only if nat_gateway_count = 1 (and we have a new VPC). 21 | # We'll put it in the first public subnet for simplicity. 22 | resource "aws_eip" "nat" { 23 | count = (local.creating_new_vpc && local.nat_gateway_count == 1) ? 1 : 0 24 | domain = "vpc" 25 | } 26 | 27 | resource "aws_nat_gateway" "this" { 28 | count = (local.creating_new_vpc && local.nat_gateway_count == 1) ? 1 : 0 29 | allocation_id = aws_eip.nat[0].id 30 | subnet_id = aws_subnet.public[0].id 31 | depends_on = [aws_internet_gateway.this] 32 | } 33 | 34 | resource "aws_subnet" "public" { 35 | count = local.creating_new_vpc ? 2 : 0 36 | vpc_id = aws_vpc.new[0].id 37 | cidr_block = cidrsubnet(aws_vpc.new[0].cidr_block, 3, count.index) 38 | availability_zone = element(data.aws_availability_zones.available.names, count.index) 39 | map_public_ip_on_launch = true 40 | } 41 | 42 | resource "aws_subnet" "private" { 43 | count = local.creating_new_vpc ? 2 : 0 44 | vpc_id = aws_vpc.new[0].id 45 | cidr_block = cidrsubnet(aws_vpc.new[0].cidr_block, 3, count.index + 2) 46 | availability_zone = element(data.aws_availability_zones.available.names, count.index) 47 | map_public_ip_on_launch = false 48 | } 49 | 50 | # Route tables: one for public subnets, one for private/isolated subnets. 51 | resource "aws_route_table" "public" { 52 | count = local.creating_new_vpc ? 
1 : 0 53 | vpc_id = aws_vpc.new[0].id 54 | route { 55 | cidr_block = "0.0.0.0/0" 56 | gateway_id = aws_internet_gateway.this[0].id 57 | } 58 | } 59 | 60 | resource "aws_route_table" "private_with_nat" { 61 | count = local.creating_new_vpc && (local.nat_gateway_count == 1) ? 1 : 0 62 | vpc_id = aws_vpc.new[0].id 63 | 64 | route { 65 | cidr_block = "0.0.0.0/0" 66 | nat_gateway_id = aws_nat_gateway.this[0].id 67 | } 68 | lifecycle { 69 | ignore_changes = [route] 70 | } 71 | } 72 | 73 | # Route table for isolated private subnets (no routes) 74 | resource "aws_route_table" "private_isolated" { 75 | count = local.creating_new_vpc && (local.nat_gateway_count == 0) ? 1 : 0 76 | vpc_id = aws_vpc.new[0].id 77 | lifecycle { 78 | ignore_changes = [route] 79 | } 80 | } 81 | 82 | # Subnet associations 83 | resource "aws_route_table_association" "public" { 84 | count = local.creating_new_vpc ? length(aws_subnet.public) : 0 85 | subnet_id = aws_subnet.public[count.index].id 86 | route_table_id = aws_route_table.public[0].id 87 | } 88 | 89 | resource "aws_route_table_association" "private" { 90 | count = local.creating_new_vpc ? length(aws_subnet.private) : 0 91 | subnet_id = aws_subnet.private[count.index].id 92 | route_table_id = local.nat_gateway_count == 1 ? aws_route_table.private_with_nat[0].id : aws_route_table.private_isolated[0].id 93 | } 94 | 95 | # Data source for availability_zones 96 | data "aws_availability_zones" "available" { 97 | state = "available" 98 | # We only need 2 for the new VPC, but we’ll still retrieve them all, just using index=0,1 99 | } 100 | 101 | resource "aws_cloudwatch_log_group" "vpc_flow_logs" { 102 | count = local.creating_new_vpc ? 1 : 0 103 | name_prefix = "/aws/vpc/${var.name}-flow-logs" 104 | retention_in_days = 365 105 | } 106 | 107 | resource "aws_flow_log" "this" { 108 | count = local.creating_new_vpc ? 
1 : 0 109 | log_destination = aws_cloudwatch_log_group.vpc_flow_logs[0].arn 110 | log_destination_type = "cloud-watch-logs" 111 | vpc_id = aws_vpc.new[0].id 112 | iam_role_arn = aws_iam_role.vpc_flow_logs_role[0].arn 113 | traffic_type = "ALL" 114 | max_aggregation_interval = 60 115 | } 116 | 117 | data "aws_subnets" "existing_all" { 118 | count = local.creating_new_vpc ? 0 : 1 119 | filter { 120 | name = "vpc-id" 121 | values = [var.vpc_id] 122 | } 123 | } 124 | -------------------------------------------------------------------------------- /litellm-terraform-stack/modules/base/outputs.tf: -------------------------------------------------------------------------------- 1 | ############################################# 2 | # OUTPUTS 3 | ############################################# 4 | output "RdsLitellmHostname" { 5 | description = "The hostname of the LiteLLM RDS instance" 6 | value = aws_db_instance.database.endpoint 7 | } 8 | 9 | output "RdsLitellmSecretArn" { 10 | description = "The ARN of the LiteLLM RDS secret" 11 | value = aws_secretsmanager_secret.db_secret_main.arn 12 | } 13 | 14 | output "RedisHostName" { 15 | description = "The hostname of the Redis cluster" 16 | value = aws_elasticache_replication_group.redis.primary_endpoint_address 17 | } 18 | 19 | output "RdsSecurityGroupId" { 20 | description = "The ID of the RDS security group" 21 | value = aws_security_group.db_sg.id 22 | } 23 | 24 | output "RedisSecurityGroupId" { 25 | description = "The ID of the Redis security group" 26 | value = aws_security_group.redis_sg.id 27 | } 28 | 29 | output "VpcId" { 30 | description = "The ID of the VPC" 31 | value = local.final_vpc_id 32 | } 33 | 34 | # If we created the pull-through cache: 35 | output "EksAlbControllerPrivateEcrRepositoryName" { 36 | description = "ECR repo for EKS ALB Controller (only if outbound disabled + EKS)." 37 | value = (var.disable_outbound_network_access && var.deployment_platform == "EKS") ? 
aws_ecr_repository.my_ecr_repository[0].name : "" 38 | } 39 | 40 | output "private_subnet_ids" { 41 | description = "List of IDs of private subnets" 42 | value = local.creating_new_vpc ? local.new_private_subnet_ids : local.existing_private_subnet_ids 43 | } 44 | 45 | output "public_subnet_ids" { 46 | description = "List of IDs of public subnets" 47 | value = local.creating_new_vpc ? local.new_public_subnet_ids : local.existing_public_subnet_ids 48 | } 49 | 50 | ############################################################################### 51 | # Outputs (mirror the CDK CfnOutputs) 52 | ############################################################################### 53 | output "ConfigBucketName" { 54 | description = "The Name of the configuration bucket" 55 | value = aws_s3_bucket.config_bucket.bucket 56 | } 57 | 58 | output "ConfigBucketArn" { 59 | description = "The ARN of the configuration bucket" 60 | value = aws_s3_bucket.config_bucket.arn 61 | } 62 | 63 | output "WafAclArn" { 64 | description = "The ARN of the WAF ACL" 65 | value = aws_wafv2_web_acl.litellm_waf.arn 66 | } 67 | 68 | # ECR Repositories 69 | data "aws_ecr_repository" "litellm" { 70 | name = var.ecrLitellmRepository 71 | } 72 | 73 | data "aws_ecr_repository" "middleware" { 74 | name = var.ecrMiddlewareRepository 75 | } 76 | 77 | output "LiteLLMRepositoryUrl" { 78 | description = "The URI of the LiteLLM ECR repository" 79 | value = data.aws_ecr_repository.litellm.repository_url 80 | } 81 | 82 | output "MiddlewareRepositoryUrl" { 83 | description = "The URI of the Middleware ECR repository" 84 | value = data.aws_ecr_repository.middleware.repository_url 85 | } 86 | 87 | output "DatabaseUrlSecretArn" { 88 | description = "The endpoint of the main database" 89 | value = aws_secretsmanager_secret.db_url_secret.arn 90 | } 91 | 92 | output "RedisUrl" { 93 | description = "The Redis connection URL" 94 | value = "rediss://${aws_elasticache_replication_group.redis.primary_endpoint_address}:6379" 95 | } 96 
| 97 | output "RedisHost" { 98 | description = "The Redis host name" 99 | value = aws_elasticache_replication_group.redis.primary_endpoint_address 100 | } 101 | 102 | output "RedisPort" { 103 | description = "The Redis port" 104 | value = "6379" 105 | } 106 | 107 | output "RedisPassword" { 108 | description = "The Redis password" 109 | value = random_password.redis_password_main.result 110 | } 111 | 112 | output "LitellmMasterAndSaltKeySecretArn" { 113 | description = "LiteLLM Master & Salt Key Secret ARN" 114 | value = aws_secretsmanager_secret.litellm_master_salt.arn 115 | } 116 | 117 | output "DbSecurityGroupId" { 118 | description = "DB Security Group ID" 119 | value = aws_security_group.db_sg.id 120 | } 121 | 122 | output "database_url" { 123 | value = "postgresql://llmproxy:${local.litellm_db_password}@${aws_db_instance.database.endpoint}/litellm" 124 | } 125 | 126 | output "litellm_master_key" { 127 | value = local.litellm_master_key 128 | } 129 | 130 | output "litellm_salt_key" { 131 | value = local.litellm_salt_key 132 | } -------------------------------------------------------------------------------- /litellm-terraform-stack/modules/base/rds.tf: -------------------------------------------------------------------------------- 1 | # We replicate the logic: 2 | # Secret for DB user "llmproxy", random password, exclude punctuation 3 | 4 | # Random passwords 5 | resource "random_password" "db_password_main" { 6 | length = 16 7 | special = false 8 | } 9 | 10 | resource "aws_secretsmanager_secret" "db_secret_main" { 11 | name_prefix = "${var.name}-DBSecret-" 12 | recovery_window_in_days = 0 13 | } 14 | 15 | resource "aws_secretsmanager_secret_version" "db_secret_main_version" { 16 | secret_id = aws_secretsmanager_secret.db_secret_main.id 17 | secret_string = jsonencode({ 18 | username = "llmproxy" 19 | password = random_password.db_password_main.result 20 | }) 21 | } 22 | 23 | ############################################# 24 | # RDS SECURITY GROUP 25 | 
############################################# 26 | 27 | resource "aws_security_group" "db_sg" { 28 | name = "${var.name}-db-sg" 29 | description = "Security group for RDS instance" 30 | vpc_id = local.final_vpc_id 31 | 32 | egress { 33 | description = "allow all outbound access" 34 | from_port = 0 35 | to_port = 0 36 | protocol = "-1" 37 | cidr_blocks = ["0.0.0.0/0"] 38 | } 39 | } 40 | 41 | ############################################# 42 | # RDS INSTANCES 43 | ############################################# 44 | 45 | # Subnet group for the DB 46 | resource "aws_db_subnet_group" "main" { 47 | name = "${var.name}-db-subnet-group" 48 | subnet_ids = local.chosen_subnet_ids 49 | } 50 | 51 | resource "aws_db_parameter_group" "example_pg" { 52 | name = "rds-postgres-parameter-group" 53 | # Update the family to match your PostgreSQL version 54 | family = "postgres15" 55 | 56 | # Enable logging of all statements 57 | parameter { 58 | name = "log_statement" 59 | value = "all" 60 | } 61 | 62 | # Log statements that take longer than 1ms 63 | parameter { 64 | name = "log_min_duration_statement" 65 | value = "1" 66 | } 67 | } 68 | 69 | # Database #1: litellm 70 | resource "aws_db_instance" "database" { 71 | identifier = "${var.name}-litellm-db" 72 | engine = "postgres" 73 | engine_version = "15" # or "15.x" 74 | instance_class = var.rds_instance_class 75 | storage_type = "gp3" 76 | allocated_storage = var.rds_allocated_storage 77 | storage_encrypted = true 78 | db_name = "litellm" 79 | db_subnet_group_name = aws_db_subnet_group.main.name 80 | vpc_security_group_ids = [aws_security_group.db_sg.id] 81 | username = jsondecode(aws_secretsmanager_secret_version.db_secret_main_version.secret_string)["username"] 82 | password = jsondecode(aws_secretsmanager_secret_version.db_secret_main_version.secret_string)["password"] 83 | skip_final_snapshot = true 84 | deletion_protection = false 85 | multi_az = true 86 | performance_insights_enabled = true 87 | enabled_cloudwatch_logs_exports = 
["postgresql"] 88 | auto_minor_version_upgrade = true 89 | monitoring_interval = 60 90 | monitoring_role_arn = aws_iam_role.rds_enhanced_monitoring.arn 91 | parameter_group_name = aws_db_parameter_group.example_pg.name 92 | copy_tags_to_snapshot = true 93 | apply_immediately = true 94 | } -------------------------------------------------------------------------------- /litellm-terraform-stack/modules/base/redis.tf: -------------------------------------------------------------------------------- 1 | ############################################# 2 | # REDIS SECURITY GROUP 3 | ############################################# 4 | 5 | resource "aws_security_group" "redis_sg" { 6 | name = "${var.name}-redis-sg" 7 | description = "Security group for Redis cluster" 8 | vpc_id = local.final_vpc_id 9 | 10 | egress { 11 | description = "allow all outbound access" 12 | from_port = 0 13 | to_port = 0 14 | protocol = "-1" 15 | cidr_blocks = ["0.0.0.0/0"] 16 | } 17 | } 18 | 19 | ############################################# 20 | # REDIS SUBNET GROUP 21 | ############################################# 22 | 23 | resource "aws_elasticache_subnet_group" "redis_subnet_group" { 24 | name = "litellm-redis-subnet-group" 25 | description = "Subnet group for Redis cluster" 26 | subnet_ids = local.chosen_subnet_ids 27 | } 28 | 29 | ############################################# 30 | # REDIS PARAMETER GROUP 31 | ############################################# 32 | 33 | resource "aws_elasticache_parameter_group" "redis_parameter_group" { 34 | name = "${var.name}-redis-parameter-group" 35 | family = "redis7" 36 | description = "Redis parameter group" 37 | parameter { 38 | name = "timeout" 39 | value = "0" 40 | } 41 | # Add additional parameters if desired. 
}

#############################################
# REDIS REPLICATION GROUP
#############################################

# AUTH token for the cluster — alphanumeric only (no special characters).
resource "random_password" "redis_password_main" {
  length  = 18
  special = false
}

resource "aws_elasticache_replication_group" "redis" {
  replication_group_id       = "${var.name}-redis"
  description                = "redis"
  engine                     = "redis"
  engine_version             = "7.1"
  node_type                  = var.redis_node_type
  num_cache_clusters         = var.redis_num_cache_clusters
  automatic_failover_enabled = true
  parameter_group_name       = aws_elasticache_parameter_group.redis_parameter_group.name
  subnet_group_name          = aws_elasticache_subnet_group.redis_subnet_group.name
  security_group_ids         = [aws_security_group.redis_sg.id]
  port                       = 6379
  multi_az_enabled           = true
  at_rest_encryption_enabled = true
  transit_encryption_enabled = true
  transit_encryption_mode    = "required"
  auth_token                 = random_password.redis_password_main.result
  auth_token_update_strategy = "SET"

  depends_on = [
    aws_elasticache_subnet_group.redis_subnet_group,
    aws_elasticache_parameter_group.redis_parameter_group
  ]
}

# ---------- litellm-terraform-stack/modules/base/route53.tf ----------
locals {
  # Route53 resources are only managed when use_route53 is on AND a zone
  # name was supplied.
  use_route53 = var.use_route53 && var.hostedZoneName != ""
  # Private LB: we CREATE a private zone when we own the VPC (new VPC) or
  # when the caller explicitly asked for one in their existing VPC...
  create_private_load_balancer = var.use_route53 && !var.publicLoadBalancer ? local.creating_new_vpc || var.create_private_hosted_zone_in_existing_vpc ? true : false : false
  # ...otherwise we IMPORT the pre-existing private zone.
  import_private_load_balancer = var.use_route53 && !var.publicLoadBalancer ? !local.create_private_load_balancer : false
}

# Public hosted zone lookup (public load balancer + Route53 enabled).
data "aws_route53_zone" "public_zone" {
  count        = local.use_route53 && var.publicLoadBalancer ? 1 : 0
  name         = var.hostedZoneName
  private_zone = false
}

# Private hosted zone, created per the decision table in the locals above
# (never for use_route53 = false or a public load balancer).
resource "aws_route53_zone" "new_private_zone" {
  count = local.create_private_load_balancer ? 1 : 0
  name  = var.hostedZoneName
  vpc {
    vpc_id = local.final_vpc_id
  }
}

# Pre-existing private hosted zone, imported when the user brings their own
# VPC and opted out of zone creation.
data "aws_route53_zone" "existing_private_zone" {
  count = local.import_private_load_balancer ?
1 : 0 31 | name = var.hostedZoneName 32 | private_zone = true 33 | } 34 | -------------------------------------------------------------------------------- /litellm-terraform-stack/modules/base/s3.tf: -------------------------------------------------------------------------------- 1 | ############################################################################### 2 | # S3 bucket for config 3 | ############################################################################### 4 | resource "aws_s3_bucket" "config_bucket" { 5 | bucket_prefix = "litellm-config-" 6 | force_destroy = true 7 | } 8 | 9 | resource "aws_s3_bucket_server_side_encryption_configuration" "config_bucket" { 10 | bucket = aws_s3_bucket.config_bucket.id 11 | 12 | rule { 13 | apply_server_side_encryption_by_default { 14 | sse_algorithm = "AES256" 15 | } 16 | } 17 | } 18 | 19 | resource "aws_s3_bucket_policy" "config_bucket" { 20 | bucket = aws_s3_bucket.config_bucket.id 21 | 22 | policy = jsonencode({ 23 | Version = "2012-10-17" 24 | Statement = [ 25 | { 26 | Sid = "EnforceSSLOnly" 27 | Effect = "Deny" 28 | Principal = "*" 29 | Action = "s3:*" 30 | Resource = [ 31 | aws_s3_bucket.config_bucket.arn, 32 | "${aws_s3_bucket.config_bucket.arn}/*" 33 | ] 34 | Condition = { 35 | Bool = { 36 | "aws:SecureTransport" = "false" 37 | } 38 | } 39 | } 40 | ] 41 | }) 42 | } 43 | 44 | resource "aws_s3_bucket_public_access_block" "config_bucket" { 45 | bucket = aws_s3_bucket.config_bucket.id 46 | block_public_acls = true 47 | block_public_policy = true 48 | } 49 | 50 | # Single file upload of `config.yaml` 51 | # In your CDK, you used s3deploy with `include: ['config.yaml']` and `exclude: ['*']` then re-included `config.yaml`. 
52 | resource "aws_s3_object" "config_file" { 53 | bucket = aws_s3_bucket.config_bucket.id 54 | key = "config.yaml" 55 | source = "${path.module}/../../../config/config.yaml" # Adjust path as needed 56 | etag = filemd5("${path.module}/../../../config/config.yaml") 57 | } -------------------------------------------------------------------------------- /litellm-terraform-stack/modules/base/secrets-manager.tf: -------------------------------------------------------------------------------- 1 | ############################################################################### 2 | # Secrets Manager: LiteLLM master/salt keys 3 | ############################################################################### 4 | # Generate random strings for master and salt 5 | resource "random_password" "litellm_master" { 6 | length = 21 7 | special = false 8 | } 9 | 10 | resource "random_password" "litellm_salt" { 11 | length = 21 # Reduced by 3 to account for "sk-" prefix 12 | special = false 13 | } 14 | 15 | 16 | # Create a secret (the "shell" or "container" for the key) 17 | resource "aws_secretsmanager_secret" "litellm_master_salt" { 18 | name_prefix = "LiteLLMMasterSalt-" 19 | recovery_window_in_days = 0 20 | } 21 | 22 | locals { 23 | litellm_master_key = "sk-${random_password.litellm_master.result}" 24 | litellm_salt_key = "sk-${random_password.litellm_salt.result}" 25 | } 26 | 27 | # Store the generated values 28 | resource "aws_secretsmanager_secret_version" "litellm_master_salt_ver" { 29 | secret_id = aws_secretsmanager_secret.litellm_master_salt.id 30 | 31 | secret_string = jsonencode({ 32 | LITELLM_MASTER_KEY = local.litellm_master_key 33 | LITELLM_SALT_KEY = local.litellm_salt_key 34 | }) 35 | } 36 | 37 | ############################################################################### 38 | # Construct DB URLs from existing Secrets Manager password 39 | ############################################################################### 40 | # For demonstration, parse the JSON from 
data sources (the RDS secrets).
# Adjust keys if your secrets structure differ.

locals {
  # DB password parsed back out of the main RDS secret.
  litellm_db_password = jsondecode(aws_secretsmanager_secret_version.db_secret_main_version.secret_string).password
}

resource "aws_secretsmanager_secret" "db_url_secret" {
  name_prefix             = "DBUrlSecret-"
  recovery_window_in_days = 0
}

resource "aws_secretsmanager_secret_version" "db_url_secret_ver" {
  secret_id = aws_secretsmanager_secret.db_url_secret.id

  secret_string = "postgresql://llmproxy:${local.litellm_db_password}@${aws_db_instance.database.endpoint}/litellm"
}

# ---------- litellm-terraform-stack/modules/base/variables.tf ----------
variable "name" {
  description = "Standard name to be used as prefix on all resources."
  type        = string
}

variable "vpc_id" {
  description = "ID of an existing VPC to use. If not provided, a new VPC will be created."
  type        = string
  default     = ""
}

# NOTE(review): the camelCase names below (ecrLitellmRepository, hostedZoneName,
# publicLoadBalancer, ...) are inconsistent with the snake_case variables, but
# they are part of the module's public interface and are kept for compatibility.
variable "ecrLitellmRepository" {
  type        = string
  description = "Name of the LiteLLM ECR repository"
}

variable "ecrMiddlewareRepository" {
  type        = string
  description = "Name of the Middleware ECR repository"
}

variable "deployment_platform" {
  description = "Which platform to deploy (ECS or EKS)"
  type        = string

  validation {
    condition     = can(regex("^(ECS|EKS)$", upper(var.deployment_platform)))
    error_message = "DEPLOYMENT_PLATFORM must be either 'ECS' or 'EKS' (case insensitive)."
  }
}

variable "disable_outbound_network_access" {
  description = "Whether to disable outbound network access"
  type        = bool
}

variable "create_vpc_endpoints_in_existing_vpc" {
  type        = bool
  description = "If using an existing VPC, set this to true to also create interface/gateway endpoints within it."
}

variable "hostedZoneName" {
  description = "Hosted zone name"
  type        = string
  default     = ""
}

variable "publicLoadBalancer" {
  description = "Whether the load balancer is public or private"
  type        = bool
}

variable "create_private_hosted_zone_in_existing_vpc" {
  description = "In the case publicLoadBalancer=false (meaning we need a private hosted zone), and an vpc_id is provided, decides whether we create a private hosted zone, or assume one already exists and import it"
  type        = bool
}

variable "rds_instance_class" {
  type        = string
  description = "The instance class for the RDS database"
}

variable "rds_allocated_storage" {
  type        = number
  description = "The allocated storage in GB for the RDS database"
}

variable "redis_node_type" {
  type        = string
  description = "The node type for Redis clusters"
}

variable "redis_num_cache_clusters" {
  type        = number
  description = "The number of cache clusters for Redis"
}

variable "use_route53" {
  description = "Whether to use Route53 for DNS management. If false, no Route53 resources will be created."
80 | type = bool 81 | default = false 82 | } 83 | -------------------------------------------------------------------------------- /litellm-terraform-stack/modules/base/waf.tf: -------------------------------------------------------------------------------- 1 | ############################################################################### 2 | # WAFv2 Web ACL 3 | ############################################################################### 4 | resource "aws_wafv2_web_acl" "litellm_waf" { 5 | name = "LiteLLMWAF" 6 | description = "WAF for LiteLLM" 7 | scope = "REGIONAL" # or CLOUDFRONT 8 | 9 | default_action { 10 | allow {} 11 | } 12 | 13 | visibility_config { 14 | cloudwatch_metrics_enabled = true 15 | metric_name = "LiteLLMWebAcl" 16 | sampled_requests_enabled = true 17 | } 18 | 19 | rule { 20 | name = "AWSManagedRulesCommonRuleSet-Exclusions" 21 | priority = 1 22 | 23 | # override_action is required if referencing a rule group 24 | # - use 'none' if you want to keep the group’s default action 25 | # - or 'count' to effectively “disable” or “exclude” from blocking 26 | override_action { 27 | none {} 28 | } 29 | 30 | statement { 31 | managed_rule_group_statement { 32 | name = "AWSManagedRulesCommonRuleSet" 33 | vendor_name = "AWS" 34 | 35 | # This is the Terraform equivalent to the "excludedRules" from CloudFormation/CDK: 36 | # We override the action of these specific sub-rules to avoid them blocking requests. 
37 | rule_action_override { 38 | name = "NoUserAgent_HEADER" 39 | action_to_use { 40 | count {} 41 | } 42 | } 43 | 44 | rule_action_override { 45 | name = "SizeRestrictions_BODY" 46 | action_to_use { 47 | count {} 48 | } 49 | } 50 | } 51 | } 52 | 53 | visibility_config { 54 | cloudwatch_metrics_enabled = true 55 | metric_name = "LiteLLMCommonRuleSet" 56 | sampled_requests_enabled = true 57 | } 58 | } 59 | 60 | rule { 61 | name = "AWS-AWSManagedRulesKnownBadInputsRuleSet" 62 | priority = 2 63 | 64 | override_action { 65 | none {} 66 | } 67 | 68 | statement { 69 | managed_rule_group_statement { 70 | name = "AWSManagedRulesKnownBadInputsRuleSet" 71 | vendor_name = "AWS" 72 | } 73 | } 74 | visibility_config { 75 | cloudwatch_metrics_enabled = true 76 | metric_name = "LiteLLMCommonRuleSet" 77 | sampled_requests_enabled = true 78 | } 79 | } 80 | 81 | } 82 | -------------------------------------------------------------------------------- /litellm-terraform-stack/modules/ecs/cloudfront.tf: -------------------------------------------------------------------------------- 1 | # Generate a random secret for CloudFront-to-ALB authentication 2 | # This secret is used for secure origin authentication between CloudFront and ALB 3 | resource "random_password" "cloudfront_secret" { 4 | count = var.use_cloudfront ? 1 : 0 5 | length = 32 6 | special = false 7 | 8 | # Add keepers to prevent regeneration unless explicitly changed 9 | keepers = { 10 | name = var.name # Only regenerate if the name changes 11 | } 12 | 13 | # Prevent updates to the secret's properties during regular deployments 14 | lifecycle { 15 | ignore_changes = [length, special, min_lower, min_upper, min_numeric] 16 | } 17 | } 18 | 19 | # CloudFront Distribution 20 | resource "aws_cloudfront_distribution" "this" { 21 | count = var.use_cloudfront ? 
1 : 0 22 | enabled = true 23 | is_ipv6_enabled = true 24 | comment = "${var.name}-distribution" 25 | default_root_object = "" 26 | price_class = var.cloudfront_price_class 27 | 28 | origin { 29 | domain_name = aws_lb.this.dns_name 30 | origin_id = "ALB" 31 | 32 | # Add a custom origin header for security 33 | # This replaces the IP-based security group approach 34 | # ALB should be configured to only accept requests with this header 35 | custom_header { 36 | name = "X-CloudFront-Secret" 37 | value = "litellm-cf-${random_password.cloudfront_secret[0].result}" 38 | } 39 | 40 | # Security note on CloudFront-ALB communication: 41 | # 42 | # By setting origin_protocol_policy = "http-only", communication between CloudFront and ALB 43 | # is unencrypted. However, security is maintained through: 44 | # 45 | # 1. Custom header authentication (X-CloudFront-Secret) that prevents direct access to the ALB 46 | # 2. Communication between end users and CloudFront remains encrypted with HTTPS 47 | # 3. The ALB is configured to reject requests without the secret header 48 | # 49 | # This approach eliminates certificate validation issues while maintaining a strong security posture. 
50 | custom_origin_config { 51 | http_port = 80 52 | https_port = 443 53 | origin_protocol_policy = "http-only" 54 | origin_ssl_protocols = ["TLSv1.2"] 55 | } 56 | } 57 | 58 | # Default cache behavior for API requests 59 | default_cache_behavior { 60 | allowed_methods = ["DELETE", "GET", "HEAD", "OPTIONS", "PATCH", "POST", "PUT"] 61 | cached_methods = ["GET", "HEAD", "OPTIONS"] 62 | target_origin_id = "ALB" 63 | 64 | forwarded_values { 65 | query_string = true 66 | headers = ["Authorization", "Host", "Origin"] 67 | 68 | cookies { 69 | forward = "all" 70 | } 71 | } 72 | 73 | viewer_protocol_policy = "redirect-to-https" 74 | min_ttl = 0 75 | default_ttl = 0 76 | max_ttl = 0 77 | compress = true 78 | } 79 | 80 | # Use the provided certificate if Route53 is enabled with a custom domain 81 | dynamic "viewer_certificate" { 82 | for_each = var.use_route53 && var.certificate_arn != "" ? [1] : [] 83 | content { 84 | acm_certificate_arn = var.certificate_arn 85 | ssl_support_method = "sni-only" 86 | minimum_protocol_version = "TLSv1.2_2021" 87 | } 88 | } 89 | 90 | # Use CloudFront default certificate if no Route53 or certificate is provided 91 | dynamic "viewer_certificate" { 92 | for_each = !var.use_route53 || var.certificate_arn == "" ? [1] : [] 93 | content { 94 | cloudfront_default_certificate = true 95 | } 96 | } 97 | 98 | # Add aliases only if Route53 is used 99 | aliases = var.use_route53 ? 
[format("%s.%s", var.record_name, var.hosted_zone_name)] : [] 100 | 101 | # Associate WAF Web ACL if provided - commented out due to regional WAF scope issue 102 | # CloudFront requires global WAF WebACLs, but the current WAF is regional 103 | # web_acl_id = var.wafv2_acl_arn 104 | 105 | restrictions { 106 | geo_restriction { 107 | restriction_type = "none" 108 | } 109 | } 110 | 111 | # Enable logging to the ALB access logs bucket - commented out to avoid S3 ACL issues 112 | # logging_config { 113 | # include_cookies = false 114 | # bucket = aws_s3_bucket.access_log_bucket.bucket_domain_name 115 | # prefix = "cloudfront-logs/" 116 | # } 117 | 118 | tags = { 119 | Name = "${var.name}-cloudfront-distribution" 120 | } 121 | 122 | depends_on = [aws_lb.this] 123 | } 124 | -------------------------------------------------------------------------------- /litellm-terraform-stack/modules/ecs/cloudwatch.tf: -------------------------------------------------------------------------------- 1 | resource "aws_cloudwatch_log_group" "litellm" { 2 | name = "/ecs/${var.name}-litellm" 3 | retention_in_days = 365 4 | } 5 | 6 | resource "aws_cloudwatch_log_group" "middleware" { 7 | name = "/ecs/${var.name}-middleware" 8 | retention_in_days = 365 9 | } 10 | -------------------------------------------------------------------------------- /litellm-terraform-stack/modules/ecs/iam.tf: -------------------------------------------------------------------------------- 1 | data "aws_iam_policy_document" "ecs_task_assume_role" { 2 | statement { 3 | actions = ["sts:AssumeRole"] 4 | principals { 5 | type = "Service" 6 | identifiers = ["ecs-tasks.amazonaws.com"] 7 | } 8 | } 9 | } 10 | 11 | resource "aws_iam_role" "task_role" { 12 | name = "${var.name}-ecs-task-role" 13 | assume_role_policy = data.aws_iam_policy_document.ecs_task_assume_role.json 14 | } 15 | 16 | data "aws_iam_policy_document" "ecs_execution_assume_role" { 17 | statement { 18 | actions = ["sts:AssumeRole"] 19 | principals { 20 | type 
= "Service" 21 | identifiers = ["ecs-tasks.amazonaws.com"] 22 | } 23 | } 24 | } 25 | 26 | resource "aws_iam_role" "execution_role" { 27 | name = "${var.name}-ecs-execution-role" 28 | assume_role_policy = data.aws_iam_policy_document.ecs_execution_assume_role.json 29 | } 30 | 31 | resource "aws_iam_role_policy_attachment" "execution_role_attachment" { 32 | role = aws_iam_role.execution_role.name 33 | policy_arn = "arn:aws:iam::aws:policy/service-role/AmazonECSTaskExecutionRolePolicy" 34 | } 35 | 36 | data "aws_iam_policy_document" "execution_role_policy_doc" { 37 | statement { 38 | sid = "EcrImageAccess" 39 | actions = ["ecr:BatchCheckLayerAvailability", "ecr:BatchGetImage", "ecr:GetDownloadUrlForLayer"] 40 | resources = [ 41 | "*" 42 | ] 43 | } 44 | 45 | statement { 46 | sid = "EcrTokenAccess" 47 | actions = ["ecr:GetAuthorizationToken"] 48 | resources = [ 49 | "*" 50 | ] 51 | } 52 | 53 | statement { 54 | sid = "CloudwatchAccess" 55 | actions = ["logs:CreateLogStream", "logs:PutLogEvents"] 56 | resources = ["*"] 57 | } 58 | 59 | statement { 60 | actions = ["secretsmanager:GetSecretValue", "secretsmanager:DescribeSecret"] 61 | resources = [var.master_and_salt_key_secret_arn, var.main_db_secret_arn, aws_secretsmanager_secret.litellm_other_secrets.arn] 62 | } 63 | } 64 | 65 | resource "aws_iam_policy" "execution_role_policy" { 66 | name = "${var.name}-ecs-execution-role-policy" 67 | policy = data.aws_iam_policy_document.execution_role_policy_doc.json 68 | } 69 | 70 | resource "aws_iam_role_policy_attachment" "execution_role_attach" { 71 | role = aws_iam_role.execution_role.name 72 | policy_arn = aws_iam_policy.execution_role_policy.arn 73 | } 74 | 75 | # -------------------------------------------------------------------- 76 | # Task Role Policy (S3, Bedrock, SageMaker) 77 | # -------------------------------------------------------------------- 78 | data "aws_iam_policy_document" "task_role_policy_doc" { 79 | statement { 80 | sid = "S3ConfigBucketAccess" 81 | actions 
= ["s3:GetObject", "s3:ListBucket"] 82 | resources = [ 83 | var.config_bucket_arn, 84 | "${var.config_bucket_arn}/*" 85 | ] 86 | } 87 | 88 | statement { 89 | sid = "S3LogBucketAccess" 90 | actions = ["s3:*"] 91 | resources = [ 92 | var.log_bucket_arn, 93 | "${var.log_bucket_arn}/*" 94 | ] 95 | } 96 | 97 | statement { 98 | sid = "BedrockAccess" 99 | actions = ["bedrock:*"] 100 | resources = ["*"] 101 | } 102 | 103 | statement { 104 | sid = "SageMakerInvoke" 105 | actions = ["sagemaker:InvokeEndpoint"] 106 | resources = ["*"] 107 | } 108 | } 109 | 110 | resource "aws_iam_policy" "task_role_policy" { 111 | name = "${var.name}-ecs-task-role-policy" 112 | policy = data.aws_iam_policy_document.task_role_policy_doc.json 113 | } 114 | 115 | resource "aws_iam_role_policy_attachment" "task_role_attach" { 116 | role = aws_iam_role.task_role.name 117 | policy_arn = aws_iam_policy.task_role_policy.arn 118 | } -------------------------------------------------------------------------------- /litellm-terraform-stack/modules/ecs/outputs.tf: -------------------------------------------------------------------------------- 1 | ############################################################################### 2 | # (12) Outputs 3 | ############################################################################### 4 | output "LitellmEcsCluster" { 5 | value = aws_ecs_cluster.this.name 6 | description = "Name of the ECS Cluster" 7 | } 8 | 9 | output "LitellmEcsTask" { 10 | value = aws_ecs_service.litellm_service.name 11 | description = "Name of the ECS Service" 12 | } 13 | 14 | output "alb_dns_name" { 15 | value = aws_lb.this.dns_name 16 | description = "The DNS name of the ALB" 17 | } 18 | 19 | output "alb_zone_id" { 20 | value = aws_lb.this.zone_id 21 | description = "The zone ID of the ALB" 22 | } 23 | 24 | output "cloudfront_distribution_id" { 25 | value = var.use_cloudfront ? 
aws_cloudfront_distribution.this[0].id : "" 26 | description = "The ID of the CloudFront distribution" 27 | } 28 | 29 | output "cloudfront_domain_name" { 30 | value = var.use_cloudfront ? aws_cloudfront_distribution.this[0].domain_name : "" 31 | description = "The domain name of the CloudFront distribution" 32 | } 33 | 34 | output "ServiceURL" { 35 | description = "The service URL" 36 | value = var.use_route53 ? "https://${var.record_name}.${var.hosted_zone_name}" : ( 37 | var.use_cloudfront ? "https://${aws_cloudfront_distribution.this[0].domain_name}" : "https://${aws_lb.this.dns_name}" 38 | ) 39 | } 40 | 41 | output "cloudfront_auth_secret" { 42 | description = "The CloudFront authentication secret (only shown once after creation)" 43 | value = var.use_cloudfront ? random_password.cloudfront_secret[0].result : null 44 | sensitive = true 45 | } 46 | -------------------------------------------------------------------------------- /litellm-terraform-stack/modules/ecs/route53.tf: -------------------------------------------------------------------------------- 1 | # Only lookup the Route53 zone if use_route53 is true 2 | data "aws_route53_zone" "this" { 3 | count = var.use_route53 ? 1 : 0 4 | name = var.hosted_zone_name 5 | private_zone = !var.public_load_balancer 6 | } 7 | 8 | # Only create Route53 records if use_route53 is true 9 | resource "aws_route53_record" "alb_alias" { 10 | count = var.use_route53 ? 1 : 0 11 | zone_id = data.aws_route53_zone.this[0].zone_id 12 | name = var.record_name 13 | type = "A" 14 | 15 | alias { 16 | # If CloudFront is enabled, point to CloudFront, otherwise point to ALB 17 | name = var.use_cloudfront ? aws_cloudfront_distribution.this[0].domain_name : aws_lb.this.dns_name 18 | zone_id = var.use_cloudfront ? 
aws_cloudfront_distribution.this[0].hosted_zone_id : aws_lb.this.zone_id
    evaluate_target_health = true
  }

  depends_on = [aws_cloudfront_distribution.this]
}
--------------------------------------------------------------------------------
/litellm-terraform-stack/modules/ecs/s3.tf:
--------------------------------------------------------------------------------
# Bucket receiving ALB access logs. force_destroy lets `terraform destroy`
# remove the bucket even when logs are present.
resource "aws_s3_bucket" "access_log_bucket" {
  bucket_prefix = "access-logs-"
  force_destroy = true
}

resource "aws_s3_bucket_server_side_encryption_configuration" "access_log_bucket" {
  bucket = aws_s3_bucket.access_log_bucket.id

  rule {
    apply_server_side_encryption_by_default {
      sse_algorithm = "AES256"
    }
  }
}

# Region-specific ELB service account allowed to deliver access logs.
data "aws_elb_service_account" "main" {}

resource "aws_s3_bucket_policy" "access_log_bucket" {
  bucket = aws_s3_bucket.access_log_bucket.id

  policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Sid       = "EnforceSSLOnly"
        Effect    = "Deny"
        Principal = "*"
        Action    = "s3:*"
        Resource = [
          aws_s3_bucket.access_log_bucket.arn,
          "${aws_s3_bucket.access_log_bucket.arn}/*"
        ]
        Condition = {
          Bool = {
            "aws:SecureTransport" = "false"
          }
        }
      },
      {
        Sid    = "AllowELBLogDelivery"
        Effect = "Allow"
        Principal = {
          AWS = data.aws_elb_service_account.main.arn
        }
        Action   = "s3:PutObject"
        Resource = "${aws_s3_bucket.access_log_bucket.arn}/*"
      }
    ]
  })
}

resource "aws_s3_bucket_public_access_block" "access_log_bucket" {
  bucket              = aws_s3_bucket.access_log_bucket.id
  block_public_acls   = true
  block_public_policy = true
  # Fixed: only 2 of the 4 public-access settings were set, so pre-existing
  # public ACLs would still be honored and cross-account access points could
  # still expose objects. All four settings are needed to fully block public
  # access to this log bucket.
  ignore_public_acls      = true
  restrict_public_buckets = true
}
--------------------------------------------------------------------------------
/litellm-terraform-stack/modules/ecs/secrets-manager.tf:
--------------------------------------------------------------------------------
resource
"aws_secretsmanager_secret" "litellm_other_secrets" { 2 | name_prefix = "LiteLLMApiKeySecret-" 3 | recovery_window_in_days = 0 4 | } 5 | 6 | resource "aws_secretsmanager_secret_version" "litellm_other_secrets_ver" { 7 | secret_id = aws_secretsmanager_secret.litellm_other_secrets.id 8 | 9 | secret_string = jsonencode({ 10 | OPENAI_API_KEY = var.openai_api_key 11 | AZURE_OPENAI_API_KEY = var.azure_openai_api_key 12 | AZURE_API_KEY = var.azure_api_key 13 | ANTHROPIC_API_KEY = var.anthropic_api_key 14 | GROQ_API_KEY = var.groq_api_key 15 | COHERE_API_KEY = var.cohere_api_key 16 | CO_API_KEY = var.co_api_key 17 | HF_TOKEN = var.hf_token 18 | HUGGINGFACE_API_KEY = var.huggingface_api_key 19 | DATABRICKS_API_KEY = var.databricks_api_key 20 | GEMINI_API_KEY = var.gemini_api_key 21 | CODESTRAL_API_KEY = var.codestral_api_key 22 | MISTRAL_API_KEY = var.mistral_api_key 23 | AZURE_AI_API_KEY = var.azure_ai_api_key 24 | NVIDIA_NIM_API_KEY = var.nvidia_nim_api_key 25 | XAI_API_KEY = var.xai_api_key 26 | PERPLEXITYAI_API_KEY = var.perplexityai_api_key 27 | GITHUB_API_KEY = var.github_api_key 28 | DEEPSEEK_API_KEY = var.deepseek_api_key 29 | AI21_API_KEY = var.ai21_api_key 30 | LANGSMITH_API_KEY = var.langsmith_api_key 31 | LANGFUSE_SECRET_KEY = var.langfuse_secret_key 32 | }) 33 | } -------------------------------------------------------------------------------- /litellm-terraform-stack/modules/ecs/security-groups.tf: -------------------------------------------------------------------------------- 1 | ############################################################################### 2 | # (7) Security Groups & Ingress for Redis and RDS 3 | ############################################################################### 4 | 5 | # CloudFront security is implemented using origin custom headers instead of IP ranges 6 | # This avoids hitting AWS security group rule limits (60 rules per security group) 7 | # CloudFront has hundreds of IP ranges globally, which would exceed the limit 8 | # 
Security group for ECS Service tasks 9 | resource "aws_security_group" "ecs_service_sg" { 10 | name = "${var.name}-service-sg" 11 | description = "Security group for ECS Fargate service" 12 | vpc_id = var.vpc_id 13 | 14 | egress { 15 | from_port = 0 16 | to_port = 0 17 | protocol = "-1" # "-1" represents all protocols 18 | cidr_blocks = ["0.0.0.0/0"] 19 | description = "Allow all outbound traffic by default" 20 | } 21 | } 22 | 23 | resource "aws_security_group_rule" "alb_ingress_4000" { 24 | type = "ingress" 25 | from_port = 4000 26 | to_port = 4000 27 | protocol = "tcp" 28 | security_group_id = aws_security_group.ecs_service_sg.id 29 | source_security_group_id = aws_security_group.alb_sg.id 30 | description = "Allow Load Balancer to ECS" 31 | } 32 | 33 | resource "aws_security_group_rule" "alb_ingress_3000" { 34 | type = "ingress" 35 | from_port = 3000 36 | to_port = 3000 37 | protocol = "tcp" 38 | security_group_id = aws_security_group.ecs_service_sg.id 39 | source_security_group_id = aws_security_group.alb_sg.id 40 | description = "Allow Load Balancer to ECS" 41 | } 42 | 43 | 44 | # Allow ECS tasks to connect to Redis 45 | resource "aws_security_group_rule" "redis_ingress" { 46 | type = "ingress" 47 | from_port = 6379 48 | to_port = 6379 49 | protocol = "tcp" 50 | security_group_id = var.redis_security_group_id 51 | source_security_group_id = aws_security_group.ecs_service_sg.id 52 | description = "Allow ECS tasks to connect to Redis" 53 | } 54 | 55 | # Allow ECS tasks to connect to RDS 56 | resource "aws_security_group_rule" "db_ingress" { 57 | type = "ingress" 58 | from_port = 5432 59 | to_port = 5432 60 | protocol = "tcp" 61 | security_group_id = var.db_security_group_id 62 | source_security_group_id = aws_security_group.ecs_service_sg.id 63 | description = "Allow ECS tasks to connect to RDS" 64 | } 65 | 66 | resource "aws_security_group" "alb_sg" { 67 | name = "${var.name}-alb-sg" 68 | description = "Security group for ALB" 69 | vpc_id = var.vpc_id 70 | 71 | 
# Public load balancer: Allow HTTPS traffic with WAF protection 72 | # Security is provided by: 73 | # 1. When CloudFront is enabled: Custom origin header authentication via ALB listener rules 74 | # 2. When CloudFront is disabled: WAF rules on the ALB 75 | # 3. When private: Only accessible from private subnets 76 | ingress { 77 | description = "HTTPS traffic" 78 | protocol = "tcp" 79 | from_port = 443 80 | to_port = 443 81 | cidr_blocks = var.public_load_balancer ? ["0.0.0.0/0"] : var.private_subnets_cidr_blocks 82 | } 83 | 84 | # Add HTTP ingress for CloudFront origin connections 85 | # Security for HTTP is provided by custom header authentication 86 | ingress { 87 | description = "HTTP traffic for CloudFront origin" 88 | protocol = "tcp" 89 | from_port = 80 90 | to_port = 80 91 | cidr_blocks = var.public_load_balancer ? ["0.0.0.0/0"] : var.private_subnets_cidr_blocks 92 | } 93 | 94 | tags = { 95 | Name = "${var.name}-alb-sg" 96 | SecurityModel = var.use_cloudfront ? "CloudFront-Protected" : (var.public_load_balancer ? "Public-WAF-Protected" : "Private-VPC-Only") 97 | } 98 | 99 | # Allow all outbound 100 | egress { 101 | description = "Allow all outbound" 102 | protocol = -1 103 | from_port = 0 104 | to_port = 0 105 | cidr_blocks = ["0.0.0.0/0"] 106 | } 107 | } 108 | -------------------------------------------------------------------------------- /litellm-terraform-stack/modules/ecs/variables.tf: -------------------------------------------------------------------------------- 1 | variable "name" { 2 | description = "Standard name to be used as prefix on all resources." 
3 | type = string 4 | } 5 | 6 | # Variables needed for the configuration 7 | variable "config_bucket_arn" { 8 | description = "ARN of the configuration bucket" 9 | type = string 10 | } 11 | 12 | variable "log_bucket_arn" { 13 | description = "ARN of the log bucket" 14 | type = string 15 | } 16 | 17 | # Required variables 18 | variable "ecr_litellm_repository_url" { 19 | description = "URL of the ECR repository for LiteLLM" 20 | type = string 21 | } 22 | 23 | variable "ecr_middleware_repository_url" { 24 | description = "URL of the ECR repository for middleware" 25 | type = string 26 | } 27 | 28 | variable "litellm_version" { 29 | description = "Version tag for LiteLLM image" 30 | type = string 31 | } 32 | 33 | variable "config_bucket_name" { 34 | description = "Name of the S3 bucket containing config" 35 | type = string 36 | } 37 | 38 | variable "redis_host" { 39 | description = "The Redis host name" 40 | type = string 41 | } 42 | 43 | variable "redis_port" { 44 | description = "The Redis port" 45 | type = string 46 | } 47 | 48 | variable "redis_password" { 49 | description = "The Redis password" 50 | type = string 51 | } 52 | 53 | variable "openai_api_key" { 54 | description = "OpenAI API key" 55 | type = string 56 | sensitive = true 57 | } 58 | 59 | variable "azure_openai_api_key" { 60 | description = "Azure OpenAI API key" 61 | type = string 62 | sensitive = true 63 | } 64 | 65 | variable "azure_api_key" { 66 | description = "Azure API key" 67 | type = string 68 | sensitive = true 69 | } 70 | 71 | variable "anthropic_api_key" { 72 | description = "Anthropic API key" 73 | type = string 74 | sensitive = true 75 | } 76 | 77 | variable "groq_api_key" { 78 | description = "Groq API key" 79 | type = string 80 | sensitive = true 81 | } 82 | 83 | variable "cohere_api_key" { 84 | description = "Cohere API key" 85 | type = string 86 | sensitive = true 87 | } 88 | 89 | variable "co_api_key" { 90 | description = "Co API key" 91 | type = string 92 | sensitive = true 93 | } 
94 | 95 | variable "hf_token" { 96 | description = "HuggingFace token" 97 | type = string 98 | sensitive = true 99 | } 100 | 101 | variable "huggingface_api_key" { 102 | description = "HuggingFace API key" 103 | type = string 104 | sensitive = true 105 | } 106 | 107 | variable "databricks_api_key" { 108 | description = "Databricks API key" 109 | type = string 110 | sensitive = true 111 | } 112 | 113 | variable "gemini_api_key" { 114 | description = "Gemini API key" 115 | type = string 116 | sensitive = true 117 | } 118 | 119 | variable "codestral_api_key" { 120 | description = "Codestral API key" 121 | type = string 122 | sensitive = true 123 | } 124 | 125 | variable "mistral_api_key" { 126 | description = "Mistral API key" 127 | type = string 128 | sensitive = true 129 | } 130 | 131 | variable "azure_ai_api_key" { 132 | description = "Azure AI API key" 133 | type = string 134 | sensitive = true 135 | } 136 | 137 | variable "nvidia_nim_api_key" { 138 | description = "NVIDIA NIM API key" 139 | type = string 140 | sensitive = true 141 | } 142 | 143 | variable "xai_api_key" { 144 | description = "XAI API key" 145 | type = string 146 | sensitive = true 147 | } 148 | 149 | variable "perplexityai_api_key" { 150 | description = "PerplexityAI API key" 151 | type = string 152 | sensitive = true 153 | } 154 | 155 | variable "github_api_key" { 156 | description = "GitHub API key" 157 | type = string 158 | sensitive = true 159 | } 160 | 161 | variable "deepseek_api_key" { 162 | description = "Deepseek API key" 163 | type = string 164 | sensitive = true 165 | } 166 | 167 | variable "ai21_api_key" { 168 | description = "AI21 API key" 169 | type = string 170 | sensitive = true 171 | } 172 | 173 | variable "langsmith_api_key" { 174 | description = "Langsmith API key" 175 | type = string 176 | sensitive = true 177 | } 178 | 179 | variable "langsmith_project" { 180 | description = "Langsmith project" 181 | type = string 182 | } 183 | 184 | variable "langsmith_default_run_name" { 185 
| description = "langsmith default run name" 186 | type = string 187 | } 188 | 189 | variable "okta_audience" { 190 | description = "Okta audience" 191 | type = string 192 | } 193 | 194 | variable "okta_issuer" { 195 | description = "Okta issuer" 196 | type = string 197 | } 198 | 199 | variable "certificate_arn" { 200 | description = "ARN of the ACM certificate" 201 | type = string 202 | default = "" 203 | } 204 | 205 | variable "wafv2_acl_arn" { 206 | description = "ARN of the WAFv2 ACL" 207 | type = string 208 | } 209 | 210 | variable "record_name" { 211 | description = "Record name for the ingress" 212 | type = string 213 | default = "" 214 | } 215 | 216 | variable "hosted_zone_name" { 217 | description = "Hosted zone name for the ingress" 218 | type = string 219 | default = "" 220 | } 221 | 222 | variable "use_route53" { 223 | description = "Whether to use Route53 for DNS management" 224 | type = bool 225 | default = false 226 | } 227 | 228 | variable "use_cloudfront" { 229 | description = "Whether to use CloudFront in front of ALB" 230 | type = bool 231 | default = true 232 | } 233 | 234 | variable "cloudfront_price_class" { 235 | description = "The price class for CloudFront distribution" 236 | type = string 237 | default = "PriceClass_100" 238 | } 239 | 240 | variable "vpc_id" { 241 | description = "VPC ID where the cluster and nodes will be deployed" 242 | type = string 243 | } 244 | 245 | variable "db_security_group_id" { 246 | description = "RDS db security group id" 247 | type = string 248 | } 249 | 250 | variable "redis_security_group_id" { 251 | description = "redis security group id" 252 | type = string 253 | } 254 | 255 | variable "architecture" { 256 | description = "The architecture for the node group instances (x86 or arm64)" 257 | type = string 258 | validation { 259 | condition = contains(["x86", "arm"], var.architecture) 260 | error_message = "Architecture must be either 'x86' or 'arm64'." 
261 | } 262 | } 263 | 264 | variable "disable_outbound_network_access" { 265 | description = "Whether to disable outbound network access for the EKS Cluster" 266 | type = bool 267 | } 268 | 269 | variable "desired_capacity" { 270 | description = "Desired Capacity on the node group and deployment" 271 | type = number 272 | } 273 | 274 | variable "min_capacity" { 275 | description = "Min Capacity on the node group" 276 | type = number 277 | } 278 | 279 | variable "max_capacity" { 280 | description = "Max Capacity on the node group" 281 | type = number 282 | } 283 | 284 | variable "public_load_balancer" { 285 | description = "whether the load balancer is public" 286 | type = bool 287 | } 288 | 289 | variable "master_and_salt_key_secret_arn" { 290 | description = "ARN of secret with master and salt key" 291 | type = string 292 | } 293 | 294 | variable "main_db_secret_arn" { 295 | description = "ARN of secret for main rds db" 296 | type = string 297 | } 298 | 299 | variable "vcpus" { 300 | description = "Number of ECS vcpus" 301 | type = number 302 | } 303 | 304 | variable "cpu_target_utilization_percent" { 305 | description = "CPU target utilization percent for autoscale" 306 | type = number 307 | } 308 | 309 | variable "memory_target_utilization_percent" { 310 | description = "Memory target utilization percent for autoscale" 311 | type = number 312 | } 313 | 314 | variable "private_subnets" { 315 | description = "List of private subnet IDs" 316 | type = list(string) 317 | } 318 | 319 | variable "public_subnets" { 320 | description = "List of public subnet IDs" 321 | type = list(string) 322 | } 323 | 324 | variable "private_subnets_cidr_blocks" { 325 | description = "CIDR blocks of the private subnets" 326 | type = list(string) 327 | default = ["10.0.0.0/8", "172.16.0.0/12", "192.168.0.0/16"] # Default private address spaces 328 | } 329 | 330 | variable "disable_swagger_page" { 331 | type = bool 332 | description = "Whether to disable the swagger page or not" 333 | } 
334 | 335 | variable "disable_admin_ui" { 336 | type = bool 337 | description = "Whether to disable the admin UI or not" 338 | } 339 | 340 | variable "langfuse_public_key" { 341 | type = string 342 | description = "the public key of your langfuse deployment" 343 | } 344 | 345 | variable "langfuse_secret_key" { 346 | type = string 347 | description = "the secret key of your langfuse deployment" 348 | } 349 | 350 | variable "langfuse_host" { 351 | type = string 352 | description = "the hostname of your langfuse deployment." 353 | } 354 | -------------------------------------------------------------------------------- /litellm-terraform-stack/modules/ecs/waf.tf: -------------------------------------------------------------------------------- 1 | ############################################################################### 2 | # (10) WAFv2 Web ACL Association 3 | ############################################################################### 4 | resource "aws_wafv2_web_acl_association" "litellm_waf" { 5 | resource_arn = aws_lb.this.arn 6 | web_acl_arn = var.wafv2_acl_arn 7 | } -------------------------------------------------------------------------------- /litellm-terraform-stack/modules/eks/iam.tf: -------------------------------------------------------------------------------- 1 | resource "aws_iam_role" "eks_developers" { 2 | name = "${var.name}-developers" 3 | assume_role_policy = data.aws_iam_policy_document.assume_role.json 4 | } 5 | 6 | resource "aws_iam_role" "eks_operators" { 7 | name = "${var.name}-operators" 8 | assume_role_policy = data.aws_iam_policy_document.assume_role.json 9 | } 10 | 11 | data "aws_iam_policy_document" "assume_role" { 12 | statement { 13 | sid = "AssumeRole" 14 | actions = ["sts:AssumeRole"] 15 | 16 | principals { 17 | type = "AWS" 18 | identifiers = ["arn:aws:iam::${data.aws_caller_identity.current.account_id}:root"] 19 | } 20 | } 21 | } 22 | 23 | resource "aws_iam_role" "eks_nodegroup" { 24 | name = "${var.name}-eks-nodegroup-role" 
25 | 26 | assume_role_policy = jsonencode({ 27 | Version = "2012-10-17" 28 | Statement = [ 29 | { 30 | Effect = "Allow" 31 | Principal = { 32 | Service = "ec2.amazonaws.com" 33 | } 34 | Action = "sts:AssumeRole" 35 | } 36 | ] 37 | }) 38 | } 39 | 40 | # Attach AWS-managed policies 41 | resource "aws_iam_role_policy_attachment" "eks_nodegroup_worker_policy" { 42 | role = aws_iam_role.eks_nodegroup.name 43 | policy_arn = "arn:aws:iam::aws:policy/AmazonEKSWorkerNodePolicy" 44 | } 45 | 46 | resource "aws_iam_role_policy_attachment" "eks_nodegroup_cni_policy" { 47 | role = aws_iam_role.eks_nodegroup.name 48 | policy_arn = "arn:aws:iam::aws:policy/AmazonEKS_CNI_Policy" 49 | } 50 | 51 | resource "aws_iam_role_policy_attachment" "eks_nodegroup_ec2_registry" { 52 | role = aws_iam_role.eks_nodegroup.name 53 | policy_arn = "arn:aws:iam::aws:policy/AmazonEC2ContainerRegistryReadOnly" 54 | } 55 | 56 | resource "aws_iam_role_policy_attachment" "eks_nodegroup_ssm" { 57 | role = aws_iam_role.eks_nodegroup.name 58 | policy_arn = "arn:aws:iam::aws:policy/AmazonSSMManagedInstanceCore" 59 | } 60 | 61 | data "aws_iam_policy_document" "nodegroup_ecr_ptc" { 62 | statement { 63 | sid = "ECRPullThroughCache" 64 | effect = "Allow" 65 | actions = [ 66 | "ecr:CreateRepository", 67 | "ecr:BatchImportUpstreamImage", 68 | ] 69 | resources = ["*"] 70 | } 71 | } 72 | 73 | resource "aws_iam_policy" "nodegroup_ecr_ptc" { 74 | name = "${var.name}-nodegroup-ecr-ptc" 75 | policy = data.aws_iam_policy_document.nodegroup_ecr_ptc.json 76 | description = "Allow ECR Pull Through Cache" 77 | } 78 | 79 | resource "aws_iam_policy_attachment" "nodegroup_ecr_ptc_attach" { 80 | name = "${var.name}-nodegroup-ecr-ptc-attach" 81 | policy_arn = aws_iam_policy.nodegroup_ecr_ptc.arn 82 | roles = [aws_iam_role.eks_nodegroup.name] 83 | } 84 | 85 | # Additional custom inline policy for the node group 86 | resource "aws_iam_role_policy" "node_additional_policies" { 87 | name = "${var.name}-eks-node-additional" 88 | role = 
aws_iam_role.eks_nodegroup.name 89 | 90 | policy = jsonencode({ 91 | Version = "2012-10-17" 92 | Statement = [ 93 | { 94 | Effect = "Allow" 95 | Action = [ 96 | "s3:GetObject", 97 | "s3:ListBucket" 98 | ] 99 | Resource = [ 100 | var.config_bucket_arn, 101 | "${var.config_bucket_arn}/*" 102 | ] 103 | }, 104 | { 105 | Effect = "Allow" 106 | Action = [ 107 | "s3:*" 108 | ] 109 | Resource = [ 110 | var.log_bucket_arn, 111 | "${var.log_bucket_arn}/*" 112 | ] 113 | }, 114 | { 115 | Effect = "Allow" 116 | Action = [ 117 | "bedrock:*" 118 | ] 119 | Resource = ["*"] 120 | }, 121 | { 122 | Effect = "Allow" 123 | Action = [ 124 | "sagemaker:InvokeEndpoint" 125 | ] 126 | Resource = ["*"] 127 | } 128 | ] 129 | }) 130 | } 131 | 132 | data "aws_iam_policy_document" "pod_identity_assume_role" { 133 | statement { 134 | effect = "Allow" 135 | principals { 136 | type = "Service" 137 | identifiers = ["pods.eks.amazonaws.com"] 138 | } 139 | actions = ["sts:AssumeRole", "sts:TagSession"] 140 | } 141 | } 142 | 143 | resource "aws_iam_role" "cw_observability_role" { 144 | # Make sure this only creates if you're creating the cluster or adding add-ons 145 | count = var.create_cluster || var.install_add_ons_in_existing_eks_cluster ? 1 : 0 146 | 147 | name = "${var.name}-cw-observability-role" 148 | assume_role_policy = data.aws_iam_policy_document.pod_identity_assume_role.json 149 | } 150 | 151 | resource "aws_iam_role_policy_attachment" "cw_agent_policy_attach" { 152 | count = var.create_cluster || var.install_add_ons_in_existing_eks_cluster ? 1 : 0 153 | 154 | role = aws_iam_role.cw_observability_role[0].name 155 | policy_arn = "arn:aws:iam::aws:policy/CloudWatchAgentServerPolicy" 156 | } 157 | 158 | data "aws_iam_policy_document" "eks_cluster_kms" { 159 | count = var.create_cluster ? 
1 : 0 160 | statement { 161 | sid = "AllowKMSUseOfEncryptionKey" 162 | effect = "Allow" 163 | actions = [ 164 | "kms:Encrypt", 165 | "kms:Decrypt", 166 | "kms:ReEncrypt*", 167 | "kms:GenerateDataKey*", 168 | "kms:DescribeKey", 169 | "kms:CreateGrant" 170 | ] 171 | resources = [ 172 | aws_kms_key.eks_secrets[0].arn 173 | ] 174 | } 175 | } 176 | 177 | resource "aws_iam_role_policy" "eks_cluster_kms_policy" { 178 | count = var.create_cluster ? 1 : 0 179 | name = "EKS-Cluster-KMS-Policy" 180 | role = aws_iam_role.eks_cluster[0].name 181 | 182 | policy = data.aws_iam_policy_document.eks_cluster_kms[0].json 183 | } -------------------------------------------------------------------------------- /litellm-terraform-stack/modules/eks/kms.tf: -------------------------------------------------------------------------------- 1 | resource "aws_kms_key" "eks_secrets" { 2 | count = var.create_cluster ? 1 : 0 3 | description = "KMS key for encrypting EKS Secrets" 4 | enable_key_rotation = true 5 | deletion_window_in_days = 30 6 | 7 | # Key policy that allows: 8 | # - Root to do anything (standard practice) 9 | # - The EKS cluster role to use the key for encryption (kms:Encrypt, kms:Decrypt, etc.) 
10 | policy = jsonencode({ 11 | Version = "2012-10-17" 12 | Id = "key-default-1" 13 | Statement = [ 14 | { 15 | Sid = "Enable IAM User Permissions" 16 | Effect = "Allow" 17 | Principal = { 18 | AWS = "arn:aws:iam::${data.aws_caller_identity.current.account_id}:root" 19 | } 20 | Action = "kms:*" 21 | Resource = "*" 22 | }, 23 | { 24 | Sid = "Allow use of the key by EKS Cluster Role" 25 | Effect = "Allow" 26 | Principal = { 27 | AWS = aws_iam_role.eks_cluster[0].arn 28 | } 29 | Action = [ 30 | "kms:Encrypt", 31 | "kms:Decrypt", 32 | "kms:ReEncrypt*", 33 | "kms:GenerateDataKey*", 34 | "kms:DescribeKey", 35 | "kms:CreateGrant" 36 | ] 37 | Resource = "*" 38 | } 39 | ] 40 | }) 41 | } 42 | -------------------------------------------------------------------------------- /litellm-terraform-stack/modules/eks/outputs.tf: -------------------------------------------------------------------------------- 1 | # output "vpc" { 2 | # description = "Amazon VPC full configuration" 3 | # value = module.vpc 4 | # } 5 | 6 | output "eks" { 7 | description = "Amazon EKS Cluster full configuration" 8 | value = var.create_cluster ? 
aws_eks_cluster.this[0] : null 9 | } 10 | 11 | output "configure_kubectl" { 12 | description = "Configure kubectl: make sure you're logged in with the correct AWS profile and run the following command to update your kubeconfig" 13 | value = "aws eks --region ${data.aws_region.current.name} update-kubeconfig --name ${local.cluster_name}" 14 | } 15 | 16 | # Outputs matching the CDK configuration 17 | output "cluster_name" { 18 | description = "The name of the EKS cluster" 19 | value = local.cluster_name 20 | } 21 | 22 | output "cluster_endpoint" { 23 | description = "The endpoint for the EKS cluster" 24 | value = local.cluster_endpoint 25 | } 26 | 27 | output "cluster_security_group_id" { 28 | description = "Security group ID attached to the EKS cluster" 29 | value = local.cluster_security_group_id 30 | } 31 | 32 | output "eks_cluster_name" { 33 | description = "Name of the EKS cluster" 34 | value = local.cluster_name 35 | } 36 | 37 | output "eks_deployment_name" { 38 | description = "Name of the Kubernetes deployment" 39 | value = kubernetes_deployment.litellm.metadata[0].name 40 | } 41 | 42 | output "public_subnet_ids" { 43 | description = "IDs of the public subnets" 44 | value = data.aws_subnets.public.ids 45 | } 46 | 47 | output "private_subnet_ids" { 48 | description = "IDs of the private subnets" 49 | value = data.aws_subnets.private.ids 50 | } 51 | 52 | output "litellm_url" { 53 | description = "The URL for the LiteLLM service" 54 | value = "https://${aws_route53_record.litellm.name}" 55 | } 56 | 57 | output "cluster_ca" { 58 | value = local.cluster_ca 59 | } 60 | 61 | -------------------------------------------------------------------------------- /litellm-terraform-stack/modules/eks/route53.tf: -------------------------------------------------------------------------------- 1 | 2 | data "aws_route53_zone" "selected" { 3 | name = var.hosted_zone_name 4 | private_zone = var.public_load_balancer ? 
false : true 5 | } 6 | 7 | 8 | # Create the A record 9 | resource "aws_route53_record" "litellm" { 10 | zone_id = data.aws_route53_zone.selected.zone_id 11 | name = var.record_name # e.g., "litellm.mirodrr.people.aws.dev" 12 | type = "A" 13 | 14 | alias { 15 | name = data.aws_lb.ingress_alb.dns_name 16 | zone_id = data.aws_lb.ingress_alb.zone_id 17 | evaluate_target_health = true 18 | } 19 | 20 | depends_on = [kubernetes_ingress_v1.litellm] 21 | } -------------------------------------------------------------------------------- /litellm-terraform-stack/modules/eks/variables.tf: -------------------------------------------------------------------------------- 1 | variable "name" { 2 | description = "Standard name to be used as prefix on all resources." 3 | type = string 4 | } 5 | 6 | variable "private_subnet_ids" { 7 | description = "List of private subnet IDs" 8 | type = list(string) 9 | } 10 | 11 | variable "public_subnet_ids" { 12 | description = "List of public subnet IDs" 13 | type = list(string) 14 | } 15 | 16 | variable "existing_cluster_name" { 17 | description = "Name of the existing EKS Cluster." 18 | type = string 19 | } 20 | 21 | variable "cluster_version" { 22 | description = "Kubernetes version for Amazon EKS Cluster." 
23 | type = string 24 | } 25 | 26 | # Variables needed for the configuration 27 | variable "config_bucket_arn" { 28 | description = "ARN of the configuration bucket" 29 | type = string 30 | } 31 | 32 | variable "log_bucket_arn" { 33 | description = "ARN of the log bucket" 34 | type = string 35 | } 36 | 37 | # Required variables 38 | variable "ecr_litellm_repository_url" { 39 | description = "URL of the ECR repository for LiteLLM" 40 | type = string 41 | } 42 | 43 | variable "ecr_middleware_repository_url" { 44 | description = "URL of the ECR repository for middleware" 45 | type = string 46 | } 47 | 48 | variable "litellm_version" { 49 | description = "Version tag for LiteLLM image" 50 | type = string 51 | } 52 | 53 | variable "config_bucket_name" { 54 | description = "Name of the S3 bucket containing config" 55 | type = string 56 | } 57 | 58 | variable "redis_host" { 59 | description = "The Redis host name" 60 | type = string 61 | } 62 | 63 | variable "redis_port" { 64 | description = "The Redis port" 65 | type = string 66 | } 67 | 68 | variable "redis_password" { 69 | description = "The Redis password" 70 | type = string 71 | } 72 | 73 | variable "database_url" { 74 | description = "Database connection URL" 75 | type = string 76 | } 77 | 78 | variable "litellm_master_key" { 79 | description = "LiteLLM master key" 80 | type = string 81 | sensitive = true 82 | } 83 | 84 | variable "litellm_salt_key" { 85 | description = "LiteLLM salt key" 86 | type = string 87 | sensitive = true 88 | } 89 | 90 | variable "openai_api_key" { 91 | description = "OpenAI API key" 92 | type = string 93 | sensitive = true 94 | } 95 | 96 | variable "azure_openai_api_key" { 97 | description = "Azure OpenAI API key" 98 | type = string 99 | sensitive = true 100 | } 101 | 102 | variable "azure_api_key" { 103 | description = "Azure API key" 104 | type = string 105 | sensitive = true 106 | } 107 | 108 | variable "anthropic_api_key" { 109 | description = "Anthropic API key" 110 | type = string 111 
| sensitive = true 112 | } 113 | 114 | variable "groq_api_key" { 115 | description = "Groq API key" 116 | type = string 117 | sensitive = true 118 | } 119 | 120 | variable "cohere_api_key" { 121 | description = "Cohere API key" 122 | type = string 123 | sensitive = true 124 | } 125 | 126 | variable "co_api_key" { 127 | description = "Co API key" 128 | type = string 129 | sensitive = true 130 | } 131 | 132 | variable "hf_token" { 133 | description = "HuggingFace token" 134 | type = string 135 | sensitive = true 136 | } 137 | 138 | variable "huggingface_api_key" { 139 | description = "HuggingFace API key" 140 | type = string 141 | sensitive = true 142 | } 143 | 144 | variable "databricks_api_key" { 145 | description = "Databricks API key" 146 | type = string 147 | sensitive = true 148 | } 149 | 150 | variable "gemini_api_key" { 151 | description = "Gemini API key" 152 | type = string 153 | sensitive = true 154 | } 155 | 156 | variable "codestral_api_key" { 157 | description = "Codestral API key" 158 | type = string 159 | sensitive = true 160 | } 161 | 162 | variable "mistral_api_key" { 163 | description = "Mistral API key" 164 | type = string 165 | sensitive = true 166 | } 167 | 168 | variable "azure_ai_api_key" { 169 | description = "Azure AI API key" 170 | type = string 171 | sensitive = true 172 | } 173 | 174 | variable "nvidia_nim_api_key" { 175 | description = "NVIDIA NIM API key" 176 | type = string 177 | sensitive = true 178 | } 179 | 180 | variable "xai_api_key" { 181 | description = "XAI API key" 182 | type = string 183 | sensitive = true 184 | } 185 | 186 | variable "perplexityai_api_key" { 187 | description = "PerplexityAI API key" 188 | type = string 189 | sensitive = true 190 | } 191 | 192 | variable "github_api_key" { 193 | description = "GitHub API key" 194 | type = string 195 | sensitive = true 196 | } 197 | 198 | variable "deepseek_api_key" { 199 | description = "Deepseek API key" 200 | type = string 201 | sensitive = true 202 | } 203 | 204 | 
variable "ai21_api_key" {
  description = "AI21 API key"
  type        = string
  sensitive   = true
}

variable "langsmith_api_key" {
  description = "Langsmith API key"
  type        = string
  sensitive   = true
}

variable "langsmith_project" {
  description = "Langsmith project"
  type        = string
}

variable "langsmith_default_run_name" {
  description = "langsmith default run name"
  type        = string
}

variable "okta_audience" {
  description = "Okta audience"
  type        = string
}

variable "okta_issuer" {
  description = "Okta issuer"
  type        = string
}


variable "certificate_arn" {
  description = "ARN of the ACM certificate"
  type        = string
}

variable "wafv2_acl_arn" {
  description = "ARN of the WAFv2 ACL"
  type        = string
}

variable "record_name" {
  description = "record name for the ingress"
  type        = string
}

variable "hosted_zone_name" {
  description = "Hosted zone name for the ingress"
  type        = string
}

# Variables
variable "create_cluster" {
  description = "Controls if EKS cluster should be created"
  type        = bool
}

variable "vpc_id" {
  description = "VPC ID where the cluster and nodes will be deployed"
  type        = string
}

variable "db_security_group_id" {
  description = "RDS db security group id"
  type        = string
}

variable "redis_security_group_id" {
  description = "redis security group id"
  type        = string
}

variable "architecture" {
  # FIX: description and error_message previously said "arm64", but the
  # validation condition only accepts "arm" — a user typing "arm64" as the
  # message suggested would always fail validation.
  description = "The architecture for the node group instances (x86 or arm)"
  type        = string
  validation {
    condition     = contains(["x86", "arm"], var.architecture)
    error_message = "Architecture must be either 'x86' or 'arm'."
284 | } 285 | } 286 | 287 | variable "disable_outbound_network_access" { 288 | description = "Whether to disable outbound network access for the EKS Cluster" 289 | type = bool 290 | } 291 | 292 | variable "eks_alb_controller_private_ecr_repository_name" { 293 | description = "The name of the ECR repo that is used to store the EKS ALB Controller Container Image in EKS deployments with outbound network access disabled" 294 | type = string 295 | } 296 | 297 | variable "install_add_ons_in_existing_eks_cluster" { 298 | description = "Whether to install add ons onto an existing EKS Cluster" 299 | type = bool 300 | } 301 | 302 | variable "desired_capacity" { 303 | description = "Desired Capacity on the node group and deployment" 304 | type = number 305 | } 306 | 307 | variable "min_capacity" { 308 | description = "Min Capacity on the node group" 309 | type = number 310 | } 311 | 312 | variable "max_capacity" { 313 | description = "Max Capacity on the node group" 314 | type = number 315 | } 316 | 317 | variable "arm_instance_type" { 318 | description = "Instance type for arm deployment" 319 | type = string 320 | } 321 | 322 | variable "x86_instance_type" { 323 | description = "Instance type for x86 deployment" 324 | type = string 325 | } 326 | 327 | variable "arm_ami_type" { 328 | description = "AMI type for arm deployment" 329 | type = string 330 | } 331 | 332 | variable "x86_ami_type" { 333 | description = "AMI type for x86 deployment" 334 | type = string 335 | } 336 | 337 | variable "public_load_balancer" { 338 | description = "whether the load balancer is public" 339 | type = bool 340 | } 341 | 342 | variable "disable_swagger_page" { 343 | type = bool 344 | description = "Whether to disable the swagger page or not" 345 | } 346 | 347 | variable "disable_admin_ui" { 348 | type = bool 349 | description = "Whether to disable the admin UI or not" 350 | } 351 | 352 | variable "langfuse_public_key" { 353 | type = string 354 | description = "the public key of your langfuse 
deployment" 355 | } 356 | 357 | variable "langfuse_secret_key" { 358 | type = string 359 | description = "the secret key of your langfuse deployment" 360 | } 361 | 362 | variable "langfuse_host" { 363 | type = string 364 | description = "the hostname of your langfuse deployment." 365 | } -------------------------------------------------------------------------------- /litellm-terraform-stack/modules/eks/versions.tf: -------------------------------------------------------------------------------- 1 | terraform { 2 | required_version = ">= 1.3" 3 | 4 | required_providers { 5 | aws = { 6 | source = "hashicorp/aws" 7 | version = ">= 5.34" 8 | } 9 | kubernetes = { 10 | source = "hashicorp/kubernetes" 11 | version = ">= 2.20" 12 | } 13 | time = { 14 | source = "hashicorp/time" 15 | version = ">= 0.13.0" 16 | } 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /litellm-terraform-stack/outputs.tf: -------------------------------------------------------------------------------- 1 | output "LitellmEcsCluster" { 2 | value = try(module.ecs_cluster[0].LitellmEcsCluster, "") 3 | description = "Name of the ECS Cluster" 4 | } 5 | 6 | output "LitellmEcsTask" { 7 | value = try(module.ecs_cluster[0].LitellmEcsTask, "") 8 | description = "Name of the ECS Service" 9 | } 10 | 11 | output "eks_cluster_name" { 12 | description = "Name of the EKS cluster" 13 | value = try(module.eks_cluster[0].eks_cluster_name, "") 14 | } 15 | 16 | output "eks_deployment_name" { 17 | description = "Name of the Kubernetes deployment" 18 | value = try(module.eks_cluster[0].eks_deployment_name, "") 19 | } 20 | 21 | output "cloudfront_distribution_id" { 22 | description = "The ID of the CloudFront distribution" 23 | value = var.use_cloudfront ? try(module.ecs_cluster[0].cloudfront_distribution_id, "") : "" 24 | } 25 | 26 | output "cloudfront_domain_name" { 27 | description = "The domain name of the CloudFront distribution" 28 | value = var.use_cloudfront ? 
try(module.ecs_cluster[0].cloudfront_domain_name, "") : "" 29 | } 30 | 31 | output "ServiceURL" { 32 | description = "The service URL" 33 | value = var.use_route53 ? "https://${var.record_name}.${var.hosted_zone_name}" : ( 34 | var.use_cloudfront ? "https://${try(module.ecs_cluster[0].cloudfront_domain_name, "")}" : "https://${try(module.ecs_cluster[0].alb_dns_name, "")}" 35 | ) 36 | } 37 | 38 | output "vpc_id" { 39 | description = "the vpc id we deployed to" 40 | value = module.base.VpcId 41 | } 42 | 43 | output "ConfigBucketName" { 44 | description = "The Name of the configuration bucket" 45 | value = module.base.ConfigBucketName 46 | } 47 | 48 | # Added to expose the CloudFront authentication secret once after creation 49 | # This allows for troubleshooting and verification if needed 50 | output "cloudfront_auth_secret" { 51 | description = "The CloudFront authentication secret (only shown once after creation)" 52 | value = var.use_cloudfront ? try(module.ecs_cluster[0].cloudfront_auth_secret, null) : null 53 | sensitive = true 54 | } 55 | -------------------------------------------------------------------------------- /litellm-terraform-stack/providers.tf: -------------------------------------------------------------------------------- 1 | terraform { 2 | backend "s3" {} 3 | } 4 | 5 | data "aws_caller_identity" "current" {} 6 | data "aws_region" "current" {} 7 | 8 | locals { 9 | SolutionNameKeySatisfyingRestrictions = "Guidance-for-Running-Generative-AI-Gateway-Proxy-on-AWS" 10 | common_labels = { 11 | project = "llmgateway" 12 | AWSSolution = "ToDo" 13 | GithubRepo = "https://github.com/aws-solutions-library-samples/" 14 | SolutionID = "SO9022" 15 | SolutionNameKey = "Guidance for Running Generative AI Gateway Proxy on AWS" 16 | SolutionVersionKey = "1.0.0" 17 | } 18 | } 19 | 20 | 21 | provider "aws" { 22 | default_tags { 23 | tags = local.common_labels 24 | } 25 | } 26 | 27 | resource "aws_servicecatalogappregistry_application" "solution_application" { 28 | 
name = "${local.SolutionNameKeySatisfyingRestrictions}-${data.aws_region.current.name}-${data.aws_caller_identity.current.account_id}" 29 | description = "Service Catalog application to track and manage all your resources for the solution ${local.common_labels.SolutionNameKey}" 30 | 31 | tags = { 32 | "Solutions:SolutionID" = local.common_labels.SolutionID 33 | "Solutions:SolutionName" = local.common_labels.SolutionNameKey 34 | "Solutions:SolutionVersion" = local.common_labels.SolutionVersionKey 35 | "Solutions:ApplicationType" = "AWS-Solutions" 36 | } 37 | } 38 | 39 | 40 | 41 | data "aws_eks_cluster_auth" "cluster" { 42 | count = local.platform == "EKS" ? 1 : 0 43 | name = module.eks_cluster[0].cluster_name 44 | } 45 | 46 | provider "kubernetes" { 47 | host = local.platform == "EKS" ? module.eks_cluster[0].cluster_endpoint : "" 48 | cluster_ca_certificate = local.platform == "EKS" ? base64decode(module.eks_cluster[0].cluster_ca) : "" 49 | token = local.platform == "EKS" ? data.aws_eks_cluster_auth.cluster[0].token : "" 50 | } 51 | 52 | provider "helm" { 53 | kubernetes { 54 | host = local.platform == "EKS" ? module.eks_cluster[0].cluster_endpoint : "" 55 | cluster_ca_certificate = local.platform == "EKS" ? base64decode(module.eks_cluster[0].cluster_ca) : "" 56 | token = local.platform == "EKS" ? 
data.aws_eks_cluster_auth.cluster[0].token : "" 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /media/Gateway latest architecture with CloudFront.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-solutions-library-samples/guidance-for-multi-provider-generative-ai-gateway-on-aws/4570827d7aed49d60649cdc81bdf085868bf5511/media/Gateway latest architecture with CloudFront.pptx -------------------------------------------------------------------------------- /media/Gateway-Architecture-with-CloudFront.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-solutions-library-samples/guidance-for-multi-provider-generative-ai-gateway-on-aws/4570827d7aed49d60649cdc81bdf085868bf5511/media/Gateway-Architecture-with-CloudFront.png -------------------------------------------------------------------------------- /media/Reference_architecture_ECS_EKS_platform_combined.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-solutions-library-samples/guidance-for-multi-provider-generative-ai-gateway-on-aws/4570827d7aed49d60649cdc81bdf085868bf5511/media/Reference_architecture_ECS_EKS_platform_combined.jpg -------------------------------------------------------------------------------- /media/Required-EKS-Add-ons.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-solutions-library-samples/guidance-for-multi-provider-generative-ai-gateway-on-aws/4570827d7aed49d60649cdc81bdf085868bf5511/media/Required-EKS-Add-ons.png -------------------------------------------------------------------------------- /media/Tested-Bring-Your-Own-EKS-Cluster-Configuration.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/aws-solutions-library-samples/guidance-for-multi-provider-generative-ai-gateway-on-aws/4570827d7aed49d60649cdc81bdf085868bf5511/media/Tested-Bring-Your-Own-EKS-Cluster-Configuration.png -------------------------------------------------------------------------------- /media/architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-solutions-library-samples/guidance-for-multi-provider-generative-ai-gateway-on-aws/4570827d7aed49d60649cdc81bdf085868bf5511/media/architecture.png -------------------------------------------------------------------------------- /middleware/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.11-slim 2 | 3 | WORKDIR /app 4 | 5 | COPY requirements.txt . 6 | RUN pip install --no-cache-dir -r requirements.txt 7 | 8 | COPY app.py . 9 | 10 | CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "3000"] -------------------------------------------------------------------------------- /middleware/docker-build-and-deploy.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [ $# -ne 2 ]; then 4 | echo "Usage: $0 " 5 | exit 1 6 | fi 7 | 8 | APP_NAME=$1 9 | ARCH=$2 10 | 11 | AWS_REGION=$(aws ec2 describe-availability-zones --output text --query 'AvailabilityZones[0].[RegionName]') 12 | export AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query "Account" --output text) 13 | 14 | # Check if the repository already exists 15 | REPO_EXISTS=$(aws ecr describe-repositories --repository-names $APP_NAME 2>/dev/null) 16 | 17 | if [ -z "$REPO_EXISTS" ]; then 18 | # Repository does not exist, create it with tag 19 | aws ecr create-repository --repository-name $APP_NAME --tags Key=project,Value=llmgateway 20 | else 21 | echo "Repository $APP_NAME already exists, checking tags..." 
22 | 23 | # Get current tags for the repository 24 | CURRENT_TAGS=$(aws ecr list-tags-for-resource --resource-arn arn:aws:ecr:${AWS_REGION}:${AWS_ACCOUNT_ID}:repository/${APP_NAME}) 25 | 26 | # Check if project=llmgateway tag exists 27 | if ! echo "$CURRENT_TAGS" | grep -q '"Key": "project".*"Value": "llmgateway"'; then 28 | echo "Adding project=llmgateway tag..." 29 | aws ecr tag-resource \ 30 | --resource-arn arn:aws:ecr:${AWS_REGION}:${AWS_ACCOUNT_ID}:repository/${APP_NAME} \ 31 | --tags Key=project,Value=llmgateway 32 | else 33 | echo "Tag project=llmgateway already exists." 34 | fi 35 | fi 36 | 37 | echo $ARCH 38 | case $ARCH in 39 | "x86") 40 | DOCKER_ARCH="linux/amd64" 41 | ;; 42 | "arm") 43 | DOCKER_ARCH="linux/arm64" 44 | ;; 45 | *) 46 | echo "Unsupported architecture: $ARCH" 47 | exit 1 48 | ;; 49 | esac 50 | 51 | echo $DOCKER_ARCH 52 | 53 | aws ecr get-login-password --region $AWS_REGION | docker login --username AWS --password-stdin $AWS_ACCOUNT_ID.dkr.ecr.$AWS_REGION.amazonaws.com 54 | docker build --platform $DOCKER_ARCH -t $APP_NAME . 
55 | docker tag $APP_NAME\:latest $AWS_ACCOUNT_ID.dkr.ecr.$AWS_REGION.amazonaws.com/$APP_NAME\:latest 56 | docker push $AWS_ACCOUNT_ID.dkr.ecr.$AWS_REGION.amazonaws.com/$APP_NAME\:latest -------------------------------------------------------------------------------- /middleware/requirements.txt: -------------------------------------------------------------------------------- 1 | fastapi 2 | uvicorn 3 | httpx 4 | pydantic 5 | openai 6 | botocore 7 | google-crc32c 8 | boto3 9 | sqlalchemy 10 | psycopg2-binary 11 | okta-jwt-verifier 12 | cryptography 13 | anyio -------------------------------------------------------------------------------- /scripts/.env.template: -------------------------------------------------------------------------------- 1 | BASE_URL= 2 | API_KEY= 3 | MODELS=anthropic.claude-3-5-sonnet-20241022-v2:0,gpt-4o -------------------------------------------------------------------------------- /scripts/benchmark.py: -------------------------------------------------------------------------------- 1 | from openai import OpenAI 2 | from litellm import completion_cost 3 | import time 4 | import click 5 | from tqdm import tqdm 6 | from tabulate import tabulate 7 | from termcolor import colored 8 | import os 9 | from dotenv import load_dotenv 10 | 11 | questions = ["Can you tell me a story?", "What are 5 good business ideas?"] 12 | 13 | load_dotenv() 14 | 15 | base_url = os.getenv("BASE_URL") # Litellm proxy base url 16 | api_key = os.getenv("API_KEY") # Litellm proxy api key 17 | 18 | models = os.getenv("MODELS").split( 19 | "," 20 | ) # List of models to benchmark. 
Values should be subset of model ids from your config.yaml 21 | 22 | # List of questions to benchmark (replace with your questions) 23 | 24 | # Enter your system prompt here 25 | system_prompt = """ 26 | You are LiteLLMs helpful assistant 27 | """ 28 | 29 | 30 | @click.command() 31 | @click.option( 32 | "--system-prompt", 33 | default="You are a helpful assistant that can answer questions.", 34 | help="System prompt for the conversation.", 35 | ) 36 | def main(system_prompt): 37 | client = OpenAI(base_url=base_url, api_key=api_key) 38 | 39 | for question in questions: 40 | data = [] # Data for the current question 41 | 42 | with tqdm(total=len(models)) as pbar: 43 | for model in models: 44 | colored_description = colored( 45 | f"Running question: {question} for model: {model}", "green" 46 | ) 47 | pbar.set_description(colored_description) 48 | start_time = time.time() 49 | 50 | response = client.chat.completions.create( 51 | model=model, 52 | max_tokens=500, 53 | messages=[ 54 | {"role": "system", "content": system_prompt}, 55 | {"role": "user", "content": question}, 56 | ], 57 | ).model_dump() 58 | 59 | end = time.time() 60 | total_time = end - start_time 61 | cost = completion_cost(completion_response=response) 62 | raw_response = response["choices"][0]["message"]["content"] 63 | 64 | data.append( 65 | { 66 | "Model": colored(model, "light_blue"), 67 | "Response": raw_response, # Colorize the response 68 | "ResponseTime": colored(f"{total_time:.2f} seconds", "red"), 69 | "Cost": colored(f"${cost:.6f}", "green"), # Colorize the cost 70 | } 71 | ) 72 | 73 | pbar.update(1) 74 | 75 | # Separate headers from the data 76 | headers = ["Model", "Response", "Response Time (seconds)", "Cost ($)"] 77 | colwidths = [15, 80, 15, 10] 78 | 79 | # Create a nicely formatted table for the current question 80 | table = tabulate( 81 | [list(d.values()) for d in data], 82 | headers, 83 | tablefmt="grid", 84 | maxcolwidths=colwidths, 85 | ) 86 | 87 | # Print the table for the current 
question 88 | colored_question = colored(question, "green") 89 | click.echo(f"\nBenchmark Results for '{colored_question}':") 90 | click.echo(table) # Display the formatted table 91 | 92 | 93 | if __name__ == "__main__": 94 | main() 95 | -------------------------------------------------------------------------------- /scripts/requirements.txt: -------------------------------------------------------------------------------- 1 | litellm 2 | tabulate 3 | termcolor 4 | python-dotenv -------------------------------------------------------------------------------- /test-middleware-streaming.py: -------------------------------------------------------------------------------- 1 | import boto3 2 | import os 3 | from botocore.client import Config 4 | from botocore import UNSIGNED 5 | from typing import Generator, Dict, Any, Optional 6 | 7 | 8 | global_session_id: Optional[str] = None 9 | 10 | 11 | def create_bedrock_client(): 12 | """ 13 | Creates a Bedrock client with custom endpoint and authorization header. 14 | Uses environment variables for configuration. 
15 | 16 | Required environment variables: 17 | - API_ENDPOINT: Custom Bedrock endpoint URL 18 | - API_KEY: Authorization bearer token 19 | - AWS_REGION: AWS region 20 | 21 | Returns: 22 | boto3.client: Configured Bedrock client 23 | """ 24 | endpoint = os.getenv("API_ENDPOINT") 25 | api_key = os.getenv("API_KEY") 26 | region = os.getenv("AWS_REGION") 27 | 28 | if not all([endpoint, api_key, region]): 29 | raise ValueError( 30 | "Missing required environment variables: API_ENDPOINT, API_KEY, AWS_REGION" 31 | ) 32 | 33 | # Initialize session and configure client 34 | session = boto3.Session() 35 | client_config = Config( 36 | signature_version=UNSIGNED, # Disable SigV4 signing 37 | retries={"max_attempts": 10, "mode": "standard"}, 38 | ) 39 | 40 | # Create the Bedrock client 41 | client = session.client( 42 | "bedrock-runtime", 43 | endpoint_url=endpoint, 44 | config=client_config, 45 | region_name=region, 46 | ) 47 | 48 | # Define authorization header handler 49 | def add_authorization_header(request, **kwargs): 50 | request.headers["Authorization"] = f"Bearer {api_key}" 51 | 52 | # Register the event handler 53 | client.meta.events.register("request-created.*", add_authorization_header) 54 | 55 | return client 56 | 57 | 58 | def extract_session_id(response) -> Optional[str]: 59 | """ 60 | Extracts the x-session-id from the response headers. 
61 | 62 | Args: 63 | response: The raw response object from the Bedrock API 64 | 65 | Returns: 66 | str: The session ID if found, None otherwise 67 | """ 68 | try: 69 | # Access the response metadata which contains the headers 70 | headers = response["ResponseMetadata"]["HTTPHeaders"] 71 | print(f"headers: {headers}") 72 | session_id = headers.get("x-session-id") 73 | print(f"session_id: {session_id}") 74 | return session_id 75 | except (KeyError, AttributeError): 76 | print("Warning: Could not extract x-session-id from response headers") 77 | return None 78 | 79 | 80 | def send_message_stream( 81 | client, 82 | message: str, 83 | model_id: str = "anthropic.claude-3-haiku-20240307-v1:0", 84 | max_tokens: int = 1000, 85 | temperature: float = 0.7, 86 | ) -> Generator[Dict[str, Any], None, None]: 87 | """ 88 | Sends a message to the Bedrock Converse API with streaming response. 89 | 90 | Args: 91 | client: Configured Bedrock client 92 | message (str): Message to send 93 | model_id (str): ID of the model to use 94 | max_tokens (int): Maximum number of tokens to generate 95 | temperature (float): Temperature for response generation 96 | 97 | Yields: 98 | dict: Streaming response events 99 | """ 100 | 101 | global global_session_id 102 | 103 | try: 104 | if global_session_id: 105 | response = client.converse_stream( 106 | modelId=model_id, 107 | messages=[{"role": "user", "content": [{"text": message}]}], 108 | inferenceConfig={ 109 | "maxTokens": max_tokens, 110 | "temperature": temperature, 111 | }, 112 | additionalModelRequestFields={"session_id": global_session_id}, 113 | ) 114 | else: 115 | response = client.converse_stream( 116 | modelId=model_id, 117 | messages=[{"role": "user", "content": [{"text": message}]}], 118 | inferenceConfig={ 119 | "maxTokens": max_tokens, 120 | "temperature": temperature, 121 | }, 122 | additionalModelRequestFields={"enable_history": True}, 123 | ) 124 | global_session_id = extract_session_id(response) 125 | if global_session_id: 126 | 
print(f"global_session_id: {global_session_id}") 127 | print(f"response: {response}") 128 | print(f"response['stream']: {response["stream"]}") 129 | 130 | # Process the streaming response 131 | for event in response["stream"]: 132 | yield event 133 | 134 | except Exception as e: 135 | print(f"Error in streaming request: {str(e)}") 136 | raise 137 | 138 | 139 | def process_stream_response(event: Dict[str, Any]) -> str: 140 | """ 141 | Processes a streaming response event and extracts the text content if present. 142 | 143 | Args: 144 | event (dict): Streaming response event 145 | 146 | Returns: 147 | str: Extracted text content or empty string 148 | """ 149 | if "contentBlockDelta" in event: 150 | delta = event["contentBlockDelta"].get("delta", {}) 151 | if "text" in delta: 152 | return delta["text"] 153 | return "" 154 | 155 | 156 | def send_message_stream_wrapper(client, message): 157 | try: 158 | 159 | # Accumulate the response 160 | 161 | # Process the streaming response 162 | for event in send_message_stream(client, message): 163 | print(f"event: {event}") 164 | # Handle different event types 165 | if "internalServerException" in event: 166 | raise Exception( 167 | f"Internal server error: {event['internalServerException']}" 168 | ) 169 | elif "modelStreamErrorException" in event: 170 | raise Exception( 171 | f"Model stream error: {event['modelStreamErrorException']}" 172 | ) 173 | elif "validationException" in event: 174 | raise Exception(f"Validation error: {event['validationException']}") 175 | elif "throttlingException" in event: 176 | raise Exception(f"Throttling error: {event['throttlingException']}") 177 | # Handle metadata and stop events 178 | if "messageStop" in event: 179 | print("\n\nStream finished.") 180 | print(f"Stop reason: {event['messageStop'].get('stopReason')}") 181 | elif "metadata" in event: 182 | usage = event["metadata"].get("usage", {}) 183 | if usage: 184 | print(f"\nToken usage: {usage}") 185 | 186 | except Exception as e: 187 | 
print(f"Error in main: {str(e)}") 188 | 189 | 190 | def main(): 191 | # Create the client 192 | client = create_bedrock_client() 193 | 194 | # Example of using streaming response 195 | print("Sending streaming request...") 196 | message = "tell me a short story." 197 | send_message_stream_wrapper(client=client, message=message) 198 | message2 = "What did I last say to you?" 199 | send_message_stream_wrapper(client=client, message=message2) 200 | 201 | 202 | if __name__ == "__main__": 203 | main() 204 | -------------------------------------------------------------------------------- /test-middleware-synchronous.py: -------------------------------------------------------------------------------- 1 | import boto3 2 | import os 3 | from botocore.client import Config 4 | from botocore import UNSIGNED 5 | from botocore.exceptions import ClientError 6 | 7 | 8 | def create_bedrock_client(): 9 | # Get configuration from environment variables 10 | endpoint = os.getenv("API_ENDPOINT") 11 | api_key = os.getenv("API_KEY") 12 | region = os.getenv("AWS_REGION") 13 | 14 | if not all([endpoint, api_key, region]): 15 | raise ValueError( 16 | "Missing required environment variables: API_ENDPOINT, API_KEY, AWS_REGION" 17 | ) 18 | 19 | # Initialize session and configure client 20 | session = boto3.Session() 21 | client_config = Config( 22 | signature_version=UNSIGNED, # Disable SigV4 signing 23 | ) 24 | 25 | # Create the Bedrock client 26 | client = session.client( 27 | "bedrock-runtime", 28 | endpoint_url=endpoint, 29 | config=client_config, 30 | region_name=region, 31 | ) 32 | 33 | # Define authorization header handler 34 | def add_authorization_header(request, **kwargs): 35 | request.headers["Authorization"] = f"Bearer {api_key}" 36 | 37 | # Register the event handler 38 | client.meta.events.register("request-created.*", add_authorization_header) 39 | 40 | return client 41 | 42 | 43 | def send_message( 44 | client, 45 | message, 46 | 
model_id="anthropic.claude-3-haiku-20240307-v1:0", 47 | session_id=None, 48 | ): 49 | """ 50 | Sends a message to the Bedrock Converse API. 51 | 52 | Args: 53 | client: Configured Bedrock client 54 | message (str): Message to send 55 | model_id (str): ID of the model to use 56 | 57 | Returns: 58 | dict: API response 59 | """ 60 | 61 | # model_id = "arn:aws:bedrock:us-west-2:235614385815:prompt/6LE1KDKISG:2" 62 | body = {} 63 | try: 64 | if session_id: 65 | response = client.converse( 66 | modelId=model_id, 67 | # promptVariables={ 68 | # "topic": {"text": "fruit"}, 69 | # }, 70 | additionalModelRequestFields={"session_id": session_id}, 71 | messages=[{"role": "user", "content": [{"text": message}]}], 72 | ) 73 | else: 74 | response = client.converse( 75 | modelId=model_id, 76 | # promptVariables={ 77 | # "topic": {"text": "fruit"}, 78 | # }, 79 | additionalModelRequestFields={"enable_history": True}, 80 | messages=[{"role": "user", "content": [{"text": message}]}], 81 | ) 82 | 83 | return response 84 | except Exception as e: 85 | print(f"Error sending message: {str(e)}") 86 | raise 87 | 88 | 89 | def main(): 90 | try: 91 | # Create the client 92 | client = create_bedrock_client() 93 | 94 | # Send a test message 95 | response = send_message(client=client, message="tell me a short story.") 96 | 97 | print("Response:", response) 98 | session_id = response["ResponseMetadata"]["HTTPHeaders"].get("x-session-id") 99 | print(f"session_id: {session_id}") 100 | response_2 = send_message( 101 | client=client, message="What did I last say to you?", session_id=session_id 102 | ) 103 | print("Response 2:", response_2) 104 | 105 | except ClientError as e: 106 | error_code = e.response["Error"]["Code"] 107 | error_message = e.response["Error"]["Message"] 108 | print(f"e.response: {e.response}") 109 | 110 | print(f"AWS Error: {error_code} - {error_message}") 111 | except Exception as e: 112 | print(f"Unexpected error: {str(e)}") 113 | 114 | 115 | if __name__ == "__main__": 116 | 
main() 117 | -------------------------------------------------------------------------------- /tests/.env.template: -------------------------------------------------------------------------------- 1 | API_ENDPOINT= 2 | API_KEY= 3 | MODEL_ID= 4 | MANAGED_PROMPT_ARN= 5 | MANAGED_PROMPT_VARIABLE_NAME= 6 | MANAGED_PROMPT_VARIABLE_VALUE= -------------------------------------------------------------------------------- /tests/locust_load_test.py: -------------------------------------------------------------------------------- 1 | import os 2 | import uuid 3 | from locust import HttpUser, task, between 4 | from dotenv import load_dotenv 5 | 6 | load_dotenv() 7 | 8 | base_url = os.getenv("API_ENDPOINT") 9 | api_key = os.getenv("API_KEY") 10 | 11 | 12 | class MyUser(HttpUser): 13 | host = base_url 14 | wait_time = between(0.5, 1) # Random wait time between requests 15 | 16 | @task(100) 17 | def litellm_completion(self): 18 | # no cache hits with this 19 | payload = { 20 | "model": "fake-openai-endpoint", 21 | "messages": [ 22 | { 23 | "role": "user", 24 | "content": f"{uuid.uuid4()} This is a test there will be no cache hits and we'll fill up the context" 25 | * 150, 26 | } 27 | ], 28 | } 29 | response = self.client.post("/chat/completions", json=payload) 30 | if response.status_code != 200: 31 | # log the errors in error.txt 32 | with open("error.txt", "a") as error_log: 33 | print(f"error: {response}") 34 | error_log.write(response.text + "\n") 35 | 36 | def on_start(self): 37 | self.api_key = api_key 38 | self.client.headers.update({"Authorization": f"Bearer {self.api_key}"}) 39 | -------------------------------------------------------------------------------- /tests/requirements.txt: -------------------------------------------------------------------------------- 1 | pytest 2 | requests 3 | openai 4 | pytest-asyncio 5 | aiohttp 6 | python-dotenv 7 | boto3 8 | locust -------------------------------------------------------------------------------- 
/update-litellm-config.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Uploads config/config.yaml to the LiteLLM config S3 bucket and restarts
# the ECS service or EKS deployment (per DEPLOYMENT_PLATFORM in .env) so
# the new config is picked up.
# -a exports every variable set from here on, so everything sourced from
# .env is visible to the aws/kubectl child processes.
set -aeuo pipefail

aws_region=$(aws ec2 describe-availability-zones --output text --query 'AvailabilityZones[0].[RegionName]')
echo "$aws_region"

# Load environment variables from .env file
source .env

# Check if config.yaml exists
if [ ! -f "config/config.yaml" ]; then
  echo "config/config.yaml does not exist, can't upload to S3"
  exit 1
fi

cd litellm-terraform-stack
ConfigBucketName=$(terraform output -raw ConfigBucketName)
cd ..

echo "uploading config.yaml to bucket $ConfigBucketName"

# Upload the config; quote expansions so empty/odd values fail loudly
# instead of mangling the command line (ShellCheck SC2086).
aws s3 cp config/config.yaml "s3://${ConfigBucketName}/config.yaml" --region "$aws_region"

echo "Upload complete"

cd litellm-terraform-stack
if [ "$DEPLOYMENT_PLATFORM" = "ECS" ]; then
  LITELLM_ECS_CLUSTER=$(terraform output -raw LitellmEcsCluster)
  LITELLM_ECS_TASK=$(terraform output -raw LitellmEcsTask)

  echo "Rebooting ECS Task $LITELLM_ECS_TASK on ECS cluster $LITELLM_ECS_CLUSTER"

  aws ecs update-service \
    --cluster "$LITELLM_ECS_CLUSTER" \
    --service "$LITELLM_ECS_TASK" \
    --force-new-deployment \
    --desired-count "$DESIRED_CAPACITY" \
    --no-cli-pager
fi

if [ "$DEPLOYMENT_PLATFORM" = "EKS" ]; then
  EKS_CLUSTER_NAME=$(terraform output -raw eks_cluster_name)
  EKS_DEPLOYMENT_NAME=$(terraform output -raw eks_deployment_name)
  echo "Rebooting EKS deployment $EKS_DEPLOYMENT_NAME on EKS cluster $EKS_CLUSTER_NAME"

  aws eks update-kubeconfig --region "$aws_region" --name "$EKS_CLUSTER_NAME"
  kubectl rollout restart deployment "$EKS_DEPLOYMENT_NAME"
fi
--------------------------------------------------------------------------------