├── .env.template ├── .gitignore ├── .gitlab └── issue_templates │ ├── Default.md │ ├── Documentation.md │ └── Enhancement.md ├── .gitleaksignore ├── ATTRIBUTION.md ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── Dockerfile ├── LICENSE ├── README.md ├── config ├── default-config-ap-northeast-1.yaml ├── default-config-ap-northeast-2.yaml ├── default-config-ap-northeast-3.yaml ├── default-config-ap-south-1.yaml ├── default-config-ap-southeast-1.yaml ├── default-config-ap-southeast-2.yaml ├── default-config-base.yaml ├── default-config-ca-central-1.yaml ├── default-config-eu-central-1.yaml ├── default-config-eu-north-1.yaml ├── default-config-eu-west-1.yaml ├── default-config-eu-west-2.yaml ├── default-config-eu-west-3.yaml ├── default-config-sa-east-1.yaml ├── default-config-us-east-1.yaml ├── default-config-us-east-2.yaml ├── default-config-us-west-1.yaml ├── default-config-us-west-2.yaml └── details-for-models-that-need-additional-manual-config.yaml ├── create-ec2-to-access-private-load-balancer.sh ├── create-fake-llm-load-testing-server.sh ├── delete-fake-llm-load-testing-server.sh ├── deploy.sh ├── docker-build-and-deploy.sh ├── install-cloud9-prerequisites.sh ├── litellm-fake-llm-load-testing-server-terraform ├── docker │ ├── Dockerfile │ ├── docker-build-and-deploy.sh │ ├── fake_llm_server.py │ └── requirements.txt ├── main.tf ├── outputs.tf ├── providers.tf └── variables.tf ├── litellm-private-load-balancer-ec2-terraform ├── main.tf ├── outputs.tf ├── providers.tf └── variables.tf ├── litellm-s3-log-bucket-terraform ├── outputs.tf ├── provider.tf ├── s3.tf └── variables.tf ├── litellm-terraform-stack ├── main.tf ├── modules │ ├── base │ │ ├── ecr.tf │ │ ├── iam.tf │ │ ├── locals.tf │ │ ├── network.tf │ │ ├── outputs.tf │ │ ├── rds.tf │ │ ├── redis.tf │ │ ├── route53.tf │ │ ├── s3.tf │ │ ├── secrets-manager.tf │ │ ├── variables.tf │ │ ├── vpc-endpoints.tf │ │ └── waf.tf │ ├── ecs │ │ ├── alb.tf │ │ ├── cloudfront.tf │ │ ├── cloudwatch.tf │ │ ├── ecs.tf │ │ ├── 
iam.tf │ │ ├── outputs.tf │ │ ├── route53.tf │ │ ├── s3.tf │ │ ├── secrets-manager.tf │ │ ├── security-groups.tf │ │ ├── variables.tf │ │ └── waf.tf │ └── eks │ │ ├── eks.tf │ │ ├── iam.tf │ │ ├── kms.tf │ │ ├── main.tf │ │ ├── outputs.tf │ │ ├── route53.tf │ │ ├── variables.tf │ │ └── versions.tf ├── outputs.tf ├── providers.tf └── variables.tf ├── media ├── Gateway latest architecture with CloudFront.pptx ├── Gateway-Architecture-with-CloudFront.png ├── Reference_architecture_ECS_EKS_platform_combined.jpg ├── Required-EKS-Add-ons.png ├── Tested-Bring-Your-Own-EKS-Cluster-Configuration.png └── architecture.png ├── middleware ├── Dockerfile ├── app.py ├── docker-build-and-deploy.sh └── requirements.txt ├── scripts ├── .env.template ├── benchmark.py └── requirements.txt ├── test-middleware-streaming.py ├── test-middleware-synchronous.py ├── tests ├── .env.template ├── bedrock_chat_test_file.py ├── locust_load_test.py ├── management_apis_test_file.py ├── openai_chat_test_file.py └── requirements.txt ├── undeploy.sh └── update-litellm-config.sh /.env.template: -------------------------------------------------------------------------------- 1 | # LITELLM_VERSION eg: main-v1.56.5 2 | # Get it from https://github.com/berriai/litellm/pkgs/container/litellm/versions?filters%5Bversion_type%5D=tagged 3 | LITELLM_VERSION="litellm_stable_release_branch-v1.63.2-stable" 4 | TERRAFORM_S3_BUCKET_NAME="" #Must be globally unique 5 | BUILD_FROM_SOURCE="false" 6 | HOSTED_ZONE_NAME="" 7 | CREATE_PRIVATE_HOSTED_ZONE_IN_EXISTING_VPC="false" 8 | RECORD_NAME="" 9 | CERTIFICATE_ARN="" 10 | OKTA_ISSUER="" 11 | OKTA_AUDIENCE="api://default" 12 | OPENAI_API_KEY="placeholder" 13 | AZURE_OPENAI_API_KEY="placeholder" 14 | AZURE_API_KEY="placeholder" 15 | ANTHROPIC_API_KEY="placeholder" 16 | GROQ_API_KEY="placeholder" 17 | COHERE_API_KEY="placeholder" 18 | CO_API_KEY="placeholder" 19 | HF_TOKEN="placeholder" 20 | HUGGINGFACE_API_KEY="placeholder" 21 | DATABRICKS_API_KEY="placeholder" 22 | 
GEMINI_API_KEY="placeholder" 23 | CODESTRAL_API_KEY="placeholder" 24 | MISTRAL_API_KEY="placeholder" 25 | AZURE_AI_API_KEY="placeholder" 26 | NVIDIA_NIM_API_KEY="placeholder" 27 | XAI_API_KEY="placeholder" 28 | PERPLEXITYAI_API_KEY="placeholder" 29 | GITHUB_API_KEY="placeholder" 30 | DEEPSEEK_API_KEY="placeholder" 31 | AI21_API_KEY="placeholder" 32 | LANGSMITH_API_KEY="" 33 | LANGSMITH_PROJECT="" 34 | LANGSMITH_DEFAULT_RUN_NAME="" 35 | DEPLOYMENT_PLATFORM="ECS" 36 | EXISTING_VPC_ID="" 37 | EXISTING_EKS_CLUSTER_NAME="" 38 | DISABLE_OUTBOUND_NETWORK_ACCESS="false" 39 | CREATE_VPC_ENDPOINTS_IN_EXISTING_VPC="false" 40 | INSTALL_ADD_ONS_IN_EXISTING_EKS_CLUSTER="false" 41 | DESIRED_CAPACITY="2" #Number of ECS or EKS instances to run by default (for horizontal scaling) 42 | MIN_CAPACITY="2" 43 | MAX_CAPACITY="4" 44 | ECS_CPU_TARGET_UTILIZATION_PERCENTAGE="50" 45 | ECS_MEMORY_TARGET_UTILIZATION_PERCENTAGE="40" 46 | ECS_VCPUS="2" 47 | EKS_ARM_INSTANCE_TYPE="t4g.medium" 48 | EKS_X86_INSTANCE_TYPE="t3.medium" 49 | EKS_ARM_AMI_TYPE="AL2_ARM_64" 50 | EKS_X86_AMI_TYPE="AL2_x86_64" 51 | CPU_ARCHITECTURE="" #If empty, defaults to the architecture of your deployment machine "x86" or "arm" 52 | PUBLIC_LOAD_BALANCER="true" 53 | RDS_INSTANCE_CLASS="db.t3.small" 54 | RDS_ALLOCATED_STORAGE_GB="20" 55 | REDIS_NODE_TYPE="cache.t3.micro" 56 | REDIS_NUM_CACHE_CLUSTERS="2" #Number of cache clusters (primary and replicas) the replication group will have 57 | EC2_KEY_PAIR_NAME="" 58 | DISABLE_SWAGGER_PAGE="false" 59 | DISABLE_ADMIN_UI="false" 60 | LANGFUSE_PUBLIC_KEY="" 61 | LANGFUSE_SECRET_KEY="" 62 | LANGFUSE_HOST="" # Optional, defaults to https://cloud.langfuse.com 63 | FAKE_LLM_LOAD_TESTING_ENDPOINT_CERTIFICATE_ARN="" 64 | FAKE_LLM_LOAD_TESTING_ENDPOINT_HOSTED_ZONE_NAME="" 65 | FAKE_LLM_LOAD_TESTING_ENDPOINT_RECORD_NAME="" 66 | 67 | # CloudFront and Route53 Configuration 68 | USE_ROUTE53="false" 69 | USE_CLOUDFRONT="true" 70 | CLOUDFRONT_PRICE_CLASS="PriceClass_100" 71 | 
-------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .env 2 | config/config.yaml 3 | config/local-config.yaml 4 | 5 | .*.sw? 6 | **/.DS_Store 7 | .idea/ 8 | .aws-sam/ 9 | .vscode/settings.json 10 | **/.vim/ 11 | 12 | *.js 13 | !jest.config.js 14 | *.d.ts 15 | node_modules 16 | 17 | cdk.out 18 | cdk.context.json 19 | litellm-source 20 | 21 | litellm-cdk/resources.txt 22 | 23 | **/outputs.json 24 | 25 | **/myenv/ 26 | **/fresh_venv/ 27 | *.pyc 28 | .terraform.lock.hcl 29 | .terraform 30 | terraform.tfstate 31 | terraform.tfstate.backup 32 | .terraform.tfstate.lock.info 33 | terraform.tfstate.* 34 | 35 | **.env.testing 36 | **error.txt 37 | **.env 38 | sessionmanager-bundle.zip 39 | sessionmanager-bundle 40 | backend.hcl 41 | resources.txt 42 | errored.tfstate -------------------------------------------------------------------------------- /.gitlab/issue_templates/Default.md: -------------------------------------------------------------------------------- 1 | ## Summary 2 | 3 | (Summarize the bug encountered concisely) 4 | 5 | ## Steps to reproduce 6 | 7 | (How one can reproduce the issue - this is very important) 8 | 9 | ## Example Project 10 | 11 | (If possible, create an example project here on GitLab.com that exhibits the problematic 12 | behavior, and link to it here in the bug report. 13 | If you are using an older version of GitLab, this will also determine whether the bug has been fixed 14 | in a more recent version) 15 | 16 | ## What is the current bug behavior? 17 | 18 | (What actually happens) 19 | 20 | ## What is the expected correct behavior? 21 | 22 | (What you should see instead) 23 | 24 | ## Relevant logs and/or screenshots 25 | 26 | (Paste any relevant logs - use code blocks (```) to format console output, logs, and code, as 27 | it's very hard to read otherwise.) 
28 | 29 | ## Possible fixes 30 | 31 | (If you can, link to the line of code that might be responsible for the problem) -------------------------------------------------------------------------------- /.gitlab/issue_templates/Documentation.md: -------------------------------------------------------------------------------- 1 | ## Summary 2 | (Summarize the issue and why it is wrong, confusing or misleading) 3 | 4 | ## Link to Document 5 | (The document should be public facing such as the README.md) 6 | 7 | ## Suggested Change 8 | (Please suggest a change) -------------------------------------------------------------------------------- /.gitlab/issue_templates/Enhancement.md: -------------------------------------------------------------------------------- 1 | Please complete as many of the following sections as possible. 2 | 3 | ## Title 4 | [Concise title of the enhancement] 5 | 6 | ## Author(s) 7 | [Name(s) of the author(s) proposing the enhancement] 8 | 9 | ## Status 10 | [Draft/In Review/Approved/Rejected/Implemented] 11 | 12 | ## Summary 13 | [Brief overview of the proposed enhancement (1-2 sentences)] 14 | 15 | ## Motivation 16 | [Explain why this enhancement is needed and what problems it solves] 17 | 18 | ## Proposal 19 | [Detailed description of the proposed enhancement] 20 | 21 | ### User Experience 22 | [Describe how this enhancement will affect the user experience] 23 | 24 | ### Technical Implementation 25 | [Provide technical details on how this enhancement could be implemented] 26 | 27 | ## Alternatives Considered 28 | [List any alternative solutions or features you've considered] 29 | 30 | ## Benefits 31 | [Outline the benefits of implementing this enhancement] 32 | 33 | ## Drawbacks 34 | [Discuss any potential drawbacks or challenges] 35 | 36 | ## Required Resources 37 | [Estimate the resources (time, personnel, etc.) 
required to implement this enhancement] 38 | 39 | ## Dependencies 40 | [List any dependencies or prerequisites for this enhancement] 41 | 42 | ## Testing Plan 43 | [Describe how this enhancement will be tested] 44 | 45 | ## Rollout Plan 46 | [Explain how this enhancement will be rolled out to users] 47 | 48 | ## Documentation 49 | [Outline any documentation updates required for this enhancement] 50 | 51 | ## Open Questions 52 | [List any unresolved questions or areas that need further discussion] 53 | 54 | ## References 55 | [Include any relevant links, issues, or external resources] 56 | 57 | /label enhancement -------------------------------------------------------------------------------- /.gitleaksignore: -------------------------------------------------------------------------------- 1 | 0cb0c568dc995ce4a6220278a0cf5d76828e0ae9:README.md:generic-api-key:840 2 | dc45f9723bd40324f1bb95568b71948e685dcb10:litellm-cdk/lib/litellm-cdk-stack.ts:generic-api-key:213 3 | 15bd509e996cd7c79e8dc717d1e19a0c66b070ca:tests/openai_chat_test_file.py:generic-api-key:236 4 | 78062c1f9dcc9b68243bbcaf5302676583b760cb:README.md:generic-api-key:347 5 | 1d4f9615f50affba757f297c13360ebc590d88cb:README.md:generic-api-key:677 -------------------------------------------------------------------------------- /ATTRIBUTION.md: -------------------------------------------------------------------------------- 1 | This software uses the following open source libraries, codes and fonts: 2 | 3 | ## LiteLLM 4 | - License: [MIT License](https://github.com/BerriAI/litellm/blob/main/LICENSE) 5 | - Homepage: https://www.litellm.ai/ -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | ## Code of Conduct 2 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 
3 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 4 | opensource-codeofconduct@amazon.com with any additional questions or comments. 5 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing Guidelines 2 | 3 | Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional 4 | documentation, we greatly value feedback and contributions from our community. 5 | 6 | Please read through this document before submitting any issues or pull requests to ensure we have all the necessary 7 | information to effectively respond to your bug report or contribution. 8 | 9 | 10 | ## Reporting Bugs/Feature Requests 11 | 12 | We welcome you to use the GitHub issue tracker to report bugs or suggest features. 13 | 14 | When filing an issue, please check existing open, or recently closed, issues to make sure somebody else hasn't already 15 | reported the issue. Please try to include as much information as you can. Details like these are incredibly useful: 16 | 17 | * A reproducible test case or series of steps 18 | * The version of our code being used 19 | * Any modifications you've made relevant to the bug 20 | * Anything unusual about your environment or deployment 21 | 22 | 23 | ## Contributing via Pull Requests 24 | Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that: 25 | 26 | 1. You are working against the latest source on the *main* branch. 27 | 2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already. 28 | 3. You open an issue to discuss any significant work - we would hate for your time to be wasted. 29 | 30 | To send us a pull request, please: 31 | 32 | 1. Fork the repository. 33 | 2. 
Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change. 34 | 3. Ensure local tests pass. 35 | 4. Commit to your fork using clear commit messages. 36 | 5. Send us a pull request, answering any default questions in the pull request interface. 37 | 6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation. 38 | 39 | GitHub provides additional documentation on [forking a repository](https://help.github.com/articles/fork-a-repo/) and 40 | [creating a pull request](https://help.github.com/articles/creating-a-pull-request/). 41 | 42 | 43 | ## Finding contributions to work on 44 | Looking at the existing issues is a great way to find something to contribute on. As our projects, by default, use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any 'help wanted' issues is a great place to start. 45 | 46 | 47 | ## Code of Conduct 48 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 49 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 50 | opensource-codeofconduct@amazon.com with any additional questions or comments. 51 | 52 | 53 | ## Security issue notifications 54 | If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public GitHub issue. 55 | 56 | 57 | ## Licensing 58 | 59 | See the [LICENSE](LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution. 
60 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | ARG LITELLM_VERSION=latest 2 | FROM ghcr.io/berriai/litellm:${LITELLM_VERSION} 3 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT No Attribution 2 | 3 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | this software and associated documentation files (the "Software"), to deal in 7 | the Software without restriction, including without limitation the rights to 8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software is furnished to do so. 10 | 11 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 12 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 13 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 14 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 15 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 16 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-------------------------------------------------------------------------------- /config/default-config-ap-northeast-1.yaml: -------------------------------------------------------------------------------- 1 | model_list: 2 | #Bedrock Models 3 | - model_name: amazon.titan-text-express-v1 4 | litellm_params: 5 | model: bedrock/amazon.titan-text-express-v1 6 | 7 | - model_name: amazon.titan-embed-text-v1 8 | litellm_params: 9 | model: bedrock/amazon.titan-embed-text-v1 10 | drop_params: true #Needed to avoid errors when encoding_format is passed in by openai python client 11 | 12 | - model_name: amazon.titan-embed-text-v2:0 13 | litellm_params: 14 | model: bedrock/amazon.titan-embed-text-v2:0 15 | drop_params: true #Needed to avoid errors when encoding_format is passed in by openai python client 16 | 17 | - model_name: amazon.rerank-v1:0 18 | litellm_params: 19 | model: bedrock/amazon.rerank-v1:0 20 | 21 | - model_name: anthropic.claude-3-haiku-20240307-v1:0 22 | litellm_params: 23 | model: bedrock/anthropic.claude-3-haiku-20240307-v1:0 24 | 25 | - model_name: anthropic.claude-3-5-sonnet-20240620-v1:0 26 | litellm_params: 27 | model: bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0 28 | 29 | - model_name: cohere.embed-english-v3 30 | litellm_params: 31 | model: bedrock/cohere.embed-english-v3 32 | drop_params: true #Needed to avoid errors when encoding_format is passed in by openai python client 33 | 34 | - model_name: cohere.embed-multilingual-v3 35 | litellm_params: 36 | model: bedrock/cohere.embed-multilingual-v3 37 | 38 | - model_name: cohere.rerank-v3-5:0 39 | litellm_params: 40 | model: bedrock/cohere.rerank-v3-5:0 41 | 42 | - model_name: apac.anthropic.claude-3-sonnet-20240229-v1:0 43 | litellm_params: 44 | model: bedrock/apac.anthropic.claude-3-sonnet-20240229-v1:0 45 | 46 | - model_name: apac.anthropic.claude-3-5-sonnet-20240620-v1:0 47 | litellm_params: 48 | model: bedrock/apac.anthropic.claude-3-5-sonnet-20240620-v1:0 49 | 50 | - model_name: 
apac.anthropic.claude-3-haiku-20240307-v1:0 51 | litellm_params: 52 | model: bedrock/apac.anthropic.claude-3-haiku-20240307-v1:0 53 | 54 | - model_name: apac.anthropic.claude-3-5-sonnet-20241022-v2:0 55 | litellm_params: 56 | model: bedrock/apac.anthropic.claude-3-5-sonnet-20241022-v2:0 57 | 58 | -------------------------------------------------------------------------------- /config/default-config-ap-northeast-2.yaml: -------------------------------------------------------------------------------- 1 | model_list: 2 | #Bedrock Models 3 | - model_name: amazon.titan-embed-text-v2:0 4 | litellm_params: 5 | model: bedrock/amazon.titan-embed-text-v2:0 6 | drop_params: true #Needed to avoid errors when encoding_format is passed in by openai python client 7 | 8 | - model_name: anthropic.claude-3-5-sonnet-20240620-v1:0 9 | litellm_params: 10 | model: bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0 11 | 12 | - model_name: anthropic.claude-3-haiku-20240307-v1:0 13 | litellm_params: 14 | model: bedrock/anthropic.claude-3-haiku-20240307-v1:0 15 | 16 | - model_name: apac.anthropic.claude-3-sonnet-20240229-v1:0 17 | litellm_params: 18 | model: bedrock/apac.anthropic.claude-3-sonnet-20240229-v1:0 19 | 20 | - model_name: apac.anthropic.claude-3-5-sonnet-20240620-v1:0 21 | litellm_params: 22 | model: bedrock/apac.anthropic.claude-3-5-sonnet-20240620-v1:0 23 | 24 | - model_name: apac.anthropic.claude-3-haiku-20240307-v1:0 25 | litellm_params: 26 | model: bedrock/apac.anthropic.claude-3-haiku-20240307-v1:0 27 | 28 | - model_name: apac.anthropic.claude-3-5-sonnet-20241022-v2:0 29 | litellm_params: 30 | model: bedrock/apac.anthropic.claude-3-5-sonnet-20241022-v2:0 31 | 32 | -------------------------------------------------------------------------------- /config/default-config-ap-northeast-3.yaml: -------------------------------------------------------------------------------- 1 | model_list: 2 | #Bedrock Models 3 | - model_name: apac.anthropic.claude-3-5-sonnet-20241022-v2:0 4 | 
litellm_params: 5 | model: bedrock/apac.anthropic.claude-3-5-sonnet-20241022-v2:0 6 | 7 | -------------------------------------------------------------------------------- /config/default-config-ap-south-1.yaml: -------------------------------------------------------------------------------- 1 | model_list: 2 | #Bedrock Models 3 | - model_name: amazon.titan-text-lite-v1 4 | litellm_params: 5 | model: bedrock/amazon.titan-text-lite-v1 6 | 7 | - model_name: amazon.titan-text-express-v1 8 | litellm_params: 9 | model: bedrock/amazon.titan-text-express-v1 10 | 11 | - model_name: amazon.titan-embed-image-v1 12 | litellm_params: 13 | model: bedrock/amazon.titan-embed-image-v1 14 | drop_params: true #Needed to avoid errors when encoding_format is passed in by openai python client 15 | 16 | - model_name: amazon.titan-image-generator-v1 17 | litellm_params: 18 | model: bedrock/amazon.titan-image-generator-v1 19 | 20 | - model_name: amazon.titan-embed-text-v2:0 21 | litellm_params: 22 | model: bedrock/amazon.titan-embed-text-v2:0 23 | drop_params: true #Needed to avoid errors when encoding_format is passed in by openai python client 24 | 25 | - model_name: anthropic.claude-3-sonnet-20240229-v1:0 26 | litellm_params: 27 | model: bedrock/anthropic.claude-3-sonnet-20240229-v1:0 28 | 29 | - model_name: anthropic.claude-3-haiku-20240307-v1:0 30 | litellm_params: 31 | model: bedrock/anthropic.claude-3-haiku-20240307-v1:0 32 | 33 | - model_name: cohere.embed-english-v3 34 | litellm_params: 35 | model: bedrock/cohere.embed-english-v3 36 | drop_params: true #Needed to avoid errors when encoding_format is passed in by openai python client 37 | 38 | - model_name: cohere.embed-multilingual-v3 39 | litellm_params: 40 | model: bedrock/cohere.embed-multilingual-v3 41 | 42 | - model_name: meta.llama3-8b-instruct-v1:0 43 | litellm_params: 44 | model: bedrock/meta.llama3-8b-instruct-v1:0 45 | 46 | - model_name: meta.llama3-70b-instruct-v1:0 47 | litellm_params: 48 | model: 
bedrock/meta.llama3-70b-instruct-v1:0 49 | 50 | - model_name: mistral.mistral-7b-instruct-v0:2 51 | litellm_params: 52 | model: bedrock/mistral.mistral-7b-instruct-v0:2 53 | 54 | - model_name: mistral.mixtral-8x7b-instruct-v0:1 55 | litellm_params: 56 | model: bedrock/mistral.mixtral-8x7b-instruct-v0:1 57 | 58 | - model_name: mistral.mistral-large-2402-v1:0 59 | litellm_params: 60 | model: bedrock/mistral.mistral-large-2402-v1:0 61 | 62 | - model_name: apac.anthropic.claude-3-5-sonnet-20240620-v1:0 63 | litellm_params: 64 | model: bedrock/apac.anthropic.claude-3-5-sonnet-20240620-v1:0 65 | 66 | - model_name: apac.anthropic.claude-3-sonnet-20240229-v1:0 67 | litellm_params: 68 | model: bedrock/apac.anthropic.claude-3-sonnet-20240229-v1:0 69 | 70 | - model_name: apac.anthropic.claude-3-haiku-20240307-v1:0 71 | litellm_params: 72 | model: bedrock/apac.anthropic.claude-3-haiku-20240307-v1:0 73 | 74 | - model_name: apac.anthropic.claude-3-5-sonnet-20241022-v2:0 75 | litellm_params: 76 | model: bedrock/apac.anthropic.claude-3-5-sonnet-20241022-v2:0 77 | 78 | -------------------------------------------------------------------------------- /config/default-config-ap-southeast-1.yaml: -------------------------------------------------------------------------------- 1 | model_list: 2 | #Bedrock Models 3 | - model_name: anthropic.claude-3-haiku-20240307-v1:0 4 | litellm_params: 5 | model: bedrock/anthropic.claude-3-haiku-20240307-v1:0 6 | 7 | - model_name: anthropic.claude-3-5-sonnet-20240620-v1:0 8 | litellm_params: 9 | model: bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0 10 | 11 | - model_name: cohere.embed-english-v3 12 | litellm_params: 13 | model: bedrock/cohere.embed-english-v3 14 | drop_params: true #Needed to avoid errors when encoding_format is passed in by openai python client 15 | 16 | - model_name: cohere.embed-multilingual-v3 17 | litellm_params: 18 | model: bedrock/cohere.embed-multilingual-v3 19 | 20 | - model_name: 
apac.anthropic.claude-3-5-sonnet-20240620-v1:0 21 | litellm_params: 22 | model: bedrock/apac.anthropic.claude-3-5-sonnet-20240620-v1:0 23 | 24 | - model_name: apac.anthropic.claude-3-sonnet-20240229-v1:0 25 | litellm_params: 26 | model: bedrock/apac.anthropic.claude-3-sonnet-20240229-v1:0 27 | 28 | - model_name: apac.anthropic.claude-3-haiku-20240307-v1:0 29 | litellm_params: 30 | model: bedrock/apac.anthropic.claude-3-haiku-20240307-v1:0 31 | 32 | - model_name: apac.anthropic.claude-3-5-sonnet-20241022-v2:0 33 | litellm_params: 34 | model: bedrock/apac.anthropic.claude-3-5-sonnet-20241022-v2:0 35 | 36 | -------------------------------------------------------------------------------- /config/default-config-ap-southeast-2.yaml: -------------------------------------------------------------------------------- 1 | model_list: 2 | #Bedrock Models 3 | - model_name: amazon.titan-text-lite-v1 4 | litellm_params: 5 | model: bedrock/amazon.titan-text-lite-v1 6 | 7 | - model_name: amazon.titan-text-express-v1 8 | litellm_params: 9 | model: bedrock/amazon.titan-text-express-v1 10 | 11 | - model_name: amazon.titan-embed-image-v1 12 | litellm_params: 13 | model: bedrock/amazon.titan-embed-image-v1 14 | drop_params: true #Needed to avoid errors when encoding_format is passed in by openai python client 15 | 16 | - model_name: amazon.titan-embed-text-v2:0 17 | litellm_params: 18 | model: bedrock/amazon.titan-embed-text-v2:0 19 | drop_params: true #Needed to avoid errors when encoding_format is passed in by openai python client 20 | 21 | - model_name: anthropic.claude-3-sonnet-20240229-v1:0 22 | litellm_params: 23 | model: bedrock/anthropic.claude-3-sonnet-20240229-v1:0 24 | 25 | - model_name: anthropic.claude-3-haiku-20240307-v1:0 26 | litellm_params: 27 | model: bedrock/anthropic.claude-3-haiku-20240307-v1:0 28 | 29 | - model_name: anthropic.claude-3-5-sonnet-20241022-v2:0 30 | litellm_params: 31 | model: bedrock/anthropic.claude-3-5-sonnet-20241022-v2:0 32 | 33 | - model_name: 
cohere.embed-english-v3 34 | litellm_params: 35 | model: bedrock/cohere.embed-english-v3 36 | drop_params: true #Needed to avoid errors when encoding_format is passed in by openai python client 37 | 38 | - model_name: cohere.embed-multilingual-v3 39 | litellm_params: 40 | model: bedrock/cohere.embed-multilingual-v3 41 | 42 | - model_name: mistral.mistral-7b-instruct-v0:2 43 | litellm_params: 44 | model: bedrock/mistral.mistral-7b-instruct-v0:2 45 | 46 | - model_name: mistral.mixtral-8x7b-instruct-v0:1 47 | litellm_params: 48 | model: bedrock/mistral.mixtral-8x7b-instruct-v0:1 49 | 50 | - model_name: mistral.mistral-large-2402-v1:0 51 | litellm_params: 52 | model: bedrock/mistral.mistral-large-2402-v1:0 53 | 54 | - model_name: apac.anthropic.claude-3-sonnet-20240229-v1:0 55 | litellm_params: 56 | model: bedrock/apac.anthropic.claude-3-sonnet-20240229-v1:0 57 | 58 | - model_name: apac.anthropic.claude-3-5-sonnet-20240620-v1:0 59 | litellm_params: 60 | model: bedrock/apac.anthropic.claude-3-5-sonnet-20240620-v1:0 61 | 62 | - model_name: apac.anthropic.claude-3-haiku-20240307-v1:0 63 | litellm_params: 64 | model: bedrock/apac.anthropic.claude-3-haiku-20240307-v1:0 65 | 66 | - model_name: apac.anthropic.claude-3-5-sonnet-20241022-v2:0 67 | litellm_params: 68 | model: bedrock/apac.anthropic.claude-3-5-sonnet-20241022-v2:0 69 | 70 | -------------------------------------------------------------------------------- /config/default-config-ca-central-1.yaml: -------------------------------------------------------------------------------- 1 | model_list: 2 | #Bedrock Models 3 | - model_name: amazon.titan-text-lite-v1 4 | litellm_params: 5 | model: bedrock/amazon.titan-text-lite-v1 6 | 7 | - model_name: amazon.titan-text-express-v1 8 | litellm_params: 9 | model: bedrock/amazon.titan-text-express-v1 10 | 11 | - model_name: amazon.titan-embed-image-v1 12 | litellm_params: 13 | model: bedrock/amazon.titan-embed-image-v1 14 | drop_params: true #Needed to avoid errors when 
encoding_format is passed in by openai python client 15 | 16 | - model_name: amazon.titan-embed-text-v2:0 17 | litellm_params: 18 | model: bedrock/amazon.titan-embed-text-v2:0 19 | drop_params: true #Needed to avoid errors when encoding_format is passed in by openai python client 20 | 21 | - model_name: amazon.rerank-v1:0 22 | litellm_params: 23 | model: bedrock/amazon.rerank-v1:0 24 | 25 | - model_name: anthropic.claude-3-sonnet-20240229-v1:0 26 | litellm_params: 27 | model: bedrock/anthropic.claude-3-sonnet-20240229-v1:0 28 | 29 | - model_name: anthropic.claude-3-haiku-20240307-v1:0 30 | litellm_params: 31 | model: bedrock/anthropic.claude-3-haiku-20240307-v1:0 32 | 33 | - model_name: cohere.embed-english-v3 34 | litellm_params: 35 | model: bedrock/cohere.embed-english-v3 36 | drop_params: true #Needed to avoid errors when encoding_format is passed in by openai python client 37 | 38 | - model_name: cohere.embed-multilingual-v3 39 | litellm_params: 40 | model: bedrock/cohere.embed-multilingual-v3 41 | 42 | - model_name: cohere.rerank-v3-5:0 43 | litellm_params: 44 | model: bedrock/cohere.rerank-v3-5:0 45 | 46 | - model_name: meta.llama3-8b-instruct-v1:0 47 | litellm_params: 48 | model: bedrock/meta.llama3-8b-instruct-v1:0 49 | 50 | - model_name: meta.llama3-70b-instruct-v1:0 51 | litellm_params: 52 | model: bedrock/meta.llama3-70b-instruct-v1:0 53 | 54 | - model_name: mistral.mistral-7b-instruct-v0:2 55 | litellm_params: 56 | model: bedrock/mistral.mistral-7b-instruct-v0:2 57 | 58 | - model_name: mistral.mixtral-8x7b-instruct-v0:1 59 | litellm_params: 60 | model: bedrock/mistral.mixtral-8x7b-instruct-v0:1 61 | 62 | - model_name: mistral.mistral-large-2402-v1:0 63 | litellm_params: 64 | model: bedrock/mistral.mistral-large-2402-v1:0 65 | 66 | -------------------------------------------------------------------------------- /config/default-config-eu-central-1.yaml: -------------------------------------------------------------------------------- 1 | model_list: 2 | 
#Bedrock Models 3 | - model_name: amazon.titan-text-express-v1 4 | litellm_params: 5 | model: bedrock/amazon.titan-text-express-v1 6 | 7 | - model_name: amazon.titan-text-lite-v1 8 | litellm_params: 9 | model: bedrock/amazon.titan-text-lite-v1 10 | 11 | - model_name: amazon.titan-embed-text-v1 12 | litellm_params: 13 | model: bedrock/amazon.titan-embed-text-v1 14 | drop_params: true #Needed to avoid errors when encoding_format is passed in by openai python client 15 | 16 | - model_name: amazon.titan-embed-image-v1 17 | litellm_params: 18 | model: bedrock/amazon.titan-embed-image-v1 19 | drop_params: true #Needed to avoid errors when encoding_format is passed in by openai python client 20 | 21 | - model_name: amazon.titan-embed-text-v2:0 22 | litellm_params: 23 | model: bedrock/amazon.titan-embed-text-v2:0 24 | drop_params: true #Needed to avoid errors when encoding_format is passed in by openai python client 25 | 26 | - model_name: amazon.rerank-v1:0 27 | litellm_params: 28 | model: bedrock/amazon.rerank-v1:0 29 | 30 | - model_name: anthropic.claude-3-haiku-20240307-v1:0 31 | litellm_params: 32 | model: bedrock/anthropic.claude-3-haiku-20240307-v1:0 33 | 34 | - model_name: anthropic.claude-3-5-sonnet-20240620-v1:0 35 | litellm_params: 36 | model: bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0 37 | 38 | - model_name: cohere.embed-english-v3 39 | litellm_params: 40 | model: bedrock/cohere.embed-english-v3 41 | drop_params: true #Needed to avoid errors when encoding_format is passed in by openai python client 42 | 43 | - model_name: cohere.embed-multilingual-v3 44 | litellm_params: 45 | model: bedrock/cohere.embed-multilingual-v3 46 | 47 | - model_name: cohere.rerank-v3-5:0 48 | litellm_params: 49 | model: bedrock/cohere.rerank-v3-5:0 50 | 51 | - model_name: eu.anthropic.claude-3-sonnet-20240229-v1:0 52 | litellm_params: 53 | model: bedrock/eu.anthropic.claude-3-sonnet-20240229-v1:0 54 | 55 | - model_name: eu.anthropic.claude-3-5-sonnet-20240620-v1:0 56 | 
litellm_params: 57 | model: bedrock/eu.anthropic.claude-3-5-sonnet-20240620-v1:0 58 | 59 | - model_name: eu.anthropic.claude-3-haiku-20240307-v1:0 60 | litellm_params: 61 | model: bedrock/eu.anthropic.claude-3-haiku-20240307-v1:0 62 | 63 | - model_name: eu.meta.llama3-2-3b-instruct-v1:0 64 | litellm_params: 65 | model: bedrock/eu.meta.llama3-2-3b-instruct-v1:0 66 | 67 | - model_name: eu.meta.llama3-2-1b-instruct-v1:0 68 | litellm_params: 69 | model: bedrock/eu.meta.llama3-2-1b-instruct-v1:0 70 | 71 | -------------------------------------------------------------------------------- /config/default-config-eu-north-1.yaml: -------------------------------------------------------------------------------- 1 | model_list: 2 | #Bedrock Models 3 | - model_name: amazon.titan-embed-text-v2:0 4 | litellm_params: 5 | model: bedrock/amazon.titan-embed-text-v2:0 6 | drop_params: true #Needed to avoid errors when encoding_format is passed in by openai python client 7 | 8 | -------------------------------------------------------------------------------- /config/default-config-eu-west-1.yaml: -------------------------------------------------------------------------------- 1 | model_list: 2 | #Bedrock Models 3 | - model_name: amazon.titan-text-lite-v1 4 | litellm_params: 5 | model: bedrock/amazon.titan-text-lite-v1 6 | 7 | - model_name: amazon.titan-text-express-v1 8 | litellm_params: 9 | model: bedrock/amazon.titan-text-express-v1 10 | 11 | - model_name: amazon.titan-embed-image-v1 12 | litellm_params: 13 | model: bedrock/amazon.titan-embed-image-v1 14 | drop_params: true #Needed to avoid errors when encoding_format is passed in by openai python client 15 | 16 | - model_name: amazon.titan-embed-text-v2:0 17 | litellm_params: 18 | model: bedrock/amazon.titan-embed-text-v2:0 19 | drop_params: true #Needed to avoid errors when encoding_format is passed in by openai python client 20 | 21 | - model_name: amazon.titan-image-generator-v1 22 | litellm_params: 23 | model: 
bedrock/amazon.titan-image-generator-v1 24 | 25 | - model_name: anthropic.claude-3-haiku-20240307-v1:0 26 | litellm_params: 27 | model: bedrock/anthropic.claude-3-haiku-20240307-v1:0 28 | 29 | - model_name: cohere.embed-english-v3 30 | litellm_params: 31 | model: bedrock/cohere.embed-english-v3 32 | drop_params: true #Needed to avoid errors when encoding_format is passed in by openai python client 33 | 34 | - model_name: cohere.embed-multilingual-v3 35 | litellm_params: 36 | model: bedrock/cohere.embed-multilingual-v3 37 | 38 | - model_name: mistral.mistral-7b-instruct-v0:2 39 | litellm_params: 40 | model: bedrock/mistral.mistral-7b-instruct-v0:2 41 | 42 | - model_name: mistral.mixtral-8x7b-instruct-v0:1 43 | litellm_params: 44 | model: bedrock/mistral.mixtral-8x7b-instruct-v0:1 45 | 46 | - model_name: mistral.mistral-large-2402-v1:0 47 | litellm_params: 48 | model: bedrock/mistral.mistral-large-2402-v1:0 49 | 50 | - model_name: eu.anthropic.claude-3-sonnet-20240229-v1:0 51 | litellm_params: 52 | model: bedrock/eu.anthropic.claude-3-sonnet-20240229-v1:0 53 | 54 | - model_name: eu.anthropic.claude-3-haiku-20240307-v1:0 55 | litellm_params: 56 | model: bedrock/eu.anthropic.claude-3-haiku-20240307-v1:0 57 | 58 | - model_name: eu.anthropic.claude-3-5-sonnet-20240620-v1:0 59 | litellm_params: 60 | model: bedrock/eu.anthropic.claude-3-5-sonnet-20240620-v1:0 61 | 62 | - model_name: eu.meta.llama3-2-3b-instruct-v1:0 63 | litellm_params: 64 | model: bedrock/eu.meta.llama3-2-3b-instruct-v1:0 65 | 66 | - model_name: eu.meta.llama3-2-1b-instruct-v1:0 67 | litellm_params: 68 | model: bedrock/eu.meta.llama3-2-1b-instruct-v1:0 69 | 70 | -------------------------------------------------------------------------------- /config/default-config-eu-west-2.yaml: -------------------------------------------------------------------------------- 1 | model_list: 2 | #Bedrock Models 3 | - model_name: amazon.titan-text-lite-v1 4 | litellm_params: 5 | model: bedrock/amazon.titan-text-lite-v1 6 | 
7 | - model_name: amazon.titan-text-express-v1 8 | litellm_params: 9 | model: bedrock/amazon.titan-text-express-v1 10 | 11 | - model_name: amazon.titan-embed-image-v1 12 | litellm_params: 13 | model: bedrock/amazon.titan-embed-image-v1 14 | drop_params: true #Needed to avoid errors when encoding_format is passed in by openai python client 15 | 16 | - model_name: amazon.titan-image-generator-v1 17 | litellm_params: 18 | model: bedrock/amazon.titan-image-generator-v1 19 | 20 | - model_name: amazon.titan-embed-text-v2:0 21 | litellm_params: 22 | model: bedrock/amazon.titan-embed-text-v2:0 23 | drop_params: true #Needed to avoid errors when encoding_format is passed in by openai python client 24 | 25 | - model_name: anthropic.claude-3-sonnet-20240229-v1:0 26 | litellm_params: 27 | model: bedrock/anthropic.claude-3-sonnet-20240229-v1:0 28 | 29 | - model_name: anthropic.claude-3-haiku-20240307-v1:0 30 | litellm_params: 31 | model: bedrock/anthropic.claude-3-haiku-20240307-v1:0 32 | 33 | - model_name: cohere.embed-english-v3 34 | litellm_params: 35 | model: bedrock/cohere.embed-english-v3 36 | drop_params: true #Needed to avoid errors when encoding_format is passed in by openai python client 37 | 38 | - model_name: cohere.embed-multilingual-v3 39 | litellm_params: 40 | model: bedrock/cohere.embed-multilingual-v3 41 | 42 | - model_name: meta.llama3-8b-instruct-v1:0 43 | litellm_params: 44 | model: bedrock/meta.llama3-8b-instruct-v1:0 45 | 46 | - model_name: meta.llama3-70b-instruct-v1:0 47 | litellm_params: 48 | model: bedrock/meta.llama3-70b-instruct-v1:0 49 | 50 | - model_name: mistral.mistral-7b-instruct-v0:2 51 | litellm_params: 52 | model: bedrock/mistral.mistral-7b-instruct-v0:2 53 | 54 | - model_name: mistral.mixtral-8x7b-instruct-v0:1 55 | litellm_params: 56 | model: bedrock/mistral.mixtral-8x7b-instruct-v0:1 57 | 58 | - model_name: mistral.mistral-large-2402-v1:0 59 | litellm_params: 60 | model: bedrock/mistral.mistral-large-2402-v1:0 61 | 62 | 
-------------------------------------------------------------------------------- /config/default-config-eu-west-3.yaml: -------------------------------------------------------------------------------- 1 | model_list: 2 | #Bedrock Models 3 | - model_name: amazon.titan-text-lite-v1 4 | litellm_params: 5 | model: bedrock/amazon.titan-text-lite-v1 6 | 7 | - model_name: amazon.titan-text-express-v1 8 | litellm_params: 9 | model: bedrock/amazon.titan-text-express-v1 10 | 11 | - model_name: amazon.titan-embed-image-v1 12 | litellm_params: 13 | model: bedrock/amazon.titan-embed-image-v1 14 | drop_params: true #Needed to avoid errors when encoding_format is passed in by openai python client 15 | 16 | - model_name: amazon.titan-embed-text-v2:0 17 | litellm_params: 18 | model: bedrock/amazon.titan-embed-text-v2:0 19 | drop_params: true #Needed to avoid errors when encoding_format is passed in by openai python client 20 | 21 | - model_name: anthropic.claude-3-haiku-20240307-v1:0 22 | litellm_params: 23 | model: bedrock/anthropic.claude-3-haiku-20240307-v1:0 24 | 25 | - model_name: cohere.embed-english-v3 26 | litellm_params: 27 | model: bedrock/cohere.embed-english-v3 28 | drop_params: true #Needed to avoid errors when encoding_format is passed in by openai python client 29 | 30 | - model_name: cohere.embed-multilingual-v3 31 | litellm_params: 32 | model: bedrock/cohere.embed-multilingual-v3 33 | 34 | - model_name: mistral.mistral-7b-instruct-v0:2 35 | litellm_params: 36 | model: bedrock/mistral.mistral-7b-instruct-v0:2 37 | 38 | - model_name: mistral.mixtral-8x7b-instruct-v0:1 39 | litellm_params: 40 | model: bedrock/mistral.mixtral-8x7b-instruct-v0:1 41 | 42 | - model_name: mistral.mistral-large-2402-v1:0 43 | litellm_params: 44 | model: bedrock/mistral.mistral-large-2402-v1:0 45 | 46 | - model_name: eu.anthropic.claude-3-5-sonnet-20240620-v1:0 47 | litellm_params: 48 | model: bedrock/eu.anthropic.claude-3-5-sonnet-20240620-v1:0 49 | 50 | - model_name: 
eu.anthropic.claude-3-sonnet-20240229-v1:0 51 | litellm_params: 52 | model: bedrock/eu.anthropic.claude-3-sonnet-20240229-v1:0 53 | 54 | - model_name: eu.anthropic.claude-3-haiku-20240307-v1:0 55 | litellm_params: 56 | model: bedrock/eu.anthropic.claude-3-haiku-20240307-v1:0 57 | 58 | - model_name: eu.meta.llama3-2-1b-instruct-v1:0 59 | litellm_params: 60 | model: bedrock/eu.meta.llama3-2-1b-instruct-v1:0 61 | 62 | - model_name: eu.meta.llama3-2-3b-instruct-v1:0 63 | litellm_params: 64 | model: bedrock/eu.meta.llama3-2-3b-instruct-v1:0 65 | 66 | -------------------------------------------------------------------------------- /config/default-config-sa-east-1.yaml: -------------------------------------------------------------------------------- 1 | model_list: 2 | #Bedrock Models 3 | - model_name: amazon.titan-text-lite-v1 4 | litellm_params: 5 | model: bedrock/amazon.titan-text-lite-v1 6 | 7 | - model_name: amazon.titan-text-express-v1 8 | litellm_params: 9 | model: bedrock/amazon.titan-text-express-v1 10 | 11 | - model_name: amazon.titan-embed-image-v1 12 | litellm_params: 13 | model: bedrock/amazon.titan-embed-image-v1 14 | drop_params: true #Needed to avoid errors when encoding_format is passed in by openai python client 15 | 16 | - model_name: amazon.titan-embed-text-v2:0 17 | litellm_params: 18 | model: bedrock/amazon.titan-embed-text-v2:0 19 | drop_params: true #Needed to avoid errors when encoding_format is passed in by openai python client 20 | 21 | - model_name: anthropic.claude-3-sonnet-20240229-v1:0 22 | litellm_params: 23 | model: bedrock/anthropic.claude-3-sonnet-20240229-v1:0 24 | 25 | - model_name: anthropic.claude-3-haiku-20240307-v1:0 26 | litellm_params: 27 | model: bedrock/anthropic.claude-3-haiku-20240307-v1:0 28 | 29 | - model_name: cohere.embed-english-v3 30 | litellm_params: 31 | model: bedrock/cohere.embed-english-v3 32 | drop_params: true #Needed to avoid errors when encoding_format is passed in by openai python client 33 | 34 | - 
model_name: cohere.embed-multilingual-v3 35 | litellm_params: 36 | model: bedrock/cohere.embed-multilingual-v3 37 | 38 | - model_name: mistral.mistral-7b-instruct-v0:2 39 | litellm_params: 40 | model: bedrock/mistral.mistral-7b-instruct-v0:2 41 | 42 | - model_name: mistral.mixtral-8x7b-instruct-v0:1 43 | litellm_params: 44 | model: bedrock/mistral.mixtral-8x7b-instruct-v0:1 45 | 46 | - model_name: mistral.mistral-large-2402-v1:0 47 | litellm_params: 48 | model: bedrock/mistral.mistral-large-2402-v1:0 49 | 50 | -------------------------------------------------------------------------------- /config/default-config-us-east-1.yaml: -------------------------------------------------------------------------------- 1 | model_list: 2 | #Bedrock Models 3 | - model_name: amazon.titan-tg1-large 4 | litellm_params: 5 | model: bedrock/amazon.titan-tg1-large 6 | 7 | - model_name: amazon.titan-image-generator-v1 8 | litellm_params: 9 | model: bedrock/amazon.titan-image-generator-v1 10 | 11 | - model_name: amazon.titan-image-generator-v2:0 12 | litellm_params: 13 | model: bedrock/amazon.titan-image-generator-v2:0 14 | 15 | - model_name: amazon.titan-text-premier-v1:0 16 | litellm_params: 17 | model: bedrock/amazon.titan-text-premier-v1:0 18 | 19 | - model_name: amazon.nova-pro-v1:0 20 | litellm_params: 21 | model: bedrock/amazon.nova-pro-v1:0 22 | 23 | - model_name: amazon.nova-lite-v1:0 24 | litellm_params: 25 | model: bedrock/amazon.nova-lite-v1:0 26 | 27 | - model_name: amazon.nova-canvas-v1:0 28 | litellm_params: 29 | model: bedrock/amazon.nova-canvas-v1:0 30 | 31 | - model_name: amazon.nova-reel-v1:0 32 | litellm_params: 33 | model: bedrock/amazon.nova-reel-v1:0 34 | 35 | - model_name: amazon.nova-micro-v1:0 36 | litellm_params: 37 | model: bedrock/amazon.nova-micro-v1:0 38 | 39 | - model_name: amazon.titan-embed-g1-text-02 40 | litellm_params: 41 | model: bedrock/amazon.titan-embed-g1-text-02 42 | 43 | - model_name: amazon.titan-text-lite-v1 44 | litellm_params: 45 | model: 
bedrock/amazon.titan-text-lite-v1 46 | 47 | - model_name: amazon.titan-text-express-v1 48 | litellm_params: 49 | model: bedrock/amazon.titan-text-express-v1 50 | 51 | - model_name: amazon.titan-embed-text-v1 52 | litellm_params: 53 | model: bedrock/amazon.titan-embed-text-v1 54 | drop_params: true #Needed to avoid errors when encoding_format is passed in by openai python client 55 | 56 | - model_name: amazon.titan-embed-text-v2:0 57 | litellm_params: 58 | model: bedrock/amazon.titan-embed-text-v2:0 59 | drop_params: true #Needed to avoid errors when encoding_format is passed in by openai python client 60 | 61 | - model_name: amazon.titan-embed-image-v1 62 | litellm_params: 63 | model: bedrock/amazon.titan-embed-image-v1 64 | drop_params: true #Needed to avoid errors when encoding_format is passed in by openai python client 65 | 66 | - model_name: ai21.jamba-1-5-large-v1:0 67 | litellm_params: 68 | model: bedrock/converse/ai21.jamba-1-5-large-v1:0 69 | 70 | - model_name: ai21.jamba-1-5-mini-v1:0 71 | litellm_params: 72 | model: bedrock/converse/ai21.jamba-1-5-mini-v1:0 73 | 74 | - model_name: anthropic.claude-3-haiku-20240307-v1:0 75 | litellm_params: 76 | model: bedrock/anthropic.claude-3-haiku-20240307-v1:0 77 | 78 | - model_name: anthropic.claude-3-5-sonnet-20240620-v1:0 79 | litellm_params: 80 | model: bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0 81 | 82 | - model_name: cohere.command-text-v14 83 | litellm_params: 84 | model: bedrock/cohere.command-text-v14 85 | 86 | - model_name: cohere.command-r-v1:0 87 | litellm_params: 88 | model: bedrock/cohere.command-r-v1:0 89 | 90 | - model_name: cohere.command-r-plus-v1:0 91 | litellm_params: 92 | model: bedrock/cohere.command-r-plus-v1:0 93 | 94 | - model_name: cohere.command-light-text-v14 95 | litellm_params: 96 | model: bedrock/cohere.command-light-text-v14 97 | 98 | - model_name: cohere.embed-english-v3 99 | litellm_params: 100 | model: bedrock/cohere.embed-english-v3 101 | drop_params: true #Needed to avoid 
errors when encoding_format is passed in by openai python client 102 | 103 | - model_name: cohere.embed-multilingual-v3 104 | litellm_params: 105 | model: bedrock/cohere.embed-multilingual-v3 106 | 107 | - model_name: meta.llama3-8b-instruct-v1:0 108 | litellm_params: 109 | model: bedrock/meta.llama3-8b-instruct-v1:0 110 | 111 | - model_name: meta.llama3-70b-instruct-v1:0 112 | litellm_params: 113 | model: bedrock/meta.llama3-70b-instruct-v1:0 114 | 115 | - model_name: mistral.mistral-7b-instruct-v0:2 116 | litellm_params: 117 | model: bedrock/mistral.mistral-7b-instruct-v0:2 118 | 119 | - model_name: mistral.mixtral-8x7b-instruct-v0:1 120 | litellm_params: 121 | model: bedrock/mistral.mixtral-8x7b-instruct-v0:1 122 | 123 | - model_name: mistral.mistral-large-2402-v1:0 124 | litellm_params: 125 | model: bedrock/mistral.mistral-large-2402-v1:0 126 | 127 | - model_name: mistral.mistral-small-2402-v1:0 128 | litellm_params: 129 | model: bedrock/mistral.mistral-small-2402-v1:0 130 | 131 | - model_name: us.anthropic.claude-3-sonnet-20240229-v1:0 132 | litellm_params: 133 | model: bedrock/us.anthropic.claude-3-sonnet-20240229-v1:0 134 | 135 | - model_name: us.anthropic.claude-3-opus-20240229-v1:0 136 | litellm_params: 137 | model: bedrock/us.anthropic.claude-3-opus-20240229-v1:0 138 | 139 | - model_name: us.anthropic.claude-3-haiku-20240307-v1:0 140 | litellm_params: 141 | model: bedrock/us.anthropic.claude-3-haiku-20240307-v1:0 142 | 143 | - model_name: us.meta.llama3-2-11b-instruct-v1:0 144 | litellm_params: 145 | model: bedrock/us.meta.llama3-2-11b-instruct-v1:0 146 | 147 | - model_name: us.meta.llama3-2-3b-instruct-v1:0 148 | litellm_params: 149 | model: bedrock/us.meta.llama3-2-3b-instruct-v1:0 150 | 151 | - model_name: us.meta.llama3-2-90b-instruct-v1:0 152 | litellm_params: 153 | model: bedrock/us.meta.llama3-2-90b-instruct-v1:0 154 | 155 | - model_name: us.meta.llama3-2-1b-instruct-v1:0 156 | litellm_params: 157 | model: bedrock/us.meta.llama3-2-1b-instruct-v1:0 
158 | 159 | - model_name: us.anthropic.claude-3-5-sonnet-20240620-v1:0 160 | litellm_params: 161 | model: bedrock/us.anthropic.claude-3-5-sonnet-20240620-v1:0 162 | 163 | - model_name: us.anthropic.claude-3-5-haiku-20241022-v1:0 164 | litellm_params: 165 | model: bedrock/us.anthropic.claude-3-5-haiku-20241022-v1:0 166 | 167 | - model_name: us.meta.llama3-1-8b-instruct-v1:0 168 | litellm_params: 169 | model: bedrock/us.meta.llama3-1-8b-instruct-v1:0 170 | 171 | - model_name: us.meta.llama3-1-70b-instruct-v1:0 172 | litellm_params: 173 | model: bedrock/us.meta.llama3-1-70b-instruct-v1:0 174 | 175 | - model_name: us.amazon.nova-lite-v1:0 176 | litellm_params: 177 | model: bedrock/us.amazon.nova-lite-v1:0 178 | 179 | - model_name: us.amazon.nova-pro-v1:0 180 | litellm_params: 181 | model: bedrock/us.amazon.nova-pro-v1:0 182 | 183 | - model_name: us.amazon.nova-micro-v1:0 184 | litellm_params: 185 | model: bedrock/us.amazon.nova-micro-v1:0 186 | 187 | - model_name: us.meta.llama3-3-70b-instruct-v1:0 188 | litellm_params: 189 | model: bedrock/us.meta.llama3-3-70b-instruct-v1:0 190 | 191 | - model_name: us.anthropic.claude-3-5-sonnet-20241022-v2:0 192 | litellm_params: 193 | model: bedrock/us.anthropic.claude-3-5-sonnet-20241022-v2:0 194 | 195 | - model_name: us.anthropic.claude-3-7-sonnet-20250219-v1:0 196 | litellm_params: 197 | model: bedrock/converse/us.anthropic.claude-3-7-sonnet-20250219-v1:0 198 | -------------------------------------------------------------------------------- /config/default-config-us-east-2.yaml: -------------------------------------------------------------------------------- 1 | model_list: 2 | #Bedrock Models 3 | - model_name: amazon.titan-embed-text-v2:0 4 | litellm_params: 5 | model: bedrock/amazon.titan-embed-text-v2:0 6 | drop_params: true #Needed to avoid errors when encoding_format is passed in by openai python client 7 | 8 | - model_name: meta.llama3-3-70b-instruct-v1:0 9 | litellm_params: 10 | model: 
bedrock/meta.llama3-3-70b-instruct-v1:0 11 | 12 | - model_name: us.anthropic.claude-3-haiku-20240307-v1:0 13 | litellm_params: 14 | model: bedrock/us.anthropic.claude-3-haiku-20240307-v1:0 15 | 16 | - model_name: us.meta.llama3-2-1b-instruct-v1:0 17 | litellm_params: 18 | model: bedrock/us.meta.llama3-2-1b-instruct-v1:0 19 | 20 | - model_name: us.meta.llama3-2-11b-instruct-v1:0 21 | litellm_params: 22 | model: bedrock/us.meta.llama3-2-11b-instruct-v1:0 23 | 24 | - model_name: us.meta.llama3-2-3b-instruct-v1:0 25 | litellm_params: 26 | model: bedrock/us.meta.llama3-2-3b-instruct-v1:0 27 | 28 | - model_name: us.meta.llama3-2-90b-instruct-v1:0 29 | litellm_params: 30 | model: bedrock/us.meta.llama3-2-90b-instruct-v1:0 31 | 32 | - model_name: us.meta.llama3-1-8b-instruct-v1:0 33 | litellm_params: 34 | model: bedrock/us.meta.llama3-1-8b-instruct-v1:0 35 | 36 | - model_name: us.meta.llama3-1-70b-instruct-v1:0 37 | litellm_params: 38 | model: bedrock/us.meta.llama3-1-70b-instruct-v1:0 39 | 40 | - model_name: us.amazon.nova-micro-v1:0 41 | litellm_params: 42 | model: bedrock/us.amazon.nova-micro-v1:0 43 | 44 | - model_name: us.amazon.nova-lite-v1:0 45 | litellm_params: 46 | model: bedrock/us.amazon.nova-lite-v1:0 47 | 48 | - model_name: us.amazon.nova-pro-v1:0 49 | litellm_params: 50 | model: bedrock/us.amazon.nova-pro-v1:0 51 | 52 | - model_name: us.anthropic.claude-3-5-haiku-20241022-v1:0 53 | litellm_params: 54 | model: bedrock/us.anthropic.claude-3-5-haiku-20241022-v1:0 55 | 56 | - model_name: us.meta.llama3-1-405b-instruct-v1:0 57 | litellm_params: 58 | model: bedrock/us.meta.llama3-1-405b-instruct-v1:0 59 | 60 | - model_name: us.meta.llama3-3-70b-instruct-v1:0 61 | litellm_params: 62 | model: bedrock/us.meta.llama3-3-70b-instruct-v1:0 63 | 64 | - model_name: us.anthropic.claude-3-5-sonnet-20240620-v1:0 65 | litellm_params: 66 | model: bedrock/us.anthropic.claude-3-5-sonnet-20240620-v1:0 67 | 68 | - model_name: us.anthropic.claude-3-5-sonnet-20241022-v2:0 69 | 
litellm_params: 70 | model: bedrock/us.anthropic.claude-3-5-sonnet-20241022-v2:0 71 | 72 | - model_name: us.anthropic.claude-3-7-sonnet-20250219-v1:0 73 | litellm_params: 74 | model: bedrock/converse/us.anthropic.claude-3-7-sonnet-20250219-v1:0 75 | 76 | -------------------------------------------------------------------------------- /config/default-config-us-west-1.yaml: -------------------------------------------------------------------------------- 1 | model_list: 2 | #Bedrock Models 3 | - model_name: amazon.titan-embed-text-v2:0 4 | litellm_params: 5 | model: bedrock/amazon.titan-embed-text-v2:0 6 | drop_params: true #Needed to avoid errors when encoding_format is passed in by openai python client 7 | 8 | -------------------------------------------------------------------------------- /config/default-config-us-west-2.yaml: -------------------------------------------------------------------------------- 1 | model_list: 2 | #Bedrock Models 3 | - model_name: amazon.titan-tg1-large 4 | litellm_params: 5 | model: bedrock/amazon.titan-tg1-large 6 | 7 | - model_name: amazon.titan-embed-g1-text-02 8 | litellm_params: 9 | model: bedrock/amazon.titan-embed-g1-text-02 10 | 11 | - model_name: amazon.titan-text-lite-v1 12 | litellm_params: 13 | model: bedrock/amazon.titan-text-lite-v1 14 | 15 | - model_name: amazon.titan-text-express-v1 16 | litellm_params: 17 | model: bedrock/amazon.titan-text-express-v1 18 | 19 | - model_name: amazon.titan-embed-text-v1 20 | litellm_params: 21 | model: bedrock/amazon.titan-embed-text-v1 22 | drop_params: true #Needed to avoid errors when encoding_format is passed in by openai python client 23 | 24 | - model_name: amazon.titan-embed-text-v2:0 25 | litellm_params: 26 | model: bedrock/amazon.titan-embed-text-v2:0 27 | drop_params: true #Needed to avoid errors when encoding_format is passed in by openai python client 28 | 29 | - model_name: amazon.titan-embed-image-v1 30 | litellm_params: 31 | model: bedrock/amazon.titan-embed-image-v1 32 | 
drop_params: true #Needed to avoid errors when encoding_format is passed in by openai python client 33 | 34 | - model_name: amazon.titan-image-generator-v1 35 | litellm_params: 36 | model: bedrock/amazon.titan-image-generator-v1 37 | 38 | - model_name: amazon.titan-image-generator-v2:0 39 | litellm_params: 40 | model: bedrock/amazon.titan-image-generator-v2:0 41 | 42 | - model_name: amazon.rerank-v1:0 43 | litellm_params: 44 | model: bedrock/amazon.rerank-v1:0 45 | 46 | - model_name: stability.sd3-5-large-v1:0 47 | litellm_params: 48 | model: bedrock/stability.sd3-5-large-v1:0 49 | 50 | - model_name: anthropic.claude-3-5-sonnet-20241022-v2:0 51 | litellm_params: 52 | model: bedrock/anthropic.claude-3-5-sonnet-20241022-v2:0 53 | 54 | - model_name: anthropic.claude-3-5-haiku-20241022-v1:0 55 | litellm_params: 56 | model: bedrock/anthropic.claude-3-5-haiku-20241022-v1:0 57 | 58 | - model_name: anthropic.claude-3-haiku-20240307-v1:0 59 | litellm_params: 60 | model: bedrock/anthropic.claude-3-haiku-20240307-v1:0 61 | 62 | - model_name: anthropic.claude-3-opus-20240229-v1:0 63 | litellm_params: 64 | model: bedrock/anthropic.claude-3-opus-20240229-v1:0 65 | 66 | - model_name: anthropic.claude-3-5-sonnet-20240620-v1:0 67 | litellm_params: 68 | model: bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0 69 | 70 | - model_name: cohere.command-text-v14 71 | litellm_params: 72 | model: bedrock/cohere.command-text-v14 73 | 74 | - model_name: cohere.command-r-v1:0 75 | litellm_params: 76 | model: bedrock/cohere.command-r-v1:0 77 | 78 | - model_name: cohere.command-r-plus-v1:0 79 | litellm_params: 80 | model: bedrock/cohere.command-r-plus-v1:0 81 | 82 | - model_name: cohere.command-light-text-v14 83 | litellm_params: 84 | model: bedrock/cohere.command-light-text-v14 85 | 86 | - model_name: cohere.embed-english-v3 87 | litellm_params: 88 | model: bedrock/cohere.embed-english-v3 89 | drop_params: true #Needed to avoid errors when encoding_format is passed in by openai python client 90 
| 91 | - model_name: cohere.embed-multilingual-v3 92 | litellm_params: 93 | model: bedrock/cohere.embed-multilingual-v3 94 | 95 | - model_name: cohere.rerank-v3-5:0 96 | litellm_params: 97 | model: bedrock/cohere.rerank-v3-5:0 98 | 99 | - model_name: meta.llama3-8b-instruct-v1:0 100 | litellm_params: 101 | model: bedrock/meta.llama3-8b-instruct-v1:0 102 | 103 | - model_name: meta.llama3-70b-instruct-v1:0 104 | litellm_params: 105 | model: bedrock/meta.llama3-70b-instruct-v1:0 106 | 107 | - model_name: meta.llama3-1-8b-instruct-v1:0 108 | litellm_params: 109 | model: bedrock/meta.llama3-1-8b-instruct-v1:0 110 | 111 | - model_name: meta.llama3-1-70b-instruct-v1:0 112 | litellm_params: 113 | model: bedrock/meta.llama3-1-70b-instruct-v1:0 114 | 115 | - model_name: meta.llama3-1-405b-instruct-v1:0 116 | litellm_params: 117 | model: bedrock/meta.llama3-1-405b-instruct-v1:0 118 | 119 | - model_name: mistral.mistral-7b-instruct-v0:2 120 | litellm_params: 121 | model: bedrock/mistral.mistral-7b-instruct-v0:2 122 | 123 | - model_name: mistral.mixtral-8x7b-instruct-v0:1 124 | litellm_params: 125 | model: bedrock/mistral.mixtral-8x7b-instruct-v0:1 126 | 127 | - model_name: mistral.mistral-large-2402-v1:0 128 | litellm_params: 129 | model: bedrock/mistral.mistral-large-2402-v1:0 130 | 131 | - model_name: mistral.mistral-large-2407-v1:0 132 | litellm_params: 133 | model: bedrock/mistral.mistral-large-2407-v1:0 134 | 135 | - model_name: luma.ray-v2:0 136 | litellm_params: 137 | model: bedrock/luma.ray-v2:0 138 | 139 | - model_name: us.anthropic.claude-3-haiku-20240307-v1:0 140 | litellm_params: 141 | model: bedrock/us.anthropic.claude-3-haiku-20240307-v1:0 142 | 143 | - model_name: us.anthropic.claude-3-5-sonnet-20240620-v1:0 144 | litellm_params: 145 | model: bedrock/us.anthropic.claude-3-5-sonnet-20240620-v1:0 146 | 147 | - model_name: us.anthropic.claude-3-sonnet-20240229-v1:0 148 | litellm_params: 149 | model: bedrock/us.anthropic.claude-3-sonnet-20240229-v1:0 150 | 151 | - 
model_name: us.anthropic.claude-3-opus-20240229-v1:0 152 | litellm_params: 153 | model: bedrock/us.anthropic.claude-3-opus-20240229-v1:0 154 | 155 | - model_name: us.meta.llama3-2-11b-instruct-v1:0 156 | litellm_params: 157 | model: bedrock/us.meta.llama3-2-11b-instruct-v1:0 158 | 159 | - model_name: us.meta.llama3-2-90b-instruct-v1:0 160 | litellm_params: 161 | model: bedrock/us.meta.llama3-2-90b-instruct-v1:0 162 | 163 | - model_name: us.meta.llama3-2-3b-instruct-v1:0 164 | litellm_params: 165 | model: bedrock/us.meta.llama3-2-3b-instruct-v1:0 166 | 167 | - model_name: us.meta.llama3-2-1b-instruct-v1:0 168 | litellm_params: 169 | model: bedrock/us.meta.llama3-2-1b-instruct-v1:0 170 | 171 | - model_name: us.anthropic.claude-3-5-haiku-20241022-v1:0 172 | litellm_params: 173 | model: bedrock/us.anthropic.claude-3-5-haiku-20241022-v1:0 174 | 175 | - model_name: us.meta.llama3-1-8b-instruct-v1:0 176 | litellm_params: 177 | model: bedrock/us.meta.llama3-1-8b-instruct-v1:0 178 | 179 | - model_name: us.meta.llama3-1-70b-instruct-v1:0 180 | litellm_params: 181 | model: bedrock/us.meta.llama3-1-70b-instruct-v1:0 182 | 183 | - model_name: us.amazon.nova-pro-v1:0 184 | litellm_params: 185 | model: bedrock/us.amazon.nova-pro-v1:0 186 | 187 | - model_name: us.amazon.nova-lite-v1:0 188 | litellm_params: 189 | model: bedrock/us.amazon.nova-lite-v1:0 190 | 191 | - model_name: us.amazon.nova-micro-v1:0 192 | litellm_params: 193 | model: bedrock/us.amazon.nova-micro-v1:0 194 | 195 | - model_name: us.meta.llama3-3-70b-instruct-v1:0 196 | litellm_params: 197 | model: bedrock/us.meta.llama3-3-70b-instruct-v1:0 198 | 199 | - model_name: us.anthropic.claude-3-5-sonnet-20241022-v2:0 200 | litellm_params: 201 | model: bedrock/us.anthropic.claude-3-5-sonnet-20241022-v2:0 202 | 203 | - model_name: us.anthropic.claude-3-7-sonnet-20250219-v1:0 204 | litellm_params: 205 | model: bedrock/converse/us.anthropic.claude-3-7-sonnet-20250219-v1:0 
-------------------------------------------------------------------------------- /config/details-for-models-that-need-additional-manual-config.yaml: -------------------------------------------------------------------------------- 1 | model_list: 2 | #Databricks Models (Commented out because the api_base value will vary by user and needs to be manually updated) 3 | # - model_name: databricks-meta-llama-3-1-70b-instruct 4 | # litellm_params: 5 | # model: databricks/databricks-meta-llama-3-1-70b-instruct 6 | # api_base: "" # e.g.: https://adb-3064715882934586.6.azuredatabricks.net/serving-endpoints 7 | 8 | # - model_name: databricks-meta-llama-3-1-405b-instruct 9 | # litellm_params: 10 | # model: databricks/databricks-meta-llama-3-1-405b-instruct 11 | # api_base: "" # e.g.: https://adb-3064715882934586.6.azuredatabricks.net/serving-endpoints 12 | 13 | # - model_name: databricks-dbrx-instruct 14 | # litellm_params: 15 | # model: databricks/databricks-dbrx-instruct 16 | # api_base: "" # e.g.: https://adb-3064715882934586.6.azuredatabricks.net/serving-endpoints 17 | 18 | # - model_name: databricks-meta-llama-3-70b-instruct 19 | # litellm_params: 20 | # model: databricks/databricks-meta-llama-3-70b-instruct 21 | # api_base: "" # e.g.: https://adb-3064715882934586.6.azuredatabricks.net/serving-endpoints 22 | 23 | # - model_name: databricks-llama-2-70b-chat 24 | # litellm_params: 25 | # model: databricks/databricks-llama-2-70b-chat 26 | # api_base: "" # e.g.: https://adb-3064715882934586.6.azuredatabricks.net/serving-endpoints 27 | 28 | # - model_name: databricks-mixtral-8x7b-instruct 29 | # litellm_params: 30 | # model: databricks/databricks-mixtral-8x7b-instruct 31 | # api_base: "" # e.g.: https://adb-3064715882934586.6.azuredatabricks.net/serving-endpoints 32 | 33 | # - model_name: databricks-mpt-30b-instruct 34 | # litellm_params: 35 | # model: databricks/databricks-mpt-30b-instruct 36 | # api_base: "" # e.g.: 
https://adb-3064715882934586.6.azuredatabricks.net/serving-endpoints 37 | 38 | # - model_name: databricks-mpt-7b-instruct 39 | # litellm_params: 40 | # model: databricks/databricks-mpt-7b-instruct 41 | # api_base: "" # e.g.: https://adb-3064715882934586.6.azuredatabricks.net/serving-endpoints 42 | 43 | #Sagemaker Models (Commented out because your config will vary dramatically based on the specific model you are using. Refer to the docs: https://docs.litellm.ai/docs/providers/aws_sagemaker) 44 | # - model_name: jumpstart-model 45 | # litellm_params: 46 | # model: sagemaker/jumpstart-dft-hf-textgeneration1-mp-20240815-185614 47 | 48 | #Azure OpenAI Models (Commented out because the api_base and api_version values will vary by user and need to be manually updated) 49 | # - model_name: azure/gpt-4o-realtime-preview-2024-10-01 50 | # litellm_params: 51 | # model: azure/gpt-4o-realtime-preview-2024-10-01 52 | # api_base: https://.openai.azure.com/ 53 | # api_version: "" 54 | 55 | # - model_name: azure/o1-mini 56 | # litellm_params: 57 | # model: azure/o1-mini 58 | # api_base: https://.openai.azure.com/ 59 | # api_version: "" 60 | 61 | # - model_name: azure/o1-preview 62 | # litellm_params: 63 | # model: azure/o1-preview 64 | # api_base: https://.openai.azure.com/ 65 | # api_version: "" 66 | 67 | # - model_name: azure/gpt-4o-mini 68 | # litellm_params: 69 | # model: azure/gpt-4o-mini 70 | # api_base: https://.openai.azure.com/ 71 | # api_version: "" 72 | 73 | # - model_name: azure/gpt-4o-mini-2024-07-18 74 | # litellm_params: 75 | # model: azure/gpt-4o-mini-2024-07-18 76 | # api_base: https://.openai.azure.com/ 77 | # api_version: "" 78 | 79 | # - model_name: azure/gpt-4o 80 | # litellm_params: 81 | # model: azure/gpt-4o 82 | # api_base: https://.openai.azure.com/ 83 | # api_version: "" 84 | 85 | # - model_name: azure/gpt-4o-2024-08-06 86 | # litellm_params: 87 | # model: azure/gpt-4o-2024-08-06 88 | # api_base: https://.openai.azure.com/ 89 | # api_version: "" 90 | 91 | 
# - model_name: azure/gpt-4o-2024-05-13 92 | # litellm_params: 93 | # model: azure/gpt-4o-2024-05-13 94 | # api_base: https://.openai.azure.com/ 95 | # api_version: "" 96 | 97 | # - model_name: azure/gpt-4-turbo 98 | # litellm_params: 99 | # model: azure/gpt-4-turbo 100 | # api_base: https://.openai.azure.com/ 101 | # api_version: "" 102 | 103 | # - model_name: azure/gpt-4-turbo-preview 104 | # litellm_params: 105 | # model: azure/gpt-4-0125-preview 106 | # api_base: https://.openai.azure.com/ 107 | # api_version: "" 108 | 109 | # - model_name: azure/gpt-4-0125-preview 110 | # litellm_params: 111 | # model: azure/gpt-4-0125-preview 112 | # api_base: https://.openai.azure.com/ 113 | # api_version: "" 114 | 115 | # - model_name: azure/gpt-4-1106-preview 116 | # litellm_params: 117 | # model: azure/gpt-4-1106-preview 118 | # api_base: https://.openai.azure.com/ 119 | # api_version: "" 120 | 121 | # - model_name: azure/gpt-3.5-turbo 122 | # litellm_params: 123 | # model: azure/gpt-3.5-turbo 124 | # api_base: https://.openai.azure.com/ 125 | # api_version: "" 126 | 127 | # - model_name: azure/gpt-3.5-turbo-1106 128 | # litellm_params: 129 | # model: azure/gpt-3.5-turbo-1106 130 | # api_base: https://.openai.azure.com/ 131 | # api_version: "" 132 | 133 | # - model_name: azure/gpt-3.5-turbo-0301 134 | # litellm_params: 135 | # model: azure/gpt-3.5-turbo-0301 136 | # api_base: https://.openai.azure.com/ 137 | # api_version: "" 138 | 139 | # - model_name: azure/gpt-3.5-turbo-0613 140 | # litellm_params: 141 | # model: azure/gpt-3.5-turbo-0613 142 | # api_base: https://.openai.azure.com/ 143 | # api_version: "" 144 | 145 | # - model_name: azure/gpt-3.5-turbo-16k 146 | # litellm_params: 147 | # model: azure/gpt-3.5-turbo-16k 148 | # api_base: https://.openai.azure.com/ 149 | # api_version: "" 150 | 151 | # - model_name: azure/gpt-3.5-turbo-16k-0613 152 | # litellm_params: 153 | # model: azure/gpt-3.5-turbo-16k-0613 154 | # api_base: https://.openai.azure.com/ 155 | # 
#!/bin/bash
# Creates a bastion EC2 instance (via Terraform) that can reach the private
# load balancer inside the VPC deployed by litellm-terraform-stack.
# Requires: .env providing TERRAFORM_S3_BUCKET_NAME and EC2_KEY_PAIR_NAME,
# AWS credentials, and an already-deployed litellm-terraform-stack.
set -aeuo pipefail

# Derive the region from the caller's configured AWS account/credentials.
aws_region=$(aws ec2 describe-availability-zones --output text --query 'AvailabilityZones[0].[RegionName]')
echo "$aws_region"

# Load environment variables from .env file (set -a exports everything sourced).
source .env

echo "EC2_KEY_PAIR_NAME: $EC2_KEY_PAIR_NAME"

# Fail fast with a clear message if the state bucket name was left blank in .env.
: "${TERRAFORM_S3_BUCKET_NAME:?TERRAFORM_S3_BUCKET_NAME must be set in .env}"

# Create the Terraform state bucket only if it does not already exist.
if aws s3api head-bucket --bucket "$TERRAFORM_S3_BUCKET_NAME" 2>/dev/null; then
  echo "Terraform Bucket $TERRAFORM_S3_BUCKET_NAME already exists, skipping creation"
else
  echo "Creating bucket $TERRAFORM_S3_BUCKET_NAME..."
  aws s3 mb "s3://$TERRAFORM_S3_BUCKET_NAME" --region "$aws_region"
  echo "Terraform Bucket created successfully"
fi

# Read the VPC id from the main stack's Terraform outputs.
cd litellm-terraform-stack
VPC_ID=$(terraform output -raw vpc_id)
cd ..

cd litellm-private-load-balancer-ec2-terraform

echo "about to deploy"

# Generate the S3 backend configuration for this stack's remote state.
cat > backend.hcl << EOF
bucket  = "${TERRAFORM_S3_BUCKET_NAME}"
key     = "terraform-ec2.tfstate"
region  = "${aws_region}"
encrypt = true
EOF
echo "Generated backend.hcl configuration"

# -reconfigure keeps this consistent with the sibling create/delete scripts
# and avoids stale-backend errors when the bucket or key changes.
terraform init -backend-config=backend.hcl -reconfigure

export TF_VAR_vpc_id=$VPC_ID
export TF_VAR_key_pair_name=$EC2_KEY_PAIR_NAME

terraform apply -auto-approve
echo "deployed"
14 | 15 | cd litellm-fake-llm-load-testing-server-terraform 16 | 17 | if [ -n "$CPU_ARCHITECTURE" ]; then 18 | # Check if CPU_ARCHITECTURE is either "x86" or "arm" 19 | case "$CPU_ARCHITECTURE" in 20 | "x86"|"arm") 21 | ARCH="$CPU_ARCHITECTURE" 22 | ;; 23 | *) 24 | echo "Error: CPU_ARCHITECTURE must be either 'x86' or 'arm'" 25 | exit 1 26 | ;; 27 | esac 28 | else 29 | # Determine architecture from system 30 | ARCH=$(uname -m) 31 | case $ARCH in 32 | x86_64) 33 | ARCH="x86" 34 | ;; 35 | arm64) 36 | ARCH="arm" 37 | ;; 38 | *) 39 | echo "Unsupported architecture: $ARCH" 40 | exit 1 41 | ;; 42 | esac 43 | fi 44 | 45 | echo $ARCH 46 | 47 | echo "about to build and push image" 48 | cd docker 49 | ./docker-build-and-deploy.sh $APP_NAME $ARCH 50 | cd .. 51 | 52 | echo "about to deploy" 53 | 54 | export TF_VAR_vpc_id=$VPC_ID 55 | export TF_VAR_ecr_fake_server_repository=$APP_NAME 56 | export TF_VAR_architecture=$ARCH 57 | export TF_VAR_fake_llm_load_testing_endpoint_certifiacte_arn=$FAKE_LLM_LOAD_TESTING_ENDPOINT_CERTIFICATE_ARN 58 | export TF_VAR_fake_llm_load_testing_endpoint_hosted_zone_name=$FAKE_LLM_LOAD_TESTING_ENDPOINT_HOSTED_ZONE_NAME 59 | export TF_VAR_fake_llm_load_testing_endpoint_record_name=$FAKE_LLM_LOAD_TESTING_ENDPOINT_RECORD_NAME 60 | 61 | 62 | cat > backend.hcl << EOF 63 | bucket = "${TERRAFORM_S3_BUCKET_NAME}" 64 | key = "terraform-fake-llm-server.tfstate" 65 | region = "${aws_region}" 66 | encrypt = true 67 | EOF 68 | echo "Generated backend.hcl configuration" 69 | 70 | terraform init -backend-config=backend.hcl -reconfigure 71 | terraform apply -auto-approve 72 | 73 | echo "deployed" 74 | 75 | if [ $? 
-eq 0 ]; then 76 | LITELLM_ECS_CLUSTER=$(terraform output -raw fake_server_ecs_cluster) 77 | LITELLM_ECS_TASK=$(terraform output -raw fake_server_ecs_task) 78 | 79 | aws ecs update-service \ 80 | --cluster $LITELLM_ECS_CLUSTER \ 81 | --service $LITELLM_ECS_TASK \ 82 | --force-new-deployment \ 83 | --desired-count 3 \ 84 | --no-cli-pager 85 | else 86 | echo "Deployment failed" 87 | fi -------------------------------------------------------------------------------- /delete-fake-llm-load-testing-server.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -aeuo pipefail 3 | 4 | aws_region=$(aws ec2 describe-availability-zones --output text --query 'AvailabilityZones[0].[RegionName]') 5 | echo $aws_region 6 | 7 | APP_NAME=fakeserver 8 | 9 | source .env 10 | 11 | cd litellm-terraform-stack 12 | VPC_ID=$(terraform output -raw vpc_id) 13 | cd .. 14 | 15 | cd litellm-fake-llm-load-testing-server-terraform 16 | 17 | if [ -n "$CPU_ARCHITECTURE" ]; then 18 | # Check if CPU_ARCHITECTURE is either "x86" or "arm" 19 | case "$CPU_ARCHITECTURE" in 20 | "x86"|"arm") 21 | ARCH="$CPU_ARCHITECTURE" 22 | ;; 23 | *) 24 | echo "Error: CPU_ARCHITECTURE must be either 'x86' or 'arm'" 25 | exit 1 26 | ;; 27 | esac 28 | else 29 | # Determine architecture from system 30 | ARCH=$(uname -m) 31 | case $ARCH in 32 | x86_64) 33 | ARCH="x86" 34 | ;; 35 | arm64) 36 | ARCH="arm" 37 | ;; 38 | *) 39 | echo "Unsupported architecture: $ARCH" 40 | exit 1 41 | ;; 42 | esac 43 | fi 44 | 45 | echo $ARCH 46 | 47 | echo "about to destroy" 48 | 49 | export TF_VAR_vpc_id="vpc-02b681fa786fa8292" 50 | export TF_VAR_ecr_fake_server_repository=$APP_NAME 51 | export TF_VAR_architecture=$ARCH 52 | export TF_VAR_fake_llm_load_testing_endpoint_certifiacte_arn=$FAKE_LLM_LOAD_TESTING_ENDPOINT_CERTIFICATE_ARN 53 | export TF_VAR_fake_llm_load_testing_endpoint_hosted_zone_name=$FAKE_LLM_LOAD_TESTING_ENDPOINT_HOSTED_ZONE_NAME 54 | export 
#!/bin/bash
# Builds the LiteLLM container image (optionally from a source tarball) and
# pushes it to ECR tagged with the LiteLLM version.
# Usage: ./docker-build-and-deploy.sh <app_name> <build_from_source> <arch>
#   app_name           ECR repository / image name
#   build_from_source  "true" to build from the LiteLLM release tarball
#   arch               "x86" or "arm"

if [ $# -ne 3 ]; then
    # BUG FIX: the original message printed no argument names at all
    # ('Usage: $0 '), leaving the caller guessing.
    echo "Usage: $0 <app_name> <build_from_source(true|false)> <arch(x86|arm)>"
    exit 1
fi

APP_NAME=$1
BUILD_FROM_SOURCE=$(echo "$2" | tr '[:upper:]' '[:lower:]')
ARCH=$3

# check again if LITELLM_VERSION is set if script is used standalone
source .env
if [[ (-z "$LITELLM_VERSION") || ("$LITELLM_VERSION" == "placeholder") ]]; then
    echo "LITELLM_VERSION must be set in .env file"
    exit 1
fi

if [ "$BUILD_FROM_SOURCE" = "true" ]; then
    echo "Building from source..."
    if [ ! -d "litellm-source" ]; then
        echo "Fetching source for LiteLLM version ${LITELLM_VERSION}"
        mkdir litellm-source
        curl -L "https://github.com/BerriAI/litellm/archive/refs/tags/${LITELLM_VERSION}.tar.gz" | tar -xz -C litellm-source --strip-components=1
    else
        LITELLM_SOURCE_VERSION=$(yq '.tool.poetry.version' litellm-source/pyproject.toml)
        # NOTE(review): this only matches tags of the exact form
        # "v<semver>"; tags like "main-v1.56.5" can never compare equal to
        # v"$LITELLM_SOURCE_VERSION" -- confirm the intended tag format.
        if [ v"$LITELLM_SOURCE_VERSION" != "$LITELLM_VERSION" ]; then
            echo "Your specified version ${LITELLM_VERSION} does not match the source version ${LITELLM_SOURCE_VERSION}"
            echo "Please remove the litellm-source directory manually and re-run this script when you change the version number"
            exit 1
        else
            # Fixed: the trailing period used to sit outside the quotes and
            # was echoed as a stray extra argument.
            echo "Source version ${LITELLM_VERSION} already exists, skipping fetching."
        fi
    fi

    cd litellm-source
fi

AWS_REGION=$(aws ec2 describe-availability-zones --output text --query 'AvailabilityZones[0].[RegionName]')
export AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query "Account" --output text)

# Check if the repository already exists
REPO_EXISTS=$(aws ecr describe-repositories --repository-names "$APP_NAME" 2>/dev/null)

if [ -z "$REPO_EXISTS" ]; then
    # Repository does not exist, create it with tag
    aws ecr create-repository --repository-name "$APP_NAME" --tags Key=project,Value=llmgateway
else
    echo "Repository $APP_NAME already exists, checking tags..."

    # Get current tags for the repository
    CURRENT_TAGS=$(aws ecr list-tags-for-resource --resource-arn "arn:aws:ecr:${AWS_REGION}:${AWS_ACCOUNT_ID}:repository/${APP_NAME}")

    # Check if project=llmgateway tag exists
    if ! echo "$CURRENT_TAGS" | grep -q '"Key": "project".*"Value": "llmgateway"'; then
        echo "Adding project=llmgateway tag..."
        aws ecr tag-resource \
            --resource-arn "arn:aws:ecr:${AWS_REGION}:${AWS_ACCOUNT_ID}:repository/${APP_NAME}" \
            --tags Key=project,Value=llmgateway
    else
        echo "Tag project=llmgateway already exists."
    fi
fi

echo "$ARCH"

# Map the friendly architecture name to a Docker platform string.
case $ARCH in
    "x86")
        DOCKER_ARCH="linux/amd64"
        ;;
    "arm")
        DOCKER_ARCH="linux/arm64"
        ;;
    *)
        echo "Unsupported architecture: $ARCH"
        exit 1
        ;;
esac

echo "$DOCKER_ARCH"

# Authenticate Docker against the account's ECR registry, then build/tag/push.
aws ecr get-login-password --region "$AWS_REGION" | docker login --username AWS --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_REGION.amazonaws.com"
docker build --platform "$DOCKER_ARCH" --build-arg LITELLM_VERSION="${LITELLM_VERSION}" -t "$APP_NAME:${LITELLM_VERSION}" .
echo "Tagging image with ${APP_NAME}:${LITELLM_VERSION}"
docker tag "$APP_NAME:${LITELLM_VERSION}" "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_REGION.amazonaws.com/$APP_NAME:${LITELLM_VERSION}"
docker push "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_REGION.amazonaws.com/$APP_NAME:${LITELLM_VERSION}"
84 | echo "Tagging image with ${APP_NAME}:${LITELLM_VERSION}" 85 | docker tag $APP_NAME\:${LITELLM_VERSION} $AWS_ACCOUNT_ID.dkr.ecr.$AWS_REGION.amazonaws.com/$APP_NAME\:${LITELLM_VERSION} 86 | docker push $AWS_ACCOUNT_ID.dkr.ecr.$AWS_REGION.amazonaws.com/$APP_NAME\:${LITELLM_VERSION} 87 | -------------------------------------------------------------------------------- /install-cloud9-prerequisites.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Function to check if a command exists 4 | command_exists() { 5 | command -v "$1" >/dev/null 2>&1 6 | } 7 | 8 | # Check if yq is installed 9 | if command_exists yq; then 10 | echo "yq is already installed" 11 | yq --version 12 | else 13 | echo "yq is not installed. Installing now..." 14 | 15 | # Set the version 16 | VERSION="v4.40.5" 17 | BINARY="yq_linux_amd64" 18 | 19 | # Check if script is run with sudo 20 | if [ "$EUID" -ne 0 ]; then 21 | echo "Please run with sudo privileges" 22 | exit 1 23 | fi 24 | 25 | # Download yq 26 | if wget https://github.com/mikefarah/yq/releases/download/${VERSION}/${BINARY} -O /usr/bin/yq; then 27 | # Make it executable 28 | chmod +x /usr/bin/yq 29 | 30 | echo "yq has been successfully installed" 31 | yq --version 32 | else 33 | echo "Failed to download yq" 34 | exit 1 35 | fi 36 | fi 37 | 38 | sudo yum update -y 39 | 40 | # Install required dependencies 41 | sudo yum install -y yum-utils unzip wget 42 | 43 | # Download the signing key 44 | wget -O- https://apt.releases.hashicorp.com/gpg | sudo gpg --import - 45 | 46 | # Add the HashiCorp repository 47 | sudo yum-config-manager --add-repo https://rpm.releases.hashicorp.com/AmazonLinux/hashicorp.repo 48 | 49 | # Install Terraform 50 | sudo yum install -y terraform 51 | 52 | # Verify installation 53 | terraform version 54 | 55 | curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl" 56 | 57 | sudo install -o root -g root -m 
#!/bin/bash
# Builds the fake LLM load-testing server image and pushes it to ECR as
# <app_name>:latest.
# Usage: ./docker-build-and-deploy.sh <app_name> <arch>

if [ $# -ne 2 ]; then
    # BUG FIX: the original message printed no argument names ('Usage: $0 ').
    echo "Usage: $0 <app_name> <arch(x86|arm)>"
    exit 1
fi

APP_NAME=$1
ARCH=$2

AWS_REGION=$(aws ec2 describe-availability-zones --output text --query 'AvailabilityZones[0].[RegionName]')
export AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query "Account" --output text)

# Check if the repository already exists
REPO_EXISTS=$(aws ecr describe-repositories --repository-names "$APP_NAME" 2>/dev/null)

if [ -z "$REPO_EXISTS" ]; then
    # Repository does not exist, create it with tag
    aws ecr create-repository --repository-name "$APP_NAME" --tags Key=project,Value=llmgateway
else
    echo "Repository $APP_NAME already exists, checking tags..."

    # Get current tags for the repository
    CURRENT_TAGS=$(aws ecr list-tags-for-resource --resource-arn "arn:aws:ecr:${AWS_REGION}:${AWS_ACCOUNT_ID}:repository/${APP_NAME}")

    # Check if project=llmgateway tag exists
    if ! echo "$CURRENT_TAGS" | grep -q '"Key": "project".*"Value": "llmgateway"'; then
        echo "Adding project=llmgateway tag..."
        aws ecr tag-resource \
            --resource-arn "arn:aws:ecr:${AWS_REGION}:${AWS_ACCOUNT_ID}:repository/${APP_NAME}" \
            --tags Key=project,Value=llmgateway
    else
        echo "Tag project=llmgateway already exists."
    fi
fi

echo "$ARCH"

# Map the friendly architecture name to a Docker platform string.
case $ARCH in
    "x86")
        DOCKER_ARCH="linux/amd64"
        ;;
    "arm")
        DOCKER_ARCH="linux/arm64"
        ;;
    *)
        echo "Unsupported architecture: $ARCH"
        exit 1
        ;;
esac

echo "$DOCKER_ARCH"

# Authenticate Docker against the account's ECR registry, then build/tag/push.
aws ecr get-login-password --region "$AWS_REGION" | docker login --username AWS --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_REGION.amazonaws.com"
docker build --platform "$DOCKER_ARCH" -t "$APP_NAME" .
docker tag "$APP_NAME:latest" "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_REGION.amazonaws.com/$APP_NAME:latest"
docker push "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_REGION.amazonaws.com/$APP_NAME:latest"
code: Optional[int], 26 | ): 27 | self.message = message 28 | self.type = type 29 | self.param = param 30 | self.code = code 31 | 32 | def to_dict(self) -> dict: 33 | return { 34 | "message": self.message, 35 | "type": self.type, 36 | "param": self.param, 37 | "code": self.code, 38 | } 39 | 40 | 41 | limiter = Limiter(key_func=get_remote_address) 42 | app = FastAPI() 43 | app.state.limiter = limiter 44 | 45 | 46 | @app.exception_handler(RateLimitExceeded) 47 | async def _rate_limit_exceeded_handler(request: Request, exc: RateLimitExceeded): 48 | return JSONResponse(status_code=429, content={"detail": "Rate Limited!"}) 49 | 50 | 51 | app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler) 52 | 53 | app.add_middleware( 54 | CORSMiddleware, 55 | allow_origins=["*"], 56 | allow_credentials=True, 57 | allow_methods=["*"], 58 | allow_headers=["*"], 59 | ) 60 | 61 | 62 | @app.get("/") 63 | async def health_check(): 64 | return {"status": "healthy"} 65 | 66 | @app.post("/model/{model_id}/converse") 67 | async def converse(model_id: str, request: Request): 68 | """ 69 | Fake Bedrock 'converse' endpoint. 70 | Returns a single JSON response according to the Bedrock response schema. 71 | """ 72 | body = await request.json() 73 | 74 | # Simulate random processing delay (optional) 75 | # await asyncio.sleep(random.uniform(1.0, 3.0)) 76 | 77 | # You could inspect 'body' here to see the user's messages or parameters. 78 | # For example: messages = body.get("messages", []) 79 | # Then craft a response. Here we just hard-code a sample. 80 | 81 | # A minimal valid Bedrock-like response 82 | response_data = { 83 | "output": { 84 | "message": { 85 | "role": "assistant", 86 | "content": [ 87 | { 88 | "text": "Hello there! This is a fake response from the Bedrock model." 
@app.post("/chat/completions")
@app.post("/v1/chat/completions")
async def completion(request: Request):
    """
    Fake OpenAI chat-completions endpoint.

    Returns a fixed non-streaming completion, or — when the request body
    contains "stream": true — an SSE stream of chunks mimicking OpenAI's
    streaming wire format.
    """
    payload = await request.json()

    if not payload.get("stream", False):
        # Non-streaming: one canned completion object.
        # (A random 1–3 second delay can be simulated by un-commenting this.)
        # await asyncio.sleep(random.uniform(1.0, 3.0))
        return {
            "id": "chatcmpl-123",
            "object": "chat.completion",
            "created": 1677652288,
            "model": "gpt-3.5-turbo-0301",
            "system_fingerprint": "fp_44709d6fcb",
            "choices": [
                {
                    "index": 0,
                    "message": {
                        "role": "assistant",
                        "content": "Hello there, how may I assist you today?",
                    },
                    "logprobs": None,
                    "finish_reason": "stop",
                }
            ],
            "usage": {"prompt_tokens": 9, "completion_tokens": 12, "total_tokens": 21},
        }

    # Streaming branch.
    # (A small initial delay can be simulated by un-commenting this.)
    # await asyncio.sleep(random.uniform(0.8, 1.5))

    async def stream_generator():
        # Pseudo "tokens" that together form the canned answer.
        content_parts = [
            "Hello",
            " there,",
            " how ",
            "can ",
            "I ",
            "assist ",
            "you ",
            "today?",
        ]

        for idx, token in enumerate(content_parts):
            # Chunk shaped like OpenAI's streaming format; only the first
            # chunk carries the "role" key in its delta.
            chunk_data = {
                "id": "chatcmpl-123",
                "object": "chat.completion.chunk",
                "created": 1677652288,
                "model": "gpt-3.5-turbo-0301",
                "choices": [
                    {
                        "delta": {
                            **({"role": "assistant"} if idx == 0 else {}),
                            "content": token,
                        },
                        "index": 0,
                        "finish_reason": None,
                    }
                ],
            }
            yield f"data: {json.dumps(chunk_data)}\n\n"
            # await asyncio.sleep(random.uniform(0.2, 0.8))

        # Terminal chunk with finish_reason "stop", then the [DONE] marker.
        final_chunk = {
            "id": "chatcmpl-123",
            "object": "chat.completion.chunk",
            "created": 1677652288,
            "model": "gpt-3.5-turbo-0301",
            "choices": [
                {
                    "delta": {},
                    "index": 0,
                    "finish_reason": "stop",
                }
            ],
        }
        yield f"data: {json.dumps(final_chunk)}\n\n"
        yield "data: [DONE]\n\n"

    return StreamingResponse(stream_generator(), media_type="text/event-stream")
== "__main__": 205 | port = 8080 206 | while True: 207 | sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 208 | result = sock.connect_ex(("0.0.0.0", port)) 209 | if result != 0: 210 | print(f"Port {port} is available, starting server on {port}...") 211 | break 212 | else: 213 | port += 1 214 | 215 | uvicorn.run(app, host="0.0.0.0", port=port) 216 | -------------------------------------------------------------------------------- /litellm-fake-llm-load-testing-server-terraform/docker/requirements.txt: -------------------------------------------------------------------------------- 1 | fastapi 2 | uvicorn 3 | slowapi 4 | httpx 5 | openai -------------------------------------------------------------------------------- /litellm-fake-llm-load-testing-server-terraform/main.tf: -------------------------------------------------------------------------------- 1 | # Data sources 2 | data "aws_subnets" "public" { 3 | filter { 4 | name = "vpc-id" 5 | values = [var.vpc_id] 6 | } 7 | filter { 8 | name = "map-public-ip-on-launch" 9 | values = ["true"] 10 | } 11 | } 12 | 13 | data "aws_subnets" "private" { 14 | filter { 15 | name = "vpc-id" 16 | values = [var.vpc_id] 17 | } 18 | filter { 19 | name = "map-public-ip-on-launch" 20 | values = ["false"] 21 | } 22 | } 23 | 24 | data "aws_route53_zone" "hosted_zone" { 25 | name = var.fake_llm_load_testing_endpoint_hosted_zone_name 26 | } 27 | 28 | data "aws_ecr_repository" "fake_server_repo" { 29 | name = var.ecr_fake_server_repository 30 | } 31 | 32 | # ECS Cluster 33 | resource "aws_ecs_cluster" "fake_llm_cluster" { 34 | name = "FakeLlmCluster" 35 | } 36 | 37 | # ECS Task Definition 38 | resource "aws_ecs_task_definition" "fake_server_task_def" { 39 | family = "FakeServerTaskDef" 40 | requires_compatibilities = ["FARGATE"] 41 | network_mode = "awsvpc" 42 | cpu = "512" 43 | memory = "1024" 44 | execution_role_arn = aws_iam_role.ecs_task_execution_role.arn 45 | task_role_arn = aws_iam_role.ecs_task_role.arn 46 | 47 | 
# CloudWatch Log Group
# Destination for the FakeServer container's awslogs log driver (30-day retention).
resource "aws_cloudwatch_log_group" "fake_server_logs" {
  name              = "/ecs/FakeServer"
  retention_in_days = 30
}

# IAM Roles
# Execution role: used by the ECS agent to pull the image and write logs
# (AmazonECSTaskExecutionRolePolicy attached below).
resource "aws_iam_role" "ecs_task_execution_role" {
  name = "FakeServerEcsTaskExecutionRole"

  assume_role_policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Action = "sts:AssumeRole"
        Effect = "Allow"
        Principal = {
          Service = "ecs-tasks.amazonaws.com"
        }
      }
    ]
  })
}

resource "aws_iam_role_policy_attachment" "ecs_task_execution_role_policy" {
  role       = aws_iam_role.ecs_task_execution_role.name
  policy_arn = "arn:aws:iam::aws:policy/service-role/AmazonECSTaskExecutionRolePolicy"
}

# Task role: assumed by the application container itself. No policies are
# attached here — the fake server needs no AWS API access.
resource "aws_iam_role" "ecs_task_role" {
  name = "FakeServerEcsTaskRole"

  assume_role_policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Action = "sts:AssumeRole"
        Effect = "Allow"
        Principal = {
          Service = "ecs-tasks.amazonaws.com"
        }
      }
    ]
  })
}

# Application Load Balancer
# Internet-facing ALB in the VPC's public subnets; TLS terminates here.
resource "aws_lb" "fake_server_alb" {
  name               = "FakeServer-ALB"
  internal           = false
  load_balancer_type = "application"
  security_groups    = [aws_security_group.alb_sg.id]
  subnets            = data.aws_subnets.public.ids

  enable_deletion_protection = false
}

# ALB HTTPS Listener
# Terminates TLS with the user-supplied certificate and forwards everything
# to the target group.
resource "aws_lb_listener" "fake_server_listener" {
  load_balancer_arn = aws_lb.fake_server_alb.arn
  port              = "443"
  protocol          = "HTTPS"
  ssl_policy        = "ELBSecurityPolicy-TLS13-1-2-2021-06"
  certificate_arn   = var.fake_llm_load_testing_endpoint_certifiacte_arn

  default_action {
    type             = "forward"
    target_group_arn = aws_lb_target_group.fake_server_tg.arn
  }
}

# Target Group
# IP-mode targets (required for Fargate awsvpc tasks) on the container port;
# the health check hits the server's "/" endpoint.
resource "aws_lb_target_group" "fake_server_tg" {
  name        = "FakeServer-TG"
  port        = 8080
  protocol    = "HTTP"
  vpc_id      = var.vpc_id
  target_type = "ip"

  health_check {
    enabled             = true
    interval            = 30
    path                = "/"
    port                = "traffic-port"
    healthy_threshold   = 3
    unhealthy_threshold = 3
    timeout             = 5
    protocol            = "HTTP"
    matcher             = "200-399"
  }
}

# Security Groups
# ALB: HTTPS open to the internet (intentional — this is a public
# load-testing endpoint).
resource "aws_security_group" "alb_sg" {
  name        = "fake-server-alb-sg"
  description = "Allow HTTPS inbound traffic"
  vpc_id      = var.vpc_id

  ingress {
    description = "HTTPS from internet"
    from_port   = 443
    to_port     = 443
    protocol    = "tcp"
    cidr_blocks = ["0.0.0.0/0"]
  }

  egress {
    from_port   = 0
    to_port     = 0
    protocol    = "-1"
    cidr_blocks = ["0.0.0.0/0"]
  }
}

# ECS tasks: accept port 8080 only from the ALB's security group.
resource "aws_security_group" "ecs_sg" {
  name        = "fake-server-ecs-sg"
  description = "Allow inbound traffic from ALB"
  vpc_id      = var.vpc_id

  ingress {
    description     = "HTTP from ALB"
    from_port       = 8080
    to_port         = 8080
    protocol        = "tcp"
    security_groups = [aws_security_group.alb_sg.id]
  }

  egress {
    from_port   = 0
    to_port     = 0
    protocol    = "-1"
    cidr_blocks = ["0.0.0.0/0"]
  }
}
= 0 208 | protocol = "-1" 209 | cidr_blocks = ["0.0.0.0/0"] 210 | } 211 | } 212 | 213 | # ECS Service 214 | resource "aws_ecs_service" "fake_server_service" { 215 | name = "FakeServer" 216 | cluster = aws_ecs_cluster.fake_llm_cluster.id 217 | task_definition = aws_ecs_task_definition.fake_server_task_def.arn 218 | desired_count = 3 219 | launch_type = "FARGATE" 220 | health_check_grace_period_seconds = 300 221 | 222 | network_configuration { 223 | subnets = data.aws_subnets.private.ids 224 | security_groups = [aws_security_group.ecs_sg.id] 225 | assign_public_ip = false 226 | } 227 | 228 | load_balancer { 229 | target_group_arn = aws_lb_target_group.fake_server_tg.arn 230 | container_name = "FakeServerContainer" 231 | container_port = 8080 232 | } 233 | } 234 | 235 | # Route 53 Record 236 | resource "aws_route53_record" "fake_server_dns" { 237 | zone_id = data.aws_route53_zone.hosted_zone.zone_id 238 | name = var.fake_llm_load_testing_endpoint_record_name 239 | type = "A" 240 | 241 | alias { 242 | name = aws_lb.fake_server_alb.dns_name 243 | zone_id = aws_lb.fake_server_alb.zone_id 244 | evaluate_target_health = true 245 | } 246 | } 247 | 248 | -------------------------------------------------------------------------------- /litellm-fake-llm-load-testing-server-terraform/outputs.tf: -------------------------------------------------------------------------------- 1 | # Outputs 2 | output "fake_server_ecs_cluster" { 3 | value = aws_ecs_cluster.fake_llm_cluster.name 4 | description = "Name of the ECS Cluster" 5 | } 6 | 7 | output "fake_server_ecs_task" { 8 | value = aws_ecs_service.fake_server_service.name 9 | description = "Name of the task service" 10 | } 11 | 12 | output "fake_server_service_url" { 13 | value = "https://${var.fake_llm_load_testing_endpoint_record_name}" 14 | description = "URL of the deployed service" 15 | } 16 | -------------------------------------------------------------------------------- 
# ---------- litellm-fake-llm-load-testing-server-terraform/providers.tf ----------
provider "aws" {
}

terraform {
  backend "s3" {}
}

data "aws_region" "current" {}

# ---------- litellm-fake-llm-load-testing-server-terraform/variables.tf ----------
# Inputs for the fake-LLM load-testing stack.
variable "vpc_id" {
  description = "ID of the VPC"
  type        = string
}

# NOTE(review): "certifiacte" is a typo, but this name is part of the module's
# public interface (set by callers in tfvars), so it is kept for compatibility.
variable "fake_llm_load_testing_endpoint_certifiacte_arn" {
  description = "ARN of the SSL certificate"
  type        = string
}

variable "fake_llm_load_testing_endpoint_hosted_zone_name" {
  description = "Name of the hosted zone"
  type        = string
}

variable "fake_llm_load_testing_endpoint_record_name" {
  description = "Route53 A record name for the service"
  type        = string
}

variable "ecr_fake_server_repository" {
  description = "Name of the ECR repository"
  type        = string
}

variable "architecture" {
  description = "CPU architecture (x86 or arm)"
  type        = string
}

# ---------- litellm-private-load-balancer-ec2-terraform/main.tf ----------
# Bastion EC2 host used to reach the private load balancer.

# Existing VPC to deploy into.
data "aws_vpc" "imported_vpc" {
  id = var.vpc_id
}

# "Public" subnets are detected as subnets that auto-assign a public IP.
data "aws_subnets" "public_subnets" {
  filter {
    name   = "vpc-id"
    values = [var.vpc_id]
  }

  filter {
    name   = "map-public-ip-on-launch"
    values = ["true"]
  }
}

# Most recent Amazon Linux 2023 AMI (x86_64, kernel 6.1).
data "aws_ami" "amazon_linux" {
  most_recent = true
  owners      = ["amazon"]

  filter {
    name   = "name"
    values = ["al2023-ami-*-kernel-6.1-x86_64"]
  }

  filter {
    name   = "virtualization-type"
    values = ["hvm"]
  }
}

# Security group for the bastion instance.
resource "aws_security_group" "linux_sg" {
  name        = "LinuxInstanceSG"
  description = "Security group for Linux EC2 instance"
  vpc_id      = data.aws_vpc.imported_vpc.id

  # SSH from anywhere — consider restricting to specific IPs in production.
  ingress {
    from_port   = 22
    to_port     = 22
    protocol    = "tcp"
    cidr_blocks = ["0.0.0.0/0"]
  }

  # Unrestricted egress.
  egress {
    from_port   = 0
    to_port     = 0
    protocol    = "-1"
    cidr_blocks = ["0.0.0.0/0"]
  }

  tags = {
    Name = "LinuxInstanceSG"
  }
}

# IAM role the instance assumes so Systems Manager can manage it.
resource "aws_iam_role" "ec2_ssm_role" {
  name = "Ec2SsmRole"

  assume_role_policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Action = "sts:AssumeRole"
        Effect = "Allow"
        Principal = {
          Service = "ec2.amazonaws.com"
        }
      }
    ]
  })
}

# Managed policy that enables SSM Session Manager access.
resource "aws_iam_role_policy_attachment" "ssm_policy_attachment" {
  role       = aws_iam_role.ec2_ssm_role.name
  policy_arn = "arn:aws:iam::aws:policy/AmazonSSMManagedInstanceCore"
}

resource "aws_iam_instance_profile" "ec2_instance_profile" {
  name = "ec2-instance-profile"
  role = aws_iam_role.ec2_ssm_role.name
}

# Bastion instance, placed in the first discovered public subnet.
resource "aws_instance" "linux_instance" {
  ami           = data.aws_ami.amazon_linux.id
  instance_type = "t3.small"
  subnet_id     = length(data.aws_subnets.public_subnets.ids) > 0 ?
data.aws_subnets.public_subnets.ids[0] : null 99 | vpc_security_group_ids = [aws_security_group.linux_sg.id] 100 | key_name = var.key_pair_name 101 | iam_instance_profile = aws_iam_instance_profile.ec2_instance_profile.name 102 | associate_public_ip_address = true 103 | 104 | metadata_options { 105 | http_endpoint = "enabled" 106 | http_tokens = "required" 107 | } 108 | 109 | tags = { 110 | Name = "LinuxInstance" 111 | } 112 | 113 | lifecycle { 114 | precondition { 115 | condition = length(data.aws_subnets.public_subnets.ids) > 0 116 | error_message = "No subnets with auto-assign public IP enabled were found in the VPC. Please enable auto-assign public IP on at least one subnet." 117 | } 118 | } 119 | } 120 | -------------------------------------------------------------------------------- /litellm-private-load-balancer-ec2-terraform/outputs.tf: -------------------------------------------------------------------------------- 1 | # Output the instance ID 2 | output "linux_instance_id" { 3 | value = aws_instance.linux_instance.id 4 | description = "Linux EC2 Instance ID" 5 | } 6 | 7 | # Output the public IP address 8 | output "bastion_host_public_ip" { 9 | value = aws_instance.linux_instance.public_ip 10 | description = "Public IP address of the Linux EC2 Instance" 11 | } 12 | -------------------------------------------------------------------------------- /litellm-private-load-balancer-ec2-terraform/providers.tf: -------------------------------------------------------------------------------- 1 | provider "aws" { 2 | } 3 | 4 | terraform { 5 | backend "s3" {} 6 | } -------------------------------------------------------------------------------- /litellm-private-load-balancer-ec2-terraform/variables.tf: -------------------------------------------------------------------------------- 1 | # Variables 2 | variable "vpc_id" { 3 | description = "The ID of the VPC" 4 | type = string 5 | } 6 | 7 | variable "key_pair_name" { 8 | description = "The name of the key pair to use 
for SSH access" 9 | type = string 10 | } -------------------------------------------------------------------------------- /litellm-s3-log-bucket-terraform/outputs.tf: -------------------------------------------------------------------------------- 1 | # Outputs to match the CDK stack's CfnOutput 2 | output "LogBucketName" { 3 | description = "The name of the Log S3 bucket" 4 | value = aws_s3_bucket.log_bucket.bucket 5 | } 6 | 7 | output "LogBucketArn" { 8 | description = "The ARN of the Log S3 bucket" 9 | value = aws_s3_bucket.log_bucket.arn 10 | } -------------------------------------------------------------------------------- /litellm-s3-log-bucket-terraform/provider.tf: -------------------------------------------------------------------------------- 1 | provider "aws" { 2 | default_tags { 3 | tags = { 4 | "stack-id" = var.name 5 | "project" = "llmgateway" 6 | } 7 | } 8 | } 9 | 10 | terraform { 11 | backend "s3" {} 12 | } -------------------------------------------------------------------------------- /litellm-s3-log-bucket-terraform/s3.tf: -------------------------------------------------------------------------------- 1 | resource "aws_s3_bucket" "log_bucket" { 2 | bucket_prefix = "litellm-logs-" 3 | force_destroy = true 4 | } 5 | 6 | resource "aws_s3_bucket_server_side_encryption_configuration" "log_bucket" { 7 | bucket = aws_s3_bucket.log_bucket.id 8 | 9 | rule { 10 | apply_server_side_encryption_by_default { 11 | sse_algorithm = "AES256" 12 | } 13 | } 14 | } 15 | 16 | resource "aws_s3_bucket_policy" "log_bucket" { 17 | bucket = aws_s3_bucket.log_bucket.id 18 | 19 | policy = jsonencode({ 20 | Version = "2012-10-17" 21 | Statement = [ 22 | { 23 | Sid = "EnforceSSLOnly" 24 | Effect = "Deny" 25 | Principal = "*" 26 | Action = "s3:*" 27 | Resource = [ 28 | aws_s3_bucket.log_bucket.arn, 29 | "${aws_s3_bucket.log_bucket.arn}/*" 30 | ] 31 | Condition = { 32 | Bool = { 33 | "aws:SecureTransport" = "false" 34 | } 35 | } 36 | } 37 | ] 38 | }) 39 | } 40 | 41 | resource 
"aws_s3_bucket_public_access_block" "log_bucket" { 42 | bucket = aws_s3_bucket.log_bucket.id 43 | block_public_acls = true 44 | block_public_policy = true 45 | } 46 | -------------------------------------------------------------------------------- /litellm-s3-log-bucket-terraform/variables.tf: -------------------------------------------------------------------------------- 1 | variable "name" { 2 | type = string 3 | description = "Name or ID of the stack (used as a tag)" 4 | } -------------------------------------------------------------------------------- /litellm-terraform-stack/main.tf: -------------------------------------------------------------------------------- 1 | #-------------------------------------------------------------- 2 | # Adding guidance solution ID via AWS CloudFormation resource 3 | #-------------------------------------------------------------- 4 | resource "aws_cloudformation_stack" "guidance_deployment_metrics" { 5 | name = "tracking-stack" 6 | template_body = < 0 75 | ] 76 | 77 | existing_private_subnet_ids = [ 78 | for subnet_id, rt in data.aws_route_table.private_subnet_route_tables : subnet_id 79 | if length([ 80 | for route in rt.routes : route 81 | if route.gateway_id != null && 82 | can(regex("^igw-", route.gateway_id)) && 83 | route.cidr_block == "0.0.0.0/0" 84 | ]) == 0 85 | ] 86 | 87 | # The final chosen subnets for "private_with_egress" or "private_isolated" usage. 88 | # If existing VPC => data subnets (you must do your own filtering in real usage). 89 | # If new VPC => the private subnets we created. 90 | chosen_subnet_ids = local.creating_new_vpc ? local.new_private_subnet_ids : local.existing_private_subnet_ids 91 | } 92 | 93 | locals { 94 | create_endpoints = (local.creating_new_vpc || var.create_vpc_endpoints_in_existing_vpc) 95 | } 96 | 97 | data "aws_route_tables" "existing_vpc_all" { 98 | # only do the lookup if var.vpc_id is set 99 | count = local.creating_new_vpc ? 
0 : 1 100 | 101 | filter { 102 | name = "vpc-id" 103 | values = [var.vpc_id] 104 | } 105 | } 106 | 107 | locals { 108 | # If we’re using an existing VPC, fetch ALL route table IDs. 109 | # Otherwise, just pick the new route tables from our resources. 110 | s3_gateway_route_table_ids = local.creating_new_vpc ? [aws_route_table.public[0].id, local.private_route_table.id] : data.aws_route_tables.existing_vpc_all[0].ids 111 | private_route_table = local.creating_new_vpc ? (local.nat_gateway_count == 1 ? aws_route_table.private_with_nat[0] : aws_route_table.private_isolated[0]) : (null) 112 | 113 | } 114 | 115 | data "aws_vpc_endpoint_service" "bedrock_agent" { 116 | # This service name must match exactly what you used in the resource 117 | service_name = "com.amazonaws.${data.aws_region.current.name}.bedrock-agent" 118 | } 119 | 120 | data "aws_subnet" "chosen_subnets" { 121 | count = length(local.chosen_subnet_ids) 122 | id = local.chosen_subnet_ids[count.index] 123 | } 124 | 125 | locals { 126 | # A map from subnet_id => availability_zone 127 | subnet_az_map = { 128 | for idx, s in data.aws_subnet.chosen_subnets : 129 | s.id => s.availability_zone 130 | } 131 | } 132 | 133 | locals { 134 | # Suppose local.chosen_subnet_ids is the list of subnets you want to use 135 | # for endpoints in general. We filter them down to only those whose AZ 136 | # is in the service's list of availability_zones. 137 | bedrock_agent_compatible_subnets = [ 138 | for subnet_id in local.chosen_subnet_ids : subnet_id 139 | if contains(data.aws_vpc_endpoint_service.bedrock_agent.availability_zones, local.subnet_az_map[subnet_id]) 140 | ] 141 | } 142 | -------------------------------------------------------------------------------- /litellm-terraform-stack/modules/base/network.tf: -------------------------------------------------------------------------------- 1 | data "aws_vpc" "existing" { 2 | count = local.creating_new_vpc ? 
0 : 1 3 | id = var.vpc_id 4 | } 5 | 6 | # We'll expose a local reference to either the existing VPC or a newly created one: 7 | resource "aws_vpc" "new" { 8 | count = local.creating_new_vpc ? 1 : 0 9 | cidr_block = "10.0.0.0/16" 10 | enable_dns_hostnames = true 11 | enable_dns_support = true 12 | } 13 | 14 | # We create an Internet Gateway only if we're creating the new VPC 15 | resource "aws_internet_gateway" "this" { 16 | count = local.creating_new_vpc ? 1 : 0 17 | vpc_id = aws_vpc.new[0].id 18 | } 19 | 20 | # Create the NAT gateway only if nat_gateway_count = 1 (and we have a new VPC). 21 | # We'll put it in the first public subnet for simplicity. 22 | resource "aws_eip" "nat" { 23 | count = (local.creating_new_vpc && local.nat_gateway_count == 1) ? 1 : 0 24 | domain = "vpc" 25 | } 26 | 27 | resource "aws_nat_gateway" "this" { 28 | count = (local.creating_new_vpc && local.nat_gateway_count == 1) ? 1 : 0 29 | allocation_id = aws_eip.nat[0].id 30 | subnet_id = aws_subnet.public[0].id 31 | depends_on = [aws_internet_gateway.this] 32 | } 33 | 34 | resource "aws_subnet" "public" { 35 | count = local.creating_new_vpc ? 2 : 0 36 | vpc_id = aws_vpc.new[0].id 37 | cidr_block = cidrsubnet(aws_vpc.new[0].cidr_block, 3, count.index) 38 | availability_zone = element(data.aws_availability_zones.available.names, count.index) 39 | map_public_ip_on_launch = true 40 | } 41 | 42 | resource "aws_subnet" "private" { 43 | count = local.creating_new_vpc ? 2 : 0 44 | vpc_id = aws_vpc.new[0].id 45 | cidr_block = cidrsubnet(aws_vpc.new[0].cidr_block, 3, count.index + 2) 46 | availability_zone = element(data.aws_availability_zones.available.names, count.index) 47 | map_public_ip_on_launch = false 48 | } 49 | 50 | # Route tables: one for public subnets, one for private/isolated subnets. 51 | resource "aws_route_table" "public" { 52 | count = local.creating_new_vpc ? 
1 : 0 53 | vpc_id = aws_vpc.new[0].id 54 | route { 55 | cidr_block = "0.0.0.0/0" 56 | gateway_id = aws_internet_gateway.this[0].id 57 | } 58 | } 59 | 60 | resource "aws_route_table" "private_with_nat" { 61 | count = local.creating_new_vpc && (local.nat_gateway_count == 1) ? 1 : 0 62 | vpc_id = aws_vpc.new[0].id 63 | 64 | route { 65 | cidr_block = "0.0.0.0/0" 66 | nat_gateway_id = aws_nat_gateway.this[0].id 67 | } 68 | lifecycle { 69 | ignore_changes = [route] 70 | } 71 | } 72 | 73 | # Route table for isolated private subnets (no routes) 74 | resource "aws_route_table" "private_isolated" { 75 | count = local.creating_new_vpc && (local.nat_gateway_count == 0) ? 1 : 0 76 | vpc_id = aws_vpc.new[0].id 77 | lifecycle { 78 | ignore_changes = [route] 79 | } 80 | } 81 | 82 | # Subnet associations 83 | resource "aws_route_table_association" "public" { 84 | count = local.creating_new_vpc ? length(aws_subnet.public) : 0 85 | subnet_id = aws_subnet.public[count.index].id 86 | route_table_id = aws_route_table.public[0].id 87 | } 88 | 89 | resource "aws_route_table_association" "private" { 90 | count = local.creating_new_vpc ? length(aws_subnet.private) : 0 91 | subnet_id = aws_subnet.private[count.index].id 92 | route_table_id = local.nat_gateway_count == 1 ? aws_route_table.private_with_nat[0].id : aws_route_table.private_isolated[0].id 93 | } 94 | 95 | # Data source for availability_zones 96 | data "aws_availability_zones" "available" { 97 | state = "available" 98 | # We only need 2 for the new VPC, but we’ll still retrieve them all, just using index=0,1 99 | } 100 | 101 | resource "aws_cloudwatch_log_group" "vpc_flow_logs" { 102 | count = local.creating_new_vpc ? 1 : 0 103 | name_prefix = "/aws/vpc/${var.name}-flow-logs" 104 | retention_in_days = 365 105 | } 106 | 107 | resource "aws_flow_log" "this" { 108 | count = local.creating_new_vpc ? 
1 : 0 109 | log_destination = aws_cloudwatch_log_group.vpc_flow_logs[0].arn 110 | log_destination_type = "cloud-watch-logs" 111 | vpc_id = aws_vpc.new[0].id 112 | iam_role_arn = aws_iam_role.vpc_flow_logs_role[0].arn 113 | traffic_type = "ALL" 114 | max_aggregation_interval = 60 115 | } 116 | 117 | data "aws_subnets" "existing_all" { 118 | count = local.creating_new_vpc ? 0 : 1 119 | filter { 120 | name = "vpc-id" 121 | values = [var.vpc_id] 122 | } 123 | } 124 | -------------------------------------------------------------------------------- /litellm-terraform-stack/modules/base/outputs.tf: -------------------------------------------------------------------------------- 1 | ############################################# 2 | # OUTPUTS 3 | ############################################# 4 | output "RdsLitellmHostname" { 5 | description = "The hostname of the LiteLLM RDS instance" 6 | value = aws_db_instance.database.endpoint 7 | } 8 | 9 | output "RdsLitellmSecretArn" { 10 | description = "The ARN of the LiteLLM RDS secret" 11 | value = aws_secretsmanager_secret.db_secret_main.arn 12 | } 13 | 14 | output "RedisHostName" { 15 | description = "The hostname of the Redis cluster" 16 | value = aws_elasticache_replication_group.redis.primary_endpoint_address 17 | } 18 | 19 | output "RdsSecurityGroupId" { 20 | description = "The ID of the RDS security group" 21 | value = aws_security_group.db_sg.id 22 | } 23 | 24 | output "RedisSecurityGroupId" { 25 | description = "The ID of the Redis security group" 26 | value = aws_security_group.redis_sg.id 27 | } 28 | 29 | output "VpcId" { 30 | description = "The ID of the VPC" 31 | value = local.final_vpc_id 32 | } 33 | 34 | # If we created the pull-through cache: 35 | output "EksAlbControllerPrivateEcrRepositoryName" { 36 | description = "ECR repo for EKS ALB Controller (only if outbound disabled + EKS)." 37 | value = (var.disable_outbound_network_access && var.deployment_platform == "EKS") ? 
aws_ecr_repository.my_ecr_repository[0].name : "" 38 | } 39 | 40 | output "private_subnet_ids" { 41 | description = "List of IDs of private subnets" 42 | value = local.creating_new_vpc ? local.new_private_subnet_ids : local.existing_private_subnet_ids 43 | } 44 | 45 | output "public_subnet_ids" { 46 | description = "List of IDs of public subnets" 47 | value = local.creating_new_vpc ? local.new_public_subnet_ids : local.existing_public_subnet_ids 48 | } 49 | 50 | ############################################################################### 51 | # Outputs (mirror the CDK CfnOutputs) 52 | ############################################################################### 53 | output "ConfigBucketName" { 54 | description = "The Name of the configuration bucket" 55 | value = aws_s3_bucket.config_bucket.bucket 56 | } 57 | 58 | output "ConfigBucketArn" { 59 | description = "The ARN of the configuration bucket" 60 | value = aws_s3_bucket.config_bucket.arn 61 | } 62 | 63 | output "WafAclArn" { 64 | description = "The ARN of the WAF ACL" 65 | value = aws_wafv2_web_acl.litellm_waf.arn 66 | } 67 | 68 | # ECR Repositories 69 | data "aws_ecr_repository" "litellm" { 70 | name = var.ecrLitellmRepository 71 | } 72 | 73 | data "aws_ecr_repository" "middleware" { 74 | name = var.ecrMiddlewareRepository 75 | } 76 | 77 | output "LiteLLMRepositoryUrl" { 78 | description = "The URI of the LiteLLM ECR repository" 79 | value = data.aws_ecr_repository.litellm.repository_url 80 | } 81 | 82 | output "MiddlewareRepositoryUrl" { 83 | description = "The URI of the Middleware ECR repository" 84 | value = data.aws_ecr_repository.middleware.repository_url 85 | } 86 | 87 | output "DatabaseUrlSecretArn" { 88 | description = "The endpoint of the main database" 89 | value = aws_secretsmanager_secret.db_url_secret.arn 90 | } 91 | 92 | output "RedisUrl" { 93 | description = "The Redis connection URL" 94 | value = "rediss://${aws_elasticache_replication_group.redis.primary_endpoint_address}:6379" 95 | } 96 
| 97 | output "RedisHost" { 98 | description = "The Redis host name" 99 | value = aws_elasticache_replication_group.redis.primary_endpoint_address 100 | } 101 | 102 | output "RedisPort" { 103 | description = "The Redis port" 104 | value = "6379" 105 | } 106 | 107 | output "RedisPassword" { 108 | description = "The Redis password" 109 | value = random_password.redis_password_main.result 110 | } 111 | 112 | output "LitellmMasterAndSaltKeySecretArn" { 113 | description = "LiteLLM Master & Salt Key Secret ARN" 114 | value = aws_secretsmanager_secret.litellm_master_salt.arn 115 | } 116 | 117 | output "DbSecurityGroupId" { 118 | description = "DB Security Group ID" 119 | value = aws_security_group.db_sg.id 120 | } 121 | 122 | output "database_url" { 123 | value = "postgresql://llmproxy:${local.litellm_db_password}@${aws_db_instance.database.endpoint}/litellm" 124 | } 125 | 126 | output "litellm_master_key" { 127 | value = local.litellm_master_key 128 | } 129 | 130 | output "litellm_salt_key" { 131 | value = local.litellm_salt_key 132 | } -------------------------------------------------------------------------------- /litellm-terraform-stack/modules/base/rds.tf: -------------------------------------------------------------------------------- 1 | # We replicate the logic: 2 | # Secret for DB user "llmproxy", random password, exclude punctuation 3 | 4 | # Random passwords 5 | resource "random_password" "db_password_main" { 6 | length = 16 7 | special = false 8 | } 9 | 10 | resource "aws_secretsmanager_secret" "db_secret_main" { 11 | name_prefix = "${var.name}-DBSecret-" 12 | recovery_window_in_days = 0 13 | } 14 | 15 | resource "aws_secretsmanager_secret_version" "db_secret_main_version" { 16 | secret_id = aws_secretsmanager_secret.db_secret_main.id 17 | secret_string = jsonencode({ 18 | username = "llmproxy" 19 | password = random_password.db_password_main.result 20 | }) 21 | } 22 | 23 | ############################################# 24 | # RDS SECURITY GROUP 25 | 
############################################# 26 | 27 | resource "aws_security_group" "db_sg" { 28 | name = "${var.name}-db-sg" 29 | description = "Security group for RDS instance" 30 | vpc_id = local.final_vpc_id 31 | 32 | egress { 33 | description = "allow all outbound access" 34 | from_port = 0 35 | to_port = 0 36 | protocol = "-1" 37 | cidr_blocks = ["0.0.0.0/0"] 38 | } 39 | } 40 | 41 | ############################################# 42 | # RDS INSTANCES 43 | ############################################# 44 | 45 | # Subnet group for the DB 46 | resource "aws_db_subnet_group" "main" { 47 | name = "${var.name}-db-subnet-group" 48 | subnet_ids = local.chosen_subnet_ids 49 | } 50 | 51 | resource "aws_db_parameter_group" "example_pg" { 52 | name = "rds-postgres-parameter-group" 53 | # Update the family to match your PostgreSQL version 54 | family = "postgres15" 55 | 56 | # Enable logging of all statements 57 | parameter { 58 | name = "log_statement" 59 | value = "all" 60 | } 61 | 62 | # Log statements that take longer than 1ms 63 | parameter { 64 | name = "log_min_duration_statement" 65 | value = "1" 66 | } 67 | } 68 | 69 | # Database #1: litellm 70 | resource "aws_db_instance" "database" { 71 | identifier = "${var.name}-litellm-db" 72 | engine = "postgres" 73 | engine_version = "15" # or "15.x" 74 | instance_class = var.rds_instance_class 75 | storage_type = "gp3" 76 | allocated_storage = var.rds_allocated_storage 77 | storage_encrypted = true 78 | db_name = "litellm" 79 | db_subnet_group_name = aws_db_subnet_group.main.name 80 | vpc_security_group_ids = [aws_security_group.db_sg.id] 81 | username = jsondecode(aws_secretsmanager_secret_version.db_secret_main_version.secret_string)["username"] 82 | password = jsondecode(aws_secretsmanager_secret_version.db_secret_main_version.secret_string)["password"] 83 | skip_final_snapshot = true 84 | deletion_protection = false 85 | multi_az = true 86 | performance_insights_enabled = true 87 | enabled_cloudwatch_logs_exports = 
["postgresql"] 88 | auto_minor_version_upgrade = true 89 | monitoring_interval = 60 90 | monitoring_role_arn = aws_iam_role.rds_enhanced_monitoring.arn 91 | parameter_group_name = aws_db_parameter_group.example_pg.name 92 | copy_tags_to_snapshot = true 93 | apply_immediately = true 94 | } -------------------------------------------------------------------------------- /litellm-terraform-stack/modules/base/redis.tf: -------------------------------------------------------------------------------- 1 | ############################################# 2 | # REDIS SECURITY GROUP 3 | ############################################# 4 | 5 | resource "aws_security_group" "redis_sg" { 6 | name = "${var.name}-redis-sg" 7 | description = "Security group for Redis cluster" 8 | vpc_id = local.final_vpc_id 9 | 10 | egress { 11 | description = "allow all outbound access" 12 | from_port = 0 13 | to_port = 0 14 | protocol = "-1" 15 | cidr_blocks = ["0.0.0.0/0"] 16 | } 17 | } 18 | 19 | ############################################# 20 | # REDIS SUBNET GROUP 21 | ############################################# 22 | 23 | resource "aws_elasticache_subnet_group" "redis_subnet_group" { 24 | name = "litellm-redis-subnet-group" 25 | description = "Subnet group for Redis cluster" 26 | subnet_ids = local.chosen_subnet_ids 27 | } 28 | 29 | ############################################# 30 | # REDIS PARAMETER GROUP 31 | ############################################# 32 | 33 | resource "aws_elasticache_parameter_group" "redis_parameter_group" { 34 | name = "${var.name}-redis-parameter-group" 35 | family = "redis7" 36 | description = "Redis parameter group" 37 | parameter { 38 | name = "timeout" 39 | value = "0" 40 | } 41 | # Add additional parameters if desired. 
}

#############################################
# REDIS REPLICATION GROUP
#############################################

# AUTH token for the cluster — alphanumeric only (no special characters).
resource "random_password" "redis_password_main" {
  length  = 18
  special = false
}

resource "aws_elasticache_replication_group" "redis" {
  replication_group_id       = "${var.name}-redis"
  description                = "redis"
  engine                     = "redis"
  engine_version             = "7.1"
  node_type                  = var.redis_node_type
  num_cache_clusters         = var.redis_num_cache_clusters
  automatic_failover_enabled = true
  parameter_group_name       = aws_elasticache_parameter_group.redis_parameter_group.name
  subnet_group_name          = aws_elasticache_subnet_group.redis_subnet_group.name
  security_group_ids         = [aws_security_group.redis_sg.id]
  port                       = 6379
  multi_az_enabled           = true
  at_rest_encryption_enabled = true
  transit_encryption_enabled = true
  transit_encryption_mode    = "required"
  auth_token                 = random_password.redis_password_main.result
  auth_token_update_strategy = "SET"

  depends_on = [
    aws_elasticache_subnet_group.redis_subnet_group,
    aws_elasticache_parameter_group.redis_parameter_group
  ]
}

# ---------- litellm-terraform-stack/modules/base/route53.tf ----------
locals {
  # Route53 resources are only managed when use_route53 is on AND a zone
  # name was supplied.
  use_route53 = var.use_route53 && var.hostedZoneName != ""
  # Private LB: we CREATE a private zone when we own the VPC (new VPC) or
  # when the caller explicitly asked for one in their existing VPC...
  create_private_load_balancer = var.use_route53 && !var.publicLoadBalancer ? local.creating_new_vpc || var.create_private_hosted_zone_in_existing_vpc ? true : false : false
  # ...otherwise we IMPORT the pre-existing private zone.
  import_private_load_balancer = var.use_route53 && !var.publicLoadBalancer ? !local.create_private_load_balancer : false
}

# Public hosted zone lookup (public load balancer + Route53 enabled).
data "aws_route53_zone" "public_zone" {
  count        = local.use_route53 && var.publicLoadBalancer ? 1 : 0
  name         = var.hostedZoneName
  private_zone = false
}

# Private hosted zone, created per the decision table in the locals above
# (never for use_route53 = false or a public load balancer).
resource "aws_route53_zone" "new_private_zone" {
  count = local.create_private_load_balancer ? 1 : 0
  name  = var.hostedZoneName
  vpc {
    vpc_id = local.final_vpc_id
  }
}

# Pre-existing private hosted zone, imported when the user brings their own
# VPC and opted out of zone creation.
data "aws_route53_zone" "existing_private_zone" {
  count = local.import_private_load_balancer ?
1 : 0 31 | name = var.hostedZoneName 32 | private_zone = true 33 | } 34 | -------------------------------------------------------------------------------- /litellm-terraform-stack/modules/base/s3.tf: -------------------------------------------------------------------------------- 1 | ############################################################################### 2 | # S3 bucket for config 3 | ############################################################################### 4 | resource "aws_s3_bucket" "config_bucket" { 5 | bucket_prefix = "litellm-config-" 6 | force_destroy = true 7 | } 8 | 9 | resource "aws_s3_bucket_server_side_encryption_configuration" "config_bucket" { 10 | bucket = aws_s3_bucket.config_bucket.id 11 | 12 | rule { 13 | apply_server_side_encryption_by_default { 14 | sse_algorithm = "AES256" 15 | } 16 | } 17 | } 18 | 19 | resource "aws_s3_bucket_policy" "config_bucket" { 20 | bucket = aws_s3_bucket.config_bucket.id 21 | 22 | policy = jsonencode({ 23 | Version = "2012-10-17" 24 | Statement = [ 25 | { 26 | Sid = "EnforceSSLOnly" 27 | Effect = "Deny" 28 | Principal = "*" 29 | Action = "s3:*" 30 | Resource = [ 31 | aws_s3_bucket.config_bucket.arn, 32 | "${aws_s3_bucket.config_bucket.arn}/*" 33 | ] 34 | Condition = { 35 | Bool = { 36 | "aws:SecureTransport" = "false" 37 | } 38 | } 39 | } 40 | ] 41 | }) 42 | } 43 | 44 | resource "aws_s3_bucket_public_access_block" "config_bucket" { 45 | bucket = aws_s3_bucket.config_bucket.id 46 | block_public_acls = true 47 | block_public_policy = true 48 | } 49 | 50 | # Single file upload of `config.yaml` 51 | # In your CDK, you used s3deploy with `include: ['config.yaml']` and `exclude: ['*']` then re-included `config.yaml`. 
52 | resource "aws_s3_object" "config_file" { 53 | bucket = aws_s3_bucket.config_bucket.id 54 | key = "config.yaml" 55 | source = "${path.module}/../../../config/config.yaml" # Adjust path as needed 56 | etag = filemd5("${path.module}/../../../config/config.yaml") 57 | } -------------------------------------------------------------------------------- /litellm-terraform-stack/modules/base/secrets-manager.tf: -------------------------------------------------------------------------------- 1 | ############################################################################### 2 | # Secrets Manager: LiteLLM master/salt keys 3 | ############################################################################### 4 | # Generate random strings for master and salt 5 | resource "random_password" "litellm_master" { 6 | length = 21 7 | special = false 8 | } 9 | 10 | resource "random_password" "litellm_salt" { 11 | length = 21 # Reduced by 3 to account for "sk-" prefix 12 | special = false 13 | } 14 | 15 | 16 | # Create a secret (the "shell" or "container" for the key) 17 | resource "aws_secretsmanager_secret" "litellm_master_salt" { 18 | name_prefix = "LiteLLMMasterSalt-" 19 | recovery_window_in_days = 0 20 | } 21 | 22 | locals { 23 | litellm_master_key = "sk-${random_password.litellm_master.result}" 24 | litellm_salt_key = "sk-${random_password.litellm_salt.result}" 25 | } 26 | 27 | # Store the generated values 28 | resource "aws_secretsmanager_secret_version" "litellm_master_salt_ver" { 29 | secret_id = aws_secretsmanager_secret.litellm_master_salt.id 30 | 31 | secret_string = jsonencode({ 32 | LITELLM_MASTER_KEY = local.litellm_master_key 33 | LITELLM_SALT_KEY = local.litellm_salt_key 34 | }) 35 | } 36 | 37 | ############################################################################### 38 | # Construct DB URLs from existing Secrets Manager password 39 | ############################################################################### 40 | # For demonstration, parse the JSON from 
data sources (the RDS secrets).
# Adjust keys if your secrets structure differ.

locals {
  # DB password parsed back out of the main RDS secret.
  litellm_db_password = jsondecode(aws_secretsmanager_secret_version.db_secret_main_version.secret_string).password
}

resource "aws_secretsmanager_secret" "db_url_secret" {
  name_prefix             = "DBUrlSecret-"
  recovery_window_in_days = 0
}

resource "aws_secretsmanager_secret_version" "db_url_secret_ver" {
  secret_id = aws_secretsmanager_secret.db_url_secret.id

  secret_string = "postgresql://llmproxy:${local.litellm_db_password}@${aws_db_instance.database.endpoint}/litellm"
}

# ---------- litellm-terraform-stack/modules/base/variables.tf ----------
variable "name" {
  description = "Standard name to be used as prefix on all resources."
  type        = string
}

variable "vpc_id" {
  description = "ID of an existing VPC to use. If not provided, a new VPC will be created."
  type        = string
  default     = ""
}

# NOTE(review): the camelCase names below (ecrLitellmRepository, hostedZoneName,
# publicLoadBalancer, ...) are inconsistent with the snake_case variables, but
# they are part of the module's public interface and are kept for compatibility.
variable "ecrLitellmRepository" {
  type        = string
  description = "Name of the LiteLLM ECR repository"
}

variable "ecrMiddlewareRepository" {
  type        = string
  description = "Name of the Middleware ECR repository"
}

variable "deployment_platform" {
  description = "Which platform to deploy (ECS or EKS)"
  type        = string

  validation {
    condition     = can(regex("^(ECS|EKS)$", upper(var.deployment_platform)))
    error_message = "DEPLOYMENT_PLATFORM must be either 'ECS' or 'EKS' (case insensitive)."
  }
}

variable "disable_outbound_network_access" {
  description = "Whether to disable outbound network access"
  type        = bool
}

variable "create_vpc_endpoints_in_existing_vpc" {
  type        = bool
  description = "If using an existing VPC, set this to true to also create interface/gateway endpoints within it."
}

variable "hostedZoneName" {
  description = "Hosted zone name"
  type        = string
  default     = ""
}

variable "publicLoadBalancer" {
  description = "Whether the load balancer is public or private"
  type        = bool
}

variable "create_private_hosted_zone_in_existing_vpc" {
  description = "In the case publicLoadBalancer=false (meaning we need a private hosted zone), and an vpc_id is provided, decides whether we create a private hosted zone, or assume one already exists and import it"
  type        = bool
}

variable "rds_instance_class" {
  type        = string
  description = "The instance class for the RDS database"
}

variable "rds_allocated_storage" {
  type        = number
  description = "The allocated storage in GB for the RDS database"
}

variable "redis_node_type" {
  type        = string
  description = "The node type for Redis clusters"
}

variable "redis_num_cache_clusters" {
  type        = number
  description = "The number of cache clusters for Redis"
}

variable "use_route53" {
  description = "Whether to use Route53 for DNS management. If false, no Route53 resources will be created."
80 | type = bool 81 | default = false 82 | } 83 | -------------------------------------------------------------------------------- /litellm-terraform-stack/modules/base/waf.tf: -------------------------------------------------------------------------------- 1 | ############################################################################### 2 | # WAFv2 Web ACL 3 | ############################################################################### 4 | resource "aws_wafv2_web_acl" "litellm_waf" { 5 | name = "LiteLLMWAF" 6 | description = "WAF for LiteLLM" 7 | scope = "REGIONAL" # or CLOUDFRONT 8 | 9 | default_action { 10 | allow {} 11 | } 12 | 13 | visibility_config { 14 | cloudwatch_metrics_enabled = true 15 | metric_name = "LiteLLMWebAcl" 16 | sampled_requests_enabled = true 17 | } 18 | 19 | rule { 20 | name = "AWSManagedRulesCommonRuleSet-Exclusions" 21 | priority = 1 22 | 23 | # override_action is required if referencing a rule group 24 | # - use 'none' if you want to keep the group’s default action 25 | # - or 'count' to effectively “disable” or “exclude” from blocking 26 | override_action { 27 | none {} 28 | } 29 | 30 | statement { 31 | managed_rule_group_statement { 32 | name = "AWSManagedRulesCommonRuleSet" 33 | vendor_name = "AWS" 34 | 35 | # This is the Terraform equivalent to the "excludedRules" from CloudFormation/CDK: 36 | # We override the action of these specific sub-rules to avoid them blocking requests. 
37 | rule_action_override { 38 | name = "NoUserAgent_HEADER" 39 | action_to_use { 40 | count {} 41 | } 42 | } 43 | 44 | rule_action_override { 45 | name = "SizeRestrictions_BODY" 46 | action_to_use { 47 | count {} 48 | } 49 | } 50 | } 51 | } 52 | 53 | visibility_config { 54 | cloudwatch_metrics_enabled = true 55 | metric_name = "LiteLLMCommonRuleSet" 56 | sampled_requests_enabled = true 57 | } 58 | } 59 | 60 | rule { 61 | name = "AWS-AWSManagedRulesKnownBadInputsRuleSet" 62 | priority = 2 63 | 64 | override_action { 65 | none {} 66 | } 67 | 68 | statement { 69 | managed_rule_group_statement { 70 | name = "AWSManagedRulesKnownBadInputsRuleSet" 71 | vendor_name = "AWS" 72 | } 73 | } 74 | visibility_config { 75 | cloudwatch_metrics_enabled = true 76 | metric_name = "LiteLLMCommonRuleSet" 77 | sampled_requests_enabled = true 78 | } 79 | } 80 | 81 | } 82 | -------------------------------------------------------------------------------- /litellm-terraform-stack/modules/ecs/cloudfront.tf: -------------------------------------------------------------------------------- 1 | # Generate a random secret for CloudFront-to-ALB authentication 2 | # This secret is used for secure origin authentication between CloudFront and ALB 3 | resource "random_password" "cloudfront_secret" { 4 | count = var.use_cloudfront ? 1 : 0 5 | length = 32 6 | special = false 7 | 8 | # Add keepers to prevent regeneration unless explicitly changed 9 | keepers = { 10 | name = var.name # Only regenerate if the name changes 11 | } 12 | 13 | # Prevent updates to the secret's properties during regular deployments 14 | lifecycle { 15 | ignore_changes = [length, special, min_lower, min_upper, min_numeric] 16 | } 17 | } 18 | 19 | # CloudFront Distribution 20 | resource "aws_cloudfront_distribution" "this" { 21 | count = var.use_cloudfront ? 
1 : 0 22 | enabled = true 23 | is_ipv6_enabled = true 24 | comment = "${var.name}-distribution" 25 | default_root_object = "" 26 | price_class = var.cloudfront_price_class 27 | 28 | origin { 29 | domain_name = aws_lb.this.dns_name 30 | origin_id = "ALB" 31 | 32 | # Add a custom origin header for security 33 | # This replaces the IP-based security group approach 34 | # ALB should be configured to only accept requests with this header 35 | custom_header { 36 | name = "X-CloudFront-Secret" 37 | value = "litellm-cf-${random_password.cloudfront_secret[0].result}" 38 | } 39 | 40 | # Security note on CloudFront-ALB communication: 41 | # 42 | # By setting origin_protocol_policy = "http-only", communication between CloudFront and ALB 43 | # is unencrypted. However, security is maintained through: 44 | # 45 | # 1. Custom header authentication (X-CloudFront-Secret) that prevents direct access to the ALB 46 | # 2. Communication between end users and CloudFront remains encrypted with HTTPS 47 | # 3. The ALB is configured to reject requests without the secret header 48 | # 49 | # This approach eliminates certificate validation issues while maintaining a strong security posture. 
50 | custom_origin_config { 51 | http_port = 80 52 | https_port = 443 53 | origin_protocol_policy = "http-only" 54 | origin_ssl_protocols = ["TLSv1.2"] 55 | } 56 | } 57 | 58 | # Default cache behavior for API requests 59 | default_cache_behavior { 60 | allowed_methods = ["DELETE", "GET", "HEAD", "OPTIONS", "PATCH", "POST", "PUT"] 61 | cached_methods = ["GET", "HEAD", "OPTIONS"] 62 | target_origin_id = "ALB" 63 | 64 | forwarded_values { 65 | query_string = true 66 | headers = ["Authorization", "Host", "Origin"] 67 | 68 | cookies { 69 | forward = "all" 70 | } 71 | } 72 | 73 | viewer_protocol_policy = "redirect-to-https" 74 | min_ttl = 0 75 | default_ttl = 0 76 | max_ttl = 0 77 | compress = true 78 | } 79 | 80 | # Use the provided certificate if Route53 is enabled with a custom domain 81 | dynamic "viewer_certificate" { 82 | for_each = var.use_route53 && var.certificate_arn != "" ? [1] : [] 83 | content { 84 | acm_certificate_arn = var.certificate_arn 85 | ssl_support_method = "sni-only" 86 | minimum_protocol_version = "TLSv1.2_2021" 87 | } 88 | } 89 | 90 | # Use CloudFront default certificate if no Route53 or certificate is provided 91 | dynamic "viewer_certificate" { 92 | for_each = !var.use_route53 || var.certificate_arn == "" ? [1] : [] 93 | content { 94 | cloudfront_default_certificate = true 95 | } 96 | } 97 | 98 | # Add aliases only if Route53 is used 99 | aliases = var.use_route53 ? 
[format("%s.%s", var.record_name, var.hosted_zone_name)] : [] 100 | 101 | # Associate WAF Web ACL if provided - commented out due to regional WAF scope issue 102 | # CloudFront requires global WAF WebACLs, but the current WAF is regional 103 | # web_acl_id = var.wafv2_acl_arn 104 | 105 | restrictions { 106 | geo_restriction { 107 | restriction_type = "none" 108 | } 109 | } 110 | 111 | # Enable logging to the ALB access logs bucket - commented out to avoid S3 ACL issues 112 | # logging_config { 113 | # include_cookies = false 114 | # bucket = aws_s3_bucket.access_log_bucket.bucket_domain_name 115 | # prefix = "cloudfront-logs/" 116 | # } 117 | 118 | tags = { 119 | Name = "${var.name}-cloudfront-distribution" 120 | } 121 | 122 | depends_on = [aws_lb.this] 123 | } 124 | -------------------------------------------------------------------------------- /litellm-terraform-stack/modules/ecs/cloudwatch.tf: -------------------------------------------------------------------------------- 1 | resource "aws_cloudwatch_log_group" "litellm" { 2 | name = "/ecs/${var.name}-litellm" 3 | retention_in_days = 365 4 | } 5 | 6 | resource "aws_cloudwatch_log_group" "middleware" { 7 | name = "/ecs/${var.name}-middleware" 8 | retention_in_days = 365 9 | } 10 | -------------------------------------------------------------------------------- /litellm-terraform-stack/modules/ecs/iam.tf: -------------------------------------------------------------------------------- 1 | data "aws_iam_policy_document" "ecs_task_assume_role" { 2 | statement { 3 | actions = ["sts:AssumeRole"] 4 | principals { 5 | type = "Service" 6 | identifiers = ["ecs-tasks.amazonaws.com"] 7 | } 8 | } 9 | } 10 | 11 | resource "aws_iam_role" "task_role" { 12 | name = "${var.name}-ecs-task-role" 13 | assume_role_policy = data.aws_iam_policy_document.ecs_task_assume_role.json 14 | } 15 | 16 | data "aws_iam_policy_document" "ecs_execution_assume_role" { 17 | statement { 18 | actions = ["sts:AssumeRole"] 19 | principals { 20 | type 
= "Service" 21 | identifiers = ["ecs-tasks.amazonaws.com"] 22 | } 23 | } 24 | } 25 | 26 | resource "aws_iam_role" "execution_role" { 27 | name = "${var.name}-ecs-execution-role" 28 | assume_role_policy = data.aws_iam_policy_document.ecs_execution_assume_role.json 29 | } 30 | 31 | resource "aws_iam_role_policy_attachment" "execution_role_attachment" { 32 | role = aws_iam_role.execution_role.name 33 | policy_arn = "arn:aws:iam::aws:policy/service-role/AmazonECSTaskExecutionRolePolicy" 34 | } 35 | 36 | data "aws_iam_policy_document" "execution_role_policy_doc" { 37 | statement { 38 | sid = "EcrImageAccess" 39 | actions = ["ecr:BatchCheckLayerAvailability", "ecr:BatchGetImage", "ecr:GetDownloadUrlForLayer"] 40 | resources = [ 41 | "*" 42 | ] 43 | } 44 | 45 | statement { 46 | sid = "EcrTokenAccess" 47 | actions = ["ecr:GetAuthorizationToken"] 48 | resources = [ 49 | "*" 50 | ] 51 | } 52 | 53 | statement { 54 | sid = "CloudwatchAccess" 55 | actions = ["logs:CreateLogStream", "logs:PutLogEvents"] 56 | resources = ["*"] 57 | } 58 | 59 | statement { 60 | actions = ["secretsmanager:GetSecretValue", "secretsmanager:DescribeSecret"] 61 | resources = [var.master_and_salt_key_secret_arn, var.main_db_secret_arn, aws_secretsmanager_secret.litellm_other_secrets.arn] 62 | } 63 | } 64 | 65 | resource "aws_iam_policy" "execution_role_policy" { 66 | name = "${var.name}-ecs-execution-role-policy" 67 | policy = data.aws_iam_policy_document.execution_role_policy_doc.json 68 | } 69 | 70 | resource "aws_iam_role_policy_attachment" "execution_role_attach" { 71 | role = aws_iam_role.execution_role.name 72 | policy_arn = aws_iam_policy.execution_role_policy.arn 73 | } 74 | 75 | # -------------------------------------------------------------------- 76 | # Task Role Policy (S3, Bedrock, SageMaker) 77 | # -------------------------------------------------------------------- 78 | data "aws_iam_policy_document" "task_role_policy_doc" { 79 | statement { 80 | sid = "S3ConfigBucketAccess" 81 | actions 
= ["s3:GetObject", "s3:ListBucket"] 82 | resources = [ 83 | var.config_bucket_arn, 84 | "${var.config_bucket_arn}/*" 85 | ] 86 | } 87 | 88 | statement { 89 | sid = "S3LogBucketAccess" 90 | actions = ["s3:*"] 91 | resources = [ 92 | var.log_bucket_arn, 93 | "${var.log_bucket_arn}/*" 94 | ] 95 | } 96 | 97 | statement { 98 | sid = "BedrockAccess" 99 | actions = ["bedrock:*"] 100 | resources = ["*"] 101 | } 102 | 103 | statement { 104 | sid = "SageMakerInvoke" 105 | actions = ["sagemaker:InvokeEndpoint"] 106 | resources = ["*"] 107 | } 108 | } 109 | 110 | resource "aws_iam_policy" "task_role_policy" { 111 | name = "${var.name}-ecs-task-role-policy" 112 | policy = data.aws_iam_policy_document.task_role_policy_doc.json 113 | } 114 | 115 | resource "aws_iam_role_policy_attachment" "task_role_attach" { 116 | role = aws_iam_role.task_role.name 117 | policy_arn = aws_iam_policy.task_role_policy.arn 118 | } -------------------------------------------------------------------------------- /litellm-terraform-stack/modules/ecs/outputs.tf: -------------------------------------------------------------------------------- 1 | ############################################################################### 2 | # (12) Outputs 3 | ############################################################################### 4 | output "LitellmEcsCluster" { 5 | value = aws_ecs_cluster.this.name 6 | description = "Name of the ECS Cluster" 7 | } 8 | 9 | output "LitellmEcsTask" { 10 | value = aws_ecs_service.litellm_service.name 11 | description = "Name of the ECS Service" 12 | } 13 | 14 | output "alb_dns_name" { 15 | value = aws_lb.this.dns_name 16 | description = "The DNS name of the ALB" 17 | } 18 | 19 | output "alb_zone_id" { 20 | value = aws_lb.this.zone_id 21 | description = "The zone ID of the ALB" 22 | } 23 | 24 | output "cloudfront_distribution_id" { 25 | value = var.use_cloudfront ? 
aws_cloudfront_distribution.this[0].id : "" 26 | description = "The ID of the CloudFront distribution" 27 | } 28 | 29 | output "cloudfront_domain_name" { 30 | value = var.use_cloudfront ? aws_cloudfront_distribution.this[0].domain_name : "" 31 | description = "The domain name of the CloudFront distribution" 32 | } 33 | 34 | output "ServiceURL" { 35 | description = "The service URL" 36 | value = var.use_route53 ? "https://${var.record_name}.${var.hosted_zone_name}" : ( 37 | var.use_cloudfront ? "https://${aws_cloudfront_distribution.this[0].domain_name}" : "https://${aws_lb.this.dns_name}" 38 | ) 39 | } 40 | 41 | output "cloudfront_auth_secret" { 42 | description = "The CloudFront authentication secret (only shown once after creation)" 43 | value = var.use_cloudfront ? random_password.cloudfront_secret[0].result : null 44 | sensitive = true 45 | } 46 | -------------------------------------------------------------------------------- /litellm-terraform-stack/modules/ecs/route53.tf: -------------------------------------------------------------------------------- 1 | # Only lookup the Route53 zone if use_route53 is true 2 | data "aws_route53_zone" "this" { 3 | count = var.use_route53 ? 1 : 0 4 | name = var.hosted_zone_name 5 | private_zone = !var.public_load_balancer 6 | } 7 | 8 | # Only create Route53 records if use_route53 is true 9 | resource "aws_route53_record" "alb_alias" { 10 | count = var.use_route53 ? 1 : 0 11 | zone_id = data.aws_route53_zone.this[0].zone_id 12 | name = var.record_name 13 | type = "A" 14 | 15 | alias { 16 | # If CloudFront is enabled, point to CloudFront, otherwise point to ALB 17 | name = var.use_cloudfront ? aws_cloudfront_distribution.this[0].domain_name : aws_lb.this.dns_name 18 | zone_id = var.use_cloudfront ? 
aws_cloudfront_distribution.this[0].hosted_zone_id : aws_lb.this.zone_id
    evaluate_target_health = true
  }

  depends_on = [aws_cloudfront_distribution.this]
}
--------------------------------------------------------------------------------
/litellm-terraform-stack/modules/ecs/s3.tf:
--------------------------------------------------------------------------------
# Bucket receiving ALB access logs. force_destroy lets `terraform destroy`
# remove the bucket even when logs are present.
resource "aws_s3_bucket" "access_log_bucket" {
  bucket_prefix = "access-logs-"
  force_destroy = true
}

resource "aws_s3_bucket_server_side_encryption_configuration" "access_log_bucket" {
  bucket = aws_s3_bucket.access_log_bucket.id

  rule {
    apply_server_side_encryption_by_default {
      sse_algorithm = "AES256"
    }
  }
}

# Region-specific ELB service account allowed to deliver access logs.
data "aws_elb_service_account" "main" {}

resource "aws_s3_bucket_policy" "access_log_bucket" {
  bucket = aws_s3_bucket.access_log_bucket.id

  policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Sid       = "EnforceSSLOnly"
        Effect    = "Deny"
        Principal = "*"
        Action    = "s3:*"
        Resource = [
          aws_s3_bucket.access_log_bucket.arn,
          "${aws_s3_bucket.access_log_bucket.arn}/*"
        ]
        Condition = {
          Bool = {
            "aws:SecureTransport" = "false"
          }
        }
      },
      {
        Sid    = "AllowELBLogDelivery"
        Effect = "Allow"
        Principal = {
          AWS = data.aws_elb_service_account.main.arn
        }
        Action   = "s3:PutObject"
        Resource = "${aws_s3_bucket.access_log_bucket.arn}/*"
      }
    ]
  })
}

resource "aws_s3_bucket_public_access_block" "access_log_bucket" {
  bucket              = aws_s3_bucket.access_log_bucket.id
  block_public_acls   = true
  block_public_policy = true
  # Fixed: only 2 of the 4 public-access settings were set, so pre-existing
  # public ACLs would still be honored and cross-account access points could
  # still expose objects. All four settings are needed to fully block public
  # access to this log bucket.
  ignore_public_acls      = true
  restrict_public_buckets = true
}
--------------------------------------------------------------------------------
/litellm-terraform-stack/modules/ecs/secrets-manager.tf:
--------------------------------------------------------------------------------
resource
"aws_secretsmanager_secret" "litellm_other_secrets" { 2 | name_prefix = "LiteLLMApiKeySecret-" 3 | recovery_window_in_days = 0 4 | } 5 | 6 | resource "aws_secretsmanager_secret_version" "litellm_other_secrets_ver" { 7 | secret_id = aws_secretsmanager_secret.litellm_other_secrets.id 8 | 9 | secret_string = jsonencode({ 10 | OPENAI_API_KEY = var.openai_api_key 11 | AZURE_OPENAI_API_KEY = var.azure_openai_api_key 12 | AZURE_API_KEY = var.azure_api_key 13 | ANTHROPIC_API_KEY = var.anthropic_api_key 14 | GROQ_API_KEY = var.groq_api_key 15 | COHERE_API_KEY = var.cohere_api_key 16 | CO_API_KEY = var.co_api_key 17 | HF_TOKEN = var.hf_token 18 | HUGGINGFACE_API_KEY = var.huggingface_api_key 19 | DATABRICKS_API_KEY = var.databricks_api_key 20 | GEMINI_API_KEY = var.gemini_api_key 21 | CODESTRAL_API_KEY = var.codestral_api_key 22 | MISTRAL_API_KEY = var.mistral_api_key 23 | AZURE_AI_API_KEY = var.azure_ai_api_key 24 | NVIDIA_NIM_API_KEY = var.nvidia_nim_api_key 25 | XAI_API_KEY = var.xai_api_key 26 | PERPLEXITYAI_API_KEY = var.perplexityai_api_key 27 | GITHUB_API_KEY = var.github_api_key 28 | DEEPSEEK_API_KEY = var.deepseek_api_key 29 | AI21_API_KEY = var.ai21_api_key 30 | LANGSMITH_API_KEY = var.langsmith_api_key 31 | LANGFUSE_SECRET_KEY = var.langfuse_secret_key 32 | }) 33 | } -------------------------------------------------------------------------------- /litellm-terraform-stack/modules/ecs/security-groups.tf: -------------------------------------------------------------------------------- 1 | ############################################################################### 2 | # (7) Security Groups & Ingress for Redis and RDS 3 | ############################################################################### 4 | 5 | # CloudFront security is implemented using origin custom headers instead of IP ranges 6 | # This avoids hitting AWS security group rule limits (60 rules per security group) 7 | # CloudFront has hundreds of IP ranges globally, which would exceed the limit 8 | # 
Security group for ECS Service tasks 9 | resource "aws_security_group" "ecs_service_sg" { 10 | name = "${var.name}-service-sg" 11 | description = "Security group for ECS Fargate service" 12 | vpc_id = var.vpc_id 13 | 14 | egress { 15 | from_port = 0 16 | to_port = 0 17 | protocol = "-1" # "-1" represents all protocols 18 | cidr_blocks = ["0.0.0.0/0"] 19 | description = "Allow all outbound traffic by default" 20 | } 21 | } 22 | 23 | resource "aws_security_group_rule" "alb_ingress_4000" { 24 | type = "ingress" 25 | from_port = 4000 26 | to_port = 4000 27 | protocol = "tcp" 28 | security_group_id = aws_security_group.ecs_service_sg.id 29 | source_security_group_id = aws_security_group.alb_sg.id 30 | description = "Allow Load Balancer to ECS" 31 | } 32 | 33 | resource "aws_security_group_rule" "alb_ingress_3000" { 34 | type = "ingress" 35 | from_port = 3000 36 | to_port = 3000 37 | protocol = "tcp" 38 | security_group_id = aws_security_group.ecs_service_sg.id 39 | source_security_group_id = aws_security_group.alb_sg.id 40 | description = "Allow Load Balancer to ECS" 41 | } 42 | 43 | 44 | # Allow ECS tasks to connect to Redis 45 | resource "aws_security_group_rule" "redis_ingress" { 46 | type = "ingress" 47 | from_port = 6379 48 | to_port = 6379 49 | protocol = "tcp" 50 | security_group_id = var.redis_security_group_id 51 | source_security_group_id = aws_security_group.ecs_service_sg.id 52 | description = "Allow ECS tasks to connect to Redis" 53 | } 54 | 55 | # Allow ECS tasks to connect to RDS 56 | resource "aws_security_group_rule" "db_ingress" { 57 | type = "ingress" 58 | from_port = 5432 59 | to_port = 5432 60 | protocol = "tcp" 61 | security_group_id = var.db_security_group_id 62 | source_security_group_id = aws_security_group.ecs_service_sg.id 63 | description = "Allow ECS tasks to connect to RDS" 64 | } 65 | 66 | resource "aws_security_group" "alb_sg" { 67 | name = "${var.name}-alb-sg" 68 | description = "Security group for ALB" 69 | vpc_id = var.vpc_id 70 | 71 | 
# Public load balancer: Allow HTTPS traffic with WAF protection 72 | # Security is provided by: 73 | # 1. When CloudFront is enabled: Custom origin header authentication via ALB listener rules 74 | # 2. When CloudFront is disabled: WAF rules on the ALB 75 | # 3. When private: Only accessible from private subnets 76 | ingress { 77 | description = "HTTPS traffic" 78 | protocol = "tcp" 79 | from_port = 443 80 | to_port = 443 81 | cidr_blocks = var.public_load_balancer ? ["0.0.0.0/0"] : var.private_subnets_cidr_blocks 82 | } 83 | 84 | # Add HTTP ingress for CloudFront origin connections 85 | # Security for HTTP is provided by custom header authentication 86 | ingress { 87 | description = "HTTP traffic for CloudFront origin" 88 | protocol = "tcp" 89 | from_port = 80 90 | to_port = 80 91 | cidr_blocks = var.public_load_balancer ? ["0.0.0.0/0"] : var.private_subnets_cidr_blocks 92 | } 93 | 94 | tags = { 95 | Name = "${var.name}-alb-sg" 96 | SecurityModel = var.use_cloudfront ? "CloudFront-Protected" : (var.public_load_balancer ? "Public-WAF-Protected" : "Private-VPC-Only") 97 | } 98 | 99 | # Allow all outbound 100 | egress { 101 | description = "Allow all outbound" 102 | protocol = -1 103 | from_port = 0 104 | to_port = 0 105 | cidr_blocks = ["0.0.0.0/0"] 106 | } 107 | } 108 | -------------------------------------------------------------------------------- /litellm-terraform-stack/modules/ecs/variables.tf: -------------------------------------------------------------------------------- 1 | variable "name" { 2 | description = "Standard name to be used as prefix on all resources." 
3 | type = string 4 | } 5 | 6 | # Variables needed for the configuration 7 | variable "config_bucket_arn" { 8 | description = "ARN of the configuration bucket" 9 | type = string 10 | } 11 | 12 | variable "log_bucket_arn" { 13 | description = "ARN of the log bucket" 14 | type = string 15 | } 16 | 17 | # Required variables 18 | variable "ecr_litellm_repository_url" { 19 | description = "URL of the ECR repository for LiteLLM" 20 | type = string 21 | } 22 | 23 | variable "ecr_middleware_repository_url" { 24 | description = "URL of the ECR repository for middleware" 25 | type = string 26 | } 27 | 28 | variable "litellm_version" { 29 | description = "Version tag for LiteLLM image" 30 | type = string 31 | } 32 | 33 | variable "config_bucket_name" { 34 | description = "Name of the S3 bucket containing config" 35 | type = string 36 | } 37 | 38 | variable "redis_host" { 39 | description = "The Redis host name" 40 | type = string 41 | } 42 | 43 | variable "redis_port" { 44 | description = "The Redis port" 45 | type = string 46 | } 47 | 48 | variable "redis_password" { 49 | description = "The Redis password" 50 | type = string 51 | } 52 | 53 | variable "openai_api_key" { 54 | description = "OpenAI API key" 55 | type = string 56 | sensitive = true 57 | } 58 | 59 | variable "azure_openai_api_key" { 60 | description = "Azure OpenAI API key" 61 | type = string 62 | sensitive = true 63 | } 64 | 65 | variable "azure_api_key" { 66 | description = "Azure API key" 67 | type = string 68 | sensitive = true 69 | } 70 | 71 | variable "anthropic_api_key" { 72 | description = "Anthropic API key" 73 | type = string 74 | sensitive = true 75 | } 76 | 77 | variable "groq_api_key" { 78 | description = "Groq API key" 79 | type = string 80 | sensitive = true 81 | } 82 | 83 | variable "cohere_api_key" { 84 | description = "Cohere API key" 85 | type = string 86 | sensitive = true 87 | } 88 | 89 | variable "co_api_key" { 90 | description = "Co API key" 91 | type = string 92 | sensitive = true 93 | } 
94 | 95 | variable "hf_token" { 96 | description = "HuggingFace token" 97 | type = string 98 | sensitive = true 99 | } 100 | 101 | variable "huggingface_api_key" { 102 | description = "HuggingFace API key" 103 | type = string 104 | sensitive = true 105 | } 106 | 107 | variable "databricks_api_key" { 108 | description = "Databricks API key" 109 | type = string 110 | sensitive = true 111 | } 112 | 113 | variable "gemini_api_key" { 114 | description = "Gemini API key" 115 | type = string 116 | sensitive = true 117 | } 118 | 119 | variable "codestral_api_key" { 120 | description = "Codestral API key" 121 | type = string 122 | sensitive = true 123 | } 124 | 125 | variable "mistral_api_key" { 126 | description = "Mistral API key" 127 | type = string 128 | sensitive = true 129 | } 130 | 131 | variable "azure_ai_api_key" { 132 | description = "Azure AI API key" 133 | type = string 134 | sensitive = true 135 | } 136 | 137 | variable "nvidia_nim_api_key" { 138 | description = "NVIDIA NIM API key" 139 | type = string 140 | sensitive = true 141 | } 142 | 143 | variable "xai_api_key" { 144 | description = "XAI API key" 145 | type = string 146 | sensitive = true 147 | } 148 | 149 | variable "perplexityai_api_key" { 150 | description = "PerplexityAI API key" 151 | type = string 152 | sensitive = true 153 | } 154 | 155 | variable "github_api_key" { 156 | description = "GitHub API key" 157 | type = string 158 | sensitive = true 159 | } 160 | 161 | variable "deepseek_api_key" { 162 | description = "Deepseek API key" 163 | type = string 164 | sensitive = true 165 | } 166 | 167 | variable "ai21_api_key" { 168 | description = "AI21 API key" 169 | type = string 170 | sensitive = true 171 | } 172 | 173 | variable "langsmith_api_key" { 174 | description = "Langsmith API key" 175 | type = string 176 | sensitive = true 177 | } 178 | 179 | variable "langsmith_project" { 180 | description = "Langsmith project" 181 | type = string 182 | } 183 | 184 | variable "langsmith_default_run_name" { 185 
| description = "langsmith default run name" 186 | type = string 187 | } 188 | 189 | variable "okta_audience" { 190 | description = "Okta audience" 191 | type = string 192 | } 193 | 194 | variable "okta_issuer" { 195 | description = "Okta issuer" 196 | type = string 197 | } 198 | 199 | variable "certificate_arn" { 200 | description = "ARN of the ACM certificate" 201 | type = string 202 | default = "" 203 | } 204 | 205 | variable "wafv2_acl_arn" { 206 | description = "ARN of the WAFv2 ACL" 207 | type = string 208 | } 209 | 210 | variable "record_name" { 211 | description = "Record name for the ingress" 212 | type = string 213 | default = "" 214 | } 215 | 216 | variable "hosted_zone_name" { 217 | description = "Hosted zone name for the ingress" 218 | type = string 219 | default = "" 220 | } 221 | 222 | variable "use_route53" { 223 | description = "Whether to use Route53 for DNS management" 224 | type = bool 225 | default = false 226 | } 227 | 228 | variable "use_cloudfront" { 229 | description = "Whether to use CloudFront in front of ALB" 230 | type = bool 231 | default = true 232 | } 233 | 234 | variable "cloudfront_price_class" { 235 | description = "The price class for CloudFront distribution" 236 | type = string 237 | default = "PriceClass_100" 238 | } 239 | 240 | variable "vpc_id" { 241 | description = "VPC ID where the cluster and nodes will be deployed" 242 | type = string 243 | } 244 | 245 | variable "db_security_group_id" { 246 | description = "RDS db security group id" 247 | type = string 248 | } 249 | 250 | variable "redis_security_group_id" { 251 | description = "redis security group id" 252 | type = string 253 | } 254 | 255 | variable "architecture" { 256 | description = "The architecture for the node group instances (x86 or arm64)" 257 | type = string 258 | validation { 259 | condition = contains(["x86", "arm"], var.architecture) 260 | error_message = "Architecture must be either 'x86' or 'arm64'." 
261 | } 262 | } 263 | 264 | variable "disable_outbound_network_access" { 265 | description = "Whether to disable outbound network access for the EKS Cluster" 266 | type = bool 267 | } 268 | 269 | variable "desired_capacity" { 270 | description = "Desired Capacity on the node group and deployment" 271 | type = number 272 | } 273 | 274 | variable "min_capacity" { 275 | description = "Min Capacity on the node group" 276 | type = number 277 | } 278 | 279 | variable "max_capacity" { 280 | description = "Max Capacity on the node group" 281 | type = number 282 | } 283 | 284 | variable "public_load_balancer" { 285 | description = "whether the load balancer is public" 286 | type = bool 287 | } 288 | 289 | variable "master_and_salt_key_secret_arn" { 290 | description = "ARN of secret with master and salt key" 291 | type = string 292 | } 293 | 294 | variable "main_db_secret_arn" { 295 | description = "ARN of secret for main rds db" 296 | type = string 297 | } 298 | 299 | variable "vcpus" { 300 | description = "Number of ECS vcpus" 301 | type = number 302 | } 303 | 304 | variable "cpu_target_utilization_percent" { 305 | description = "CPU target utilization percent for autoscale" 306 | type = number 307 | } 308 | 309 | variable "memory_target_utilization_percent" { 310 | description = "Memory target utilization percent for autoscale" 311 | type = number 312 | } 313 | 314 | variable "private_subnets" { 315 | description = "List of private subnet IDs" 316 | type = list(string) 317 | } 318 | 319 | variable "public_subnets" { 320 | description = "List of public subnet IDs" 321 | type = list(string) 322 | } 323 | 324 | variable "private_subnets_cidr_blocks" { 325 | description = "CIDR blocks of the private subnets" 326 | type = list(string) 327 | default = ["10.0.0.0/8", "172.16.0.0/12", "192.168.0.0/16"] # Default private address spaces 328 | } 329 | 330 | variable "disable_swagger_page" { 331 | type = bool 332 | description = "Whether to disable the swagger page or not" 333 | } 
334 | 335 | variable "disable_admin_ui" { 336 | type = bool 337 | description = "Whether to disable the admin UI or not" 338 | } 339 | 340 | variable "langfuse_public_key" { 341 | type = string 342 | description = "the public key of your langfuse deployment" 343 | } 344 | 345 | variable "langfuse_secret_key" { 346 | type = string 347 | description = "the secret key of your langfuse deployment" 348 | } 349 | 350 | variable "langfuse_host" { 351 | type = string 352 | description = "the hostname of your langfuse deployment." 353 | } 354 | -------------------------------------------------------------------------------- /litellm-terraform-stack/modules/ecs/waf.tf: -------------------------------------------------------------------------------- 1 | ############################################################################### 2 | # (10) WAFv2 Web ACL Association 3 | ############################################################################### 4 | resource "aws_wafv2_web_acl_association" "litellm_waf" { 5 | resource_arn = aws_lb.this.arn 6 | web_acl_arn = var.wafv2_acl_arn 7 | } -------------------------------------------------------------------------------- /litellm-terraform-stack/modules/eks/iam.tf: -------------------------------------------------------------------------------- 1 | resource "aws_iam_role" "eks_developers" { 2 | name = "${var.name}-developers" 3 | assume_role_policy = data.aws_iam_policy_document.assume_role.json 4 | } 5 | 6 | resource "aws_iam_role" "eks_operators" { 7 | name = "${var.name}-operators" 8 | assume_role_policy = data.aws_iam_policy_document.assume_role.json 9 | } 10 | 11 | data "aws_iam_policy_document" "assume_role" { 12 | statement { 13 | sid = "AssumeRole" 14 | actions = ["sts:AssumeRole"] 15 | 16 | principals { 17 | type = "AWS" 18 | identifiers = ["arn:aws:iam::${data.aws_caller_identity.current.account_id}:root"] 19 | } 20 | } 21 | } 22 | 23 | resource "aws_iam_role" "eks_nodegroup" { 24 | name = "${var.name}-eks-nodegroup-role" 
25 | 26 | assume_role_policy = jsonencode({ 27 | Version = "2012-10-17" 28 | Statement = [ 29 | { 30 | Effect = "Allow" 31 | Principal = { 32 | Service = "ec2.amazonaws.com" 33 | } 34 | Action = "sts:AssumeRole" 35 | } 36 | ] 37 | }) 38 | } 39 | 40 | # Attach AWS-managed policies 41 | resource "aws_iam_role_policy_attachment" "eks_nodegroup_worker_policy" { 42 | role = aws_iam_role.eks_nodegroup.name 43 | policy_arn = "arn:aws:iam::aws:policy/AmazonEKSWorkerNodePolicy" 44 | } 45 | 46 | resource "aws_iam_role_policy_attachment" "eks_nodegroup_cni_policy" { 47 | role = aws_iam_role.eks_nodegroup.name 48 | policy_arn = "arn:aws:iam::aws:policy/AmazonEKS_CNI_Policy" 49 | } 50 | 51 | resource "aws_iam_role_policy_attachment" "eks_nodegroup_ec2_registry" { 52 | role = aws_iam_role.eks_nodegroup.name 53 | policy_arn = "arn:aws:iam::aws:policy/AmazonEC2ContainerRegistryReadOnly" 54 | } 55 | 56 | resource "aws_iam_role_policy_attachment" "eks_nodegroup_ssm" { 57 | role = aws_iam_role.eks_nodegroup.name 58 | policy_arn = "arn:aws:iam::aws:policy/AmazonSSMManagedInstanceCore" 59 | } 60 | 61 | data "aws_iam_policy_document" "nodegroup_ecr_ptc" { 62 | statement { 63 | sid = "ECRPullThroughCache" 64 | effect = "Allow" 65 | actions = [ 66 | "ecr:CreateRepository", 67 | "ecr:BatchImportUpstreamImage", 68 | ] 69 | resources = ["*"] 70 | } 71 | } 72 | 73 | resource "aws_iam_policy" "nodegroup_ecr_ptc" { 74 | name = "${var.name}-nodegroup-ecr-ptc" 75 | policy = data.aws_iam_policy_document.nodegroup_ecr_ptc.json 76 | description = "Allow ECR Pull Through Cache" 77 | } 78 | 79 | resource "aws_iam_policy_attachment" "nodegroup_ecr_ptc_attach" { 80 | name = "${var.name}-nodegroup-ecr-ptc-attach" 81 | policy_arn = aws_iam_policy.nodegroup_ecr_ptc.arn 82 | roles = [aws_iam_role.eks_nodegroup.name] 83 | } 84 | 85 | # Additional custom inline policy for the node group 86 | resource "aws_iam_role_policy" "node_additional_policies" { 87 | name = "${var.name}-eks-node-additional" 88 | role = 
aws_iam_role.eks_nodegroup.name 89 | 90 | policy = jsonencode({ 91 | Version = "2012-10-17" 92 | Statement = [ 93 | { 94 | Effect = "Allow" 95 | Action = [ 96 | "s3:GetObject", 97 | "s3:ListBucket" 98 | ] 99 | Resource = [ 100 | var.config_bucket_arn, 101 | "${var.config_bucket_arn}/*" 102 | ] 103 | }, 104 | { 105 | Effect = "Allow" 106 | Action = [ 107 | "s3:*" 108 | ] 109 | Resource = [ 110 | var.log_bucket_arn, 111 | "${var.log_bucket_arn}/*" 112 | ] 113 | }, 114 | { 115 | Effect = "Allow" 116 | Action = [ 117 | "bedrock:*" 118 | ] 119 | Resource = ["*"] 120 | }, 121 | { 122 | Effect = "Allow" 123 | Action = [ 124 | "sagemaker:InvokeEndpoint" 125 | ] 126 | Resource = ["*"] 127 | } 128 | ] 129 | }) 130 | } 131 | 132 | data "aws_iam_policy_document" "pod_identity_assume_role" { 133 | statement { 134 | effect = "Allow" 135 | principals { 136 | type = "Service" 137 | identifiers = ["pods.eks.amazonaws.com"] 138 | } 139 | actions = ["sts:AssumeRole", "sts:TagSession"] 140 | } 141 | } 142 | 143 | resource "aws_iam_role" "cw_observability_role" { 144 | # Make sure this only creates if you're creating the cluster or adding add-ons 145 | count = var.create_cluster || var.install_add_ons_in_existing_eks_cluster ? 1 : 0 146 | 147 | name = "${var.name}-cw-observability-role" 148 | assume_role_policy = data.aws_iam_policy_document.pod_identity_assume_role.json 149 | } 150 | 151 | resource "aws_iam_role_policy_attachment" "cw_agent_policy_attach" { 152 | count = var.create_cluster || var.install_add_ons_in_existing_eks_cluster ? 1 : 0 153 | 154 | role = aws_iam_role.cw_observability_role[0].name 155 | policy_arn = "arn:aws:iam::aws:policy/CloudWatchAgentServerPolicy" 156 | } 157 | 158 | data "aws_iam_policy_document" "eks_cluster_kms" { 159 | count = var.create_cluster ? 
1 : 0 160 | statement { 161 | sid = "AllowKMSUseOfEncryptionKey" 162 | effect = "Allow" 163 | actions = [ 164 | "kms:Encrypt", 165 | "kms:Decrypt", 166 | "kms:ReEncrypt*", 167 | "kms:GenerateDataKey*", 168 | "kms:DescribeKey", 169 | "kms:CreateGrant" 170 | ] 171 | resources = [ 172 | aws_kms_key.eks_secrets[0].arn 173 | ] 174 | } 175 | } 176 | 177 | resource "aws_iam_role_policy" "eks_cluster_kms_policy" { 178 | count = var.create_cluster ? 1 : 0 179 | name = "EKS-Cluster-KMS-Policy" 180 | role = aws_iam_role.eks_cluster[0].name 181 | 182 | policy = data.aws_iam_policy_document.eks_cluster_kms[0].json 183 | } -------------------------------------------------------------------------------- /litellm-terraform-stack/modules/eks/kms.tf: -------------------------------------------------------------------------------- 1 | resource "aws_kms_key" "eks_secrets" { 2 | count = var.create_cluster ? 1 : 0 3 | description = "KMS key for encrypting EKS Secrets" 4 | enable_key_rotation = true 5 | deletion_window_in_days = 30 6 | 7 | # Key policy that allows: 8 | # - Root to do anything (standard practice) 9 | # - The EKS cluster role to use the key for encryption (kms:Encrypt, kms:Decrypt, etc.) 
10 | policy = jsonencode({ 11 | Version = "2012-10-17" 12 | Id = "key-default-1" 13 | Statement = [ 14 | { 15 | Sid = "Enable IAM User Permissions" 16 | Effect = "Allow" 17 | Principal = { 18 | AWS = "arn:aws:iam::${data.aws_caller_identity.current.account_id}:root" 19 | } 20 | Action = "kms:*" 21 | Resource = "*" 22 | }, 23 | { 24 | Sid = "Allow use of the key by EKS Cluster Role" 25 | Effect = "Allow" 26 | Principal = { 27 | AWS = aws_iam_role.eks_cluster[0].arn 28 | } 29 | Action = [ 30 | "kms:Encrypt", 31 | "kms:Decrypt", 32 | "kms:ReEncrypt*", 33 | "kms:GenerateDataKey*", 34 | "kms:DescribeKey", 35 | "kms:CreateGrant" 36 | ] 37 | Resource = "*" 38 | } 39 | ] 40 | }) 41 | } 42 | -------------------------------------------------------------------------------- /litellm-terraform-stack/modules/eks/outputs.tf: -------------------------------------------------------------------------------- 1 | # output "vpc" { 2 | # description = "Amazon VPC full configuration" 3 | # value = module.vpc 4 | # } 5 | 6 | output "eks" { 7 | description = "Amazon EKS Cluster full configuration" 8 | value = var.create_cluster ? 
aws_eks_cluster.this[0] : null 9 | } 10 | 11 | output "configure_kubectl" { 12 | description = "Configure kubectl: make sure you're logged in with the correct AWS profile and run the following command to update your kubeconfig" 13 | value = "aws eks --region ${data.aws_region.current.name} update-kubeconfig --name ${local.cluster_name}" 14 | } 15 | 16 | # Outputs matching the CDK configuration 17 | output "cluster_name" { 18 | description = "The name of the EKS cluster" 19 | value = local.cluster_name 20 | } 21 | 22 | output "cluster_endpoint" { 23 | description = "The endpoint for the EKS cluster" 24 | value = local.cluster_endpoint 25 | } 26 | 27 | output "cluster_security_group_id" { 28 | description = "Security group ID attached to the EKS cluster" 29 | value = local.cluster_security_group_id 30 | } 31 | 32 | output "eks_cluster_name" { 33 | description = "Name of the EKS cluster" 34 | value = local.cluster_name 35 | } 36 | 37 | output "eks_deployment_name" { 38 | description = "Name of the Kubernetes deployment" 39 | value = kubernetes_deployment.litellm.metadata[0].name 40 | } 41 | 42 | output "public_subnet_ids" { 43 | description = "IDs of the public subnets" 44 | value = data.aws_subnets.public.ids 45 | } 46 | 47 | output "private_subnet_ids" { 48 | description = "IDs of the private subnets" 49 | value = data.aws_subnets.private.ids 50 | } 51 | 52 | output "litellm_url" { 53 | description = "The URL for the LiteLLM service" 54 | value = "https://${aws_route53_record.litellm.name}" 55 | } 56 | 57 | output "cluster_ca" { 58 | value = local.cluster_ca 59 | } 60 | 61 | -------------------------------------------------------------------------------- /litellm-terraform-stack/modules/eks/route53.tf: -------------------------------------------------------------------------------- 1 | 2 | data "aws_route53_zone" "selected" { 3 | name = var.hosted_zone_name 4 | private_zone = var.public_load_balancer ? 
false : true 5 | } 6 | 7 | 8 | # Create the A record 9 | resource "aws_route53_record" "litellm" { 10 | zone_id = data.aws_route53_zone.selected.zone_id 11 | name = var.record_name # e.g., "litellm.mirodrr.people.aws.dev" 12 | type = "A" 13 | 14 | alias { 15 | name = data.aws_lb.ingress_alb.dns_name 16 | zone_id = data.aws_lb.ingress_alb.zone_id 17 | evaluate_target_health = true 18 | } 19 | 20 | depends_on = [kubernetes_ingress_v1.litellm] 21 | } -------------------------------------------------------------------------------- /litellm-terraform-stack/modules/eks/variables.tf: -------------------------------------------------------------------------------- 1 | variable "name" { 2 | description = "Standard name to be used as prefix on all resources." 3 | type = string 4 | } 5 | 6 | variable "private_subnet_ids" { 7 | description = "List of private subnet IDs" 8 | type = list(string) 9 | } 10 | 11 | variable "public_subnet_ids" { 12 | description = "List of public subnet IDs" 13 | type = list(string) 14 | } 15 | 16 | variable "existing_cluster_name" { 17 | description = "Name of the existing EKS Cluster." 18 | type = string 19 | } 20 | 21 | variable "cluster_version" { 22 | description = "Kubernetes version for Amazon EKS Cluster." 
23 | type = string 24 | } 25 | 26 | # Variables needed for the configuration 27 | variable "config_bucket_arn" { 28 | description = "ARN of the configuration bucket" 29 | type = string 30 | } 31 | 32 | variable "log_bucket_arn" { 33 | description = "ARN of the log bucket" 34 | type = string 35 | } 36 | 37 | # Required variables 38 | variable "ecr_litellm_repository_url" { 39 | description = "URL of the ECR repository for LiteLLM" 40 | type = string 41 | } 42 | 43 | variable "ecr_middleware_repository_url" { 44 | description = "URL of the ECR repository for middleware" 45 | type = string 46 | } 47 | 48 | variable "litellm_version" { 49 | description = "Version tag for LiteLLM image" 50 | type = string 51 | } 52 | 53 | variable "config_bucket_name" { 54 | description = "Name of the S3 bucket containing config" 55 | type = string 56 | } 57 | 58 | variable "redis_host" { 59 | description = "The Redis host name" 60 | type = string 61 | } 62 | 63 | variable "redis_port" { 64 | description = "The Redis port" 65 | type = string 66 | } 67 | 68 | variable "redis_password" { 69 | description = "The Redis password" 70 | type = string 71 | } 72 | 73 | variable "database_url" { 74 | description = "Database connection URL" 75 | type = string 76 | } 77 | 78 | variable "litellm_master_key" { 79 | description = "LiteLLM master key" 80 | type = string 81 | sensitive = true 82 | } 83 | 84 | variable "litellm_salt_key" { 85 | description = "LiteLLM salt key" 86 | type = string 87 | sensitive = true 88 | } 89 | 90 | variable "openai_api_key" { 91 | description = "OpenAI API key" 92 | type = string 93 | sensitive = true 94 | } 95 | 96 | variable "azure_openai_api_key" { 97 | description = "Azure OpenAI API key" 98 | type = string 99 | sensitive = true 100 | } 101 | 102 | variable "azure_api_key" { 103 | description = "Azure API key" 104 | type = string 105 | sensitive = true 106 | } 107 | 108 | variable "anthropic_api_key" { 109 | description = "Anthropic API key" 110 | type = string 111 
| sensitive = true 112 | } 113 | 114 | variable "groq_api_key" { 115 | description = "Groq API key" 116 | type = string 117 | sensitive = true 118 | } 119 | 120 | variable "cohere_api_key" { 121 | description = "Cohere API key" 122 | type = string 123 | sensitive = true 124 | } 125 | 126 | variable "co_api_key" { 127 | description = "Co API key" 128 | type = string 129 | sensitive = true 130 | } 131 | 132 | variable "hf_token" { 133 | description = "HuggingFace token" 134 | type = string 135 | sensitive = true 136 | } 137 | 138 | variable "huggingface_api_key" { 139 | description = "HuggingFace API key" 140 | type = string 141 | sensitive = true 142 | } 143 | 144 | variable "databricks_api_key" { 145 | description = "Databricks API key" 146 | type = string 147 | sensitive = true 148 | } 149 | 150 | variable "gemini_api_key" { 151 | description = "Gemini API key" 152 | type = string 153 | sensitive = true 154 | } 155 | 156 | variable "codestral_api_key" { 157 | description = "Codestral API key" 158 | type = string 159 | sensitive = true 160 | } 161 | 162 | variable "mistral_api_key" { 163 | description = "Mistral API key" 164 | type = string 165 | sensitive = true 166 | } 167 | 168 | variable "azure_ai_api_key" { 169 | description = "Azure AI API key" 170 | type = string 171 | sensitive = true 172 | } 173 | 174 | variable "nvidia_nim_api_key" { 175 | description = "NVIDIA NIM API key" 176 | type = string 177 | sensitive = true 178 | } 179 | 180 | variable "xai_api_key" { 181 | description = "XAI API key" 182 | type = string 183 | sensitive = true 184 | } 185 | 186 | variable "perplexityai_api_key" { 187 | description = "PerplexityAI API key" 188 | type = string 189 | sensitive = true 190 | } 191 | 192 | variable "github_api_key" { 193 | description = "GitHub API key" 194 | type = string 195 | sensitive = true 196 | } 197 | 198 | variable "deepseek_api_key" { 199 | description = "Deepseek API key" 200 | type = string 201 | sensitive = true 202 | } 203 | 204 | 
variable "ai21_api_key" {
  description = "AI21 API key"
  type        = string
  sensitive   = true
}

variable "langsmith_api_key" {
  description = "Langsmith API key"
  type        = string
  sensitive   = true
}

variable "langsmith_project" {
  description = "Langsmith project"
  type        = string
}

variable "langsmith_default_run_name" {
  description = "langsmith default run name"
  type        = string
}

variable "okta_audience" {
  description = "Okta audience"
  type        = string
}

variable "okta_issuer" {
  description = "Okta issuer"
  type        = string
}


variable "certificate_arn" {
  description = "ARN of the ACM certificate"
  type        = string
}

variable "wafv2_acl_arn" {
  description = "ARN of the WAFv2 ACL"
  type        = string
}

variable "record_name" {
  description = "record name for the ingress"
  type        = string
}

variable "hosted_zone_name" {
  description = "Hosted zone name for the ingress"
  type        = string
}

# Variables
variable "create_cluster" {
  description = "Controls if EKS cluster should be created"
  type        = bool
}

variable "vpc_id" {
  description = "VPC ID where the cluster and nodes will be deployed"
  type        = string
}

variable "db_security_group_id" {
  description = "RDS db security group id"
  type        = string
}

variable "redis_security_group_id" {
  description = "redis security group id"
  type        = string
}

variable "architecture" {
  # FIX: description and error_message previously said "arm64", but the
  # validation condition only accepts "arm" — a user typing "arm64" as the
  # message suggested would always fail validation.
  description = "The architecture for the node group instances (x86 or arm)"
  type        = string
  validation {
    condition     = contains(["x86", "arm"], var.architecture)
    error_message = "Architecture must be either 'x86' or 'arm'."
284 | } 285 | } 286 | 287 | variable "disable_outbound_network_access" { 288 | description = "Whether to disable outbound network access for the EKS Cluster" 289 | type = bool 290 | } 291 | 292 | variable "eks_alb_controller_private_ecr_repository_name" { 293 | description = "The name of the ECR repo that is used to store the EKS ALB Controller Container Image in EKS deployments with outbound network access disabled" 294 | type = string 295 | } 296 | 297 | variable "install_add_ons_in_existing_eks_cluster" { 298 | description = "Whether to install add ons onto an existing EKS Cluster" 299 | type = bool 300 | } 301 | 302 | variable "desired_capacity" { 303 | description = "Desired Capacity on the node group and deployment" 304 | type = number 305 | } 306 | 307 | variable "min_capacity" { 308 | description = "Min Capacity on the node group" 309 | type = number 310 | } 311 | 312 | variable "max_capacity" { 313 | description = "Max Capacity on the node group" 314 | type = number 315 | } 316 | 317 | variable "arm_instance_type" { 318 | description = "Instance type for arm deployment" 319 | type = string 320 | } 321 | 322 | variable "x86_instance_type" { 323 | description = "Instance type for x86 deployment" 324 | type = string 325 | } 326 | 327 | variable "arm_ami_type" { 328 | description = "AMI type for arm deployment" 329 | type = string 330 | } 331 | 332 | variable "x86_ami_type" { 333 | description = "AMI type for x86 deployment" 334 | type = string 335 | } 336 | 337 | variable "public_load_balancer" { 338 | description = "whether the load balancer is public" 339 | type = bool 340 | } 341 | 342 | variable "disable_swagger_page" { 343 | type = bool 344 | description = "Whether to disable the swagger page or not" 345 | } 346 | 347 | variable "disable_admin_ui" { 348 | type = bool 349 | description = "Whether to disable the admin UI or not" 350 | } 351 | 352 | variable "langfuse_public_key" { 353 | type = string 354 | description = "the public key of your langfuse 
deployment" 355 | } 356 | 357 | variable "langfuse_secret_key" { 358 | type = string 359 | description = "the secret key of your langfuse deployment" 360 | } 361 | 362 | variable "langfuse_host" { 363 | type = string 364 | description = "the hostname of your langfuse deployment." 365 | } -------------------------------------------------------------------------------- /litellm-terraform-stack/modules/eks/versions.tf: -------------------------------------------------------------------------------- 1 | terraform { 2 | required_version = ">= 1.3" 3 | 4 | required_providers { 5 | aws = { 6 | source = "hashicorp/aws" 7 | version = ">= 5.34" 8 | } 9 | kubernetes = { 10 | source = "hashicorp/kubernetes" 11 | version = ">= 2.20" 12 | } 13 | time = { 14 | source = "hashicorp/time" 15 | version = ">= 0.13.0" 16 | } 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /litellm-terraform-stack/outputs.tf: -------------------------------------------------------------------------------- 1 | output "LitellmEcsCluster" { 2 | value = try(module.ecs_cluster[0].LitellmEcsCluster, "") 3 | description = "Name of the ECS Cluster" 4 | } 5 | 6 | output "LitellmEcsTask" { 7 | value = try(module.ecs_cluster[0].LitellmEcsTask, "") 8 | description = "Name of the ECS Service" 9 | } 10 | 11 | output "eks_cluster_name" { 12 | description = "Name of the EKS cluster" 13 | value = try(module.eks_cluster[0].eks_cluster_name, "") 14 | } 15 | 16 | output "eks_deployment_name" { 17 | description = "Name of the Kubernetes deployment" 18 | value = try(module.eks_cluster[0].eks_deployment_name, "") 19 | } 20 | 21 | output "cloudfront_distribution_id" { 22 | description = "The ID of the CloudFront distribution" 23 | value = var.use_cloudfront ? try(module.ecs_cluster[0].cloudfront_distribution_id, "") : "" 24 | } 25 | 26 | output "cloudfront_domain_name" { 27 | description = "The domain name of the CloudFront distribution" 28 | value = var.use_cloudfront ? 
try(module.ecs_cluster[0].cloudfront_domain_name, "") : "" 29 | } 30 | 31 | output "ServiceURL" { 32 | description = "The service URL" 33 | value = var.use_route53 ? "https://${var.record_name}.${var.hosted_zone_name}" : ( 34 | var.use_cloudfront ? "https://${try(module.ecs_cluster[0].cloudfront_domain_name, "")}" : "https://${try(module.ecs_cluster[0].alb_dns_name, "")}" 35 | ) 36 | } 37 | 38 | output "vpc_id" { 39 | description = "the vpc id we deployed to" 40 | value = module.base.VpcId 41 | } 42 | 43 | output "ConfigBucketName" { 44 | description = "The Name of the configuration bucket" 45 | value = module.base.ConfigBucketName 46 | } 47 | 48 | # Added to expose the CloudFront authentication secret once after creation 49 | # This allows for troubleshooting and verification if needed 50 | output "cloudfront_auth_secret" { 51 | description = "The CloudFront authentication secret (only shown once after creation)" 52 | value = var.use_cloudfront ? try(module.ecs_cluster[0].cloudfront_auth_secret, null) : null 53 | sensitive = true 54 | } 55 | -------------------------------------------------------------------------------- /litellm-terraform-stack/providers.tf: -------------------------------------------------------------------------------- 1 | terraform { 2 | backend "s3" {} 3 | } 4 | 5 | data "aws_caller_identity" "current" {} 6 | data "aws_region" "current" {} 7 | 8 | locals { 9 | SolutionNameKeySatisfyingRestrictions = "Guidance-for-Running-Generative-AI-Gateway-Proxy-on-AWS" 10 | common_labels = { 11 | project = "llmgateway" 12 | AWSSolution = "ToDo" 13 | GithubRepo = "https://github.com/aws-solutions-library-samples/" 14 | SolutionID = "SO9022" 15 | SolutionNameKey = "Guidance for Running Generative AI Gateway Proxy on AWS" 16 | SolutionVersionKey = "1.0.0" 17 | } 18 | } 19 | 20 | 21 | provider "aws" { 22 | default_tags { 23 | tags = local.common_labels 24 | } 25 | } 26 | 27 | resource "aws_servicecatalogappregistry_application" "solution_application" { 28 | 
name = "${local.SolutionNameKeySatisfyingRestrictions}-${data.aws_region.current.name}-${data.aws_caller_identity.current.account_id}" 29 | description = "Service Catalog application to track and manage all your resources for the solution ${local.common_labels.SolutionNameKey}" 30 | 31 | tags = { 32 | "Solutions:SolutionID" = local.common_labels.SolutionID 33 | "Solutions:SolutionName" = local.common_labels.SolutionNameKey 34 | "Solutions:SolutionVersion" = local.common_labels.SolutionVersionKey 35 | "Solutions:ApplicationType" = "AWS-Solutions" 36 | } 37 | } 38 | 39 | 40 | 41 | data "aws_eks_cluster_auth" "cluster" { 42 | count = local.platform == "EKS" ? 1 : 0 43 | name = module.eks_cluster[0].cluster_name 44 | } 45 | 46 | provider "kubernetes" { 47 | host = local.platform == "EKS" ? module.eks_cluster[0].cluster_endpoint : "" 48 | cluster_ca_certificate = local.platform == "EKS" ? base64decode(module.eks_cluster[0].cluster_ca) : "" 49 | token = local.platform == "EKS" ? data.aws_eks_cluster_auth.cluster[0].token : "" 50 | } 51 | 52 | provider "helm" { 53 | kubernetes { 54 | host = local.platform == "EKS" ? module.eks_cluster[0].cluster_endpoint : "" 55 | cluster_ca_certificate = local.platform == "EKS" ? base64decode(module.eks_cluster[0].cluster_ca) : "" 56 | token = local.platform == "EKS" ? 
data.aws_eks_cluster_auth.cluster[0].token : "" 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /media/Gateway latest architecture with CloudFront.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-solutions-library-samples/guidance-for-multi-provider-generative-ai-gateway-on-aws/4570827d7aed49d60649cdc81bdf085868bf5511/media/Gateway latest architecture with CloudFront.pptx -------------------------------------------------------------------------------- /media/Gateway-Architecture-with-CloudFront.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-solutions-library-samples/guidance-for-multi-provider-generative-ai-gateway-on-aws/4570827d7aed49d60649cdc81bdf085868bf5511/media/Gateway-Architecture-with-CloudFront.png -------------------------------------------------------------------------------- /media/Reference_architecture_ECS_EKS_platform_combined.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-solutions-library-samples/guidance-for-multi-provider-generative-ai-gateway-on-aws/4570827d7aed49d60649cdc81bdf085868bf5511/media/Reference_architecture_ECS_EKS_platform_combined.jpg -------------------------------------------------------------------------------- /media/Required-EKS-Add-ons.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-solutions-library-samples/guidance-for-multi-provider-generative-ai-gateway-on-aws/4570827d7aed49d60649cdc81bdf085868bf5511/media/Required-EKS-Add-ons.png -------------------------------------------------------------------------------- /media/Tested-Bring-Your-Own-EKS-Cluster-Configuration.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/aws-solutions-library-samples/guidance-for-multi-provider-generative-ai-gateway-on-aws/4570827d7aed49d60649cdc81bdf085868bf5511/media/Tested-Bring-Your-Own-EKS-Cluster-Configuration.png -------------------------------------------------------------------------------- /media/architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-solutions-library-samples/guidance-for-multi-provider-generative-ai-gateway-on-aws/4570827d7aed49d60649cdc81bdf085868bf5511/media/architecture.png -------------------------------------------------------------------------------- /middleware/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.11-slim 2 | 3 | WORKDIR /app 4 | 5 | COPY requirements.txt . 6 | RUN pip install --no-cache-dir -r requirements.txt 7 | 8 | COPY app.py . 9 | 10 | CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "3000"] -------------------------------------------------------------------------------- /middleware/docker-build-and-deploy.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [ $# -ne 2 ]; then 4 | echo "Usage: $0 " 5 | exit 1 6 | fi 7 | 8 | APP_NAME=$1 9 | ARCH=$2 10 | 11 | AWS_REGION=$(aws ec2 describe-availability-zones --output text --query 'AvailabilityZones[0].[RegionName]') 12 | export AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query "Account" --output text) 13 | 14 | # Check if the repository already exists 15 | REPO_EXISTS=$(aws ecr describe-repositories --repository-names $APP_NAME 2>/dev/null) 16 | 17 | if [ -z "$REPO_EXISTS" ]; then 18 | # Repository does not exist, create it with tag 19 | aws ecr create-repository --repository-name $APP_NAME --tags Key=project,Value=llmgateway 20 | else 21 | echo "Repository $APP_NAME already exists, checking tags..." 
22 | 23 | # Get current tags for the repository 24 | CURRENT_TAGS=$(aws ecr list-tags-for-resource --resource-arn arn:aws:ecr:${AWS_REGION}:${AWS_ACCOUNT_ID}:repository/${APP_NAME}) 25 | 26 | # Check if project=llmgateway tag exists 27 | if ! echo "$CURRENT_TAGS" | grep -q '"Key": "project".*"Value": "llmgateway"'; then 28 | echo "Adding project=llmgateway tag..." 29 | aws ecr tag-resource \ 30 | --resource-arn arn:aws:ecr:${AWS_REGION}:${AWS_ACCOUNT_ID}:repository/${APP_NAME} \ 31 | --tags Key=project,Value=llmgateway 32 | else 33 | echo "Tag project=llmgateway already exists." 34 | fi 35 | fi 36 | 37 | echo $ARCH 38 | case $ARCH in 39 | "x86") 40 | DOCKER_ARCH="linux/amd64" 41 | ;; 42 | "arm") 43 | DOCKER_ARCH="linux/arm64" 44 | ;; 45 | *) 46 | echo "Unsupported architecture: $ARCH" 47 | exit 1 48 | ;; 49 | esac 50 | 51 | echo $DOCKER_ARCH 52 | 53 | aws ecr get-login-password --region $AWS_REGION | docker login --username AWS --password-stdin $AWS_ACCOUNT_ID.dkr.ecr.$AWS_REGION.amazonaws.com 54 | docker build --platform $DOCKER_ARCH -t $APP_NAME . 
55 | docker tag $APP_NAME\:latest $AWS_ACCOUNT_ID.dkr.ecr.$AWS_REGION.amazonaws.com/$APP_NAME\:latest 56 | docker push $AWS_ACCOUNT_ID.dkr.ecr.$AWS_REGION.amazonaws.com/$APP_NAME\:latest -------------------------------------------------------------------------------- /middleware/requirements.txt: -------------------------------------------------------------------------------- 1 | fastapi 2 | uvicorn 3 | httpx 4 | pydantic 5 | openai 6 | botocore 7 | google-crc32c 8 | boto3 9 | sqlalchemy 10 | psycopg2-binary 11 | okta-jwt-verifier 12 | cryptography 13 | anyio -------------------------------------------------------------------------------- /scripts/.env.template: -------------------------------------------------------------------------------- 1 | BASE_URL= 2 | API_KEY= 3 | MODELS=anthropic.claude-3-5-sonnet-20241022-v2:0,gpt-4o -------------------------------------------------------------------------------- /scripts/benchmark.py: -------------------------------------------------------------------------------- 1 | from openai import OpenAI 2 | from litellm import completion_cost 3 | import time 4 | import click 5 | from tqdm import tqdm 6 | from tabulate import tabulate 7 | from termcolor import colored 8 | import os 9 | from dotenv import load_dotenv 10 | 11 | questions = ["Can you tell me a story?", "What are 5 good business ideas?"] 12 | 13 | load_dotenv() 14 | 15 | base_url = os.getenv("BASE_URL") # Litellm proxy base url 16 | api_key = os.getenv("API_KEY") # Litellm proxy api key 17 | 18 | models = os.getenv("MODELS").split( 19 | "," 20 | ) # List of models to benchmark. 
Values should be subset of model ids from your config.yaml 21 | 22 | # List of questions to benchmark (replace with your questions) 23 | 24 | # Enter your system prompt here 25 | system_prompt = """ 26 | You are LiteLLMs helpful assistant 27 | """ 28 | 29 | 30 | @click.command() 31 | @click.option( 32 | "--system-prompt", 33 | default="You are a helpful assistant that can answer questions.", 34 | help="System prompt for the conversation.", 35 | ) 36 | def main(system_prompt): 37 | client = OpenAI(base_url=base_url, api_key=api_key) 38 | 39 | for question in questions: 40 | data = [] # Data for the current question 41 | 42 | with tqdm(total=len(models)) as pbar: 43 | for model in models: 44 | colored_description = colored( 45 | f"Running question: {question} for model: {model}", "green" 46 | ) 47 | pbar.set_description(colored_description) 48 | start_time = time.time() 49 | 50 | response = client.chat.completions.create( 51 | model=model, 52 | max_tokens=500, 53 | messages=[ 54 | {"role": "system", "content": system_prompt}, 55 | {"role": "user", "content": question}, 56 | ], 57 | ).model_dump() 58 | 59 | end = time.time() 60 | total_time = end - start_time 61 | cost = completion_cost(completion_response=response) 62 | raw_response = response["choices"][0]["message"]["content"] 63 | 64 | data.append( 65 | { 66 | "Model": colored(model, "light_blue"), 67 | "Response": raw_response, # Colorize the response 68 | "ResponseTime": colored(f"{total_time:.2f} seconds", "red"), 69 | "Cost": colored(f"${cost:.6f}", "green"), # Colorize the cost 70 | } 71 | ) 72 | 73 | pbar.update(1) 74 | 75 | # Separate headers from the data 76 | headers = ["Model", "Response", "Response Time (seconds)", "Cost ($)"] 77 | colwidths = [15, 80, 15, 10] 78 | 79 | # Create a nicely formatted table for the current question 80 | table = tabulate( 81 | [list(d.values()) for d in data], 82 | headers, 83 | tablefmt="grid", 84 | maxcolwidths=colwidths, 85 | ) 86 | 87 | # Print the table for the current 
question 88 | colored_question = colored(question, "green") 89 | click.echo(f"\nBenchmark Results for '{colored_question}':") 90 | click.echo(table) # Display the formatted table 91 | 92 | 93 | if __name__ == "__main__": 94 | main() 95 | -------------------------------------------------------------------------------- /scripts/requirements.txt: -------------------------------------------------------------------------------- 1 | litellm 2 | tabulate 3 | termcolor 4 | python-dotenv -------------------------------------------------------------------------------- /test-middleware-streaming.py: -------------------------------------------------------------------------------- 1 | import boto3 2 | import os 3 | from botocore.client import Config 4 | from botocore import UNSIGNED 5 | from typing import Generator, Dict, Any, Optional 6 | 7 | 8 | global_session_id: Optional[str] = None 9 | 10 | 11 | def create_bedrock_client(): 12 | """ 13 | Creates a Bedrock client with custom endpoint and authorization header. 14 | Uses environment variables for configuration. 
15 | 16 | Required environment variables: 17 | - API_ENDPOINT: Custom Bedrock endpoint URL 18 | - API_KEY: Authorization bearer token 19 | - AWS_REGION: AWS region 20 | 21 | Returns: 22 | boto3.client: Configured Bedrock client 23 | """ 24 | endpoint = os.getenv("API_ENDPOINT") 25 | api_key = os.getenv("API_KEY") 26 | region = os.getenv("AWS_REGION") 27 | 28 | if not all([endpoint, api_key, region]): 29 | raise ValueError( 30 | "Missing required environment variables: API_ENDPOINT, API_KEY, AWS_REGION" 31 | ) 32 | 33 | # Initialize session and configure client 34 | session = boto3.Session() 35 | client_config = Config( 36 | signature_version=UNSIGNED, # Disable SigV4 signing 37 | retries={"max_attempts": 10, "mode": "standard"}, 38 | ) 39 | 40 | # Create the Bedrock client 41 | client = session.client( 42 | "bedrock-runtime", 43 | endpoint_url=endpoint, 44 | config=client_config, 45 | region_name=region, 46 | ) 47 | 48 | # Define authorization header handler 49 | def add_authorization_header(request, **kwargs): 50 | request.headers["Authorization"] = f"Bearer {api_key}" 51 | 52 | # Register the event handler 53 | client.meta.events.register("request-created.*", add_authorization_header) 54 | 55 | return client 56 | 57 | 58 | def extract_session_id(response) -> Optional[str]: 59 | """ 60 | Extracts the x-session-id from the response headers. 
61 | 62 | Args: 63 | response: The raw response object from the Bedrock API 64 | 65 | Returns: 66 | str: The session ID if found, None otherwise 67 | """ 68 | try: 69 | # Access the response metadata which contains the headers 70 | headers = response["ResponseMetadata"]["HTTPHeaders"] 71 | print(f"headers: {headers}") 72 | session_id = headers.get("x-session-id") 73 | print(f"session_id: {session_id}") 74 | return session_id 75 | except (KeyError, AttributeError): 76 | print("Warning: Could not extract x-session-id from response headers") 77 | return None 78 | 79 | 80 | def send_message_stream( 81 | client, 82 | message: str, 83 | model_id: str = "anthropic.claude-3-haiku-20240307-v1:0", 84 | max_tokens: int = 1000, 85 | temperature: float = 0.7, 86 | ) -> Generator[Dict[str, Any], None, None]: 87 | """ 88 | Sends a message to the Bedrock Converse API with streaming response. 89 | 90 | Args: 91 | client: Configured Bedrock client 92 | message (str): Message to send 93 | model_id (str): ID of the model to use 94 | max_tokens (int): Maximum number of tokens to generate 95 | temperature (float): Temperature for response generation 96 | 97 | Yields: 98 | dict: Streaming response events 99 | """ 100 | 101 | global global_session_id 102 | 103 | try: 104 | if global_session_id: 105 | response = client.converse_stream( 106 | modelId=model_id, 107 | messages=[{"role": "user", "content": [{"text": message}]}], 108 | inferenceConfig={ 109 | "maxTokens": max_tokens, 110 | "temperature": temperature, 111 | }, 112 | additionalModelRequestFields={"session_id": global_session_id}, 113 | ) 114 | else: 115 | response = client.converse_stream( 116 | modelId=model_id, 117 | messages=[{"role": "user", "content": [{"text": message}]}], 118 | inferenceConfig={ 119 | "maxTokens": max_tokens, 120 | "temperature": temperature, 121 | }, 122 | additionalModelRequestFields={"enable_history": True}, 123 | ) 124 | global_session_id = extract_session_id(response) 125 | if global_session_id: 126 | 
print(f"global_session_id: {global_session_id}") 127 | print(f"response: {response}") 128 | print(f"response['stream']: {response["stream"]}") 129 | 130 | # Process the streaming response 131 | for event in response["stream"]: 132 | yield event 133 | 134 | except Exception as e: 135 | print(f"Error in streaming request: {str(e)}") 136 | raise 137 | 138 | 139 | def process_stream_response(event: Dict[str, Any]) -> str: 140 | """ 141 | Processes a streaming response event and extracts the text content if present. 142 | 143 | Args: 144 | event (dict): Streaming response event 145 | 146 | Returns: 147 | str: Extracted text content or empty string 148 | """ 149 | if "contentBlockDelta" in event: 150 | delta = event["contentBlockDelta"].get("delta", {}) 151 | if "text" in delta: 152 | return delta["text"] 153 | return "" 154 | 155 | 156 | def send_message_stream_wrapper(client, message): 157 | try: 158 | 159 | # Accumulate the response 160 | 161 | # Process the streaming response 162 | for event in send_message_stream(client, message): 163 | print(f"event: {event}") 164 | # Handle different event types 165 | if "internalServerException" in event: 166 | raise Exception( 167 | f"Internal server error: {event['internalServerException']}" 168 | ) 169 | elif "modelStreamErrorException" in event: 170 | raise Exception( 171 | f"Model stream error: {event['modelStreamErrorException']}" 172 | ) 173 | elif "validationException" in event: 174 | raise Exception(f"Validation error: {event['validationException']}") 175 | elif "throttlingException" in event: 176 | raise Exception(f"Throttling error: {event['throttlingException']}") 177 | # Handle metadata and stop events 178 | if "messageStop" in event: 179 | print("\n\nStream finished.") 180 | print(f"Stop reason: {event['messageStop'].get('stopReason')}") 181 | elif "metadata" in event: 182 | usage = event["metadata"].get("usage", {}) 183 | if usage: 184 | print(f"\nToken usage: {usage}") 185 | 186 | except Exception as e: 187 | 
print(f"Error in main: {str(e)}") 188 | 189 | 190 | def main(): 191 | # Create the client 192 | client = create_bedrock_client() 193 | 194 | # Example of using streaming response 195 | print("Sending streaming request...") 196 | message = "tell me a short story." 197 | send_message_stream_wrapper(client=client, message=message) 198 | message2 = "What did I last say to you?" 199 | send_message_stream_wrapper(client=client, message=message2) 200 | 201 | 202 | if __name__ == "__main__": 203 | main() 204 | -------------------------------------------------------------------------------- /test-middleware-synchronous.py: -------------------------------------------------------------------------------- 1 | import boto3 2 | import os 3 | from botocore.client import Config 4 | from botocore import UNSIGNED 5 | from botocore.exceptions import ClientError 6 | 7 | 8 | def create_bedrock_client(): 9 | # Get configuration from environment variables 10 | endpoint = os.getenv("API_ENDPOINT") 11 | api_key = os.getenv("API_KEY") 12 | region = os.getenv("AWS_REGION") 13 | 14 | if not all([endpoint, api_key, region]): 15 | raise ValueError( 16 | "Missing required environment variables: API_ENDPOINT, API_KEY, AWS_REGION" 17 | ) 18 | 19 | # Initialize session and configure client 20 | session = boto3.Session() 21 | client_config = Config( 22 | signature_version=UNSIGNED, # Disable SigV4 signing 23 | ) 24 | 25 | # Create the Bedrock client 26 | client = session.client( 27 | "bedrock-runtime", 28 | endpoint_url=endpoint, 29 | config=client_config, 30 | region_name=region, 31 | ) 32 | 33 | # Define authorization header handler 34 | def add_authorization_header(request, **kwargs): 35 | request.headers["Authorization"] = f"Bearer {api_key}" 36 | 37 | # Register the event handler 38 | client.meta.events.register("request-created.*", add_authorization_header) 39 | 40 | return client 41 | 42 | 43 | def send_message( 44 | client, 45 | message, 46 | 
model_id="anthropic.claude-3-haiku-20240307-v1:0", 47 | session_id=None, 48 | ): 49 | """ 50 | Sends a message to the Bedrock Converse API. 51 | 52 | Args: 53 | client: Configured Bedrock client 54 | message (str): Message to send 55 | model_id (str): ID of the model to use 56 | 57 | Returns: 58 | dict: API response 59 | """ 60 | 61 | # model_id = "arn:aws:bedrock:us-west-2:235614385815:prompt/6LE1KDKISG:2" 62 | body = {} 63 | try: 64 | if session_id: 65 | response = client.converse( 66 | modelId=model_id, 67 | # promptVariables={ 68 | # "topic": {"text": "fruit"}, 69 | # }, 70 | additionalModelRequestFields={"session_id": session_id}, 71 | messages=[{"role": "user", "content": [{"text": message}]}], 72 | ) 73 | else: 74 | response = client.converse( 75 | modelId=model_id, 76 | # promptVariables={ 77 | # "topic": {"text": "fruit"}, 78 | # }, 79 | additionalModelRequestFields={"enable_history": True}, 80 | messages=[{"role": "user", "content": [{"text": message}]}], 81 | ) 82 | 83 | return response 84 | except Exception as e: 85 | print(f"Error sending message: {str(e)}") 86 | raise 87 | 88 | 89 | def main(): 90 | try: 91 | # Create the client 92 | client = create_bedrock_client() 93 | 94 | # Send a test message 95 | response = send_message(client=client, message="tell me a short story.") 96 | 97 | print("Response:", response) 98 | session_id = response["ResponseMetadata"]["HTTPHeaders"].get("x-session-id") 99 | print(f"session_id: {session_id}") 100 | response_2 = send_message( 101 | client=client, message="What did I last say to you?", session_id=session_id 102 | ) 103 | print("Response 2:", response_2) 104 | 105 | except ClientError as e: 106 | error_code = e.response["Error"]["Code"] 107 | error_message = e.response["Error"]["Message"] 108 | print(f"e.response: {e.response}") 109 | 110 | print(f"AWS Error: {error_code} - {error_message}") 111 | except Exception as e: 112 | print(f"Unexpected error: {str(e)}") 113 | 114 | 115 | if __name__ == "__main__": 116 | 
main() 117 | -------------------------------------------------------------------------------- /tests/.env.template: -------------------------------------------------------------------------------- 1 | API_ENDPOINT= 2 | API_KEY= 3 | MODEL_ID= 4 | MANAGED_PROMPT_ARN= 5 | MANAGED_PROMPT_VARIABLE_NAME= 6 | MANAGED_PROMPT_VARIABLE_VALUE= -------------------------------------------------------------------------------- /tests/locust_load_test.py: -------------------------------------------------------------------------------- 1 | import os 2 | import uuid 3 | from locust import HttpUser, task, between 4 | from dotenv import load_dotenv 5 | 6 | load_dotenv() 7 | 8 | base_url = os.getenv("API_ENDPOINT") 9 | api_key = os.getenv("API_KEY") 10 | 11 | 12 | class MyUser(HttpUser): 13 | host = base_url 14 | wait_time = between(0.5, 1) # Random wait time between requests 15 | 16 | @task(100) 17 | def litellm_completion(self): 18 | # no cache hits with this 19 | payload = { 20 | "model": "fake-openai-endpoint", 21 | "messages": [ 22 | { 23 | "role": "user", 24 | "content": f"{uuid.uuid4()} This is a test there will be no cache hits and we'll fill up the context" 25 | * 150, 26 | } 27 | ], 28 | } 29 | response = self.client.post("/chat/completions", json=payload) 30 | if response.status_code != 200: 31 | # log the errors in error.txt 32 | with open("error.txt", "a") as error_log: 33 | print(f"error: {response}") 34 | error_log.write(response.text + "\n") 35 | 36 | def on_start(self): 37 | self.api_key = api_key 38 | self.client.headers.update({"Authorization": f"Bearer {self.api_key}"}) 39 | -------------------------------------------------------------------------------- /tests/requirements.txt: -------------------------------------------------------------------------------- 1 | pytest 2 | requests 3 | openai 4 | pytest-asyncio 5 | aiohttp 6 | python-dotenv 7 | boto3 8 | locust -------------------------------------------------------------------------------- 
/update-litellm-config.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Uploads config/config.yaml to the LiteLLM config S3 bucket and restarts
# the ECS service or EKS deployment (per DEPLOYMENT_PLATFORM in .env) so
# the new config is picked up.
# -a exports every variable set from here on, so everything sourced from
# .env is visible to the aws/kubectl child processes.
set -aeuo pipefail

aws_region=$(aws ec2 describe-availability-zones --output text --query 'AvailabilityZones[0].[RegionName]')
echo "$aws_region"

# Load environment variables from .env file
source .env

# Check if config.yaml exists
if [ ! -f "config/config.yaml" ]; then
  echo "config/config.yaml does not exist, can't upload to S3"
  exit 1
fi

cd litellm-terraform-stack
ConfigBucketName=$(terraform output -raw ConfigBucketName)
cd ..

echo "uploading config.yaml to bucket $ConfigBucketName"

# Upload the config; quote expansions so empty/odd values fail loudly
# instead of mangling the command line (ShellCheck SC2086).
aws s3 cp config/config.yaml "s3://${ConfigBucketName}/config.yaml" --region "$aws_region"

echo "Upload complete"

cd litellm-terraform-stack
if [ "$DEPLOYMENT_PLATFORM" = "ECS" ]; then
  LITELLM_ECS_CLUSTER=$(terraform output -raw LitellmEcsCluster)
  LITELLM_ECS_TASK=$(terraform output -raw LitellmEcsTask)

  echo "Rebooting ECS Task $LITELLM_ECS_TASK on ECS cluster $LITELLM_ECS_CLUSTER"

  aws ecs update-service \
    --cluster "$LITELLM_ECS_CLUSTER" \
    --service "$LITELLM_ECS_TASK" \
    --force-new-deployment \
    --desired-count "$DESIRED_CAPACITY" \
    --no-cli-pager
fi

if [ "$DEPLOYMENT_PLATFORM" = "EKS" ]; then
  EKS_CLUSTER_NAME=$(terraform output -raw eks_cluster_name)
  EKS_DEPLOYMENT_NAME=$(terraform output -raw eks_deployment_name)
  echo "Rebooting EKS deployment $EKS_DEPLOYMENT_NAME on EKS cluster $EKS_CLUSTER_NAME"

  aws eks update-kubeconfig --region "$aws_region" --name "$EKS_CLUSTER_NAME"
  kubectl rollout restart deployment "$EKS_DEPLOYMENT_NAME"
fi
--------------------------------------------------------------------------------