├── .gitignore ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── docs ├── 10-architecture │ └── 10-architecture.md ├── 20-deploy │ ├── 20-configure-env-variables.md │ ├── 25-clone-repo.md │ ├── 30-tools.md │ ├── 40-create-prepare-cluster.md │ ├── 50-install-litellm.md │ ├── 60-expose-litellm.md │ ├── 70-open-webui.md │ └── 99-clean-up.md ├── 25-config │ ├── 10-model-alias.md │ ├── 20-rate-limit.md │ ├── 25-global-rate-limit.md │ ├── 30-route.md │ └── 40-apply-config-changes.md ├── 30-app-changes │ └── 10-app-changes.md ├── 99-contributors.md ├── bedrock-litellm.drawio.png ├── index.md └── open-webui.png ├── eksctl └── cluster-config.yaml ├── helm ├── open-webui-private-values.yaml └── open-webui-public-values.yaml ├── iam ├── litellm-bedrock-and-sagemaker-policy.json └── litellm-bedrock-policy.json ├── litellm ├── config │ ├── proxy_config.yaml │ ├── proxy_config_global_rate_limit.yaml │ ├── proxy_config_model_alias.yaml │ ├── proxy_config_with_sagemaker_models.yaml │ └── route │ │ ├── proxy_config_fallback.yaml │ │ ├── proxy_config_latency_routing.yaml │ │ ├── proxy_config_load_balancer_default.yaml │ │ └── proxy_config_rate_limit_aware_routing.yaml └── deploy │ └── k8s │ └── ingress.yaml └── mkdocs.yml /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | # General 3 | .DS_Store 4 | .AppleDouble 5 | .LSOverride 6 | 7 | ### macOS Patch ### 8 | # iCloud generated files 9 | *.icloud 10 | 11 | ### VisualStudioCode ### 12 | .vscode/* 13 | !.vscode/settings.json 14 | !.vscode/tasks.json 15 | !.vscode/launch.json 16 | !.vscode/extensions.json 17 | !.vscode/*.code-snippets 18 | 19 | # Local History for Visual Studio Code 20 | .history/ 21 | 22 | # Built Visual Studio Code Extensions 23 | *.vsix 24 | 25 | ### VisualStudioCode Patch ### 26 | # Ignore all local history of files 27 | .history 28 | .ionide -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | ## Code of Conduct 2 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 3 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 4 | opensource-codeofconduct@amazon.com with any additional questions or comments. 5 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing Guidelines 2 | 3 | Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional 4 | documentation, we greatly value feedback and contributions from our community. 5 | 6 | Please read through this document before submitting any issues or pull requests to ensure we have all the necessary 7 | information to effectively respond to your bug report or contribution. 8 | 9 | 10 | ## Reporting Bugs/Feature Requests 11 | 12 | We welcome you to use the GitHub issue tracker to report bugs or suggest features. 13 | 14 | When filing an issue, please check existing open, or recently closed, issues to make sure somebody else hasn't already 15 | reported the issue. Please try to include as much information as you can. 
Details like these are incredibly useful: 16 | 17 | * A reproducible test case or series of steps 18 | * The version of our code being used 19 | * Any modifications you've made relevant to the bug 20 | * Anything unusual about your environment or deployment 21 | 22 | 23 | ## Contributing via Pull Requests 24 | Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that: 25 | 26 | 1. You are working against the latest source on the *main* branch. 27 | 2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already. 28 | 3. You open an issue to discuss any significant work - we would hate for your time to be wasted. 29 | 30 | To send us a pull request, please: 31 | 32 | 1. Fork the repository. 33 | 2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change. 34 | 3. Ensure local tests pass. 35 | 4. Commit to your fork using clear commit messages. 36 | 5. Send us a pull request, answering any default questions in the pull request interface. 37 | 6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation. 38 | 39 | GitHub provides additional document on [forking a repository](https://help.github.com/articles/fork-a-repo/) and 40 | [creating a pull request](https://help.github.com/articles/creating-a-pull-request/). 41 | 42 | 43 | ## Finding contributions to work on 44 | Looking at the existing issues is a great way to find something to contribute on. As our projects, by default, use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any 'help wanted' issues is a great place to start. 45 | 46 | 47 | ## Code of Conduct 48 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 49 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 50 | opensource-codeofconduct@amazon.com with any additional questions or comments. 51 | 52 | 53 | ## Security issue notifications 54 | If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public github issue. 55 | 56 | 57 | ## Licensing 58 | 59 | See the [LICENSE](LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution. 60 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT No Attribution 2 | 3 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | this software and associated documentation files (the "Software"), to deal in 7 | the Software without restriction, including without limitation the rights to 8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software is furnished to do so. 10 | 11 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 12 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 13 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR 14 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 15 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 16 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 17 | 18 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Migrate to Amazon Bedrock using LiteLLM 2 | Some organizations have already built applications that work with OpenAI compatible APIs and would like to switch to Amazon Bedrock -- this guide shows how you can do that without changing app code using LiteLLM. 3 | 4 | To leverage this repository, please use our website powered by this GitHub: [Website](https://aws-samples.github.io/bedrock-litellm/) 5 | 6 | ## Security 7 | 8 | See CONTRIBUTING for more information. 9 | 10 | ## License 11 | This library is licensed under the MIT-0 License. See the LICENSE file. -------------------------------------------------------------------------------- /docs/10-architecture/10-architecture.md: -------------------------------------------------------------------------------- 1 | # Architecture 2 | The diagram below depicts the solution architecture. [LiteLLM](https://www.litellm.ai/) is used as a proxy to translate the API call originating from the app in OpenAI format to Bedrock format. 3 | 4 | ![architecture](../bedrock-litellm.drawio.png) 5 | 6 | LiteLLM is deployed on Amazon EKS. If the app is hosted on the same cluster, it can access LiteLLM internally through Kubernetes `Service` of `type` `ClusterIP`. If the app is hosted outside the cluster, LiteLLM has to be exposed via a load balancer -- refer to [Exposing applications](https://www.eksworkshop.com/docs/fundamentals/exposing/) section of Amazon EKS workshop for guidance. This implementation assumes the app is hosted on the same cluster. 7 | 8 | While LiteLLM is only used as a proxy in this implementation, it has several other features e.g. retry/fallback logic across multiple deployments, track spend & set budgets per project, etc. 9 | -------------------------------------------------------------------------------- /docs/20-deploy/20-configure-env-variables.md: -------------------------------------------------------------------------------- 1 | # Configure environment variables 2 | 3 | !!! note annotate "Note" 4 | The steps in the following sections have been tested on Cloud9/Amazon Linux. Make sure to disable AWS managed temporary credentials and attach an IAM role with sufficient permissions. 5 | 6 | 1. 
Configure environment variables 7 | 8 | ```sh 9 | export TOKEN=`curl -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 21600"` 10 | export AWS_REGION=`curl -H "X-aws-ec2-metadata-token: $TOKEN" http://169.254.169.254/latest/meta-data/placement/region` 11 | export ACCOUNT_ID=`curl -H "X-aws-ec2-metadata-token: $TOKEN" http://169.254.169.254/latest/meta-data/identity-credentials/ec2/info | jq -r '.AccountId'` 12 | export CLUSTER_NAME="litellm-demo" 13 | 14 | echo "export AWS_REGION=${AWS_REGION}" | tee -a ~/.bash_profile 15 | echo "export ACCOUNT_ID=${ACCOUNT_ID}" | tee -a ~/.bash_profile 16 | echo "export CLUSTER_NAME=${CLUSTER_NAME}" | tee -a ~/.bash_profile 17 | ``` 18 | -------------------------------------------------------------------------------- /docs/20-deploy/25-clone-repo.md: -------------------------------------------------------------------------------- 1 | # Clone the repo 2 | 1. Clone bedrock-litellm repo 3 | ```sh 4 | git clone https://github.com/aws-samples/bedrock-litellm.git 5 | ``` 6 | 1. Save bedrock-litellm directory in an environment variable 7 | ```sh 8 | export BEDROCK_LITELLM_DIR=$PWD/bedrock-litellm 9 | echo "export BEDROCK_LITELLM_DIR=${BEDROCK_LITELLM_DIR}" | tee -a ~/.bash_profile 10 | ``` 11 | -------------------------------------------------------------------------------- /docs/20-deploy/30-tools.md: -------------------------------------------------------------------------------- 1 | # Install Kubernetes tools 2 | 3 | 1. Install eksctl: 4 | ```sh 5 | # for ARM systems, set ARCH to: `arm64`, `armv6` or `armv7` 6 | ARCH=amd64 7 | PLATFORM=$(uname -s)_$ARCH 8 | 9 | curl -sLO "https://github.com/eksctl-io/eksctl/releases/latest/download/eksctl_$PLATFORM.tar.gz" 10 | 11 | # (Optional) Verify checksum 12 | curl -sL "https://github.com/eksctl-io/eksctl/releases/latest/download/eksctl_checksums.txt" | grep $PLATFORM | sha256sum --check 13 | 14 | tar -xzf eksctl_$PLATFORM.tar.gz -C /tmp && rm eksctl_$PLATFORM.tar.gz 15 | 16 | sudo mv /tmp/eksctl /usr/local/bin 17 | ``` 18 | 19 | 1. Install kubectl: 20 | ```sh 21 | curl -O https://s3.us-west-2.amazonaws.com/amazon-eks/1.30.0/2024-05-12/bin/linux/amd64/kubectl 22 | chmod +x ./kubectl 23 | mkdir -p $HOME/bin && cp ./kubectl $HOME/bin/kubectl && export PATH=$HOME/bin:$PATH 24 | ``` 25 | 26 | 1. Install yq: 27 | ```sh 28 | echo 'yq() { 29 | docker run --rm -i -v "${PWD}":/workdir mikefarah/yq "$@" 30 | }' | tee -a ~/.bashrc && source ~/.bashrc 31 | ``` 32 | 33 | 1. Install Helm: 34 | ```sh 35 | curl -fsSL -o get_helm.sh https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 36 | chmod 700 get_helm.sh 37 | ./get_helm.sh 38 | ``` 39 | 40 | 1. Install envsubst: 41 | ```sh 42 | curl -L https://github.com/a8m/envsubst/releases/download/v1.2.0/envsubst-`uname -s`-`uname -m` -o envsubst 43 | chmod +x envsubst 44 | sudo mv envsubst /usr/local/bin 45 | ``` -------------------------------------------------------------------------------- /docs/20-deploy/40-create-prepare-cluster.md: -------------------------------------------------------------------------------- 1 | # Create and prepare an EKS cluster 2 | 3 | 1. Create cluster: 4 | ```sh 5 | envsubst < $BEDROCK_LITELLM_DIR/eksctl/cluster-config.yaml | eksctl create cluster -f - 6 | ``` 7 | 8 | 1. 
Create an IAM OIDC provider for the cluster to be able to use [IAM roles for service accounts](https://docs.aws.amazon.com/eks/latest/userguide/iam-roles-for-service-accounts.html) (required for granting IAM permissions to LiteLLM to be able to invoke Bedrock models): 9 | ```sh 10 | eksctl utils associate-iam-oidc-provider --cluster $CLUSTER_NAME --approve 11 | ``` 12 | 13 | 1. (Optional) Install AWS Load Balancer Controller (AWS LBC): 14 | 15 | !!! note annotate "Note" 16 | Install AWS Load Balancer Controller if you are planning to expose LiteLLM or one of the clients on ELB. 17 | 18 | First, create the IAM policy: 19 | 20 | ```sh 21 | curl -O https://raw.githubusercontent.com/kubernetes-sigs/aws-load-balancer-controller/v2.7.2/docs/install/iam_policy.json 22 | 23 | export AWS_LBC_IAM_POLICY_ARN=$(aws iam create-policy \ 24 | --policy-name AWSLoadBalancerControllerIAMPolicy \ 25 | --policy-document file://iam_policy.json \ 26 | --output text \ 27 | --query "Policy.Arn") 28 | echo "export AWS_LBC_IAM_POLICY_ARN=${AWS_LBC_IAM_POLICY_ARN}" | tee -a ~/.bash_profile 29 | ``` 30 | 31 | Then, create IRSA setup: 32 | ```sh 33 | eksctl create iamserviceaccount \ 34 | --cluster $CLUSTER_NAME \ 35 | --namespace=kube-system \ 36 | --name=aws-load-balancer-controller \ 37 | --role-name AmazonEKS_LoadBalancerController_Role \ 38 | --attach-policy-arn $AWS_LBC_IAM_POLICY_ARN \ 39 | --approve 40 | ``` 41 | Then, install AWS LBC helm chart: 42 | ```sh 43 | helm repo add eks https://aws.github.io/eks-charts 44 | helm repo update eks 45 | helm install aws-load-balancer-controller eks/aws-load-balancer-controller \ 46 | --namespace kube-system \ 47 | --set clusterName=$CLUSTER_NAME \ 48 | --set serviceAccount.create=false \ 49 | --set serviceAccount.name=aws-load-balancer-controller 50 | ``` 51 | 52 | 53 | 1. (Optional) Install EBS CSI driver (EBS volumes will be used to store Open WebUI state): 54 | 55 | !!! note annotate "Note" 56 | Install EBS CSI driver if you are planning to use Open WebUI as it depends on EBS volumes for storing its state. 57 | 58 | First, create IRSA dependencies: 59 | ```sh 60 | eksctl create iamserviceaccount \ 61 | --name ebs-csi-controller-sa \ 62 | --namespace kube-system \ 63 | --cluster $CLUSTER_NAME \ 64 | --role-name AmazonEKS_EBS_CSI_DriverRole \ 65 | --attach-policy-arn arn:aws:iam::aws:policy/service-role/AmazonEBSCSIDriverPolicy \ 66 | --approve 67 | ``` 68 | Then, install EBS CSI driver helm chart: 69 | ```sh 70 | helm repo add aws-ebs-csi-driver https://kubernetes-sigs.github.io/aws-ebs-csi-driver 71 | helm repo update 72 | helm upgrade --install aws-ebs-csi-driver \ 73 | --namespace kube-system \ 74 | --set controller.serviceAccount.create=false \ 75 | aws-ebs-csi-driver/aws-ebs-csi-driver 76 | ``` 77 | -------------------------------------------------------------------------------- /docs/20-deploy/50-install-litellm.md: -------------------------------------------------------------------------------- 1 | # Install LiteLLM 2 | 1. Clone LiteLLM repo 3 | ```sh 4 | git clone https://github.com/BerriAI/litellm.git 5 | ``` 6 | 7 | 1. Save LiteLLM directory in an environment variable 8 | ```sh 9 | export LITELLM_DIR=$PWD/litellm 10 | echo "export LITELLM_DIR=${LITELLM_DIR}" | tee -a ~/.bash_profile 11 | ``` 12 | 13 | 1. (Optional) If you plan to use LiteLLM to connect to SageMaker, retrieve the endpoint name. 14 | 15 | First, find the endpoint that you wish to use. 
If you wish to deploy a new foundation model endpoint using SageMaker JumpStart, please refer to [JumpStart foundation model usage](https://docs.aws.amazon.com/sagemaker/latest/dg/jumpstart-foundation-models-use.html). To list the available endpoints:
16 | 
17 | ```sh
18 | aws sagemaker list-endpoints
19 | ```
20 | 
21 | Then, save the endpoint name as an environment variable:
22 | ```sh
23 | export ENDPOINT_NAME="your-endpoint-name"
24 | echo "export ENDPOINT_NAME=${ENDPOINT_NAME}" | tee -a ~/.bash_profile
25 | ```
26 | 
27 | 1. Create IRSA dependencies for LiteLLM
28 | 
29 | First, create the IAM policy.
30 | 
31 | !!! note annotate "Note"
32 | If you are also connecting to an endpoint running on Amazon SageMaker, replace the next command below with:
33 | ```sh
34 | envsubst < $BEDROCK_LITELLM_DIR/iam/litellm-bedrock-and-sagemaker-policy.json > /tmp/litellm-bedrock-policy.json
35 | ```
36 | 
37 | Run the following command only if you are not connecting to a SageMaker endpoint; otherwise, run the command in the note above:
38 | ```sh
39 | envsubst < $BEDROCK_LITELLM_DIR/iam/litellm-bedrock-policy.json > /tmp/litellm-bedrock-policy.json
40 | ```
41 | 
42 | Create the policy:
43 | ```sh
44 | export LITELLM_BEDROCK_IAM_POLICY_ARN=$(aws iam create-policy \
45 | --policy-name litellm-bedrock-policy \
46 | --policy-document file:///tmp/litellm-bedrock-policy.json \
47 | --output text \
48 | --query "Policy.Arn")
49 | echo "export LITELLM_BEDROCK_IAM_POLICY_ARN=${LITELLM_BEDROCK_IAM_POLICY_ARN}" | tee -a ~/.bash_profile
50 | ```
51 | 
52 | Then, create IRSA setup:
53 | ```sh
54 | eksctl create iamserviceaccount \
55 | --name litellm-sa \
56 | --cluster $CLUSTER_NAME \
57 | --role-name AmazonEKS_LiteLLM_Role \
58 | --attach-policy-arn $LITELLM_BEDROCK_IAM_POLICY_ARN \
59 | --approve
60 | ```
61 | 
62 | 1. Install LiteLLM
63 | 
64 | !!! warning annotate "Note"
65 | The LiteLLM helm chart is currently in beta, hence K8s manifests are used for installation. The snippet below will be updated once the helm chart becomes generally available.
66 | 
67 | !!! warning annotate "Note"
68 | Make sure to change `LITELLM_MASTER_KEY` in `$LITELLM_DIR/deploy/kubernetes/kub.yaml` to a random string rather than using the default API key, especially if you will expose the LiteLLM endpoint externally.
69 | 
70 | ```sh
71 | yq -i '.spec.template.spec.serviceAccount= "litellm-sa"' litellm/deploy/kubernetes/kub.yaml
72 | yq -i 'del(.spec.template.spec.containers[0].env[] | select(.name == "DATABASE_URL") )' litellm/deploy/kubernetes/kub.yaml
73 | yq -i '.spec.type= "ClusterIP"' litellm/deploy/kubernetes/service.yaml
74 | ```
75 | !!! note annotate "Note"
76 | If you are also connecting to an endpoint running on Amazon SageMaker, run the following command before creating the configmap:
77 | ```sh
78 | envsubst < $BEDROCK_LITELLM_DIR/litellm/config/proxy_config_with_sagemaker_models.yaml > $BEDROCK_LITELLM_DIR/litellm/config/proxy_config.yaml
79 | ```
80 | 
81 | Create the configmap:
82 | ```sh
83 | kubectl create configmap litellm-config --from-file=$BEDROCK_LITELLM_DIR/litellm/config/proxy_config.yaml
84 | ```
85 | 
86 | Apply the changes:
87 | ```sh
88 | kubectl apply -f $LITELLM_DIR/deploy/kubernetes/kub.yaml
89 | kubectl apply -f $LITELLM_DIR/deploy/kubernetes/service.yaml
90 | ```
91 | 
92 | 1. Allow access to Bedrock models by following the steps in [this doc page](https://docs.aws.amazon.com/bedrock/latest/userguide/model-access.html#model-access-add).
93 | 
94 | 1. Ensure that LiteLLM pods are up and running.
95 | 
96 | 1. 
Verify LiteLLM 97 | 98 | ```sh 99 | kubectl run curl --image=curlimages/curl --rm -it -- /bin/sh 100 | curl --location "http://litellm-service.default.svc.cluster.local:4000/chat/completions" \ 101 | --header 'Content-Type: application/json' \ 102 | --header 'Authorization: Bearer sk-1234' \ 103 | --data '{ 104 | "model": "bedrock-llama3-8b-instruct-v1", 105 | "messages": [ 106 | { 107 | "role": "user", 108 | "content": "what llm are you" 109 | } 110 | ] 111 | }' 112 | ``` 113 | 114 | (Optional) If you configured a SageMaker Endpoint, you can also query this, for example: 115 | 116 | ```sh 117 | curl --location "http://litellm-service.default.svc.cluster.local:4000/chat/completions" \ 118 | --header 'Content-Type: application/json' \ 119 | --header 'Authorization: Bearer sk-1234' \ 120 | --data '{ 121 | "model": "sagemaker-model", 122 | "messages": [ 123 | { 124 | "role": "user", 125 | "content": "Write me a haiku about Mount Fuji" 126 | } 127 | ] 128 | }' 129 | ``` -------------------------------------------------------------------------------- /docs/20-deploy/60-expose-litellm.md: -------------------------------------------------------------------------------- 1 | # (Optional) Expose LiteLLM 2 | 3 | ## Pre-requisites 4 | - A domain that can be used for hosting LiteLLM and exposing it externally through public endpoint. 5 | - A digital certificate in AWS Certificate Manager (ACM) for enabling TLS on LiteLLM 6 | 7 | ## Expose LiteLLM 8 | 1. Configure environment variables; replace ``, `` with the corresponding hostnames and ACM certificates ARN. 9 | ```sh 10 | export LITELLM_HOSTNAME="" 11 | export LITELLM_CERTIFICATE_ARN="" 12 | 13 | echo "export LITELLM_HOSTNAME=${LITELLM_HOSTNAME}" | tee -a ~/.bash_profile 14 | echo "export LITELLM_CERTIFICATE_ARN=${LITELLM_CERTIFICATE_ARN}" | tee -a ~/.bash_profile 15 | ``` 16 | 17 | 1. Apply LiteLLM ingress 18 | ```sh 19 | envsubst < $BEDROCK_LITELLM_DIR/litellm/deploy/ingress.yaml | kubectl apply -f - 20 | ``` 21 | 22 | !!! note annotate "Note" 23 | ELB needs a minute or so to complete the target registration; if the URL above did not work for you, wait for a few seconds for the registration to get completed. 24 | 25 | 1. Extract LiteLLM URL: 26 | ```sh 27 | kubectl get ingress litellm-ingress -o jsonpath='{.status.loadBalancer.ingress[*].hostname}' 28 | ``` 29 | 30 | 1. Add a CNAME record for `` (check prerequisities section) that points to the ALB host name, then access LiteLLM using ``. 31 | 32 | 33 | 1. Verify LiteLLM through external endpoint 34 | 35 | ```sh 36 | curl --location "https://${LITELLM_HOSTNAME}/chat/completions" \ 37 | --header 'Content-Type: application/json' \ 38 | --header 'Authorization: Bearer sk-1234' \ 39 | --data '{ 40 | "model": "bedrock-llama3-8b-instruct-v1", 41 | "messages": [ 42 | { 43 | "role": "user", 44 | "content": "what llm are you" 45 | } 46 | ] 47 | }' 48 | ``` 49 | -------------------------------------------------------------------------------- /docs/20-deploy/70-open-webui.md: -------------------------------------------------------------------------------- 1 | # (Optional) Connect Open WebUI to LiteLLM 2 | 3 | [Open WebUI]() is a web frontend that allows users to interact with LLMs. It supports locally running LLMs using Ollama, and OpenAI-compatible remote endpoints. In this implementation, we are configuring a remote endpoint that points to LiteLLM to show how LiteLLM allows for accessing Bedrock through an OpenAI-compatible interface. 
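Before wiring up Open WebUI, you can optionally confirm from inside the cluster that LiteLLM is reachable and speaking the OpenAI-compatible API. The sketch below is a minimal check, assuming the in-cluster `litellm-service` name and the default `sk-1234` master key used in the installation steps (replace it if you changed `LITELLM_MASTER_KEY`); it lists the models LiteLLM advertises on the OpenAI-compatible `/v1/models` endpoint, which is essentially the list Open WebUI pulls once the connection is verified:

```sh
# Start a throwaway curl pod...
kubectl run curl --image=curlimages/curl --rm -it -- /bin/sh
# ...then, inside the pod's shell, list the models exposed by LiteLLM
curl -s "http://litellm-service.default.svc.cluster.local:4000/v1/models" \
  --header 'Authorization: Bearer sk-1234'
```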
4 | 5 | ## Pre-requisites 6 | - A domain that can be used for hosting Open WebUI, a web frontend that allows users to interact with LLMs; it will be used to test LiteLLM setup. 7 | - A digital certificate in AWS Certificate Manager (ACM) for enabling TLS on Open WebUI 8 | 9 | 10 | ## Open WebUI deployment 11 | 1. Configure environment variables; replace ``, `` with the corresponding hostnames and ACM certificates ARN. 12 | 13 | ```sh 14 | export OPEN_WEBUI_HOSTNAME="" 15 | export OPEN_WEBUI_CERTIFICATE_ARN="" 16 | 17 | echo "export OPEN_WEBUI_HOSTNAME=${OPEN_WEBUI_HOSTNAME}" | tee -a ~/.bash_profile 18 | echo "export OPEN_WEBUI_CERTIFICATE_ARN=${OPEN_WEBUI_CERTIFICATE_ARN}" | tee -a ~/.bash_profile 19 | ``` 20 | 21 | 1. Install Open WebUI: 22 | ```sh 23 | helm repo add open-webui https://helm.openwebui.com/ 24 | helm repo update 25 | 26 | helm upgrade \ 27 | --install open-webui open-webui/open-webui \ 28 | --namespace open-webui \ 29 | --create-namespace \ 30 | -f bedrock-litellm/helm/open-webui-private-values.yaml 31 | ``` 32 | 33 | The first user signing up will get admin access. So, initially, Open WebUI will be only accessible from within the cluster to securely create the first/admin user. Subsequent sign ups will be in pending state till they are approved by the admin user. 34 | 35 | 1. Use `kubectl port-forward` to allow access to Open WebUI from the machine used for installation: 36 | ```sh 37 | kubectl port-forward service/open-webui -n open-webui 8080:80 38 | ``` 39 | 40 | If you are using Cloud9, you can access Open WebUI by clicking "Preview" (top bar), then "Preview Running Application". 41 | 42 | 1. Sign-up (remember, first signed up user get admin access), then go to User icon at top right, settings, admin settings, connections, then edit OpenAI API to be as follows: 43 | 44 | ```sh 45 | http://litellm-service.default.svc.cluster.local:4000/v1 46 | ``` 47 | 48 | Click on "Verify connection" button to make sure connectivity is in-place, then save. You should be able to see three of the Bedrock models available in Open WebUI as depicted in the screenshot below: 49 | 50 | ![architecture](../open-webui.png) 51 | 52 | Now, we have the admin user created, we can make Open WebUI accessible publicly. 53 | 54 | 1. Update Open WebUI helm release to include `Ingress` object for exposing it: 55 | ```sh 56 | envsubst < $BEDROCK_LITELLM_DIR/helm/open-webui-public-values.yaml | helm upgrade \ 57 | open-webui open-webui/open-webui \ 58 | --namespace open-webui \ 59 | -f - 60 | ``` 61 | 62 | !!! note annotate "Note" 63 | ELB needs a minute or so to complete the target registration; if the URL above did not work for you, wait for a few seconds for the registration to get completed. 64 | 65 | 66 | 1. Extract Open WebUI URL: 67 | ```sh 68 | kubectl -n open-webui get ingress open-webui -o jsonpath='{.status.loadBalancer.ingress[*].hostname}' 69 | ``` 70 | 71 | 1. Add a CNAME record for `` (check prerequisities section) that points to the ALB host name, then access Open WebUI using ``. 72 | 73 | 74 | 1. Edit `litellm/proxy_config.yaml`, update the IAM policy `litellm-bedrock-policy.json`, and enable access through the Bedrock console to add more Bedrock models on LiteLLM. -------------------------------------------------------------------------------- /docs/20-deploy/99-clean-up.md: -------------------------------------------------------------------------------- 1 | 1. Uninstall Open WebUI: 2 | ```sh 3 | helm uninstall open-webui --namespace open-webui 4 | ``` 5 | 6 | 2. 
Uninstall LiteLLM 7 | ```sh 8 | kubectl delete -f $BEDROCK_LITELLM_DIR/litellm/ingress.yaml 9 | kubectl delete -f $LITELLM_DIR/deploy/kubernetes/service.yaml 10 | kubectl delete -f $LITELLM_DIR/deploy/kubernetes/kub.yaml 11 | kubectl delete configmap litellm-config 12 | eksctl delete iamserviceaccount \ 13 | --name litellm-sa \ 14 | --cluster $CLUSTER_NAME 15 | aws iam delete-policy --policy-arn $LITELLM_BEDROCK_IAM_POLICY_ARN 16 | ``` 17 | 18 | 3. Uninstall AWS LBC 19 | ```sh 20 | helm uninstall aws-load-balancer-controller --namespace kube-system 21 | eksctl delete iamserviceaccount \ 22 | --name aws-load-balancer-controller \ 23 | --namespace=kube-system \ 24 | --cluster $CLUSTER_NAME 25 | aws iam delete-policy --policy-arn $AWS_LBC_IAM_POLICY_ARN 26 | ``` 27 | 28 | 4. Uninstall EBS driver 29 | ```sh 30 | helm uninstall aws-ebs-csi-driver \ 31 | --namespace kube-system 32 | eksctl delete iamserviceaccount \ 33 | --name ebs-csi-controller-sa \ 34 | --cluster $CLUSTER_NAME 35 | ``` 36 | 37 | 5. Delete cluster 38 | ```sh 39 | eksctl delete cluster --name $CLUSTER_NAME 40 | ``` 41 | 42 | 6. Delete the CNAME DNS records and the ACM certiciates used for LiteLLM and Open WebUI 43 | -------------------------------------------------------------------------------- /docs/25-config/10-model-alias.md: -------------------------------------------------------------------------------- 1 | # Model aliases 2 | 3 | Model aliases help you define a user-friendly name for a model or group of models, abstracting away the actual model name used by the model provider. LiteLLM provides the ability to use model aliases, which allow you to present a simplified or user-friendly model name to your end-users while invoking a different, more specific model name on the backend. This is useful when you want to abstract Bedrock model ids from clients or when dealing with multiple versions of models. 4 | 5 | For example, you might display the model name `claude-3` to the end-user while internally calling `bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0` on the backend. 6 | 7 | In the configuration file `proxy_config_model_alias.yaml`, the `model_name` parameter (e.g., `claude-3`) is the user-facing name, while the `litellm_params.model` parameter contains the actual backend model id (e.g., `bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0`) and any additional parameters needed for model configuration. 8 | 9 | ## Sample configuration 10 | 11 | ```yaml 12 | model_list: 13 | - model_name: claude-3 14 | litellm_params: 15 | model: bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0 16 | aws_region_name: $AWS_REGION 17 | ``` 18 | 19 | ## Steps to configure 20 | 21 | 1. Use the configuration file `$BEDROCK_LITELLM_DIR/litellm/config/proxy_config_model_alias.yaml` 22 | 1. To apply the new configuration, follow the steps outlined in [Apply configuration changes](./40-apply-config-changes.md). 23 | 24 | ## Steps to test 25 | 1. 
Test the model alias by invoking the user-facing model name as shown 26 | ```sh 27 | curl --location "https://${LITELLM_HOSTNAME}/chat/completions" \ 28 | --header 'Content-Type: application/json' \ 29 | --header 'Authorization: Bearer ' \ 30 | --data '{ 31 | "model": "claude-3", 32 | "messages": [ 33 | { 34 | "role": "user", 35 | "content": "what is Amazon S3" 36 | } 37 | ] 38 | }' 39 | ``` 40 | -------------------------------------------------------------------------------- /docs/25-config/20-rate-limit.md: -------------------------------------------------------------------------------- 1 | # Rate limiting 2 | 3 | LiteLLM provides rate-limiting capabilities to control the number of requests or tokens that can be processed by a model over a specific time. This feature allows you to manage traffic, control costs, and ensure fairness by restricting access based on requests per minute (RPM) or tokens per minute (TPM). 4 | 5 | Rate limits can be applied: 6 | 7 | 1. Per API key 8 | 2. Per user 9 | 3. Per team 10 | 4. Per specific models 11 | 12 | You can specify both rpm_limit (requests per minute) and tpm_limit (tokens per minute) for models, users, or teams in your configuration. 13 | 14 | ## Steps to configure 15 | 16 | 1. Create user with RPM and TPM values. You can create a user and define the limits for RPM and TPM by sending a request to the user creation API. This will ensure that the user is rate-limited accordingly. 17 | ```bash 18 | curl --location 'http://${LITELLM_HOSTNAME}/user/new' \ 19 | --header 'Authorization: Bearer ' \ 20 | --header 'Content-Type: application/json' \ 21 | --data '{ 22 | "user_id": "test_user_1", 23 | "max_parallel_requests": 10, 24 | "tpm_limit": 20, 25 | "rpm_limit": 2 26 | }' 27 | ``` 28 | You should get a `key` in the response header. This will serve as a master key while making chat request. For example, lets say value returned is `sk-1234567`. 29 | 30 | ## Steps to test 31 | 32 | 1. Using the master key obtained in the previous step, you can make a request to the chat API. Ensure the correct model and message are passed, and the authorization header includes the master key: 33 | ```bash 34 | curl --location "http://${LITELLM_HOSTNAME}/chat/completions" \ 35 | --header 'Content-Type: application/json' \ 36 | --header 'Authorization: Bearer sk-1234567' \ 37 | --data '{ 38 | "model": "claude-3", 39 | "messages": [ 40 | { 41 | "role": "user", 42 | "content": "what llm are you" 43 | } 44 | ] 45 | }' 46 | ``` 47 | 1. After making more than 2 requests, you should start receiving an error response indicating that the RPM limit has been reached. Here is an example of the error response you might see: 48 | ```json 49 | { 50 | "error": { 51 | "message": "Max parallel request limit reached. Hit limit for api_key: xxx. tpm_limit: 20, current_tpm 46, rpm_limit: 2, current rpm 1", 52 | "type": "None", 53 | "param": "None", 54 | "code": "429" 55 | } 56 | } 57 | ``` 58 | Similarly you can perform other tests on TPM as well. 59 | 60 | Please note the above test is for applying rate limit on user level. If you want to test different configurations on teams, organization etc, you can follow LiteLLM documentation [here](https://docs.litellm.ai/docs/proxy/users). 
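To watch the limit trip without issuing each call by hand, a short loop is enough. This is a minimal sketch, assuming the example key `sk-1234567` returned for `test_user_1` and the `claude-3` model name from the steps above; with `rpm_limit: 2`, the third request in the same minute should come back with HTTP 429:

```sh
# Send three requests back-to-back and print only the HTTP status codes
# (assumes LITELLM_HOSTNAME is set and sk-1234567 is the key returned for test_user_1)
for i in 1 2 3; do
  curl -s -o /dev/null -w "request ${i} -> HTTP %{http_code}\n" \
    --location "http://${LITELLM_HOSTNAME}/chat/completions" \
    --header 'Content-Type: application/json' \
    --header 'Authorization: Bearer sk-1234567' \
    --data '{"model": "claude-3", "messages": [{"role": "user", "content": "ping"}]}'
done
```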
61 | -------------------------------------------------------------------------------- /docs/25-config/25-global-rate-limit.md: -------------------------------------------------------------------------------- 1 | # Global rate limiting 2 | 3 | LiteLLM allows you to apply requests per minute (RPM) and tokens per minute (TPM) limits globally across all users, teams, and models through the LiteLLM configuration file. These global limits ensure that traffic is controlled across all requests, regardless of individual user or team limits. 4 | 5 | !!! note annotate "Note" 6 | You do not need to configure a database URL or use the LLM master key to apply this configuration, making it simpler for deployments where per-user tracking is not required. 7 | 8 | 9 | ## Sample configuration 10 | 11 | ```yaml 12 | model_list: 13 | - model_name: claude-3 14 | litellm_params: 15 | model: bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0 16 | aws_region_name: $AWS_REGION 17 | rpm: 2 18 | tpm: 200 19 | 20 | router_settings: 21 | enable_pre_call_checks: true # 1. Enable pre-call checks 22 | ``` 23 | 24 | ## Steps to configure 25 | 26 | 1. Use the configuration file `$BEDROCK_LITELLM_DIR/litellm/config/proxy_config_global_rate_limit.yaml` 27 | 1. To apply the new configuration, follow the steps outlined in [Apply configuration changes](./40-apply-config-changes.md). 28 | 29 | ## Steps to test 30 | 1. To test the global rate limit, make three or more API requests within one minute. After the second request, you should start receiving an error indicating that the limit has been reached. The limit will reset after one minute. 31 | ```bash 32 | curl --location "http://${LITELLM_HOSTNAME}/chat/completions" \ 33 | --header 'Content-Type: application/json' \ 34 | --data '{ 35 | "model": "claude-3", 36 | "messages": [ 37 | { 38 | "role": "user", 39 | "content": "what is amazon S3" 40 | } 41 | ] 42 | }' 43 | ``` 44 | If the rate limit is exceeded, you should receive an error response similar to the one below: 45 | ```json 46 | { 47 | "error": 48 | { 49 | "message":"No deployments available for selected model, Try again in 60 seconds. Passed model=bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0. Try again in 60 seconds.", 50 | "type":"None", 51 | "param":"None", 52 | "code":"429" 53 | } 54 | } 55 | ``` 56 | -------------------------------------------------------------------------------- /docs/25-config/30-route.md: -------------------------------------------------------------------------------- 1 | # Routing 2 | 3 | LiteLLM's routing feature allows for dynamic control over how requests are directed to LLMs deployed across multiple backends. The routing configuration is critical for optimizing load distribution, cost management, fallback strategies, and latency. 4 | 5 | ## Steps to configure 6 | 7 | To apply different routing configurations (such as load balancing, fallback, rate limit-aware routing, or latency-based routing), follow these steps: 8 | 9 | 1. Use one of the configuration files at `$BEDROCK_LITELLM_DIR/litellm/config/route/` 10 | 1. Follow the steps outlined in [Apply configuration changes](./40-apply-config-changes.md). 11 | 12 | ## Steps to test 13 | 1. 
Test by making a call to the chat API: 14 | ```sh 15 | curl --location "https://${LITELLM_HOSTNAME}/chat/completions" \ 16 | --header 'Content-Type: application/json' \ 17 | --header 'Authorization: Bearer ' \ 18 | --data '{ 19 | "model": "claude-3", 20 | "messages": [ 21 | { 22 | "role": "user", 23 | "content": "what is Amazon S3" 24 | } 25 | ] 26 | }' 27 | ``` 28 | 29 | ## Sample configurations 30 | 31 | The sample configurations below demonstrate LiteLLM key routing functionalities. 32 | 33 | ### Load balancing 34 | LiteLLM distributes requests across multiple model instances using various strategies such as round-robin(default), least busy etc. 35 | 36 | ```yaml 37 | model_list: 38 | - model_name: claude-3 39 | litellm_params: 40 | model: bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0 41 | aws_region_name: $AWS_REGION 42 | 43 | - model_name: claude-3 44 | litellm_params: 45 | model: bedrock/anthropic.claude-3-haiku-20240307-v1:0 46 | aws_region_name: $AWS_REGION 47 | ``` 48 | 49 | ### Fallbacks 50 | In case of a failure from a primary model, LiteLLM automatically redirects the request to a fallback model. This ensures uninterrupted service even if a model or provider is down. 51 | 52 | ```yaml 53 | model_list: 54 | - model_name: claude-3-sonnet 55 | litellm_params: 56 | model: bedrock/invalid 57 | aws_region_name: $AWS_REGION 58 | 59 | - model_name: claude-3-haiku 60 | litellm_params: 61 | model: bedrock/anthropic.claude-3-haiku-20240307-v1:0 62 | aws_region_name: $AWS_REGION 63 | 64 | router_settings: 65 | enable_pre_call_checks: true # 1. Enable pre-call checks 66 | 67 | litellm_settings: 68 | num_retries: 3 # retry call 3 times on each model_name (e.g. zephyr-beta) 69 | request_timeout: 10 # raise Timeout error if call takes longer than 10s. Sets litellm.request_timeout 70 | fallbacks: [{"claude-3-sonnet": ["claude-3-haiku"]}] 71 | allowed_fails: 3 # cooldown model if it fails > 1 call in a minute. 72 | cooldown_time: 30 # how long to cooldown model if fails/min > allowed_fails 73 | ``` 74 | 75 | For testing, make sure when you pass additional field called `mock_testing_fallbacks` as shown below: 76 | ```bash 77 | curl --location "http://${LITELLM_HOSTNAME}/chat/completions" \ 78 | --header 'Content-Type: application/json' \ 79 | --header 'Authorization: Bearer ' \ 80 | --data '{ 81 | "model": "claude-3-sonnet", 82 | "messages": [ 83 | { 84 | "role": "user", 85 | "content": "what llm are you" 86 | } 87 | ], "mock_testing_fallbacks": true 88 | }' 89 | ``` 90 | 91 | ### Rate limit-aware routing 92 | LiteLLM can dynamically reroute requests if a model has exceeded its rate limit (requests per minute or tokens per minute). This prevents service disruption when models reach their capacity limits. 93 | 94 | ```yaml 95 | model_list: 96 | - model_name: claude-3-sonnet 97 | litellm_params: 98 | model: bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0 99 | aws_region_name: $AWS_REGION 100 | tpm: 2000 101 | rpm: 10 102 | - model_name: claude-3-haiku 103 | litellm_params: 104 | model: bedrock/anthropic.claude-3-haiku-20240307-v1:0 105 | aws_region_name: $AWS_REGION 106 | tpm: 10000 107 | rpm: 1 108 | 109 | router_settings: 110 | routing_strategy: usage-based-routing-v2 111 | enable_pre_call_check: true 112 | ``` 113 | 114 | In this configuration, Claude Sonnet is limited to 10 requests per minute and 2000 tokens per minute. When exceeding these limits, LiteLLM filters out the deployment, and routes to the deployment with the lowest TPM usage for that minute. 
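One way to observe this behaviour from the client side is to send a short burst of requests and record the status codes. This is a minimal sketch, assuming the exposed LiteLLM hostname and a valid key (`<your-api-key>` is a placeholder); whether requests beyond a deployment's per-minute limits are rerouted or rejected with HTTP 429 depends on whether another deployment is registered under the same `model_name` and still has capacity:

```sh
# Burst 12 requests at claude-3-sonnet and print the status code of each
# (hypothetical check; replace <your-api-key> with your LiteLLM key)
for i in $(seq 1 12); do
  curl -s -o /dev/null -w "request ${i} -> HTTP %{http_code}\n" \
    --location "https://${LITELLM_HOSTNAME}/chat/completions" \
    --header 'Content-Type: application/json' \
    --header 'Authorization: Bearer <your-api-key>' \
    --data '{"model": "claude-3-sonnet", "messages": [{"role": "user", "content": "ping"}]}'
done
```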
115 | 116 | ### Latency-based routing 117 | 118 | LiteLLM can prioritize routing based on model response times (latency). It Picks the deployment with the lowest response time by caching, and updating the response times for deployments based on when a request was sent and received from a deployment. 119 | 120 | ```yaml 121 | model_list: 122 | - model_name: claude-3 123 | litellm_params: 124 | model: bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0 125 | aws_region_name: $AWS_REGION 126 | 127 | - model_name: claude-3 128 | litellm_params: 129 | model: bedrock/anthropic.claude-3-haiku-20240307-v1:0 130 | aws_region_name: $AWS_REGION 131 | 132 | router_settings: 133 | routing_strategy: latency-based-routing" 134 | enable_pre_call_check: true 135 | ``` 136 | 137 | For more details on routing, please refer to the [LiteLLM](https://docs.litellm.ai/docs/routing) docs. -------------------------------------------------------------------------------- /docs/25-config/40-apply-config-changes.md: -------------------------------------------------------------------------------- 1 | # Apply configuration changes 2 | 3 | In order to apply LiteLLM configuration changes in ConfigMap, follow the steps below to ensure the deployment is patched, thereby forcing a pod restart to reflect latest changes: 4 | 5 | !!! note annotate "Note" 6 | If the configuration file contains environment variables (e.g., `$AWS_REGION`), use the `envsubst` command to replace the variables with their actual values, before updating the ConfigMap. See an example below. 7 | 8 | !!! tip annotate "Note" 9 | Replace the filename below to reflect your file with configuration update i.e. `$BEDROCK_LITELLM_DIR/litellm/config/proxy_config_model_alias.yaml` to be replaced with your relevant file that contains the updated configuration. 10 | 11 | 1. Run the following command to replace the environment variables with their values. 12 | ```sh 13 | envsubst < $BEDROCK_LITELLM_DIR/litellm/config/proxy_config_model_alias.yaml > /tmp/proxy_config_model_alias.yaml 14 | ``` 15 | 1. Run the following command to update the configMap with latest configuration from a file. 16 | ```sh 17 | kubectl create configmap litellm-config --from-file=litellm-config.yaml=/tmp/proxy_config_model_alias.yaml --dry-run=client -o yaml | kubectl apply -f - 18 | ``` 19 | 1. Patch the deployment to reflect the changes. After updating the ConfigMap, the pods won’t automatically reload the configuration. You need to patch the deployment to force a pod restart, ensuring that the new configuration is picked up. 20 | ```sh 21 | kubectl patch deployment litellm-deployment -p "{\"spec\":{\"template\":{\"metadata\":{\"annotations\":{\"configmap-update-timestamp\":\"$(date +'%s')\"}}}}}" 22 | ``` 23 | 1. Once the pods are restarted, verify the new configuration is applied by checking the logs. 24 | -------------------------------------------------------------------------------- /docs/30-app-changes/10-app-changes.md: -------------------------------------------------------------------------------- 1 | # Code Changes for OpenAI to Amazon Bedrock Migration 2 | 3 | With LiteLLM successfully deployed onto Amazon EKS and proxying requests to Amazon Bedrock, you can choose to migrate from OpenAI with minimal code changes. 4 | 5 | Requests from your applications that do not originate from Open WebUI can be modified by updating your OpenAI base endpoint to point to your ALB DNS name. 
This is similar to the change made earlier when Open WebUI's OpenAI connection was pointed at the LiteLLM service; this time, use the ALB host name, or your CNAME record (see the prerequisites section) that points to the ALB host name.
6 | 
7 | 1. Update your application's OpenAI API base endpoint to point to your LiteLLM hostname.
8 | 
9 | ```python
10 | import openai
11 | 
12 | openai.api_base = "https://your-litellm-hostname"
13 | openai.api_key = "your-litellm-api-key"  # a key accepted by LiteLLM (e.g. LITELLM_MASTER_KEY), not an OpenAI key
14 | 
15 | # Your existing OpenAI code remains unchanged
16 | response = openai.Completion.create(
17 | model="text-davinci-003",
18 | prompt="Translate the following English text to French: 'Hello, how are you?'"
19 | )
20 | ```
21 | 
22 | 1. Test and validate that your existing code and application work as expected, calling foundation models hosted on Amazon Bedrock via LiteLLM hosted on Amazon EKS. Best practices and considerations:
23 | 
24 | 1. Gradually migrate: Start by routing a small percentage of traffic through the LiteLLM proxy and gradually increase as you gain confidence.
25 | 2. Monitor performance: Use Amazon CloudWatch to monitor the performance and AWS Cost Explorer to monitor the costs of your AWS usage, including Amazon Bedrock.
26 | 3. Security: Ensure least-privilege AWS Identity and Access Management (AWS IAM) roles and security groups are in place for your EKS cluster and Amazon Bedrock access.
27 | 4. Scalability: Configure auto-scaling for your EKS nodes to handle varying loads.
28 | 
-------------------------------------------------------------------------------- /docs/99-contributors.md: --------------------------------------------------------------------------------
1 | # Contributors (sorted alphabetically)
2 | - Amit Lulla
3 | - Islam Mahgoub
4 | - Naresh Nagpal
5 | - Sam Sanders
-------------------------------------------------------------------------------- /docs/bedrock-litellm.drawio.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/bedrock-litellm/f04fcdbb849b28ba0c50be700b267f2c2c7dcf7d/docs/bedrock-litellm.drawio.png
-------------------------------------------------------------------------------- /docs/index.md: --------------------------------------------------------------------------------
1 | Some organizations have already built applications that work with OpenAI-compatible APIs and would like to switch to Amazon Bedrock -- this guide helps you achieve that without changing app code by using LiteLLM. 
2 | -------------------------------------------------------------------------------- /docs/open-webui.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/bedrock-litellm/f04fcdbb849b28ba0c50be700b267f2c2c7dcf7d/docs/open-webui.png -------------------------------------------------------------------------------- /eksctl/cluster-config.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: eksctl.io/v1alpha5 3 | kind: ClusterConfig 4 | 5 | metadata: 6 | name: $CLUSTER_NAME 7 | region: $AWS_REGION 8 | version: "1.32" 9 | 10 | availabilityZones: ["${AWS_REGION}a", "${AWS_REGION}b", "${AWS_REGION}c"] 11 | 12 | managedNodeGroups: 13 | - name: nodegroup 14 | desiredCapacity: 3 15 | instanceType: t3.medium 16 | ssh: 17 | enableSsm: true 18 | -------------------------------------------------------------------------------- /helm/open-webui-private-values.yaml: -------------------------------------------------------------------------------- 1 | persistence: 2 | storageClass: gp2 3 | service: 4 | type: ClusterIP 5 | pipelines: 6 | enabled: false 7 | -------------------------------------------------------------------------------- /helm/open-webui-public-values.yaml: -------------------------------------------------------------------------------- 1 | persistence: 2 | storageClass: gp2 3 | service: 4 | type: ClusterIP 5 | pipelines: 6 | enabled: false 7 | ingress: 8 | enabled: true 9 | annotations: { 10 | "kubernetes.io/ingress.class": "alb", 11 | "alb.ingress.kubernetes.io/scheme": "internet-facing", 12 | "alb.ingress.kubernetes.io/target-type": "ip", 13 | "alb.ingress.kubernetes.io/certificate-arn": "${OPEN_WEBUI_CERTIFICATE_ARN}" 14 | } 15 | tls: true 16 | host: "${OPEN_WEBUI_HOSTNAME}" -------------------------------------------------------------------------------- /iam/litellm-bedrock-and-sagemaker-policy.json: -------------------------------------------------------------------------------- 1 | { 2 | "Version": "2012-10-17", 3 | "Statement": [ 4 | { 5 | "Action": [ 6 | "bedrock:InvokeModel", 7 | "bedrock:InvokeModelWithResponseStream" 8 | ], 9 | "Effect": "Allow", 10 | "Resource": [ 11 | "arn:aws:bedrock:$AWS_REGION::foundation-model/anthropic.claude-3-5-sonnet-20240620-v1:0", 12 | "arn:aws:bedrock:$AWS_REGION::foundation-model/anthropic.claude-3-sonnet-20240229-v1:0", 13 | "arn:aws:bedrock:$AWS_REGION::foundation-model/anthropic.claude-3-haiku-20240307-v1:0", 14 | "arn:aws:bedrock:$AWS_REGION::foundation-model/meta.llama3-8b-instruct-v1:0", 15 | "arn:aws:bedrock:$AWS_REGION::foundation-model/mistral.mixtral-8x7b-instruct-v0:1", 16 | "arn:aws:bedrock:$AWS_REGION::foundation-model/anthropic.claude-3-5-sonnet-20240620-v1:0", 17 | "arn:aws:bedrock:$AWS_REGION::foundation-model/anthropic.claude-3-haiku-20240307-v1:0" 18 | ] 19 | }, 20 | { 21 | "Action": [ 22 | "sagemaker:InvokeEndpoint", 23 | "sagemaker:InvokeEndpointWithResponseStream" 24 | ], 25 | "Effect": "Allow", 26 | "Resource": [ 27 | "arn:aws:sagemaker:$AWS_REGION:$ACCOUNT_ID:endpoint/$ENDPOINT_NAME" 28 | ] 29 | } 30 | ] 31 | } -------------------------------------------------------------------------------- /iam/litellm-bedrock-policy.json: -------------------------------------------------------------------------------- 1 | { 2 | "Version": "2012-10-17", 3 | "Statement": [ 4 | { 5 | "Action": [ 6 | "bedrock:InvokeModel", 7 | "bedrock:InvokeModelWithResponseStream" 8 | ], 9 | "Effect": "Allow", 10 | "Resource": [ 
11 | "arn:aws:bedrock:$AWS_REGION::foundation-model/anthropic.claude-3-5-sonnet-20240620-v1:0", 12 | "arn:aws:bedrock:$AWS_REGION::foundation-model/anthropic.claude-3-sonnet-20240229-v1:0", 13 | "arn:aws:bedrock:$AWS_REGION::foundation-model/anthropic.claude-3-haiku-20240307-v1:0", 14 | "arn:aws:bedrock:$AWS_REGION::foundation-model/meta.llama3-8b-instruct-v1:0", 15 | "arn:aws:bedrock:$AWS_REGION::foundation-model/mistral.mixtral-8x7b-instruct-v0:1", 16 | "arn:aws:bedrock:$AWS_REGION::foundation-model/anthropic.claude-3-5-sonnet-20240620-v1:0", 17 | "arn:aws:bedrock:$AWS_REGION::foundation-model/anthropic.claude-3-haiku-20240307-v1:0" 18 | ] 19 | } 20 | ] 21 | } -------------------------------------------------------------------------------- /litellm/config/proxy_config.yaml: -------------------------------------------------------------------------------- 1 | model_list: 2 | - model_name: bedrock-claude-3-sonnet 3 | litellm_params: 4 | model: "bedrock/anthropic.claude-3-sonnet-20240229-v1:0" 5 | - model_name: bedrock-llama3-8b-instruct-v1 6 | litellm_params: 7 | model: "bedrock/meta.llama3-8b-instruct-v1:0" 8 | - model_name: bedrock-mixtral-8x7b-instruct-v0 9 | litellm_params: 10 | model: "bedrock/mistral.mixtral-8x7b-instruct-v0:1" -------------------------------------------------------------------------------- /litellm/config/proxy_config_global_rate_limit.yaml: -------------------------------------------------------------------------------- 1 | model_list: 2 | - model_name: claude-3 3 | litellm_params: 4 | model: bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0 5 | aws_region_name: $AWS_REGION 6 | rpm: 2 7 | tpm: 200 8 | 9 | router_settings: 10 | enable_pre_call_checks: true # 1. Enable pre-call checks 11 | 12 | 13 | litellm_settings: 14 | num_retries: 2 # retry call 2 times on each model_name (e.g. llama-3.1-8b) 15 | request_timeout: 45 16 | allowed_fails: 3 # cooldown model if it fails > 1 call in a minute. 17 | cooldown_time: 30 # how long to cooldown model if fails/min > allowed_fails 18 | set_verbose: False # Switch off Debug Logging, ensure your logs do not have any debugging on 19 | json_logs: true 20 | max_end_user_budget: 0.0001 21 | 22 | 23 | 24 | 25 | -------------------------------------------------------------------------------- /litellm/config/proxy_config_model_alias.yaml: -------------------------------------------------------------------------------- 1 | model_list: 2 | - model_name: claude-3 3 | litellm_params: 4 | model: bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0 5 | aws_region_name: $AWS_REGION 6 | 7 | litellm_settings: 8 | num_retries: 2 # retry call 2 times on each model_name (e.g. llama-3.1-8b) 9 | request_timeout: 45 10 | allowed_fails: 3 # cooldown model if it fails > 1 call in a minute. 
11 | cooldown_time: 30 # how long to cooldown model if fails/min > allowed_fails 12 | set_verbose: False # Switch off Debug Logging, ensure your logs do not have any debugging on 13 | json_logs: true -------------------------------------------------------------------------------- /litellm/config/proxy_config_with_sagemaker_models.yaml: -------------------------------------------------------------------------------- 1 | model_list: 2 | - model_name: bedrock-llama3-8b-instruct-v1 3 | litellm_params: 4 | model: bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0 5 | aws_region_name: $AWS_REGION 6 | - model_name: sagemaker-model 7 | litellm_params: 8 | model: sagemaker/$ENDPOINT_NAME 9 | aws_region_name: $AWS_REGION 10 | 11 | litellm_settings: 12 | num_retries: 2 # retry call 2 times on each model_name (e.g. llama-3.1-8b) 13 | request_timeout: 45 14 | allowed_fails: 3 # cooldown model if it fails > 1 call in a minute. 15 | cooldown_time: 30 # how long to cooldown model if fails/min > allowed_fails 16 | set_verbose: False # Switch off Debug Logging, ensure your logs do not have any debugging on 17 | json_logs: true -------------------------------------------------------------------------------- /litellm/config/route/proxy_config_fallback.yaml: -------------------------------------------------------------------------------- 1 | model_list: 2 | - model_name: claude-3-sonnet 3 | litellm_params: 4 | model: bedrock/invalid 5 | aws_region_name: 'us-east-1' 6 | - model_name: claude-3-haiku 7 | litellm_params: 8 | model: bedrock/anthropic.claude-3-haiku-20240307-v1:0 9 | aws_region_name: 'us-west-2' 10 | 11 | router_settings: 12 | enable_pre_call_checks: true # 1. Enable pre-call checks 13 | 14 | litellm_settings: 15 | num_retries: 3 # retry call 3 times on each model_name (e.g. zephyr-beta) 16 | request_timeout: 10 # raise Timeout error if call takes longer than 10s. Sets litellm.request_timeout 17 | fallbacks: [{"claude-3-sonnet": ["claude-3-haiku"]}] 18 | allowed_fails: 3 # cooldown model if it fails > 1 call in a minute. 19 | cooldown_time: 30 # how long to cooldown model if fails/min > allowed_fails 20 | -------------------------------------------------------------------------------- /litellm/config/route/proxy_config_latency_routing.yaml: -------------------------------------------------------------------------------- 1 | model_list: 2 | - model_name: claude-3 3 | litellm_params: 4 | model: bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0 5 | aws_region_name: $AWS_REGION 6 | - model_name: claude-3 7 | litellm_params: 8 | model: bedrock/anthropic.claude-3-haiku-20240307-v1:0 9 | aws_region_name: $AWS_REGION 10 | 11 | router_settings: 12 | routing_strategy: latency-based-routing" 13 | enable_pre_call_check: true 14 | -------------------------------------------------------------------------------- /litellm/config/route/proxy_config_load_balancer_default.yaml: -------------------------------------------------------------------------------- 1 | model_list: 2 | - model_name: claude-3 3 | litellm_params: 4 | model: bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0 5 | aws_region_name: $AWS_REGION 6 | - model_name: claude-3 7 | litellm_params: 8 | model: bedrock/anthropic.claude-3-haiku-20240307-v1:0 9 | aws_region_name: $AWS_REGION 10 | 11 | litellm_settings: 12 | num_retries: 2 # retry call 2 times on each model_name (e.g. llama-3.1-8b) 13 | request_timeout: 45 14 | allowed_fails: 3 # cooldown model if it fails > 1 call in a minute. 
15 | cooldown_time: 30 # how long to cooldown model if fails/min > allowed_fails 16 | set_verbose: False # Switch off Debug Logging, ensure your logs do not have any debugging on 17 | json_logs: true 18 | 19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /litellm/config/route/proxy_config_rate_limit_aware_routing.yaml: -------------------------------------------------------------------------------- 1 | model_list: 2 | - model_name: claude-3-sonnet 3 | litellm_params: 4 | model: bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0 5 | aws_region_name: $AWS_REGION 6 | tpm: 200 7 | rpm: 1 8 | - model_name: claude-3-haiku 9 | litellm_params: 10 | model: bedrock/anthropic.claude-3-haiku-20240307-v1:0 11 | aws_region_name: $AWS_REGION 12 | tpm: 200 13 | rpm: 1 14 | 15 | router_settings: 16 | routing_strategy: usage-based-routing-v2 17 | enable_pre_call_check: true -------------------------------------------------------------------------------- /litellm/deploy/k8s/ingress.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: networking.k8s.io/v1 3 | kind: Ingress 4 | metadata: 5 | name: litellm-ingress 6 | annotations: 7 | alb.ingress.kubernetes.io/target-type: ip 8 | alb.ingress.kubernetes.io/scheme: internet-facing 9 | alb.ingress.kubernetes.io/certificate-arn: ${LITELLM_CERTIFICATE_ARN} 10 | spec: 11 | ingressClassName: alb 12 | tls: 13 | - hosts: 14 | - ${LITELLM_HOSTNAME} 15 | rules: 16 | - http: 17 | paths: 18 | - path: / 19 | pathType: Prefix 20 | backend: 21 | service: 22 | name: litellm-service 23 | port: 24 | number: 4000 25 | -------------------------------------------------------------------------------- /mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: Migrate to Amazon Bedrock using LiteLLM 2 | site_url: https://aws-samples.github.io/bedrock-litellm/ 3 | repo_url: https://github.com/aws-samples/bedrock-litellm 4 | theme: 5 | name: material 6 | features: 7 | - navigation.path 8 | - navigation.indexes 9 | - content.code.copy 10 | 11 | # Website Page Tree 12 | nav: 13 | - Home: 'index.md' 14 | # - Get Started: 'general/get-started.md' 15 | - Architecture: './10-architecture/10-architecture.md' 16 | - Deployment: 17 | - Configure environment variables: './20-deploy/20-configure-env-variables.md' 18 | - Clone the repo: './20-deploy/25-clone-repo.md' 19 | - Install Kubernetes tools: './20-deploy/30-tools.md' 20 | - Create and prepare an EKS cluster: './20-deploy/40-create-prepare-cluster.md' 21 | - Install LiteLLM: './20-deploy/50-install-litellm.md' 22 | - (Optional) Expose LiteLLM: './20-deploy/60-expose-litellm.md' 23 | - (Optional) Open WebUI: './20-deploy/70-open-webui.md' 24 | - Clean-up: './20-deploy/99-clean-up.md' 25 | - Configuration: 26 | - Model aliases: './25-config/10-model-alias.md' 27 | - Rate limiting: './25-config/20-rate-limit.md' 28 | - Global rate limiting: './25-config/25-global-rate-limit.md' 29 | - Routing: './25-config/30-route.md' 30 | - Apply configuration changes: './25-config/40-apply-config-changes.md' 31 | - App changes: './30-app-changes/10-app-changes.md' 32 | - Contributors: './99-contributors.md' 33 | 34 | 35 | markdown_extensions: 36 | - admonition 37 | - pymdownx.highlight: 38 | anchor_linenums: true 39 | line_spans: __span 40 | pygments_lang_class: true 41 | - pymdownx.inlinehilite 42 | - pymdownx.snippets 43 | - pymdownx.superfences 
--------------------------------------------------------------------------------