├── .github └── workflows │ └── build-huggingface.yml ├── .gitignore ├── CODEOWNERS ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── NOTICE ├── README.md ├── docs └── huggingface │ └── tgi-0.9.3.md ├── examples └── huggingface │ ├── huggingface-large-model-inference-falcon-40b.ipynb │ ├── huggingface-large-model-inference-falcon-7b.ipynb │ └── huggingface-large-model-inference.ipynb ├── huggingface └── pytorch │ ├── optimum │ └── docker │ │ ├── 0.0.16 │ │ └── Dockerfile │ │ ├── 0.0.17 │ │ └── Dockerfile │ │ ├── 0.0.18 │ │ └── Dockerfile │ │ ├── 0.0.19 │ │ └── Dockerfile │ │ ├── 0.0.20 │ │ └── Dockerfile │ │ ├── 0.0.21 │ │ └── Dockerfile │ │ ├── 0.0.22 │ │ └── Dockerfile │ │ ├── 0.0.23 │ │ └── Dockerfile │ │ ├── 0.0.24 │ │ └── Dockerfile │ │ ├── 0.0.25 │ │ └── Dockerfile │ │ ├── 0.0.27 │ │ └── Dockerfile │ │ └── 0.0.28 │ │ └── Dockerfile │ ├── release_utils.py │ ├── tei │ └── docker │ │ ├── 1.2.3 │ │ ├── cpu │ │ │ └── Dockerfile │ │ └── gpu │ │ │ ├── Dockerfile │ │ │ └── sagemaker-entrypoint-cuda-all.sh │ │ ├── 1.4.0 │ │ ├── cpu │ │ │ └── Dockerfile │ │ └── gpu │ │ │ └── Dockerfile │ │ ├── 1.6.0 │ │ ├── cpu │ │ │ └── Dockerfile │ │ └── gpu │ │ │ └── Dockerfile │ │ ├── 1.7.0 │ │ ├── cpu │ │ │ └── Dockerfile │ │ └── gpu │ │ │ ├── Dockerfile │ │ │ └── start-cuda-compat.sh │ │ ├── buildspec.yml │ │ ├── tei-requirements.txt │ │ └── tei.py │ ├── tgi │ └── docker │ │ ├── 1.3.3 │ │ ├── Dockerfile │ │ └── THIRD-PARTY-LICENSES │ │ ├── 1.4.0 │ │ ├── Dockerfile │ │ └── THIRD-PARTY-LICENSES │ │ ├── 1.4.2 │ │ ├── Dockerfile │ │ └── THIRD-PARTY-LICENSES │ │ ├── 1.4.5 │ │ ├── Dockerfile │ │ └── THIRD-PARTY-LICENSES │ │ ├── 2.0.0 │ │ ├── Dockerfile │ │ └── THIRD-PARTY-LICENSES │ │ ├── 2.0.1 │ │ ├── Dockerfile │ │ └── THIRD-PARTY-LICENSES │ │ ├── 2.0.2 │ │ ├── Dockerfile │ │ └── THIRD-PARTY-LICENSES │ │ ├── 2.0.3 │ │ ├── Dockerfile │ │ └── THIRD-PARTY-LICENSES │ │ ├── 2.2.0 │ │ ├── Dockerfile │ │ └── THIRD-PARTY-LICENSES │ │ ├── 2.3.1 │ │ ├── Dockerfile │ │ ├── THIRD-PARTY-LICENSES │ │ └── entrypoint.sh │ │ ├── 2.4.0 │ │ ├── Dockerfile │ │ ├── THIRD-PARTY-LICENSES │ │ └── entrypoint.sh │ │ ├── 3.0.1 │ │ ├── Dockerfile │ │ ├── THIRD-PARTY-LICENSES │ │ └── entrypoint.sh │ │ ├── 3.1.0 │ │ ├── Dockerfile │ │ └── THIRD-PARTY-LICENSES │ │ ├── 3.1.1 │ │ ├── Dockerfile │ │ ├── THIRD-PARTY-LICENSES │ │ └── start-cuda-compat.sh │ │ ├── 3.2.0 │ │ ├── Dockerfile │ │ ├── THIRD-PARTY-LICENSES │ │ └── start-cuda-compat.sh │ │ ├── 3.2.3 │ │ ├── Dockerfile │ │ ├── THIRD-PARTY-LICENSES │ │ └── start-cuda-compat.sh │ │ ├── archived │ │ ├── 0.5.0 │ │ │ └── py3 │ │ │ │ └── cu118 │ │ │ │ └── Dockerfile.gpu │ │ ├── 0.6.0 │ │ │ └── py3 │ │ │ │ └── cu118 │ │ │ │ └── Dockerfile.gpu │ │ ├── 0.8.2 │ │ │ └── py3 │ │ │ │ └── cu118 │ │ │ │ └── Dockerfile.gpu │ │ ├── 0.9.3 │ │ │ └── py3 │ │ │ │ └── cu118 │ │ │ │ └── Dockerfile.gpu │ │ ├── 1.0.2 │ │ │ └── py3 │ │ │ │ └── cu118 │ │ │ │ ├── Dockerfile.gpu │ │ │ │ └── THIRD-PARTY-LICENSES │ │ ├── 1.0.3 │ │ │ ├── gpu │ │ │ │ ├── Dockerfile │ │ │ │ ├── PYTHON_PACKAGES_LICENSES │ │ │ │ └── THIRD-PARTY-LICENSES │ │ │ └── py3 │ │ │ │ └── cu118 │ │ │ │ ├── Dockerfile.gpu │ │ │ │ ├── PYTHON_PACKAGES_LICENSES │ │ │ │ └── THIRD-PARTY-LICENSES │ │ ├── 1.1.0 │ │ │ ├── gpu │ │ │ │ ├── Dockerfile │ │ │ │ └── THIRD-PARTY-LICENSES │ │ │ └── py3 │ │ │ │ └── cu118 │ │ │ │ ├── Dockerfile.gpu │ │ │ │ └── THIRD-PARTY-LICENSES │ │ ├── 1.2.0 │ │ │ └── gpu │ │ │ │ ├── Dockerfile │ │ │ │ └── THIRD-PARTY-LICENSES │ │ └── 1.3.1 │ │ │ └── gpu │ │ │ ├── Dockerfile │ │ │ └── THIRD-PARTY-LICENSES │ │ ├── 
buildspec.yml │ │ ├── tgi-requirements.txt │ │ └── tgi.py │ └── tgillamacpp │ └── docker │ ├── buildspec.yml │ ├── tgi-llamacpp-requirements.txt │ └── tgi-llamacpp.py ├── releases.json └── tests └── huggingface ├── README.md ├── enable_ssm_access_to_endpoint.py ├── requirements.txt └── sagemaker_dlc_test.py /.github/workflows/build-huggingface.yml: -------------------------------------------------------------------------------- 1 | name: Build and push HuggingFace TGI docker image 2 | 3 | on: 4 | workflow_dispatch: 5 | inputs: 6 | tgi-version: 7 | description: 'tgi version' 8 | required: true 9 | default: '1.1.0' 10 | pytorch-version: 11 | description: 'pytorch version' 12 | required: true 13 | default: '2.0.1' 14 | cuda-version: 15 | description: 'cuda version' 16 | required: true 17 | default: '118' 18 | ubuntu-version: 19 | description: 'ubuntu version' 20 | required: true 21 | default: '20.04' 22 | 23 | jobs: 24 | create-runner: 25 | runs-on: [ self-hosted, scheduler ] 26 | steps: 27 | - name: Create new G5 instance 28 | id: create_gpu 29 | run: | 30 | cd /home/ubuntu/djl_benchmark_script/scripts 31 | token=$( curl -X POST -H "Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}" \ 32 | https://api.github.com/repos/awslabs/llm-hosting-container/actions/runners/registration-token \ 33 | --fail \ 34 | | jq '.token' | tr -d '"' ) 35 | ./start_instance.sh action_g5 $token awslabs/llm-hosting-container 36 | outputs: 37 | gpu_instance_id: ${{ steps.create_gpu.outputs.action_g5_instance_id }} 38 | 39 | build-and-push-image: 40 | runs-on: [ self-hosted, g5 ] 41 | timeout-minutes: 150 42 | needs: create-runner 43 | env: 44 | TGI_VERSION: ${{github.event.inputs.tgi-version}} 45 | PYTORCH_VERSION: ${{github.event.inputs.pytorch-version}} 46 | CUDA_VERSION: ${{github.event.inputs.cuda-version}} 47 | UBUNTU_VERSION: ${{github.event.inputs.ubuntu-version}} 48 | steps: 49 | - uses: actions/checkout@v3 50 | with: 51 | repository: huggingface/text-generation-inference 52 | ref: v${{ env.TGI_VERSION }} 53 | - uses: actions/checkout@v3 54 | with: 55 | path: llm-hosting-container 56 | - name: Setup Docker buildx 57 | uses: docker/setup-buildx-action@v2 58 | with: 59 | install: true 60 | - name: Inject slug/short variables 61 | uses: rlespinasse/github-slug-action@v4.4.1 62 | - name: Configure AWS Credentials 63 | uses: aws-actions/configure-aws-credentials@v2 64 | with: 65 | aws-region: us-east-1 66 | - name: Login to Amazon ECR 67 | id: login-ecr 68 | uses: aws-actions/amazon-ecr-login@v1 69 | with: 70 | registries: "125045733377" 71 | - name: Clean docker env 72 | run: | 73 | yes | docker system prune -a --volumes 74 | - name: Build and push docker image 75 | uses: docker/build-push-action@v4 76 | env: 77 | REGISTRY: ${{ steps.login-ecr.outputs.registry }} 78 | REPOSITORY: djl-serving 79 | with: 80 | context: . 
81 | file: llm-hosting-container/huggingface/pytorch/tgi/docker/${{ env.TGI_VERSION }}/py3/cu${{ env.CUDA_VERSION }}/Dockerfile.gpu 82 | push: true 83 | target: sagemaker 84 | platforms: 'linux/amd64' 85 | provenance: false 86 | tags: ${{ env.REGISTRY }}/${{ env.REPOSITORY }}:${{ env.PYTORCH_VERSION }}-tgi${{ env.TGI_VERSION }}-gpu-py39-cu${{ env.CUDA_VERSION }}-ubuntu${{ env.UBUNTU_VERSION }} 87 | cache-from: type=gha 88 | cache-to: type=gha,mode=max 89 | 90 | run-tests: 91 | runs-on: [ self-hosted, g5 ] 92 | timeout-minutes: 30 93 | needs: [build-and-push-image, create-runner] 94 | env: 95 | TGI_VERSION: ${{github.event.inputs.tgi-version}} 96 | REPOSITORY: djl-serving 97 | TAG: ${{github.event.inputs.pytorch-version}}-tgi${{github.event.inputs.tgi-version}}-gpu-py39-cu${{github.event.inputs.cuda-version}}-ubuntu${{github.event.inputs.ubuntu-version}} 98 | steps: 99 | - uses: actions/checkout@v3 100 | - name: Clean env 101 | run: | 102 | yes | docker system prune -a --volumes 103 | sudo rm -rf /home/ubuntu/actions-runner/_work/_tool/Java_Corretto_jdk/ 104 | echo "wait dpkg lock..." 105 | while sudo fuser /var/{lib/{dpkg,apt/lists},cache/apt/archives}/lock >/dev/null 2>&1; do sleep 5; done 106 | - name: Configure AWS Credentials 107 | uses: aws-actions/configure-aws-credentials@v2 108 | with: 109 | aws-region: us-east-1 110 | - name: Login to Amazon ECR 111 | id: login-ecr 112 | uses: aws-actions/amazon-ecr-login@v1 113 | with: 114 | registries: "125045733377" 115 | - name: Pull docker 116 | env: 117 | REGISTRY: ${{ steps.login-ecr.outputs.registry }} 118 | run: | 119 | docker pull ${REGISTRY}/${REPOSITORY}:${TAG} 120 | - name: Test bloom-560m 121 | env: 122 | REGISTRY: ${{ steps.login-ecr.outputs.registry }} 123 | run: | 124 | set -ex 125 | HF_MODEL_ID=bigscience/bloom-560m && \ 126 | SM_NUM_GPUS=4 && \ 127 | TGI_VERSION=$TGI_VERSION && \ 128 | docker run --gpus all --shm-size 2g -itd --rm -p 8080:8080 \ 129 | -e SM_NUM_GPUS=$SM_NUM_GPUS -e HF_MODEL_ID=$HF_MODEL_ID \ 130 | ${REGISTRY}/${REPOSITORY}:${TAG} 131 | sleep 30 132 | ret=$(curl http://localhost:8080/invocations -X POST \ 133 | -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":128}}' \ 134 | -H 'Content-Type: application/json') 135 | [[ $ret != "[{\"generated_text\":\"What is Deep Learning?"* ]] && exit 1 136 | docker rm -f $(docker ps -aq) 137 | - name: Test gpt-neox-20b 138 | env: 139 | REGISTRY: ${{ steps.login-ecr.outputs.registry }} 140 | run: | 141 | set -ex 142 | HF_MODEL_ID=EleutherAI/gpt-neox-20b && \ 143 | SM_NUM_GPUS=4 && \ 144 | TGI_VERSION=$TGI_VERSION && \ 145 | docker run --gpus all --shm-size 2g -itd --rm -p 8080:8080 \ 146 | -e SM_NUM_GPUS=$SM_NUM_GPUS -e HF_MODEL_ID=$HF_MODEL_ID \ 147 | ${REGISTRY}/${REPOSITORY}:${TAG} 148 | sleep 400 149 | ret=$(curl http://localhost:8080/invocations -X POST \ 150 | -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":128}}' \ 151 | -H 'Content-Type: application/json') 152 | [[ $ret != "[{\"generated_text\":\"What is Deep Learning?"* ]] && exit 1 153 | docker rm -f $(docker ps -aq) 154 | - name: Test flan-t5-xxl 155 | env: 156 | REGISTRY: ${{ steps.login-ecr.outputs.registry }} 157 | run: | 158 | set -ex 159 | HF_MODEL_ID=google/flan-t5-xxl && \ 160 | SM_NUM_GPUS=4 && \ 161 | TGI_VERSION=$TGI_VERSION && \ 162 | docker run --gpus all --shm-size 2g -itd --rm -p 8080:8080 \ 163 | -e SM_NUM_GPUS=$SM_NUM_GPUS -e HF_MODEL_ID=$HF_MODEL_ID \ 164 | ${REGISTRY}/${REPOSITORY}:${TAG} 165 | sleep 400 166 | ret=$(curl http://localhost:8080/invocations -X 
POST \ 167 | -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":128}}' \ 168 | -H 'Content-Type: application/json') 169 | [[ $ret != "[{\"generated_text\""* ]] && exit 1 170 | docker rm -f $(docker ps -aq) 171 | - name: On fail step 172 | if: ${{ failure() }} 173 | run: | 174 | docker rm -f $(docker ps -aq) || true 175 | 176 | stop-runner: 177 | if: always() 178 | runs-on: [ self-hosted, scheduler ] 179 | needs: [run-tests, build-and-push-image, create-runner] 180 | steps: 181 | - name: Stop all instances 182 | run: | 183 | cd /home/ubuntu/djl_benchmark_script/scripts 184 | instance_id=${{ needs.create-runner.outputs.gpu_instance_id }} 185 | ./stop_instance.sh $instance_id 186 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | .idea 3 | 4 | -------------------------------------------------------------------------------- /CODEOWNERS: -------------------------------------------------------------------------------- 1 | * @awslabs/sagemaker-1p-algorithms -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | ## Code of Conduct 2 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 3 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 4 | opensource-codeofconduct@amazon.com with any additional questions or comments. 5 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing Guidelines 2 | 3 | Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional 4 | documentation, we greatly value feedback and contributions from our community. 5 | 6 | Please read through this document before submitting any issues or pull requests to ensure we have all the necessary 7 | information to effectively respond to your bug report or contribution. 8 | 9 | 10 | ## Reporting Bugs/Feature Requests 11 | 12 | We welcome you to use the GitHub issue tracker to report bugs or suggest features. 13 | 14 | When filing an issue, please check existing open, or recently closed, issues to make sure somebody else hasn't already 15 | reported the issue. Please try to include as much information as you can. Details like these are incredibly useful: 16 | 17 | * A reproducible test case or series of steps 18 | * The version of our code being used 19 | * Any modifications you've made relevant to the bug 20 | * Anything unusual about your environment or deployment 21 | 22 | 23 | ## Contributing via Pull Requests 24 | Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that: 25 | 26 | 1. You are working against the latest source on the *main* branch. 27 | 2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already. 28 | 3. You open an issue to discuss any significant work - we would hate for your time to be wasted. 29 | 30 | To send us a pull request, please: 31 | 32 | 1. Fork the repository. 33 | 2. Modify the source; please focus on the specific change you are contributing. 
If you also reformat all the code, it will be hard for us to focus on your change. 34 | 3. Ensure local tests pass. 35 | 4. Commit to your fork using clear commit messages. 36 | 5. Send us a pull request, answering any default questions in the pull request interface. 37 | 6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation. 38 | 39 | GitHub provides additional documentation on [forking a repository](https://help.github.com/articles/fork-a-repo/) and 40 | [creating a pull request](https://help.github.com/articles/creating-a-pull-request/). 41 | 42 | 43 | ## Finding contributions to work on 44 | Looking at the existing issues is a great way to find something to contribute to. As our projects, by default, use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any 'help wanted' issues is a great place to start. 45 | 46 | 47 | ## Code of Conduct 48 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 49 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 50 | opensource-codeofconduct@amazon.com with any additional questions or comments. 51 | 52 | 53 | ## Security issue notifications 54 | If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public GitHub issue. 55 | 56 | 57 | ## Licensing 58 | 59 | See the [LICENSE](LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution. 60 | -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # LLM Hosting Container 2 | 3 | Welcome to the LLM Hosting Container GitHub repository! 4 | 5 | This repository contains the Dockerfiles and associated resources for building and 6 | hosting containers for large language models and embedding models. 7 | 8 | * Hugging Face Text Generation Inference (TGI) container 9 | * Hugging Face Text Embeddings Inference (TEI) container 10 | 11 | ## Security 12 | 13 | See [CONTRIBUTING](CONTRIBUTING.md) for more information. 14 | 15 | ## License 16 | 17 | This project is licensed under the Apache-2.0 License.
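As a rough, illustrative sketch (not part of this repository), an image built from these Dockerfiles could be deployed to a SageMaker real-time endpoint with the SageMaker Python SDK. The image URI, IAM role ARN, model ID, and instance type below are placeholder assumptions; the request payload mirrors the one the CI workflow above sends to `/invocations`.

```python
# Illustrative sketch only: deploying a TGI image built from this repo to SageMaker.
# The image URI, role ARN, model ID, and instance type are placeholders, not values
# defined by this repository.
from sagemaker.huggingface import HuggingFaceModel

model = HuggingFaceModel(
    role="arn:aws:iam::111122223333:role/SageMakerExecutionRole",  # placeholder IAM role
    image_uri="123456789012.dkr.ecr.us-east-1.amazonaws.com/my-tgi-repo:latest",  # placeholder ECR image
    env={
        "HF_MODEL_ID": "bigscience/bloom-560m",  # model to serve, pulled from the Hugging Face Hub
        "SM_NUM_GPUS": "1",                      # number of GPUs/shards TGI should use
    },
)

predictor = model.deploy(
    initial_instance_count=1,
    instance_type="ml.g5.2xlarge",  # placeholder GPU instance type
)

# Same request shape the CI workflow uses against the /invocations route.
print(predictor.predict({
    "inputs": "What is Deep Learning?",
    "parameters": {"max_new_tokens": 128},
}))
```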
18 | -------------------------------------------------------------------------------- /huggingface/pytorch/optimum/docker/0.0.16/Dockerfile: -------------------------------------------------------------------------------- 1 | # Fetch and extract the TGI sources 2 | FROM alpine AS tgi 3 | RUN mkdir -p /tgi 4 | ADD https://github.com/huggingface/text-generation-inference/archive/refs/tags/v1.0.2.tar.gz /tgi/sources.tar.gz 5 | RUN tar -C /tgi -xf /tgi/sources.tar.gz --strip-components=1 6 | 7 | # Build cargo components (adapted from TGI original Dockerfile) 8 | # Note that the build image is aligned on the same Linux version as the base image (Debian bookworm/ Ubuntu 22.04) 9 | FROM lukemathwalker/cargo-chef:latest-rust-1.71-bookworm AS chef 10 | WORKDIR /usr/src 11 | 12 | ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse 13 | 14 | FROM chef as planner 15 | COPY --from=tgi /tgi/Cargo.toml Cargo.toml 16 | COPY --from=tgi /tgi/rust-toolchain.toml rust-toolchain.toml 17 | COPY --from=tgi /tgi/proto proto 18 | COPY --from=tgi /tgi/benchmark benchmark 19 | COPY --from=tgi /tgi/router router 20 | COPY --from=tgi /tgi/launcher launcher 21 | RUN cargo chef prepare --recipe-path recipe.json 22 | 23 | FROM chef AS builder 24 | 25 | RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && \ 26 | curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v21.12/$PROTOC_ZIP && \ 27 | unzip -o $PROTOC_ZIP -d /usr/local bin/protoc && \ 28 | unzip -o $PROTOC_ZIP -d /usr/local 'include/*' && \ 29 | rm -f $PROTOC_ZIP 30 | 31 | COPY --from=planner /usr/src/recipe.json recipe.json 32 | RUN cargo chef cook --release --recipe-path recipe.json 33 | 34 | COPY --from=tgi /tgi/Cargo.toml Cargo.toml 35 | COPY --from=tgi /tgi/rust-toolchain.toml rust-toolchain.toml 36 | COPY --from=tgi /tgi/proto proto 37 | COPY --from=tgi /tgi/benchmark benchmark 38 | COPY --from=tgi /tgi/router router 39 | COPY --from=tgi /tgi/launcher launcher 40 | RUN cargo build --release --workspace --exclude benchmark 41 | 42 | # Fetch optimum-neuron sources 43 | FROM alpine/git AS optimum-neuron 44 | RUN git clone --depth 1 --branch v0.0.16 https://github.com/huggingface/optimum-neuron.git /optimum-neuron 45 | 46 | # Python base image 47 | # Ubuntu 22.04 has Python 3.10 as default version https://packages.ubuntu.com/jammy/python3 48 | FROM ubuntu:22.04 AS base 49 | 50 | RUN apt-get update -y \ 51 | && apt-get install -y --no-install-recommends \ 52 | python3-pip \ 53 | python3-setuptools \ 54 | python-is-python3 \ 55 | && rm -rf /var/lib/apt/lists/* \ 56 | && apt-get clean 57 | RUN pip3 --no-cache-dir install --upgrade pip 58 | 59 | # Python server build image 60 | FROM base AS pyserver 61 | 62 | RUN apt-get update -y \ 63 | && apt-get install -y --no-install-recommends \ 64 | make \ 65 | python3-venv \ 66 | && rm -rf /var/lib/apt/lists/* \ 67 | && apt-get clean 68 | 69 | RUN install -d /pyserver 70 | WORKDIR /pyserver 71 | COPY --from=optimum-neuron /optimum-neuron/text-generation-inference/server server 72 | COPY --from=tgi /tgi/proto proto 73 | RUN pip3 install -r server/build-requirements.txt 74 | RUN VERBOSE=1 BUILDDIR=/pyserver/build PROTODIR=/pyserver/proto make -C server gen-server 75 | 76 | # Neuron base image (used for deployment) 77 | FROM base AS neuron 78 | 79 | # Install system prerequisites 80 | RUN apt-get update -y \ 81 | && apt-get install -y --no-install-recommends \ 82 | gnupg2 \ 83 | wget \ 84 | && rm -rf /var/lib/apt/lists/* \ 85 | && apt-get clean 86 | 87 | RUN echo "deb https://apt.repos.neuron.amazonaws.com jammy main" > 
/etc/apt/sources.list.d/neuron.list 88 | RUN wget -qO - https://apt.repos.neuron.amazonaws.com/GPG-PUB-KEY-AMAZON-AWS-NEURON.PUB | apt-key add - 89 | 90 | # Install neuronx packages 91 | RUN apt-get update -y \ 92 | && apt-get install -y --no-install-recommends \ 93 | aws-neuronx-dkms=2.14.5.0 \ 94 | aws-neuronx-collectives=2.18.18.0-f7a1f7a35 \ 95 | aws-neuronx-runtime-lib=2.18.14.0-0678cafac \ 96 | aws-neuronx-tools=2.15.4.0 \ 97 | && rm -rf /var/lib/apt/lists/* \ 98 | && apt-get clean 99 | 100 | ENV PATH="/opt/bin/:/opt/aws/neuron/bin:${PATH}" 101 | 102 | RUN pip3 install \ 103 | # Neuron 2.15.0 104 | neuronx-cc==2.11.0.34 \ 105 | torch-neuronx==1.13.1.1.12.1 \ 106 | transformers-neuronx==0.8.268 \ 107 | --extra-index-url=https://pip.repos.neuron.amazonaws.com 108 | 109 | # Install HuggingFace packages 110 | RUN pip3 install \ 111 | hf_transfer 112 | 113 | # Install optimum-neuron 114 | COPY --from=optimum-neuron /optimum-neuron optimum-neuron 115 | RUN pip3 install ./optimum-neuron 116 | 117 | # TGI base env 118 | ENV HUGGINGFACE_HUB_CACHE=/data \ 119 | HF_HUB_ENABLE_HF_TRANSFER=1 \ 120 | PORT=80 121 | 122 | # Install router 123 | COPY --from=builder /usr/src/target/release/text-generation-router /usr/local/bin/text-generation-router 124 | # Install launcher 125 | COPY --from=builder /usr/src/target/release/text-generation-launcher /usr/local/bin/text-generation-launcher 126 | # Install python server 127 | COPY --from=pyserver /pyserver/build/dist dist 128 | RUN pip install dist/text-generation-server*.tar.gz 129 | 130 | # AWS Sagemaker compatible image 131 | FROM neuron as sagemaker 132 | 133 | COPY --from=optimum-neuron /optimum-neuron/text-generation-inference/sagemaker-entrypoint.sh entrypoint.sh 134 | RUN chmod +x entrypoint.sh 135 | 136 | ENTRYPOINT ["./entrypoint.sh"] 137 | 138 | 139 | RUN apt-get update && apt-get install -y --no-install-recommends curl unzip \ 140 | && rm -rf /var/lib/apt/lists/* 141 | RUN HOME_DIR=/root && \ 142 | pip install requests && \ 143 | curl -o ${HOME_DIR}/oss_compliance.zip https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip && \ 144 | unzip ${HOME_DIR}/oss_compliance.zip -d ${HOME_DIR}/ && \ 145 | cp ${HOME_DIR}/oss_compliance/test/testOSSCompliance /usr/local/bin/testOSSCompliance && \ 146 | chmod +x /usr/local/bin/testOSSCompliance && \ 147 | chmod +x ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh && \ 148 | ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh ${HOME_DIR} python && \ 149 | rm -rf ${HOME_DIR}/oss_compliance* 150 | 151 | RUN echo "N.B.: Although this image is released under the Apache-2.0 License, the Dockerfile used to build the image \ 152 | has an indirect documentation dependency on third party project. The \ 153 | project's licensing includes the license. \ 154 | \n\n\ 155 | N.B.: Although this image is released under the Apache-2.0 License, the Dockerfile used to build the image uses the \ 156 | third party project. The project's licensing \ 157 | includes the https://github.com/huggingface/text-generation-inference/blob/main/LICENSE> \ 158 | license." 
> /root/THIRD_PARTY_LICENSES 159 | 160 | LABEL dlc_major_version="1" 161 | LABEL com.amazonaws.ml.engines.sagemaker.dlc.framework.huggingface.tgi="true" 162 | LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port="true" 163 | -------------------------------------------------------------------------------- /huggingface/pytorch/optimum/docker/0.0.17/Dockerfile: -------------------------------------------------------------------------------- 1 | # Fetch and extract the TGI sources 2 | FROM alpine AS tgi 3 | RUN mkdir -p /tgi 4 | ADD https://github.com/huggingface/text-generation-inference/archive/refs/tags/v1.0.2.tar.gz /tgi/sources.tar.gz 5 | RUN tar -C /tgi -xf /tgi/sources.tar.gz --strip-components=1 6 | 7 | # Build cargo components (adapted from TGI original Dockerfile) 8 | # Note that the build image is aligned on the same Linux version as the base image (Debian bookworm/ Ubuntu 22.04) 9 | FROM lukemathwalker/cargo-chef:latest-rust-1.71-bookworm AS chef 10 | WORKDIR /usr/src 11 | 12 | ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse 13 | 14 | FROM chef as planner 15 | COPY --from=tgi /tgi/Cargo.toml Cargo.toml 16 | COPY --from=tgi /tgi/rust-toolchain.toml rust-toolchain.toml 17 | COPY --from=tgi /tgi/proto proto 18 | COPY --from=tgi /tgi/benchmark benchmark 19 | COPY --from=tgi /tgi/router router 20 | COPY --from=tgi /tgi/launcher launcher 21 | RUN cargo chef prepare --recipe-path recipe.json 22 | 23 | FROM chef AS builder 24 | 25 | RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && \ 26 | curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v21.12/$PROTOC_ZIP && \ 27 | unzip -o $PROTOC_ZIP -d /usr/local bin/protoc && \ 28 | unzip -o $PROTOC_ZIP -d /usr/local 'include/*' && \ 29 | rm -f $PROTOC_ZIP 30 | 31 | COPY --from=planner /usr/src/recipe.json recipe.json 32 | RUN cargo chef cook --release --recipe-path recipe.json 33 | 34 | COPY --from=tgi /tgi/Cargo.toml Cargo.toml 35 | COPY --from=tgi /tgi/rust-toolchain.toml rust-toolchain.toml 36 | COPY --from=tgi /tgi/proto proto 37 | COPY --from=tgi /tgi/benchmark benchmark 38 | COPY --from=tgi /tgi/router router 39 | COPY --from=tgi /tgi/launcher launcher 40 | RUN cargo build --release --workspace --exclude benchmark 41 | 42 | # Fetch optimum-neuron sources 43 | FROM alpine/git AS optimum-neuron 44 | RUN git clone --depth 1 --branch v0.0.17 https://github.com/huggingface/optimum-neuron.git /optimum-neuron 45 | 46 | # Python base image 47 | FROM ubuntu:22.04 AS base 48 | 49 | RUN apt-get update -y \ 50 | && apt-get install -y --no-install-recommends \ 51 | python3-pip \ 52 | python3-setuptools \ 53 | python-is-python3 \ 54 | && rm -rf /var/lib/apt/lists/* \ 55 | && apt-get clean 56 | RUN pip3 --no-cache-dir install --upgrade pip 57 | 58 | # Python server build image 59 | FROM base AS pyserver 60 | 61 | RUN apt-get update -y \ 62 | && apt-get install -y --no-install-recommends \ 63 | make \ 64 | python3-venv \ 65 | && rm -rf /var/lib/apt/lists/* \ 66 | && apt-get clean 67 | 68 | RUN install -d /pyserver 69 | WORKDIR /pyserver 70 | COPY --from=optimum-neuron /optimum-neuron/text-generation-inference/server server 71 | COPY --from=tgi /tgi/proto proto 72 | RUN pip3 install -r server/build-requirements.txt 73 | RUN VERBOSE=1 BUILDDIR=/pyserver/build PROTODIR=/pyserver/proto make -C server gen-server 74 | 75 | # Neuron base image (used for deployment) 76 | FROM base AS neuron 77 | 78 | # Install system prerequisites 79 | RUN apt-get update -y \ 80 | && apt-get install -y --no-install-recommends \ 81 | gnupg2 \ 82 | wget \ 83 
| && rm -rf /var/lib/apt/lists/* \ 84 | && apt-get clean 85 | 86 | RUN echo "deb https://apt.repos.neuron.amazonaws.com jammy main" > /etc/apt/sources.list.d/neuron.list 87 | RUN wget -qO - https://apt.repos.neuron.amazonaws.com/GPG-PUB-KEY-AMAZON-AWS-NEURON.PUB | apt-key add - 88 | 89 | # Install neuronx packages 90 | RUN apt-get update -y \ 91 | && apt-get install -y --no-install-recommends \ 92 | aws-neuronx-dkms=2.15.9.0 \ 93 | aws-neuronx-collectives=2.19.7.0-530fb3064 \ 94 | aws-neuronx-runtime-lib=2.19.5.0-97e2d271b \ 95 | aws-neuronx-tools=2.16.1.0 \ 96 | && rm -rf /var/lib/apt/lists/* \ 97 | && apt-get clean 98 | 99 | ENV PATH="/opt/bin/:/opt/aws/neuron/bin:${PATH}" 100 | 101 | RUN pip3 install \ 102 | neuronx-cc==2.12.54.0 \ 103 | torch-neuronx==1.13.1.1.13.0 \ 104 | transformers-neuronx==0.9.474 \ 105 | --extra-index-url=https://pip.repos.neuron.amazonaws.com 106 | 107 | # Install HuggingFace packages 108 | RUN pip3 install \ 109 | hf_transfer 110 | 111 | # Install optimum-neuron 112 | COPY --from=optimum-neuron /optimum-neuron optimum-neuron 113 | RUN pip3 install ./optimum-neuron 114 | 115 | # TGI base env 116 | ENV HUGGINGFACE_HUB_CACHE=/data \ 117 | HF_HUB_ENABLE_HF_TRANSFER=1 \ 118 | PORT=80 119 | 120 | # Install router 121 | COPY --from=builder /usr/src/target/release/text-generation-router /usr/local/bin/text-generation-router 122 | # Install launcher 123 | COPY --from=builder /usr/src/target/release/text-generation-launcher /usr/local/bin/text-generation-launcher 124 | # Install python server 125 | COPY --from=pyserver /pyserver/build/dist dist 126 | RUN pip install dist/text-generation-server*.tar.gz 127 | 128 | # AWS Sagemaker compatible image 129 | FROM neuron as sagemaker 130 | 131 | COPY --from=optimum-neuron /optimum-neuron/text-generation-inference/sagemaker-entrypoint.sh entrypoint.sh 132 | RUN chmod +x entrypoint.sh 133 | 134 | ENTRYPOINT ["./entrypoint.sh"] 135 | 136 | 137 | RUN apt-get update && apt-get install -y --no-install-recommends curl unzip \ 138 | && rm -rf /var/lib/apt/lists/* 139 | RUN HOME_DIR=/root && \ 140 | pip install requests && \ 141 | curl -o ${HOME_DIR}/oss_compliance.zip https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip && \ 142 | unzip ${HOME_DIR}/oss_compliance.zip -d ${HOME_DIR}/ && \ 143 | cp ${HOME_DIR}/oss_compliance/test/testOSSCompliance /usr/local/bin/testOSSCompliance && \ 144 | chmod +x /usr/local/bin/testOSSCompliance && \ 145 | chmod +x ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh && \ 146 | ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh ${HOME_DIR} python && \ 147 | rm -rf ${HOME_DIR}/oss_compliance* 148 | 149 | RUN echo "N.B.: Although this image is released under the Apache-2.0 License, the Dockerfile used to build the image \ 150 | has an indirect documentation dependency on third party project. The \ 151 | project's licensing includes the license. \ 152 | \n\n\ 153 | N.B.: Although this image is released under the Apache-2.0 License, the Dockerfile used to build the image uses the \ 154 | third party project. The project's licensing \ 155 | includes the https://github.com/huggingface/text-generation-inference/blob/main/LICENSE> \ 156 | license." 
> /root/THIRD_PARTY_LICENSES 157 | 158 | LABEL dlc_major_version="1" 159 | LABEL com.amazonaws.ml.engines.sagemaker.dlc.framework.huggingface.tgi="true" 160 | LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port="true" 161 | -------------------------------------------------------------------------------- /huggingface/pytorch/optimum/docker/0.0.18/Dockerfile: -------------------------------------------------------------------------------- 1 | # Fetch and extract the TGI sources 2 | FROM alpine AS tgi 3 | RUN mkdir -p /tgi 4 | ADD https://github.com/huggingface/text-generation-inference/archive/refs/tags/v1.0.2.tar.gz /tgi/sources.tar.gz 5 | RUN tar -C /tgi -xf /tgi/sources.tar.gz --strip-components=1 6 | 7 | # Build cargo components (adapted from TGI original Dockerfile) 8 | # Note that the build image is aligned on the same Linux version as the base image (Debian bookworm/ Ubuntu 22.04) 9 | FROM lukemathwalker/cargo-chef:latest-rust-1.71-bookworm AS chef 10 | WORKDIR /usr/src 11 | 12 | ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse 13 | 14 | FROM chef as planner 15 | COPY --from=tgi /tgi/Cargo.toml Cargo.toml 16 | COPY --from=tgi /tgi/Cargo.lock Cargo.lock 17 | COPY --from=tgi /tgi/rust-toolchain.toml rust-toolchain.toml 18 | COPY --from=tgi /tgi/proto proto 19 | COPY --from=tgi /tgi/benchmark benchmark 20 | COPY --from=tgi /tgi/router router 21 | COPY --from=tgi /tgi/launcher launcher 22 | RUN cargo chef prepare --recipe-path recipe.json 23 | 24 | FROM chef AS builder 25 | 26 | RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && \ 27 | curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v21.12/$PROTOC_ZIP && \ 28 | unzip -o $PROTOC_ZIP -d /usr/local bin/protoc && \ 29 | unzip -o $PROTOC_ZIP -d /usr/local 'include/*' && \ 30 | rm -f $PROTOC_ZIP 31 | 32 | COPY --from=planner /usr/src/recipe.json recipe.json 33 | RUN cargo chef cook --release --recipe-path recipe.json 34 | 35 | COPY --from=tgi /tgi/Cargo.toml Cargo.toml 36 | COPY --from=tgi /tgi/Cargo.lock Cargo.lock 37 | COPY --from=tgi /tgi/rust-toolchain.toml rust-toolchain.toml 38 | COPY --from=tgi /tgi/proto proto 39 | COPY --from=tgi /tgi/benchmark benchmark 40 | COPY --from=tgi /tgi/router router 41 | COPY --from=tgi /tgi/launcher launcher 42 | RUN cargo build --release --workspace --exclude benchmark 43 | 44 | # Fetch optimum-neuron sources 45 | FROM alpine/git AS optimum-neuron 46 | RUN git clone --depth 1 --branch v0.0.18 https://github.com/huggingface/optimum-neuron.git /optimum-neuron 47 | 48 | # Python base image 49 | FROM ubuntu:22.04 AS base 50 | 51 | RUN apt-get update -y \ 52 | && apt-get install -y --no-install-recommends \ 53 | python3-pip \ 54 | python3-setuptools \ 55 | python-is-python3 \ 56 | && rm -rf /var/lib/apt/lists/* \ 57 | && apt-get clean 58 | RUN pip3 --no-cache-dir install --upgrade pip 59 | 60 | # Python server build image 61 | FROM base AS pyserver 62 | 63 | RUN apt-get update -y \ 64 | && apt-get install -y --no-install-recommends \ 65 | make \ 66 | python3-venv \ 67 | && rm -rf /var/lib/apt/lists/* \ 68 | && apt-get clean 69 | 70 | RUN install -d /pyserver 71 | WORKDIR /pyserver 72 | COPY --from=optimum-neuron /optimum-neuron/text-generation-inference/server server 73 | COPY --from=tgi /tgi/proto proto 74 | RUN pip3 install -r server/build-requirements.txt 75 | RUN VERBOSE=1 BUILDDIR=/pyserver/build PROTODIR=/pyserver/proto make -C server gen-server 76 | 77 | # Neuron base image (used for deployment) 78 | FROM base AS neuron 79 | 80 | # Install system prerequisites 81 | RUN 
apt-get update -y \ 82 | && apt-get install -y --no-install-recommends \ 83 | gnupg2 \ 84 | wget \ 85 | && rm -rf /var/lib/apt/lists/* \ 86 | && apt-get clean 87 | 88 | RUN echo "deb https://apt.repos.neuron.amazonaws.com jammy main" > /etc/apt/sources.list.d/neuron.list 89 | RUN wget -qO - https://apt.repos.neuron.amazonaws.com/GPG-PUB-KEY-AMAZON-AWS-NEURON.PUB | apt-key add - 90 | 91 | # Install neuronx packages 92 | RUN apt-get update -y \ 93 | && apt-get install -y --no-install-recommends \ 94 | aws-neuronx-dkms=2.15.9.0 \ 95 | aws-neuronx-collectives=2.19.7.0-530fb3064 \ 96 | aws-neuronx-runtime-lib=2.19.5.0-97e2d271b \ 97 | aws-neuronx-tools=2.16.1.0 \ 98 | && rm -rf /var/lib/apt/lists/* \ 99 | && apt-get clean 100 | 101 | ENV PATH="/opt/bin/:/opt/aws/neuron/bin:${PATH}" 102 | 103 | RUN pip3 install \ 104 | neuronx-cc==2.12.68.0 \ 105 | torch-neuronx==1.13.1.1.13.0 \ 106 | transformers-neuronx==0.9.474 \ 107 | --extra-index-url=https://pip.repos.neuron.amazonaws.com 108 | 109 | # Install HuggingFace packages 110 | RUN pip3 install \ 111 | hf_transfer 112 | 113 | # Install optimum-neuron 114 | COPY --from=optimum-neuron /optimum-neuron optimum-neuron 115 | RUN pip3 install ./optimum-neuron 116 | 117 | # TGI base env 118 | ENV HUGGINGFACE_HUB_CACHE=/data \ 119 | HF_HUB_ENABLE_HF_TRANSFER=1 \ 120 | PORT=80 121 | 122 | # Install router 123 | COPY --from=builder /usr/src/target/release/text-generation-router /usr/local/bin/text-generation-router 124 | # Install launcher 125 | COPY --from=builder /usr/src/target/release/text-generation-launcher /usr/local/bin/text-generation-launcher 126 | # Install python server 127 | COPY --from=pyserver /pyserver/build/dist dist 128 | RUN pip install dist/text-generation-server*.tar.gz 129 | 130 | # AWS Sagemaker compatible image 131 | FROM neuron as sagemaker 132 | 133 | COPY --from=optimum-neuron /optimum-neuron/text-generation-inference/sagemaker-entrypoint.sh entrypoint.sh 134 | RUN chmod +x entrypoint.sh 135 | 136 | ENTRYPOINT ["./entrypoint.sh"] 137 | 138 | 139 | RUN apt-get update && apt-get install -y --no-install-recommends curl unzip \ 140 | && rm -rf /var/lib/apt/lists/* 141 | RUN HOME_DIR=/root && \ 142 | pip install requests && \ 143 | curl -o ${HOME_DIR}/oss_compliance.zip https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip && \ 144 | unzip ${HOME_DIR}/oss_compliance.zip -d ${HOME_DIR}/ && \ 145 | cp ${HOME_DIR}/oss_compliance/test/testOSSCompliance /usr/local/bin/testOSSCompliance && \ 146 | chmod +x /usr/local/bin/testOSSCompliance && \ 147 | chmod +x ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh && \ 148 | ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh ${HOME_DIR} python && \ 149 | rm -rf ${HOME_DIR}/oss_compliance* 150 | 151 | RUN echo "N.B.: Although this image is released under the Apache-2.0 License, the Dockerfile used to build the image \ 152 | has an indirect documentation dependency on third party project. The \ 153 | project's licensing includes the license. \ 154 | \n\n\ 155 | N.B.: Although this image is released under the Apache-2.0 License, the Dockerfile used to build the image uses the \ 156 | third party project. The project's licensing \ 157 | includes the https://github.com/huggingface/text-generation-inference/blob/main/LICENSE> \ 158 | license." 
> /root/THIRD_PARTY_LICENSES 159 | 160 | LABEL dlc_major_version="1" 161 | LABEL com.amazonaws.ml.engines.sagemaker.dlc.framework.huggingface.tgi="true" 162 | LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port="true" 163 | -------------------------------------------------------------------------------- /huggingface/pytorch/optimum/docker/0.0.19/Dockerfile: -------------------------------------------------------------------------------- 1 | # Fetch and extract the TGI sources 2 | FROM alpine AS tgi 3 | RUN mkdir -p /tgi 4 | ADD https://github.com/huggingface/text-generation-inference/archive/refs/tags/v1.4.1.tar.gz /tgi/sources.tar.gz 5 | RUN tar -C /tgi -xf /tgi/sources.tar.gz --strip-components=1 6 | 7 | # Build cargo components (adapted from TGI original Dockerfile) 8 | # Note that the build image is aligned on the same Linux version as the base image (Debian bookworm/ Ubuntu 22.04) 9 | FROM lukemathwalker/cargo-chef:latest-rust-1.75-bookworm AS chef 10 | WORKDIR /usr/src 11 | 12 | ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse 13 | 14 | FROM chef as planner 15 | COPY --from=tgi /tgi/Cargo.toml Cargo.toml 16 | COPY --from=tgi /tgi/Cargo.lock Cargo.lock 17 | COPY --from=tgi /tgi/rust-toolchain.toml rust-toolchain.toml 18 | COPY --from=tgi /tgi/proto proto 19 | COPY --from=tgi /tgi/benchmark benchmark 20 | COPY --from=tgi /tgi/router router 21 | COPY --from=tgi /tgi/launcher launcher 22 | RUN cargo chef prepare --recipe-path recipe.json 23 | 24 | FROM chef AS builder 25 | 26 | RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && \ 27 | curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v21.12/$PROTOC_ZIP && \ 28 | unzip -o $PROTOC_ZIP -d /usr/local bin/protoc && \ 29 | unzip -o $PROTOC_ZIP -d /usr/local 'include/*' && \ 30 | rm -f $PROTOC_ZIP 31 | 32 | COPY --from=planner /usr/src/recipe.json recipe.json 33 | RUN cargo chef cook --release --recipe-path recipe.json 34 | 35 | COPY --from=tgi /tgi/Cargo.toml Cargo.toml 36 | COPY --from=tgi /tgi/Cargo.lock Cargo.lock 37 | COPY --from=tgi /tgi/rust-toolchain.toml rust-toolchain.toml 38 | COPY --from=tgi /tgi/proto proto 39 | COPY --from=tgi /tgi/benchmark benchmark 40 | COPY --from=tgi /tgi/router router 41 | COPY --from=tgi /tgi/launcher launcher 42 | RUN cargo build --release --workspace --exclude benchmark 43 | 44 | # Fetch optimum-neuron sources 45 | FROM alpine/git AS optimum-neuron 46 | RUN git clone --depth 1 --branch v0.0.19 https://github.com/huggingface/optimum-neuron.git /optimum-neuron 47 | 48 | # Python base image 49 | FROM ubuntu:22.04 AS base 50 | 51 | RUN apt-get update -y \ 52 | && apt-get install -y --no-install-recommends \ 53 | python3-pip \ 54 | python3-setuptools \ 55 | python-is-python3 \ 56 | && rm -rf /var/lib/apt/lists/* \ 57 | && apt-get clean 58 | RUN pip3 --no-cache-dir install --upgrade pip 59 | 60 | # Python server build image 61 | FROM base AS pyserver 62 | 63 | RUN apt-get update -y \ 64 | && apt-get install -y --no-install-recommends \ 65 | make \ 66 | python3-venv \ 67 | && rm -rf /var/lib/apt/lists/* \ 68 | && apt-get clean 69 | 70 | RUN install -d /pyserver 71 | WORKDIR /pyserver 72 | COPY --from=optimum-neuron /optimum-neuron/text-generation-inference/server server 73 | COPY --from=tgi /tgi/proto proto 74 | RUN pip3 install -r server/build-requirements.txt 75 | RUN VERBOSE=1 BUILDDIR=/pyserver/build PROTODIR=/pyserver/proto make -C server gen-server 76 | 77 | # Neuron base image (used for deployment) 78 | FROM base AS neuron 79 | 80 | # Install system prerequisites 81 | RUN 
apt-get update -y \ 82 | && apt-get install -y --no-install-recommends \ 83 | gnupg2 \ 84 | wget \ 85 | && rm -rf /var/lib/apt/lists/* \ 86 | && apt-get clean 87 | 88 | RUN echo "deb https://apt.repos.neuron.amazonaws.com jammy main" > /etc/apt/sources.list.d/neuron.list 89 | RUN wget -qO - https://apt.repos.neuron.amazonaws.com/GPG-PUB-KEY-AMAZON-AWS-NEURON.PUB | apt-key add - 90 | 91 | # Install neuronx packages 92 | RUN apt-get update -y \ 93 | && apt-get install -y --no-install-recommends \ 94 | aws-neuronx-dkms=2.15.9.0 \ 95 | aws-neuronx-collectives=2.20.11.0-c101c322e \ 96 | aws-neuronx-runtime-lib=2.20.11.0-b7d33e68b \ 97 | aws-neuronx-tools=2.17.0.0 \ 98 | && rm -rf /var/lib/apt/lists/* \ 99 | && apt-get clean 100 | 101 | ENV PATH="/opt/bin/:/opt/aws/neuron/bin:${PATH}" 102 | 103 | RUN pip3 install \ 104 | neuronx-cc==2.12.68.0 \ 105 | torch-neuronx==1.13.1.1.13.1 \ 106 | transformers-neuronx==0.9.474 \ 107 | --extra-index-url=https://pip.repos.neuron.amazonaws.com 108 | 109 | # Install HuggingFace packages 110 | RUN pip3 install \ 111 | hf_transfer 112 | 113 | # Install optimum-neuron 114 | COPY --from=optimum-neuron /optimum-neuron optimum-neuron 115 | RUN pip3 install ./optimum-neuron 116 | 117 | # TGI base env 118 | ENV HUGGINGFACE_HUB_CACHE=/tmp \ 119 | HF_HUB_ENABLE_HF_TRANSFER=1 \ 120 | PORT=80 121 | 122 | # Install router 123 | COPY --from=builder /usr/src/target/release/text-generation-router /usr/local/bin/text-generation-router 124 | # Install launcher 125 | COPY --from=builder /usr/src/target/release/text-generation-launcher /usr/local/bin/text-generation-launcher 126 | # Install python server 127 | COPY --from=pyserver /pyserver/build/dist dist 128 | RUN pip install dist/text-generation-server*.tar.gz 129 | 130 | # AWS Sagemaker compatible image 131 | FROM neuron as sagemaker 132 | 133 | COPY --from=optimum-neuron /optimum-neuron/text-generation-inference/sagemaker-entrypoint.sh entrypoint.sh 134 | RUN chmod +x entrypoint.sh 135 | 136 | ENTRYPOINT ["./entrypoint.sh"] 137 | 138 | 139 | RUN apt-get update && apt-get install -y --no-install-recommends curl unzip \ 140 | && rm -rf /var/lib/apt/lists/* 141 | RUN HOME_DIR=/root && \ 142 | pip install requests && \ 143 | curl -o ${HOME_DIR}/oss_compliance.zip https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip && \ 144 | unzip ${HOME_DIR}/oss_compliance.zip -d ${HOME_DIR}/ && \ 145 | cp ${HOME_DIR}/oss_compliance/test/testOSSCompliance /usr/local/bin/testOSSCompliance && \ 146 | chmod +x /usr/local/bin/testOSSCompliance && \ 147 | chmod +x ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh && \ 148 | ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh ${HOME_DIR} python && \ 149 | rm -rf ${HOME_DIR}/oss_compliance* 150 | 151 | RUN echo "N.B.: Although this image is released under the Apache-2.0 License, the Dockerfile used to build the image \ 152 | has an indirect documentation dependency on third party project. The \ 153 | project's licensing includes the license. \ 154 | \n\n\ 155 | N.B.: Although this image is released under the Apache-2.0 License, the Dockerfile used to build the image uses the \ 156 | third party project. The project's licensing \ 157 | includes the https://github.com/huggingface/text-generation-inference/blob/main/LICENSE> \ 158 | license." 
> /root/THIRD_PARTY_LICENSES 159 | 160 | LABEL dlc_major_version="1" 161 | LABEL com.amazonaws.ml.engines.sagemaker.dlc.framework.huggingface.tgi="true" 162 | LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port="true" 163 | -------------------------------------------------------------------------------- /huggingface/pytorch/optimum/docker/0.0.20/Dockerfile: -------------------------------------------------------------------------------- 1 | # Fetch and extract the TGI sources 2 | FROM alpine AS tgi 3 | RUN mkdir -p /tgi 4 | ADD https://github.com/huggingface/text-generation-inference/archive/refs/tags/v1.4.1.tar.gz /tgi/sources.tar.gz 5 | RUN tar -C /tgi -xf /tgi/sources.tar.gz --strip-components=1 6 | 7 | # Build cargo components (adapted from TGI original Dockerfile) 8 | # Note that the build image is aligned on the same Linux version as the base image (Debian bookworm/ Ubuntu 22.04) 9 | FROM lukemathwalker/cargo-chef:latest-rust-1.75-bookworm AS chef 10 | WORKDIR /usr/src 11 | 12 | ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse 13 | 14 | FROM chef as planner 15 | COPY --from=tgi /tgi/Cargo.toml Cargo.toml 16 | COPY --from=tgi /tgi/Cargo.lock Cargo.lock 17 | COPY --from=tgi /tgi/rust-toolchain.toml rust-toolchain.toml 18 | COPY --from=tgi /tgi/proto proto 19 | COPY --from=tgi /tgi/benchmark benchmark 20 | COPY --from=tgi /tgi/router router 21 | COPY --from=tgi /tgi/launcher launcher 22 | RUN cargo chef prepare --recipe-path recipe.json 23 | 24 | FROM chef AS builder 25 | 26 | RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && \ 27 | curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v21.12/$PROTOC_ZIP && \ 28 | unzip -o $PROTOC_ZIP -d /usr/local bin/protoc && \ 29 | unzip -o $PROTOC_ZIP -d /usr/local 'include/*' && \ 30 | rm -f $PROTOC_ZIP 31 | 32 | COPY --from=planner /usr/src/recipe.json recipe.json 33 | RUN cargo chef cook --release --recipe-path recipe.json 34 | 35 | COPY --from=tgi /tgi/Cargo.toml Cargo.toml 36 | COPY --from=tgi /tgi/Cargo.lock Cargo.lock 37 | COPY --from=tgi /tgi/rust-toolchain.toml rust-toolchain.toml 38 | COPY --from=tgi /tgi/proto proto 39 | COPY --from=tgi /tgi/benchmark benchmark 40 | COPY --from=tgi /tgi/router router 41 | COPY --from=tgi /tgi/launcher launcher 42 | RUN cargo build --release --workspace --exclude benchmark 43 | 44 | # Fetch optimum-neuron sources 45 | FROM alpine/git AS optimum-neuron 46 | RUN git clone --depth 1 --branch v0.0.20 https://github.com/huggingface/optimum-neuron.git /optimum-neuron 47 | 48 | # Python base image 49 | FROM ubuntu:22.04 AS base 50 | 51 | RUN apt-get update -y \ 52 | && apt-get install -y --no-install-recommends \ 53 | python3-pip \ 54 | python3-setuptools \ 55 | python-is-python3 \ 56 | && rm -rf /var/lib/apt/lists/* \ 57 | && apt-get clean 58 | RUN pip3 --no-cache-dir install --upgrade pip 59 | 60 | # Python server build image 61 | FROM base AS pyserver 62 | 63 | RUN apt-get update -y \ 64 | && apt-get install -y --no-install-recommends \ 65 | make \ 66 | python3-venv \ 67 | && rm -rf /var/lib/apt/lists/* \ 68 | && apt-get clean 69 | 70 | RUN install -d /pyserver 71 | WORKDIR /pyserver 72 | COPY --from=optimum-neuron /optimum-neuron/text-generation-inference/server server 73 | COPY --from=tgi /tgi/proto proto 74 | RUN pip3 install -r server/build-requirements.txt 75 | RUN VERBOSE=1 BUILDDIR=/pyserver/build PROTODIR=/pyserver/proto make -C server gen-server 76 | 77 | # Neuron base image (used for deployment) 78 | FROM base AS neuron 79 | 80 | # Install system prerequisites 81 | RUN 
apt-get update -y \ 82 | && apt-get install -y --no-install-recommends \ 83 | gnupg2 \ 84 | wget \ 85 | && rm -rf /var/lib/apt/lists/* \ 86 | && apt-get clean 87 | 88 | RUN echo "deb https://apt.repos.neuron.amazonaws.com jammy main" > /etc/apt/sources.list.d/neuron.list 89 | RUN wget -qO - https://apt.repos.neuron.amazonaws.com/GPG-PUB-KEY-AMAZON-AWS-NEURON.PUB | apt-key add - 90 | 91 | # Install neuronx packages 92 | RUN apt-get update -y \ 93 | && apt-get install -y --no-install-recommends \ 94 | aws-neuronx-dkms=2.15.9.0 \ 95 | aws-neuronx-collectives=2.20.11.0-c101c322e \ 96 | aws-neuronx-runtime-lib=2.20.11.0-b7d33e68b \ 97 | aws-neuronx-tools=2.17.0.0 \ 98 | && rm -rf /var/lib/apt/lists/* \ 99 | && apt-get clean 100 | 101 | ENV PATH="/opt/bin/:/opt/aws/neuron/bin:${PATH}" 102 | 103 | RUN pip3 install \ 104 | neuronx-cc==2.12.68.0 \ 105 | torch-neuronx==1.13.1.1.13.1 \ 106 | transformers-neuronx==0.9.474 \ 107 | --extra-index-url=https://pip.repos.neuron.amazonaws.com 108 | 109 | # Install HuggingFace packages 110 | RUN pip3 install \ 111 | hf_transfer 112 | 113 | # Install optimum-neuron 114 | COPY --from=optimum-neuron /optimum-neuron optimum-neuron 115 | RUN pip3 install ./optimum-neuron 116 | 117 | # TGI base env 118 | ENV HUGGINGFACE_HUB_CACHE=/tmp \ 119 | HF_HUB_ENABLE_HF_TRANSFER=1 \ 120 | PORT=80 121 | 122 | # Install router 123 | COPY --from=builder /usr/src/target/release/text-generation-router /usr/local/bin/text-generation-router 124 | # Install launcher 125 | COPY --from=builder /usr/src/target/release/text-generation-launcher /usr/local/bin/text-generation-launcher 126 | # Install python server 127 | COPY --from=pyserver /pyserver/build/dist dist 128 | RUN pip install dist/text-generation-server*.tar.gz 129 | 130 | # AWS Sagemaker compatible image 131 | FROM neuron as sagemaker 132 | 133 | COPY --from=optimum-neuron /optimum-neuron/text-generation-inference/sagemaker-entrypoint.sh entrypoint.sh 134 | RUN chmod +x entrypoint.sh 135 | 136 | ENTRYPOINT ["./entrypoint.sh"] 137 | 138 | 139 | RUN apt-get update && apt-get install -y --no-install-recommends curl unzip \ 140 | && rm -rf /var/lib/apt/lists/* 141 | RUN HOME_DIR=/root && \ 142 | pip install requests && \ 143 | curl -o ${HOME_DIR}/oss_compliance.zip https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip && \ 144 | unzip ${HOME_DIR}/oss_compliance.zip -d ${HOME_DIR}/ && \ 145 | cp ${HOME_DIR}/oss_compliance/test/testOSSCompliance /usr/local/bin/testOSSCompliance && \ 146 | chmod +x /usr/local/bin/testOSSCompliance && \ 147 | chmod +x ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh && \ 148 | ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh ${HOME_DIR} python && \ 149 | rm -rf ${HOME_DIR}/oss_compliance* 150 | 151 | RUN echo "N.B.: Although this image is released under the Apache-2.0 License, the Dockerfile used to build the image \ 152 | has an indirect documentation dependency on third party project. The \ 153 | project's licensing includes the license. \ 154 | \n\n\ 155 | N.B.: Although this image is released under the Apache-2.0 License, the Dockerfile used to build the image uses the \ 156 | third party project. The project's licensing \ 157 | includes the https://github.com/huggingface/text-generation-inference/blob/main/LICENSE> \ 158 | license." 
> /root/THIRD_PARTY_LICENSES 159 | 160 | LABEL dlc_major_version="1" 161 | LABEL com.amazonaws.ml.engines.sagemaker.dlc.framework.huggingface.tgi="true" 162 | LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port="true" 163 | -------------------------------------------------------------------------------- /huggingface/pytorch/optimum/docker/0.0.21/Dockerfile: -------------------------------------------------------------------------------- 1 | # Fetch and extract the TGI sources 2 | FROM alpine AS tgi 3 | RUN mkdir -p /tgi 4 | ADD https://github.com/huggingface/text-generation-inference/archive/refs/tags/v1.4.1.tar.gz /tgi/sources.tar.gz 5 | RUN tar -C /tgi -xf /tgi/sources.tar.gz --strip-components=1 6 | 7 | # Build cargo components (adapted from TGI original Dockerfile) 8 | # Note that the build image is aligned on the same Linux version as the base image (Debian bookworm/ Ubuntu 22.04) 9 | FROM lukemathwalker/cargo-chef:latest-rust-1.75-bookworm AS chef 10 | WORKDIR /usr/src 11 | 12 | ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse 13 | 14 | FROM chef as planner 15 | COPY --from=tgi /tgi/Cargo.toml Cargo.toml 16 | COPY --from=tgi /tgi/Cargo.lock Cargo.lock 17 | COPY --from=tgi /tgi/rust-toolchain.toml rust-toolchain.toml 18 | COPY --from=tgi /tgi/proto proto 19 | COPY --from=tgi /tgi/benchmark benchmark 20 | COPY --from=tgi /tgi/router router 21 | COPY --from=tgi /tgi/launcher launcher 22 | RUN cargo chef prepare --recipe-path recipe.json 23 | 24 | FROM chef AS builder 25 | 26 | RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && \ 27 | curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v21.12/$PROTOC_ZIP && \ 28 | unzip -o $PROTOC_ZIP -d /usr/local bin/protoc && \ 29 | unzip -o $PROTOC_ZIP -d /usr/local 'include/*' && \ 30 | rm -f $PROTOC_ZIP 31 | 32 | COPY --from=planner /usr/src/recipe.json recipe.json 33 | RUN cargo chef cook --release --recipe-path recipe.json 34 | 35 | COPY --from=tgi /tgi/Cargo.toml Cargo.toml 36 | COPY --from=tgi /tgi/Cargo.lock Cargo.lock 37 | COPY --from=tgi /tgi/rust-toolchain.toml rust-toolchain.toml 38 | COPY --from=tgi /tgi/proto proto 39 | COPY --from=tgi /tgi/benchmark benchmark 40 | COPY --from=tgi /tgi/router router 41 | COPY --from=tgi /tgi/launcher launcher 42 | RUN cargo build --release --workspace --exclude benchmark 43 | 44 | # Fetch optimum-neuron sources 45 | FROM alpine/git AS optimum-neuron 46 | RUN git clone --depth 1 --branch v0.0.21 https://github.com/huggingface/optimum-neuron.git /optimum-neuron 47 | 48 | # Python base image 49 | FROM ubuntu:22.04 AS base 50 | 51 | RUN apt-get update -y \ 52 | && apt-get install -y --no-install-recommends \ 53 | python3-pip \ 54 | python3-setuptools \ 55 | python-is-python3 \ 56 | && rm -rf /var/lib/apt/lists/* \ 57 | && apt-get clean 58 | RUN pip3 --no-cache-dir install --upgrade pip 59 | 60 | # Python server build image 61 | FROM base AS pyserver 62 | 63 | RUN apt-get update -y \ 64 | && apt-get install -y --no-install-recommends \ 65 | make \ 66 | python3-venv \ 67 | && rm -rf /var/lib/apt/lists/* \ 68 | && apt-get clean 69 | 70 | RUN install -d /pyserver 71 | WORKDIR /pyserver 72 | COPY --from=optimum-neuron /optimum-neuron/text-generation-inference/server server 73 | COPY --from=tgi /tgi/proto proto 74 | RUN pip3 install -r server/build-requirements.txt 75 | RUN VERBOSE=1 BUILDDIR=/pyserver/build PROTODIR=/pyserver/proto make -C server gen-server 76 | 77 | # Neuron base image (used for deployment) 78 | FROM base AS neuron 79 | 80 | # Install system prerequisites 81 | RUN 
apt-get update -y \ 82 | && apt-get install -y --no-install-recommends \ 83 | gnupg2 \ 84 | wget \ 85 | && rm -rf /var/lib/apt/lists/* \ 86 | && apt-get clean 87 | 88 | RUN echo "deb https://apt.repos.neuron.amazonaws.com jammy main" > /etc/apt/sources.list.d/neuron.list 89 | RUN wget -qO - https://apt.repos.neuron.amazonaws.com/GPG-PUB-KEY-AMAZON-AWS-NEURON.PUB | apt-key add - 90 | 91 | # Install neuronx packages 92 | RUN apt-get update -y \ 93 | && apt-get install -y --no-install-recommends \ 94 | aws-neuronx-dkms=2.16.7.0 \ 95 | aws-neuronx-collectives=2.20.22.0-c101c322e \ 96 | aws-neuronx-runtime-lib=2.20.22.0-1b3ca6425 \ 97 | aws-neuronx-tools=2.17.1.0 \ 98 | && rm -rf /var/lib/apt/lists/* \ 99 | && apt-get clean 100 | 101 | ENV PATH="/opt/bin/:/opt/aws/neuron/bin:${PATH}" 102 | 103 | RUN pip3 install \ 104 | neuronx-cc==2.13.66.0 \ 105 | torch-neuronx==1.13.1.1.14.0 \ 106 | transformers-neuronx==0.10.0.21 \ 107 | --extra-index-url=https://pip.repos.neuron.amazonaws.com 108 | 109 | # Install HuggingFace packages 110 | RUN pip3 install \ 111 | hf_transfer 112 | 113 | # Install optimum-neuron 114 | COPY --from=optimum-neuron /optimum-neuron optimum-neuron 115 | RUN pip3 install ./optimum-neuron 116 | 117 | # TGI base env 118 | ENV HUGGINGFACE_HUB_CACHE=/tmp \ 119 | HF_HUB_ENABLE_HF_TRANSFER=1 \ 120 | PORT=80 121 | 122 | # Disable color logs as they are not supported by CloudWatch 123 | ENV LOGURU_COLORIZE=NO 124 | 125 | # Install router 126 | COPY --from=builder /usr/src/target/release/text-generation-router /usr/local/bin/text-generation-router 127 | # Install launcher 128 | COPY --from=builder /usr/src/target/release/text-generation-launcher /usr/local/bin/text-generation-launcher 129 | # Install python server 130 | COPY --from=pyserver /pyserver/build/dist dist 131 | RUN pip install dist/text-generation-server*.tar.gz 132 | 133 | # AWS Sagemaker compatible image 134 | FROM neuron as sagemaker 135 | 136 | COPY --from=optimum-neuron /optimum-neuron/text-generation-inference/sagemaker-entrypoint.sh entrypoint.sh 137 | RUN chmod +x entrypoint.sh 138 | 139 | ENTRYPOINT ["./entrypoint.sh"] 140 | 141 | 142 | RUN apt-get update && apt-get install -y --no-install-recommends curl unzip \ 143 | && rm -rf /var/lib/apt/lists/* 144 | RUN HOME_DIR=/root && \ 145 | pip install requests && \ 146 | curl -o ${HOME_DIR}/oss_compliance.zip https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip && \ 147 | unzip ${HOME_DIR}/oss_compliance.zip -d ${HOME_DIR}/ && \ 148 | cp ${HOME_DIR}/oss_compliance/test/testOSSCompliance /usr/local/bin/testOSSCompliance && \ 149 | chmod +x /usr/local/bin/testOSSCompliance && \ 150 | chmod +x ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh && \ 151 | ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh ${HOME_DIR} python && \ 152 | rm -rf ${HOME_DIR}/oss_compliance* 153 | 154 | RUN echo "N.B.: Although this image is released under the Apache-2.0 License, the Dockerfile used to build the image \ 155 | has an indirect documentation dependency on third party project. The \ 156 | project's licensing includes the license. \ 157 | \n\n\ 158 | N.B.: Although this image is released under the Apache-2.0 License, the Dockerfile used to build the image uses the \ 159 | third party project. The project's licensing \ 160 | includes the https://github.com/huggingface/text-generation-inference/blob/main/LICENSE> \ 161 | license." 
> /root/THIRD_PARTY_LICENSES 162 | 163 | LABEL dlc_major_version="1" 164 | LABEL com.amazonaws.ml.engines.sagemaker.dlc.framework.huggingface.tgi="true" 165 | LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port="true" 166 | -------------------------------------------------------------------------------- /huggingface/pytorch/optimum/docker/0.0.22/Dockerfile: -------------------------------------------------------------------------------- 1 | # Fetch and extract the TGI sources (TGI_VERSION is mandatory) 2 | FROM alpine AS tgi 3 | RUN mkdir -p /tgi 4 | ADD https://github.com/huggingface/text-generation-inference/archive/refs/tags/v2.0.2.tar.gz /tgi/sources.tar.gz 5 | RUN tar -C /tgi -xf /tgi/sources.tar.gz --strip-components=1 6 | 7 | # Build cargo components (adapted from TGI original Dockerfile) 8 | # Note that the build image is aligned on the same Linux version as the base image (Debian bookworm/ Ubuntu 22.04) 9 | FROM lukemathwalker/cargo-chef:latest-rust-1.75-bookworm AS chef 10 | WORKDIR /usr/src 11 | 12 | ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse 13 | 14 | FROM chef as planner 15 | COPY --from=tgi /tgi/Cargo.toml Cargo.toml 16 | COPY --from=tgi /tgi/rust-toolchain.toml rust-toolchain.toml 17 | COPY --from=tgi /tgi/proto proto 18 | COPY --from=tgi /tgi/benchmark benchmark 19 | COPY --from=tgi /tgi/router router 20 | COPY --from=tgi /tgi/launcher launcher 21 | RUN cargo chef prepare --recipe-path recipe.json 22 | 23 | FROM chef AS builder 24 | 25 | RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && \ 26 | curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v21.12/$PROTOC_ZIP && \ 27 | unzip -o $PROTOC_ZIP -d /usr/local bin/protoc && \ 28 | unzip -o $PROTOC_ZIP -d /usr/local 'include/*' && \ 29 | rm -f $PROTOC_ZIP 30 | 31 | COPY --from=planner /usr/src/recipe.json recipe.json 32 | RUN cargo chef cook --release --recipe-path recipe.json 33 | 34 | COPY --from=tgi /tgi/Cargo.toml Cargo.toml 35 | COPY --from=tgi /tgi/rust-toolchain.toml rust-toolchain.toml 36 | COPY --from=tgi /tgi/proto proto 37 | COPY --from=tgi /tgi/benchmark benchmark 38 | COPY --from=tgi /tgi/router router 39 | COPY --from=tgi /tgi/launcher launcher 40 | RUN cargo build --release --workspace --exclude benchmark 41 | 42 | # Fetch optimum-neuron sources 43 | FROM alpine/git AS optimum-neuron 44 | RUN git clone --depth 1 --branch v0.0.22 https://github.com/huggingface/optimum-neuron.git /optimum-neuron 45 | 46 | # Python base image 47 | FROM ubuntu:22.04 AS base 48 | 49 | RUN apt-get update -y \ 50 | && apt-get install -y --no-install-recommends \ 51 | python3-pip \ 52 | python3-setuptools \ 53 | python-is-python3 \ 54 | && rm -rf /var/lib/apt/lists/* \ 55 | && apt-get clean 56 | RUN pip3 --no-cache-dir install --upgrade pip 57 | 58 | # Python server build image 59 | FROM base AS pyserver 60 | 61 | RUN apt-get update -y \ 62 | && apt-get install -y --no-install-recommends \ 63 | make \ 64 | python3-venv \ 65 | && rm -rf /var/lib/apt/lists/* \ 66 | && apt-get clean 67 | 68 | RUN install -d /pyserver 69 | WORKDIR /pyserver 70 | COPY --from=optimum-neuron /optimum-neuron/text-generation-inference/server server 71 | COPY --from=tgi /tgi/proto proto 72 | RUN pip3 install -r server/build-requirements.txt 73 | RUN VERBOSE=1 BUILDDIR=/pyserver/build PROTODIR=/pyserver/proto make -C server gen-server 74 | 75 | # Neuron base image (used for deployment) 76 | FROM base AS neuron 77 | 78 | # Install system prerequisites 79 | RUN apt-get update -y \ 80 | && apt-get install -y --no-install-recommends \ 81 
| gnupg2 \ 82 | wget \ 83 | python3-dev \ 84 | && rm -rf /var/lib/apt/lists/* \ 85 | && apt-get clean 86 | 87 | RUN echo "deb https://apt.repos.neuron.amazonaws.com jammy main" > /etc/apt/sources.list.d/neuron.list 88 | RUN wget -qO - https://apt.repos.neuron.amazonaws.com/GPG-PUB-KEY-AMAZON-AWS-NEURON.PUB | apt-key add - 89 | 90 | # Install neuronx packages 91 | RUN apt-get update -y \ 92 | && apt-get install -y --no-install-recommends \ 93 | aws-neuronx-dkms=2.16.7.0 \ 94 | aws-neuronx-collectives=2.20.22.0-c101c322e \ 95 | aws-neuronx-runtime-lib=2.20.22.0-1b3ca6425 \ 96 | aws-neuronx-tools=2.17.1.0 \ 97 | && rm -rf /var/lib/apt/lists/* \ 98 | && apt-get clean 99 | 100 | ENV PATH="/opt/bin/:/opt/aws/neuron/bin:${PATH}" 101 | 102 | RUN pip3 install \ 103 | neuronx-cc==2.13.66.0 \ 104 | torch-neuronx==2.1.2.2.1.0 \ 105 | transformers-neuronx==0.10.0.21 \ 106 | --extra-index-url=https://pip.repos.neuron.amazonaws.com 107 | 108 | # Install HuggingFace packages 109 | RUN pip3 install \ 110 | hf_transfer huggingface_hub 111 | 112 | # Install optimum-neuron 113 | COPY --from=optimum-neuron /optimum-neuron optimum-neuron 114 | RUN pip3 install ./optimum-neuron 115 | 116 | # TGI base env 117 | ENV HUGGINGFACE_HUB_CACHE=/tmp \ 118 | HF_HUB_ENABLE_HF_TRANSFER=1 \ 119 | PORT=80 120 | 121 | # Disable color logs as they are not supported by CloudWatch 122 | ENV LOGURU_COLORIZE=NO 123 | 124 | # Install router 125 | COPY --from=builder /usr/src/target/release/text-generation-router /usr/local/bin/text-generation-router 126 | # Install launcher 127 | COPY --from=builder /usr/src/target/release/text-generation-launcher /usr/local/bin/text-generation-launcher 128 | # Install python server 129 | COPY --from=pyserver /pyserver/build/dist dist 130 | RUN pip install dist/text_generation_server*.tar.gz 131 | 132 | # AWS Sagemaker compatible image 133 | FROM neuron as sagemaker 134 | 135 | COPY --from=optimum-neuron /optimum-neuron/text-generation-inference/sagemaker-entrypoint.sh entrypoint.sh 136 | RUN chmod +x entrypoint.sh 137 | 138 | ENTRYPOINT ["./entrypoint.sh"] 139 | 140 | 141 | RUN apt-get update && apt-get install -y --no-install-recommends curl unzip \ 142 | && rm -rf /var/lib/apt/lists/* 143 | RUN HOME_DIR=/root && \ 144 | pip install requests && \ 145 | curl -o ${HOME_DIR}/oss_compliance.zip https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip && \ 146 | unzip ${HOME_DIR}/oss_compliance.zip -d ${HOME_DIR}/ && \ 147 | cp ${HOME_DIR}/oss_compliance/test/testOSSCompliance /usr/local/bin/testOSSCompliance && \ 148 | chmod +x /usr/local/bin/testOSSCompliance && \ 149 | chmod +x ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh && \ 150 | ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh ${HOME_DIR} python && \ 151 | rm -rf ${HOME_DIR}/oss_compliance* 152 | 153 | RUN echo "N.B.: Although this image is released under the Apache-2.0 License, the Dockerfile used to build the image \ 154 | has an indirect documentation dependency on third party project. The \ 155 | project's licensing includes the license. \ 156 | \n\n\ 157 | N.B.: Although this image is released under the Apache-2.0 License, the Dockerfile used to build the image uses the \ 158 | third party project. The project's licensing \ 159 | includes the https://github.com/huggingface/text-generation-inference/blob/main/LICENSE> \ 160 | license." 
> /root/THIRD_PARTY_LICENSES 161 | 162 | LABEL dlc_major_version="1" 163 | LABEL com.amazonaws.ml.engines.sagemaker.dlc.framework.huggingface.tgi="true" 164 | LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port="true" -------------------------------------------------------------------------------- /huggingface/pytorch/optimum/docker/0.0.23/Dockerfile: -------------------------------------------------------------------------------- 1 | # Fetch and extract the TGI sources (TGI_VERSION is mandatory) 2 | FROM alpine AS tgi 3 | RUN mkdir -p /tgi 4 | ADD https://github.com/huggingface/text-generation-inference/archive/refs/tags/v2.0.2.tar.gz /tgi/sources.tar.gz 5 | RUN tar -C /tgi -xf /tgi/sources.tar.gz --strip-components=1 6 | 7 | # Build cargo components (adapted from TGI original Dockerfile) 8 | # Note that the build image is aligned on the same Linux version as the base image (Debian bookworm/ Ubuntu 22.04) 9 | FROM lukemathwalker/cargo-chef:latest-rust-1.75-bookworm AS chef 10 | WORKDIR /usr/src 11 | 12 | ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse 13 | 14 | FROM chef as planner 15 | COPY --from=tgi /tgi/Cargo.toml Cargo.toml 16 | COPY --from=tgi /tgi/rust-toolchain.toml rust-toolchain.toml 17 | COPY --from=tgi /tgi/proto proto 18 | COPY --from=tgi /tgi/benchmark benchmark 19 | COPY --from=tgi /tgi/router router 20 | COPY --from=tgi /tgi/launcher launcher 21 | RUN cargo chef prepare --recipe-path recipe.json 22 | 23 | FROM chef AS builder 24 | 25 | RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && \ 26 | curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v21.12/$PROTOC_ZIP && \ 27 | unzip -o $PROTOC_ZIP -d /usr/local bin/protoc && \ 28 | unzip -o $PROTOC_ZIP -d /usr/local 'include/*' && \ 29 | rm -f $PROTOC_ZIP 30 | 31 | COPY --from=planner /usr/src/recipe.json recipe.json 32 | RUN cargo chef cook --release --recipe-path recipe.json 33 | 34 | COPY --from=tgi /tgi/Cargo.toml Cargo.toml 35 | COPY --from=tgi /tgi/rust-toolchain.toml rust-toolchain.toml 36 | COPY --from=tgi /tgi/proto proto 37 | COPY --from=tgi /tgi/benchmark benchmark 38 | COPY --from=tgi /tgi/router router 39 | COPY --from=tgi /tgi/launcher launcher 40 | RUN cargo build --release --workspace --exclude benchmark 41 | 42 | # Fetch optimum-neuron sources 43 | FROM alpine/git AS optimum-neuron 44 | RUN git clone --depth 1 --branch v0.0.23 https://github.com/huggingface/optimum-neuron.git /optimum-neuron 45 | 46 | # Python base image 47 | FROM ubuntu:22.04 AS base 48 | 49 | RUN apt-get update -y \ 50 | && apt-get install -y --no-install-recommends \ 51 | python3-pip \ 52 | python3-setuptools \ 53 | python-is-python3 \ 54 | && rm -rf /var/lib/apt/lists/* \ 55 | && apt-get clean 56 | RUN pip3 --no-cache-dir install --upgrade pip 57 | 58 | # Python server build image 59 | FROM base AS pyserver 60 | 61 | RUN apt-get update -y \ 62 | && apt-get install -y --no-install-recommends \ 63 | make \ 64 | python3-venv \ 65 | && rm -rf /var/lib/apt/lists/* \ 66 | && apt-get clean 67 | 68 | RUN install -d /pyserver 69 | WORKDIR /pyserver 70 | COPY --from=optimum-neuron /optimum-neuron/text-generation-inference/server server 71 | COPY --from=tgi /tgi/proto proto 72 | RUN pip3 install -r server/build-requirements.txt 73 | RUN VERBOSE=1 BUILDDIR=/pyserver/build PROTODIR=/pyserver/proto make -C server gen-server 74 | 75 | # Neuron base image (used for deployment) 76 | FROM base AS neuron 77 | 78 | # Install system prerequisites 79 | RUN apt-get update -y \ 80 | && apt-get install -y --no-install-recommends \ 81 | 
gnupg2 \ 82 | wget \ 83 | python3-dev \ 84 | && rm -rf /var/lib/apt/lists/* \ 85 | && apt-get clean 86 | 87 | RUN echo "deb https://apt.repos.neuron.amazonaws.com jammy main" > /etc/apt/sources.list.d/neuron.list 88 | RUN wget -qO - https://apt.repos.neuron.amazonaws.com/GPG-PUB-KEY-AMAZON-AWS-NEURON.PUB | apt-key add - 89 | 90 | # Install neuronx packages 91 | RUN apt-get update -y \ 92 | && apt-get install -y --no-install-recommends \ 93 | aws-neuronx-dkms=2.16.7.0 \ 94 | aws-neuronx-collectives=2.20.22.0-c101c322e \ 95 | aws-neuronx-runtime-lib=2.20.22.0-1b3ca6425 \ 96 | aws-neuronx-tools=2.17.1.0 \ 97 | && rm -rf /var/lib/apt/lists/* \ 98 | && apt-get clean 99 | 100 | ENV PATH="/opt/bin/:/opt/aws/neuron/bin:${PATH}" 101 | 102 | RUN pip3 install \ 103 | neuronx-cc==2.13.66.0 \ 104 | torch-neuronx==2.1.2.2.1.0 \ 105 | transformers-neuronx==0.10.0.21 \ 106 | --extra-index-url=https://pip.repos.neuron.amazonaws.com 107 | 108 | # Install HuggingFace packages 109 | RUN pip3 install \ 110 | hf_transfer huggingface_hub 111 | 112 | # Install optimum-neuron 113 | COPY --from=optimum-neuron /optimum-neuron optimum-neuron 114 | RUN pip3 install ./optimum-neuron 115 | 116 | # TGI base env 117 | ENV HUGGINGFACE_HUB_CACHE=/tmp \ 118 | HF_HUB_ENABLE_HF_TRANSFER=1 \ 119 | PORT=80 120 | 121 | # Disable color logs as they are not supported by CloudWatch 122 | ENV LOGURU_COLORIZE=NO 123 | ENV LOG_COLORIZE=0 124 | 125 | # Install router 126 | COPY --from=builder /usr/src/target/release/text-generation-router /usr/local/bin/text-generation-router 127 | # Install launcher 128 | COPY --from=builder /usr/src/target/release/text-generation-launcher /usr/local/bin/text-generation-launcher 129 | # Install python server 130 | COPY --from=pyserver /pyserver/build/dist dist 131 | RUN pip install dist/text_generation_server*.tar.gz 132 | 133 | # AWS Sagemaker compatible image 134 | FROM neuron as sagemaker 135 | 136 | COPY --from=optimum-neuron /optimum-neuron/text-generation-inference/sagemaker-entrypoint.sh entrypoint.sh 137 | RUN chmod +x entrypoint.sh 138 | 139 | ENTRYPOINT ["./entrypoint.sh"] 140 | 141 | 142 | RUN apt-get update && apt-get install -y --no-install-recommends curl unzip \ 143 | && rm -rf /var/lib/apt/lists/* 144 | RUN HOME_DIR=/root && \ 145 | pip install requests && \ 146 | curl -o ${HOME_DIR}/oss_compliance.zip https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip && \ 147 | unzip ${HOME_DIR}/oss_compliance.zip -d ${HOME_DIR}/ && \ 148 | cp ${HOME_DIR}/oss_compliance/test/testOSSCompliance /usr/local/bin/testOSSCompliance && \ 149 | chmod +x /usr/local/bin/testOSSCompliance && \ 150 | chmod +x ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh && \ 151 | ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh ${HOME_DIR} python && \ 152 | rm -rf ${HOME_DIR}/oss_compliance* 153 | 154 | RUN echo "N.B.: Although this image is released under the Apache-2.0 License, the Dockerfile used to build the image \ 155 | has an indirect documentation dependency on third party project. The \ 156 | project's licensing includes the license. \ 157 | \n\n\ 158 | N.B.: Although this image is released under the Apache-2.0 License, the Dockerfile used to build the image uses the \ 159 | third party project. The project's licensing \ 160 | includes the https://github.com/huggingface/text-generation-inference/blob/main/LICENSE> \ 161 | license." 
> /root/THIRD_PARTY_LICENSES 162 | 163 | LABEL dlc_major_version="1" 164 | LABEL com.amazonaws.ml.engines.sagemaker.dlc.framework.huggingface.tgi="true" 165 | LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port="true" 166 | -------------------------------------------------------------------------------- /huggingface/pytorch/optimum/docker/0.0.24/Dockerfile: -------------------------------------------------------------------------------- 1 | # Fetch and extract the TGI sources (TGI_VERSION is mandatory) 2 | FROM alpine AS tgi 3 | RUN mkdir -p /tgi 4 | ADD https://github.com/huggingface/text-generation-inference/archive/refs/tags/v2.1.1.tar.gz /tgi/sources.tar.gz 5 | RUN tar -C /tgi -xf /tgi/sources.tar.gz --strip-components=1 6 | 7 | # Build cargo components (adapted from TGI original Dockerfile) 8 | # Note that the build image is aligned on the same Linux version as the base image (Debian bookworm/ Ubuntu 22.04) 9 | FROM lukemathwalker/cargo-chef:latest-rust-1.79-bookworm AS chef 10 | WORKDIR /usr/src 11 | 12 | ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse 13 | 14 | FROM chef as planner 15 | COPY --from=tgi /tgi/Cargo.toml Cargo.toml 16 | COPY --from=tgi /tgi/rust-toolchain.toml rust-toolchain.toml 17 | COPY --from=tgi /tgi/proto proto 18 | COPY --from=tgi /tgi/benchmark benchmark 19 | COPY --from=tgi /tgi/router router 20 | COPY --from=tgi /tgi/launcher launcher 21 | RUN cargo chef prepare --recipe-path recipe.json 22 | 23 | FROM chef AS builder 24 | 25 | RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && \ 26 | curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v21.12/$PROTOC_ZIP && \ 27 | unzip -o $PROTOC_ZIP -d /usr/local bin/protoc && \ 28 | unzip -o $PROTOC_ZIP -d /usr/local 'include/*' && \ 29 | rm -f $PROTOC_ZIP 30 | 31 | COPY --from=planner /usr/src/recipe.json recipe.json 32 | RUN cargo chef cook --release --recipe-path recipe.json 33 | 34 | COPY --from=tgi /tgi/Cargo.toml Cargo.toml 35 | COPY --from=tgi /tgi/rust-toolchain.toml rust-toolchain.toml 36 | COPY --from=tgi /tgi/proto proto 37 | COPY --from=tgi /tgi/benchmark benchmark 38 | COPY --from=tgi /tgi/router router 39 | COPY --from=tgi /tgi/launcher launcher 40 | # Remove this line once TGI has fixed the conflict 41 | RUN cargo update ureq --precise 2.9.7 42 | RUN cargo build --release --workspace --exclude benchmark 43 | 44 | # Fetch optimum-neuron sources 45 | FROM alpine/git AS optimum-neuron 46 | RUN git clone --depth 1 --branch v0.0.24 https://github.com/huggingface/optimum-neuron.git /optimum-neuron 47 | 48 | # Python base image 49 | FROM ubuntu:22.04 AS base 50 | 51 | RUN apt-get update -y \ 52 | && apt-get install -y --no-install-recommends \ 53 | python3-pip \ 54 | python3-setuptools \ 55 | python-is-python3 \ 56 | && rm -rf /var/lib/apt/lists/* \ 57 | && apt-get clean 58 | RUN pip3 --no-cache-dir install --upgrade pip 59 | 60 | # Python server build image 61 | FROM base AS pyserver 62 | 63 | RUN apt-get update -y \ 64 | && apt-get install -y --no-install-recommends \ 65 | make \ 66 | python3-venv \ 67 | && rm -rf /var/lib/apt/lists/* \ 68 | && apt-get clean 69 | 70 | RUN install -d /pyserver 71 | WORKDIR /pyserver 72 | COPY --from=optimum-neuron /optimum-neuron/text-generation-inference/server server 73 | COPY --from=tgi /tgi/proto proto 74 | RUN pip3 install -r server/build-requirements.txt 75 | RUN VERBOSE=1 BUILDDIR=/pyserver/build PROTODIR=/pyserver/proto make -C server gen-server 76 | 77 | # Neuron base image (used for deployment) 78 | FROM base AS neuron 79 | 80 | # Install system 
prerequisites 81 | RUN apt-get update -y \ 82 | && apt-get install -y --no-install-recommends \ 83 | gnupg2 \ 84 | wget \ 85 | python3-dev \ 86 | libexpat1 \ 87 | && rm -rf /var/lib/apt/lists/* \ 88 | && apt-get clean 89 | 90 | RUN echo "deb https://apt.repos.neuron.amazonaws.com jammy main" > /etc/apt/sources.list.d/neuron.list 91 | RUN wget -qO - https://apt.repos.neuron.amazonaws.com/GPG-PUB-KEY-AMAZON-AWS-NEURON.PUB | apt-key add - 92 | 93 | # Install neuronx packages 94 | RUN apt-get update -y \ 95 | && apt-get install -y --no-install-recommends \ 96 | aws-neuronx-dkms=2.17.17.0 \ 97 | aws-neuronx-collectives=2.21.46.0-69b77134b \ 98 | aws-neuronx-runtime-lib=2.21.41.0-fb1705f5f \ 99 | aws-neuronx-tools=2.18.3.0 \ 100 | && rm -rf /var/lib/apt/lists/* \ 101 | && apt-get clean 102 | 103 | ENV PATH="/opt/bin/:/opt/aws/neuron/bin:${PATH}" 104 | 105 | RUN pip3 install \ 106 | neuronx-cc==2.14.227.0 \ 107 | torch-neuronx==2.1.2.2.2.0 \ 108 | transformers-neuronx==0.11.351 \ 109 | --extra-index-url=https://pip.repos.neuron.amazonaws.com 110 | 111 | # Install HuggingFace packages 112 | RUN pip3 install \ 113 | hf_transfer huggingface_hub 114 | 115 | # Install optimum-neuron 116 | COPY --from=optimum-neuron /optimum-neuron optimum-neuron 117 | RUN pip3 install ./optimum-neuron 118 | 119 | # TGI base env 120 | ENV HUGGINGFACE_HUB_CACHE=/tmp \ 121 | HF_HUB_ENABLE_HF_TRANSFER=1 \ 122 | PORT=80 123 | 124 | # Disable color logs as they are not supported by CloudWatch 125 | ENV LOGURU_COLORIZE=NO 126 | ENV LOG_COLORIZE=0 127 | 128 | # Install router 129 | COPY --from=builder /usr/src/target/release/text-generation-router /usr/local/bin/text-generation-router 130 | # Install launcher 131 | COPY --from=builder /usr/src/target/release/text-generation-launcher /usr/local/bin/text-generation-launcher 132 | # Install python server 133 | COPY --from=pyserver /pyserver/build/dist dist 134 | RUN pip install dist/text_generation_server*.tar.gz 135 | 136 | # AWS Sagemaker compatible image 137 | FROM neuron as sagemaker 138 | 139 | COPY --from=optimum-neuron /optimum-neuron/text-generation-inference/sagemaker-entrypoint.sh entrypoint.sh 140 | RUN chmod +x entrypoint.sh 141 | 142 | ENTRYPOINT ["./entrypoint.sh"] 143 | 144 | 145 | RUN apt-get update && apt-get install -y --no-install-recommends curl unzip \ 146 | && rm -rf /var/lib/apt/lists/* 147 | RUN HOME_DIR=/root && \ 148 | pip install requests && \ 149 | curl -o ${HOME_DIR}/oss_compliance.zip https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip && \ 150 | unzip ${HOME_DIR}/oss_compliance.zip -d ${HOME_DIR}/ && \ 151 | cp ${HOME_DIR}/oss_compliance/test/testOSSCompliance /usr/local/bin/testOSSCompliance && \ 152 | chmod +x /usr/local/bin/testOSSCompliance && \ 153 | chmod +x ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh && \ 154 | ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh ${HOME_DIR} python && \ 155 | rm -rf ${HOME_DIR}/oss_compliance* 156 | 157 | RUN echo "N.B.: Although this image is released under the Apache-2.0 License, the Dockerfile used to build the image \ 158 | has an indirect documentation dependency on third party project. The \ 159 | project's licensing includes the license. \ 160 | \n\n\ 161 | N.B.: Although this image is released under the Apache-2.0 License, the Dockerfile used to build the image uses the \ 162 | third party project. The project's licensing \ 163 | includes the https://github.com/huggingface/text-generation-inference/blob/main/LICENSE> \ 164 | license." 
> /root/THIRD_PARTY_LICENSES 165 | 166 | LABEL dlc_major_version="1" 167 | LABEL com.amazonaws.ml.engines.sagemaker.dlc.framework.huggingface.tgi="true" 168 | LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port="true" -------------------------------------------------------------------------------- /huggingface/pytorch/optimum/docker/0.0.25/Dockerfile: -------------------------------------------------------------------------------- 1 | # Fetch and extract the TGI sources (TGI_VERSION is mandatory) 2 | FROM alpine AS tgi 3 | RUN mkdir -p /tgi 4 | ADD https://github.com/huggingface/text-generation-inference/archive/refs/tags/v2.1.1.tar.gz /tgi/sources.tar.gz 5 | RUN tar -C /tgi -xf /tgi/sources.tar.gz --strip-components=1 6 | 7 | # Build cargo components (adapted from TGI original Dockerfile) 8 | # Note that the build image is aligned on the same Linux version as the base image (Debian bookworm/ Ubuntu 22.04) 9 | FROM lukemathwalker/cargo-chef:latest-rust-1.79-bookworm AS chef 10 | WORKDIR /usr/src 11 | 12 | ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse 13 | 14 | FROM chef as planner 15 | COPY --from=tgi /tgi/Cargo.lock Cargo.lock 16 | COPY --from=tgi /tgi/Cargo.toml Cargo.toml 17 | COPY --from=tgi /tgi/rust-toolchain.toml rust-toolchain.toml 18 | COPY --from=tgi /tgi/proto proto 19 | COPY --from=tgi /tgi/benchmark benchmark 20 | COPY --from=tgi /tgi/router router 21 | COPY --from=tgi /tgi/launcher launcher 22 | RUN cargo chef prepare --recipe-path recipe.json 23 | 24 | FROM chef AS builder 25 | 26 | RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && \ 27 | curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v21.12/$PROTOC_ZIP && \ 28 | unzip -o $PROTOC_ZIP -d /usr/local bin/protoc && \ 29 | unzip -o $PROTOC_ZIP -d /usr/local 'include/*' && \ 30 | rm -f $PROTOC_ZIP 31 | 32 | COPY --from=planner /usr/src/recipe.json recipe.json 33 | RUN cargo chef cook --release --recipe-path recipe.json 34 | 35 | COPY --from=tgi /tgi/Cargo.lock Cargo.lock 36 | COPY --from=tgi /tgi/Cargo.toml Cargo.toml 37 | COPY --from=tgi /tgi/rust-toolchain.toml rust-toolchain.toml 38 | COPY --from=tgi /tgi/proto proto 39 | COPY --from=tgi /tgi/benchmark benchmark 40 | COPY --from=tgi /tgi/router router 41 | COPY --from=tgi /tgi/launcher launcher 42 | # Remove this line once TGI has fixed the conflict 43 | RUN cargo update ureq --precise 2.9.7 44 | RUN cargo build --release --workspace --exclude benchmark 45 | 46 | # Fetch optimum-neuron sources 47 | FROM alpine/git AS optimum-neuron 48 | RUN git clone --depth 1 --branch v0.0.25 https://github.com/huggingface/optimum-neuron.git /optimum-neuron 49 | 50 | # Python base image 51 | FROM ubuntu:22.04 AS base 52 | 53 | RUN apt-get update -y \ 54 | && apt-get install -y --no-install-recommends \ 55 | python3-pip \ 56 | python3-setuptools \ 57 | python-is-python3 \ 58 | && rm -rf /var/lib/apt/lists/* \ 59 | && apt-get clean 60 | RUN pip3 --no-cache-dir install --upgrade pip 61 | 62 | # Python server build image 63 | FROM base AS pyserver 64 | 65 | RUN apt-get update -y \ 66 | && apt-get install -y --no-install-recommends \ 67 | make \ 68 | python3-venv \ 69 | && rm -rf /var/lib/apt/lists/* \ 70 | && apt-get clean 71 | 72 | RUN install -d /pyserver 73 | WORKDIR /pyserver 74 | COPY --from=optimum-neuron /optimum-neuron/text-generation-inference/server server 75 | COPY --from=tgi /tgi/proto proto 76 | RUN pip3 install -r server/build-requirements.txt 77 | RUN VERBOSE=1 BUILDDIR=/pyserver/build PROTODIR=/pyserver/proto make -C server gen-server 78 | 79 | # 
Neuron base image (used for deployment) 80 | FROM base AS neuron 81 | 82 | # Install system prerequisites 83 | RUN apt-get update -y \ 84 | && apt-get install -y --no-install-recommends \ 85 | gnupg2 \ 86 | wget \ 87 | python3-dev \ 88 | libexpat1 \ 89 | && rm -rf /var/lib/apt/lists/* \ 90 | && apt-get clean 91 | 92 | RUN echo "deb https://apt.repos.neuron.amazonaws.com jammy main" > /etc/apt/sources.list.d/neuron.list 93 | RUN wget -qO - https://apt.repos.neuron.amazonaws.com/GPG-PUB-KEY-AMAZON-AWS-NEURON.PUB | apt-key add - 94 | 95 | # Install neuronx packages 96 | RUN apt-get update -y \ 97 | && apt-get install -y --no-install-recommends \ 98 | aws-neuronx-dkms=2.18.12.0 \ 99 | aws-neuronx-collectives=2.22.26.0-17a033bc8 \ 100 | aws-neuronx-runtime-lib=2.22.14.0-6e27b8d5b \ 101 | aws-neuronx-tools=2.19.0.0 \ 102 | libxml2 \ 103 | && rm -rf /var/lib/apt/lists/* \ 104 | && apt-get clean 105 | 106 | ENV PATH="/opt/bin/:/opt/aws/neuron/bin:${PATH}" 107 | 108 | RUN pip3 install \ 109 | neuronx-cc==2.15.128.0 \ 110 | torch-neuronx==2.1.2.2.3.0 \ 111 | transformers-neuronx==0.12.313 \ 112 | libneuronxla==2.0.4115.0 \ 113 | --extra-index-url=https://pip.repos.neuron.amazonaws.com 114 | 115 | # Install HuggingFace packages 116 | RUN pip3 install \ 117 | hf_transfer huggingface_hub 118 | 119 | # Install optimum-neuron 120 | COPY --from=optimum-neuron /optimum-neuron optimum-neuron 121 | RUN pip3 install ./optimum-neuron 122 | 123 | # TGI base env 124 | ENV HUGGINGFACE_HUB_CACHE=/tmp \ 125 | HF_HUB_ENABLE_HF_TRANSFER=1 \ 126 | PORT=80 127 | 128 | # Disable color logs as they are not supported by CloudWatch 129 | ENV LOGURU_COLORIZE=NO 130 | ENV LOG_COLORIZE=0 131 | 132 | # Install router 133 | COPY --from=builder /usr/src/target/release/text-generation-router /usr/local/bin/text-generation-router 134 | # Install launcher 135 | COPY --from=builder /usr/src/target/release/text-generation-launcher /usr/local/bin/text-generation-launcher 136 | # Install python server 137 | COPY --from=pyserver /pyserver/build/dist dist 138 | RUN pip install dist/text_generation_server*.tar.gz 139 | 140 | # AWS Sagemaker compatible image 141 | FROM neuron as sagemaker 142 | 143 | COPY --from=optimum-neuron /optimum-neuron/text-generation-inference/sagemaker-entrypoint.sh entrypoint.sh 144 | RUN chmod +x entrypoint.sh 145 | 146 | ENTRYPOINT ["./entrypoint.sh"] 147 | 148 | 149 | RUN apt-get update && apt-get install -y --no-install-recommends curl unzip \ 150 | && rm -rf /var/lib/apt/lists/* 151 | RUN HOME_DIR=/root && \ 152 | pip install requests && \ 153 | curl -o ${HOME_DIR}/oss_compliance.zip https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip && \ 154 | unzip ${HOME_DIR}/oss_compliance.zip -d ${HOME_DIR}/ && \ 155 | cp ${HOME_DIR}/oss_compliance/test/testOSSCompliance /usr/local/bin/testOSSCompliance && \ 156 | chmod +x /usr/local/bin/testOSSCompliance && \ 157 | chmod +x ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh && \ 158 | ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh ${HOME_DIR} python && \ 159 | rm -rf ${HOME_DIR}/oss_compliance* 160 | 161 | RUN echo "N.B.: Although this image is released under the Apache-2.0 License, the Dockerfile used to build the image \ 162 | has an indirect documentation dependency on third party project. The \ 163 | project's licensing includes the license. \ 164 | \n\n\ 165 | N.B.: Although this image is released under the Apache-2.0 License, the Dockerfile used to build the image uses the \ 166 | third party project. 
The project's licensing \ 167 | includes the https://github.com/huggingface/text-generation-inference/blob/main/LICENSE> \ 168 | license." > /root/THIRD_PARTY_LICENSES 169 | 170 | LABEL dlc_major_version="1" 171 | LABEL com.amazonaws.ml.engines.sagemaker.dlc.framework.huggingface.tgi="true" 172 | LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port="true" 173 | -------------------------------------------------------------------------------- /huggingface/pytorch/optimum/docker/0.0.27/Dockerfile: -------------------------------------------------------------------------------- 1 | # Fetch and extract the TGI sources 2 | FROM alpine AS tgi 3 | RUN mkdir -p /tgi 4 | ADD https://github.com/huggingface/text-generation-inference/archive/refs/tags/v3.0.0.tar.gz /tgi/sources.tar.gz 5 | RUN tar -C /tgi -xf /tgi/sources.tar.gz --strip-components=1 6 | 7 | # Fetch also the optimum-neuron sources that contain modified TGI sources 8 | FROM alpine AS optimum-neuron 9 | RUN mkdir -p /optimum-neuron 10 | ADD https://github.com/huggingface/optimum-neuron/archive/refs/tags/v0.0.27.tar.gz /optimum-neuron/sources.tar.gz 11 | RUN tar -C /optimum-neuron -xf /optimum-neuron/sources.tar.gz --strip-components=1 12 | 13 | # Build cargo components (adapted from TGI original Dockerfile) 14 | # Note: we cannot use the cargo-chef base image as it uses python 3.11 15 | FROM ubuntu:22.04 AS chef 16 | 17 | RUN apt-get update -y \ 18 | && apt-get install -y --no-install-recommends \ 19 | curl ca-certificates build-essential \ 20 | && rm -rf /var/lib/apt/lists/* \ 21 | && apt-get clean 22 | 23 | RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- --default-toolchain 1.80.1 --profile minimal -y 24 | ENV PATH="/root/.cargo/bin:${PATH}" 25 | RUN cargo install cargo-chef --locked 26 | 27 | WORKDIR /usr/src 28 | 29 | ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse 30 | 31 | FROM chef AS planner 32 | COPY --from=optimum-neuron /optimum-neuron/text-generation-inference/Cargo.toml Cargo.toml 33 | COPY --from=tgi /tgi/Cargo.lock Cargo.lock 34 | COPY --from=tgi /tgi/rust-toolchain.toml rust-toolchain.toml 35 | COPY --from=tgi /tgi/proto proto 36 | COPY --from=tgi /tgi/router router 37 | COPY --from=tgi /tgi/backends backends 38 | COPY --from=tgi /tgi/launcher launcher 39 | RUN cargo chef prepare --recipe-path recipe.json 40 | 41 | FROM chef AS builder 42 | 43 | RUN apt-get update -y \ 44 | && apt-get install -y --no-install-recommends \ 45 | unzip python3-dev libssl-dev pkg-config \ 46 | && rm -rf /var/lib/apt/lists/* \ 47 | && apt-get clean 48 | 49 | RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && \ 50 | curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v21.12/$PROTOC_ZIP && \ 51 | unzip -o $PROTOC_ZIP -d /usr/local bin/protoc && \ 52 | unzip -o $PROTOC_ZIP -d /usr/local 'include/*' && \ 53 | rm -f $PROTOC_ZIP 54 | 55 | COPY --from=optimum-neuron /optimum-neuron/text-generation-inference/Cargo.toml Cargo.toml 56 | COPY --from=planner /usr/src/recipe.json recipe.json 57 | RUN cargo chef cook --release --recipe-path recipe.json 58 | 59 | COPY --from=tgi /tgi/Cargo.lock Cargo.lock 60 | COPY --from=tgi /tgi/rust-toolchain.toml rust-toolchain.toml 61 | COPY --from=tgi /tgi/proto proto 62 | COPY --from=tgi /tgi/router router 63 | COPY --from=tgi /tgi/backends backends 64 | COPY --from=tgi /tgi/launcher launcher 65 | # Remove this line once TGI has fixed the conflict 66 | RUN cargo update ureq --precise 2.9.7 67 | RUN cargo build --release 68 | 69 | # Python base image 70 | FROM ubuntu:22.04 
AS base 71 | 72 | RUN apt-get update -y \ 73 | && apt-get install -y --no-install-recommends \ 74 | python3-pip \ 75 | python3-setuptools \ 76 | python-is-python3 \ 77 | && rm -rf /var/lib/apt/lists/* \ 78 | && apt-get clean 79 | RUN pip3 --no-cache-dir install --upgrade pip 80 | 81 | # Python server build image 82 | FROM base AS pyserver 83 | 84 | RUN apt-get update -y \ 85 | && apt-get install -y --no-install-recommends \ 86 | golang-go \ 87 | make \ 88 | python3-venv \ 89 | && rm -rf /var/lib/apt/lists/* \ 90 | && apt-get clean 91 | 92 | RUN install -d /pyserver 93 | WORKDIR /pyserver 94 | COPY --from=optimum-neuron /optimum-neuron/text-generation-inference/server server 95 | COPY --from=tgi /tgi/proto proto 96 | RUN pip3 install -r server/build-requirements.txt 97 | RUN VERBOSE=1 BUILDDIR=/pyserver/build PROTODIR=/pyserver/proto make -C server gen-server 98 | 99 | # Neuron base image (used for deployment) 100 | FROM base AS neuron 101 | 102 | # Install system prerequisites 103 | RUN apt-get update -y \ 104 | && apt-get install -y --no-install-recommends \ 105 | gnupg2 \ 106 | wget \ 107 | python3-dev \ 108 | libexpat1 \ 109 | && rm -rf /var/lib/apt/lists/* \ 110 | && apt-get clean 111 | 112 | RUN echo "deb https://apt.repos.neuron.amazonaws.com jammy main" > /etc/apt/sources.list.d/neuron.list 113 | RUN wget -qO - https://apt.repos.neuron.amazonaws.com/GPG-PUB-KEY-AMAZON-AWS-NEURON.PUB | apt-key add - 114 | 115 | # Install neuronx packages 116 | RUN apt-get update -y \ 117 | && apt-get install -y --no-install-recommends \ 118 | aws-neuronx-dkms=2.18.20.0 \ 119 | aws-neuronx-collectives=2.22.33.0-d2128d1aa \ 120 | aws-neuronx-runtime-lib=2.22.19.0-5856c0b42 \ 121 | aws-neuronx-tools=2.19.0.0 \ 122 | libxml2 \ 123 | && rm -rf /var/lib/apt/lists/* \ 124 | && apt-get clean 125 | 126 | ENV PATH="/opt/bin/:/opt/aws/neuron/bin:${PATH}" 127 | 128 | RUN pip3 install \ 129 | neuronx-cc==2.15.143.0 \ 130 | torch-neuronx==2.1.2.2.3.2 \ 131 | transformers-neuronx==0.12.313 \ 132 | libneuronxla==2.0.5347.0 \ 133 | --extra-index-url=https://pip.repos.neuron.amazonaws.com 134 | 135 | # Install HuggingFace packages 136 | RUN pip3 install \ 137 | hf_transfer huggingface_hub 138 | 139 | # Install optimum-neuron 140 | COPY --from=optimum-neuron /optimum-neuron optimum-neuron 141 | RUN pip3 install ./optimum-neuron 142 | 143 | # TGI base env 144 | ENV HUGGINGFACE_HUB_CACHE=/tmp \ 145 | HF_HUB_ENABLE_HF_TRANSFER=1 \ 146 | PORT=80 147 | 148 | # Disable color logs as they are not supported by CloudWatch 149 | ENV LOGURU_COLORIZE=NO 150 | ENV LOG_COLORIZE=0 151 | 152 | # Install router 153 | COPY --from=builder /usr/src/target/release/text-generation-router-v2 /usr/local/bin/text-generation-router 154 | # Install launcher 155 | COPY --from=builder /usr/src/target/release/text-generation-launcher /usr/local/bin/text-generation-launcher 156 | # Install python server 157 | COPY --from=pyserver /pyserver/build/dist dist 158 | RUN pip install dist/text_generation_server*.tar.gz 159 | 160 | # AWS Sagemaker compatible image 161 | FROM neuron as sagemaker 162 | 163 | COPY --from=optimum-neuron /optimum-neuron/text-generation-inference/sagemaker-entrypoint.sh entrypoint.sh 164 | RUN chmod +x entrypoint.sh 165 | 166 | ENTRYPOINT ["./entrypoint.sh"] 167 | 168 | 169 | RUN apt-get update && apt-get install -y --no-install-recommends curl unzip \ 170 | && rm -rf /var/lib/apt/lists/* 171 | RUN HOME_DIR=/root && \ 172 | pip install requests && \ 173 | curl -o ${HOME_DIR}/oss_compliance.zip 
https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip && \ 174 | unzip ${HOME_DIR}/oss_compliance.zip -d ${HOME_DIR}/ && \ 175 | cp ${HOME_DIR}/oss_compliance/test/testOSSCompliance /usr/local/bin/testOSSCompliance && \ 176 | chmod +x /usr/local/bin/testOSSCompliance && \ 177 | chmod +x ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh && \ 178 | ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh ${HOME_DIR} python && \ 179 | rm -rf ${HOME_DIR}/oss_compliance* 180 | 181 | RUN echo "N.B.: Although this image is released under the Apache-2.0 License, the Dockerfile used to build the image \ 182 | has an indirect documentation dependency on third party project. The \ 183 | project's licensing includes the license. \ 184 | \n\n\ 185 | N.B.: Although this image is released under the Apache-2.0 License, the Dockerfile used to build the image uses the \ 186 | third party project. The project's licensing \ 187 | includes the https://github.com/huggingface/text-generation-inference/blob/main/LICENSE> \ 188 | license." > /root/THIRD_PARTY_LICENSES 189 | 190 | LABEL dlc_major_version="1" 191 | LABEL com.amazonaws.ml.engines.sagemaker.dlc.framework.huggingface.tgi="true" 192 | LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port="true" 193 | -------------------------------------------------------------------------------- /huggingface/pytorch/optimum/docker/0.0.28/Dockerfile: -------------------------------------------------------------------------------- 1 | # Fetch and extract the TGI sources 2 | FROM alpine AS tgi 3 | RUN mkdir -p /tgi 4 | ADD https://github.com/huggingface/text-generation-inference/archive/refs/tags/v3.0.0.tar.gz /tgi/sources.tar.gz 5 | RUN tar -C /tgi -xf /tgi/sources.tar.gz --strip-components=1 6 | 7 | # Fetch also the optimum-neuron sources that contain modified TGI sources 8 | FROM alpine AS optimum-neuron 9 | RUN mkdir -p /optimum-neuron 10 | ADD https://github.com/huggingface/optimum-neuron/archive/refs/tags/v0.0.28.tar.gz /optimum-neuron/sources.tar.gz 11 | RUN tar -C /optimum-neuron -xf /optimum-neuron/sources.tar.gz --strip-components=1 12 | 13 | # Build cargo components (adapted from TGI original Dockerfile) 14 | # Note: we cannot use the cargo-chef base image as it uses python 3.11 15 | FROM ubuntu:22.04 AS chef 16 | 17 | RUN apt-get update -y \ 18 | && apt-get install -y --no-install-recommends \ 19 | curl ca-certificates build-essential \ 20 | && rm -rf /var/lib/apt/lists/* \ 21 | && apt-get clean 22 | 23 | RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- --default-toolchain 1.80.1 --profile minimal -y 24 | ENV PATH="/root/.cargo/bin:${PATH}" 25 | RUN cargo install cargo-chef --locked 26 | 27 | WORKDIR /usr/src 28 | 29 | ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse 30 | 31 | FROM chef AS planner 32 | COPY --from=optimum-neuron /optimum-neuron/text-generation-inference/Cargo.toml Cargo.toml 33 | COPY --from=tgi /tgi/Cargo.lock Cargo.lock 34 | COPY --from=tgi /tgi/rust-toolchain.toml rust-toolchain.toml 35 | COPY --from=tgi /tgi/proto proto 36 | COPY --from=tgi /tgi/router router 37 | COPY --from=tgi /tgi/backends backends 38 | COPY --from=tgi /tgi/launcher launcher 39 | RUN cargo chef prepare --recipe-path recipe.json 40 | 41 | FROM chef AS builder 42 | 43 | RUN apt-get update -y \ 44 | && apt-get install -y --no-install-recommends \ 45 | unzip python3-dev libssl-dev pkg-config \ 46 | && rm -rf /var/lib/apt/lists/* \ 47 | && apt-get clean 48 | 49 | RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && \ 50 | curl -OL 
https://github.com/protocolbuffers/protobuf/releases/download/v21.12/$PROTOC_ZIP && \ 51 | unzip -o $PROTOC_ZIP -d /usr/local bin/protoc && \ 52 | unzip -o $PROTOC_ZIP -d /usr/local 'include/*' && \ 53 | rm -f $PROTOC_ZIP 54 | 55 | COPY --from=optimum-neuron /optimum-neuron/text-generation-inference/Cargo.toml Cargo.toml 56 | COPY --from=planner /usr/src/recipe.json recipe.json 57 | RUN cargo chef cook --release --recipe-path recipe.json 58 | 59 | COPY --from=tgi /tgi/Cargo.lock Cargo.lock 60 | COPY --from=tgi /tgi/rust-toolchain.toml rust-toolchain.toml 61 | COPY --from=tgi /tgi/proto proto 62 | COPY --from=tgi /tgi/router router 63 | COPY --from=tgi /tgi/backends backends 64 | COPY --from=tgi /tgi/launcher launcher 65 | # Remove this line once TGI has fixed the conflict 66 | RUN cargo update ureq --precise 2.9.7 67 | RUN cargo build --release 68 | 69 | # Python base image 70 | FROM ubuntu:22.04 AS base 71 | 72 | RUN apt-get update -y \ 73 | && apt-get install -y --no-install-recommends \ 74 | python3-pip \ 75 | python3-setuptools \ 76 | python-is-python3 \ 77 | && rm -rf /var/lib/apt/lists/* \ 78 | && apt-get clean 79 | RUN pip3 --no-cache-dir install --upgrade pip 80 | 81 | # Python server build image 82 | FROM base AS pyserver 83 | 84 | RUN apt-get update -y \ 85 | && apt-get install -y --no-install-recommends \ 86 | make \ 87 | python3-venv \ 88 | && rm -rf /var/lib/apt/lists/* \ 89 | && apt-get clean 90 | 91 | RUN install -d /pyserver 92 | WORKDIR /pyserver 93 | COPY --from=optimum-neuron /optimum-neuron/text-generation-inference/server server 94 | COPY --from=tgi /tgi/proto proto 95 | RUN pip3 install -r server/build-requirements.txt 96 | RUN VERBOSE=1 BUILDDIR=/pyserver/build PROTODIR=/pyserver/proto make -C server gen-server 97 | 98 | # Neuron base image (used for deployment) 99 | FROM base AS neuron 100 | 101 | # Install system prerequisites 102 | RUN apt-get update -y \ 103 | && apt-get install -y --no-install-recommends \ 104 | gnupg2 \ 105 | wget \ 106 | python3-dev \ 107 | libexpat1 \ 108 | && rm -rf /var/lib/apt/lists/* \ 109 | && apt-get clean 110 | 111 | RUN echo "deb https://apt.repos.neuron.amazonaws.com jammy main" > /etc/apt/sources.list.d/neuron.list 112 | RUN wget -qO - https://apt.repos.neuron.amazonaws.com/GPG-PUB-KEY-AMAZON-AWS-NEURON.PUB | apt-key add - 113 | 114 | # Install neuronx packages 115 | RUN apt-get update -y \ 116 | && apt-get install -y --no-install-recommends \ 117 | aws-neuronx-dkms=2.18.20.0 \ 118 | aws-neuronx-collectives=2.22.33.0-d2128d1aa \ 119 | aws-neuronx-runtime-lib=2.22.19.0-5856c0b42 \ 120 | aws-neuronx-tools=2.19.0.0 \ 121 | libxml2 \ 122 | && rm -rf /var/lib/apt/lists/* \ 123 | && apt-get clean 124 | 125 | ENV PATH="/opt/bin/:/opt/aws/neuron/bin:${PATH}" 126 | 127 | RUN pip3 install \ 128 | neuronx-cc==2.15.143.0 \ 129 | torch-neuronx==2.1.2.2.3.2 \ 130 | transformers-neuronx==0.12.313 \ 131 | libneuronxla==2.0.5347.0 \ 132 | --extra-index-url=https://pip.repos.neuron.amazonaws.com 133 | 134 | # Install HuggingFace packages 135 | RUN pip3 install \ 136 | hf_transfer huggingface_hub 137 | 138 | # Install optimum-neuron 139 | COPY --from=optimum-neuron /optimum-neuron optimum-neuron 140 | RUN pip3 install ./optimum-neuron 141 | 142 | # TGI base env 143 | ENV HUGGINGFACE_HUB_CACHE=/tmp \ 144 | HF_HUB_ENABLE_HF_TRANSFER=1 \ 145 | PORT=80 146 | 147 | # Disable color logs as they are not supported by CloudWatch 148 | ENV LOGURU_COLORIZE=NO 149 | ENV LOG_COLORIZE=0 150 | 151 | # Install router 152 | COPY --from=builder 
/usr/src/target/release/text-generation-router-v2 /usr/local/bin/text-generation-router 153 | # Install launcher 154 | COPY --from=builder /usr/src/target/release/text-generation-launcher /usr/local/bin/text-generation-launcher 155 | # Install python server 156 | COPY --from=pyserver /pyserver/build/dist dist 157 | RUN pip install dist/text_generation_server*.tar.gz 158 | 159 | # AWS Sagemaker compatible image 160 | FROM neuron as sagemaker 161 | 162 | COPY --from=optimum-neuron /optimum-neuron/text-generation-inference/sagemaker-entrypoint.sh entrypoint.sh 163 | RUN chmod +x entrypoint.sh 164 | 165 | ENTRYPOINT ["./entrypoint.sh"] 166 | 167 | 168 | RUN apt-get update && apt-get install -y --no-install-recommends curl unzip \ 169 | && rm -rf /var/lib/apt/lists/* 170 | RUN HOME_DIR=/root && \ 171 | pip install requests && \ 172 | curl -o ${HOME_DIR}/oss_compliance.zip https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip && \ 173 | unzip ${HOME_DIR}/oss_compliance.zip -d ${HOME_DIR}/ && \ 174 | cp ${HOME_DIR}/oss_compliance/test/testOSSCompliance /usr/local/bin/testOSSCompliance && \ 175 | chmod +x /usr/local/bin/testOSSCompliance && \ 176 | chmod +x ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh && \ 177 | ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh ${HOME_DIR} python && \ 178 | rm -rf ${HOME_DIR}/oss_compliance* 179 | 180 | RUN echo "N.B.: Although this image is released under the Apache-2.0 License, the Dockerfile used to build the image \ 181 | has an indirect documentation dependency on third party project. The \ 182 | project's licensing includes the license. \ 183 | \n\n\ 184 | N.B.: Although this image is released under the Apache-2.0 License, the Dockerfile used to build the image uses the \ 185 | third party project. The project's licensing \ 186 | includes the https://github.com/huggingface/text-generation-inference/blob/main/LICENSE> \ 187 | license." 
> /root/THIRD_PARTY_LICENSES 188 | 189 | LABEL dlc_major_version="1" 190 | LABEL com.amazonaws.ml.engines.sagemaker.dlc.framework.huggingface.tgi="true" 191 | LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port="true" 192 | -------------------------------------------------------------------------------- /huggingface/pytorch/tei/docker/1.2.3/cpu/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM lukemathwalker/cargo-chef:latest-rust-1.75-bookworm AS chef 2 | WORKDIR /usr/src 3 | 4 | ENV SCCACHE=0.5.4 5 | ENV RUSTC_WRAPPER=/usr/local/bin/sccache 6 | 7 | # Download and configure sccache 8 | RUN curl -fsSL https://github.com/mozilla/sccache/releases/download/v$SCCACHE/sccache-v$SCCACHE-x86_64-unknown-linux-musl.tar.gz | tar -xzv --strip-components=1 -C /usr/local/bin sccache-v$SCCACHE-x86_64-unknown-linux-musl/sccache && \ 9 | chmod +x /usr/local/bin/sccache 10 | 11 | FROM chef AS planner 12 | 13 | COPY backends backends 14 | COPY core core 15 | COPY router router 16 | COPY Cargo.toml ./ 17 | COPY Cargo.lock ./ 18 | 19 | RUN cargo chef prepare --recipe-path recipe.json 20 | 21 | FROM chef AS builder 22 | 23 | ARG GIT_SHA 24 | ARG DOCKER_LABEL 25 | 26 | # sccache specific variables 27 | ARG ACTIONS_CACHE_URL 28 | ARG ACTIONS_RUNTIME_TOKEN 29 | ARG SCCACHE_GHA_ENABLED 30 | 31 | RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB \ 32 | | gpg --dearmor | tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null && \ 33 | echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | \ 34 | tee /etc/apt/sources.list.d/oneAPI.list 35 | 36 | RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ 37 | intel-oneapi-mkl-devel=2024.0.0-49656 \ 38 | build-essential \ 39 | && rm -rf /var/lib/apt/lists/* 40 | 41 | RUN echo "int mkl_serv_intel_cpu_true() {return 1;}" > fakeintel.c && \ 42 | gcc -shared -fPIC -o libfakeintel.so fakeintel.c 43 | 44 | COPY --from=planner /usr/src/recipe.json recipe.json 45 | 46 | RUN cargo chef cook --release --features candle --features mkl-dynamic --no-default-features --recipe-path recipe.json && sccache -s 47 | 48 | COPY backends backends 49 | COPY core core 50 | COPY router router 51 | COPY Cargo.toml ./ 52 | COPY Cargo.lock ./ 53 | 54 | FROM builder as http-builder 55 | 56 | RUN cargo build --release --bin text-embeddings-router -F candle -F mkl-dynamic -F http --no-default-features && sccache -s 57 | 58 | FROM builder as grpc-builder 59 | 60 | RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && \ 61 | curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v21.12/$PROTOC_ZIP && \ 62 | unzip -o $PROTOC_ZIP -d /usr/local bin/protoc && \ 63 | unzip -o $PROTOC_ZIP -d /usr/local 'include/*' && \ 64 | rm -f $PROTOC_ZIP 65 | 66 | COPY proto proto 67 | 68 | RUN cargo build --release --bin text-embeddings-router -F grpc -F candle -F mkl-dynamic --no-default-features && sccache -s 69 | 70 | FROM debian:bookworm-slim as base 71 | 72 | ENV HUGGINGFACE_HUB_CACHE=/data \ 73 | PORT=80 \ 74 | MKL_ENABLE_INSTRUCTIONS=AVX512_E4 \ 75 | RAYON_NUM_THREADS=8 \ 76 | LD_PRELOAD=/usr/local/libfakeintel.so \ 77 | LD_LIBRARY_PATH=/usr/local/lib 78 | 79 | RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ 80 | libomp-dev \ 81 | ca-certificates \ 82 | libssl-dev \ 83 | curl \ 84 | && rm -rf /var/lib/apt/lists/* 85 | 86 | # Copy a lot of the 
Intel shared objects because of the mkl_serv_intel_cpu_true patch... 87 | COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_intel_lp64.so.2 /usr/local/lib/libmkl_intel_lp64.so.2 88 | COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_intel_thread.so.2 /usr/local/lib/libmkl_intel_thread.so.2 89 | COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_core.so.2 /usr/local/lib/libmkl_core.so.2 90 | COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_vml_def.so.2 /usr/local/lib/libmkl_vml_def.so.2 91 | COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_def.so.2 /usr/local/lib/libmkl_def.so.2 92 | COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_vml_avx2.so.2 /usr/local/lib/libmkl_vml_avx2.so.2 93 | COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_vml_avx512.so.2 /usr/local/lib/libmkl_vml_avx512.so.2 94 | COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_avx2.so.2 /usr/local/lib/libmkl_avx2.so.2 95 | COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_avx512.so.2 /usr/local/lib/libmkl_avx512.so.2 96 | COPY --from=builder /usr/src/libfakeintel.so /usr/local/libfakeintel.so 97 | 98 | FROM base as grpc 99 | 100 | COPY --from=grpc-builder /usr/src/target/release/text-embeddings-router /usr/local/bin/text-embeddings-router 101 | 102 | ENTRYPOINT ["text-embeddings-router"] 103 | CMD ["--json-output"] 104 | 105 | FROM base AS http 106 | 107 | COPY --from=http-builder /usr/src/target/release/text-embeddings-router /usr/local/bin/text-embeddings-router 108 | 109 | # Amazon SageMaker compatible image 110 | FROM http as sagemaker 111 | COPY --chmod=775 sagemaker-entrypoint.sh entrypoint.sh 112 | 113 | ENTRYPOINT ["./entrypoint.sh"] 114 | 115 | # Default image 116 | FROM http 117 | 118 | ENTRYPOINT ["text-embeddings-router"] 119 | CMD ["--json-output"] -------------------------------------------------------------------------------- /huggingface/pytorch/tei/docker/1.2.3/gpu/Dockerfile: -------------------------------------------------------------------------------- 1 | # Use the official NVIDIA CUDA base image 2 | FROM nvidia/cuda:12.2.0-devel-ubuntu22.04 AS base-builder 3 | 4 | ENV SCCACHE=0.5.4 5 | ENV RUSTC_WRAPPER=/usr/local/bin/sccache 6 | ENV PATH="/root/.cargo/bin:${PATH}" 7 | 8 | RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ 9 | curl \ 10 | libssl-dev \ 11 | pkg-config \ 12 | wget \ 13 | gnupg2 14 | 15 | # Download and configure sccache 16 | RUN curl -fsSL https://github.com/mozilla/sccache/releases/download/v$SCCACHE/sccache-v$SCCACHE-x86_64-unknown-linux-musl.tar.gz | tar -xzv --strip-components=1 -C /usr/local/bin sccache-v$SCCACHE-x86_64-unknown-linux-musl/sccache && \ 17 | chmod +x /usr/local/bin/sccache 18 | 19 | RUN curl https://sh.rustup.rs -sSf | bash -s -- -y 20 | RUN cargo install cargo-chef --locked 21 | 22 | FROM base-builder AS planner 23 | 24 | WORKDIR /usr/src 25 | 26 | COPY backends backends 27 | COPY core core 28 | COPY router router 29 | COPY Cargo.toml ./ 30 | COPY Cargo.lock ./ 31 | 32 | RUN cargo chef prepare --recipe-path recipe.json 33 | 34 | FROM base-builder AS builder 35 | 36 | ARG GIT_SHA 37 | ARG DOCKER_LABEL 38 | ARG VERTEX="false" 39 | 40 | # sccache specific variables 41 | ARG ACTIONS_CACHE_URL 42 | ARG ACTIONS_RUNTIME_TOKEN 43 | ARG SCCACHE_GHA_ENABLED 44 | 45 | # limit the number of kernels built at the same time 46 | ARG RAYON_NUM_THREADS=4 47 | 48 | WORKDIR 
/usr/src 49 | 50 | COPY --from=planner /usr/src/recipe.json recipe.json 51 | 52 | RUN if [ $VERTEX = "true" ]; \ 53 | then \ 54 | cargo chef cook --release --features google --recipe-path recipe.json && sccache -s; \ 55 | else \ 56 | cargo chef cook --release --recipe-path recipe.json && sccache -s; \ 57 | fi; 58 | 59 | RUN if [ $VERTEX = "true" ]; \ 60 | then \ 61 | CUDA_COMPUTE_CAP=75 cargo chef cook --release --features google --features candle-cuda-turing --recipe-path recipe.json && sccache -s; \ 62 | else \ 63 | CUDA_COMPUTE_CAP=75 cargo chef cook --release --features candle-cuda-turing --recipe-path recipe.json && sccache -s; \ 64 | fi; 65 | 66 | RUN if [ $VERTEX = "true" ]; \ 67 | then \ 68 | CUDA_COMPUTE_CAP=80 cargo chef cook --release --features google --features candle-cuda --recipe-path recipe.json && sccache -s; \ 69 | else \ 70 | CUDA_COMPUTE_CAP=80 cargo chef cook --release --features candle-cuda --recipe-path recipe.json && sccache -s; \ 71 | fi; 72 | 73 | RUN if [ $VERTEX = "true" ]; \ 74 | then \ 75 | CUDA_COMPUTE_CAP=90 cargo chef cook --release --features google --features candle-cuda --recipe-path recipe.json && sccache -s; \ 76 | else \ 77 | CUDA_COMPUTE_CAP=90 cargo chef cook --release --features candle-cuda --recipe-path recipe.json && sccache -s; \ 78 | fi; 79 | 80 | COPY backends backends 81 | COPY core core 82 | COPY router router 83 | COPY Cargo.toml ./ 84 | COPY Cargo.lock ./ 85 | 86 | RUN if [ $VERTEX = "true" ]; \ 87 | then \ 88 | CUDA_COMPUTE_CAP=75 cargo build --release --bin text-embeddings-router -F candle-cuda-turing -F google && sccache -s; \ 89 | else \ 90 | CUDA_COMPUTE_CAP=75 cargo build --release --bin text-embeddings-router -F candle-cuda-turing && sccache -s; \ 91 | fi; 92 | 93 | RUN mv /usr/src/target/release/text-embeddings-router /usr/src/target/release/text-embeddings-router-75 94 | 95 | RUN if [ $VERTEX = "true" ]; \ 96 | then \ 97 | CUDA_COMPUTE_CAP=80 cargo build --release --bin text-embeddings-router -F candle-cuda -F google && sccache -s; \ 98 | else \ 99 | CUDA_COMPUTE_CAP=80 cargo build --release --bin text-embeddings-router -F candle-cuda && sccache -s; \ 100 | fi; 101 | 102 | RUN mv /usr/src/target/release/text-embeddings-router /usr/src/target/release/text-embeddings-router-80 103 | 104 | RUN if [ $VERTEX = "true" ]; \ 105 | then \ 106 | CUDA_COMPUTE_CAP=90 cargo build --release --bin text-embeddings-router -F candle-cuda -F google && sccache -s; \ 107 | else \ 108 | CUDA_COMPUTE_CAP=90 cargo build --release --bin text-embeddings-router -F candle-cuda && sccache -s; \ 109 | fi; 110 | 111 | RUN mv /usr/src/target/release/text-embeddings-router /usr/src/target/release/text-embeddings-router-90 112 | 113 | FROM nvidia/cuda:12.2.0-runtime-ubuntu22.04 as base 114 | 115 | ARG DEFAULT_USE_FLASH_ATTENTION=True 116 | 117 | ENV HUGGINGFACE_HUB_CACHE=/data \ 118 | PORT=80 \ 119 | USE_FLASH_ATTENTION=$DEFAULT_USE_FLASH_ATTENTION 120 | 121 | # Install nvidia-smi and other necessary utilities 122 | RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ 123 | wget \ 124 | libtemplate-perl \ 125 | perl 126 | 127 | COPY --from=builder /usr/src/target/release/text-embeddings-router-75 /usr/local/bin/text-embeddings-router-75 128 | COPY --from=builder /usr/src/target/release/text-embeddings-router-80 /usr/local/bin/text-embeddings-router-80 129 | COPY --from=builder /usr/src/target/release/text-embeddings-router-90 /usr/local/bin/text-embeddings-router-90 130 | 131 | # Amazon SageMaker compatible image 132 | 
FROM base AS sagemaker 133 | 134 | COPY --chmod=775 /huggingface/pytorch/tei/docker/1.2.3/gpu/sagemaker-entrypoint-cuda-all.sh entrypoint.sh 135 | 136 | ENTRYPOINT ["./entrypoint.sh"] 137 | 138 | # Default image 139 | FROM base 140 | 141 | COPY --chmod=775 cuda-all-entrypoint.sh entrypoint.sh 142 | 143 | ENTRYPOINT ["./entrypoint.sh"] 144 | CMD ["--json-output"] -------------------------------------------------------------------------------- /huggingface/pytorch/tei/docker/1.2.3/gpu/sagemaker-entrypoint-cuda-all.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | verlte() { 4 | [ "$1" = "$2" ] && return 1 || [ "$2" = "$(echo -e "$1\n$2" | sort -V | head -n1)" ] 5 | } 6 | 7 | if [ -f /usr/local/cuda/compat/libcuda.so.1 ]; then 8 | CUDA_COMPAT_MAX_DRIVER_VERSION=$(readlink /usr/local/cuda/compat/libcuda.so.1 | cut -d"." -f 3-) 9 | echo "CUDA compat package requires Nvidia driver ≤${CUDA_COMPAT_MAX_DRIVER_VERSION}" 10 | cat /proc/driver/nvidia/version 11 | NVIDIA_DRIVER_VERSION=$(sed -n 's/^NVRM.*Kernel Module *\([0-9.]*\).*$/\1/p' /proc/driver/nvidia/version 2>/dev/null || true) 12 | echo "Current installed Nvidia driver version is ${NVIDIA_DRIVER_VERSION}" 13 | if [ $(verlte "$CUDA_COMPAT_MAX_DRIVER_VERSION" "$NVIDIA_DRIVER_VERSION") ]; then 14 | echo "Setup CUDA compatibility libs path to LD_LIBRARY_PATH" 15 | export LD_LIBRARY_PATH=/usr/local/cuda/compat:$LD_LIBRARY_PATH 16 | echo $LD_LIBRARY_PATH 17 | else 18 | echo "Skip CUDA compat libs setup as newer Nvidia driver is installed" 19 | fi 20 | else 21 | echo "Skip CUDA compat libs setup as package not found" 22 | fi 23 | 24 | if [[ -z "${HF_MODEL_ID}" ]]; then 25 | echo "HF_MODEL_ID must be set" 26 | exit 1 27 | fi 28 | 29 | export MODEL_ID="${HF_MODEL_ID}" 30 | 31 | if [[ -n "${HF_MODEL_REVISION}" ]]; then 32 | export REVISION="${HF_MODEL_REVISION}" 33 | fi 34 | 35 | if ! command -v nvidia-smi &> /dev/null; then 36 | echo "Error: 'nvidia-smi' command not found." 37 | exit 1 38 | fi 39 | 40 | # Query GPU name using nvidia-smi 41 | gpu_name=$(nvidia-smi --query-gpu=gpu_name --format=csv | awk 'NR==2') 42 | if [ $? -ne 0 ]; then 43 | echo "Error: $gpu_name" 44 | echo "Query gpu_name failed" 45 | else 46 | echo "Query gpu_name succeeded. 
Printing output: $gpu_name" 47 | fi 48 | 49 | # Function to get compute capability based on GPU name 50 | get_compute_cap() { 51 | gpu_name="$1" 52 | 53 | # Check if the GPU name contains "A10G" 54 | if [[ "$gpu_name" == *"A10G"* ]]; then 55 | echo "86" 56 | # Check if the GPU name contains "A100" 57 | elif [[ "$gpu_name" == *"A100"* ]]; then 58 | echo "80" 59 | # Check if the GPU name contains "H100" 60 | elif [[ "$gpu_name" == *"H100"* ]]; then 61 | echo "90" 62 | # Cover Nvidia T4 63 | elif [[ "$gpu_name" == *"T4"* ]]; then 64 | echo "75" 65 | # Cover Nvidia L4 66 | elif [[ "$gpu_name" == *"L4"* ]]; then 67 | echo "89" 68 | else 69 | echo "80" # Default compute capability 70 | fi 71 | } 72 | 73 | if [[ -z "${CUDA_COMPUTE_CAP}" ]] 74 | then 75 | compute_cap=$(get_compute_cap "$gpu_name") 76 | echo "the compute_cap is $compute_cap" 77 | else 78 | compute_cap=$CUDA_COMPUTE_CAP 79 | fi 80 | 81 | if [[ ${compute_cap} -eq 75 ]] 82 | then 83 | text-embeddings-router-75 --port 8080 --json-output 84 | elif [[ ${compute_cap} -ge 80 && ${compute_cap} -lt 90 ]] 85 | then 86 | text-embeddings-router-80 --port 8080 --json-output 87 | elif [[ ${compute_cap} -eq 90 ]] 88 | then 89 | text-embeddings-router-90 --port 8080 --json-output 90 | else 91 | echo "cuda compute cap ${compute_cap} is not supported"; exit 1 92 | fi -------------------------------------------------------------------------------- /huggingface/pytorch/tei/docker/1.4.0/cpu/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM lukemathwalker/cargo-chef:latest-rust-1.75-bookworm AS chef 2 | WORKDIR /usr/src 3 | 4 | ENV SCCACHE=0.5.4 5 | ENV RUSTC_WRAPPER=/usr/local/bin/sccache 6 | 7 | # Donwload, configure sccache 8 | RUN curl -fsSL https://github.com/mozilla/sccache/releases/download/v$SCCACHE/sccache-v$SCCACHE-x86_64-unknown-linux-musl.tar.gz | tar -xzv --strip-components=1 -C /usr/local/bin sccache-v$SCCACHE-x86_64-unknown-linux-musl/sccache && \ 9 | chmod +x /usr/local/bin/sccache 10 | 11 | FROM chef AS planner 12 | 13 | COPY backends backends 14 | COPY core core 15 | COPY router router 16 | COPY Cargo.toml ./ 17 | COPY Cargo.lock ./ 18 | 19 | RUN cargo chef prepare --recipe-path recipe.json 20 | 21 | FROM chef AS builder 22 | 23 | ARG GIT_SHA 24 | ARG DOCKER_LABEL 25 | 26 | # sccache specific variables 27 | ARG ACTIONS_CACHE_URL 28 | ARG ACTIONS_RUNTIME_TOKEN 29 | ARG SCCACHE_GHA_ENABLED 30 | 31 | RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB \ 32 | | gpg --dearmor | tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null && \ 33 | echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | \ 34 | tee /etc/apt/sources.list.d/oneAPI.list 35 | 36 | RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ 37 | intel-oneapi-mkl-devel=2024.0.0-49656 \ 38 | build-essential \ 39 | && rm -rf /var/lib/apt/lists/* 40 | 41 | RUN echo "int mkl_serv_intel_cpu_true() {return 1;}" > fakeintel.c && \ 42 | gcc -shared -fPIC -o libfakeintel.so fakeintel.c 43 | 44 | COPY --from=planner /usr/src/recipe.json recipe.json 45 | 46 | RUN cargo chef cook --release --features candle --features mkl-dynamic --no-default-features --recipe-path recipe.json && sccache -s 47 | 48 | COPY backends backends 49 | COPY core core 50 | COPY router router 51 | COPY Cargo.toml ./ 52 | COPY Cargo.lock ./ 53 | 54 | FROM builder as http-builder 55 | 56 | RUN cargo build --release 
--bin text-embeddings-router -F candle -F mkl-dynamic -F http --no-default-features && sccache -s 57 | 58 | FROM builder as grpc-builder 59 | 60 | RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && \ 61 | curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v21.12/$PROTOC_ZIP && \ 62 | unzip -o $PROTOC_ZIP -d /usr/local bin/protoc && \ 63 | unzip -o $PROTOC_ZIP -d /usr/local 'include/*' && \ 64 | rm -f $PROTOC_ZIP 65 | 66 | COPY proto proto 67 | 68 | RUN cargo build --release --bin text-embeddings-router -F grpc -F candle -F mkl-dynamic --no-default-features && sccache -s 69 | 70 | FROM debian:bookworm-slim as base 71 | 72 | ENV HUGGINGFACE_HUB_CACHE=/data \ 73 | PORT=80 \ 74 | MKL_ENABLE_INSTRUCTIONS=AVX512_E4 \ 75 | RAYON_NUM_THREADS=8 \ 76 | LD_PRELOAD=/usr/local/libfakeintel.so \ 77 | LD_LIBRARY_PATH=/usr/local/lib 78 | 79 | RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ 80 | libomp-dev \ 81 | ca-certificates \ 82 | libssl-dev \ 83 | curl \ 84 | && rm -rf /var/lib/apt/lists/* 85 | 86 | # Copy a lot of the Intel shared objects because of the mkl_serv_intel_cpu_true patch... 87 | COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_intel_lp64.so.2 /usr/local/lib/libmkl_intel_lp64.so.2 88 | COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_intel_thread.so.2 /usr/local/lib/libmkl_intel_thread.so.2 89 | COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_core.so.2 /usr/local/lib/libmkl_core.so.2 90 | COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_vml_def.so.2 /usr/local/lib/libmkl_vml_def.so.2 91 | COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_def.so.2 /usr/local/lib/libmkl_def.so.2 92 | COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_vml_avx2.so.2 /usr/local/lib/libmkl_vml_avx2.so.2 93 | COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_vml_avx512.so.2 /usr/local/lib/libmkl_vml_avx512.so.2 94 | COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_avx2.so.2 /usr/local/lib/libmkl_avx2.so.2 95 | COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_avx512.so.2 /usr/local/lib/libmkl_avx512.so.2 96 | COPY --from=builder /usr/src/libfakeintel.so /usr/local/libfakeintel.so 97 | 98 | FROM base as grpc 99 | 100 | COPY --from=grpc-builder /usr/src/target/release/text-embeddings-router /usr/local/bin/text-embeddings-router 101 | 102 | ENTRYPOINT ["text-embeddings-router"] 103 | CMD ["--json-output"] 104 | 105 | FROM base AS http 106 | 107 | COPY --from=http-builder /usr/src/target/release/text-embeddings-router /usr/local/bin/text-embeddings-router 108 | 109 | # Amazon SageMaker compatible image 110 | FROM http as sagemaker 111 | COPY --chmod=775 sagemaker-entrypoint.sh entrypoint.sh 112 | 113 | ENTRYPOINT ["./entrypoint.sh"] -------------------------------------------------------------------------------- /huggingface/pytorch/tei/docker/1.4.0/gpu/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:12.2.0-devel-ubuntu22.04 AS base-builder 2 | 3 | ENV SCCACHE=0.5.4 4 | ENV RUSTC_WRAPPER=/usr/local/bin/sccache 5 | ENV PATH="/root/.cargo/bin:${PATH}" 6 | 7 | RUN apt-get update && apt-get upgrade -y && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ 8 | curl \ 9 | libssl-dev \ 10 | pkg-config \ 11 | perl \ 12 | && rm -rf /var/lib/apt/lists/* 13 | 14 | # Download and configure sccache 15 | RUN
curl -fsSL https://github.com/mozilla/sccache/releases/download/v$SCCACHE/sccache-v$SCCACHE-x86_64-unknown-linux-musl.tar.gz | tar -xzv --strip-components=1 -C /usr/local/bin sccache-v$SCCACHE-x86_64-unknown-linux-musl/sccache && \ 16 | chmod +x /usr/local/bin/sccache 17 | 18 | RUN curl https://sh.rustup.rs -sSf | bash -s -- -y 19 | RUN cargo install cargo-chef --locked 20 | 21 | FROM base-builder AS planner 22 | 23 | WORKDIR /usr/src 24 | 25 | COPY backends backends 26 | COPY core core 27 | COPY router router 28 | COPY Cargo.toml ./ 29 | COPY Cargo.lock ./ 30 | 31 | RUN cargo chef prepare --recipe-path recipe.json 32 | 33 | FROM base-builder AS builder 34 | 35 | ARG GIT_SHA 36 | ARG DOCKER_LABEL 37 | ARG VERTEX="false" 38 | 39 | # sccache specific variables 40 | ARG ACTIONS_CACHE_URL 41 | ARG ACTIONS_RUNTIME_TOKEN 42 | ARG SCCACHE_GHA_ENABLED 43 | 44 | # Limit parallelism 45 | ARG RAYON_NUM_THREADS=4 46 | ARG CARGO_BUILD_JOBS 47 | ARG CARGO_BUILD_INCREMENTAL 48 | 49 | WORKDIR /usr/src 50 | 51 | COPY --from=planner /usr/src/recipe.json recipe.json 52 | 53 | RUN if [ $VERTEX = "true" ]; \ 54 | then \ 55 | cargo chef cook --release --features google --recipe-path recipe.json && sccache -s; \ 56 | else \ 57 | cargo chef cook --release --recipe-path recipe.json && sccache -s; \ 58 | fi; 59 | 60 | RUN if [ $VERTEX = "true" ]; \ 61 | then \ 62 | CUDA_COMPUTE_CAP=75 cargo chef cook --release --features google --features candle-cuda-turing --recipe-path recipe.json && sccache -s; \ 63 | else \ 64 | CUDA_COMPUTE_CAP=75 cargo chef cook --release --features candle-cuda-turing --recipe-path recipe.json && sccache -s; \ 65 | fi; 66 | 67 | RUN if [ $VERTEX = "true" ]; \ 68 | then \ 69 | CUDA_COMPUTE_CAP=80 cargo chef cook --release --features google --features candle-cuda --recipe-path recipe.json && sccache -s; \ 70 | else \ 71 | CUDA_COMPUTE_CAP=80 cargo chef cook --release --features candle-cuda --recipe-path recipe.json && sccache -s; \ 72 | fi; 73 | 74 | RUN if [ $VERTEX = "true" ]; \ 75 | then \ 76 | CUDA_COMPUTE_CAP=90 cargo chef cook --release --features google --features candle-cuda --recipe-path recipe.json && sccache -s; \ 77 | else \ 78 | CUDA_COMPUTE_CAP=90 cargo chef cook --release --features candle-cuda --recipe-path recipe.json && sccache -s; \ 79 | fi; 80 | 81 | COPY backends backends 82 | COPY core core 83 | COPY router router 84 | COPY Cargo.toml ./ 85 | COPY Cargo.lock ./ 86 | 87 | RUN if [ $VERTEX = "true" ]; \ 88 | then \ 89 | CUDA_COMPUTE_CAP=75 cargo build --release --bin text-embeddings-router -F candle-cuda-turing -F google && sccache -s; \ 90 | else \ 91 | CUDA_COMPUTE_CAP=75 cargo build --release --bin text-embeddings-router -F candle-cuda-turing && sccache -s; \ 92 | fi; 93 | 94 | RUN mv /usr/src/target/release/text-embeddings-router /usr/src/target/release/text-embeddings-router-75 95 | 96 | RUN if [ $VERTEX = "true" ]; \ 97 | then \ 98 | CUDA_COMPUTE_CAP=80 cargo build --release --bin text-embeddings-router -F candle-cuda -F google && sccache -s; \ 99 | else \ 100 | CUDA_COMPUTE_CAP=80 cargo build --release --bin text-embeddings-router -F candle-cuda && sccache -s; \ 101 | fi; 102 | 103 | RUN mv /usr/src/target/release/text-embeddings-router /usr/src/target/release/text-embeddings-router-80 104 | 105 | RUN if [ $VERTEX = "true" ]; \ 106 | then \ 107 | CUDA_COMPUTE_CAP=90 cargo build --release --bin text-embeddings-router -F candle-cuda -F google && sccache -s; \ 108 | else \ 109 | CUDA_COMPUTE_CAP=90 cargo build --release --bin text-embeddings-router -F candle-cuda && 
sccache -s; \ 110 | fi; 111 | 112 | RUN mv /usr/src/target/release/text-embeddings-router /usr/src/target/release/text-embeddings-router-90 113 | 114 | FROM nvidia/cuda:12.2.0-runtime-ubuntu22.04 as base 115 | 116 | ARG DEFAULT_USE_FLASH_ATTENTION=True 117 | 118 | ENV HUGGINGFACE_HUB_CACHE=/data \ 119 | PORT=80 \ 120 | USE_FLASH_ATTENTION=$DEFAULT_USE_FLASH_ATTENTION 121 | 122 | # Something between this and the above apt-get upgrade has a conflicting dependency that overrides the previous 123 | # upgrade. This needs further investigation, but patching this for now to fix the CVE 124 | RUN apt-get update && apt-get upgrade -y && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ 125 | libgssapi-krb5-2 \ 126 | && rm -rf /var/lib/apt/lists/* 127 | 128 | COPY --from=builder /usr/src/target/release/text-embeddings-router-75 /usr/local/bin/text-embeddings-router-75 129 | COPY --from=builder /usr/src/target/release/text-embeddings-router-80 /usr/local/bin/text-embeddings-router-80 130 | COPY --from=builder /usr/src/target/release/text-embeddings-router-90 /usr/local/bin/text-embeddings-router-90 131 | 132 | # Amazon SageMaker compatible image 133 | FROM base AS sagemaker 134 | 135 | COPY --chmod=775 sagemaker-entrypoint-cuda-all.sh entrypoint.sh 136 | 137 | ENTRYPOINT ["./entrypoint.sh"] 138 | 139 | # Default image 140 | FROM base 141 | 142 | COPY --chmod=775 cuda-all-entrypoint.sh entrypoint.sh 143 | 144 | ENTRYPOINT ["./entrypoint.sh"] 145 | CMD ["--json-output"] -------------------------------------------------------------------------------- /huggingface/pytorch/tei/docker/1.6.0/cpu/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM lukemathwalker/cargo-chef:latest-rust-1.75-bookworm AS chef 2 | WORKDIR /usr/src 3 | 4 | ENV SCCACHE=0.5.4 5 | ENV RUSTC_WRAPPER=/usr/local/bin/sccache 6 | 7 | # Download, configure sccache 8 | RUN curl -fsSL https://github.com/mozilla/sccache/releases/download/v$SCCACHE/sccache-v$SCCACHE-x86_64-unknown-linux-musl.tar.gz | tar -xzv --strip-components=1 -C /usr/local/bin sccache-v$SCCACHE-x86_64-unknown-linux-musl/sccache && \ 9 | chmod +x /usr/local/bin/sccache 10 | 11 | FROM chef AS planner 12 | 13 | COPY backends backends 14 | COPY core core 15 | COPY router router 16 | COPY Cargo.toml ./ 17 | COPY Cargo.lock ./ 18 | 19 | RUN cargo chef prepare --recipe-path recipe.json 20 | 21 | FROM chef AS builder 22 | 23 | ARG GIT_SHA 24 | ARG DOCKER_LABEL 25 | 26 | # sccache specific variables 27 | ARG ACTIONS_CACHE_URL 28 | ARG ACTIONS_RUNTIME_TOKEN 29 | ARG SCCACHE_GHA_ENABLED 30 | 31 | RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB \ 32 | | gpg --dearmor | tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null && \ 33 | echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | \ 34 | tee /etc/apt/sources.list.d/oneAPI.list 35 | 36 | RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ 37 | intel-oneapi-mkl-devel=2024.0.0-49656 \ 38 | build-essential \ 39 | && rm -rf /var/lib/apt/lists/* 40 | 41 | RUN echo "int mkl_serv_intel_cpu_true() {return 1;}" > fakeintel.c && \ 42 | gcc -shared -fPIC -o libfakeintel.so fakeintel.c 43 | 44 | COPY --from=planner /usr/src/recipe.json recipe.json 45 | 46 | RUN cargo chef cook --release --features ort --features candle --features mkl-dynamic --no-default-features --recipe-path recipe.json && sccache -s 47 | 48 |
COPY backends backends 49 | COPY core core 50 | COPY router router 51 | COPY Cargo.toml ./ 52 | COPY Cargo.lock ./ 53 | 54 | FROM builder AS http-builder 55 | 56 | RUN cargo build --release --bin text-embeddings-router -F ort -F candle -F mkl-dynamic -F http --no-default-features && sccache -s 57 | 58 | FROM builder AS grpc-builder 59 | 60 | RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && \ 61 | curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v21.12/$PROTOC_ZIP && \ 62 | unzip -o $PROTOC_ZIP -d /usr/local bin/protoc && \ 63 | unzip -o $PROTOC_ZIP -d /usr/local 'include/*' && \ 64 | rm -f $PROTOC_ZIP 65 | 66 | COPY proto proto 67 | 68 | RUN cargo build --release --bin text-embeddings-router -F grpc -F ort -F candle -F mkl-dynamic --no-default-features && sccache -s 69 | 70 | FROM debian:bookworm-slim AS base 71 | 72 | ENV HUGGINGFACE_HUB_CACHE=/tmp \ 73 | PORT=80 \ 74 | MKL_ENABLE_INSTRUCTIONS=AVX512_E4 \ 75 | RAYON_NUM_THREADS=8 \ 76 | LD_PRELOAD=/usr/local/libfakeintel.so \ 77 | LD_LIBRARY_PATH=/usr/local/lib 78 | 79 | RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ 80 | libomp-dev \ 81 | ca-certificates \ 82 | libssl-dev \ 83 | curl \ 84 | && rm -rf /var/lib/apt/lists/* 85 | 86 | # Copy a lot of the Intel shared objects because of the mkl_serv_intel_cpu_true patch... 87 | COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_intel_lp64.so.2 /usr/local/lib/libmkl_intel_lp64.so.2 88 | COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_intel_thread.so.2 /usr/local/lib/libmkl_intel_thread.so.2 89 | COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_core.so.2 /usr/local/lib/libmkl_core.so.2 90 | COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_vml_def.so.2 /usr/local/lib/libmkl_vml_def.so.2 91 | COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_def.so.2 /usr/local/lib/libmkl_def.so.2 92 | COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_vml_avx2.so.2 /usr/local/lib/libmkl_vml_avx2.so.2 93 | COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_vml_avx512.so.2 /usr/local/lib/libmkl_vml_avx512.so.2 94 | COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_avx2.so.2 /usr/local/lib/libmkl_avx2.so.2 95 | COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_avx512.so.2 /usr/local/lib/libmkl_avx512.so.2 96 | COPY --from=builder /usr/src/libfakeintel.so /usr/local/libfakeintel.so 97 | 98 | FROM base AS grpc 99 | 100 | COPY --from=grpc-builder /usr/src/target/release/text-embeddings-router /usr/local/bin/text-embeddings-router 101 | 102 | ENTRYPOINT ["text-embeddings-router"] 103 | CMD ["--json-output"] 104 | 105 | FROM base AS http 106 | 107 | COPY --from=http-builder /usr/src/target/release/text-embeddings-router /usr/local/bin/text-embeddings-router 108 | 109 | # Amazon SageMaker compatible image 110 | FROM http as sagemaker 111 | COPY --chmod=775 sagemaker-entrypoint.sh entrypoint.sh 112 | 113 | ENTRYPOINT ["./entrypoint.sh"] -------------------------------------------------------------------------------- /huggingface/pytorch/tei/docker/1.6.0/gpu/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:12.2.0-devel-ubuntu22.04 AS base-builder 2 | 3 | ENV SCCACHE=0.5.4 4 | ENV RUSTC_WRAPPER=/usr/local/bin/sccache 5 | ENV PATH="/root/.cargo/bin:${PATH}" 6 | 7 | RUN apt-get update && apt-get upgrade -y && 
DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ 8 | curl \ 9 | libssl-dev \ 10 | pkg-config \ 11 | && rm -rf /var/lib/apt/lists/* 12 | 13 | # Download and configure sccache 14 | RUN curl -fsSL https://github.com/mozilla/sccache/releases/download/v$SCCACHE/sccache-v$SCCACHE-x86_64-unknown-linux-musl.tar.gz | tar -xzv --strip-components=1 -C /usr/local/bin sccache-v$SCCACHE-x86_64-unknown-linux-musl/sccache && \ 15 | chmod +x /usr/local/bin/sccache 16 | 17 | RUN curl https://sh.rustup.rs -sSf | bash -s -- -y 18 | RUN cargo install cargo-chef --locked 19 | 20 | FROM base-builder AS planner 21 | 22 | WORKDIR /usr/src 23 | 24 | COPY backends backends 25 | COPY core core 26 | COPY router router 27 | COPY Cargo.toml ./ 28 | COPY Cargo.lock ./ 29 | 30 | RUN cargo chef prepare --recipe-path recipe.json 31 | 32 | FROM base-builder AS builder 33 | 34 | ARG GIT_SHA 35 | ARG DOCKER_LABEL 36 | ARG VERTEX="false" 37 | 38 | # sccache specific variables 39 | ARG ACTIONS_CACHE_URL 40 | ARG ACTIONS_RUNTIME_TOKEN 41 | ARG SCCACHE_GHA_ENABLED 42 | 43 | # Limit parallelism 44 | ARG RAYON_NUM_THREADS=4 45 | ARG CARGO_BUILD_JOBS 46 | ARG CARGO_BUILD_INCREMENTAL 47 | 48 | WORKDIR /usr/src 49 | 50 | COPY --from=planner /usr/src/recipe.json recipe.json 51 | 52 | RUN if [ $VERTEX = "true" ]; \ 53 | then \ 54 | cargo chef cook --release --features google --recipe-path recipe.json && sccache -s; \ 55 | else \ 56 | cargo chef cook --release --recipe-path recipe.json && sccache -s; \ 57 | fi; 58 | 59 | RUN if [ $VERTEX = "true" ]; \ 60 | then \ 61 | CUDA_COMPUTE_CAP=75 cargo chef cook --release --features google --features candle-cuda-turing --recipe-path recipe.json && sccache -s; \ 62 | else \ 63 | CUDA_COMPUTE_CAP=75 cargo chef cook --release --features candle-cuda-turing --recipe-path recipe.json && sccache -s; \ 64 | fi; 65 | 66 | RUN if [ $VERTEX = "true" ]; \ 67 | then \ 68 | CUDA_COMPUTE_CAP=80 cargo chef cook --release --features google --features candle-cuda --recipe-path recipe.json && sccache -s; \ 69 | else \ 70 | CUDA_COMPUTE_CAP=80 cargo chef cook --release --features candle-cuda --recipe-path recipe.json && sccache -s; \ 71 | fi; 72 | 73 | RUN if [ $VERTEX = "true" ]; \ 74 | then \ 75 | CUDA_COMPUTE_CAP=90 cargo chef cook --release --features google --features candle-cuda --recipe-path recipe.json && sccache -s; \ 76 | else \ 77 | CUDA_COMPUTE_CAP=90 cargo chef cook --release --features candle-cuda --recipe-path recipe.json && sccache -s; \ 78 | fi; 79 | 80 | COPY backends backends 81 | COPY core core 82 | COPY router router 83 | COPY Cargo.toml ./ 84 | COPY Cargo.lock ./ 85 | 86 | RUN if [ $VERTEX = "true" ]; \ 87 | then \ 88 | CUDA_COMPUTE_CAP=75 cargo build --release --bin text-embeddings-router -F candle-cuda-turing -F google && sccache -s; \ 89 | else \ 90 | CUDA_COMPUTE_CAP=75 cargo build --release --bin text-embeddings-router -F candle-cuda-turing && sccache -s; \ 91 | fi; 92 | 93 | RUN mv /usr/src/target/release/text-embeddings-router /usr/src/target/release/text-embeddings-router-75 94 | 95 | RUN if [ $VERTEX = "true" ]; \ 96 | then \ 97 | CUDA_COMPUTE_CAP=80 cargo build --release --bin text-embeddings-router -F candle-cuda -F google && sccache -s; \ 98 | else \ 99 | CUDA_COMPUTE_CAP=80 cargo build --release --bin text-embeddings-router -F candle-cuda && sccache -s; \ 100 | fi; 101 | 102 | RUN mv /usr/src/target/release/text-embeddings-router /usr/src/target/release/text-embeddings-router-80 103 | 104 | RUN if [ $VERTEX = "true" ]; \ 105 | then \ 106 |
CUDA_COMPUTE_CAP=90 cargo build --release --bin text-embeddings-router -F candle-cuda -F google && sccache -s; \ 107 | else \ 108 | CUDA_COMPUTE_CAP=90 cargo build --release --bin text-embeddings-router -F candle-cuda && sccache -s; \ 109 | fi; 110 | 111 | RUN mv /usr/src/target/release/text-embeddings-router /usr/src/target/release/text-embeddings-router-90 112 | 113 | FROM nvidia/cuda:12.2.0-runtime-ubuntu22.04 AS base 114 | 115 | ARG DEFAULT_USE_FLASH_ATTENTION=True 116 | 117 | ENV HUGGINGFACE_HUB_CACHE=/tmp \ 118 | PORT=80 \ 119 | USE_FLASH_ATTENTION=$DEFAULT_USE_FLASH_ATTENTION 120 | 121 | RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ 122 | libgssapi-krb5-2 \ 123 | ca-certificates \ 124 | libssl-dev \ 125 | curl \ 126 | && rm -rf /var/lib/apt/lists/* 127 | 128 | COPY --from=builder /usr/src/target/release/text-embeddings-router-75 /usr/local/bin/text-embeddings-router-75 129 | COPY --from=builder /usr/src/target/release/text-embeddings-router-80 /usr/local/bin/text-embeddings-router-80 130 | COPY --from=builder /usr/src/target/release/text-embeddings-router-90 /usr/local/bin/text-embeddings-router-90 131 | 132 | # Amazon SageMaker compatible image 133 | FROM base AS sagemaker 134 | 135 | COPY --chmod=775 sagemaker-entrypoint-cuda-all.sh entrypoint.sh 136 | 137 | ENTRYPOINT ["./entrypoint.sh"] 138 | 139 | # Default image 140 | FROM base 141 | 142 | COPY --chmod=775 cuda-all-entrypoint.sh entrypoint.sh 143 | 144 | ENTRYPOINT ["./entrypoint.sh"] 145 | CMD ["--json-output"] -------------------------------------------------------------------------------- /huggingface/pytorch/tei/docker/1.7.0/cpu/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM lukemathwalker/cargo-chef:latest-rust-1.85-bookworm AS chef 2 | WORKDIR /usr/src 3 | 4 | ENV SCCACHE=0.5.4 5 | ENV RUSTC_WRAPPER=/usr/local/bin/sccache 6 | 7 | # Download, configure sccache 8 | RUN curl -fsSL https://github.com/mozilla/sccache/releases/download/v$SCCACHE/sccache-v$SCCACHE-x86_64-unknown-linux-musl.tar.gz | tar -xzv --strip-components=1 -C /usr/local/bin sccache-v$SCCACHE-x86_64-unknown-linux-musl/sccache && \ 9 | chmod +x /usr/local/bin/sccache 10 | 11 | FROM chef AS planner 12 | 13 | COPY candle-extensions candle-extensions 14 | COPY backends backends 15 | COPY core core 16 | COPY router router 17 | COPY Cargo.toml ./ 18 | COPY Cargo.lock ./ 19 | 20 | RUN cargo chef prepare --recipe-path recipe.json 21 | 22 | FROM chef AS builder 23 | 24 | ARG GIT_SHA 25 | ARG DOCKER_LABEL 26 | 27 | # sccache specific variables 28 | ARG SCCACHE_GHA_ENABLED 29 | 30 | RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB \ 31 | | gpg --dearmor | tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null && \ 32 | echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | \ 33 | tee /etc/apt/sources.list.d/oneAPI.list 34 | 35 | RUN apt-get update && apt-get upgrade -y && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ 36 | intel-oneapi-mkl-devel=2024.0.0-49656 \ 37 | build-essential \ 38 | && rm -rf /var/lib/apt/lists/* 39 | 40 | RUN echo "int mkl_serv_intel_cpu_true() {return 1;}" > fakeintel.c && \ 41 | gcc -shared -fPIC -o libfakeintel.so fakeintel.c 42 | 43 | COPY --from=planner /usr/src/recipe.json recipe.json 44 | 45 | RUN --mount=type=secret,id=actions_cache_url,env=ACTIONS_CACHE_URL \ 46 |
--mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \ 47 | cargo chef cook --release --features ort,candle,mkl --no-default-features --recipe-path recipe.json && sccache -s 48 | 49 | COPY backends backends 50 | COPY core core 51 | COPY router router 52 | COPY Cargo.toml ./ 53 | COPY Cargo.lock ./ 54 | 55 | FROM builder AS http-builder 56 | 57 | RUN --mount=type=secret,id=actions_cache_url,env=ACTIONS_CACHE_URL \ 58 | --mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \ 59 | cargo build --release --bin text-embeddings-router --features ort,candle,mkl,http --no-default-features && sccache -s 60 | 61 | FROM builder AS grpc-builder 62 | 63 | RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && \ 64 | curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v21.12/$PROTOC_ZIP && \ 65 | unzip -o $PROTOC_ZIP -d /usr/local bin/protoc && \ 66 | unzip -o $PROTOC_ZIP -d /usr/local 'include/*' && \ 67 | rm -f $PROTOC_ZIP 68 | 69 | COPY proto proto 70 | 71 | RUN --mount=type=secret,id=actions_cache_url,env=ACTIONS_CACHE_URL \ 72 | --mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \ 73 | cargo build --release --bin text-embeddings-router --features ort,candle,mkl,grpc --no-default-features && sccache -s 74 | 75 | FROM debian:bookworm-slim AS base 76 | 77 | ENV HUGGINGFACE_HUB_CACHE=/opt/ml/model \ 78 | PORT=80 \ 79 | HF_HUB_USER_AGENT_ORIGIN=aws:sagemaker:cpu:inference:tei \ 80 | MKL_ENABLE_INSTRUCTIONS=AVX512_E4 \ 81 | RAYON_NUM_THREADS=8 \ 82 | LD_PRELOAD=/usr/local/libfakeintel.so \ 83 | LD_LIBRARY_PATH=/usr/local/lib 84 | 85 | RUN apt-get update && apt-get upgrade -y && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ 86 | libomp-dev \ 87 | ca-certificates \ 88 | libssl-dev \ 89 | curl \ 90 | && rm -rf /var/lib/apt/lists/* 91 | 92 | # Copy a lot of the Intel shared objects because of the mkl_serv_intel_cpu_true patch... 
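# A brief aside on how this patch works (comment only; the build is unchanged): MKL selects its
# optimized kernels through an internal mkl_serv_intel_cpu_true() check. The builder stage compiles
# the one-line shim `int mkl_serv_intel_cpu_true() {return 1;}` into libfakeintel.so, and this
# runtime stage LD_PRELOADs it (LD_PRELOAD=/usr/local/libfakeintel.so in the ENV block above), so
# the fast code paths are also taken on non-Intel CPUs. A minimal sketch of the same trick outside
# this image, assuming the shared objects copied below are the ones MKL loads dynamically at runtime:
#   echo 'int mkl_serv_intel_cpu_true() {return 1;}' > fakeintel.c
#   gcc -shared -fPIC -o libfakeintel.so fakeintel.c
#   LD_PRELOAD=$PWD/libfakeintel.so text-embeddings-router --json-output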
93 | COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_intel_lp64.so.2 /usr/local/lib/libmkl_intel_lp64.so.2 94 | COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_intel_thread.so.2 /usr/local/lib/libmkl_intel_thread.so.2 95 | COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_core.so.2 /usr/local/lib/libmkl_core.so.2 96 | COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_vml_def.so.2 /usr/local/lib/libmkl_vml_def.so.2 97 | COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_def.so.2 /usr/local/lib/libmkl_def.so.2 98 | COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_vml_avx2.so.2 /usr/local/lib/libmkl_vml_avx2.so.2 99 | COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_vml_avx512.so.2 /usr/local/lib/libmkl_vml_avx512.so.2 100 | COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_avx2.so.2 /usr/local/lib/libmkl_avx2.so.2 101 | COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_avx512.so.2 /usr/local/lib/libmkl_avx512.so.2 102 | COPY --from=builder /usr/src/libfakeintel.so /usr/local/libfakeintel.so 103 | 104 | FROM base AS grpc 105 | 106 | COPY --from=grpc-builder /usr/src/target/release/text-embeddings-router /usr/local/bin/text-embeddings-router 107 | 108 | ENTRYPOINT ["text-embeddings-router"] 109 | CMD ["--json-output"] 110 | 111 | FROM base AS http 112 | 113 | COPY --from=http-builder /usr/src/target/release/text-embeddings-router /usr/local/bin/text-embeddings-router 114 | 115 | # Amazon SageMaker compatible image 116 | FROM http AS sagemaker 117 | 118 | COPY --chmod=775 sagemaker-entrypoint.sh entrypoint.sh 119 | 120 | ENTRYPOINT ["./entrypoint.sh"] 121 | -------------------------------------------------------------------------------- /huggingface/pytorch/tei/docker/1.7.0/gpu/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:12.2.0-devel-ubuntu22.04 AS base-builder 2 | 3 | ENV SCCACHE=0.5.4 4 | ENV RUSTC_WRAPPER=/usr/local/bin/sccache 5 | ENV PATH="/root/.cargo/bin:${PATH}" 6 | # aligned with `cargo-chef` version in `lukemathwalker/cargo-chef:latest-rust-1.85-bookworm` 7 | ENV CARGO_CHEF=0.1.71 8 | 9 | RUN apt-get update && apt-get upgrade -y && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ 10 | curl \ 11 | libssl-dev \ 12 | pkg-config \ 13 | libgssapi-krb5-2 \ 14 | && rm -rf /var/lib/apt/lists/* 15 | 16 | # Download and configure sccache 17 | RUN curl -fsSL https://github.com/mozilla/sccache/releases/download/v$SCCACHE/sccache-v$SCCACHE-x86_64-unknown-linux-musl.tar.gz | tar -xzv --strip-components=1 -C /usr/local/bin sccache-v$SCCACHE-x86_64-unknown-linux-musl/sccache && \ 18 | chmod +x /usr/local/bin/sccache 19 | 20 | RUN curl https://sh.rustup.rs -sSf | bash -s -- -y 21 | RUN cargo install cargo-chef --version $CARGO_CHEF --locked 22 | 23 | FROM base-builder AS planner 24 | 25 | WORKDIR /usr/src 26 | 27 | COPY candle-extensions candle-extensions 28 | COPY backends backends 29 | COPY core core 30 | COPY router router 31 | COPY Cargo.toml ./ 32 | COPY Cargo.lock ./ 33 | 34 | RUN cargo chef prepare --recipe-path recipe.json 35 | 36 | FROM base-builder AS builder 37 | 38 | ARG GIT_SHA 39 | ARG DOCKER_LABEL 40 | 41 | # sccache specific variables 42 | ARG SCCACHE_GHA_ENABLED 43 | 44 | # Limit parallelism 45 | ARG RAYON_NUM_THREADS=4 46 | ARG CARGO_BUILD_JOBS 47 | ARG CARGO_BUILD_INCREMENTAL 48 | 49 | WORKDIR /usr/src 50 | 51 | COPY
--from=planner /usr/src/recipe.json recipe.json 52 | 53 | RUN --mount=type=secret,id=actions_cache_url,env=ACTIONS_CACHE_URL \ 54 | --mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \ 55 | cargo chef cook --release --recipe-path recipe.json && sccache -s; 56 | 57 | RUN --mount=type=secret,id=actions_cache_url,env=ACTIONS_CACHE_URL \ 58 | --mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \ 59 | CUDA_COMPUTE_CAP=75 cargo chef cook --release --features candle-cuda-turing --recipe-path recipe.json && sccache -s; 60 | 61 | RUN --mount=type=secret,id=actions_cache_url,env=ACTIONS_CACHE_URL \ 62 | --mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \ 63 | CUDA_COMPUTE_CAP=80 cargo chef cook --release --features candle-cuda --recipe-path recipe.json && sccache -s; 64 | 65 | RUN --mount=type=secret,id=actions_cache_url,env=ACTIONS_CACHE_URL \ 66 | --mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \ 67 | CUDA_COMPUTE_CAP=90 cargo chef cook --release --features candle-cuda --recipe-path recipe.json && sccache -s; 68 | 69 | COPY candle-extensions candle-extensions 70 | COPY backends backends 71 | COPY core core 72 | COPY router router 73 | COPY Cargo.toml ./ 74 | COPY Cargo.lock ./ 75 | 76 | RUN --mount=type=secret,id=actions_cache_url,env=ACTIONS_CACHE_URL \ 77 | --mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \ 78 | CUDA_COMPUTE_CAP=75 cargo build --release --bin text-embeddings-router -F candle-cuda-turing && sccache -s; 79 | 80 | RUN mv /usr/src/target/release/text-embeddings-router /usr/src/target/release/text-embeddings-router-75 81 | 82 | RUN --mount=type=secret,id=actions_cache_url,env=ACTIONS_CACHE_URL \ 83 | --mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \ 84 | CUDA_COMPUTE_CAP=80 cargo build --release --bin text-embeddings-router -F candle-cuda && sccache -s; 85 | 86 | RUN mv /usr/src/target/release/text-embeddings-router /usr/src/target/release/text-embeddings-router-80 87 | 88 | RUN --mount=type=secret,id=actions_cache_url,env=ACTIONS_CACHE_URL \ 89 | --mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \ 90 | CUDA_COMPUTE_CAP=90 cargo build --release --bin text-embeddings-router -F candle-cuda && sccache -s; 91 | 92 | RUN mv /usr/src/target/release/text-embeddings-router /usr/src/target/release/text-embeddings-router-90 93 | 94 | FROM nvidia/cuda:12.2.0-runtime-ubuntu22.04 AS base 95 | 96 | ARG DEFAULT_USE_FLASH_ATTENTION=True 97 | 98 | ENV HUGGINGFACE_HUB_CACHE=/opt/ml/model \ 99 | PORT=80 \ 100 | USE_FLASH_ATTENTION=$DEFAULT_USE_FLASH_ATTENTION \ 101 | HF_HUB_USER_AGENT_ORIGIN=aws:sagemaker:gpu-cuda:inference:tei 102 | 103 | RUN apt-get update && apt-get upgrade -y && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ 104 | ca-certificates \ 105 | libssl-dev \ 106 | curl \ 107 | && rm -rf /var/lib/apt/lists/* 108 | 109 | COPY --from=builder /usr/src/target/release/text-embeddings-router-75 /usr/local/bin/text-embeddings-router-75 110 | COPY --from=builder /usr/src/target/release/text-embeddings-router-80 /usr/local/bin/text-embeddings-router-80 111 | COPY --from=builder /usr/src/target/release/text-embeddings-router-90 /usr/local/bin/text-embeddings-router-90 112 | 113 | # Amazon SageMaker compatible image 114 | FROM base AS sagemaker 115 | 116 | COPY --chmod=775 sagemaker-entrypoint-cuda-all.sh entrypoint.sh 117 | COPY --chmod=775 /huggingface/pytorch/tei/docker/1.7.0/gpu/start-cuda-compat.sh start-cuda-compat.sh 
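# How this SageMaker stage is expected to start up (comment only): the entrypoint copied above is
# TEI's sagemaker-entrypoint-cuda-all.sh, and start-cuda-compat.sh is the driver-compat helper shown
# immediately below in this repo. A minimal sketch of the dispatch, assuming the 1.7.0 entrypoint
# follows the same pattern as the 1.2.3 sagemaker-entrypoint-cuda-all.sh earlier in this file tree:
#   ./start-cuda-compat.sh                    # put /usr/local/cuda/compat on LD_LIBRARY_PATH if the driver is older
#   cap=${CUDA_COMPUTE_CAP:-$(detect_cap)}    # detect_cap is a hypothetical helper mapping GPU name -> 75/80/86/89/90
#   # cap 75 -> text-embeddings-router-75, cap 80..89 -> text-embeddings-router-80, cap 90 -> text-embeddings-router-90,
#   # each launched as `<binary> --port 8080 --json-output`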
118 | 119 | ENTRYPOINT ["./entrypoint.sh"] 120 | -------------------------------------------------------------------------------- /huggingface/pytorch/tei/docker/1.7.0/gpu/start-cuda-compat.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | verlt() { 4 | [ "$1" = "$2" ] && return 1 || [ "$1" = "$(echo -e "$1\n$2" | sort -V | head -n1)" ] 5 | } 6 | 7 | if [ -f /usr/local/cuda/compat/libcuda.so.1 ]; then 8 | CUDA_COMPAT_MAX_DRIVER_VERSION=$(readlink /usr/local/cuda/compat/libcuda.so.1 | cut -d'.' -f 3-) 9 | echo "CUDA compat package should be installed for NVIDIA driver smaller than ${CUDA_COMPAT_MAX_DRIVER_VERSION}" 10 | NVIDIA_DRIVER_VERSION=$(sed -n 's/^NVRM.*Kernel Module *\([0-9.]*\).*$/\1/p' /proc/driver/nvidia/version 2>/dev/null || true) 11 | echo "Current installed NVIDIA driver version is ${NVIDIA_DRIVER_VERSION}" 12 | if verlt $NVIDIA_DRIVER_VERSION $CUDA_COMPAT_MAX_DRIVER_VERSION; then 13 | echo "Adding CUDA compat to LD_LIBRARY_PATH" 14 | export LD_LIBRARY_PATH=/usr/local/cuda/compat:$LD_LIBRARY_PATH 15 | echo $LD_LIBRARY_PATH 16 | else 17 | echo "Skipping CUDA compat setup as newer NVIDIA driver is installed" 18 | fi 19 | else 20 | echo "Skipping CUDA compat setup as package not found" 21 | fi 22 | -------------------------------------------------------------------------------- /huggingface/pytorch/tei/docker/buildspec.yml: -------------------------------------------------------------------------------- 1 | version: 0.2 2 | 3 | env: 4 | shell: bash 5 | variables: 6 | FRAMEWORK_FOLDER: "huggingface/pytorch/tei/docker" 7 | PYTHONPATH: "/codebuild/output/src*/src/github.com/awslabs/llm-hosting-container" 8 | 9 | phases: 10 | install: 11 | runtime-versions: 12 | python: 3.11 13 | commands: 14 | - echo "Installing Python version 3.11 ..." 15 | - pyenv global $PYTHON_311_VERSION 16 | 17 | pre_build: 18 | commands: 19 | - echo Pre-build started on `date` 20 | - export PYTHONPATH=$(pwd):$PYTHONPATH 21 | 22 | # Continue with regular pre-build steps if BUILD_REQUIRED=true 23 | - | 24 | echo Setting up Docker buildx. 25 | docker buildx version 26 | docker buildx create --name builder --driver docker-container --buildkitd-flags '--allow-insecure-entitlement security.insecure --allow-insecure-entitlement network.host' --use 27 | docker buildx inspect --bootstrap --builder builder 28 | docker buildx install 29 | echo Preparing system dependencies for execution. 30 | docker --version 31 | docker login -u $DOCKER_USERNAME -p $DOCKER_PASSWORD 32 | curl -LO http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh 33 | bash Miniconda3-latest-Linux-x86_64.sh -bfp /miniconda3 34 | export PATH=/miniconda3/bin:${PATH} 35 | conda install python=3.11 36 | conda update -y conda 37 | echo Prepare TEI dependencies for execution. 
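# Note (comment only): the build phase below hands control to tei.py, which reads the release
# configuration and dispatches on the MODE environment variable (PR, BUILD, TEST or RELEASE); see
# huggingface/pytorch/tei/docker/tei.py later in this repo. A hypothetical local invocation, assuming
# MODE and the AWS/test variables are exported the way the CodeBuild project would provide them:
#   MODE=PR PYTHONPATH=$(pwd) python huggingface/pytorch/tei/docker/tei.py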
38 | mkdir tei-artifacts 39 | python -m pip install -r $FRAMEWORK_FOLDER/tei-requirements.txt 40 | 41 | build: 42 | commands: 43 | - | 44 | echo Build started on `date` 45 | echo "Current PYTHONPATH: $PYTHONPATH" 46 | python $FRAMEWORK_FOLDER/tei.py 47 | 48 | post_build: 49 | commands: 50 | - | 51 | echo Build completed on `date` -------------------------------------------------------------------------------- /huggingface/pytorch/tei/docker/tei-requirements.txt: -------------------------------------------------------------------------------- 1 | boto3 2 | dataclasses 3 | docker 4 | gitpython 5 | sagemaker 6 | 7 | parameterized 8 | pytest 9 | pytest-mock 10 | pytest-xdist -------------------------------------------------------------------------------- /huggingface/pytorch/tei/docker/tei.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import shutil 4 | import subprocess 5 | 6 | import git 7 | 8 | from huggingface.pytorch.release_utils import ( 9 | GIT_REPO_DOCKERFILES_ROOT_DIRECTORY, 10 | GIT_REPO_PYTEST_PATH, 11 | LOG, 12 | Aws, 13 | DockerClient, 14 | EnvironmentVariable, 15 | Mode, 16 | ReleaseConfigs, 17 | ) 18 | 19 | GIT_REPO_TEI_LOCAL_FOLDER_NAME = "text-embeddings-inference" 20 | GIT_REPO_TEI_TAG_PATTERN = "v{version}" 21 | GIT_REPO_TEI_URL = "https://github.com/huggingface/text-embeddings-inference.git" 22 | 23 | 24 | def build(configs: ReleaseConfigs): 25 | """Builds the Docker image for the provided configs.""" 26 | aws = Aws() 27 | docker_client = DockerClient() 28 | for config in configs.releases: 29 | LOG.info(f"Going to build image for config: {config}.") 30 | image_uri = config.get_image_uri_for_staging() 31 | if aws.does_ecr_image_exist(image_uri): 32 | LOG.info(f"Skipping already built image '{image_uri}'. Config: {config}.") 33 | continue 34 | 35 | LOG.info( 36 | f"Setting up build prerequisites for release config with version: {config.version}" 37 | ) 38 | build_path = GIT_REPO_TEI_LOCAL_FOLDER_NAME 39 | shutil.rmtree(GIT_REPO_TEI_LOCAL_FOLDER_NAME, ignore_errors=True) 40 | hf_tei_repo = git.Repo.clone_from( 41 | GIT_REPO_TEI_URL, GIT_REPO_TEI_LOCAL_FOLDER_NAME, no_checkout=True 42 | ) 43 | hf_tei_repo_tag = GIT_REPO_TEI_TAG_PATTERN.format(version=config.version) 44 | hf_tei_repo.git.checkout(hf_tei_repo_tag) 45 | LOG.info( 46 | f"Checked out {hf_tei_repo} with tag: {hf_tei_repo_tag} to {GIT_REPO_TEI_LOCAL_FOLDER_NAME}." 47 | ) 48 | 49 | subprocess.run( 50 | ["git", "submodule", "update", "--init"], 51 | cwd=GIT_REPO_TEI_LOCAL_FOLDER_NAME, 52 | check=True, 53 | ) 54 | LOG.info(f"Initialized and updated submodules for {hf_tei_repo_tag}.") 55 | 56 | shutil.copytree( 57 | GIT_REPO_DOCKERFILES_ROOT_DIRECTORY, 58 | os.path.join( 59 | GIT_REPO_TEI_LOCAL_FOLDER_NAME, GIT_REPO_DOCKERFILES_ROOT_DIRECTORY 60 | ), 61 | ) 62 | LOG.info( 63 | f"Copied '{GIT_REPO_DOCKERFILES_ROOT_DIRECTORY}' directory to TEI directory for 'COPY' command." 64 | ) 65 | 66 | dockerfile_path = config.get_dockerfile_path() 67 | LOG.info(f"Building Dockerfile: '{dockerfile_path}'. 
This may take a while...") 68 | docker_client.build( 69 | image_uri=image_uri, dockerfile_path=dockerfile_path, build_path=build_path 70 | ) 71 | 72 | username, password = aws.get_ecr_credentials(image_uri) 73 | docker_client.login(username, password, image_uri) 74 | docker_client.push(image_uri) 75 | 76 | 77 | def test(configs: ReleaseConfigs): 78 | """Runs SageMaker tests for the Docker images associated with the provided configs and current git commit.""" 79 | aws = Aws() 80 | for config in configs.releases: 81 | LOG.info(f"Going to test built image for config: {config}.") 82 | test_role_arn = os.getenv(EnvironmentVariable.TEST_ROLE_ARN.name) 83 | test_session = aws.get_session_for_role(test_role_arn) 84 | test_credentials = test_session.get_credentials() 85 | environ = os.environ.copy() 86 | environ.update( 87 | { 88 | "DEVICE_TYPE": config.device.lower(), 89 | "AWS_ACCESS_KEY_ID": test_credentials.access_key, 90 | "AWS_SECRET_ACCESS_KEY": test_credentials.secret_key, 91 | "AWS_SESSION_TOKEN": test_credentials.token, 92 | "IMAGE_URI": config.get_image_uri_for_staging(), 93 | "TEST_ROLE_ARN": test_role_arn, 94 | } 95 | ) 96 | 97 | command = [ 98 | "pytest", 99 | "-m", 100 | config.device.lower(), 101 | "-n", 102 | "auto", 103 | "--log-cli-level", 104 | "info", 105 | GIT_REPO_PYTEST_PATH, 106 | ] 107 | LOG.info(f"Running test command: {command}.") 108 | process = subprocess.run( 109 | command, env=environ, encoding="utf-8", capture_output=True 110 | ) 111 | LOG.info(process.stdout) 112 | assert process.returncode == 0, ( 113 | f"Failed with config: {config}.\nError: {process.stderr}." 114 | ) 115 | LOG.info(f"Finished testing image with config: {config}.") 116 | 117 | 118 | def pr(configs: ReleaseConfigs): 119 | """Executes both build and test modes.""" 120 | build(configs) 121 | test(configs) 122 | 123 | 124 | def release(configs: ReleaseConfigs): 125 | """trigger SMFrameworks algo release pipeline""" 126 | aws = Aws() 127 | docker_client = DockerClient() 128 | for config in configs.releases: 129 | LOG.info(f"Releasing image associated for config: {config}.") 130 | released_image_uri = config.get_image_uri_for_released() 131 | if aws.does_ecr_image_exist(released_image_uri): 132 | LOG.info( 133 | f"Skipping already released image '{released_image_uri}'. Config: {config}." 
134 | ) 135 | continue 136 | 137 | staged_image_uri = config.get_image_uri_for_staging() 138 | username, password = aws.get_ecr_credentials(staged_image_uri) 139 | docker_client.login(username, password, staged_image_uri) 140 | docker_client.prune_all() 141 | docker_client.pull(staged_image_uri) 142 | 143 | docker_client.login(username, password, staged_image_uri) 144 | docker_client.tag(staged_image_uri, released_image_uri) 145 | docker_client.push(released_image_uri) 146 | 147 | js_uris = config.get_image_uris_for_jumpstart() 148 | username, password = aws.get_ecr_credentials(js_uris[0]) 149 | docker_client.login(username, password, js_uris[0]) 150 | for js_uri in js_uris: 151 | docker_client.tag(staged_image_uri, js_uri) 152 | docker_client.push(js_uri) 153 | LOG.info( 154 | f"Release marked as complete for following config ({js_uris}): {config}" 155 | ) 156 | 157 | 158 | if __name__ == "__main__": 159 | logging.basicConfig( 160 | level=logging.INFO, 161 | format="%(asctime)s %(levelname)-8s %(message)s", 162 | datefmt="%Y-%m-%d %H:%M:%S", 163 | ) 164 | configs = ReleaseConfigs() 165 | configs.validate() 166 | mode = os.getenv(EnvironmentVariable.MODE.name) 167 | LOG.info(f"Mode has been set to: {mode}.") 168 | if mode == Mode.PR.name: 169 | pr(configs) 170 | elif mode == Mode.BUILD.name: 171 | build(configs) 172 | elif mode == Mode.TEST.name: 173 | test(configs) 174 | elif mode == Mode.RELEASE.name: 175 | release(configs) 176 | else: 177 | raise ValueError( 178 | f"The mode '{mode}' is not recognized. Please set it correctly.'" 179 | ) 180 | -------------------------------------------------------------------------------- /huggingface/pytorch/tgi/docker/2.3.1/entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | verlt() { 4 | [ "$1" = "$2" ] && return 1 || [ "$1" = "$(echo -e "$1\n$2" | sort -V | head -n1)" ] 5 | } 6 | 7 | if [ -f /usr/local/cuda/compat/libcuda.so.1 ]; then 8 | CUDA_COMPAT_MAX_DRIVER_VERSION=$(readlink /usr/local/cuda/compat/libcuda.so.1 | cut -d'.' 
-f 3-) 9 | echo "CUDA compat package should be installed for NVIDIA driver smaller than ${CUDA_COMPAT_MAX_DRIVER_VERSION}" 10 | NVIDIA_DRIVER_VERSION=$(sed -n 's/^NVRM.*Kernel Module *\([0-9.]*\).*$/\1/p' /proc/driver/nvidia/version 2>/dev/null || true) 11 | echo "Current installed NVIDIA driver version is ${NVIDIA_DRIVER_VERSION}" 12 | if verlt $NVIDIA_DRIVER_VERSION $CUDA_COMPAT_MAX_DRIVER_VERSION; then 13 | echo "Adding CUDA compat to LD_LIBRARY_PATH" 14 | export LD_LIBRARY_PATH=/usr/local/cuda/compat:$LD_LIBRARY_PATH 15 | echo $LD_LIBRARY_PATH 16 | else 17 | echo "Skipping CUDA compat setup as newer NVIDIA driver is installed" 18 | fi 19 | else 20 | echo "Skipping CUDA compat setup as package not found" 21 | fi 22 | 23 | if [[ -z "${HF_MODEL_ID}" ]]; then 24 | echo "HF_MODEL_ID must be set" 25 | exit 1 26 | fi 27 | export MODEL_ID="${HF_MODEL_ID}" 28 | 29 | if [[ -n "${HF_MODEL_REVISION}" ]]; then 30 | export REVISION="${HF_MODEL_REVISION}" 31 | fi 32 | 33 | if [[ -n "${SM_NUM_GPUS}" ]]; then 34 | export NUM_SHARD="${SM_NUM_GPUS}" 35 | fi 36 | 37 | if [[ -n "${HF_MODEL_QUANTIZE}" ]]; then 38 | export QUANTIZE="${HF_MODEL_QUANTIZE}" 39 | fi 40 | 41 | if [[ -n "${HF_MODEL_TRUST_REMOTE_CODE}" ]]; then 42 | export TRUST_REMOTE_CODE="${HF_MODEL_TRUST_REMOTE_CODE}" 43 | fi 44 | 45 | text-generation-launcher --port 8080 -------------------------------------------------------------------------------- /huggingface/pytorch/tgi/docker/2.4.0/entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | verlt() { 4 | [ "$1" = "$2" ] && return 1 || [ "$1" = "$(echo -e "$1\n$2" | sort -V | head -n1)" ] 5 | } 6 | 7 | if [ -f /usr/local/cuda/compat/libcuda.so.1 ]; then 8 | CUDA_COMPAT_MAX_DRIVER_VERSION=$(readlink /usr/local/cuda/compat/libcuda.so.1 | cut -d'.' 
-f 3-) 9 | echo "CUDA compat package should be installed for NVIDIA driver smaller than ${CUDA_COMPAT_MAX_DRIVER_VERSION}" 10 | NVIDIA_DRIVER_VERSION=$(sed -n 's/^NVRM.*Kernel Module *\([0-9.]*\).*$/\1/p' /proc/driver/nvidia/version 2>/dev/null || true) 11 | echo "Current installed NVIDIA driver version is ${NVIDIA_DRIVER_VERSION}" 12 | if verlt $NVIDIA_DRIVER_VERSION $CUDA_COMPAT_MAX_DRIVER_VERSION; then 13 | echo "Adding CUDA compat to LD_LIBRARY_PATH" 14 | export LD_LIBRARY_PATH=/usr/local/cuda/compat:$LD_LIBRARY_PATH 15 | echo $LD_LIBRARY_PATH 16 | else 17 | echo "Skipping CUDA compat setup as newer NVIDIA driver is installed" 18 | fi 19 | else 20 | echo "Skipping CUDA compat setup as package not found" 21 | fi 22 | 23 | if [[ -z "${HF_MODEL_ID}" ]]; then 24 | echo "HF_MODEL_ID must be set" 25 | exit 1 26 | fi 27 | export MODEL_ID="${HF_MODEL_ID}" 28 | 29 | if [[ -n "${HF_MODEL_REVISION}" ]]; then 30 | export REVISION="${HF_MODEL_REVISION}" 31 | fi 32 | 33 | if [[ -n "${SM_NUM_GPUS}" ]]; then 34 | export NUM_SHARD="${SM_NUM_GPUS}" 35 | fi 36 | 37 | if [[ -n "${HF_MODEL_QUANTIZE}" ]]; then 38 | export QUANTIZE="${HF_MODEL_QUANTIZE}" 39 | fi 40 | 41 | if [[ -n "${HF_MODEL_TRUST_REMOTE_CODE}" ]]; then 42 | export TRUST_REMOTE_CODE="${HF_MODEL_TRUST_REMOTE_CODE}" 43 | fi 44 | 45 | text-generation-launcher --port 8080 -------------------------------------------------------------------------------- /huggingface/pytorch/tgi/docker/3.0.1/entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | verlt() { 4 | [ "$1" = "$2" ] && return 1 || [ "$1" = "$(echo -e "$1\n$2" | sort -V | head -n1)" ] 5 | } 6 | 7 | if [ -f /usr/local/cuda/compat/libcuda.so.1 ]; then 8 | CUDA_COMPAT_MAX_DRIVER_VERSION=$(readlink /usr/local/cuda/compat/libcuda.so.1 | cut -d'.' 
-f 3-) 9 | echo "CUDA compat package should be installed for NVIDIA driver smaller than ${CUDA_COMPAT_MAX_DRIVER_VERSION}" 10 | NVIDIA_DRIVER_VERSION=$(sed -n 's/^NVRM.*Kernel Module *\([0-9.]*\).*$/\1/p' /proc/driver/nvidia/version 2>/dev/null || true) 11 | echo "Current installed NVIDIA driver version is ${NVIDIA_DRIVER_VERSION}" 12 | if verlt $NVIDIA_DRIVER_VERSION $CUDA_COMPAT_MAX_DRIVER_VERSION; then 13 | echo "Adding CUDA compat to LD_LIBRARY_PATH" 14 | export LD_LIBRARY_PATH=/usr/local/cuda/compat:$LD_LIBRARY_PATH 15 | echo $LD_LIBRARY_PATH 16 | else 17 | echo "Skipping CUDA compat setup as newer NVIDIA driver is installed" 18 | fi 19 | else 20 | echo "Skipping CUDA compat setup as package not found" 21 | fi 22 | 23 | if [[ -z "${HF_MODEL_ID}" ]]; then 24 | echo "HF_MODEL_ID must be set" 25 | exit 1 26 | fi 27 | export MODEL_ID="${HF_MODEL_ID}" 28 | 29 | if [[ -n "${HF_MODEL_REVISION}" ]]; then 30 | export REVISION="${HF_MODEL_REVISION}" 31 | fi 32 | 33 | if [[ -n "${SM_NUM_GPUS}" ]]; then 34 | export NUM_SHARD="${SM_NUM_GPUS}" 35 | fi 36 | 37 | if [[ -n "${HF_MODEL_QUANTIZE}" ]]; then 38 | export QUANTIZE="${HF_MODEL_QUANTIZE}" 39 | fi 40 | 41 | if [[ -n "${HF_MODEL_TRUST_REMOTE_CODE}" ]]; then 42 | export TRUST_REMOTE_CODE="${HF_MODEL_TRUST_REMOTE_CODE}" 43 | fi 44 | 45 | text-generation-launcher --port 8080 -------------------------------------------------------------------------------- /huggingface/pytorch/tgi/docker/3.1.1/start-cuda-compat.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | verlt() { 4 | [ "$1" = "$2" ] && return 1 || [ "$1" = "$(echo -e "$1\n$2" | sort -V | head -n1)" ] 5 | } 6 | 7 | if [ -f /usr/local/cuda/compat/libcuda.so.1 ]; then 8 | CUDA_COMPAT_MAX_DRIVER_VERSION=$(readlink /usr/local/cuda/compat/libcuda.so.1 | cut -d'.' 
-f 3-) 9 | echo "CUDA compat package should be installed for NVIDIA driver smaller than ${CUDA_COMPAT_MAX_DRIVER_VERSION}" 10 | NVIDIA_DRIVER_VERSION=$(sed -n 's/^NVRM.*Kernel Module *\([0-9.]*\).*$/\1/p' /proc/driver/nvidia/version 2>/dev/null || true) 11 | echo "Current installed NVIDIA driver version is ${NVIDIA_DRIVER_VERSION}" 12 | if verlt $NVIDIA_DRIVER_VERSION $CUDA_COMPAT_MAX_DRIVER_VERSION; then 13 | echo "Adding CUDA compat to LD_LIBRARY_PATH" 14 | export LD_LIBRARY_PATH=/usr/local/cuda/compat:$LD_LIBRARY_PATH 15 | echo $LD_LIBRARY_PATH 16 | else 17 | echo "Skipping CUDA compat setup as newer NVIDIA driver is installed" 18 | fi 19 | else 20 | echo "Skipping CUDA compat setup as package not found" 21 | fi -------------------------------------------------------------------------------- /huggingface/pytorch/tgi/docker/3.2.0/start-cuda-compat.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/llm-hosting-container/b7c890f23332e5a57ffa5a8d41e3d66321d441b6/huggingface/pytorch/tgi/docker/3.2.0/start-cuda-compat.sh -------------------------------------------------------------------------------- /huggingface/pytorch/tgi/docker/3.2.3/Dockerfile: -------------------------------------------------------------------------------- 1 | # Use the original TGI Dockerfile as a base 2 | FROM ghcr.io/huggingface/text-generation-inference:3.2.3 AS base 3 | 4 | FROM base AS sagemaker 5 | 6 | COPY /huggingface/pytorch/tgi/docker/3.2.3/start-cuda-compat.sh start-cuda-compat.sh 7 | RUN chmod +x start-cuda-compat.sh 8 | 9 | RUN apt-get update && apt-get upgrade -y unzip 10 | 11 | RUN HOME_DIR=/root && \ 12 | uv pip install pip requests PTable && \ 13 | curl -o ${HOME_DIR}/oss_compliance.zip https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip && \ 14 | unzip ${HOME_DIR}/oss_compliance.zip -d ${HOME_DIR}/ && \ 15 | cp ${HOME_DIR}/oss_compliance/test/testOSSCompliance /usr/local/bin/testOSSCompliance && \ 16 | chmod +x /usr/local/bin/testOSSCompliance && \ 17 | chmod +x ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh && \ 18 | ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh ${HOME_DIR} python && \ 19 | rm -rf ${HOME_DIR}/oss_compliance* 20 | 21 | COPY /huggingface/pytorch/tgi/docker/3.2.3/THIRD-PARTY-LICENSES /root/THIRD-PARTY-LICENSES 22 | 23 | ENV LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/root/.local/share/uv/python/cpython-3.11.11-linux-x86_64-gnu/lib/" 24 | ENV HF_HUB_USER_AGENT_ORIGIN=aws:sagemaker:gpu-cuda:inference:tgi-native 25 | 26 | COPY sagemaker-entrypoint.sh entrypoint.sh 27 | RUN chmod +x entrypoint.sh 28 | 29 | ENTRYPOINT ["./entrypoint.sh"] 30 | CMD ["--json-output"] 31 | 32 | LABEL dlc_major_version="2" 33 | LABEL com.amazonaws.ml.engines.sagemaker.dlc.framework.huggingface.tgi="true" 34 | LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port="true" 35 | -------------------------------------------------------------------------------- /huggingface/pytorch/tgi/docker/3.2.3/start-cuda-compat.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | verlt() { 4 | [ "$1" = "$2" ] && return 1 || [ "$1" = "$(echo -e "$1\n$2" | sort -V | head -n1)" ] 5 | } 6 | 7 | if [ -f /usr/local/cuda/compat/libcuda.so.1 ]; then 8 | CUDA_COMPAT_MAX_DRIVER_VERSION=$(readlink /usr/local/cuda/compat/libcuda.so.1 | cut -d'.' 
-f 3-) 9 | echo "CUDA compat package should be installed for NVIDIA driver smaller than ${CUDA_COMPAT_MAX_DRIVER_VERSION}" 10 | NVIDIA_DRIVER_VERSION=$(sed -n 's/^NVRM.*Kernel Module *\([0-9.]*\).*$/\1/p' /proc/driver/nvidia/version 2>/dev/null || true) 11 | echo "Current installed NVIDIA driver version is ${NVIDIA_DRIVER_VERSION}" 12 | if verlt $NVIDIA_DRIVER_VERSION $CUDA_COMPAT_MAX_DRIVER_VERSION; then 13 | echo "Adding CUDA compat to LD_LIBRARY_PATH" 14 | export LD_LIBRARY_PATH=/usr/local/cuda/compat:$LD_LIBRARY_PATH 15 | echo $LD_LIBRARY_PATH 16 | else 17 | echo "Skipping CUDA compat setup as newer NVIDIA driver is installed" 18 | fi 19 | else 20 | echo "Skipping CUDA compat setup as package not found" 21 | fi -------------------------------------------------------------------------------- /huggingface/pytorch/tgi/docker/archived/0.5.0/py3/cu118/Dockerfile.gpu: -------------------------------------------------------------------------------- 1 | FROM lukemathwalker/cargo-chef:latest-rust-1.67 AS chef 2 | WORKDIR /usr/src 3 | 4 | FROM chef as planner 5 | COPY Cargo.toml Cargo.toml 6 | COPY rust-toolchain.toml rust-toolchain.toml 7 | COPY proto proto 8 | COPY router router 9 | COPY launcher launcher 10 | RUN cargo chef prepare --recipe-path recipe.json 11 | 12 | FROM chef AS builder 13 | 14 | RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && \ 15 | curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v21.12/$PROTOC_ZIP && \ 16 | unzip -o $PROTOC_ZIP -d /usr/local bin/protoc && \ 17 | unzip -o $PROTOC_ZIP -d /usr/local 'include/*' && \ 18 | rm -f $PROTOC_ZIP 19 | 20 | COPY --from=planner /usr/src/recipe.json recipe.json 21 | RUN cargo chef cook --release --recipe-path recipe.json 22 | 23 | COPY Cargo.toml Cargo.toml 24 | COPY rust-toolchain.toml rust-toolchain.toml 25 | COPY proto proto 26 | COPY router router 27 | COPY launcher launcher 28 | RUN cargo build --release 29 | 30 | FROM nvidia/cuda:11.8.0-devel-ubuntu22.04 as base 31 | 32 | ARG PYTORCH_VERSION=2.0.0 33 | ARG PYTHON_VERSION=3.9 34 | ARG MAMBA_VERSION=23.1.0-1 35 | ARG CUDA_CHANNEL=nvidia 36 | ARG INSTALL_CHANNEL=pytorch 37 | # Automatically set by buildx 38 | ARG TARGETPLATFORM 39 | 40 | ENV LANG=C.UTF-8 \ 41 | LC_ALL=C.UTF-8 \ 42 | DEBIAN_FRONTEND=noninteractive \ 43 | HUGGINGFACE_HUB_CACHE=/tmp \ 44 | TRANSFORMERS_CACHE=/tmp \ 45 | HF_HUB_ENABLE_HF_TRANSFER=1 \ 46 | MODEL_ID=bigscience/bloom-560m \ 47 | QUANTIZE=false \ 48 | NUM_SHARD=1 \ 49 | PORT=80 \ 50 | CUDA_HOME=/usr/local/cuda \ 51 | LD_LIBRARY_PATH="/opt/conda/lib:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64:$LD_LIBRARY_PATH" \ 52 | PATH=$PATH:/opt/conda/bin:/usr/local/cuda/bin \ 53 | CONDA_PREFIX=/opt/conda 54 | 55 | RUN apt-get update && apt-get install -y --no-install-recommends \ 56 | libssl-dev \ 57 | unzip \ 58 | build-essential \ 59 | ca-certificates \ 60 | ccache \ 61 | curl \ 62 | git && \ 63 | rm -rf /var/lib/apt/lists/* 64 | 65 | # Install conda 66 | # translating Docker's TARGETPLATFORM into mamba arches 67 | RUN case ${TARGETPLATFORM} in \ 68 | "linux/arm64") MAMBA_ARCH=aarch64 ;; \ 69 | *) MAMBA_ARCH=x86_64 ;; \ 70 | esac && \ 71 | curl -fsSL -v -o ~/mambaforge.sh -O "https://github.com/conda-forge/miniforge/releases/download/${MAMBA_VERSION}/Mambaforge-${MAMBA_VERSION}-Linux-${MAMBA_ARCH}.sh" 72 | RUN chmod +x ~/mambaforge.sh && \ 73 | bash ~/mambaforge.sh -b -p /opt/conda && \ 74 | rm ~/mambaforge.sh 75 | 76 | # Install pytorch 77 | # On arm64 we exit with an error code 78 | RUN case ${TARGETPLATFORM} in \ 79 | 
"linux/arm64") exit 1 ;; \ 80 | *) /opt/conda/bin/conda update -y conda && \ 81 | /opt/conda/bin/conda install -c "${INSTALL_CHANNEL}" -c "${CUDA_CHANNEL}" -y "python=${PYTHON_VERSION}" pytorch==$PYTORCH_VERSION "pytorch-cuda=$(echo $CUDA_VERSION | cut -d'.' -f 1-2)" ;; \ 82 | esac && \ 83 | /opt/conda/bin/conda clean -ya 84 | 85 | WORKDIR /usr/src 86 | 87 | RUN LIBSSL_DEB=libssl1.1_1.1.0g-2ubuntu4_amd64.deb && \ 88 | curl -OL http://archive.ubuntu.com/ubuntu/pool/main/o/openssl/$LIBSSL_DEB && \ 89 | dpkg -i $LIBSSL_DEB && \ 90 | rm -f $LIBSSL_DEB 91 | 92 | # Install specific version of flash attention 93 | COPY server/Makefile-flash-att server/Makefile 94 | RUN cd server && make install-flash-attention 95 | 96 | # Install specific version of transformers 97 | COPY server/Makefile-transformers server/Makefile 98 | RUN cd server && BUILD_EXTENSIONS="True" make install-transformers 99 | 100 | COPY server/Makefile server/Makefile 101 | 102 | # Install server 103 | COPY proto proto 104 | COPY server server 105 | RUN cd server && \ 106 | make gen-server && \ 107 | pip install ".[bnb]" --no-cache-dir 108 | RUN rm -r proto server 109 | 110 | # Install router 111 | COPY --from=builder /usr/src/target/release/text-generation-router /usr/local/bin/text-generation-router 112 | # Install launcher 113 | COPY --from=builder /usr/src/target/release/text-generation-launcher /usr/local/bin/text-generation-launcher 114 | 115 | # AWS Sagemaker compatible image 116 | FROM base as sagemaker 117 | 118 | COPY --chmod=775 sagemaker-entrypoint.sh entrypoint.sh 119 | RUN sed -i '7 i export MODEL_ID="${HF_MODEL_ID}"' entrypoint.sh 120 | 121 | RUN HOME_DIR=/root && \ 122 | pip install requests && \ 123 | curl -o ${HOME_DIR}/oss_compliance.zip https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip && \ 124 | unzip ${HOME_DIR}/oss_compliance.zip -d ${HOME_DIR}/ && \ 125 | cp ${HOME_DIR}/oss_compliance/test/testOSSCompliance /usr/local/bin/testOSSCompliance && \ 126 | chmod +x /usr/local/bin/testOSSCompliance && \ 127 | chmod +x ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh && \ 128 | ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh ${HOME_DIR} python && \ 129 | rm -rf ${HOME_DIR}/oss_compliance* 130 | 131 | ENTRYPOINT ["./entrypoint.sh"] 132 | 133 | LABEL dlc_major_version="1" 134 | LABEL com.amazonaws.ml.engines.sagemaker.dlc.framework.huggingface.tgi="true" 135 | LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port="true" 136 | -------------------------------------------------------------------------------- /huggingface/pytorch/tgi/docker/archived/0.6.0/py3/cu118/Dockerfile.gpu: -------------------------------------------------------------------------------- 1 | # Rust builder 2 | FROM lukemathwalker/cargo-chef:latest-rust-1.69 AS chef 3 | WORKDIR /usr/src 4 | 5 | FROM chef as planner 6 | COPY Cargo.toml Cargo.toml 7 | COPY rust-toolchain.toml rust-toolchain.toml 8 | COPY proto proto 9 | COPY router router 10 | COPY launcher launcher 11 | RUN cargo chef prepare --recipe-path recipe.json 12 | 13 | FROM chef AS builder 14 | 15 | ARG GIT_SHA 16 | 17 | RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && \ 18 | curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v21.12/$PROTOC_ZIP && \ 19 | unzip -o $PROTOC_ZIP -d /usr/local bin/protoc && \ 20 | unzip -o $PROTOC_ZIP -d /usr/local 'include/*' && \ 21 | rm -f $PROTOC_ZIP 22 | 23 | COPY --from=planner /usr/src/recipe.json recipe.json 24 | RUN cargo chef cook --release --recipe-path recipe.json 25 | 26 | COPY Cargo.toml 
Cargo.toml 27 | COPY rust-toolchain.toml rust-toolchain.toml 28 | COPY proto proto 29 | COPY router router 30 | COPY launcher launcher 31 | RUN cargo build --release 32 | 33 | # Python builder 34 | # Adapted from: https://github.com/pytorch/pytorch/blob/master/Dockerfile 35 | FROM debian:bullseye-slim as pytorch-install 36 | 37 | ARG PYTORCH_VERSION=2.0.1 38 | ARG PYTHON_VERSION=3.9 39 | ARG CUDA_VERSION=11.8 40 | ARG MAMBA_VERSION=23.1.0-4 41 | ARG CUDA_CHANNEL=nvidia 42 | ARG INSTALL_CHANNEL=pytorch 43 | # Automatically set by buildx 44 | ARG TARGETPLATFORM 45 | 46 | ENV PATH /opt/conda/bin:$PATH 47 | 48 | RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ 49 | build-essential \ 50 | ca-certificates \ 51 | ccache \ 52 | curl \ 53 | git && \ 54 | rm -rf /var/lib/apt/lists/* 55 | 56 | # Install conda 57 | # translating Docker's TARGETPLATFORM into mamba arches 58 | RUN case ${TARGETPLATFORM} in \ 59 | "linux/arm64") MAMBA_ARCH=aarch64 ;; \ 60 | *) MAMBA_ARCH=x86_64 ;; \ 61 | esac && \ 62 | curl -fsSL -v -o ~/mambaforge.sh -O "https://github.com/conda-forge/miniforge/releases/download/${MAMBA_VERSION}/Mambaforge-${MAMBA_VERSION}-Linux-${MAMBA_ARCH}.sh" 63 | RUN chmod +x ~/mambaforge.sh && \ 64 | bash ~/mambaforge.sh -b -p /opt/conda && \ 65 | rm ~/mambaforge.sh 66 | 67 | # Install pytorch 68 | # On arm64 we exit with an error code 69 | RUN case ${TARGETPLATFORM} in \ 70 | "linux/arm64") exit 1 ;; \ 71 | *) /opt/conda/bin/conda update -y conda && \ 72 | /opt/conda/bin/conda install -c "${INSTALL_CHANNEL}" -c "${CUDA_CHANNEL}" -y "python=${PYTHON_VERSION}" pytorch==$PYTORCH_VERSION "pytorch-cuda=$(echo $CUDA_VERSION | cut -d'.' -f 1-2)" ;; \ 73 | esac && \ 74 | /opt/conda/bin/conda clean -ya 75 | 76 | # CUDA kernels builder image 77 | FROM pytorch-install as kernel-builder 78 | 79 | RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ 80 | ninja-build \ 81 | && rm -rf /var/lib/apt/lists/* 82 | 83 | RUN /opt/conda/bin/conda install -c "nvidia/label/cuda-11.8.0" -y cuda==11.8 && \ 84 | /opt/conda/bin/conda clean -ya 85 | 86 | # Build Flash Attention CUDA kernels 87 | FROM kernel-builder as flash-att-builder 88 | 89 | WORKDIR /usr/src 90 | 91 | COPY server/Makefile-flash-att Makefile 92 | 93 | # Build specific version of flash attention 94 | RUN make build-flash-attention 95 | 96 | # Build Transformers CUDA kernels 97 | FROM kernel-builder as transformers-builder 98 | 99 | WORKDIR /usr/src 100 | 101 | COPY server/Makefile-transformers Makefile 102 | 103 | # Build specific version of transformers 104 | RUN BUILD_EXTENSIONS="True" make build-transformers 105 | 106 | # Text Generation Inference base image 107 | FROM nvidia/cuda:11.8.0-base-ubuntu20.04 as base 108 | 109 | # Conda env 110 | ENV PATH=/opt/conda/bin:$PATH \ 111 | CONDA_PREFIX=/opt/conda 112 | 113 | # Text Generation Inference base env 114 | ENV HUGGINGFACE_HUB_CACHE=/tmp \ 115 | TRANSFORMERS_CACHE=/tmp \ 116 | HF_HUB_ENABLE_HF_TRANSFER=1 \ 117 | MODEL_ID=bigscience/bloom-560m \ 118 | QUANTIZE=false \ 119 | NUM_SHARD=1 \ 120 | PORT=80 121 | 122 | LABEL com.nvidia.volumes.needed="nvidia_driver" 123 | 124 | WORKDIR /usr/src 125 | 126 | RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ 127 | libssl-dev \ 128 | ca-certificates \ 129 | make \ 130 | unzip \ 131 | curl \ 132 | && rm -rf /var/lib/apt/lists/* 133 | 134 | # Copy conda with PyTorch installed 135 | COPY --from=pytorch-install /opt/conda 
/opt/conda 136 | 137 | # Copy build artifacts from flash attention builder 138 | COPY --from=flash-att-builder /usr/src/flash-attention/build/lib.linux-x86_64-cpython-39 /opt/conda/lib/python3.9/site-packages 139 | COPY --from=flash-att-builder /usr/src/flash-attention/csrc/layer_norm/build/lib.linux-x86_64-cpython-39 /opt/conda/lib/python3.9/site-packages 140 | COPY --from=flash-att-builder /usr/src/flash-attention/csrc/rotary/build/lib.linux-x86_64-cpython-39 /opt/conda/lib/python3.9/site-packages 141 | 142 | # Copy build artifacts from transformers builder 143 | COPY --from=transformers-builder /usr/src/transformers /usr/src/transformers 144 | COPY --from=transformers-builder /usr/src/transformers/build/lib.linux-x86_64-cpython-39/transformers /usr/src/transformers/src/transformers 145 | 146 | # Install transformers dependencies 147 | RUN cd /usr/src/transformers && pip install -e . --no-cache-dir && pip install einops --no-cache-dir 148 | RUN rm -r transformers/examples 149 | 150 | # FIXME: remove when we get a release of huggingface-hub 151 | RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ 152 | git \ 153 | && rm -rf /var/lib/apt/lists/* 154 | 155 | # Install server 156 | COPY proto proto 157 | COPY server server 158 | COPY server/Makefile server/Makefile 159 | RUN cd server && \ 160 | make gen-server && \ 161 | pip install -r requirements.txt && \ 162 | pip install ".[bnb, accelerate]" --no-cache-dir 163 | RUN rm -r proto server 164 | 165 | # Install router 166 | COPY --from=builder /usr/src/target/release/text-generation-router /usr/local/bin/text-generation-router 167 | # Install launcher 168 | COPY --from=builder /usr/src/target/release/text-generation-launcher /usr/local/bin/text-generation-launcher 169 | 170 | # AWS Sagemaker compatbile image 171 | FROM base as sagemaker 172 | 173 | COPY --chmod=775 sagemaker-entrypoint.sh entrypoint.sh 174 | RUN sed -i '7 i export MODEL_ID="${HF_MODEL_ID}"' entrypoint.sh 175 | 176 | RUN HOME_DIR=/root && \ 177 | pip install requests && \ 178 | curl -o ${HOME_DIR}/oss_compliance.zip https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip && \ 179 | unzip ${HOME_DIR}/oss_compliance.zip -d ${HOME_DIR}/ && \ 180 | cp ${HOME_DIR}/oss_compliance/test/testOSSCompliance /usr/local/bin/testOSSCompliance && \ 181 | chmod +x /usr/local/bin/testOSSCompliance && \ 182 | chmod +x ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh && \ 183 | ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh ${HOME_DIR} python && \ 184 | rm -rf ${HOME_DIR}/oss_compliance* 185 | RUN curl -o /root/THIRD-PARTY-LICENSES https://publish.djl.ai/dlc-licenses/huggingface/tgi-0.6.0/THIRD-PARTY-LICENSES 186 | 187 | ENTRYPOINT ["./entrypoint.sh"] 188 | 189 | LABEL dlc_major_version="1" 190 | LABEL com.amazonaws.ml.engines.sagemaker.dlc.framework.huggingface.tgi="true" 191 | LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port="true" 192 | -------------------------------------------------------------------------------- /huggingface/pytorch/tgi/docker/archived/0.8.2/py3/cu118/Dockerfile.gpu: -------------------------------------------------------------------------------- 1 | # Rust builder 2 | FROM lukemathwalker/cargo-chef:latest-rust-1.69 AS chef 3 | WORKDIR /usr/src 4 | 5 | FROM chef as planner 6 | COPY Cargo.toml Cargo.toml 7 | COPY rust-toolchain.toml rust-toolchain.toml 8 | COPY proto proto 9 | COPY benchmark benchmark 10 | COPY router router 11 | COPY launcher launcher 12 | RUN cargo chef prepare 
--recipe-path recipe.json 13 | 14 | FROM chef AS builder 15 | 16 | ARG GIT_SHA 17 | 18 | RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && \ 19 | curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v21.12/$PROTOC_ZIP && \ 20 | unzip -o $PROTOC_ZIP -d /usr/local bin/protoc && \ 21 | unzip -o $PROTOC_ZIP -d /usr/local 'include/*' && \ 22 | rm -f $PROTOC_ZIP 23 | 24 | COPY --from=planner /usr/src/recipe.json recipe.json 25 | RUN cargo chef cook --release --recipe-path recipe.json 26 | 27 | COPY Cargo.toml Cargo.toml 28 | COPY rust-toolchain.toml rust-toolchain.toml 29 | COPY proto proto 30 | COPY benchmark benchmark 31 | COPY router router 32 | COPY launcher launcher 33 | RUN cargo build --release 34 | 35 | # Python builder 36 | # Adapted from: https://github.com/pytorch/pytorch/blob/master/Dockerfile 37 | FROM debian:bullseye-slim as pytorch-install 38 | 39 | ARG PYTORCH_VERSION=2.0.0 40 | ARG PYTHON_VERSION=3.9 41 | ARG CUDA_VERSION=11.8 42 | ARG MAMBA_VERSION=23.1.0-1 43 | ARG CUDA_CHANNEL=nvidia 44 | ARG INSTALL_CHANNEL=pytorch 45 | # Automatically set by buildx 46 | ARG TARGETPLATFORM 47 | 48 | ENV PATH /opt/conda/bin:$PATH 49 | 50 | RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ 51 | build-essential \ 52 | ca-certificates \ 53 | ccache \ 54 | curl \ 55 | git && \ 56 | rm -rf /var/lib/apt/lists/* 57 | 58 | # Install conda 59 | # translating Docker's TARGETPLATFORM into mamba arches 60 | RUN case ${TARGETPLATFORM} in \ 61 | "linux/arm64") MAMBA_ARCH=aarch64 ;; \ 62 | *) MAMBA_ARCH=x86_64 ;; \ 63 | esac && \ 64 | curl -fsSL -v -o ~/mambaforge.sh -O "https://github.com/conda-forge/miniforge/releases/download/${MAMBA_VERSION}/Mambaforge-${MAMBA_VERSION}-Linux-${MAMBA_ARCH}.sh" 65 | RUN chmod +x ~/mambaforge.sh && \ 66 | bash ~/mambaforge.sh -b -p /opt/conda && \ 67 | rm ~/mambaforge.sh 68 | 69 | # Install pytorch 70 | # On arm64 we exit with an error code 71 | RUN case ${TARGETPLATFORM} in \ 72 | "linux/arm64") exit 1 ;; \ 73 | *) /opt/conda/bin/conda update -y conda && \ 74 | /opt/conda/bin/conda install -c "${INSTALL_CHANNEL}" -c "${CUDA_CHANNEL}" -y "python=${PYTHON_VERSION}" pytorch==$PYTORCH_VERSION "pytorch-cuda=$(echo $CUDA_VERSION | cut -d'.' 
-f 1-2)" ;; \ 75 | esac && \ 76 | /opt/conda/bin/conda clean -ya 77 | 78 | # CUDA kernels builder image 79 | FROM pytorch-install as kernel-builder 80 | 81 | RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ 82 | ninja-build \ 83 | && rm -rf /var/lib/apt/lists/* 84 | 85 | RUN /opt/conda/bin/conda install -c "nvidia/label/cuda-11.8.0" -y cuda==11.8 && \ 86 | /opt/conda/bin/conda clean -ya 87 | 88 | # Build Flash Attention CUDA kernels 89 | FROM kernel-builder as flash-att-builder 90 | 91 | WORKDIR /usr/src 92 | 93 | COPY server/Makefile-flash-att Makefile 94 | 95 | # Build specific version of flash attention 96 | RUN make build-flash-attention 97 | 98 | # Build Transformers CUDA kernels 99 | FROM kernel-builder as transformers-builder 100 | 101 | WORKDIR /usr/src 102 | 103 | COPY server/Makefile-transformers Makefile 104 | 105 | # Build specific version of transformers 106 | RUN BUILD_EXTENSIONS="True" make build-transformers 107 | 108 | # Text Generation Inference base image 109 | FROM nvidia/cuda:11.8.0-base-ubuntu20.04 as base 110 | 111 | # Conda env 112 | ENV PATH=/opt/conda/bin:$PATH \ 113 | CONDA_PREFIX=/opt/conda 114 | 115 | # Text Generation Inference base env 116 | ENV HUGGINGFACE_HUB_CACHE=/tmp \ 117 | HF_HUB_ENABLE_HF_TRANSFER=1 \ 118 | PORT=80 119 | 120 | WORKDIR /usr/src 121 | 122 | RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ 123 | libssl-dev \ 124 | ca-certificates \ 125 | make \ 126 | unzip \ 127 | curl \ 128 | && rm -rf /var/lib/apt/lists/* 129 | 130 | # Copy conda with PyTorch installed 131 | COPY --from=pytorch-install /opt/conda /opt/conda 132 | 133 | # Copy build artifacts from flash attention builder 134 | COPY --from=flash-att-builder /usr/src/flash-attention/build/lib.linux-x86_64-cpython-39 /opt/conda/lib/python3.9/site-packages 135 | COPY --from=flash-att-builder /usr/src/flash-attention/csrc/layer_norm/build/lib.linux-x86_64-cpython-39 /opt/conda/lib/python3.9/site-packages 136 | COPY --from=flash-att-builder /usr/src/flash-attention/csrc/rotary/build/lib.linux-x86_64-cpython-39 /opt/conda/lib/python3.9/site-packages 137 | 138 | # Copy build artifacts from transformers builder 139 | COPY --from=transformers-builder /usr/src/transformers /usr/src/transformers 140 | COPY --from=transformers-builder /usr/src/transformers/build/lib.linux-x86_64-cpython-39/transformers /usr/src/transformers/src/transformers 141 | 142 | # Install transformers dependencies 143 | RUN cd /usr/src/transformers && pip install -e . 
--no-cache-dir && pip install einops --no-cache-dir 144 | RUN rm -r transformers/examples 145 | 146 | # Install server 147 | COPY proto proto 148 | COPY server server 149 | COPY server/Makefile server/Makefile 150 | RUN cd server && \ 151 | make gen-server && \ 152 | pip install -r requirements.txt && \ 153 | pip install ".[bnb, accelerate]" --no-cache-dir 154 | RUN rm -r proto server 155 | 156 | # Install benchmarker 157 | COPY --from=builder /usr/src/target/release/text-generation-benchmark /usr/local/bin/text-generation-benchmark 158 | # Install router 159 | COPY --from=builder /usr/src/target/release/text-generation-router /usr/local/bin/text-generation-router 160 | # Install launcher 161 | COPY --from=builder /usr/src/target/release/text-generation-launcher /usr/local/bin/text-generation-launcher 162 | 163 | # AWS Sagemaker compatbile image 164 | FROM base as sagemaker 165 | 166 | COPY --chmod=775 sagemaker-entrypoint.sh entrypoint.sh 167 | 168 | RUN HOME_DIR=/root && \ 169 | pip install requests && \ 170 | curl -o ${HOME_DIR}/oss_compliance.zip https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip && \ 171 | unzip ${HOME_DIR}/oss_compliance.zip -d ${HOME_DIR}/ && \ 172 | cp ${HOME_DIR}/oss_compliance/test/testOSSCompliance /usr/local/bin/testOSSCompliance && \ 173 | chmod +x /usr/local/bin/testOSSCompliance && \ 174 | chmod +x ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh && \ 175 | ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh ${HOME_DIR} python && \ 176 | rm -rf ${HOME_DIR}/oss_compliance* 177 | RUN curl -o /root/THIRD-PARTY-LICENSES https://publish.djl.ai/dlc-licenses/huggingface/tgi-0.8.2/THIRD-PARTY-LICENSES 178 | 179 | ENTRYPOINT ["./entrypoint.sh"] 180 | 181 | LABEL dlc_major_version="1" 182 | LABEL com.amazonaws.ml.engines.sagemaker.dlc.framework.huggingface.tgi="true" 183 | LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port="true" 184 | -------------------------------------------------------------------------------- /huggingface/pytorch/tgi/docker/archived/0.9.3/py3/cu118/Dockerfile.gpu: -------------------------------------------------------------------------------- 1 | FROM lukemathwalker/cargo-chef:latest-rust-1.70 AS chef 2 | WORKDIR /usr/src 3 | 4 | ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse 5 | 6 | FROM chef as planner 7 | COPY Cargo.toml Cargo.toml 8 | COPY rust-toolchain.toml rust-toolchain.toml 9 | COPY proto proto 10 | COPY benchmark benchmark 11 | COPY router router 12 | COPY launcher launcher 13 | RUN cargo chef prepare --recipe-path recipe.json 14 | 15 | FROM chef AS builder 16 | 17 | ARG GIT_SHA 18 | ARG DOCKER_LABEL 19 | 20 | RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && \ 21 | curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v21.12/$PROTOC_ZIP && \ 22 | unzip -o $PROTOC_ZIP -d /usr/local bin/protoc && \ 23 | unzip -o $PROTOC_ZIP -d /usr/local 'include/*' && \ 24 | rm -f $PROTOC_ZIP 25 | 26 | COPY --from=planner /usr/src/recipe.json recipe.json 27 | RUN cargo chef cook --release --recipe-path recipe.json 28 | 29 | COPY Cargo.toml Cargo.toml 30 | COPY rust-toolchain.toml rust-toolchain.toml 31 | COPY proto proto 32 | COPY benchmark benchmark 33 | COPY router router 34 | COPY launcher launcher 35 | RUN cargo build --release 36 | 37 | # Python builder 38 | # Adapted from: https://github.com/pytorch/pytorch/blob/master/Dockerfile 39 | FROM debian:bullseye-slim as pytorch-install 40 | 41 | ARG PYTORCH_VERSION=2.0.1 42 | ARG PYTHON_VERSION=3.9 43 | ARG CUDA_VERSION=11.8 44 | ARG 
MAMBA_VERSION=23.1.0-4 45 | ARG CUDA_CHANNEL=nvidia 46 | ARG INSTALL_CHANNEL=pytorch 47 | # Automatically set by buildx 48 | ARG TARGETPLATFORM 49 | 50 | ENV PATH /opt/conda/bin:$PATH 51 | 52 | RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ 53 | build-essential \ 54 | ca-certificates \ 55 | ccache \ 56 | curl \ 57 | git && \ 58 | rm -rf /var/lib/apt/lists/* 59 | 60 | # Install conda 61 | # translating Docker's TARGETPLATFORM into mamba arches 62 | RUN case ${TARGETPLATFORM} in \ 63 | "linux/arm64") MAMBA_ARCH=aarch64 ;; \ 64 | *) MAMBA_ARCH=x86_64 ;; \ 65 | esac && \ 66 | curl -fsSL -v -o ~/mambaforge.sh -O "https://github.com/conda-forge/miniforge/releases/download/${MAMBA_VERSION}/Mambaforge-${MAMBA_VERSION}-Linux-${MAMBA_ARCH}.sh" 67 | RUN chmod +x ~/mambaforge.sh && \ 68 | bash ~/mambaforge.sh -b -p /opt/conda && \ 69 | rm ~/mambaforge.sh 70 | 71 | # Install pytorch 72 | # On arm64 we exit with an error code 73 | RUN case ${TARGETPLATFORM} in \ 74 | "linux/arm64") exit 1 ;; \ 75 | *) /opt/conda/bin/conda update -y conda && \ 76 | /opt/conda/bin/conda install -c "${INSTALL_CHANNEL}" -c "${CUDA_CHANNEL}" -y "python=${PYTHON_VERSION}" pytorch==$PYTORCH_VERSION "pytorch-cuda=$(echo $CUDA_VERSION | cut -d'.' -f 1-2)" ;; \ 77 | esac && \ 78 | /opt/conda/bin/conda clean -ya 79 | 80 | # CUDA kernels builder image 81 | FROM pytorch-install as kernel-builder 82 | 83 | RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ 84 | ninja-build \ 85 | && rm -rf /var/lib/apt/lists/* 86 | 87 | RUN /opt/conda/bin/conda install -c "nvidia/label/cuda-11.8.0" -y cuda==11.8 && \ 88 | /opt/conda/bin/conda clean -ya 89 | 90 | # Build Flash Attention CUDA kernels 91 | FROM kernel-builder as flash-att-builder 92 | 93 | WORKDIR /usr/src 94 | 95 | COPY server/Makefile-flash-att Makefile 96 | 97 | # Build specific version of flash attention 98 | RUN make build-flash-attention 99 | 100 | # Build Flash Attention v2 CUDA kernels 101 | FROM kernel-builder as flash-att-v2-builder 102 | 103 | WORKDIR /usr/src 104 | 105 | COPY server/Makefile-flash-att-v2 Makefile 106 | 107 | # Build specific version of flash attention v2 108 | RUN make build-flash-attention-v2 109 | 110 | # Build Transformers CUDA kernels 111 | FROM kernel-builder as custom-kernels-builder 112 | 113 | WORKDIR /usr/src 114 | 115 | COPY server/custom_kernels/ . 
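# Note: the "Build specific version of transformers" comment just below is a leftover from the
# earlier Dockerfiles in this directory; in this 0.9.3 image the stage compiles TGI's custom CUDA
# kernels copied from server/custom_kernels via setup.py, and there is no separate
# transformers-builder stage anymore.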
116 | 117 | # Build specific version of transformers 118 | RUN python setup.py build 119 | 120 | # Build vllm CUDA kernels 121 | FROM kernel-builder as vllm-builder 122 | 123 | WORKDIR /usr/src 124 | 125 | COPY server/Makefile-vllm Makefile 126 | 127 | # Build specific version of vllm 128 | RUN make build-vllm 129 | 130 | # Text Generation Inference base image 131 | FROM nvidia/cuda:11.8.0-base-ubuntu20.04 as base 132 | 133 | # Conda env 134 | ENV PATH=/opt/conda/bin:$PATH \ 135 | CONDA_PREFIX=/opt/conda 136 | 137 | # Text Generation Inference base env 138 | ENV HUGGINGFACE_HUB_CACHE=/tmp \ 139 | HF_HUB_ENABLE_HF_TRANSFER=1 \ 140 | PORT=80 141 | 142 | WORKDIR /usr/src 143 | 144 | RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ 145 | libssl-dev \ 146 | ca-certificates \ 147 | make \ 148 | unzip \ 149 | curl \ 150 | && rm -rf /var/lib/apt/lists/* 151 | 152 | # Copy conda with PyTorch installed 153 | COPY --from=pytorch-install /opt/conda /opt/conda 154 | 155 | # Copy build artifacts from flash attention builder 156 | COPY --from=flash-att-builder /usr/src/flash-attention/build/lib.linux-x86_64-cpython-39 /opt/conda/lib/python3.9/site-packages 157 | COPY --from=flash-att-builder /usr/src/flash-attention/csrc/layer_norm/build/lib.linux-x86_64-cpython-39 /opt/conda/lib/python3.9/site-packages 158 | COPY --from=flash-att-builder /usr/src/flash-attention/csrc/rotary/build/lib.linux-x86_64-cpython-39 /opt/conda/lib/python3.9/site-packages 159 | 160 | # Copy build artifacts from flash attention v2 builder 161 | COPY --from=flash-att-v2-builder /usr/src/flash-attention-v2/build/lib.linux-x86_64-cpython-39 /opt/conda/lib/python3.9/site-packages 162 | 163 | # Copy build artifacts from custom kernels builder 164 | COPY --from=custom-kernels-builder /usr/src/build/lib.linux-x86_64-cpython-39 /opt/conda/lib/python3.9/site-packages 165 | 166 | # Copy builds artifacts from vllm builder 167 | COPY --from=vllm-builder /usr/src/vllm/build/lib.linux-x86_64-cpython-39 /opt/conda/lib/python3.9/site-packages 168 | 169 | # Install flash-attention dependencies 170 | RUN pip install einops --no-cache-dir 171 | 172 | # Install server 173 | COPY proto proto 174 | COPY server server 175 | COPY server/Makefile server/Makefile 176 | RUN cd server && \ 177 | make gen-server && \ 178 | pip install -r requirements.txt && \ 179 | pip install ".[bnb, accelerate]" --no-cache-dir 180 | RUN rm -r proto server 181 | 182 | # Install benchmarker 183 | COPY --from=builder /usr/src/target/release/text-generation-benchmark /usr/local/bin/text-generation-benchmark 184 | # Install router 185 | COPY --from=builder /usr/src/target/release/text-generation-router /usr/local/bin/text-generation-router 186 | # Install launcher 187 | COPY --from=builder /usr/src/target/release/text-generation-launcher /usr/local/bin/text-generation-launcher 188 | 189 | RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ 190 | build-essential \ 191 | g++ \ 192 | && rm -rf /var/lib/apt/lists/* 193 | 194 | # AWS Sagemaker compatbile image 195 | FROM base as sagemaker 196 | 197 | COPY --chmod=775 sagemaker-entrypoint.sh entrypoint.sh 198 | 199 | RUN HOME_DIR=/root && \ 200 | pip install requests && \ 201 | curl -o ${HOME_DIR}/oss_compliance.zip https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip && \ 202 | unzip ${HOME_DIR}/oss_compliance.zip -d ${HOME_DIR}/ && \ 203 | cp ${HOME_DIR}/oss_compliance/test/testOSSCompliance /usr/local/bin/testOSSCompliance && 
\ 204 | chmod +x /usr/local/bin/testOSSCompliance && \ 205 | chmod +x ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh && \ 206 | ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh ${HOME_DIR} python && \ 207 | rm -rf ${HOME_DIR}/oss_compliance* 208 | RUN curl -o /root/THIRD-PARTY-LICENSES https://publish.djl.ai/dlc-licenses/huggingface/tgi-0.9.3/THIRD-PARTY-LICENSES 209 | 210 | ENTRYPOINT ["./entrypoint.sh"] 211 | CMD ["--json-output"] 212 | 213 | LABEL dlc_major_version="1" 214 | LABEL com.amazonaws.ml.engines.sagemaker.dlc.framework.huggingface.tgi="true" 215 | LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port="true" 216 | -------------------------------------------------------------------------------- /huggingface/pytorch/tgi/docker/buildspec.yml: -------------------------------------------------------------------------------- 1 | version: 0.2 2 | 3 | env: 4 | shell: bash 5 | variables: 6 | FRAMEWORK_FOLDER: "huggingface/pytorch/tgi/docker" 7 | PYTHONPATH: "/codebuild/output/src*/src/github.com/awslabs/llm-hosting-container" 8 | 9 | phases: 10 | install: 11 | runtime-versions: 12 | python: 3.11 13 | commands: 14 | - echo "Installing Python version 3.11 ..." 15 | - pyenv global $PYTHON_311_VERSION 16 | 17 | pre_build: 18 | commands: 19 | - export PYTHONPATH=$(pwd):$PYTHONPATH 20 | - | 21 | echo Setting up Docker buildx. 22 | docker buildx version 23 | docker buildx create --name builder --driver docker-container --buildkitd-flags '--allow-insecure-entitlement security.insecure --allow-insecure-entitlement network.host' --use 24 | docker buildx inspect --bootstrap --builder builder 25 | docker buildx install 26 | echo Preparing system dependencies for execution. 27 | docker --version 28 | docker login -u $DOCKER_USERNAME -p $DOCKER_PASSWORD 29 | curl -LO http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh 30 | bash Miniconda3-latest-Linux-x86_64.sh -bfp /miniconda3 31 | export PATH=/miniconda3/bin:${PATH} 32 | conda install python=3.11 33 | conda update -y conda 34 | echo Prepare TGI dependencies for execution. 
35 | mkdir tgi-artifacts 36 | python -m pip install -r $FRAMEWORK_FOLDER/tgi-requirements.txt 37 | 38 | build: 39 | commands: 40 | - | 41 | echo Build started on `date` 42 | echo "Current PYTHONPATH: $PYTHONPATH" 43 | python $FRAMEWORK_FOLDER/tgi.py 44 | 45 | post_build: 46 | commands: 47 | - | 48 | echo Build completed on `date` 49 | -------------------------------------------------------------------------------- /huggingface/pytorch/tgi/docker/tgi-requirements.txt: -------------------------------------------------------------------------------- 1 | boto3 2 | dataclasses 3 | docker 4 | gitpython 5 | sagemaker 6 | 7 | parameterized 8 | pytest 9 | pytest-mock 10 | pytest-xdist -------------------------------------------------------------------------------- /huggingface/pytorch/tgi/docker/tgi.py: -------------------------------------------------------------------------------- 1 | import git 2 | import logging 3 | import os 4 | import shutil 5 | import subprocess 6 | import time 7 | 8 | from huggingface.pytorch.release_utils import ( 9 | ECR_SCAN_TIMEOUT_IN_SECONDS, 10 | GIT_REPO_DOCKERFILES_ROOT_DIRECTORY, 11 | GIT_REPO_PYTEST_PATH, 12 | LOG, 13 | Aws, 14 | Device, 15 | DlcPipeline, 16 | DockerClient, 17 | EnvironmentVariable, 18 | Mode, 19 | ReleaseConfigs, 20 | VulnerabilitySeverity, 21 | ) 22 | 23 | GIT_REPO_TGI_LOCAL_FOLDER_NAME = "text-generation-inference" 24 | GIT_REPO_TGI_TAG_PATTERN = "v{version}" 25 | GIT_REPO_TGI_URL = "https://github.com/huggingface/text-generation-inference.git" 26 | 27 | def build(configs: ReleaseConfigs): 28 | """Builds the Docker image for the provided configs.""" 29 | aws = Aws() 30 | docker_client = DockerClient() 31 | for config in configs.releases: 32 | LOG.info(f"Going to build image for config: {config}.") 33 | image_uri = config.get_image_uri_for_staging() 34 | if aws.does_ecr_image_exist(image_uri): 35 | LOG.info(f"Skipping already built image '{image_uri}'. Config: {config}.") 36 | continue 37 | 38 | build_path = "." 39 | if config.device.lower() == Device.GPU.name.lower(): 40 | LOG.info(f"Setting up build prerequisites for GPU release config with version: {config.version}") 41 | build_path = GIT_REPO_TGI_LOCAL_FOLDER_NAME 42 | shutil.rmtree(GIT_REPO_TGI_LOCAL_FOLDER_NAME, ignore_errors=True) 43 | hf_tgi_repo = git.Repo.clone_from(GIT_REPO_TGI_URL, GIT_REPO_TGI_LOCAL_FOLDER_NAME, no_checkout=True) 44 | hf_tgi_repo_tag = GIT_REPO_TGI_TAG_PATTERN.format(version=config.version) 45 | hf_tgi_repo.git.checkout(hf_tgi_repo_tag) 46 | LOG.info(f"Checked out {hf_tgi_repo} with tag: {hf_tgi_repo_tag} to {GIT_REPO_TGI_LOCAL_FOLDER_NAME}.") 47 | shutil.copytree(GIT_REPO_DOCKERFILES_ROOT_DIRECTORY, 48 | os.path.join(GIT_REPO_TGI_LOCAL_FOLDER_NAME, GIT_REPO_DOCKERFILES_ROOT_DIRECTORY)) 49 | LOG.info(f"Copied '{GIT_REPO_DOCKERFILES_ROOT_DIRECTORY}' directory to TGI directory for 'COPY' command.") 50 | 51 | dockerfile_path = config.get_dockerfile_path() 52 | LOG.info(f"Building Dockerfile: '{dockerfile_path}'. 
This may take a while...") 53 | docker_client.build(image_uri=image_uri, dockerfile_path=dockerfile_path, build_path=build_path) 54 | 55 | username, password = aws.get_ecr_credentials(image_uri) 56 | docker_client.login(username, password, image_uri) 57 | docker_client.push(image_uri) 58 | 59 | def test(configs: ReleaseConfigs): 60 | """Runs SageMaker tests for the Docker images associated with the provided configs and current git commit.""" 61 | aws = Aws() 62 | for config in configs.releases: 63 | LOG.info(f"Going to test built image for config: {config}.") 64 | test_role_arn = os.getenv(EnvironmentVariable.TEST_ROLE_ARN.name) 65 | test_session = aws.get_session_for_role(test_role_arn) 66 | test_credentials = test_session.get_credentials() 67 | environ = os.environ.copy() 68 | environ.update({ 69 | "DEVICE_TYPE": config.device.lower(), 70 | "AWS_ACCESS_KEY_ID": test_credentials.access_key, 71 | "AWS_SECRET_ACCESS_KEY": test_credentials.secret_key, 72 | "AWS_SESSION_TOKEN": test_credentials.token, 73 | "IMAGE_URI": config.get_image_uri_for_staging(), 74 | "TEST_ROLE_ARN": test_role_arn }) 75 | 76 | command = ["pytest", "-m", config.device.lower(), "-n", "auto", "--log-cli-level", "info", GIT_REPO_PYTEST_PATH] 77 | LOG.info(f"Running test command: {command}.") 78 | process = subprocess.run(command, env=environ, encoding="utf-8", capture_output=True) 79 | LOG.info(process.stdout) 80 | assert process.returncode == 0, f"Failed with config: {config}.\nError: {process.stderr}." 81 | LOG.info(f"Finished testing image with config: {config}.") 82 | 83 | start_time = time.time() 84 | image_uri = config.get_image_uri_for_staging() 85 | while aws.is_ecr_image_scan_pending(image_uri): 86 | LOG.info(f"Waiting for image scan results for image: {image_uri}.") 87 | assert time.time() - start_time <= ECR_SCAN_TIMEOUT_IN_SECONDS, \ 88 | f"{image_uri} with config {config} has not completed scanning beyond permitted wait time." 89 | 90 | severities = {VulnerabilitySeverity.CRITICAL.name} 91 | vulnerability_ids = aws.get_image_scan_findings(image_uri, severities, set(configs.ignore_vulnerabilities)) 92 | assert len(vulnerability_ids) == 0, f"{image_uri} with {config} has vulnerabilities: {vulnerability_ids}." 93 | LOG.info(f"Finished checking vulnerabilities for image: {image_uri}.") 94 | 95 | def pr(configs: ReleaseConfigs): 96 | """Executes both build and test modes.""" 97 | build(configs) 98 | test(configs) 99 | 100 | def release(configs: ReleaseConfigs): 101 | """Integrates with DLC to release the tested images associated for the provided configs.""" 102 | aws = Aws() 103 | docker_client = DockerClient() 104 | for config in configs.releases: 105 | LOG.info(f"Releasing image associated for config: {config}.") 106 | released_image_uri = config.get_image_uri_for_released() 107 | if aws.does_ecr_image_exist(released_image_uri): 108 | LOG.info(f"Skipping already released image '{released_image_uri}'. 
Config: {config}.") 109 | continue 110 | 111 | staged_image_uri = config.get_image_uri_for_staging() 112 | username, password = aws.get_ecr_credentials(staged_image_uri) 113 | docker_client.login(username, password, staged_image_uri) 114 | docker_client.prune_all() 115 | docker_client.pull(staged_image_uri) 116 | 117 | pipeline = DlcPipeline(aws, docker_client) 118 | pipeline.stage_image(config) 119 | pipeline.set_parameters(config) 120 | pipeline.start_pipeline(config) 121 | LOG.info(f"DLC pipeline completed for staged image URI: {staged_image_uri}.") 122 | 123 | username, password = aws.get_ecr_credentials(staged_image_uri) 124 | docker_client.login(username, password, staged_image_uri) 125 | released_image_uri = config.get_image_uri_for_released() 126 | docker_client.tag(staged_image_uri, released_image_uri) 127 | docker_client.push(released_image_uri) 128 | LOG.info(f"Release marked as complete for following config ({released_image_uri}): {config}") 129 | 130 | 131 | if __name__ == "__main__": 132 | logging.basicConfig( 133 | level=logging.INFO, 134 | format="%(asctime)s %(levelname)-8s %(message)s", 135 | datefmt="%Y-%m-%d %H:%M:%S") 136 | configs = ReleaseConfigs() 137 | configs.validate() 138 | mode = os.getenv(EnvironmentVariable.MODE.name) 139 | LOG.info(f"Mode has been set to: {mode}.") 140 | if mode == Mode.PR.name: 141 | pr(configs) 142 | elif mode == Mode.BUILD.name: 143 | build(configs) 144 | elif mode == Mode.TEST.name: 145 | test(configs) 146 | elif mode == Mode.RELEASE.name: 147 | release(configs) 148 | else: 149 | raise ValueError(f"The mode '{mode}' is not recognized. Please set it correctly.'") -------------------------------------------------------------------------------- /huggingface/pytorch/tgillamacpp/docker/buildspec.yml: -------------------------------------------------------------------------------- 1 | 2 | version: 0.2 3 | 4 | env: 5 | shell: bash 6 | variables: 7 | FRAMEWORK_FOLDER: "huggingface/pytorch/tgillamacpp/docker" 8 | PYTHONPATH: "/codebuild/output/src*/src/github.com/awslabs/llm-hosting-container" 9 | 10 | phases: 11 | install: 12 | runtime-versions: 13 | python: 3.11 14 | commands: 15 | - echo "Installing Python version 3.11 ..." 16 | - pyenv global $PYTHON_311_VERSION 17 | 18 | pre_build: 19 | commands: 20 | - echo Pre-build started on `date` 21 | - export PYTHONPATH=$(pwd):$PYTHONPATH 22 | 23 | # Continue with regular pre-build steps if BUILD_REQUIRED=true 24 | - | 25 | echo Setting up Docker buildx. 26 | docker buildx version 27 | docker buildx create --name builder --driver docker-container --buildkitd-flags '--allow-insecure-entitlement security.insecure --allow-insecure-entitlement network.host' --use 28 | docker buildx inspect --bootstrap --builder builder 29 | docker buildx install 30 | echo Preparing system dependencies for execution. 31 | docker --version 32 | docker login -u $DOCKER_USERNAME -p $DOCKER_PASSWORD 33 | curl -LO http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh 34 | bash Miniconda3-latest-Linux-x86_64.sh -bfp /miniconda3 35 | export PATH=/miniconda3/bin:${PATH} 36 | conda install python=3.11 37 | conda update -y conda 38 | echo Prepare TGI_LLAMACPP dependencies for execution. 
39 | mkdir tgi-llamacpp-artifacts 40 | python -m pip install -r $FRAMEWORK_FOLDER/tgi-llamacpp-requirements.txt 41 | 42 | build: 43 | commands: 44 | - | 45 | echo "Current PYTHONPATH: $PYTHONPATH" 46 | python $FRAMEWORK_FOLDER/tgi-llamacpp.py 47 | 48 | post_build: 49 | commands: 50 | - | 51 | echo Build completed on `date` -------------------------------------------------------------------------------- /huggingface/pytorch/tgillamacpp/docker/tgi-llamacpp-requirements.txt: -------------------------------------------------------------------------------- 1 | boto3 2 | dataclasses 3 | docker 4 | gitpython 5 | sagemaker 6 | 7 | parameterized 8 | pytest 9 | pytest-mock 10 | pytest-xdist -------------------------------------------------------------------------------- /huggingface/pytorch/tgillamacpp/docker/tgi-llamacpp.py: -------------------------------------------------------------------------------- 1 | import git 2 | import logging 3 | import os 4 | import shutil 5 | import subprocess 6 | import time 7 | 8 | from huggingface.pytorch.release_utils import ( 9 | GIT_REPO_DOCKERFILES_ROOT_DIRECTORY, 10 | GIT_REPO_PYTEST_PATH, 11 | LOG, 12 | Aws, 13 | DockerClient, 14 | EnvironmentVariable, 15 | Mode, 16 | ReleaseConfigs 17 | ) 18 | 19 | GIT_REPO_TGI_LLAMACPP_LOCAL_FOLDER_NAME = "tgi-llamacpp" 20 | GIT_REPO_TGI_LLAMACPP_TAG_PATTERN = "v{version}" 21 | GIT_REPO_TGI_LLAMACPP_URL = "https://github.com/huggingface/text-generation-inference.git" 22 | 23 | def build(configs: ReleaseConfigs): 24 | """Builds the Docker image for the provided configs.""" 25 | aws = Aws() 26 | docker_client = DockerClient() 27 | for config in configs.releases: 28 | LOG.info(f"Going to build image for config: {config}.") 29 | image_uri = config.get_image_uri_for_staging() 30 | if aws.does_ecr_image_exist(image_uri): 31 | LOG.info(f"Skipping already built image '{image_uri}'. Config: {config}.") 32 | continue 33 | 34 | LOG.info(f"Setting up build prerequisites for release config with version: {config.version}") 35 | build_path = GIT_REPO_TGI_LLAMACPP_LOCAL_FOLDER_NAME 36 | shutil.rmtree(GIT_REPO_TGI_LLAMACPP_LOCAL_FOLDER_NAME, ignore_errors=True) 37 | hf_tgi_llamacpp_repo = git.Repo.clone_from(GIT_REPO_TGI_LLAMACPP_URL, GIT_REPO_TGI_LLAMACPP_LOCAL_FOLDER_NAME, no_checkout=True) 38 | hf_tgi_llamacpp_repo_tag = GIT_REPO_TGI_LLAMACPP_TAG_PATTERN.format(version=config.version) 39 | hf_tgi_llamacpp_repo.git.checkout(hf_tgi_llamacpp_repo_tag) 40 | LOG.info(f"Checked out {hf_tgi_llamacpp_repo} with tag: {hf_tgi_llamacpp_repo_tag} to {GIT_REPO_TGI_LLAMACPP_LOCAL_FOLDER_NAME}.") 41 | shutil.copytree(GIT_REPO_DOCKERFILES_ROOT_DIRECTORY, 42 | os.path.join(GIT_REPO_TGI_LLAMACPP_LOCAL_FOLDER_NAME, GIT_REPO_DOCKERFILES_ROOT_DIRECTORY)) 43 | LOG.info(f"Copied '{GIT_REPO_DOCKERFILES_ROOT_DIRECTORY}' directory to TGI_LLAMACPP directory for 'COPY' command.") 44 | 45 | dockerfile_path = config.get_dockerfile_path() 46 | LOG.info(f"Building Dockerfile: '{dockerfile_path}'. 
This may take a while...") 47 | docker_client.build(image_uri=image_uri, dockerfile_path=dockerfile_path, build_path=build_path) 48 | 49 | username, password = aws.get_ecr_credentials(image_uri) 50 | docker_client.login(username, password, image_uri) 51 | docker_client.push(image_uri) 52 | 53 | def test(configs: ReleaseConfigs): 54 | """Runs SageMaker tests for the Docker images associated with the provided configs and current git commit.""" 55 | aws = Aws() 56 | for config in configs.releases: 57 | LOG.info(f"Going to test built image for config: {config}.") 58 | test_role_arn = os.getenv(EnvironmentVariable.TEST_ROLE_ARN.name) 59 | test_session = aws.get_session_for_role(test_role_arn) 60 | test_credentials = test_session.get_credentials() 61 | environ = os.environ.copy() 62 | environ.update({ 63 | "DEVICE_TYPE": config.device.lower(), 64 | "AWS_ACCESS_KEY_ID": test_credentials.access_key, 65 | "AWS_SECRET_ACCESS_KEY": test_credentials.secret_key, 66 | "AWS_SESSION_TOKEN": test_credentials.token, 67 | "IMAGE_URI": config.get_image_uri_for_staging(), 68 | "TEST_ROLE_ARN": test_role_arn }) 69 | 70 | command = ["pytest", "-m", config.device.lower(), "-n", "auto", "--log-cli-level", "info", GIT_REPO_PYTEST_PATH] 71 | LOG.info(f"Running test command: {command}.") 72 | process = subprocess.run(command, env=environ, encoding="utf-8", capture_output=True) 73 | LOG.info(process.stdout) 74 | assert process.returncode == 0, f"Failed with config: {config}.\nError: {process.stderr}." 75 | LOG.info(f"Finished testing image with config: {config}.") 76 | 77 | 78 | def pr(configs: ReleaseConfigs): 79 | """Executes both build and test modes.""" 80 | build(configs) 81 | test(configs) 82 | 83 | def release(configs: ReleaseConfigs): 84 | """trigger SMFrameworks algo release pipeline""" 85 | aws = Aws() 86 | docker_client = DockerClient() 87 | for config in configs.releases: 88 | LOG.info(f"Releasing image associated for config: {config}.") 89 | released_image_uri = config.get_image_uri_for_released() 90 | if aws.does_ecr_image_exist(released_image_uri): 91 | LOG.info(f"Skipping already released image '{released_image_uri}'. 
Config: {config}.") 92 | continue 93 | 94 | staged_image_uri = config.get_image_uri_for_staging() 95 | username, password = aws.get_ecr_credentials(staged_image_uri) 96 | docker_client.login(username, password, staged_image_uri) 97 | docker_client.prune_all() 98 | docker_client.pull(staged_image_uri) 99 | 100 | docker_client.login(username, password, staged_image_uri) 101 | docker_client.tag(staged_image_uri, released_image_uri) 102 | docker_client.push(released_image_uri) 103 | 104 | js_uris = config.get_image_uris_for_jumpstart() 105 | username, password = aws.get_ecr_credentials(js_uris[0]) 106 | docker_client.login(username, password, js_uris[0]) 107 | for js_uri in js_uris: 108 | docker_client.tag(staged_image_uri, js_uri) 109 | docker_client.push(js_uri) 110 | LOG.info(f"Release marked as complete for following config ({js_uris}): {config}") 111 | 112 | 113 | if __name__ == "__main__": 114 | logging.basicConfig( 115 | level=logging.INFO, 116 | format="%(asctime)s %(levelname)-8s %(message)s", 117 | datefmt="%Y-%m-%d %H:%M:%S") 118 | configs = ReleaseConfigs() 119 | configs.validate() 120 | mode = os.getenv(EnvironmentVariable.MODE.name) 121 | LOG.info(f"Mode has been set to: {mode}.") 122 | if mode == Mode.PR.name: 123 | pr(configs) 124 | elif mode == Mode.BUILD.name: 125 | build(configs) 126 | elif mode == Mode.TEST.name: 127 | test(configs) 128 | elif mode == Mode.RELEASE.name: 129 | release(configs) 130 | else: 131 | raise ValueError(f"The mode '{mode}' is not recognized. Please set it correctly.'") -------------------------------------------------------------------------------- /releases.json: -------------------------------------------------------------------------------- 1 | { 2 | "permitted_combinations": { 3 | "TGI": [ 4 | { 5 | "device": "gpu", 6 | "min_version": "1.0.0", 7 | "max_version": "1.1.0", 8 | "os_version": "ubuntu20.04", 9 | "cuda_version": "cu118", 10 | "python_version": "py39", 11 | "pytorch_version": "2.0.1" 12 | }, 13 | { 14 | "device": "gpu", 15 | "min_version": "1.2.0", 16 | "max_version": "1.4.0", 17 | "os_version": "ubuntu20.04", 18 | "cuda_version": "cu121", 19 | "python_version": "py310", 20 | "pytorch_version": "2.1.1" 21 | }, 22 | { 23 | "device": "gpu", 24 | "min_version": "1.4.2", 25 | "max_version": "2.0.1", 26 | "os_version": "ubuntu22.04", 27 | "cuda_version": "cu121", 28 | "python_version": "py310", 29 | "pytorch_version": "2.1.1" 30 | }, 31 | { 32 | "device": "gpu", 33 | "min_version": "2.0.2", 34 | "max_version": "2.2.0", 35 | "os_version": "ubuntu22.04", 36 | "cuda_version": "cu121", 37 | "python_version": "py310", 38 | "pytorch_version": "2.3.0" 39 | }, 40 | { 41 | "device": "gpu", 42 | "min_version": "2.3.1", 43 | "max_version": "3.0.1", 44 | "os_version": "ubuntu22.04", 45 | "cuda_version": "cu124", 46 | "python_version": "py311", 47 | "pytorch_version": "2.4.0" 48 | }, 49 | { 50 | "device": "gpu", 51 | "min_version": "3.0.1", 52 | "max_version": "3.1.0", 53 | "os_version": "ubuntu22.04", 54 | "cuda_version": "cu124", 55 | "python_version": "py311", 56 | "pytorch_version": "2.5.1" 57 | }, 58 | { 59 | "device": "gpu", 60 | "min_version": "3.1.1", 61 | "max_version": "3.2.3", 62 | "os_version": "ubuntu22.04", 63 | "cuda_version": "cu124", 64 | "python_version": "py311", 65 | "pytorch_version": "2.6.0" 66 | }, 67 | { 68 | "device": "inf2", 69 | "min_version": "0.0.16", 70 | "max_version": "0.0.21", 71 | "os_version": "ubuntu22.04", 72 | "python_version": "py310", 73 | "pytorch_version": "1.13.1" 74 | }, 75 | { 76 | "device": "inf2", 77 | 
"min_version": "0.0.22", 78 | "max_version": "0.0.28", 79 | "os_version": "ubuntu22.04", 80 | "python_version": "py310", 81 | "pytorch_version": "2.1.2" 82 | } 83 | ], 84 | "TEI": [ 85 | { 86 | "device": "gpu", 87 | "min_version": "1.2.1", 88 | "max_version": "1.7.0", 89 | "os_version": "ubuntu22.04", 90 | "cuda_version": "cu122", 91 | "python_version": "py310", 92 | "pytorch_version": "2.0.1" 93 | }, 94 | { 95 | "device": "cpu", 96 | "min_version": "1.2.1", 97 | "max_version": "1.7.0", 98 | "os_version": "ubuntu22.04", 99 | "cuda_version": "cu122", 100 | "python_version": "py310", 101 | "pytorch_version": "2.0.1" 102 | } 103 | ] 104 | }, 105 | "ignore_vulnerabilities": [ 106 | "CVE-2024-42154 - linux", 107 | "CVE-2025-32434 - torch" 108 | ], 109 | "releases": [ 110 | { 111 | "framework": "TEI", 112 | "device": "gpu", 113 | "version": "1.7.0", 114 | "os_version": "ubuntu22.04", 115 | "python_version": "py310", 116 | "pytorch_version": "2.0.1", 117 | "cuda_version": "cu122" 118 | }, 119 | { 120 | "framework": "TEI", 121 | "device": "cpu", 122 | "version": "1.7.0", 123 | "os_version": "ubuntu22.04", 124 | "python_version": "py310", 125 | "pytorch_version": "2.0.1" 126 | }, 127 | { 128 | "framework": "TGI", 129 | "device": "inf2", 130 | "version": "0.0.28", 131 | "os_version": "ubuntu22.04", 132 | "python_version": "py310", 133 | "pytorch_version": "2.1.2" 134 | } 135 | ] 136 | } 137 | -------------------------------------------------------------------------------- /tests/huggingface/README.md: -------------------------------------------------------------------------------- 1 | # SageMaker DLC Test 2 | 3 | This folder is a collection of scripts that enables users to test and validate 4 | the Deep Learning Containers (DLC) on SageMaker. 5 | 6 | ## Requirements 7 | 8 | - An AWS account 9 | - SageMaker Python SDK installed 10 | 11 | ## Usage 12 | 13 | Run the test script using the command below: 14 | 15 | ``` 16 | pip3 install -r requirements.txt 17 | 18 | IMAGE_URI= 19 | INSTANCE_TYPE=ml.g5.12xlarge 20 | NUM_GPUS=4 21 | ROLE= 22 | 23 | python3 sagemaker_dlc_test.py --image_uri $IMAGE_URI --instance_type $INSTANCE_TYPE --model_id bigscience/bloom-560m --num_gpus $NUM_GPUS --role $ROLE --timeout 600 24 | python3 sagemaker_dlc_test.py --image_uri $IMAGE_URI --instance_type $INSTANCE_TYPE --model_id EleutherAI/gpt-neox-20b --num_gpus $NUM_GPUS --role $ROLE --timeout 2000 25 | python3 sagemaker_dlc_test.py --image_uri $IMAGE_URI --instance_type $INSTANCE_TYPE --model_id google/flan-t5-xxl --num_gpus $NUM_GPUS --role $ROLE --timeout 3000 26 | ``` 27 | 28 | The tests will deploy a SageMaker endpoint and run inference. 
29 | -------------------------------------------------------------------------------- /tests/huggingface/enable_ssm_access_to_endpoint.py: -------------------------------------------------------------------------------- 1 | import uuid 2 | import boto3 3 | # This script helps you to enable SSM access to the endpoint so we can debug the 4 | # container level issues there 5 | def main(): 6 | session = boto3.Session() 7 | client = session.client("sagemaker", region_name="us-west-2") 8 | 9 | # List existing endpoints 10 | print("Listing endpoints:") 11 | print(client.list_endpoints()) 12 | print() 13 | 14 | # Get endpoint name 15 | endpoint_name = client.list_endpoints()["Endpoints"][0]["EndpointName"] 16 | print(f"Endpoint name: {endpoint_name}\n") 17 | 18 | # Describe endpoint 19 | response = client.describe_endpoint(EndpointName=endpoint_name) 20 | endpoint_config_name = response["EndpointConfigName"] 21 | 22 | # Check if EnableSSMAccess is currently enabled 23 | current_ssm_access = response["ProductionVariants"][0].get("EnableSSMAccess", False) 24 | print(f"Current EnableSSMAccess status: {current_ssm_access}\n") 25 | 26 | # Generate new endpoint config name 27 | new_endpoint_config_name = f"{endpoint_config_name.split('-')[0]}-{str(uuid.uuid4())[:11]}" 28 | 29 | # Update EnableSSMAccess to True in new production variant 30 | new_production_variants = response["ProductionVariants"] 31 | new_production_variants[0]["EnableSSMAccess"] = True 32 | 33 | # Create new endpoint config 34 | create_endpoint_config_response = client.create_endpoint_config( 35 | EndpointConfigName=new_endpoint_config_name, 36 | ProductionVariants=new_production_variants, 37 | ) 38 | print(f"Created new endpoint config: {create_endpoint_config_response}\n") 39 | 40 | # Describe new endpoint config 41 | new_endpoint_config_response = client.describe_endpoint_config( 42 | EndpointConfigName=new_endpoint_config_name 43 | ) 44 | print(f"New endpoint config: {new_endpoint_config_response}\n") 45 | 46 | # Update endpoint with new endpoint config 47 | update_endpoint_response = client.update_endpoint( 48 | EndpointName=endpoint_name, EndpointConfigName=new_endpoint_config_name 49 | ) 50 | print(update_endpoint_response) 51 | 52 | if __name__ == "__main__": 53 | main() -------------------------------------------------------------------------------- /tests/huggingface/requirements.txt: -------------------------------------------------------------------------------- 1 | sagemaker>=2.153.0 2 | pytest -------------------------------------------------------------------------------- /tests/huggingface/sagemaker_dlc_test.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import sys 3 | import argparse 4 | import time 5 | import signal 6 | import json 7 | import os 8 | import pytest 9 | 10 | from sagemaker.huggingface import HuggingFaceModel 11 | 12 | 13 | logging.basicConfig(stream=sys.stdout, format="%(message)s", level=logging.INFO) 14 | 15 | 16 | class TimeoutError(Exception): 17 | pass 18 | 19 | 20 | def timeout_handler(signum, frame): 21 | raise TimeoutError("Test timed out") 22 | 23 | def run_test(args): 24 | default_env = { "HF_MODEL_ID": args.model_id } 25 | if args.model_revision: 26 | default_env["HF_MODEL_REVISION"] = args.model_revision 27 | if args.instance_type.startswith("ml.inf2"): 28 | default_env["HF_NUM_CORES"] = "2" 29 | default_env["HF_AUTO_CAST_TYPE"] = "fp16" 30 | default_env["MAX_BATCH_SIZE"] = "1" 31 | default_env["MAX_INPUT_TOKENS"] = "2048" 32 | 
default_env["MAX_TOTAL_TOKENS"] = "4096" 33 | else: 34 | default_env["SM_NUM_GPUS"] = "4" 35 | 36 | signal.signal(signal.SIGALRM, timeout_handler) 37 | signal.alarm(int(args.timeout)) 38 | predictor = None 39 | try: 40 | # Create Hugging Face Model Class 41 | endpoint_name = args.model_id.replace("/","-").replace(".", "-")[:40] 42 | endpoint_name = endpoint_name + "-" + time.strftime("%Y-%m-%d-%H-%M-%S", time.gmtime()) 43 | model = HuggingFaceModel( 44 | name=endpoint_name, 45 | env=default_env, 46 | role=args.role, 47 | image_uri=args.image_uri 48 | ) 49 | deploy_parameters = { 50 | "instance_type": args.instance_type, 51 | "initial_instance_count": 1, 52 | "endpoint_name": endpoint_name, 53 | "container_startup_health_check_timeout": 1800, 54 | } 55 | if args.instance_type.startswith("ml.inf2"): 56 | deploy_parameters["volume_size"] = 256 57 | predictor = model.deploy(**deploy_parameters) 58 | 59 | logging.info("Endpoint deployment complete.") 60 | 61 | data = { 62 | "inputs": "What is Deep Learning?", 63 | "parameters": {"max_new_tokens": 50, "top_k": 50, "top_p": 0.95, "do_sample": True}, 64 | } 65 | output = predictor.predict(data) 66 | logging.info("Output: " + json.dumps(output)) 67 | # TODO: we need to clearly define the expected output format for each models. 68 | # assert "generated_text" in output[0] 69 | finally: 70 | if predictor: 71 | predictor.delete_model() 72 | predictor.delete_endpoint() 73 | signal.alarm(0) 74 | 75 | def get_models_for_image(image_type, device_type): 76 | if image_type == "TGI": 77 | if device_type == "gpu": 78 | return [ 79 | ("bigscience/bloom-560m", None, "ml.g5.12xlarge"), 80 | ("EleutherAI/gpt-neox-20b", None, "ml.g5.12xlarge"), 81 | ("google/flan-t5-xxl", None, "ml.g5.12xlarge"), 82 | ] 83 | elif device_type == "inf2": 84 | return [ ("princeton-nlp/Sheared-LLaMA-1.3B", None, "ml.inf2.xlarge") ] 85 | else: 86 | raise ValueError(f"No testing models found for {image_type} on instance {device_type}. " 87 | f"please check whether the image_type and instance_type are supported.") 88 | elif image_type == "TEI": 89 | if device_type == "gpu": 90 | return [ 91 | ("BAAI/bge-m3", None, "ml.g5.12xlarge"), 92 | ("intfloat/multilingual-e5-base", None, "ml.g5.12xlarge"), 93 | ("thenlper/gte-base", None, "ml.g5.12xlarge"), 94 | ("sentence-transformers/all-MiniLM-L6-v2", None, "ml.g5.12xlarge") 95 | ] 96 | elif device_type == "cpu": 97 | return [("BAAI/bge-m3", None, "ml.g5.12xlarge")] 98 | else: 99 | raise ValueError(f"No testing models found for {image_type} on instance {device_type}. " 100 | f"please check whether the image_type and instance_type are supported.") 101 | else: 102 | raise ValueError("Invalid image type. 
Supported types are 'TGI' and 'TEI'.") 103 | 104 | def should_run_test_for_image(test_type, target_type): 105 | return test_type == target_type 106 | 107 | @pytest.mark.parametrize("image_type, device_type", [ 108 | pytest.param("TGI", "gpu", marks=pytest.mark.gpu), 109 | pytest.param("TGI", "inf2", marks=pytest.mark.inf2), 110 | pytest.param("TEI", "gpu", marks=pytest.mark.gpu), 111 | pytest.param("TEI", "cpu", marks=pytest.mark.cpu), 112 | ]) 113 | def test(image_type, device_type, timeout: str = "3000"): 114 | test_target_image_type = os.getenv("TARGET_IMAGE_TYPE") 115 | test_device_type = os.getenv("DEVICE_TYPE") 116 | if test_target_image_type and not should_run_test_for_image(image_type, test_target_image_type): 117 | pytest.skip(f"Skipping test for image type {image_type} as it does not match target image type {test_target_image_type}") 118 | 119 | if test_device_type and not should_run_test_for_image(device_type, test_device_type): 120 | pytest.skip(f"Skipping test for device type {device_type} as it does not match current device type {test_device_type}") 121 | 122 | image_uri = os.getenv("IMAGE_URI") 123 | test_role_arn = os.getenv("TEST_ROLE_ARN") 124 | assert image_uri, f"Please set IMAGE_URI environment variable." 125 | assert test_role_arn, f"Please set TEST_ROLE_ARN environment variable." 126 | 127 | models = get_models_for_image(image_type, device_type) 128 | for model_id, model_revision, instance_type in models: 129 | args = argparse.Namespace( 130 | image_uri=image_uri, 131 | instance_type=instance_type, 132 | model_id=model_id, 133 | model_revision=model_revision, 134 | role=test_role_arn, 135 | timeout=timeout 136 | ) 137 | logging.info(f"Running sanity test with the following args: {args}.") 138 | run_test(args) 139 | 140 | 141 | if __name__ == '__main__': 142 | arg_parser = argparse.ArgumentParser() 143 | arg_parser.add_argument("--image_uri", type=str, required=True) 144 | arg_parser.add_argument("--instance_type", type=str, required=True) 145 | arg_parser.add_argument("--model_id", type=str, required=True) 146 | arg_parser.add_argument("--model_revision", type=str, required=False) 147 | arg_parser.add_argument("--role", type=str, required=True) 148 | arg_parser.add_argument("--timeout", type=str, required=True) 149 | 150 | args = arg_parser.parse_args() 151 | run_test(args) 152 | --------------------------------------------------------------------------------
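Usage sketch (not part of the repository): once an endpoint built from one of these images is running, for example while debugging with `enable_ssm_access_to_endpoint.py`, the same request shape that `run_test` sends through `predictor.predict` can be reproduced with the AWS CLI. The endpoint name below is hypothetical, and `--cli-binary-format raw-in-base64-out` is only needed on AWS CLI v2.

```
# Hypothetical endpoint name; use the one created by your own deployment.
ENDPOINT_NAME=bigscience-bloom-560m-2024-01-01-00-00-00

aws sagemaker-runtime invoke-endpoint \
  --endpoint-name "$ENDPOINT_NAME" \
  --content-type application/json \
  --cli-binary-format raw-in-base64-out \
  --body '{"inputs": "What is Deep Learning?", "parameters": {"max_new_tokens": 50, "top_k": 50, "top_p": 0.95, "do_sample": true}}' \
  response.json

cat response.json
```

The response body lands in `response.json`; as the TODO in `run_test` notes, its exact shape varies by model, which is why the test logs the output instead of asserting on it.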