├── .github └── workflows │ └── build-huggingface.yml ├── .gitignore ├── CODEOWNERS ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── NOTICE ├── README.md ├── docs └── huggingface │ └── tgi-0.9.3.md ├── examples └── huggingface │ ├── huggingface-large-model-inference-falcon-40b.ipynb │ ├── huggingface-large-model-inference-falcon-7b.ipynb │ └── huggingface-large-model-inference.ipynb ├── huggingface └── pytorch │ ├── optimum │ └── docker │ │ ├── 0.0.16 │ │ └── Dockerfile │ │ ├── 0.0.17 │ │ └── Dockerfile │ │ ├── 0.0.18 │ │ └── Dockerfile │ │ ├── 0.0.19 │ │ └── Dockerfile │ │ ├── 0.0.20 │ │ └── Dockerfile │ │ ├── 0.0.21 │ │ └── Dockerfile │ │ ├── 0.0.22 │ │ └── Dockerfile │ │ ├── 0.0.23 │ │ └── Dockerfile │ │ ├── 0.0.24 │ │ └── Dockerfile │ │ ├── 0.0.25 │ │ └── Dockerfile │ │ ├── 0.0.27 │ │ └── Dockerfile │ │ └── 0.0.28 │ │ └── Dockerfile │ ├── release_utils.py │ ├── tei │ └── docker │ │ ├── 1.2.3 │ │ ├── cpu │ │ │ └── Dockerfile │ │ └── gpu │ │ │ ├── Dockerfile │ │ │ └── sagemaker-entrypoint-cuda-all.sh │ │ ├── 1.4.0 │ │ ├── cpu │ │ │ └── Dockerfile │ │ └── gpu │ │ │ └── Dockerfile │ │ ├── 1.6.0 │ │ ├── cpu │ │ │ └── Dockerfile │ │ └── gpu │ │ │ └── Dockerfile │ │ ├── 1.7.0 │ │ ├── cpu │ │ │ └── Dockerfile │ │ └── gpu │ │ │ ├── Dockerfile │ │ │ └── start-cuda-compat.sh │ │ ├── buildspec.yml │ │ ├── tei-requirements.txt │ │ └── tei.py │ ├── tgi │ └── docker │ │ ├── 1.3.3 │ │ ├── Dockerfile │ │ └── THIRD-PARTY-LICENSES │ │ ├── 1.4.0 │ │ ├── Dockerfile │ │ └── THIRD-PARTY-LICENSES │ │ ├── 1.4.2 │ │ ├── Dockerfile │ │ └── THIRD-PARTY-LICENSES │ │ ├── 1.4.5 │ │ ├── Dockerfile │ │ └── THIRD-PARTY-LICENSES │ │ ├── 2.0.0 │ │ ├── Dockerfile │ │ └── THIRD-PARTY-LICENSES │ │ ├── 2.0.1 │ │ ├── Dockerfile │ │ └── THIRD-PARTY-LICENSES │ │ ├── 2.0.2 │ │ ├── Dockerfile │ │ └── THIRD-PARTY-LICENSES │ │ ├── 2.0.3 │ │ ├── Dockerfile │ │ └── THIRD-PARTY-LICENSES │ │ ├── 2.2.0 │ │ ├── Dockerfile │ │ └── THIRD-PARTY-LICENSES │ │ ├── 2.3.1 │ │ ├── Dockerfile │ │ ├── THIRD-PARTY-LICENSES │ │ └── entrypoint.sh │ │ ├── 2.4.0 │ │ ├── Dockerfile │ │ ├── THIRD-PARTY-LICENSES │ │ └── entrypoint.sh │ │ ├── 3.0.1 │ │ ├── Dockerfile │ │ ├── THIRD-PARTY-LICENSES │ │ └── entrypoint.sh │ │ ├── 3.1.0 │ │ ├── Dockerfile │ │ └── THIRD-PARTY-LICENSES │ │ ├── 3.1.1 │ │ ├── Dockerfile │ │ ├── THIRD-PARTY-LICENSES │ │ └── start-cuda-compat.sh │ │ ├── 3.2.0 │ │ ├── Dockerfile │ │ ├── THIRD-PARTY-LICENSES │ │ └── start-cuda-compat.sh │ │ ├── 3.2.3 │ │ ├── Dockerfile │ │ ├── THIRD-PARTY-LICENSES │ │ └── start-cuda-compat.sh │ │ ├── archived │ │ ├── 0.5.0 │ │ │ └── py3 │ │ │ │ └── cu118 │ │ │ │ └── Dockerfile.gpu │ │ ├── 0.6.0 │ │ │ └── py3 │ │ │ │ └── cu118 │ │ │ │ └── Dockerfile.gpu │ │ ├── 0.8.2 │ │ │ └── py3 │ │ │ │ └── cu118 │ │ │ │ └── Dockerfile.gpu │ │ ├── 0.9.3 │ │ │ └── py3 │ │ │ │ └── cu118 │ │ │ │ └── Dockerfile.gpu │ │ ├── 1.0.2 │ │ │ └── py3 │ │ │ │ └── cu118 │ │ │ │ ├── Dockerfile.gpu │ │ │ │ └── THIRD-PARTY-LICENSES │ │ ├── 1.0.3 │ │ │ ├── gpu │ │ │ │ ├── Dockerfile │ │ │ │ ├── PYTHON_PACKAGES_LICENSES │ │ │ │ └── THIRD-PARTY-LICENSES │ │ │ └── py3 │ │ │ │ └── cu118 │ │ │ │ ├── Dockerfile.gpu │ │ │ │ ├── PYTHON_PACKAGES_LICENSES │ │ │ │ └── THIRD-PARTY-LICENSES │ │ ├── 1.1.0 │ │ │ ├── gpu │ │ │ │ ├── Dockerfile │ │ │ │ └── THIRD-PARTY-LICENSES │ │ │ └── py3 │ │ │ │ └── cu118 │ │ │ │ ├── Dockerfile.gpu │ │ │ │ └── THIRD-PARTY-LICENSES │ │ ├── 1.2.0 │ │ │ └── gpu │ │ │ │ ├── Dockerfile │ │ │ │ └── THIRD-PARTY-LICENSES │ │ └── 1.3.1 │ │ │ └── gpu │ │ │ ├── Dockerfile │ │ │ └── THIRD-PARTY-LICENSES │ │ ├── 
buildspec.yml │ │ ├── tgi-requirements.txt │ │ └── tgi.py │ └── tgillamacpp │ └── docker │ ├── buildspec.yml │ ├── tgi-llamacpp-requirements.txt │ └── tgi-llamacpp.py ├── releases.json └── tests └── huggingface ├── README.md ├── enable_ssm_access_to_endpoint.py ├── requirements.txt └── sagemaker_dlc_test.py /.github/workflows/build-huggingface.yml: -------------------------------------------------------------------------------- 1 | name: Build and push HuggingFace TGI docker image 2 | 3 | on: 4 | workflow_dispatch: 5 | inputs: 6 | tgi-version: 7 | description: 'tgi version' 8 | required: true 9 | default: '1.1.0' 10 | pytorch-version: 11 | description: 'pytorch version' 12 | required: true 13 | default: '2.0.1' 14 | cuda-version: 15 | description: 'cuda version' 16 | required: true 17 | default: '118' 18 | ubuntu-version: 19 | description: 'ubuntu version' 20 | required: true 21 | default: '20.04' 22 | 23 | jobs: 24 | create-runner: 25 | runs-on: [ self-hosted, scheduler ] 26 | steps: 27 | - name: Create new G5 instance 28 | id: create_gpu 29 | run: | 30 | cd /home/ubuntu/djl_benchmark_script/scripts 31 | token=$( curl -X POST -H "Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}" \ 32 | https://api.github.com/repos/awslabs/llm-hosting-container/actions/runners/registration-token \ 33 | --fail \ 34 | | jq '.token' | tr -d '"' ) 35 | ./start_instance.sh action_g5 $token awslabs/llm-hosting-container 36 | outputs: 37 | gpu_instance_id: ${{ steps.create_gpu.outputs.action_g5_instance_id }} 38 | 39 | build-and-push-image: 40 | runs-on: [ self-hosted, g5 ] 41 | timeout-minutes: 150 42 | needs: create-runner 43 | env: 44 | TGI_VERSION: ${{github.event.inputs.tgi-version}} 45 | PYTORCH_VERSION: ${{github.event.inputs.pytorch-version}} 46 | CUDA_VERSION: ${{github.event.inputs.cuda-version}} 47 | UBUNTU_VERSION: ${{github.event.inputs.ubuntu-version}} 48 | steps: 49 | - uses: actions/checkout@v3 50 | with: 51 | repository: huggingface/text-generation-inference 52 | ref: v${{ env.TGI_VERSION }} 53 | - uses: actions/checkout@v3 54 | with: 55 | path: llm-hosting-container 56 | - name: Setup Docker buildx 57 | uses: docker/setup-buildx-action@v2 58 | with: 59 | install: true 60 | - name: Inject slug/short variables 61 | uses: rlespinasse/github-slug-action@v4.4.1 62 | - name: Configure AWS Credentials 63 | uses: aws-actions/configure-aws-credentials@v2 64 | with: 65 | aws-region: us-east-1 66 | - name: Login to Amazon ECR 67 | id: login-ecr 68 | uses: aws-actions/amazon-ecr-login@v1 69 | with: 70 | registries: "125045733377" 71 | - name: Clean docker env 72 | run: | 73 | yes | docker system prune -a --volumes 74 | - name: Build and push docker image 75 | uses: docker/build-push-action@v4 76 | env: 77 | REGISTRY: ${{ steps.login-ecr.outputs.registry }} 78 | REPOSITORY: djl-serving 79 | with: 80 | context: . 
81 | file: llm-hosting-container/huggingface/pytorch/tgi/docker/${{ env.TGI_VERSION }}/py3/cu${{ env.CUDA_VERSION }}/Dockerfile.gpu 82 | push: true 83 | target: sagemaker 84 | platforms: 'linux/amd64' 85 | provenance: false 86 | tags: ${{ env.REGISTRY }}/${{ env.REPOSITORY }}:${{ env.PYTORCH_VERSION }}-tgi${{ env.TGI_VERSION }}-gpu-py39-cu${{ env.CUDA_VERSION }}-ubuntu${{ env.UBUNTU_VERSION }} 87 | cache-from: type=gha 88 | cache-to: type=gha,mode=max 89 | 90 | run-tests: 91 | runs-on: [ self-hosted, g5 ] 92 | timeout-minutes: 30 93 | needs: [build-and-push-image, create-runner] 94 | env: 95 | TGI_VERSION: ${{github.event.inputs.tgi-version}} 96 | REPOSITORY: djl-serving 97 | TAG: ${{github.event.inputs.pytorch-version}}-tgi${{github.event.inputs.tgi-version}}-gpu-py39-cu${{github.event.inputs.cuda-version}}-ubuntu${{github.event.inputs.ubuntu-version}} 98 | steps: 99 | - uses: actions/checkout@v3 100 | - name: Clean env 101 | run: | 102 | yes | docker system prune -a --volumes 103 | sudo rm -rf /home/ubuntu/actions-runner/_work/_tool/Java_Corretto_jdk/ 104 | echo "wait dpkg lock..." 105 | while sudo fuser /var/{lib/{dpkg,apt/lists},cache/apt/archives}/lock >/dev/null 2>&1; do sleep 5; done 106 | - name: Configure AWS Credentials 107 | uses: aws-actions/configure-aws-credentials@v2 108 | with: 109 | aws-region: us-east-1 110 | - name: Login to Amazon ECR 111 | id: login-ecr 112 | uses: aws-actions/amazon-ecr-login@v1 113 | with: 114 | registries: "125045733377" 115 | - name: Pull docker 116 | env: 117 | REGISTRY: ${{ steps.login-ecr.outputs.registry }} 118 | run: | 119 | docker pull ${REGISTRY}/${REPOSITORY}:${TAG} 120 | - name: Test bloom-560m 121 | env: 122 | REGISTRY: ${{ steps.login-ecr.outputs.registry }} 123 | run: | 124 | set -ex 125 | HF_MODEL_ID=bigscience/bloom-560m && \ 126 | SM_NUM_GPUS=4 && \ 127 | TGI_VERSION=$TGI_VERSION && \ 128 | docker run --gpus all --shm-size 2g -itd --rm -p 8080:8080 \ 129 | -e SM_NUM_GPUS=$SM_NUM_GPUS -e HF_MODEL_ID=$HF_MODEL_ID \ 130 | ${REGISTRY}/${REPOSITORY}:${TAG} 131 | sleep 30 132 | ret=$(curl http://localhost:8080/invocations -X POST \ 133 | -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":128}}' \ 134 | -H 'Content-Type: application/json') 135 | [[ $ret != "[{\"generated_text\":\"What is Deep Learning?"* ]] && exit 1 136 | docker rm -f $(docker ps -aq) 137 | - name: Test gpt-neox-20b 138 | env: 139 | REGISTRY: ${{ steps.login-ecr.outputs.registry }} 140 | run: | 141 | set -ex 142 | HF_MODEL_ID=EleutherAI/gpt-neox-20b && \ 143 | SM_NUM_GPUS=4 && \ 144 | TGI_VERSION=$TGI_VERSION && \ 145 | docker run --gpus all --shm-size 2g -itd --rm -p 8080:8080 \ 146 | -e SM_NUM_GPUS=$SM_NUM_GPUS -e HF_MODEL_ID=$HF_MODEL_ID \ 147 | ${REGISTRY}/${REPOSITORY}:${TAG} 148 | sleep 400 149 | ret=$(curl http://localhost:8080/invocations -X POST \ 150 | -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":128}}' \ 151 | -H 'Content-Type: application/json') 152 | [[ $ret != "[{\"generated_text\":\"What is Deep Learning?"* ]] && exit 1 153 | docker rm -f $(docker ps -aq) 154 | - name: Test flan-t5-xxl 155 | env: 156 | REGISTRY: ${{ steps.login-ecr.outputs.registry }} 157 | run: | 158 | set -ex 159 | HF_MODEL_ID=google/flan-t5-xxl && \ 160 | SM_NUM_GPUS=4 && \ 161 | TGI_VERSION=$TGI_VERSION && \ 162 | docker run --gpus all --shm-size 2g -itd --rm -p 8080:8080 \ 163 | -e SM_NUM_GPUS=$SM_NUM_GPUS -e HF_MODEL_ID=$HF_MODEL_ID \ 164 | ${REGISTRY}/${REPOSITORY}:${TAG} 165 | sleep 400 166 | ret=$(curl http://localhost:8080/invocations -X 
POST \ 167 | -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":128}}' \ 168 | -H 'Content-Type: application/json') 169 | [[ $ret != "[{\"generated_text\""* ]] && exit 1 170 | docker rm -f $(docker ps -aq) 171 | - name: On fail step 172 | if: ${{ failure() }} 173 | run: | 174 | docker rm -f $(docker ps -aq) || true 175 | 176 | stop-runner: 177 | if: always() 178 | runs-on: [ self-hosted, scheduler ] 179 | needs: [run-tests, build-and-push-image, create-runner] 180 | steps: 181 | - name: Stop all instances 182 | run: | 183 | cd /home/ubuntu/djl_benchmark_script/scripts 184 | instance_id=${{ needs.create-runner.outputs.gpu_instance_id }} 185 | ./stop_instance.sh $instance_id 186 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | .idea 3 | 4 | -------------------------------------------------------------------------------- /CODEOWNERS: -------------------------------------------------------------------------------- 1 | * @awslabs/sagemaker-1p-algorithms -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | ## Code of Conduct 2 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 3 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 4 | opensource-codeofconduct@amazon.com with any additional questions or comments. 5 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing Guidelines 2 | 3 | Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional 4 | documentation, we greatly value feedback and contributions from our community. 5 | 6 | Please read through this document before submitting any issues or pull requests to ensure we have all the necessary 7 | information to effectively respond to your bug report or contribution. 8 | 9 | 10 | ## Reporting Bugs/Feature Requests 11 | 12 | We welcome you to use the GitHub issue tracker to report bugs or suggest features. 13 | 14 | When filing an issue, please check existing open, or recently closed, issues to make sure somebody else hasn't already 15 | reported the issue. Please try to include as much information as you can. Details like these are incredibly useful: 16 | 17 | * A reproducible test case or series of steps 18 | * The version of our code being used 19 | * Any modifications you've made relevant to the bug 20 | * Anything unusual about your environment or deployment 21 | 22 | 23 | ## Contributing via Pull Requests 24 | Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that: 25 | 26 | 1. You are working against the latest source on the *main* branch. 27 | 2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already. 28 | 3. You open an issue to discuss any significant work - we would hate for your time to be wasted. 29 | 30 | To send us a pull request, please: 31 | 32 | 1. Fork the repository. 33 | 2. Modify the source; please focus on the specific change you are contributing. 
If you also reformat all the code, it will be hard for us to focus on your change. 34 | 3. Ensure local tests pass. 35 | 4. Commit to your fork using clear commit messages. 36 | 5. Send us a pull request, answering any default questions in the pull request interface. 37 | 6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation. 38 | 39 | GitHub provides additional documentation on [forking a repository](https://help.github.com/articles/fork-a-repo/) and 40 | [creating a pull request](https://help.github.com/articles/creating-a-pull-request/). 41 | 42 | 43 | ## Finding contributions to work on 44 | Looking at the existing issues is a great way to find something to contribute to. As our projects, by default, use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any 'help wanted' issues is a great place to start. 45 | 46 | 47 | ## Code of Conduct 48 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 49 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 50 | opensource-codeofconduct@amazon.com with any additional questions or comments. 51 | 52 | 53 | ## Security issue notifications 54 | If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public GitHub issue. 55 | 56 | 57 | ## Licensing 58 | 59 | See the [LICENSE](LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution. 60 | -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # LLM Hosting Container 2 | 3 | Welcome to the LLM Hosting Container GitHub repository! 4 | 5 | This repository contains the Dockerfiles and associated resources for building and 6 | hosting containers for large language models and embedding models. 7 | 8 | * Hugging Face Text Generation Inference (TGI) container 9 | * Hugging Face Text Embeddings Inference (TEI) container 10 | 11 | ## Security 12 | 13 | See [CONTRIBUTING](CONTRIBUTING.md) for more information. 14 | 15 | ## License 16 | 17 | This project is licensed under the Apache-2.0 License.
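As a rough, illustrative sketch (not part of this repository), an image built from these Dockerfiles could be deployed to a SageMaker real-time endpoint with the SageMaker Python SDK. The image URI, IAM role ARN, model ID, and instance type below are placeholder assumptions; the request payload mirrors the one the CI workflow above sends to `/invocations`.

```python
# Illustrative sketch only: deploying a TGI image built from this repo to SageMaker.
# The image URI, role ARN, model ID, and instance type are placeholders, not values
# defined by this repository.
from sagemaker.huggingface import HuggingFaceModel

model = HuggingFaceModel(
    role="arn:aws:iam::111122223333:role/SageMakerExecutionRole",  # placeholder IAM role
    image_uri="123456789012.dkr.ecr.us-east-1.amazonaws.com/my-tgi-repo:latest",  # placeholder ECR image
    env={
        "HF_MODEL_ID": "bigscience/bloom-560m",  # model to serve, pulled from the Hugging Face Hub
        "SM_NUM_GPUS": "1",                      # number of GPUs/shards TGI should use
    },
)

predictor = model.deploy(
    initial_instance_count=1,
    instance_type="ml.g5.2xlarge",  # placeholder GPU instance type
)

# Same request shape the CI workflow uses against the /invocations route.
print(predictor.predict({
    "inputs": "What is Deep Learning?",
    "parameters": {"max_new_tokens": 128},
}))
```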
18 | -------------------------------------------------------------------------------- /huggingface/pytorch/optimum/docker/0.0.16/Dockerfile: -------------------------------------------------------------------------------- 1 | # Fetch and extract the TGI sources 2 | FROM alpine AS tgi 3 | RUN mkdir -p /tgi 4 | ADD https://github.com/huggingface/text-generation-inference/archive/refs/tags/v1.0.2.tar.gz /tgi/sources.tar.gz 5 | RUN tar -C /tgi -xf /tgi/sources.tar.gz --strip-components=1 6 | 7 | # Build cargo components (adapted from TGI original Dockerfile) 8 | # Note that the build image is aligned on the same Linux version as the base image (Debian bookworm/ Ubuntu 22.04) 9 | FROM lukemathwalker/cargo-chef:latest-rust-1.71-bookworm AS chef 10 | WORKDIR /usr/src 11 | 12 | ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse 13 | 14 | FROM chef as planner 15 | COPY --from=tgi /tgi/Cargo.toml Cargo.toml 16 | COPY --from=tgi /tgi/rust-toolchain.toml rust-toolchain.toml 17 | COPY --from=tgi /tgi/proto proto 18 | COPY --from=tgi /tgi/benchmark benchmark 19 | COPY --from=tgi /tgi/router router 20 | COPY --from=tgi /tgi/launcher launcher 21 | RUN cargo chef prepare --recipe-path recipe.json 22 | 23 | FROM chef AS builder 24 | 25 | RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && \ 26 | curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v21.12/$PROTOC_ZIP && \ 27 | unzip -o $PROTOC_ZIP -d /usr/local bin/protoc && \ 28 | unzip -o $PROTOC_ZIP -d /usr/local 'include/*' && \ 29 | rm -f $PROTOC_ZIP 30 | 31 | COPY --from=planner /usr/src/recipe.json recipe.json 32 | RUN cargo chef cook --release --recipe-path recipe.json 33 | 34 | COPY --from=tgi /tgi/Cargo.toml Cargo.toml 35 | COPY --from=tgi /tgi/rust-toolchain.toml rust-toolchain.toml 36 | COPY --from=tgi /tgi/proto proto 37 | COPY --from=tgi /tgi/benchmark benchmark 38 | COPY --from=tgi /tgi/router router 39 | COPY --from=tgi /tgi/launcher launcher 40 | RUN cargo build --release --workspace --exclude benchmark 41 | 42 | # Fetch optimum-neuron sources 43 | FROM alpine/git AS optimum-neuron 44 | RUN git clone --depth 1 --branch v0.0.16 https://github.com/huggingface/optimum-neuron.git /optimum-neuron 45 | 46 | # Python base image 47 | # Ubuntu 22.04 has Python 3.10 as default version https://packages.ubuntu.com/jammy/python3 48 | FROM ubuntu:22.04 AS base 49 | 50 | RUN apt-get update -y \ 51 | && apt-get install -y --no-install-recommends \ 52 | python3-pip \ 53 | python3-setuptools \ 54 | python-is-python3 \ 55 | && rm -rf /var/lib/apt/lists/* \ 56 | && apt-get clean 57 | RUN pip3 --no-cache-dir install --upgrade pip 58 | 59 | # Python server build image 60 | FROM base AS pyserver 61 | 62 | RUN apt-get update -y \ 63 | && apt-get install -y --no-install-recommends \ 64 | make \ 65 | python3-venv \ 66 | && rm -rf /var/lib/apt/lists/* \ 67 | && apt-get clean 68 | 69 | RUN install -d /pyserver 70 | WORKDIR /pyserver 71 | COPY --from=optimum-neuron /optimum-neuron/text-generation-inference/server server 72 | COPY --from=tgi /tgi/proto proto 73 | RUN pip3 install -r server/build-requirements.txt 74 | RUN VERBOSE=1 BUILDDIR=/pyserver/build PROTODIR=/pyserver/proto make -C server gen-server 75 | 76 | # Neuron base image (used for deployment) 77 | FROM base AS neuron 78 | 79 | # Install system prerequisites 80 | RUN apt-get update -y \ 81 | && apt-get install -y --no-install-recommends \ 82 | gnupg2 \ 83 | wget \ 84 | && rm -rf /var/lib/apt/lists/* \ 85 | && apt-get clean 86 | 87 | RUN echo "deb https://apt.repos.neuron.amazonaws.com jammy main" > 
/etc/apt/sources.list.d/neuron.list 88 | RUN wget -qO - https://apt.repos.neuron.amazonaws.com/GPG-PUB-KEY-AMAZON-AWS-NEURON.PUB | apt-key add - 89 | 90 | # Install neuronx packages 91 | RUN apt-get update -y \ 92 | && apt-get install -y --no-install-recommends \ 93 | aws-neuronx-dkms=2.14.5.0 \ 94 | aws-neuronx-collectives=2.18.18.0-f7a1f7a35 \ 95 | aws-neuronx-runtime-lib=2.18.14.0-0678cafac \ 96 | aws-neuronx-tools=2.15.4.0 \ 97 | && rm -rf /var/lib/apt/lists/* \ 98 | && apt-get clean 99 | 100 | ENV PATH="/opt/bin/:/opt/aws/neuron/bin:${PATH}" 101 | 102 | RUN pip3 install \ 103 | # Neuron 2.15.0 104 | neuronx-cc==2.11.0.34 \ 105 | torch-neuronx==1.13.1.1.12.1 \ 106 | transformers-neuronx==0.8.268 \ 107 | --extra-index-url=https://pip.repos.neuron.amazonaws.com 108 | 109 | # Install HuggingFace packages 110 | RUN pip3 install \ 111 | hf_transfer 112 | 113 | # Install optimum-neuron 114 | COPY --from=optimum-neuron /optimum-neuron optimum-neuron 115 | RUN pip3 install ./optimum-neuron 116 | 117 | # TGI base env 118 | ENV HUGGINGFACE_HUB_CACHE=/data \ 119 | HF_HUB_ENABLE_HF_TRANSFER=1 \ 120 | PORT=80 121 | 122 | # Install router 123 | COPY --from=builder /usr/src/target/release/text-generation-router /usr/local/bin/text-generation-router 124 | # Install launcher 125 | COPY --from=builder /usr/src/target/release/text-generation-launcher /usr/local/bin/text-generation-launcher 126 | # Install python server 127 | COPY --from=pyserver /pyserver/build/dist dist 128 | RUN pip install dist/text-generation-server*.tar.gz 129 | 130 | # AWS Sagemaker compatible image 131 | FROM neuron as sagemaker 132 | 133 | COPY --from=optimum-neuron /optimum-neuron/text-generation-inference/sagemaker-entrypoint.sh entrypoint.sh 134 | RUN chmod +x entrypoint.sh 135 | 136 | ENTRYPOINT ["./entrypoint.sh"] 137 | 138 | 139 | RUN apt-get update && apt-get install -y --no-install-recommends curl unzip \ 140 | && rm -rf /var/lib/apt/lists/* 141 | RUN HOME_DIR=/root && \ 142 | pip install requests && \ 143 | curl -o ${HOME_DIR}/oss_compliance.zip https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip && \ 144 | unzip ${HOME_DIR}/oss_compliance.zip -d ${HOME_DIR}/ && \ 145 | cp ${HOME_DIR}/oss_compliance/test/testOSSCompliance /usr/local/bin/testOSSCompliance && \ 146 | chmod +x /usr/local/bin/testOSSCompliance && \ 147 | chmod +x ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh && \ 148 | ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh ${HOME_DIR} python && \ 149 | rm -rf ${HOME_DIR}/oss_compliance* 150 | 151 | RUN echo "N.B.: Although this image is released under the Apache-2.0 License, the Dockerfile used to build the image \ 152 | has an indirect documentation dependency on third party project. The \ 153 | project's licensing includes the license. \ 154 | \n\n\ 155 | N.B.: Although this image is released under the Apache-2.0 License, the Dockerfile used to build the image uses the \ 156 | third party project. The project's licensing \ 157 | includes the https://github.com/huggingface/text-generation-inference/blob/main/LICENSE> \ 158 | license." 
> /root/THIRD_PARTY_LICENSES 159 | 160 | LABEL dlc_major_version="1" 161 | LABEL com.amazonaws.ml.engines.sagemaker.dlc.framework.huggingface.tgi="true" 162 | LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port="true" 163 | -------------------------------------------------------------------------------- /huggingface/pytorch/optimum/docker/0.0.17/Dockerfile: -------------------------------------------------------------------------------- 1 | # Fetch and extract the TGI sources 2 | FROM alpine AS tgi 3 | RUN mkdir -p /tgi 4 | ADD https://github.com/huggingface/text-generation-inference/archive/refs/tags/v1.0.2.tar.gz /tgi/sources.tar.gz 5 | RUN tar -C /tgi -xf /tgi/sources.tar.gz --strip-components=1 6 | 7 | # Build cargo components (adapted from TGI original Dockerfile) 8 | # Note that the build image is aligned on the same Linux version as the base image (Debian bookworm/ Ubuntu 22.04) 9 | FROM lukemathwalker/cargo-chef:latest-rust-1.71-bookworm AS chef 10 | WORKDIR /usr/src 11 | 12 | ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse 13 | 14 | FROM chef as planner 15 | COPY --from=tgi /tgi/Cargo.toml Cargo.toml 16 | COPY --from=tgi /tgi/rust-toolchain.toml rust-toolchain.toml 17 | COPY --from=tgi /tgi/proto proto 18 | COPY --from=tgi /tgi/benchmark benchmark 19 | COPY --from=tgi /tgi/router router 20 | COPY --from=tgi /tgi/launcher launcher 21 | RUN cargo chef prepare --recipe-path recipe.json 22 | 23 | FROM chef AS builder 24 | 25 | RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && \ 26 | curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v21.12/$PROTOC_ZIP && \ 27 | unzip -o $PROTOC_ZIP -d /usr/local bin/protoc && \ 28 | unzip -o $PROTOC_ZIP -d /usr/local 'include/*' && \ 29 | rm -f $PROTOC_ZIP 30 | 31 | COPY --from=planner /usr/src/recipe.json recipe.json 32 | RUN cargo chef cook --release --recipe-path recipe.json 33 | 34 | COPY --from=tgi /tgi/Cargo.toml Cargo.toml 35 | COPY --from=tgi /tgi/rust-toolchain.toml rust-toolchain.toml 36 | COPY --from=tgi /tgi/proto proto 37 | COPY --from=tgi /tgi/benchmark benchmark 38 | COPY --from=tgi /tgi/router router 39 | COPY --from=tgi /tgi/launcher launcher 40 | RUN cargo build --release --workspace --exclude benchmark 41 | 42 | # Fetch optimum-neuron sources 43 | FROM alpine/git AS optimum-neuron 44 | RUN git clone --depth 1 --branch v0.0.17 https://github.com/huggingface/optimum-neuron.git /optimum-neuron 45 | 46 | # Python base image 47 | FROM ubuntu:22.04 AS base 48 | 49 | RUN apt-get update -y \ 50 | && apt-get install -y --no-install-recommends \ 51 | python3-pip \ 52 | python3-setuptools \ 53 | python-is-python3 \ 54 | && rm -rf /var/lib/apt/lists/* \ 55 | && apt-get clean 56 | RUN pip3 --no-cache-dir install --upgrade pip 57 | 58 | # Python server build image 59 | FROM base AS pyserver 60 | 61 | RUN apt-get update -y \ 62 | && apt-get install -y --no-install-recommends \ 63 | make \ 64 | python3-venv \ 65 | && rm -rf /var/lib/apt/lists/* \ 66 | && apt-get clean 67 | 68 | RUN install -d /pyserver 69 | WORKDIR /pyserver 70 | COPY --from=optimum-neuron /optimum-neuron/text-generation-inference/server server 71 | COPY --from=tgi /tgi/proto proto 72 | RUN pip3 install -r server/build-requirements.txt 73 | RUN VERBOSE=1 BUILDDIR=/pyserver/build PROTODIR=/pyserver/proto make -C server gen-server 74 | 75 | # Neuron base image (used for deployment) 76 | FROM base AS neuron 77 | 78 | # Install system prerequisites 79 | RUN apt-get update -y \ 80 | && apt-get install -y --no-install-recommends \ 81 | gnupg2 \ 82 | wget \ 83 
| && rm -rf /var/lib/apt/lists/* \ 84 | && apt-get clean 85 | 86 | RUN echo "deb https://apt.repos.neuron.amazonaws.com jammy main" > /etc/apt/sources.list.d/neuron.list 87 | RUN wget -qO - https://apt.repos.neuron.amazonaws.com/GPG-PUB-KEY-AMAZON-AWS-NEURON.PUB | apt-key add - 88 | 89 | # Install neuronx packages 90 | RUN apt-get update -y \ 91 | && apt-get install -y --no-install-recommends \ 92 | aws-neuronx-dkms=2.15.9.0 \ 93 | aws-neuronx-collectives=2.19.7.0-530fb3064 \ 94 | aws-neuronx-runtime-lib=2.19.5.0-97e2d271b \ 95 | aws-neuronx-tools=2.16.1.0 \ 96 | && rm -rf /var/lib/apt/lists/* \ 97 | && apt-get clean 98 | 99 | ENV PATH="/opt/bin/:/opt/aws/neuron/bin:${PATH}" 100 | 101 | RUN pip3 install \ 102 | neuronx-cc==2.12.54.0 \ 103 | torch-neuronx==1.13.1.1.13.0 \ 104 | transformers-neuronx==0.9.474 \ 105 | --extra-index-url=https://pip.repos.neuron.amazonaws.com 106 | 107 | # Install HuggingFace packages 108 | RUN pip3 install \ 109 | hf_transfer 110 | 111 | # Install optimum-neuron 112 | COPY --from=optimum-neuron /optimum-neuron optimum-neuron 113 | RUN pip3 install ./optimum-neuron 114 | 115 | # TGI base env 116 | ENV HUGGINGFACE_HUB_CACHE=/data \ 117 | HF_HUB_ENABLE_HF_TRANSFER=1 \ 118 | PORT=80 119 | 120 | # Install router 121 | COPY --from=builder /usr/src/target/release/text-generation-router /usr/local/bin/text-generation-router 122 | # Install launcher 123 | COPY --from=builder /usr/src/target/release/text-generation-launcher /usr/local/bin/text-generation-launcher 124 | # Install python server 125 | COPY --from=pyserver /pyserver/build/dist dist 126 | RUN pip install dist/text-generation-server*.tar.gz 127 | 128 | # AWS Sagemaker compatible image 129 | FROM neuron as sagemaker 130 | 131 | COPY --from=optimum-neuron /optimum-neuron/text-generation-inference/sagemaker-entrypoint.sh entrypoint.sh 132 | RUN chmod +x entrypoint.sh 133 | 134 | ENTRYPOINT ["./entrypoint.sh"] 135 | 136 | 137 | RUN apt-get update && apt-get install -y --no-install-recommends curl unzip \ 138 | && rm -rf /var/lib/apt/lists/* 139 | RUN HOME_DIR=/root && \ 140 | pip install requests && \ 141 | curl -o ${HOME_DIR}/oss_compliance.zip https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip && \ 142 | unzip ${HOME_DIR}/oss_compliance.zip -d ${HOME_DIR}/ && \ 143 | cp ${HOME_DIR}/oss_compliance/test/testOSSCompliance /usr/local/bin/testOSSCompliance && \ 144 | chmod +x /usr/local/bin/testOSSCompliance && \ 145 | chmod +x ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh && \ 146 | ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh ${HOME_DIR} python && \ 147 | rm -rf ${HOME_DIR}/oss_compliance* 148 | 149 | RUN echo "N.B.: Although this image is released under the Apache-2.0 License, the Dockerfile used to build the image \ 150 | has an indirect documentation dependency on third party project. The \ 151 | project's licensing includes the license. \ 152 | \n\n\ 153 | N.B.: Although this image is released under the Apache-2.0 License, the Dockerfile used to build the image uses the \ 154 | third party project. The project's licensing \ 155 | includes the https://github.com/huggingface/text-generation-inference/blob/main/LICENSE> \ 156 | license." 
> /root/THIRD_PARTY_LICENSES 157 | 158 | LABEL dlc_major_version="1" 159 | LABEL com.amazonaws.ml.engines.sagemaker.dlc.framework.huggingface.tgi="true" 160 | LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port="true" 161 | -------------------------------------------------------------------------------- /huggingface/pytorch/optimum/docker/0.0.18/Dockerfile: -------------------------------------------------------------------------------- 1 | # Fetch and extract the TGI sources 2 | FROM alpine AS tgi 3 | RUN mkdir -p /tgi 4 | ADD https://github.com/huggingface/text-generation-inference/archive/refs/tags/v1.0.2.tar.gz /tgi/sources.tar.gz 5 | RUN tar -C /tgi -xf /tgi/sources.tar.gz --strip-components=1 6 | 7 | # Build cargo components (adapted from TGI original Dockerfile) 8 | # Note that the build image is aligned on the same Linux version as the base image (Debian bookworm/ Ubuntu 22.04) 9 | FROM lukemathwalker/cargo-chef:latest-rust-1.71-bookworm AS chef 10 | WORKDIR /usr/src 11 | 12 | ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse 13 | 14 | FROM chef as planner 15 | COPY --from=tgi /tgi/Cargo.toml Cargo.toml 16 | COPY --from=tgi /tgi/Cargo.lock Cargo.lock 17 | COPY --from=tgi /tgi/rust-toolchain.toml rust-toolchain.toml 18 | COPY --from=tgi /tgi/proto proto 19 | COPY --from=tgi /tgi/benchmark benchmark 20 | COPY --from=tgi /tgi/router router 21 | COPY --from=tgi /tgi/launcher launcher 22 | RUN cargo chef prepare --recipe-path recipe.json 23 | 24 | FROM chef AS builder 25 | 26 | RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && \ 27 | curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v21.12/$PROTOC_ZIP && \ 28 | unzip -o $PROTOC_ZIP -d /usr/local bin/protoc && \ 29 | unzip -o $PROTOC_ZIP -d /usr/local 'include/*' && \ 30 | rm -f $PROTOC_ZIP 31 | 32 | COPY --from=planner /usr/src/recipe.json recipe.json 33 | RUN cargo chef cook --release --recipe-path recipe.json 34 | 35 | COPY --from=tgi /tgi/Cargo.toml Cargo.toml 36 | COPY --from=tgi /tgi/Cargo.lock Cargo.lock 37 | COPY --from=tgi /tgi/rust-toolchain.toml rust-toolchain.toml 38 | COPY --from=tgi /tgi/proto proto 39 | COPY --from=tgi /tgi/benchmark benchmark 40 | COPY --from=tgi /tgi/router router 41 | COPY --from=tgi /tgi/launcher launcher 42 | RUN cargo build --release --workspace --exclude benchmark 43 | 44 | # Fetch optimum-neuron sources 45 | FROM alpine/git AS optimum-neuron 46 | RUN git clone --depth 1 --branch v0.0.18 https://github.com/huggingface/optimum-neuron.git /optimum-neuron 47 | 48 | # Python base image 49 | FROM ubuntu:22.04 AS base 50 | 51 | RUN apt-get update -y \ 52 | && apt-get install -y --no-install-recommends \ 53 | python3-pip \ 54 | python3-setuptools \ 55 | python-is-python3 \ 56 | && rm -rf /var/lib/apt/lists/* \ 57 | && apt-get clean 58 | RUN pip3 --no-cache-dir install --upgrade pip 59 | 60 | # Python server build image 61 | FROM base AS pyserver 62 | 63 | RUN apt-get update -y \ 64 | && apt-get install -y --no-install-recommends \ 65 | make \ 66 | python3-venv \ 67 | && rm -rf /var/lib/apt/lists/* \ 68 | && apt-get clean 69 | 70 | RUN install -d /pyserver 71 | WORKDIR /pyserver 72 | COPY --from=optimum-neuron /optimum-neuron/text-generation-inference/server server 73 | COPY --from=tgi /tgi/proto proto 74 | RUN pip3 install -r server/build-requirements.txt 75 | RUN VERBOSE=1 BUILDDIR=/pyserver/build PROTODIR=/pyserver/proto make -C server gen-server 76 | 77 | # Neuron base image (used for deployment) 78 | FROM base AS neuron 79 | 80 | # Install system prerequisites 81 | RUN 
apt-get update -y \ 82 | && apt-get install -y --no-install-recommends \ 83 | gnupg2 \ 84 | wget \ 85 | && rm -rf /var/lib/apt/lists/* \ 86 | && apt-get clean 87 | 88 | RUN echo "deb https://apt.repos.neuron.amazonaws.com jammy main" > /etc/apt/sources.list.d/neuron.list 89 | RUN wget -qO - https://apt.repos.neuron.amazonaws.com/GPG-PUB-KEY-AMAZON-AWS-NEURON.PUB | apt-key add - 90 | 91 | # Install neuronx packages 92 | RUN apt-get update -y \ 93 | && apt-get install -y --no-install-recommends \ 94 | aws-neuronx-dkms=2.15.9.0 \ 95 | aws-neuronx-collectives=2.19.7.0-530fb3064 \ 96 | aws-neuronx-runtime-lib=2.19.5.0-97e2d271b \ 97 | aws-neuronx-tools=2.16.1.0 \ 98 | && rm -rf /var/lib/apt/lists/* \ 99 | && apt-get clean 100 | 101 | ENV PATH="/opt/bin/:/opt/aws/neuron/bin:${PATH}" 102 | 103 | RUN pip3 install \ 104 | neuronx-cc==2.12.68.0 \ 105 | torch-neuronx==1.13.1.1.13.0 \ 106 | transformers-neuronx==0.9.474 \ 107 | --extra-index-url=https://pip.repos.neuron.amazonaws.com 108 | 109 | # Install HuggingFace packages 110 | RUN pip3 install \ 111 | hf_transfer 112 | 113 | # Install optimum-neuron 114 | COPY --from=optimum-neuron /optimum-neuron optimum-neuron 115 | RUN pip3 install ./optimum-neuron 116 | 117 | # TGI base env 118 | ENV HUGGINGFACE_HUB_CACHE=/data \ 119 | HF_HUB_ENABLE_HF_TRANSFER=1 \ 120 | PORT=80 121 | 122 | # Install router 123 | COPY --from=builder /usr/src/target/release/text-generation-router /usr/local/bin/text-generation-router 124 | # Install launcher 125 | COPY --from=builder /usr/src/target/release/text-generation-launcher /usr/local/bin/text-generation-launcher 126 | # Install python server 127 | COPY --from=pyserver /pyserver/build/dist dist 128 | RUN pip install dist/text-generation-server*.tar.gz 129 | 130 | # AWS Sagemaker compatible image 131 | FROM neuron as sagemaker 132 | 133 | COPY --from=optimum-neuron /optimum-neuron/text-generation-inference/sagemaker-entrypoint.sh entrypoint.sh 134 | RUN chmod +x entrypoint.sh 135 | 136 | ENTRYPOINT ["./entrypoint.sh"] 137 | 138 | 139 | RUN apt-get update && apt-get install -y --no-install-recommends curl unzip \ 140 | && rm -rf /var/lib/apt/lists/* 141 | RUN HOME_DIR=/root && \ 142 | pip install requests && \ 143 | curl -o ${HOME_DIR}/oss_compliance.zip https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip && \ 144 | unzip ${HOME_DIR}/oss_compliance.zip -d ${HOME_DIR}/ && \ 145 | cp ${HOME_DIR}/oss_compliance/test/testOSSCompliance /usr/local/bin/testOSSCompliance && \ 146 | chmod +x /usr/local/bin/testOSSCompliance && \ 147 | chmod +x ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh && \ 148 | ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh ${HOME_DIR} python && \ 149 | rm -rf ${HOME_DIR}/oss_compliance* 150 | 151 | RUN echo "N.B.: Although this image is released under the Apache-2.0 License, the Dockerfile used to build the image \ 152 | has an indirect documentation dependency on third party project. The \ 153 | project's licensing includes the license. \ 154 | \n\n\ 155 | N.B.: Although this image is released under the Apache-2.0 License, the Dockerfile used to build the image uses the \ 156 | third party project. The project's licensing \ 157 | includes the https://github.com/huggingface/text-generation-inference/blob/main/LICENSE> \ 158 | license." 
> /root/THIRD_PARTY_LICENSES 159 | 160 | LABEL dlc_major_version="1" 161 | LABEL com.amazonaws.ml.engines.sagemaker.dlc.framework.huggingface.tgi="true" 162 | LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port="true" 163 | -------------------------------------------------------------------------------- /huggingface/pytorch/optimum/docker/0.0.19/Dockerfile: -------------------------------------------------------------------------------- 1 | # Fetch and extract the TGI sources 2 | FROM alpine AS tgi 3 | RUN mkdir -p /tgi 4 | ADD https://github.com/huggingface/text-generation-inference/archive/refs/tags/v1.4.1.tar.gz /tgi/sources.tar.gz 5 | RUN tar -C /tgi -xf /tgi/sources.tar.gz --strip-components=1 6 | 7 | # Build cargo components (adapted from TGI original Dockerfile) 8 | # Note that the build image is aligned on the same Linux version as the base image (Debian bookworm/ Ubuntu 22.04) 9 | FROM lukemathwalker/cargo-chef:latest-rust-1.75-bookworm AS chef 10 | WORKDIR /usr/src 11 | 12 | ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse 13 | 14 | FROM chef as planner 15 | COPY --from=tgi /tgi/Cargo.toml Cargo.toml 16 | COPY --from=tgi /tgi/Cargo.lock Cargo.lock 17 | COPY --from=tgi /tgi/rust-toolchain.toml rust-toolchain.toml 18 | COPY --from=tgi /tgi/proto proto 19 | COPY --from=tgi /tgi/benchmark benchmark 20 | COPY --from=tgi /tgi/router router 21 | COPY --from=tgi /tgi/launcher launcher 22 | RUN cargo chef prepare --recipe-path recipe.json 23 | 24 | FROM chef AS builder 25 | 26 | RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && \ 27 | curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v21.12/$PROTOC_ZIP && \ 28 | unzip -o $PROTOC_ZIP -d /usr/local bin/protoc && \ 29 | unzip -o $PROTOC_ZIP -d /usr/local 'include/*' && \ 30 | rm -f $PROTOC_ZIP 31 | 32 | COPY --from=planner /usr/src/recipe.json recipe.json 33 | RUN cargo chef cook --release --recipe-path recipe.json 34 | 35 | COPY --from=tgi /tgi/Cargo.toml Cargo.toml 36 | COPY --from=tgi /tgi/Cargo.lock Cargo.lock 37 | COPY --from=tgi /tgi/rust-toolchain.toml rust-toolchain.toml 38 | COPY --from=tgi /tgi/proto proto 39 | COPY --from=tgi /tgi/benchmark benchmark 40 | COPY --from=tgi /tgi/router router 41 | COPY --from=tgi /tgi/launcher launcher 42 | RUN cargo build --release --workspace --exclude benchmark 43 | 44 | # Fetch optimum-neuron sources 45 | FROM alpine/git AS optimum-neuron 46 | RUN git clone --depth 1 --branch v0.0.19 https://github.com/huggingface/optimum-neuron.git /optimum-neuron 47 | 48 | # Python base image 49 | FROM ubuntu:22.04 AS base 50 | 51 | RUN apt-get update -y \ 52 | && apt-get install -y --no-install-recommends \ 53 | python3-pip \ 54 | python3-setuptools \ 55 | python-is-python3 \ 56 | && rm -rf /var/lib/apt/lists/* \ 57 | && apt-get clean 58 | RUN pip3 --no-cache-dir install --upgrade pip 59 | 60 | # Python server build image 61 | FROM base AS pyserver 62 | 63 | RUN apt-get update -y \ 64 | && apt-get install -y --no-install-recommends \ 65 | make \ 66 | python3-venv \ 67 | && rm -rf /var/lib/apt/lists/* \ 68 | && apt-get clean 69 | 70 | RUN install -d /pyserver 71 | WORKDIR /pyserver 72 | COPY --from=optimum-neuron /optimum-neuron/text-generation-inference/server server 73 | COPY --from=tgi /tgi/proto proto 74 | RUN pip3 install -r server/build-requirements.txt 75 | RUN VERBOSE=1 BUILDDIR=/pyserver/build PROTODIR=/pyserver/proto make -C server gen-server 76 | 77 | # Neuron base image (used for deployment) 78 | FROM base AS neuron 79 | 80 | # Install system prerequisites 81 | RUN 
apt-get update -y \ 82 | && apt-get install -y --no-install-recommends \ 83 | gnupg2 \ 84 | wget \ 85 | && rm -rf /var/lib/apt/lists/* \ 86 | && apt-get clean 87 | 88 | RUN echo "deb https://apt.repos.neuron.amazonaws.com jammy main" > /etc/apt/sources.list.d/neuron.list 89 | RUN wget -qO - https://apt.repos.neuron.amazonaws.com/GPG-PUB-KEY-AMAZON-AWS-NEURON.PUB | apt-key add - 90 | 91 | # Install neuronx packages 92 | RUN apt-get update -y \ 93 | && apt-get install -y --no-install-recommends \ 94 | aws-neuronx-dkms=2.15.9.0 \ 95 | aws-neuronx-collectives=2.20.11.0-c101c322e \ 96 | aws-neuronx-runtime-lib=2.20.11.0-b7d33e68b \ 97 | aws-neuronx-tools=2.17.0.0 \ 98 | && rm -rf /var/lib/apt/lists/* \ 99 | && apt-get clean 100 | 101 | ENV PATH="/opt/bin/:/opt/aws/neuron/bin:${PATH}" 102 | 103 | RUN pip3 install \ 104 | neuronx-cc==2.12.68.0 \ 105 | torch-neuronx==1.13.1.1.13.1 \ 106 | transformers-neuronx==0.9.474 \ 107 | --extra-index-url=https://pip.repos.neuron.amazonaws.com 108 | 109 | # Install HuggingFace packages 110 | RUN pip3 install \ 111 | hf_transfer 112 | 113 | # Install optimum-neuron 114 | COPY --from=optimum-neuron /optimum-neuron optimum-neuron 115 | RUN pip3 install ./optimum-neuron 116 | 117 | # TGI base env 118 | ENV HUGGINGFACE_HUB_CACHE=/tmp \ 119 | HF_HUB_ENABLE_HF_TRANSFER=1 \ 120 | PORT=80 121 | 122 | # Install router 123 | COPY --from=builder /usr/src/target/release/text-generation-router /usr/local/bin/text-generation-router 124 | # Install launcher 125 | COPY --from=builder /usr/src/target/release/text-generation-launcher /usr/local/bin/text-generation-launcher 126 | # Install python server 127 | COPY --from=pyserver /pyserver/build/dist dist 128 | RUN pip install dist/text-generation-server*.tar.gz 129 | 130 | # AWS Sagemaker compatible image 131 | FROM neuron as sagemaker 132 | 133 | COPY --from=optimum-neuron /optimum-neuron/text-generation-inference/sagemaker-entrypoint.sh entrypoint.sh 134 | RUN chmod +x entrypoint.sh 135 | 136 | ENTRYPOINT ["./entrypoint.sh"] 137 | 138 | 139 | RUN apt-get update && apt-get install -y --no-install-recommends curl unzip \ 140 | && rm -rf /var/lib/apt/lists/* 141 | RUN HOME_DIR=/root && \ 142 | pip install requests && \ 143 | curl -o ${HOME_DIR}/oss_compliance.zip https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip && \ 144 | unzip ${HOME_DIR}/oss_compliance.zip -d ${HOME_DIR}/ && \ 145 | cp ${HOME_DIR}/oss_compliance/test/testOSSCompliance /usr/local/bin/testOSSCompliance && \ 146 | chmod +x /usr/local/bin/testOSSCompliance && \ 147 | chmod +x ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh && \ 148 | ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh ${HOME_DIR} python && \ 149 | rm -rf ${HOME_DIR}/oss_compliance* 150 | 151 | RUN echo "N.B.: Although this image is released under the Apache-2.0 License, the Dockerfile used to build the image \ 152 | has an indirect documentation dependency on third party project. The \ 153 | project's licensing includes the license. \ 154 | \n\n\ 155 | N.B.: Although this image is released under the Apache-2.0 License, the Dockerfile used to build the image uses the \ 156 | third party project. The project's licensing \ 157 | includes the https://github.com/huggingface/text-generation-inference/blob/main/LICENSE> \ 158 | license." 
> /root/THIRD_PARTY_LICENSES 159 | 160 | LABEL dlc_major_version="1" 161 | LABEL com.amazonaws.ml.engines.sagemaker.dlc.framework.huggingface.tgi="true" 162 | LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port="true" 163 | -------------------------------------------------------------------------------- /huggingface/pytorch/optimum/docker/0.0.20/Dockerfile: -------------------------------------------------------------------------------- 1 | # Fetch and extract the TGI sources 2 | FROM alpine AS tgi 3 | RUN mkdir -p /tgi 4 | ADD https://github.com/huggingface/text-generation-inference/archive/refs/tags/v1.4.1.tar.gz /tgi/sources.tar.gz 5 | RUN tar -C /tgi -xf /tgi/sources.tar.gz --strip-components=1 6 | 7 | # Build cargo components (adapted from TGI original Dockerfile) 8 | # Note that the build image is aligned on the same Linux version as the base image (Debian bookworm/ Ubuntu 22.04) 9 | FROM lukemathwalker/cargo-chef:latest-rust-1.75-bookworm AS chef 10 | WORKDIR /usr/src 11 | 12 | ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse 13 | 14 | FROM chef as planner 15 | COPY --from=tgi /tgi/Cargo.toml Cargo.toml 16 | COPY --from=tgi /tgi/Cargo.lock Cargo.lock 17 | COPY --from=tgi /tgi/rust-toolchain.toml rust-toolchain.toml 18 | COPY --from=tgi /tgi/proto proto 19 | COPY --from=tgi /tgi/benchmark benchmark 20 | COPY --from=tgi /tgi/router router 21 | COPY --from=tgi /tgi/launcher launcher 22 | RUN cargo chef prepare --recipe-path recipe.json 23 | 24 | FROM chef AS builder 25 | 26 | RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && \ 27 | curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v21.12/$PROTOC_ZIP && \ 28 | unzip -o $PROTOC_ZIP -d /usr/local bin/protoc && \ 29 | unzip -o $PROTOC_ZIP -d /usr/local 'include/*' && \ 30 | rm -f $PROTOC_ZIP 31 | 32 | COPY --from=planner /usr/src/recipe.json recipe.json 33 | RUN cargo chef cook --release --recipe-path recipe.json 34 | 35 | COPY --from=tgi /tgi/Cargo.toml Cargo.toml 36 | COPY --from=tgi /tgi/Cargo.lock Cargo.lock 37 | COPY --from=tgi /tgi/rust-toolchain.toml rust-toolchain.toml 38 | COPY --from=tgi /tgi/proto proto 39 | COPY --from=tgi /tgi/benchmark benchmark 40 | COPY --from=tgi /tgi/router router 41 | COPY --from=tgi /tgi/launcher launcher 42 | RUN cargo build --release --workspace --exclude benchmark 43 | 44 | # Fetch optimum-neuron sources 45 | FROM alpine/git AS optimum-neuron 46 | RUN git clone --depth 1 --branch v0.0.20 https://github.com/huggingface/optimum-neuron.git /optimum-neuron 47 | 48 | # Python base image 49 | FROM ubuntu:22.04 AS base 50 | 51 | RUN apt-get update -y \ 52 | && apt-get install -y --no-install-recommends \ 53 | python3-pip \ 54 | python3-setuptools \ 55 | python-is-python3 \ 56 | && rm -rf /var/lib/apt/lists/* \ 57 | && apt-get clean 58 | RUN pip3 --no-cache-dir install --upgrade pip 59 | 60 | # Python server build image 61 | FROM base AS pyserver 62 | 63 | RUN apt-get update -y \ 64 | && apt-get install -y --no-install-recommends \ 65 | make \ 66 | python3-venv \ 67 | && rm -rf /var/lib/apt/lists/* \ 68 | && apt-get clean 69 | 70 | RUN install -d /pyserver 71 | WORKDIR /pyserver 72 | COPY --from=optimum-neuron /optimum-neuron/text-generation-inference/server server 73 | COPY --from=tgi /tgi/proto proto 74 | RUN pip3 install -r server/build-requirements.txt 75 | RUN VERBOSE=1 BUILDDIR=/pyserver/build PROTODIR=/pyserver/proto make -C server gen-server 76 | 77 | # Neuron base image (used for deployment) 78 | FROM base AS neuron 79 | 80 | # Install system prerequisites 81 | RUN 
apt-get update -y \ 82 | && apt-get install -y --no-install-recommends \ 83 | gnupg2 \ 84 | wget \ 85 | && rm -rf /var/lib/apt/lists/* \ 86 | && apt-get clean 87 | 88 | RUN echo "deb https://apt.repos.neuron.amazonaws.com jammy main" > /etc/apt/sources.list.d/neuron.list 89 | RUN wget -qO - https://apt.repos.neuron.amazonaws.com/GPG-PUB-KEY-AMAZON-AWS-NEURON.PUB | apt-key add - 90 | 91 | # Install neuronx packages 92 | RUN apt-get update -y \ 93 | && apt-get install -y --no-install-recommends \ 94 | aws-neuronx-dkms=2.15.9.0 \ 95 | aws-neuronx-collectives=2.20.11.0-c101c322e \ 96 | aws-neuronx-runtime-lib=2.20.11.0-b7d33e68b \ 97 | aws-neuronx-tools=2.17.0.0 \ 98 | && rm -rf /var/lib/apt/lists/* \ 99 | && apt-get clean 100 | 101 | ENV PATH="/opt/bin/:/opt/aws/neuron/bin:${PATH}" 102 | 103 | RUN pip3 install \ 104 | neuronx-cc==2.12.68.0 \ 105 | torch-neuronx==1.13.1.1.13.1 \ 106 | transformers-neuronx==0.9.474 \ 107 | --extra-index-url=https://pip.repos.neuron.amazonaws.com 108 | 109 | # Install HuggingFace packages 110 | RUN pip3 install \ 111 | hf_transfer 112 | 113 | # Install optimum-neuron 114 | COPY --from=optimum-neuron /optimum-neuron optimum-neuron 115 | RUN pip3 install ./optimum-neuron 116 | 117 | # TGI base env 118 | ENV HUGGINGFACE_HUB_CACHE=/tmp \ 119 | HF_HUB_ENABLE_HF_TRANSFER=1 \ 120 | PORT=80 121 | 122 | # Install router 123 | COPY --from=builder /usr/src/target/release/text-generation-router /usr/local/bin/text-generation-router 124 | # Install launcher 125 | COPY --from=builder /usr/src/target/release/text-generation-launcher /usr/local/bin/text-generation-launcher 126 | # Install python server 127 | COPY --from=pyserver /pyserver/build/dist dist 128 | RUN pip install dist/text-generation-server*.tar.gz 129 | 130 | # AWS Sagemaker compatible image 131 | FROM neuron as sagemaker 132 | 133 | COPY --from=optimum-neuron /optimum-neuron/text-generation-inference/sagemaker-entrypoint.sh entrypoint.sh 134 | RUN chmod +x entrypoint.sh 135 | 136 | ENTRYPOINT ["./entrypoint.sh"] 137 | 138 | 139 | RUN apt-get update && apt-get install -y --no-install-recommends curl unzip \ 140 | && rm -rf /var/lib/apt/lists/* 141 | RUN HOME_DIR=/root && \ 142 | pip install requests && \ 143 | curl -o ${HOME_DIR}/oss_compliance.zip https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip && \ 144 | unzip ${HOME_DIR}/oss_compliance.zip -d ${HOME_DIR}/ && \ 145 | cp ${HOME_DIR}/oss_compliance/test/testOSSCompliance /usr/local/bin/testOSSCompliance && \ 146 | chmod +x /usr/local/bin/testOSSCompliance && \ 147 | chmod +x ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh && \ 148 | ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh ${HOME_DIR} python && \ 149 | rm -rf ${HOME_DIR}/oss_compliance* 150 | 151 | RUN echo "N.B.: Although this image is released under the Apache-2.0 License, the Dockerfile used to build the image \ 152 | has an indirect documentation dependency on third party project. The \ 153 | project's licensing includes the license. \ 154 | \n\n\ 155 | N.B.: Although this image is released under the Apache-2.0 License, the Dockerfile used to build the image uses the \ 156 | third party project. The project's licensing \ 157 | includes the https://github.com/huggingface/text-generation-inference/blob/main/LICENSE> \ 158 | license." 
> /root/THIRD_PARTY_LICENSES 159 | 160 | LABEL dlc_major_version="1" 161 | LABEL com.amazonaws.ml.engines.sagemaker.dlc.framework.huggingface.tgi="true" 162 | LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port="true" 163 | -------------------------------------------------------------------------------- /huggingface/pytorch/optimum/docker/0.0.21/Dockerfile: -------------------------------------------------------------------------------- 1 | # Fetch and extract the TGI sources 2 | FROM alpine AS tgi 3 | RUN mkdir -p /tgi 4 | ADD https://github.com/huggingface/text-generation-inference/archive/refs/tags/v1.4.1.tar.gz /tgi/sources.tar.gz 5 | RUN tar -C /tgi -xf /tgi/sources.tar.gz --strip-components=1 6 | 7 | # Build cargo components (adapted from TGI original Dockerfile) 8 | # Note that the build image is aligned on the same Linux version as the base image (Debian bookworm/ Ubuntu 22.04) 9 | FROM lukemathwalker/cargo-chef:latest-rust-1.75-bookworm AS chef 10 | WORKDIR /usr/src 11 | 12 | ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse 13 | 14 | FROM chef as planner 15 | COPY --from=tgi /tgi/Cargo.toml Cargo.toml 16 | COPY --from=tgi /tgi/Cargo.lock Cargo.lock 17 | COPY --from=tgi /tgi/rust-toolchain.toml rust-toolchain.toml 18 | COPY --from=tgi /tgi/proto proto 19 | COPY --from=tgi /tgi/benchmark benchmark 20 | COPY --from=tgi /tgi/router router 21 | COPY --from=tgi /tgi/launcher launcher 22 | RUN cargo chef prepare --recipe-path recipe.json 23 | 24 | FROM chef AS builder 25 | 26 | RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && \ 27 | curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v21.12/$PROTOC_ZIP && \ 28 | unzip -o $PROTOC_ZIP -d /usr/local bin/protoc && \ 29 | unzip -o $PROTOC_ZIP -d /usr/local 'include/*' && \ 30 | rm -f $PROTOC_ZIP 31 | 32 | COPY --from=planner /usr/src/recipe.json recipe.json 33 | RUN cargo chef cook --release --recipe-path recipe.json 34 | 35 | COPY --from=tgi /tgi/Cargo.toml Cargo.toml 36 | COPY --from=tgi /tgi/Cargo.lock Cargo.lock 37 | COPY --from=tgi /tgi/rust-toolchain.toml rust-toolchain.toml 38 | COPY --from=tgi /tgi/proto proto 39 | COPY --from=tgi /tgi/benchmark benchmark 40 | COPY --from=tgi /tgi/router router 41 | COPY --from=tgi /tgi/launcher launcher 42 | RUN cargo build --release --workspace --exclude benchmark 43 | 44 | # Fetch optimum-neuron sources 45 | FROM alpine/git AS optimum-neuron 46 | RUN git clone --depth 1 --branch v0.0.21 https://github.com/huggingface/optimum-neuron.git /optimum-neuron 47 | 48 | # Python base image 49 | FROM ubuntu:22.04 AS base 50 | 51 | RUN apt-get update -y \ 52 | && apt-get install -y --no-install-recommends \ 53 | python3-pip \ 54 | python3-setuptools \ 55 | python-is-python3 \ 56 | && rm -rf /var/lib/apt/lists/* \ 57 | && apt-get clean 58 | RUN pip3 --no-cache-dir install --upgrade pip 59 | 60 | # Python server build image 61 | FROM base AS pyserver 62 | 63 | RUN apt-get update -y \ 64 | && apt-get install -y --no-install-recommends \ 65 | make \ 66 | python3-venv \ 67 | && rm -rf /var/lib/apt/lists/* \ 68 | && apt-get clean 69 | 70 | RUN install -d /pyserver 71 | WORKDIR /pyserver 72 | COPY --from=optimum-neuron /optimum-neuron/text-generation-inference/server server 73 | COPY --from=tgi /tgi/proto proto 74 | RUN pip3 install -r server/build-requirements.txt 75 | RUN VERBOSE=1 BUILDDIR=/pyserver/build PROTODIR=/pyserver/proto make -C server gen-server 76 | 77 | # Neuron base image (used for deployment) 78 | FROM base AS neuron 79 | 80 | # Install system prerequisites 81 | RUN 
apt-get update -y \ 82 | && apt-get install -y --no-install-recommends \ 83 | gnupg2 \ 84 | wget \ 85 | && rm -rf /var/lib/apt/lists/* \ 86 | && apt-get clean 87 | 88 | RUN echo "deb https://apt.repos.neuron.amazonaws.com jammy main" > /etc/apt/sources.list.d/neuron.list 89 | RUN wget -qO - https://apt.repos.neuron.amazonaws.com/GPG-PUB-KEY-AMAZON-AWS-NEURON.PUB | apt-key add - 90 | 91 | # Install neuronx packages 92 | RUN apt-get update -y \ 93 | && apt-get install -y --no-install-recommends \ 94 | aws-neuronx-dkms=2.16.7.0 \ 95 | aws-neuronx-collectives=2.20.22.0-c101c322e \ 96 | aws-neuronx-runtime-lib=2.20.22.0-1b3ca6425 \ 97 | aws-neuronx-tools=2.17.1.0 \ 98 | && rm -rf /var/lib/apt/lists/* \ 99 | && apt-get clean 100 | 101 | ENV PATH="/opt/bin/:/opt/aws/neuron/bin:${PATH}" 102 | 103 | RUN pip3 install \ 104 | neuronx-cc==2.13.66.0 \ 105 | torch-neuronx==1.13.1.1.14.0 \ 106 | transformers-neuronx==0.10.0.21 \ 107 | --extra-index-url=https://pip.repos.neuron.amazonaws.com 108 | 109 | # Install HuggingFace packages 110 | RUN pip3 install \ 111 | hf_transfer 112 | 113 | # Install optimum-neuron 114 | COPY --from=optimum-neuron /optimum-neuron optimum-neuron 115 | RUN pip3 install ./optimum-neuron 116 | 117 | # TGI base env 118 | ENV HUGGINGFACE_HUB_CACHE=/tmp \ 119 | HF_HUB_ENABLE_HF_TRANSFER=1 \ 120 | PORT=80 121 | 122 | # Disable color logs as they are not supported by CloudWatch 123 | ENV LOGURU_COLORIZE=NO 124 | 125 | # Install router 126 | COPY --from=builder /usr/src/target/release/text-generation-router /usr/local/bin/text-generation-router 127 | # Install launcher 128 | COPY --from=builder /usr/src/target/release/text-generation-launcher /usr/local/bin/text-generation-launcher 129 | # Install python server 130 | COPY --from=pyserver /pyserver/build/dist dist 131 | RUN pip install dist/text-generation-server*.tar.gz 132 | 133 | # AWS Sagemaker compatible image 134 | FROM neuron as sagemaker 135 | 136 | COPY --from=optimum-neuron /optimum-neuron/text-generation-inference/sagemaker-entrypoint.sh entrypoint.sh 137 | RUN chmod +x entrypoint.sh 138 | 139 | ENTRYPOINT ["./entrypoint.sh"] 140 | 141 | 142 | RUN apt-get update && apt-get install -y --no-install-recommends curl unzip \ 143 | && rm -rf /var/lib/apt/lists/* 144 | RUN HOME_DIR=/root && \ 145 | pip install requests && \ 146 | curl -o ${HOME_DIR}/oss_compliance.zip https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip && \ 147 | unzip ${HOME_DIR}/oss_compliance.zip -d ${HOME_DIR}/ && \ 148 | cp ${HOME_DIR}/oss_compliance/test/testOSSCompliance /usr/local/bin/testOSSCompliance && \ 149 | chmod +x /usr/local/bin/testOSSCompliance && \ 150 | chmod +x ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh && \ 151 | ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh ${HOME_DIR} python && \ 152 | rm -rf ${HOME_DIR}/oss_compliance* 153 | 154 | RUN echo "N.B.: Although this image is released under the Apache-2.0 License, the Dockerfile used to build the image \ 155 | has an indirect documentation dependency on third party project. The \ 156 | project's licensing includes the license. \ 157 | \n\n\ 158 | N.B.: Although this image is released under the Apache-2.0 License, the Dockerfile used to build the image uses the \ 159 | third party project. The project's licensing \ 160 | includes the https://github.com/huggingface/text-generation-inference/blob/main/LICENSE> \ 161 | license." 
> /root/THIRD_PARTY_LICENSES 162 | 163 | LABEL dlc_major_version="1" 164 | LABEL com.amazonaws.ml.engines.sagemaker.dlc.framework.huggingface.tgi="true" 165 | LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port="true" 166 | -------------------------------------------------------------------------------- /huggingface/pytorch/optimum/docker/0.0.22/Dockerfile: -------------------------------------------------------------------------------- 1 | # Fetch and extract the TGI sources (TGI_VERSION is mandatory) 2 | FROM alpine AS tgi 3 | RUN mkdir -p /tgi 4 | ADD https://github.com/huggingface/text-generation-inference/archive/refs/tags/v2.0.2.tar.gz /tgi/sources.tar.gz 5 | RUN tar -C /tgi -xf /tgi/sources.tar.gz --strip-components=1 6 | 7 | # Build cargo components (adapted from TGI original Dockerfile) 8 | # Note that the build image is aligned on the same Linux version as the base image (Debian bookworm/ Ubuntu 22.04) 9 | FROM lukemathwalker/cargo-chef:latest-rust-1.75-bookworm AS chef 10 | WORKDIR /usr/src 11 | 12 | ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse 13 | 14 | FROM chef as planner 15 | COPY --from=tgi /tgi/Cargo.toml Cargo.toml 16 | COPY --from=tgi /tgi/rust-toolchain.toml rust-toolchain.toml 17 | COPY --from=tgi /tgi/proto proto 18 | COPY --from=tgi /tgi/benchmark benchmark 19 | COPY --from=tgi /tgi/router router 20 | COPY --from=tgi /tgi/launcher launcher 21 | RUN cargo chef prepare --recipe-path recipe.json 22 | 23 | FROM chef AS builder 24 | 25 | RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && \ 26 | curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v21.12/$PROTOC_ZIP && \ 27 | unzip -o $PROTOC_ZIP -d /usr/local bin/protoc && \ 28 | unzip -o $PROTOC_ZIP -d /usr/local 'include/*' && \ 29 | rm -f $PROTOC_ZIP 30 | 31 | COPY --from=planner /usr/src/recipe.json recipe.json 32 | RUN cargo chef cook --release --recipe-path recipe.json 33 | 34 | COPY --from=tgi /tgi/Cargo.toml Cargo.toml 35 | COPY --from=tgi /tgi/rust-toolchain.toml rust-toolchain.toml 36 | COPY --from=tgi /tgi/proto proto 37 | COPY --from=tgi /tgi/benchmark benchmark 38 | COPY --from=tgi /tgi/router router 39 | COPY --from=tgi /tgi/launcher launcher 40 | RUN cargo build --release --workspace --exclude benchmark 41 | 42 | # Fetch optimum-neuron sources 43 | FROM alpine/git AS optimum-neuron 44 | RUN git clone --depth 1 --branch v0.0.22 https://github.com/huggingface/optimum-neuron.git /optimum-neuron 45 | 46 | # Python base image 47 | FROM ubuntu:22.04 AS base 48 | 49 | RUN apt-get update -y \ 50 | && apt-get install -y --no-install-recommends \ 51 | python3-pip \ 52 | python3-setuptools \ 53 | python-is-python3 \ 54 | && rm -rf /var/lib/apt/lists/* \ 55 | && apt-get clean 56 | RUN pip3 --no-cache-dir install --upgrade pip 57 | 58 | # Python server build image 59 | FROM base AS pyserver 60 | 61 | RUN apt-get update -y \ 62 | && apt-get install -y --no-install-recommends \ 63 | make \ 64 | python3-venv \ 65 | && rm -rf /var/lib/apt/lists/* \ 66 | && apt-get clean 67 | 68 | RUN install -d /pyserver 69 | WORKDIR /pyserver 70 | COPY --from=optimum-neuron /optimum-neuron/text-generation-inference/server server 71 | COPY --from=tgi /tgi/proto proto 72 | RUN pip3 install -r server/build-requirements.txt 73 | RUN VERBOSE=1 BUILDDIR=/pyserver/build PROTODIR=/pyserver/proto make -C server gen-server 74 | 75 | # Neuron base image (used for deployment) 76 | FROM base AS neuron 77 | 78 | # Install system prerequisites 79 | RUN apt-get update -y \ 80 | && apt-get install -y --no-install-recommends \ 81 
| gnupg2 \ 82 | wget \ 83 | python3-dev \ 84 | && rm -rf /var/lib/apt/lists/* \ 85 | && apt-get clean 86 | 87 | RUN echo "deb https://apt.repos.neuron.amazonaws.com jammy main" > /etc/apt/sources.list.d/neuron.list 88 | RUN wget -qO - https://apt.repos.neuron.amazonaws.com/GPG-PUB-KEY-AMAZON-AWS-NEURON.PUB | apt-key add - 89 | 90 | # Install neuronx packages 91 | RUN apt-get update -y \ 92 | && apt-get install -y --no-install-recommends \ 93 | aws-neuronx-dkms=2.16.7.0 \ 94 | aws-neuronx-collectives=2.20.22.0-c101c322e \ 95 | aws-neuronx-runtime-lib=2.20.22.0-1b3ca6425 \ 96 | aws-neuronx-tools=2.17.1.0 \ 97 | && rm -rf /var/lib/apt/lists/* \ 98 | && apt-get clean 99 | 100 | ENV PATH="/opt/bin/:/opt/aws/neuron/bin:${PATH}" 101 | 102 | RUN pip3 install \ 103 | neuronx-cc==2.13.66.0 \ 104 | torch-neuronx==2.1.2.2.1.0 \ 105 | transformers-neuronx==0.10.0.21 \ 106 | --extra-index-url=https://pip.repos.neuron.amazonaws.com 107 | 108 | # Install HuggingFace packages 109 | RUN pip3 install \ 110 | hf_transfer huggingface_hub 111 | 112 | # Install optimum-neuron 113 | COPY --from=optimum-neuron /optimum-neuron optimum-neuron 114 | RUN pip3 install ./optimum-neuron 115 | 116 | # TGI base env 117 | ENV HUGGINGFACE_HUB_CACHE=/tmp \ 118 | HF_HUB_ENABLE_HF_TRANSFER=1 \ 119 | PORT=80 120 | 121 | # Disable color logs as they are not supported by CloudWatch 122 | ENV LOGURU_COLORIZE=NO 123 | 124 | # Install router 125 | COPY --from=builder /usr/src/target/release/text-generation-router /usr/local/bin/text-generation-router 126 | # Install launcher 127 | COPY --from=builder /usr/src/target/release/text-generation-launcher /usr/local/bin/text-generation-launcher 128 | # Install python server 129 | COPY --from=pyserver /pyserver/build/dist dist 130 | RUN pip install dist/text_generation_server*.tar.gz 131 | 132 | # AWS Sagemaker compatible image 133 | FROM neuron as sagemaker 134 | 135 | COPY --from=optimum-neuron /optimum-neuron/text-generation-inference/sagemaker-entrypoint.sh entrypoint.sh 136 | RUN chmod +x entrypoint.sh 137 | 138 | ENTRYPOINT ["./entrypoint.sh"] 139 | 140 | 141 | RUN apt-get update && apt-get install -y --no-install-recommends curl unzip \ 142 | && rm -rf /var/lib/apt/lists/* 143 | RUN HOME_DIR=/root && \ 144 | pip install requests && \ 145 | curl -o ${HOME_DIR}/oss_compliance.zip https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip && \ 146 | unzip ${HOME_DIR}/oss_compliance.zip -d ${HOME_DIR}/ && \ 147 | cp ${HOME_DIR}/oss_compliance/test/testOSSCompliance /usr/local/bin/testOSSCompliance && \ 148 | chmod +x /usr/local/bin/testOSSCompliance && \ 149 | chmod +x ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh && \ 150 | ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh ${HOME_DIR} python && \ 151 | rm -rf ${HOME_DIR}/oss_compliance* 152 | 153 | RUN echo "N.B.: Although this image is released under the Apache-2.0 License, the Dockerfile used to build the image \ 154 | has an indirect documentation dependency on third party project. The \ 155 | project's licensing includes the license. \ 156 | \n\n\ 157 | N.B.: Although this image is released under the Apache-2.0 License, the Dockerfile used to build the image uses the \ 158 | third party project. The project's licensing \ 159 | includes the https://github.com/huggingface/text-generation-inference/blob/main/LICENSE> \ 160 | license." 
> /root/THIRD_PARTY_LICENSES 161 | 162 | LABEL dlc_major_version="1" 163 | LABEL com.amazonaws.ml.engines.sagemaker.dlc.framework.huggingface.tgi="true" 164 | LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port="true" -------------------------------------------------------------------------------- /huggingface/pytorch/optimum/docker/0.0.23/Dockerfile: -------------------------------------------------------------------------------- 1 | # Fetch and extract the TGI sources (TGI_VERSION is mandatory) 2 | FROM alpine AS tgi 3 | RUN mkdir -p /tgi 4 | ADD https://github.com/huggingface/text-generation-inference/archive/refs/tags/v2.0.2.tar.gz /tgi/sources.tar.gz 5 | RUN tar -C /tgi -xf /tgi/sources.tar.gz --strip-components=1 6 | 7 | # Build cargo components (adapted from TGI original Dockerfile) 8 | # Note that the build image is aligned on the same Linux version as the base image (Debian bookworm/ Ubuntu 22.04) 9 | FROM lukemathwalker/cargo-chef:latest-rust-1.75-bookworm AS chef 10 | WORKDIR /usr/src 11 | 12 | ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse 13 | 14 | FROM chef as planner 15 | COPY --from=tgi /tgi/Cargo.toml Cargo.toml 16 | COPY --from=tgi /tgi/rust-toolchain.toml rust-toolchain.toml 17 | COPY --from=tgi /tgi/proto proto 18 | COPY --from=tgi /tgi/benchmark benchmark 19 | COPY --from=tgi /tgi/router router 20 | COPY --from=tgi /tgi/launcher launcher 21 | RUN cargo chef prepare --recipe-path recipe.json 22 | 23 | FROM chef AS builder 24 | 25 | RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && \ 26 | curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v21.12/$PROTOC_ZIP && \ 27 | unzip -o $PROTOC_ZIP -d /usr/local bin/protoc && \ 28 | unzip -o $PROTOC_ZIP -d /usr/local 'include/*' && \ 29 | rm -f $PROTOC_ZIP 30 | 31 | COPY --from=planner /usr/src/recipe.json recipe.json 32 | RUN cargo chef cook --release --recipe-path recipe.json 33 | 34 | COPY --from=tgi /tgi/Cargo.toml Cargo.toml 35 | COPY --from=tgi /tgi/rust-toolchain.toml rust-toolchain.toml 36 | COPY --from=tgi /tgi/proto proto 37 | COPY --from=tgi /tgi/benchmark benchmark 38 | COPY --from=tgi /tgi/router router 39 | COPY --from=tgi /tgi/launcher launcher 40 | RUN cargo build --release --workspace --exclude benchmark 41 | 42 | # Fetch optimum-neuron sources 43 | FROM alpine/git AS optimum-neuron 44 | RUN git clone --depth 1 --branch v0.0.23 https://github.com/huggingface/optimum-neuron.git /optimum-neuron 45 | 46 | # Python base image 47 | FROM ubuntu:22.04 AS base 48 | 49 | RUN apt-get update -y \ 50 | && apt-get install -y --no-install-recommends \ 51 | python3-pip \ 52 | python3-setuptools \ 53 | python-is-python3 \ 54 | && rm -rf /var/lib/apt/lists/* \ 55 | && apt-get clean 56 | RUN pip3 --no-cache-dir install --upgrade pip 57 | 58 | # Python server build image 59 | FROM base AS pyserver 60 | 61 | RUN apt-get update -y \ 62 | && apt-get install -y --no-install-recommends \ 63 | make \ 64 | python3-venv \ 65 | && rm -rf /var/lib/apt/lists/* \ 66 | && apt-get clean 67 | 68 | RUN install -d /pyserver 69 | WORKDIR /pyserver 70 | COPY --from=optimum-neuron /optimum-neuron/text-generation-inference/server server 71 | COPY --from=tgi /tgi/proto proto 72 | RUN pip3 install -r server/build-requirements.txt 73 | RUN VERBOSE=1 BUILDDIR=/pyserver/build PROTODIR=/pyserver/proto make -C server gen-server 74 | 75 | # Neuron base image (used for deployment) 76 | FROM base AS neuron 77 | 78 | # Install system prerequisites 79 | RUN apt-get update -y \ 80 | && apt-get install -y --no-install-recommends \ 81 | 
gnupg2 \ 82 | wget \ 83 | python3-dev \ 84 | && rm -rf /var/lib/apt/lists/* \ 85 | && apt-get clean 86 | 87 | RUN echo "deb https://apt.repos.neuron.amazonaws.com jammy main" > /etc/apt/sources.list.d/neuron.list 88 | RUN wget -qO - https://apt.repos.neuron.amazonaws.com/GPG-PUB-KEY-AMAZON-AWS-NEURON.PUB | apt-key add - 89 | 90 | # Install neuronx packages 91 | RUN apt-get update -y \ 92 | && apt-get install -y --no-install-recommends \ 93 | aws-neuronx-dkms=2.16.7.0 \ 94 | aws-neuronx-collectives=2.20.22.0-c101c322e \ 95 | aws-neuronx-runtime-lib=2.20.22.0-1b3ca6425 \ 96 | aws-neuronx-tools=2.17.1.0 \ 97 | && rm -rf /var/lib/apt/lists/* \ 98 | && apt-get clean 99 | 100 | ENV PATH="/opt/bin/:/opt/aws/neuron/bin:${PATH}" 101 | 102 | RUN pip3 install \ 103 | neuronx-cc==2.13.66.0 \ 104 | torch-neuronx==2.1.2.2.1.0 \ 105 | transformers-neuronx==0.10.0.21 \ 106 | --extra-index-url=https://pip.repos.neuron.amazonaws.com 107 | 108 | # Install HuggingFace packages 109 | RUN pip3 install \ 110 | hf_transfer huggingface_hub 111 | 112 | # Install optimum-neuron 113 | COPY --from=optimum-neuron /optimum-neuron optimum-neuron 114 | RUN pip3 install ./optimum-neuron 115 | 116 | # TGI base env 117 | ENV HUGGINGFACE_HUB_CACHE=/tmp \ 118 | HF_HUB_ENABLE_HF_TRANSFER=1 \ 119 | PORT=80 120 | 121 | # Disable color logs as they are not supported by CloudWatch 122 | ENV LOGURU_COLORIZE=NO 123 | ENV LOG_COLORIZE=0 124 | 125 | # Install router 126 | COPY --from=builder /usr/src/target/release/text-generation-router /usr/local/bin/text-generation-router 127 | # Install launcher 128 | COPY --from=builder /usr/src/target/release/text-generation-launcher /usr/local/bin/text-generation-launcher 129 | # Install python server 130 | COPY --from=pyserver /pyserver/build/dist dist 131 | RUN pip install dist/text_generation_server*.tar.gz 132 | 133 | # AWS Sagemaker compatible image 134 | FROM neuron as sagemaker 135 | 136 | COPY --from=optimum-neuron /optimum-neuron/text-generation-inference/sagemaker-entrypoint.sh entrypoint.sh 137 | RUN chmod +x entrypoint.sh 138 | 139 | ENTRYPOINT ["./entrypoint.sh"] 140 | 141 | 142 | RUN apt-get update && apt-get install -y --no-install-recommends curl unzip \ 143 | && rm -rf /var/lib/apt/lists/* 144 | RUN HOME_DIR=/root && \ 145 | pip install requests && \ 146 | curl -o ${HOME_DIR}/oss_compliance.zip https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip && \ 147 | unzip ${HOME_DIR}/oss_compliance.zip -d ${HOME_DIR}/ && \ 148 | cp ${HOME_DIR}/oss_compliance/test/testOSSCompliance /usr/local/bin/testOSSCompliance && \ 149 | chmod +x /usr/local/bin/testOSSCompliance && \ 150 | chmod +x ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh && \ 151 | ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh ${HOME_DIR} python && \ 152 | rm -rf ${HOME_DIR}/oss_compliance* 153 | 154 | RUN echo "N.B.: Although this image is released under the Apache-2.0 License, the Dockerfile used to build the image \ 155 | has an indirect documentation dependency on third party project. The \ 156 | project's licensing includes the license. \ 157 | \n\n\ 158 | N.B.: Although this image is released under the Apache-2.0 License, the Dockerfile used to build the image uses the \ 159 | third party project. The project's licensing \ 160 | includes the https://github.com/huggingface/text-generation-inference/blob/main/LICENSE> \ 161 | license." 
> /root/THIRD_PARTY_LICENSES 162 | 163 | LABEL dlc_major_version="1" 164 | LABEL com.amazonaws.ml.engines.sagemaker.dlc.framework.huggingface.tgi="true" 165 | LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port="true" 166 | -------------------------------------------------------------------------------- /huggingface/pytorch/optimum/docker/0.0.24/Dockerfile: -------------------------------------------------------------------------------- 1 | # Fetch and extract the TGI sources (TGI_VERSION is mandatory) 2 | FROM alpine AS tgi 3 | RUN mkdir -p /tgi 4 | ADD https://github.com/huggingface/text-generation-inference/archive/refs/tags/v2.1.1.tar.gz /tgi/sources.tar.gz 5 | RUN tar -C /tgi -xf /tgi/sources.tar.gz --strip-components=1 6 | 7 | # Build cargo components (adapted from TGI original Dockerfile) 8 | # Note that the build image is aligned on the same Linux version as the base image (Debian bookworm/ Ubuntu 22.04) 9 | FROM lukemathwalker/cargo-chef:latest-rust-1.79-bookworm AS chef 10 | WORKDIR /usr/src 11 | 12 | ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse 13 | 14 | FROM chef as planner 15 | COPY --from=tgi /tgi/Cargo.toml Cargo.toml 16 | COPY --from=tgi /tgi/rust-toolchain.toml rust-toolchain.toml 17 | COPY --from=tgi /tgi/proto proto 18 | COPY --from=tgi /tgi/benchmark benchmark 19 | COPY --from=tgi /tgi/router router 20 | COPY --from=tgi /tgi/launcher launcher 21 | RUN cargo chef prepare --recipe-path recipe.json 22 | 23 | FROM chef AS builder 24 | 25 | RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && \ 26 | curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v21.12/$PROTOC_ZIP && \ 27 | unzip -o $PROTOC_ZIP -d /usr/local bin/protoc && \ 28 | unzip -o $PROTOC_ZIP -d /usr/local 'include/*' && \ 29 | rm -f $PROTOC_ZIP 30 | 31 | COPY --from=planner /usr/src/recipe.json recipe.json 32 | RUN cargo chef cook --release --recipe-path recipe.json 33 | 34 | COPY --from=tgi /tgi/Cargo.toml Cargo.toml 35 | COPY --from=tgi /tgi/rust-toolchain.toml rust-toolchain.toml 36 | COPY --from=tgi /tgi/proto proto 37 | COPY --from=tgi /tgi/benchmark benchmark 38 | COPY --from=tgi /tgi/router router 39 | COPY --from=tgi /tgi/launcher launcher 40 | # Remove this line once TGI has fixed the conflict 41 | RUN cargo update ureq --precise 2.9.7 42 | RUN cargo build --release --workspace --exclude benchmark 43 | 44 | # Fetch optimum-neuron sources 45 | FROM alpine/git AS optimum-neuron 46 | RUN git clone --depth 1 --branch v0.0.24 https://github.com/huggingface/optimum-neuron.git /optimum-neuron 47 | 48 | # Python base image 49 | FROM ubuntu:22.04 AS base 50 | 51 | RUN apt-get update -y \ 52 | && apt-get install -y --no-install-recommends \ 53 | python3-pip \ 54 | python3-setuptools \ 55 | python-is-python3 \ 56 | && rm -rf /var/lib/apt/lists/* \ 57 | && apt-get clean 58 | RUN pip3 --no-cache-dir install --upgrade pip 59 | 60 | # Python server build image 61 | FROM base AS pyserver 62 | 63 | RUN apt-get update -y \ 64 | && apt-get install -y --no-install-recommends \ 65 | make \ 66 | python3-venv \ 67 | && rm -rf /var/lib/apt/lists/* \ 68 | && apt-get clean 69 | 70 | RUN install -d /pyserver 71 | WORKDIR /pyserver 72 | COPY --from=optimum-neuron /optimum-neuron/text-generation-inference/server server 73 | COPY --from=tgi /tgi/proto proto 74 | RUN pip3 install -r server/build-requirements.txt 75 | RUN VERBOSE=1 BUILDDIR=/pyserver/build PROTODIR=/pyserver/proto make -C server gen-server 76 | 77 | # Neuron base image (used for deployment) 78 | FROM base AS neuron 79 | 80 | # Install system 
prerequisites 81 | RUN apt-get update -y \ 82 | && apt-get install -y --no-install-recommends \ 83 | gnupg2 \ 84 | wget \ 85 | python3-dev \ 86 | libexpat1 \ 87 | && rm -rf /var/lib/apt/lists/* \ 88 | && apt-get clean 89 | 90 | RUN echo "deb https://apt.repos.neuron.amazonaws.com jammy main" > /etc/apt/sources.list.d/neuron.list 91 | RUN wget -qO - https://apt.repos.neuron.amazonaws.com/GPG-PUB-KEY-AMAZON-AWS-NEURON.PUB | apt-key add - 92 | 93 | # Install neuronx packages 94 | RUN apt-get update -y \ 95 | && apt-get install -y --no-install-recommends \ 96 | aws-neuronx-dkms=2.17.17.0 \ 97 | aws-neuronx-collectives=2.21.46.0-69b77134b \ 98 | aws-neuronx-runtime-lib=2.21.41.0-fb1705f5f \ 99 | aws-neuronx-tools=2.18.3.0 \ 100 | && rm -rf /var/lib/apt/lists/* \ 101 | && apt-get clean 102 | 103 | ENV PATH="/opt/bin/:/opt/aws/neuron/bin:${PATH}" 104 | 105 | RUN pip3 install \ 106 | neuronx-cc==2.14.227.0 \ 107 | torch-neuronx==2.1.2.2.2.0 \ 108 | transformers-neuronx==0.11.351 \ 109 | --extra-index-url=https://pip.repos.neuron.amazonaws.com 110 | 111 | # Install HuggingFace packages 112 | RUN pip3 install \ 113 | hf_transfer huggingface_hub 114 | 115 | # Install optimum-neuron 116 | COPY --from=optimum-neuron /optimum-neuron optimum-neuron 117 | RUN pip3 install ./optimum-neuron 118 | 119 | # TGI base env 120 | ENV HUGGINGFACE_HUB_CACHE=/tmp \ 121 | HF_HUB_ENABLE_HF_TRANSFER=1 \ 122 | PORT=80 123 | 124 | # Disable color logs as they are not supported by CloudWatch 125 | ENV LOGURU_COLORIZE=NO 126 | ENV LOG_COLORIZE=0 127 | 128 | # Install router 129 | COPY --from=builder /usr/src/target/release/text-generation-router /usr/local/bin/text-generation-router 130 | # Install launcher 131 | COPY --from=builder /usr/src/target/release/text-generation-launcher /usr/local/bin/text-generation-launcher 132 | # Install python server 133 | COPY --from=pyserver /pyserver/build/dist dist 134 | RUN pip install dist/text_generation_server*.tar.gz 135 | 136 | # AWS Sagemaker compatible image 137 | FROM neuron as sagemaker 138 | 139 | COPY --from=optimum-neuron /optimum-neuron/text-generation-inference/sagemaker-entrypoint.sh entrypoint.sh 140 | RUN chmod +x entrypoint.sh 141 | 142 | ENTRYPOINT ["./entrypoint.sh"] 143 | 144 | 145 | RUN apt-get update && apt-get install -y --no-install-recommends curl unzip \ 146 | && rm -rf /var/lib/apt/lists/* 147 | RUN HOME_DIR=/root && \ 148 | pip install requests && \ 149 | curl -o ${HOME_DIR}/oss_compliance.zip https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip && \ 150 | unzip ${HOME_DIR}/oss_compliance.zip -d ${HOME_DIR}/ && \ 151 | cp ${HOME_DIR}/oss_compliance/test/testOSSCompliance /usr/local/bin/testOSSCompliance && \ 152 | chmod +x /usr/local/bin/testOSSCompliance && \ 153 | chmod +x ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh && \ 154 | ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh ${HOME_DIR} python && \ 155 | rm -rf ${HOME_DIR}/oss_compliance* 156 | 157 | RUN echo "N.B.: Although this image is released under the Apache-2.0 License, the Dockerfile used to build the image \ 158 | has an indirect documentation dependency on third party project. The \ 159 | project's licensing includes the license. \ 160 | \n\n\ 161 | N.B.: Although this image is released under the Apache-2.0 License, the Dockerfile used to build the image uses the \ 162 | third party project. The project's licensing \ 163 | includes the https://github.com/huggingface/text-generation-inference/blob/main/LICENSE> \ 164 | license." 
> /root/THIRD_PARTY_LICENSES 165 | 166 | LABEL dlc_major_version="1" 167 | LABEL com.amazonaws.ml.engines.sagemaker.dlc.framework.huggingface.tgi="true" 168 | LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port="true" -------------------------------------------------------------------------------- /huggingface/pytorch/optimum/docker/0.0.25/Dockerfile: -------------------------------------------------------------------------------- 1 | # Fetch and extract the TGI sources (TGI_VERSION is mandatory) 2 | FROM alpine AS tgi 3 | RUN mkdir -p /tgi 4 | ADD https://github.com/huggingface/text-generation-inference/archive/refs/tags/v2.1.1.tar.gz /tgi/sources.tar.gz 5 | RUN tar -C /tgi -xf /tgi/sources.tar.gz --strip-components=1 6 | 7 | # Build cargo components (adapted from TGI original Dockerfile) 8 | # Note that the build image is aligned on the same Linux version as the base image (Debian bookworm/ Ubuntu 22.04) 9 | FROM lukemathwalker/cargo-chef:latest-rust-1.79-bookworm AS chef 10 | WORKDIR /usr/src 11 | 12 | ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse 13 | 14 | FROM chef as planner 15 | COPY --from=tgi /tgi/Cargo.lock Cargo.lock 16 | COPY --from=tgi /tgi/Cargo.toml Cargo.toml 17 | COPY --from=tgi /tgi/rust-toolchain.toml rust-toolchain.toml 18 | COPY --from=tgi /tgi/proto proto 19 | COPY --from=tgi /tgi/benchmark benchmark 20 | COPY --from=tgi /tgi/router router 21 | COPY --from=tgi /tgi/launcher launcher 22 | RUN cargo chef prepare --recipe-path recipe.json 23 | 24 | FROM chef AS builder 25 | 26 | RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && \ 27 | curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v21.12/$PROTOC_ZIP && \ 28 | unzip -o $PROTOC_ZIP -d /usr/local bin/protoc && \ 29 | unzip -o $PROTOC_ZIP -d /usr/local 'include/*' && \ 30 | rm -f $PROTOC_ZIP 31 | 32 | COPY --from=planner /usr/src/recipe.json recipe.json 33 | RUN cargo chef cook --release --recipe-path recipe.json 34 | 35 | COPY --from=tgi /tgi/Cargo.lock Cargo.lock 36 | COPY --from=tgi /tgi/Cargo.toml Cargo.toml 37 | COPY --from=tgi /tgi/rust-toolchain.toml rust-toolchain.toml 38 | COPY --from=tgi /tgi/proto proto 39 | COPY --from=tgi /tgi/benchmark benchmark 40 | COPY --from=tgi /tgi/router router 41 | COPY --from=tgi /tgi/launcher launcher 42 | # Remove this line once TGI has fixed the conflict 43 | RUN cargo update ureq --precise 2.9.7 44 | RUN cargo build --release --workspace --exclude benchmark 45 | 46 | # Fetch optimum-neuron sources 47 | FROM alpine/git AS optimum-neuron 48 | RUN git clone --depth 1 --branch v0.0.25 https://github.com/huggingface/optimum-neuron.git /optimum-neuron 49 | 50 | # Python base image 51 | FROM ubuntu:22.04 AS base 52 | 53 | RUN apt-get update -y \ 54 | && apt-get install -y --no-install-recommends \ 55 | python3-pip \ 56 | python3-setuptools \ 57 | python-is-python3 \ 58 | && rm -rf /var/lib/apt/lists/* \ 59 | && apt-get clean 60 | RUN pip3 --no-cache-dir install --upgrade pip 61 | 62 | # Python server build image 63 | FROM base AS pyserver 64 | 65 | RUN apt-get update -y \ 66 | && apt-get install -y --no-install-recommends \ 67 | make \ 68 | python3-venv \ 69 | && rm -rf /var/lib/apt/lists/* \ 70 | && apt-get clean 71 | 72 | RUN install -d /pyserver 73 | WORKDIR /pyserver 74 | COPY --from=optimum-neuron /optimum-neuron/text-generation-inference/server server 75 | COPY --from=tgi /tgi/proto proto 76 | RUN pip3 install -r server/build-requirements.txt 77 | RUN VERBOSE=1 BUILDDIR=/pyserver/build PROTODIR=/pyserver/proto make -C server gen-server 78 | 79 | # 
Neuron base image (used for deployment) 80 | FROM base AS neuron 81 | 82 | # Install system prerequisites 83 | RUN apt-get update -y \ 84 | && apt-get install -y --no-install-recommends \ 85 | gnupg2 \ 86 | wget \ 87 | python3-dev \ 88 | libexpat1 \ 89 | && rm -rf /var/lib/apt/lists/* \ 90 | && apt-get clean 91 | 92 | RUN echo "deb https://apt.repos.neuron.amazonaws.com jammy main" > /etc/apt/sources.list.d/neuron.list 93 | RUN wget -qO - https://apt.repos.neuron.amazonaws.com/GPG-PUB-KEY-AMAZON-AWS-NEURON.PUB | apt-key add - 94 | 95 | # Install neuronx packages 96 | RUN apt-get update -y \ 97 | && apt-get install -y --no-install-recommends \ 98 | aws-neuronx-dkms=2.18.12.0 \ 99 | aws-neuronx-collectives=2.22.26.0-17a033bc8 \ 100 | aws-neuronx-runtime-lib=2.22.14.0-6e27b8d5b \ 101 | aws-neuronx-tools=2.19.0.0 \ 102 | libxml2 \ 103 | && rm -rf /var/lib/apt/lists/* \ 104 | && apt-get clean 105 | 106 | ENV PATH="/opt/bin/:/opt/aws/neuron/bin:${PATH}" 107 | 108 | RUN pip3 install \ 109 | neuronx-cc==2.15.128.0 \ 110 | torch-neuronx==2.1.2.2.3.0 \ 111 | transformers-neuronx==0.12.313 \ 112 | libneuronxla==2.0.4115.0 \ 113 | --extra-index-url=https://pip.repos.neuron.amazonaws.com 114 | 115 | # Install HuggingFace packages 116 | RUN pip3 install \ 117 | hf_transfer huggingface_hub 118 | 119 | # Install optimum-neuron 120 | COPY --from=optimum-neuron /optimum-neuron optimum-neuron 121 | RUN pip3 install ./optimum-neuron 122 | 123 | # TGI base env 124 | ENV HUGGINGFACE_HUB_CACHE=/tmp \ 125 | HF_HUB_ENABLE_HF_TRANSFER=1 \ 126 | PORT=80 127 | 128 | # Disable color logs as they are not supported by CloudWatch 129 | ENV LOGURU_COLORIZE=NO 130 | ENV LOG_COLORIZE=0 131 | 132 | # Install router 133 | COPY --from=builder /usr/src/target/release/text-generation-router /usr/local/bin/text-generation-router 134 | # Install launcher 135 | COPY --from=builder /usr/src/target/release/text-generation-launcher /usr/local/bin/text-generation-launcher 136 | # Install python server 137 | COPY --from=pyserver /pyserver/build/dist dist 138 | RUN pip install dist/text_generation_server*.tar.gz 139 | 140 | # AWS Sagemaker compatible image 141 | FROM neuron as sagemaker 142 | 143 | COPY --from=optimum-neuron /optimum-neuron/text-generation-inference/sagemaker-entrypoint.sh entrypoint.sh 144 | RUN chmod +x entrypoint.sh 145 | 146 | ENTRYPOINT ["./entrypoint.sh"] 147 | 148 | 149 | RUN apt-get update && apt-get install -y --no-install-recommends curl unzip \ 150 | && rm -rf /var/lib/apt/lists/* 151 | RUN HOME_DIR=/root && \ 152 | pip install requests && \ 153 | curl -o ${HOME_DIR}/oss_compliance.zip https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip && \ 154 | unzip ${HOME_DIR}/oss_compliance.zip -d ${HOME_DIR}/ && \ 155 | cp ${HOME_DIR}/oss_compliance/test/testOSSCompliance /usr/local/bin/testOSSCompliance && \ 156 | chmod +x /usr/local/bin/testOSSCompliance && \ 157 | chmod +x ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh && \ 158 | ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh ${HOME_DIR} python && \ 159 | rm -rf ${HOME_DIR}/oss_compliance* 160 | 161 | RUN echo "N.B.: Although this image is released under the Apache-2.0 License, the Dockerfile used to build the image \ 162 | has an indirect documentation dependency on third party project. The \ 163 | project's licensing includes the license. \ 164 | \n\n\ 165 | N.B.: Although this image is released under the Apache-2.0 License, the Dockerfile used to build the image uses the \ 166 | third party project. 
The project's licensing \ 167 | includes the https://github.com/huggingface/text-generation-inference/blob/main/LICENSE> \ 168 | license." > /root/THIRD_PARTY_LICENSES 169 | 170 | LABEL dlc_major_version="1" 171 | LABEL com.amazonaws.ml.engines.sagemaker.dlc.framework.huggingface.tgi="true" 172 | LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port="true" 173 | -------------------------------------------------------------------------------- /huggingface/pytorch/optimum/docker/0.0.27/Dockerfile: -------------------------------------------------------------------------------- 1 | # Fetch and extract the TGI sources 2 | FROM alpine AS tgi 3 | RUN mkdir -p /tgi 4 | ADD https://github.com/huggingface/text-generation-inference/archive/refs/tags/v3.0.0.tar.gz /tgi/sources.tar.gz 5 | RUN tar -C /tgi -xf /tgi/sources.tar.gz --strip-components=1 6 | 7 | # Fetch also the optimum-neuron sources that contain modified TGI sources 8 | FROM alpine AS optimum-neuron 9 | RUN mkdir -p /optimum-neuron 10 | ADD https://github.com/huggingface/optimum-neuron/archive/refs/tags/v0.0.27.tar.gz /optimum-neuron/sources.tar.gz 11 | RUN tar -C /optimum-neuron -xf /optimum-neuron/sources.tar.gz --strip-components=1 12 | 13 | # Build cargo components (adapted from TGI original Dockerfile) 14 | # Note: we cannot use the cargo-chef base image as it uses python 3.11 15 | FROM ubuntu:22.04 AS chef 16 | 17 | RUN apt-get update -y \ 18 | && apt-get install -y --no-install-recommends \ 19 | curl ca-certificates build-essential \ 20 | && rm -rf /var/lib/apt/lists/* \ 21 | && apt-get clean 22 | 23 | RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- --default-toolchain 1.80.1 --profile minimal -y 24 | ENV PATH="/root/.cargo/bin:${PATH}" 25 | RUN cargo install cargo-chef --locked 26 | 27 | WORKDIR /usr/src 28 | 29 | ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse 30 | 31 | FROM chef AS planner 32 | COPY --from=optimum-neuron /optimum-neuron/text-generation-inference/Cargo.toml Cargo.toml 33 | COPY --from=tgi /tgi/Cargo.lock Cargo.lock 34 | COPY --from=tgi /tgi/rust-toolchain.toml rust-toolchain.toml 35 | COPY --from=tgi /tgi/proto proto 36 | COPY --from=tgi /tgi/router router 37 | COPY --from=tgi /tgi/backends backends 38 | COPY --from=tgi /tgi/launcher launcher 39 | RUN cargo chef prepare --recipe-path recipe.json 40 | 41 | FROM chef AS builder 42 | 43 | RUN apt-get update -y \ 44 | && apt-get install -y --no-install-recommends \ 45 | unzip python3-dev libssl-dev pkg-config \ 46 | && rm -rf /var/lib/apt/lists/* \ 47 | && apt-get clean 48 | 49 | RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && \ 50 | curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v21.12/$PROTOC_ZIP && \ 51 | unzip -o $PROTOC_ZIP -d /usr/local bin/protoc && \ 52 | unzip -o $PROTOC_ZIP -d /usr/local 'include/*' && \ 53 | rm -f $PROTOC_ZIP 54 | 55 | COPY --from=optimum-neuron /optimum-neuron/text-generation-inference/Cargo.toml Cargo.toml 56 | COPY --from=planner /usr/src/recipe.json recipe.json 57 | RUN cargo chef cook --release --recipe-path recipe.json 58 | 59 | COPY --from=tgi /tgi/Cargo.lock Cargo.lock 60 | COPY --from=tgi /tgi/rust-toolchain.toml rust-toolchain.toml 61 | COPY --from=tgi /tgi/proto proto 62 | COPY --from=tgi /tgi/router router 63 | COPY --from=tgi /tgi/backends backends 64 | COPY --from=tgi /tgi/launcher launcher 65 | # Remove this line once TGI has fixed the conflict 66 | RUN cargo update ureq --precise 2.9.7 67 | RUN cargo build --release 68 | 69 | # Python base image 70 | FROM ubuntu:22.04 
AS base 71 | 72 | RUN apt-get update -y \ 73 | && apt-get install -y --no-install-recommends \ 74 | python3-pip \ 75 | python3-setuptools \ 76 | python-is-python3 \ 77 | && rm -rf /var/lib/apt/lists/* \ 78 | && apt-get clean 79 | RUN pip3 --no-cache-dir install --upgrade pip 80 | 81 | # Python server build image 82 | FROM base AS pyserver 83 | 84 | RUN apt-get update -y \ 85 | && apt-get install -y --no-install-recommends \ 86 | golang-go \ 87 | make \ 88 | python3-venv \ 89 | && rm -rf /var/lib/apt/lists/* \ 90 | && apt-get clean 91 | 92 | RUN install -d /pyserver 93 | WORKDIR /pyserver 94 | COPY --from=optimum-neuron /optimum-neuron/text-generation-inference/server server 95 | COPY --from=tgi /tgi/proto proto 96 | RUN pip3 install -r server/build-requirements.txt 97 | RUN VERBOSE=1 BUILDDIR=/pyserver/build PROTODIR=/pyserver/proto make -C server gen-server 98 | 99 | # Neuron base image (used for deployment) 100 | FROM base AS neuron 101 | 102 | # Install system prerequisites 103 | RUN apt-get update -y \ 104 | && apt-get install -y --no-install-recommends \ 105 | gnupg2 \ 106 | wget \ 107 | python3-dev \ 108 | libexpat1 \ 109 | && rm -rf /var/lib/apt/lists/* \ 110 | && apt-get clean 111 | 112 | RUN echo "deb https://apt.repos.neuron.amazonaws.com jammy main" > /etc/apt/sources.list.d/neuron.list 113 | RUN wget -qO - https://apt.repos.neuron.amazonaws.com/GPG-PUB-KEY-AMAZON-AWS-NEURON.PUB | apt-key add - 114 | 115 | # Install neuronx packages 116 | RUN apt-get update -y \ 117 | && apt-get install -y --no-install-recommends \ 118 | aws-neuronx-dkms=2.18.20.0 \ 119 | aws-neuronx-collectives=2.22.33.0-d2128d1aa \ 120 | aws-neuronx-runtime-lib=2.22.19.0-5856c0b42 \ 121 | aws-neuronx-tools=2.19.0.0 \ 122 | libxml2 \ 123 | && rm -rf /var/lib/apt/lists/* \ 124 | && apt-get clean 125 | 126 | ENV PATH="/opt/bin/:/opt/aws/neuron/bin:${PATH}" 127 | 128 | RUN pip3 install \ 129 | neuronx-cc==2.15.143.0 \ 130 | torch-neuronx==2.1.2.2.3.2 \ 131 | transformers-neuronx==0.12.313 \ 132 | libneuronxla==2.0.5347.0 \ 133 | --extra-index-url=https://pip.repos.neuron.amazonaws.com 134 | 135 | # Install HuggingFace packages 136 | RUN pip3 install \ 137 | hf_transfer huggingface_hub 138 | 139 | # Install optimum-neuron 140 | COPY --from=optimum-neuron /optimum-neuron optimum-neuron 141 | RUN pip3 install ./optimum-neuron 142 | 143 | # TGI base env 144 | ENV HUGGINGFACE_HUB_CACHE=/tmp \ 145 | HF_HUB_ENABLE_HF_TRANSFER=1 \ 146 | PORT=80 147 | 148 | # Disable color logs as they are not supported by CloudWatch 149 | ENV LOGURU_COLORIZE=NO 150 | ENV LOG_COLORIZE=0 151 | 152 | # Install router 153 | COPY --from=builder /usr/src/target/release/text-generation-router-v2 /usr/local/bin/text-generation-router 154 | # Install launcher 155 | COPY --from=builder /usr/src/target/release/text-generation-launcher /usr/local/bin/text-generation-launcher 156 | # Install python server 157 | COPY --from=pyserver /pyserver/build/dist dist 158 | RUN pip install dist/text_generation_server*.tar.gz 159 | 160 | # AWS Sagemaker compatible image 161 | FROM neuron as sagemaker 162 | 163 | COPY --from=optimum-neuron /optimum-neuron/text-generation-inference/sagemaker-entrypoint.sh entrypoint.sh 164 | RUN chmod +x entrypoint.sh 165 | 166 | ENTRYPOINT ["./entrypoint.sh"] 167 | 168 | 169 | RUN apt-get update && apt-get install -y --no-install-recommends curl unzip \ 170 | && rm -rf /var/lib/apt/lists/* 171 | RUN HOME_DIR=/root && \ 172 | pip install requests && \ 173 | curl -o ${HOME_DIR}/oss_compliance.zip 
https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip && \ 174 | unzip ${HOME_DIR}/oss_compliance.zip -d ${HOME_DIR}/ && \ 175 | cp ${HOME_DIR}/oss_compliance/test/testOSSCompliance /usr/local/bin/testOSSCompliance && \ 176 | chmod +x /usr/local/bin/testOSSCompliance && \ 177 | chmod +x ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh && \ 178 | ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh ${HOME_DIR} python && \ 179 | rm -rf ${HOME_DIR}/oss_compliance* 180 | 181 | RUN echo "N.B.: Although this image is released under the Apache-2.0 License, the Dockerfile used to build the image \ 182 | has an indirect documentation dependency on third party project. The \ 183 | project's licensing includes the license. \ 184 | \n\n\ 185 | N.B.: Although this image is released under the Apache-2.0 License, the Dockerfile used to build the image uses the \ 186 | third party project. The project's licensing \ 187 | includes the https://github.com/huggingface/text-generation-inference/blob/main/LICENSE> \ 188 | license." > /root/THIRD_PARTY_LICENSES 189 | 190 | LABEL dlc_major_version="1" 191 | LABEL com.amazonaws.ml.engines.sagemaker.dlc.framework.huggingface.tgi="true" 192 | LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port="true" 193 | -------------------------------------------------------------------------------- /huggingface/pytorch/optimum/docker/0.0.28/Dockerfile: -------------------------------------------------------------------------------- 1 | # Fetch and extract the TGI sources 2 | FROM alpine AS tgi 3 | RUN mkdir -p /tgi 4 | ADD https://github.com/huggingface/text-generation-inference/archive/refs/tags/v3.0.0.tar.gz /tgi/sources.tar.gz 5 | RUN tar -C /tgi -xf /tgi/sources.tar.gz --strip-components=1 6 | 7 | # Fetch also the optimum-neuron sources that contain modified TGI sources 8 | FROM alpine AS optimum-neuron 9 | RUN mkdir -p /optimum-neuron 10 | ADD https://github.com/huggingface/optimum-neuron/archive/refs/tags/v0.0.28.tar.gz /optimum-neuron/sources.tar.gz 11 | RUN tar -C /optimum-neuron -xf /optimum-neuron/sources.tar.gz --strip-components=1 12 | 13 | # Build cargo components (adapted from TGI original Dockerfile) 14 | # Note: we cannot use the cargo-chef base image as it uses python 3.11 15 | FROM ubuntu:22.04 AS chef 16 | 17 | RUN apt-get update -y \ 18 | && apt-get install -y --no-install-recommends \ 19 | curl ca-certificates build-essential \ 20 | && rm -rf /var/lib/apt/lists/* \ 21 | && apt-get clean 22 | 23 | RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- --default-toolchain 1.80.1 --profile minimal -y 24 | ENV PATH="/root/.cargo/bin:${PATH}" 25 | RUN cargo install cargo-chef --locked 26 | 27 | WORKDIR /usr/src 28 | 29 | ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse 30 | 31 | FROM chef AS planner 32 | COPY --from=optimum-neuron /optimum-neuron/text-generation-inference/Cargo.toml Cargo.toml 33 | COPY --from=tgi /tgi/Cargo.lock Cargo.lock 34 | COPY --from=tgi /tgi/rust-toolchain.toml rust-toolchain.toml 35 | COPY --from=tgi /tgi/proto proto 36 | COPY --from=tgi /tgi/router router 37 | COPY --from=tgi /tgi/backends backends 38 | COPY --from=tgi /tgi/launcher launcher 39 | RUN cargo chef prepare --recipe-path recipe.json 40 | 41 | FROM chef AS builder 42 | 43 | RUN apt-get update -y \ 44 | && apt-get install -y --no-install-recommends \ 45 | unzip python3-dev libssl-dev pkg-config \ 46 | && rm -rf /var/lib/apt/lists/* \ 47 | && apt-get clean 48 | 49 | RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && \ 50 | curl -OL 
https://github.com/protocolbuffers/protobuf/releases/download/v21.12/$PROTOC_ZIP && \ 51 | unzip -o $PROTOC_ZIP -d /usr/local bin/protoc && \ 52 | unzip -o $PROTOC_ZIP -d /usr/local 'include/*' && \ 53 | rm -f $PROTOC_ZIP 54 | 55 | COPY --from=optimum-neuron /optimum-neuron/text-generation-inference/Cargo.toml Cargo.toml 56 | COPY --from=planner /usr/src/recipe.json recipe.json 57 | RUN cargo chef cook --release --recipe-path recipe.json 58 | 59 | COPY --from=tgi /tgi/Cargo.lock Cargo.lock 60 | COPY --from=tgi /tgi/rust-toolchain.toml rust-toolchain.toml 61 | COPY --from=tgi /tgi/proto proto 62 | COPY --from=tgi /tgi/router router 63 | COPY --from=tgi /tgi/backends backends 64 | COPY --from=tgi /tgi/launcher launcher 65 | # Remove this line once TGI has fixed the conflict 66 | RUN cargo update ureq --precise 2.9.7 67 | RUN cargo build --release 68 | 69 | # Python base image 70 | FROM ubuntu:22.04 AS base 71 | 72 | RUN apt-get update -y \ 73 | && apt-get install -y --no-install-recommends \ 74 | python3-pip \ 75 | python3-setuptools \ 76 | python-is-python3 \ 77 | && rm -rf /var/lib/apt/lists/* \ 78 | && apt-get clean 79 | RUN pip3 --no-cache-dir install --upgrade pip 80 | 81 | # Python server build image 82 | FROM base AS pyserver 83 | 84 | RUN apt-get update -y \ 85 | && apt-get install -y --no-install-recommends \ 86 | make \ 87 | python3-venv \ 88 | && rm -rf /var/lib/apt/lists/* \ 89 | && apt-get clean 90 | 91 | RUN install -d /pyserver 92 | WORKDIR /pyserver 93 | COPY --from=optimum-neuron /optimum-neuron/text-generation-inference/server server 94 | COPY --from=tgi /tgi/proto proto 95 | RUN pip3 install -r server/build-requirements.txt 96 | RUN VERBOSE=1 BUILDDIR=/pyserver/build PROTODIR=/pyserver/proto make -C server gen-server 97 | 98 | # Neuron base image (used for deployment) 99 | FROM base AS neuron 100 | 101 | # Install system prerequisites 102 | RUN apt-get update -y \ 103 | && apt-get install -y --no-install-recommends \ 104 | gnupg2 \ 105 | wget \ 106 | python3-dev \ 107 | libexpat1 \ 108 | && rm -rf /var/lib/apt/lists/* \ 109 | && apt-get clean 110 | 111 | RUN echo "deb https://apt.repos.neuron.amazonaws.com jammy main" > /etc/apt/sources.list.d/neuron.list 112 | RUN wget -qO - https://apt.repos.neuron.amazonaws.com/GPG-PUB-KEY-AMAZON-AWS-NEURON.PUB | apt-key add - 113 | 114 | # Install neuronx packages 115 | RUN apt-get update -y \ 116 | && apt-get install -y --no-install-recommends \ 117 | aws-neuronx-dkms=2.18.20.0 \ 118 | aws-neuronx-collectives=2.22.33.0-d2128d1aa \ 119 | aws-neuronx-runtime-lib=2.22.19.0-5856c0b42 \ 120 | aws-neuronx-tools=2.19.0.0 \ 121 | libxml2 \ 122 | && rm -rf /var/lib/apt/lists/* \ 123 | && apt-get clean 124 | 125 | ENV PATH="/opt/bin/:/opt/aws/neuron/bin:${PATH}" 126 | 127 | RUN pip3 install \ 128 | neuronx-cc==2.15.143.0 \ 129 | torch-neuronx==2.1.2.2.3.2 \ 130 | transformers-neuronx==0.12.313 \ 131 | libneuronxla==2.0.5347.0 \ 132 | --extra-index-url=https://pip.repos.neuron.amazonaws.com 133 | 134 | # Install HuggingFace packages 135 | RUN pip3 install \ 136 | hf_transfer huggingface_hub 137 | 138 | # Install optimum-neuron 139 | COPY --from=optimum-neuron /optimum-neuron optimum-neuron 140 | RUN pip3 install ./optimum-neuron 141 | 142 | # TGI base env 143 | ENV HUGGINGFACE_HUB_CACHE=/tmp \ 144 | HF_HUB_ENABLE_HF_TRANSFER=1 \ 145 | PORT=80 146 | 147 | # Disable color logs as they are not supported by CloudWatch 148 | ENV LOGURU_COLORIZE=NO 149 | ENV LOG_COLORIZE=0 150 | 151 | # Install router 152 | COPY --from=builder 
/usr/src/target/release/text-generation-router-v2 /usr/local/bin/text-generation-router 153 | # Install launcher 154 | COPY --from=builder /usr/src/target/release/text-generation-launcher /usr/local/bin/text-generation-launcher 155 | # Install python server 156 | COPY --from=pyserver /pyserver/build/dist dist 157 | RUN pip install dist/text_generation_server*.tar.gz 158 | 159 | # AWS Sagemaker compatible image 160 | FROM neuron as sagemaker 161 | 162 | COPY --from=optimum-neuron /optimum-neuron/text-generation-inference/sagemaker-entrypoint.sh entrypoint.sh 163 | RUN chmod +x entrypoint.sh 164 | 165 | ENTRYPOINT ["./entrypoint.sh"] 166 | 167 | 168 | RUN apt-get update && apt-get install -y --no-install-recommends curl unzip \ 169 | && rm -rf /var/lib/apt/lists/* 170 | RUN HOME_DIR=/root && \ 171 | pip install requests && \ 172 | curl -o ${HOME_DIR}/oss_compliance.zip https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip && \ 173 | unzip ${HOME_DIR}/oss_compliance.zip -d ${HOME_DIR}/ && \ 174 | cp ${HOME_DIR}/oss_compliance/test/testOSSCompliance /usr/local/bin/testOSSCompliance && \ 175 | chmod +x /usr/local/bin/testOSSCompliance && \ 176 | chmod +x ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh && \ 177 | ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh ${HOME_DIR} python && \ 178 | rm -rf ${HOME_DIR}/oss_compliance* 179 | 180 | RUN echo "N.B.: Although this image is released under the Apache-2.0 License, the Dockerfile used to build the image \ 181 | has an indirect documentation dependency on third party project. The \ 182 | project's licensing includes the license. \ 183 | \n\n\ 184 | N.B.: Although this image is released under the Apache-2.0 License, the Dockerfile used to build the image uses the \ 185 | third party project. The project's licensing \ 186 | includes the https://github.com/huggingface/text-generation-inference/blob/main/LICENSE> \ 187 | license." 
> /root/THIRD_PARTY_LICENSES 188 | 189 | LABEL dlc_major_version="1" 190 | LABEL com.amazonaws.ml.engines.sagemaker.dlc.framework.huggingface.tgi="true" 191 | LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port="true" 192 | -------------------------------------------------------------------------------- /huggingface/pytorch/tei/docker/1.2.3/cpu/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM lukemathwalker/cargo-chef:latest-rust-1.75-bookworm AS chef 2 | WORKDIR /usr/src 3 | 4 | ENV SCCACHE=0.5.4 5 | ENV RUSTC_WRAPPER=/usr/local/bin/sccache 6 | 7 | # Download and configure sccache 8 | RUN curl -fsSL https://github.com/mozilla/sccache/releases/download/v$SCCACHE/sccache-v$SCCACHE-x86_64-unknown-linux-musl.tar.gz | tar -xzv --strip-components=1 -C /usr/local/bin sccache-v$SCCACHE-x86_64-unknown-linux-musl/sccache && \ 9 | chmod +x /usr/local/bin/sccache 10 | 11 | FROM chef AS planner 12 | 13 | COPY backends backends 14 | COPY core core 15 | COPY router router 16 | COPY Cargo.toml ./ 17 | COPY Cargo.lock ./ 18 | 19 | RUN cargo chef prepare --recipe-path recipe.json 20 | 21 | FROM chef AS builder 22 | 23 | ARG GIT_SHA 24 | ARG DOCKER_LABEL 25 | 26 | # sccache specific variables 27 | ARG ACTIONS_CACHE_URL 28 | ARG ACTIONS_RUNTIME_TOKEN 29 | ARG SCCACHE_GHA_ENABLED 30 | 31 | RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB \ 32 | | gpg --dearmor | tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null && \ 33 | echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | \ 34 | tee /etc/apt/sources.list.d/oneAPI.list 35 | 36 | RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ 37 | intel-oneapi-mkl-devel=2024.0.0-49656 \ 38 | build-essential \ 39 | && rm -rf /var/lib/apt/lists/* 40 | 41 | RUN echo "int mkl_serv_intel_cpu_true() {return 1;}" > fakeintel.c && \ 42 | gcc -shared -fPIC -o libfakeintel.so fakeintel.c 43 | 44 | COPY --from=planner /usr/src/recipe.json recipe.json 45 | 46 | RUN cargo chef cook --release --features candle --features mkl-dynamic --no-default-features --recipe-path recipe.json && sccache -s 47 | 48 | COPY backends backends 49 | COPY core core 50 | COPY router router 51 | COPY Cargo.toml ./ 52 | COPY Cargo.lock ./ 53 | 54 | FROM builder as http-builder 55 | 56 | RUN cargo build --release --bin text-embeddings-router -F candle -F mkl-dynamic -F http --no-default-features && sccache -s 57 | 58 | FROM builder as grpc-builder 59 | 60 | RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && \ 61 | curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v21.12/$PROTOC_ZIP && \ 62 | unzip -o $PROTOC_ZIP -d /usr/local bin/protoc && \ 63 | unzip -o $PROTOC_ZIP -d /usr/local 'include/*' && \ 64 | rm -f $PROTOC_ZIP 65 | 66 | COPY proto proto 67 | 68 | RUN cargo build --release --bin text-embeddings-router -F grpc -F candle -F mkl-dynamic --no-default-features && sccache -s 69 | 70 | FROM debian:bookworm-slim as base 71 | 72 | ENV HUGGINGFACE_HUB_CACHE=/data \ 73 | PORT=80 \ 74 | MKL_ENABLE_INSTRUCTIONS=AVX512_E4 \ 75 | RAYON_NUM_THREADS=8 \ 76 | LD_PRELOAD=/usr/local/libfakeintel.so \ 77 | LD_LIBRARY_PATH=/usr/local/lib 78 | 79 | RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ 80 | libomp-dev \ 81 | ca-certificates \ 82 | libssl-dev \ 83 | curl \ 84 | && rm -rf /var/lib/apt/lists/* 85 | 86 | # Copy a lot of the 
Intel shared objects because of the mkl_serv_intel_cpu_true patch... 87 | COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_intel_lp64.so.2 /usr/local/lib/libmkl_intel_lp64.so.2 88 | COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_intel_thread.so.2 /usr/local/lib/libmkl_intel_thread.so.2 89 | COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_core.so.2 /usr/local/lib/libmkl_core.so.2 90 | COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_vml_def.so.2 /usr/local/lib/libmkl_vml_def.so.2 91 | COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_def.so.2 /usr/local/lib/libmkl_def.so.2 92 | COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_vml_avx2.so.2 /usr/local/lib/libmkl_vml_avx2.so.2 93 | COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_vml_avx512.so.2 /usr/local/lib/libmkl_vml_avx512.so.2 94 | COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_avx2.so.2 /usr/local/lib/libmkl_avx2.so.2 95 | COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_avx512.so.2 /usr/local/lib/libmkl_avx512.so.2 96 | COPY --from=builder /usr/src/libfakeintel.so /usr/local/libfakeintel.so 97 | 98 | FROM base as grpc 99 | 100 | COPY --from=grpc-builder /usr/src/target/release/text-embeddings-router /usr/local/bin/text-embeddings-router 101 | 102 | ENTRYPOINT ["text-embeddings-router"] 103 | CMD ["--json-output"] 104 | 105 | FROM base AS http 106 | 107 | COPY --from=http-builder /usr/src/target/release/text-embeddings-router /usr/local/bin/text-embeddings-router 108 | 109 | # Amazon SageMaker compatible image 110 | FROM http as sagemaker 111 | COPY --chmod=775 sagemaker-entrypoint.sh entrypoint.sh 112 | 113 | ENTRYPOINT ["./entrypoint.sh"] 114 | 115 | # Default image 116 | FROM http 117 | 118 | ENTRYPOINT ["text-embeddings-router"] 119 | CMD ["--json-output"] -------------------------------------------------------------------------------- /huggingface/pytorch/tei/docker/1.2.3/gpu/Dockerfile: -------------------------------------------------------------------------------- 1 | # Use the official NVIDIA CUDA base image 2 | FROM nvidia/cuda:12.2.0-devel-ubuntu22.04 AS base-builder 3 | 4 | ENV SCCACHE=0.5.4 5 | ENV RUSTC_WRAPPER=/usr/local/bin/sccache 6 | ENV PATH="/root/.cargo/bin:${PATH}" 7 | 8 | RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ 9 | curl \ 10 | libssl-dev \ 11 | pkg-config \ 12 | wget \ 13 | gnupg2 14 | 15 | # Download and configure sccache 16 | RUN curl -fsSL https://github.com/mozilla/sccache/releases/download/v$SCCACHE/sccache-v$SCCACHE-x86_64-unknown-linux-musl.tar.gz | tar -xzv --strip-components=1 -C /usr/local/bin sccache-v$SCCACHE-x86_64-unknown-linux-musl/sccache && \ 17 | chmod +x /usr/local/bin/sccache 18 | 19 | RUN curl https://sh.rustup.rs -sSf | bash -s -- -y 20 | RUN cargo install cargo-chef --locked 21 | 22 | FROM base-builder AS planner 23 | 24 | WORKDIR /usr/src 25 | 26 | COPY backends backends 27 | COPY core core 28 | COPY router router 29 | COPY Cargo.toml ./ 30 | COPY Cargo.lock ./ 31 | 32 | RUN cargo chef prepare --recipe-path recipe.json 33 | 34 | FROM base-builder AS builder 35 | 36 | ARG GIT_SHA 37 | ARG DOCKER_LABEL 38 | ARG VERTEX="false" 39 | 40 | # sccache specific variables 41 | ARG ACTIONS_CACHE_URL 42 | ARG ACTIONS_RUNTIME_TOKEN 43 | ARG SCCACHE_GHA_ENABLED 44 | 45 | # limit the number of kernels built at the same time 46 | ARG RAYON_NUM_THREADS=4 47 | 48 | WORKDIR 
/usr/src 49 | 50 | COPY --from=planner /usr/src/recipe.json recipe.json 51 | 52 | RUN if [ $VERTEX = "true" ]; \ 53 | then \ 54 | cargo chef cook --release --features google --recipe-path recipe.json && sccache -s; \ 55 | else \ 56 | cargo chef cook --release --recipe-path recipe.json && sccache -s; \ 57 | fi; 58 | 59 | RUN if [ $VERTEX = "true" ]; \ 60 | then \ 61 | CUDA_COMPUTE_CAP=75 cargo chef cook --release --features google --features candle-cuda-turing --recipe-path recipe.json && sccache -s; \ 62 | else \ 63 | CUDA_COMPUTE_CAP=75 cargo chef cook --release --features candle-cuda-turing --recipe-path recipe.json && sccache -s; \ 64 | fi; 65 | 66 | RUN if [ $VERTEX = "true" ]; \ 67 | then \ 68 | CUDA_COMPUTE_CAP=80 cargo chef cook --release --features google --features candle-cuda --recipe-path recipe.json && sccache -s; \ 69 | else \ 70 | CUDA_COMPUTE_CAP=80 cargo chef cook --release --features candle-cuda --recipe-path recipe.json && sccache -s; \ 71 | fi; 72 | 73 | RUN if [ $VERTEX = "true" ]; \ 74 | then \ 75 | CUDA_COMPUTE_CAP=90 cargo chef cook --release --features google --features candle-cuda --recipe-path recipe.json && sccache -s; \ 76 | else \ 77 | CUDA_COMPUTE_CAP=90 cargo chef cook --release --features candle-cuda --recipe-path recipe.json && sccache -s; \ 78 | fi; 79 | 80 | COPY backends backends 81 | COPY core core 82 | COPY router router 83 | COPY Cargo.toml ./ 84 | COPY Cargo.lock ./ 85 | 86 | RUN if [ $VERTEX = "true" ]; \ 87 | then \ 88 | CUDA_COMPUTE_CAP=75 cargo build --release --bin text-embeddings-router -F candle-cuda-turing -F google && sccache -s; \ 89 | else \ 90 | CUDA_COMPUTE_CAP=75 cargo build --release --bin text-embeddings-router -F candle-cuda-turing && sccache -s; \ 91 | fi; 92 | 93 | RUN mv /usr/src/target/release/text-embeddings-router /usr/src/target/release/text-embeddings-router-75 94 | 95 | RUN if [ $VERTEX = "true" ]; \ 96 | then \ 97 | CUDA_COMPUTE_CAP=80 cargo build --release --bin text-embeddings-router -F candle-cuda -F google && sccache -s; \ 98 | else \ 99 | CUDA_COMPUTE_CAP=80 cargo build --release --bin text-embeddings-router -F candle-cuda && sccache -s; \ 100 | fi; 101 | 102 | RUN mv /usr/src/target/release/text-embeddings-router /usr/src/target/release/text-embeddings-router-80 103 | 104 | RUN if [ $VERTEX = "true" ]; \ 105 | then \ 106 | CUDA_COMPUTE_CAP=90 cargo build --release --bin text-embeddings-router -F candle-cuda -F google && sccache -s; \ 107 | else \ 108 | CUDA_COMPUTE_CAP=90 cargo build --release --bin text-embeddings-router -F candle-cuda && sccache -s; \ 109 | fi; 110 | 111 | RUN mv /usr/src/target/release/text-embeddings-router /usr/src/target/release/text-embeddings-router-90 112 | 113 | FROM nvidia/cuda:12.2.0-runtime-ubuntu22.04 as base 114 | 115 | ARG DEFAULT_USE_FLASH_ATTENTION=True 116 | 117 | ENV HUGGINGFACE_HUB_CACHE=/data \ 118 | PORT=80 \ 119 | USE_FLASH_ATTENTION=$DEFAULT_USE_FLASH_ATTENTION 120 | 121 | # Install nvidia-smi and other necessary utilities 122 | RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ 123 | wget \ 124 | libtemplate-perl \ 125 | perl 126 | 127 | COPY --from=builder /usr/src/target/release/text-embeddings-router-75 /usr/local/bin/text-embeddings-router-75 128 | COPY --from=builder /usr/src/target/release/text-embeddings-router-80 /usr/local/bin/text-embeddings-router-80 129 | COPY --from=builder /usr/src/target/release/text-embeddings-router-90 /usr/local/bin/text-embeddings-router-90 130 | 131 | # Amazon SageMaker compatible image 132 | 
FROM base AS sagemaker 133 | 134 | COPY --chmod=775 /huggingface/pytorch/tei/docker/1.2.3/gpu/sagemaker-entrypoint-cuda-all.sh entrypoint.sh 135 | 136 | ENTRYPOINT ["./entrypoint.sh"] 137 | 138 | # Default image 139 | FROM base 140 | 141 | COPY --chmod=775 cuda-all-entrypoint.sh entrypoint.sh 142 | 143 | ENTRYPOINT ["./entrypoint.sh"] 144 | CMD ["--json-output"] -------------------------------------------------------------------------------- /huggingface/pytorch/tei/docker/1.2.3/gpu/sagemaker-entrypoint-cuda-all.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | verlte() { 4 | [ "$1" = "$2" ] && return 1 || [ "$2" = "$(echo -e "$1\n$2" | sort -V | head -n1)" ] 5 | } 6 | 7 | if [ -f /usr/local/cuda/compat/libcuda.so.1 ]; then 8 | CUDA_COMPAT_MAX_DRIVER_VERSION=$(readlink /usr/local/cuda/compat/libcuda.so.1 | cut -d"." -f 3-) 9 | echo "CUDA compat package requires Nvidia driver ≤${CUDA_COMPAT_MAX_DRIVER_VERSION}" 10 | cat /proc/driver/nvidia/version 11 | NVIDIA_DRIVER_VERSION=$(sed -n 's/^NVRM.*Kernel Module *\([0-9.]*\).*$/\1/p' /proc/driver/nvidia/version 2>/dev/null || true) 12 | echo "Current installed Nvidia driver version is ${NVIDIA_DRIVER_VERSION}" 13 | if [ $(verlte "$CUDA_COMPAT_MAX_DRIVER_VERSION" "$NVIDIA_DRIVER_VERSION") ]; then 14 | echo "Setup CUDA compatibility libs path to LD_LIBRARY_PATH" 15 | export LD_LIBRARY_PATH=/usr/local/cuda/compat:$LD_LIBRARY_PATH 16 | echo $LD_LIBRARY_PATH 17 | else 18 | echo "Skip CUDA compat libs setup as newer Nvidia driver is installed" 19 | fi 20 | else 21 | echo "Skip CUDA compat libs setup as package not found" 22 | fi 23 | 24 | if [[ -z "${HF_MODEL_ID}" ]]; then 25 | echo "HF_MODEL_ID must be set" 26 | exit 1 27 | fi 28 | 29 | export MODEL_ID="${HF_MODEL_ID}" 30 | 31 | if [[ -n "${HF_MODEL_REVISION}" ]]; then 32 | export REVISION="${HF_MODEL_REVISION}" 33 | fi 34 | 35 | if ! command -v nvidia-smi &> /dev/null; then 36 | echo "Error: 'nvidia-smi' command not found." 37 | exit 1 38 | fi 39 | 40 | # Query GPU name using nvidia-smi 41 | gpu_name=$(nvidia-smi --query-gpu=gpu_name --format=csv | awk 'NR==2') 42 | if [ $? -ne 0 ]; then 43 | echo "Error: $gpu_name" 44 | echo "Query gpu_name failed" 45 | else 46 | echo "Query gpu_name succeeded. 
Printing output: $gpu_name" 47 | fi 48 | 49 | # Function to get compute capability based on GPU name 50 | get_compute_cap() { 51 | gpu_name="$1" 52 | 53 | # Check if the GPU name contains "A10G" 54 | if [[ "$gpu_name" == *"A10G"* ]]; then 55 | echo "86" 56 | # Check if the GPU name contains "A100" 57 | elif [[ "$gpu_name" == *"A100"* ]]; then 58 | echo "80" 59 | # Check if the GPU name contains "H100" 60 | elif [[ "$gpu_name" == *"H100"* ]]; then 61 | echo "90" 62 | # Cover Nvidia T4 63 | elif [[ "$gpu_name" == *"T4"* ]]; then 64 | echo "75" 65 | # Cover Nvidia L4 66 | elif [[ "$gpu_name" == *"L4"* ]]; then 67 | echo "89" 68 | else 69 | echo "80" # Default compute capability 70 | fi 71 | } 72 | 73 | if [[ -z "${CUDA_COMPUTE_CAP}" ]] 74 | then 75 | compute_cap=$(get_compute_cap "$gpu_name") 76 | echo "the compute_cap is $compute_cap" 77 | else 78 | compute_cap=$CUDA_COMPUTE_CAP 79 | fi 80 | 81 | if [[ ${compute_cap} -eq 75 ]] 82 | then 83 | text-embeddings-router-75 --port 8080 --json-output 84 | elif [[ ${compute_cap} -ge 80 && ${compute_cap} -lt 90 ]] 85 | then 86 | text-embeddings-router-80 --port 8080 --json-output 87 | elif [[ ${compute_cap} -eq 90 ]] 88 | then 89 | text-embeddings-router-90 --port 8080 --json-output 90 | else 91 | echo "cuda compute cap ${compute_cap} is not supported"; exit 1 92 | fi -------------------------------------------------------------------------------- /huggingface/pytorch/tei/docker/1.4.0/cpu/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM lukemathwalker/cargo-chef:latest-rust-1.75-bookworm AS chef 2 | WORKDIR /usr/src 3 | 4 | ENV SCCACHE=0.5.4 5 | ENV RUSTC_WRAPPER=/usr/local/bin/sccache 6 | 7 | # Donwload, configure sccache 8 | RUN curl -fsSL https://github.com/mozilla/sccache/releases/download/v$SCCACHE/sccache-v$SCCACHE-x86_64-unknown-linux-musl.tar.gz | tar -xzv --strip-components=1 -C /usr/local/bin sccache-v$SCCACHE-x86_64-unknown-linux-musl/sccache && \ 9 | chmod +x /usr/local/bin/sccache 10 | 11 | FROM chef AS planner 12 | 13 | COPY backends backends 14 | COPY core core 15 | COPY router router 16 | COPY Cargo.toml ./ 17 | COPY Cargo.lock ./ 18 | 19 | RUN cargo chef prepare --recipe-path recipe.json 20 | 21 | FROM chef AS builder 22 | 23 | ARG GIT_SHA 24 | ARG DOCKER_LABEL 25 | 26 | # sccache specific variables 27 | ARG ACTIONS_CACHE_URL 28 | ARG ACTIONS_RUNTIME_TOKEN 29 | ARG SCCACHE_GHA_ENABLED 30 | 31 | RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB \ 32 | | gpg --dearmor | tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null && \ 33 | echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | \ 34 | tee /etc/apt/sources.list.d/oneAPI.list 35 | 36 | RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ 37 | intel-oneapi-mkl-devel=2024.0.0-49656 \ 38 | build-essential \ 39 | && rm -rf /var/lib/apt/lists/* 40 | 41 | RUN echo "int mkl_serv_intel_cpu_true() {return 1;}" > fakeintel.c && \ 42 | gcc -shared -fPIC -o libfakeintel.so fakeintel.c 43 | 44 | COPY --from=planner /usr/src/recipe.json recipe.json 45 | 46 | RUN cargo chef cook --release --features candle --features mkl-dynamic --no-default-features --recipe-path recipe.json && sccache -s 47 | 48 | COPY backends backends 49 | COPY core core 50 | COPY router router 51 | COPY Cargo.toml ./ 52 | COPY Cargo.lock ./ 53 | 54 | FROM builder as http-builder 55 | 56 | RUN cargo build --release 
--bin text-embeddings-router -F candle -F mkl-dynamic -F http --no-default-features && sccache -s 57 | 58 | FROM builder as grpc-builder 59 | 60 | RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && \ 61 | curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v21.12/$PROTOC_ZIP && \ 62 | unzip -o $PROTOC_ZIP -d /usr/local bin/protoc && \ 63 | unzip -o $PROTOC_ZIP -d /usr/local 'include/*' && \ 64 | rm -f $PROTOC_ZIP 65 | 66 | COPY proto proto 67 | 68 | RUN cargo build --release --bin text-embeddings-router -F grpc -F candle -F mkl-dynamic --no-default-features && sccache -s 69 | 70 | FROM debian:bookworm-slim as base 71 | 72 | ENV HUGGINGFACE_HUB_CACHE=/data \ 73 | PORT=80 \ 74 | MKL_ENABLE_INSTRUCTIONS=AVX512_E4 \ 75 | RAYON_NUM_THREADS=8 \ 76 | LD_PRELOAD=/usr/local/libfakeintel.so \ 77 | LD_LIBRARY_PATH=/usr/local/lib 78 | 79 | RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ 80 | libomp-dev \ 81 | ca-certificates \ 82 | libssl-dev \ 83 | curl \ 84 | && rm -rf /var/lib/apt/lists/* 85 | 86 | # Copy a lot of the Intel shared objects because of the mkl_serv_intel_cpu_true patch... 87 | COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_intel_lp64.so.2 /usr/local/lib/libmkl_intel_lp64.so.2 88 | COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_intel_thread.so.2 /usr/local/lib/libmkl_intel_thread.so.2 89 | COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_core.so.2 /usr/local/lib/libmkl_core.so.2 90 | COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_vml_def.so.2 /usr/local/lib/libmkl_vml_def.so.2 91 | COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_def.so.2 /usr/local/lib/libmkl_def.so.2 92 | COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_vml_avx2.so.2 /usr/local/lib/libmkl_vml_avx2.so.2 93 | COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_vml_avx512.so.2 /usr/local/lib/libmkl_vml_avx512.so.2 94 | COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_avx2.so.2 /usr/local/lib/libmkl_avx2.so.2 95 | COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_avx512.so.2 /usr/local/lib/libmkl_avx512.so.2 96 | COPY --from=builder /usr/src/libfakeintel.so /usr/local/libfakeintel.so 97 | 98 | FROM base as grpc 99 | 100 | COPY --from=grpc-builder /usr/src/target/release/text-embeddings-router /usr/local/bin/text-embeddings-router 101 | 102 | ENTRYPOINT ["text-embeddings-router"] 103 | CMD ["--json-output"] 104 | 105 | FROM base AS http 106 | 107 | COPY --from=http-builder /usr/src/target/release/text-embeddings-router /usr/local/bin/text-embeddings-router 108 | 109 | # Amazon SageMaker compatible image 110 | FROM http as sagemaker 111 | COPY --chmod=775 sagemaker-entrypoint.sh entrypoint.sh 112 | 113 | ENTRYPOINT ["./entrypoint.sh"] -------------------------------------------------------------------------------- /huggingface/pytorch/tei/docker/1.4.0/gpu/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:12.2.0-devel-ubuntu22.04 AS base-builder 2 | 3 | ENV SCCACHE=0.5.4 4 | ENV RUSTC_WRAPPER=/usr/local/bin/sccache 5 | ENV PATH="/root/.cargo/bin:${PATH}" 6 | 7 | RUN apt-get update && apt-get upgrade -y && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ 8 | curl \ 9 | libssl-dev \ 10 | pkg-config \ 11 | perl \ 12 | && rm -rf /var/lib/apt/lists/* 13 | 14 | # Download and configure sccache 15 | RUN
curl -fsSL https://github.com/mozilla/sccache/releases/download/v$SCCACHE/sccache-v$SCCACHE-x86_64-unknown-linux-musl.tar.gz | tar -xzv --strip-components=1 -C /usr/local/bin sccache-v$SCCACHE-x86_64-unknown-linux-musl/sccache && \ 16 | chmod +x /usr/local/bin/sccache 17 | 18 | RUN curl https://sh.rustup.rs -sSf | bash -s -- -y 19 | RUN cargo install cargo-chef --locked 20 | 21 | FROM base-builder AS planner 22 | 23 | WORKDIR /usr/src 24 | 25 | COPY backends backends 26 | COPY core core 27 | COPY router router 28 | COPY Cargo.toml ./ 29 | COPY Cargo.lock ./ 30 | 31 | RUN cargo chef prepare --recipe-path recipe.json 32 | 33 | FROM base-builder AS builder 34 | 35 | ARG GIT_SHA 36 | ARG DOCKER_LABEL 37 | ARG VERTEX="false" 38 | 39 | # sccache specific variables 40 | ARG ACTIONS_CACHE_URL 41 | ARG ACTIONS_RUNTIME_TOKEN 42 | ARG SCCACHE_GHA_ENABLED 43 | 44 | # Limit parallelism 45 | ARG RAYON_NUM_THREADS=4 46 | ARG CARGO_BUILD_JOBS 47 | ARG CARGO_BUILD_INCREMENTAL 48 | 49 | WORKDIR /usr/src 50 | 51 | COPY --from=planner /usr/src/recipe.json recipe.json 52 | 53 | RUN if [ $VERTEX = "true" ]; \ 54 | then \ 55 | cargo chef cook --release --features google --recipe-path recipe.json && sccache -s; \ 56 | else \ 57 | cargo chef cook --release --recipe-path recipe.json && sccache -s; \ 58 | fi; 59 | 60 | RUN if [ $VERTEX = "true" ]; \ 61 | then \ 62 | CUDA_COMPUTE_CAP=75 cargo chef cook --release --features google --features candle-cuda-turing --recipe-path recipe.json && sccache -s; \ 63 | else \ 64 | CUDA_COMPUTE_CAP=75 cargo chef cook --release --features candle-cuda-turing --recipe-path recipe.json && sccache -s; \ 65 | fi; 66 | 67 | RUN if [ $VERTEX = "true" ]; \ 68 | then \ 69 | CUDA_COMPUTE_CAP=80 cargo chef cook --release --features google --features candle-cuda --recipe-path recipe.json && sccache -s; \ 70 | else \ 71 | CUDA_COMPUTE_CAP=80 cargo chef cook --release --features candle-cuda --recipe-path recipe.json && sccache -s; \ 72 | fi; 73 | 74 | RUN if [ $VERTEX = "true" ]; \ 75 | then \ 76 | CUDA_COMPUTE_CAP=90 cargo chef cook --release --features google --features candle-cuda --recipe-path recipe.json && sccache -s; \ 77 | else \ 78 | CUDA_COMPUTE_CAP=90 cargo chef cook --release --features candle-cuda --recipe-path recipe.json && sccache -s; \ 79 | fi; 80 | 81 | COPY backends backends 82 | COPY core core 83 | COPY router router 84 | COPY Cargo.toml ./ 85 | COPY Cargo.lock ./ 86 | 87 | RUN if [ $VERTEX = "true" ]; \ 88 | then \ 89 | CUDA_COMPUTE_CAP=75 cargo build --release --bin text-embeddings-router -F candle-cuda-turing -F google && sccache -s; \ 90 | else \ 91 | CUDA_COMPUTE_CAP=75 cargo build --release --bin text-embeddings-router -F candle-cuda-turing && sccache -s; \ 92 | fi; 93 | 94 | RUN mv /usr/src/target/release/text-embeddings-router /usr/src/target/release/text-embeddings-router-75 95 | 96 | RUN if [ $VERTEX = "true" ]; \ 97 | then \ 98 | CUDA_COMPUTE_CAP=80 cargo build --release --bin text-embeddings-router -F candle-cuda -F google && sccache -s; \ 99 | else \ 100 | CUDA_COMPUTE_CAP=80 cargo build --release --bin text-embeddings-router -F candle-cuda && sccache -s; \ 101 | fi; 102 | 103 | RUN mv /usr/src/target/release/text-embeddings-router /usr/src/target/release/text-embeddings-router-80 104 | 105 | RUN if [ $VERTEX = "true" ]; \ 106 | then \ 107 | CUDA_COMPUTE_CAP=90 cargo build --release --bin text-embeddings-router -F candle-cuda -F google && sccache -s; \ 108 | else \ 109 | CUDA_COMPUTE_CAP=90 cargo build --release --bin text-embeddings-router -F candle-cuda && 
sccache -s; \ 110 | fi; 111 | 112 | RUN mv /usr/src/target/release/text-embeddings-router /usr/src/target/release/text-embeddings-router-90 113 | 114 | FROM nvidia/cuda:12.2.0-runtime-ubuntu22.04 as base 115 | 116 | ARG DEFAULT_USE_FLASH_ATTENTION=True 117 | 118 | ENV HUGGINGFACE_HUB_CACHE=/data \ 119 | PORT=80 \ 120 | USE_FLASH_ATTENTION=$DEFAULT_USE_FLASH_ATTENTION 121 | 122 | # Something between this and the above apt-get upgrade has a conflicting dependency that overrides the previous 123 | # upgrade. This needs further investigation, but patching this for now to fix the CVE 124 | RUN apt-get update && apt-get upgrade -y && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ 125 | libgssapi-krb5-2 \ 126 | && rm -rf /var/lib/apt/lists/* 127 | 128 | COPY --from=builder /usr/src/target/release/text-embeddings-router-75 /usr/local/bin/text-embeddings-router-75 129 | COPY --from=builder /usr/src/target/release/text-embeddings-router-80 /usr/local/bin/text-embeddings-router-80 130 | COPY --from=builder /usr/src/target/release/text-embeddings-router-90 /usr/local/bin/text-embeddings-router-90 131 | 132 | # Amazon SageMaker compatible image 133 | FROM base AS sagemaker 134 | 135 | COPY --chmod=775 sagemaker-entrypoint-cuda-all.sh entrypoint.sh 136 | 137 | ENTRYPOINT ["./entrypoint.sh"] 138 | 139 | # Default image 140 | FROM base 141 | 142 | COPY --chmod=775 cuda-all-entrypoint.sh entrypoint.sh 143 | 144 | ENTRYPOINT ["./entrypoint.sh"] 145 | CMD ["--json-output"] -------------------------------------------------------------------------------- /huggingface/pytorch/tei/docker/1.6.0/cpu/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM lukemathwalker/cargo-chef:latest-rust-1.75-bookworm AS chef 2 | WORKDIR /usr/src 3 | 4 | ENV SCCACHE=0.5.4 5 | ENV RUSTC_WRAPPER=/usr/local/bin/sccache 6 | 7 | # Download, configure sccache 8 | RUN curl -fsSL https://github.com/mozilla/sccache/releases/download/v$SCCACHE/sccache-v$SCCACHE-x86_64-unknown-linux-musl.tar.gz | tar -xzv --strip-components=1 -C /usr/local/bin sccache-v$SCCACHE-x86_64-unknown-linux-musl/sccache && \ 9 | chmod +x /usr/local/bin/sccache 10 | 11 | FROM chef AS planner 12 | 13 | COPY backends backends 14 | COPY core core 15 | COPY router router 16 | COPY Cargo.toml ./ 17 | COPY Cargo.lock ./ 18 | 19 | RUN cargo chef prepare --recipe-path recipe.json 20 | 21 | FROM chef AS builder 22 | 23 | ARG GIT_SHA 24 | ARG DOCKER_LABEL 25 | 26 | # sccache specific variables 27 | ARG ACTIONS_CACHE_URL 28 | ARG ACTIONS_RUNTIME_TOKEN 29 | ARG SCCACHE_GHA_ENABLED 30 | 31 | RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB \ 32 | | gpg --dearmor | tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null && \ 33 | echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | \ 34 | tee /etc/apt/sources.list.d/oneAPI.list 35 | 36 | RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ 37 | intel-oneapi-mkl-devel=2024.0.0-49656 \ 38 | build-essential \ 39 | && rm -rf /var/lib/apt/lists/* 40 | 41 | RUN echo "int mkl_serv_intel_cpu_true() {return 1;}" > fakeintel.c && \ 42 | gcc -shared -fPIC -o libfakeintel.so fakeintel.c 43 | 44 | COPY --from=planner /usr/src/recipe.json recipe.json 45 | 46 | RUN cargo chef cook --release --features ort --features candle --features mkl-dynamic --no-default-features --recipe-path recipe.json && sccache -s 47 | 48 |
COPY backends backends 49 | COPY core core 50 | COPY router router 51 | COPY Cargo.toml ./ 52 | COPY Cargo.lock ./ 53 | 54 | FROM builder AS http-builder 55 | 56 | RUN cargo build --release --bin text-embeddings-router -F ort -F candle -F mkl-dynamic -F http --no-default-features && sccache -s 57 | 58 | FROM builder AS grpc-builder 59 | 60 | RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && \ 61 | curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v21.12/$PROTOC_ZIP && \ 62 | unzip -o $PROTOC_ZIP -d /usr/local bin/protoc && \ 63 | unzip -o $PROTOC_ZIP -d /usr/local 'include/*' && \ 64 | rm -f $PROTOC_ZIP 65 | 66 | COPY proto proto 67 | 68 | RUN cargo build --release --bin text-embeddings-router -F grpc -F ort -F candle -F mkl-dynamic --no-default-features && sccache -s 69 | 70 | FROM debian:bookworm-slim AS base 71 | 72 | ENV HUGGINGFACE_HUB_CACHE=/tmp \ 73 | PORT=80 \ 74 | MKL_ENABLE_INSTRUCTIONS=AVX512_E4 \ 75 | RAYON_NUM_THREADS=8 \ 76 | LD_PRELOAD=/usr/local/libfakeintel.so \ 77 | LD_LIBRARY_PATH=/usr/local/lib 78 | 79 | RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ 80 | libomp-dev \ 81 | ca-certificates \ 82 | libssl-dev \ 83 | curl \ 84 | && rm -rf /var/lib/apt/lists/* 85 | 86 | # Copy a lot of the Intel shared objects because of the mkl_serv_intel_cpu_true patch... 87 | COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_intel_lp64.so.2 /usr/local/lib/libmkl_intel_lp64.so.2 88 | COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_intel_thread.so.2 /usr/local/lib/libmkl_intel_thread.so.2 89 | COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_core.so.2 /usr/local/lib/libmkl_core.so.2 90 | COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_vml_def.so.2 /usr/local/lib/libmkl_vml_def.so.2 91 | COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_def.so.2 /usr/local/lib/libmkl_def.so.2 92 | COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_vml_avx2.so.2 /usr/local/lib/libmkl_vml_avx2.so.2 93 | COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_vml_avx512.so.2 /usr/local/lib/libmkl_vml_avx512.so.2 94 | COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_avx2.so.2 /usr/local/lib/libmkl_avx2.so.2 95 | COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_avx512.so.2 /usr/local/lib/libmkl_avx512.so.2 96 | COPY --from=builder /usr/src/libfakeintel.so /usr/local/libfakeintel.so 97 | 98 | FROM base AS grpc 99 | 100 | COPY --from=grpc-builder /usr/src/target/release/text-embeddings-router /usr/local/bin/text-embeddings-router 101 | 102 | ENTRYPOINT ["text-embeddings-router"] 103 | CMD ["--json-output"] 104 | 105 | FROM base AS http 106 | 107 | COPY --from=http-builder /usr/src/target/release/text-embeddings-router /usr/local/bin/text-embeddings-router 108 | 109 | # Amazon SageMaker compatible image 110 | FROM http as sagemaker 111 | COPY --chmod=775 sagemaker-entrypoint.sh entrypoint.sh 112 | 113 | ENTRYPOINT ["./entrypoint.sh"] -------------------------------------------------------------------------------- /huggingface/pytorch/tei/docker/1.6.0/gpu/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:12.2.0-devel-ubuntu22.04 AS base-builder 2 | 3 | ENV SCCACHE=0.5.4 4 | ENV RUSTC_WRAPPER=/usr/local/bin/sccache 5 | ENV PATH="/root/.cargo/bin:${PATH}" 6 | 7 | RUN apt-get update && apt-get upgrade -y && 
DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ 8 | curl \ 9 | libssl-dev \ 10 | pkg-config \ 11 | && rm -rf /var/lib/apt/lists/* 12 | 13 | # Download and configure sccache 14 | RUN curl -fsSL https://github.com/mozilla/sccache/releases/download/v$SCCACHE/sccache-v$SCCACHE-x86_64-unknown-linux-musl.tar.gz | tar -xzv --strip-components=1 -C /usr/local/bin sccache-v$SCCACHE-x86_64-unknown-linux-musl/sccache && \ 15 | chmod +x /usr/local/bin/sccache 16 | 17 | RUN curl https://sh.rustup.rs -sSf | bash -s -- -y 18 | RUN cargo install cargo-chef --locked 19 | 20 | FROM base-builder AS planner 21 | 22 | WORKDIR /usr/src 23 | 24 | COPY backends backends 25 | COPY core core 26 | COPY router router 27 | COPY Cargo.toml ./ 28 | COPY Cargo.lock ./ 29 | 30 | RUN cargo chef prepare --recipe-path recipe.json 31 | 32 | FROM base-builder AS builder 33 | 34 | ARG GIT_SHA 35 | ARG DOCKER_LABEL 36 | ARG VERTEX="false" 37 | 38 | # sccache specific variables 39 | ARG ACTIONS_CACHE_URL 40 | ARG ACTIONS_RUNTIME_TOKEN 41 | ARG SCCACHE_GHA_ENABLED 42 | 43 | # Limit parallelism 44 | ARG RAYON_NUM_THREADS=4 45 | ARG CARGO_BUILD_JOBS 46 | ARG CARGO_BUILD_INCREMENTAL 47 | 48 | WORKDIR /usr/src 49 | 50 | COPY --from=planner /usr/src/recipe.json recipe.json 51 | 52 | RUN if [ $VERTEX = "true" ]; \ 53 | then \ 54 | cargo chef cook --release --features google --recipe-path recipe.json && sccache -s; \ 55 | else \ 56 | cargo chef cook --release --recipe-path recipe.json && sccache -s; \ 57 | fi; 58 | 59 | RUN if [ $VERTEX = "true" ]; \ 60 | then \ 61 | CUDA_COMPUTE_CAP=75 cargo chef cook --release --features google --features candle-cuda-turing --recipe-path recipe.json && sccache -s; \ 62 | else \ 63 | CUDA_COMPUTE_CAP=75 cargo chef cook --release --features candle-cuda-turing --recipe-path recipe.json && sccache -s; \ 64 | fi; 65 | 66 | RUN if [ $VERTEX = "true" ]; \ 67 | then \ 68 | CUDA_COMPUTE_CAP=80 cargo chef cook --release --features google --features candle-cuda --recipe-path recipe.json && sccache -s; \ 69 | else \ 70 | CUDA_COMPUTE_CAP=80 cargo chef cook --release --features candle-cuda --recipe-path recipe.json && sccache -s; \ 71 | fi; 72 | 73 | RUN if [ $VERTEX = "true" ]; \ 74 | then \ 75 | CUDA_COMPUTE_CAP=90 cargo chef cook --release --features google --features candle-cuda --recipe-path recipe.json && sccache -s; \ 76 | else \ 77 | CUDA_COMPUTE_CAP=90 cargo chef cook --release --features candle-cuda --recipe-path recipe.json && sccache -s; \ 78 | fi; 79 | 80 | COPY backends backends 81 | COPY core core 82 | COPY router router 83 | COPY Cargo.toml ./ 84 | COPY Cargo.lock ./ 85 | 86 | RUN if [ $VERTEX = "true" ]; \ 87 | then \ 88 | CUDA_COMPUTE_CAP=75 cargo build --release --bin text-embeddings-router -F candle-cuda-turing -F google && sccache -s; \ 89 | else \ 90 | CUDA_COMPUTE_CAP=75 cargo build --release --bin text-embeddings-router -F candle-cuda-turing && sccache -s; \ 91 | fi; 92 | 93 | RUN mv /usr/src/target/release/text-embeddings-router /usr/src/target/release/text-embeddings-router-75 94 | 95 | RUN if [ $VERTEX = "true" ]; \ 96 | then \ 97 | CUDA_COMPUTE_CAP=80 cargo build --release --bin text-embeddings-router -F candle-cuda -F google && sccache -s; \ 98 | else \ 99 | CUDA_COMPUTE_CAP=80 cargo build --release --bin text-embeddings-router -F candle-cuda && sccache -s; \ 100 | fi; 101 | 102 | RUN mv /usr/src/target/release/text-embeddings-router /usr/src/target/release/text-embeddings-router-80 103 | 104 | RUN if [ $VERTEX = "true" ]; \ 105 | then \ 106 |
CUDA_COMPUTE_CAP=90 cargo build --release --bin text-embeddings-router -F candle-cuda -F google && sccache -s; \ 107 | else \ 108 | CUDA_COMPUTE_CAP=90 cargo build --release --bin text-embeddings-router -F candle-cuda && sccache -s; \ 109 | fi; 110 | 111 | RUN mv /usr/src/target/release/text-embeddings-router /usr/src/target/release/text-embeddings-router-90 112 | 113 | FROM nvidia/cuda:12.2.0-runtime-ubuntu22.04 AS base 114 | 115 | ARG DEFAULT_USE_FLASH_ATTENTION=True 116 | 117 | ENV HUGGINGFACE_HUB_CACHE=/tmp \ 118 | PORT=80 \ 119 | USE_FLASH_ATTENTION=$DEFAULT_USE_FLASH_ATTENTION 120 | 121 | RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ 122 | libgssapi-krb5-2 \ 123 | ca-certificates \ 124 | libssl-dev \ 125 | curl \ 126 | && rm -rf /var/lib/apt/lists/* 127 | 128 | COPY --from=builder /usr/src/target/release/text-embeddings-router-75 /usr/local/bin/text-embeddings-router-75 129 | COPY --from=builder /usr/src/target/release/text-embeddings-router-80 /usr/local/bin/text-embeddings-router-80 130 | COPY --from=builder /usr/src/target/release/text-embeddings-router-90 /usr/local/bin/text-embeddings-router-90 131 | 132 | # Amazon SageMaker compatible image 133 | FROM base AS sagemaker 134 | 135 | COPY --chmod=775 sagemaker-entrypoint-cuda-all.sh entrypoint.sh 136 | 137 | ENTRYPOINT ["./entrypoint.sh"] 138 | 139 | # Default image 140 | FROM base 141 | 142 | COPY --chmod=775 cuda-all-entrypoint.sh entrypoint.sh 143 | 144 | ENTRYPOINT ["./entrypoint.sh"] 145 | CMD ["--json-output"] -------------------------------------------------------------------------------- /huggingface/pytorch/tei/docker/1.7.0/cpu/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM lukemathwalker/cargo-chef:latest-rust-1.85-bookworm AS chef 2 | WORKDIR /usr/src 3 | 4 | ENV SCCACHE=0.5.4 5 | ENV RUSTC_WRAPPER=/usr/local/bin/sccache 6 | 7 | # Download, configure sccache 8 | RUN curl -fsSL https://github.com/mozilla/sccache/releases/download/v$SCCACHE/sccache-v$SCCACHE-x86_64-unknown-linux-musl.tar.gz | tar -xzv --strip-components=1 -C /usr/local/bin sccache-v$SCCACHE-x86_64-unknown-linux-musl/sccache && \ 9 | chmod +x /usr/local/bin/sccache 10 | 11 | FROM chef AS planner 12 | 13 | COPY candle-extensions candle-extensions 14 | COPY backends backends 15 | COPY core core 16 | COPY router router 17 | COPY Cargo.toml ./ 18 | COPY Cargo.lock ./ 19 | 20 | RUN cargo chef prepare --recipe-path recipe.json 21 | 22 | FROM chef AS builder 23 | 24 | ARG GIT_SHA 25 | ARG DOCKER_LABEL 26 | 27 | # sccache specific variables 28 | ARG SCCACHE_GHA_ENABLED 29 | 30 | RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB \ 31 | | gpg --dearmor | tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null && \ 32 | echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | \ 33 | tee /etc/apt/sources.list.d/oneAPI.list 34 | 35 | RUN apt-get update && apt-get upgrade -y && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ 36 | intel-oneapi-mkl-devel=2024.0.0-49656 \ 37 | build-essential \ 38 | && rm -rf /var/lib/apt/lists/* 39 | 40 | RUN echo "int mkl_serv_intel_cpu_true() {return 1;}" > fakeintel.c && \ 41 | gcc -shared -fPIC -o libfakeintel.so fakeintel.c 42 | 43 | COPY --from=planner /usr/src/recipe.json recipe.json 44 | 45 | RUN --mount=type=secret,id=actions_cache_url,env=ACTIONS_CACHE_URL \ 46 |
--mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \ 47 | cargo chef cook --release --features ort,candle,mkl --no-default-features --recipe-path recipe.json && sccache -s 48 | 49 | COPY backends backends 50 | COPY core core 51 | COPY router router 52 | COPY Cargo.toml ./ 53 | COPY Cargo.lock ./ 54 | 55 | FROM builder AS http-builder 56 | 57 | RUN --mount=type=secret,id=actions_cache_url,env=ACTIONS_CACHE_URL \ 58 | --mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \ 59 | cargo build --release --bin text-embeddings-router --features ort,candle,mkl,http --no-default-features && sccache -s 60 | 61 | FROM builder AS grpc-builder 62 | 63 | RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && \ 64 | curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v21.12/$PROTOC_ZIP && \ 65 | unzip -o $PROTOC_ZIP -d /usr/local bin/protoc && \ 66 | unzip -o $PROTOC_ZIP -d /usr/local 'include/*' && \ 67 | rm -f $PROTOC_ZIP 68 | 69 | COPY proto proto 70 | 71 | RUN --mount=type=secret,id=actions_cache_url,env=ACTIONS_CACHE_URL \ 72 | --mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \ 73 | cargo build --release --bin text-embeddings-router --features ort,candle,mkl,grpc --no-default-features && sccache -s 74 | 75 | FROM debian:bookworm-slim AS base 76 | 77 | ENV HUGGINGFACE_HUB_CACHE=/opt/ml/model \ 78 | PORT=80 \ 79 | HF_HUB_USER_AGENT_ORIGIN=aws:sagemaker:cpu:inference:tei \ 80 | MKL_ENABLE_INSTRUCTIONS=AVX512_E4 \ 81 | RAYON_NUM_THREADS=8 \ 82 | LD_PRELOAD=/usr/local/libfakeintel.so \ 83 | LD_LIBRARY_PATH=/usr/local/lib 84 | 85 | RUN apt-get update && apt-get upgrade -y && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ 86 | libomp-dev \ 87 | ca-certificates \ 88 | libssl-dev \ 89 | curl \ 90 | && rm -rf /var/lib/apt/lists/* 91 | 92 | # Copy a lot of the Intel shared objects because of the mkl_serv_intel_cpu_true patch... 
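# A brief aside on how this patch works (comment only; the build is unchanged): MKL selects its
# optimized kernels through an internal mkl_serv_intel_cpu_true() check. The builder stage compiles
# the one-line shim `int mkl_serv_intel_cpu_true() {return 1;}` into libfakeintel.so, and this
# runtime stage LD_PRELOADs it (LD_PRELOAD=/usr/local/libfakeintel.so in the ENV block above), so
# the fast code paths are also taken on non-Intel CPUs. A minimal sketch of the same trick outside
# this image, assuming the shared objects copied below are the ones MKL loads dynamically at runtime:
#   echo 'int mkl_serv_intel_cpu_true() {return 1;}' > fakeintel.c
#   gcc -shared -fPIC -o libfakeintel.so fakeintel.c
#   LD_PRELOAD=$PWD/libfakeintel.so text-embeddings-router --json-output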
93 | COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_intel_lp64.so.2 /usr/local/lib/libmkl_intel_lp64.so.2 94 | COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_intel_thread.so.2 /usr/local/lib/libmkl_intel_thread.so.2 95 | COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_core.so.2 /usr/local/lib/libmkl_core.so.2 96 | COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_vml_def.so.2 /usr/local/lib/libmkl_vml_def.so.2 97 | COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_def.so.2 /usr/local/lib/libmkl_def.so.2 98 | COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_vml_avx2.so.2 /usr/local/lib/libmkl_vml_avx2.so.2 99 | COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_vml_avx512.so.2 /usr/local/lib/libmkl_vml_avx512.so.2 100 | COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_avx2.so.2 /usr/local/lib/libmkl_avx2.so.2 101 | COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_avx512.so.2 /usr/local/lib/libmkl_avx512.so.2 102 | COPY --from=builder /usr/src/libfakeintel.so /usr/local/libfakeintel.so 103 | 104 | FROM base AS grpc 105 | 106 | COPY --from=grpc-builder /usr/src/target/release/text-embeddings-router /usr/local/bin/text-embeddings-router 107 | 108 | ENTRYPOINT ["text-embeddings-router"] 109 | CMD ["--json-output"] 110 | 111 | FROM base AS http 112 | 113 | COPY --from=http-builder /usr/src/target/release/text-embeddings-router /usr/local/bin/text-embeddings-router 114 | 115 | # Amazon SageMaker compatible image 116 | FROM http AS sagemaker 117 | 118 | COPY --chmod=775 sagemaker-entrypoint.sh entrypoint.sh 119 | 120 | ENTRYPOINT ["./entrypoint.sh"] 121 | -------------------------------------------------------------------------------- /huggingface/pytorch/tei/docker/1.7.0/gpu/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:12.2.0-devel-ubuntu22.04 AS base-builder 2 | 3 | ENV SCCACHE=0.5.4 4 | ENV RUSTC_WRAPPER=/usr/local/bin/sccache 5 | ENV PATH="/root/.cargo/bin:${PATH}" 6 | # aligned with `cargo-chef` version in `lukemathwalker/cargo-chef:latest-rust-1.85-bookworm` 7 | ENV CARGO_CHEF=0.1.71 8 | 9 | RUN apt-get update && apt-get upgrade -y && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ 10 | curl \ 11 | libssl-dev \ 12 | pkg-config \ 13 | libgssapi-krb5-2 \ 14 | && rm -rf /var/lib/apt/lists/* 15 | 16 | # Download and configure sccache 17 | RUN curl -fsSL https://github.com/mozilla/sccache/releases/download/v$SCCACHE/sccache-v$SCCACHE-x86_64-unknown-linux-musl.tar.gz | tar -xzv --strip-components=1 -C /usr/local/bin sccache-v$SCCACHE-x86_64-unknown-linux-musl/sccache && \ 18 | chmod +x /usr/local/bin/sccache 19 | 20 | RUN curl https://sh.rustup.rs -sSf | bash -s -- -y 21 | RUN cargo install cargo-chef --version $CARGO_CHEF --locked 22 | 23 | FROM base-builder AS planner 24 | 25 | WORKDIR /usr/src 26 | 27 | COPY candle-extensions candle-extensions 28 | COPY backends backends 29 | COPY core core 30 | COPY router router 31 | COPY Cargo.toml ./ 32 | COPY Cargo.lock ./ 33 | 34 | RUN cargo chef prepare --recipe-path recipe.json 35 | 36 | FROM base-builder AS builder 37 | 38 | ARG GIT_SHA 39 | ARG DOCKER_LABEL 40 | 41 | # sccache specific variables 42 | ARG SCCACHE_GHA_ENABLED 43 | 44 | # Limit parallelism 45 | ARG RAYON_NUM_THREADS=4 46 | ARG CARGO_BUILD_JOBS 47 | ARG CARGO_BUILD_INCREMENTAL 48 | 49 | WORKDIR /usr/src 50 | 51 | COPY
--from=planner /usr/src/recipe.json recipe.json 52 | 53 | RUN --mount=type=secret,id=actions_cache_url,env=ACTIONS_CACHE_URL \ 54 | --mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \ 55 | cargo chef cook --release --recipe-path recipe.json && sccache -s; 56 | 57 | RUN --mount=type=secret,id=actions_cache_url,env=ACTIONS_CACHE_URL \ 58 | --mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \ 59 | CUDA_COMPUTE_CAP=75 cargo chef cook --release --features candle-cuda-turing --recipe-path recipe.json && sccache -s; 60 | 61 | RUN --mount=type=secret,id=actions_cache_url,env=ACTIONS_CACHE_URL \ 62 | --mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \ 63 | CUDA_COMPUTE_CAP=80 cargo chef cook --release --features candle-cuda --recipe-path recipe.json && sccache -s; 64 | 65 | RUN --mount=type=secret,id=actions_cache_url,env=ACTIONS_CACHE_URL \ 66 | --mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \ 67 | CUDA_COMPUTE_CAP=90 cargo chef cook --release --features candle-cuda --recipe-path recipe.json && sccache -s; 68 | 69 | COPY candle-extensions candle-extensions 70 | COPY backends backends 71 | COPY core core 72 | COPY router router 73 | COPY Cargo.toml ./ 74 | COPY Cargo.lock ./ 75 | 76 | RUN --mount=type=secret,id=actions_cache_url,env=ACTIONS_CACHE_URL \ 77 | --mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \ 78 | CUDA_COMPUTE_CAP=75 cargo build --release --bin text-embeddings-router -F candle-cuda-turing && sccache -s; 79 | 80 | RUN mv /usr/src/target/release/text-embeddings-router /usr/src/target/release/text-embeddings-router-75 81 | 82 | RUN --mount=type=secret,id=actions_cache_url,env=ACTIONS_CACHE_URL \ 83 | --mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \ 84 | CUDA_COMPUTE_CAP=80 cargo build --release --bin text-embeddings-router -F candle-cuda && sccache -s; 85 | 86 | RUN mv /usr/src/target/release/text-embeddings-router /usr/src/target/release/text-embeddings-router-80 87 | 88 | RUN --mount=type=secret,id=actions_cache_url,env=ACTIONS_CACHE_URL \ 89 | --mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \ 90 | CUDA_COMPUTE_CAP=90 cargo build --release --bin text-embeddings-router -F candle-cuda && sccache -s; 91 | 92 | RUN mv /usr/src/target/release/text-embeddings-router /usr/src/target/release/text-embeddings-router-90 93 | 94 | FROM nvidia/cuda:12.2.0-runtime-ubuntu22.04 AS base 95 | 96 | ARG DEFAULT_USE_FLASH_ATTENTION=True 97 | 98 | ENV HUGGINGFACE_HUB_CACHE=/opt/ml/model \ 99 | PORT=80 \ 100 | USE_FLASH_ATTENTION=$DEFAULT_USE_FLASH_ATTENTION \ 101 | HF_HUB_USER_AGENT_ORIGIN=aws:sagemaker:gpu-cuda:inference:tei 102 | 103 | RUN apt-get update && apt-get upgrade -y && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ 104 | ca-certificates \ 105 | libssl-dev \ 106 | curl \ 107 | && rm -rf /var/lib/apt/lists/* 108 | 109 | COPY --from=builder /usr/src/target/release/text-embeddings-router-75 /usr/local/bin/text-embeddings-router-75 110 | COPY --from=builder /usr/src/target/release/text-embeddings-router-80 /usr/local/bin/text-embeddings-router-80 111 | COPY --from=builder /usr/src/target/release/text-embeddings-router-90 /usr/local/bin/text-embeddings-router-90 112 | 113 | # Amazon SageMaker compatible image 114 | FROM base AS sagemaker 115 | 116 | COPY --chmod=775 sagemaker-entrypoint-cuda-all.sh entrypoint.sh 117 | COPY --chmod=775 /huggingface/pytorch/tei/docker/1.7.0/gpu/start-cuda-compat.sh start-cuda-compat.sh 
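# How this SageMaker stage is expected to start up (comment only): the entrypoint copied above is
# TEI's sagemaker-entrypoint-cuda-all.sh, and start-cuda-compat.sh is the driver-compat helper shown
# immediately below in this repo. A minimal sketch of the dispatch, assuming the 1.7.0 entrypoint
# follows the same pattern as the 1.2.3 sagemaker-entrypoint-cuda-all.sh earlier in this file tree:
#   ./start-cuda-compat.sh                    # put /usr/local/cuda/compat on LD_LIBRARY_PATH if the driver is older
#   cap=${CUDA_COMPUTE_CAP:-$(detect_cap)}    # detect_cap is a hypothetical helper mapping GPU name -> 75/80/86/89/90
#   # cap 75 -> text-embeddings-router-75, cap 80..89 -> text-embeddings-router-80, cap 90 -> text-embeddings-router-90,
#   # each launched as `<binary> --port 8080 --json-output`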
118 | 119 | ENTRYPOINT ["./entrypoint.sh"] 120 | -------------------------------------------------------------------------------- /huggingface/pytorch/tei/docker/1.7.0/gpu/start-cuda-compat.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | verlt() { 4 | [ "$1" = "$2" ] && return 1 || [ "$1" = "$(echo -e "$1\n$2" | sort -V | head -n1)" ] 5 | } 6 | 7 | if [ -f /usr/local/cuda/compat/libcuda.so.1 ]; then 8 | CUDA_COMPAT_MAX_DRIVER_VERSION=$(readlink /usr/local/cuda/compat/libcuda.so.1 | cut -d'.' -f 3-) 9 | echo "CUDA compat package should be installed for NVIDIA driver smaller than ${CUDA_COMPAT_MAX_DRIVER_VERSION}" 10 | NVIDIA_DRIVER_VERSION=$(sed -n 's/^NVRM.*Kernel Module *\([0-9.]*\).*$/\1/p' /proc/driver/nvidia/version 2>/dev/null || true) 11 | echo "Current installed NVIDIA driver version is ${NVIDIA_DRIVER_VERSION}" 12 | if verlt $NVIDIA_DRIVER_VERSION $CUDA_COMPAT_MAX_DRIVER_VERSION; then 13 | echo "Adding CUDA compat to LD_LIBRARY_PATH" 14 | export LD_LIBRARY_PATH=/usr/local/cuda/compat:$LD_LIBRARY_PATH 15 | echo $LD_LIBRARY_PATH 16 | else 17 | echo "Skipping CUDA compat setup as newer NVIDIA driver is installed" 18 | fi 19 | else 20 | echo "Skipping CUDA compat setup as package not found" 21 | fi 22 | -------------------------------------------------------------------------------- /huggingface/pytorch/tei/docker/buildspec.yml: -------------------------------------------------------------------------------- 1 | version: 0.2 2 | 3 | env: 4 | shell: bash 5 | variables: 6 | FRAMEWORK_FOLDER: "huggingface/pytorch/tei/docker" 7 | PYTHONPATH: "/codebuild/output/src*/src/github.com/awslabs/llm-hosting-container" 8 | 9 | phases: 10 | install: 11 | runtime-versions: 12 | python: 3.11 13 | commands: 14 | - echo "Installing Python version 3.11 ..." 15 | - pyenv global $PYTHON_311_VERSION 16 | 17 | pre_build: 18 | commands: 19 | - echo Pre-build started on `date` 20 | - export PYTHONPATH=$(pwd):$PYTHONPATH 21 | 22 | # Continue with regular pre-build steps if BUILD_REQUIRED=true 23 | - | 24 | echo Setting up Docker buildx. 25 | docker buildx version 26 | docker buildx create --name builder --driver docker-container --buildkitd-flags '--allow-insecure-entitlement security.insecure --allow-insecure-entitlement network.host' --use 27 | docker buildx inspect --bootstrap --builder builder 28 | docker buildx install 29 | echo Preparing system dependencies for execution. 30 | docker --version 31 | docker login -u $DOCKER_USERNAME -p $DOCKER_PASSWORD 32 | curl -LO http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh 33 | bash Miniconda3-latest-Linux-x86_64.sh -bfp /miniconda3 34 | export PATH=/miniconda3/bin:${PATH} 35 | conda install python=3.11 36 | conda update -y conda 37 | echo Prepare TEI dependencies for execution. 
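# Note (comment only): the build phase below hands control to tei.py, which reads the release
# configuration and dispatches on the MODE environment variable (PR, BUILD, TEST or RELEASE); see
# huggingface/pytorch/tei/docker/tei.py later in this repo. A hypothetical local invocation, assuming
# MODE and the AWS/test variables are exported the way the CodeBuild project would provide them:
#   MODE=PR PYTHONPATH=$(pwd) python huggingface/pytorch/tei/docker/tei.py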
38 | mkdir tei-artifacts 39 | python -m pip install -r $FRAMEWORK_FOLDER/tei-requirements.txt 40 | 41 | build: 42 | commands: 43 | - | 44 | echo Build started on `date` 45 | echo "Current PYTHONPATH: $PYTHONPATH" 46 | python $FRAMEWORK_FOLDER/tei.py 47 | 48 | post_build: 49 | commands: 50 | - | 51 | echo Build completed on `date` -------------------------------------------------------------------------------- /huggingface/pytorch/tei/docker/tei-requirements.txt: -------------------------------------------------------------------------------- 1 | boto3 2 | dataclasses 3 | docker 4 | gitpython 5 | sagemaker 6 | 7 | parameterized 8 | pytest 9 | pytest-mock 10 | pytest-xdist -------------------------------------------------------------------------------- /huggingface/pytorch/tei/docker/tei.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import shutil 4 | import subprocess 5 | 6 | import git 7 | 8 | from huggingface.pytorch.release_utils import ( 9 | GIT_REPO_DOCKERFILES_ROOT_DIRECTORY, 10 | GIT_REPO_PYTEST_PATH, 11 | LOG, 12 | Aws, 13 | DockerClient, 14 | EnvironmentVariable, 15 | Mode, 16 | ReleaseConfigs, 17 | ) 18 | 19 | GIT_REPO_TEI_LOCAL_FOLDER_NAME = "text-embeddings-inference" 20 | GIT_REPO_TEI_TAG_PATTERN = "v{version}" 21 | GIT_REPO_TEI_URL = "https://github.com/huggingface/text-embeddings-inference.git" 22 | 23 | 24 | def build(configs: ReleaseConfigs): 25 | """Builds the Docker image for the provided configs.""" 26 | aws = Aws() 27 | docker_client = DockerClient() 28 | for config in configs.releases: 29 | LOG.info(f"Going to build image for config: {config}.") 30 | image_uri = config.get_image_uri_for_staging() 31 | if aws.does_ecr_image_exist(image_uri): 32 | LOG.info(f"Skipping already built image '{image_uri}'. Config: {config}.") 33 | continue 34 | 35 | LOG.info( 36 | f"Setting up build prerequisites for release config with version: {config.version}" 37 | ) 38 | build_path = GIT_REPO_TEI_LOCAL_FOLDER_NAME 39 | shutil.rmtree(GIT_REPO_TEI_LOCAL_FOLDER_NAME, ignore_errors=True) 40 | hf_tei_repo = git.Repo.clone_from( 41 | GIT_REPO_TEI_URL, GIT_REPO_TEI_LOCAL_FOLDER_NAME, no_checkout=True 42 | ) 43 | hf_tei_repo_tag = GIT_REPO_TEI_TAG_PATTERN.format(version=config.version) 44 | hf_tei_repo.git.checkout(hf_tei_repo_tag) 45 | LOG.info( 46 | f"Checked out {hf_tei_repo} with tag: {hf_tei_repo_tag} to {GIT_REPO_TEI_LOCAL_FOLDER_NAME}." 47 | ) 48 | 49 | subprocess.run( 50 | ["git", "submodule", "update", "--init"], 51 | cwd=GIT_REPO_TEI_LOCAL_FOLDER_NAME, 52 | check=True, 53 | ) 54 | LOG.info(f"Initialized and updated submodules for {hf_tei_repo_tag}.") 55 | 56 | shutil.copytree( 57 | GIT_REPO_DOCKERFILES_ROOT_DIRECTORY, 58 | os.path.join( 59 | GIT_REPO_TEI_LOCAL_FOLDER_NAME, GIT_REPO_DOCKERFILES_ROOT_DIRECTORY 60 | ), 61 | ) 62 | LOG.info( 63 | f"Copied '{GIT_REPO_DOCKERFILES_ROOT_DIRECTORY}' directory to TEI directory for 'COPY' command." 64 | ) 65 | 66 | dockerfile_path = config.get_dockerfile_path() 67 | LOG.info(f"Building Dockerfile: '{dockerfile_path}'. 
This may take a while...") 68 | docker_client.build( 69 | image_uri=image_uri, dockerfile_path=dockerfile_path, build_path=build_path 70 | ) 71 | 72 | username, password = aws.get_ecr_credentials(image_uri) 73 | docker_client.login(username, password, image_uri) 74 | docker_client.push(image_uri) 75 | 76 | 77 | def test(configs: ReleaseConfigs): 78 | """Runs SageMaker tests for the Docker images associated with the provided configs and current git commit.""" 79 | aws = Aws() 80 | for config in configs.releases: 81 | LOG.info(f"Going to test built image for config: {config}.") 82 | test_role_arn = os.getenv(EnvironmentVariable.TEST_ROLE_ARN.name) 83 | test_session = aws.get_session_for_role(test_role_arn) 84 | test_credentials = test_session.get_credentials() 85 | environ = os.environ.copy() 86 | environ.update( 87 | { 88 | "DEVICE_TYPE": config.device.lower(), 89 | "AWS_ACCESS_KEY_ID": test_credentials.access_key, 90 | "AWS_SECRET_ACCESS_KEY": test_credentials.secret_key, 91 | "AWS_SESSION_TOKEN": test_credentials.token, 92 | "IMAGE_URI": config.get_image_uri_for_staging(), 93 | "TEST_ROLE_ARN": test_role_arn, 94 | } 95 | ) 96 | 97 | command = [ 98 | "pytest", 99 | "-m", 100 | config.device.lower(), 101 | "-n", 102 | "auto", 103 | "--log-cli-level", 104 | "info", 105 | GIT_REPO_PYTEST_PATH, 106 | ] 107 | LOG.info(f"Running test command: {command}.") 108 | process = subprocess.run( 109 | command, env=environ, encoding="utf-8", capture_output=True 110 | ) 111 | LOG.info(process.stdout) 112 | assert process.returncode == 0, ( 113 | f"Failed with config: {config}.\nError: {process.stderr}." 114 | ) 115 | LOG.info(f"Finished testing image with config: {config}.") 116 | 117 | 118 | def pr(configs: ReleaseConfigs): 119 | """Executes both build and test modes.""" 120 | build(configs) 121 | test(configs) 122 | 123 | 124 | def release(configs: ReleaseConfigs): 125 | """trigger SMFrameworks algo release pipeline""" 126 | aws = Aws() 127 | docker_client = DockerClient() 128 | for config in configs.releases: 129 | LOG.info(f"Releasing image associated for config: {config}.") 130 | released_image_uri = config.get_image_uri_for_released() 131 | if aws.does_ecr_image_exist(released_image_uri): 132 | LOG.info( 133 | f"Skipping already released image '{released_image_uri}'. Config: {config}." 
134 | ) 135 | continue 136 | 137 | staged_image_uri = config.get_image_uri_for_staging() 138 | username, password = aws.get_ecr_credentials(staged_image_uri) 139 | docker_client.login(username, password, staged_image_uri) 140 | docker_client.prune_all() 141 | docker_client.pull(staged_image_uri) 142 | 143 | docker_client.login(username, password, staged_image_uri) 144 | docker_client.tag(staged_image_uri, released_image_uri) 145 | docker_client.push(released_image_uri) 146 | 147 | js_uris = config.get_image_uris_for_jumpstart() 148 | username, password = aws.get_ecr_credentials(js_uris[0]) 149 | docker_client.login(username, password, js_uris[0]) 150 | for js_uri in js_uris: 151 | docker_client.tag(staged_image_uri, js_uri) 152 | docker_client.push(js_uri) 153 | LOG.info( 154 | f"Release marked as complete for following config ({js_uris}): {config}" 155 | ) 156 | 157 | 158 | if __name__ == "__main__": 159 | logging.basicConfig( 160 | level=logging.INFO, 161 | format="%(asctime)s %(levelname)-8s %(message)s", 162 | datefmt="%Y-%m-%d %H:%M:%S", 163 | ) 164 | configs = ReleaseConfigs() 165 | configs.validate() 166 | mode = os.getenv(EnvironmentVariable.MODE.name) 167 | LOG.info(f"Mode has been set to: {mode}.") 168 | if mode == Mode.PR.name: 169 | pr(configs) 170 | elif mode == Mode.BUILD.name: 171 | build(configs) 172 | elif mode == Mode.TEST.name: 173 | test(configs) 174 | elif mode == Mode.RELEASE.name: 175 | release(configs) 176 | else: 177 | raise ValueError( 178 | f"The mode '{mode}' is not recognized. Please set it correctly.'" 179 | ) 180 | -------------------------------------------------------------------------------- /huggingface/pytorch/tgi/docker/2.3.1/entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | verlt() { 4 | [ "$1" = "$2" ] && return 1 || [ "$1" = "$(echo -e "$1\n$2" | sort -V | head -n1)" ] 5 | } 6 | 7 | if [ -f /usr/local/cuda/compat/libcuda.so.1 ]; then 8 | CUDA_COMPAT_MAX_DRIVER_VERSION=$(readlink /usr/local/cuda/compat/libcuda.so.1 | cut -d'.' 
-f 3-) 9 | echo "CUDA compat package should be installed for NVIDIA driver smaller than ${CUDA_COMPAT_MAX_DRIVER_VERSION}" 10 | NVIDIA_DRIVER_VERSION=$(sed -n 's/^NVRM.*Kernel Module *\([0-9.]*\).*$/\1/p' /proc/driver/nvidia/version 2>/dev/null || true) 11 | echo "Current installed NVIDIA driver version is ${NVIDIA_DRIVER_VERSION}" 12 | if verlt $NVIDIA_DRIVER_VERSION $CUDA_COMPAT_MAX_DRIVER_VERSION; then 13 | echo "Adding CUDA compat to LD_LIBRARY_PATH" 14 | export LD_LIBRARY_PATH=/usr/local/cuda/compat:$LD_LIBRARY_PATH 15 | echo $LD_LIBRARY_PATH 16 | else 17 | echo "Skipping CUDA compat setup as newer NVIDIA driver is installed" 18 | fi 19 | else 20 | echo "Skipping CUDA compat setup as package not found" 21 | fi 22 | 23 | if [[ -z "${HF_MODEL_ID}" ]]; then 24 | echo "HF_MODEL_ID must be set" 25 | exit 1 26 | fi 27 | export MODEL_ID="${HF_MODEL_ID}" 28 | 29 | if [[ -n "${HF_MODEL_REVISION}" ]]; then 30 | export REVISION="${HF_MODEL_REVISION}" 31 | fi 32 | 33 | if [[ -n "${SM_NUM_GPUS}" ]]; then 34 | export NUM_SHARD="${SM_NUM_GPUS}" 35 | fi 36 | 37 | if [[ -n "${HF_MODEL_QUANTIZE}" ]]; then 38 | export QUANTIZE="${HF_MODEL_QUANTIZE}" 39 | fi 40 | 41 | if [[ -n "${HF_MODEL_TRUST_REMOTE_CODE}" ]]; then 42 | export TRUST_REMOTE_CODE="${HF_MODEL_TRUST_REMOTE_CODE}" 43 | fi 44 | 45 | text-generation-launcher --port 8080 -------------------------------------------------------------------------------- /huggingface/pytorch/tgi/docker/2.4.0/entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | verlt() { 4 | [ "$1" = "$2" ] && return 1 || [ "$1" = "$(echo -e "$1\n$2" | sort -V | head -n1)" ] 5 | } 6 | 7 | if [ -f /usr/local/cuda/compat/libcuda.so.1 ]; then 8 | CUDA_COMPAT_MAX_DRIVER_VERSION=$(readlink /usr/local/cuda/compat/libcuda.so.1 | cut -d'.' 
-f 3-) 9 | echo "CUDA compat package should be installed for NVIDIA driver smaller than ${CUDA_COMPAT_MAX_DRIVER_VERSION}" 10 | NVIDIA_DRIVER_VERSION=$(sed -n 's/^NVRM.*Kernel Module *\([0-9.]*\).*$/\1/p' /proc/driver/nvidia/version 2>/dev/null || true) 11 | echo "Current installed NVIDIA driver version is ${NVIDIA_DRIVER_VERSION}" 12 | if verlt $NVIDIA_DRIVER_VERSION $CUDA_COMPAT_MAX_DRIVER_VERSION; then 13 | echo "Adding CUDA compat to LD_LIBRARY_PATH" 14 | export LD_LIBRARY_PATH=/usr/local/cuda/compat:$LD_LIBRARY_PATH 15 | echo $LD_LIBRARY_PATH 16 | else 17 | echo "Skipping CUDA compat setup as newer NVIDIA driver is installed" 18 | fi 19 | else 20 | echo "Skipping CUDA compat setup as package not found" 21 | fi 22 | 23 | if [[ -z "${HF_MODEL_ID}" ]]; then 24 | echo "HF_MODEL_ID must be set" 25 | exit 1 26 | fi 27 | export MODEL_ID="${HF_MODEL_ID}" 28 | 29 | if [[ -n "${HF_MODEL_REVISION}" ]]; then 30 | export REVISION="${HF_MODEL_REVISION}" 31 | fi 32 | 33 | if [[ -n "${SM_NUM_GPUS}" ]]; then 34 | export NUM_SHARD="${SM_NUM_GPUS}" 35 | fi 36 | 37 | if [[ -n "${HF_MODEL_QUANTIZE}" ]]; then 38 | export QUANTIZE="${HF_MODEL_QUANTIZE}" 39 | fi 40 | 41 | if [[ -n "${HF_MODEL_TRUST_REMOTE_CODE}" ]]; then 42 | export TRUST_REMOTE_CODE="${HF_MODEL_TRUST_REMOTE_CODE}" 43 | fi 44 | 45 | text-generation-launcher --port 8080 -------------------------------------------------------------------------------- /huggingface/pytorch/tgi/docker/3.0.1/entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | verlt() { 4 | [ "$1" = "$2" ] && return 1 || [ "$1" = "$(echo -e "$1\n$2" | sort -V | head -n1)" ] 5 | } 6 | 7 | if [ -f /usr/local/cuda/compat/libcuda.so.1 ]; then 8 | CUDA_COMPAT_MAX_DRIVER_VERSION=$(readlink /usr/local/cuda/compat/libcuda.so.1 | cut -d'.' 
-f 3-) 9 | echo "CUDA compat package should be installed for NVIDIA driver smaller than ${CUDA_COMPAT_MAX_DRIVER_VERSION}" 10 | NVIDIA_DRIVER_VERSION=$(sed -n 's/^NVRM.*Kernel Module *\([0-9.]*\).*$/\1/p' /proc/driver/nvidia/version 2>/dev/null || true) 11 | echo "Current installed NVIDIA driver version is ${NVIDIA_DRIVER_VERSION}" 12 | if verlt $NVIDIA_DRIVER_VERSION $CUDA_COMPAT_MAX_DRIVER_VERSION; then 13 | echo "Adding CUDA compat to LD_LIBRARY_PATH" 14 | export LD_LIBRARY_PATH=/usr/local/cuda/compat:$LD_LIBRARY_PATH 15 | echo $LD_LIBRARY_PATH 16 | else 17 | echo "Skipping CUDA compat setup as newer NVIDIA driver is installed" 18 | fi 19 | else 20 | echo "Skipping CUDA compat setup as package not found" 21 | fi 22 | 23 | if [[ -z "${HF_MODEL_ID}" ]]; then 24 | echo "HF_MODEL_ID must be set" 25 | exit 1 26 | fi 27 | export MODEL_ID="${HF_MODEL_ID}" 28 | 29 | if [[ -n "${HF_MODEL_REVISION}" ]]; then 30 | export REVISION="${HF_MODEL_REVISION}" 31 | fi 32 | 33 | if [[ -n "${SM_NUM_GPUS}" ]]; then 34 | export NUM_SHARD="${SM_NUM_GPUS}" 35 | fi 36 | 37 | if [[ -n "${HF_MODEL_QUANTIZE}" ]]; then 38 | export QUANTIZE="${HF_MODEL_QUANTIZE}" 39 | fi 40 | 41 | if [[ -n "${HF_MODEL_TRUST_REMOTE_CODE}" ]]; then 42 | export TRUST_REMOTE_CODE="${HF_MODEL_TRUST_REMOTE_CODE}" 43 | fi 44 | 45 | text-generation-launcher --port 8080 -------------------------------------------------------------------------------- /huggingface/pytorch/tgi/docker/3.1.1/start-cuda-compat.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | verlt() { 4 | [ "$1" = "$2" ] && return 1 || [ "$1" = "$(echo -e "$1\n$2" | sort -V | head -n1)" ] 5 | } 6 | 7 | if [ -f /usr/local/cuda/compat/libcuda.so.1 ]; then 8 | CUDA_COMPAT_MAX_DRIVER_VERSION=$(readlink /usr/local/cuda/compat/libcuda.so.1 | cut -d'.' 
-f 3-) 9 | echo "CUDA compat package should be installed for NVIDIA driver smaller than ${CUDA_COMPAT_MAX_DRIVER_VERSION}" 10 | NVIDIA_DRIVER_VERSION=$(sed -n 's/^NVRM.*Kernel Module *\([0-9.]*\).*$/\1/p' /proc/driver/nvidia/version 2>/dev/null || true) 11 | echo "Current installed NVIDIA driver version is ${NVIDIA_DRIVER_VERSION}" 12 | if verlt $NVIDIA_DRIVER_VERSION $CUDA_COMPAT_MAX_DRIVER_VERSION; then 13 | echo "Adding CUDA compat to LD_LIBRARY_PATH" 14 | export LD_LIBRARY_PATH=/usr/local/cuda/compat:$LD_LIBRARY_PATH 15 | echo $LD_LIBRARY_PATH 16 | else 17 | echo "Skipping CUDA compat setup as newer NVIDIA driver is installed" 18 | fi 19 | else 20 | echo "Skipping CUDA compat setup as package not found" 21 | fi -------------------------------------------------------------------------------- /huggingface/pytorch/tgi/docker/3.2.0/start-cuda-compat.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/llm-hosting-container/b7c890f23332e5a57ffa5a8d41e3d66321d441b6/huggingface/pytorch/tgi/docker/3.2.0/start-cuda-compat.sh -------------------------------------------------------------------------------- /huggingface/pytorch/tgi/docker/3.2.3/Dockerfile: -------------------------------------------------------------------------------- 1 | # Use the original TGI Dockerfile as a base 2 | FROM ghcr.io/huggingface/text-generation-inference:3.2.3 AS base 3 | 4 | FROM base AS sagemaker 5 | 6 | COPY /huggingface/pytorch/tgi/docker/3.2.3/start-cuda-compat.sh start-cuda-compat.sh 7 | RUN chmod +x start-cuda-compat.sh 8 | 9 | RUN apt-get update && apt-get upgrade -y unzip 10 | 11 | RUN HOME_DIR=/root && \ 12 | uv pip install pip requests PTable && \ 13 | curl -o ${HOME_DIR}/oss_compliance.zip https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip && \ 14 | unzip ${HOME_DIR}/oss_compliance.zip -d ${HOME_DIR}/ && \ 15 | cp ${HOME_DIR}/oss_compliance/test/testOSSCompliance /usr/local/bin/testOSSCompliance && \ 16 | chmod +x /usr/local/bin/testOSSCompliance && \ 17 | chmod +x ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh && \ 18 | ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh ${HOME_DIR} python && \ 19 | rm -rf ${HOME_DIR}/oss_compliance* 20 | 21 | COPY /huggingface/pytorch/tgi/docker/3.2.3/THIRD-PARTY-LICENSES /root/THIRD-PARTY-LICENSES 22 | 23 | ENV LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/root/.local/share/uv/python/cpython-3.11.11-linux-x86_64-gnu/lib/" 24 | ENV HF_HUB_USER_AGENT_ORIGIN=aws:sagemaker:gpu-cuda:inference:tgi-native 25 | 26 | COPY sagemaker-entrypoint.sh entrypoint.sh 27 | RUN chmod +x entrypoint.sh 28 | 29 | ENTRYPOINT ["./entrypoint.sh"] 30 | CMD ["--json-output"] 31 | 32 | LABEL dlc_major_version="2" 33 | LABEL com.amazonaws.ml.engines.sagemaker.dlc.framework.huggingface.tgi="true" 34 | LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port="true" 35 | -------------------------------------------------------------------------------- /huggingface/pytorch/tgi/docker/3.2.3/start-cuda-compat.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | verlt() { 4 | [ "$1" = "$2" ] && return 1 || [ "$1" = "$(echo -e "$1\n$2" | sort -V | head -n1)" ] 5 | } 6 | 7 | if [ -f /usr/local/cuda/compat/libcuda.so.1 ]; then 8 | CUDA_COMPAT_MAX_DRIVER_VERSION=$(readlink /usr/local/cuda/compat/libcuda.so.1 | cut -d'.' 
-f 3-) 9 | echo "CUDA compat package should be installed for NVIDIA driver smaller than ${CUDA_COMPAT_MAX_DRIVER_VERSION}" 10 | NVIDIA_DRIVER_VERSION=$(sed -n 's/^NVRM.*Kernel Module *\([0-9.]*\).*$/\1/p' /proc/driver/nvidia/version 2>/dev/null || true) 11 | echo "Current installed NVIDIA driver version is ${NVIDIA_DRIVER_VERSION}" 12 | if verlt $NVIDIA_DRIVER_VERSION $CUDA_COMPAT_MAX_DRIVER_VERSION; then 13 | echo "Adding CUDA compat to LD_LIBRARY_PATH" 14 | export LD_LIBRARY_PATH=/usr/local/cuda/compat:$LD_LIBRARY_PATH 15 | echo $LD_LIBRARY_PATH 16 | else 17 | echo "Skipping CUDA compat setup as newer NVIDIA driver is installed" 18 | fi 19 | else 20 | echo "Skipping CUDA compat setup as package not found" 21 | fi -------------------------------------------------------------------------------- /huggingface/pytorch/tgi/docker/archived/0.5.0/py3/cu118/Dockerfile.gpu: -------------------------------------------------------------------------------- 1 | FROM lukemathwalker/cargo-chef:latest-rust-1.67 AS chef 2 | WORKDIR /usr/src 3 | 4 | FROM chef as planner 5 | COPY Cargo.toml Cargo.toml 6 | COPY rust-toolchain.toml rust-toolchain.toml 7 | COPY proto proto 8 | COPY router router 9 | COPY launcher launcher 10 | RUN cargo chef prepare --recipe-path recipe.json 11 | 12 | FROM chef AS builder 13 | 14 | RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && \ 15 | curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v21.12/$PROTOC_ZIP && \ 16 | unzip -o $PROTOC_ZIP -d /usr/local bin/protoc && \ 17 | unzip -o $PROTOC_ZIP -d /usr/local 'include/*' && \ 18 | rm -f $PROTOC_ZIP 19 | 20 | COPY --from=planner /usr/src/recipe.json recipe.json 21 | RUN cargo chef cook --release --recipe-path recipe.json 22 | 23 | COPY Cargo.toml Cargo.toml 24 | COPY rust-toolchain.toml rust-toolchain.toml 25 | COPY proto proto 26 | COPY router router 27 | COPY launcher launcher 28 | RUN cargo build --release 29 | 30 | FROM nvidia/cuda:11.8.0-devel-ubuntu22.04 as base 31 | 32 | ARG PYTORCH_VERSION=2.0.0 33 | ARG PYTHON_VERSION=3.9 34 | ARG MAMBA_VERSION=23.1.0-1 35 | ARG CUDA_CHANNEL=nvidia 36 | ARG INSTALL_CHANNEL=pytorch 37 | # Automatically set by buildx 38 | ARG TARGETPLATFORM 39 | 40 | ENV LANG=C.UTF-8 \ 41 | LC_ALL=C.UTF-8 \ 42 | DEBIAN_FRONTEND=noninteractive \ 43 | HUGGINGFACE_HUB_CACHE=/tmp \ 44 | TRANSFORMERS_CACHE=/tmp \ 45 | HF_HUB_ENABLE_HF_TRANSFER=1 \ 46 | MODEL_ID=bigscience/bloom-560m \ 47 | QUANTIZE=false \ 48 | NUM_SHARD=1 \ 49 | PORT=80 \ 50 | CUDA_HOME=/usr/local/cuda \ 51 | LD_LIBRARY_PATH="/opt/conda/lib:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64:$LD_LIBRARY_PATH" \ 52 | PATH=$PATH:/opt/conda/bin:/usr/local/cuda/bin \ 53 | CONDA_PREFIX=/opt/conda 54 | 55 | RUN apt-get update && apt-get install -y --no-install-recommends \ 56 | libssl-dev \ 57 | unzip \ 58 | build-essential \ 59 | ca-certificates \ 60 | ccache \ 61 | curl \ 62 | git && \ 63 | rm -rf /var/lib/apt/lists/* 64 | 65 | # Install conda 66 | # translating Docker's TARGETPLATFORM into mamba arches 67 | RUN case ${TARGETPLATFORM} in \ 68 | "linux/arm64") MAMBA_ARCH=aarch64 ;; \ 69 | *) MAMBA_ARCH=x86_64 ;; \ 70 | esac && \ 71 | curl -fsSL -v -o ~/mambaforge.sh -O "https://github.com/conda-forge/miniforge/releases/download/${MAMBA_VERSION}/Mambaforge-${MAMBA_VERSION}-Linux-${MAMBA_ARCH}.sh" 72 | RUN chmod +x ~/mambaforge.sh && \ 73 | bash ~/mambaforge.sh -b -p /opt/conda && \ 74 | rm ~/mambaforge.sh 75 | 76 | # Install pytorch 77 | # On arm64 we exit with an error code 78 | RUN case ${TARGETPLATFORM} in \ 79 | 
"linux/arm64") exit 1 ;; \ 80 | *) /opt/conda/bin/conda update -y conda && \ 81 | /opt/conda/bin/conda install -c "${INSTALL_CHANNEL}" -c "${CUDA_CHANNEL}" -y "python=${PYTHON_VERSION}" pytorch==$PYTORCH_VERSION "pytorch-cuda=$(echo $CUDA_VERSION | cut -d'.' -f 1-2)" ;; \ 82 | esac && \ 83 | /opt/conda/bin/conda clean -ya 84 | 85 | WORKDIR /usr/src 86 | 87 | RUN LIBSSL_DEB=libssl1.1_1.1.0g-2ubuntu4_amd64.deb && \ 88 | curl -OL http://archive.ubuntu.com/ubuntu/pool/main/o/openssl/$LIBSSL_DEB && \ 89 | dpkg -i $LIBSSL_DEB && \ 90 | rm -f $LIBSSL_DEB 91 | 92 | # Install specific version of flash attention 93 | COPY server/Makefile-flash-att server/Makefile 94 | RUN cd server && make install-flash-attention 95 | 96 | # Install specific version of transformers 97 | COPY server/Makefile-transformers server/Makefile 98 | RUN cd server && BUILD_EXTENSIONS="True" make install-transformers 99 | 100 | COPY server/Makefile server/Makefile 101 | 102 | # Install server 103 | COPY proto proto 104 | COPY server server 105 | RUN cd server && \ 106 | make gen-server && \ 107 | pip install ".[bnb]" --no-cache-dir 108 | RUN rm -r proto server 109 | 110 | # Install router 111 | COPY --from=builder /usr/src/target/release/text-generation-router /usr/local/bin/text-generation-router 112 | # Install launcher 113 | COPY --from=builder /usr/src/target/release/text-generation-launcher /usr/local/bin/text-generation-launcher 114 | 115 | # AWS Sagemaker compatible image 116 | FROM base as sagemaker 117 | 118 | COPY --chmod=775 sagemaker-entrypoint.sh entrypoint.sh 119 | RUN sed -i '7 i export MODEL_ID="${HF_MODEL_ID}"' entrypoint.sh 120 | 121 | RUN HOME_DIR=/root && \ 122 | pip install requests && \ 123 | curl -o ${HOME_DIR}/oss_compliance.zip https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip && \ 124 | unzip ${HOME_DIR}/oss_compliance.zip -d ${HOME_DIR}/ && \ 125 | cp ${HOME_DIR}/oss_compliance/test/testOSSCompliance /usr/local/bin/testOSSCompliance && \ 126 | chmod +x /usr/local/bin/testOSSCompliance && \ 127 | chmod +x ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh && \ 128 | ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh ${HOME_DIR} python && \ 129 | rm -rf ${HOME_DIR}/oss_compliance* 130 | 131 | ENTRYPOINT ["./entrypoint.sh"] 132 | 133 | LABEL dlc_major_version="1" 134 | LABEL com.amazonaws.ml.engines.sagemaker.dlc.framework.huggingface.tgi="true" 135 | LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port="true" 136 | -------------------------------------------------------------------------------- /huggingface/pytorch/tgi/docker/archived/0.6.0/py3/cu118/Dockerfile.gpu: -------------------------------------------------------------------------------- 1 | # Rust builder 2 | FROM lukemathwalker/cargo-chef:latest-rust-1.69 AS chef 3 | WORKDIR /usr/src 4 | 5 | FROM chef as planner 6 | COPY Cargo.toml Cargo.toml 7 | COPY rust-toolchain.toml rust-toolchain.toml 8 | COPY proto proto 9 | COPY router router 10 | COPY launcher launcher 11 | RUN cargo chef prepare --recipe-path recipe.json 12 | 13 | FROM chef AS builder 14 | 15 | ARG GIT_SHA 16 | 17 | RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && \ 18 | curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v21.12/$PROTOC_ZIP && \ 19 | unzip -o $PROTOC_ZIP -d /usr/local bin/protoc && \ 20 | unzip -o $PROTOC_ZIP -d /usr/local 'include/*' && \ 21 | rm -f $PROTOC_ZIP 22 | 23 | COPY --from=planner /usr/src/recipe.json recipe.json 24 | RUN cargo chef cook --release --recipe-path recipe.json 25 | 26 | COPY Cargo.toml 
Cargo.toml 27 | COPY rust-toolchain.toml rust-toolchain.toml 28 | COPY proto proto 29 | COPY router router 30 | COPY launcher launcher 31 | RUN cargo build --release 32 | 33 | # Python builder 34 | # Adapted from: https://github.com/pytorch/pytorch/blob/master/Dockerfile 35 | FROM debian:bullseye-slim as pytorch-install 36 | 37 | ARG PYTORCH_VERSION=2.0.1 38 | ARG PYTHON_VERSION=3.9 39 | ARG CUDA_VERSION=11.8 40 | ARG MAMBA_VERSION=23.1.0-4 41 | ARG CUDA_CHANNEL=nvidia 42 | ARG INSTALL_CHANNEL=pytorch 43 | # Automatically set by buildx 44 | ARG TARGETPLATFORM 45 | 46 | ENV PATH /opt/conda/bin:$PATH 47 | 48 | RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ 49 | build-essential \ 50 | ca-certificates \ 51 | ccache \ 52 | curl \ 53 | git && \ 54 | rm -rf /var/lib/apt/lists/* 55 | 56 | # Install conda 57 | # translating Docker's TARGETPLATFORM into mamba arches 58 | RUN case ${TARGETPLATFORM} in \ 59 | "linux/arm64") MAMBA_ARCH=aarch64 ;; \ 60 | *) MAMBA_ARCH=x86_64 ;; \ 61 | esac && \ 62 | curl -fsSL -v -o ~/mambaforge.sh -O "https://github.com/conda-forge/miniforge/releases/download/${MAMBA_VERSION}/Mambaforge-${MAMBA_VERSION}-Linux-${MAMBA_ARCH}.sh" 63 | RUN chmod +x ~/mambaforge.sh && \ 64 | bash ~/mambaforge.sh -b -p /opt/conda && \ 65 | rm ~/mambaforge.sh 66 | 67 | # Install pytorch 68 | # On arm64 we exit with an error code 69 | RUN case ${TARGETPLATFORM} in \ 70 | "linux/arm64") exit 1 ;; \ 71 | *) /opt/conda/bin/conda update -y conda && \ 72 | /opt/conda/bin/conda install -c "${INSTALL_CHANNEL}" -c "${CUDA_CHANNEL}" -y "python=${PYTHON_VERSION}" pytorch==$PYTORCH_VERSION "pytorch-cuda=$(echo $CUDA_VERSION | cut -d'.' -f 1-2)" ;; \ 73 | esac && \ 74 | /opt/conda/bin/conda clean -ya 75 | 76 | # CUDA kernels builder image 77 | FROM pytorch-install as kernel-builder 78 | 79 | RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ 80 | ninja-build \ 81 | && rm -rf /var/lib/apt/lists/* 82 | 83 | RUN /opt/conda/bin/conda install -c "nvidia/label/cuda-11.8.0" -y cuda==11.8 && \ 84 | /opt/conda/bin/conda clean -ya 85 | 86 | # Build Flash Attention CUDA kernels 87 | FROM kernel-builder as flash-att-builder 88 | 89 | WORKDIR /usr/src 90 | 91 | COPY server/Makefile-flash-att Makefile 92 | 93 | # Build specific version of flash attention 94 | RUN make build-flash-attention 95 | 96 | # Build Transformers CUDA kernels 97 | FROM kernel-builder as transformers-builder 98 | 99 | WORKDIR /usr/src 100 | 101 | COPY server/Makefile-transformers Makefile 102 | 103 | # Build specific version of transformers 104 | RUN BUILD_EXTENSIONS="True" make build-transformers 105 | 106 | # Text Generation Inference base image 107 | FROM nvidia/cuda:11.8.0-base-ubuntu20.04 as base 108 | 109 | # Conda env 110 | ENV PATH=/opt/conda/bin:$PATH \ 111 | CONDA_PREFIX=/opt/conda 112 | 113 | # Text Generation Inference base env 114 | ENV HUGGINGFACE_HUB_CACHE=/tmp \ 115 | TRANSFORMERS_CACHE=/tmp \ 116 | HF_HUB_ENABLE_HF_TRANSFER=1 \ 117 | MODEL_ID=bigscience/bloom-560m \ 118 | QUANTIZE=false \ 119 | NUM_SHARD=1 \ 120 | PORT=80 121 | 122 | LABEL com.nvidia.volumes.needed="nvidia_driver" 123 | 124 | WORKDIR /usr/src 125 | 126 | RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ 127 | libssl-dev \ 128 | ca-certificates \ 129 | make \ 130 | unzip \ 131 | curl \ 132 | && rm -rf /var/lib/apt/lists/* 133 | 134 | # Copy conda with PyTorch installed 135 | COPY --from=pytorch-install /opt/conda 
/opt/conda 136 | 137 | # Copy build artifacts from flash attention builder 138 | COPY --from=flash-att-builder /usr/src/flash-attention/build/lib.linux-x86_64-cpython-39 /opt/conda/lib/python3.9/site-packages 139 | COPY --from=flash-att-builder /usr/src/flash-attention/csrc/layer_norm/build/lib.linux-x86_64-cpython-39 /opt/conda/lib/python3.9/site-packages 140 | COPY --from=flash-att-builder /usr/src/flash-attention/csrc/rotary/build/lib.linux-x86_64-cpython-39 /opt/conda/lib/python3.9/site-packages 141 | 142 | # Copy build artifacts from transformers builder 143 | COPY --from=transformers-builder /usr/src/transformers /usr/src/transformers 144 | COPY --from=transformers-builder /usr/src/transformers/build/lib.linux-x86_64-cpython-39/transformers /usr/src/transformers/src/transformers 145 | 146 | # Install transformers dependencies 147 | RUN cd /usr/src/transformers && pip install -e . --no-cache-dir && pip install einops --no-cache-dir 148 | RUN rm -r transformers/examples 149 | 150 | # FIXME: remove when we get a release of huggingface-hub 151 | RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ 152 | git \ 153 | && rm -rf /var/lib/apt/lists/* 154 | 155 | # Install server 156 | COPY proto proto 157 | COPY server server 158 | COPY server/Makefile server/Makefile 159 | RUN cd server && \ 160 | make gen-server && \ 161 | pip install -r requirements.txt && \ 162 | pip install ".[bnb, accelerate]" --no-cache-dir 163 | RUN rm -r proto server 164 | 165 | # Install router 166 | COPY --from=builder /usr/src/target/release/text-generation-router /usr/local/bin/text-generation-router 167 | # Install launcher 168 | COPY --from=builder /usr/src/target/release/text-generation-launcher /usr/local/bin/text-generation-launcher 169 | 170 | # AWS Sagemaker compatbile image 171 | FROM base as sagemaker 172 | 173 | COPY --chmod=775 sagemaker-entrypoint.sh entrypoint.sh 174 | RUN sed -i '7 i export MODEL_ID="${HF_MODEL_ID}"' entrypoint.sh 175 | 176 | RUN HOME_DIR=/root && \ 177 | pip install requests && \ 178 | curl -o ${HOME_DIR}/oss_compliance.zip https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip && \ 179 | unzip ${HOME_DIR}/oss_compliance.zip -d ${HOME_DIR}/ && \ 180 | cp ${HOME_DIR}/oss_compliance/test/testOSSCompliance /usr/local/bin/testOSSCompliance && \ 181 | chmod +x /usr/local/bin/testOSSCompliance && \ 182 | chmod +x ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh && \ 183 | ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh ${HOME_DIR} python && \ 184 | rm -rf ${HOME_DIR}/oss_compliance* 185 | RUN curl -o /root/THIRD-PARTY-LICENSES https://publish.djl.ai/dlc-licenses/huggingface/tgi-0.6.0/THIRD-PARTY-LICENSES 186 | 187 | ENTRYPOINT ["./entrypoint.sh"] 188 | 189 | LABEL dlc_major_version="1" 190 | LABEL com.amazonaws.ml.engines.sagemaker.dlc.framework.huggingface.tgi="true" 191 | LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port="true" 192 | -------------------------------------------------------------------------------- /huggingface/pytorch/tgi/docker/archived/0.8.2/py3/cu118/Dockerfile.gpu: -------------------------------------------------------------------------------- 1 | # Rust builder 2 | FROM lukemathwalker/cargo-chef:latest-rust-1.69 AS chef 3 | WORKDIR /usr/src 4 | 5 | FROM chef as planner 6 | COPY Cargo.toml Cargo.toml 7 | COPY rust-toolchain.toml rust-toolchain.toml 8 | COPY proto proto 9 | COPY benchmark benchmark 10 | COPY router router 11 | COPY launcher launcher 12 | RUN cargo chef prepare 
--recipe-path recipe.json 13 | 14 | FROM chef AS builder 15 | 16 | ARG GIT_SHA 17 | 18 | RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && \ 19 | curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v21.12/$PROTOC_ZIP && \ 20 | unzip -o $PROTOC_ZIP -d /usr/local bin/protoc && \ 21 | unzip -o $PROTOC_ZIP -d /usr/local 'include/*' && \ 22 | rm -f $PROTOC_ZIP 23 | 24 | COPY --from=planner /usr/src/recipe.json recipe.json 25 | RUN cargo chef cook --release --recipe-path recipe.json 26 | 27 | COPY Cargo.toml Cargo.toml 28 | COPY rust-toolchain.toml rust-toolchain.toml 29 | COPY proto proto 30 | COPY benchmark benchmark 31 | COPY router router 32 | COPY launcher launcher 33 | RUN cargo build --release 34 | 35 | # Python builder 36 | # Adapted from: https://github.com/pytorch/pytorch/blob/master/Dockerfile 37 | FROM debian:bullseye-slim as pytorch-install 38 | 39 | ARG PYTORCH_VERSION=2.0.0 40 | ARG PYTHON_VERSION=3.9 41 | ARG CUDA_VERSION=11.8 42 | ARG MAMBA_VERSION=23.1.0-1 43 | ARG CUDA_CHANNEL=nvidia 44 | ARG INSTALL_CHANNEL=pytorch 45 | # Automatically set by buildx 46 | ARG TARGETPLATFORM 47 | 48 | ENV PATH /opt/conda/bin:$PATH 49 | 50 | RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ 51 | build-essential \ 52 | ca-certificates \ 53 | ccache \ 54 | curl \ 55 | git && \ 56 | rm -rf /var/lib/apt/lists/* 57 | 58 | # Install conda 59 | # translating Docker's TARGETPLATFORM into mamba arches 60 | RUN case ${TARGETPLATFORM} in \ 61 | "linux/arm64") MAMBA_ARCH=aarch64 ;; \ 62 | *) MAMBA_ARCH=x86_64 ;; \ 63 | esac && \ 64 | curl -fsSL -v -o ~/mambaforge.sh -O "https://github.com/conda-forge/miniforge/releases/download/${MAMBA_VERSION}/Mambaforge-${MAMBA_VERSION}-Linux-${MAMBA_ARCH}.sh" 65 | RUN chmod +x ~/mambaforge.sh && \ 66 | bash ~/mambaforge.sh -b -p /opt/conda && \ 67 | rm ~/mambaforge.sh 68 | 69 | # Install pytorch 70 | # On arm64 we exit with an error code 71 | RUN case ${TARGETPLATFORM} in \ 72 | "linux/arm64") exit 1 ;; \ 73 | *) /opt/conda/bin/conda update -y conda && \ 74 | /opt/conda/bin/conda install -c "${INSTALL_CHANNEL}" -c "${CUDA_CHANNEL}" -y "python=${PYTHON_VERSION}" pytorch==$PYTORCH_VERSION "pytorch-cuda=$(echo $CUDA_VERSION | cut -d'.' 
-f 1-2)" ;; \ 75 | esac && \ 76 | /opt/conda/bin/conda clean -ya 77 | 78 | # CUDA kernels builder image 79 | FROM pytorch-install as kernel-builder 80 | 81 | RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ 82 | ninja-build \ 83 | && rm -rf /var/lib/apt/lists/* 84 | 85 | RUN /opt/conda/bin/conda install -c "nvidia/label/cuda-11.8.0" -y cuda==11.8 && \ 86 | /opt/conda/bin/conda clean -ya 87 | 88 | # Build Flash Attention CUDA kernels 89 | FROM kernel-builder as flash-att-builder 90 | 91 | WORKDIR /usr/src 92 | 93 | COPY server/Makefile-flash-att Makefile 94 | 95 | # Build specific version of flash attention 96 | RUN make build-flash-attention 97 | 98 | # Build Transformers CUDA kernels 99 | FROM kernel-builder as transformers-builder 100 | 101 | WORKDIR /usr/src 102 | 103 | COPY server/Makefile-transformers Makefile 104 | 105 | # Build specific version of transformers 106 | RUN BUILD_EXTENSIONS="True" make build-transformers 107 | 108 | # Text Generation Inference base image 109 | FROM nvidia/cuda:11.8.0-base-ubuntu20.04 as base 110 | 111 | # Conda env 112 | ENV PATH=/opt/conda/bin:$PATH \ 113 | CONDA_PREFIX=/opt/conda 114 | 115 | # Text Generation Inference base env 116 | ENV HUGGINGFACE_HUB_CACHE=/tmp \ 117 | HF_HUB_ENABLE_HF_TRANSFER=1 \ 118 | PORT=80 119 | 120 | WORKDIR /usr/src 121 | 122 | RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ 123 | libssl-dev \ 124 | ca-certificates \ 125 | make \ 126 | unzip \ 127 | curl \ 128 | && rm -rf /var/lib/apt/lists/* 129 | 130 | # Copy conda with PyTorch installed 131 | COPY --from=pytorch-install /opt/conda /opt/conda 132 | 133 | # Copy build artifacts from flash attention builder 134 | COPY --from=flash-att-builder /usr/src/flash-attention/build/lib.linux-x86_64-cpython-39 /opt/conda/lib/python3.9/site-packages 135 | COPY --from=flash-att-builder /usr/src/flash-attention/csrc/layer_norm/build/lib.linux-x86_64-cpython-39 /opt/conda/lib/python3.9/site-packages 136 | COPY --from=flash-att-builder /usr/src/flash-attention/csrc/rotary/build/lib.linux-x86_64-cpython-39 /opt/conda/lib/python3.9/site-packages 137 | 138 | # Copy build artifacts from transformers builder 139 | COPY --from=transformers-builder /usr/src/transformers /usr/src/transformers 140 | COPY --from=transformers-builder /usr/src/transformers/build/lib.linux-x86_64-cpython-39/transformers /usr/src/transformers/src/transformers 141 | 142 | # Install transformers dependencies 143 | RUN cd /usr/src/transformers && pip install -e . 
--no-cache-dir && pip install einops --no-cache-dir 144 | RUN rm -r transformers/examples 145 | 146 | # Install server 147 | COPY proto proto 148 | COPY server server 149 | COPY server/Makefile server/Makefile 150 | RUN cd server && \ 151 | make gen-server && \ 152 | pip install -r requirements.txt && \ 153 | pip install ".[bnb, accelerate]" --no-cache-dir 154 | RUN rm -r proto server 155 | 156 | # Install benchmarker 157 | COPY --from=builder /usr/src/target/release/text-generation-benchmark /usr/local/bin/text-generation-benchmark 158 | # Install router 159 | COPY --from=builder /usr/src/target/release/text-generation-router /usr/local/bin/text-generation-router 160 | # Install launcher 161 | COPY --from=builder /usr/src/target/release/text-generation-launcher /usr/local/bin/text-generation-launcher 162 | 163 | # AWS Sagemaker compatbile image 164 | FROM base as sagemaker 165 | 166 | COPY --chmod=775 sagemaker-entrypoint.sh entrypoint.sh 167 | 168 | RUN HOME_DIR=/root && \ 169 | pip install requests && \ 170 | curl -o ${HOME_DIR}/oss_compliance.zip https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip && \ 171 | unzip ${HOME_DIR}/oss_compliance.zip -d ${HOME_DIR}/ && \ 172 | cp ${HOME_DIR}/oss_compliance/test/testOSSCompliance /usr/local/bin/testOSSCompliance && \ 173 | chmod +x /usr/local/bin/testOSSCompliance && \ 174 | chmod +x ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh && \ 175 | ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh ${HOME_DIR} python && \ 176 | rm -rf ${HOME_DIR}/oss_compliance* 177 | RUN curl -o /root/THIRD-PARTY-LICENSES https://publish.djl.ai/dlc-licenses/huggingface/tgi-0.8.2/THIRD-PARTY-LICENSES 178 | 179 | ENTRYPOINT ["./entrypoint.sh"] 180 | 181 | LABEL dlc_major_version="1" 182 | LABEL com.amazonaws.ml.engines.sagemaker.dlc.framework.huggingface.tgi="true" 183 | LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port="true" 184 | -------------------------------------------------------------------------------- /huggingface/pytorch/tgi/docker/archived/0.9.3/py3/cu118/Dockerfile.gpu: -------------------------------------------------------------------------------- 1 | FROM lukemathwalker/cargo-chef:latest-rust-1.70 AS chef 2 | WORKDIR /usr/src 3 | 4 | ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse 5 | 6 | FROM chef as planner 7 | COPY Cargo.toml Cargo.toml 8 | COPY rust-toolchain.toml rust-toolchain.toml 9 | COPY proto proto 10 | COPY benchmark benchmark 11 | COPY router router 12 | COPY launcher launcher 13 | RUN cargo chef prepare --recipe-path recipe.json 14 | 15 | FROM chef AS builder 16 | 17 | ARG GIT_SHA 18 | ARG DOCKER_LABEL 19 | 20 | RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && \ 21 | curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v21.12/$PROTOC_ZIP && \ 22 | unzip -o $PROTOC_ZIP -d /usr/local bin/protoc && \ 23 | unzip -o $PROTOC_ZIP -d /usr/local 'include/*' && \ 24 | rm -f $PROTOC_ZIP 25 | 26 | COPY --from=planner /usr/src/recipe.json recipe.json 27 | RUN cargo chef cook --release --recipe-path recipe.json 28 | 29 | COPY Cargo.toml Cargo.toml 30 | COPY rust-toolchain.toml rust-toolchain.toml 31 | COPY proto proto 32 | COPY benchmark benchmark 33 | COPY router router 34 | COPY launcher launcher 35 | RUN cargo build --release 36 | 37 | # Python builder 38 | # Adapted from: https://github.com/pytorch/pytorch/blob/master/Dockerfile 39 | FROM debian:bullseye-slim as pytorch-install 40 | 41 | ARG PYTORCH_VERSION=2.0.1 42 | ARG PYTHON_VERSION=3.9 43 | ARG CUDA_VERSION=11.8 44 | ARG 
MAMBA_VERSION=23.1.0-4 45 | ARG CUDA_CHANNEL=nvidia 46 | ARG INSTALL_CHANNEL=pytorch 47 | # Automatically set by buildx 48 | ARG TARGETPLATFORM 49 | 50 | ENV PATH /opt/conda/bin:$PATH 51 | 52 | RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ 53 | build-essential \ 54 | ca-certificates \ 55 | ccache \ 56 | curl \ 57 | git && \ 58 | rm -rf /var/lib/apt/lists/* 59 | 60 | # Install conda 61 | # translating Docker's TARGETPLATFORM into mamba arches 62 | RUN case ${TARGETPLATFORM} in \ 63 | "linux/arm64") MAMBA_ARCH=aarch64 ;; \ 64 | *) MAMBA_ARCH=x86_64 ;; \ 65 | esac && \ 66 | curl -fsSL -v -o ~/mambaforge.sh -O "https://github.com/conda-forge/miniforge/releases/download/${MAMBA_VERSION}/Mambaforge-${MAMBA_VERSION}-Linux-${MAMBA_ARCH}.sh" 67 | RUN chmod +x ~/mambaforge.sh && \ 68 | bash ~/mambaforge.sh -b -p /opt/conda && \ 69 | rm ~/mambaforge.sh 70 | 71 | # Install pytorch 72 | # On arm64 we exit with an error code 73 | RUN case ${TARGETPLATFORM} in \ 74 | "linux/arm64") exit 1 ;; \ 75 | *) /opt/conda/bin/conda update -y conda && \ 76 | /opt/conda/bin/conda install -c "${INSTALL_CHANNEL}" -c "${CUDA_CHANNEL}" -y "python=${PYTHON_VERSION}" pytorch==$PYTORCH_VERSION "pytorch-cuda=$(echo $CUDA_VERSION | cut -d'.' -f 1-2)" ;; \ 77 | esac && \ 78 | /opt/conda/bin/conda clean -ya 79 | 80 | # CUDA kernels builder image 81 | FROM pytorch-install as kernel-builder 82 | 83 | RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ 84 | ninja-build \ 85 | && rm -rf /var/lib/apt/lists/* 86 | 87 | RUN /opt/conda/bin/conda install -c "nvidia/label/cuda-11.8.0" -y cuda==11.8 && \ 88 | /opt/conda/bin/conda clean -ya 89 | 90 | # Build Flash Attention CUDA kernels 91 | FROM kernel-builder as flash-att-builder 92 | 93 | WORKDIR /usr/src 94 | 95 | COPY server/Makefile-flash-att Makefile 96 | 97 | # Build specific version of flash attention 98 | RUN make build-flash-attention 99 | 100 | # Build Flash Attention v2 CUDA kernels 101 | FROM kernel-builder as flash-att-v2-builder 102 | 103 | WORKDIR /usr/src 104 | 105 | COPY server/Makefile-flash-att-v2 Makefile 106 | 107 | # Build specific version of flash attention v2 108 | RUN make build-flash-attention-v2 109 | 110 | # Build Transformers CUDA kernels 111 | FROM kernel-builder as custom-kernels-builder 112 | 113 | WORKDIR /usr/src 114 | 115 | COPY server/custom_kernels/ . 
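# Note: the "Build specific version of transformers" comment just below is a leftover from the
# earlier Dockerfiles in this directory; in this 0.9.3 image the stage compiles TGI's custom CUDA
# kernels copied from server/custom_kernels via setup.py, and there is no separate
# transformers-builder stage anymore.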
116 | 117 | # Build specific version of transformers 118 | RUN python setup.py build 119 | 120 | # Build vllm CUDA kernels 121 | FROM kernel-builder as vllm-builder 122 | 123 | WORKDIR /usr/src 124 | 125 | COPY server/Makefile-vllm Makefile 126 | 127 | # Build specific version of vllm 128 | RUN make build-vllm 129 | 130 | # Text Generation Inference base image 131 | FROM nvidia/cuda:11.8.0-base-ubuntu20.04 as base 132 | 133 | # Conda env 134 | ENV PATH=/opt/conda/bin:$PATH \ 135 | CONDA_PREFIX=/opt/conda 136 | 137 | # Text Generation Inference base env 138 | ENV HUGGINGFACE_HUB_CACHE=/tmp \ 139 | HF_HUB_ENABLE_HF_TRANSFER=1 \ 140 | PORT=80 141 | 142 | WORKDIR /usr/src 143 | 144 | RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ 145 | libssl-dev \ 146 | ca-certificates \ 147 | make \ 148 | unzip \ 149 | curl \ 150 | && rm -rf /var/lib/apt/lists/* 151 | 152 | # Copy conda with PyTorch installed 153 | COPY --from=pytorch-install /opt/conda /opt/conda 154 | 155 | # Copy build artifacts from flash attention builder 156 | COPY --from=flash-att-builder /usr/src/flash-attention/build/lib.linux-x86_64-cpython-39 /opt/conda/lib/python3.9/site-packages 157 | COPY --from=flash-att-builder /usr/src/flash-attention/csrc/layer_norm/build/lib.linux-x86_64-cpython-39 /opt/conda/lib/python3.9/site-packages 158 | COPY --from=flash-att-builder /usr/src/flash-attention/csrc/rotary/build/lib.linux-x86_64-cpython-39 /opt/conda/lib/python3.9/site-packages 159 | 160 | # Copy build artifacts from flash attention v2 builder 161 | COPY --from=flash-att-v2-builder /usr/src/flash-attention-v2/build/lib.linux-x86_64-cpython-39 /opt/conda/lib/python3.9/site-packages 162 | 163 | # Copy build artifacts from custom kernels builder 164 | COPY --from=custom-kernels-builder /usr/src/build/lib.linux-x86_64-cpython-39 /opt/conda/lib/python3.9/site-packages 165 | 166 | # Copy builds artifacts from vllm builder 167 | COPY --from=vllm-builder /usr/src/vllm/build/lib.linux-x86_64-cpython-39 /opt/conda/lib/python3.9/site-packages 168 | 169 | # Install flash-attention dependencies 170 | RUN pip install einops --no-cache-dir 171 | 172 | # Install server 173 | COPY proto proto 174 | COPY server server 175 | COPY server/Makefile server/Makefile 176 | RUN cd server && \ 177 | make gen-server && \ 178 | pip install -r requirements.txt && \ 179 | pip install ".[bnb, accelerate]" --no-cache-dir 180 | RUN rm -r proto server 181 | 182 | # Install benchmarker 183 | COPY --from=builder /usr/src/target/release/text-generation-benchmark /usr/local/bin/text-generation-benchmark 184 | # Install router 185 | COPY --from=builder /usr/src/target/release/text-generation-router /usr/local/bin/text-generation-router 186 | # Install launcher 187 | COPY --from=builder /usr/src/target/release/text-generation-launcher /usr/local/bin/text-generation-launcher 188 | 189 | RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ 190 | build-essential \ 191 | g++ \ 192 | && rm -rf /var/lib/apt/lists/* 193 | 194 | # AWS Sagemaker compatbile image 195 | FROM base as sagemaker 196 | 197 | COPY --chmod=775 sagemaker-entrypoint.sh entrypoint.sh 198 | 199 | RUN HOME_DIR=/root && \ 200 | pip install requests && \ 201 | curl -o ${HOME_DIR}/oss_compliance.zip https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip && \ 202 | unzip ${HOME_DIR}/oss_compliance.zip -d ${HOME_DIR}/ && \ 203 | cp ${HOME_DIR}/oss_compliance/test/testOSSCompliance /usr/local/bin/testOSSCompliance && 
\ 204 | chmod +x /usr/local/bin/testOSSCompliance && \ 205 | chmod +x ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh && \ 206 | ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh ${HOME_DIR} python && \ 207 | rm -rf ${HOME_DIR}/oss_compliance* 208 | RUN curl -o /root/THIRD-PARTY-LICENSES https://publish.djl.ai/dlc-licenses/huggingface/tgi-0.9.3/THIRD-PARTY-LICENSES 209 | 210 | ENTRYPOINT ["./entrypoint.sh"] 211 | CMD ["--json-output"] 212 | 213 | LABEL dlc_major_version="1" 214 | LABEL com.amazonaws.ml.engines.sagemaker.dlc.framework.huggingface.tgi="true" 215 | LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port="true" 216 | -------------------------------------------------------------------------------- /huggingface/pytorch/tgi/docker/buildspec.yml: -------------------------------------------------------------------------------- 1 | version: 0.2 2 | 3 | env: 4 | shell: bash 5 | variables: 6 | FRAMEWORK_FOLDER: "huggingface/pytorch/tgi/docker" 7 | PYTHONPATH: "/codebuild/output/src*/src/github.com/awslabs/llm-hosting-container" 8 | 9 | phases: 10 | install: 11 | runtime-versions: 12 | python: 3.11 13 | commands: 14 | - echo "Installing Python version 3.11 ..." 15 | - pyenv global $PYTHON_311_VERSION 16 | 17 | pre_build: 18 | commands: 19 | - export PYTHONPATH=$(pwd):$PYTHONPATH 20 | - | 21 | echo Setting up Docker buildx. 22 | docker buildx version 23 | docker buildx create --name builder --driver docker-container --buildkitd-flags '--allow-insecure-entitlement security.insecure --allow-insecure-entitlement network.host' --use 24 | docker buildx inspect --bootstrap --builder builder 25 | docker buildx install 26 | echo Preparing system dependencies for execution. 27 | docker --version 28 | docker login -u $DOCKER_USERNAME -p $DOCKER_PASSWORD 29 | curl -LO http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh 30 | bash Miniconda3-latest-Linux-x86_64.sh -bfp /miniconda3 31 | export PATH=/miniconda3/bin:${PATH} 32 | conda install python=3.11 33 | conda update -y conda 34 | echo Prepare TGI dependencies for execution. 
35 | mkdir tgi-artifacts 36 | python -m pip install -r $FRAMEWORK_FOLDER/tgi-requirements.txt 37 | 38 | build: 39 | commands: 40 | - | 41 | echo Build started on `date` 42 | echo "Current PYTHONPATH: $PYTHONPATH" 43 | python $FRAMEWORK_FOLDER/tgi.py 44 | 45 | post_build: 46 | commands: 47 | - | 48 | echo Build completed on `date` 49 | -------------------------------------------------------------------------------- /huggingface/pytorch/tgi/docker/tgi-requirements.txt: -------------------------------------------------------------------------------- 1 | boto3 2 | dataclasses 3 | docker 4 | gitpython 5 | sagemaker 6 | 7 | parameterized 8 | pytest 9 | pytest-mock 10 | pytest-xdist -------------------------------------------------------------------------------- /huggingface/pytorch/tgi/docker/tgi.py: -------------------------------------------------------------------------------- 1 | import git 2 | import logging 3 | import os 4 | import shutil 5 | import subprocess 6 | import time 7 | 8 | from huggingface.pytorch.release_utils import ( 9 | ECR_SCAN_TIMEOUT_IN_SECONDS, 10 | GIT_REPO_DOCKERFILES_ROOT_DIRECTORY, 11 | GIT_REPO_PYTEST_PATH, 12 | LOG, 13 | Aws, 14 | Device, 15 | DlcPipeline, 16 | DockerClient, 17 | EnvironmentVariable, 18 | Mode, 19 | ReleaseConfigs, 20 | VulnerabilitySeverity, 21 | ) 22 | 23 | GIT_REPO_TGI_LOCAL_FOLDER_NAME = "text-generation-inference" 24 | GIT_REPO_TGI_TAG_PATTERN = "v{version}" 25 | GIT_REPO_TGI_URL = "https://github.com/huggingface/text-generation-inference.git" 26 | 27 | def build(configs: ReleaseConfigs): 28 | """Builds the Docker image for the provided configs.""" 29 | aws = Aws() 30 | docker_client = DockerClient() 31 | for config in configs.releases: 32 | LOG.info(f"Going to build image for config: {config}.") 33 | image_uri = config.get_image_uri_for_staging() 34 | if aws.does_ecr_image_exist(image_uri): 35 | LOG.info(f"Skipping already built image '{image_uri}'. Config: {config}.") 36 | continue 37 | 38 | build_path = "." 39 | if config.device.lower() == Device.GPU.name.lower(): 40 | LOG.info(f"Setting up build prerequisites for GPU release config with version: {config.version}") 41 | build_path = GIT_REPO_TGI_LOCAL_FOLDER_NAME 42 | shutil.rmtree(GIT_REPO_TGI_LOCAL_FOLDER_NAME, ignore_errors=True) 43 | hf_tgi_repo = git.Repo.clone_from(GIT_REPO_TGI_URL, GIT_REPO_TGI_LOCAL_FOLDER_NAME, no_checkout=True) 44 | hf_tgi_repo_tag = GIT_REPO_TGI_TAG_PATTERN.format(version=config.version) 45 | hf_tgi_repo.git.checkout(hf_tgi_repo_tag) 46 | LOG.info(f"Checked out {hf_tgi_repo} with tag: {hf_tgi_repo_tag} to {GIT_REPO_TGI_LOCAL_FOLDER_NAME}.") 47 | shutil.copytree(GIT_REPO_DOCKERFILES_ROOT_DIRECTORY, 48 | os.path.join(GIT_REPO_TGI_LOCAL_FOLDER_NAME, GIT_REPO_DOCKERFILES_ROOT_DIRECTORY)) 49 | LOG.info(f"Copied '{GIT_REPO_DOCKERFILES_ROOT_DIRECTORY}' directory to TGI directory for 'COPY' command.") 50 | 51 | dockerfile_path = config.get_dockerfile_path() 52 | LOG.info(f"Building Dockerfile: '{dockerfile_path}'. 
This may take a while...") 53 | docker_client.build(image_uri=image_uri, dockerfile_path=dockerfile_path, build_path=build_path) 54 | 55 | username, password = aws.get_ecr_credentials(image_uri) 56 | docker_client.login(username, password, image_uri) 57 | docker_client.push(image_uri) 58 | 59 | def test(configs: ReleaseConfigs): 60 | """Runs SageMaker tests for the Docker images associated with the provided configs and current git commit.""" 61 | aws = Aws() 62 | for config in configs.releases: 63 | LOG.info(f"Going to test built image for config: {config}.") 64 | test_role_arn = os.getenv(EnvironmentVariable.TEST_ROLE_ARN.name) 65 | test_session = aws.get_session_for_role(test_role_arn) 66 | test_credentials = test_session.get_credentials() 67 | environ = os.environ.copy() 68 | environ.update({ 69 | "DEVICE_TYPE": config.device.lower(), 70 | "AWS_ACCESS_KEY_ID": test_credentials.access_key, 71 | "AWS_SECRET_ACCESS_KEY": test_credentials.secret_key, 72 | "AWS_SESSION_TOKEN": test_credentials.token, 73 | "IMAGE_URI": config.get_image_uri_for_staging(), 74 | "TEST_ROLE_ARN": test_role_arn }) 75 | 76 | command = ["pytest", "-m", config.device.lower(), "-n", "auto", "--log-cli-level", "info", GIT_REPO_PYTEST_PATH] 77 | LOG.info(f"Running test command: {command}.") 78 | process = subprocess.run(command, env=environ, encoding="utf-8", capture_output=True) 79 | LOG.info(process.stdout) 80 | assert process.returncode == 0, f"Failed with config: {config}.\nError: {process.stderr}." 81 | LOG.info(f"Finished testing image with config: {config}.") 82 | 83 | start_time = time.time() 84 | image_uri = config.get_image_uri_for_staging() 85 | while aws.is_ecr_image_scan_pending(image_uri): 86 | LOG.info(f"Waiting for image scan results for image: {image_uri}.") 87 | assert time.time() - start_time <= ECR_SCAN_TIMEOUT_IN_SECONDS, \ 88 | f"{image_uri} with config {config} has not completed scanning beyond permitted wait time." 89 | 90 | severities = {VulnerabilitySeverity.CRITICAL.name} 91 | vulnerability_ids = aws.get_image_scan_findings(image_uri, severities, set(configs.ignore_vulnerabilities)) 92 | assert len(vulnerability_ids) == 0, f"{image_uri} with {config} has vulnerabilities: {vulnerability_ids}." 93 | LOG.info(f"Finished checking vulnerabilities for image: {image_uri}.") 94 | 95 | def pr(configs: ReleaseConfigs): 96 | """Executes both build and test modes.""" 97 | build(configs) 98 | test(configs) 99 | 100 | def release(configs: ReleaseConfigs): 101 | """Integrates with DLC to release the tested images associated for the provided configs.""" 102 | aws = Aws() 103 | docker_client = DockerClient() 104 | for config in configs.releases: 105 | LOG.info(f"Releasing image associated for config: {config}.") 106 | released_image_uri = config.get_image_uri_for_released() 107 | if aws.does_ecr_image_exist(released_image_uri): 108 | LOG.info(f"Skipping already released image '{released_image_uri}'. 
Config: {config}.") 109 | continue 110 | 111 | staged_image_uri = config.get_image_uri_for_staging() 112 | username, password = aws.get_ecr_credentials(staged_image_uri) 113 | docker_client.login(username, password, staged_image_uri) 114 | docker_client.prune_all() 115 | docker_client.pull(staged_image_uri) 116 | 117 | pipeline = DlcPipeline(aws, docker_client) 118 | pipeline.stage_image(config) 119 | pipeline.set_parameters(config) 120 | pipeline.start_pipeline(config) 121 | LOG.info(f"DLC pipeline completed for staged image URI: {staged_image_uri}.") 122 | 123 | username, password = aws.get_ecr_credentials(staged_image_uri) 124 | docker_client.login(username, password, staged_image_uri) 125 | released_image_uri = config.get_image_uri_for_released() 126 | docker_client.tag(staged_image_uri, released_image_uri) 127 | docker_client.push(released_image_uri) 128 | LOG.info(f"Release marked as complete for following config ({released_image_uri}): {config}") 129 | 130 | 131 | if __name__ == "__main__": 132 | logging.basicConfig( 133 | level=logging.INFO, 134 | format="%(asctime)s %(levelname)-8s %(message)s", 135 | datefmt="%Y-%m-%d %H:%M:%S") 136 | configs = ReleaseConfigs() 137 | configs.validate() 138 | mode = os.getenv(EnvironmentVariable.MODE.name) 139 | LOG.info(f"Mode has been set to: {mode}.") 140 | if mode == Mode.PR.name: 141 | pr(configs) 142 | elif mode == Mode.BUILD.name: 143 | build(configs) 144 | elif mode == Mode.TEST.name: 145 | test(configs) 146 | elif mode == Mode.RELEASE.name: 147 | release(configs) 148 | else: 149 | raise ValueError(f"The mode '{mode}' is not recognized. Please set it correctly.'") -------------------------------------------------------------------------------- /huggingface/pytorch/tgillamacpp/docker/buildspec.yml: -------------------------------------------------------------------------------- 1 | 2 | version: 0.2 3 | 4 | env: 5 | shell: bash 6 | variables: 7 | FRAMEWORK_FOLDER: "huggingface/pytorch/tgillamacpp/docker" 8 | PYTHONPATH: "/codebuild/output/src*/src/github.com/awslabs/llm-hosting-container" 9 | 10 | phases: 11 | install: 12 | runtime-versions: 13 | python: 3.11 14 | commands: 15 | - echo "Installing Python version 3.11 ..." 16 | - pyenv global $PYTHON_311_VERSION 17 | 18 | pre_build: 19 | commands: 20 | - echo Pre-build started on `date` 21 | - export PYTHONPATH=$(pwd):$PYTHONPATH 22 | 23 | # Continue with regular pre-build steps if BUILD_REQUIRED=true 24 | - | 25 | echo Setting up Docker buildx. 26 | docker buildx version 27 | docker buildx create --name builder --driver docker-container --buildkitd-flags '--allow-insecure-entitlement security.insecure --allow-insecure-entitlement network.host' --use 28 | docker buildx inspect --bootstrap --builder builder 29 | docker buildx install 30 | echo Preparing system dependencies for execution. 31 | docker --version 32 | docker login -u $DOCKER_USERNAME -p $DOCKER_PASSWORD 33 | curl -LO http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh 34 | bash Miniconda3-latest-Linux-x86_64.sh -bfp /miniconda3 35 | export PATH=/miniconda3/bin:${PATH} 36 | conda install python=3.11 37 | conda update -y conda 38 | echo Prepare TGI_LLAMACPP dependencies for execution. 
39 | mkdir tgi-llamacpp-artifacts 40 | python -m pip install -r $FRAMEWORK_FOLDER/tgi-llamacpp-requirements.txt 41 | 42 | build: 43 | commands: 44 | - | 45 | echo "Current PYTHONPATH: $PYTHONPATH" 46 | python $FRAMEWORK_FOLDER/tgi-llamacpp.py 47 | 48 | post_build: 49 | commands: 50 | - | 51 | echo Build completed on `date` -------------------------------------------------------------------------------- /huggingface/pytorch/tgillamacpp/docker/tgi-llamacpp-requirements.txt: -------------------------------------------------------------------------------- 1 | boto3 2 | dataclasses 3 | docker 4 | gitpython 5 | sagemaker 6 | 7 | parameterized 8 | pytest 9 | pytest-mock 10 | pytest-xdist -------------------------------------------------------------------------------- /huggingface/pytorch/tgillamacpp/docker/tgi-llamacpp.py: -------------------------------------------------------------------------------- 1 | import git 2 | import logging 3 | import os 4 | import shutil 5 | import subprocess 6 | import time 7 | 8 | from huggingface.pytorch.release_utils import ( 9 | GIT_REPO_DOCKERFILES_ROOT_DIRECTORY, 10 | GIT_REPO_PYTEST_PATH, 11 | LOG, 12 | Aws, 13 | DockerClient, 14 | EnvironmentVariable, 15 | Mode, 16 | ReleaseConfigs 17 | ) 18 | 19 | GIT_REPO_TGI_LLAMACPP_LOCAL_FOLDER_NAME = "tgi-llamacpp" 20 | GIT_REPO_TGI_LLAMACPP_TAG_PATTERN = "v{version}" 21 | GIT_REPO_TGI_LLAMACPP_URL = "https://github.com/huggingface/text-generation-inference.git" 22 | 23 | def build(configs: ReleaseConfigs): 24 | """Builds the Docker image for the provided configs.""" 25 | aws = Aws() 26 | docker_client = DockerClient() 27 | for config in configs.releases: 28 | LOG.info(f"Going to build image for config: {config}.") 29 | image_uri = config.get_image_uri_for_staging() 30 | if aws.does_ecr_image_exist(image_uri): 31 | LOG.info(f"Skipping already built image '{image_uri}'. Config: {config}.") 32 | continue 33 | 34 | LOG.info(f"Setting up build prerequisites for release config with version: {config.version}") 35 | build_path = GIT_REPO_TGI_LLAMACPP_LOCAL_FOLDER_NAME 36 | shutil.rmtree(GIT_REPO_TGI_LLAMACPP_LOCAL_FOLDER_NAME, ignore_errors=True) 37 | hf_tgi_llamacpp_repo = git.Repo.clone_from(GIT_REPO_TGI_LLAMACPP_URL, GIT_REPO_TGI_LLAMACPP_LOCAL_FOLDER_NAME, no_checkout=True) 38 | hf_tgi_llamacpp_repo_tag = GIT_REPO_TGI_LLAMACPP_TAG_PATTERN.format(version=config.version) 39 | hf_tgi_llamacpp_repo.git.checkout(hf_tgi_llamacpp_repo_tag) 40 | LOG.info(f"Checked out {hf_tgi_llamacpp_repo} with tag: {hf_tgi_llamacpp_repo_tag} to {GIT_REPO_TGI_LLAMACPP_LOCAL_FOLDER_NAME}.") 41 | shutil.copytree(GIT_REPO_DOCKERFILES_ROOT_DIRECTORY, 42 | os.path.join(GIT_REPO_TGI_LLAMACPP_LOCAL_FOLDER_NAME, GIT_REPO_DOCKERFILES_ROOT_DIRECTORY)) 43 | LOG.info(f"Copied '{GIT_REPO_DOCKERFILES_ROOT_DIRECTORY}' directory to TGI_LLAMACPP directory for 'COPY' command.") 44 | 45 | dockerfile_path = config.get_dockerfile_path() 46 | LOG.info(f"Building Dockerfile: '{dockerfile_path}'. 
This may take a while...") 47 | docker_client.build(image_uri=image_uri, dockerfile_path=dockerfile_path, build_path=build_path) 48 | 49 | username, password = aws.get_ecr_credentials(image_uri) 50 | docker_client.login(username, password, image_uri) 51 | docker_client.push(image_uri) 52 | 53 | def test(configs: ReleaseConfigs): 54 | """Runs SageMaker tests for the Docker images associated with the provided configs and current git commit.""" 55 | aws = Aws() 56 | for config in configs.releases: 57 | LOG.info(f"Going to test built image for config: {config}.") 58 | test_role_arn = os.getenv(EnvironmentVariable.TEST_ROLE_ARN.name) 59 | test_session = aws.get_session_for_role(test_role_arn) 60 | test_credentials = test_session.get_credentials() 61 | environ = os.environ.copy() 62 | environ.update({ 63 | "DEVICE_TYPE": config.device.lower(), 64 | "AWS_ACCESS_KEY_ID": test_credentials.access_key, 65 | "AWS_SECRET_ACCESS_KEY": test_credentials.secret_key, 66 | "AWS_SESSION_TOKEN": test_credentials.token, 67 | "IMAGE_URI": config.get_image_uri_for_staging(), 68 | "TEST_ROLE_ARN": test_role_arn }) 69 | 70 | command = ["pytest", "-m", config.device.lower(), "-n", "auto", "--log-cli-level", "info", GIT_REPO_PYTEST_PATH] 71 | LOG.info(f"Running test command: {command}.") 72 | process = subprocess.run(command, env=environ, encoding="utf-8", capture_output=True) 73 | LOG.info(process.stdout) 74 | assert process.returncode == 0, f"Failed with config: {config}.\nError: {process.stderr}." 75 | LOG.info(f"Finished testing image with config: {config}.") 76 | 77 | 78 | def pr(configs: ReleaseConfigs): 79 | """Executes both build and test modes.""" 80 | build(configs) 81 | test(configs) 82 | 83 | def release(configs: ReleaseConfigs): 84 | """trigger SMFrameworks algo release pipeline""" 85 | aws = Aws() 86 | docker_client = DockerClient() 87 | for config in configs.releases: 88 | LOG.info(f"Releasing image associated for config: {config}.") 89 | released_image_uri = config.get_image_uri_for_released() 90 | if aws.does_ecr_image_exist(released_image_uri): 91 | LOG.info(f"Skipping already released image '{released_image_uri}'. 
Config: {config}.") 92 | continue 93 | 94 | staged_image_uri = config.get_image_uri_for_staging() 95 | username, password = aws.get_ecr_credentials(staged_image_uri) 96 | docker_client.login(username, password, staged_image_uri) 97 | docker_client.prune_all() 98 | docker_client.pull(staged_image_uri) 99 | 100 | docker_client.login(username, password, staged_image_uri) 101 | docker_client.tag(staged_image_uri, released_image_uri) 102 | docker_client.push(released_image_uri) 103 | 104 | js_uris = config.get_image_uris_for_jumpstart() 105 | username, password = aws.get_ecr_credentials(js_uris[0]) 106 | docker_client.login(username, password, js_uris[0]) 107 | for js_uri in js_uris: 108 | docker_client.tag(staged_image_uri, js_uri) 109 | docker_client.push(js_uri) 110 | LOG.info(f"Release marked as complete for following config ({js_uris}): {config}") 111 | 112 | 113 | if __name__ == "__main__": 114 | logging.basicConfig( 115 | level=logging.INFO, 116 | format="%(asctime)s %(levelname)-8s %(message)s", 117 | datefmt="%Y-%m-%d %H:%M:%S") 118 | configs = ReleaseConfigs() 119 | configs.validate() 120 | mode = os.getenv(EnvironmentVariable.MODE.name) 121 | LOG.info(f"Mode has been set to: {mode}.") 122 | if mode == Mode.PR.name: 123 | pr(configs) 124 | elif mode == Mode.BUILD.name: 125 | build(configs) 126 | elif mode == Mode.TEST.name: 127 | test(configs) 128 | elif mode == Mode.RELEASE.name: 129 | release(configs) 130 | else: 131 | raise ValueError(f"The mode '{mode}' is not recognized. Please set it correctly.'") -------------------------------------------------------------------------------- /releases.json: -------------------------------------------------------------------------------- 1 | { 2 | "permitted_combinations": { 3 | "TGI": [ 4 | { 5 | "device": "gpu", 6 | "min_version": "1.0.0", 7 | "max_version": "1.1.0", 8 | "os_version": "ubuntu20.04", 9 | "cuda_version": "cu118", 10 | "python_version": "py39", 11 | "pytorch_version": "2.0.1" 12 | }, 13 | { 14 | "device": "gpu", 15 | "min_version": "1.2.0", 16 | "max_version": "1.4.0", 17 | "os_version": "ubuntu20.04", 18 | "cuda_version": "cu121", 19 | "python_version": "py310", 20 | "pytorch_version": "2.1.1" 21 | }, 22 | { 23 | "device": "gpu", 24 | "min_version": "1.4.2", 25 | "max_version": "2.0.1", 26 | "os_version": "ubuntu22.04", 27 | "cuda_version": "cu121", 28 | "python_version": "py310", 29 | "pytorch_version": "2.1.1" 30 | }, 31 | { 32 | "device": "gpu", 33 | "min_version": "2.0.2", 34 | "max_version": "2.2.0", 35 | "os_version": "ubuntu22.04", 36 | "cuda_version": "cu121", 37 | "python_version": "py310", 38 | "pytorch_version": "2.3.0" 39 | }, 40 | { 41 | "device": "gpu", 42 | "min_version": "2.3.1", 43 | "max_version": "3.0.1", 44 | "os_version": "ubuntu22.04", 45 | "cuda_version": "cu124", 46 | "python_version": "py311", 47 | "pytorch_version": "2.4.0" 48 | }, 49 | { 50 | "device": "gpu", 51 | "min_version": "3.0.1", 52 | "max_version": "3.1.0", 53 | "os_version": "ubuntu22.04", 54 | "cuda_version": "cu124", 55 | "python_version": "py311", 56 | "pytorch_version": "2.5.1" 57 | }, 58 | { 59 | "device": "gpu", 60 | "min_version": "3.1.1", 61 | "max_version": "3.2.3", 62 | "os_version": "ubuntu22.04", 63 | "cuda_version": "cu124", 64 | "python_version": "py311", 65 | "pytorch_version": "2.6.0" 66 | }, 67 | { 68 | "device": "inf2", 69 | "min_version": "0.0.16", 70 | "max_version": "0.0.21", 71 | "os_version": "ubuntu22.04", 72 | "python_version": "py310", 73 | "pytorch_version": "1.13.1" 74 | }, 75 | { 76 | "device": "inf2", 77 | 
"min_version": "0.0.22", 78 | "max_version": "0.0.28", 79 | "os_version": "ubuntu22.04", 80 | "python_version": "py310", 81 | "pytorch_version": "2.1.2" 82 | } 83 | ], 84 | "TEI": [ 85 | { 86 | "device": "gpu", 87 | "min_version": "1.2.1", 88 | "max_version": "1.7.0", 89 | "os_version": "ubuntu22.04", 90 | "cuda_version": "cu122", 91 | "python_version": "py310", 92 | "pytorch_version": "2.0.1" 93 | }, 94 | { 95 | "device": "cpu", 96 | "min_version": "1.2.1", 97 | "max_version": "1.7.0", 98 | "os_version": "ubuntu22.04", 99 | "cuda_version": "cu122", 100 | "python_version": "py310", 101 | "pytorch_version": "2.0.1" 102 | } 103 | ] 104 | }, 105 | "ignore_vulnerabilities": [ 106 | "CVE-2024-42154 - linux", 107 | "CVE-2025-32434 - torch" 108 | ], 109 | "releases": [ 110 | { 111 | "framework": "TEI", 112 | "device": "gpu", 113 | "version": "1.7.0", 114 | "os_version": "ubuntu22.04", 115 | "python_version": "py310", 116 | "pytorch_version": "2.0.1", 117 | "cuda_version": "cu122" 118 | }, 119 | { 120 | "framework": "TEI", 121 | "device": "cpu", 122 | "version": "1.7.0", 123 | "os_version": "ubuntu22.04", 124 | "python_version": "py310", 125 | "pytorch_version": "2.0.1" 126 | }, 127 | { 128 | "framework": "TGI", 129 | "device": "inf2", 130 | "version": "0.0.28", 131 | "os_version": "ubuntu22.04", 132 | "python_version": "py310", 133 | "pytorch_version": "2.1.2" 134 | } 135 | ] 136 | } 137 | -------------------------------------------------------------------------------- /tests/huggingface/README.md: -------------------------------------------------------------------------------- 1 | # SageMaker DLC Test 2 | 3 | This folder is a collection of scripts that enables users to test and validate 4 | the Deep Learning Containers (DLC) on SageMaker. 5 | 6 | ## Requirements 7 | 8 | - An AWS account 9 | - SageMaker Python SDK installed 10 | 11 | ## Usage 12 | 13 | Run the test script using the command below: 14 | 15 | ``` 16 | pip3 install -r requirements.txt 17 | 18 | IMAGE_URI= 19 | INSTANCE_TYPE=ml.g5.12xlarge 20 | NUM_GPUS=4 21 | ROLE= 22 | 23 | python3 sagemaker_dlc_test.py --image_uri $IMAGE_URI --instance_type $INSTANCE_TYPE --model_id bigscience/bloom-560m --num_gpus $NUM_GPUS --role $ROLE --timeout 600 24 | python3 sagemaker_dlc_test.py --image_uri $IMAGE_URI --instance_type $INSTANCE_TYPE --model_id EleutherAI/gpt-neox-20b --num_gpus $NUM_GPUS --role $ROLE --timeout 2000 25 | python3 sagemaker_dlc_test.py --image_uri $IMAGE_URI --instance_type $INSTANCE_TYPE --model_id google/flan-t5-xxl --num_gpus $NUM_GPUS --role $ROLE --timeout 3000 26 | ``` 27 | 28 | The tests will deploy a SageMaker endpoint and run inference. 
29 | -------------------------------------------------------------------------------- /tests/huggingface/enable_ssm_access_to_endpoint.py: -------------------------------------------------------------------------------- 1 | import uuid 2 | import boto3 3 | # This script helps you to enable SSM access to the endpoint so we can debug the 4 | # container level issues there 5 | def main(): 6 | session = boto3.Session() 7 | client = session.client("sagemaker", region_name="us-west-2") 8 | 9 | # List existing endpoints 10 | print("Listing endpoints:") 11 | print(client.list_endpoints()) 12 | print() 13 | 14 | # Get endpoint name 15 | endpoint_name = client.list_endpoints()["Endpoints"][0]["EndpointName"] 16 | print(f"Endpoint name: {endpoint_name}\n") 17 | 18 | # Describe endpoint 19 | response = client.describe_endpoint(EndpointName=endpoint_name) 20 | endpoint_config_name = response["EndpointConfigName"] 21 | 22 | # Check if EnableSSMAccess is currently enabled 23 | current_ssm_access = response["ProductionVariants"][0].get("EnableSSMAccess", False) 24 | print(f"Current EnableSSMAccess status: {current_ssm_access}\n") 25 | 26 | # Generate new endpoint config name 27 | new_endpoint_config_name = f"{endpoint_config_name.split('-')[0]}-{str(uuid.uuid4())[:11]}" 28 | 29 | # Update EnableSSMAccess to True in new production variant 30 | new_production_variants = response["ProductionVariants"] 31 | new_production_variants[0]["EnableSSMAccess"] = True 32 | 33 | # Create new endpoint config 34 | create_endpoint_config_response = client.create_endpoint_config( 35 | EndpointConfigName=new_endpoint_config_name, 36 | ProductionVariants=new_production_variants, 37 | ) 38 | print(f"Created new endpoint config: {create_endpoint_config_response}\n") 39 | 40 | # Describe new endpoint config 41 | new_endpoint_config_response = client.describe_endpoint_config( 42 | EndpointConfigName=new_endpoint_config_name 43 | ) 44 | print(f"New endpoint config: {new_endpoint_config_response}\n") 45 | 46 | # Update endpoint with new endpoint config 47 | update_endpoint_response = client.update_endpoint( 48 | EndpointName=endpoint_name, EndpointConfigName=new_endpoint_config_name 49 | ) 50 | print(update_endpoint_response) 51 | 52 | if __name__ == "__main__": 53 | main() -------------------------------------------------------------------------------- /tests/huggingface/requirements.txt: -------------------------------------------------------------------------------- 1 | sagemaker>=2.153.0 2 | pytest -------------------------------------------------------------------------------- /tests/huggingface/sagemaker_dlc_test.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import sys 3 | import argparse 4 | import time 5 | import signal 6 | import json 7 | import os 8 | import pytest 9 | 10 | from sagemaker.huggingface import HuggingFaceModel 11 | 12 | 13 | logging.basicConfig(stream=sys.stdout, format="%(message)s", level=logging.INFO) 14 | 15 | 16 | class TimeoutError(Exception): 17 | pass 18 | 19 | 20 | def timeout_handler(signum, frame): 21 | raise TimeoutError("Test timed out") 22 | 23 | def run_test(args): 24 | default_env = { "HF_MODEL_ID": args.model_id } 25 | if args.model_revision: 26 | default_env["HF_MODEL_REVISION"] = args.model_revision 27 | if args.instance_type.startswith("ml.inf2"): 28 | default_env["HF_NUM_CORES"] = "2" 29 | default_env["HF_AUTO_CAST_TYPE"] = "fp16" 30 | default_env["MAX_BATCH_SIZE"] = "1" 31 | default_env["MAX_INPUT_TOKENS"] = "2048" 32 | 
default_env["MAX_TOTAL_TOKENS"] = "4096" 33 | else: 34 | default_env["SM_NUM_GPUS"] = "4" 35 | 36 | signal.signal(signal.SIGALRM, timeout_handler) 37 | signal.alarm(int(args.timeout)) 38 | predictor = None 39 | try: 40 | # Create Hugging Face Model Class 41 | endpoint_name = args.model_id.replace("/","-").replace(".", "-")[:40] 42 | endpoint_name = endpoint_name + "-" + time.strftime("%Y-%m-%d-%H-%M-%S", time.gmtime()) 43 | model = HuggingFaceModel( 44 | name=endpoint_name, 45 | env=default_env, 46 | role=args.role, 47 | image_uri=args.image_uri 48 | ) 49 | deploy_parameters = { 50 | "instance_type": args.instance_type, 51 | "initial_instance_count": 1, 52 | "endpoint_name": endpoint_name, 53 | "container_startup_health_check_timeout": 1800, 54 | } 55 | if args.instance_type.startswith("ml.inf2"): 56 | deploy_parameters["volume_size"] = 256 57 | predictor = model.deploy(**deploy_parameters) 58 | 59 | logging.info("Endpoint deployment complete.") 60 | 61 | data = { 62 | "inputs": "What is Deep Learning?", 63 | "parameters": {"max_new_tokens": 50, "top_k": 50, "top_p": 0.95, "do_sample": True}, 64 | } 65 | output = predictor.predict(data) 66 | logging.info("Output: " + json.dumps(output)) 67 | # TODO: we need to clearly define the expected output format for each models. 68 | # assert "generated_text" in output[0] 69 | finally: 70 | if predictor: 71 | predictor.delete_model() 72 | predictor.delete_endpoint() 73 | signal.alarm(0) 74 | 75 | def get_models_for_image(image_type, device_type): 76 | if image_type == "TGI": 77 | if device_type == "gpu": 78 | return [ 79 | ("bigscience/bloom-560m", None, "ml.g5.12xlarge"), 80 | ("EleutherAI/gpt-neox-20b", None, "ml.g5.12xlarge"), 81 | ("google/flan-t5-xxl", None, "ml.g5.12xlarge"), 82 | ] 83 | elif device_type == "inf2": 84 | return [ ("princeton-nlp/Sheared-LLaMA-1.3B", None, "ml.inf2.xlarge") ] 85 | else: 86 | raise ValueError(f"No testing models found for {image_type} on instance {device_type}. " 87 | f"please check whether the image_type and instance_type are supported.") 88 | elif image_type == "TEI": 89 | if device_type == "gpu": 90 | return [ 91 | ("BAAI/bge-m3", None, "ml.g5.12xlarge"), 92 | ("intfloat/multilingual-e5-base", None, "ml.g5.12xlarge"), 93 | ("thenlper/gte-base", None, "ml.g5.12xlarge"), 94 | ("sentence-transformers/all-MiniLM-L6-v2", None, "ml.g5.12xlarge") 95 | ] 96 | elif device_type == "cpu": 97 | return [("BAAI/bge-m3", None, "ml.g5.12xlarge")] 98 | else: 99 | raise ValueError(f"No testing models found for {image_type} on instance {device_type}. " 100 | f"please check whether the image_type and instance_type are supported.") 101 | else: 102 | raise ValueError("Invalid image type. 
Supported types are 'TGI' and 'TEI'.") 103 | 104 | def should_run_test_for_image(test_type, target_type): 105 | return test_type == target_type 106 | 107 | @pytest.mark.parametrize("image_type, device_type", [ 108 | pytest.param("TGI", "gpu", marks=pytest.mark.gpu), 109 | pytest.param("TGI", "inf2", marks=pytest.mark.inf2), 110 | pytest.param("TEI", "gpu", marks=pytest.mark.gpu), 111 | pytest.param("TEI", "cpu", marks=pytest.mark.cpu), 112 | ]) 113 | def test(image_type, device_type, timeout: str = "3000"): 114 | test_target_image_type = os.getenv("TARGET_IMAGE_TYPE") 115 | test_device_type = os.getenv("DEVICE_TYPE") 116 | if test_target_image_type and not should_run_test_for_image(image_type, test_target_image_type): 117 | pytest.skip(f"Skipping test for image type {image_type} as it does not match target image type {test_target_image_type}") 118 | 119 | if test_device_type and not should_run_test_for_image(device_type, test_device_type): 120 | pytest.skip(f"Skipping test for device type {device_type} as it does not match current device type {test_device_type}") 121 | 122 | image_uri = os.getenv("IMAGE_URI") 123 | test_role_arn = os.getenv("TEST_ROLE_ARN") 124 | assert image_uri, f"Please set IMAGE_URI environment variable." 125 | assert test_role_arn, f"Please set TEST_ROLE_ARN environment variable." 126 | 127 | models = get_models_for_image(image_type, device_type) 128 | for model_id, model_revision, instance_type in models: 129 | args = argparse.Namespace( 130 | image_uri=image_uri, 131 | instance_type=instance_type, 132 | model_id=model_id, 133 | model_revision=model_revision, 134 | role=test_role_arn, 135 | timeout=timeout 136 | ) 137 | logging.info(f"Running sanity test with the following args: {args}.") 138 | run_test(args) 139 | 140 | 141 | if __name__ == '__main__': 142 | arg_parser = argparse.ArgumentParser() 143 | arg_parser.add_argument("--image_uri", type=str, required=True) 144 | arg_parser.add_argument("--instance_type", type=str, required=True) 145 | arg_parser.add_argument("--model_id", type=str, required=True) 146 | arg_parser.add_argument("--model_revision", type=str, required=False) 147 | arg_parser.add_argument("--role", type=str, required=True) 148 | arg_parser.add_argument("--timeout", type=str, required=True) 149 | 150 | args = arg_parser.parse_args() 151 | run_test(args) 152 | --------------------------------------------------------------------------------
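Usage sketch (not part of the repository): once an endpoint built from one of these images is running, for example while debugging with `enable_ssm_access_to_endpoint.py`, the same request shape that `run_test` sends through `predictor.predict` can be reproduced with the AWS CLI. The endpoint name below is hypothetical, and `--cli-binary-format raw-in-base64-out` is only needed on AWS CLI v2.

```
# Hypothetical endpoint name; use the one created by your own deployment.
ENDPOINT_NAME=bigscience-bloom-560m-2024-01-01-00-00-00

aws sagemaker-runtime invoke-endpoint \
  --endpoint-name "$ENDPOINT_NAME" \
  --content-type application/json \
  --cli-binary-format raw-in-base64-out \
  --body '{"inputs": "What is Deep Learning?", "parameters": {"max_new_tokens": 50, "top_k": 50, "top_p": 0.95, "do_sample": true}}' \
  response.json

cat response.json
```

The response body lands in `response.json`; as the TODO in `run_test` notes, its exact shape varies by model, which is why the test logs the output instead of asserting on it.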