├── .env ├── Dockerfile ├── Dockerfile_cpu ├── README.md ├── alternative-compose ├── docker-compose.yml.for-cpu └── docker-compose.yml.with-volume ├── build_cpu.sh ├── build_gpu.sh ├── cpu_release.sh ├── docker-compose.yml └── release.sh /.env: -------------------------------------------------------------------------------- 1 | MODEL_DIR=/media/teamgroup/models 2 | MODEL=lotus-12B-Q4_K_M.gguf 3 | BLASTHREADS=5 4 | THREADS=6 5 | GPULAYERS=40 6 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # cuda devel image for base, best build compatibility 2 | FROM nvidia/cuda:12.1.1-devel-ubuntu22.04 as builder 3 | 4 | # Update base image and install dependencies 5 | RUN apt-get update && apt-get upgrade -y \ 6 | && apt-get install -y git build-essential \ 7 | python3 pip gcc wget \ 8 | ocl-icd-opencl-dev opencl-headers clinfo \ 9 | libclblast-dev libopenblas-dev \ 10 | && mkdir -p /etc/OpenCL/vendors && echo "libnvidia-opencl.so.1" > /etc/OpenCL/vendors/nvidia.icd 11 | 12 | ARG clone_arg 13 | 14 | # Pulling latest koboldcpp branch and installing requirements 15 | RUN git clone https://github.com/LostRuins/koboldcpp.git $clone_arg 16 | 17 | WORKDIR /koboldcpp 18 | 19 | RUN pip3 install -r requirements.txt 20 | 21 | # Setting up env variables 22 | ENV LLAMA_PORTABLE=1 23 | ENV LLAMA_CUBLAS=1 24 | ENV LLAMA_CLBLAST=1 25 | ENV LLAMA_OPENBLAS=1 26 | 27 | # build-o'clock 28 | RUN make 29 | 30 | # Using ubuntu 22.04 for smaller final image 31 | FROM ubuntu:22.04 32 | 33 | # update image and install necessary packages 34 | RUN apt-get update && apt-get upgrade -y \ 35 | && apt-get -y install python3 \ 36 | ocl-icd-opencl-dev opencl-headers clinfo \ 37 | libclblast-dev libopenblas-dev \ 38 | && apt-get clean && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* 39 | 40 | COPY --from=builder /usr/local/cuda-12.1 /usr/local/cuda-12.1 41 | COPY --from=builder 
/usr/local/cuda-12.1/bin /usr/local/cuda-12.1/bin 42 | COPY --from=builder /usr/local/cuda-12.1/lib64 /usr/local/cuda-12.1/lib64 43 | 44 | # Copy the git repo from builder 45 | COPY --from=builder /koboldcpp /koboldcpp 46 | 47 | WORKDIR /koboldcpp 48 | 49 | EXPOSE 80 50 | 51 | ENV CUDA_HOME='/usr/local/cuda-12.1' 52 | ENV PATH=/usr/local/cuda-12.1/bin${PATH:+:${PATH}} 53 | ENV LD_LIBRARY_PATH=/usr/local/cuda-12.1/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}} 54 | 55 | # koboldcpp.py as entry command 56 | CMD ["python3", "koboldcpp.py"] 57 | -------------------------------------------------------------------------------- /Dockerfile_cpu: -------------------------------------------------------------------------------- 1 | # Use an official Python runtime as a parent image 2 | FROM python:3.11.8-slim 3 | 4 | # Install git 5 | RUN apt-get update && apt-get install -y git \ 6 | build-essential \ 7 | libclblast-dev \ 8 | libopenblas-dev 9 | 10 | ARG clone_arg 11 | ARG commit 12 | 13 | RUN git clone https://github.com/LostRuins/koboldcpp.git $clone_arg \ 14 | && cd koboldcpp \ 15 | && if [ -n "$commit" ]; then git checkout $commit; fi \ 16 | && pip install --no-cache-dir --trusted-host pypi.python.org -r requirements.txt \ 17 | && make LLAMA_OPENBLAS=1 LLAMA_CLBLAST=1 18 | 19 | RUN apt-get clean && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* 20 | 21 | # Set the working directory 22 | WORKDIR /koboldcpp 23 | 24 | # Make port 80 available to the world outside this container 25 | EXPOSE 80 26 | 27 | # Use koboldcpp.py as the entrypoint when the container launches 28 | CMD ["python", "koboldcpp.py"] -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## This work is not official 2 | 3 | I am providing this work as a helpful hand to people who are looking for a simple, easy to build docker image with GPU support, this is not official in any capacity, 
and any issues arising from this docker image should be posted here and not on their own repo or discord. 4 | 5 | 6 | Note: this step may no longer be necessary, it was a workaround for a broken driver version 7 | Requires nvidia-driver 535.113.01, installed with apt-get install -y --allow-downgrades nvidia-driver-535/jammy-updates 8 | 9 | # koboldcpp-docker 10 | 11 | Docker images and configuration to run koboldcpp with GPU, currently updated to release v1.47.2 found here: https://github.com/LostRuins/koboldcpp.git 12 | 13 | # Build instructions 14 | 15 | First, check out this repository 16 | 17 | ```sh 18 | git clone https://github.com/noneabove1182/koboldcpp-docker.git 19 | ``` 20 | 21 | Next, build the image 22 | 23 | ```sh 24 | cd koboldcpp-docker 25 | docker build -t koboldcpp-docker:latest . 26 | ``` 27 | 28 | (note, if you don't require CUDA you can instead pass -f Dockerfile_cpu to build without CUDA support, and you can use the docker-compose.yml.for-cpu from ./alternative-compose/) 29 | 30 | # Running the image with docker run 31 | 32 | (add -d for detached) 33 | 34 | ```sh 35 | docker run --gpus all -p 80:80 -v /media/teamgroup/models:/app/models koboldcpp-docker:latest --model /app/models/wizardlm-13b-v1.1.ggmlv3.q4_1.bin --port 80 --threads 6 --usecublas --gpulayers 43 36 | ``` 37 | 38 | # Running the image with docker compose 39 | 40 | A docker-compose.yml file has been provided, as well as a .env file that I use for setting my model dir and the model name I'd like to load in with 41 | 42 | Feel free to modify both to fit your needs; for example, I use lowvram for bigger models and remove it for smaller ones. 43 | 44 | I've also provided, in ./alternative-compose/, a docker-compose.yml.for-cpu with the default CPU arguments 45 | 46 | # Pre-built image 47 | 48 | Pre-built images are provided at https://hub.docker.com/r/noneabove1182/koboldcpp-gpu 49 | 50 | Follow the same command as above except with
noneabove1182/koboldcpp-gpu:(version) 51 | 52 | CPU version provided as well but I'm slower at updating it: https://hub.docker.com/r/noneabove1182/koboldcpp-cpu 53 | 54 | # Quirks and features 55 | 56 | If you're having trouble saving info across sessions, try adding a docker volume. See the alternative-compose folder for the one with volumes. 57 | 58 | I've had some issues in the past keeping the volume between versions, so try 'docker volume rm kobold' if you have some weird behaviour as a first troubleshooting step. 59 | 60 | for docker run: 61 | 62 | ``` 63 | docker volume create kobold 64 | ``` 65 | 66 | and the full command is now (note that the -v volume flags must come before the image name, otherwise docker passes them to the container as arguments): 67 | 68 | ```sh 69 | docker run --gpus all -p 80:80 -v /media/teamgroup/models:/app/models -v kobold:/koboldcpp koboldcpp-docker:latest --model /app/models/wizardlm-13b-v1.1.ggmlv3.q4_1.bin --port 80 --threads 6 --usecublas --gpulayers 43 70 | ``` 71 | -------------------------------------------------------------------------------- /alternative-compose/docker-compose.yml.for-cpu: -------------------------------------------------------------------------------- 1 | version: '3.9' 2 | services: 3 | koboldcpp: 4 | container_name: koboldcpp 5 | restart: always 6 | volumes: 7 | - '${MODEL_DIR}:/app/models' 8 | ports: 9 | - '7860:80' 10 | image: 'noneabove1182/koboldcpp-cpu:latest' 11 | ulimits: 12 | memlock: -1 13 | mem_limit: 50gb 14 | command: 15 | [ 16 | "python3", 17 | "koboldcpp.py", 18 | "--model", 19 | "/app/models/${MODEL}", 20 | "--port", 21 | "80", 22 | "--threads", 23 | "6", 24 | "--usemlock" 25 | ] 26 | -------------------------------------------------------------------------------- /alternative-compose/docker-compose.yml.with-volume: -------------------------------------------------------------------------------- 1 | version: '3.9' 2 | services: 3 | koboldcpp: 4 | container_name: koboldcpp 5 | restart: always 6 | volumes: 7 | - '${MODEL_DIR}:/app/models' 8 | - kobold:/koboldcpp 9 | ports: 10 | - '7860:80' 11 | image:
'noneabove1182/koboldcpp-gpu:latest' 12 | ulimits: 13 | memlock: -1 14 | mem_limit: 50gb 15 | deploy: 16 | resources: 17 | reservations: 18 | devices: 19 | - driver: nvidia 20 | count: 1 21 | capabilities: [ gpu ] 22 | command: 23 | [ 24 | "python3", 25 | "koboldcpp.py", 26 | "--model", 27 | "/app/models/${MODEL}", 28 | "--port", 29 | "80", 30 | "--threads", 31 | "6", 32 | "--usemlock", 33 | "--usecublas", 34 | "0", 35 | "--gpulayers", 36 | "18", 37 | "--forceversion", 38 | "405" 39 | ] 40 | volumes: 41 | kobold: 42 | -------------------------------------------------------------------------------- /build_cpu.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Define the repository URL 4 | REPO_URL="https://api.github.com/repos/LostRuins/koboldcpp/commits/concedo" 5 | 6 | LATEST_COMMIT=$(curl -s $REPO_URL | grep 'sha' | cut -d\" -f4 | head -n 1) 7 | 8 | echo $LATEST_COMMIT 9 | 10 | # Build the Docker image 11 | docker build --build-arg commit="$LATEST_COMMIT" -t koboldcpp-cpu -f Dockerfile_cpu . 12 | 13 | # Check if Docker build was successful 14 | if [ $? -ne 0 ]; then 15 | echo "Docker build failed. Exiting..." 16 | exit 1 17 | fi 18 | -------------------------------------------------------------------------------- /build_gpu.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Define the repository URL 4 | REPO_URL="https://api.github.com/repos/LostRuins/koboldcpp/commits/main" 5 | 6 | LATEST_COMMIT=$(curl -s $REPO_URL | grep 'sha' | cut -d\" -f4 | head -n 1) 7 | 8 | echo $LATEST_COMMIT 9 | 10 | # Build the Docker image 11 | docker build --build-arg commit="$LATEST_COMMIT" -t koboldcpp-gpu . 12 | 13 | # Check if Docker build was successful 14 | if [ $? -ne 0 ]; then 15 | echo "Docker build failed. Exiting..." 
16 | exit 1 17 | fi 18 | -------------------------------------------------------------------------------- /cpu_release.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Define the repository URL 4 | REPO_URL="https://api.github.com/repos/LostRuins/koboldcpp/releases/latest" 5 | 6 | # Get the latest release tag from the GitHub repository 7 | RELEASE_TAG=$(curl -s $REPO_URL | grep 'tag_name' | cut -d\" -f4) 8 | 9 | # Define clone argument for Docker 10 | if [ -z "$RELEASE_TAG" ]; then 11 | echo "Failed to get the latest release tag. Exiting..." 12 | exit 1 13 | fi 14 | 15 | echo $RELEASE_TAG 16 | 17 | CLONE_ARG="--branch $RELEASE_TAG" 18 | 19 | # Build the Docker image 20 | docker build --build-arg clone_arg="$CLONE_ARG" -t koboldcpp-cpu -f Dockerfile_cpu . 21 | 22 | # Check if Docker build was successful 23 | if [ $? -ne 0 ]; then 24 | echo "Docker build failed. Exiting..." 25 | exit 1 26 | fi 27 | 28 | # Tag the Docker image 29 | docker tag koboldcpp-cpu:latest noneabove1182/koboldcpp-cpu:$RELEASE_TAG 30 | docker tag koboldcpp-cpu:latest noneabove1182/koboldcpp-cpu:latest 31 | 32 | # Push the Docker images 33 | docker push noneabove1182/koboldcpp-cpu:latest 34 | docker push noneabove1182/koboldcpp-cpu:$RELEASE_TAG 35 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.9' 2 | services: 3 | koboldcpp: 4 | container_name: koboldcpp 5 | restart: always 6 | volumes: 7 | - '${MODEL_DIR}:/app/models' 8 | ports: 9 | - '7860:80' 10 | image: 'noneabove1182/koboldcpp-gpu:latest' 11 | ulimits: 12 | memlock: -1 13 | mem_limit: 50gb 14 | deploy: 15 | resources: 16 | reservations: 17 | devices: 18 | - driver: nvidia 19 | count: 1 20 | capabilities: [ gpu ] 21 | command: 22 | [ 23 | "python3", 24 | "koboldcpp.py", 25 | "--model", 26 | "/app/models/${MODEL}", 27 | "--port", 
28 | "80", 29 | "--blasthreads", 30 | "${BLASTHREADS}", 31 | "--threads", 32 | "${THREADS}", 33 | "--usemlock", 34 | "--usecublas", 35 | "0", 36 | "--gpulayers", 37 | "${GPULAYERS}" 38 | ] 39 | -------------------------------------------------------------------------------- /release.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Define the repository URL 4 | REPO_URL="https://api.github.com/repos/LostRuins/koboldcpp/releases/latest" 5 | 6 | # Get the latest release tag from the GitHub repository 7 | RELEASE_TAG=$(curl -s $REPO_URL | grep 'tag_name' | cut -d\" -f4) 8 | 9 | # Define clone argument for Docker 10 | if [ -z "$RELEASE_TAG" ]; then 11 | echo "Failed to get the latest release tag. Exiting..." 12 | exit 1 13 | fi 14 | 15 | echo $RELEASE_TAG 16 | 17 | CLONE_ARG="--branch $RELEASE_TAG" 18 | 19 | # Build the Docker image 20 | docker build --build-arg clone_arg="$CLONE_ARG" -t noneabove1182/koboldcpp-gpu . 21 | 22 | # Check if Docker build was successful 23 | if [ $? -ne 0 ]; then 24 | echo "Docker build failed. Exiting..." 25 | exit 1 26 | fi 27 | 28 | # Tag the Docker image 29 | docker tag noneabove1182/koboldcpp-gpu:latest noneabove1182/koboldcpp-gpu:$RELEASE_TAG 30 | 31 | # Push the Docker images 32 | docker push noneabove1182/koboldcpp-gpu:latest 33 | docker push noneabove1182/koboldcpp-gpu:$RELEASE_TAG 34 | --------------------------------------------------------------------------------