├── .env ├── Dockerfile ├── Dockerfile_cpu ├── README.md ├── alternative-compose ├── docker-compose.yml.for-cpu └── docker-compose.yml.with-volume ├── build_cpu.sh ├── build_gpu.sh ├── cpu_release.sh ├── docker-compose.yml └── release.sh /.env: -------------------------------------------------------------------------------- 1 | MODEL_DIR=/media/teamgroup/models 2 | MODEL=lotus-12B-Q4_K_M.gguf 3 | BLASTHREADS=5 4 | THREADS=6 5 | GPULAYERS=40 6 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # cuda devel image for base, best build compatibility 2 | FROM nvidia/cuda:12.1.1-devel-ubuntu22.04 as builder 3 | 4 | # Update base image and install dependencies 5 | RUN apt-get update && apt-get upgrade -y \ 6 | && apt-get install -y git build-essential \ 7 | python3 pip gcc wget \ 8 | ocl-icd-opencl-dev opencl-headers clinfo \ 9 | libclblast-dev libopenblas-dev \ 10 | && mkdir -p /etc/OpenCL/vendors && echo "libnvidia-opencl.so.1" > /etc/OpenCL/vendors/nvidia.icd 11 | 12 | ARG clone_arg 13 | 14 | # Pulling latest koboldcpp branch and installing requirements 15 | RUN git clone https://github.com/LostRuins/koboldcpp.git $clone_arg 16 | 17 | WORKDIR /koboldcpp 18 | 19 | RUN pip3 install -r requirements.txt 20 | 21 | # Setting up env variables 22 | ENV LLAMA_PORTABLE=1 23 | ENV LLAMA_CUBLAS=1 24 | ENV LLAMA_CLBLAST=1 25 | ENV LLAMA_OPENBLAS=1 26 | 27 | # build-o'clock 28 | RUN make 29 | 30 | # Using ubuntu 22.04 for smaller final image 31 | FROM ubuntu:22.04 32 | 33 | # update image and install necessary packages 34 | RUN apt-get update && apt-get upgrade -y \ 35 | && apt-get -y install python3 \ 36 | ocl-icd-opencl-dev opencl-headers clinfo \ 37 | libclblast-dev libopenblas-dev \ 38 | && apt-get clean && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* 39 | 40 | COPY --from=builder /usr/local/cuda-12.1 /usr/local/cuda-12.1 41 | COPY --from=builder 
/usr/local/cuda-12.1/bin /usr/local/cuda-12.1/bin 42 | COPY --from=builder /usr/local/cuda-12.1/lib64 /usr/local/cuda-12.1/lib64 43 | 44 | # Copy the git repo from builder 45 | COPY --from=builder /koboldcpp /koboldcpp 46 | 47 | WORKDIR /koboldcpp 48 | 49 | EXPOSE 80 50 | 51 | ENV CUDA_HOME='/usr/local/cuda-12.1' 52 | ENV PATH=/usr/local/cuda-12.1/bin${PATH:+:${PATH}} 53 | ENV LD_LIBRARY_PATH=/usr/local/cuda-12.1/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}} 54 | 55 | # koboldcpp.py as entry command 56 | CMD ["python3", "koboldcpp.py"] 57 | -------------------------------------------------------------------------------- /Dockerfile_cpu: -------------------------------------------------------------------------------- 1 | # Use an official Python runtime as a parent image 2 | FROM python:3.11.8-slim 3 | 4 | # Install git 5 | RUN apt-get update && apt-get install -y git \ 6 | build-essential \ 7 | libclblast-dev \ 8 | libopenblas-dev 9 | 10 | ARG clone_arg 11 | ARG commit 12 | 13 | RUN git clone https://github.com/LostRuins/koboldcpp.git $clone_arg \ 14 | && cd koboldcpp \ 15 | && if [ -n "$commit" ]; then git checkout $commit; fi \ 16 | && pip install --no-cache-dir --trusted-host pypi.python.org -r requirements.txt \ 17 | && make LLAMA_OPENBLAS=1 LLAMA_CLBLAST=1 18 | 19 | RUN apt-get clean && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* 20 | 21 | # Set the working directory 22 | WORKDIR /koboldcpp 23 | 24 | # Make port 80 available to the world outside this container 25 | EXPOSE 80 26 | 27 | # Use koboldcpp.py as the entrypoint when the container launches 28 | CMD ["python", "koboldcpp.py"] -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## This work is not official 2 | 3 | I am providing this work as a helpful hand to people who are looking for a simple, easy to build docker image with GPU support, this is not official in any capacity, 
and any issues arising from this docker image should be posted here and not on their own repo or discord. 4 | 5 | 6 | Note: this step may no longer be necessary, it was a workaround for a broken driver version 7 | Requires nvidia-driver 535.113.01, installed with apt-get install -y --allow-downgrades nvidia-driver-535/jammy-updates 8 | 9 | # koboldcpp-docker 10 | 11 | Docker images and configuration to run koboldcpp with GPU, currently updated to release v1.47.2 found here: https://github.com/LostRuins/koboldcpp.git 12 | 13 | # Build instructions 14 | 15 | First, check out this repository 16 | 17 | ```sh 18 | git clone https://github.com/noneabove1182/koboldcpp-docker.git 19 | ``` 20 | 21 | Next, build the image 22 | 23 | ```sh 24 | cd koboldcpp-docker 25 | docker build -t koboldcpp-docker:latest . 26 | ``` 27 | 28 | (note, if you don't require CUDA you can instead pass -f Dockerfile_cpu to build without CUDA support, and you can use the docker-compose.yml.for-cpu from ./alternative-compose/) 29 | 30 | # Running the image with docker run 31 | 32 | (add -d for detached) 33 | 34 | ```sh 35 | docker run --gpus all -p 80:80 -v /media/teamgroup/models:/app/models koboldcpp-docker:latest --model /app/models/wizardlm-13b-v1.1.ggmlv3.q4_1.bin --port 80 --threads 6 --usecublas --gpulayers 43 36 | ``` 37 | 38 | # Running the image with docker compose 39 | 40 | A docker-compose.yml file has been provided, as well as a .env file that I use for setting my model dir and the model name I'd like to load in with 41 | 42 | Feel free to modify both to fit your needs; for example, I use lowvram for bigger models and remove it for smaller ones. 43 | 44 | I've also provided, in ./alternative-compose/, a docker-compose.yml.for-cpu with the default CPU arguments 45 | 46 | # Pre-built image 47 | 48 | Pre-built images are provided at https://hub.docker.com/r/noneabove1182/koboldcpp-gpu 49 | 50 | Follow the same command as above except with
noneabove1182/koboldcpp-gpu:(version) 51 | 52 | CPU version provided as well but I'm slower at updating it: https://hub.docker.com/r/noneabove1182/koboldcpp-cpu 53 | 54 | # Quirks and features 55 | 56 | If you're having trouble saving info across sessions, try adding a docker volume. See the alternative-compose folder for the one with volumes. 57 | 58 | I've had some issues in the past keeping the volume between versions, so try 'docker volume rm kobold' if you have some weird behaviour as a first troubleshooting step. 59 | 60 | for docker run: 61 | 62 | ``` 63 | docker volume create kobold 64 | ``` 65 | 66 | and the full command is now (note that the -v volume flags must come before the image name, otherwise docker passes them to the container as arguments): 67 | 68 | ```sh 69 | docker run --gpus all -p 80:80 -v /media/teamgroup/models:/app/models -v kobold:/koboldcpp koboldcpp-docker:latest --model /app/models/wizardlm-13b-v1.1.ggmlv3.q4_1.bin --port 80 --threads 6 --usecublas --gpulayers 43 70 | ``` 71 | -------------------------------------------------------------------------------- /alternative-compose/docker-compose.yml.for-cpu: -------------------------------------------------------------------------------- 1 | version: '3.9' 2 | services: 3 | koboldcpp: 4 | container_name: koboldcpp 5 | restart: always 6 | volumes: 7 | - '${MODEL_DIR}:/app/models' 8 | ports: 9 | - '7860:80' 10 | image: 'noneabove1182/koboldcpp-cpu:latest' 11 | ulimits: 12 | memlock: -1 13 | mem_limit: 50gb 14 | command: 15 | [ 16 | "python3", 17 | "koboldcpp.py", 18 | "--model", 19 | "/app/models/${MODEL}", 20 | "--port", 21 | "80", 22 | "--threads", 23 | "6", 24 | "--usemlock" 25 | ] 26 | -------------------------------------------------------------------------------- /alternative-compose/docker-compose.yml.with-volume: -------------------------------------------------------------------------------- 1 | version: '3.9' 2 | services: 3 | koboldcpp: 4 | container_name: koboldcpp 5 | restart: always 6 | volumes: 7 | - '${MODEL_DIR}:/app/models' 8 | - kobold:/koboldcpp 9 | ports: 10 | - '7860:80' 11 | image:
'noneabove1182/koboldcpp-gpu:latest' 12 | ulimits: 13 | memlock: -1 14 | mem_limit: 50gb 15 | deploy: 16 | resources: 17 | reservations: 18 | devices: 19 | - driver: nvidia 20 | count: 1 21 | capabilities: [ gpu ] 22 | command: 23 | [ 24 | "python3", 25 | "koboldcpp.py", 26 | "--model", 27 | "/app/models/${MODEL}", 28 | "--port", 29 | "80", 30 | "--threads", 31 | "6", 32 | "--usemlock", 33 | "--usecublas", 34 | "0", 35 | "--gpulayers", 36 | "18", 37 | "--forceversion", 38 | "405" 39 | ] 40 | volumes: 41 | kobold: 42 | -------------------------------------------------------------------------------- /build_cpu.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Define the repository URL 4 | REPO_URL="https://api.github.com/repos/LostRuins/koboldcpp/commits/concedo" 5 | 6 | LATEST_COMMIT=$(curl -s $REPO_URL | grep 'sha' | cut -d\" -f4 | head -n 1) 7 | 8 | echo $LATEST_COMMIT 9 | 10 | # Build the Docker image 11 | docker build --build-arg commit="$LATEST_COMMIT" -t koboldcpp-cpu -f Dockerfile_cpu . 12 | 13 | # Check if Docker build was successful 14 | if [ $? -ne 0 ]; then 15 | echo "Docker build failed. Exiting..." 16 | exit 1 17 | fi 18 | -------------------------------------------------------------------------------- /build_gpu.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Define the repository URL 4 | REPO_URL="https://api.github.com/repos/LostRuins/koboldcpp/commits/main" 5 | 6 | LATEST_COMMIT=$(curl -s $REPO_URL | grep 'sha' | cut -d\" -f4 | head -n 1) 7 | 8 | echo $LATEST_COMMIT 9 | 10 | # Build the Docker image 11 | docker build --build-arg commit="$LATEST_COMMIT" -t koboldcpp-gpu . 12 | 13 | # Check if Docker build was successful 14 | if [ $? -ne 0 ]; then 15 | echo "Docker build failed. Exiting..." 
16 | exit 1 17 | fi 18 | -------------------------------------------------------------------------------- /cpu_release.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Define the repository URL 4 | REPO_URL="https://api.github.com/repos/LostRuins/koboldcpp/releases/latest" 5 | 6 | # Get the latest release tag from the GitHub repository 7 | RELEASE_TAG=$(curl -s $REPO_URL | grep 'tag_name' | cut -d\" -f4) 8 | 9 | # Define clone argument for Docker 10 | if [ -z "$RELEASE_TAG" ]; then 11 | echo "Failed to get the latest release tag. Exiting..." 12 | exit 1 13 | fi 14 | 15 | echo $RELEASE_TAG 16 | 17 | CLONE_ARG="--branch $RELEASE_TAG" 18 | 19 | # Build the Docker image 20 | docker build --build-arg clone_arg="$CLONE_ARG" -t koboldcpp-cpu -f Dockerfile_cpu . 21 | 22 | # Check if Docker build was successful 23 | if [ $? -ne 0 ]; then 24 | echo "Docker build failed. Exiting..." 25 | exit 1 26 | fi 27 | 28 | # Tag the Docker image 29 | docker tag koboldcpp-cpu:latest noneabove1182/koboldcpp-cpu:$RELEASE_TAG 30 | docker tag koboldcpp-cpu:latest noneabove1182/koboldcpp-cpu:latest 31 | 32 | # Push the Docker images 33 | docker push noneabove1182/koboldcpp-cpu:latest 34 | docker push noneabove1182/koboldcpp-cpu:$RELEASE_TAG 35 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.9' 2 | services: 3 | koboldcpp: 4 | container_name: koboldcpp 5 | restart: always 6 | volumes: 7 | - '${MODEL_DIR}:/app/models' 8 | ports: 9 | - '7860:80' 10 | image: 'noneabove1182/koboldcpp-gpu:latest' 11 | ulimits: 12 | memlock: -1 13 | mem_limit: 50gb 14 | deploy: 15 | resources: 16 | reservations: 17 | devices: 18 | - driver: nvidia 19 | count: 1 20 | capabilities: [ gpu ] 21 | command: 22 | [ 23 | "python3", 24 | "koboldcpp.py", 25 | "--model", 26 | "/app/models/${MODEL}", 27 | "--port", 
28 | "80", 29 | "--blasthreads", 30 | "${BLASTHREADS}", 31 | "--threads", 32 | "${THREADS}", 33 | "--usemlock", 34 | "--usecublas", 35 | "0", 36 | "--gpulayers", 37 | "${GPULAYERS}" 38 | ] 39 | -------------------------------------------------------------------------------- /release.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Define the repository URL 4 | REPO_URL="https://api.github.com/repos/LostRuins/koboldcpp/releases/latest" 5 | 6 | # Get the latest release tag from the GitHub repository 7 | RELEASE_TAG=$(curl -s $REPO_URL | grep 'tag_name' | cut -d\" -f4) 8 | 9 | # Define clone argument for Docker 10 | if [ -z "$RELEASE_TAG" ]; then 11 | echo "Failed to get the latest release tag. Exiting..." 12 | exit 1 13 | fi 14 | 15 | echo $RELEASE_TAG 16 | 17 | CLONE_ARG="--branch $RELEASE_TAG" 18 | 19 | # Build the Docker image 20 | docker build --build-arg clone_arg="$CLONE_ARG" -t noneabove1182/koboldcpp-gpu . 21 | 22 | # Check if Docker build was successful 23 | if [ $? -ne 0 ]; then 24 | echo "Docker build failed. Exiting..." 25 | exit 1 26 | fi 27 | 28 | # Tag the Docker image 29 | docker tag noneabove1182/koboldcpp-gpu:latest noneabove1182/koboldcpp-gpu:$RELEASE_TAG 30 | 31 | # Push the Docker images 32 | docker push noneabove1182/koboldcpp-gpu:latest 33 | docker push noneabove1182/koboldcpp-gpu:$RELEASE_TAG 34 | --------------------------------------------------------------------------------