├── README.md
├── env.sh
├── images
│   ├── github-runner
│   │   ├── .dockerignore
│   │   ├── Dockerfile
│   │   ├── README.md
│   │   ├── build.sh
│   │   └── entrypoint.sh
│   ├── github-runners-manager
│   │   ├── .dockerignore
│   │   ├── Dockerfile
│   │   ├── README.md
│   │   ├── build.sh
│   │   ├── entrypoint.sh
│   │   ├── manager.py
│   │   └── requirements.txt
│   └── llama.cpp-model-downloader
│       ├── .dockerignore
│       ├── Dockerfile
│       ├── README.md
│       ├── build.sh
│       └── entrypoint.sh
├── install-cuda.sh
├── install-docker.sh
├── run.sh
├── service.sh
├── setup.sh
├── start-github-runner-manager.sh
├── start.sh
└── stop.sh


/README.md:
--------------------------------------------------------------------------------
 1 | # ci
 2 | 
 3 | CI for ggml and related projects
 4 | 
 5 | ## How it works
 6 | 
 7 | - Monitor the repositories for new commits
 8 | - By default, the `master` branch is monitored, but optional branches can be configured per node
 9 | - Additionally, all commits with a keyword in the commit message are also processed
10 | - For each commit, checkout and run the corresponding `ci/run.sh` script of the target repo
11 | - The script should execute all necessary tests and generate a `README.md` with summary of the results
12 | - The script can decide what tests to execute based on the environment
13 | - The CI will then push the `README.md` and the generated logs to the [results](https://github.com/ggml-org/ci/tree/results) branch of this repo
14 | - Commit statuses are updated through the Github API
15 | 
16 | ## Add a new CI node
17 | 
18 | WARNING: run only inside fresh VMs! Do not run on your own workstation as it can mess up your data structure
19 | 
20 | - Create a fresh VM
21 | - [Setup env](https://github.com/ggml-org/secrets/blob/master/ggml-ci.txt)
22 | - Run the following:
23 | 
24 | ```bash
25 | git clone https://github.com/ggml-org/ci
26 | ./ci/setup.sh
27 | ./ci/start.sh id-node
28 | ```
29 | 


--------------------------------------------------------------------------------
/env.sh:
--------------------------------------------------------------------------------
  1 | #!/bin/bash
  2 | 
  3 | ## helper functions
  4 | 
  5 | # download url ($2) into directory out ($1, created if missing); skipped if the local copy is up to date
  6 | function gg_wget {
  7 |     local out=$1
  8 |     local url=$2
  9 | 
 10 |     local cwd=$(pwd)
 11 | 
 12 |     mkdir -p "$out"
 13 |     cd "$out" || return 1
 14 | 
 15 |     # should not re-download if file is the same
 16 |     wget -N "$url"
 17 | 
 18 |     cd "$cwd"
 19 | }
 20 | 
 21 | # useful for exporting bash variables and being able to vertically align them
 22 | function gg_export {
 23 |     local var=$1
 24 |     local val="$2"
 25 | 
 26 |     export $var="$val"
 27 | }
 28 | 
 29 | # printf to a file (appends; everything after the file is passed to printf, format first):
 30 | # usage: gg_printf file "format" [args...]
 31 | function gg_printf {
 32 |     local file="$1"
 33 | 
 34 |     shift
 35 |     printf -- "$@" >> "$file"
 36 | }
 37 | 
 38 | function gg_split_hash {
 39 |     h=$1
 40 | 
 41 |     h_0=$(echo ${h} | cut -c1-2)
 42 |     h_1=$(echo ${h} | cut -c3-)
 43 | 
 44 |     echo "${h_0}/${h_1}"
 45 | }
 46 | 
 47 | # print results path for a commit: GG_RESULTS_PATH/<repo $1>/<gg_split_hash of commit $2>/GG_NODE
 48 | # sync: gg_set_commit_status
 49 | function gg_out_for_commit {
 50 |     local repo=$1
 51 |     local commit=$2
 52 | 
 53 |     local out="${GG_RESULTS_PATH}/${repo}/$(gg_split_hash "${commit}")/${GG_NODE}"
 54 | 
 55 |     echo "${out}"
 56 | }
 57 | 
 58 | # set the commit status on GitHub using the GitHub API; args: owner repo commit state description
 59 | # sync: gg_out_for_commit
 60 | function gg_set_commit_status {
 61 |     local owner=$1
 62 |     local repo=$2
 63 |     local commit=$3
 64 |     local status=$4
 65 |     local desc=$5
 66 | 
 67 |     local commit_path=$(gg_split_hash "${commit}")
 68 | 
 69 |     curl -L \
 70 |         -X POST \
 71 |         -H "Accept: application/vnd.github+json" \
 72 |         -H "Authorization: Bearer ${GG_SECRET_TOKEN_GH_API}" \
 73 |         -H "X-GitHub-Api-Version: 2022-11-28" \
 74 |         "https://api.github.com/repos/${owner}/${repo}/statuses/${commit}" \
 75 |         -d '{"state":"'"${status}"'","target_url":"'"${GG_RESULTS_REPO}"'/tree/'"${GG_RESULTS_BRANCH}"'/'"${repo}"'/'"${commit_path}"'/'"${GG_NODE}"'","description":"'"${desc}"'","context":"ggml-org / '"${GG_NODE}"'"}'
 76 | }
 77 | 
 78 | ## general env
 79 | 
 80 | source ~/.env.sh
 81 | 
 82 | # if GG_RESULTS_MNT is not set, default to /mnt
 83 | if [ -z "${GG_RESULTS_MNT}" ]; then
 84 |     gg_export GG_RESULTS_MNT "/mnt"
 85 | fi
 86 | 
 87 | # here we will clone and build the projects
 88 | gg_export GG_WORK_PATH     $(realpath ~)/work
 89 | gg_export GG_WORK_BRANCHES ${GG_WORK_PATH}/branches
 90 | 
 91 | gg_export GG_CI_KEYWORD "ggml-ci"
 92 | 
 93 | # here we will store all results
 94 | gg_export GG_RESULTS_PATH     $(realpath ~)/results
 95 | gg_export GG_RESULTS_REPO     "https://github.com/ggml-org/ci"
 96 | gg_export GG_RESULTS_REPO_SSH "git@github.com:ggml-org/ci.git"
 97 | gg_export GG_RESULTS_BRANCH   "results"
 98 | 
 99 | gg_export GG_GGML_DIR  "ggml"
100 | gg_export GG_GGML_OWN  "ggml-org"
101 | gg_export GG_GGML_REPO "https://github.com/ggml-org/ggml"
102 | gg_export GG_GGML_MNT  "${GG_RESULTS_MNT}/ggml"
103 | 
104 | gg_export GG_WHISPER_CPP_DIR  "whisper.cpp"
105 | gg_export GG_WHISPER_CPP_OWN  "ggerganov"
106 | gg_export GG_WHISPER_CPP_REPO "https://github.com/ggerganov/whisper.cpp"
107 | gg_export GG_WHISPER_CPP_MNT  "${GG_RESULTS_MNT}/whisper.cpp"
108 | 
109 | gg_export GG_LLAMA_CPP_DIR  "llama.cpp"
110 | gg_export GG_LLAMA_CPP_OWN  "ggml-org"
111 | gg_export GG_LLAMA_CPP_REPO "https://github.com/ggml-org/llama.cpp"
112 | gg_export GG_LLAMA_CPP_MNT  "${GG_RESULTS_MNT}/llama.cpp"
113 | 
114 | ## run env
115 | 
116 | # check last N commits
117 | gg_export GG_RUN_LAST_N        1
118 | gg_export GG_RUN_SLEEP        15
119 | gg_export GG_RUN_PUSH_RETRY    3
120 | gg_export GG_RUN_TIMEOUT    1800
121 | gg_export GG_RUN_PAUSE         0
122 | 


--------------------------------------------------------------------------------
/images/github-runner/.dockerignore:
--------------------------------------------------------------------------------
1 | build.sh
2 | README.md
3 | 


--------------------------------------------------------------------------------
/images/github-runner/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM ubuntu:latest
 2 | 
 3 | 
 4 | # system update
 5 | RUN set -eux ; \
 6 |     apt update ; \
 7 |     apt -y upgrade ; \
 8 |     apt -y install \
 9 |             libicu-dev \
10 |             curl \
11 |             wget \
12 |             build-essential \
13 |             cmake \
14 |             git \
15 |             python3-pip \
16 |             python3-venv \
17 |             language-pack-en \
18 |             libcurl4-openssl-dev \
19 |             netcat \
20 |             strace ;
21 | 
22 | # cuda install
23 | # https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html
24 | ENV DEBIAN_FRONTEND=noninteractive
25 | RUN set -eux ; \
26 |     wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-keyring_1.1-1_all.deb ; \
27 |     dpkg -i cuda-keyring_1.1-1_all.deb ; \
28 |     apt-get update ; \
29 |     apt-get -y install  \
30 |       cuda-nvcc-12-2 \
31 |       libcublas-dev-12-2;
32 | 
33 | # https://docs.github.com/en/actions/hosting-your-own-runners/managing-self-hosted-runners/adding-self-hosted-runners
34 | # https://github.com/actions/runner/releases
35 | ARG RUNNER_VERSION=2.316.0
36 | ARG RUNNER_VERSION_HASH=64a47e18119f0c5d70e21b6050472c2af3f582633c9678d40cb5bcb852bcc18f
37 | 
38 | RUN set -eux ; \
39 |     mkdir /ggml-ci /tmp/github-runner ; \
40 |     chown 1000:1000 /ggml-ci /tmp/github-runner ;
41 | 
42 | WORKDIR /ggml-ci
43 | 
44 | # User creation
45 | RUN set -eux ; \
46 |     groupadd --gid 1000 ggml ; \
47 |     useradd --uid 1000 --gid ggml --shell /bin/bash --create-home ggml ;
48 | 
49 | USER 1000:1000
50 | 
51 | # Github runner installation
52 | RUN set -eux ; \
53 |     curl -o actions-runner-linux-x64.tar.gz -L https://github.com/actions/runner/releases/download/v${RUNNER_VERSION}/actions-runner-linux-x64-${RUNNER_VERSION}.tar.gz ;  \
54 |     echo "${RUNNER_VERSION_HASH}  actions-runner-linux-x64.tar.gz" | sha256sum -c ;  \
55 |     tar xzf actions-runner-linux-x64.tar.gz ; \
56 |     rm actions-runner-linux-x64.tar.gz ;
57 | # exec form: arguments given to "docker run" (the JIT config) are passed on to entrypoint.sh
58 | ADD entrypoint.sh /entrypoint.sh
59 | ENTRYPOINT ["/entrypoint.sh"]
60 | 
61 | WORKDIR /github-runner
62 | ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/ggml-ci/bin
63 | 


--------------------------------------------------------------------------------
/images/github-runner/README.md:
--------------------------------------------------------------------------------
 1 | # GitHub Runner
 2 | 
 3 | GitHub self-hosted runner started with JIT config and provided label.
 4 | 
 5 | ### Build
 6 | 
 7 | ```shell
 8 | ./build.sh
 9 | ```
10 | 


--------------------------------------------------------------------------------
/images/github-runner/build.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | set -eux
3 | # build the ggml-github-runner image; the build context is the current directory
4 | docker build \
5 |   -t ggml-github-runner \
6 |   .
7 | 


--------------------------------------------------------------------------------
/images/github-runner/entrypoint.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | set +eux
 3 | 
 4 | if [ $# -lt 1 ]
 5 | then
 6 |   # shellcheck disable=SC2145
 7 |   echo "invalid command: $@"
 8 |   echo "usage: $0 JITCONFIG"
 9 |   exit 1
10 | fi
11 | 
12 | 
13 | echo "RUNNER user: $(id)"
14 | 
15 | (
16 |   # Need to move to installation folder because of the weird:
17 |   # ldd: ./bin/libSystem.Security.Cryptography.Native.OpenSsl.so: No such file or director
18 |   cd /ggml-ci || exit 1
19 |   echo "RUNNER version: $(./config.sh --commit)"
20 | ) || exit 1
21 | 
22 | echo "RUNNER workdir: $(pwd)"
23 | 
24 | echo "RUNNER GPU:"
25 | nvidia-smi \
26 |   --query-gpu=timestamp,name,pci.bus_id,driver_version,pstate,pcie.link.gen.max,pcie.link.gen.current,temperature.gpu,utilization.gpu,utilization.memory,memory.total,memory.free,memory.used \
27 |    --format=csv || exit 1
28 | 
29 | /ggml-ci/run.sh --jitconfig "$1"
30 | 


--------------------------------------------------------------------------------
/images/github-runners-manager/.dockerignore:
--------------------------------------------------------------------------------
1 | build.sh
2 | README.md
3 | 


--------------------------------------------------------------------------------
/images/github-runners-manager/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM ubuntu:latest
 2 | 
 3 | 
 4 | RUN set -eux ; \
 5 |     apt update ; \
 6 |     apt -y upgrade ; \
 7 |     apt -y install \
 8 |             git \
 9 |             openssh-client \
10 |             python3 \
11 |             python3-pip \
12 |             curl  \
13 |             dbus-user-session  \
14 |             uidmap ; \
15 | # Install docker, docker daemon is running on the host, we just require the client here to pop the GitHub runner (docker in docker)
16 |     curl -sSL https://get.docker.com/ | sh ;
17 | 
18 | WORKDIR /ggml-ci
19 | 
20 | ADD requirements.txt ./
21 | 
22 | RUN set -eux ; \
23 |     pip install -r requirements.txt ;
24 | 
25 | ADD manager.py ./
26 | # exec form: run the script directly instead of as a child of "/bin/sh -c"
27 | ADD entrypoint.sh /
28 | ENTRYPOINT ["/entrypoint.sh"]
29 | 


--------------------------------------------------------------------------------
/images/github-runners-manager/README.md:
--------------------------------------------------------------------------------
 1 | # GitHub Runner Manager
 2 | 
 3 | Running inside a Docker container, it monitors workflow job labels
 4 | and triggers a self-hosted JIT runner in Docker whenever a queued job requires this host's compute label.
 5 | 
 6 | ### Build
 7 | 
 8 | ```shell
 9 | ./build.sh
10 | ```
11 | 


--------------------------------------------------------------------------------
/images/github-runners-manager/build.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | set -eux
3 | # build the ggml-github-runners-manager image; the build context is the current directory
4 | docker build -t ggml-github-runners-manager .
5 | 


--------------------------------------------------------------------------------
/images/github-runners-manager/entrypoint.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | set -eu
3 | # REPO, TOKEN and RUNNER_LABEL must be set in the environment; no -x so TOKEN is not traced
4 | python3 manager.py --repo "$REPO" --token "$TOKEN" --runner-label "$RUNNER_LABEL"
5 | 


--------------------------------------------------------------------------------
/images/github-runners-manager/manager.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | import os
 3 | import sys
 4 | import time
 5 | import traceback
 6 | from pathlib import Path
 7 | 
 8 | import docker
 9 | import requests
10 | from docker.types import DeviceRequest
11 | from github import Auth
12 | from github import Github
13 | 
14 | 
15 | def start_mainloop(args):
16 |     auth = Auth.Token(args.token)
17 |     g = Github(auth=auth)
18 |     repo = g.get_repo(args.repo)
19 |     client = docker.from_env()
20 |     while True:
21 |         print(f"ggml-ci: fetching workflows of {args.repo} ...")
22 |         workflows = repo.get_workflows()
23 |         for workflow in workflows:
24 |             for workflow_run in workflow.get_runs(status='queued'):
25 |                 for job in workflow_run.jobs():
26 |                     if [value for value in args.runner_label if value in job.raw_data['labels']]:
27 |                         runner_name = f"ggml-runner-{workflow.id}-{job.id}-{workflow_run.event}-{int(time.time())}"
28 | 
29 |                         print(f"ggml-ci:     {runner_name} triggered for workflow_name={workflow.name}")
30 |                         runner_folder = "/github-runner"
31 |                         work_folder = f"{runner_folder}/_work"
32 | 
33 |                         # Get a JIT runner config
34 |                         jitrequest = {
35 |                             'name': runner_name,
36 |                             'runner_group_id': 1,  # FIXME what to put here
37 |                             'labels': ["self-hosted", "X64", "linux", *args.runner_label],
38 |                             'work_folder': work_folder
39 |                         }
40 |                         response = requests.post(
41 |                             f" https://api.github.com/repos/{args.repo}/actions/runners/generate-jitconfig",
42 |                             headers={
43 |                                 'Authorization': f'Bearer {args.token}',
44 |                                 'X-GitHub-Api-Version': "2022-11-28"
45 |                             },
46 |                             json=jitrequest)
47 |                         if response.status_code != 201:
48 |                             print(f"invalid JIT response code: {response.status_code}\n    {response.text}")
49 |                             continue
50 |                         jitconfig = response.json()
51 | 
52 |                         # start the worker in its container and wait for finish
53 |                         print(
54 |                             f"ggml-ci:     {runner_name} running Github job runner id={jitconfig['runner']['id']} os={jitconfig['runner']['os']} labels={[value['name'] for value in jitconfig['runner']['labels']]}")
55 |                         try:
56 |                             client.containers.run("ggml-github-runner", jitconfig['encoded_jit_config'],
57 |                                                   entrypoint="/entrypoint.sh",
58 |                                                   name=runner_name,
59 |                                                   runtime="nvidia",
60 |                                                   device_requests=[
61 |                                                       DeviceRequest(device_ids=["all"],
62 |                                                                     capabilities=[['gpu']])],
63 |                                                   user='1000:1000',
64 |                                                   security_opt=["no-new-privileges:true"],
65 |                                                   auto_remove=True,
66 |                                                   tmpfs={
67 |                                                       '/tmp': 'size=32G,uid=1000,gid=1000',
68 |                                                       runner_folder: 'size=256G,uid=1000,gid=1000,exec'
69 |                                                   },
70 |                                                   # Models path to avoid downloading models everytime
71 |                                                   volumes={
72 |                                                       '/mnt/models': {'bind': '/models', 'mode': 'ro'}
73 |                                                   })
74 |                             print(f"ggml-ci:     {runner_name} done")
75 |                         except Exception:
76 |                             print(f"ggml-ci:     {runner_name} issue running github workflow:")
77 |                             traceback.print_exc(file=sys.stdout)
78 | 
79 |         print("ggml-ci: workflows iteration done.")
80 |         time.sleep(10)
81 | 
82 | 
83 | def main(args_in: list[str] | None = None) -> None:
84 |     parser = argparse.ArgumentParser(description="Start a github self-hosted runner using JIT based on a repo events")
85 |     parser.add_argument("--token", type=str, help="GitHub token", required=True)
86 |     parser.add_argument("--repo", type=str, help="GitHub repository", required=True)
87 |     parser.add_argument("--runner-label", type=str, action="append", help="GitHub Runner group", required=True)
88 | 
89 |     args = parser.parse_args(args_in)
90 | 
91 |     start_mainloop(args)
92 | 
93 | 
94 | if __name__ == '__main__':
95 |     main()
96 | 


--------------------------------------------------------------------------------
/images/github-runners-manager/requirements.txt:
--------------------------------------------------------------------------------
1 | PyGithub
2 | docker
3 | 


--------------------------------------------------------------------------------
/images/llama.cpp-model-downloader/.dockerignore:
--------------------------------------------------------------------------------
1 | build.sh
2 | README.md
3 | 


--------------------------------------------------------------------------------
/images/llama.cpp-model-downloader/Dockerfile:
--------------------------------------------------------------------------------
 1 | # FIXME REPLACE with the official llama.cpp image with curl support: #6291
 2 | FROM nvidia/cuda:12.2.2-devel-ubuntu22.04
 3 | 
 4 | # system update
 5 | RUN set -eux ; \
 6 |     apt update ; \
 7 |     apt -y install \
 8 |             git \
 9 |             cmake \
10 |             libcurl4-openssl-dev ;
11 | 
12 | WORKDIR /llama.cpp
13 | RUN set -eux; \
14 |     git clone https://github.com/ggerganov/llama.cpp.git . ; \
15 |     mkdir build ; \
16 |     cd build ; \
17 |     cmake .. \
18 |       -DLLAMA_CURL=ON \
19 |       -DLLAMA_CUBLAS=ON \
20 |       -DCMAKE_CUDA_ARCHITECTURES=75 \
21 |       -DLLAMA_NATIVE=OFF \
22 |       -DCMAKE_BUILD_TYPE=Release; \
23 |     cmake --build . --config Release -j $(nproc) --target main ;
24 | # exec form: run the script directly instead of as a child of "/bin/sh -c"
25 | ADD entrypoint.sh /entrypoint.sh
26 | ENTRYPOINT ["/entrypoint.sh"]
27 | 


--------------------------------------------------------------------------------
/images/llama.cpp-model-downloader/README.md:
--------------------------------------------------------------------------------
 1 | # llama.cpp model downloader
 2 | 
 3 | Download a model needed for the CI
 4 | 
 5 | ### Build
 6 | 
 7 | ```shell
 8 | ./build.sh
 9 | ```
10 | 


--------------------------------------------------------------------------------
/images/llama.cpp-model-downloader/build.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | set -eux
3 | # build the llama.cpp-model-downloader image; the build context is the current directory
4 | docker build \
5 |   -t llama.cpp-model-downloader \
6 |   .
7 | 


--------------------------------------------------------------------------------
/images/llama.cpp-model-downloader/entrypoint.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | set +eux
 3 | 
 4 | if [ -z "$HF_REPO" ] || [ -z "$HF_FILE" ]
 5 | then
 6 |   # shellcheck disable=SC2145
 7 |   echo "invalid command: $@"
 8 |   echo "usage: "
 9 |   echo "export HF_REPO=ggml-org/models"
10 |   echo "export HF_FILE=phi-2/ggml-model-q4_0.gguf"
11 |   echo "$0"
12 |   exit 1
13 | fi
14 | 
15 | nvidia-smi || exit 1
16 | 
17 | id
18 | ls -alR /models
19 | 
20 | echo "HF_REPO ${HF_REPO}"
21 | echo "HF_FILE ${HF_FILE}"
22 | MODEL_DIR=$(dirname "${HF_FILE}")
23 | mkdir -p "/models/$MODEL_DIR"
24 | 
25 | ./build/bin/main --hf-repo "${HF_REPO}" --hf-file "${HF_FILE}" --model "/models/$HF_FILE" --random-prompt --n-predict 1
26 | 


--------------------------------------------------------------------------------
/install-cuda.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-keyring_1.0-1_all.deb
4 | sudo dpkg -i cuda-keyring_1.0-1_all.deb
5 | sudo apt-get update
6 | sudo apt-get -y install cuda-drivers
7 | sudo apt-get -y install cuda-nvcc-12-2
8 | sudo apt-get -y install libcublas-dev-12-2
9 | 


--------------------------------------------------------------------------------
/install-docker.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | set +eux
 3 | 
 4 | # https://docs.docker.com/engine/install/ubuntu/
 5 | # Add Docker's official GPG key:
 6 | sudo apt-get update
 7 | sudo apt-get install -y ca-certificates curl uidmap
 8 | sudo install -m 0755 -d /etc/apt/keyrings
 9 | sudo curl -fsSL https://download.docker.com/linux/ubuntu/gpg -o /etc/apt/keyrings/docker.asc
10 | sudo chmod a+r /etc/apt/keyrings/docker.asc
11 | 
12 | # Add the repository to Apt sources:
13 | echo \
14 |   "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu \
15 |   $(. /etc/os-release && echo "$VERSION_CODENAME") stable" | \
16 |   sudo tee /etc/apt/sources.list.d/docker.list > /dev/null
17 | sudo apt-get update
18 | 
19 | # Install docker
20 | sudo apt-get -y install docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin
21 | 
22 | # Install NVidia docker engine runtime
23 | # https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html
24 | curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg \
25 |   && curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list | \
26 |     sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' | \
27 |     sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list
28 | sudo apt-get update
29 | sudo apt-get install -y nvidia-container-toolkit
30 | 
31 | # Rootless mode
32 | # https://docs.docker.com/engine/security/rootless/
33 | sudo apt-get install -y dbus-user-session
34 | sudo systemctl stop docker docker.socket
35 | sudo systemctl disable --now docker.service docker.socket
36 | 
37 | dockerd-rootless-setuptool.sh install
38 | mv ~/.docker /mnt/
39 | ln -s /mnt/.docker ~/.docker
40 | systemctl --user start docker
41 | systemctl --user enable docker
42 | sudo loginctl enable-linger $(whoami)
43 | 
44 | # Configuring Docker NVidia
45 | nvidia-ctk runtime configure --runtime=docker --config=$HOME/.config/docker/daemon.json
46 | systemctl --user restart docker
47 | sudo nvidia-ctk config --set nvidia-container-cli.no-cgroups --in-place
48 | 
49 | docker run -it --rm --gpus all ubuntu nvidia-smi
50 | 
51 | 


--------------------------------------------------------------------------------
/run.sh:
--------------------------------------------------------------------------------
  1 | #!/bin/bash
  2 | 
  3 | # usage: run.sh node
  4 | 
  5 | if [ -z "${1}" ]; then
  6 |     printf "run.sh : usage: run.sh node\n"
  7 |     exit 1
  8 | fi
  9 | 
 10 | if [ ! -f ~/.env.sh ]; then
 11 |     printf "run.sh : ~/.env.sh is not found\n"
 12 |     exit 1
 13 | fi
 14 | 
 15 | sd="$(dirname $0)"
 16 | 
 17 | source $sd/env.sh
 18 | source ~/.env.sh
 19 | 
 20 | #env | grep GG_ | sort
 21 | #
 22 | #printf "\n"
 23 | 
 24 | GG_NODE=${1}
 25 | 
 26 | # check if results repo is cloned
 27 | 
 28 | if [ ! -d ${GG_RESULTS_PATH} ]; then
 29 |     printf "run.sh : results repo is not cloned\n"
 30 |     exit 1
 31 | fi
 32 | 
 33 | # check if GG_SECRET_TOKEN_GH_API env is empty
 34 | 
 35 | if [ -z "${GG_SECRET_TOKEN_GH_API}" ]; then
 36 |     printf "run.sh : GG_SECRET_TOKEN_GH_API env is not set\n"
 37 |     exit 1
 38 | fi
 39 | 
 40 | if [ ${GG_RUN_PAUSE} -eq 1 ]; then
 41 |     printf "run.sh : GG_RUN_PAUSE is set to 1\n"
 42 | 
 43 |     sleep ${GG_RUN_SLEEP}
 44 | 
 45 |     exit 1
 46 | fi
 47 | 
 48 | ## main
 49 | 
 50 | # get last N commits from a branch
 51 | function gg_get_last_commits {
 52 |     branch=$1
 53 |     N=$2
 54 | 
 55 |     git log origin/${branch} -n ${N} --pretty=format:"%H" --abbrev-commit
 56 | }
 57 | 
 58 | # get last N commits from all branches that contain a keyword
 59 | function gg_get_last_commits_grep {
 60 |     keyword=$1
 61 |     N=$2
 62 | 
 63 |     git log --all --grep="${keyword}" -n ${N} --pretty=format:"%H" --abbrev-commit
 64 | }
 65 | 
 66 | # commit and push results to the results repo ($1: repo name, used in the commit message)
 67 | function gg_commit_results {
 68 |     local repo=$1
 69 | 
 70 |     local wd=$(pwd)
 71 | 
 72 |     # bail out rather than run the git commands below in whatever directory we happen to be in
 73 |     cd "${GG_RESULTS_PATH}" || return 1
 74 |     git add .
 75 |     git commit -m "$repo : ${GG_NODE}"
 76 | 
 77 |     local i
 78 |     for i in $(seq 1 "${GG_RUN_PUSH_RETRY}"); do
 79 |         git pull --rebase
 80 | 
 81 |         if git push; then
 82 |             break
 83 |         fi
 84 |     done
 85 | 
 86 |     cd "${wd}"
 87 | }
 88 | 
 89 | function gg_run {
 90 |     owner="$1"
 91 |     repo="$2"
 92 |     mnt="$3"
 93 | 
 94 |     cd ${GG_WORK_PATH}/${repo}
 95 | 
 96 |     git fetch --all > /dev/null 2>&1
 97 | 
 98 |     branches="master"
 99 | 
100 |     if [ -f ${GG_WORK_BRANCHES} ]; then
101 |         branches=$(cat ${GG_WORK_BRANCHES} | grep "^${repo}" | cut -d' ' -f2-)
102 | 
103 |         if [ -z "${branches}" ]; then
104 |             branches="master"
105 |         fi
106 |     fi
107 | 
108 |     printf "run.sh : processing '${repo}' branches - '${branches}'\n"
109 | 
110 |     commits=""
111 | 
112 |     for branch in ${branches} ; do
113 |         commits="${commits} $(gg_get_last_commits ${branch} ${GG_RUN_LAST_N})"
114 |     done
115 | 
116 |     commits="${commits} $(gg_get_last_commits_grep ${GG_CI_KEYWORD} ${GG_RUN_LAST_N})"
117 | 
118 |     for commit in ${commits} ; do
119 |         out=$(gg_out_for_commit ${repo} ${commit})
120 | 
121 |         if [ -d ${out} ]; then
122 |             continue
123 |         fi
124 | 
125 |         gg_set_commit_status "${owner}" "${repo}" "${commit}" "pending" "in queue ..."
126 |     done
127 | 
128 |     for commit in ${commits} ; do
129 |         out=$(gg_out_for_commit ${repo} ${commit})
130 | 
131 |         if [ -d ${out} ]; then
132 |             continue
133 |         fi
134 | 
135 |         printf "run.sh : processing '${repo}' commit ${commit}\n"
136 | 
137 |         gg_set_commit_status "${owner}" "${repo}" "${commit}" "pending" "running ..."
138 | 
139 |         mkdir -p ${out}
140 | 
141 |         git checkout ${commit}
142 |         git submodule update --init --recursive
143 |         git reset --hard
144 |         git clean -fd
145 | 
146 |         gg_export GG_CI_REPO          "https://github.com/${owner}/${repo}"
147 |         gg_export GG_CI_COMMIT_URL    "https://github.com/${owner}/${repo}/commit/${commit}"
148 |         gg_export GG_CI_COMMIT_MSG    "$(git log -1 --pretty=%B)"
149 |         gg_export GG_CI_COMMIT_AUTHOR "$(git log -1 --pretty=%an)"
150 | 
151 |         runtime="0:00.00"
152 | 
153 |         if [ -f ci/run.sh ]; then
154 |             timeout ${GG_RUN_TIMEOUT} time -o ${out}/runtime bash ci/run.sh "${out}" "${mnt}" > ${out}/stdall 2>&1
155 |             result=$?
156 |             runtime=$(tail -n 2 ${out}/runtime | head -n 1 | awk '{print $3}' | sed -e 's/elapsed//')
157 |         else
158 |             gg_printf ${out}/README.md "ci/run.sh was not found - nothing to do\n"
159 |             result=0
160 |         fi
161 | 
162 |         echo ${result} > ${out}/exit
163 | 
164 |         mv ${out}/README.md ${out}/README.md.bak
165 | 
166 |         status="$(if [ $result -eq 0 ]; then echo "SUCCESS ✅"; else echo "FAILURE ❌ (${result})"; fi)"
167 | 
168 |         gg_printf ${out}/README.md '## Summary\n\n'
169 | 
170 |         gg_printf ${out}/README.md '- status:  %s\n' "${status}"
171 |         gg_printf ${out}/README.md '- runtime: %s\n' "${runtime}"
172 |         gg_printf ${out}/README.md '- date:    %s\n' "$(date)"
173 |         gg_printf ${out}/README.md '- repo:    %s\n' "${GG_CI_REPO}"
174 |         gg_printf ${out}/README.md '- commit:  %s\n' "${GG_CI_COMMIT_URL}"
175 |         gg_printf ${out}/README.md '- author:  %s\n' "${GG_CI_COMMIT_AUTHOR}"
176 |         gg_printf ${out}/README.md '```\n%s\n```\n'  "${GG_CI_COMMIT_MSG}"
177 |         gg_printf ${out}/README.md '\n'
178 | 
179 |         gg_printf ${out}/README.md '## Environment\n\n'
180 | 
181 |         gg_printf ${out}/README.md '```\n'
182 |         gg_printf ${out}/README.md '%s\n' "$(env | grep GG_BUILD | sort)"
183 |         gg_printf ${out}/README.md '```\n'
184 |         gg_printf ${out}/README.md '\n'
185 | 
186 |         gg_printf ${out}/README.md '## Output\n\n'
187 | 
188 |         cat ${out}/README.md.bak >> ${out}/README.md
189 | 
190 |         commit_parent=$(git log -1 --pretty=%P)
191 | 
192 |         # if the output for the parent commit exists, append the "stdall" diff to the README.md
193 |         out_parent=$(gg_out_for_commit ${repo} ${commit_parent})
194 | 
195 |         #if [ -f ${out_parent}/stdall ]; then
196 |         #    gg_printf ${out}/README.md '## Diff with parent commit\n\n'
197 | 
198 |         #    gg_printf ${out}/README.md '<details><summary>click to expand</summary>\n\n'
199 |         #    gg_printf ${out}/README.md '```diff\n'
200 |         #    gg_printf ${out}/README.md '%s\n' "$(diff -u ${out_parent}/stdall ${out}/stdall)"
201 |         #    gg_printf ${out}/README.md '```\n'
202 |         #    gg_printf ${out}/README.md '</details>\n\n'
203 |         #fi
204 | 
205 |         if [ ${result} -eq 0 ]; then
206 |             gg_set_commit_status "${owner}" "${repo}" "${commit}" "success" "success in ${runtime}"
207 |         else
208 |             gg_set_commit_status "${owner}" "${repo}" "${commit}" "failure" "failure ${result} in ${runtime}"
209 |         fi
210 | 
211 |         printf "run.sh : done processing '${repo}' commit ${commit}, result ${result}\n"
212 | 
213 |         gg_commit_results "${repo}"
214 | 
215 |         sleep 1
216 |     done
217 | }
218 | 
219 | # main: repo owners come from env.sh so status updates and links use the configured GitHub owner
220 | 
221 | gg_run "${GG_GGML_OWN}"        "ggml"        "${GG_GGML_MNT}"
222 | gg_run "${GG_LLAMA_CPP_OWN}"   "llama.cpp"   "${GG_LLAMA_CPP_MNT}"
223 | gg_run "${GG_WHISPER_CPP_OWN}" "whisper.cpp" "${GG_WHISPER_CPP_MNT}"
224 | 
225 | sleep ${GG_RUN_SLEEP}
226 | 


--------------------------------------------------------------------------------
/service.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # run run.sh for the given node in an endless loop; /tmp/ggml-lock guards against a second instance
 3 | # check if the script is already running
 4 | 
 5 | if [ -f /tmp/ggml-lock ]; then
 6 |     printf "service.sh : script is already running\n"
 7 |     exit 1
 8 | fi
 9 | 
10 | # usage: service.sh node
11 | 
12 | if [ -z "${1}" ]; then
13 |     printf "service.sh : usage: service.sh node\n"
14 |     exit 1
15 | fi
16 | 
17 | # create a lock file
18 | 
19 | touch /tmp/ggml-lock
20 | 
21 | function gg_cleanup {
22 |     rm -f /tmp/ggml-lock
23 | }
24 | 
25 | # delete the lock file on exit
26 | 
27 | trap gg_cleanup EXIT
28 | 
29 | sd="$(dirname "$0")"
30 | cd "$sd" || exit 1
31 | 
32 | ## main loop: one run.sh pass, then "git pull" in the script directory before the next pass
33 | 
34 | while true; do
35 |     bash ./run.sh "$1"
36 |     sleep 1
37 | 
38 |     git pull
39 | done
40 | 


--------------------------------------------------------------------------------
/setup.sh:
--------------------------------------------------------------------------------
  1 | #!/bin/bash
  2 | 
  3 | if [ ! -f ~/.env.sh ]; then
  4 |     printf "run.sh : ~/.env.sh is not found\n"
  5 |     exit 1
  6 | fi
  7 | 
  8 | sd=`dirname $0`
  9 | 
 10 | source $sd/env.sh
 11 | 
 12 | env | grep GG_ | sort
 13 | 
 14 | printf "\n"
 15 | 
 16 | ## dependencies
 17 | 
 18 | if [ -f /etc/lsb-release ] ; then
 19 |     sudo apt update
 20 |     sudo apt install cmake g++ python3-pip python3-venv unzip ccache git-lfs libcurl4-openssl-dev
 21 | else
 22 |     date
 23 | fi
 24 | 
 25 | ## helper functions
 26 | 
 27 | function gg_setup_ggml {
 28 |     cd $GG_WORK_PATH
 29 | 
 30 |     if [ ! -d $GG_GGML_DIR ]; then
 31 |         git clone $GG_GGML_REPO $GG_GGML_DIR
 32 |     fi
 33 | 
 34 |     mkdir -p $GG_GGML_MNT
 35 | }
 36 | 
 37 | function gg_setup_whisper_cpp {
 38 |     cd $GG_WORK_PATH
 39 | 
 40 |     if [ ! -d $GG_WHISPER_CPP_DIR ]; then
 41 |         git clone $GG_WHISPER_CPP_REPO $GG_WHISPER_CPP_DIR
 42 |     fi
 43 | 
 44 |     mkdir -p $GG_WHISPER_CPP_MNT
 45 | }
 46 | 
 47 | function gg_setup_llama_cpp {
 48 |     cd $GG_WORK_PATH
 49 | 
 50 |     if [ ! -d $GG_LLAMA_CPP_DIR ]; then
 51 |         git clone $GG_LLAMA_CPP_REPO $GG_LLAMA_CPP_DIR
 52 |     fi
 53 | 
 54 |     mkdir -p $GG_LLAMA_CPP_MNT
 55 | }
 56 | 
 57 | ## main
 58 | 
 59 | set -x
 60 | set -e
 61 | 
 62 | # prepare results repo
 63 | if [ ! -d $GG_RESULTS_PATH ]; then
 64 |     #git clone -c core.sshCommand="/usr/bin/ssh -i ~/.ssh/ggml-bot-main" $GG_RESULTS_REPO_SSH $GG_RESULTS_PATH -b $GG_RESULTS_BRANCH
 65 |     git clone $GG_RESULTS_REPO $GG_RESULTS_PATH -b $GG_RESULTS_BRANCH
 66 | 
 67 |     if [ ! -d $GG_RESULTS_PATH ]; then
 68 |         printf "setup.sh : failed to clone results repo\n"
 69 |         exit 1
 70 |     fi
 71 | 
 72 |     cd $GG_RESULTS_PATH
 73 | 
 74 |     git config user.name  $GG_BOT_NAME
 75 |     git config user.email $GG_BOT_EMAIL
 76 | 
 77 |     cd ..
 78 | else
 79 |     # reset the results
 80 |     cd $GG_RESULTS_PATH
 81 | 
 82 |     git fetch origin
 83 |     git reset --hard origin/$GG_RESULTS_BRANCH
 84 | 
 85 |     git config user.name  $GG_BOT_NAME
 86 |     git config user.email $GG_BOT_EMAIL
 87 | 
 88 |     cd ..
 89 | fi
 90 | 
 91 | # main
 92 | 
 93 | mkdir -p $GG_WORK_PATH
 94 | #sudo chown ggml:ggml /mnt
 95 | 
 96 | gg_setup_ggml
 97 | gg_setup_whisper_cpp
 98 | gg_setup_llama_cpp
 99 | 
100 | set +x
101 | set +e
102 | 


--------------------------------------------------------------------------------
/start-github-runner-manager.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | set +eu
 3 | 
 4 | if [ $# -lt 3 ]
 5 | then
 6 |   echo "usage: $0 REPO TOKEN RUNNER_LABEL "
 7 |   exit 1
 8 | fi
 9 | 
10 | # Build the manager image
11 | echo "Building github runner manager image..."
12 | (
13 |   cd images/github-runners-manager || exit 1
14 |   ./build.sh
15 | ) || exit 1
16 | 
17 | # Build the github runner image
18 | echo "Building github runner image..."
19 | (
20 |   cd images/github-runner || exit 1
21 |   ./build.sh
22 | ) || exit 1
23 | 
24 | MODEL_FOLDERS="/mnt/models"
25 | if [ ! -d "$MODEL_FOLDERS" ]; then
26 |   mkdir -p $MODEL_FOLDERS
27 | fi
28 | 
29 | id
30 | ls -alR /mnt/models
31 | systemctl --user --no-block status docker | less -FRX
32 | sudo systemctl --no-block status docker | less -FRX
33 | 
34 | if [ -z "$DOWNLOAD_MODELS" ] || [ "$DOWNLOAD_MODELS" == "ON" ] ; then
35 |   (
36 |     set +eu
37 |     (
38 |       echo "Building models downloader..."
39 |       cd images/llama.cpp-model-downloader || exit 1
40 |       ./build.sh
41 |     ) || exit 1
42 | 
43 |     # Kill any dangling container
44 |     (docker ps    | grep -q llama.cpp-model-downloader) && docker kill llama.cpp-model-downloader && sleep 5
45 |     (docker ps -a | grep -q llama.cpp-model-downloader) && docker rm llama.cpp-model-downloader   && sleep 5
46 | 
47 |     echo "ggml-ci: downloading models..."
48 |     MODELS="ggml-org/models:phi-2/ggml-model-q4_0.gguf ggml-org/models:phi-2/ggml-model-q8_0.gguf ggml-org/models:phi-2/ggml-model-f16.gguf"
49 |     for MODEL in $MODELS
50 |     do
51 |       IFS=':'; S=($MODEL); unset IFS;
52 |       HF_REPO="${S[0]}"
53 |       HF_FILE="${S[1]}"
54 |       echo "ggml-ci:     --hf-repo $HF_REPO --hf-file $HF_FILE"
55 |       docker run \
56 |             --rm \
57 |             -it \
58 |             --name llama.cpp-model-downloader \
59 |             --gpus all \
60 |             -t \
61 |             -v $MODEL_FOLDERS:/models:rw \
62 |             -e HF_REPO="$HF_REPO" \
63 |             -e HF_FILE="$HF_FILE" \
64 |             llama.cpp-model-downloader > download_model."$(basename "$HF_FILE")".log 2>&1
65 |     done
66 |   ) || exit 1
67 | fi
68 | 
69 | # Kill any dangling container
70 | (docker ps    | grep -q ggml-github-runners-manager) && docker kill ggml-github-runners-manager && sleep 5
71 | (docker ps -a | grep -q ggml-github-runners-manager) && docker rm ggml-github-runners-manager   && sleep 5
72 | 
73 | # Start the runner
74 | echo "ggml-ci: starting github runner manager on repo=$1 label=$3..."
75 | docker run \
76 |       --rm \
77 |       --detach \
78 |       --name ggml-github-runners-manager \
79 |       -t \
80 |       -v /run/user/${UID}/docker.sock:/var/run/docker.sock:rw \
81 |       -v /mnt/runners:/runners:rw \
82 |       -e REPO="$1" \
83 |       -e TOKEN="$2" \
84 |       -e RUNNER_LABEL="$3" \
85 |       ggml-github-runners-manager
86 | 
87 | echo "ggml-ci: github runner manager started."
88 | echo "ggml-ci: github runner manager logs:"
89 | echo "         CTRL+C to stop logs pulling"
90 | docker logs -f ggml-github-runners-manager
91 | 


--------------------------------------------------------------------------------
/start.sh:
--------------------------------------------------------------------------------
#!/bin/bash
#
# Launch service.sh from this script's directory in the background, passing
# all arguments through to it and appending its output to ./ci-service.log in
# the current directory.

sd="$(dirname "$0")"

# "$@" and the script path are quoted so that arguments and directories
# containing whitespace reach service.sh unchanged
nohup bash "$sd/service.sh" "$@" | tee -a ./ci-service.log &
6 | 


--------------------------------------------------------------------------------
/stop.sh:
--------------------------------------------------------------------------------
#!/bin/bash
#
# Stop the CI service by sending the default kill signal to every process
# whose command line matches "service.sh".

# the dot is escaped so that it only matches a literal "."
pids="$(pgrep -f 'service\.sh')"

# without this check, kill would be called with no arguments and only print
# its usage text
if [ -z "${pids}" ]; then
    printf "stop.sh : service.sh is not running\n"
    exit 1
fi

# intentionally unquoted: pgrep prints one PID per line and each of them has
# to become a separate argument
kill ${pids}
4 | 


--------------------------------------------------------------------------------