├── .dockerignore ├── .gitignore ├── Dockerfile ├── LICENSE ├── README.md ├── build-utils ├── entrypoint-install.sh └── make-bundelf-bundle.sh ├── build └── build.sh ├── kernels └── oraclelinux │ ├── 95virtiofs │ ├── module-setup.sh │ ├── mount-virtiofs.sh │ └── parse-virtiofs.sh │ └── addvirtiofs.conf ├── patches ├── dnsmasq │ └── remove-passwd-requirement.patch ├── dropbear │ └── runcvm.patch ├── mkinitfs │ └── nlplug-findfs.patch └── seabios │ └── qemu-fw-cfg-fix.patch ├── qemu-exit └── qemu-exit.c ├── runcvm-init ├── VERSION.h └── dumb-init.c ├── runcvm-scripts ├── functions │ └── cgroupfs ├── runcvm-ctr-defaults ├── runcvm-ctr-entrypoint ├── runcvm-ctr-exec ├── runcvm-ctr-exit ├── runcvm-ctr-qemu ├── runcvm-ctr-qemu-ifdown ├── runcvm-ctr-qemu-ifup ├── runcvm-ctr-qemu-poweroff ├── runcvm-ctr-shutdown ├── runcvm-ctr-virtiofsd ├── runcvm-install-runtime.sh ├── runcvm-ip-functions ├── runcvm-runtime ├── runcvm-vm-exec ├── runcvm-vm-init ├── runcvm-vm-qemu-ga ├── runcvm-vm-start └── runcvm-vm-start-wrapper └── tests ├── 00-http-docker-swarm ├── node │ ├── Dockerfile │ └── docker.sh └── test ├── 01-mariadb └── test ├── 02-user-workdir └── test ├── 03-env └── test ├── framework.sh └── run /.dockerignore: -------------------------------------------------------------------------------- 1 | # Ignore files matching the following patterns within docker build 2 | **/*~ 3 | depot.json 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .#* 2 | *~ 3 | *.bak 4 | *.o 5 | *.version 6 | *.orig 7 | *.tdy 8 | TAGS 9 | \#*\# 10 | .packlist 11 | perllocal.pod 12 | .c9 13 | .Trash-1000 14 | .vscode 15 | depot.json 16 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # syntax=docker/dockerfile:1.3-labs 2 | 3 | # Alpine version to build with 4 | ARG ALPINE_VERSION=3.19 5 | 6 | # --- BUILD STAGE --- 7 | # Build base alpine-sdk image for later build stages 8 | FROM alpine:$ALPINE_VERSION as alpine-sdk 9 | 10 | RUN apk update && apk add --no-cache alpine-sdk coreutils && \ 11 | abuild-keygen -an && \ 12 | # Copy the public keys to the system keys 13 | cp -a /root/.abuild/*.pub /etc/apk/keys && \ 14 | git clone --depth 1 --single-branch --filter=blob:none --sparse https://gitlab.alpinelinux.org/alpine/aports.git ~/aports && \ 15 | cd ~/aports/ && \ 16 | git sparse-checkout set main/seabios main/ 17 | 18 | # --- BUILD STAGE --- 19 | # Build patched SeaBIOS packages 20 | # to allow disabling of BIOS output by QEMU 21 | # (without triggering QEMU warnings) 22 | FROM alpine-sdk as alpine-seabios 23 | 24 | ADD patches/seabios/qemu-fw-cfg-fix.patch /root/aports/main/seabios/0003-qemu-fw-cfg-fix.patch 25 | 26 | RUN <>APKBUILD 30 | echo 'source="${source}0003-qemu-fw-cfg-fix.patch"' >>APKBUILD 31 | abuild -rFf 32 | EOF 33 | 34 | # --- BUILD STAGE --- 35 | # Build patched dnsmasq 36 | # that does not require /etc/passwd file to run 37 | # (needed for images such as hello-world) 38 | FROM alpine-sdk as alpine-dnsmasq 39 | 40 | ADD patches/dnsmasq/remove-passwd-requirement.patch /root/aports/main/dnsmasq/remove-passwd-requirement.patch 41 | 42 | RUN <>APKBUILD 46 | echo 'source="${source}remove-passwd-requirement.patch"' >>APKBUILD 47 | abuild -rFf 48 | EOF 49 | 50 | # --- BUILD STAGE --- 51 | # Build patched dropbear with epka plugin 52 | # that does not require 
/etc/passwd or PAM to run 53 | FROM alpine-sdk as alpine-dropbear 54 | 55 | ADD patches/dropbear/runcvm.patch /root/aports/main/dropbear/runcvm.patch 56 | 57 | RUN <>APKBUILD 62 | echo 'source="${source}runcvm.patch"' >>APKBUILD 63 | abuild -rFf 64 | 65 | cd /root 66 | git clone https://github.com/fabriziobertocci/dropbear-epka.git 67 | cd dropbear-epka 68 | apk add --no-cache automake autoconf libtool 69 | libtoolize --force 70 | aclocal 71 | autoheader || true 72 | automake --force-missing --add-missing 73 | autoconf 74 | ./configure 75 | make install 76 | EOF 77 | 78 | # --- BUILD STAGE --- 79 | # Build patched mkinitfs/nlplug-findfs 80 | # with shorter timeout for speedier boot (saving ~4s) 81 | FROM alpine-sdk as alpine-mkinitfs 82 | 83 | ADD patches/mkinitfs/nlplug-findfs.patch /root/aports/main/mkinitfs/nlplug-findfs.patch 84 | 85 | RUN <>APKBUILD 89 | echo 'source="${source} nlplug-findfs.patch"' >>APKBUILD 90 | abuild -rFf 91 | EOF 92 | 93 | # --- BUILD STAGE --- 94 | # Build dist-independent dynamic binaries and libraries 95 | FROM alpine:$ALPINE_VERSION as binaries 96 | 97 | RUN apk update && \ 98 | apk add --no-cache file bash qemu-system-x86_64 qemu-virtiofsd qemu-ui-curses qemu-guest-agent \ 99 | jq iproute2 netcat-openbsd e2fsprogs blkid util-linux \ 100 | s6 dnsmasq iptables nftables \ 101 | ncurses coreutils \ 102 | patchelf 103 | 104 | # Install patched SeaBIOS 105 | COPY --from=alpine-seabios /root/packages/main/x86_64 /tmp/seabios/ 106 | RUN apk add --allow-untrusted /tmp/seabios/*.apk && cp -a /usr/share/seabios/bios*.bin /usr/share/qemu/ 107 | 108 | # Install patched dnsmasq 109 | COPY --from=alpine-dnsmasq /root/packages/main/x86_64 /tmp/dnsmasq/ 110 | RUN apk add --allow-untrusted /tmp/dnsmasq/dnsmasq-2*.apk /tmp/dnsmasq/dnsmasq-common*.apk 111 | 112 | # Install patched dropbear 113 | COPY --from=alpine-dropbear /root/packages/main/x86_64 /usr/local/lib/libepka_file.so /tmp/dropbear/ 114 | RUN apk add --allow-untrusted /tmp/dropbear/dropbear-ssh*.apk /tmp/dropbear/dropbear-dbclient*.apk /tmp/dropbear/dropbear-2*.apk 115 | 116 | # Patch the binaries and set up symlinks 117 | COPY build-utils/make-bundelf-bundle.sh /usr/local/bin/make-bundelf-bundle.sh 118 | ENV BUNDELF_BINARIES="busybox bash jq ip nc mke2fs blkid findmnt dnsmasq xtables-legacy-multi nft xtables-nft-multi nft mount s6-applyuidgid qemu-system-x86_64 qemu-ga /usr/lib/qemu/virtiofsd tput coreutils getent dropbear dbclient dropbearkey" 119 | ENV BUNDELF_EXTRA_LIBS="/usr/lib/xtables /usr/libexec/coreutils /tmp/dropbear/libepka_file.so /usr/lib/qemu/*.so" 120 | ENV BUNDELF_EXTRA_SYSTEM_LIB_PATHS="/usr/lib/xtables" 121 | ENV BUNDELF_CODE_PATH="/opt/runcvm" 122 | ENV BUNDELF_EXEC_PATH="/.runcvm/guest" 123 | 124 | RUN /usr/local/bin/make-bundelf-bundle.sh --bundle && \ 125 | mkdir -p $BUNDELF_CODE_PATH/bin && \ 126 | cd $BUNDELF_CODE_PATH/bin && \ 127 | for cmd in \ 128 | awk base64 cat chgrp chmod cut grep head hostname init ln ls \ 129 | mkdir poweroff ps rm rmdir route sh sysctl tr touch; \ 130 | do \ 131 | ln -s busybox $cmd; \ 132 | done && \ 133 | mkdir -p $BUNDELF_CODE_PATH/usr/share && \ 134 | cp -a /usr/share/qemu $BUNDELF_CODE_PATH/usr/share && \ 135 | cp -a /etc/terminfo $BUNDELF_CODE_PATH/usr/share && \ 136 | # Remove setuid/setgid bits from any/all binaries 137 | chmod -R -s $BUNDELF_CODE_PATH/ 138 | 139 | # --- BUILD STAGE --- 140 | # Build static runcvm-init 141 | FROM alpine:$ALPINE_VERSION as runcvm-init 142 | 143 | RUN apk update && \ 144 | apk add --no-cache gcc musl-dev 145 | 146 | ADD 
runcvm-init /root/runcvm-init 147 | RUN cd /root/runcvm-init && cc -o /root/runcvm-init/runcvm-init -std=gnu99 -static -s -Wall -Werror -O3 dumb-init.c 148 | 149 | # --- BUILD STAGE --- 150 | # Build static qemu-exit 151 | FROM alpine:$ALPINE_VERSION as qemu-exit 152 | 153 | RUN apk update && \ 154 | apk add --no-cache gcc musl-dev 155 | 156 | ADD qemu-exit /root/qemu-exit 157 | RUN cd /root/qemu-exit && cc -o /root/qemu-exit/qemu-exit -std=gnu99 -static -s -Wall -Werror -O3 qemu-exit.c 158 | 159 | # --- BUILD STAGE --- 160 | # Build alpine kernel and initramfs with virtiofs module 161 | FROM alpine:$ALPINE_VERSION as alpine-kernel 162 | 163 | # Install patched mkinitfs 164 | COPY --from=alpine-mkinitfs /root/packages/main/x86_64 /tmp/mkinitfs/ 165 | RUN apk add --allow-untrusted /tmp/mkinitfs/*.apk 166 | RUN apk add --no-cache linux-virt 167 | RUN echo 'kernel/fs/fuse/virtiofs*' >>/etc/mkinitfs/features.d/virtio.modules && \ 168 | sed -ri 's/\b(ata|nvme|raid|scsi|usb|cdrom|kms|mmc)\b//g; s/[ ]+/ /g' /etc/mkinitfs/mkinitfs.conf && \ 169 | sed -ri 's/(nlplug-findfs)/\1 --timeout=1000/' /usr/share/mkinitfs/initramfs-init && \ 170 | mkinitfs $(basename $(ls -d /lib/modules/*)) 171 | RUN BASENAME=$(basename $(ls -d /lib/modules/*)) && \ 172 | mkdir -p /opt/runcvm/kernels/alpine/$BASENAME && \ 173 | cp -a /boot/vmlinuz-virt /opt/runcvm/kernels/alpine/$BASENAME/vmlinuz && \ 174 | cp -a /boot/initramfs-virt /opt/runcvm/kernels/alpine/$BASENAME/initrd && \ 175 | cp -a /lib/modules/ /opt/runcvm/kernels/alpine/$BASENAME/ && \ 176 | cp -a /boot/config-virt /opt/runcvm/kernels/alpine/$BASENAME/modules/$BASENAME/config && \ 177 | chmod -R u+rwX,g+rX,o+rX /opt/runcvm/kernels/alpine 178 | 179 | FROM alpine-kernel as openwrt-kernel 180 | RUN mkdir -p /opt/runcvm/kernels/openwrt/$(basename $(ls -d /lib/modules/*))/modules/$(basename $(ls -d /lib/modules/*)) && \ 181 | cd /opt/runcvm/kernels/openwrt/$(basename $(ls -d /lib/modules/*)) && \ 182 | cp -a /boot/vmlinuz-virt vmlinuz && \ 183 | cp -a /boot/initramfs-virt initrd && \ 184 | find /lib/modules/ -type f -name '*.ko*' -exec cp -a {} modules/$(basename $(ls -d /lib/modules/*)) \; && \ 185 | gunzip modules/$(basename $(ls -d /lib/modules/*))/*.gz && \ 186 | chmod -R u+rwX,g+rX,o+rX /opt/runcvm/kernels/openwrt 187 | 188 | # --- BUILD STAGE --- 189 | # Build Debian bookworm kernel and initramfs with virtiofs module 190 | FROM amd64/debian:bookworm as debian-kernel 191 | 192 | ARG DEBIAN_FRONTEND=noninteractive 193 | RUN apt update && apt install -y linux-image-amd64:amd64 && \ 194 | echo 'virtiofs' >>/etc/initramfs-tools/modules && \ 195 | echo 'virtio_console' >>/etc/initramfs-tools/modules && \ 196 | echo "RESUME=none" >/etc/initramfs-tools/conf.d/resume && \ 197 | update-initramfs -u 198 | RUN BASENAME=$(basename $(ls -d /lib/modules/*)) && \ 199 | mkdir -p /opt/runcvm/kernels/debian/$BASENAME && \ 200 | cp -aL /vmlinuz /opt/runcvm/kernels/debian/$BASENAME/vmlinuz && \ 201 | cp -aL /initrd.img /opt/runcvm/kernels/debian/$BASENAME/initrd && \ 202 | cp -a /lib/modules/ /opt/runcvm/kernels/debian/$BASENAME/ && \ 203 | cp -a /boot/config-$BASENAME /opt/runcvm/kernels/debian/$BASENAME/modules/$BASENAME/config && \ 204 | chmod -R u+rwX,g+rX,o+rX /opt/runcvm/kernels/debian 205 | 206 | # --- BUILD STAGE --- 207 | # Build Ubuntu jammy kernel and initramfs with virtiofs module 208 | FROM amd64/ubuntu:jammy as ubuntu-kernel 209 | 210 | ARG DEBIAN_FRONTEND=noninteractive 211 | RUN apt update && apt install -y linux-generic:amd64 && \ 212 | echo 'virtiofs'
>>/etc/initramfs-tools/modules && \ 213 | echo 'virtio_console' >>/etc/initramfs-tools/modules && \ 214 | echo "RESUME=none" >/etc/initramfs-tools/conf.d/resume && \ 215 | update-initramfs -u 216 | RUN BASENAME=$(basename $(ls -d /lib/modules/*)) && \ 217 | mkdir -p /opt/runcvm/kernels/ubuntu/$BASENAME && \ 218 | cp -aL /boot/vmlinuz /opt/runcvm/kernels/ubuntu/$BASENAME/vmlinuz && \ 219 | cp -aL /boot/initrd.img /opt/runcvm/kernels/ubuntu/$BASENAME/initrd && \ 220 | cp -a /lib/modules/ /opt/runcvm/kernels/ubuntu/$BASENAME/ && \ 221 | cp -a /boot/config-$BASENAME /opt/runcvm/kernels/ubuntu/$BASENAME/modules/$BASENAME/config && \ 222 | chmod -R u+rwX,g+rX,o+rX /opt/runcvm/kernels/ubuntu 223 | 224 | # --- BUILD STAGE --- 225 | # Build Oracle Linux kernel and initramfs with virtiofs module 226 | FROM oraclelinux:9 as oracle-kernel 227 | 228 | RUN dnf install -y kernel 229 | ADD ./kernels/oraclelinux/addvirtiofs.conf /etc/dracut.conf.d/addvirtiofs.conf 230 | ADD ./kernels/oraclelinux/95virtiofs /usr/lib/dracut/modules.d/95virtiofs 231 | RUN dracut --force --kver $(basename /lib/modules/*) --kmoddir /lib/modules/* 232 | RUN mkdir -p /opt/runcvm/kernels/ol/$(basename $(ls -d /lib/modules/*)) && \ 233 | mv /lib/modules/*/vmlinuz /opt/runcvm/kernels/ol/$(basename $(ls -d /lib/modules/*))/vmlinuz && \ 234 | cp -aL /boot/initramfs* /opt/runcvm/kernels/ol/$(basename $(ls -d /lib/modules/*))/initrd && \ 235 | cp -a /lib/modules/ /opt/runcvm/kernels/ol/$(basename $(ls -d /lib/modules/*))/ && \ 236 | chmod -R u+rwX,g+rX,o+rX /opt/runcvm/kernels/ol 237 | 238 | # --- BUILD STAGE --- 239 | # Build RunCVM installer 240 | FROM alpine:$ALPINE_VERSION as installer 241 | 242 | COPY --from=binaries /opt/runcvm /opt/runcvm 243 | COPY --from=runcvm-init /root/runcvm-init/runcvm-init /opt/runcvm/sbin/ 244 | COPY --from=qemu-exit /root/qemu-exit/qemu-exit /opt/runcvm/sbin/ 245 | 246 | RUN apk update && apk add --no-cache rsync 247 | 248 | ADD runcvm-scripts /opt/runcvm/scripts/ 249 | 250 | ADD build-utils/entrypoint-install.sh / 251 | ENTRYPOINT ["/entrypoint-install.sh"] 252 | 253 | # Install needed kernels. 254 | # Comment out any kernels that are unneeded. 255 | COPY --from=alpine-kernel /opt/runcvm/kernels/alpine /opt/runcvm/kernels/alpine 256 | COPY --from=debian-kernel /opt/runcvm/kernels/debian /opt/runcvm/kernels/debian 257 | COPY --from=openwrt-kernel /opt/runcvm/kernels/openwrt /opt/runcvm/kernels/openwrt 258 | COPY --from=ubuntu-kernel /opt/runcvm/kernels/ubuntu /opt/runcvm/kernels/ubuntu 259 | COPY --from=oracle-kernel /opt/runcvm/kernels/ol /opt/runcvm/kernels/ol 260 | 261 | # Add 'latest' symlinks for available kernels 262 | RUN for d in /opt/runcvm/kernels/*; do cd $d && ln -s $(ls -d * | sort | head -n 1) latest; done 263 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 
14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 
134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 
193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # RunCVM Container Runtime 2 | 3 | ## Introduction 4 | 5 | RunCVM (Run Container Virtual Machine) is an experimental open-source Docker container runtime for Linux, created by Struan Bartlett at NewsNow Labs, that makes launching standard containerised workloads and system workloads (e.g. Systemd, Docker, even OpenWrt) in VMs as easy as launching a container. 6 | 7 |
8 | Install RunCVM and then launch an Alpine Container/VM 9 | 
10 | View on Asciinema 11 | 
12 | 13 | ## Quick start 14 | 15 | Install: 16 | 17 | ```sh 18 | curl -s -o - https://raw.githubusercontent.com/newsnowlabs/runcvm/main/runcvm-scripts/runcvm-install-runtime.sh | sudo sh 19 | ``` 20 | 21 | Now launch an nginx VM listening on port 8080: 22 | 23 | ```console 24 | docker run --runtime=runcvm --name nginx1 --rm -p 8080:80 nginx 25 | ``` 26 | 27 | Launch a MariaDB VM, with 2 cpus and 2G memory, listening on port 3306: 28 | 29 | ```console 30 | docker run --runtime=runcvm --name mariadb1 --rm -p 3306:3306 --cpus 2 --memory 2G --env=MARIADB_ALLOW_EMPTY_ROOT_PASSWORD=1 mariadb 31 | ``` 32 | 33 | Launch a vanilla ubuntu VM, with interactive terminal: 34 | 35 | ```console 36 | docker run --runtime=runcvm --name ubuntu1 --rm -it ubuntu 37 | ``` 38 | 39 | Gain another interactive console on `ubuntu1`: 40 | 41 | ```console 42 | docker exec -it ubuntu1 bash 43 | ``` 44 | 45 | Launch a VM with 1G memory and a 1G ext4-formatted backing file mounted at `/var/lib/docker` and stored in the underlying container's filesystem: 46 | 47 | ```console 48 | docker run -it --runtime=runcvm --memory=1G --env=RUNCVM_DISKS=/disks/docker,/var/lib/docker,ext4,1G 49 | ``` 50 | 51 | Launch a VM with 2G memory and a 5G ext4-formatted backing file mounted at `/var/lib/docker` and stored in a dedicated Docker volume on the host: 52 | 53 | ```console 54 | docker run -it --runtime=runcvm --memory=2G --mount=type=volume,src=runcvm-disks,dst=/disks --env=RUNCVM_DISKS=/disks/docker,/var/lib/docker,ext4,5G 55 | ``` 56 | 57 | Launch a 3-node Docker Swarm on a network with 9000 MTU and, on the swarm, an http global service: 58 | 59 | ```console 60 | git clone https://github.com/newsnowlabs/runcvm.git && \ 61 | cd runcvm/tests/00-http-docker-swarm && \ 62 | NODES=3 MTU=9000 ./test 63 | ``` 64 | 65 | ### System workloads 66 | 67 | **Docker+Sysbox runtime demo** - Launch Ubuntu running Systemd and Docker with [Sysbox](https://github.com/nestybox/sysbox) runtime; then within it run an Alpine _Sysbox_ container; and, _within that_ install dockerd and run a container from the 'hello-world' image: 68 | 69 | ```console 70 | cat </dev/null & sleep 5; docker run --rm hello-world'" 84 | docker rm -fv ubuntu-docker-sysbox 85 | ``` 86 | 87 | - [Watch on Asciinema](https://asciinema.org/a/630032) 88 | 89 | **Nested RunCVM demo** - Launch Ubuntu running Systemd and Docker with RunCVM runtime installed; then within it run an Alpine _RunCVM_ Container/VM; and, within that install dockerd and, _within that_, run a container from the 'hello-world' image: 90 | 91 | ```console 92 | cat <>/etc/modules 101 | ENTRYPOINT ["/lib/systemd/systemd"] 102 | ENV RUNCVM_DISKS='/disks/docker,/var/lib/docker,ext4,1G' 103 | VOLUME /disks 104 | EOF 105 | docker run -d --runtime=runcvm -m 2g --name=ubuntu-docker-runcvm ubuntu-docker-runcvm 106 | docker exec ubuntu-docker-runcvm bash -c "docker run --rm --runtime=runcvm alpine ash -x -c 'apk add docker; dockerd &>/dev/null & sleep 5; docker run --rm hello-world'" 107 | docker rm -fv ubuntu-docker-runcvm 108 | ``` 109 | 110 | **Docker+GVisor runtime demo** - Launch Ubuntu running Systemd and Docker with GVisor runtime; then within it run the 'hello-world' image in a _GVisor_ container: 111 | 112 | ```console 113 | cat </etc/apt/sources.list.d/gvisor.list && \ 120 | apt update && \ 121 | apt-get install -y runsc 122 | RUN [ ! 
-f /etc/docker/daemon.json ] && echo '{}' > /etc/docker/daemon.json; cat /etc/docker/daemon.json | jq '.runtimes.runsc.path="/usr/bin/runsc"' | tee /etc/docker/daemon.json 123 | ENTRYPOINT ["/lib/systemd/systemd"] 124 | ENV RUNCVM_DISKS='/disks/docker,/var/lib/docker,ext4,1G' 125 | VOLUME /disks 126 | EOF 127 | docker run -d --runtime=runcvm -m 2g --name=ubuntu-docker-gvisor ubuntu-docker-gvisor 128 | docker exec ubuntu-docker-gvisor bash -c "docker run --rm --runtime=runsc hello-world" 129 | docker rm -fv ubuntu-docker-gvisor 130 | ``` 131 | 132 | **Launch [OpenWrt](https://openwrt.org/)** - with port forward to LuCI web UI on port 10080: 133 | 134 | ```console 135 | docker import --change='ENTRYPOINT ["/sbin/init"]' https://archive.openwrt.org/releases/23.05.2/targets/x86/generic/openwrt-23.05.2-x86-generic-rootfs.tar.gz openwrt-23.05.2 && \ 136 | docker network create --subnet 172.128.0.0/24 runcvm-openwrt && \ 137 | echo -e "config interface 'loopback'\n\toption device 'lo'\n\toption proto 'static'\n\toption ipaddr '127.0.0.1'\n\toption netmask '255.0.0.0'\n\nconfig device\n\toption name 'br-lan'\n\toption type 'bridge'\n\tlist ports 'eth0'\n\nconfig interface 'lan'\n\toption device 'br-lan'\n\toption proto 'static'\n\toption ipaddr '172.128.0.5'\n\toption netmask '255.255.255.0'\n\toption gateway '172.128.0.1'\n" >/tmp/runcvm-openwrt-network && \ 138 | docker run -it --rm --runtime=runcvm --name=openwrt --network=runcvm-openwrt --ip=172.128.0.5 -v /tmp/runcvm-openwrt-network:/etc/config/network -p 10080:80 openwrt-23.05.2 139 | ``` 140 | 141 | - [Watch on Asciinema](https://asciinema.org/a/631857) 142 | 143 | ## RunCVM-in-Portainer walk-through 144 | 145 | [![Playing around with RunCVM, a docker runtime plugin](https://i.ytimg.com/vi/OENaWDlCWKg/maxresdefault.jpg)](https://www.youtube.com/watch?v=OENaWDlCWKg "Playing around with RunCVM, a docker runtime plugin") 146 | 147 | ## Motivation 148 | 149 | RunCVM was born out of difficulties experienced using the Docker and Podman CLIs to launch [Kata Containers v2](https://katacontainers.io/), and a belief that launching containerised workloads in VMs using Docker needn't be so complicated. 150 | 151 | > Motivations included: efforts to [re-add OCI CLI commands for docker/podman](https://github.com/kata-containers/kata-containers/issues/722) to Kata v2 to support Docker & Podman; other Kata issues [#3358](https://github.com/kata-containers/kata-containers/issues/3358), [#1123](https://github.com/kata-containers/kata-containers/issues/1123), [#1133](https://github.com/kata-containers/kata-containers/issues/1133), [#3038](https://github.com/kata-containers/runtime/issues/3038); [#5321](https://github.com/kata-containers/runtime/issues/5321); [#6861](https://github.com/kata-containers/runtime/issues/6861); Podman issues [#8579](https://github.com/containers/podman/issues/8579) and [#17070](https://github.com/containers/podman/issues/17070); and Kubernetes issue [#40114](https://github.com/kubernetes/website/issues/40114); though please note, since authoring RunCVM some of these issues may have been resolved. 152 | 153 | Like Kata, RunCVM aims to be a secure container runtime with lightweight virtual machines that feel and perform like containers, but provide stronger workload isolation using hardware virtualisation technology.
154 | 155 | However, while Kata aims to launch standard container images inside a restricted-privileges namespace inside a VM running a single fixed and heavily customised kernel and Linux distribution optimised for this purpose, RunCVM intentionally aims to launch container _or VM_ images as the _VM's root filesystem_ using stock or bespoke Linux kernels, the upshot being that RunCVM can run VM workloads that Kata's security and kernel model would explicitly prevent. 156 | 157 | For example: 158 | - RunCVM can launch system images expecting to interface directly with hardware, like [OpenWrt](https://openwrt.org/) 159 | - RunCVM can launch VMs nested inside a RunCVM VM - i.e. an 'inner' RunCVM Container/VM guest can be launched by Docker running within an 'outer' RunCVM Container/VM guest (assuming the host supports nested VMs) - in this sense, RunCVM is 'reentrant'. 160 | 161 | RunCVM features: 162 | 163 | - Compatible with `docker run` (with experimental support for `podman run`). 164 | - Uses a lightweight 'wrapper-runtime' technology that subverts the behaviour of the standard container runtime `runc` to cause a VM to be launched within the container (making its code footprint and external dependencies extremely small, and its internals extremely simple and easy to understand and tailor for specific purposes). 165 | - Highly portable among Linux distributions and development platforms providing KVM. Can even be installed on [GitHub Codespaces](https://github.com/features/codespaces)! 166 | - Written, using off-the-shelf open-source components, almost entirely in shell script for simplicity, portability and ease of development. 167 | 168 | > RunCVM makes some trade-offs in return for this simplicity. See the full list of [features and limitations](#features-and-limitations). 169 | 170 | ## Contents 171 | 172 | - [Introduction](#introduction) 173 | - [Quick start](#quick-start) 174 | - [Motivation](#motivation) 175 | - [Licence](#licence) 176 | - [Project aims](#project-aims) 177 | - [Project ambitions](#project-ambitions) 178 | - [Applications for RunCVM](#applications-for-runcvm) 179 | - [How RunCVM works](#how-runcvm-works) 180 | - [System requirements](#system-requirements) 181 | - [Installation](#installation) 182 | - [Upgrading](#upgrading) 183 | - [Features and Limitations](#features-and-limitations) 184 | - [RunCVM vs Kata comparison](#runcvm-vs-kata-comparison) 185 | - [Kernel selection](#kernel-selection) 186 | - [Option reference](#option-reference) 187 | - [Advanced usage](#advanced-usage) 188 | - [Developing](#developing) 189 | - [Building](#building) 190 | - [Testing](#testing) 191 | - [Contributing](#contributing) 192 | - [Support](#support) 193 | - [Uninstallation](#uninstallation) 194 | - [Legals](#Legals) 195 | 196 | ## Licence 197 | 198 | RunCVM is free and open-source, licensed under the Apache Licence, Version 2.0. See the [LICENSE](LICENSE) file for details.
199 | 200 | ## Project aims 201 | 202 | - Run any standard container workload in a VM using `docker run` with no need to customise images or the command line (except adding `--runtime=runcvm`) 203 | - Run unusual container workloads, like `dockerd` and `systemd`, that will not run in standard container runtimes 204 | - Maintain a similar experience within a RunCVM VM as within a container: process table, network interfaces, stdio, exit code handling should be broadly similar to maximise compatibility 205 | - Container start/stop/kill semantics respected, where possible providing clean VM shutdown on stop 206 | - VM console accessible as one would expect using `docker run -it`, `docker start -ai` and `docker attach` (and so on), generally good support for other `docker container` subcommands 207 | - Efficient container startup, by using virtiofs to serve a container's filesystem directly to a VM (instead of unpacking an image into a backing file) 208 | - Improved security compared to the standard container runtime, and as much security as possible without compromising the simplicity of the implementation 209 | - Command-line and image-embedded options for customising a container's VM specifications, devices, kernel 210 | - Intelligent kernel selection, according to the distribution used in the image being launched 211 | - No external dependencies, except for Docker/Podman and relevant Linux kernel modules (`kvm` and `tun`) 212 | - Support multiple Docker network interfaces attached to a created (but not yet running) container using `docker run --network=<network>` and `docker network connect` (excluding IPv6) 213 | 214 | ## Project ambitions 215 | 216 | - Support for booting VM with a file-backed disk root fs generated from the container image, instead of only virtiofs root 217 | - Support running foreign-architecture VMs by using QEMU dynamic CPU emulation for the entire VM (instead of the approach used by [https://github.com/multiarch/qemu-user-static](https://github.com/multiarch/qemu-user-static) which uses dynamic CPU emulation for each individual binary) 218 | - Support for QEMU [microvm](https://qemu.readthedocs.io/en/latest/system/i386/microvm.html) or potentially Amazon Firecracker 219 | - More natural console support with independent stdout and stderr channels for `docker run -it` 220 | - Improve VM boot time and other behaviours using custom kernel 221 | - Support for specific hardware e.g. graphics display served via VNC 222 | 223 | ## Applications for RunCVM 224 | 225 | The main applications for RunCVM are: 226 | 227 | 1. Running and testing applications that: 228 | - don't work with (or require enhanced privileges to work with) standard container runtimes (e.g. `systemd`, `dockerd`, Docker swarm services, [Kubernetes](https://kubernetes.io/)) 229 | - require a running kernel, or a kernel version or modules not available on the host 230 | - require specific hardware that can be emulated e.g. disks, graphics displays 231 | 2. Running existing container workloads with increased security 232 | 3. Testing container workloads that are already intended to launch in VM environments, such as on [fly.io](https://fly.io) 233 | 4.
Developing any of the above applications in [Dockside](https://dockside.io/) (see [RunCVM and Dockside](#runcvm-and-dockside)) 234 | 235 | ## How RunCVM works 236 | 237 | RunCVM's 'wrapper' runtime, `runcvm-runtime`, receives container create commands triggered by `docker` `run`/`create` commands, modifies the configuration of the requested container in such a way that the created container will launch a VM that boots from the container's filesystem, and then passes the request on to the standard container runtime (`runc`) to actually create and start the container. 238 | 239 | For a deep dive into RunCVM's internals, see the section on [Developing RunCVM](#developing). 240 | 241 | ## System requirements 242 | 243 | RunCVM should run on any amd64 (x86_64) hardware (or VM) running Linux Kernel >= 5.10, and that supports [KVM](https://www.linux-kvm.org/page/Main_Page) and [Docker](https://docker.com). So if your host can already run [KVM](https://www.linux-kvm.org/page/Main_Page) VMs and [Docker](https://docker.com) then it should run RunCVM. 244 | 245 | RunCVM has no other host dependencies, apart from Docker (or experimentally, Podman) and the `kvm` and `tun` kernel modules. RunCVM comes packaged with all binaries and libraries it needs to run (including its own QEMU binary). 246 | 247 | RunCVM is tested on Debian Bullseye and [GitHub Codespaces](https://github.com/codespaces/new?hide_repo_select=true&ref=main&repo=514606231). 248 | 249 | ### rp_filter sysctl settings 250 | 251 | For RunCVM to support Docker DNS within Container/VMs, the following condition on `/proc/sys/net/ipv4/conf/` must be met: 252 | - the max of `all/rp_filter` and `<bridge>/rp_filter` should be 0 ('No Source Validation') or 2 (Loose mode as defined in RFC3704 Loose Reverse Path) 253 | (where `<bridge>` is any bridge underpinning a Docker network to which RunCVM Container/VMs will be attached) 254 | 255 | This means that: 256 | - if `all/rp_filter` will be set to 0, then `<bridge>/rp_filter` must be set to 0 or 2 257 | (or, if `<bridge>` is not yet or might not yet have been created, then `default/rp_filter` must be set to 0 or 2) 258 | - if `all/rp_filter` will be set to 1, then `<bridge>/rp_filter` must be set to 2 259 | (or, if `<bridge>` is not yet or might not yet have been created, then `default/rp_filter` must be set to 2) 260 | - if `all/rp_filter` will be set to 2, then no further action is needed 261 | 262 | At time of writing: 263 | - the Debian default is `0`; 264 | - the Ubuntu default is `2`; 265 | - the Google Cloud Debian image has default `1` and `rp_filter` settings in `/etc/sysctl.d/60-gce-network-security.conf` must be modified or overridden to support RunCVM. 266 | 267 | We recommend `all/rp_filter` be set to 2, as this is the simplest change and provides a good balance of security. 268 | 269 | ## Installation 270 | 271 | Run: 272 | 273 | ```sh 274 | curl -s -o - https://raw.githubusercontent.com/newsnowlabs/runcvm/main/runcvm-scripts/runcvm-install-runtime.sh | sudo sh 275 | ``` 276 | 277 | This will: 278 | - Install the RunCVM software package to `/opt/runcvm` (installation elsewhere is currently unsupported) 279 | - For Docker support: 280 | - Enable the RunCVM runtime, by patching `/etc/docker/daemon.json` to add `runcvm` to the `runtimes` property 281 | - Restart `dockerd`, if it can be detected how to do so for your system (e.g.
`systemctl restart docker`) 282 | - Verify that RunCVM is recognised via `docker info` 283 | - For Podman support (experimental) 284 | - Display instructions on patching `/etc/containers/containers.conf` 285 | - Check your system network device `rp_filter` settings, and amend them if necessary 286 | 287 | Following installation, launch a basic test RunCVM Container/VM: 288 | 289 | ```console 290 | docker run --runtime=runcvm --rm -it hello-world 291 | ``` 292 | 293 | ### Install on Google Cloud 294 | 295 | Create an image that will allow instances to have VMX capability: 296 | 297 | ```console 298 | gcloud compute images create debian-12-vmx --source-image-project=debian-cloud --source-image-family=debian-12 --licenses="https://compute.googleapis.com/compute/v1/projects/vm-options/global/licenses/enable-vmx" 299 | ``` 300 | 301 | Now launch a VM, install Docker and RunCVM: 302 | 303 | ```console 304 | cat >/tmp/startup-script.sh </etc/docker/daemon.json && \ 310 | curl -fsSL https://get.docker.com | bash && \ 311 | curl -s -o - https://raw.githubusercontent.com/newsnowlabs/runcvm/main/runcvm-scripts/runcvm-install-runtime.sh | sudo REPO=newsnowlabs/runcvm:latest sh 312 | EOF 313 | 314 | gcloud compute instances create runcvm-vmx-test --zone=us-central1-a --machine-type=n2-highmem-2 --network-interface=network-tier=PREMIUM,stack-type=IPV4_ONLY,subnet=default --metadata-from-file=startup-script=/tmp/startup-script.sh --no-restart-on-failure --maintenance-policy=TERMINATE --provisioning-model=SPOT --instance-termination-action=STOP --no-service-account --no-scopes --create-disk=auto-delete=yes,boot=yes,image=debian-12-vmx,mode=rw,size=50,type=pd-ssd --no-shielded-secure-boot --shielded-vtpm --shielded-integrity-monitoring --labels=goog-ec-src=vm_add-gcloud --reservation-affinity=any 315 | ``` 316 | 317 | ## Upgrading 318 | 319 | To upgrade, follow this procedure: 320 | 321 | 1. Stop all RunCVM containers. 322 | 2. Run `/opt/runcvm/scripts/runcvm-install-runtime.sh` (or rerun the installation command - which runs the same script) 323 | 3. Start any RunCVM containers. 324 | 325 | ## Features and limitations 326 | 327 | In the below summary of RunCVM's current main features and limitations, [+] is used to indicate an area of compatibility with standard container runtimes and [-] is used indicate a feature of standard container runtimes that is unsupported. 328 | 329 | > N.B. `docker run` and `docker exec` options not listed below are unsupported and their effect, if used, is unspecified. 
330 | 331 | - `docker run` 332 | - Mounts 333 | - [+] `--mount` (or `-v`) is supported for volume mounts, tmpfs mounts, and host file and directory bind-mounts (the `dst` mount path `/disks` is reserved) 334 | - [-] Bind-mounting host sockets or devices, and `--device` is unsupported 335 | - Networking 336 | - [+] The default bridge network is supported 337 | - [+] Custom/user-defined networks specified using `--network` are supported, including Docker DNS resolution of container names and respect for custom network MTU 338 | - [+] Multiple network interfaces - when attached via `docker run --network` or `docker network connect` (but only to a created and not yet running container) - are supported (including `scope=overlay` networks and those with multiple subnets) 339 | - [+] `--publish` (or `-p`) is supported 340 | - [+] `--dns`, `--dns-option`, `--dns-search` are supported 341 | - [+] `--ip` is supported 342 | - [+] `--hostname` (or `-h`) is supported 343 | - [-] `docker network connect` on a running container is not supported 344 | - [-] `--network=host` and `--network=container:name|id` are not supported 345 | - [-] IPv6 is not supported 346 | - Execution environment 347 | - [+] `--user` (or `-u`) is supported 348 | - [?] `--workdir` (or `-w`) is supported 349 | - [+] `--env` (or `-e`) and `--env-file` are supported 350 | - [+] `--entrypoint` is supported 351 | - [+] `--init` is supported (but runs RunCVM's own VM init process rather than Docker's default, `tini`) 352 | - stdio/Terminals 353 | - [+] `--detach` (or `-d`) is supported 354 | - [+] `--interactive` (or `-i`) is supported 355 | - [+] `--tty` (or `-t`) is supported (but to enter CTRL-T one must press CTRL-T twice) 356 | - [+] `--attach` (or `-a`) is supported 357 | - [+] Stdout and Stderr output should be broadly similar to running the same workload in a standard `runc` container 358 | - [-] Stdout and Stderr are not independently multiplexed so `docker run --runtime=runcvm debian bash -c 'echo stdout; echo stderr >&2' >/tmp/stdout 2>/tmp/stderr` does not produce the expected result 359 | - [-] Stdout and Stderr sent very soon after VM launch might be corrupted due to serial console issues 360 | - [-] Stdout and Stderr sent immediately before VM shutdown might not always be fully flushed 361 | - Resource allocation and limits 362 | - [+] `--cpus` is supported to specify number of VM CPUs 363 | - [+] `--memory` (or `-m`) is supported to specify VM memory 364 | - [-] Other container resource limit options such as CPU (`--cpu-*`), block IO (`--blkio-*`), kernel memory (`--kernel-memory`) are unsupported or untested 365 | - Exit code 366 | - [+] Returning the entrypoint's exit code is supported, but it currently requires application support 367 | - [-] To return an exit code, your entrypoint may either write its exit code to `/.runcvm/exit-code` (supported exit codes 0-255) or call `/opt/runcvm/sbin/qemu-exit <code>` (supported exit codes 0-127). Automatic handling of exit codes from the entrypoint will be provided in a later version. 368 | - Disk performance 369 | - [+] No mountpoints are required for basic operation for most applications.
Volume or disk mountpoints may be needed for running `dockerd` or to improve disk performance 370 | - [-] `dockerd` mileage will vary unless a volume or disk is mounted over `/var/lib/docker` 371 | - `docker exec` 372 | - [+] `--user` (or `-u`), `--workdir` (or `-w`), `--env` (or `-e`), `--env-file`, `--detach` (or `-d`), `--interactive` (or `-i`) and `--tty` (or `-t`) are all supported 373 | - [+] Stdout and Stderr _are_ independently multiplexed so `docker exec <container> bash -c 'echo stdout; echo stderr >&2' >/tmp/stdout 2>/tmp/stderr` _does_ produce the expected result 374 | - Security 375 | - The RunCVM software package at `/opt/runcvm` is mounted read-only within RunCVM containers. Container applications cannot compromise RunCVM, but they can execute binaries from within the RunCVM package. The set of binaries available to the VM may be reduced to a minimum in a later version. 376 | - Kernels 377 | - [+] Use any kernel, either one pre-packaged with RunCVM or roll your own 378 | - [+] RunCVM will try to select an appropriate kernel to use based on examination of `/etc/os-release` within the image being launched. 379 | 380 | ## RunCVM vs Kata comparison 381 | 382 | This table provides a high-level comparison of RunCVM and Kata across various features like kernels, networking/DNS, memory allocation, namespace handling, method of operation, and performance characteristics: 383 | 384 | | Feature | RunCVM | Kata | 385 | |---------|--------|------| 386 | | **Methodology** | Boots VM from distribution kernels with container's filesystem directly mounted as root filesystem, using virtiofs. VM setup code and kernel modules are bind-mounted into the container. VM's PID1 runs setup code to reproduce the container's networking environment within the VM before executing the container's original entrypoint. | Boots VM from custom kernel with custom root disk image, mounts the virtiofsd-shared host container filesystem to a target folder and executes the container's entrypoint within a restricted namespace having chrooted to that folder. | 387 | | **Privileges/restrictions** | Container code has full root access to VM and its devices. It may run anything that runs in a VM, mounting filesystems, installing kernel modules, accessing devices. RunCVM helper processes are visible to `ps` etc. | Runs container code inside a VM namespace with restricted privileges. Use of mounts, kernel modules is restricted. Kata helper processes (like kata-agent and chronyd) are invisible to `ps`. | 388 | | **Kernels** | Launches stock Alpine, Debian, Ubuntu kernels. Kernel `/lib/modules` automatically mounted within VM. Install any needed modules without host reconfiguration. | Launches custom kernels. Kernel modules aren't mounted and need host reconfiguration to be installed. | 389 | | **Networking/DNS** | Docker container networking + internal/external DNS out-of-the-box.
No support for `docker network connect/disconnect` | DNS issues presented: with custom network, external ping works, but DNS lookups fail both for internal docker hosts and external hosts.[^1] | 390 | | **Memory** | VM assigned and reports total memory as per `--memory <size>` | VM total memory reported by `free` appears unrelated to `--memory <size>` specified [^2] | 391 | | **CPUs** | VM assigned and reports CPUs as per `--cpus <number>` | CPUs must be hardcoded in Kata host config | 392 | | **Performance** | | Custom kernel optimisations may deliver improved startup (~3.2s) or operational performance (~15%) | 393 | | **virtiofsd** | Runs `virtiofsd` in container namespace | Unknown | 394 | 395 | [^1]: `docker network create --scope=local testnet >/dev/null && docker run --name=test --rm --runtime=kata --network=testnet --entrypoint=/bin/ash alpine -c 'for n in test google.com 8.8.8.8; do echo "ping $n ..."; ping -q -c 8 -i 0.5 $n; done'; docker network rm testnet >/dev/null` succeeds on `runc` and `runcvm` but at time of writing (2023-12-31) the DNS lookups needed fail on `kata`. 396 | ``` 397 | $ docker network create --scope=local testnet >/dev/null && docker run --name=test --rm -it --runtime=kata --network=testnet --entrypoint=/bin/ash alpine -c 'for n in test google.com 8.8.8.8; do echo "ping $n ..."; ping -q -c 8 -i 0.5 $n; done'; docker network rm testnet >/dev/null 398 | ping test ... 399 | ping: bad address 'test' 400 | ping google.com ... 401 | ping: bad address 'google.com' 402 | ping 8.8.8.8 ... 403 | PING 8.8.8.8 (8.8.8.8): 56 data bytes 404 | 405 | --- 8.8.8.8 ping statistics --- 406 | 8 packets transmitted, 8 packets received, 0% packet loss 407 | round-trip min/avg/max = 0.911/1.716/3.123 ms 408 | 409 | $ docker network create --scope=local testnet >/dev/null && docker run --name=test --rm -it --runtime=runcvm --network=testnet --entrypoint=/bin/ash alpine -c 'for n in test google.com 8.8.8.8; do echo "ping $n ..."; ping -q -c 8 -i 0.5 $n; done'; docker network rm testnet >/dev/null 410 | ping test ... 411 | PING test (172.25.8.2): 56 data bytes 412 | 413 | --- test ping statistics --- 414 | 8 packets transmitted, 8 packets received, 0% packet loss 415 | round-trip min/avg/max = 0.033/0.085/0.137 ms 416 | ping google.com ... 417 | PING google.com (172.217.16.238): 56 data bytes 418 | 419 | --- google.com ping statistics --- 420 | 8 packets transmitted, 8 packets received, 0% packet loss 421 | round-trip min/avg/max = 8.221/8.398/9.017 ms 422 | ping 8.8.8.8 ...
423 | PING 8.8.8.8 (8.8.8.8): 56 data bytes 424 | 425 | --- 8.8.8.8 ping statistics --- 426 | 8 packets transmitted, 8 packets received, 0% packet loss 427 | round-trip min/avg/max = 1.074/1.491/1.801 ms 428 | ``` 429 | 430 | [^2]: `docker run --rm -it --runtime=kata --entrypoint=/bin/ash -m 500m alpine -c 'free -h; df -h /dev/shm'` 431 | ``` 432 | $ docker run --rm --runtime=kata --name=test -m 2g --env=RUNCVM_KERNEL_DEBUG=1 -it alpine ash -c 'free -h' 433 | total used free shared buff/cache available 434 | Mem: 3.9G 94.4M 3.8G 0 3.7M 3.8G 435 | Swap: 0 0 0 436 | $ docker run --rm --runtime=kata --name=test -m 3g --env=RUNCVM_KERNEL_DEBUG=1 -it alpine ash -c 'free -h' 437 | total used free shared buff/cache available 438 | Mem: 4.9G 107.0M 4.8G 0 3.9M 4.8G 439 | Swap: 0 0 0 440 | $ docker run --rm --runtime=kata --name=test -m 0g --env=RUNCVM_KERNEL_DEBUG=1 -it alpine ash -c 'free -h' 441 | total used free shared buff/cache available 442 | Mem: 1.9G 58.8M 1.9G 0 3.4M 1.9G 443 | Swap: 0 0 0 444 | ``` 445 | 446 | ## Kernel auto-detection 447 | 448 | When creating a container, RunCVM will examine the image being launched to try to determine a suitable kernel to boot the VM with. Its process is as follows: 449 | 450 | 1. If `--env=RUNCVM_KERNEL=<dist>[/<version>]` is specified, use the indicated kernel 451 | 2. Otherwise, identify distro from `/etc/os-release` 452 | 1. If one is found in the appropriate distro-specific location in the image, select an in-image kernel. The locations are: 453 | - Debian: `/vmlinuz` and `/initrd.img` 454 | - Ubuntu: `/boot/vmlinuz` and `/boot/initrd.img` 455 | - Alpine: `/boot/vmlinuz-virt` and `/boot/initramfs-virt` 456 | 2. Otherwise, if found in the RunCVM package, select the latest kernel compatible with the distro 457 | 3. Finally, use the Debian kernel from the RunCVM package 458 | 459 | ## Option reference 460 | 461 | RunCVM options are specified either via standard `docker run` options or via `--env=<option>=<value>` options on the `docker run` 462 | command line. The following env options are user-configurable: 463 | 464 | ### `--env=RUNCVM_KERNEL=<dist>[/<version>]` 465 | 466 | Specify with which RunCVM kernel (from `/opt/runcvm/kernels`) to boot the VM. Values must be of the form `<dist>/<version>`, where `<dist>` is a directory under `/opt/runcvm/kernels` and `<version>` is a subdirectory (or symlink to a subdirectory) under that. If `<version>` is omitted, `latest` will be assumed. Here is an example command that will list available values of `<dist>/<version>` on your installation. 467 | 468 | ```console 469 | $ find /opt/runcvm/kernels/ -maxdepth 2 | sed 's!^/opt/runcvm/kernels/!!; /^$/d' 470 | debian 471 | debian/latest 472 | debian/5.10.0-16-amd64 473 | alpine 474 | alpine/latest 475 | alpine/5.15.59-0-virt 476 | ubuntu 477 | ubuntu/latest 478 | ubuntu/5.15.0-43-generic 479 | ol 480 | ol/5.14.0-70.22.1.0.1.el9_0.x86_64 481 | ol/latest 482 | ``` 483 | 484 | Example: 485 | 486 | ```console 487 | docker run --rm --runtime=runcvm --env=RUNCVM_KERNEL=ol hello-world 488 | ``` 489 | 490 | ### `--env=RUNCVM_KERNEL_APPEND='<options>'` 491 | 492 | Any custom kernel command line options e.g. `apparmor=0` or `systemd.unified_cgroup_hierarchy=0`. 493 | 494 | ### `--env='RUNCVM_DISKS=<disk1>[;<disk2>;...]'` 495 | 496 | Automatically create, format, prepopulate and mount backing files as virtual disks on the VM. 497 | 498 | Each `<disk>` should be a comma-separated list of values of the form: `<src>,<dst>,<filesystem>[,<size>]`. 499 | 500 | - `<src>` is the path _within the container_ where the virtual disk backing file should be located.
This may be in the container's overlayfs or within a volume (mounted using `--mount=type=volume`). 501 | - `<dst>` is both (a) the path within the VM where the virtual disk should be mounted; and (b) the location of the directory with whose contents the disk should be prepopulated. 502 | - `<filesystem>` is the filesystem with which the backing disk should be formatted when first created. 503 | - `<size>` is the size of the backing file (in `truncate` format), and must be specified if `<src>` does not exist. 504 | 505 | When first created, the backing file will be created as a sparse file of the specified `<size>` and formatted with the specified `<filesystem>` using `mke2fs` and prepopulated with any files preexisting at `<dst>`. 506 | 507 | When RunCVM creates a Container/VM, fstab entries will be drafted. After the VM boots, the fstab entries will be mounted. Typically, the first disk will be mounted as `/dev/vda`, the second as `/dev/vdb`, and so on. 508 | 509 | #### Example #1 510 | 511 | ```console 512 | docker run -it --runtime=runcvm --env=RUNCVM_DISKS=/disk1,/home,ext4,5G <image> 513 | ``` 514 | 515 | In this example, RunCVM will check for existence of a file at `/disk1` within the container, and if not found create a 5G backing file (in the container's filesystem, typically overlay2) with an ext4 filesystem prepopulated with any preexisting contents of `/home`, then add the disk to `/etc/fstab` and mount it within the VM at `/home`. 516 | 517 | #### Example #2 518 | 519 | ```console 520 | docker run -it --runtime=runcvm --mount=type=volume,src=runcvm-disks,dst=/disks --env='RUNCVM_DISKS=/disks/disk1,/home,ext4,5G;/disks/disk2,/opt,ext4,2G' <image> 521 | ``` 522 | 523 | This example behaves similarly, except that the `runcvm-disks` persistent Docker volume is first mounted at `/disks` within the container's filesystem, and therefore the backing files at `/disks/disk1` and `/disks/disk2` (mounted in the VM at `/home` and `/opt` respectively) are stored in the _persistent volume_ (typically stored in `/var/lib/docker` on the host, bypassing overlay2). 524 | 525 | > N.B. `/disks` and any paths below it are _reserved mountpoints_. Unlike other mountpoints, these are *NOT* mounted into the VM but only into the container, and are therefore suitable for mounting VM disks from backing files that cannot be accessed within the VM's filesystem. 526 | 527 | ### `--env=RUNCVM_QEMU_DISPLAY=<display>` 528 | 529 | Select a specific QEMU display. Currently only `curses` is supported, but others may trivially be added by customising the build. 530 | 531 | ### `--env=RUNCVM_SYS_ADMIN=1` 532 | 533 | By default, `virtiofsd` is not launched with `-o modcaps=+sys_admin` (and containers are not granted `CAP_SYS_ADMIN`). Use this option if you need to change this. 534 | 535 | ### `--env=RUNCVM_KERNEL_MOUNT_LIB_MODULES=1` 536 | 537 | If a RunCVM kernel (as opposed to an in-image kernel) is chosen to launch a VM, by default that kernel's modules will be mounted at `/lib/modules/<version>` in the VM. If this variable is set, that kernel's modules will instead be mounted over `/lib/modules`. 538 | 539 | ### `--env=RUNCVM_KERNEL_DEBUG=1` 540 | 541 | Enable kernel logging (sets kernel `console=ttyS0`). 542 | 543 | ### `--env=RUNCVM_BIOS_DEBUG=1` 544 | 545 | By default BIOS console output is hidden. Enable it with this option. 546 | 547 | ### `--env=RUNCVM_RUNTIME_DEBUG=1` 548 | 549 | Enable debug logging for the runtime (the portion of RunCVM directly invoked by `docker run`, `docker exec` etc). 550 | Debug logs are written to files in `/tmp`.
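For example, to capture runtime debug logs from a short-lived Container/VM and then list the most recently written files under `/tmp` (a minimal sketch; the exact debug log filenames are internal to the runtime, so the `ls` below simply surfaces whatever was written):

```console
docker run --rm --runtime=runcvm --env=RUNCVM_RUNTIME_DEBUG=1 alpine true
ls -lt /tmp | head
```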
551 | 
552 | ### `--env=RUNCVM_BREAK=<breakpoints>`
553 | 
554 | Enable breakpoints (dropping to a bash shell) during the RunCVM Container/VM boot process.
555 | 
556 | `<breakpoints>` must be a comma-separated list of: `prenet`, `postnet`, `preqemu`.
557 | 
558 | ### `--env=RUNCVM_HUGETLB=1`
559 | 
560 | **[EXPERIMENTAL]** Enable use of a preallocated hugetlb memory backend, which can improve performance in some scenarios.
561 | 
562 | ### `--env=RUNCVM_CGROUPFS=<mode>`
563 | 
564 | Configures cgroupfs mountpoints in the VM, which may be needed to run applications like Docker if systemd is not running. Acceptable values are:
565 | 
566 | - `none`/`systemd` - do nothing; leave to the application or to systemd (if running)
567 | - `1`/`cgroup1` - mount only cgroup v1 filesystems supported by the running kernel to subdirectories of `/sys/fs/cgroup`
568 | - `2`/`cgroup2` - mount only the cgroup v2 filesystem to `/sys/fs/cgroup`
569 | - `hybrid`/`mixed` - mount cgroup v1 filesystems and mount the cgroup v2 filesystem to `/sys/fs/cgroup/unified`
570 | 
571 | Please note that if `RUNCVM_CGROUPFS` is left undefined or set to an empty string, then RunCVM selects an appropriate
572 | default behaviour according to these rules:
573 | 
574 | - If the specified entrypoint (or, if a symlink, its target) matches the regex `/systemd$`, then assume a default value of `none`;
575 | - Else, assume a default value of `hybrid`.
576 | 
577 | These rules work well for running Docker in (a) stock Alpine/Debian/Ubuntu distributions in which Docker has been installed but systemd is not running; and (b) distributions in which systemd is running. Of course, you should set `RUNCVM_CGROUPFS` if you need to override the default behaviour.
578 | 
579 | Please also note that, if your distribution is running systemd, you may instead set `--env=RUNCVM_KERNEL_APPEND='systemd.unified_cgroup_hierarchy=<n>'` (where `<n>` is `0` or `1`) to request that systemd create either hybrid or cgroup2-only cgroup filesystem(s) itself.
580 | 
581 | ## Advanced usage
582 | 
583 | ### Running Docker in a RunCVM Container/VM
584 | 
585 | #### ext4 disk backing file mounted at `/var/lib/docker`
586 | 
587 | If running Docker within a VM, it is recommended that you mount a disk backing file at `/var/lib/docker`. This allows `dockerd` to use its preferred overlay storage driver, and avoids it falling back to the extremely poorly-performing `vfs` storage driver.
588 | 
589 | e.g. To launch a VM with a 1G ext4-formatted backing file, stored in the underlying container's overlay filesystem, and mounted at `/var/lib/docker`, run:
590 | 
591 | ```sh
592 | docker run -it --runtime=runcvm --env=RUNCVM_DISKS=/disks/docker,/var/lib/docker,ext4,1G <image>
593 | ```
594 | 
595 | To launch a VM with a 5G ext4-formatted backing file, stored in a dedicated Docker volume on the host, and mounted at `/var/lib/docker`, run:
596 | 
597 | ```sh
598 | docker run -it --runtime=runcvm --mount=type=volume,src=runcvm-disks,dst=/disks --env=RUNCVM_DISKS=/disks/docker,/var/lib/docker,ext4,5G <image>
599 | ```
600 | 
601 | In both cases, RunCVM will check for the existence of a file at `/disks/docker` and, if not found, will create a disk backing file of the given size, formatted as an ext4 filesystem. It will add the disk to `/etc/fstab`.
602 | 
603 | For full documentation of `RUNCVM_DISKS`, see above.
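
Once such a VM is running, you can check that the disk really backs `/var/lib/docker` and that `dockerd` has selected an overlay driver. A hypothetical verification session (assuming Docker is installed and running inside the VM; output will vary):

```sh
# From the host, open a shell in the container/VM
docker exec -it <container> sh

# Inside the VM: /var/lib/docker should be backed by a virtual disk
# (/dev/vdX), not by the virtiofs root filesystem
df -h /var/lib/docker

# And dockerd should report an overlay storage driver, not vfs
docker info --format '{{.Driver}}'
```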
604 | 
605 | #### Docker volume mounted at `/var/lib/docker` (NOT RECOMMENDED)
606 | 
607 | Doing this is _not recommended_, but if running Docker within a VM, you can enable `dockerd` to use the overlay filesystem (at the cost of security) by launching with `--env=RUNCVM_SYS_ADMIN=1`. e.g.
608 | 
609 | ```sh
610 | docker run --runtime=runcvm --mount=type=volume,src=mydocker1,dst=/var/lib/docker --env=RUNCVM_SYS_ADMIN=1 <image>
611 | ```
612 | 
613 | > N.B. This option adds `CAP_SYS_ADMIN` capabilities to the container and then launches `virtiofsd` with `-o modcaps=+sys_admin`.
614 | 
615 | ## Developing
616 | 
617 | The following deep dive should help explain the inner workings of RunCVM, and which files to modify to implement fixes, improvements and extensions.
618 | 
619 | ### runcvm-runtime
620 | 
621 | RunCVM's 'wrapper' runtime, `runcvm-runtime`, intercepts the container `create` and `exec` commands and their specifications in JSON format (`config.json` and `process.json` respectively) that are normally provided (by `docker run`/`create` and `docker exec` respectively) to a standard container runtime like `runc`.
622 | 
623 | The JSON file is parsed to retrieve the properties of the command, and is modified to allow RunCVM to piggyback by overriding the originally intended behaviour with new behaviour.
624 | 
625 | The modifications to `create` are designed to make the created container launch a VM that boots off the container's filesystem, served using `virtiofsd`.
626 | 
627 | The modifications to `exec` are designed to run commands within the VM instead of the container.
628 | 
629 | #### `runcvm-runtime` - `create` command
630 | 
631 | In more detail, the RunCVM runtime `create` process:
632 | - Modifies the `config.json` file to:
633 |   - Modify the container's entrypoint: prepend `runcvm-ctr-entrypoint` to the container's original entrypoint; and, if an `--init` argument was detected, remove any init process and set the container env var `RUNCVM_INIT` to `1`.
634 |   - Set the container env var `RUNCVM_UIDGID` to the `<uid>:<gid>:<additionalGids>` intended for the container, then reset both the `<uid>` and `<gid>` to `0`.
635 |   - Set the container env var `RUNCVM_CPUS` to the intended `--cpus` count so it can be passed to the VM.
636 |   - Extract and delete all requested tmpfs mounts (these will be independently mounted by the VM).
637 |   - Add a bind mount from `/` to `/vm` that will recursively mount the following preceding mounts:
638 |     - A bind mount from `/opt/runcvm` on the host to `/opt/runcvm` in the container.
639 |     - A tmpfs mounted at `/.runcvm`.
640 |   - Add a tmpfs at `/run` in the container only.
641 |   - Map all requested bind mounts from their original mountpoint `<dst>` to `/vm/<dst>` (except where `<dst>` is at or below `/disks`).
642 |   - Determine a suitable VM launch kernel by looking for one inside the container's image, choosing a stock RunCVM kernel matching the image, or as directed by the `RUNCVM_KERNEL` env var.
643 |   - Add a bind mount to `/vm/lib/modules/<version>` for the kernel's modules.
644 |   - Set the container env vars `RUNCVM_KERNEL_PATH`, `RUNCVM_KERNEL_INITRAMFS_PATH` and `RUNCVM_KERNEL_ROOT`.
645 |   - Add device mounts for `/dev/kvm` and `/dev/net/tun`.
646 |   - Set the seccomp profile to 'unconfined'.
647 |   - Set `/dev/shm` to the size desired for the VM's memory, and set the container env var accordingly.
648 |   - Add necessary capabilities, if not already present (`NET_ADMIN`, `NET_RAW`, `MKNOD`, `AUDIT_WRITE`).
649 |   - Only if requested by `--env=RUNCVM_SYS_ADMIN=1`, add the `SYS_ADMIN` capability.
650 | - Executes the standard container runtime `runc` with the modified `config.json`.
651 | 
652 | The `runcvm-ctr-entrypoint`:
653 | - Is always launched as PID1 within the standard Docker container.
654 | - Saves the container's originally-intended entrypoint and command line, environment variables and network configuration to files inside `/.runcvm`.
655 | - Creates a bridge (acting as a hub) for each container network interface, to join that interface to a VM tap network interface.
656 | - Launches `virtiofsd` to serve the container's root filesystem.
657 | - Configures `/etc/resolv.conf` in the container.
658 | - Adds container firewall rules, launches `dnsmasq` and modifies `/vm/etc/resolv.conf` to proxy DNS requests from the VM to Docker's DNS.
659 | - Execs RunCVM's own `runcvm-init` init process to supervise `runcvm-ctr-qemu` to launch the VM.
660 | 
661 | The `runcvm-init` process:
662 | - Is RunCVM's custom init process, which takes over as PID1 within the container, supervising `runcvm-ctr-qemu` to launch the VM.
663 | - Waits for a TERM signal. On receiving one, it spawns `runcvm-ctr-shutdown`, which cycles through a number of methods to try to shut down the VM cleanly.
664 | - Waits for its child (QEMU) to exit. When it does, execs `runcvm-ctr-exit` to retrieve any saved exit code (written by the application to `/.runcvm/exit-code`) and exit with this code.
665 | 
666 | The `runcvm-ctr-qemu` script:
667 | - Prepares disk backing files as specified by `--env=RUNCVM_DISKS=<disks>`.
668 | - Prepares network configuration as saved from the container (modifying the MAC address of each container interface).
669 | - Launches [QEMU](https://www.qemu.org/) with the required kernel, network interfaces, disks and display, with a root filesystem mounted via virtiofs from the container, and with `runcvm-vm-init` as the VM's init process.
670 | 
671 | The `runcvm-vm-init` process:
672 | - Runs as PID1 within the VM.
673 | - Retrieves the container configuration - network, environment, disk and tmpfs mounts - saved by `runcvm-ctr-entrypoint` to `/.runcvm`, and reproduces it within the VM.
674 | - Launches the container's pre-existing entrypoint, in one of two ways:
675 |   1. If `RUNCVM_INIT` is `1` (i.e. the container was originally intended to be launched with Docker's own init process), it configures and execs busybox `init`, which becomes the VM's PID1, to supervise `dropbear`, run `runcvm-vm-start` and `poweroff` the VM if signalled to do so.
676 |   2. Else, it backgrounds `dropbear`, then execs (via `runcvm-init`, purely to create a controlling tty) `runcvm-vm-start`, which runs as the VM's PID1.
677 | 
678 | The `runcvm-vm-start` script:
679 | - Restores the container's originally-intended environment variables, `<uid>`, `<gid>`, `<additionalGids>` and `<cwd>`, and execs the original entrypoint.
680 | 
681 | #### `runcvm-runtime` - `exec` command
682 | 
683 | The RunCVM runtime `exec` process:
684 | 
685 | - Modifies the `process.json` file to:
686 |   - Retrieve the intended `<uid>`, `<gid>`, `<additionalGids>`, `<cwd>` and `<env>` for the command, as well as whether a `HOME` environment variable exists.
687 |   - Reset both the `<uid>` and `<gid>` to `0`, and the `<cwd>` to `/`.
688 |   - Prepend `runcvm-ctr-exec '<uid>:<gid>:<additionalGids>' '<cwd>' '<env>' '<hasHome>'` to the originally intended command.
689 | - Executes the standard container runtime `runc` with the modified `process.json`.
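
By way of illustration, the kind of `process.json` rewrite described above can be sketched with `jq` (illustrative only: the real runtime is implemented by `runcvm-runtime`, handles more fields, and the argument values shown here are hypothetical):

```sh
# Reset uid/gid to 0 and cwd to /, and prepend the runcvm-ctr-exec wrapper
# to the originally intended command
jq '.user.uid = 0
    | .user.gid = 0
    | .cwd = "/"
    | .args = ["runcvm-ctr-exec", "1000:1000:", "/workdir"] + .args' \
    process.json > process.json.new
```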
690 | 
691 | The `runcvm-ctr-exec` script:
692 | - Uses the Dropbear `dbclient` SSH client to execute the intended command, with the intended arguments, within the VM via the `runcvm-vm-exec` process, propagating the returned stdout and stderr, and returning the command's exit code.
693 | 
694 | ## Building
695 | 
696 | Building RunCVM requires Docker. To build RunCVM, first clone the repo, then run the build script, as follows:
697 | 
698 | ```console
699 | cd runcvm
700 | ./build/build.sh
701 | ```
702 | 
703 | The build script creates a Docker image named `newsnowlabs/runcvm:latest`.
704 | 
705 | Now follow the main [installation instructions](#installation) to install your built RunCVM from the Docker image.
706 | 
707 | ## Testing
708 | 
709 | Test RunCVM using nested RunCVM: that is, using a Docker image capable of installing RunCVM, or an image built with a version of RunCVM preinstalled.
710 | 
711 | Build a suitable image as follows:
712 | 
713 | ```sh
714 | cat <>/etc/modules && \
724 |     useradd --create-home --shell /bin/bash --groups sudo,docker runcvm && \
725 |     echo runcvm:runcvm | chpasswd && \
726 |     echo 'runcvm ALL=(ALL) NOPASSWD: ALL' >/etc/sudoers.d/runcvm
727 | 
728 | WORKDIR /home/runcvm
729 | ENTRYPOINT ["/lib/systemd/systemd"]
730 | VOLUME /disks
731 | 
732 | # Mount formatted backing files at:
733 | # - /var/lib/docker for speed and overlay2 support
734 | # - /opt/runcvm to avoid nested virtiofs, which works, but can't be great for speed
735 | ENV RUNCVM_DISKS='/disks/docker,/var/lib/docker,ext4,2G;/disks/runcvm,/opt/runcvm,ext4,2G'
736 | 
737 | # # Uncomment this block to preinstall RunCVM from the specified image
738 | #
739 | # COPY --from=newsnowlabs/runcvm:latest /opt /opt/
740 | # RUN rm -f /etc/init.d/docker && \
741 | #     bash /opt/runcvm/scripts/runcvm-install-runtime.sh --no-dockerd
742 | EOF
743 | ```
744 | 
745 | (Uncomment the final block to build an image with RunCVM preinstalled, or leave the block commented to test RunCVM installation.)
746 | 
747 | To launch, run:
748 | 
749 | ```sh
750 | docker run -d --runtime=runcvm -m 2g --name=ubuntu-docker-runcvm ubuntu-docker-runcvm
751 | ```
752 | 
753 | > Optionally modify this `docker run` command by:
754 | > - adding `--rm` - to automatically remove the container after systemd shutdown
755 | > - removing `-d` and adding `--env=RUNCVM_KERNEL_DEBUG=1` - to see kernel and systemd boot logs
756 | > - removing `-d` and adding `-it` - to provide a console
757 | 
758 | Then run `docker exec -it -u runcvm ubuntu-docker-runcvm bash` to obtain a command prompt and perform testing.
759 | 
760 | Run `docker rm -fv ubuntu-docker-runcvm` to clean up after testing.
761 | 
762 | ## Support
763 | 
764 | **Support launching images:** If you encounter any Docker image that launches in a standard container runtime but does not launch in RunCVM, or launches but with unexpected behaviour, please [raise an issue](https://github.com/newsnowlabs/runcvm/issues) titled _Launch failure for image `<image>`_ or _Unexpected behaviour for image `<image>`_, and include log excerpts and an explanation of the failure, or of the expected and unexpected behaviour.
765 | 
766 | **For all other issues:** please still [raise an issue](https://github.com/newsnowlabs/runcvm/issues).
767 | 
768 | You can also reach out to us on the [NewsNow Labs Slack Workspace](https://join.slack.com/t/newsnowlabs/shared_invite/zt-wp54l05w-0DTxuc_n8uISJRtks3Xw3A).
769 | 
770 | We are typically available to respond to queries Monday-Friday, 9am-5pm UK time, and will be happy to help.
771 | 
772 | ## Contributing
773 | 
774 | If you would like to contribute a feature suggestion or code, please raise an issue or submit a pull request.
775 | 
776 | ## Uninstallation
777 | 
778 | Shut down any RunCVM containers.
779 | 
780 | Then run `sudo rm -rf /opt/runcvm`.
781 | 
782 | ## RunCVM and Dockside
783 | 
784 | RunCVM and [Dockside](https://dockside.io/) are designed to work together in two alternative ways.
785 | 
786 | 1. Dockside can be used to launch devtainers (development environments) in RunCVM VMs, allowing you to provision containerised online IDEs for developing applications like `dockerd`, Docker Swarm and `systemd`, applications that require a running kernel or kernel modules not available on the host, or applications that need specific hardware, e.g. a graphics display. Follow the instructions for adding a runtime to your [Dockside profiles](https://github.com/newsnowlabs/dockside/blob/main/docs/setup.md#profiles).
787 | 2. Dockside can itself be launched inside a RunCVM VM with its own `dockerd`, to provide increased security and compartmentalisation from the host. e.g.
788 | 
789 | ```
790 | docker run --rm -it --runtime=runcvm --memory=2g --name=docksidevm -p 443:443 -p 80:80 --mount=type=volume,src=dockside-data,dst=/data --mount=type=volume,src=dockside-disks,dst=/disks --env=RUNCVM_DISKS=/disks/disk1,/var/lib/docker,ext4,5G newsnowlabs/dockside --run-dockerd --ssl-builtin
791 | ```
792 | 
793 | ## Legals
794 | 
795 | This project (known as "RunCVM"), comprising the files in this Git repository
796 | (but excluding files containing a conflicting copyright notice and licence),
797 | is copyright 2023 NewsNow Publishing Limited, Struan Bartlett, and contributors.
798 | 
799 | RunCVM is an open-source project licensed under the Apache License, Version 2.0
800 | (the "License"); you may not use RunCVM or its constituent files except in
801 | compliance with the License.
802 | 
803 | You may obtain a copy of the License at [http://www.apache.org/licenses/LICENSE-2.0](http://www.apache.org/licenses/LICENSE-2.0).
804 | 
805 | Unless required by applicable law or agreed to in writing, software
806 | distributed under the License is distributed on an "AS IS" BASIS,
807 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
808 | See the License for the specific language governing permissions and
809 | limitations under the License.
810 | 
811 | > N.B. In order to run, RunCVM relies upon other third-party open-source software dependencies that are separate to and independent from RunCVM and published under their own independent licences.
812 | >
813 | > RunCVM Docker images made available at [https://hub.docker.com/repository/docker/newsnowlabs/runcvm](https://hub.docker.com/repository/docker/newsnowlabs/runcvm) are distributions
814 | > designed to run RunCVM that comprise: (a) the RunCVM project source and/or object code; and
815 | > (b) third-party dependencies that RunCVM needs to run; and which are each distributed under the terms
816 | > of their respective licences.
817 | 
--------------------------------------------------------------------------------
/build-utils/entrypoint-install.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | 
3 | MNT=/runcvm
4 | REPO=newsnowlabs/runcvm
5 | 
6 | while [ -n "$1" ];
7 | do
8 |   case "$1" in
9 |     --quiet) QUIET=1; shift; continue; ;;
10 |     --sleep|--wait|--pause) SLEEP=1; shift; continue; ;;
11 |     *) echo "$0: Unknown argument '$1'; aborting!"; exit 2; ;;
12 |   esac
13 | done
14 | 
15 | if !
mountpoint $MNT >/dev/null 2>&1; then 16 | 17 | cat <<_EOE_ >&2 18 | ERROR: Host bind-mount not specified, see below for correct usage. 19 | 20 | Usage: docker run --rm -v /opt/runcvm:$MNT $REPO [--quiet] [--sleep] 21 | 22 | - Installs runcvm package to the host at /opt/runcvm 23 | (installation elsewhere is currently unsupported) 24 | 25 | N.B. This image should normally only be used by the install script. 26 | See README.md for installation instructions. 27 | _EOE_ 28 | 29 | exit 1 30 | fi 31 | 32 | rsync -aR --delete /opt/runcvm/./ $MNT/ || exit 1 33 | 34 | if [ -z "$QUIET" ]; then 35 | 36 | cat <<"_EOE_" >&2 37 | RunCVM install/upgrade successful 38 | ================================= 39 | 40 | If this is your first time installing RunCVM on this server/VM, then: 41 | 42 | 1. Run the following to update /etc/docker/daemon.conf and restart docker: 43 | 44 | sudo /opt/runcvm/scripts/runcvm-install-runtime.sh 45 | 46 | 2. Optionally, run the integration tests: 47 | 48 | ./tests/run 49 | 50 | _EOE_ 51 | fi 52 | 53 | # For installing across a docker swarm: 54 | # - Run: docker service create --name=runcvm --mode=global --mount=type=bind,src=/opt/runcvm,dst=/runcvm newsnowlabs/runcvm:latest --sleep 55 | # - Wait: until the service is created everywhere 56 | # - Run: docker service rm runcvm 57 | if [ -n "$SLEEP" ]; then 58 | echo "$(hostname): RunCVM package installed." 59 | sleep infinity 60 | else 61 | exit 0 62 | fi 63 | -------------------------------------------------------------------------------- /build-utils/make-bundelf-bundle.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # BundELF - ELF binary and dynamic library patcher/bundler for making portable/relocatable executables 4 | # ---------------------------------------------------------------------------------------------------- 5 | # 6 | # Licence: Apache 2.0 7 | # Authors: Struan Bartlett, NewsNow Labs, NewsNow Publishing Ltd 8 | # Version: 1.0.0 9 | # Git: https://github.com/newsnowlabs/bundelf 10 | 11 | # make-bundelf-bundle.sh is used to prepare and package ELF binaries and their 12 | # dynamic library dependencies for relocation to (and execution from) a new 13 | # location, making them completely portable and independent of the original 14 | # distribution. 15 | # 16 | # It can be used to package Linux binaries sourced from one distribution, 17 | # so that they run within, but completely independently of, any other 18 | # distribution. 
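#
# A minimal usage sketch (values here are illustrative only; see the
# variable reference below for the full set of inputs):
#
#   BUNDELF_BINARIES="bash jq" \
#   BUNDELF_CODE_PATH=/opt/mybundle \
#   BUNDELF_EXEC_PATH=/opt/mybundle \
#   ./make-bundelf-bundle.sh --bundle
#
# Afterwards, run this script with --verify (from within any distribution)
# to check that all dependencies resolve within BUNDELF_CODE_PATH.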
19 | #
20 | # Example BundELF use cases:
21 | # - Bundling Alpine binaries for running within, but completely independently
22 | #   of, any arbitrary distribution (including GLIBC-based distributions)
23 | # - Bundling GLIBC-based applications for running within Alpine (or indeed any
24 | #   other distribution)
25 | #
26 | # BundELF is a core technology component of:
27 | # - https://github.com/newsnowlabs/dockside
28 | #   - to allow running complex Node-based IDE applications and container-setup
29 | #     processes inside containers running an unknown arbitrary Linux
30 | #     distribution
31 | # - https://github.com/newsnowlabs/runcvm
32 | #   - to allow running QEMU, virtiofsd, dnsmasq and other tools inside a
33 | #     container (and indeed a VM) running an unknown arbitrary Linux
34 | #     distribution
35 | #
36 | # Environment variable inputs:
37 | # - BUNDELF_BINARIES - list of required binaries to be scanned and copied
38 | # - BUNDELF_DYNAMIC_PATHS - list of optional paths to be scanned and copied
39 | # - BUNDELF_EXTRA_LIBS - list of extra libraries to be scanned and copied
40 | # - BUNDELF_CODE_PATH - path where binaries and libraries will be copied to
41 | # - BUNDELF_EXEC_PATH - path where binaries and libraries will be executed from
42 | # - BUNDELF_MERGE_BINDIRS - non-empty if all specified binaries should be copied to $BUNDELF_CODE_PATH/bin
43 | # - BUNDELF_LIBPATH_TYPE - whether to use absolute or relative paths (the default) for RPATH
44 | # - BUNDELF_NODE_PATH - [optional] path to the node binary, if required to ensure ldd can resolve all library paths in .node files
45 | # - BUNDELF_EXTRA_SYSTEM_LIB_PATHS - [optional] list of extra system library paths to be added to the RPATH
46 | #
47 | # See README.md for full details.
48 | 
49 | # BUNDELF_EXEC_PATH defaults to BUNDELF_CODE_PATH
50 | BUNDELF_EXEC_PATH="${BUNDELF_EXEC_PATH:-$BUNDELF_CODE_PATH}"
51 | 
52 | # Whether to use absolute or relative paths for RPATH
53 | BUNDELF_LIBPATH_TYPE="${BUNDELF_LIBPATH_TYPE:-relative}"
54 | 
55 | # Determine LD filepath, which is architecture-dependent:
56 | # e.g. ld-musl-aarch64.so.1 (linux/arm64), ld-musl-armhf.so.1 (linux/arm/v7), ld-musl-x86_64.so.1 (linux/amd64)
57 | # or ld-linux-aarch64.so.1 (linux/arm64), ld-linux-armhf.so.3 (linux/arm/v7), ld-linux-x86-64.so.2 (linux/amd64)
58 | LD_PATH=$(ls -1 /lib/ld-musl-* /lib/*-linux-*/ld-linux-*.so.* 2>/dev/null | head -n 1)
59 | LD_BIN=$(basename $LD_PATH)
60 | 
61 | TMP=/tmp/bundelf.$$
62 | 
63 | append() {
64 |   while read line; do echo "${line}${1}"; done
65 | }
66 | 
67 | # Check that all dynamic library dependencies are correctly being resolved to versions stored within BUNDELF_CODE_PATH.
68 | # Prints any that are instead being resolved outside it.
69 | _verify() {
70 |   local status=0
71 | 
72 |   # Deduce BUNDELF_CODE_PATH from this script's execution path, if none provided (useful when called with --verify within an alternative environment).
73 |   [ -z "$BUNDELF_CODE_PATH" ] && BUNDELF_CODE_PATH=$(realpath $(dirname $0)/..)
74 | 
75 |   # Now check the ELF files
76 |   for lib in $(cat $BUNDELF_CODE_PATH/.binelfs $BUNDELF_CODE_PATH/.libelfs)
77 |   do
78 |     echo -n "Checking: $lib ... " >&2
79 |     $BUNDELF_CODE_PATH$LD_PATH --list $lib 2>/dev/null | sed -nr '/=>/!d; s/^\s*(\S+)\s*=>\s*(.*?)(\s*\(0x[0-9a-f]+\))?$/- \2 \1/;/^.+$/p;' | egrep -v "^- ($BUNDELF_CODE_PATH/|$BUNDELF_EXEC_PATH/.*/$LD_BIN)"
80 | 
81 |     # If any libraries do not match the expected pattern, grep returns true
82 |     if [ $?
-eq 0 ]; then 83 | status=1 84 | echo "BAD" 85 | else 86 | echo "GOOD" 87 | fi 88 | 89 | sleep 0.01 90 | done 91 | 92 | return $status 93 | } 94 | 95 | verify() { 96 | _verify 97 | exit $? 98 | } 99 | 100 | copy_binaries() { 101 | # Copy any binaries we require to the install location. 102 | # Write their paths to cmd-elf-bin. 103 | 104 | if [ -n "$BUNDELF_MERGE_BINDIRS" ]; then 105 | mkdir -p $BUNDELF_CODE_PATH/bin 106 | else 107 | mkdir -p $BUNDELF_CODE_PATH 108 | fi 109 | 110 | for bin in "$@" 111 | do 112 | local file=$(which $bin) 113 | 114 | if [ -n "$file" ]; then 115 | if [ -z "$BUNDELF_MERGE_BINDIRS" ]; then 116 | tar cv $file 2>/dev/null | tar x -C $BUNDELF_CODE_PATH/ 117 | echo "$BUNDELF_CODE_PATH$file" 118 | else 119 | cp -p $file $BUNDELF_CODE_PATH/bin/ 120 | echo "$BUNDELF_CODE_PATH/bin/$bin" 121 | fi 122 | fi 123 | done 124 | } 125 | 126 | scan_extra_libs() { 127 | for p in "$@" 128 | do 129 | find "$p" ! -type d | while read lib 130 | do 131 | local f=$(basename $lib) 132 | echo "$f $lib" 133 | done 134 | done 135 | } 136 | 137 | # Using ldd, generate list of resolved library filepaths for each ELF binary and library, 138 | # logging first argument (to be used as $lib) and second argument (to be used as $dest). 139 | # e.g. 140 | # libaio.so.1 /usr/lib/libaio.so.1 141 | # libblkid.so.1 /lib/libblkid.so.1 142 | find_lib_deps() { 143 | cat "$@" | sort -u | xargs -P $(nproc) -I '{}' ldd '{}' 2>/dev/null | sed -nr 's/^\s*(.*)=>\s*(.*?)\s.*$/\1 \2/p' | sort -u 144 | } 145 | 146 | copy_libs() { 147 | mkdir -p $BUNDELF_CODE_PATH 148 | 149 | # For each resolved library filepath: 150 | # - Copy $dest to the install location. 151 | # - If $dest is a symlink, copy the symlink to the install location too. 152 | # - If needed, add a symlink from $lib to $dest. 153 | # 154 | # N.B. These steps are all needed to ensure the Alpine dynamic linker can resolve library filepaths as required. 155 | # For more, see https://www.musl-libc.org/doc/1.0.0/manual.html 156 | # 157 | sort -u "$@" | while read lib dest 158 | do 159 | # Copy $dest; and if $dest is a symlink, copy its target. 160 | # This could conceivably result in duplicates if multiple symlinks point to the same target, 161 | # but is much simpler than trying to copy symlinks and targets separately. 162 | cp -a --parents -L $dest $BUNDELF_CODE_PATH 163 | 164 | # If needed, add a symlink from $lib to $(basename $dest) 165 | if [ "$(basename $dest)" != "$lib" ]; then 166 | if cd $BUNDELF_CODE_PATH/$(dirname $dest); then 167 | ln -s $(basename $dest) $lib 168 | cd - >/dev/null 169 | fi 170 | fi 171 | 172 | if [ "$dest" != "$LD_PATH" ]; then 173 | echo "$BUNDELF_CODE_PATH$dest" 174 | fi 175 | done 176 | } 177 | 178 | patch_binary() { 179 | local bin="$1" 180 | 181 | if patchelf --set-interpreter $BUNDELF_EXEC_PATH$LD_PATH $bin 2>/dev/null; then 182 | echo patchelf --set-interpreter $BUNDELF_EXEC_PATH$LD_PATH $bin >>$TMP/patchelf.log 183 | return 0 184 | fi 185 | 186 | return 1 187 | } 188 | 189 | # Function to replace a hard-linked file with a non-hard-linked copy 190 | replace_hard_link() { 191 | local file="$1" 192 | 193 | # Check if the file exists 194 | if [ ! -e "$file" ]; then 195 | echo "replace_hard_link: file '$file' does not exist." 
196 | exit 1 197 | fi 198 | 199 | # Get the number of hard links to the file 200 | local link_count=$(stat -c %h "$file") 201 | 202 | # If the link count is greater than 1, the file is a hard link 203 | if [ "$link_count" -gt 1 ]; then 204 | # Create a temporary copy of the file, and overwrite the original file with the non-hard-linked copy 205 | local tmp_file=$(mktemp) 206 | cp -dp "$file" "$tmp_file" && mv "$tmp_file" "$file" 207 | fi 208 | 209 | return 0 210 | } 211 | 212 | patch_binaries_interpreter() { 213 | # For all ELF binaries, set the interpreter to our own. 214 | for bin in $(sort -u "$@") 215 | do 216 | patch_binary "$bin" || exit 1 217 | done 218 | } 219 | 220 | generate_extra_system_lib_paths() { 221 | for p in "$@" 222 | do 223 | echo $p 224 | done 225 | } 226 | 227 | generate_system_lib_paths() { 228 | # Generate a list of system library paths 229 | # - This will be used to set the RPATH for all binaries and libraries to an absolute or relative path. 230 | 231 | # This list is generated by: 232 | # - Running the dynamic linker with --list-diagnostics 233 | # - Extracting the system_dirs path from the output 234 | # - Removing any trailing slashes 235 | # $BUNDELF_CODE_PATH$LD_PATH --list-diagnostics | grep ^path.system_dirs | sed -r 's|^.*="([^"]+)/?"$|\1|; s|/$||' | sort -u 236 | 237 | # This list is generated by: 238 | # - Extracting the path to each library, relative to $BUNDELF_CODE_PATH; add leading '/' if missing. 239 | cat "$@" | \ 240 | grep -E '\.so(\.[0-9]+)*$' | \ 241 | sed -r "s|^$BUNDELF_CODE_PATH||; s|/[^/]+$||; s|^[^/]|/|;" | \ 242 | grep -E '^(/usr|/lib)(/|$)' | \ 243 | sort -u 244 | } 245 | 246 | generate_unique_rpath() { 247 | local prefix="$1"; shift 248 | 249 | local abs_syspaths 250 | for s in $(sort -u "$@") 251 | do 252 | abs_syspaths="$abs_syspaths$(echo "$prefix${s}:")" 253 | done 254 | 255 | # Remove trailing colon 256 | echo $abs_syspaths | sed 's/:$//' 257 | } 258 | 259 | patch_binaries_and_libs_rpath() { 260 | # For all ELF libs, set the RPATH to our own, and force RPATH use. 261 | local p 262 | local rpath 263 | local rpath_template 264 | 265 | if [ "$BUNDELF_LIBPATH_TYPE" = "absolute" ]; then 266 | rpath_template=$(generate_unique_rpath "$BUNDELF_CODE_PATH" "$TMP/system-lib-paths") 267 | else 268 | rpath_template=$(generate_unique_rpath "\$ORIGIN" "$TMP/system-lib-paths") 269 | fi 270 | 271 | for lib in $(sort -u "$@") 272 | do 273 | 274 | if [ "$BUNDELF_LIBPATH_TYPE" = "absolute" ]; then 275 | rpath="$rpath_template" 276 | 277 | # Add node as a needed library to '.node' files, to avoid misleading ldd errors in verify() 278 | if [ -n "$BUNDELF_NODE_PATH" ] && echo "$lib" | grep -qE "\.node$"; then 279 | echo patchelf --add-needed "$BUNDELF_CODE_PATH$BUNDELF_NODE_PATH" $lib >>$TMP/patchelf.log 280 | patchelf --add-needed "$BUNDELF_CODE_PATH$BUNDELF_NODE_PATH" $lib >>$TMP/patchelf.log 2>&1 || exit 1 281 | fi 282 | 283 | else 284 | # If $lib is hardlinked in different parts of the file hierarchy, then setting a relative RPATH on one file would break the correct RPATH set on another. 285 | # To prevent this, we un-hardlink any hardlinked files before we patch them. 
286 |       replace_hard_link "$lib"
287 | 
288 |       p=$(dirname "$lib" | sed -r "s|^$BUNDELF_CODE_PATH[/]+||; s|[^/]+|..|g")
289 |       # rpath="\$ORIGIN/$p/lib:\$ORIGIN/$p/usr/lib:\$ORIGIN/$p/usr/lib/xtables"
290 |       rpath="$(echo "$rpath_template" | sed "s|\$ORIGIN|\$ORIGIN/$p|g")"
291 | 
292 |       # Add node as a needed library to '.node' files, to avoid misleading ldd errors in verify()
293 |       if [ -n "$BUNDELF_NODE_PATH" ] && echo "$lib" | grep -qE "\.node$"; then
294 |         local NODE_DIR=$(dirname $BUNDELF_NODE_PATH)
295 |         local NODE_BASENAME=$(basename $BUNDELF_NODE_PATH)
296 | 
297 |         # Augment rpath with relative path to the NODE_DIR
298 |         rpath="$rpath:\$ORIGIN/$p$NODE_DIR"
299 | 
300 |         # Add a needed dynamic library dependency for NODE_BASENAME (will be searched for within the augmented rpath)
301 |         echo patchelf --add-needed "$NODE_BASENAME" "$lib" >>$TMP/patchelf.log
302 |         patchelf --add-needed "$NODE_BASENAME" "$lib" >>$TMP/patchelf.log 2>&1 || exit 1
303 |       fi
304 |     fi
305 | 
306 |     echo patchelf --force-rpath --set-rpath "$rpath" "$lib" >>$TMP/patchelf.log
307 |     patchelf --force-rpath --set-rpath \
308 |       "$rpath" \
309 |       "$lib" >>$TMP/patchelf.log 2>&1 || exit 1
310 | 
311 |     # Fail silently if patchelf fails to set the interpreter: this is a catch-all for libraries like /usr/lib/libcap.so.2
312 |     # which strangely have an interpreter set.
313 |     patch_binary "$lib"
314 | 
315 |   done
316 | }
317 | 
318 | copy_and_scan_for_dynamics() {
319 |   # Find all ELF files that are dynamically linked.
320 |   # - This should include all Theia .node files and spawn-helper, but not statically-linked binaries like 'rg'
321 |   # - The only way to tell if a file is an ELF binary (or library) is to check the first 4 bytes for the magic byte sequence.
322 | 
323 |   mkdir -p $BUNDELF_CODE_PATH
324 | 
325 |   for q in "$@"
326 |   do
327 |     tar cv "$q" 2>/dev/null | tar x -C $BUNDELF_CODE_PATH/
328 | 
329 |     find "$q" -type f ! -name '*.o' -print0 | xargs -0 -P $(nproc) -I '{}' hexdump -n 4 -e '4/1 "%2x" " {}\n"' {} | sed '/^7f454c46/!d; s/^7f454c46 //' | xargs -P $(nproc) file | grep dynamically
330 |   done
331 | }
332 | 
333 | get_dynamics_interpretable() {
334 |   grep interpreter "$@" | cut -d':' -f1 | sed -r "s!^!$BUNDELF_CODE_PATH!"
335 | }
336 | 
337 | get_dynamics_noninterpretable() {
338 |   grep -v interpreter "$@" | cut -d':' -f1 | sed -r "s!^!$BUNDELF_CODE_PATH!"
339 | }
340 | 
341 | write_digest() {
342 |   # Prepare full and unique list of ELF binaries and libs for reference purposes and for checking
343 |   sort -u $TMP/cmd-elf-bin >$BUNDELF_CODE_PATH/.binelfs
344 |   sort -u $TMP/cmd-elf-lib >$BUNDELF_CODE_PATH/.libelfs
345 | }
346 | 
347 | init() {
348 |   for dep in file hexdump xargs patchelf
349 |   do
350 |     if !
[ -x "$(which $dep)" ]; then
351 |       depsmissing=1
352 |       echo "ERROR: Command '$dep' not found in PATH '$PATH'" >&2
353 |     fi
354 |   done
355 | 
356 |   [ -n "$depsmissing" ] && return 1
357 | 
358 |   # Initialise
359 |   mkdir -p "$TMP"
360 |   >$TMP/cmd-elf-bin
361 |   >$TMP/cmd-elf-lib
362 |   >$TMP/libs-tuples
363 |   >$TMP/libs-extra-tuples
364 |   >$TMP/scanned-dynamics
365 |   >$TMP/system-lib-paths
366 | }
367 | 
368 | all() {
369 |   # Copy elf binaries to BUNDELF_CODE_PATH and generate 'cmd-elf-bin' list of ELF binaries
370 |   copy_binaries $BUNDELF_BINARIES >>$TMP/cmd-elf-bin
371 | 
372 |   # Scan for additional dynamic binaries and libs
373 |   copy_and_scan_for_dynamics $BUNDELF_DYNAMIC_PATHS >>$TMP/scanned-dynamics
374 | 
375 |   # Add the interpretable dynamics to 'cmd-elf-bin'
376 |   get_dynamics_interpretable $TMP/scanned-dynamics >>$TMP/cmd-elf-bin
377 | 
378 |   # Add the non-interpretable dynamics to 'cmd-elf-lib'
379 |   get_dynamics_noninterpretable $TMP/scanned-dynamics >>$TMP/cmd-elf-lib
380 | 
381 |   # Find library dependencies of these dynamic binaries and libs; write tuples to 'libs-tuples'
382 |   find_lib_deps $TMP/cmd-elf-bin $TMP/cmd-elf-lib >>$TMP/libs-tuples
383 | 
384 |   # Scan for extra libraries not formally declared as dependencies, and append tuples to 'libs-extra-tuples'
385 |   scan_extra_libs $BUNDELF_EXTRA_LIBS >>$TMP/libs-extra-tuples
386 | 
387 |   # Copy the library tuples to BUNDELF_CODE_PATH and append to 'cmd-elf-lib'
388 |   copy_libs $TMP/libs-tuples $TMP/libs-extra-tuples >>$TMP/cmd-elf-lib
389 | 
390 |   # Patch interpreter on all ELF binaries in 'cmd-elf-bin'
391 |   patch_binaries_interpreter $TMP/cmd-elf-bin
392 | 
393 |   # Generate non-unique list of system library paths:
394 |   generate_system_lib_paths $TMP/cmd-elf-lib >>$TMP/system-lib-paths
395 |   generate_extra_system_lib_paths $BUNDELF_EXTRA_SYSTEM_LIB_PATHS >>$TMP/system-lib-paths
396 | 
397 |   # Patch RPATH on all binaries in 'cmd-elf-bin' and libs in 'cmd-elf-lib'
398 |   # TODO: This duplicates running patch_binaries_interpreter on all 'cmd-elf-bin' files, in order that it can be run in relaxed mode on 'cmd-elf-lib'
399 |   patch_binaries_and_libs_rpath $TMP/cmd-elf-bin $TMP/cmd-elf-lib
400 | 
401 |   # Write a summary of binaries and libraries to BUNDELF_CODE_PATH
402 |   write_digest
403 | 
404 |   # Copy LD, and create a convenience symlink to it named 'ld'
405 |   cp --parents $LD_PATH $BUNDELF_CODE_PATH
406 |   ln -s $(echo $LD_PATH | sed -r 's|^/lib/|./|') $BUNDELF_CODE_PATH/lib/ld
407 | }
408 | 
409 | # Run with --verify from within any distribution, to check that all dynamic library dependencies
410 | # are correctly being resolved to versions stored within BUNDELF_CODE_PATH.
411 | if [ "$1" = "--verify" ]; then
412 |   # Check the full list for any library dependencies being inadvertently resolved outside the install location.
413 |   # Returns true if OK, false on any problems.
414 |   init || exit 1
415 |   verify
416 | elif [ "$1" = "--bundle" ]; then
417 |   init || exit 1
418 |   all
419 |   verify
420 | fi
421 | 
--------------------------------------------------------------------------------
/build/build.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh -e
2 | 
3 | REPO=newsnowlabs/runcvm
4 | 
5 | DOCKER_BUILDKIT=1 docker build -t $REPO .
6 | 7 | cat <<_EOE_ 8 | 9 | RunCVM build successful 10 | ======================= 11 | 12 | To install or upgrade, now run: 13 | 14 | sudo ./runcvm-scripts/runcvm-install-runtime.sh 15 | _EOE_ 16 | 17 | 18 | echo -------------------------------------------------------------------------------- /kernels/oraclelinux/95virtiofs/module-setup.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/bash 2 | 3 | # called by dracut 4 | check() { 5 | [[ $hostonly ]] || [[ $mount_needs ]] && { 6 | for fs in "${host_fs_types[@]}"; do 7 | [[ "$fs" == "virtiofs" ]] && return 0 8 | done 9 | return 255 10 | } 11 | 12 | is_qemu_virtualized && return 0 13 | 14 | return 255 15 | } 16 | 17 | # called by dracut 18 | depends() { 19 | return 0 20 | } 21 | 22 | # called by dracut 23 | installkernel() { 24 | instmods virtiofs 25 | 26 | # qemu specific modules 27 | hostonly='' instmods \ 28 | ata_piix ata_generic pata_acpi cdrom sr_mod ahci \ 29 | virtio_blk virtio virtio_ring virtio_pci \ 30 | virtio_scsi virtio_console virtio_rng virtio_mem \ 31 | virtio_net \ 32 | spapr-vscsi \ 33 | qemu_fw_cfg 34 | } 35 | 36 | # called by dracut 37 | install() { 38 | inst_hook cmdline 95 "$moddir/parse-virtiofs.sh" 39 | inst_hook pre-mount 99 "$moddir/mount-virtiofs.sh" 40 | } 41 | 42 | -------------------------------------------------------------------------------- /kernels/oraclelinux/95virtiofs/mount-virtiofs.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/sh 2 | 3 | type getarg >/dev/null 2>&1 || . /lib/dracut-lib.sh 4 | 5 | filter_rootopts() { 6 | rootopts=$1 7 | # strip ro and rw options 8 | local OLDIFS="$IFS" 9 | IFS=, 10 | set -- $rootopts 11 | IFS="$OLDIFS" 12 | local v 13 | while [ $# -gt 0 ]; do 14 | case $1 in 15 | rw|ro);; 16 | defaults);; 17 | *) 18 | v="$v,${1}";; 19 | esac 20 | shift 21 | done 22 | rootopts=${v#,} 23 | echo $rootopts 24 | } 25 | 26 | mount_root() { 27 | local _ret 28 | 29 | rootfs="virtiofs" 30 | rflags="rw" 31 | 32 | modprobe virtiofs 33 | 34 | mount -t ${rootfs} -o "$rflags",ro "${root#virtiofs:}" "$NEWROOT" 35 | 36 | rootopts= 37 | if getargbool 1 rd.fstab -n rd_NO_FSTAB \ 38 | && ! getarg rootflags \ 39 | && [ -f "$NEWROOT/etc/fstab" ] \ 40 | && ! 
[ -L "$NEWROOT/etc/fstab" ]; then 41 | # if $NEWROOT/etc/fstab contains special mount options for 42 | # the root filesystem, 43 | # remount it with the proper options 44 | rootopts="defaults" 45 | while read dev mp fs opts rest || [ -n "$dev" ]; do 46 | # skip comments 47 | [ "${dev%%#*}" != "$dev" ] && continue 48 | 49 | if [ "$mp" = "/" ]; then 50 | rootopts=$opts 51 | break 52 | fi 53 | done < "$NEWROOT/etc/fstab" 54 | 55 | rootopts=$(filter_rootopts $rootopts) 56 | fi 57 | 58 | # we want rootflags (rflags) to take precedence so prepend rootopts to 59 | # them; rflags is guaranteed to not be empty 60 | rflags="${rootopts:+${rootopts},}${rflags}" 61 | 62 | umount "$NEWROOT" 63 | 64 | info "Remounting ${root#virtiofs:} with -o ${rflags}" 65 | mount -t ${rootfs} -o "$rflags" "${root#virtiofs:}" "$NEWROOT" 2>&1 | 66 | vinfo 67 | 68 | [ -f "$NEWROOT"/forcefsck ] && rm -f -- "$NEWROOT"/forcefsck 2>/dev/null 69 | [ -f "$NEWROOT"/.autofsck ] && rm -f -- "$NEWROOT"/.autofsck 2>/dev/null 70 | } 71 | 72 | if [ -n "$root" -a -z "${root%%virtiofs:*}" ]; then 73 | mount_root 74 | fi 75 | : 76 | -------------------------------------------------------------------------------- /kernels/oraclelinux/95virtiofs/parse-virtiofs.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/sh 2 | 3 | if [ "${root%%:*}" = "virtiofs" ] ; then 4 | modprobe virtiofs 5 | 6 | rootok=1 7 | fi 8 | -------------------------------------------------------------------------------- /kernels/oraclelinux/addvirtiofs.conf: -------------------------------------------------------------------------------- 1 | add_dracutmodules+=" virtiofs " 2 | filesystems+=" virtiofs " 3 | -------------------------------------------------------------------------------- /patches/dnsmasq/remove-passwd-requirement.patch: -------------------------------------------------------------------------------- 1 | --- a/src/dnsmasq.c.orig 2 | +++ b/src/dnsmasq.c 3 | @@ -481,6 +481,7 @@ 4 | } 5 | #endif 6 | 7 | +#if 0 8 | if (daemon->username && !(ent_pw = getpwnam(daemon->username))) 9 | baduser = daemon->username; 10 | else if (daemon->groupname && !(gp = getgrnam(daemon->groupname))) 11 | @@ -488,6 +489,7 @@ 12 | 13 | if (baduser) 14 | die(_("unknown user or group: %s"), baduser, EC_BADCONF); 15 | +#endif 16 | 17 | /* implement group defaults, "dip" if available, or group associated with uid */ 18 | if (!daemon->group_set && !gp) 19 | -------------------------------------------------------------------------------- /patches/dropbear/runcvm.patch: -------------------------------------------------------------------------------- 1 | --- a/src/cli-kex.c 2 | +++ b/src/cli-kex.c 3 | @@ -312,7 +312,7 @@ 4 | int ret; 5 | 6 | if (cli_opts.no_hostkey_check) { 7 | - dropbear_log(LOG_INFO, "Caution, skipping hostkey check for %s\n", cli_opts.remotehost); 8 | + // dropbear_log(LOG_INFO, "Caution, skipping hostkey check for %s\n", cli_opts.remotehost); 9 | return; 10 | } 11 | 12 | --- a/src/dbutil.c 13 | +++ b/src/dbutil.c 14 | @@ -140,7 +140,9 @@ 15 | 16 | vsnprintf(printbuf, sizeof(printbuf), format, param); 17 | 18 | +#if 0 19 | fprintf(stderr, "%s\n", printbuf); 20 | +#endif 21 | 22 | } 23 | 24 | --- a/src/default_options.h 25 | +++ b/src/default_options.h 26 | @@ -21,10 +21,10 @@ 27 | /* Default hostkey paths - these can be specified on the command line. 
28 |  * Homedir is prepended if path begins with ~/
29 |  */
30 | -#define DSS_PRIV_FILENAME "/etc/dropbear/dropbear_dss_host_key"
31 | -#define RSA_PRIV_FILENAME "/etc/dropbear/dropbear_rsa_host_key"
32 | -#define ECDSA_PRIV_FILENAME "/etc/dropbear/dropbear_ecdsa_host_key"
33 | -#define ED25519_PRIV_FILENAME "/etc/dropbear/dropbear_ed25519_host_key"
34 | +#define DSS_PRIV_FILENAME "/.runcvm/dropbear/dropbear_dss_host_key"
35 | +#define RSA_PRIV_FILENAME "/.runcvm/dropbear/dropbear_rsa_host_key"
36 | +#define ECDSA_PRIV_FILENAME "/.runcvm/dropbear/dropbear_ecdsa_host_key"
37 | +#define ED25519_PRIV_FILENAME "/.runcvm/dropbear/dropbear_ed25519_host_key"
38 | 
39 |  /* Set NON_INETD_MODE if you require daemon functionality (ie Dropbear listens
40 |  * on chosen ports and keeps accepting connections. This is the default.
41 | @@ -218,7 +218,7 @@
42 |  #define DO_HOST_LOOKUP 0
43 | 
44 |  /* Whether to print the message of the day (MOTD). */
45 | -#define DO_MOTD 1
46 | +#define DO_MOTD 0
47 |  #define MOTD_FILENAME "/etc/motd"
48 | 
49 |  /* Authentication Types - at least one required.
50 | 
--------------------------------------------------------------------------------
/patches/mkinitfs/nlplug-findfs.patch:
--------------------------------------------------------------------------------
1 | --- mkinitfs-3.8.1.orig/nlplug-findfs/nlplug-findfs.c
2 | +++ mkinitfs-3.8.1/nlplug-findfs/nlplug-findfs.c
3 | @@ -41,7 +41,7 @@
4 |  #include
5 |  #include
6 | 
7 | -#define MAX_EVENT_TIMEOUT 5000
8 | +#define MAX_EVENT_TIMEOUT 1000
9 |  #define DEFAULT_EVENT_TIMEOUT 250
10 |  /* usb mass storage needs 1 sec to settle */
11 |  #define USB_STORAGE_TIMEOUT 1000
12 | 
--------------------------------------------------------------------------------
/patches/seabios/qemu-fw-cfg-fix.patch:
--------------------------------------------------------------------------------
1 | diff --git a/src/sercon.c b/src/sercon.c
2 | index 3019d9b..988c2a2 100644
3 | --- a/src/sercon.c
4 | +++ b/src/sercon.c
5 | @@ -516,7 +516,7 @@ void sercon_setup(void)
6 |      struct segoff_s seabios, vgabios;
7 |      u16 addr;
8 | 
9 | -    addr = romfile_loadint("etc/sercon-port", 0);
10 | +    addr = romfile_loadint("opt/org.seabios/etc/sercon-port", 0);
11 |      if (!addr)
12 |          return;
13 |      dprintf(1, "sercon: using ioport 0x%x\n", addr);
14 | diff --git a/src/fw/paravirt.c b/src/fw/paravirt.c
15 | index fba4e52..9a346d9 100644
16 | --- a/src/fw/paravirt.c
17 | +++ b/src/fw/paravirt.c
18 | diff --git a/src/fw/paravirt.c b/src/fw/paravirt.c
19 | index fba4e52..9a346d9 100644
20 | --- a/src/fw/paravirt.c
21 | +++ b/src/fw/paravirt.c
22 | @@ -652,9 +652,9 @@ void qemu_cfg_init(void)
23 |      // serial console
24 |      u16 nogfx = 0;
25 |      qemu_cfg_read_entry(&nogfx, QEMU_CFG_NOGRAPHIC, sizeof(nogfx));
26 | -    if (nogfx && !romfile_find("etc/sercon-port")
27 | +    if (nogfx && !romfile_find("opt/org.seabios/etc/sercon-port")
28 |          && !romfile_find("vgaroms/sgabios.bin"))
29 | -        const_romfile_add_int("etc/sercon-port", PORT_SERIAL1);
30 | +        const_romfile_add_int("opt/org.seabios/etc/sercon-port", PORT_SERIAL1);
31 |  }
32 | 
33 |  /*
34 | 
--------------------------------------------------------------------------------
/qemu-exit/qemu-exit.c:
--------------------------------------------------------------------------------
1 | #include <stdio.h>
2 | #include <stdlib.h>
3 | #include <unistd.h>
4 | #include <sys/io.h>
5 | 
6 | #define SHUTDOWN_PORT 0x604
7 | #define EXIT_PORT 0x501
8 | 
9 | static void clean_exit(void) {
10 |     ioperm(SHUTDOWN_PORT, 16, 1);
11 |     outw(0x2000, SHUTDOWN_PORT);
12 | }
13 | 
14 | int main(int argc, char **argv) {
15 |     int status;
16 | 
17 |     if (argc != 2) {
18 |         clean_exit();
19 |     }
20 | 
21 |     status = atoi(argv[1]);
22 |     if (!status) {
23 |         clean_exit();
24 |     }
25 | 
26 |     ioperm(EXIT_PORT, 8, 1);
27 | 
28 |     // status returned is 1+(2*orig_status)
29 |     outb(status-1, EXIT_PORT);
30 | 
31 |     // Didn't exit. Perhaps QEMU was not launched with -device isa-debug-exit
32 |     exit(255);
33 | }
--------------------------------------------------------------------------------
/runcvm-init/VERSION.h:
--------------------------------------------------------------------------------
1 | // THIS FILE IS AUTOMATICALLY GENERATED
2 | // Run `make VERSION.h` to update it after modifying VERSION.
3 | unsigned char VERSION[] = {
4 |   0x31, 0x2e, 0x32, 0x2e, 0x35, 0x0a
5 | };
6 | unsigned int VERSION_len = 6;
7 | 
--------------------------------------------------------------------------------
/runcvm-init/dumb-init.c:
--------------------------------------------------------------------------------
1 | // For the purposes of the following license, the "Software" is this file, dumb-init.c.
2 | //
3 | // The MIT License (MIT)
4 | //
5 | // Copyright (c) 2015 Yelp, Inc.
6 | //
7 | // Permission is hereby granted, free of charge, to any person obtaining a copy
8 | // of this software and associated documentation files (the "Software"), to deal
9 | // in the Software without restriction, including without limitation the rights
10 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | // copies of the Software, and to permit persons to whom the Software is
12 | // furnished to do so, subject to the following conditions:
13 | 
14 | // The above copyright notice and this permission notice shall be included in
15 | // all copies or substantial portions of the Software.
16 | 
17 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 | // THE SOFTWARE.
24 | 
25 | // dumb-init.c modifications (c) 2022 NewsNow Publishing Limited
26 | 
27 | /*
28 |  * dumb-init is a simple wrapper program designed to run as PID 1 and pass
29 |  * signals to its children.
30 |  *
31 |  * Usage:
32 |  *   ./dumb-init python -c 'while True: pass'
33 |  *
34 |  * To get debug output on stderr, run with '-v'.
35 |  */
36 | 
37 | #include <assert.h>
38 | #include <errno.h>
39 | #include <getopt.h>
40 | #include <signal.h>
41 | #include <stdio.h>
42 | #include <stdlib.h>
43 | #include <string.h>
44 | #include <sys/ioctl.h>
45 | #include <sys/types.h>
46 | #include <sys/wait.h>
47 | #include <unistd.h>
48 | #include "VERSION.h"
49 | 
50 | #define PRINTERR(...) do { \
51 |     fprintf(stderr, "[runcvm-init] " __VA_ARGS__); \
52 | } while (0)
53 | 
54 | #define DEBUG(...) do { \
55 |     if (debug) { \
56 |         PRINTERR(__VA_ARGS__); \
57 |     } \
58 | } while (0)
59 | 
60 | // Signals we care about are numbered from 1 to 31, inclusive.
61 | // (32 and above are real-time signals.)
62 | // TODO: this is likely not portable outside of Linux, or on strange architectures
63 | #define MAXSIG 31
64 | 
65 | // Indices are one-indexed (signal 1 is at index 1). Index zero is unused.
66 | // User-specified signal rewriting.
67 | int signal_rewrite[MAXSIG + 1] = {[0 ... MAXSIG] = -1};
68 | // One-time ignores due to TTY quirks. 0 = no skip, 1 = skip the next-received signal.
69 | char signal_temporary_ignores[MAXSIG + 1] = {[0 ... MAXSIG] = 0};
70 | 
71 | pid_t child_pid = -1;
72 | char debug = 0;
73 | char use_setsid = 1;
74 | char no_fork = 0;
75 | 
76 | int translate_signal(int signum) {
77 |     if (signum <= 0 || signum > MAXSIG) {
78 |         return signum;
79 |     } else {
80 |         int translated = signal_rewrite[signum];
81 |         if (translated == -1) {
82 |             return signum;
83 |         } else {
84 |             DEBUG("Translating signal %d to %d.\n", signum, translated);
85 |             return translated;
86 |         }
87 |     }
88 | }
89 | 
90 | void forward_signal(int signum) {
91 |     signum = translate_signal(signum);
92 |     if (signum != 0) {
93 |         kill(use_setsid ? -child_pid : child_pid, signum);
94 |         DEBUG("Forwarded signal %d to children.\n", signum);
95 |     } else {
96 |         DEBUG("Not forwarding signal %d to children (ignored).\n", signum);
97 |     }
98 | }
99 | 
100 | pid_t shutdown() {
101 |     pid_t my_child_pid;
102 |     char *shutdown_cmd[] = {"/.runcvm/guest/scripts/runcvm-ctr-shutdown", NULL};
103 | 
104 |     my_child_pid = fork();
105 |     if (my_child_pid < 0) {
106 |         PRINTERR("Unable to fork. Exiting.\n");
107 |         return 1;
108 |     } else if (my_child_pid == 0) {
109 |         /* child */
110 |         DEBUG("Requesting child to shut down by spawning %s\n", shutdown_cmd[0]);
111 |         execvp(shutdown_cmd[0], &shutdown_cmd[0]);
112 | 
113 |         // if this point is reached, exec failed, so we should exit nonzero
114 |         PRINTERR("Shutdown child spawn failed: %s\n", strerror(errno));
115 |         return 2;
116 |     } else {
117 |         /* parent */
118 |         DEBUG("Shutdown child spawned with PID %d.\n", my_child_pid);
119 |     }
120 | 
121 |     return my_child_pid;
122 | }
123 | 
124 | void quit(int exit_status) {
125 |     char exit_status_string[4];
126 |     char *exit_cmd[] = {"/.runcvm/guest/scripts/runcvm-ctr-exit", exit_status_string, NULL};
127 | 
128 |     sprintf(exit_status_string, "%d", exit_status & 0xFF);
129 | 
130 |     DEBUG("Exiting by execing: %s %s\n", exit_cmd[0], exit_cmd[1]);
131 |     execvp(exit_cmd[0], &exit_cmd[0]);
132 |     DEBUG("Failed to exec %s, so exiting now with status %d\n", exit_cmd[0], exit_status);
133 |     exit(exit_status);
134 | }
135 | 
136 | /*
137 |  * The dumb-init signal handler.
138 |  *
139 |  * The main job of this signal handler is to forward signals along to our child
140 |  * process(es). In setsid mode, this means signaling the entire process group
141 |  * rooted at our child. In non-setsid mode, this is just signaling the primary
142 |  * child.
143 |  *
144 |  * In most cases, simply proxying the received signal is sufficient. If we
145 |  * receive a job control signal, however, we should not only forward it, but
146 |  * also sleep dumb-init itself.
147 |  *
148 |  * This allows users to run foreground processes using dumb-init and to
149 |  * control them using normal shell job control features (e.g. Ctrl-Z to
150 |  * generate a SIGTSTP and suspend the process).
151 |  *
152 |  * The libc manual is useful:
153 |  * https://www.gnu.org/software/libc/manual/html_node/Job-Control-Signals.html
154 |  *
155 |  */
156 | void handle_signal(int signum) {
157 |     DEBUG("Received signal %d.\n", signum);
158 | 
159 |     if (signal_temporary_ignores[signum] == 1) {
160 |         DEBUG("Ignoring tty hand-off signal %d.\n", signum);
161 |         signal_temporary_ignores[signum] = 0;
162 |     } else if (signum == SIGTERM) {
163 |         shutdown();
164 |     } else if (signum == SIGCHLD) {
165 |         int status, exit_status;
166 |         pid_t killed_pid;
167 |         while ((killed_pid = waitpid(-1, &status, WNOHANG)) > 0) {
168 |             if (WIFEXITED(status)) {
169 |                 exit_status = WEXITSTATUS(status);
170 |                 DEBUG("A child with PID %d exited with exit status %d.\n", killed_pid, exit_status);
171 |             } else {
172 |                 assert(WIFSIGNALED(status));
173 |                 exit_status = 128 + WTERMSIG(status);
174 |                 DEBUG("A child with PID %d was terminated by signal %d.\n", killed_pid, exit_status - 128);
175 |             }
176 | 
177 |             if (killed_pid == child_pid) {
178 |                 forward_signal(SIGTERM); // send SIGTERM to any remaining children
179 |                 DEBUG("Child exited with status %d. Goodbye.\n", exit_status);
180 |                 quit(exit_status);
181 |                 // exit(exit_status);
182 |             }
183 |         }
184 |     } else {
185 |         forward_signal(signum);
186 |         if (signum == SIGTSTP || signum == SIGTTOU || signum == SIGTTIN) {
187 |             DEBUG("Suspending self due to TTY signal.\n");
188 |             kill(getpid(), SIGSTOP);
189 |         }
190 |     }
191 | }
192 | 
193 | void print_help(char *argv[]) {
194 |     fprintf(stderr,
195 |         "runcvm-init v%.*s"
196 |         "Usage: %s [option] command [[arg] ...]\n"
197 |         "\n"
198 |         "runcvm-init is a simple process supervisor that forwards signals to children.\n"
199 |         "It is designed to run as PID1 in minimal container environments.\n"
200 |         "\n"
201 |         "Optional arguments:\n"
202 |         "   -c, --single-child   Run in single-child mode.\n"
203 |         "                        In this mode, signals are only proxied to the\n"
204 |         "                        direct child and not any of its descendants.\n"
205 |         "   -r, --rewrite s:r    Rewrite received signal s to new signal r before proxying.\n"
206 |         "                        To ignore (not proxy) a signal, rewrite it to 0.\n"
207 |         "                        This option can be specified multiple times.\n"
208 |         "   -v, --verbose        Print debugging information to stderr.\n"
209 |         "   -h, --help           Print this help message and exit.\n"
210 |         "   -V, --version        Print the current version and exit.\n"
211 |         "   -F, --no-fork        Don't fork, just set up signals and tty\n"
212 |         "\n",
213 |         VERSION_len, VERSION,
214 |         argv[0]
215 |     );
216 | }
217 | 
218 | void print_rewrite_signum_help() {
219 |     fprintf(
220 |         stderr,
221 |         "Usage: -r option takes <signum>:<replacement>, where <signum> "
222 |         "is between 1 and %d.\n"
223 |         "This option can be specified multiple times.\n"
224 |         "Use --help for full usage.\n",
225 |         MAXSIG
226 |     );
227 |     exit(1);
228 | }
229 | 
230 | void parse_rewrite_signum(char *arg) {
231 |     int signum, replacement;
232 |     if (
233 |         sscanf(arg, "%d:%d", &signum, &replacement) == 2 &&
234 |         (signum >= 1 && signum <= MAXSIG) &&
235 |         (replacement >= 0 && replacement <= MAXSIG)
236 |     ) {
237 |         signal_rewrite[signum] = replacement;
238 |     } else {
239 |         print_rewrite_signum_help();
240 |     }
241 | }
242 | 
243 | void set_rewrite_to_sigstop_if_not_defined(int signum) {
244 |     if (signal_rewrite[signum] == -1) {
245 |         signal_rewrite[signum] = SIGSTOP;
246 |     }
247 | }
248 | 
249 | char **parse_command(int argc, char *argv[]) {
250 |     int opt;
251 |     struct option long_options[] = {
252 |         {"help", no_argument, NULL, 'h'},
253 |         {"single-child", no_argument, NULL, 'c'},
254 |         {"rewrite", required_argument, NULL,
'r'}, 255 | {"verbose", no_argument, NULL, 'v'}, 256 | {"version", no_argument, NULL, 'V'}, 257 | {"no-fork", no_argument, NULL, 'F'}, 258 | {NULL, 0, NULL, 0}, 259 | }; 260 | while ((opt = getopt_long(argc, argv, "+hvVcFr:", long_options, NULL)) != -1) { 261 | switch (opt) { 262 | case 'h': 263 | print_help(argv); 264 | exit(0); 265 | case 'v': 266 | debug = 1; 267 | break; 268 | case 'V': 269 | fprintf(stderr, "dumb-init v%.*s", VERSION_len, VERSION); 270 | exit(0); 271 | case 'c': 272 | use_setsid = 0; 273 | break; 274 | case 'r': 275 | parse_rewrite_signum(optarg); 276 | break; 277 | case 'F': 278 | no_fork = 1; 279 | break; 280 | default: 281 | exit(1); 282 | } 283 | } 284 | 285 | if (optind >= argc) { 286 | fprintf( 287 | stderr, 288 | "Usage: %s [option] program [args]\n" 289 | "Try %s --help for full usage.\n", 290 | argv[0], argv[0] 291 | ); 292 | exit(1); 293 | } 294 | 295 | char *debug_env = getenv("DUMB_INIT_DEBUG"); 296 | if (debug_env && strcmp(debug_env, "1") == 0) { 297 | debug = 1; 298 | DEBUG("Running in debug mode.\n"); 299 | } 300 | 301 | char *setsid_env = getenv("DUMB_INIT_SETSID"); 302 | if (setsid_env && strcmp(setsid_env, "0") == 0) { 303 | use_setsid = 0; 304 | DEBUG("Not running in setsid mode.\n"); 305 | } 306 | 307 | if (use_setsid) { 308 | set_rewrite_to_sigstop_if_not_defined(SIGTSTP); 309 | set_rewrite_to_sigstop_if_not_defined(SIGTTOU); 310 | set_rewrite_to_sigstop_if_not_defined(SIGTTIN); 311 | } 312 | 313 | return &argv[optind]; 314 | } 315 | 316 | // A dummy signal handler used for signals we care about. 317 | // On the FreeBSD kernel, ignored signals cannot be waited on by `sigwait` (but 318 | // they can be on Linux). We must provide a dummy handler. 319 | // https://lists.freebsd.org/pipermail/freebsd-ports/2009-October/057340.html 320 | void dummy(int signum) {} 321 | 322 | int main(int argc, char *argv[]) { 323 | char **cmd = parse_command(argc, argv); 324 | sigset_t all_signals; 325 | sigfillset(&all_signals); 326 | sigprocmask(SIG_BLOCK, &all_signals, NULL); 327 | 328 | int i = 0; 329 | for (i = 1; i <= MAXSIG; i++) { 330 | signal(i, dummy); 331 | } 332 | 333 | /* 334 | * Detach dumb-init from controlling tty, so that the child's session can 335 | * attach to it instead. 336 | * 337 | * We want the child to be able to be the session leader of the TTY so that 338 | * it can do normal job control. 339 | */ 340 | if (use_setsid) { 341 | if (ioctl(STDIN_FILENO, TIOCNOTTY) == -1) { 342 | DEBUG( 343 | "Unable to detach from controlling tty (errno=%d %s).\n", 344 | errno, 345 | strerror(errno) 346 | ); 347 | } else { 348 | /* 349 | * When the session leader detaches from its controlling tty via 350 | * TIOCNOTTY, the kernel sends SIGHUP and SIGCONT to the process 351 | * group. We need to be careful not to forward these on to the 352 | * dumb-init child so that it doesn't receive a SIGHUP and 353 | * terminate itself (#136). 354 | */ 355 | if (getsid(0) == getpid()) { 356 | DEBUG("Detached from controlling tty, ignoring the first SIGHUP and SIGCONT we receive.\n"); 357 | signal_temporary_ignores[SIGHUP] = 1; 358 | signal_temporary_ignores[SIGCONT] = 1; 359 | } else { 360 | DEBUG("Detached from controlling tty, but was not session leader.\n"); 361 | } 362 | } 363 | } 364 | 365 | if(no_fork) { 366 | child_pid = 0; 367 | } 368 | else { 369 | child_pid = fork(); 370 | } 371 | 372 | if (child_pid < 0) { 373 | PRINTERR("Unable to fork. 
Exiting.\n"); 374 | return 1; 375 | } else if (child_pid == 0) { 376 | /* child */ 377 | sigprocmask(SIG_UNBLOCK, &all_signals, NULL); 378 | if (use_setsid) { 379 | // Don't throw error if setsid() fails in no_fork mode; 380 | // we don't want this to prevent startup. 381 | if (setsid() == -1 && !no_fork) { 382 | PRINTERR( 383 | "Unable to setsid (errno=%d %s). Exiting.\n", 384 | errno, 385 | strerror(errno) 386 | ); 387 | exit(1); 388 | } 389 | 390 | if (ioctl(STDIN_FILENO, TIOCSCTTY, 0) == -1) { 391 | DEBUG( 392 | "Unable to attach to controlling tty (errno=%d %s).\n", 393 | errno, 394 | strerror(errno) 395 | ); 396 | } 397 | DEBUG("setsid complete.\n"); 398 | } 399 | execvp(cmd[0], &cmd[0]); 400 | 401 | // if this point is reached, exec failed, so we should exit nonzero 402 | PRINTERR("%s: %s\n", cmd[0], strerror(errno)); 403 | return 2; 404 | } else { 405 | /* parent */ 406 | DEBUG("Child spawned with PID %d.\n", child_pid); 407 | if (chdir("/") == -1) { 408 | DEBUG("Unable to chdir(\"/\") (errno=%d %s)\n", 409 | errno, 410 | strerror(errno)); 411 | } 412 | for (;;) { 413 | int signum; 414 | sigwait(&all_signals, &signum); 415 | handle_signal(signum); 416 | } 417 | } 418 | } 419 | -------------------------------------------------------------------------------- /runcvm-scripts/functions/cgroupfs: -------------------------------------------------------------------------------- 1 | cgroupfs_mount() { 2 | local cgroupfs="$1" 3 | 4 | # We want no cgroupfs at all, or we will leave it to the distribution. 5 | if [[ "$cgroupfs" = "none" || "$cgroupfs" = "systemd" ]]; then 6 | return 7 | fi 8 | 9 | # If defined in fstab, or there's no kernel support, skip. 10 | # see also https://github.com/tianon/cgroupfs-mount/blob/master/cgroupfs-mount 11 | if grep -v '^#' /etc/fstab | grep -q cgroup \ 12 | || [ ! -e /proc/cgroups ] \ 13 | || [ ! -d /sys/fs/cgroup ]; then 14 | return 15 | fi 16 | 17 | # If hybrid, mixed, or cgroup1 cgroup support is requested... 18 | if [[ "$cgroupfs" = "hybrid" || "$cgroupfs" = "mixed" || "$cgroupfs" = "1" || "$cgroupfs" = "cgroup1" ]]; then 19 | 20 | if ! findmnt -rnu -M /sys/fs/cgroup; then 21 | mount -t tmpfs -o uid=0,gid=0,mode=0755 cgroup /sys/fs/cgroup 22 | fi 23 | 24 | for subtype in $(awk '!/^#/ { if ($4 == 1) print $1 }' /proc/cgroups); do 25 | local sys="/sys/fs/cgroup/$subtype" 26 | mkdir -p $sys 27 | if ! findmnt -rnu -M $sys; then 28 | if ! mount -n -t cgroup -o $subtype cgroup $sys; then 29 | rmdir $sys || true 30 | fi 31 | fi 32 | done 33 | 34 | fi 35 | 36 | # If hybrid or mixed cgroup support is requested... 37 | if [[ "$cgroupfs" = "hybrid" || "$cgroupfs" = "mixed" ]]; then 38 | if ! findmnt -rnu -M /sys/fs/cgroup/unified; then 39 | mkdir -p /sys/fs/cgroup/unified 40 | mount -t cgroup2 -o rw,nosuid,nodev,noexec,relatime cgroup2 /sys/fs/cgroup/unified 41 | fi 42 | fi 43 | 44 | # If purely cgroup2 cgroup support is requested... 45 | if [[ "$cgroupfs" = "2" || "$cgroupfs" = "cgroup2" ]]; then 46 | if ! 
findmnt -rnu -M /sys/fs/cgroup; then 47 | mkdir -p /sys/fs/cgroup 48 | mount -t cgroup2 -o rw,nosuid,nodev,noexec,relatime cgroup2 /sys/fs/cgroup 49 | fi 50 | fi 51 | } -------------------------------------------------------------------------------- /runcvm-scripts/runcvm-ctr-defaults: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | RUNCVM_GUEST=${RUNCVM_GUEST:-/.runcvm/guest} 4 | RUNCVM_PATH=$RUNCVM_GUEST/usr/sbin:$RUNCVM_GUEST/usr/bin:$RUNCVM_GUEST/sbin:$RUNCVM_GUEST/bin:$RUNCVM_GUEST/usr/lib/qemu 5 | 6 | QEMU_VIRTIOFSD_SOCKET=/run/.virtiofs.sock 7 | QEMU_GUEST_AGENT=/run/.qemu-guest-agent 8 | QEMU_MONITOR_SOCKET=/run/.qemu-monitor-socket 9 | 10 | SSHD_PORT=22222 11 | 12 | clean_env() { 13 | export -n \ 14 | RUNCVM_BREAK RUNCVM_INIT \ 15 | RUNCVM_GUEST \ 16 | RUNCVM_RUNTIME_DEBUG RUNCVM_BIOS_DEBUG RUNCVM_KERNEL_DEBUG \ 17 | RUNCVM_KERNEL RUNCVM_KERNEL_ROOT RUNCVM_KERNEL_APPEND RUNCVM_KERNEL_INITRAMFS_PATH RUNCVM_KERNEL_PATH RUNCVM_DISKS \ 18 | RUNCVM_UIDGID RUNCVM_VM_MOUNTPOINT RUNCVM_TMPFS \ 19 | RUNCVM_CPUS RUNCVM_MEM_SIZE RUNCVM_HUGETLB \ 20 | RUNCVM_HAS_HOME \ 21 | RUNCVM_CGROUPFS 22 | 23 | # May be set in VM by busybox init process 24 | export -n USER 25 | } 26 | 27 | load_network() { 28 | local if="${1:-default}" 29 | [ -d /.runcvm/network/devices ] && [ -s /.runcvm/network/devices/$if ] || return 1 30 | read -r DOCKER_IF DOCKER_IF_MAC DOCKER_IF_MTU DOCKER_IF_IP DOCKER_IF_IP_NETPREFIX DOCKER_IF_IP_GW </.runcvm/network/devices/$if 31 | } -------------------------------------------------------------------------------- /runcvm-scripts/runcvm-ctr-entrypoint: -------------------------------------------------------------------------------- 1 | #!/.runcvm/guest/bin/bash 7 | # SAVE ENTRYPOINT 8 | printf '%s\n' "$0" "$@" >/.runcvm/entrypoint 9 | 10 | # SET HOME ENV VAR IF NEEDED 11 | 12 | # - See https://github.com/moby/moby/issues/2968#issuecomment-35822318 13 | # for details of how Docker sets HOME. 14 | # 15 | # - What this means is that: 16 | # 1. if HOME is defined in the image and 17 | # docker run: 18 | # a. does not define HOME 19 | # - config.json process.env[] will show the image-defined value and this value will be used 20 | # - docker exec 21 | # - does not define HOME, then process.json env[] will show the image-defined value and this value will be used 22 | # - does define HOME, then process.json env[] will show the exec-defined value and this value will be used 23 | # b. does define HOME, config.json process.env[] will show the docker run-defined value and this value will be used 24 | # - docker exec 25 | # - does not define HOME, then process.json env[] will show the docker run-defined value and this value will be used 26 | # - does define HOME, then process.json env[] will show the exec-defined value and this value will be used 27 | # (the above is irrespective of -u setting) 28 | # 29 | # 2. if HOME is not defined in the image and 30 | # docker run: 31 | # a. does not define HOME 32 | # - config.json process.env[] will show no HOME value and the user's default homedir will be used 33 | # - docker exec 34 | # - does not define HOME, then process.json env[] will show no HOME value and the user's default homedir will be used 35 | # - does define HOME, then process.json env[] will show the exec-defined value and this value will be used 36 | # b.
does define HOME, config.json process.env[] will show the docker run-defined value and this value will be used 37 | # - docker exec 38 | # - does not define HOME, then process.json env[] will show the docker run-defined value and this value will be used 39 | # - does define HOME, then process.json env[] will show the exec-defined value and this value will be used 40 | 41 | # Problem in 2a for us with docker run and docker exec is that while we save the requested uid:gid, we set the actual uid:gid to 0:0 42 | # to allow us to run virtiofsd (and, today, qemu) (in the docker run case) and access the qemu guest agent socket (in the docker exec case - though use of the agent is deprecated in favour of ssh). 43 | # 44 | # Where HOME is not explicitly defined, this leads to docker setting HOME to root's default homedir (typically /root), 45 | # for the calls to runcvm-ctr-entrypoint and runcvm-ctr-exec (respectively). 46 | # 47 | # How then do we distinguish this case from the case where HOME is explicitly set to /root? 48 | # The answer is that runcvm-runtime must check for HOME in env[] and indicate its presence in the calls to runcvm-ctr-entrypoint and runcvm-ctr-exec. 49 | # 50 | # runcvm-runtime does this: 51 | # - in the docker run case, via the RUNCVM_HAS_HOME env var 52 | # - in the docker exec case, via an argument to runcvm-ctr-exec 53 | 54 | # Here we check RUNCVM_HAS_HOME to determine whether the HOME env var was set either in the image, or via docker run. 55 | # If not, then we set HOME to the requested user's default homedir in accordance with https://github.com/moby/moby/issues/2968. 56 | 57 | if [ "$RUNCVM_HAS_HOME" == "0" ]; then 58 | HOME=$($RUNCVM_GUEST/usr/bin/getent passwd "${RUNCVM_UIDGID%%:*}" | $RUNCVM_GUEST/bin/cut -d':' -f6) 59 | fi 60 | 61 | # SAVE ENVIRONMENT 62 | export -n SHLVL OLDPWD 63 | 64 | export >/.runcvm/config 65 | 66 | # NOW LOAD DEFAULT ENV AND PATH 67 | . $RUNCVM_GUEST/scripts/runcvm-ctr-defaults 68 | 69 | # LOAD IP MANIPULATION FUNCTIONS 70 | . $RUNCVM_GUEST/scripts/runcvm-ip-functions 71 | 72 | # SAVE PWD 73 | busybox pwd >/.runcvm/pwd 74 | 75 | # DEBUG 76 | if [[ "$RUNCVM_BREAK" =~ prenet ]]; then bash; fi 77 | 78 | # SAVE NETWORKING CONFIG AND CONFIGURE BRIDGES 79 | 80 | # Identify default gateway device and IP address 81 | IFS=$'\n' read -d '' -r DOCKER_GW_IF DOCKER_GW_IF_IP <<< \ 82 | $(ip -json route show | jq -r '.[] | (select(.dst == "default") | .dev, .gateway)') 83 | # e.g. eth0 172.25.10.1 84 | 85 | QEMU_BRIDGE_IP=169.254.1.1 86 | RUNCVM_DNS_IP=169.254.169.254 87 | 88 | mkdir -p /.runcvm/network/devices 89 | 90 | # Save non-link-scope non-default routes for later restoration in the running VM. 91 | ip -json route show | jq -r '.[] | select(.scope != "link" and .dst != "default") | "\(.dst) \(.gateway) \(.dev) \(.prefsrc)"' >/.runcvm/network/routes 92 | 93 | for if in $(ip -json link show | jq -r '.[] | .ifname') 94 | do 95 | 96 | [ "$if" = "lo" ] && continue 97 | 98 | IFS=$'\n' read -d '' -r DOCKER_IF_IP DOCKER_IF_IP_NETPREFIX DOCKER_IF_MAC DOCKER_IF_MTU <<< \ 99 | $(ip -json addr show "$if" | jq -r '.[0] | .addr_info[0].local, .addr_info[0].prefixlen, .address, .mtu') 100 | # e.g. 
172.25.10.2 24 52:54:00:b7:0b:b6 1500 101 | 102 | # Save container network parameters 103 | if [ "$if" = "$DOCKER_GW_IF" ]; then 104 | echo "$if $DOCKER_IF_MAC $DOCKER_IF_MTU $DOCKER_IF_IP $DOCKER_IF_IP_NETPREFIX $DOCKER_GW_IF_IP" >/.runcvm/network/devices/$if 105 | ln -s "$if" /.runcvm/network/devices/default 106 | else 107 | echo "$if $DOCKER_IF_MAC $DOCKER_IF_MTU $DOCKER_IF_IP $DOCKER_IF_IP_NETPREFIX" >/.runcvm/network/devices/$if 108 | fi 109 | 110 | # RECONFIGURE CONTAINER NETWORK 111 | ip addr flush dev $if 112 | 113 | QEMU_BRIDGE="br-$if" 114 | 115 | # Create the container bridge 116 | # See https://bugs.launchpad.net/neutron/+bug/1738659 117 | ip link add $QEMU_BRIDGE type bridge forward_delay 0 ageing 0 118 | 119 | # Add the original container interface to the bridge and bring it up. 120 | ip link set dev "$if" master $QEMU_BRIDGE 121 | ip link set dev "$if" up 122 | 123 | # Bring the bridge up. 124 | ip link set dev $QEMU_BRIDGE up 125 | 126 | # Restore network route via this bridge 127 | DOCKER_NET=$(ip_prefix_to_network $DOCKER_IF_IP $DOCKER_IF_IP_NETPREFIX)/$DOCKER_IF_IP_NETPREFIX 128 | ip route add $DOCKER_NET dev $QEMU_BRIDGE 129 | 130 | # If this interface is the default gateway interface, perform additional special steps. 131 | if [ "$if" = "$DOCKER_GW_IF" ]; then 132 | 133 | # Add a private IP to this bridge. 134 | # We need it so the bridge can receive traffic, but the IP won't ever see the light of day. 135 | ip addr add $QEMU_BRIDGE_IP dev $QEMU_BRIDGE 136 | 137 | # Restore default gateway route via this bridge. 138 | ip route add default via $DOCKER_GW_IF_IP dev $QEMU_BRIDGE 139 | 140 | # Accept DNS requests for $RUNCVM_DNS_IP; these will be passed to dnsmasq 141 | XTABLES_LIBDIR=$RUNCVM_GUEST/usr/lib/xtables xtables-nft-multi iptables -t nat -A PREROUTING -d $RUNCVM_DNS_IP/32 -p udp -m udp --dport 53 -j REDIRECT 142 | 143 | # Match UDP port 53 traffic, outgoing via the QEMU bridge, from the bridge's own IP: 144 | # -> Masquerade as if from the VM's IP. 145 | # This allows outgoing DNS requests from the VM to be received by dnsmasq running in the container. 146 | XTABLES_LIBDIR=$RUNCVM_GUEST/usr/lib/xtables xtables-nft-multi iptables -t nat -A POSTROUTING -o $QEMU_BRIDGE -s $QEMU_BRIDGE_IP/32 -p udp -m udp --sport 53 -j SNAT --to-source $DOCKER_IF_IP 147 | XTABLES_LIBDIR=$RUNCVM_GUEST/usr/lib/xtables xtables-nft-multi iptables -t nat -A POSTROUTING -o $QEMU_BRIDGE -s $QEMU_BRIDGE_IP/32 -p udp -m udp --dport 53 -j SNAT --to-source $DOCKER_IF_IP 148 | 149 | # Match traffic on TCP port $SSHD_PORT, outgoing via the QEMU bridge, from the bridge's own IP: 150 | # -> Masquerade it as if from the DNS_IP. 151 | # This is necessary to allow SSH from within the container to the VM. 152 | XTABLES_LIBDIR=$RUNCVM_GUEST/usr/lib/xtables xtables-nft-multi iptables -t nat -A POSTROUTING -o $QEMU_BRIDGE -s $QEMU_BRIDGE_IP/32 -p tcp -m tcp --dport $SSHD_PORT -j SNAT --to-source $RUNCVM_DNS_IP 153 | fi 154 | 155 | done 156 | 157 | # FIXME: Bind-mount /etc/resolv.conf as well as /vm/etc/resolv.conf to prevent them showing in 'docker diff' 158 | cat /vm/etc/resolv.conf >/etc/resolv.conf 159 | RESOLV_CONF_NEW=$(busybox sed -r "s/127.0.0.11/$RUNCVM_DNS_IP/" /vm/etc/resolv.conf) 160 | echo "$RESOLV_CONF_NEW" >/vm/etc/resolv.conf 161 | 162 | # LAUNCH DNSMASQ 163 | # It will receive local DNS requests (within the container, on 127.0.0.1) 164 | # and requests redirected locally (via the iptables PREROUTING REDIRECT rule) for $RUNCVM_DNS_IP. 
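# To recap the plumbing established above (an editorial summary of the rules as written): the VM's
# resolv.conf now names 169.254.169.254 in place of Docker's 127.0.0.11; the PREROUTING REDIRECT rule
# delivers the VM's DNS queries to the dnsmasq launched on the next line; and the POSTROUTING SNAT rules
# masquerade bridge-originated port-53 traffic as the VM's IP and container-to-VM ssh traffic
# (TCP port 22222) as 169.254.169.254, so both dnsmasq replies and ssh connections route correctly.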
165 | dnsmasq -u root --no-hosts 166 | 167 | # LAUNCH VIRTIOFSD 168 | $RUNCVM_GUEST/scripts/runcvm-ctr-virtiofsd & 169 | 170 | # DEBUG 171 | if [[ "$RUNCVM_BREAK" =~ postnet ]]; then bash; fi 172 | 173 | # LAUNCH INIT SUPERVISING QEMU 174 | # FIXME: Add -v to debug 175 | exec $RUNCVM_GUEST/sbin/runcvm-init -c $RUNCVM_GUEST/scripts/runcvm-ctr-qemu 176 | -------------------------------------------------------------------------------- /runcvm-scripts/runcvm-ctr-exec: -------------------------------------------------------------------------------- 1 | #!/.runcvm/guest/bin/bash -e 2 | 3 | # See https://qemu-project.gitlab.io/qemu/interop/qemu-ga-ref.html 4 | 5 | # Load original environment 6 | . /.runcvm/config 7 | 8 | # Load defaults and aliases 9 | . $RUNCVM_GUEST/scripts/runcvm-ctr-defaults 10 | 11 | env() { 12 | busybox env "$@" 13 | } 14 | 15 | to_bin() { 16 | # tab, LF, space, ', ", \ 17 | tr "\011\012\040\047\042\134" '\200\201\202\203\204\205' 18 | } 19 | 20 | # Expects: 21 | # - To be run as root 22 | # - To be given env vars 23 | # - To be given arguments 24 | # $1 <uid>:<gid>:<additionalGids> 25 | # $2 <cwd> 26 | # $3 <hasHome> 27 | # $4 <wantsTerminal> 28 | # $(5...) <command> <args...> 29 | 30 | command="$RUNCVM_GUEST/scripts/runcvm-vm-exec" 31 | uidgid="$1" 32 | cwd="$2" 33 | hasHome="$3" 34 | wantsTerminal="$4" 35 | shift 4 36 | 37 | # Parse uidgid and construct args array for the call to $command within the VM: 38 | # $1 <uidgid> 39 | # $2 <cwd, binary-encoded> 40 | # $3 <args, binary-encoded> 41 | # $(4...) <env, binary-encoded> 42 | 43 | IFS=':' read -r uid gid additionalGids <<< "$uidgid" 44 | args=("$@") 45 | 46 | if [ ${#args[@]} -gt 0 ]; then 47 | args_bin=$(printf '%s\n' "${args[@]}" | to_bin) 48 | fi 49 | 50 | # If the HOME env var was not set either in the image, or via docker run, or via docker exec, 51 | # then set HOME to the requested user's default homedir. 52 | # 53 | # - See runcvm-ctr-entrypoint for full details of how/why hasHome is needed and HOME gets set. 54 | 55 | if [ "$hasHome" != "1" ]; then 56 | # Either this script needs to look up uid's HOME or else runcvm-vm-exec does; for now, we do it here. 57 | HOME=$(getent passwd "$uid" | cut -d':' -f6) 58 | fi 59 | 60 | # Clean RUNCVM env vars 61 | clean_env 62 | 63 | # N.B. Only exported env vars will be returned and sent 64 | mapfile -t env < <(env -u _ -u SHLVL -u PWD) 65 | 66 | if [ ${#env[@]} -gt 0 ]; then 67 | env_bin=$(printf '%s\n' "${env[@]}" | to_bin) 68 | fi 69 | 70 | if [ "$wantsTerminal" = "true" ]; then 71 | opts=(-t) 72 | fi 73 | 74 | retries=30 # 15 seconds 75 | delay=0 # Signal that extra time should be allowed for RunCVM VM, its init and its dropbear sshd to start after the above conditions are satisfied 76 | 77 | while ! [ -s /.runcvm/dropbear/key ] || ! load_network 78 | do 79 | if [ $retries -gt 0 ]; then 80 | retries=$((retries-1)) 81 | delay=1 82 | sleep 0.5 83 | continue 84 | fi 85 | 86 | echo "Error: RunCVM container not yet started" >&2 87 | exit 1 88 | done 89 | 90 | # If startup was detected, wait a few extra seconds for dropbear sshd to be ready 91 | if [ "$delay" -ne 0 ]; then 92 | sleep 2 93 | fi 94 | 95 | exec $RUNCVM_GUEST/usr/bin/dbclient "${opts[@]}" -p $SSHD_PORT -y -y -i /.runcvm/dropbear/key root@$DOCKER_IF_IP "$command '$uidgid' '$(echo -n $cwd | to_bin)' '$args_bin' '$env_bin'" -------------------------------------------------------------------------------- /runcvm-scripts/runcvm-ctr-exit: -------------------------------------------------------------------------------- 1 | #!/.runcvm/guest/bin/bash 2 | 3 | # Load original environment 4 | . /.runcvm/config 5 | 6 | # Load defaults and aliases 7 | .
$RUNCVM_GUEST/scripts/runcvm-ctr-defaults 8 | 9 | # runcvm-init execs this script when it exits. 10 | # It: 11 | # - performs any post-VM tests. 12 | # - retrieves any saved exit code. 13 | # - resets terminal readline horizontal scroll 14 | # - exits with exit code 15 | 16 | if [ -f /.runcvm/exitcode ]; then 17 | read CODE </.runcvm/exitcode 18 | fi 27 | 28 | exit ${CODE:-0} -------------------------------------------------------------------------------- /runcvm-scripts/runcvm-ctr-qemu: -------------------------------------------------------------------------------- 1 | #!/.runcvm/guest/bin/bash 2 | 3 | # Exit on errors 4 | set -o errexit -o pipefail 5 | 6 | # Load original environment 7 | . /.runcvm/config 8 | 9 | # Load defaults 10 | . $RUNCVM_GUEST/scripts/runcvm-ctr-defaults && unset PATH 11 | 12 | QEMU_IFUP="$RUNCVM_GUEST/scripts/runcvm-ctr-qemu-ifup" 13 | QEMU_IFDOWN="$RUNCVM_GUEST/scripts/runcvm-ctr-qemu-ifdown" 14 | 15 | INIT="init=$RUNCVM_GUEST/scripts/runcvm-vm-init" 16 | 17 | # Must export TERMINFO so curses library can find terminfo database. 18 | export TERMINFO="$RUNCVM_GUEST/usr/share/terminfo" 19 | 20 | error() { 21 | echo "$1" >&2 22 | exit 1 23 | } 24 | 25 | # Argument e.g. /volume/disk1,/var/lib/docker,ext4,5G 26 | do_disk() { 27 | local spec="$1" 28 | local src dst fs size dir UUID 29 | 30 | local IFS=',' 31 | read src dst fs size <<< $(echo "$spec") 32 | 33 | if [[ -z "$src" || -z "$dst" || -z "$fs" ]]; then 34 | error "Error: disk spec '$spec' invalid: src, dst and fs must all be specified" 35 | fi 36 | 37 | if [[ "$src" = "$dst" ]]; then 38 | error "Error: disk spec '$spec' invalid: src '$src' cannot be same as dst" 39 | fi 40 | 41 | if [[ -e "$src" && ! -f "$src" ]]; then 42 | error "Error: disk spec '$spec' invalid: src '$src' must be a plain file if it exists" 43 | fi 44 | 45 | if [[ -e "$dst" && ! -d "$dst" ]]; then 46 | error "Error: disk spec '$spec' invalid: dst '$dst' must be a directory if it exists" 47 | fi 48 | 49 | if [[ ! -f "$src" ]]; then 50 | 51 | if [[ -z "$size" ]]; then 52 | error "Error: disk spec '$spec' invalid: size must be specified if src '$src' does not exist" 53 | fi 54 | 55 | # Create directory for disk backing file, if needed. 56 | dir="$(busybox dirname "$src")" 57 | if ! [ -d "$dir" ]; then 58 | mkdir -p $(busybox dirname "$src") 59 | fi 60 | 61 | # Create disk backing file. 62 | busybox truncate -s "$size" "$src" >&2 || error "Error: disk spec '$spec' invalid: truncate on '$src' with size '$size' failed" 63 | 64 | # Create filesystem on disk backing file, populated with any pre-existing files from dst. 65 | [ -d "$RUNCVM_VM_MOUNTPOINT/$dst" ]|| mkdir -p "$RUNCVM_VM_MOUNTPOINT/$dst" >&2 66 | mke2fs -q -F -t "$fs" -d "$RUNCVM_VM_MOUNTPOINT/$dst" "$src" >&2 || error "Error: disk spec '$spec' invalid: mke2fs on '$src' with fs '$fs' failed" 67 | fi 68 | 69 | # Create the mountpoint, if we haven't already/it didn't already exist. 70 | [ -d "$RUNCVM_VM_MOUNTPOINT/$dst" ]|| mkdir -p "$RUNCVM_VM_MOUNTPOINT/$dst" >&2 71 | 72 | # Obtain a UUID for the filesystem and add to fstab. 73 | read -r UUID <<< $(blkid -o value "$src") 74 | echo "UUID=$UUID $dst $fs defaults,noatime 0 0" >>/.runcvm/fstab 75 | 76 | # Add disk to QEMU command line. 77 | DISKS+=(-drive file=$src,format=raw,if=virtio,media=disk,cache=directsync,aio=native) 78 | } 79 | 80 | # Argument e.g.
/disk1,/home,ext4,5G;/disk2,/var,ext4,1G 81 | do_disks() { 82 | local IFS=';' 83 | local disk 84 | for disk in $1 85 | do 86 | do_disk "$disk" 87 | done 88 | } 89 | 90 | do_networks() { 91 | local id=0 ifpath if mac 92 | local DOCKER_IF DOCKER_IF_MAC DOCKER_IF_MTU DOCKER_IF_IP DOCKER_IF_IP_NETPREFIX DOCKER_IF_IP_GW 93 | 94 | for ifpath in /.runcvm/network/devices/* 95 | do 96 | if=$(busybox basename "$ifpath") 97 | 98 | [ "$if" = "default" ] && continue 99 | 100 | load_network "$if" 101 | 102 | mac=$(busybox sed -r 's/^..:..:../52:54:00/' <<<$DOCKER_IF_MAC) 103 | 104 | IFACES+=( 105 | -netdev tap,id=qemu$id,ifname=tap-$DOCKER_IF,script=$QEMU_IFUP,downscript=$QEMU_IFDOWN 106 | -device virtio-net-pci,netdev=qemu$id,mac=$mac,rombar=$id 107 | ) 108 | 109 | id=$((id+1)) 110 | done 111 | } 112 | 113 | DISKS=() 114 | if [ -n "$RUNCVM_DISKS" ]; then 115 | do_disks "$RUNCVM_DISKS" 116 | fi 117 | 118 | IFACES=() 119 | do_networks 120 | 121 | if [ -n "$RUNCVM_TMPFS" ]; then 122 | echo "$RUNCVM_TMPFS" >>/.runcvm/fstab 123 | fi 124 | 125 | if [[ -z "$RUNCVM_CPUS" || "$RUNCVM_CPUS" -le 0 ]]; then 126 | RUNCVM_CPUS=$(busybox nproc) 127 | fi 128 | 129 | # TODO: 130 | # - Consider using '-device pvpanic' 131 | 132 | if [ "$RUNCVM_ARCH" = "arm64" ]; then 133 | CMD="$(which qemu-system-aarch64)" 134 | MACHINE+=(-cpu max -machine virt,gic-version=max,usb=off) 135 | else 136 | CMD="$(which qemu-system-x86_64)" 137 | MACHINE+=(-enable-kvm -cpu host,pmu=off -machine q35,accel=kvm,usb=off,sata=off -device isa-debug-exit) 138 | fi 139 | 140 | if [ -n "$RUNCVM_QEMU_DISPLAY" ]; then 141 | DISPLAY+=(-display $RUNCVM_QEMU_DISPLAY) 142 | else 143 | DISPLAY+=(-nographic) 144 | DISPLAY+=(-vga none) 145 | fi 146 | 147 | if [ "$RUNCVM_BIOS_DEBUG" != "1" ]; then 148 | # Disable SeaBIOS serial console. 149 | # This -fw_cfg path is modified from the SeaBIOS default (to avoid an otherwise-inevitable QEMU 150 | # warning being emitted) and so requires patched bios.bin file(s) (see Dockerfile) 151 | OPTS+=(-fw_cfg opt/org.seabios/etc/sercon-port,string=0) 152 | fi 153 | 154 | MEM_BACKEND=(-numa node,memdev=mem) 155 | if [ "$RUNCVM_HUGETLB" != "1" ]; then 156 | # Tests suggest prealloc=on slows down mem-path=/dev/shm 157 | MEM_PATH="/dev/shm" MEM_PREALLOC="off" 158 | MEM_BACKEND+=(-object memory-backend-file,id=mem,size=$RUNCVM_MEM_SIZE,mem-path=$MEM_PATH,share=on,prealloc=$MEM_PREALLOC) 159 | else 160 | # Fastest performance: +15% CPU/net intensive; 3.5x disk intensive. 161 | MEM_BACKEND+=(-object memory-backend-memfd,id=mem,size=$RUNCVM_MEM_SIZE,share=on,prealloc=on,hugetlb=on) 162 | fi 163 | 164 | # 16-64 works well and is more performant than 1024 in some scenarios. 165 | # For now, stick with original figure. 166 | VIRTIOFS_QUEUE_SIZE=1024 167 | VIRTIOFS+=( 168 | -chardev socket,id=virtiofs,path=$QEMU_VIRTIOFSD_SOCKET 169 | -device vhost-user-fs-pci,queue-size=$VIRTIOFS_QUEUE_SIZE,chardev=virtiofs,tag=runcvmfs,ats=off 170 | ) 171 | 172 | CONSOLE=() 173 | CONSOLE_MONITOR="0" 174 | if [ "$CONSOLE_MONITOR" = "1" ]; then 175 | # Creates a multiplexed stdio backend connected to the serial port (and the QEMU monitor). 176 | # Use with /dev/ttyS0 177 | CONSOLE+=( 178 | -chardev stdio,id=char0,mux=on,signal=off 179 | -serial chardev:char0 -mon chardev=char0 180 | ) 181 | 182 | # Set monitor escape key to CTRL-T to reduce risk of conflict (as default, CTRL-A, is commonly used) 183 | CONSOLE+=(-echr 20) 184 | 185 | CONSOLE_DEV="ttyS0" 186 | else 187 | # Creates a stdio backend connected to the virtual console.
188 | # Use with /dev/hvc0 189 | CONSOLE+=( 190 | -chardev stdio,id=char0,mux=off,signal=off 191 | -device virtconsole,chardev=char0,id=console0 192 | ) 193 | 194 | CONSOLE_DEV="hvc0" 195 | fi 196 | 197 | # Save choice of console device 198 | echo "$CONSOLE_DEV" >/.runcvm/console 199 | 200 | # Experimental: Enable to specify a dedicated PCI bridge 201 | # OPTS+=(-device pci-bridge,bus=pcie.0,id=pci-bridge-0,chassis_nr=1,shpc=off,addr=2,io-reserve=4k,mem-reserve=1m,pref64-reserve=1m) 202 | 203 | # Experimental: Enable for a SCSI bus 204 | # OPTS+=(-device virtio-scsi-pci,id=scsi0,disable-modern=true) 205 | 206 | # Disable IPv6, which is currently unsupported, at kernel boot time 207 | APPEND+=(ipv6.disable=1 panic=-1) 208 | 209 | # Disable unneeded functionality 210 | APPEND+=(scsi_mod.scan=none tsc=reliable no_timer_check rcupdate.rcu_expedited=1 i8042.direct=1 i8042.dumbkbd=1 i8042.nopnp=1 i8042.noaux=1 noreplace-smp reboot=k cryptomgr.notests pci=lastbus=0 selinux=0) 211 | 212 | # Enable systemd startup logging by default: 213 | # - Only effective when --env=RUNCVM_KERNEL_DEBUG=1 214 | # - Override this by launching with --env='RUNCVM_KERNEL_APPEND=systemd.show_status=0' 215 | APPEND+=(systemd.show_status=1) 216 | 217 | if [ "$RUNCVM_KERNEL_DEBUG" = "1" ]; then 218 | APPEND+=(console=$CONSOLE_DEV) 219 | else 220 | APPEND+=(quiet) 221 | fi 222 | 223 | ARGS=( 224 | -no-user-config 225 | -nodefaults 226 | -no-reboot 227 | 228 | -action panic=none 229 | -action reboot=shutdown 230 | 231 | "${MACHINE[@]}" 232 | "${DISPLAY[@]}" 233 | "${OPTS[@]}" 234 | 235 | # N.B. There is a counterintuitive relationship between cpus and memory, and performance: 236 | # - more cpus needs more memory to maintain the same virtiofs disk I/O performance. 237 | -m "$RUNCVM_MEM_SIZE" 238 | -smp $RUNCVM_CPUS,cores=1,threads=1,sockets=$RUNCVM_CPUS,maxcpus=$RUNCVM_CPUS 239 | 240 | # Creates a virtio-serial bus on the PCI bus; this is used for the guest agent and virtiofs 241 | -device virtio-serial-pci,id=serial0 242 | 243 | # Creates an RNG on the PCI bus 244 | -object rng-random,id=rng0,filename=/dev/urandom -device virtio-rng-pci,rng=rng0 245 | 246 | # Memory backend 247 | "${MEM_BACKEND[@]}" 248 | 249 | # virtiofs socket and interface 250 | "${VIRTIOFS[@]}" 251 | 252 | # Configure host/container tap device with PXE roms disabled 253 | "${IFACES[@]}" 254 | "${DISKS[@]}" 255 | 256 | # Configure console 257 | "${CONSOLE[@]}" 258 | 259 | # Support for guest agent 260 | -chardev socket,id=qemuguest0,path=$QEMU_GUEST_AGENT,server=on,wait=off 261 | -device virtserialport,chardev=qemuguest0,name=org.qemu.guest_agent.0 262 | 263 | # Creates a unix socket for the QEMU monitor 264 | -monitor unix:$QEMU_MONITOR_SOCKET,server,nowait 265 | 266 | # Kernel and initrd and kernel cmdline 267 | -kernel $RUNCVM_KERNEL_PATH 268 | -initrd $RUNCVM_KERNEL_INITRAMFS_PATH 269 | -L $RUNCVM_GUEST/usr/share/qemu 270 | -append "$RUNCVM_KERNEL_ROOT $INIT rw ${APPEND[*]} $RUNCVM_KERNEL_APPEND" 271 | ) 272 | 273 | if [[ "$RUNCVM_BREAK" =~ preqemu ]]; then echo "Preparing to run: '$CMD' ${ARGS[@]@Q}"; bash; fi 274 | 275 | exec "$CMD" "${ARGS[@]}" 276 | -------------------------------------------------------------------------------- /runcvm-scripts/runcvm-ctr-qemu-ifdown: -------------------------------------------------------------------------------- 1 | #!/.runcvm/guest/bin/bash 2 | 3 | # Load original environment 4 | . /.runcvm/config 5 | 6 | # Load defaults and aliases 7 | . 
$RUNCVM_GUEST/scripts/runcvm-ctr-defaults 8 | 9 | ip link set dev "$1" down || true 10 | exit 0 11 | -------------------------------------------------------------------------------- /runcvm-scripts/runcvm-ctr-qemu-ifup: -------------------------------------------------------------------------------- 1 | #!/.runcvm/guest/bin/bash 2 | 3 | # Load original environment 4 | . /.runcvm/config 5 | 6 | # Load defaults and aliases 7 | . $RUNCVM_GUEST/scripts/runcvm-ctr-defaults 8 | 9 | tap="$1" 10 | if="$(busybox sed 's/tap-//' <<<$tap)" 11 | bri="$(busybox sed 's/tap-/br-/' <<<$tap)" 12 | 13 | load_network "$if" 14 | 15 | ip link set dev "$tap" up mtu "${DOCKER_IF_MTU:=1500}" master "$bri" 16 | 17 | exit 0 -------------------------------------------------------------------------------- /runcvm-scripts/runcvm-ctr-qemu-poweroff: -------------------------------------------------------------------------------- 1 | #!/.runcvm/guest/bin/bash 2 | 3 | # Load original environment 4 | . /.runcvm/config 5 | 6 | # Load defaults and aliases 7 | . $RUNCVM_GUEST/scripts/runcvm-ctr-defaults 8 | 9 | echo "system_powerdown" | nc -w 1 -U $QEMU_MONITOR_SOCKET -------------------------------------------------------------------------------- /runcvm-scripts/runcvm-ctr-shutdown: -------------------------------------------------------------------------------- 1 | #!/.runcvm/guest/bin/bash 2 | 3 | # runcvm-init forks and execs this script when it receives a SIGTERM 4 | 5 | # Load original environment 6 | . /.runcvm/config 7 | 8 | poweroff() { 9 | # Try ACPI poweroff 10 | $RUNCVM_GUEST/scripts/runcvm-ctr-qemu-poweroff 11 | 12 | # Try running busybox poweroff 13 | $RUNCVM_GUEST/scripts/runcvm-ctr-exec 0:0 / 0 0 $RUNCVM_GUEST/bin/poweroff &>/dev/null 14 | 15 | # Try killing the VM's PID 1 16 | $RUNCVM_GUEST/scripts/runcvm-ctr-exec 0:0 / 0 0 $RUNCVM_GUEST/bin/busybox kill 1 &>/dev/null 17 | } 18 | 19 | poweroff 20 | 21 | exit 0 22 | -------------------------------------------------------------------------------- /runcvm-scripts/runcvm-ctr-virtiofsd: -------------------------------------------------------------------------------- 1 | #!/.runcvm/guest/bin/bash 2 | 3 | # Load defaults and aliases 4 | . /.runcvm/guest/scripts/runcvm-ctr-defaults 5 | 6 | if [ "$RUNCVM_SYS_ADMIN" = "1" ]; then 7 | OPTS+=(-o modcaps=+sys_admin) 8 | fi 9 | 10 | OPTS+=(-o cache=always) 11 | 12 | # Experimental options that may improve performance. 13 | # OPTS+=(-o cache=auto) 14 | # OPTS+=(--thread-pool-size=1) 15 | 16 | # Send logs to /run in container (not in VM) 17 | exec "$(which virtiofsd)" "${OPTS[@]}" -o announce_submounts -o xattr --socket-path=$QEMU_VIRTIOFSD_SOCKET -o source=$RUNCVM_VM_MOUNTPOINT -o sandbox=chroot >/run/.virtiofsd.log 2>&1 -------------------------------------------------------------------------------- /runcvm-scripts/runcvm-install-runtime.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | RUNCVM=/opt/runcvm 4 | RUNCVM_LD=$RUNCVM/lib/ld 5 | RUNCVM_JQ=$RUNCVM/usr/bin/jq 6 | MNT=/runcvm 7 | REPO=${REPO:-newsnowlabs/runcvm} 8 | 9 | log() { 10 | echo "$@" 11 | } 12 | 13 | jq() { 14 | $RUNCVM_LD $RUNCVM_JQ "$@" 15 | } 16 | 17 | jq_set() { 18 | local file="$1" 19 | shift 20 | 21 | local tmp="/tmp/$$.json" 22 | 23 | if jq "$@" $file >$tmp; then 24 | mv $tmp $file 25 | else 26 | echo "Failed to update $(basename $file); aborting!" 
>&2 27 | exit 1 28 | fi 29 | } 30 | 31 | jq_get() { 32 | local file="$1" 33 | shift 34 | 35 | jq -r "$@" $file 36 | } 37 | 38 | usage() { 39 | cat <<_EOE_ >&2 40 | 41 | Usage: sudo $0 42 | _EOE_ 43 | exit 1 44 | } 45 | 46 | check_rp_filter() { 47 | # For RunCVM to work, the following condition on /proc/sys/net/ipv4/conf/<bridge> must be met: 48 | # - the max of all/rp_filter and <bridge>/rp_filter should be 0 or 2 49 | # (where <bridge> is the bridge underpinning the Docker network to which RunCVM instances will be attached) 50 | # 51 | # This means that: 52 | # - if all/rp_filter is set to 0, then <bridge>/rp_filter must be set to 0 or 2 53 | # (or, if <bridge> is not yet or might not yet have been created, then default/rp_filter must be set to 0 or 2) 54 | # - if all/rp_filter is set to 1, then <bridge>/rp_filter must be set to 2 55 | # (or, if <bridge> is not yet or might not yet have been created, then default/rp_filter must be set to 2) 56 | # - if all/rp_filter is set to 2, then no further action is needed 57 | 58 | local rp_filter_all rp_filter_default 59 | 60 | log "- Checking rp_filter ..." 61 | 62 | if [ -f "/proc/sys/net/ipv4/conf/all/rp_filter" ]; then 63 | rp_filter_all=$(cat /proc/sys/net/ipv4/conf/all/rp_filter) 64 | else 65 | log " - Warning: could not find /proc/sys/net/ipv4/conf/all/rp_filter" 66 | fi 67 | 68 | if [ -f "/proc/sys/net/ipv4/conf/default/rp_filter" ]; then 69 | rp_filter_default=$(cat /proc/sys/net/ipv4/conf/default/rp_filter) 70 | else 71 | log " - Warning: could not find /proc/sys/net/ipv4/conf/default/rp_filter" 72 | fi 73 | 74 | if [ -z "$rp_filter_all" ] || [ -z "$rp_filter_default" ]; then 75 | return 76 | fi 77 | 78 | if [ "$rp_filter_all" = "2" ]; then 79 | log " - sys.net.ipv4.conf.all.rp_filter is set to 2; assuming no further action needed" 80 | return 81 | elif [ "$rp_filter_all" = "0" ] && [ "$rp_filter_default" = "0" ]; then 82 | log " - sys.net.ipv4.conf.all.rp_filter AND sys.net.ipv4.conf.default.rp_filter are set to 0; assuming no further action needed" 83 | return 84 | fi 85 | 86 | log " - sys.net.ipv4.conf.all.rp_filter is set to $rp_filter_all; fixing ..." 87 | log " - Setting sys.net.ipv4.conf.all.rp_filter and sys.net.ipv4.conf.default.rp_filter to 2 ..." 88 | echo 2 >/proc/sys/net/ipv4/conf/all/rp_filter 89 | echo 2 >/proc/sys/net/ipv4/conf/default/rp_filter 90 | 91 | log " - Patching /etc/sysctl.conf, /etc/sysctl.d/* to make these settings persist after reboot ..." 92 | find /etc/sysctl.conf /etc/sysctl.d -type f -exec sed -r -i 's/^([ ]*net.ipv4.conf.(all|default).rp_filter)=(1)$/# DISABLED BY RUNCVM\n# \1=\3\n# ADDED BY RUNCVM\n\1=2/' {} \; 93 | } 94 | 95 | docker_restart() { 96 | # docker_restart 97 | # - With systemd, run: systemctl restart docker 98 | # - On GitHub Codespaces, run: sudo killall dockerd && sudo /usr/local/share/docker-init.sh 99 | 100 | local cmd init 101 | 102 | init=$(ps -o comm,pid 1 | grep ' 1$' | awk '{print $1}') 103 | 104 | log " - Preparing to restart dockerd ..." 105 | 106 | if [ "$init" = "systemd" ]; then 107 | log " - Detected systemd" 108 | cmd="systemctl restart docker" 109 | 110 | elif [ -x "/etc/init.d/docker" ]; then 111 | log " - Detected sysvinit" 112 | cmd="/etc/init.d/docker restart" 113 | 114 | elif [ "$init" = "docker-init" ]; then 115 | 116 | if [ -x "/usr/local/share/docker-init.sh" ]; then 117 | log " - Detected docker-init on GitHub Codespaces" 118 | cmd="killall dockerd && /usr/local/share/docker-init.sh" 119 | fi 120 | fi 121 | 122 | if [ -n "$cmd" ]; then 123 | log " - Preparing to run: $cmd" 124 | read -p " - Run this?
(Y/n): " yesno 125 | 126 | if [ "$yesno" != "${yesno#[Yy]}" ] || [ -z "$yesno" ]; then 127 | log " - Restarting dockerd with: $cmd" 128 | sh -c "$cmd" 2>&1 | sed 's/^/ - /' 129 | 130 | # Wait for dockerd to restart 131 | log " - Waiting for dockerd to restart ..." 132 | while ! docker ps >/dev/null 2>&1; do 133 | sleep 0.5 134 | done 135 | log " - Restarted dockerd successfully" 136 | 137 | else 138 | log " - Please restart dockerd manually in the usual manner for your system" 139 | fi 140 | 141 | else 142 | log " - Couldn't detect restart mechanism for dockerd, please restart manually in the usual manner for your system" 143 | fi 144 | } 145 | 146 | log 147 | log "RunCVM Runtime Installer" 148 | log "========================" 149 | log 150 | 151 | if [ $(id -u) -ne 0 ]; then 152 | log "- Error: $0 must be run as root. Please relaunch using sudo." 153 | usage 154 | fi 155 | 156 | for app in docker dockerd 157 | do 158 | if [ -z $(which docker) ]; then 159 | log "- Error: $0 currently requires the '$app' binary; please install it and try again" 160 | usage 161 | fi 162 | done 163 | 164 | 165 | if [ "$1" = "--no-dockerd" ]; then 166 | NO_DOCKERD="1" 167 | log "- Skipping dockerd check and docker-based package install due to '--no-dockerd'" 168 | shift 169 | else 170 | log "- Checking dockerd ..." 171 | if docker info >/dev/null 2>&1; then 172 | log " - Detected running dockerd" 173 | else 174 | log " - Error: dockerd not running; please start dockerd; aborting!" 175 | exit 1 176 | fi 177 | fi 178 | 179 | # Install RunCVM package to $MNT 180 | if [ -z "$NO_DOCKERD" ]; then 181 | log "- Installing RunCVM package to $MNT ..." 182 | if docker run --rm -v /opt/runcvm:$MNT $REPO --quiet; then 183 | log "- Installed RunCVM package to /opt/runcvm" 184 | else 185 | log "- Failed to install RunCVM package to /opt/runcvm; aborting!" 186 | exit 1 187 | fi 188 | fi 189 | 190 | if [ -d "/etc/docker" ]; then 191 | 192 | log "- Detected /etc/docker" 193 | 194 | if ! [ -f "/etc/docker/daemon.json" ]; then 195 | log " - Creating empty daemon.json" 196 | echo '{}' >/etc/docker/daemon.json 197 | fi 198 | 199 | if [ $(jq_get "/etc/docker/daemon.json" ".runtimes.runcvm.path") != "/opt/runcvm/scripts/runcvm-runtime" ]; then 200 | log " - Adding runcvm to daemon.json runtimes property ..." 201 | 202 | if jq_set "/etc/docker/daemon.json" '.runtimes.runcvm.path |= "/opt/runcvm/scripts/runcvm-runtime"'; then 203 | log " - Done" 204 | else 205 | log " - Failed: $!" 206 | exit 1 207 | fi 208 | 209 | # Attempt restart of dockerd 210 | # (if dockerd not found, we'll just continue) 211 | docker_restart 212 | 213 | else 214 | log " - Valid runcvm property already found in daemon.json" 215 | fi 216 | 217 | if docker info 2>/dev/null | grep -q runcvm; then 218 | # if [ $(docker info --format '{{ json .Runtimes.runcvm }}') = "{"path":"/opt/runcvm/scripts/runcvm-runtime"}" ]; then 219 | log " - Verification of RunCVM runtime in Docker completed" 220 | else 221 | log " - Warning: could not verify RunCVM runtime in Docker; perhaps you need to restart Docker manually" 222 | fi 223 | 224 | else 225 | log "- No /etc/docker detected; your mileage with RunCVM without Docker may vary!" 
226 | fi 227 | 228 | if [ -n "$(which podman)" ]; then 229 | log "- Detected podman binary" 230 | cat <<_EOE_ >&2 231 | - To enable experimental RunCVM support for Podman, add the following 232 | to /etc/containers/containers.conf in the [engine.runtimes] section: 233 | 234 | runcvm = [ "/opt/runcvm/scripts/runcvm-runtime" ] 235 | _EOE_ 236 | fi 237 | 238 | # Check, correct and make persistent required rp_filter settings 239 | check_rp_filter 240 | 241 | log "- RunCVM installation/upgrade complete." 242 | log -------------------------------------------------------------------------------- /runcvm-scripts/runcvm-ip-functions: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | cidr_to_int() { 4 | echo "$(( 0xffffffff ^ ((1 << (32 - $1)) - 1) ))" 5 | } 6 | 7 | int_to_ip() { 8 | local value="$1" 9 | echo "$(( ($1 >> 24) & 0xff )).$(( ($1 >> 16) & 0xff )).$(( ($1 >> 8) & 0xff )).$(( $1 & 0xff ))" 10 | } 11 | 12 | cidr_to_netmask() { 13 | local value=$(cidr_to_int "$1") 14 | int_to_ip "$value" 15 | } 16 | 17 | ip_prefix_to_network() { 18 | local IFS i1 i2 i3 i4 m1 m2 m3 m4 19 | IFS=. read -r i1 i2 i3 i4 <<< "$1" 20 | 21 | local mask=$(cidr_to_netmask "$2") 22 | IFS=. read -r m1 m2 m3 m4 <<< "$mask" 23 | 24 | printf "%d.%d.%d.%d\n" "$((i1 & m1))" "$((i2 & m2))" "$((i3 & m3))" "$((i4 & m4))" 25 | } 26 | 27 | cidr_to_bcastmask() { 28 | local value=$(( (1 << 32) - $(cidr_to_int "$1") - 1 )) 29 | int_to_ip "$value" 30 | } 31 | 32 | ip_prefix_to_bcast() { 33 | local IFS i1 i2 i3 i4 m1 m2 m3 m4 34 | IFS=. read -r i1 i2 i3 i4 <<< "$1" 35 | 36 | local mask=$(cidr_to_bcastmask "$2") 37 | IFS=. read -r m1 m2 m3 m4 <<< "$mask" 38 | 39 | printf "%d.%d.%d.%d\n" "$((i1 | m1))" "$((i2 | m2))" "$((i3 | m3))" "$((i4 | m4))" 40 | } -------------------------------------------------------------------------------- /runcvm-scripts/runcvm-runtime: -------------------------------------------------------------------------------- 1 | #!/opt/runcvm/lib/ld-musl-x86_64.so.1 /opt/runcvm/bin/bash 2 | 3 | # REFERENCES 4 | 5 | # Qemu: 6 | # - https://github.com/joshkunz/qemu-docker 7 | # - https://mergeboard.com/blog/2-qemu-microvm-docker/ 8 | # - https://github.com/BBVA/kvm 9 | 10 | # Virtiofs 11 | # - https://vmsplice.net/~stefan/virtio-fs_%20A%20Shared%20File%20System%20for%20Virtual%20Machines.pdf 12 | # - https://virtio-fs.gitlab.io/howto-qemu.html 13 | # - https://www.tauceti.blog/posts/qemu-kvm-share-host-directory-with-vm-with-virtio/ 14 | 15 | # Container config.json spec 16 | # - https://github.com/opencontainers/runtime-spec/ 17 | # - https://github.com/opencontainers/runtime-spec/blob/main/config.md 18 | 19 | # Mount namespaces 20 | # - https://www.kernel.org/doc/Documentation/filesystems/sharedsubtree.txt 21 | # - https://www.redhat.com/sysadmin/mount-namespaces 22 | 23 | RUNCVM=/opt/runcvm 24 | RUNCVM_LD=$RUNCVM/lib/ld 25 | RUNCVM_JQ=$RUNCVM/usr/bin/jq 26 | RUNCVM_VM_MOUNTPOINT="/vm" 27 | RUNCVM_GUEST=/.runcvm/guest 28 | RUNCVM_ENTRYPOINT=$RUNCVM_GUEST/scripts/runcvm-ctr-entrypoint 29 | RUNCVM_EXEC="$RUNCVM_GUEST/scripts/runcvm-ctr-exec" 30 | RUNCVM_KERNELS=$RUNCVM/kernels 31 | RUNCVM_GUEST_KERNELS=$RUNCVM_GUEST/kernels 32 | RUNCVM_KERNEL_DEFAULT=debian 33 | RUNCVM_MEM_SIZE_DEFAULT="512M" 34 | RUNCVM_DEBUG="" 35 | 36 | debug() { 37 | [ -n "$RUNCVM_DEBUG" ] && true || false 38 | } 39 | 40 | log() { 41 | debug && echo "$(date '+%Y-%m-%d %H:%M:%S.%6N'): $@" >>/tmp/runcvm-$$.log 42 | } 43 | 44 | error() { 45 | 46 | # Skip past any docker error ending in CR 47 | 
(echo; echo) >&2 48 | 49 | # Dump message to stderr 50 | echo "RunCVM: Error: $1" >&2 51 | 52 | # Dump error also to the logfile 53 | log "RunCVM: Error: $1" 54 | exit -1 55 | } 56 | 57 | load_env_from_file() { 58 | local file="$1" 59 | local var="$2" 60 | 61 | # Return gracefully if no $file exists 62 | if ! [ -f "$file" ]; then 63 | return 0 64 | fi 65 | 66 | while read LINE 67 | do 68 | local name="${LINE%%=*}" 69 | local value="${LINE#*=}" 70 | 71 | if [ "$name" != "$LINE" ] && [ "$value" != "$LINE" ] && [ "$name" = "$var" ]; then 72 | # We found variable $name: return it, removing any leading/trailing double quotes 73 | echo "$value" | sed 's/^"//;s/"$//' 74 | return 0 75 | fi 76 | done <"$file" 77 | 78 | return 1 79 | } 80 | 81 | jq() { 82 | $RUNCVM_LD $RUNCVM_JQ "$@" 83 | } 84 | 85 | jq_set() { 86 | local file="$1" 87 | shift 88 | 89 | local tmp="/tmp/config.json.$$" 90 | 91 | if jq "$@" $file >$tmp; then 92 | mv $tmp $file 93 | else 94 | echo "Failed to update $(basename $file); aborting!" >&2 95 | exit 1 96 | fi 97 | } 98 | 99 | jq_get() { 100 | local file="$1" 101 | shift 102 | 103 | jq -r "$@" $file 104 | } 105 | 106 | get_process_env() { 107 | local file="$1" 108 | local var="$2" 109 | local default="$3" 110 | local value 111 | 112 | value=$(jq_get "$file" --arg env "$var" '.env[] | select(match("^" + $env + "=")) | match("^" + $env + "=(.*)") | .captures[] | .string') 113 | 114 | [ -n "$value" ] && echo -n "$value" || echo -n "$default" 115 | } 116 | 117 | get_process_env_boolean() { 118 | local file="$1" 119 | local var="$2" 120 | local value 121 | 122 | value=$(jq_get "$file" --arg env "$var" '.env[] | select(match("^" + $env + "=")) | match("^" + $env + "=(.*)") | .captures[] | .string') 123 | 124 | [ -n "$value" ] && echo "1" || echo "0" 125 | } 126 | 127 | get_config_env() { 128 | local var="$1" 129 | local default="$2" 130 | local value 131 | 132 | value=$(jq_get "$CFG" --arg env "$var" '.process.env[] | select(match("^" + $env + "=")) | match("^" + $env + "=(.*)") | .captures[] | .string') 133 | 134 | [ -n "$value" ] && echo -n "$value" || echo -n "$default" 135 | } 136 | 137 | set_config_env() { 138 | local var="$1" 139 | local value="$2" 140 | 141 | jq_set "$CFG" --arg env "$var=$value" '.process.env |= (.+ [$env] | unique)' 142 | } 143 | 144 | 145 | # PARSE RUNC GLOBAL OPTIONS: 146 | # --debug enable debug logging 147 | # --log value set the log file to write runc logs to (default is '/dev/stderr') 148 | # --log-format value set the log format ('text' (default), or 'json') (default: "text") 149 | # --root value root directory for storage of container state (this should be located in tmpfs) (default: "/run/user/1000/runc") 150 | # --criu value path to the criu binary used for checkpoint and restore (default: "criu") 151 | # --systemd-cgroup enable systemd cgroup support, expects cgroupsPath to be of form "slice:prefix:name" for e.g.
"system.slice:runc:434234" 152 | # --rootless value ignore cgroup permission errors ('true', 'false', or 'auto') (default: "auto") 153 | 154 | COMMAND_LINE=("$@") 155 | 156 | if debug; then 157 | log "Command line: $0 ${COMMAND_LINE[@]@Q}" 158 | fi 159 | 160 | while true 161 | do 162 | case "$1" in 163 | --debug|--systemd-cgroup) shift; continue; ;; 164 | --log|--log-format|--root|--criu|--rootless) shift; shift; continue; ;; 165 | --log=*|--log-format=*|--root=*|--criu=*|--rootless=*) shift; continue; ;; 166 | *) break; ;; 167 | esac 168 | done 169 | 170 | COMMAND="$1" 171 | shift 172 | 173 | if [ "$COMMAND" = "create" ]; then 174 | 175 | debug && log "Command: create" 176 | 177 | # USAGE: 178 | # runc create [command options] 179 | # 180 | # PARSE 'create' COMMAND OPTIONS 181 | # --bundle value, -b value path to the root of the bundle directory, defaults to the current directory 182 | # --console-socket value path to an AF_UNIX socket which will receive a file descriptor referencing the master end of the console's pseudoterminal 183 | # --pid-file value specify the file to write the process id to 184 | # --no-pivot do not use pivot root to jail process inside rootfs. This should be used whenever the rootfs is on top of a ramdisk 185 | # --no-new-keyring do not create a new session keyring for the container. This will cause the container to inherit the calling processes session key 186 | # --preserve-fds value Pass N additional file descriptors to the container (stdio + $LISTEN_FDS + N in total) (default: 0) 187 | while true 188 | do 189 | case "$1" in 190 | --bundle|-b) shift; BUNDLE="$1"; shift; continue; ;; 191 | --console-socket|--pid-file|--preserve-fds) shift; shift; continue; ;; 192 | --no-pivot|--no-new-keyring) shift; continue; ;; 193 | *) break; ;; 194 | esac 195 | done 196 | 197 | ID="$1" 198 | 199 | CFG="$BUNDLE/config.json" 200 | ROOT=$(jq -r .root.path $CFG) 201 | 202 | # Allow user to enable debug logging 203 | if [ "$(get_config_env RUNCVM_RUNTIME_DEBUG)" = "1" ]; then 204 | RUNCVM_DEBUG="1" 205 | fi 206 | 207 | if debug; then 208 | log "Command line: $0 ${COMMAND_LINE[@]@Q}" 209 | log "Command: create bundle=$BUNDLE id=$ID root=$ROOT" 210 | 211 | # Save formatted config.json 212 | jq -r . <$CFG >/tmp/config.json-$$-1 213 | 214 | fi 215 | 216 | # Pending support for user-specified mountpoint for the guest (VM) binaries and scripts 217 | set_config_env "RUNCVM_GUEST" "$RUNCVM_GUEST" 218 | 219 | ARG0=$(jq_get "$CFG" '.process.args[0]') 220 | # Now look in mounts for destination == $ARG0 (this works for Docker and Podman) 221 | if [ "$ARG0" = "/sbin/docker-init" ] || [ "$ARG0" = "/dev/init" ]; then 222 | 223 | # User intended an init process to be run in the container, 224 | # so arrange to run our own instead, that will launch the original entrypoint 225 | 226 | # Look for and remove a mountpoint for this process. 227 | jq_set "$CFG" --arg init "$ARG0" '(.mounts[] | select(.destination == $init)) |= empty' 228 | 229 | # Replace the first argument with our own entrypoint; and remove the second, '--' (for now, #TODO) 230 | jq_set "$CFG" --arg entrypoint "$RUNCVM_ENTRYPOINT" '.process.args[0] = $entrypoint | del(.process.args[1])' 231 | 232 | # We know the user intended an init process to be run in the container. 233 | # TODO: We might want to indicate this, so that our entrypoint does not skip doing this 234 | # if the original entrypoint also looks like an init process. 
235 | set_config_env "RUNCVM_INIT" "1" 236 | else 237 | # We don't know if the original entrypoint is an init process or not. 238 | # Run our entrypoint first to work this out and do the right thing. 239 | 240 | jq_set "$CFG" --arg entrypoint "$RUNCVM_ENTRYPOINT" '.process.args |= [$entrypoint] + .' 241 | fi 242 | 243 | # SET RUNCVM_HAS_HOME 244 | # 245 | # If the HOME env var was not set either in the image, or via docker run, 246 | # then it will be missing in the config env. Detect this case for communication to runcvm-ctr-entrypoint 247 | # so that HOME can be set to the requested user's default homedir. 248 | # 249 | # - See runcvm-ctr-entrypoint for full details of how/why hasHome is needed and HOME gets set. 250 | if [ -n "$(get_config_env HOME)" ]; then 251 | set_config_env "RUNCVM_HAS_HOME" "1" 252 | else 253 | set_config_env "RUNCVM_HAS_HOME" "0" 254 | fi 255 | 256 | # CONFIGURE USER 257 | # - Must be root to run container 258 | RUNCVM_UIDGID=$(jq_get "$CFG" '(.process.user.uid | tostring) + ":" + (.process.user.gid | tostring) + ":" + ((.process.user.additionalGids // []) | join(","))') 259 | set_config_env "RUNCVM_UIDGID" "$RUNCVM_UIDGID" 260 | jq_set "$CFG" '.process.user = {"uid":0, "gid":0}' 261 | log "RUNCVM_UIDGID=$RUNCVM_UIDGID" 262 | 263 | # CONFIGURE CPUS 264 | RUNCVM_CPUS=$(( $(jq_get "$CFG" '.linux.resources.cpu.quota') / 100000)) 265 | set_config_env "RUNCVM_CPUS" "$RUNCVM_CPUS" 266 | log "RUNCVM_CPUS=$RUNCVM_CPUS" 267 | 268 | # CONFIGURE MOUNTS 269 | set_config_env "RUNCVM_VM_MOUNTPOINT" "$RUNCVM_VM_MOUNTPOINT" 270 | 271 | # First extract list of tmpfs mounts in fstab form, then delete them from the config 272 | RUNCVM_TMPFS=$(jq_get "$CFG" '( .mounts[] | select(.type == "tmpfs" and (.destination | test("^/dev(/|$)") | not) ) ) | [.source + " " + .destination + " tmpfs " + (.options | map(select(. != "rprivate" and . != "private")) | join(",")) + " 0 0"] | .[0]') 273 | jq_set "$CFG" -r 'del( .mounts[] | select(.type == "tmpfs" and (.destination | test("^/dev(/|$)") | not) ) )' 274 | set_config_env "RUNCVM_TMPFS" "$RUNCVM_TMPFS" 275 | 276 | # Rewrite all pre-existing bind/volume mounts (except those at or below /disks) to mount 277 | # below $RUNCVM_VM_MOUNTPOINT instead of below /. 278 | # 279 | # TODO TO CONSIDER: 280 | # If we excluded /etc/(resolv.conf,hosts,hostname), and moved these to top of the array 281 | # (by promoting them at the end of the below statements), they would be present in both 282 | # container and VM. 283 | # 284 | # N.B. A mount at or underneath /disks will NOT be mapped to /vm/disks - this path is reserved for mounting disk files to the container 285 | jq_set "$CFG" --arg vm "$RUNCVM_VM_MOUNTPOINT" '( .mounts[] | select(.type == "bind" and (.destination | test("^/disks(/|$)") | not) ) ).destination |= $vm + .' 286 | 287 | # Mount / from container to $RUNCVM_VM_MOUNTPOINT, recursively binding all pre-existing mount points 288 | # (these being only the ones defined ahead of this item in the mounts[] array - so order matters!) 289 | jq_set "$CFG" --arg root "$ROOT" --arg vm "$RUNCVM_VM_MOUNTPOINT" '.mounts |= [{"destination":$vm,"type":"bind","source":$root,"options":["rbind","private","rw"]}] + .' 
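# The statement above prepends a mounts[] entry equivalent to (illustrative; <rootfs path> is the
# bundle's .root.path):
#   {"destination":"/vm","type":"bind","source":"<rootfs path>","options":["rbind","private","rw"]}
# i.e. the container's entire root filesystem is re-exposed under /vm (RUNCVM_VM_MOUNTPOINT),
# which runcvm-ctr-virtiofsd exports to the VM as its virtiofs root filesystem.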
290 | 291 | # Mount /opt/runcvm from host to container 292 | # Define this at top of mounts[] so it is recursively mounted 293 | # and before (but after in the mounts[] array) /.runcvm so it can be mounted inside it 294 | jq_set "$CFG" --arg runcvm "$RUNCVM" --arg runcvm_guest "$RUNCVM_GUEST" '.mounts |= [{"destination":$runcvm_guest,"type":"bind","source":$runcvm,"options":["bind","private","ro"]}] + .' 295 | 296 | # Mount a tmpfs at /.runcvm in container 297 | # Define this at top of mounts[] so it is recursively mounted 298 | jq_set "$CFG" '.mounts |= [{"destination":"/.runcvm","type":"tmpfs","source":"runcvm","options":["nosuid","noexec","nodev","size=1M","mode=700"]}] + .' 299 | 300 | # Mount a tmpfs at /run in container 301 | # Define this at bottom of mounts[] so it is not recursively mounted to /vm 302 | jq_set "$CFG" '.mounts += [{"destination":"/run","type":"tmpfs","source":"run","options":["nosuid","noexec","nodev","size=1M","mode=700"]}]' 303 | 304 | # DETERMINE LAUNCH KERNEL: 305 | # 306 | # 1. If RUNCVM_KERNEL specified: 307 | # - <dist> or <dist>/latest - use latest RUNCVM kernel available for this dist *and* ARGS 308 | # - <dist>/<version> - use specific RUNCVM kernel version for this dist *and* ARGS 309 | # 2. Else, check /etc/os-release and: 310 | # a. Use builtin kernel for this dist (if present in the expected location) *and* ARGS 311 | # b. Use latest RUNCVM kernel available for the dist: 312 | # - ID=alpine, VERSION_ID=3.16.0 => alpine/latest 313 | # - ID=debian, VERSION_ID=11 => debian/latest 314 | # - ID=ubuntu, VERSION_ID=22.04 => ubuntu/latest 315 | 316 | # Look for RUNCVM_KERNEL env var 317 | RUNCVM_KERNEL=$(get_config_env 'RUNCVM_KERNEL') 318 | log "RUNCVM_KERNEL='$RUNCVM_KERNEL' (1)" 319 | 320 | # Generate: 321 | # - RUNCVM_KERNEL_ID: the distro name (e.g. alpine, debian, ubuntu) 322 | # - RUNCVM_KERNEL_IDVER: the distro name and kernel version (e.g. alpine/5.15.59-0-virt, debian/5.10.0-16-amd64) 323 | 324 | if [ -n "$RUNCVM_KERNEL" ]; then 325 | # If found, validate 326 | 327 | if [[ "$RUNCVM_KERNEL" =~ \.\. ]]; then 328 | error "Kernel '$RUNCVM_KERNEL' invalid (contains '..')" 329 | fi 330 | 331 | if ! [[ "$RUNCVM_KERNEL" =~ ^[a-z]+(/[^/]+)?$ ]]; then 332 | error "Kernel '$RUNCVM_KERNEL' invalid (should match ^[a-z]+(/[^/]+)?$)" 333 | fi 334 | 335 | if ! [ -d "$RUNCVM_KERNELS/$RUNCVM_KERNEL" ]; then 336 | error "Kernel '$RUNCVM_KERNEL' not found (check $RUNCVM_KERNELS)" 337 | fi 338 | 339 | # If RUNCVM_KERNEL is a distro name only, append /latest 340 | if [[ "$RUNCVM_KERNEL" =~ ^[a-z]+$ ]]; then 341 | RUNCVM_KERNEL_IDVER="$RUNCVM_KERNEL/latest" 342 | else 343 | RUNCVM_KERNEL_IDVER="$RUNCVM_KERNEL" 344 | fi 345 | 346 | RUNCVM_KERNEL_ID=$(dirname "$RUNCVM_KERNEL_IDVER") # Returns e.g.
alpine, debian, ubuntu 347 | 348 | else 349 | # If not found, look for value from /etc/os-release in the container image 350 | 351 | RUNCVM_KERNEL_ID=$(load_env_from_file "$ROOT/etc/os-release" "ID") 352 | 353 | # Currently unused 354 | # RUNCVM_KERNEL_OS_VERSION_ID=$(load_env_from_file "$ROOT/etc/os-release" "VERSION_ID") 355 | 356 | # If still not found, assign a default 357 | if [ -z "$RUNCVM_KERNEL_ID" ]; then 358 | RUNCVM_KERNEL_ID="${RUNCVM_KERNEL_DEFAULT:-debian}" 359 | fi 360 | 361 | RUNCVM_KERNEL_IDVER="$RUNCVM_KERNEL_ID/latest" 362 | fi 363 | 364 | log "RUNCVM_KERNEL='$RUNCVM_KERNEL' (2)" 365 | log "RUNCVM_KERNEL_ID='$RUNCVM_KERNEL_ID'" 366 | log "RUNCVM_KERNEL_IDVER='$RUNCVM_KERNEL_IDVER'" 367 | 368 | # Now look up the default kernel and initramfs paths and args for this kernel 369 | case "$RUNCVM_KERNEL_ID" in 370 | debian) RUNCVM_KERNEL_OS_KERNEL_PATH="/vmlinuz" 371 | RUNCVM_KERNEL_OS_INITRAMFS_PATH="/initrd.img" 372 | RUNCVM_KERNEL_ROOT="rootfstype=virtiofs root=runcvmfs noresume nomodeset net.ifnames=1" 373 | ;; 374 | ubuntu) RUNCVM_KERNEL_OS_KERNEL_PATH="/boot/vmlinuz" 375 | RUNCVM_KERNEL_OS_INITRAMFS_PATH="/boot/initrd.img" 376 | RUNCVM_KERNEL_ROOT="rootfstype=virtiofs root=runcvmfs noresume nomodeset net.ifnames=1" 377 | ;; 378 | ol) RUNCVM_KERNEL_OS_KERNEL_PATH="/boot/vmlinuz" 379 | RUNCVM_KERNEL_OS_INITRAMFS_PATH="/boot/initramfs" 380 | RUNCVM_KERNEL_ROOT="root=virtiofs:runcvmfs noresume nomodeset net.ifnames=1" 381 | ;; 382 | alpine|openwrt) RUNCVM_KERNEL_OS_KERNEL_PATH="/boot/vmlinuz-virt" 383 | RUNCVM_KERNEL_OS_INITRAMFS_PATH="/boot/initramfs-virt" 384 | RUNCVM_KERNEL_ROOT="rootfstype=virtiofs root=runcvmfs resume= nomodeset" 385 | ;; 386 | 387 | *) error "Unrecognised image O/S '$RUNCVM_KERNEL_ID'; specify --env=RUNCVM_KERNEL=<dist> or --env=RUNCVM_KERNEL=<dist>/<version>"; ;; 388 | esac 389 | 390 | # If no RUNCVM_KERNEL specified, look for a kernel and initramfs at the expected paths in the container image. 391 | if [[ -z "$RUNCVM_KERNEL" && -f "$ROOT/$RUNCVM_KERNEL_OS_KERNEL_PATH" && -f "$ROOT/$RUNCVM_KERNEL_OS_INITRAMFS_PATH" ]]; then 392 | RUNCVM_KERNEL_PATH="$RUNCVM_KERNEL_OS_KERNEL_PATH" 393 | RUNCVM_KERNEL_INITRAMFS_PATH="$RUNCVM_KERNEL_OS_INITRAMFS_PATH" 394 | else 395 | # If RUNCVM_KERNEL was specified, or we didn't find a kernel and initramfs at the expected paths in the container image, 396 | # select the latest RUNCVM kernel version and arrange to mount it. 397 | 398 | RUNCVM_KERNEL_VERSION=$(basename $(readlink -f "$RUNCVM_KERNELS/$RUNCVM_KERNEL_IDVER")) # Returns e.g.
5.15.53-0-virt 399 | 400 | RUNCVM_KERNEL_MOUNT_LIB_MODULES=$(get_config_env 'RUNCVM_KERNEL_MOUNT_LIB_MODULES') 401 | if [ -n "$RUNCVM_KERNEL_MOUNT_LIB_MODULES" ]; then 402 | RUNCVM_KERNEL_MODULES_SRC="$RUNCVM_KERNELS/$RUNCVM_KERNEL_ID/$RUNCVM_KERNEL_VERSION/modules" 403 | RUNCVM_KERNEL_MODULES_DST="/lib/modules" 404 | else 405 | RUNCVM_KERNEL_MODULES_SRC="$RUNCVM_KERNELS/$RUNCVM_KERNEL_ID/$RUNCVM_KERNEL_VERSION/modules/$RUNCVM_KERNEL_VERSION" 406 | RUNCVM_KERNEL_MODULES_DST="/lib/modules/$RUNCVM_KERNEL_VERSION" 407 | fi 408 | 409 | RUNCVM_KERNEL_PATH="$RUNCVM_GUEST_KERNELS/$RUNCVM_KERNEL_ID/$RUNCVM_KERNEL_VERSION/vmlinuz" 410 | RUNCVM_KERNEL_INITRAMFS_PATH="$RUNCVM_GUEST_KERNELS/$RUNCVM_KERNEL_ID/$RUNCVM_KERNEL_VERSION/initrd" 411 | 412 | jq_set "$CFG" --arg modules_dst "$RUNCVM_VM_MOUNTPOINT$RUNCVM_KERNEL_MODULES_DST" --arg modules_src "$RUNCVM_KERNEL_MODULES_SRC" '.mounts += [{"destination":$modules_dst,"type":"bind","source":$modules_src,"options":["bind","private","ro"]}]' 413 | fi 414 | 415 | log "RUNCVM_KERNEL='$RUNCVM_KERNEL'" 416 | log "RUNCVM_KERNEL_ID='$RUNCVM_KERNEL_ID'" 417 | log "RUNCVM_KERNEL_VERSION='$RUNCVM_KERNEL_VERSION'" 418 | log "RUNCVM_KERNEL_OS_KERNEL_PATH='$RUNCVM_KERNEL_OS_KERNEL_PATH'" 419 | log "RUNCVM_KERNEL_OS_INITRAMFS_PATH='$RUNCVM_KERNEL_OS_INITRAMFS_PATH'" 420 | log "RUNCVM_KERNEL_PATH='$RUNCVM_KERNEL_PATH'" 421 | log "RUNCVM_KERNEL_INITRAMFS_PATH='$RUNCVM_KERNEL_INITRAMFS_PATH'" 422 | log "RUNCVM_KERNEL_ROOT='$RUNCVM_KERNEL_ROOT'" 423 | log "RUNCVM_KERNEL_MODULES_SRC='$RUNCVM_KERNEL_MODULES_SRC'" 424 | log "RUNCVM_KERNEL_MODULES_DST='$RUNCVM_KERNEL_MODULES_DST'" 425 | 426 | set_config_env "RUNCVM_KERNEL_PATH" "$RUNCVM_KERNEL_PATH" 427 | set_config_env "RUNCVM_KERNEL_INITRAMFS_PATH" "$RUNCVM_KERNEL_INITRAMFS_PATH" 428 | set_config_env "RUNCVM_KERNEL_ROOT" "$RUNCVM_KERNEL_ROOT" 429 | 430 | # Configure devices 431 | jq_set "$CFG" '.linux.resources.devices += [{"allow":true,"type":"c","major":10,"minor":232,"access":"rwm"},{"allow":true,"type":"c","major":10,"minor":200,"access":"rwm"}]' 432 | jq_set "$CFG" '.linux.devices+=[{"path":"/dev/net/tun","type":"c","major":10,"minor":200,"fileMode":8630,"uid":0,"gid":0},{"path":"/dev/kvm","type":"c","major":10,"minor":232,"fileMode":8630,"uid":0,"gid":0}]' 433 | 434 | # For now, hardcode --security-opt=seccomp=unconfined; 435 | # later, we can work out the minimal seccomp permissions required. 436 | jq_set "$CFG" '.linux.seccomp |= empty' 437 | 438 | # CONFIGURE MEMORY 439 | # Set /dev/shm to RUNCVM_MEM_SIZE env var, or to default 440 | # - it should be large enough to support VM memory 441 | RUNCVM_MEM_LIMIT=$(jq_get "$CFG" '.linux.resources.memory.limit') 442 | log "RUNCVM_MEM_LIMIT=$RUNCVM_MEM_LIMIT" 443 | if [ "$RUNCVM_MEM_LIMIT" != "null" ]; then 444 | RUNCVM_MEM_SIZE="$(( $RUNCVM_MEM_LIMIT/1024/1024 ))M" 445 | else 446 | RUNCVM_MEM_SIZE="$RUNCVM_MEM_SIZE_DEFAULT" 447 | fi 448 | log "RUNCVM_MEM_SIZE=$RUNCVM_MEM_SIZE" 449 | set_config_env "RUNCVM_MEM_SIZE" "$RUNCVM_MEM_SIZE" 450 | 451 | RUNCVM_HUGETLB=$(get_config_env "RUNCVM_HUGETLB") 452 | if [ "$RUNCVM_HUGETLB" != "1" ]; then 453 | jq_set "$CFG" --arg size "$RUNCVM_MEM_SIZE" '( .mounts[] | select(.destination == "/dev/shm") ) = {"destination": "/dev/shm","type": "tmpfs","source": "shm","options": ["nosuid","noexec","nodev","mode=1777","size=" + $size]}' 454 | # else 455 | # --shm-size applies; default 64m. 456 | fi 457 | 458 | # In future, set the container memory limit to something reasonable to support 459 | # QEMU + virtiofsd + dnsmasq. 
Perhaps $RUNCVM_MEM_LIMIT+K (or, vice versa, reduce 460 | # RUNCVM_MEM_SIZE by K), where K is the memory requirement for the container's processes 461 | # over and above QEMU. 462 | # jq_set "$CFG" --arg size $(($RUNCVM_MEM_LIMIT + )) '.linux.resources.memory.limit |= ($size | tonumber)' 463 | 464 | # Add non-default capabilities needed by: 465 | # - Docker: CAP_NET_ADMIN 466 | # - Podman: CAP_NET_ADMIN, CAP_NET_RAW, CAP_MKNOD, CAP_AUDIT_WRITE 467 | for field in bounding effective permitted 468 | do 469 | jq_set "$CFG" --arg field "$field" '.process.capabilities[$field] |= (.+ ["CAP_NET_ADMIN","CAP_NET_RAW","CAP_MKNOD","CAP_AUDIT_WRITE"] | unique)' 470 | done 471 | 472 | # Filter for RUNCVM_SYS_ADMIN=1 473 | RUNCVM_SYS_ADMIN=$(get_config_env "RUNCVM_SYS_ADMIN") 474 | if [ "$RUNCVM_SYS_ADMIN" = "1" ]; then 475 | # TODO use 'unique' 476 | jq_set "$CFG" '.process.capabilities.bounding += ["CAP_SYS_ADMIN"] | .process.capabilities.effective += ["CAP_SYS_ADMIN"] | .process.capabilities.permitted += ["CAP_SYS_ADMIN"]' 477 | fi 478 | 479 | debug && cp -a $CFG /tmp/config.json-$$-2 480 | 481 | elif [ "$COMMAND" = "exec" ]; then 482 | 483 | debug && log "Command: exec" 484 | 485 | # USAGE: 486 | # runc exec [command options] <container-id> <command> [command options] || -p process.json <container-id> 487 | # 488 | # PARSE 'exec' COMMAND OPTIONS 489 | # --console-socket value path to an AF_UNIX socket which will receive a file descriptor referencing the master end of the console's pseudoterminal 490 | # --cwd value current working directory in the container 491 | # --env value, -e value set environment variables 492 | # --tty, -t allocate a pseudo-TTY 493 | # --user value, -u value UID (format: <uid>[:<gid>]) 494 | # --additional-gids value, -g value additional gids 495 | # --process value, -p value path to the process.json 496 | # --detach, -d detach from the container's process 497 | # --pid-file value specify the file to write the process id to 498 | # --process-label value set the asm process label for the process commonly used with selinux 499 | # --apparmor value set the apparmor profile for the process 500 | # --no-new-privs set the no new privileges value for the process 501 | # --cap value, -c value add a capability to the bounding set for the process 502 | # --preserve-fds value Pass N additional file descriptors to the container (stdio + $LISTEN_FDS + N in total) (default: 0) 503 | # --cgroup value run the process in an (existing) sub-cgroup(s). Format is [<controller>:]<cgroup>. 504 | # --ignore-paused allow exec in a paused container 505 | while true 506 | do 507 | case "$1" in 508 | --console-socket|--cwd|--env|-e|--user|-u|--additional-gids|-g|--pid-file|--process-label|--apparmor|--cap|-c|--preserve-fds|--cgroup) shift; shift; continue; ;; 509 | --tty|-t|--detach|-d|--no-new-privs|--ignore-paused) shift; continue; ;; 510 | --process|-p) shift; PROCESS="$1"; continue; ;; 511 | *) break; ;; 512 | esac 513 | done 514 | 515 | # Allow user to enable debug logging 516 | if [ "$(get_process_env "$PROCESS" 'RUNCVM_RUNTIME_DEBUG' '0')" = "1" ]; then 517 | RUNCVM_DEBUG="1" 518 | fi 519 | 520 | if debug; then 521 | log "Command line: $0 ${COMMAND_LINE[@]@Q}" 522 | log "Command: exec process=$PROCESS" 523 | 524 | # Save formatted process.json 525 | jq -r .
<$PROCESS >/tmp/process.json-$$-1 526 | fi 527 | 528 | ARG1=$(jq_get "$PROCESS" '.args[0]') 529 | if [ "$ARG1" = "---" ]; then 530 | jq_set "$PROCESS" 'del(.args[0])' 531 | else 532 | uidgid=$(jq_get "$PROCESS" '(.user.uid | tostring) + ":" + (.user.gid | tostring) + ":" + ((.user.additionalGids // []) | join(","))') 533 | cwd=$(jq_get "$PROCESS" '.cwd') 534 | hasHome=$(get_process_env_boolean "$PROCESS" 'HOME') 535 | wantsTerminal=$(jq_get "$PROCESS" '.terminal') 536 | 537 | jq_set "$PROCESS" \ 538 | --arg exec "$RUNCVM_EXEC" \ 539 | --arg uidgid "$uidgid" \ 540 | --arg cwd "$cwd" \ 541 | --arg hasHome "$hasHome" \ 542 | --arg wantsTerminal "$wantsTerminal" \ 543 | '.args |= [$exec, $uidgid, $cwd, $hasHome, $wantsTerminal] + .' 544 | 545 | # Force root (or whatever user qemu runs as) 546 | # Force cwd in the container to / 547 | jq_set "$PROCESS" '.user = {"uid":0, "gid":0} | .cwd="/"' 548 | fi 549 | 550 | debug && cp -a $PROCESS /tmp/process.json-$$-2 551 | fi 552 | 553 | debug && log "--- LOG ENDS ---" 554 | 555 | exec /usr/bin/runc "${COMMAND_LINE[@]}" 556 | -------------------------------------------------------------------------------- /runcvm-scripts/runcvm-vm-exec: -------------------------------------------------------------------------------- 1 | #!/.runcvm/guest/bin/bash 2 | 3 | from_bin() { 4 | tr '\200\201\202\203\204\205' "\011\012\040\047\042\134" 5 | } 6 | 7 | error() { 8 | echo "OCI runtime exec failed: exec failed: unable to start container process: chdir to cwd (\"$cwd\") set in config.json failed: no such file or directory: unknown" 9 | exit 126 10 | } 11 | 12 | uidgid="$1" 13 | cwd_bin="$2" 14 | shift 2 15 | 16 | IFS=':' read -r uid gid additionalGids <<< "$uidgid" 17 | 18 | args_bin="$1" 19 | env_bin="$2" 20 | 21 | mapfile -t args < <(echo -n "$args_bin" | from_bin) 22 | mapfile -t env < <(echo -n "$env_bin" | from_bin) 23 | cwd=$(echo -n "$cwd_bin" | from_bin) 24 | 25 | cd "$cwd" 2>/dev/null && unset OLDPWD || error 26 | 27 | # Load original environment 28 | . /.runcvm/config 29 | 30 | # Load defaults and aliases 31 | . $RUNCVM_GUEST/scripts/runcvm-ctr-defaults 32 | 33 | exec -c $RUNCVM_GUEST/bin/busybox env -i "${env[@]}" $RUNCVM_GUEST/bin/s6-applyuidgid -u $uid -g $gid -G "$additionalGids" "${args[@]}" -------------------------------------------------------------------------------- /runcvm-scripts/runcvm-vm-init: -------------------------------------------------------------------------------- 1 | #!/.runcvm/guest/bin/bash -e 2 | 3 | # Load original environment 4 | . /.runcvm/config 5 | 6 | # Load defaults and aliases 7 | . $RUNCVM_GUEST/scripts/runcvm-ctr-defaults 8 | 9 | # Alpine initrd doesn't honour command-line rw flag 10 | mount -o remount,rw / 11 | 12 | # FIXME: Something is making /.runcvm ro, so remount it rw 13 | # until such time as exit code handling and dropbear key creation 14 | # obviate the need for this. 15 | mount -o remount,rw /.runcvm 16 | 17 | # Alpine initrd doesn't configure /dev device permissions and ownership 18 | # to support non-root users. 
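# Without these fixups, a bare devtmpfs may lack the conventional /dev/stdin,
# /dev/stdout and /dev/stderr links, and tty device modes/ownership can prevent
# a non-root entrypoint from opening its terminal; the block below mirrors the
# device setup a full init system would normally perform.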
19 | if [ "$(findmnt -rnu -o FSTYPE /dev)" = "devtmpfs" ]; then 20 | [ -e /dev/stdin ] || ln -snf /proc/self/fd/0 /dev/stdin 21 | [ -e /dev/stdout ] || ln -snf /proc/self/fd/1 /dev/stdout 22 | [ -e /dev/stderr ] || ln -snf /proc/self/fd/2 /dev/stderr 23 | [ -e /proc/kcore ] && ln -snf /proc/kcore /dev/core 24 | [ -h /dev/ptmx ] || ln -snf pts/ptmx /dev/ptmx 25 | chmod 666 /dev/null /dev/random /dev/urandom /dev/zero /dev/tty /dev/pts/ptmx 26 | chmod 620 /dev/tty[0-9]* 27 | chgrp tty /dev/tty* 28 | fi 29 | 30 | # Unmount /run if it is a tmpfs (not a virtiofs) mounted by the initramfs 31 | # /run may be populated in the underlying image, and may also be a volume or be bind-mounted, 32 | # and its contents should be accessible in these cases. 33 | if [ "$(findmnt -rnu -o FSTYPE /run)" = "tmpfs" ]; then 34 | busybox umount -fl /run 35 | fi 36 | 37 | # FIXME: virtiofs mounts aren't always made rw. Remount them all rw (if allowed) 38 | # $RUNCVM_GUEST/bin/mount -t virtiofs | awk '{print $3}' | xargs -n 1 mount -o remount,rw 39 | 40 | # Some systems do not set up /dev/fd. If needed, add it. 41 | if ! [ -h /dev/fd ]; then 42 | ln -s /proc/self/fd /dev/fd 43 | fi 44 | 45 | # FIXME: This must be run early enough, otherwise other interfaces like docker0 might have started 46 | IF=$(ls /sys/class/net/ | grep -vE '^(lo|docker)' | head -n 1) 47 | 48 | # https://bugzilla.redhat.com/show_bug.cgi?id=501934 49 | for i in all $IF 50 | do 51 | # /sbin/sysctl -q -w -e net.ipv6.conf.$i.disable_ipv6=1 net.ipv6.conf.$i.autoconf=0 net.ipv6.conf.$i.accept_ra=0 52 | sysctl -q -w -e net.ipv6.conf.$i.disable_ipv6=1 net.ipv6.conf.$i.autoconf=0 || true 53 | done 54 | 55 | # Bring up local interface 56 | ip link set lo up 57 | 58 | # Identify each interface by MAC address, then give each a temporary name 59 | # (as we might ultimately need to rename e.g. eth0->eth1 and eth1->eth0). 60 | for ifpath in /.runcvm/network/devices/* 61 | do 62 | if=$(busybox basename "$ifpath") 63 | 64 | [ "$if" = "default" ] && continue 65 | 66 | load_network "$if" 67 | 68 | # Locate the actual network device by its MAC address. 69 | mac=$(busybox sed -r 's/^..:..:../52:54:00/' <<<$DOCKER_IF_MAC) 70 | device=$(ip -json link show | jq -r --arg mac "$mac" '.[] | select(.address == $mac) | .ifname') 71 | 72 | ip link set $device name $DOCKER_IF-tmp 73 | done 74 | 75 | # Configure, rename and bring up all interfaces. 76 | for ifpath in /.runcvm/network/devices/* 77 | do 78 | if=$(busybox basename "$ifpath") 79 | 80 | [ "$if" = "default" ] && continue 81 | 82 | load_network "$if" 83 | 84 | ip link set $DOCKER_IF-tmp name $DOCKER_IF 85 | ip addr add $DOCKER_IF_IP/$DOCKER_IF_IP_NETPREFIX broadcast + dev $DOCKER_IF 86 | ip link set $DOCKER_IF up mtu "${DOCKER_IF_MTU:=1500}" 87 | 88 | # If this is the default gateway interface, establish the default gateway 89 | [ -n "$DOCKER_IF_IP_GW" ] && ip route add default via $DOCKER_IF_IP_GW 90 | done 91 | 92 | # Read and install any supplementary routes. 
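# Each input line is expected to carry whitespace-separated fields matching the
# read below:
#   NET GW DEV [PREFSRC]
# e.g. (illustrative values only):
#   172.19.0.0/16 172.18.0.1 eth1 172.18.0.5
# Lines missing NET, GW or DEV are skipped, and failed route additions are
# tolerated (|| true) so that VM boot can continue.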
93 | while read -r DOCKER_RT_NET DOCKER_RT_GW DOCKER_RT_DEV DOCKER_RT_PREFSRC 94 | do 95 | [ -n "$DOCKER_RT_NET" ] && [ -n "$DOCKER_RT_GW" ] && [ -n "$DOCKER_RT_DEV" ] && \ 96 | ip route add "$DOCKER_RT_NET" via "$DOCKER_RT_GW" dev "$DOCKER_RT_DEV" || true 97 | done /dev/null | grep ^ssh | cut -d' ' -f2) 142 | 143 | # Create json for dropbear EPKA module 144 | cat <<_EOE_ >/.runcvm/dropbear/epka.json && chmod 400 /.runcvm/dropbear/epka.json 145 | [ 146 | { 147 | "user": "root", 148 | "keytype": "ssh-rsa", 149 | "key": "$KEY_PUBLIC", 150 | "options":"no-X11-forwarding", 151 | "comments": "" 152 | } 153 | ] 154 | _EOE_ 155 | 156 | # Load choice of console device 157 | read -r CONSOLE_DEVICE /etc/inittab <<_EOE_ 168 | $CONSOLE_DEVICE::respawn:-$RUNCVM_GUEST/scripts/runcvm-vm-start-wrapper 169 | null::respawn:$RUNCVM_GUEST/scripts/runcvm-vm-qemu-ga 170 | null::respawn:$RUNCVM_GUEST/usr/sbin/dropbear -REF -p $SSHD_PORT -A $RUNCVM_GUEST/tmp/dropbear/libepka_file.so,/.runcvm/dropbear/epka.json -P /.runcvm/dropbear/dropbear.pid 171 | null::ctrlaltdel:$RUNCVM_GUEST/bin/poweroff 172 | null::restart:$RUNCVM_GUEST/bin/poweroff 173 | null::shutdown:$RUNCVM_GUEST/bin/poweroff 174 | _EOE_ 175 | 176 | # Allow runcvm-vm-start to run once (and only once) 177 | rm -f /.runcvm/once 178 | 179 | # Clear the environment, and run our own init, disconnecting stdout and stderr from terminal 180 | exec -c $RUNCVM_GUEST/bin/init &>/dev/null 181 | else 182 | # If not, assume the user knows what they're doing: launch qemu-ga and just run their entrypoint. 183 | 184 | # Clean RUNCVM env vars 185 | clean_env 186 | 187 | # Run the qemu guest agent, needed to support future functionality 188 | $RUNCVM/scripts/runcvm-vm-qemu-ga &>/dev/null & 189 | 190 | # Run dropbear SSH server, needed to support 'docker exec' 191 | dropbear -REF -p $SSHD_PORT -A $RUNCVM_GUEST/tmp/dropbear/libepka_file.so,/.runcvm/dropbear/epka.json -P /.runcvm/dropbear/dropbear.pid &>/dev/null & 192 | 193 | # Run init from the image 194 | # Pipe input/output from/to console device 195 | exec /dev/$CONSOLE_DEVICE 196 | 197 | # Invoke runcvm-init with --no-fork purely to create controlling tty, 198 | # then exec runcvm-vm-start 199 | exec -c $RUNCVM_GUEST/sbin/runcvm-init --no-fork $RUNCVM_GUEST/scripts/runcvm-vm-start 200 | fi 201 | -------------------------------------------------------------------------------- /runcvm-scripts/runcvm-vm-qemu-ga: -------------------------------------------------------------------------------- 1 | #!/.runcvm/guest/bin/bash 2 | 3 | # Load config 4 | . /.runcvm/config 5 | 6 | # Load defaults and aliases 7 | . $RUNCVM_GUEST/scripts/runcvm-ctr-defaults 8 | 9 | OPTS=(--retry-path --statedir /.runcvm) 10 | 11 | if [ -f "/dev/virtio-ports/org.qemu.guest_agent.0" ]; then 12 | DEV="/dev/virtio-ports/org.qemu.guest_agent.0" 13 | else 14 | DEV=$(ls /dev/vport* | head -n 1) 15 | 16 | if [ -n "$DEV" ] && [ -c "$DEV" ]; then 17 | OPTS+=(-p "$DEV") 18 | fi 19 | fi 20 | 21 | if [ -z "$DEV" ]; then 22 | exit 0 23 | fi 24 | 25 | exec -c "$(which qemu-ga)" "${OPTS[@]}" 26 | -------------------------------------------------------------------------------- /runcvm-scripts/runcvm-vm-start: -------------------------------------------------------------------------------- 1 | #!/.runcvm/guest/bin/bash 2 | 3 | # Load original environment 4 | . /.runcvm/config 5 | 6 | # Load defaults and aliases 7 | . 
$RUNCVM_GUEST/scripts/runcvm-ctr-defaults 8 | 9 | if [ -f /.runcvm/once ]; then 10 | poweroff 11 | exit 0 12 | else 13 | touch /.runcvm/once 14 | fi 15 | 16 | # Change to saved PWD 17 | cd $(cat /.runcvm/pwd) && unset OLDPWD 18 | 19 | # Reload original environment 20 | . /.runcvm/config 21 | 22 | # Load original entrypoint 23 | mapfile -t ARGS /dev/null) 49 | [ -n "$IP" ] && break 50 | [ $i -eq 1 ] && log "Ingress IP detection for this node failed!" && exit 1 51 | sleep 0.5 52 | done 53 | 54 | echo -n "$IP" >$FILE 55 | } 56 | 57 | cgroupfs_mount 58 | 59 | ulimit -u unlimited 60 | 61 | modprobe ip_vs 62 | 63 | h=$(hostname) 64 | 65 | log "Checking network ..." 66 | read -r DOCKER_IF DOCKER_IF_GW \ 67 | <<< $(ip -json route show | jq -j '.[] | select(.dst == "default") | .dev, " ", .gateway') 68 | 69 | read -r DOCKER_IF_IP DOCKER_IF_MTU <<< \ 70 | $(ip -json addr show eth0 | jq -j '.[0] | .addr_info[0].local, " ", .mtu') 71 | 72 | log "- DOCKER_IF=$DOCKER_IF DOCKER_IF_IP=$DOCKER_IF_IP DOCKER_IF_GW=$DOCKER_IF_GW DOCKER_IF_MTU=$DOCKER_IF_MTU" 73 | 74 | # Start dockerd and keep it running 75 | DOCKER_OPTS=(--mtu=$DOCKER_IF_MTU) 76 | DOCKER_OPTS+=(--add-runtime runcvm=/opt/runcvm/scripts/runcvm-runtime) 77 | 78 | if [ -n "$REGISTRY_MIRROR" ]; then 79 | # Replace localhost with the custom network gateway, to reach a registry running on the host network 80 | DOCKER_OPTS+=(--registry-mirror=$(sed "s|/localhost\b|/$DOCKER_IF_GW|" <<< $REGISTRY_MIRROR)) 81 | fi 82 | 83 | log "Launching 'dockerd ${DOCKER_OPTS[*]}' ..." 84 | while true; do dockerd "${DOCKER_OPTS[@]}" >>/var/log/dockerd.log 2>&1; done & 85 | 86 | for i in $(seq 1 10 | sort -nr) 87 | do 88 | log "Waiting for dockerd to start (#$i) ..." 89 | docker ps >/dev/null 2>&1 && break 90 | [ $i -eq 1 ] && exit 1 91 | sleep 0.5 92 | done 93 | 94 | log "dockerd started" 95 | 96 | docker info 97 | 98 | node_state 99 | log "docker swarm: node state = $NodeState; manager=$IsManager" 100 | 101 | log "Creating docker_gwbridge network with MTU $DOCKER_IF_MTU" 102 | docker network create -d bridge \ 103 | --subnet 172.18.0.0/16 \ 104 | --opt com.docker.network.bridge.name=docker_gwbridge \ 105 | --opt com.docker.network.bridge.enable_icc=false \ 106 | --opt com.docker.network.bridge.enable_ip_masquerade=true \ 107 | --opt com.docker.network.driver.mtu=$DOCKER_IF_MTU \ 108 | docker_gwbridge 109 | 110 | if [ "$NodeState" = "inactive" ] || [ "$NodeState" = "pending" ]; then 111 | 112 | if [ "$NODE" != "1" ]; then 113 | 114 | for i in $(seq 1 20 | sort -nr) 115 | do 116 | log "Waiting for swarm manager startup (#$i) ..." 117 | [ -f /swarm/worker ] && break 118 | [ $i -eq 1 ] && exit 1 119 | sleep 1 120 | done 121 | 122 | log "Swarm manager has started up." 123 | for i in $(seq 1 20 | sort -nr) 124 | do 125 | log "Joining swarm (#$i) ..." 126 | . /swarm/worker && break 127 | [ $i -eq 1 ] && exit 1 128 | sleep 0.5 129 | done 130 | 131 | log "Joined swarm!" 132 | 133 | else 134 | 135 | log "Initialising swarm ..." 136 | if ! docker swarm init >/dev/null; then 137 | log "Swarm initialisation FAILED!" 138 | exit 1 139 | fi 140 | 141 | log "Swarm initialised!" 142 | 143 | if [ -n "$MTU" ] && [ "$MTU" -gt 0 ]; then 144 | 145 | log "Removing default ingress ..." 146 | echo y | docker network rm ingress 147 | 148 | log "Waiting 3s for ingress removal ..."
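# Ingress removal completes asynchronously, hence the fixed delay before the
# network is recreated below. The replacement ingress is created with the node
# MTU because VXLAN encapsulation adds roughly 50 bytes per packet; an overlay
# MTU larger than the underlay's would otherwise cause fragmentation or drops
# on jumbo-frame networks.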
149 | sleep 3 150 | 151 | log "Creating new ingress with MTU $DOCKER_IF_MTU" 152 | docker network create \ 153 | --driver=overlay \ 154 | --ingress \ 155 | --subnet=10.0.0.0/24 \ 156 | --gateway=10.0.0.1 \ 157 | --opt com.docker.network.driver.mtu=$DOCKER_IF_MTU \ 158 | ingress 159 | fi 160 | 161 | log "Writing swarm 'join token' to shared storage and waiting for other nodes ..." 162 | mkdir -p /swarm/nodes && docker swarm join-token worker | grep docker >/swarm/worker 163 | 164 | for i in $(seq 1 30 | sort -nr) 165 | do 166 | nodes=$(docker node ls --format '{{json .}}' | wc -l) 167 | log "Waiting for remaining $((NODES-nodes)) of $NODES nodes to join swarm (#$i) ..." 168 | [ $nodes -eq $NODES ] && break 169 | [ $i -eq 1 ] && log "Swarm failed!" && exit 1 170 | sleep 1 171 | done 172 | 173 | log "Swarm nodes started:" 174 | docker node ls 175 | echo 176 | 177 | fi 178 | 179 | log "Log memory consumption ..." 180 | free 181 | 182 | # Log this trigger line last BUT before (optionally) running DIRD. 183 | # This is because the test script waits for this line to appear before proceeding to launch the service. 184 | # We log multiple times to work around a minor bug whereby the test script sometimes fails to react to the first log line alone. 185 | for i in $(seq 1 5); do log "Swarm complete!"; sleep 0.25; done 186 | 187 | # Optionally run DIRD. 188 | # Do this after logging "Swarm complete" so that test script proceeds to launch the service; 189 | # as, until the service is launched, the nodes' ingress network IPs will not yet be defined. 190 | if [ "$DIRD" = "1" ]; then 191 | 192 | log "Detecting node ingress network IP ..." 193 | detect_ingress_ip /swarm/nodes/$NODE 194 | log "Detected node ingress network IP '$(cat /swarm/nodes/$NODE)'" 195 | 196 | log "Waiting for all nodes' ingress network IPs ..." 197 | for i in $(seq 1 30 | sort -nr) 198 | do 199 | [ $(ls /swarm/nodes/ | wc -l) -eq $NODES ] && break 200 | [ $i -eq 1 ] && log "Ingress IP detection for all nodes failed!" && exit 1 201 | sleep 0.5 202 | done 203 | 204 | for n in $(ls /swarm/nodes) 205 | do 206 | IPs+="$(cat /swarm/nodes/$n)," 207 | echo "$n: '$(cat /swarm/nodes/$n)'" 208 | done 209 | 210 | IPs=$(echo $IPs | sed 's/,$//') 211 | 212 | log "Running docker-ingress-routing-daemon --preexisting --ingress-gateway-ips $IPs --install ..." 213 | while true; do /usr/local/bin/docker-ingress-routing-daemon --preexisting --iptables-wait-seconds 3 --ingress-gateway-ips "$IPs" --install; sleep 1; done & 214 | 215 | fi 216 | 217 | fi 218 | 219 | node_state 220 | if [ "$NodeState" = "active" ] && [ "$IsManager" = "true" ]; then 221 | log "Manager ready" 222 | fi 223 | 224 | log "Looping indefinitely ..." 225 | while true; do sleep infinity; done 226 | -------------------------------------------------------------------------------- /tests/00-http-docker-swarm/test: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | # Load framework functions 4 | . ../framework.sh 5 | 6 | # TEST VARIABLES 7 | NODE=00-http-docker-swarm-node 8 | 9 | # Number of nodes 10 | NODES=${NODES:-3} 11 | 12 | # Network MTU to deploy in Docker network, RunCVM container VM nodes, and on Docker and swarm ingress network running on those nodes. 
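# e.g. to exercise a standard-MTU path instead of jumbo frames:
#   MTU=1500 ./test
# or to skip custom-MTU network creation (the MTU is only applied when >0):
#   MTU=0 ./test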
13 | MTU=${MTU:-9000} 14 | 15 | # Set to "1" to enable installation of https://github.com/newsnowlabs/docker-ingress-routing-daemon on the swarm 16 | DIRD=${DIRD:-0} 17 | 18 | # Set to "1" to disable cleanup of Docker image 19 | NO_CLEAN_IMAGE=${NO_CLEAN_IMAGE:-0} 20 | 21 | # OVERRIDE FRAMEWORK FUNCTIONS 22 | nodes() { seq 1 $NODES | sed "s/^/$NODE/"; } 23 | volumes() { echo swarm $(nodes); } 24 | networks() { echo runcvm-mtu; } 25 | images() { echo $IMAGE; } 26 | 27 | # Run routine cleanup of any preexisting containers, volumes, networks, and images 28 | cleanup 29 | 30 | h=$(hostname) 31 | 32 | if [ -n "$REGISTRY_MIRROR" ]; then 33 | log "REGISTRY_MIRROR '$REGISTRY_MIRROR' detected." 34 | else 35 | log "No REGISTRY_MIRROR detected: recommend setting REGISTRY_MIRROR=http://localhost:5000 and launching:" 36 | log "- docker run -d --name=registry --network=host -e REGISTRY_PROXY_REMOTEURL=https://registry-1.docker.io registry:2" 37 | fi 38 | 39 | log "Build image ..." 40 | docker build --iidfile /tmp/iid -f node/Dockerfile node/ 41 | IMAGE=$(cat /tmp/iid) 42 | 43 | if [ -n "$MTU" ] && [ "$MTU" -gt 0 ]; then 44 | log "Creating network 'runcvm-mtu' with MTU $MTU ..." 45 | docker network create --opt com.docker.network.driver.mtu=$MTU --scope=local runcvm-mtu 46 | else 47 | log "Creating network 'runcvm-mtu' with default (unspecified) MTU ..." 48 | docker network create --scope=local runcvm-mtu 49 | fi 50 | 51 | log "Launching $NODES x RunCVM nodes with image $IMAGE ..." 52 | for n in $(seq 1 $NODES) 53 | do 54 | log -n "Launching RunCVM node $n/$NODES ... " 55 | 56 | # Enables Docker's use of overlay2 storage driver in a file-backed disk stored in a dedicated Docker volume 57 | # diskopt="--mount=type=volume,src=$NODE$n,dst=/disks --env=RUNCVM_DISKS=/disks/disk1,/var/lib/docker,ext4,500M" 58 | 59 | # Docker will fall back to using the vfs storage driver, as it detects /var/lib/docker is an overlay2 fs. 60 | # diskopt="--mount=type=volume,src=$NODE$n,dst=/var/lib/docker" 61 | 62 | # Enables Docker's use of overlay2 storage driver in a file-backed disk stored in the container's overlayfs 63 | diskopt="--env=RUNCVM_DISKS=/disks/disk1,/var/lib/docker,ext4,500M" 64 | 65 | # The swarm volume, mounted at /swarm within the RunCVM VMs, will be used to share swarm info 66 | # among the nodes. 67 | docker run \ 68 | -d \ 69 | --rm \ 70 | --runtime=runcvm \ 71 | --network=runcvm-mtu \ 72 | --publish=$((8080+$n-1)):80 \ 73 | --name=$NODE$n \ 74 | --hostname=$NODE$n \ 75 | --memory=512m \ 76 | --env=NODE=$n \ 77 | --env=NODES=$NODES \ 78 | --env=MTU=$MTU \ 79 | --env=DIRD=$DIRD \ 80 | --env=REGISTRY_MIRROR=$REGISTRY_MIRROR \ 81 | --mount=type=volume,src=swarm,dst=/swarm \ 82 | $diskopt \ 83 | $IMAGE 84 | done 85 | 86 | log "Monitoring ${NODE}1 logs for swarm setup progress ..." 87 | docker logs -f ${NODE}1 -n 0 2>&1 | sed "s/^/> (${NODE}1) > /; /Swarm complete/q0; /Swarm failed/q129;" 88 | log "Finished monitoring ${NODE}1 logs as swarm is set up." 89 | 90 | log "Creating http service (please be patient) ..." 
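# The heredoc below is flattened with tr '\012' ' ' into a single ash -c
# string; the service runs mini_httpd with a CGI index page that reports each
# replica's hostname and the client address. Once replicas are up, a request
# such as:
#   curl -s http://0.0.0.0:8080/
# should return a line like "hostname=... remote_addr=...".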
91 | docker exec ${NODE}1 docker service create \ 92 | -d \ 93 | --name=http --mode=global -p 80:80 --update-parallelism=0 \ 94 | alpine ash -c "$(tr '\012' ' ' <<_EOE_ 95 | apk add --no-cache mini_httpd && 96 | mkdir -p /www && 97 | echo -e "#!/bin/sh\n\necho Content-Type: text/plain\necho\necho hostname=\$HOSTNAME remote_addr=\\\$REMOTE_ADDR\nexit 0\n" >/www/index.cgi && 98 | chmod 755 /www/index.cgi && 99 | mini_httpd -d /www -D -l /dev/stdout -c '**.cgi' 100 | _EOE_ 101 | )" 102 | 103 | for i in $(seq 1 200 | sort -nr) 104 | do 105 | replicas=$(docker exec ${NODE}1 docker service ls --format='{{ .Replicas }}' --filter='Name=http') 106 | log "Waiting for remainder of $replicas replicas to launch (#$i) ..." 107 | [ "$replicas" = "$NODES/$NODES" ] && break 108 | [ $i -eq 1 ] && exit 253 109 | sleep 1 110 | done 111 | log "All $NODES replicas launched." 112 | sleep 1 113 | 114 | # Allow final test to complete, even if we encounter errors 115 | set +e 116 | 117 | if [ "$DIRD" = "1" ]; then 118 | DOCKER_IPV4=$(docker network inspect runcvm-mtu --format='{{(index .IPAM.Config 0).Gateway}}') 119 | else 120 | DOCKER_IPV4="10.0.0." 121 | fi 122 | 123 | log "Running $NODE test looking for '$DOCKER_IPV4' at $(date) ..." 124 | 125 | ERRORS=0 126 | TESTS=0 127 | for loop in $(seq 1 250) 128 | do 129 | i=$((loop % NODES)) 130 | 131 | host=http://0.0.0.0:$((8080+i))/ 132 | 133 | # Uncomment if running inside a Dockside devtainer (which must be preconnected to a precreated runcvm-mtu Docker network). 134 | # host=http://$NODE$((i+1)):80/ 135 | 136 | response=$(curl --max-time 1 -is $host) 137 | ERROR=$? 138 | 139 | if [ $ERROR -eq 0 ]; then 140 | response=$(tr '\012\015' ' ' <<<$response) 141 | grep -q "remote_addr=$DOCKER_IPV4" <<<$response 142 | [ $? -ne 0 ] && log "#$loop Response error: $response" && ERROR=1 143 | else 144 | log "#$loop Response error: curl error $ERROR" 145 | ERROR=1 146 | fi 147 | 148 | ERRORS=$((ERRORS+ERROR)) 149 | TESTS=$((TESTS+1)) 150 | 151 | done 152 | 153 | log "Completed $NODE test $TESTS times, with $ERRORS errors" 154 | 155 | # Uncomment to debug: 156 | # log "Falling to shell, type CTRL+D to exit and clean up"; bash -i 157 | 158 | sleep 1 159 | exit $ERRORS 160 | -------------------------------------------------------------------------------- /tests/01-mariadb/test: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | NODE=01-mariadb 4 | 5 | nodes() { 6 | echo $NODE-mysqld $NODE-mysql 7 | } 8 | 9 | volumes() { 10 | echo '' 11 | } 12 | 13 | networks() { 14 | echo $NODE-network 15 | } 16 | 17 | _cleanup() { 18 | echo "> ($h) Cleaning up nodes ..." 19 | docker rm -f $(nodes) 2>/dev/null 20 | echo 21 | 22 | if [ "$(volumes)" != "" ]; then 23 | echo "> ($h) Cleaning up volumes ..." 24 | docker volume rm -f $(volumes) 25 | fi 26 | echo 27 | 28 | if [ -n "$IMAGE" ]; then 29 | echo "> ($h) Cleaning up temporary image ..." 30 | docker rmi $IMAGE 31 | echo 32 | fi 33 | 34 | rm -f /tmp/iid 35 | 36 | if [ "$(networks)" != "" ]; then 37 | echo "> ($h) Cleaning up networks ..." 
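# (Networks are removed after containers: docker network rm fails while
# containers remain attached, which is why nodes are cleaned up first.)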
38 | docker network rm $(networks) 39 | fi 40 | echo 41 | 42 | echo "> ($h) Cleaned up" 43 | } 44 | 45 | cleanup() { 46 | # Allow this to complete, even if we encounter errors 47 | set +e 48 | 49 | _cleanup 50 | 51 | # Restore setting to fail on error 52 | set -e 53 | } 54 | 55 | quit() { 56 | # Don't run a second time 57 | trap '' TERM INT EXIT 58 | 59 | cleanup 60 | 61 | echo "> ($h) Exiting with code $ERRORS" 62 | } 63 | 64 | trap quit TERM INT EXIT 65 | 66 | h=$(hostname) 67 | 68 | cleanup 69 | 70 | echo "> ($h) Creating network $NODE-network ..." 71 | docker network rm $(networks) 2>/dev/null || true 72 | docker network create $NODE-network 73 | 74 | # Launch a mariadb VM using RunCVM 75 | echo "> ($h) Launch RunCVM mariadb server as $NODE-mysqld ..." 76 | docker run --runtime=runcvm -d --rm --name=$NODE-mysqld --hostname=$NODE-mysqld --network=$NODE-network --cpus=1 --memory=1G --env=MARIADB_ALLOW_EMPTY_ROOT_PASSWORD=1 mariadb 77 | 78 | echo "> ($h) Monitoring mariadb logs ..." 79 | docker logs -f -t -n 0 $NODE-mysqld & 80 | 81 | # Allow final test to complete, even if we encounter errors 82 | set +e 83 | 84 | # Launch standard runc container to test connecting to the mariadb VM 85 | echo "> ($h) Waiting for mariadb startup and running test queries in runc container ..." 86 | docker run --rm --network=$NODE-network --name=$NODE-mysql --hostname=$NODE-mysql --env=host=$NODE-mysqld alpine ash -c 'apk update && apk add mariadb-client && for a in $(seq 40 -1 1); do if mysql -P 3306 -h $host mysql -e ""; then echo "> $(hostname) Connected to mysqld ..."; break; else echo "> $(hostname) Waiting for mysqld (#$a) ..."; sleep 1; fi; done && mysql -P 3306 -h $host mysql -e "select count(*) from user"' 87 | ERRORS=$? 88 | 89 | echo "> ($h) Completed $NODE test with $ERRORS errors" 90 | 91 | # bash -i 92 | 93 | sleep 1 94 | exit $ERRORS 95 | -------------------------------------------------------------------------------- /tests/02-user-workdir/test: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | # Load framework functions 4 | . ../framework.sh 5 | 6 | # TEST VARIABLES 7 | NODE=runcvm-01-test 8 | NETWORK="$NODE-network" 9 | IMAGE="alpine" 10 | RUNTIME="${RUNTIME:-runcvm}" 11 | 12 | # OVERRIDE FRAMEWORK FUNCTIONS 13 | nodes() { echo $NODE; } 14 | networks() { echo $NETWORK; } 15 | 16 | # TEST DETAILS 17 | COMMAND='echo "$(id -u) $(pwd)"' 18 | USER_ID="1000" 19 | WORK_DIR="/tmp" 20 | EXPECTED_OUTPUT="${USER_ID} ${WORK_DIR}" 21 | 22 | # TEST FUNCTIONS 23 | # -------------- 24 | 25 | # Function to test output against expected values 26 | test_output() { 27 | local test_type="$1" 28 | local expected_output="$2" 29 | local output_to_test="$3" 30 | 31 | if [ "$output_to_test" = "$expected_output" ]; then 32 | log "docker $test_type test: expected and received '$output_to_test' - PASS" 33 | return 0 34 | fi 35 | 36 | log "docker $test_type test: expected '$expected_output', but got: '$output_to_test' - FAIL" 37 | return 1 38 | } 39 | 40 | # TEST PROCEDURE 41 | # -------------- 42 | 43 | # Run routine cleanup of any preexisting containers, volumes, networks, and images 44 | cleanup 45 | 46 | # Create custom network 47 | log -n "Creating network '$NETWORK' ..." 48 | docker network create $NETWORK 49 | 50 | # Create and run the container 51 | log -n "Launching runcvm container with command '$COMMAND' ..." 
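# The container runs $COMMAND once, then loops emitting a ===DONE=== marker so
# the 'docker logs -f | sed' pipeline below has a line on which to terminate.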
52 | docker run \ 53 | -d \ 54 | --rm \ 55 | --runtime=$RUNTIME \ 56 | --network=$NETWORK \ 57 | --name=$NODE \ 58 | --hostname=$NODE \ 59 | --user=$USER_ID \ 60 | --workdir=$WORK_DIR \ 61 | $IMAGE \ 62 | sh -c "$COMMAND; while true; do echo ===DONE===; sleep 1; done" 63 | 64 | shopt -s lastpipe 65 | log "Container '$NODE' output ..." 66 | docker logs -f $NODE 2>&1 | sed "s/^/($NODE) > /; /===DONE===/q0; /failed/q129;" 67 | 68 | ERRORS=0 69 | 70 | # Test docker run command: 71 | # - Retrieve first line of logs from container 72 | # - Strip carriage returns for now, as it's unclear why they are present when they are absent from the expected output 73 | test_output "run" "$EXPECTED_OUTPUT" "$(docker logs $NODE | grep -v '===DONE===' | tr -d '\015')" || ERRORS=$((ERRORS+1)) 74 | 75 | # Test docker exec command: 76 | # - Retrieve output from exec command for exec test 77 | test_output "exec" "$EXPECTED_OUTPUT" "$(docker exec $NODE sh -c "$COMMAND")" || ERRORS=$((ERRORS+1)) 78 | 79 | # Final output 80 | log "Tests completed with $ERRORS errors" 81 | exit $ERRORS -------------------------------------------------------------------------------- /tests/03-env/test: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | # Load framework functions 4 | . ../framework.sh 5 | 6 | # TEST VARIABLES 7 | NODE=runcvm-01-test 8 | NETWORK="$NODE-network" 9 | IMAGE="alpine" 10 | RUNTIME="${RUNTIME:-runcvm}" 11 | 12 | # OVERRIDE FRAMEWORK FUNCTIONS 13 | nodes() { echo $NODE; } 14 | networks() { echo $NETWORK; } 15 | 16 | # TEST DETAILS 17 | COMMAND='env | sort' 18 | EXPECTED_OUTPUT="$(echo -e 'HOME=/root\nHOSTNAME=runcvm-01-test\nPATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\nPWD=/\nSHLVL=1\n')" 19 | 20 | # TEST FUNCTIONS 21 | # -------------- 22 | 23 | # Function to test output against expected values 24 | test_output() { 25 | local test_type="$1" 26 | local expected_output="$2" 27 | local output_to_test="$3" 28 | 29 | if [ "$output_to_test" = "$expected_output" ]; then 30 | log "docker $test_type test: expected and received '$output_to_test' - PASS" 31 | return 0 32 | fi 33 | 34 | log "docker $test_type test: expected '$expected_output', but got: '$output_to_test' - FAIL" 35 | return 1 36 | } 37 | 38 | # TEST PROCEDURE 39 | # -------------- 40 | 41 | # Run routine cleanup of any preexisting containers, volumes, networks, and images 42 | cleanup 43 | 44 | # Create custom network 45 | log -n "Creating network '$NETWORK' ..." 46 | docker network create $NETWORK 47 | 48 | # Create and run the container 49 | log -n "Launching runcvm container with command '$COMMAND' ..." 50 | docker run \ 51 | -d \ 52 | --rm \ 53 | --runtime=$RUNTIME \ 54 | --network=$NETWORK \ 55 | --name=$NODE \ 56 | --hostname=$NODE \ 57 | --user=$USER_ID \ 58 | --workdir=$WORK_DIR \ 59 | --init \ 60 | $IMAGE \ 61 | sh -c "$COMMAND; while true; do echo ===DONE===; sleep 1; done" 62 | 63 | shopt -s lastpipe 64 | log "Container '$NODE' output ..." 65 | docker logs -f $NODE 2>&1 | sed "s/^/($NODE) > /; /===DONE===/q0;" 66 | 67 | ERRORS=0 68 | 69 | # Test docker run command: 70 | # - Retrieve first line of logs from container 71 | # - Strip carriage returns for now,
as it's unclear why they are present when they are absent from the expected output 72 | test_output "run" "$EXPECTED_OUTPUT" "$(docker logs $NODE | grep -v '===DONE===' | tr -d '\015')" || ERRORS=$((ERRORS+1)) 73 | 74 | # Test docker exec command: 75 | # - Retrieve output from exec command for exec test 76 | test_output "exec" "$EXPECTED_OUTPUT" "$(docker exec $NODE sh -c "$COMMAND")" || ERRORS=$((ERRORS+1)) 77 | 78 | # Final output 79 | log "Tests completed with $ERRORS errors" 80 | exit $ERRORS -------------------------------------------------------------------------------- /tests/framework.sh: -------------------------------------------------------------------------------- 1 | images() { echo; } 2 | nodes() { echo; } 3 | volumes() { echo; } 4 | networks() { echo; } 5 | 6 | log() { 7 | local opts 8 | if [ "$1" = "-n" ]; then opts="-n"; shift; fi 9 | echo $opts "> $1" 10 | } 11 | 12 | _cleanup() { 13 | 14 | if [ "$(nodes)" != "" ]; then 15 | log -n "Cleaning up nodes ... " 16 | docker rm -f $(nodes) 2>&1 17 | fi 18 | 19 | if [ "$(volumes)" != "" ]; then 20 | log -n "Cleaning up volumes ... " 21 | docker volume rm -f $(volumes) 2>&1 22 | fi 23 | 24 | if [ "$(images)" != "" ] && [ "$NO_CLEAN_IMAGE" != "1" ]; then 25 | log -n "Cleaning up temporary images ... " 26 | docker rmi $(images) 2>&1 27 | fi 28 | 29 | if [ "$(networks)" != "" ]; then 30 | log -n "Cleaning up networks ... " 31 | docker network rm $(networks) 2>&1 32 | fi 33 | 34 | rm -f /tmp/iid 35 | } 36 | 37 | cleanup() { 38 | # Allow this to complete, even if we encounter errors 39 | set +e 40 | 41 | _cleanup 42 | 43 | # Restore setting to fail on error 44 | set -e 45 | } 46 | 47 | quit() { 48 | local code=$? 49 | 50 | # Don't run a second time 51 | trap '' TERM INT EXIT 52 | 53 | cleanup 54 | 55 | log "Exiting with code $code" 56 | } 57 | 58 | term() { 59 | exit 254 60 | } 61 | 62 | # Standard setup 63 | 64 | trap quit EXIT 65 | trap term TERM INT QUIT 66 | 67 | # Cleanup on EXIT is already handled by the 'quit' trap above; do not re-trap 68 | # EXIT here, as 'trap cleanup EXIT' would replace 'quit' and skip its exit-code log -------------------------------------------------------------------------------- /tests/run: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | ERRORS=0 4 | 5 | DIR=$(dirname $0) 6 | 7 | if [ -d "$DIR" ]; then 8 | echo "Running RunCVM integration tests in '$DIR' ..." 9 | cd $DIR 10 | else 11 | echo "$0: Error: RunCVM integration test directory '$DIR' not found; aborting!" 12 | exit 1 13 | fi 14 | 15 | for test in * 16 | do 17 | 18 | [ -d "$test" ] || continue; 19 | 20 | cd $test 21 | ./test 2>&1 | sed "s/^/$test - /" 22 | TEST_ERRORS=${PIPESTATUS[0]} # capture ./test's exit status, not sed's 23 | ERRORS=$((ERRORS+$TEST_ERRORS)) 24 | 25 | cd .. 26 | 27 | echo "RunCVM test $test finished with $TEST_ERRORS errors" 28 | done 29 | 30 | echo "RunCVM integration tests completed with $ERRORS errors" 31 | 32 | exit $ERRORS --------------------------------------------------------------------------------