├── .dockerignore ├── .gitignore ├── Dockerfile ├── LICENSE ├── README.md ├── build-utils ├── entrypoint-install.sh └── make-bundelf-bundle.sh ├── build └── build.sh ├── kernels └── oraclelinux │ ├── 95virtiofs │ ├── module-setup.sh │ ├── mount-virtiofs.sh │ └── parse-virtiofs.sh │ └── addvirtiofs.conf ├── patches ├── dnsmasq │ └── remove-passwd-requirement.patch ├── dropbear │ └── runcvm.patch ├── mkinitfs │ └── nlplug-findfs.patch └── seabios │ └── qemu-fw-cfg-fix.patch ├── qemu-exit └── qemu-exit.c ├── runcvm-init ├── VERSION.h └── dumb-init.c ├── runcvm-scripts ├── functions │ └── cgroupfs ├── runcvm-ctr-defaults ├── runcvm-ctr-entrypoint ├── runcvm-ctr-exec ├── runcvm-ctr-exit ├── runcvm-ctr-qemu ├── runcvm-ctr-qemu-ifdown ├── runcvm-ctr-qemu-ifup ├── runcvm-ctr-qemu-poweroff ├── runcvm-ctr-shutdown ├── runcvm-ctr-virtiofsd ├── runcvm-install-runtime.sh ├── runcvm-ip-functions ├── runcvm-runtime ├── runcvm-vm-exec ├── runcvm-vm-init ├── runcvm-vm-qemu-ga ├── runcvm-vm-start └── runcvm-vm-start-wrapper └── tests ├── 00-http-docker-swarm ├── node │ ├── Dockerfile │ └── docker.sh └── test ├── 01-mariadb └── test ├── 02-user-workdir └── test ├── 03-env └── test ├── framework.sh └── run /.dockerignore: -------------------------------------------------------------------------------- 1 | # Ignore files matching the following patterns within docker build 2 | **/*~ 3 | depot.json 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .#* 2 | *~ 3 | *.bak 4 | *.o 5 | *.version 6 | *.orig 7 | *.tdy 8 | TAGS 9 | \#*\# 10 | .packlist 11 | perllocal.pod 12 | .c9 13 | .Trash-1000 14 | .vscode 15 | depot.json 16 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # syntax=docker/dockerfile:1.3-labs 2 | 3 | # Alpine version to build with 4 | ARG ALPINE_VERSION=3.19 5 | 6 | # --- BUILD STAGE --- 7 | # Build base alpine-sdk image for later build stages 8 | FROM alpine:$ALPINE_VERSION as alpine-sdk 9 | 10 | RUN apk update && apk add --no-cache alpine-sdk coreutils && \ 11 | abuild-keygen -an && \ 12 | # Copy the public keys to the system keys 13 | cp -a /root/.abuild/*.pub /etc/apk/keys && \ 14 | git clone --depth 1 --single-branch --filter=blob:none --sparse https://gitlab.alpinelinux.org/alpine/aports.git ~/aports && \ 15 | cd ~/aports/ && \ 16 | git sparse-checkout set main/seabios main/ 17 | 18 | # --- BUILD STAGE --- 19 | # Build patched SeaBIOS packages 20 | # to allow disabling of BIOS output by QEMU 21 | # (without triggering QEMU warnings) 22 | FROM alpine-sdk as alpine-seabios 23 | 24 | ADD patches/seabios/qemu-fw-cfg-fix.patch /root/aports/main/seabios/0003-qemu-fw-cfg-fix.patch 25 | 26 | RUN <>APKBUILD 30 | echo 'source="${source}0003-qemu-fw-cfg-fix.patch"' >>APKBUILD 31 | abuild -rFf 32 | EOF 33 | 34 | # --- BUILD STAGE --- 35 | # Build patched dnsmasq 36 | # that does not require /etc/passwd file to run 37 | # (needed for images such as hello-world) 38 | FROM alpine-sdk as alpine-dnsmasq 39 | 40 | ADD patches/dnsmasq/remove-passwd-requirement.patch /root/aports/main/dnsmasq/remove-passwd-requirement.patch 41 | 42 | RUN <>APKBUILD 46 | echo 'source="${source}remove-passwd-requirement.patch"' >>APKBUILD 47 | abuild -rFf 48 | EOF 49 | 50 | # --- BUILD STAGE --- 51 | # Build patched dropbear with epka plugin 52 | # that does not require 
/etc/passwd or PAM to run 53 | FROM alpine-sdk as alpine-dropbear 54 | 55 | ADD patches/dropbear/runcvm.patch /root/aports/main/dropbear/runcvm.patch 56 | 57 | RUN <>APKBUILD 62 | echo 'source="${source}runcvm.patch"' >>APKBUILD 63 | abuild -rFf 64 | 65 | cd /root 66 | git clone https://github.com/fabriziobertocci/dropbear-epka.git 67 | cd dropbear-epka 68 | apk add --no-cache automake autoconf libtool 69 | libtoolize --force 70 | aclocal 71 | autoheader || true 72 | automake --force-missing --add-missing 73 | autoconf 74 | ./configure 75 | make install 76 | EOF 77 | 78 | # --- BUILD STAGE --- 79 | # Build patched mkinitfs/nlplug-findfs 80 | # with shorter timeout for speedier boot (saving ~4s) 81 | FROM alpine-sdk as alpine-mkinitfs 82 | 83 | ADD patches/mkinitfs/nlplug-findfs.patch /root/aports/main/mkinitfs/nlplug-findfs.patch 84 | 85 | RUN <>APKBUILD 89 | echo 'source="${source} nlplug-findfs.patch"' >>APKBUILD 90 | abuild -rFf 91 | EOF 92 | 93 | # --- BUILD STAGE --- 94 | # Build dist-independent dynamic binaries and libraries 95 | FROM alpine:$ALPINE_VERSION as binaries 96 | 97 | RUN apk update && \ 98 | apk add --no-cache file bash qemu-system-x86_64 qemu-virtiofsd qemu-ui-curses qemu-guest-agent \ 99 | jq iproute2 netcat-openbsd e2fsprogs blkid util-linux \ 100 | s6 dnsmasq iptables nftables \ 101 | ncurses coreutils \ 102 | patchelf 103 | 104 | # Install patched SeaBIOS 105 | COPY --from=alpine-seabios /root/packages/main/x86_64 /tmp/seabios/ 106 | RUN apk add --allow-untrusted /tmp/seabios/*.apk && cp -a /usr/share/seabios/bios*.bin /usr/share/qemu/ 107 | 108 | # Install patched dnsmasq 109 | COPY --from=alpine-dnsmasq /root/packages/main/x86_64 /tmp/dnsmasq/ 110 | RUN apk add --allow-untrusted /tmp/dnsmasq/dnsmasq-2*.apk /tmp/dnsmasq/dnsmasq-common*.apk 111 | 112 | # Install patched dropbear 113 | COPY --from=alpine-dropbear /root/packages/main/x86_64 /usr/local/lib/libepka_file.so /tmp/dropbear/ 114 | RUN apk add --allow-untrusted /tmp/dropbear/dropbear-ssh*.apk /tmp/dropbear/dropbear-dbclient*.apk /tmp/dropbear/dropbear-2*.apk 115 | 116 | # Patch the binaries and set up symlinks 117 | COPY build-utils/make-bundelf-bundle.sh /usr/local/bin/make-bundelf-bundle.sh 118 | ENV BUNDELF_BINARIES="busybox bash jq ip nc mke2fs blkid findmnt dnsmasq xtables-legacy-multi nft xtables-nft-multi nft mount s6-applyuidgid qemu-system-x86_64 qemu-ga /usr/lib/qemu/virtiofsd tput coreutils getent dropbear dbclient dropbearkey" 119 | ENV BUNDELF_EXTRA_LIBS="/usr/lib/xtables /usr/libexec/coreutils /tmp/dropbear/libepka_file.so /usr/lib/qemu/*.so" 120 | ENV BUNDELF_EXTRA_SYSTEM_LIB_PATHS="/usr/lib/xtables" 121 | ENV BUNDELF_CODE_PATH="/opt/runcvm" 122 | ENV BUNDELF_EXEC_PATH="/.runcvm/guest" 123 | 124 | RUN /usr/local/bin/make-bundelf-bundle.sh --bundle && \ 125 | mkdir -p $BUNDELF_CODE_PATH/bin && \ 126 | cd $BUNDELF_CODE_PATH/bin && \ 127 | for cmd in \ 128 | awk base64 cat chgrp chmod cut grep head hostname init ln ls \ 129 | mkdir poweroff ps rm rmdir route sh sysctl tr touch; \ 130 | do \ 131 | ln -s busybox $cmd; \ 132 | done && \ 133 | mkdir -p $BUNDELF_CODE_PATH/usr/share && \ 134 | cp -a /usr/share/qemu $BUNDELF_CODE_PATH/usr/share && \ 135 | cp -a /etc/terminfo $BUNDELF_CODE_PATH/usr/share && \ 136 | # Remove setuid/setgid bits from any/all binaries 137 | chmod -R -s $BUNDELF_CODE_PATH/ 138 | 139 | # --- BUILD STAGE --- 140 | # Build static runcvm-init 141 | FROM alpine:$ALPINE_VERSION as runcvm-init 142 | 143 | RUN apk update && \ 144 | apk add --no-cache gcc musl-dev 145 | 146 | ADD 
runcvm-init /root/runcvm-init 147 | RUN cd /root/runcvm-init && cc -o /root/runcvm-init/runcvm-init -std=gnu99 -static -s -Wall -Werror -O3 dumb-init.c 148 | 149 | # --- BUILD STAGE --- 150 | # Build static qemu-exit 151 | FROM alpine:$ALPINE_VERSION as qemu-exit 152 | 153 | RUN apk update && \ 154 | apk add --no-cache gcc musl-dev 155 | 156 | ADD qemu-exit /root/qemu-exit 157 | RUN cd /root/qemu-exit && cc -o /root/qemu-exit/qemu-exit -std=gnu99 -static -s -Wall -Werror -O3 qemu-exit.c 158 | 159 | # --- BUILD STAGE --- 160 | # Build alpine kernel and initramfs with virtiofs module 161 | FROM alpine:$ALPINE_VERSION as alpine-kernel 162 | 163 | # Install patched mkinitfs 164 | COPY --from=alpine-mkinitfs /root/packages/main/x86_64 /tmp/mkinitfs/ 165 | RUN apk add --allow-untrusted /tmp/mkinitfs/*.apk 166 | RUN apk add --no-cache linux-virt 167 | RUN echo 'kernel/fs/fuse/virtiofs*' >>/etc/mkinitfs/features.d/virtio.modules && \ 168 | sed -ri 's/\b(ata|nvme|raid|scsi|usb|cdrom|kms|mmc)\b//g; s/[ ]+/ /g' /etc/mkinitfs/mkinitfs.conf && \ 169 | sed -ri 's/(nlplug-findfs)/\1 --timeout=1000/' /usr/share/mkinitfs/initramfs-init && \ 170 | mkinitfs $(basename $(ls -d /lib/modules/*)) 171 | RUN BASENAME=$(basename $(ls -d /lib/modules/*)) && \ 172 | mkdir -p /opt/runcvm/kernels/alpine/$BASENAME && \ 173 | cp -a /boot/vmlinuz-virt /opt/runcvm/kernels/alpine/$BASENAME/vmlinuz && \ 174 | cp -a /boot/initramfs-virt /opt/runcvm/kernels/alpine/$BASENAME/initrd && \ 175 | cp -a /lib/modules/ /opt/runcvm/kernels/alpine/$BASENAME/ && \ 176 | cp -a /boot/config-virt /opt/runcvm/kernels/alpine/$BASENAME/modules/$BASENAME/config && \ 177 | chmod -R u+rwX,g+rX,o+rX /opt/runcvm/kernels/alpine 178 | 179 | FROM alpine-kernel as openwrt-kernel 180 | RUN mkdir -p /opt/runcvm/kernels/openwrt/$(basename $(ls -d /lib/modules/*))/modules/$(basename $(ls -d /lib/modules/*)) && \ 181 | cd /opt/runcvm/kernels/openwrt/$(basename $(ls -d /lib/modules/*)) && \ 182 | cp -a /boot/vmlinuz-virt vmlinuz && \ 183 | cp -a /boot/initramfs-virt initrd && \ 184 | find /lib/modules/ -type f -name '*.ko*' -exec cp -a {} modules/$(basename $(ls -d /lib/modules/*)) \; && \ 185 | gunzip modules/$(basename $(ls -d /lib/modules/*))/*.gz && \ 186 | chmod -R u+rwX,g+rX,o+rX /opt/runcvm/kernels/openwrt 187 | 188 | # --- BUILD STAGE --- 189 | # Build Debian bookworm kernel and initramfs with virtiofs module 190 | FROM amd64/debian:bookworm as debian-kernel 191 | 192 | ARG DEBIAN_FRONTEND=noninteractive 193 | RUN apt update && apt install -y linux-image-amd64:amd64 && \ 194 | echo 'virtiofs' >>/etc/initramfs-tools/modules && \ 195 | echo 'virtio_console' >>/etc/initramfs-tools/modules && \ 196 | echo "RESUME=none" >/etc/initramfs-tools/conf.d/resume && \ 197 | update-initramfs -u 198 | RUN BASENAME=$(basename $(ls -d /lib/modules/*)) && \ 199 | mkdir -p /opt/runcvm/kernels/debian/$BASENAME && \ 200 | cp -aL /vmlinuz /opt/runcvm/kernels/debian/$BASENAME/vmlinuz && \ 201 | cp -aL /initrd.img /opt/runcvm/kernels/debian/$BASENAME/initrd && \ 202 | cp -a /lib/modules/ /opt/runcvm/kernels/debian/$BASENAME/ && \ 203 | cp -a /boot/config-$BASENAME /opt/runcvm/kernels/debian/$BASENAME/modules/$BASENAME/config && \ 204 | chmod -R u+rwX,g+rX,o+rX /opt/runcvm/kernels/debian 205 | 206 | # --- BUILD STAGE --- 207 | # Build Ubuntu jammy kernel and initramfs with virtiofs module 208 | FROM amd64/ubuntu:jammy as ubuntu-kernel 209 | 210 | ARG DEBIAN_FRONTEND=noninteractive 211 | RUN apt update && apt install -y linux-generic:amd64 && \ 212 | echo 'virtiofs'
>>/etc/initramfs-tools/modules && \ 213 | echo 'virtio_console' >>/etc/initramfs-tools/modules && \ 214 | echo "RESUME=none" >/etc/initramfs-tools/conf.d/resume && \ 215 | update-initramfs -u 216 | RUN BASENAME=$(basename $(ls -d /lib/modules/*)) && \ 217 | mkdir -p /opt/runcvm/kernels/ubuntu/$BASENAME && \ 218 | cp -aL /boot/vmlinuz /opt/runcvm/kernels/ubuntu/$BASENAME/vmlinuz && \ 219 | cp -aL /boot/initrd.img /opt/runcvm/kernels/ubuntu/$BASENAME/initrd && \ 220 | cp -a /lib/modules/ /opt/runcvm/kernels/ubuntu/$BASENAME/ && \ 221 | cp -a /boot/config-$BASENAME /opt/runcvm/kernels/ubuntu/$BASENAME/modules/$BASENAME/config && \ 222 | chmod -R u+rwX,g+rX,o+rX /opt/runcvm/kernels/ubuntu 223 | 224 | # --- BUILD STAGE --- 225 | # Build Oracle Linux kernel and initramfs with virtiofs module 226 | FROM oraclelinux:9 as oracle-kernel 227 | 228 | RUN dnf install -y kernel 229 | ADD ./kernels/oraclelinux/addvirtiofs.conf /etc/dracut.conf.d/addvirtiofs.conf 230 | ADD ./kernels/oraclelinux/95virtiofs /usr/lib/dracut/modules.d/95virtiofs 231 | RUN dracut --force --kver $(basename /lib/modules/*) --kmoddir /lib/modules/* 232 | RUN mkdir -p /opt/runcvm/kernels/ol/$(basename $(ls -d /lib/modules/*)) && \ 233 | mv /lib/modules/*/vmlinuz /opt/runcvm/kernels/ol/$(basename $(ls -d /lib/modules/*))/vmlinuz && \ 234 | cp -aL /boot/initramfs* /opt/runcvm/kernels/ol/$(basename $(ls -d /lib/modules/*))/initrd && \ 235 | cp -a /lib/modules/ /opt/runcvm/kernels/ol/$(basename $(ls -d /lib/modules/*))/ && \ 236 | chmod -R u+rwX,g+rX,o+rX /opt/runcvm/kernels/ol 237 | 238 | # --- BUILD STAGE --- 239 | # Build RunCVM installer 240 | FROM alpine:$ALPINE_VERSION as installer 241 | 242 | COPY --from=binaries /opt/runcvm /opt/runcvm 243 | COPY --from=runcvm-init /root/runcvm-init/runcvm-init /opt/runcvm/sbin/ 244 | COPY --from=qemu-exit /root/qemu-exit/qemu-exit /opt/runcvm/sbin/ 245 | 246 | RUN apk update && apk add --no-cache rsync 247 | 248 | ADD runcvm-scripts /opt/runcvm/scripts/ 249 | 250 | ADD build-utils/entrypoint-install.sh / 251 | ENTRYPOINT ["/entrypoint-install.sh"] 252 | 253 | # Install needed kernels. 254 | # Comment out any kernels that are unneeded. 255 | COPY --from=alpine-kernel /opt/runcvm/kernels/alpine /opt/runcvm/kernels/alpine 256 | COPY --from=debian-kernel /opt/runcvm/kernels/debian /opt/runcvm/kernels/debian 257 | COPY --from=openwrt-kernel /opt/runcvm/kernels/openwrt /opt/runcvm/kernels/openwrt 258 | COPY --from=ubuntu-kernel /opt/runcvm/kernels/ubuntu /opt/runcvm/kernels/ubuntu 259 | COPY --from=oracle-kernel /opt/runcvm/kernels/ol /opt/runcvm/kernels/ol 260 | 261 | # Add 'latest' symlinks for available kernels 262 | RUN for d in /opt/runcvm/kernels/*; do cd $d && ln -s $(ls -d * | sort | head -n 1) latest; done 263 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 
14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 
134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 
193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # RunCVM Container Runtime 2 | 3 | ## Introduction 4 | 5 | RunCVM (Run Container Virtual Machine) is an experimental open-source Docker container runtime for Linux, created by Struan Bartlett at NewsNow Labs, that makes launching standard containerised workloads and system workloads (e.g. Systemd, Docker, even OpenWrt) in VMs as easy as launching a container. 6 | 7 |
8 | Install RunCVM and then launch an Alpine Container/VM 9 | 
10 | View on Asciinema 11 | 
12 | 13 | ## Quick start 14 | 15 | Install: 16 | 17 | ```sh 18 | curl -s -o - https://raw.githubusercontent.com/newsnowlabs/runcvm/main/runcvm-scripts/runcvm-install-runtime.sh | sudo sh 19 | ``` 20 | 21 | Now launch an nginx VM listening on port 8080: 22 | 23 | ```console 24 | docker run --runtime=runcvm --name nginx1 --rm -p 8080:80 nginx 25 | ``` 26 | 27 | Launch a MariaDB VM, with 2 cpus and 2G memory, listening on port 3306: 28 | 29 | ```console 30 | docker run --runtime=runcvm --name mariadb1 --rm -p 3306:3306 --cpus 2 --memory 2G --env=MARIADB_ALLOW_EMPTY_ROOT_PASSWORD=1 mariadb 31 | ``` 32 | 33 | Launch a vanilla ubuntu VM, with interactive terminal: 34 | 35 | ```console 36 | docker run --runtime=runcvm --name ubuntu1 --rm -it ubuntu 37 | ``` 38 | 39 | Gain another interactive console on `ubuntu1`: 40 | 41 | ```console 42 | docker exec -it ubuntu1 bash 43 | ``` 44 | 45 | Launch a VM with 1G memory and a 1G ext4-formatted backing file mounted at `/var/lib/docker` and stored in the underlying container's filesystem: 46 | 47 | ```console 48 | docker run -it --runtime=runcvm --memory=1G --env=RUNCVM_DISKS=/disks/docker,/var/lib/docker,ext4,1G 49 | ``` 50 | 51 | Launch a VM with 2G memory and a 5G ext4-formatted backing file mounted at `/var/lib/docker` and stored in a dedicated Docker volume on the host: 52 | 53 | ```console 54 | docker run -it --runtime=runcvm --memory=2G --mount=type=volume,src=runcvm-disks,dst=/disks --env=RUNCVM_DISKS=/disks/docker,/var/lib/docker,ext4,5G 55 | ``` 56 | 57 | Launch a 3-node Docker Swarm on a network with 9000 MTU and, on the swarm, an http global service: 58 | 59 | ```console 60 | git clone https://github.com/newsnowlabs/runcvm.git && \ 61 | cd runcvm/tests/00-http-docker-swarm && \ 62 | NODES=3 MTU=9000 ./test 63 | ``` 64 | 65 | ### System workloads 66 | 67 | **Docker+Sysbox runtime demo** - Launch Ubuntu running Systemd and Docker with [Sysbox](https://github.com/nestybox/sysbox) runtime; then within it run an Alpine _Sysbox_ container; and, _within that_ install dockerd and run a container from the 'hello-world' image: 68 | 69 | ```console 70 | cat </dev/null & sleep 5; docker run --rm hello-world'" 84 | docker rm -fv ubuntu-docker-sysbox 85 | ``` 86 | 87 | - [Watch on Asciinema](https://asciinema.org/a/630032) 88 | 89 | **Nested RunCVM demo** - Launch Ubuntu running Systemd and Docker with RunCVM runtime installed; then within it run an Alpine _RunCVM_ Container/VM; and, within that install dockerd and, _within that_, run a container from the 'hello-world' image: 90 | 91 | ```console 92 | cat <>/etc/modules 101 | ENTRYPOINT ["/lib/systemd/systemd"] 102 | ENV RUNCVM_DISKS='/disks/docker,/var/lib/docker,ext4,1G' 103 | VOLUME /disks 104 | EOF 105 | docker run -d --runtime=runcvm -m 2g --name=ubuntu-docker-runcvm ubuntu-docker-runcvm 106 | docker exec ubuntu-docker-runcvm bash -c "docker run --rm --runtime=runcvm alpine ash -x -c 'apk add docker; dockerd &>/dev/null & sleep 5; docker run --rm hello-world'" 107 | docker rm -fv ubuntu-docker-runcvm 108 | ``` 109 | 110 | **Docker+GVisor runtime demo** - Launch Ubuntu running Systemd and Docker with GVisor runtime; then within it run the 'hello-world' image in a _GVisor_ container: 111 | 112 | ```console 113 | cat </etc/apt/sources.list.d/gvisor.list && \ 120 | apt update && \ 121 | apt-get install -y runsc 122 | RUN [ ! 
-f /etc/docker/daemon.json ] && echo '{}' > /etc/docker/daemon.json; cat /etc/docker/daemon.json | jq '.runtimes.runsc.path="/usr/bin/runsc"' | tee /etc/docker/daemon.json 123 | ENTRYPOINT ["/lib/systemd/systemd"] 124 | ENV RUNCVM_DISKS='/disks/docker,/var/lib/docker,ext4,1G' 125 | VOLUME /disks 126 | EOF 127 | docker run -d --runtime=runcvm -m 2g --name=ubuntu-docker-gvisor ubuntu-docker-gvisor 128 | docker exec ubuntu-docker-gvisor bash -c "docker run --rm --runtime=runsc hello-world" 129 | docker rm -fv ubuntu-docker-gvisor 130 | ``` 131 | 132 | **Launch [OpenWrt](https://openwrt.org/)** - with port forward to LuCI web UI on port 10080: 133 | 134 | ```console 135 | docker import --change='ENTRYPOINT ["/sbin/init"]' https://archive.openwrt.org/releases/23.05.2/targets/x86/generic/openwrt-23.05.2-x86-generic-rootfs.tar.gz openwrt-23.05.2 && \ 136 | docker network create --subnet 172.128.0.0/24 runcvm-openwrt && \ 137 | echo -e "config interface 'loopback'\n\toption device 'lo'\n\toption proto 'static'\n\toption ipaddr '127.0.0.1'\n\toption netmask '255.0.0.0'\n\nconfig device\n\toption name 'br-lan'\n\toption type 'bridge'\n\tlist ports 'eth0'\n\nconfig interface 'lan'\n\toption device 'br-lan'\n\toption proto 'static'\n\toption ipaddr '172.128.0.5'\n\toption netmask '255.255.255.0'\n\toption gateway '172.128.0.1'\n" >/tmp/runcvm-openwrt-network && \ 138 | docker run -it --rm --runtime=runcvm --name=openwrt --network=runcvm-openwrt --ip=172.128.0.5 -v /tmp/runcvm-openwrt-network:/etc/config/network -p 10080:80 openwrt-23.05.2 139 | ``` 140 | 141 | - [Watch on Asciinema](https://asciinema.org/a/631857) 142 | 143 | ## RunCVM-in-Portainer walk-through 144 | 145 | [![Playing around with RunCVM, a docker runtime plugin](https://i.ytimg.com/vi/OENaWDlCWKg/maxresdefault.jpg)](https://www.youtube.com/watch?v=OENaWDlCWKg "Playing around with RunCVM, a docker runtime plugin") 146 | 147 | ## Motivation 148 | 149 | RunCVM was born out of difficulties experienced using the Docker and Podman CLIs to launch [Kata Containers v2](https://katacontainers.io/), and a belief that launching containerised workloads in VMs using Docker needn't be so complicated. 150 | 151 | > Motivations included: efforts to [re-add OCI CLI commands for docker/podman](https://github.com/kata-containers/kata-containers/issues/722) to Kata v2 to support Docker & Podman; other Kata issues [#3358](https://github.com/kata-containers/kata-containers/issues/3358), [#1123](https://github.com/kata-containers/kata-containers/issues/1123), [#1133](https://github.com/kata-containers/kata-containers/issues/1133), [#3038](https://github.com/kata-containers/runtime/issues/3038); [#5321](https://github.com/kata-containers/runtime/issues/5321); [#6861](https://github.com/kata-containers/runtime/issues/6861); Podman issues [#8579](https://github.com/containers/podman/issues/8579) and [#17070](https://github.com/containers/podman/issues/17070); and Kubernetes issue [#40114](https://github.com/kubernetes/website/issues/40114); though please note, since authoring RunCVM some of these issues may have been resolved. 152 | 153 | Like Kata, RunCVM aims to be a secure container runtime with lightweight virtual machines that feel and perform like containers, but provide stronger workload isolation using hardware virtualisation technology.
154 | 155 | However, while Kata aims to launch standard container images inside a restricted-privileges namespace inside a VM running a single fixed and heavily customised kernel and Linux distribution optimised for this purpose, RunCVM intentionally aims to launch container _or VM_ images as the _VM's root filesystem_ using stock or bespoke Linux kernels, the upshot being that RunCVM can run VM workloads that Kata's security and kernel model would explicitly prevent. 156 | 157 | For example: 158 | - RunCVM can launch system images expecting to interface directly with hardware, like [OpenWrt](https://openwrt.org/) 159 | - RunCVM can launch VMs nested inside a RunCVM VM - i.e. an 'inner' RunCVM Container/VM guest can be launched by Docker running within an 'outer' RunCVM Container/VM guest (assuming the host supports nested VMs) - in this sense, RunCVM is 'reentrant'. 160 | 161 | RunCVM features: 162 | 163 | - Compatible with `docker run` (with experimental support for `podman run`). 164 | - Uses a lightweight 'wrapper-runtime' technology that subverts the behaviour of the standard container runtime `runc` to cause a VM to be launched within the container (making its code footprint and external dependencies extremely small, and its internals extremely simple and easy to understand and tailor for specific purposes). 165 | - Highly portable among Linux distributions and development platforms providing KVM. Can even be installed on [GitHub Codespaces](https://github.com/features/codespaces)! 166 | - Written, using off-the-shelf open-source components, almost entirely in shell script for simplicity, portability and ease of development. 167 | 168 | > RunCVM makes some trade-offs in return for this simplicity. See the full list of [features and limitations](#features-and-limitations). 169 | 170 | ## Contents 171 | 172 | - [Introduction](#introduction) 173 | - [Quick start](#quick-start) 174 | - [Motivation](#motivation) 175 | - [Licence](#licence) 176 | - [Project aims](#project-aims) 177 | - [Project ambitions](#project-ambitions) 178 | - [Applications for RunCVM](#applications-for-runcvm) 179 | - [How RunCVM works](#how-runcvm-works) 180 | - [System requirements](#system-requirements) 181 | - [Installation](#installation) 182 | - [Upgrading](#upgrading) 183 | - [Features and Limitations](#features-and-limitations) 184 | - [RunCVM vs Kata comparison](#runcvm-vs-kata-comparison) 185 | - [Kernel selection](#kernel-selection) 186 | - [Option reference](#option-reference) 187 | - [Advanced usage](#advanced-usage) 188 | - [Developing](#developing) 189 | - [Building](#building) 190 | - [Testing](#testing) 191 | - [Contributing](#contributing) 192 | - [Support](#support) 193 | - [Uninstallation](#uninstallation) 194 | - [Legals](#Legals) 195 | 196 | ## Licence 197 | 198 | RunCVM is free and open-source, licensed under the Apache Licence, Version 2.0. See the [LICENSE](LICENSE) file for details.
199 | 200 | ## Project aims 201 | 202 | - Run any standard container workload in a VM using `docker run` with no need to customise images or the command line (except adding `--runtime=runcvm`) 203 | - Run unusual container workloads, like `dockerd` and `systemd`, that will not run in standard container runtimes 204 | - Maintain a similar experience within a RunCVM VM as within a container: process table, network interfaces, stdio, exit code handling should be broadly similar to maximise compatibility 205 | - Container start/stop/kill semantics respected, where possible providing clean VM shutdown on stop 206 | - VM console accessible as one would expect using `docker run -it`, `docker start -ai` and `docker attach` (and so on), generally good support for other `docker container` subcommands 207 | - Efficient container startup, by using virtiofs to serve a container's filesystem directly to a VM (instead of unpacking an image into a backing file) 208 | - Improved security compared to the standard container runtime, and as much security as possible without compromising the simplicity of the implementation 209 | - Command-line and image-embedded options for customising a container's VM specifications, devices, kernel 210 | - Intelligent kernel selection, according to the distribution used in the image being launched 211 | - No external dependencies, except for Docker/Podman and relevant Linux kernel modules (`kvm` and `tun`) 212 | - Support multiple Docker network interfaces attached to a created (but not yet running) container using `docker run --network=<network>` and `docker network connect` (excluding IPv6) 213 | 214 | ## Project ambitions 215 | 216 | - Support for booting VM with a file-backed disk root fs generated from the container image, instead of only virtiofs root 217 | - Support running foreign-architecture VMs by using QEMU dynamic CPU emulation for the entire VM (instead of the approach used by [https://github.com/multiarch/qemu-user-static](https://github.com/multiarch/qemu-user-static) which uses dynamic CPU emulation for each individual binary) 218 | - Support for QEMU [microvm](https://qemu.readthedocs.io/en/latest/system/i386/microvm.html) or potentially Amazon Firecracker 219 | - More natural console support with independent stdout and stderr channels for `docker run -it` 220 | - Improve VM boot time and other behaviours using custom kernel 221 | - Support for specific hardware e.g. graphics display served via VNC 222 | 223 | ## Applications for RunCVM 224 | 225 | The main applications for RunCVM are: 226 | 227 | 1. Running and testing applications that: 228 | - don't work with (or require enhanced privileges to work with) standard container runtimes (e.g. `systemd`, `dockerd`, Docker swarm services, [Kubernetes](https://kubernetes.io/)) 229 | - require a running kernel, or a kernel version or modules not available on the host 230 | - require specific hardware that can be emulated e.g. disks, graphics displays 231 | 2. Running existing container workloads with increased security 232 | 3. Testing container workloads that are already intended to launch in VM environments, such as on [fly.io](https://fly.io) 233 | 4.
Developing any of the above applications in [Dockside](https://dockside.io/) (see [RunCVM and Dockside](#runcvm-and-dockside)) 234 | 235 | ## How RunCVM works 236 | 237 | RunCVM's 'wrapper' runtime, `runcvm-runtime`, receives container create commands triggered by `docker` `run`/`create` commands, modifies the configuration of the requested container in such a way that the created container will launch a VM that boots from the container's filesystem, and then passes the request on to the standard container runtime (`runc`) to actually create and start the container. 238 | 239 | For a deep dive into RunCVM's internals, see the section on [Developing RunCVM](#developing). 240 | 241 | ## System requirements 242 | 243 | RunCVM should run on any amd64 (x86_64) hardware (or VM) running Linux Kernel >= 5.10, and that supports [KVM](https://www.linux-kvm.org/page/Main_Page) and [Docker](https://docker.com). So if your host can already run [KVM](https://www.linux-kvm.org/page/Main_Page) VMs and [Docker](https://docker.com) then it should run RunCVM. 244 | 245 | RunCVM has no other host dependencies, apart from Docker (or experimentally, Podman) and the `kvm` and `tun` kernel modules. RunCVM comes packaged with all binaries and libraries it needs to run (including its own QEMU binary). 246 | 247 | RunCVM is tested on Debian Bullseye and [GitHub Codespaces](https://github.com/codespaces/new?hide_repo_select=true&ref=main&repo=514606231). 248 | 249 | ### rp_filter sysctl settings 250 | 251 | For RunCVM to support Docker DNS within Container/VMs, the following condition on `/proc/sys/net/ipv4/conf/` must be met: 252 | - the max of `all/rp_filter` and `<bridge>/rp_filter` should be 0 ('No Source Validation') or 2 (Loose mode as defined in RFC3704 Loose Reverse Path) 253 | (where `<bridge>` is any bridge underpinning a Docker network to which RunCVM Container/VMs will be attached) 254 | 255 | This means that: 256 | - if `all/rp_filter` will be set to 0, then `<bridge>/rp_filter` must be set to 0 or 2 257 | (or, if `<bridge>` is not yet or might not yet have been created, then `default/rp_filter` must be set to 0 or 2) 258 | - if `all/rp_filter` will be set to 1, then `<bridge>/rp_filter` must be set to 2 259 | (or, if `<bridge>` is not yet or might not yet have been created, then `default/rp_filter` must be set to 2) 260 | - if `all/rp_filter` will be set to 2, then no further action is needed 261 | 262 | At time of writing: 263 | - the Debian default is `0`; 264 | - the Ubuntu default is `2`; 265 | - the Google Cloud Debian image has default `1` and `rp_filter` settings in `/etc/sysctl.d/60-gce-network-security.conf` must be modified or overridden to support RunCVM. 266 | 267 | We recommend `all/rp_filter` be set to 2, as this is the simplest change and provides a good balance of security. 268 | 269 | ## Installation 270 | 271 | Run: 272 | 273 | ```sh 274 | curl -s -o - https://raw.githubusercontent.com/newsnowlabs/runcvm/main/runcvm-scripts/runcvm-install-runtime.sh | sudo sh 275 | ``` 276 | 277 | This will: 278 | - Install the RunCVM software package to `/opt/runcvm` (installation elsewhere is currently unsupported) 279 | - For Docker support: 280 | - Enable the RunCVM runtime, by patching `/etc/docker/daemon.json` to add `runcvm` to the `runtimes` property 281 | - Restart `dockerd`, if it can be detected how to do so for your system (e.g.
`systemctl restart docker`) 282 | - Verify that RunCVM is recognised via `docker info` 283 | - For Podman support (experimental) 284 | - Display instructions on patching `/etc/containers/containers.conf` 285 | - Check your system network device `rp_filter` settings, and amend them if necessary 286 | 287 | Following installation, launch a basic test RunCVM Container/VM: 288 | 289 | ```console 290 | docker run --runtime=runcvm --rm -it hello-world 291 | ``` 292 | 293 | ### Install on Google Cloud 294 | 295 | Create an image that will allow instances to have VMX capability: 296 | 297 | ```console 298 | gcloud compute images create debian-12-vmx --source-image-project=debian-cloud --source-image-family=debian-12 --licenses="https://compute.googleapis.com/compute/v1/projects/vm-options/global/licenses/enable-vmx" 299 | ``` 300 | 301 | Now launch a VM, install Docker and RunCVM: 302 | 303 | ```console 304 | cat >/tmp/startup-script.sh </etc/docker/daemon.json && \ 310 | curl -fsSL https://get.docker.com | bash && \ 311 | curl -s -o - https://raw.githubusercontent.com/newsnowlabs/runcvm/main/runcvm-scripts/runcvm-install-runtime.sh | sudo REPO=newsnowlabs/runcvm:latest sh 312 | EOF 313 | 314 | gcloud compute instances create runcvm-vmx-test --zone=us-central1-a --machine-type=n2-highmem-2 --network-interface=network-tier=PREMIUM,stack-type=IPV4_ONLY,subnet=default --metadata-from-file=startup-script=/tmp/startup-script.sh --no-restart-on-failure --maintenance-policy=TERMINATE --provisioning-model=SPOT --instance-termination-action=STOP --no-service-account --no-scopes --create-disk=auto-delete=yes,boot=yes,image=debian-12-vmx,mode=rw,size=50,type=pd-ssd --no-shielded-secure-boot --shielded-vtpm --shielded-integrity-monitoring --labels=goog-ec-src=vm_add-gcloud --reservation-affinity=any 315 | ``` 316 | 317 | ## Upgrading 318 | 319 | To upgrade, follow this procedure: 320 | 321 | 1. Stop all RunCVM containers. 322 | 2. Run `/opt/runcvm/scripts/runcvm-install-runtime.sh` (or rerun the installation command - which runs the same script) 323 | 3. Start any RunCVM containers. 324 | 325 | ## Features and limitations 326 | 327 | In the below summary of RunCVM's current main features and limitations, [+] is used to indicate an area of compatibility with standard container runtimes and [-] is used indicate a feature of standard container runtimes that is unsupported. 328 | 329 | > N.B. `docker run` and `docker exec` options not listed below are unsupported and their effect, if used, is unspecified. 
330 | 331 | - `docker run` 332 | - Mounts 333 | - [+] `--mount` (or `-v`) is supported for volume mounts, tmpfs mounts, and host file and directory bind-mounts (the `dst` mount path `/disks` is reserved) 334 | - [-] Bind-mounting host sockets or devices, and `--device` is unsupported 335 | - Networking 336 | - [+] The default bridge network is supported 337 | - [+] Custom/user-defined networks specified using `--network` are supported, including Docker DNS resolution of container names and respect for custom network MTU 338 | - [+] Multiple network interfaces - when attached via `docker run --network` or `docker network connect` (but only to a created and not yet running container) - are supported (including `scope=overlay` networks and those with multiple subnets) 339 | - [+] `--publish` (or `-p`) is supported 340 | - [+] `--dns`, `--dns-option`, `--dns-search` are supported 341 | - [+] `--ip` is supported 342 | - [+] `--hostname` (or `-h`) is supported 343 | - [-] `docker network connect` on a running container is not supported 344 | - [-] `--network=host` and `--network=container:name|id` are not supported 345 | - [-] IPv6 is not supported 346 | - Execution environment 347 | - [+] `--user` (or `-u`) is supported 348 | - [?] `--workdir` (or `-w`) is supported 349 | - [+] `--env` (or `-e`) and `--env-file` are supported 350 | - [+] `--entrypoint` is supported 351 | - [+] `--init` is supported (but runs RunCVM's own VM init process rather than Docker's default, `tini`) 352 | - stdio/Terminals 353 | - [+] `--detach` (or `-d`) is supported 354 | - [+] `--interactive` (or `-i`) is supported 355 | - [+] `--tty` (or `-t`) is supported (but to enter CTRL-T one must press CTRL-T twice) 356 | - [+] `--attach` (or `-a`) is supported 357 | - [+] Stdout and Stderr output should be broadly similar to running the same workload in a standard `runc` container 358 | - [-] Stdout and Stderr are not independently multiplexed so `docker run --runtime=runcvm debian bash -c 'echo stdout; echo stderr >&2' >/tmp/stdout 2>/tmp/stderr` does not produce the expected result 359 | - [-] Stdout and Stderr sent very soon after VM launch might be corrupted due to serial console issues 360 | - [-] Stdout and Stderr sent immediately before VM shutdown might not always be fully flushed 361 | - Resource allocation and limits 362 | - [+] `--cpus` is supported to specify number of VM CPUs 363 | - [+] `--memory` (or `-m`) is supported to specify VM memory 364 | - [-] Other container resource limit options such as CPU (`--cpu-*`), block IO (`--blkio-*`), kernel memory (`--kernel-memory`) are unsupported or untested 365 | - Exit code 366 | - [+] Returning the entrypoint's exit code is supported, but it currently requires application support 367 | - [-] To return an exit code, your entrypoint may either write its exit code to `/.runcvm/exit-code` (supported exit codes 0-255) or call `/opt/runcvm/sbin/qemu-exit <code>` (supported exit codes 0-127). Automatic handling of exit codes from the entrypoint will be provided in a later version. 368 | - Disk performance 369 | - [+] No mountpoints are required for basic operation for most applications.
Volume or disk mountpoints may be needed for running `dockerd` or to improve disk performance 370 | - [-] `dockerd` mileage will vary unless a volume or disk is mounted over `/var/lib/docker` 371 | - `docker exec` 372 | - [+] `--user` (or `-u`), `--workdir` (or `-w`), `--env` (or `-e`), `--env-file`, `--detach` (or `-d`), `--interactive` (or `-i`) and `--tty` (or `-t`) are all supported 373 | - [+] Stdout and Stderr _are_ independently multiplexed so `docker exec <container> bash -c 'echo stdout; echo stderr >&2' >/tmp/stdout 2>/tmp/stderr` _does_ produce the expected result 374 | - Security 375 | - The RunCVM software package at `/opt/runcvm` is mounted read-only within RunCVM containers. Container applications cannot compromise RunCVM, but they can execute binaries from within the RunCVM package. The set of binaries available to the VM may be reduced to a minimum in a later version. 376 | - Kernels 377 | - [+] Use any kernel, either one pre-packaged with RunCVM or roll your own 378 | - [+] RunCVM will try to select an appropriate kernel to use based on examination of `/etc/os-release` within the image being launched. 379 | 380 | ## RunCVM vs Kata comparison 381 | 382 | This table provides a high-level comparison of RunCVM and Kata across various features like kernels, networking/DNS, memory allocation, namespace handling, method of operation, and performance characteristics: 383 | 384 | | Feature | RunCVM | Kata | 385 | |---------|--------|------| 386 | | **Methodology** | Boots VM from distribution kernels with container's filesystem directly mounted as root filesystem, using virtiofs. VM setup code and kernel modules are bind-mounted into the container. VM's PID1 runs setup code to reproduce the container's networking environment within the VM before executing the container's original entrypoint. | Boots VM from custom kernel with custom root disk image, mounts the virtiofsd-shared host container filesystem to a target folder and executes the container's entrypoint within a restricted namespace having chrooted to that folder. | 387 | | **Privileges/restrictions** | Container code has full root access to VM and its devices. It may run anything that runs in a VM, mounting filesystems, installing kernel modules, accessing devices. RunCVM helper processes are visible to `ps` etc. | Runs container code inside a VM namespace with restricted privileges. Use of mounts, kernel modules is restricted. Kata helper processes (like kata-agent and chronyd) are invisible to `ps`. | 388 | | **Kernels** | Launches stock Alpine, Debian, Ubuntu kernels. Kernel `/lib/modules` automatically mounted within VM. Install any needed modules without host reconfiguration. | Launches custom kernels. Kernel modules aren't mounted and need host reconfiguration to be installed. | 389 | | **Networking/DNS** | Docker container networking + internal/external DNS out-of-the-box.
No support for `docker network connect/disconnect` | DNS issues presented: with custom network, external ping works, but DNS lookups fail both for internal docker hosts and external hosts.[^1] | 390 | | **Memory** | VM assigned and reports total memory as per `--memory <size>` | VM total memory reported by `free` appears unrelated to `--memory <size>` specified [^2] | 391 | | **CPUs** | VM assigned and reports CPUs as per `--cpus <number>` | CPUs must be hardcoded in Kata host config | 392 | | **Performance** | | Custom kernel optimisations may deliver improved startup (~3.2s) or operational performance (~15%) | 393 | | **virtiofsd** | Runs `virtiofsd` in container namespace | Unknown | 394 | 395 | [^1]: `docker network create --scope=local testnet >/dev/null && docker run --name=test --rm --runtime=kata --network=testnet --entrypoint=/bin/ash alpine -c 'for n in test google.com 8.8.8.8; do echo "ping $n ..."; ping -q -c 8 -i 0.5 $n; done'; docker network rm testnet >/dev/null` succeeds on `runc` and `runcvm` but at time of writing (2023-12-31) the DNS lookups needed fail on `kata`. 396 | ``` 397 | $ docker network create --scope=local testnet >/dev/null && docker run --name=test --rm -it --runtime=kata --network=testnet --entrypoint=/bin/ash alpine -c 'for n in test google.com 8.8.8.8; do echo "ping $n ..."; ping -q -c 8 -i 0.5 $n; done'; docker network rm testnet >/dev/null 398 | ping test ... 399 | ping: bad address 'test' 400 | ping google.com ... 401 | ping: bad address 'google.com' 402 | ping 8.8.8.8 ... 403 | PING 8.8.8.8 (8.8.8.8): 56 data bytes 404 | 405 | --- 8.8.8.8 ping statistics --- 406 | 8 packets transmitted, 8 packets received, 0% packet loss 407 | round-trip min/avg/max = 0.911/1.716/3.123 ms 408 | 409 | $ docker network create --scope=local testnet >/dev/null && docker run --name=test --rm -it --runtime=runcvm --network=testnet --entrypoint=/bin/ash alpine -c 'for n in test google.com 8.8.8.8; do echo "ping $n ..."; ping -q -c 8 -i 0.5 $n; done'; docker network rm testnet >/dev/null 410 | ping test ... 411 | PING test (172.25.8.2): 56 data bytes 412 | 413 | --- test ping statistics --- 414 | 8 packets transmitted, 8 packets received, 0% packet loss 415 | round-trip min/avg/max = 0.033/0.085/0.137 ms 416 | ping google.com ... 417 | PING google.com (172.217.16.238): 56 data bytes 418 | 419 | --- google.com ping statistics --- 420 | 8 packets transmitted, 8 packets received, 0% packet loss 421 | round-trip min/avg/max = 8.221/8.398/9.017 ms 422 | ping 8.8.8.8 ...
423 | PING 8.8.8.8 (8.8.8.8): 56 data bytes 424 | 425 | --- 8.8.8.8 ping statistics --- 426 | 8 packets transmitted, 8 packets received, 0% packet loss 427 | round-trip min/avg/max = 1.074/1.491/1.801 ms 428 | ``` 429 | 430 | [^2]: `docker run --rm -it --runtime=kata --entrypoint=/bin/ash -m 500m alpine -c 'free -h; df -h /dev/shm'` 431 | ``` 432 | $ docker run --rm --runtime=kata --name=test -m 2g --env=RUNCVM_KERNEL_DEBUG=1 -it alpine ash -c 'free -h' 433 | total used free shared buff/cache available 434 | Mem: 3.9G 94.4M 3.8G 0 3.7M 3.8G 435 | Swap: 0 0 0 436 | $ docker run --rm --runtime=kata --name=test -m 3g --env=RUNCVM_KERNEL_DEBUG=1 -it alpine ash -c 'free -h' 437 | total used free shared buff/cache available 438 | Mem: 4.9G 107.0M 4.8G 0 3.9M 4.8G 439 | Swap: 0 0 0 440 | $ docker run --rm --runtime=kata --name=test -m 0g --env=RUNCVM_KERNEL_DEBUG=1 -it alpine ash -c 'free -h' 441 | total used free shared buff/cache available 442 | Mem: 1.9G 58.8M 1.9G 0 3.4M 1.9G 443 | Swap: 0 0 0 444 | ``` 445 | 446 | ## Kernel auto-detection 447 | 448 | When creating a container, RunCVM will examine the image being launched to try to determine a suitable kernel to boot the VM with. Its process is as follows: 449 | 450 | 1. If `--env=RUNCVM_KERNEL=<dist>[/<version>]` is specified, use the indicated kernel 451 | 2. Otherwise, identify distro from `/etc/os-release` 452 | 1. If one is found in the appropriate distro-specific location in the image, select an in-image kernel. The locations are: 453 | - Debian: `/vmlinuz` and `/initrd.img` 454 | - Ubuntu: `/boot/vmlinuz` and `/boot/initrd.img` 455 | - Alpine: `/boot/vmlinuz-virt` and `/boot/initramfs-virt` 456 | 2. Otherwise, if found in the RunCVM package, select the latest kernel compatible with the distro 457 | 3. Finally, use the Debian kernel from the RunCVM package 458 | 459 | ## Option reference 460 | 461 | RunCVM options are specified either via standard `docker run` options or via `--env=<option>=<value>` options on the `docker run` 462 | command line. The following env options are user-configurable: 463 | 464 | ### `--env=RUNCVM_KERNEL=<dist>[/<version>]` 465 | 466 | Specify with which RunCVM kernel (from `/opt/runcvm/kernels`) to boot the VM. Values must be of the form `<dist>/<version>`, where `<dist>` is a directory under `/opt/runcvm/kernels` and `<version>` is a subdirectory (or symlink to a subdirectory) under that. If `<version>` is omitted, `latest` will be assumed. Here is an example command that will list available values of `<dist>/<version>` on your installation. 467 | 468 | ```console 469 | $ find /opt/runcvm/kernels/ -maxdepth 2 | sed 's!^/opt/runcvm/kernels/!!; /^$/d' 470 | debian 471 | debian/latest 472 | debian/5.10.0-16-amd64 473 | alpine 474 | alpine/latest 475 | alpine/5.15.59-0-virt 476 | ubuntu 477 | ubuntu/latest 478 | ubuntu/5.15.0-43-generic 479 | ol 480 | ol/5.14.0-70.22.1.0.1.el9_0.x86_64 481 | ol/latest 482 | ``` 483 | 484 | Example: 485 | 486 | ```console 487 | docker run --rm --runtime=runcvm --env=RUNCVM_KERNEL=ol hello-world 488 | ``` 489 | 490 | ### `--env=RUNCVM_KERNEL_APPEND='<options>'` 491 | 492 | Any custom kernel command line options e.g. `apparmor=0` or `systemd.unified_cgroup_hierarchy=0`. 493 | 494 | ### `--env='RUNCVM_DISKS=<disk1>[;<disk2>;...]'` 495 | 496 | Automatically create, format, prepopulate and mount backing files as virtual disks on the VM. 497 | 498 | Each `<disk>` should be a comma-separated list of values of the form: `<src>,<dst>,<filesystem>[,<size>]`. 499 | 500 | - `<src>` is the path _within the container_ where the virtual disk backing file should be located.
This may be in the container's overlayfs or within a volume (mounted using `--mount=type=volume`). 501 | - `<dst>` is both (a) the path within the VM where the virtual disk should be mounted; and (b) the location of the directory with whose contents the disk should be prepopulated. 502 | - `<filesystem>` is the filesystem with which the backing disk should be formatted when first created. 503 | - `<size>` is the size of the backing file (in `truncate` format), and must be specified if `<src>` does not exist. 504 | 505 | When first created, the backing file will be created as a sparse file of the specified `<size>` and formatted with the specified `<filesystem>` using `mke2fs` and prepopulated with any files preexisting at `<dst>`. 506 | 507 | When RunCVM creates a Container/VM, fstab entries will be drafted. After the VM boots, the fstab entries will be mounted. Typically, the first disk will be mounted as `/dev/vda`, the second as `/dev/vdb`, and so on. 508 | 509 | #### Example #1 510 | 511 | ```console 512 | docker run -it --runtime=runcvm --env=RUNCVM_DISKS=/disk1,/home,ext4,5G <image> 513 | ``` 514 | 515 | In this example, RunCVM will check for existence of a file at `/disk1` within the container, and if not found create a 5G backing file (in the container's filesystem, typically overlay2) with an ext4 filesystem prepopulated with any preexisting contents of `/home`, then add the disk to `/etc/fstab` and mount it within the VM at `/home`. 516 | 517 | #### Example #2 518 | 519 | ```console 520 | docker run -it --runtime=runcvm --mount=type=volume,src=runcvm-disks,dst=/disks --env='RUNCVM_DISKS=/disks/disk1,/home,ext4,5G;/disks/disk2,/opt,ext4,2G' <image> 521 | ``` 522 | 523 | This example behaves similarly, except that the `runcvm-disks` persistent Docker volume is first mounted at `/disks` within the container's filesystem, and therefore the backing files at `/disks/disk1` and `/disks/disk2` (mounted in the VM at `/home` and `/opt` respectively) are stored in the _persistent volume_ (typically stored in `/var/lib/docker` on the host, bypassing overlay2). 524 | 525 | > N.B. `/disks` and any paths below it are _reserved mountpoints_. Unlike other mountpoints, these are *NOT* mounted into the VM but only into the container, and are therefore suitable for mounting VM disks from backing files that cannot be accessed within the VM's filesystem. 526 | 527 | ### `--env=RUNCVM_QEMU_DISPLAY=<display>` 528 | 529 | Select a specific QEMU display. Currently only `curses` is supported, but others may trivially be added by customising the build. 530 | 531 | ### `--env=RUNCVM_SYS_ADMIN=1` 532 | 533 | By default, `virtiofsd` is not launched with `-o modcaps=+sys_admin` (and containers are not granted `CAP_SYS_ADMIN`). Use this option if you need to change this. 534 | 535 | ### `--env=RUNCVM_KERNEL_MOUNT_LIB_MODULES=1` 536 | 537 | If a RunCVM kernel (as opposed to an in-image kernel) is chosen to launch a VM, by default that kernel's modules will be mounted at `/lib/modules/<version>` in the VM. If this variable is set, that kernel's modules will instead be mounted over `/lib/modules`. 538 | 539 | ### `--env=RUNCVM_KERNEL_DEBUG=1` 540 | 541 | Enable kernel logging (sets kernel `console=ttyS0`). 542 | 543 | ### `--env=RUNCVM_BIOS_DEBUG=1` 544 | 545 | By default BIOS console output is hidden. Enable it with this option. 546 | 547 | ### `--env=RUNCVM_RUNTIME_DEBUG=1` 548 | 549 | Enable debug logging for the runtime (the portion of RunCVM directly invoked by `docker run`, `docker exec` etc). 550 | Debug logs are written to files in `/tmp`.
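For example, to capture runtime debug logs from a short-lived Container/VM and then list the most recently written files under `/tmp` (a minimal sketch; the exact debug log filenames are internal to the runtime, so the `ls` below simply surfaces whatever was written):

```console
docker run --rm --runtime=runcvm --env=RUNCVM_RUNTIME_DEBUG=1 alpine true
ls -lt /tmp | head
```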
551 | 
552 | ### `--env=RUNCVM_BREAK=<breakpoints>`
553 | 
554 | Enable breakpoints (dropping to a bash shell) during the RunCVM Container/VM boot process.
555 | 
556 | `<breakpoints>` must be a comma-separated list of: `prenet`, `postnet`, `preqemu`.
557 | 
558 | ### `--env=RUNCVM_HUGETLB=1`
559 | 
560 | **[EXPERIMENTAL]** Enable use of a preallocated hugetlb memory backend, which can improve performance in some scenarios.
561 | 
562 | ### `--env=RUNCVM_CGROUPFS=<mode>`
563 | 
564 | Configures cgroupfs mountpoints in the VM, which may be needed to run applications like Docker if systemd is not running. Acceptable values are:
565 | 
566 | - `none`/`systemd` - do nothing; leave to the application or to systemd (if running)
567 | - `1`/`cgroup1` - mount only cgroup v1 filesystems supported by the running kernel to subdirectories of `/sys/fs/cgroup`
568 | - `2`/`cgroup2` - mount only the cgroup v2 filesystem to `/sys/fs/cgroup`
569 | - `hybrid`/`mixed` - mount cgroup v1 filesystems and mount the cgroup v2 filesystem to `/sys/fs/cgroup/unified`
570 | 
571 | Please note that if `RUNCVM_CGROUPFS` is left undefined or set to an empty string, then RunCVM selects an appropriate
572 | default behaviour according to these rules:
573 | 
574 | - If the specified entrypoint (or, if a symlink, its target) matches the regex `/systemd$`, then assume a default value of `none`;
575 | - Else, assume a default value of `hybrid`.
576 | 
577 | These rules work well for running Docker in (a) stock Alpine/Debian/Ubuntu distributions in which Docker has been installed but systemd is not running; and (b) distributions in which systemd is running. Of course, you should set `RUNCVM_CGROUPFS` if you need to override the default behaviour.
578 | 
579 | Please also note that, if your distribution is running systemd, you may instead set `--env=RUNCVM_KERNEL_APPEND='systemd.unified_cgroup_hierarchy=<n>'` (where `<n>` is `0` or `1`) to request that systemd create either hybrid or cgroup2-only cgroup filesystem(s) itself.
580 | 
581 | ## Advanced usage
582 | 
583 | ### Running Docker in a RunCVM Container/VM
584 | 
585 | #### ext4 disk backing file mounted at `/var/lib/docker`
586 | 
587 | If running Docker within a VM, it is recommended that you mount a disk backing file at `/var/lib/docker`. This allows `dockerd` to use its preferred overlay storage driver, and avoids it falling back to the extremely poorly-performing `vfs` storage driver.
588 | 
589 | e.g. To launch a VM with a 1G ext4-formatted backing file, stored in the underlying container's overlay filesystem, and mounted at `/var/lib/docker`, run:
590 | 
591 | ```sh
592 | docker run -it --runtime=runcvm --env=RUNCVM_DISKS=/disks/docker,/var/lib/docker,ext4,1G <image>
593 | ```
594 | 
595 | To launch a VM with a 5G ext4-formatted backing file, stored in a dedicated Docker volume on the host, and mounted at `/var/lib/docker`, run:
596 | 
597 | ```sh
598 | docker run -it --runtime=runcvm --mount=type=volume,src=runcvm-disks,dst=/disks --env=RUNCVM_DISKS=/disks/docker,/var/lib/docker,ext4,5G <image>
599 | ```
600 | 
601 | In both cases, RunCVM will check for the existence of a file at `/disks/docker` and, if not found, will create a disk backing file of the given size, formatted as an ext4 filesystem. It will add the disk to `/etc/fstab`.
602 | 
603 | For full documentation of `RUNCVM_DISKS`, see above.
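
Once such a VM is running, you can check that the disk really backs `/var/lib/docker` and that `dockerd` has selected an overlay driver. A hypothetical verification session (assuming Docker is installed and running inside the VM; output will vary):

```sh
# From the host, open a shell in the container/VM
docker exec -it <container> sh

# Inside the VM: /var/lib/docker should be backed by a virtual disk
# (/dev/vdX), not by the virtiofs root filesystem
df -h /var/lib/docker

# And dockerd should report an overlay storage driver, not vfs
docker info --format '{{.Driver}}'
```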
604 | 
605 | #### Docker volume mounted at `/var/lib/docker` (NOT RECOMMENDED)
606 | 
607 | Doing this is _not recommended_, but if running Docker within a VM, you can enable `dockerd` to use the overlay filesystem (at the cost of security) by launching with `--env=RUNCVM_SYS_ADMIN=1`. e.g.
608 | 
609 | ```sh
610 | docker run --runtime=runcvm --mount=type=volume,src=mydocker1,dst=/var/lib/docker --env=RUNCVM_SYS_ADMIN=1 <image>
611 | ```
612 | 
613 | > N.B. This option adds `CAP_SYS_ADMIN` capabilities to the container and then launches `virtiofsd` with `-o modcaps=+sys_admin`.
614 | 
615 | ## Developing
616 | 
617 | The following deep dive should help explain the inner workings of RunCVM, and which files to modify to implement fixes, improvements and extensions.
618 | 
619 | ### runcvm-runtime
620 | 
621 | RunCVM's 'wrapper' runtime, `runcvm-runtime`, intercepts the container `create` and `exec` commands and their specifications in JSON format (`config.json` and `process.json` respectively) that are normally provided (by `docker run`/`create` and `docker exec` respectively) to a standard container runtime like `runc`.
622 | 
623 | The JSON file is parsed to retrieve the properties of the command, and is modified to allow RunCVM to piggyback by overriding the originally intended behaviour with new behaviour.
624 | 
625 | The modifications to `create` are designed to make the created container launch a VM that boots off the container's filesystem, served using `virtiofsd`.
626 | 
627 | The modifications to `exec` are designed to run commands within the VM instead of the container.
628 | 
629 | #### `runcvm-runtime` - `create` command
630 | 
631 | In more detail, the RunCVM runtime `create` process:
632 | - Modifies the `config.json` file to:
633 |   - Modify the container's entrypoint: prepend `runcvm-ctr-entrypoint` to the container's original entrypoint; and, if an `--init` argument was detected, remove any init process and set the container env var `RUNCVM_INIT` to `1`.
634 |   - Set the container env var `RUNCVM_UIDGID` to the `<uid>:<gid>:<additionalGids>` intended for the container, then reset both the `<uid>` and `<gid>` to `0`.
635 |   - Set the container env var `RUNCVM_CPUS` to the intended `--cpus` count so it can be passed to the VM.
636 |   - Extract and delete all requested tmpfs mounts (these will be independently mounted by the VM).
637 |   - Add a bind mount from `/` to `/vm` that will recursively mount the following preceding mounts:
638 |     - A bind mount from `/opt/runcvm` on the host to `/opt/runcvm` in the container.
639 |     - A tmpfs mounted at `/.runcvm`.
640 |   - Add a tmpfs at `/run` in the container only.
641 |   - Map all requested bind mounts from their original mountpoint `<dst>` to `/vm/<dst>` (except where `<dst>` is at or below `/disks`).
642 |   - Determine a suitable VM launch kernel by looking for one inside the container's image, choosing a stock RunCVM kernel matching the image, or as directed by the `RUNCVM_KERNEL` env var.
643 |   - Add a bind mount to `/vm/lib/modules/<version>` for the kernel's modules.
644 |   - Set the container env vars `RUNCVM_KERNEL_PATH`, `RUNCVM_KERNEL_INITRAMFS_PATH` and `RUNCVM_KERNEL_ROOT`.
645 |   - Add device mounts for `/dev/kvm` and `/dev/net/tun`.
646 |   - Set the seccomp profile to 'unconfined'.
647 |   - Set `/dev/shm` to the size desired for the VM's memory, and set the container env var accordingly.
648 |   - Add necessary capabilities, if not already present (`NET_ADMIN`, `NET_RAW`, `MKNOD`, `AUDIT_WRITE`).
649 |   - Only if requested by `--env=RUNCVM_SYS_ADMIN=1`, add the `SYS_ADMIN` capability.
650 | - Executes the standard container runtime `runc` with the modified `config.json`.
651 | 
652 | The `runcvm-ctr-entrypoint`:
653 | - Is always launched as PID1 within the standard Docker container.
654 | - Saves the container's originally-intended entrypoint and command line, environment variables and network configuration to files inside `/.runcvm`.
655 | - Creates a bridge (acting as a hub) for each container network interface, to join that interface to a VM tap network interface.
656 | - Launches `virtiofsd` to serve the container's root filesystem.
657 | - Configures `/etc/resolv.conf` in the container.
658 | - Adds container firewall rules, launches `dnsmasq` and modifies `/vm/etc/resolv.conf` to proxy DNS requests from the VM to Docker's DNS.
659 | - Execs RunCVM's own `runcvm-init` init process to supervise `runcvm-ctr-qemu` to launch the VM.
660 | 
661 | The `runcvm-init` process:
662 | - Is RunCVM's custom init process, which takes over as PID1 within the container, supervising `runcvm-ctr-qemu` to launch the VM.
663 | - Waits for a TERM signal. On receiving one, it spawns `runcvm-ctr-shutdown`, which cycles through a number of methods to try to shut down the VM cleanly.
664 | - Waits for its child (QEMU) to exit. When it does, execs `runcvm-ctr-exit` to retrieve any saved exit code (written by the application to `/.runcvm/exit-code`) and exit with this code.
665 | 
666 | The `runcvm-ctr-qemu` script:
667 | - Prepares disk backing files as specified by `--env=RUNCVM_DISKS=<disks>`.
668 | - Prepares network configuration as saved from the container (modifying the MAC address of each container interface).
669 | - Launches [QEMU](https://www.qemu.org/) with the required kernel, network interfaces, disks and display, with a root filesystem mounted via virtiofs from the container, and with `runcvm-vm-init` as the VM's init process.
670 | 
671 | The `runcvm-vm-init` process:
672 | - Runs as PID1 within the VM.
673 | - Retrieves the container configuration - network, environment, disk and tmpfs mounts - saved by `runcvm-ctr-entrypoint` to `/.runcvm`, and reproduces it within the VM.
674 | - Launches the container's pre-existing entrypoint, in one of two ways:
675 |   1. If `RUNCVM_INIT` is `1` (i.e. the container was originally intended to be launched with Docker's own init process), it configures and execs busybox `init`, which becomes the VM's PID1, to supervise `dropbear`, run `runcvm-vm-start` and `poweroff` the VM if signalled to do so.
676 |   2. Else, it backgrounds `dropbear`, then execs (via `runcvm-init`, purely to create a controlling tty) `runcvm-vm-start`, which runs as the VM's PID1.
677 | 
678 | The `runcvm-vm-start` script:
679 | - Restores the container's originally-intended environment variables, `<uid>`, `<gid>`, `<additionalGids>` and `<cwd>`, and execs the original entrypoint.
680 | 
681 | #### `runcvm-runtime` - `exec` command
682 | 
683 | The RunCVM runtime `exec` process:
684 | 
685 | - Modifies the `process.json` file to:
686 |   - Retrieve the intended `<uid>`, `<gid>`, `<additionalGids>`, `<cwd>` and `<env>` for the command, as well as whether a `HOME` environment variable exists.
687 |   - Reset both the `<uid>` and `<gid>` to `0`, and the `<cwd>` to `/`.
688 |   - Prepend `runcvm-ctr-exec '<uid>:<gid>:<additionalGids>' '<cwd>' '<env>' '<hasHome>'` to the originally intended command.
689 | - Executes the standard container runtime `runc` with the modified `process.json`.
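
By way of illustration, the kind of `process.json` rewrite described above can be sketched with `jq` (illustrative only: the real runtime is implemented by `runcvm-runtime`, handles more fields, and the argument values shown here are hypothetical):

```sh
# Reset uid/gid to 0 and cwd to /, and prepend the runcvm-ctr-exec wrapper
# to the originally intended command
jq '.user.uid = 0
    | .user.gid = 0
    | .cwd = "/"
    | .args = ["runcvm-ctr-exec", "1000:1000:", "/workdir"] + .args' \
    process.json > process.json.new
```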
690 | 
691 | The `runcvm-ctr-exec` script:
692 | - Uses the Dropbear `dbclient` SSH client to execute the intended command, with the intended arguments, within the VM via the `runcvm-vm-exec` process, propagating the returned stdout and stderr, and returning the command's exit code.
693 | 
694 | ## Building
695 | 
696 | Building RunCVM requires Docker. To build RunCVM, first clone the repo, then run the build script, as follows:
697 | 
698 | ```console
699 | cd runcvm
700 | ./build/build.sh
701 | ```
702 | 
703 | The build script creates a Docker image named `newsnowlabs/runcvm:latest`.
704 | 
705 | Now follow the main [installation instructions](#installation) to install your built RunCVM from the Docker image.
706 | 
707 | ## Testing
708 | 
709 | Test RunCVM using nested RunCVM: that is, using a Docker image capable of installing RunCVM, or an image built with a version of RunCVM preinstalled.
710 | 
711 | Build a suitable image as follows:
712 | 
713 | ```sh
714 | cat <>/etc/modules && \
724 |     useradd --create-home --shell /bin/bash --groups sudo,docker runcvm && \
725 |     echo runcvm:runcvm | chpasswd && \
726 |     echo 'runcvm ALL=(ALL) NOPASSWD: ALL' >/etc/sudoers.d/runcvm
727 | 
728 | WORKDIR /home/runcvm
729 | ENTRYPOINT ["/lib/systemd/systemd"]
730 | VOLUME /disks
731 | 
732 | # Mount formatted backing files at:
733 | # - /var/lib/docker for speed and overlay2 support
734 | # - /opt/runcvm to avoid nested virtiofs, which works, but can't be great for speed
735 | ENV RUNCVM_DISKS='/disks/docker,/var/lib/docker,ext4,2G;/disks/runcvm,/opt/runcvm,ext4,2G'
736 | 
737 | # # Uncomment this block to preinstall RunCVM from the specified image
738 | #
739 | # COPY --from=newsnowlabs/runcvm:latest /opt /opt/
740 | # RUN rm -f /etc/init.d/docker && \
741 | #     bash /opt/runcvm/scripts/runcvm-install-runtime.sh --no-dockerd
742 | EOF
743 | ```
744 | 
745 | (Uncomment the final block to build an image with RunCVM preinstalled, or leave the block commented to test RunCVM installation.)
746 | 
747 | To launch, run:
748 | 
749 | ```sh
750 | docker run -d --runtime=runcvm -m 2g --name=ubuntu-docker-runcvm ubuntu-docker-runcvm
751 | ```
752 | 
753 | > Optionally modify this `docker run` command by:
754 | > - adding `--rm` - to automatically remove the container after systemd shutdown
755 | > - removing `-d` and adding `--env=RUNCVM_KERNEL_DEBUG=1` - to see kernel and systemd boot logs
756 | > - removing `-d` and adding `-it` - to provide a console
757 | 
758 | Then run `docker exec -it -u runcvm ubuntu-docker-runcvm bash` to obtain a command prompt and perform testing.
759 | 
760 | Run `docker rm -fv ubuntu-docker-runcvm` to clean up after testing.
761 | 
762 | ## Support
763 | 
764 | **Support launching images:** If you encounter any Docker image that launches in a standard container runtime but does not launch in RunCVM, or launches but with unexpected behaviour, please [raise an issue](https://github.com/newsnowlabs/runcvm/issues) titled _Launch failure for image `<image>`_ or _Unexpected behaviour for image `<image>`_, and include log excerpts and an explanation of the failure, or of the expected and unexpected behaviour.
765 | 
766 | **For all other issues:** please still [raise an issue](https://github.com/newsnowlabs/runcvm/issues).
767 | 
768 | You can also reach out to us on the [NewsNow Labs Slack Workspace](https://join.slack.com/t/newsnowlabs/shared_invite/zt-wp54l05w-0DTxuc_n8uISJRtks3Xw3A).
769 | 
770 | We are typically available to respond to queries Monday-Friday, 9am-5pm UK time, and will be happy to help.
771 | 
772 | ## Contributing
773 | 
774 | If you would like to contribute a feature suggestion or code, please raise an issue or submit a pull request.
775 | 
776 | ## Uninstallation
777 | 
778 | Shut down any RunCVM containers.
779 | 
780 | Then run `sudo rm -rf /opt/runcvm`.
781 | 
782 | ## RunCVM and Dockside
783 | 
784 | RunCVM and [Dockside](https://dockside.io/) are designed to work together in two alternative ways.
785 | 
786 | 1. Dockside can be used to launch devtainers (development environments) in RunCVM VMs, allowing you to provision containerised online IDEs for developing applications like `dockerd`, Docker Swarm and `systemd`, applications that require a running kernel or kernel modules not available on the host, or applications that need specific hardware, e.g. a graphics display. Follow the instructions for adding a runtime to your [Dockside profiles](https://github.com/newsnowlabs/dockside/blob/main/docs/setup.md#profiles).
787 | 2. Dockside can itself be launched inside a RunCVM VM with its own `dockerd`, to provide increased security and compartmentalisation from the host. e.g.
788 | 
789 | ```
790 | docker run --rm -it --runtime=runcvm --memory=2g --name=docksidevm -p 443:443 -p 80:80 --mount=type=volume,src=dockside-data,dst=/data --mount=type=volume,src=dockside-disks,dst=/disks --env=RUNCVM_DISKS=/disks/disk1,/var/lib/docker,ext4,5G newsnowlabs/dockside --run-dockerd --ssl-builtin
791 | ```
792 | 
793 | ## Legals
794 | 
795 | This project (known as "RunCVM"), comprising the files in this Git repository
796 | (but excluding files containing a conflicting copyright notice and licence),
797 | is copyright 2023 NewsNow Publishing Limited, Struan Bartlett, and contributors.
798 | 
799 | RunCVM is an open-source project licensed under the Apache License, Version 2.0
800 | (the "License"); you may not use RunCVM or its constituent files except in
801 | compliance with the License.
802 | 
803 | You may obtain a copy of the License at [http://www.apache.org/licenses/LICENSE-2.0](http://www.apache.org/licenses/LICENSE-2.0).
804 | 
805 | Unless required by applicable law or agreed to in writing, software
806 | distributed under the License is distributed on an "AS IS" BASIS,
807 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
808 | See the License for the specific language governing permissions and
809 | limitations under the License.
810 | 
811 | > N.B. In order to run, RunCVM relies upon other third-party open-source software dependencies that are separate to and independent from RunCVM and published under their own independent licences.
812 | >
813 | > RunCVM Docker images made available at [https://hub.docker.com/repository/docker/newsnowlabs/runcvm](https://hub.docker.com/repository/docker/newsnowlabs/runcvm) are distributions
814 | > designed to run RunCVM that comprise: (a) the RunCVM project source and/or object code; and
815 | > (b) third-party dependencies that RunCVM needs to run; and which are each distributed under the terms
816 | > of their respective licences.
817 | 
--------------------------------------------------------------------------------
/build-utils/entrypoint-install.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | 
3 | MNT=/runcvm
4 | REPO=newsnowlabs/runcvm
5 | 
6 | while [ -n "$1" ];
7 | do
8 |   case "$1" in
9 |     --quiet) QUIET=1; shift; continue; ;;
10 |     --sleep|--wait|--pause) SLEEP=1; shift; continue; ;;
11 |     *) echo "$0: Unknown argument '$1'; aborting!"; exit 2; ;;
12 |   esac
13 | done
14 | 
15 | if !
mountpoint $MNT >/dev/null 2>&1; then 16 | 17 | cat <<_EOE_ >&2 18 | ERROR: Host bind-mount not specified, see below for correct usage. 19 | 20 | Usage: docker run --rm -v /opt/runcvm:$MNT $REPO [--quiet] [--sleep] 21 | 22 | - Installs runcvm package to the host at /opt/runcvm 23 | (installation elsewhere is currently unsupported) 24 | 25 | N.B. This image should normally only be used by the install script. 26 | See README.md for installation instructions. 27 | _EOE_ 28 | 29 | exit 1 30 | fi 31 | 32 | rsync -aR --delete /opt/runcvm/./ $MNT/ || exit 1 33 | 34 | if [ -z "$QUIET" ]; then 35 | 36 | cat <<"_EOE_" >&2 37 | RunCVM install/upgrade successful 38 | ================================= 39 | 40 | If this is your first time installing RunCVM on this server/VM, then: 41 | 42 | 1. Run the following to update /etc/docker/daemon.conf and restart docker: 43 | 44 | sudo /opt/runcvm/scripts/runcvm-install-runtime.sh 45 | 46 | 2. Optionally, run the integration tests: 47 | 48 | ./tests/run 49 | 50 | _EOE_ 51 | fi 52 | 53 | # For installing across a docker swarm: 54 | # - Run: docker service create --name=runcvm --mode=global --mount=type=bind,src=/opt/runcvm,dst=/runcvm newsnowlabs/runcvm:latest --sleep 55 | # - Wait: until the service is created everywhere 56 | # - Run: docker service rm runcvm 57 | if [ -n "$SLEEP" ]; then 58 | echo "$(hostname): RunCVM package installed." 59 | sleep infinity 60 | else 61 | exit 0 62 | fi 63 | -------------------------------------------------------------------------------- /build-utils/make-bundelf-bundle.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # BundELF - ELF binary and dynamic library patcher/bundler for making portable/relocatable executables 4 | # ---------------------------------------------------------------------------------------------------- 5 | # 6 | # Licence: Apache 2.0 7 | # Authors: Struan Bartlett, NewsNow Labs, NewsNow Publishing Ltd 8 | # Version: 1.0.0 9 | # Git: https://github.com/newsnowlabs/bundelf 10 | 11 | # make-bundelf-bundle.sh is used to prepare and package ELF binaries and their 12 | # dynamic library dependencies for relocation to (and execution from) a new 13 | # location, making them completely portable and independent of the original 14 | # distribution. 15 | # 16 | # It can be used to package Linux binaries sourced from one distribution, 17 | # so that they run within, but completely independently of, any other 18 | # distribution. 
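#
# A minimal usage sketch (values here are illustrative only; see the
# variable reference below for the full set of inputs):
#
#   BUNDELF_BINARIES="bash jq" \
#   BUNDELF_CODE_PATH=/opt/mybundle \
#   BUNDELF_EXEC_PATH=/opt/mybundle \
#   ./make-bundelf-bundle.sh --bundle
#
# Afterwards, run this script with --verify (from within any distribution)
# to check that all dependencies resolve within BUNDELF_CODE_PATH.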
19 | #
20 | # Example BundELF use cases:
21 | # - Bundling Alpine binaries for running within, but completely independently
22 | #   of, any arbitrary distribution (including GLIBC-based distributions)
23 | # - Bundling GLIBC-based applications for running within Alpine (or indeed any
24 | #   other distribution)
25 | #
26 | # BundELF is a core technology component of:
27 | # - https://github.com/newsnowlabs/dockside
28 | #   - to allow running complex Node-based IDE applications and container-setup
29 | #     processes inside containers running an unknown arbitrary Linux
30 | #     distribution
31 | # - https://github.com/newsnowlabs/runcvm
32 | #   - to allow running QEMU, virtiofsd, dnsmasq and other tools inside a
33 | #     container (and indeed a VM) running an unknown arbitrary Linux
34 | #     distribution
35 | #
36 | # Environment variable inputs:
37 | # - BUNDELF_BINARIES - list of required binaries to be scanned and copied
38 | # - BUNDELF_DYNAMIC_PATHS - list of optional paths to be scanned and copied
39 | # - BUNDELF_EXTRA_LIBS - list of extra libraries to be scanned and copied
40 | # - BUNDELF_CODE_PATH - path where binaries and libraries will be copied to
41 | # - BUNDELF_EXEC_PATH - path where binaries and libraries will be executed from
42 | # - BUNDELF_MERGE_BINDIRS - non-empty if all specified binaries should be copied to $BUNDELF_CODE_PATH/bin
43 | # - BUNDELF_LIBPATH_TYPE - whether to use absolute or relative paths (the default) for RPATH
44 | # - BUNDELF_NODE_PATH - [optional] path to the node binary, if required to ensure ldd can resolve all library paths in .node files
45 | # - BUNDELF_EXTRA_SYSTEM_LIB_PATHS - [optional] list of extra system library paths to be added to the RPATH
46 | #
47 | # See README.md for full details.
48 | 
49 | # BUNDELF_EXEC_PATH defaults to BUNDELF_CODE_PATH
50 | BUNDELF_EXEC_PATH="${BUNDELF_EXEC_PATH:-$BUNDELF_CODE_PATH}"
51 | 
52 | # Whether to use absolute or relative paths for RPATH
53 | BUNDELF_LIBPATH_TYPE="${BUNDELF_LIBPATH_TYPE:-relative}"
54 | 
55 | # Determine LD filepath, which is architecture-dependent:
56 | # e.g. ld-musl-aarch64.so.1 (linux/arm64), ld-musl-armhf.so.1 (linux/arm/v7), ld-musl-x86_64.so.1 (linux/amd64)
57 | # or ld-linux-aarch64.so.1 (linux/arm64), ld-linux-armhf.so.3 (linux/arm/v7), ld-linux-x86-64.so.2 (linux/amd64)
58 | LD_PATH=$(ls -1 /lib/ld-musl-* /lib/*-linux-*/ld-linux-*.so.* 2>/dev/null | head -n 1)
59 | LD_BIN=$(basename $LD_PATH)
60 | 
61 | TMP=/tmp/bundelf.$$
62 | 
63 | append() {
64 |   while read line; do echo "${line}${1}"; done
65 | }
66 | 
67 | # Check that all dynamic library dependencies are correctly being resolved to versions stored within BUNDELF_CODE_PATH.
68 | # Prints any that are instead being resolved outside it.
69 | _verify() {
70 |   local status=0
71 | 
72 |   # Deduce BUNDELF_CODE_PATH from this script's execution path, if none provided (useful when called with --verify within an alternative environment).
73 |   [ -z "$BUNDELF_CODE_PATH" ] && BUNDELF_CODE_PATH=$(realpath $(dirname $0)/..)
74 | 
75 |   # Now check the ELF files
76 |   for lib in $(cat $BUNDELF_CODE_PATH/.binelfs $BUNDELF_CODE_PATH/.libelfs)
77 |   do
78 |     echo -n "Checking: $lib ... " >&2
79 |     $BUNDELF_CODE_PATH$LD_PATH --list $lib 2>/dev/null | sed -nr '/=>/!d; s/^\s*(\S+)\s*=>\s*(.*?)(\s*\(0x[0-9a-f]+\))?$/- \2 \1/;/^.+$/p;' | egrep -v "^- ($BUNDELF_CODE_PATH/|$BUNDELF_EXEC_PATH/.*/$LD_BIN)"
80 | 
81 |     # If any libraries do not match the expected pattern, grep returns true
82 |     if [ $?
-eq 0 ]; then 83 | status=1 84 | echo "BAD" 85 | else 86 | echo "GOOD" 87 | fi 88 | 89 | sleep 0.01 90 | done 91 | 92 | return $status 93 | } 94 | 95 | verify() { 96 | _verify 97 | exit $? 98 | } 99 | 100 | copy_binaries() { 101 | # Copy any binaries we require to the install location. 102 | # Write their paths to cmd-elf-bin. 103 | 104 | if [ -n "$BUNDELF_MERGE_BINDIRS" ]; then 105 | mkdir -p $BUNDELF_CODE_PATH/bin 106 | else 107 | mkdir -p $BUNDELF_CODE_PATH 108 | fi 109 | 110 | for bin in "$@" 111 | do 112 | local file=$(which $bin) 113 | 114 | if [ -n "$file" ]; then 115 | if [ -z "$BUNDELF_MERGE_BINDIRS" ]; then 116 | tar cv $file 2>/dev/null | tar x -C $BUNDELF_CODE_PATH/ 117 | echo "$BUNDELF_CODE_PATH$file" 118 | else 119 | cp -p $file $BUNDELF_CODE_PATH/bin/ 120 | echo "$BUNDELF_CODE_PATH/bin/$bin" 121 | fi 122 | fi 123 | done 124 | } 125 | 126 | scan_extra_libs() { 127 | for p in "$@" 128 | do 129 | find "$p" ! -type d | while read lib 130 | do 131 | local f=$(basename $lib) 132 | echo "$f $lib" 133 | done 134 | done 135 | } 136 | 137 | # Using ldd, generate list of resolved library filepaths for each ELF binary and library, 138 | # logging first argument (to be used as $lib) and second argument (to be used as $dest). 139 | # e.g. 140 | # libaio.so.1 /usr/lib/libaio.so.1 141 | # libblkid.so.1 /lib/libblkid.so.1 142 | find_lib_deps() { 143 | cat "$@" | sort -u | xargs -P $(nproc) -I '{}' ldd '{}' 2>/dev/null | sed -nr 's/^\s*(.*)=>\s*(.*?)\s.*$/\1 \2/p' | sort -u 144 | } 145 | 146 | copy_libs() { 147 | mkdir -p $BUNDELF_CODE_PATH 148 | 149 | # For each resolved library filepath: 150 | # - Copy $dest to the install location. 151 | # - If $dest is a symlink, copy the symlink to the install location too. 152 | # - If needed, add a symlink from $lib to $dest. 153 | # 154 | # N.B. These steps are all needed to ensure the Alpine dynamic linker can resolve library filepaths as required. 155 | # For more, see https://www.musl-libc.org/doc/1.0.0/manual.html 156 | # 157 | sort -u "$@" | while read lib dest 158 | do 159 | # Copy $dest; and if $dest is a symlink, copy its target. 160 | # This could conceivably result in duplicates if multiple symlinks point to the same target, 161 | # but is much simpler than trying to copy symlinks and targets separately. 162 | cp -a --parents -L $dest $BUNDELF_CODE_PATH 163 | 164 | # If needed, add a symlink from $lib to $(basename $dest) 165 | if [ "$(basename $dest)" != "$lib" ]; then 166 | if cd $BUNDELF_CODE_PATH/$(dirname $dest); then 167 | ln -s $(basename $dest) $lib 168 | cd - >/dev/null 169 | fi 170 | fi 171 | 172 | if [ "$dest" != "$LD_PATH" ]; then 173 | echo "$BUNDELF_CODE_PATH$dest" 174 | fi 175 | done 176 | } 177 | 178 | patch_binary() { 179 | local bin="$1" 180 | 181 | if patchelf --set-interpreter $BUNDELF_EXEC_PATH$LD_PATH $bin 2>/dev/null; then 182 | echo patchelf --set-interpreter $BUNDELF_EXEC_PATH$LD_PATH $bin >>$TMP/patchelf.log 183 | return 0 184 | fi 185 | 186 | return 1 187 | } 188 | 189 | # Function to replace a hard-linked file with a non-hard-linked copy 190 | replace_hard_link() { 191 | local file="$1" 192 | 193 | # Check if the file exists 194 | if [ ! -e "$file" ]; then 195 | echo "replace_hard_link: file '$file' does not exist." 
196 | exit 1 197 | fi 198 | 199 | # Get the number of hard links to the file 200 | local link_count=$(stat -c %h "$file") 201 | 202 | # If the link count is greater than 1, the file is a hard link 203 | if [ "$link_count" -gt 1 ]; then 204 | # Create a temporary copy of the file, and overwrite the original file with the non-hard-linked copy 205 | local tmp_file=$(mktemp) 206 | cp -dp "$file" "$tmp_file" && mv "$tmp_file" "$file" 207 | fi 208 | 209 | return 0 210 | } 211 | 212 | patch_binaries_interpreter() { 213 | # For all ELF binaries, set the interpreter to our own. 214 | for bin in $(sort -u "$@") 215 | do 216 | patch_binary "$bin" || exit 1 217 | done 218 | } 219 | 220 | generate_extra_system_lib_paths() { 221 | for p in "$@" 222 | do 223 | echo $p 224 | done 225 | } 226 | 227 | generate_system_lib_paths() { 228 | # Generate a list of system library paths 229 | # - This will be used to set the RPATH for all binaries and libraries to an absolute or relative path. 230 | 231 | # This list is generated by: 232 | # - Running the dynamic linker with --list-diagnostics 233 | # - Extracting the system_dirs path from the output 234 | # - Removing any trailing slashes 235 | # $BUNDELF_CODE_PATH$LD_PATH --list-diagnostics | grep ^path.system_dirs | sed -r 's|^.*="([^"]+)/?"$|\1|; s|/$||' | sort -u 236 | 237 | # This list is generated by: 238 | # - Extracting the path to each library, relative to $BUNDELF_CODE_PATH; add leading '/' if missing. 239 | cat "$@" | \ 240 | grep -E '\.so(\.[0-9]+)*$' | \ 241 | sed -r "s|^$BUNDELF_CODE_PATH||; s|/[^/]+$||; s|^[^/]|/|;" | \ 242 | grep -E '^(/usr|/lib)(/|$)' | \ 243 | sort -u 244 | } 245 | 246 | generate_unique_rpath() { 247 | local prefix="$1"; shift 248 | 249 | local abs_syspaths 250 | for s in $(sort -u "$@") 251 | do 252 | abs_syspaths="$abs_syspaths$(echo "$prefix${s}:")" 253 | done 254 | 255 | # Remove trailing colon 256 | echo $abs_syspaths | sed 's/:$//' 257 | } 258 | 259 | patch_binaries_and_libs_rpath() { 260 | # For all ELF libs, set the RPATH to our own, and force RPATH use. 261 | local p 262 | local rpath 263 | local rpath_template 264 | 265 | if [ "$BUNDELF_LIBPATH_TYPE" = "absolute" ]; then 266 | rpath_template=$(generate_unique_rpath "$BUNDELF_CODE_PATH" "$TMP/system-lib-paths") 267 | else 268 | rpath_template=$(generate_unique_rpath "\$ORIGIN" "$TMP/system-lib-paths") 269 | fi 270 | 271 | for lib in $(sort -u "$@") 272 | do 273 | 274 | if [ "$BUNDELF_LIBPATH_TYPE" = "absolute" ]; then 275 | rpath="$rpath_template" 276 | 277 | # Add node as a needed library to '.node' files, to avoid misleading ldd errors in verify() 278 | if [ -n "$BUNDELF_NODE_PATH" ] && echo "$lib" | grep -qE "\.node$"; then 279 | echo patchelf --add-needed "$BUNDELF_CODE_PATH$BUNDELF_NODE_PATH" $lib >>$TMP/patchelf.log 280 | patchelf --add-needed "$BUNDELF_CODE_PATH$BUNDELF_NODE_PATH" $lib >>$TMP/patchelf.log 2>&1 || exit 1 281 | fi 282 | 283 | else 284 | # If $lib is hardlinked in different parts of the file hierarchy, then setting a relative RPATH on one file would break the correct RPATH set on another. 285 | # To prevent this, we un-hardlink any hardlinked files before we patch them. 
286 |       replace_hard_link "$lib"
287 | 
288 |       p=$(dirname "$lib" | sed -r "s|^$BUNDELF_CODE_PATH[/]+||; s|[^/]+|..|g")
289 |       # rpath="\$ORIGIN/$p/lib:\$ORIGIN/$p/usr/lib:\$ORIGIN/$p/usr/lib/xtables"
290 |       rpath="$(echo "$rpath_template" | sed "s|\$ORIGIN|\$ORIGIN/$p|g")"
291 | 
292 |       # Add node as a needed library to '.node' files, to avoid misleading ldd errors in verify()
293 |       if [ -n "$BUNDELF_NODE_PATH" ] && echo "$lib" | grep -qE "\.node$"; then
294 |         local NODE_DIR=$(dirname $BUNDELF_NODE_PATH)
295 |         local NODE_BASENAME=$(basename $BUNDELF_NODE_PATH)
296 | 
297 |         # Augment rpath with relative path to the NODE_DIR
298 |         rpath="$rpath:\$ORIGIN/$p$NODE_DIR"
299 | 
300 |         # Add a needed dynamic library dependency for NODE_BASENAME (will be searched for within the augmented rpath)
301 |         echo patchelf --add-needed "$NODE_BASENAME" "$lib" >>$TMP/patchelf.log
302 |         patchelf --add-needed "$NODE_BASENAME" "$lib" >>$TMP/patchelf.log 2>&1 || exit 1
303 |       fi
304 |     fi
305 | 
306 |     echo patchelf --force-rpath --set-rpath "$rpath" "$lib" >>$TMP/patchelf.log
307 |     patchelf --force-rpath --set-rpath \
308 |       "$rpath" \
309 |       "$lib" >>$TMP/patchelf.log 2>&1 || exit 1
310 | 
311 |     # Fail silently if patchelf fails to set the interpreter: this is a catch-all for libraries like /usr/lib/libcap.so.2
312 |     # which strangely have an interpreter set.
313 |     patch_binary "$lib"
314 | 
315 |   done
316 | }
317 | 
318 | copy_and_scan_for_dynamics() {
319 |   # Find all ELF files that are dynamically linked.
320 |   # - This should include all Theia .node files and spawn-helper, but not statically-linked binaries like 'rg'
321 |   # - The only way to tell if a file is an ELF binary (or library) is to check the first 4 bytes for the magic byte sequence.
322 | 
323 |   mkdir -p $BUNDELF_CODE_PATH
324 | 
325 |   for q in "$@"
326 |   do
327 |     tar cv "$q" 2>/dev/null | tar x -C $BUNDELF_CODE_PATH/
328 | 
329 |     find "$q" -type f ! -name '*.o' -print0 | xargs -0 -P $(nproc) -I '{}' hexdump -n 4 -e '4/1 "%2x" " {}\n"' {} | sed '/^7f454c46/!d; s/^7f454c46 //' | xargs -P $(nproc) file | grep dynamically
330 |   done
331 | }
332 | 
333 | get_dynamics_interpretable() {
334 |   grep interpreter "$@" | cut -d':' -f1 | sed -r "s!^!$BUNDELF_CODE_PATH!"
335 | }
336 | 
337 | get_dynamics_noninterpretable() {
338 |   grep -v interpreter "$@" | cut -d':' -f1 | sed -r "s!^!$BUNDELF_CODE_PATH!"
339 | }
340 | 
341 | write_digest() {
342 |   # Prepare full and unique list of ELF binaries and libs for reference purposes and for checking
343 |   sort -u $TMP/cmd-elf-bin >$BUNDELF_CODE_PATH/.binelfs
344 |   sort -u $TMP/cmd-elf-lib >$BUNDELF_CODE_PATH/.libelfs
345 | }
346 | 
347 | init() {
348 |   for dep in file hexdump xargs patchelf
349 |   do
350 |     if !
[ -x "$(which $dep)" ]; then
351 |       depsmissing=1
352 |       echo "ERROR: Command '$dep' not found in PATH '$PATH'" >&2
353 |     fi
354 |   done
355 | 
356 |   [ -n "$depsmissing" ] && return 1
357 | 
358 |   # Initialise
359 |   mkdir -p "$TMP"
360 |   >$TMP/cmd-elf-bin
361 |   >$TMP/cmd-elf-lib
362 |   >$TMP/libs-tuples
363 |   >$TMP/libs-extra-tuples
364 |   >$TMP/scanned-dynamics
365 |   >$TMP/system-lib-paths
366 | }
367 | 
368 | all() {
369 |   # Copy elf binaries to BUNDELF_CODE_PATH and generate 'cmd-elf-bin' list of ELF binaries
370 |   copy_binaries $BUNDELF_BINARIES >>$TMP/cmd-elf-bin
371 | 
372 |   # Scan for additional dynamic binaries and libs
373 |   copy_and_scan_for_dynamics $BUNDELF_DYNAMIC_PATHS >>$TMP/scanned-dynamics
374 | 
375 |   # Add the interpretable dynamics to 'cmd-elf-bin'
376 |   get_dynamics_interpretable $TMP/scanned-dynamics >>$TMP/cmd-elf-bin
377 | 
378 |   # Add the non-interpretable dynamics to 'cmd-elf-lib'
379 |   get_dynamics_noninterpretable $TMP/scanned-dynamics >>$TMP/cmd-elf-lib
380 | 
381 |   # Find library dependencies of these dynamic binaries and libs; write tuples to 'libs-tuples'
382 |   find_lib_deps $TMP/cmd-elf-bin $TMP/cmd-elf-lib >>$TMP/libs-tuples
383 | 
384 |   # Scan for extra libraries not formally declared as dependencies, and append tuples to 'libs-extra-tuples'
385 |   scan_extra_libs $BUNDELF_EXTRA_LIBS >>$TMP/libs-extra-tuples
386 | 
387 |   # Copy the library tuples to BUNDELF_CODE_PATH and append to 'cmd-elf-lib'
388 |   copy_libs $TMP/libs-tuples $TMP/libs-extra-tuples >>$TMP/cmd-elf-lib
389 | 
390 |   # Patch interpreter on all ELF binaries in 'cmd-elf-bin'
391 |   patch_binaries_interpreter $TMP/cmd-elf-bin
392 | 
393 |   # Generate non-unique list of system library paths:
394 |   generate_system_lib_paths $TMP/cmd-elf-lib >>$TMP/system-lib-paths
395 |   generate_extra_system_lib_paths $BUNDELF_EXTRA_SYSTEM_LIB_PATHS >>$TMP/system-lib-paths
396 | 
397 |   # Patch RPATH on all binaries in 'cmd-elf-bin' and libs in 'cmd-elf-lib'
398 |   # TODO: This duplicates running patch_binaries_interpreter on all 'cmd-elf-bin' files, in order that it can be run in relaxed mode on 'cmd-elf-lib'
399 |   patch_binaries_and_libs_rpath $TMP/cmd-elf-bin $TMP/cmd-elf-lib
400 | 
401 |   # Write a summary of binaries and libraries to BUNDELF_CODE_PATH
402 |   write_digest
403 | 
404 |   # Copy LD, and create a convenience symlink to it named 'ld'
405 |   cp --parents $LD_PATH $BUNDELF_CODE_PATH
406 |   ln -s $(echo $LD_PATH | sed -r 's|^/lib/|./|') $BUNDELF_CODE_PATH/lib/ld
407 | }
408 | 
409 | # Run with --verify from within any distribution, to check that all dynamic library dependencies
410 | # are correctly being resolved to versions stored within BUNDELF_CODE_PATH.
411 | if [ "$1" = "--verify" ]; then
412 |   # Check the full list for any library dependencies being inadvertently resolved outside the install location.
413 |   # Returns true if OK, false on any problems.
414 |   init || exit 1
415 |   verify
416 | elif [ "$1" = "--bundle" ]; then
417 |   init || exit 1
418 |   all
419 |   verify
420 | fi
421 | 
--------------------------------------------------------------------------------
/build/build.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh -e
2 | 
3 | REPO=newsnowlabs/runcvm
4 | 
5 | DOCKER_BUILDKIT=1 docker build -t $REPO .
6 | 7 | cat <<_EOE_ 8 | 9 | RunCVM build successful 10 | ======================= 11 | 12 | To install or upgrade, now run: 13 | 14 | sudo ./runcvm-scripts/runcvm-install-runtime.sh 15 | _EOE_ 16 | 17 | 18 | echo -------------------------------------------------------------------------------- /kernels/oraclelinux/95virtiofs/module-setup.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/bash 2 | 3 | # called by dracut 4 | check() { 5 | [[ $hostonly ]] || [[ $mount_needs ]] && { 6 | for fs in "${host_fs_types[@]}"; do 7 | [[ "$fs" == "virtiofs" ]] && return 0 8 | done 9 | return 255 10 | } 11 | 12 | is_qemu_virtualized && return 0 13 | 14 | return 255 15 | } 16 | 17 | # called by dracut 18 | depends() { 19 | return 0 20 | } 21 | 22 | # called by dracut 23 | installkernel() { 24 | instmods virtiofs 25 | 26 | # qemu specific modules 27 | hostonly='' instmods \ 28 | ata_piix ata_generic pata_acpi cdrom sr_mod ahci \ 29 | virtio_blk virtio virtio_ring virtio_pci \ 30 | virtio_scsi virtio_console virtio_rng virtio_mem \ 31 | virtio_net \ 32 | spapr-vscsi \ 33 | qemu_fw_cfg 34 | } 35 | 36 | # called by dracut 37 | install() { 38 | inst_hook cmdline 95 "$moddir/parse-virtiofs.sh" 39 | inst_hook pre-mount 99 "$moddir/mount-virtiofs.sh" 40 | } 41 | 42 | -------------------------------------------------------------------------------- /kernels/oraclelinux/95virtiofs/mount-virtiofs.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/sh 2 | 3 | type getarg >/dev/null 2>&1 || . /lib/dracut-lib.sh 4 | 5 | filter_rootopts() { 6 | rootopts=$1 7 | # strip ro and rw options 8 | local OLDIFS="$IFS" 9 | IFS=, 10 | set -- $rootopts 11 | IFS="$OLDIFS" 12 | local v 13 | while [ $# -gt 0 ]; do 14 | case $1 in 15 | rw|ro);; 16 | defaults);; 17 | *) 18 | v="$v,${1}";; 19 | esac 20 | shift 21 | done 22 | rootopts=${v#,} 23 | echo $rootopts 24 | } 25 | 26 | mount_root() { 27 | local _ret 28 | 29 | rootfs="virtiofs" 30 | rflags="rw" 31 | 32 | modprobe virtiofs 33 | 34 | mount -t ${rootfs} -o "$rflags",ro "${root#virtiofs:}" "$NEWROOT" 35 | 36 | rootopts= 37 | if getargbool 1 rd.fstab -n rd_NO_FSTAB \ 38 | && ! getarg rootflags \ 39 | && [ -f "$NEWROOT/etc/fstab" ] \ 40 | && ! 
[ -L "$NEWROOT/etc/fstab" ]; then 41 | # if $NEWROOT/etc/fstab contains special mount options for 42 | # the root filesystem, 43 | # remount it with the proper options 44 | rootopts="defaults" 45 | while read dev mp fs opts rest || [ -n "$dev" ]; do 46 | # skip comments 47 | [ "${dev%%#*}" != "$dev" ] && continue 48 | 49 | if [ "$mp" = "/" ]; then 50 | rootopts=$opts 51 | break 52 | fi 53 | done < "$NEWROOT/etc/fstab" 54 | 55 | rootopts=$(filter_rootopts $rootopts) 56 | fi 57 | 58 | # we want rootflags (rflags) to take precedence so prepend rootopts to 59 | # them; rflags is guaranteed to not be empty 60 | rflags="${rootopts:+${rootopts},}${rflags}" 61 | 62 | umount "$NEWROOT" 63 | 64 | info "Remounting ${root#virtiofs:} with -o ${rflags}" 65 | mount -t ${rootfs} -o "$rflags" "${root#virtiofs:}" "$NEWROOT" 2>&1 | 66 | vinfo 67 | 68 | [ -f "$NEWROOT"/forcefsck ] && rm -f -- "$NEWROOT"/forcefsck 2>/dev/null 69 | [ -f "$NEWROOT"/.autofsck ] && rm -f -- "$NEWROOT"/.autofsck 2>/dev/null 70 | } 71 | 72 | if [ -n "$root" -a -z "${root%%virtiofs:*}" ]; then 73 | mount_root 74 | fi 75 | : 76 | -------------------------------------------------------------------------------- /kernels/oraclelinux/95virtiofs/parse-virtiofs.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/sh 2 | 3 | if [ "${root%%:*}" = "virtiofs" ] ; then 4 | modprobe virtiofs 5 | 6 | rootok=1 7 | fi 8 | -------------------------------------------------------------------------------- /kernels/oraclelinux/addvirtiofs.conf: -------------------------------------------------------------------------------- 1 | add_dracutmodules+=" virtiofs " 2 | filesystems+=" virtiofs " 3 | -------------------------------------------------------------------------------- /patches/dnsmasq/remove-passwd-requirement.patch: -------------------------------------------------------------------------------- 1 | --- a/src/dnsmasq.c.orig 2 | +++ b/src/dnsmasq.c 3 | @@ -481,6 +481,7 @@ 4 | } 5 | #endif 6 | 7 | +#if 0 8 | if (daemon->username && !(ent_pw = getpwnam(daemon->username))) 9 | baduser = daemon->username; 10 | else if (daemon->groupname && !(gp = getgrnam(daemon->groupname))) 11 | @@ -488,6 +489,7 @@ 12 | 13 | if (baduser) 14 | die(_("unknown user or group: %s"), baduser, EC_BADCONF); 15 | +#endif 16 | 17 | /* implement group defaults, "dip" if available, or group associated with uid */ 18 | if (!daemon->group_set && !gp) 19 | -------------------------------------------------------------------------------- /patches/dropbear/runcvm.patch: -------------------------------------------------------------------------------- 1 | --- a/src/cli-kex.c 2 | +++ b/src/cli-kex.c 3 | @@ -312,7 +312,7 @@ 4 | int ret; 5 | 6 | if (cli_opts.no_hostkey_check) { 7 | - dropbear_log(LOG_INFO, "Caution, skipping hostkey check for %s\n", cli_opts.remotehost); 8 | + // dropbear_log(LOG_INFO, "Caution, skipping hostkey check for %s\n", cli_opts.remotehost); 9 | return; 10 | } 11 | 12 | --- a/src/dbutil.c 13 | +++ b/src/dbutil.c 14 | @@ -140,7 +140,9 @@ 15 | 16 | vsnprintf(printbuf, sizeof(printbuf), format, param); 17 | 18 | +#if 0 19 | fprintf(stderr, "%s\n", printbuf); 20 | +#endif 21 | 22 | } 23 | 24 | --- a/src/default_options.h 25 | +++ b/src/default_options.h 26 | @@ -21,10 +21,10 @@ 27 | /* Default hostkey paths - these can be specified on the command line. 
28 |  * Homedir is prepended if path begins with ~/
29 |  */
30 | -#define DSS_PRIV_FILENAME "/etc/dropbear/dropbear_dss_host_key"
31 | -#define RSA_PRIV_FILENAME "/etc/dropbear/dropbear_rsa_host_key"
32 | -#define ECDSA_PRIV_FILENAME "/etc/dropbear/dropbear_ecdsa_host_key"
33 | -#define ED25519_PRIV_FILENAME "/etc/dropbear/dropbear_ed25519_host_key"
34 | +#define DSS_PRIV_FILENAME "/.runcvm/dropbear/dropbear_dss_host_key"
35 | +#define RSA_PRIV_FILENAME "/.runcvm/dropbear/dropbear_rsa_host_key"
36 | +#define ECDSA_PRIV_FILENAME "/.runcvm/dropbear/dropbear_ecdsa_host_key"
37 | +#define ED25519_PRIV_FILENAME "/.runcvm/dropbear/dropbear_ed25519_host_key"
38 | 
39 |  /* Set NON_INETD_MODE if you require daemon functionality (ie Dropbear listens
40 |  * on chosen ports and keeps accepting connections. This is the default.
41 | @@ -218,7 +218,7 @@
42 |  #define DO_HOST_LOOKUP 0
43 | 
44 |  /* Whether to print the message of the day (MOTD). */
45 | -#define DO_MOTD 1
46 | +#define DO_MOTD 0
47 |  #define MOTD_FILENAME "/etc/motd"
48 | 
49 |  /* Authentication Types - at least one required.
50 | 
--------------------------------------------------------------------------------
/patches/mkinitfs/nlplug-findfs.patch:
--------------------------------------------------------------------------------
1 | --- mkinitfs-3.8.1.orig/nlplug-findfs/nlplug-findfs.c
2 | +++ mkinitfs-3.8.1/nlplug-findfs/nlplug-findfs.c
3 | @@ -41,7 +41,7 @@
4 |  #include
5 |  #include
6 | 
7 | -#define MAX_EVENT_TIMEOUT 5000
8 | +#define MAX_EVENT_TIMEOUT 1000
9 |  #define DEFAULT_EVENT_TIMEOUT 250
10 |  /* usb mass storage needs 1 sec to settle */
11 |  #define USB_STORAGE_TIMEOUT 1000
12 | 
--------------------------------------------------------------------------------
/patches/seabios/qemu-fw-cfg-fix.patch:
--------------------------------------------------------------------------------
1 | diff --git a/src/sercon.c b/src/sercon.c
2 | index 3019d9b..988c2a2 100644
3 | --- a/src/sercon.c
4 | +++ b/src/sercon.c
5 | @@ -516,7 +516,7 @@ void sercon_setup(void)
6 |      struct segoff_s seabios, vgabios;
7 |      u16 addr;
8 | 
9 | -    addr = romfile_loadint("etc/sercon-port", 0);
10 | +    addr = romfile_loadint("opt/org.seabios/etc/sercon-port", 0);
11 |      if (!addr)
12 |          return;
13 |      dprintf(1, "sercon: using ioport 0x%x\n", addr);
14 | diff --git a/src/fw/paravirt.c b/src/fw/paravirt.c
15 | index fba4e52..9a346d9 100644
16 | --- a/src/fw/paravirt.c
17 | +++ b/src/fw/paravirt.c
18 | diff --git a/src/fw/paravirt.c b/src/fw/paravirt.c
19 | index fba4e52..9a346d9 100644
20 | --- a/src/fw/paravirt.c
21 | +++ b/src/fw/paravirt.c
22 | @@ -652,9 +652,9 @@ void qemu_cfg_init(void)
23 |      // serial console
24 |      u16 nogfx = 0;
25 |      qemu_cfg_read_entry(&nogfx, QEMU_CFG_NOGRAPHIC, sizeof(nogfx));
26 | -    if (nogfx && !romfile_find("etc/sercon-port")
27 | +    if (nogfx && !romfile_find("opt/org.seabios/etc/sercon-port")
28 |          && !romfile_find("vgaroms/sgabios.bin"))
29 | -        const_romfile_add_int("etc/sercon-port", PORT_SERIAL1);
30 | +        const_romfile_add_int("opt/org.seabios/etc/sercon-port", PORT_SERIAL1);
31 |  }
32 | 
33 |  /*
34 | 
--------------------------------------------------------------------------------
/qemu-exit/qemu-exit.c:
--------------------------------------------------------------------------------
1 | #include <stdio.h>
2 | #include <stdlib.h>
3 | #include <unistd.h>
4 | #include <sys/io.h>
5 | 
6 | #define SHUTDOWN_PORT 0x604
7 | #define EXIT_PORT 0x501
8 | 
9 | static void clean_exit(void) {
10 |     ioperm(SHUTDOWN_PORT, 16, 1);
11 |     outw(0x2000, SHUTDOWN_PORT);
12 | }
13 | 
14 | int main(int argc, char **argv) {
15 |     int status;
16 | 
17 |     if (argc != 2) {
18 |         clean_exit();
19 |     }
20 | 
21 |     status = atoi(argv[1]);
22 |     if (!status) {
23 |         clean_exit();
24 |     }
25 | 
26 |     ioperm(EXIT_PORT, 8, 1);
27 | 
28 |     // status returned is 1+(2*orig_status)
29 |     outb(status-1, EXIT_PORT);
30 | 
31 |     // Didn't exit. Perhaps QEMU was not launched with -device isa-debug-exit
32 |     exit(255);
33 | }
--------------------------------------------------------------------------------
/runcvm-init/VERSION.h:
--------------------------------------------------------------------------------
1 | // THIS FILE IS AUTOMATICALLY GENERATED
2 | // Run `make VERSION.h` to update it after modifying VERSION.
3 | unsigned char VERSION[] = {
4 |   0x31, 0x2e, 0x32, 0x2e, 0x35, 0x0a
5 | };
6 | unsigned int VERSION_len = 6;
7 | 
--------------------------------------------------------------------------------
/runcvm-init/dumb-init.c:
--------------------------------------------------------------------------------
1 | // For the purposes of the following license, the "Software" is this file, dumb-init.c.
2 | //
3 | // The MIT License (MIT)
4 | //
5 | // Copyright (c) 2015 Yelp, Inc.
6 | //
7 | // Permission is hereby granted, free of charge, to any person obtaining a copy
8 | // of this software and associated documentation files (the "Software"), to deal
9 | // in the Software without restriction, including without limitation the rights
10 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | // copies of the Software, and to permit persons to whom the Software is
12 | // furnished to do so, subject to the following conditions:
13 | 
14 | // The above copyright notice and this permission notice shall be included in
15 | // all copies or substantial portions of the Software.
16 | 
17 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 | // THE SOFTWARE.
24 | 
25 | // dumb-init.c modifications (c) 2022 NewsNow Publishing Limited
26 | 
27 | /*
28 |  * dumb-init is a simple wrapper program designed to run as PID 1 and pass
29 |  * signals to its children.
30 |  *
31 |  * Usage:
32 |  *   ./dumb-init python -c 'while True: pass'
33 |  *
34 |  * To get debug output on stderr, run with '-v'.
35 |  */
36 | 
37 | #include <assert.h>
38 | #include <errno.h>
39 | #include <getopt.h>
40 | #include <signal.h>
41 | #include <stdio.h>
42 | #include <stdlib.h>
43 | #include <string.h>
44 | #include <sys/ioctl.h>
45 | #include <sys/types.h>
46 | #include <sys/wait.h>
47 | #include <unistd.h>
48 | #include "VERSION.h"
49 | 
50 | #define PRINTERR(...) do { \
51 |     fprintf(stderr, "[runcvm-init] " __VA_ARGS__); \
52 | } while (0)
53 | 
54 | #define DEBUG(...) do { \
55 |     if (debug) { \
56 |         PRINTERR(__VA_ARGS__); \
57 |     } \
58 | } while (0)
59 | 
60 | // Signals we care about are numbered from 1 to 31, inclusive.
61 | // (32 and above are real-time signals.)
62 | // TODO: this is likely not portable outside of Linux, or on strange architectures
63 | #define MAXSIG 31
64 | 
65 | // Indices are one-indexed (signal 1 is at index 1). Index zero is unused.
66 | // User-specified signal rewriting.
67 | int signal_rewrite[MAXSIG + 1] = {[0 ... MAXSIG] = -1};
68 | // One-time ignores due to TTY quirks. 0 = no skip, 1 = skip the next-received signal.
69 | char signal_temporary_ignores[MAXSIG + 1] = {[0 ... MAXSIG] = 0};
70 | 
71 | pid_t child_pid = -1;
72 | char debug = 0;
73 | char use_setsid = 1;
74 | char no_fork = 0;
75 | 
76 | int translate_signal(int signum) {
77 |     if (signum <= 0 || signum > MAXSIG) {
78 |         return signum;
79 |     } else {
80 |         int translated = signal_rewrite[signum];
81 |         if (translated == -1) {
82 |             return signum;
83 |         } else {
84 |             DEBUG("Translating signal %d to %d.\n", signum, translated);
85 |             return translated;
86 |         }
87 |     }
88 | }
89 | 
90 | void forward_signal(int signum) {
91 |     signum = translate_signal(signum);
92 |     if (signum != 0) {
93 |         kill(use_setsid ? -child_pid : child_pid, signum);
94 |         DEBUG("Forwarded signal %d to children.\n", signum);
95 |     } else {
96 |         DEBUG("Not forwarding signal %d to children (ignored).\n", signum);
97 |     }
98 | }
99 | 
100 | pid_t shutdown() {
101 |     pid_t my_child_pid;
102 |     char *shutdown_cmd[] = {"/.runcvm/guest/scripts/runcvm-ctr-shutdown", NULL};
103 | 
104 |     my_child_pid = fork();
105 |     if (my_child_pid < 0) {
106 |         PRINTERR("Unable to fork. Exiting.\n");
107 |         return 1;
108 |     } else if (my_child_pid == 0) {
109 |         /* child */
110 |         DEBUG("Requesting child to shut down by spawning %s\n", shutdown_cmd[0]);
111 |         execvp(shutdown_cmd[0], &shutdown_cmd[0]);
112 | 
113 |         // if this point is reached, exec failed, so we should exit nonzero
114 |         PRINTERR("Shutdown child spawn failed: %s\n", strerror(errno));
115 |         return 2;
116 |     } else {
117 |         /* parent */
118 |         DEBUG("Shutdown child spawned with PID %d.\n", my_child_pid);
119 |     }
120 | 
121 |     return my_child_pid;
122 | }
123 | 
124 | void quit(int exit_status) {
125 |     char exit_status_string[4];
126 |     char *exit_cmd[] = {"/.runcvm/guest/scripts/runcvm-ctr-exit", exit_status_string, NULL};
127 | 
128 |     sprintf(exit_status_string, "%d", exit_status & 0xFF);
129 | 
130 |     DEBUG("Exiting by execing: %s %s\n", exit_cmd[0], exit_cmd[1]);
131 |     execvp(exit_cmd[0], &exit_cmd[0]);
132 |     DEBUG("Failed to exec %s, so exiting now with status %d\n", exit_cmd[0], exit_status);
133 |     exit(exit_status);
134 | }
135 | 
136 | /*
137 |  * The dumb-init signal handler.
138 |  *
139 |  * The main job of this signal handler is to forward signals along to our child
140 |  * process(es). In setsid mode, this means signaling the entire process group
141 |  * rooted at our child. In non-setsid mode, this is just signaling the primary
142 |  * child.
143 |  *
144 |  * In most cases, simply proxying the received signal is sufficient. If we
145 |  * receive a job control signal, however, we should not only forward it, but
146 |  * also sleep dumb-init itself.
147 |  *
148 |  * This allows users to run foreground processes using dumb-init and to
149 |  * control them using normal shell job control features (e.g. Ctrl-Z to
150 |  * generate a SIGTSTP and suspend the process).
151 |  *
152 |  * The libc manual is useful:
153 |  * https://www.gnu.org/software/libc/manual/html_node/Job-Control-Signals.html
154 |  *
155 |  */
156 | void handle_signal(int signum) {
157 |     DEBUG("Received signal %d.\n", signum);
158 | 
159 |     if (signal_temporary_ignores[signum] == 1) {
160 |         DEBUG("Ignoring tty hand-off signal %d.\n", signum);
161 |         signal_temporary_ignores[signum] = 0;
162 |     } else if (signum == SIGTERM) {
163 |         shutdown();
164 |     } else if (signum == SIGCHLD) {
165 |         int status, exit_status;
166 |         pid_t killed_pid;
167 |         while ((killed_pid = waitpid(-1, &status, WNOHANG)) > 0) {
168 |             if (WIFEXITED(status)) {
169 |                 exit_status = WEXITSTATUS(status);
170 |                 DEBUG("A child with PID %d exited with exit status %d.\n", killed_pid, exit_status);
171 |             } else {
172 |                 assert(WIFSIGNALED(status));
173 |                 exit_status = 128 + WTERMSIG(status);
174 |                 DEBUG("A child with PID %d was terminated by signal %d.\n", killed_pid, exit_status - 128);
175 |             }
176 | 
177 |             if (killed_pid == child_pid) {
178 |                 forward_signal(SIGTERM); // send SIGTERM to any remaining children
179 |                 DEBUG("Child exited with status %d. Goodbye.\n", exit_status);
180 |                 quit(exit_status);
181 |                 // exit(exit_status);
182 |             }
183 |         }
184 |     } else {
185 |         forward_signal(signum);
186 |         if (signum == SIGTSTP || signum == SIGTTOU || signum == SIGTTIN) {
187 |             DEBUG("Suspending self due to TTY signal.\n");
188 |             kill(getpid(), SIGSTOP);
189 |         }
190 |     }
191 | }
192 | 
193 | void print_help(char *argv[]) {
194 |     fprintf(stderr,
195 |         "runcvm-init v%.*s"
196 |         "Usage: %s [option] command [[arg] ...]\n"
197 |         "\n"
198 |         "runcvm-init is a simple process supervisor that forwards signals to children.\n"
199 |         "It is designed to run as PID1 in minimal container environments.\n"
200 |         "\n"
201 |         "Optional arguments:\n"
202 |         "   -c, --single-child   Run in single-child mode.\n"
203 |         "                        In this mode, signals are only proxied to the\n"
204 |         "                        direct child and not any of its descendants.\n"
205 |         "   -r, --rewrite s:r    Rewrite received signal s to new signal r before proxying.\n"
206 |         "                        To ignore (not proxy) a signal, rewrite it to 0.\n"
207 |         "                        This option can be specified multiple times.\n"
208 |         "   -v, --verbose        Print debugging information to stderr.\n"
209 |         "   -h, --help           Print this help message and exit.\n"
210 |         "   -V, --version        Print the current version and exit.\n"
211 |         "   -F, --no-fork        Don't fork, just set up signals and tty\n"
212 |         "\n",
213 |         VERSION_len, VERSION,
214 |         argv[0]
215 |     );
216 | }
217 | 
218 | void print_rewrite_signum_help() {
219 |     fprintf(
220 |         stderr,
221 |         "Usage: -r option takes <signum>:<replacement>, where <signum> "
222 |         "is between 1 and %d.\n"
223 |         "This option can be specified multiple times.\n"
224 |         "Use --help for full usage.\n",
225 |         MAXSIG
226 |     );
227 |     exit(1);
228 | }
229 | 
230 | void parse_rewrite_signum(char *arg) {
231 |     int signum, replacement;
232 |     if (
233 |         sscanf(arg, "%d:%d", &signum, &replacement) == 2 &&
234 |         (signum >= 1 && signum <= MAXSIG) &&
235 |         (replacement >= 0 && replacement <= MAXSIG)
236 |     ) {
237 |         signal_rewrite[signum] = replacement;
238 |     } else {
239 |         print_rewrite_signum_help();
240 |     }
241 | }
242 | 
243 | void set_rewrite_to_sigstop_if_not_defined(int signum) {
244 |     if (signal_rewrite[signum] == -1) {
245 |         signal_rewrite[signum] = SIGSTOP;
246 |     }
247 | }
248 | 
249 | char **parse_command(int argc, char *argv[]) {
250 |     int opt;
251 |     struct option long_options[] = {
252 |         {"help", no_argument, NULL, 'h'},
253 |         {"single-child", no_argument, NULL, 'c'},
254 |         {"rewrite", required_argument, NULL,
'r'}, 255 | {"verbose", no_argument, NULL, 'v'}, 256 | {"version", no_argument, NULL, 'V'}, 257 | {"no-fork", no_argument, NULL, 'F'}, 258 | {NULL, 0, NULL, 0}, 259 | }; 260 | while ((opt = getopt_long(argc, argv, "+hvVcFr:", long_options, NULL)) != -1) { 261 | switch (opt) { 262 | case 'h': 263 | print_help(argv); 264 | exit(0); 265 | case 'v': 266 | debug = 1; 267 | break; 268 | case 'V': 269 | fprintf(stderr, "dumb-init v%.*s", VERSION_len, VERSION); 270 | exit(0); 271 | case 'c': 272 | use_setsid = 0; 273 | break; 274 | case 'r': 275 | parse_rewrite_signum(optarg); 276 | break; 277 | case 'F': 278 | no_fork = 1; 279 | break; 280 | default: 281 | exit(1); 282 | } 283 | } 284 | 285 | if (optind >= argc) { 286 | fprintf( 287 | stderr, 288 | "Usage: %s [option] program [args]\n" 289 | "Try %s --help for full usage.\n", 290 | argv[0], argv[0] 291 | ); 292 | exit(1); 293 | } 294 | 295 | char *debug_env = getenv("DUMB_INIT_DEBUG"); 296 | if (debug_env && strcmp(debug_env, "1") == 0) { 297 | debug = 1; 298 | DEBUG("Running in debug mode.\n"); 299 | } 300 | 301 | char *setsid_env = getenv("DUMB_INIT_SETSID"); 302 | if (setsid_env && strcmp(setsid_env, "0") == 0) { 303 | use_setsid = 0; 304 | DEBUG("Not running in setsid mode.\n"); 305 | } 306 | 307 | if (use_setsid) { 308 | set_rewrite_to_sigstop_if_not_defined(SIGTSTP); 309 | set_rewrite_to_sigstop_if_not_defined(SIGTTOU); 310 | set_rewrite_to_sigstop_if_not_defined(SIGTTIN); 311 | } 312 | 313 | return &argv[optind]; 314 | } 315 | 316 | // A dummy signal handler used for signals we care about. 317 | // On the FreeBSD kernel, ignored signals cannot be waited on by `sigwait` (but 318 | // they can be on Linux). We must provide a dummy handler. 319 | // https://lists.freebsd.org/pipermail/freebsd-ports/2009-October/057340.html 320 | void dummy(int signum) {} 321 | 322 | int main(int argc, char *argv[]) { 323 | char **cmd = parse_command(argc, argv); 324 | sigset_t all_signals; 325 | sigfillset(&all_signals); 326 | sigprocmask(SIG_BLOCK, &all_signals, NULL); 327 | 328 | int i = 0; 329 | for (i = 1; i <= MAXSIG; i++) { 330 | signal(i, dummy); 331 | } 332 | 333 | /* 334 | * Detach dumb-init from controlling tty, so that the child's session can 335 | * attach to it instead. 336 | * 337 | * We want the child to be able to be the session leader of the TTY so that 338 | * it can do normal job control. 339 | */ 340 | if (use_setsid) { 341 | if (ioctl(STDIN_FILENO, TIOCNOTTY) == -1) { 342 | DEBUG( 343 | "Unable to detach from controlling tty (errno=%d %s).\n", 344 | errno, 345 | strerror(errno) 346 | ); 347 | } else { 348 | /* 349 | * When the session leader detaches from its controlling tty via 350 | * TIOCNOTTY, the kernel sends SIGHUP and SIGCONT to the process 351 | * group. We need to be careful not to forward these on to the 352 | * dumb-init child so that it doesn't receive a SIGHUP and 353 | * terminate itself (#136). 354 | */ 355 | if (getsid(0) == getpid()) { 356 | DEBUG("Detached from controlling tty, ignoring the first SIGHUP and SIGCONT we receive.\n"); 357 | signal_temporary_ignores[SIGHUP] = 1; 358 | signal_temporary_ignores[SIGCONT] = 1; 359 | } else { 360 | DEBUG("Detached from controlling tty, but was not session leader.\n"); 361 | } 362 | } 363 | } 364 | 365 | if(no_fork) { 366 | child_pid = 0; 367 | } 368 | else { 369 | child_pid = fork(); 370 | } 371 | 372 | if (child_pid < 0) { 373 | PRINTERR("Unable to fork. 
Exiting.\n"); 374 | return 1; 375 | } else if (child_pid == 0) { 376 | /* child */ 377 | sigprocmask(SIG_UNBLOCK, &all_signals, NULL); 378 | if (use_setsid) { 379 | // Don't throw error if setsid() fails in no_fork mode; 380 | // we don't want this to prevent startup. 381 | if (setsid() == -1 && !no_fork) { 382 | PRINTERR( 383 | "Unable to setsid (errno=%d %s). Exiting.\n", 384 | errno, 385 | strerror(errno) 386 | ); 387 | exit(1); 388 | } 389 | 390 | if (ioctl(STDIN_FILENO, TIOCSCTTY, 0) == -1) { 391 | DEBUG( 392 | "Unable to attach to controlling tty (errno=%d %s).\n", 393 | errno, 394 | strerror(errno) 395 | ); 396 | } 397 | DEBUG("setsid complete.\n"); 398 | } 399 | execvp(cmd[0], &cmd[0]); 400 | 401 | // if this point is reached, exec failed, so we should exit nonzero 402 | PRINTERR("%s: %s\n", cmd[0], strerror(errno)); 403 | return 2; 404 | } else { 405 | /* parent */ 406 | DEBUG("Child spawned with PID %d.\n", child_pid); 407 | if (chdir("/") == -1) { 408 | DEBUG("Unable to chdir(\"/\") (errno=%d %s)\n", 409 | errno, 410 | strerror(errno)); 411 | } 412 | for (;;) { 413 | int signum; 414 | sigwait(&all_signals, &signum); 415 | handle_signal(signum); 416 | } 417 | } 418 | } 419 | -------------------------------------------------------------------------------- /runcvm-scripts/functions/cgroupfs: -------------------------------------------------------------------------------- 1 | cgroupfs_mount() { 2 | local cgroupfs="$1" 3 | 4 | # We want no cgroupfs at all, or we will leave it to the distribution. 5 | if [[ "$cgroupfs" = "none" || "$cgroupfs" = "systemd" ]]; then 6 | return 7 | fi 8 | 9 | # If defined in fstab, or there's no kernel support, skip. 10 | # see also https://github.com/tianon/cgroupfs-mount/blob/master/cgroupfs-mount 11 | if grep -v '^#' /etc/fstab | grep -q cgroup \ 12 | || [ ! -e /proc/cgroups ] \ 13 | || [ ! -d /sys/fs/cgroup ]; then 14 | return 15 | fi 16 | 17 | # If hybrid, mixed, or cgroup1 cgroup support is requested... 18 | if [[ "$cgroupfs" = "hybrid" || "$cgroupfs" = "mixed" || "$cgroupfs" = "1" || "$cgroupfs" = "cgroup1" ]]; then 19 | 20 | if ! findmnt -rnu -M /sys/fs/cgroup; then 21 | mount -t tmpfs -o uid=0,gid=0,mode=0755 cgroup /sys/fs/cgroup 22 | fi 23 | 24 | for subtype in $(awk '!/^#/ { if ($4 == 1) print $1 }' /proc/cgroups); do 25 | local sys="/sys/fs/cgroup/$subtype" 26 | mkdir -p $sys 27 | if ! findmnt -rnu -M $sys; then 28 | if ! mount -n -t cgroup -o $subtype cgroup $sys; then 29 | rmdir $sys || true 30 | fi 31 | fi 32 | done 33 | 34 | fi 35 | 36 | # If hybrid or mixed cgroup support is requested... 37 | if [[ "$cgroupfs" = "hybrid" || "$cgroupfs" = "mixed" ]]; then 38 | if ! findmnt -rnu -M /sys/fs/cgroup/unified; then 39 | mkdir -p /sys/fs/cgroup/unified 40 | mount -t cgroup2 -o rw,nosuid,nodev,noexec,relatime cgroup2 /sys/fs/cgroup/unified 41 | fi 42 | fi 43 | 44 | # If purely cgroup2 cgroup support is requested... 45 | if [[ "$cgroupfs" = "2" || "$cgroupfs" = "cgroup2" ]]; then 46 | if ! 
findmnt -rnu -M /sys/fs/cgroup; then 47 | mkdir -p /sys/fs/cgroup 48 | mount -t cgroup2 -o rw,nosuid,nodev,noexec,relatime cgroup2 /sys/fs/cgroup 49 | fi 50 | fi 51 | } -------------------------------------------------------------------------------- /runcvm-scripts/runcvm-ctr-defaults: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | RUNCVM_GUEST=${RUNCVM_GUEST:-/.runcvm/guest} 4 | RUNCVM_PATH=$RUNCVM_GUEST/usr/sbin:$RUNCVM_GUEST/usr/bin:$RUNCVM_GUEST/sbin:$RUNCVM_GUEST/bin:$RUNCVM_GUEST/usr/lib/qemu 5 | 6 | QEMU_VIRTIOFSD_SOCKET=/run/.virtiofs.sock 7 | QEMU_GUEST_AGENT=/run/.qemu-guest-agent 8 | QEMU_MONITOR_SOCKET=/run/.qemu-monitor-socket 9 | 10 | SSHD_PORT=22222 11 | 12 | clean_env() { 13 | export -n \ 14 | RUNCVM_BREAK RUNCVM_INIT \ 15 | RUNCVM_GUEST \ 16 | RUNCVM_RUNTIME_DEBUG RUNCVM_BIOS_DEBUG RUNCVM_KERNEL_DEBUG \ 17 | RUNCVM_KERNEL RUNCVM_KERNEL_ROOT RUNCVM_KERNEL_APPEND RUNCVM_KERNEL_INITRAMFS_PATH RUNCVM_KERNEL_PATH RUNCVM_DISKS \ 18 | RUNCVM_UIDGID RUNCVM_VM_MOUNTPOINT RUNCVM_TMPFS \ 19 | RUNCVM_CPUS RUNCVM_MEM_SIZE RUNCVM_HUGETLB \ 20 | RUNCVM_HAS_HOME \ 21 | RUNCVM_CGROUPFS 22 | 23 | # May be set in VM by busybox init process 24 | export -n USER 25 | } 26 | 27 | load_network() { 28 | local if="${1:-default}" 29 | [ -d /.runcvm/network/devices ] && [ -s /.runcvm/network/devices/$if ] || return 1 30 | read -r DOCKER_IF DOCKER_IF_MAC DOCKER_IF_MTU DOCKER_IF_IP DOCKER_IF_IP_NETPREFIX DOCKER_IF_IP_GW </.runcvm/network/devices/$if 31 | } -------------------------------------------------------------------------------- /runcvm-scripts/runcvm-ctr-entrypoint: -------------------------------------------------------------------------------- 1 | #!/.runcvm/guest/bin/bash 7 | # SAVE ENTRYPOINT 8 | printf '%s\n' "$0" "$@" >/.runcvm/entrypoint 9 | 10 | # SET HOME ENV VAR IF NEEDED 11 | 12 | # - See https://github.com/moby/moby/issues/2968#issuecomment-35822318 13 | # for details of how Docker sets HOME. 14 | # 15 | # - What this means is that: 16 | # 1. if HOME is defined in the image and 17 | # docker run: 18 | # a. does not define HOME 19 | # - config.json process.env[] will show the image-defined value and this value will be used 20 | # - docker exec 21 | # - does not define HOME, then process.json env[] will show the image-defined value and this value will be used 22 | # - does define HOME, then process.json env[] will show the exec-defined value and this value will be used 23 | # b. does define HOME, config.json process.env[] will show the docker run-defined value and this value will be used 24 | # - docker exec 25 | # - does not define HOME, then process.json env[] will show the docker run-defined value and this value will be used 26 | # - does define HOME, then process.json env[] will show the exec-defined value and this value will be used 27 | # (the above is irrespective of -u setting) 28 | # 29 | # 2. if HOME is not defined in the image and 30 | # docker run: 31 | # a. does not define HOME 32 | # - config.json process.env[] will show no HOME value and the user's default homedir will be used 33 | # - docker exec 34 | # - does not define HOME, then process.json env[] will show no HOME value and the user's default homedir will be used 35 | # - does define HOME, then process.json env[] will show the exec-defined value and this value will be used 36 | # b.
does define HOME, config.json process.env[] will show the docker run-defined value and this value will be used 37 | # - docker exec 38 | # - does not define HOME, then process.json env[] will show the docker run-defined value and this value will be used 39 | # - does define HOME, then process.json env[] will show the exec-defined value and this value will be used 40 | 41 | # Problem in 2a for us with docker run and docker exec is that while we save the requested uid:gid, we set the actual uid:gid to 0:0 42 | # to allow us to run virtiofsd (and, today, qemu) (in the docker run case) and access the qemu guest agent socket (in the docker exec case - though use of the agent is deprecated in favour of ssh). 43 | # 44 | # Where HOME is not explicitly defined, this leads to docker setting HOME to root's default homedir (typically /root), 45 | # for the calls to runcvm-ctr-entrypoint and runcvm-ctr-exec (respectively). 46 | # 47 | # How then do we distinguish this case from the case where HOME is explicitly set to /root? 48 | # The answer is that runcvm-runtime must check for HOME in env[] and indicate its presence in the calls to runcvm-ctr-entrypoint and runcvm-ctr-exec. 49 | # 50 | # runcvm-runtime does this: 51 | # - in the docker run case, via the RUNCVM_HAS_HOME env var 52 | # - in the docker exec case, via an argument to runcvm-ctr-exec 53 | 54 | # Here we check RUNCVM_HAS_HOME to determine whether the HOME env var was set either in the image, or via docker run. 55 | # If not, then we set HOME to the requested user's default homedir in accordance with https://github.com/moby/moby/issues/2968. 56 | 57 | if [ "$RUNCVM_HAS_HOME" == "0" ]; then 58 | HOME=$($RUNCVM_GUEST/usr/bin/getent passwd "${RUNCVM_UIDGID%%:*}" | $RUNCVM_GUEST/bin/cut -d':' -f6) 59 | fi 60 | 61 | # SAVE ENVIRONMENT 62 | export -n SHLVL OLDPWD 63 | 64 | export >/.runcvm/config 65 | 66 | # NOW LOAD DEFAULT ENV AND PATH 67 | . $RUNCVM_GUEST/scripts/runcvm-ctr-defaults 68 | 69 | # LOAD IP MANIPULATION FUNCTIONS 70 | . $RUNCVM_GUEST/scripts/runcvm-ip-functions 71 | 72 | # SAVE PWD 73 | busybox pwd >/.runcvm/pwd 74 | 75 | # DEBUG 76 | if [[ "$RUNCVM_BREAK" =~ prenet ]]; then bash; fi 77 | 78 | # SAVE NETWORKING CONFIG AND CONFIGURE BRIDGES 79 | 80 | # Identify default gateway device and IP address 81 | IFS=$'\n' read -d '' -r DOCKER_GW_IF DOCKER_GW_IF_IP <<< \ 82 | $(ip -json route show | jq -r '.[] | (select(.dst == "default") | .dev, .gateway)') 83 | # e.g. eth0 172.25.10.1 84 | 85 | QEMU_BRIDGE_IP=169.254.1.1 86 | RUNCVM_DNS_IP=169.254.169.254 87 | 88 | mkdir -p /.runcvm/network/devices 89 | 90 | # Save non-link-scope non-default routes for later restoration in the running VM. 91 | ip -json route show | jq -r '.[] | select(.scope != "link" and .dst != "default") | "\(.dst) \(.gateway) \(.dev) \(.prefsrc)"' >/.runcvm/network/routes 92 | 93 | for if in $(ip -json link show | jq -r '.[] | .ifname') 94 | do 95 | 96 | [ "$if" = "lo" ] && continue 97 | 98 | IFS=$'\n' read -d '' -r DOCKER_IF_IP DOCKER_IF_IP_NETPREFIX DOCKER_IF_MAC DOCKER_IF_MTU <<< \ 99 | $(ip -json addr show "$if" | jq -r '.[0] | .addr_info[0].local, .addr_info[0].prefixlen, .address, .mtu') 100 | # e.g. 
172.25.10.2 24 52:54:00:b7:0b:b6 1500 101 | 102 | # Save container network parameters 103 | if [ "$if" = "$DOCKER_GW_IF" ]; then 104 | echo "$if $DOCKER_IF_MAC $DOCKER_IF_MTU $DOCKER_IF_IP $DOCKER_IF_IP_NETPREFIX $DOCKER_GW_IF_IP" >/.runcvm/network/devices/$if 105 | ln -s "$if" /.runcvm/network/devices/default 106 | else 107 | echo "$if $DOCKER_IF_MAC $DOCKER_IF_MTU $DOCKER_IF_IP $DOCKER_IF_IP_NETPREFIX" >/.runcvm/network/devices/$if 108 | fi 109 | 110 | # RECONFIGURE CONTAINER NETWORK 111 | ip addr flush dev $if 112 | 113 | QEMU_BRIDGE="br-$if" 114 | 115 | # Create the container bridge 116 | # See https://bugs.launchpad.net/neutron/+bug/1738659 117 | ip link add $QEMU_BRIDGE type bridge forward_delay 0 ageing 0 118 | 119 | # Add the original container interface to the bridge and bring it up. 120 | ip link set dev "$if" master $QEMU_BRIDGE 121 | ip link set dev "$if" up 122 | 123 | # Bring the bridge up. 124 | ip link set dev $QEMU_BRIDGE up 125 | 126 | # Restore network route via this bridge 127 | DOCKER_NET=$(ip_prefix_to_network $DOCKER_IF_IP $DOCKER_IF_IP_NETPREFIX)/$DOCKER_IF_IP_NETPREFIX 128 | ip route add $DOCKER_NET dev $QEMU_BRIDGE 129 | 130 | # If this interface is the default gateway interface, perform additional special steps. 131 | if [ "$if" = "$DOCKER_GW_IF" ]; then 132 | 133 | # Add a private IP to this bridge. 134 | # We need it so the bridge can receive traffic, but the IP won't ever see the light of day. 135 | ip addr add $QEMU_BRIDGE_IP dev $QEMU_BRIDGE 136 | 137 | # Restore default gateway route via this bridge. 138 | ip route add default via $DOCKER_GW_IF_IP dev $QEMU_BRIDGE 139 | 140 | # Accept DNS requests for $RUNCVM_DNS_IP; these will be passed to dnsmasq 141 | XTABLES_LIBDIR=$RUNCVM_GUEST/usr/lib/xtables xtables-nft-multi iptables -t nat -A PREROUTING -d $RUNCVM_DNS_IP/32 -p udp -m udp --dport 53 -j REDIRECT 142 | 143 | # Match UDP port 53 traffic, outgoing via the QEMU bridge, from the bridge's own IP: 144 | # -> Masquerade as if from the VM's IP. 145 | # This allows outgoing DNS requests from the VM to be received by dnsmasq running in the container. 146 | XTABLES_LIBDIR=$RUNCVM_GUEST/usr/lib/xtables xtables-nft-multi iptables -t nat -A POSTROUTING -o $QEMU_BRIDGE -s $QEMU_BRIDGE_IP/32 -p udp -m udp --sport 53 -j SNAT --to-source $DOCKER_IF_IP 147 | XTABLES_LIBDIR=$RUNCVM_GUEST/usr/lib/xtables xtables-nft-multi iptables -t nat -A POSTROUTING -o $QEMU_BRIDGE -s $QEMU_BRIDGE_IP/32 -p udp -m udp --dport 53 -j SNAT --to-source $DOCKER_IF_IP 148 | 149 | # Match traffic on TCP port $SSHD_PORT, outgoing via the QEMU bridge, from the bridge's own IP: 150 | # -> Masquerade it as if from the DNS_IP. 151 | # This is necessary to allow SSH from within the container to the VM. 152 | XTABLES_LIBDIR=$RUNCVM_GUEST/usr/lib/xtables xtables-nft-multi iptables -t nat -A POSTROUTING -o $QEMU_BRIDGE -s $QEMU_BRIDGE_IP/32 -p tcp -m tcp --dport $SSHD_PORT -j SNAT --to-source $RUNCVM_DNS_IP 153 | fi 154 | 155 | done 156 | 157 | # FIXME: Bind-mount /etc/resolv.conf as well as /vm/etc/resolv.conf to prevent them showing in 'docker diff' 158 | cat /vm/etc/resolv.conf >/etc/resolv.conf 159 | RESOLV_CONF_NEW=$(busybox sed -r "s/127.0.0.11/$RUNCVM_DNS_IP/" /vm/etc/resolv.conf) 160 | echo "$RESOLV_CONF_NEW" >/vm/etc/resolv.conf 161 | 162 | # LAUNCH DNSMASQ 163 | # It will receive local DNS requests (within the container, on 127.0.0.1) 164 | # and requests redirected locally (via the iptables PREROUTING REDIRECT rule) for $RUNCVM_DNS_IP. 
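# To recap the plumbing established above (an editorial summary of the rules as written): the VM's
# resolv.conf now names 169.254.169.254 in place of Docker's 127.0.0.11; the PREROUTING REDIRECT rule
# delivers the VM's DNS queries to the dnsmasq launched on the next line; and the POSTROUTING SNAT rules
# masquerade bridge-originated port-53 traffic as the VM's IP and container-to-VM ssh traffic
# (TCP port 22222) as 169.254.169.254, so both dnsmasq replies and ssh connections route correctly.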
165 | dnsmasq -u root --no-hosts 166 | 167 | # LAUNCH VIRTIOFSD 168 | $RUNCVM_GUEST/scripts/runcvm-ctr-virtiofsd & 169 | 170 | # DEBUG 171 | if [[ "$RUNCVM_BREAK" =~ postnet ]]; then bash; fi 172 | 173 | # LAUNCH INIT SUPERVISING QEMU 174 | # FIXME: Add -v to debug 175 | exec $RUNCVM_GUEST/sbin/runcvm-init -c $RUNCVM_GUEST/scripts/runcvm-ctr-qemu 176 | -------------------------------------------------------------------------------- /runcvm-scripts/runcvm-ctr-exec: -------------------------------------------------------------------------------- 1 | #!/.runcvm/guest/bin/bash -e 2 | 3 | # See https://qemu-project.gitlab.io/qemu/interop/qemu-ga-ref.html 4 | 5 | # Load original environment 6 | . /.runcvm/config 7 | 8 | # Load defaults and aliases 9 | . $RUNCVM_GUEST/scripts/runcvm-ctr-defaults 10 | 11 | env() { 12 | busybox env "$@" 13 | } 14 | 15 | to_bin() { 16 | # tab, LF, space, ', ", \ 17 | tr "\011\012\040\047\042\134" '\200\201\202\203\204\205' 18 | } 19 | 20 | # Expects: 21 | # - To be run as root 22 | # - To be given env vars 23 | # - To be given arguments 24 | # $1 <uid>:<gid>:<additionalGids> 25 | # $2 <cwd> 26 | # $3 <hasHome> 27 | # $4 <wantsTerminal> 28 | # $(5...) <command> <args...> 29 | 30 | command="$RUNCVM_GUEST/scripts/runcvm-vm-exec" 31 | uidgid="$1" 32 | cwd="$2" 33 | hasHome="$3" 34 | wantsTerminal="$4" 35 | shift 4 36 | 37 | # Parse uidgid and construct args array for the call to $command within the VM: 38 | # $1 <uidgid> 39 | # $2 <cwd, binary-encoded> 40 | # $3 <args, binary-encoded> 41 | # $(4...) <env, binary-encoded> 42 | 43 | IFS=':' read -r uid gid additionalGids <<< "$uidgid" 44 | args=("$@") 45 | 46 | if [ ${#args[@]} -gt 0 ]; then 47 | args_bin=$(printf '%s\n' "${args[@]}" | to_bin) 48 | fi 49 | 50 | # If the HOME env var was not set either in the image, or via docker run, or via docker exec, 51 | # then set HOME to the requested user's default homedir. 52 | # 53 | # - See runcvm-ctr-entrypoint for full details of how/why hasHome is needed and HOME gets set. 54 | 55 | if [ "$hasHome" != "1" ]; then 56 | # Either this script needs to look up uid's HOME or else runcvm-vm-exec does; for now, we do it here. 57 | HOME=$(getent passwd "$uid" | cut -d':' -f6) 58 | fi 59 | 60 | # Clean RUNCVM env vars 61 | clean_env 62 | 63 | # N.B. Only exported env vars will be returned and sent 64 | mapfile -t env < <(env -u _ -u SHLVL -u PWD) 65 | 66 | if [ ${#env[@]} -gt 0 ]; then 67 | env_bin=$(printf '%s\n' "${env[@]}" | to_bin) 68 | fi 69 | 70 | if [ "$wantsTerminal" = "true" ]; then 71 | opts=(-t) 72 | fi 73 | 74 | retries=30 # 15 seconds 75 | delay=0 # Signal that extra time should be allowed for RunCVM VM, its init and its dropbear sshd to start after the above conditions are satisfied 76 | 77 | while ! [ -s /.runcvm/dropbear/key ] || ! load_network 78 | do 79 | if [ $retries -gt 0 ]; then 80 | retries=$((retries-1)) 81 | delay=1 82 | sleep 0.5 83 | continue 84 | fi 85 | 86 | echo "Error: RunCVM container not yet started" >&2 87 | exit 1 88 | done 89 | 90 | # If startup was detected, wait a few extra seconds for dropbear sshd to be ready 91 | if [ "$delay" -ne 0 ]; then 92 | sleep 2 93 | fi 94 | 95 | exec $RUNCVM_GUEST/usr/bin/dbclient "${opts[@]}" -p $SSHD_PORT -y -y -i /.runcvm/dropbear/key root@$DOCKER_IF_IP "$command '$uidgid' '$(echo -n $cwd | to_bin)' '$args_bin' '$env_bin'" -------------------------------------------------------------------------------- /runcvm-scripts/runcvm-ctr-exit: -------------------------------------------------------------------------------- 1 | #!/.runcvm/guest/bin/bash 2 | 3 | # Load original environment 4 | . /.runcvm/config 5 | 6 | # Load defaults and aliases 7 | .
$RUNCVM_GUEST/scripts/runcvm-ctr-defaults 8 | 9 | # runcvm-init execs this script when it exits. 10 | # It: 11 | # - performs any post-VM tests. 12 | # - retrieves any saved exit code. 13 | # - resets terminal readline horizontal scroll 14 | # - exits with exit code 15 | 16 | if [ -f /.runcvm/exitcode ]; then 17 | read CODE </.runcvm/exitcode 18 | fi 27 | 28 | exit ${CODE:-0} -------------------------------------------------------------------------------- /runcvm-scripts/runcvm-ctr-qemu: -------------------------------------------------------------------------------- 1 | #!/.runcvm/guest/bin/bash 2 | 3 | # Exit on errors 4 | set -o errexit -o pipefail 5 | 6 | # Load original environment 7 | . /.runcvm/config 8 | 9 | # Load defaults 10 | . $RUNCVM_GUEST/scripts/runcvm-ctr-defaults && unset PATH 11 | 12 | QEMU_IFUP="$RUNCVM_GUEST/scripts/runcvm-ctr-qemu-ifup" 13 | QEMU_IFDOWN="$RUNCVM_GUEST/scripts/runcvm-ctr-qemu-ifdown" 14 | 15 | INIT="init=$RUNCVM_GUEST/scripts/runcvm-vm-init" 16 | 17 | # Must export TERMINFO so curses library can find terminfo database. 18 | export TERMINFO="$RUNCVM_GUEST/usr/share/terminfo" 19 | 20 | error() { 21 | echo "$1" >&2 22 | exit 1 23 | } 24 | 25 | # Argument e.g. /volume/disk1,/var/lib/docker,ext4,5G 26 | do_disk() { 27 | local spec="$1" 28 | local src dst fs size dir UUID 29 | 30 | local IFS=',' 31 | read src dst fs size <<< $(echo "$spec") 32 | 33 | if [[ -z "$src" || -z "$dst" || -z "$fs" ]]; then 34 | error "Error: disk spec '$spec' invalid: src, dst and fs must all be specified" 35 | fi 36 | 37 | if [[ "$src" = "$dst" ]]; then 38 | error "Error: disk spec '$spec' invalid: src '$src' cannot be same as dst" 39 | fi 40 | 41 | if [[ -e "$src" && ! -f "$src" ]]; then 42 | error "Error: disk spec '$spec' invalid: src '$src' must be a plain file if it exists" 43 | fi 44 | 45 | if [[ -e "$dst" && ! -d "$dst" ]]; then 46 | error "Error: disk spec '$spec' invalid: dst '$dst' must be a directory if it exists" 47 | fi 48 | 49 | if [[ ! -f "$src" ]]; then 50 | 51 | if [[ -z "$size" ]]; then 52 | error "Error: disk spec '$spec' invalid: size must be specified if src '$src' does not exist" 53 | fi 54 | 55 | # Create directory for disk backing file, if needed. 56 | dir="$(busybox dirname "$src")" 57 | if ! [ -d "$dir" ]; then 58 | mkdir -p $(busybox dirname "$src") 59 | fi 60 | 61 | # Create disk backing file. 62 | busybox truncate -s "$size" "$src" >&2 || error "Error: disk spec '$spec' invalid: truncate on '$src' with size '$size' failed" 63 | 64 | # Create filesystem on disk backing file, populated with any pre-existing files from dst. 65 | [ -d "$RUNCVM_VM_MOUNTPOINT/$dst" ]|| mkdir -p "$RUNCVM_VM_MOUNTPOINT/$dst" >&2 66 | mke2fs -q -F -t "$fs" -d "$RUNCVM_VM_MOUNTPOINT/$dst" "$src" >&2 || error "Error: disk spec '$spec' invalid: mke2fs on '$src' with fs '$fs' failed" 67 | fi 68 | 69 | # Create the mountpoint, if we haven't already/it didn't already exist. 70 | [ -d "$RUNCVM_VM_MOUNTPOINT/$dst" ]|| mkdir -p "$RUNCVM_VM_MOUNTPOINT/$dst" >&2 71 | 72 | # Obtain a UUID for the filesystem and add to fstab. 73 | read -r UUID <<< $(blkid -o value "$src") 74 | echo "UUID=$UUID $dst $fs defaults,noatime 0 0" >>/.runcvm/fstab 75 | 76 | # Add disk to QEMU command line. 77 | DISKS+=(-drive file=$src,format=raw,if=virtio,media=disk,cache=directsync,aio=native) 78 | } 79 | 80 | # Argument e.g.
/disk1,/home,ext4,5G;/disk2,/var,ext4,1G 81 | do_disks() { 82 | local IFS=';' 83 | local disk 84 | for disk in $1 85 | do 86 | do_disk "$disk" 87 | done 88 | } 89 | 90 | do_networks() { 91 | local id=0 ifpath if mac 92 | local DOCKER_IF DOCKER_IF_MAC DOCKER_IF_MTU DOCKER_IF_IP DOCKER_IF_IP_NETPREFIX DOCKER_IF_IP_GW 93 | 94 | for ifpath in /.runcvm/network/devices/* 95 | do 96 | if=$(busybox basename "$ifpath") 97 | 98 | [ "$if" = "default" ] && continue 99 | 100 | load_network "$if" 101 | 102 | mac=$(busybox sed -r 's/^..:..:../52:54:00/' <<<$DOCKER_IF_MAC) 103 | 104 | IFACES+=( 105 | -netdev tap,id=qemu$id,ifname=tap-$DOCKER_IF,script=$QEMU_IFUP,downscript=$QEMU_IFDOWN 106 | -device virtio-net-pci,netdev=qemu$id,mac=$mac,rombar=$id 107 | ) 108 | 109 | id=$((id+1)) 110 | done 111 | } 112 | 113 | DISKS=() 114 | if [ -n "$RUNCVM_DISKS" ]; then 115 | do_disks "$RUNCVM_DISKS" 116 | fi 117 | 118 | IFACES=() 119 | do_networks 120 | 121 | if [ -n "$RUNCVM_TMPFS" ]; then 122 | echo "$RUNCVM_TMPFS" >>/.runcvm/fstab 123 | fi 124 | 125 | if [[ -z "$RUNCVM_CPUS" || "$RUNCVM_CPUS" -le 0 ]]; then 126 | RUNCVM_CPUS=$(busybox nproc) 127 | fi 128 | 129 | # TODO: 130 | # - Consider using '-device pvpanic' 131 | 132 | if [ "$RUNCVM_ARCH" = "arm64" ]; then 133 | CMD="$(which qemu-system-aarch64)" 134 | MACHINE+=(-cpu max -machine virt,gic-version=max,usb=off) 135 | else 136 | CMD="$(which qemu-system-x86_64)" 137 | MACHINE+=(-enable-kvm -cpu host,pmu=off -machine q35,accel=kvm,usb=off,sata=off -device isa-debug-exit) 138 | fi 139 | 140 | if [ -n "$RUNCVM_QEMU_DISPLAY" ]; then 141 | DISPLAY+=(-display $RUNCVM_QEMU_DISPLAY) 142 | else 143 | DISPLAY+=(-nographic) 144 | DISPLAY+=(-vga none) 145 | fi 146 | 147 | if [ "$RUNCVM_BIOS_DEBUG" != "1" ]; then 148 | # Disable SeaBIOS serial console. 149 | # This -fw_cfg path is modified from the SeaBIOS default (to avoid an otherwise-inevitable QEMU 150 | # warning being emitted) and so requires patched bios.bin file(s) (see Dockerfile) 151 | OPTS+=(-fw_cfg opt/org.seabios/etc/sercon-port,string=0) 152 | fi 153 | 154 | MEM_BACKEND=(-numa node,memdev=mem) 155 | if [ "$RUNCVM_HUGETLB" != "1" ]; then 156 | # Tests suggest prealloc=on slows down mem-path=/dev/shm 157 | MEM_PATH="/dev/shm" MEM_PREALLOC="off" 158 | MEM_BACKEND+=(-object memory-backend-file,id=mem,size=$RUNCVM_MEM_SIZE,mem-path=$MEM_PATH,share=on,prealloc=$MEM_PREALLOC) 159 | else 160 | # Fastest performance: +15% CPU/net intensive; 3.5x disk intensive. 161 | MEM_BACKEND+=(-object memory-backend-memfd,id=mem,size=$RUNCVM_MEM_SIZE,share=on,prealloc=on,hugetlb=on) 162 | fi 163 | 164 | # 16-64 works well and is more performant than 1024 in some scenarios. 165 | # For now, stick with original figure. 166 | VIRTIOFS_QUEUE_SIZE=1024 167 | VIRTIOFS+=( 168 | -chardev socket,id=virtiofs,path=$QEMU_VIRTIOFSD_SOCKET 169 | -device vhost-user-fs-pci,queue-size=$VIRTIOFS_QUEUE_SIZE,chardev=virtiofs,tag=runcvmfs,ats=off 170 | ) 171 | 172 | CONSOLE=() 173 | CONSOLE_MONITOR="0" 174 | if [ "$CONSOLE_MONITOR" = "1" ]; then 175 | # Creates a multiplexed stdio backend connected to the serial port (and the QEMU monitor). 176 | # Use with /dev/ttyS0 177 | CONSOLE+=( 178 | -chardev stdio,id=char0,mux=on,signal=off 179 | -serial chardev:char0 -mon chardev=char0 180 | ) 181 | 182 | # Set monitor escape key to CTRL-T to reduce risk of conflict (as default, CTRL-A, is commonly used) 183 | CONSOLE+=(-echr 20) 184 | 185 | CONSOLE_DEV="ttyS0" 186 | else 187 | # Creates a stdio backend connected to the virtual console.
188 | # Use with /dev/hvc0 189 | CONSOLE+=( 190 | -chardev stdio,id=char0,mux=off,signal=off 191 | -device virtconsole,chardev=char0,id=console0 192 | ) 193 | 194 | CONSOLE_DEV="hvc0" 195 | fi 196 | 197 | # Save choice of console device 198 | echo "$CONSOLE_DEV" >/.runcvm/console 199 | 200 | # Experimental: Enable to specify a dedicated PCI bridge 201 | # OPTS+=(-device pci-bridge,bus=pcie.0,id=pci-bridge-0,chassis_nr=1,shpc=off,addr=2,io-reserve=4k,mem-reserve=1m,pref64-reserve=1m) 202 | 203 | # Experimental: Enable for a SCSI bus 204 | # OPTS+=(-device virtio-scsi-pci,id=scsi0,disable-modern=true) 205 | 206 | # Disable IPv6, which is currently unsupported, at kernel boot time 207 | APPEND+=(ipv6.disable=1 panic=-1) 208 | 209 | # Disable unneeded functionality 210 | APPEND+=(scsi_mod.scan=none tsc=reliable no_timer_check rcupdate.rcu_expedited=1 i8042.direct=1 i8042.dumbkbd=1 i8042.nopnp=1 i8042.noaux=1 noreplace-smp reboot=k cryptomgr.notests pci=lastbus=0 selinux=0) 211 | 212 | # Enable systemd startup logging by default: 213 | # - Only effective when --env=RUNCVM_KERNEL_DEBUG=1 214 | # - Override this by launching with --env='RUNCVM_KERNEL_APPEND=systemd.show_status=0' 215 | APPEND+=(systemd.show_status=1) 216 | 217 | if [ "$RUNCVM_KERNEL_DEBUG" = "1" ]; then 218 | APPEND+=(console=$CONSOLE_DEV) 219 | else 220 | APPEND+=(quiet) 221 | fi 222 | 223 | ARGS=( 224 | -no-user-config 225 | -nodefaults 226 | -no-reboot 227 | 228 | -action panic=none 229 | -action reboot=shutdown 230 | 231 | "${MACHINE[@]}" 232 | "${DISPLAY[@]}" 233 | "${OPTS[@]}" 234 | 235 | # N.B. There is a counterintuitive relationship between cpus and memory, and performance: 236 | # - more cpus needs more memory to maintain the same virtiofs disk I/O performance. 237 | -m "$RUNCVM_MEM_SIZE" 238 | -smp $RUNCVM_CPUS,cores=1,threads=1,sockets=$RUNCVM_CPUS,maxcpus=$RUNCVM_CPUS 239 | 240 | # Creates a virtio-serial bus on the PCI bus; this is used for the guest agent and virtiofs 241 | -device virtio-serial-pci,id=serial0 242 | 243 | # Creates an RNG on the PCI bus 244 | -object rng-random,id=rng0,filename=/dev/urandom -device virtio-rng-pci,rng=rng0 245 | 246 | # Memory backend 247 | "${MEM_BACKEND[@]}" 248 | 249 | # virtiofs socket and interface 250 | "${VIRTIOFS[@]}" 251 | 252 | # Configure host/container tap device with PXE roms disabled 253 | "${IFACES[@]}" 254 | "${DISKS[@]}" 255 | 256 | # Configure console 257 | "${CONSOLE[@]}" 258 | 259 | # Support for guest agent 260 | -chardev socket,id=qemuguest0,path=$QEMU_GUEST_AGENT,server=on,wait=off 261 | -device virtserialport,chardev=qemuguest0,name=org.qemu.guest_agent.0 262 | 263 | # Creates a unix socket for the QEMU monitor 264 | -monitor unix:$QEMU_MONITOR_SOCKET,server,nowait 265 | 266 | # Kernel and initrd and kernel cmdline 267 | -kernel $RUNCVM_KERNEL_PATH 268 | -initrd $RUNCVM_KERNEL_INITRAMFS_PATH 269 | -L $RUNCVM_GUEST/usr/share/qemu 270 | -append "$RUNCVM_KERNEL_ROOT $INIT rw ${APPEND[*]} $RUNCVM_KERNEL_APPEND" 271 | ) 272 | 273 | if [[ "$RUNCVM_BREAK" =~ preqemu ]]; then echo "Preparing to run: '$CMD' ${ARGS[@]@Q}"; bash; fi 274 | 275 | exec "$CMD" "${ARGS[@]}" 276 | -------------------------------------------------------------------------------- /runcvm-scripts/runcvm-ctr-qemu-ifdown: -------------------------------------------------------------------------------- 1 | #!/.runcvm/guest/bin/bash 2 | 3 | # Load original environment 4 | . /.runcvm/config 5 | 6 | # Load defaults and aliases 7 | . 
$RUNCVM_GUEST/scripts/runcvm-ctr-defaults 8 | 9 | ip link set dev "$1" down || true 10 | exit 0 11 | -------------------------------------------------------------------------------- /runcvm-scripts/runcvm-ctr-qemu-ifup: -------------------------------------------------------------------------------- 1 | #!/.runcvm/guest/bin/bash 2 | 3 | # Load original environment 4 | . /.runcvm/config 5 | 6 | # Load defaults and aliases 7 | . $RUNCVM_GUEST/scripts/runcvm-ctr-defaults 8 | 9 | tap="$1" 10 | if="$(busybox sed 's/tap-//' <<<$tap)" 11 | bri="$(busybox sed 's/tap-/br-/' <<<$tap)" 12 | 13 | load_network "$if" 14 | 15 | ip link set dev "$tap" up mtu "${DOCKER_IF_MTU:=1500}" master "$bri" 16 | 17 | exit 0 -------------------------------------------------------------------------------- /runcvm-scripts/runcvm-ctr-qemu-poweroff: -------------------------------------------------------------------------------- 1 | #!/.runcvm/guest/bin/bash 2 | 3 | # Load original environment 4 | . /.runcvm/config 5 | 6 | # Load defaults and aliases 7 | . $RUNCVM_GUEST/scripts/runcvm-ctr-defaults 8 | 9 | echo "system_powerdown" | nc -w 1 -U $QEMU_MONITOR_SOCKET -------------------------------------------------------------------------------- /runcvm-scripts/runcvm-ctr-shutdown: -------------------------------------------------------------------------------- 1 | #!/.runcvm/guest/bin/bash 2 | 3 | # runcvm-init forks and execs this script when it receives a SIGTERM 4 | 5 | # Load original environment 6 | . /.runcvm/config 7 | 8 | poweroff() { 9 | # Try ACPI poweroff 10 | $RUNCVM_GUEST/scripts/runcvm-ctr-qemu-poweroff 11 | 12 | # Try running busybox poweroff 13 | $RUNCVM_GUEST/scripts/runcvm-ctr-exec 0:0 / 0 0 $RUNCVM_GUEST/bin/poweroff &>/dev/null 14 | 15 | # Try killing the VM's PID 1 16 | $RUNCVM_GUEST/scripts/runcvm-ctr-exec 0:0 / 0 0 $RUNCVM_GUEST/bin/busybox kill 1 &>/dev/null 17 | } 18 | 19 | poweroff 20 | 21 | exit 0 22 | -------------------------------------------------------------------------------- /runcvm-scripts/runcvm-ctr-virtiofsd: -------------------------------------------------------------------------------- 1 | #!/.runcvm/guest/bin/bash 2 | 3 | # Load defaults and aliases 4 | . /.runcvm/guest/scripts/runcvm-ctr-defaults 5 | 6 | if [ "$RUNCVM_SYS_ADMIN" = "1" ]; then 7 | OPTS+=(-o modcaps=+sys_admin) 8 | fi 9 | 10 | OPTS+=(-o cache=always) 11 | 12 | # Experimental options that may improve performance. 13 | # OPTS+=(-o cache=auto) 14 | # OPTS+=(--thread-pool-size=1) 15 | 16 | # Send logs to /run in container (not in VM) 17 | exec "$(which virtiofsd)" "${OPTS[@]}" -o announce_submounts -o xattr --socket-path=$QEMU_VIRTIOFSD_SOCKET -o source=$RUNCVM_VM_MOUNTPOINT -o sandbox=chroot >/run/.virtiofsd.log 2>&1 -------------------------------------------------------------------------------- /runcvm-scripts/runcvm-install-runtime.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | RUNCVM=/opt/runcvm 4 | RUNCVM_LD=$RUNCVM/lib/ld 5 | RUNCVM_JQ=$RUNCVM/usr/bin/jq 6 | MNT=/runcvm 7 | REPO=${REPO:-newsnowlabs/runcvm} 8 | 9 | log() { 10 | echo "$@" 11 | } 12 | 13 | jq() { 14 | $RUNCVM_LD $RUNCVM_JQ "$@" 15 | } 16 | 17 | jq_set() { 18 | local file="$1" 19 | shift 20 | 21 | local tmp="/tmp/$$.json" 22 | 23 | if jq "$@" $file >$tmp; then 24 | mv $tmp $file 25 | else 26 | echo "Failed to update $(basename $file); aborting!" 
>&2 27 | exit 1 28 | fi 29 | } 30 | 31 | jq_get() { 32 | local file="$1" 33 | shift 34 | 35 | jq -r "$@" $file 36 | } 37 | 38 | usage() { 39 | cat <<_EOE_ >&2 40 | 41 | Usage: sudo $0 42 | _EOE_ 43 | exit 1 44 | } 45 | 46 | check_rp_filter() { 47 | # For RunCVM to work, the following condition on /proc/sys/net/ipv4/conf/<bridge> must be met: 48 | # - the max of all/rp_filter and <bridge>/rp_filter should be 0 or 2 49 | # (where <bridge> is the bridge underpinning the Docker network to which RunCVM instances will be attached) 50 | # 51 | # This means that: 52 | # - if all/rp_filter is set to 0, then <bridge>/rp_filter must be set to 0 or 2 53 | # (or, if <bridge> is not yet or might not yet have been created, then default/rp_filter must be set to 0 or 2) 54 | # - if all/rp_filter is set to 1, then <bridge>/rp_filter must be set to 2 55 | # (or, if <bridge> is not yet or might not yet have been created, then default/rp_filter must be set to 2) 56 | # - if all/rp_filter is set to 2, then no further action is needed 57 | 58 | local rp_filter_all rp_filter_default 59 | 60 | log "- Checking rp_filter ..." 61 | 62 | if [ -f "/proc/sys/net/ipv4/conf/all/rp_filter" ]; then 63 | rp_filter_all=$(cat /proc/sys/net/ipv4/conf/all/rp_filter) 64 | else 65 | log " - Warning: could not find /proc/sys/net/ipv4/conf/all/rp_filter" 66 | fi 67 | 68 | if [ -f "/proc/sys/net/ipv4/conf/default/rp_filter" ]; then 69 | rp_filter_default=$(cat /proc/sys/net/ipv4/conf/default/rp_filter) 70 | else 71 | log " - Warning: could not find /proc/sys/net/ipv4/conf/default/rp_filter" 72 | fi 73 | 74 | if [ -z "$rp_filter_all" ] || [ -z "$rp_filter_default" ]; then 75 | return 76 | fi 77 | 78 | if [ "$rp_filter_all" = "2" ]; then 79 | log " - sys.net.ipv4.conf.all.rp_filter is set to 2; assuming no further action needed" 80 | return 81 | elif [ "$rp_filter_all" = "0" ] && [ "$rp_filter_default" = "0" ]; then 82 | log " - sys.net.ipv4.conf.all.rp_filter AND sys.net.ipv4.conf.default.rp_filter are set to 0; assuming no further action needed" 83 | return 84 | fi 85 | 86 | log " - sys.net.ipv4.conf.all.rp_filter is set to $rp_filter_all; fixing ..." 87 | log " - Setting sys.net.ipv4.conf.all.rp_filter and sys.net.ipv4.conf.default.rp_filter to 2 ..." 88 | echo 2 >/proc/sys/net/ipv4/conf/all/rp_filter 89 | echo 2 >/proc/sys/net/ipv4/conf/default/rp_filter 90 | 91 | log " - Patching /etc/sysctl.conf, /etc/sysctl.d/* to make these settings persist after reboot ..." 92 | find /etc/sysctl.conf /etc/sysctl.d -type f -exec sed -r -i 's/^([ ]*net.ipv4.conf.(all|default).rp_filter)=(1)$/# DISABLED BY RUNCVM\n# \1=\3\n# ADDED BY RUNCVM\n\1=2/' {} \; 93 | } 94 | 95 | docker_restart() { 96 | # docker_restart 97 | # - With systemd, run: systemctl restart docker 98 | # - On GitHub Codespaces, run: sudo killall dockerd && sudo /usr/local/share/docker-init.sh 99 | 100 | local cmd init 101 | 102 | init=$(ps -o comm,pid 1 | grep ' 1$' | awk '{print $1}') 103 | 104 | log " - Preparing to restart dockerd ..." 105 | 106 | if [ "$init" = "systemd" ]; then 107 | log " - Detected systemd" 108 | cmd="systemctl restart docker" 109 | 110 | elif [ -x "/etc/init.d/docker" ]; then 111 | log " - Detected sysvinit" 112 | cmd="/etc/init.d/docker restart" 113 | 114 | elif [ "$init" = "docker-init" ]; then 115 | 116 | if [ -x "/usr/local/share/docker-init.sh" ]; then 117 | log " - Detected docker-init on GitHub Codespaces" 118 | cmd="killall dockerd && /usr/local/share/docker-init.sh" 119 | fi 120 | fi 121 | 122 | if [ -n "$cmd" ]; then 123 | log " - Preparing to run: $cmd" 124 | read -p " - Run this?
(Y/n): " yesno 125 | 126 | if [ "$yesno" != "${yesno#[Yy]}" ] || [ -z "$yesno" ]; then 127 | log " - Restarting dockerd with: $cmd" 128 | sh -c "$cmd" 2>&1 | sed 's/^/ - /' 129 | 130 | # Wait for dockerd to restart 131 | log " - Waiting for dockerd to restart ..." 132 | while ! docker ps >/dev/null 2>&1; do 133 | sleep 0.5 134 | done 135 | log " - Restarted dockerd successfully" 136 | 137 | else 138 | log " - Please restart dockerd manually in the usual manner for your system" 139 | fi 140 | 141 | else 142 | log " - Couldn't detect restart mechanism for dockerd, please restart manually in the usual manner for your system" 143 | fi 144 | } 145 | 146 | log 147 | log "RunCVM Runtime Installer" 148 | log "========================" 149 | log 150 | 151 | if [ $(id -u) -ne 0 ]; then 152 | log "- Error: $0 must be run as root. Please relaunch using sudo." 153 | usage 154 | fi 155 | 156 | for app in docker dockerd 157 | do 158 | if [ -z $(which docker) ]; then 159 | log "- Error: $0 currently requires the '$app' binary; please install it and try again" 160 | usage 161 | fi 162 | done 163 | 164 | 165 | if [ "$1" = "--no-dockerd" ]; then 166 | NO_DOCKERD="1" 167 | log "- Skipping dockerd check and docker-based package install due to '--no-dockerd'" 168 | shift 169 | else 170 | log "- Checking dockerd ..." 171 | if docker info >/dev/null 2>&1; then 172 | log " - Detected running dockerd" 173 | else 174 | log " - Error: dockerd not running; please start dockerd; aborting!" 175 | exit 1 176 | fi 177 | fi 178 | 179 | # Install RunCVM package to $MNT 180 | if [ -z "$NO_DOCKERD" ]; then 181 | log "- Installing RunCVM package to $MNT ..." 182 | if docker run --rm -v /opt/runcvm:$MNT $REPO --quiet; then 183 | log "- Installed RunCVM package to /opt/runcvm" 184 | else 185 | log "- Failed to install RunCVM package to /opt/runcvm; aborting!" 186 | exit 1 187 | fi 188 | fi 189 | 190 | if [ -d "/etc/docker" ]; then 191 | 192 | log "- Detected /etc/docker" 193 | 194 | if ! [ -f "/etc/docker/daemon.json" ]; then 195 | log " - Creating empty daemon.json" 196 | echo '{}' >/etc/docker/daemon.json 197 | fi 198 | 199 | if [ $(jq_get "/etc/docker/daemon.json" ".runtimes.runcvm.path") != "/opt/runcvm/scripts/runcvm-runtime" ]; then 200 | log " - Adding runcvm to daemon.json runtimes property ..." 201 | 202 | if jq_set "/etc/docker/daemon.json" '.runtimes.runcvm.path |= "/opt/runcvm/scripts/runcvm-runtime"'; then 203 | log " - Done" 204 | else 205 | log " - Failed: $!" 206 | exit 1 207 | fi 208 | 209 | # Attempt restart of dockerd 210 | # (if dockerd not found, we'll just continue) 211 | docker_restart 212 | 213 | else 214 | log " - Valid runcvm property already found in daemon.json" 215 | fi 216 | 217 | if docker info 2>/dev/null | grep -q runcvm; then 218 | # if [ $(docker info --format '{{ json .Runtimes.runcvm }}') = "{"path":"/opt/runcvm/scripts/runcvm-runtime"}" ]; then 219 | log " - Verification of RunCVM runtime in Docker completed" 220 | else 221 | log " - Warning: could not verify RunCVM runtime in Docker; perhaps you need to restart Docker manually" 222 | fi 223 | 224 | else 225 | log "- No /etc/docker detected; your mileage with RunCVM without Docker may vary!" 
226 | fi 227 | 228 | if [ -n "$(which podman)" ]; then 229 | log "- Detected podman binary" 230 | cat <<_EOE_ >&2 231 | - To enable experimental RunCVM support for Podman, add the following 232 | to /etc/containers/containers.conf in the [engine.runtimes] section: 233 | 234 | runcvm = [ "/opt/runcvm/scripts/runcvm-runtime" ] 235 | _EOE_ 236 | fi 237 | 238 | # Check, correct and make persistent required rp_filter settings 239 | check_rp_filter 240 | 241 | log "- RunCVM installation/upgrade complete." 242 | log -------------------------------------------------------------------------------- /runcvm-scripts/runcvm-ip-functions: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | cidr_to_int() { 4 | echo "$(( 0xffffffff ^ ((1 << (32 - $1)) - 1) ))" 5 | } 6 | 7 | int_to_ip() { 8 | local value="$1" 9 | echo "$(( ($1 >> 24) & 0xff )).$(( ($1 >> 16) & 0xff )).$(( ($1 >> 8) & 0xff )).$(( $1 & 0xff ))" 10 | } 11 | 12 | cidr_to_netmask() { 13 | local value=$(cidr_to_int "$1") 14 | int_to_ip "$value" 15 | } 16 | 17 | ip_prefix_to_network() { 18 | local IFS i1 i2 i3 i4 m1 m2 m3 m4 19 | IFS=. read -r i1 i2 i3 i4 <<< "$1" 20 | 21 | local mask=$(cidr_to_netmask "$2") 22 | IFS=. read -r m1 m2 m3 m4 <<< "$mask" 23 | 24 | printf "%d.%d.%d.%d\n" "$((i1 & m1))" "$((i2 & m2))" "$((i3 & m3))" "$((i4 & m4))" 25 | } 26 | 27 | cidr_to_bcastmask() { 28 | local value=$(( (1 << 32) - $(cidr_to_int "$1") - 1 )) 29 | int_to_ip "$value" 30 | } 31 | 32 | ip_prefix_to_bcast() { 33 | local IFS i1 i2 i3 i4 m1 m2 m3 m4 34 | IFS=. read -r i1 i2 i3 i4 <<< "$1" 35 | 36 | local mask=$(cidr_to_bcastmask "$2") 37 | IFS=. read -r m1 m2 m3 m4 <<< "$mask" 38 | 39 | printf "%d.%d.%d.%d\n" "$((i1 | m1))" "$((i2 | m2))" "$((i3 | m3))" "$((i4 | m4))" 40 | } -------------------------------------------------------------------------------- /runcvm-scripts/runcvm-runtime: -------------------------------------------------------------------------------- 1 | #!/opt/runcvm/lib/ld-musl-x86_64.so.1 /opt/runcvm/bin/bash 2 | 3 | # REFERENCES 4 | 5 | # Qemu: 6 | # - https://github.com/joshkunz/qemu-docker 7 | # - https://mergeboard.com/blog/2-qemu-microvm-docker/ 8 | # - https://github.com/BBVA/kvm 9 | 10 | # Virtiofs 11 | # - https://vmsplice.net/~stefan/virtio-fs_%20A%20Shared%20File%20System%20for%20Virtual%20Machines.pdf 12 | # - https://virtio-fs.gitlab.io/howto-qemu.html 13 | # - https://www.tauceti.blog/posts/qemu-kvm-share-host-directory-with-vm-with-virtio/ 14 | 15 | # Container config.json spec 16 | # - https://github.com/opencontainers/runtime-spec/ 17 | # - https://github.com/opencontainers/runtime-spec/blob/main/config.md 18 | 19 | # Mount namespaces 20 | # - https://www.kernel.org/doc/Documentation/filesystems/sharedsubtree.txt 21 | # - https://www.redhat.com/sysadmin/mount-namespaces 22 | 23 | RUNCVM=/opt/runcvm 24 | RUNCVM_LD=$RUNCVM/lib/ld 25 | RUNCVM_JQ=$RUNCVM/usr/bin/jq 26 | RUNCVM_VM_MOUNTPOINT="/vm" 27 | RUNCVM_GUEST=/.runcvm/guest 28 | RUNCVM_ENTRYPOINT=$RUNCVM_GUEST/scripts/runcvm-ctr-entrypoint 29 | RUNCVM_EXEC="$RUNCVM_GUEST/scripts/runcvm-ctr-exec" 30 | RUNCVM_KERNELS=$RUNCVM/kernels 31 | RUNCVM_GUEST_KERNELS=$RUNCVM_GUEST/kernels 32 | RUNCVM_KERNEL_DEFAULT=debian 33 | RUNCVM_MEM_SIZE_DEFAULT="512M" 34 | RUNCVM_DEBUG="" 35 | 36 | debug() { 37 | [ -n "$RUNCVM_DEBUG" ] && true || false 38 | } 39 | 40 | log() { 41 | debug && echo "$(date '+%Y-%m-%d %H:%M:%S.%6N'): $@" >>/tmp/runcvm-$$.log 42 | } 43 | 44 | error() { 45 | 46 | # Skip past any docker error ending in CR 47 | 
(echo; echo) >&2 48 | 49 | # Dump message to stderr 50 | echo "RunCVM: Error: $1" >&2 51 | 52 | # Dump error also to the logfile 53 | log "RunCVM: Error: $1" 54 | exit -1 55 | } 56 | 57 | load_env_from_file() { 58 | local file="$1" 59 | local var="$2" 60 | 61 | # Return gracefully if no $file exists 62 | if ! [ -f "$file" ]; then 63 | return 0 64 | fi 65 | 66 | while read LINE 67 | do 68 | local name="${LINE%%=*}" 69 | local value="${LINE#*=}" 70 | 71 | if [ "$name" != "$LINE" ] && [ "$value" != "$LINE" ] && [ "$name" = "$var" ]; then 72 | # We found variable $name: return it, removing any leading/trailing double quotes 73 | echo "$value" | sed 's/^"//;s/"$//' 74 | return 0 75 | fi 76 | done <"$file" 77 | 78 | return 1 79 | } 80 | 81 | jq() { 82 | $RUNCVM_LD $RUNCVM_JQ "$@" 83 | } 84 | 85 | jq_set() { 86 | local file="$1" 87 | shift 88 | 89 | local tmp="/tmp/config.json.$$" 90 | 91 | if jq "$@" $file >$tmp; then 92 | mv $tmp $file 93 | else 94 | echo "Failed to update $(basename $file); aborting!" >&2 95 | exit 1 96 | fi 97 | } 98 | 99 | jq_get() { 100 | local file="$1" 101 | shift 102 | 103 | jq -r "$@" $file 104 | } 105 | 106 | get_process_env() { 107 | local file="$1" 108 | local var="$2" 109 | local default="$3" 110 | local value 111 | 112 | value=$(jq_get "$file" --arg env "$var" '.env[] | select(match("^" + $env + "=")) | match("^" + $env + "=(.*)") | .captures[] | .string') 113 | 114 | [ -n "$value" ] && echo -n "$value" || echo -n "$default" 115 | } 116 | 117 | get_process_env_boolean() { 118 | local file="$1" 119 | local var="$2" 120 | local value 121 | 122 | value=$(jq_get "$file" --arg env "$var" '.env[] | select(match("^" + $env + "=")) | match("^" + $env + "=(.*)") | .captures[] | .string') 123 | 124 | [ -n "$value" ] && echo "1" || echo "0" 125 | } 126 | 127 | get_config_env() { 128 | local var="$1" 129 | local default="$2" 130 | local value 131 | 132 | value=$(jq_get "$CFG" --arg env "$var" '.process.env[] | select(match("^" + $env + "=")) | match("^" + $env + "=(.*)") | .captures[] | .string') 133 | 134 | [ -n "$value" ] && echo -n "$value" || echo -n "$default" 135 | } 136 | 137 | set_config_env() { 138 | local var="$1" 139 | local value="$2" 140 | 141 | jq_set "$CFG" --arg env "$var=$value" '.process.env |= (.+ [$env] | unique)' 142 | } 143 | 144 | 145 | # PARSE RUNC GLOBAL OPTIONS: 146 | # --debug enable debug logging 147 | # --log value set the log file to write runc logs to (default is '/dev/stderr') 148 | # --log-format value set the log format ('text' (default), or 'json') (default: "text") 149 | # --root value root directory for storage of container state (this should be located in tmpfs) (default: "/run/user/1000/runc") 150 | # --criu value path to the criu binary used for checkpoint and restore (default: "criu") 151 | # --systemd-cgroup enable systemd cgroup support, expects cgroupsPath to be of form "slice:prefix:name" for e.g.
"system.slice:runc:434234" 152 | # --rootless value ignore cgroup permission errors ('true', 'false', or 'auto') (default: "auto") 153 | 154 | COMMAND_LINE=("$@") 155 | 156 | if debug; then 157 | log "Command line: $0 ${COMMAND_LINE[@]@Q}" 158 | fi 159 | 160 | while true 161 | do 162 | case "$1" in 163 | --debug|--systemd-cgroup) shift; continue; ;; 164 | --log|--log-format|--root|--criu|--rootless) shift; shift; continue; ;; 165 | --log=*|--log-format=*|--root=*|--criu=*|--rootless=*) shift; continue; ;; 166 | *) break; ;; 167 | esac 168 | done 169 | 170 | COMMAND="$1" 171 | shift 172 | 173 | if [ "$COMMAND" = "create" ]; then 174 | 175 | debug && log "Command: create" 176 | 177 | # USAGE: 178 | # runc create [command options] 179 | # 180 | # PARSE 'create' COMMAND OPTIONS 181 | # --bundle value, -b value path to the root of the bundle directory, defaults to the current directory 182 | # --console-socket value path to an AF_UNIX socket which will receive a file descriptor referencing the master end of the console's pseudoterminal 183 | # --pid-file value specify the file to write the process id to 184 | # --no-pivot do not use pivot root to jail process inside rootfs. This should be used whenever the rootfs is on top of a ramdisk 185 | # --no-new-keyring do not create a new session keyring for the container. This will cause the container to inherit the calling processes session key 186 | # --preserve-fds value Pass N additional file descriptors to the container (stdio + $LISTEN_FDS + N in total) (default: 0) 187 | while true 188 | do 189 | case "$1" in 190 | --bundle|-b) shift; BUNDLE="$1"; shift; continue; ;; 191 | --console-socket|--pid-file|--preserve-fds) shift; shift; continue; ;; 192 | --no-pivot|--no-new-keyring) shift; continue; ;; 193 | *) break; ;; 194 | esac 195 | done 196 | 197 | ID="$1" 198 | 199 | CFG="$BUNDLE/config.json" 200 | ROOT=$(jq -r .root.path $CFG) 201 | 202 | # Allow user to enable debug logging 203 | if [ "$(get_config_env RUNCVM_RUNTIME_DEBUG)" = "1" ]; then 204 | RUNCVM_DEBUG="1" 205 | fi 206 | 207 | if debug; then 208 | log "Command line: $0 ${COMMAND_LINE[@]@Q}" 209 | log "Command: create bundle=$BUNDLE id=$ID root=$ROOT" 210 | 211 | # Save formatted config.json 212 | jq -r . <$CFG >/tmp/config.json-$$-1 213 | 214 | fi 215 | 216 | # Pending support for user-specified mountpoint for the guest (VM) binaries and scripts 217 | set_config_env "RUNCVM_GUEST" "$RUNCVM_GUEST" 218 | 219 | ARG0=$(jq_get "$CFG" '.process.args[0]') 220 | # Now look in mounts for destination == $ARG0 (this works for Docker and Podman) 221 | if [ "$ARG0" = "/sbin/docker-init" ] || [ "$ARG0" = "/dev/init" ]; then 222 | 223 | # User intended an init process to be run in the container, 224 | # so arrange to run our own instead, that will launch the original entrypoint 225 | 226 | # Look for and remove a mountpoint for this process. 227 | jq_set "$CFG" --arg init "$ARG0" '(.mounts[] | select(.destination == $init)) |= empty' 228 | 229 | # Replace the first argument with our own entrypoint; and remove the second, '--' (for now, #TODO) 230 | jq_set "$CFG" --arg entrypoint "$RUNCVM_ENTRYPOINT" '.process.args[0] = $entrypoint | del(.process.args[1])' 231 | 232 | # We know the user intended an init process to be run in the container. 233 | # TODO: We might want to indicate this, so that our entrypoint does not skip doing this 234 | # if the original entrypoint also looks like an init process. 
235 | set_config_env "RUNCVM_INIT" "1" 236 | else 237 | # We don't know if the original entrypoint is an init process or not. 238 | # Run our entrypoint first to work this out and do the right thing. 239 | 240 | jq_set "$CFG" --arg entrypoint "$RUNCVM_ENTRYPOINT" '.process.args |= [$entrypoint] + .' 241 | fi 242 | 243 | # SET RUNCVM_HAS_HOME 244 | # 245 | # If the HOME env var was not set either in the image, or via docker run, 246 | # then it will be missing in the config env. Detect this case for communication to runcvm-ctr-entrypoint 247 | # so that HOME can be set to the requested user's default homedir. 248 | # 249 | # - See runcvm-ctr-entrypoint for full details of how/why hasHome is needed and HOME gets set. 250 | if [ -n "$(get_config_env HOME)" ]; then 251 | set_config_env "RUNCVM_HAS_HOME" "1" 252 | else 253 | set_config_env "RUNCVM_HAS_HOME" "0" 254 | fi 255 | 256 | # CONFIGURE USER 257 | # - Must be root to run container 258 | RUNCVM_UIDGID=$(jq_get "$CFG" '(.process.user.uid | tostring) + ":" + (.process.user.gid | tostring) + ":" + ((.process.user.additionalGids // []) | join(","))') 259 | set_config_env "RUNCVM_UIDGID" "$RUNCVM_UIDGID" 260 | jq_set "$CFG" '.process.user = {"uid":0, "gid":0}' 261 | log "RUNCVM_UIDGID=$RUNCVM_UIDGID" 262 | 263 | # CONFIGURE CPUS 264 | RUNCVM_CPUS=$(( $(jq_get "$CFG" '.linux.resources.cpu.quota') / 100000)) 265 | set_config_env "RUNCVM_CPUS" "$RUNCVM_CPUS" 266 | log "RUNCVM_CPUS=$RUNCVM_CPUS" 267 | 268 | # CONFIGURE MOUNTS 269 | set_config_env "RUNCVM_VM_MOUNTPOINT" "$RUNCVM_VM_MOUNTPOINT" 270 | 271 | # First extract list of tmpfs mounts in fstab form, then delete them from the config 272 | RUNCVM_TMPFS=$(jq_get "$CFG" '( .mounts[] | select(.type == "tmpfs" and (.destination | test("^/dev(/|$)") | not) ) ) | [.source + " " + .destination + " tmpfs " + (.options | map(select(. != "rprivate" and . != "private")) | join(",")) + " 0 0"] | .[0]') 273 | jq_set "$CFG" -r 'del( .mounts[] | select(.type == "tmpfs" and (.destination | test("^/dev(/|$)") | not) ) )' 274 | set_config_env "RUNCVM_TMPFS" "$RUNCVM_TMPFS" 275 | 276 | # Rewrite all pre-existing bind/volume mounts (except those at or below /disks) to mount 277 | # below $RUNCVM_VM_MOUNTPOINT instead of below /. 278 | # 279 | # TODO TO CONSIDER: 280 | # If we excluded /etc/(resolv.conf,hosts,hostname), and moved these to top of the array 281 | # (by promoting them at the end of the below statements), they would be present in both 282 | # container and VM. 283 | # 284 | # N.B. A mount at or underneath /disks will NOT be mapped to /vm/disks - this path is reserved for mounting disk files to the container 285 | jq_set "$CFG" --arg vm "$RUNCVM_VM_MOUNTPOINT" '( .mounts[] | select(.type == "bind" and (.destination | test("^/disks(/|$)") | not) ) ).destination |= $vm + .' 286 | 287 | # Mount / from container to $RUNCVM_VM_MOUNTPOINT, recursively binding all pre-existing mount points 288 | # (these being only the ones defined ahead of this item in the mounts[] array - so order matters!) 289 | jq_set "$CFG" --arg root "$ROOT" --arg vm "$RUNCVM_VM_MOUNTPOINT" '.mounts |= [{"destination":$vm,"type":"bind","source":$root,"options":["rbind","private","rw"]}] + .' 
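# The statement above prepends a mounts[] entry equivalent to (illustrative; <rootfs path> is the
# bundle's .root.path):
#   {"destination":"/vm","type":"bind","source":"<rootfs path>","options":["rbind","private","rw"]}
# i.e. the container's entire root filesystem is re-exposed under /vm (RUNCVM_VM_MOUNTPOINT),
# which runcvm-ctr-virtiofsd exports to the VM as its virtiofs root filesystem.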
290 | 291 | # Mount /opt/runcvm from host to container 292 | # Define this at top of mounts[] so it is recursively mounted 293 | # and before (but after in the mounts[] array) /.runcvm so it can be mounted inside it 294 | jq_set "$CFG" --arg runcvm "$RUNCVM" --arg runcvm_guest "$RUNCVM_GUEST" '.mounts |= [{"destination":$runcvm_guest,"type":"bind","source":$runcvm,"options":["bind","private","ro"]}] + .' 295 | 296 | # Mount a tmpfs at /.runcvm in container 297 | # Define this at top of mounts[] so it is recursively mounted 298 | jq_set "$CFG" '.mounts |= [{"destination":"/.runcvm","type":"tmpfs","source":"runcvm","options":["nosuid","noexec","nodev","size=1M","mode=700"]}] + .' 299 | 300 | # Mount a tmpfs at /run in container 301 | # Define this at bottom of mounts[] so it is not recursively mounted to /vm 302 | jq_set "$CFG" '.mounts += [{"destination":"/run","type":"tmpfs","source":"run","options":["nosuid","noexec","nodev","size=1M","mode=700"]}]' 303 | 304 | # DETERMINE LAUNCH KERNEL: 305 | # 306 | # 1. If RUNCVM_KERNEL specified: 307 | # - <dist> or <dist>/latest - use latest RUNCVM kernel available for this dist *and* ARGS 308 | # - <dist>/<version> - use specific RUNCVM kernel version for this dist *and* ARGS 309 | # 2. Else, check /etc/os-release and: 310 | # a. Use builtin kernel for this dist (if present in the expected location) *and* ARGS 311 | # b. Use latest RUNCVM kernel available for the dist: 312 | # - ID=alpine, VERSION_ID=3.16.0 => alpine/latest 313 | # - ID=debian, VERSION_ID=11 => debian/latest 314 | # - ID=ubuntu, VERSION_ID=22.04 => ubuntu/latest 315 | 316 | # Look for RUNCVM_KERNEL env var 317 | RUNCVM_KERNEL=$(get_config_env 'RUNCVM_KERNEL') 318 | log "RUNCVM_KERNEL='$RUNCVM_KERNEL' (1)" 319 | 320 | # Generate: 321 | # - RUNCVM_KERNEL_ID: the distro name (e.g. alpine, debian, ubuntu) 322 | # - RUNCVM_KERNEL_IDVER: the distro name and kernel version (e.g. alpine/5.15.59-0-virt, debian/5.10.0-16-amd64) 323 | 324 | if [ -n "$RUNCVM_KERNEL" ]; then 325 | # If found, validate 326 | 327 | if [[ "$RUNCVM_KERNEL" =~ \.\. ]]; then 328 | error "Kernel '$RUNCVM_KERNEL' invalid (contains '..')" 329 | fi 330 | 331 | if ! [[ "$RUNCVM_KERNEL" =~ ^[a-z]+(/[^/]+)?$ ]]; then 332 | error "Kernel '$RUNCVM_KERNEL' invalid (should match ^[a-z]+(/[^/]+)?$)" 333 | fi 334 | 335 | if ! [ -d "$RUNCVM_KERNELS/$RUNCVM_KERNEL" ]; then 336 | error "Kernel '$RUNCVM_KERNEL' not found (check $RUNCVM_KERNELS)" 337 | fi 338 | 339 | # If RUNCVM_KERNEL is a distro name only, append /latest 340 | if [[ "$RUNCVM_KERNEL" =~ ^[a-z]+$ ]]; then 341 | RUNCVM_KERNEL_IDVER="$RUNCVM_KERNEL/latest" 342 | else 343 | RUNCVM_KERNEL_IDVER="$RUNCVM_KERNEL" 344 | fi 345 | 346 | RUNCVM_KERNEL_ID=$(dirname "$RUNCVM_KERNEL_IDVER") # Returns e.g.
alpine, debian, ubuntu 347 | 348 | else 349 | # If not found, look for value from /etc/os-release in the container image 350 | 351 | RUNCVM_KERNEL_ID=$(load_env_from_file "$ROOT/etc/os-release" "ID") 352 | 353 | # Currently unused 354 | # RUNCVM_KERNEL_OS_VERSION_ID=$(load_env_from_file "$ROOT/etc/os-release" "VERSION_ID") 355 | 356 | # If still not found, assign a default 357 | if [ -z "$RUNCVM_KERNEL_ID" ]; then 358 | RUNCVM_KERNEL_ID="${RUNCVM_KERNEL_DEFAULT:-debian}" 359 | fi 360 | 361 | RUNCVM_KERNEL_IDVER="$RUNCVM_KERNEL_ID/latest" 362 | fi 363 | 364 | log "RUNCVM_KERNEL='$RUNCVM_KERNEL' (2)" 365 | log "RUNCVM_KERNEL_ID='$RUNCVM_KERNEL_ID'" 366 | log "RUNCVM_KERNEL_IDVER='$RUNCVM_KERNEL_IDVER'" 367 | 368 | # Now look up the default kernel and initramfs paths and args for this kernel 369 | case "$RUNCVM_KERNEL_ID" in 370 | debian) RUNCVM_KERNEL_OS_KERNEL_PATH="/vmlinuz" 371 | RUNCVM_KERNEL_OS_INITRAMFS_PATH="/initrd.img" 372 | RUNCVM_KERNEL_ROOT="rootfstype=virtiofs root=runcvmfs noresume nomodeset net.ifnames=1" 373 | ;; 374 | ubuntu) RUNCVM_KERNEL_OS_KERNEL_PATH="/boot/vmlinuz" 375 | RUNCVM_KERNEL_OS_INITRAMFS_PATH="/boot/initrd.img" 376 | RUNCVM_KERNEL_ROOT="rootfstype=virtiofs root=runcvmfs noresume nomodeset net.ifnames=1" 377 | ;; 378 | ol) RUNCVM_KERNEL_OS_KERNEL_PATH="/boot/vmlinuz" 379 | RUNCVM_KERNEL_OS_INITRAMFS_PATH="/boot/initramfs" 380 | RUNCVM_KERNEL_ROOT="root=virtiofs:runcvmfs noresume nomodeset net.ifnames=1" 381 | ;; 382 | alpine|openwrt) RUNCVM_KERNEL_OS_KERNEL_PATH="/boot/vmlinuz-virt" 383 | RUNCVM_KERNEL_OS_INITRAMFS_PATH="/boot/initramfs-virt" 384 | RUNCVM_KERNEL_ROOT="rootfstype=virtiofs root=runcvmfs resume= nomodeset" 385 | ;; 386 | 387 | *) error "Unrecognised image O/S '$RUNCVM_KERNEL_ID'; specify --env=RUNCVM_KERNEL=<dist> or --env=RUNCVM_KERNEL=<dist>/<version>"; ;; 388 | esac 389 | 390 | # If no RUNCVM_KERNEL specified, look for a kernel and initramfs at the expected paths in the container image. 391 | if [[ -z "$RUNCVM_KERNEL" && -f "$ROOT/$RUNCVM_KERNEL_OS_KERNEL_PATH" && -f "$ROOT/$RUNCVM_KERNEL_OS_INITRAMFS_PATH" ]]; then 392 | RUNCVM_KERNEL_PATH="$RUNCVM_KERNEL_OS_KERNEL_PATH" 393 | RUNCVM_KERNEL_INITRAMFS_PATH="$RUNCVM_KERNEL_OS_INITRAMFS_PATH" 394 | else 395 | # If RUNCVM_KERNEL was specified, or we didn't find a kernel and initramfs at the expected paths in the container image, 396 | # select the latest RUNCVM kernel version and arrange to mount it. 397 | 398 | RUNCVM_KERNEL_VERSION=$(basename $(readlink -f "$RUNCVM_KERNELS/$RUNCVM_KERNEL_IDVER")) # Returns e.g.
5.15.53-0-virt 399 | 400 | RUNCVM_KERNEL_MOUNT_LIB_MODULES=$(get_config_env 'RUNCVM_KERNEL_MOUNT_LIB_MODULES') 401 | if [ -n "$RUNCVM_KERNEL_MOUNT_LIB_MODULES" ]; then 402 | RUNCVM_KERNEL_MODULES_SRC="$RUNCVM_KERNELS/$RUNCVM_KERNEL_ID/$RUNCVM_KERNEL_VERSION/modules" 403 | RUNCVM_KERNEL_MODULES_DST="/lib/modules" 404 | else 405 | RUNCVM_KERNEL_MODULES_SRC="$RUNCVM_KERNELS/$RUNCVM_KERNEL_ID/$RUNCVM_KERNEL_VERSION/modules/$RUNCVM_KERNEL_VERSION" 406 | RUNCVM_KERNEL_MODULES_DST="/lib/modules/$RUNCVM_KERNEL_VERSION" 407 | fi 408 | 409 | RUNCVM_KERNEL_PATH="$RUNCVM_GUEST_KERNELS/$RUNCVM_KERNEL_ID/$RUNCVM_KERNEL_VERSION/vmlinuz" 410 | RUNCVM_KERNEL_INITRAMFS_PATH="$RUNCVM_GUEST_KERNELS/$RUNCVM_KERNEL_ID/$RUNCVM_KERNEL_VERSION/initrd" 411 | 412 | jq_set "$CFG" --arg modules_dst "$RUNCVM_VM_MOUNTPOINT$RUNCVM_KERNEL_MODULES_DST" --arg modules_src "$RUNCVM_KERNEL_MODULES_SRC" '.mounts += [{"destination":$modules_dst,"type":"bind","source":$modules_src,"options":["bind","private","ro"]}]' 413 | fi 414 | 415 | log "RUNCVM_KERNEL='$RUNCVM_KERNEL'" 416 | log "RUNCVM_KERNEL_ID='$RUNCVM_KERNEL_ID'" 417 | log "RUNCVM_KERNEL_VERSION='$RUNCVM_KERNEL_VERSION'" 418 | log "RUNCVM_KERNEL_OS_KERNEL_PATH='$RUNCVM_KERNEL_OS_KERNEL_PATH'" 419 | log "RUNCVM_KERNEL_OS_INITRAMFS_PATH='$RUNCVM_KERNEL_OS_INITRAMFS_PATH'" 420 | log "RUNCVM_KERNEL_PATH='$RUNCVM_KERNEL_PATH'" 421 | log "RUNCVM_KERNEL_INITRAMFS_PATH='$RUNCVM_KERNEL_INITRAMFS_PATH'" 422 | log "RUNCVM_KERNEL_ROOT='$RUNCVM_KERNEL_ROOT'" 423 | log "RUNCVM_KERNEL_MODULES_SRC='$RUNCVM_KERNEL_MODULES_SRC'" 424 | log "RUNCVM_KERNEL_MODULES_DST='$RUNCVM_KERNEL_MODULES_DST'" 425 | 426 | set_config_env "RUNCVM_KERNEL_PATH" "$RUNCVM_KERNEL_PATH" 427 | set_config_env "RUNCVM_KERNEL_INITRAMFS_PATH" "$RUNCVM_KERNEL_INITRAMFS_PATH" 428 | set_config_env "RUNCVM_KERNEL_ROOT" "$RUNCVM_KERNEL_ROOT" 429 | 430 | # Configure devices 431 | jq_set "$CFG" '.linux.resources.devices += [{"allow":true,"type":"c","major":10,"minor":232,"access":"rwm"},{"allow":true,"type":"c","major":10,"minor":200,"access":"rwm"}]' 432 | jq_set "$CFG" '.linux.devices+=[{"path":"/dev/net/tun","type":"c","major":10,"minor":200,"fileMode":8630,"uid":0,"gid":0},{"path":"/dev/kvm","type":"c","major":10,"minor":232,"fileMode":8630,"uid":0,"gid":0}]' 433 | 434 | # For now, hardcode --security-opt=seccomp=unconfined; 435 | # later, we can work out the minimal seccomp permissions required. 436 | jq_set "$CFG" '.linux.seccomp |= empty' 437 | 438 | # CONFIGURE MEMORY 439 | # Set /dev/shm to RUNCVM_MEM_SIZE env var, or to default 440 | # - it should be large enough to support VM memory 441 | RUNCVM_MEM_LIMIT=$(jq_get "$CFG" '.linux.resources.memory.limit') 442 | log "RUNCVM_MEM_LIMIT=$RUNCVM_MEM_LIMIT" 443 | if [ "$RUNCVM_MEM_LIMIT" != "null" ]; then 444 | RUNCVM_MEM_SIZE="$(( $RUNCVM_MEM_LIMIT/1024/1024 ))M" 445 | else 446 | RUNCVM_MEM_SIZE="$RUNCVM_MEM_SIZE_DEFAULT" 447 | fi 448 | log "RUNCVM_MEM_SIZE=$RUNCVM_MEM_SIZE" 449 | set_config_env "RUNCVM_MEM_SIZE" "$RUNCVM_MEM_SIZE" 450 | 451 | RUNCVM_HUGETLB=$(get_config_env "RUNCVM_HUGETLB") 452 | if [ "$RUNCVM_HUGETLB" != "1" ]; then 453 | jq_set "$CFG" --arg size "$RUNCVM_MEM_SIZE" '( .mounts[] | select(.destination == "/dev/shm") ) = {"destination": "/dev/shm","type": "tmpfs","source": "shm","options": ["nosuid","noexec","nodev","mode=1777","size=" + $size]}' 454 | # else 455 | # --shm-size applies; default 64m. 456 | fi 457 | 458 | # In future, set the container memory limit to something reasonable to support 459 | # QEMU + virtiofsd + dnsmasq. 
Perhaps $RUNCVM_MEM_LIMIT+K (or, vice versa, reduce 460 | # RUNCVM_MEM_SIZE by K), where K is the memory requirement for the container's processes 461 | # over and above QEMU. 462 | # jq_set "$CFG" --arg size $(($RUNCVM_MEM_LIMIT + )) '.linux.resources.memory.limit |= ($size | tonumber)' 463 | 464 | # Add non-default capabilities needed by: 465 | # - Docker: CAP_NET_ADMIN 466 | # - Podman: CAP_NET_ADMIN, CAP_NET_RAW, CAP_MKNOD, CAP_AUDIT_WRITE 467 | for field in bounding effective permitted 468 | do 469 | jq_set "$CFG" --arg field "$field" '.process.capabilities[$field] |= (.+ ["CAP_NET_ADMIN","CAP_NET_RAW","CAP_MKNOD","CAP_AUDIT_WRITE"] | unique)' 470 | done 471 | 472 | # Filter for RUNCVM_SYS_ADMIN=1 473 | RUNCVM_SYS_ADMIN=$(get_config_env "RUNCVM_SYS_ADMIN") 474 | if [ "$RUNCVM_SYS_ADMIN" = "1" ]; then 475 | # TODO use 'unique' 476 | jq_set "$CFG" '.process.capabilities.bounding += ["CAP_SYS_ADMIN"] | .process.capabilities.effective += ["CAP_SYS_ADMIN"] | .process.capabilities.permitted += ["CAP_SYS_ADMIN"]' 477 | fi 478 | 479 | debug && cp -a $CFG /tmp/config.json-$$-2 480 | 481 | elif [ "$COMMAND" = "exec" ]; then 482 | 483 | debug && log "Command: exec" 484 | 485 | # USAGE: 486 | # runc exec [command options] <container-id> <command> [command options] || -p process.json <container-id> 487 | # 488 | # PARSE 'exec' COMMAND OPTIONS 489 | # --console-socket value path to an AF_UNIX socket which will receive a file descriptor referencing the master end of the console's pseudoterminal 490 | # --cwd value current working directory in the container 491 | # --env value, -e value set environment variables 492 | # --tty, -t allocate a pseudo-TTY 493 | # --user value, -u value UID (format: <uid>[:<gid>]) 494 | # --additional-gids value, -g value additional gids 495 | # --process value, -p value path to the process.json 496 | # --detach, -d detach from the container's process 497 | # --pid-file value specify the file to write the process id to 498 | # --process-label value set the asm process label for the process commonly used with selinux 499 | # --apparmor value set the apparmor profile for the process 500 | # --no-new-privs set the no new privileges value for the process 501 | # --cap value, -c value add a capability to the bounding set for the process 502 | # --preserve-fds value Pass N additional file descriptors to the container (stdio + $LISTEN_FDS + N in total) (default: 0) 503 | # --cgroup value run the process in an (existing) sub-cgroup(s). Format is [<controller>:]<cgroup>. 504 | # --ignore-paused allow exec in a paused container 505 | while true 506 | do 507 | case "$1" in 508 | --console-socket|--cwd|--env|-e|--user|-u|--additional-gids|-g|--pid-file|--process-label|--apparmor|--cap|-c|--preserve-fds|--cgroup) shift; shift; continue; ;; 509 | --tty|-t|--detach|-d|--no-new-privs|--ignore-paused) shift; continue; ;; 510 | --process|-p) shift; PROCESS="$1"; continue; ;; 511 | *) break; ;; 512 | esac 513 | done 514 | 515 | # Allow user to enable debug logging 516 | if [ "$(get_process_env "$PROCESS" 'RUNCVM_RUNTIME_DEBUG' '0')" = "1" ]; then 517 | RUNCVM_DEBUG="1" 518 | fi 519 | 520 | if debug; then 521 | log "Command line: $0 ${COMMAND_LINE[@]@Q}" 522 | log "Command: exec process=$PROCESS" 523 | 524 | # Save formatted process.json 525 | jq -r .
<$PROCESS >/tmp/process.json-$$-1 526 | fi 527 | 528 | ARG1=$(jq_get "$PROCESS" '.args[0]') 529 | if [ "$ARG1" = "---" ]; then 530 | jq_set "$PROCESS" 'del(.args[0])' 531 | else 532 | uidgid=$(jq_get "$PROCESS" '(.user.uid | tostring) + ":" + (.user.gid | tostring) + ":" + ((.user.additionalGids // []) | join(","))') 533 | cwd=$(jq_get "$PROCESS" '.cwd') 534 | hasHome=$(get_process_env_boolean "$PROCESS" 'HOME') 535 | wantsTerminal=$(jq_get "$PROCESS" '.terminal') 536 | 537 | jq_set "$PROCESS" \ 538 | --arg exec "$RUNCVM_EXEC" \ 539 | --arg uidgid "$uidgid" \ 540 | --arg cwd "$cwd" \ 541 | --arg hasHome "$hasHome" \ 542 | --arg wantsTerminal "$wantsTerminal" \ 543 | '.args |= [$exec, $uidgid, $cwd, $hasHome, $wantsTerminal] + .' 544 | 545 | # Force root (or whatever user qemu runs as) 546 | # Force cwd in the container to / 547 | jq_set "$PROCESS" '.user = {"uid":0, "gid":0} | .cwd="/"' 548 | fi 549 | 550 | debug && cp -a $PROCESS /tmp/process.json-$$-2 551 | fi 552 | 553 | debug && log "--- LOG ENDS ---" 554 | 555 | exec /usr/bin/runc "${COMMAND_LINE[@]}" 556 | -------------------------------------------------------------------------------- /runcvm-scripts/runcvm-vm-exec: -------------------------------------------------------------------------------- 1 | #!/.runcvm/guest/bin/bash 2 | 3 | from_bin() { 4 | tr '\200\201\202\203\204\205' "\011\012\040\047\042\134" 5 | } 6 | 7 | error() { 8 | echo "OCI runtime exec failed: exec failed: unable to start container process: chdir to cwd (\"$cwd\") set in config.json failed: no such file or directory: unknown" 9 | exit 126 10 | } 11 | 12 | uidgid="$1" 13 | cwd_bin="$2" 14 | shift 2 15 | 16 | IFS=':' read -r uid gid additionalGids <<< "$uidgid" 17 | 18 | args_bin="$1" 19 | env_bin="$2" 20 | 21 | mapfile -t args < <(echo -n "$args_bin" | from_bin) 22 | mapfile -t env < <(echo -n "$env_bin" | from_bin) 23 | cwd=$(echo -n "$cwd_bin" | from_bin) 24 | 25 | cd "$cwd" 2>/dev/null && unset OLDPWD || error 26 | 27 | # Load original environment 28 | . /.runcvm/config 29 | 30 | # Load defaults and aliases 31 | . $RUNCVM_GUEST/scripts/runcvm-ctr-defaults 32 | 33 | exec -c $RUNCVM_GUEST/bin/busybox env -i "${env[@]}" $RUNCVM_GUEST/bin/s6-applyuidgid -u $uid -g $gid -G "$additionalGids" "${args[@]}" -------------------------------------------------------------------------------- /runcvm-scripts/runcvm-vm-init: -------------------------------------------------------------------------------- 1 | #!/.runcvm/guest/bin/bash -e 2 | 3 | # Load original environment 4 | . /.runcvm/config 5 | 6 | # Load defaults and aliases 7 | . $RUNCVM_GUEST/scripts/runcvm-ctr-defaults 8 | 9 | # Alpine initrd doesn't honour command-line rw flag 10 | mount -o remount,rw / 11 | 12 | # FIXME: Something is making /.runcvm ro, so remount it rw 13 | # until such time as exit code handling and dropbear key creation 14 | # obviate the need for this. 15 | mount -o remount,rw /.runcvm 16 | 17 | # Alpine initrd doesn't configure /dev device permissions and ownership 18 | # to support non-root users. 
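# Without these fixups, a bare devtmpfs may lack the conventional /dev/stdin,
# /dev/stdout and /dev/stderr links, and tty device modes/ownership can prevent
# a non-root entrypoint from opening its terminal; the block below mirrors the
# device setup a full init system would normally perform.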
19 | if [ "$(findmnt -rnu -o FSTYPE /dev)" = "devtmpfs" ]; then 20 | [ -e /dev/stdin ] || ln -snf /proc/self/fd/0 /dev/stdin 21 | [ -e /dev/stdout ] || ln -snf /proc/self/fd/1 /dev/stdout 22 | [ -e /dev/stderr ] || ln -snf /proc/self/fd/2 /dev/stderr 23 | [ -e /proc/kcore ] && ln -snf /proc/kcore /dev/core 24 | [ -h /dev/ptmx ] || ln -snf pts/ptmx /dev/ptmx 25 | chmod 666 /dev/null /dev/random /dev/urandom /dev/zero /dev/tty /dev/pts/ptmx 26 | chmod 620 /dev/tty[0-9]* 27 | chgrp tty /dev/tty* 28 | fi 29 | 30 | # Unmount /run if it is a tmpfs (not a virtiofs) mounted by the initramfs 31 | # /run may be populated in the underlying image, and may also be a volume or be bind-mounted, 32 | # and its contents should be accessible in these cases. 33 | if [ "$(findmnt -rnu -o FSTYPE /run)" = "tmpfs" ]; then 34 | busybox umount -fl /run 35 | fi 36 | 37 | # FIXME: virtiofs mounts aren't always made rw. Remount them all rw (if allowed) 38 | # $RUNCVM_GUEST/bin/mount -t virtiofs | awk '{print $3}' | xargs -n 1 mount -o remount,rw 39 | 40 | # Some systems do not set up /dev/fd. If needed, add it. 41 | if ! [ -h /dev/fd ]; then 42 | ln -s /proc/self/fd /dev/fd 43 | fi 44 | 45 | # FIXME: This must be run early enough, otherwise other interfaces like docker0 might have started 46 | IF=$(ls /sys/class/net/ | grep -vE '^(lo|docker)' | head -n 1) 47 | 48 | # https://bugzilla.redhat.com/show_bug.cgi?id=501934 49 | for i in all $IF 50 | do 51 | # /sbin/sysctl -q -w -e net.ipv6.conf.$i.disable_ipv6=1 net.ipv6.conf.$i.autoconf=0 net.ipv6.conf.$i.accept_ra=0 52 | sysctl -q -w -e net.ipv6.conf.$i.disable_ipv6=1 net.ipv6.conf.$i.autoconf=0 || true 53 | done 54 | 55 | # Bring up local interface 56 | ip link set lo up 57 | 58 | # Identify each interface by MAC address, then give each a temporary name 59 | # (as we might ultimately need to rename e.g. eth0->eth1 and eth1->eth0). 60 | for ifpath in /.runcvm/network/devices/* 61 | do 62 | if=$(busybox basename "$ifpath") 63 | 64 | [ "$if" = "default" ] && continue 65 | 66 | load_network "$if" 67 | 68 | # Locate the actual network device by its MAC address. 69 | mac=$(busybox sed -r 's/^..:..:../52:54:00/' <<<$DOCKER_IF_MAC) 70 | device=$(ip -json link show | jq -r --arg mac "$mac" '.[] | select(.address == $mac) | .ifname') 71 | 72 | ip link set $device name $DOCKER_IF-tmp 73 | done 74 | 75 | # Configure, rename and bring up all interfaces. 76 | for ifpath in /.runcvm/network/devices/* 77 | do 78 | if=$(busybox basename "$ifpath") 79 | 80 | [ "$if" = "default" ] && continue 81 | 82 | load_network "$if" 83 | 84 | ip link set $DOCKER_IF-tmp name $DOCKER_IF 85 | ip addr add $DOCKER_IF_IP/$DOCKER_IF_IP_NETPREFIX broadcast + dev $DOCKER_IF 86 | ip link set $DOCKER_IF up mtu "${DOCKER_IF_MTU:=1500}" 87 | 88 | # If this is the default gateway interface, establish the default gateway 89 | [ -n "$DOCKER_IF_IP_GW" ] && ip route add default via $DOCKER_IF_IP_GW 90 | done 91 | 92 | # Read and install any supplementary routes. 
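# Each input line is expected to carry whitespace-separated fields matching the
# read below:
#   NET GW DEV [PREFSRC]
# e.g. (illustrative values only):
#   172.19.0.0/16 172.18.0.1 eth1 172.18.0.5
# Lines missing NET, GW or DEV are skipped, and failed route additions are
# tolerated (|| true) so that VM boot can continue.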
93 | while read -r DOCKER_RT_NET DOCKER_RT_GW DOCKER_RT_DEV DOCKER_RT_PREFSRC 94 | do 95 | [ -n "$DOCKER_RT_NET" ] && [ -n "$DOCKER_RT_GW" ] && [ -n "$DOCKER_RT_DEV" ] && \ 96 | ip route add "$DOCKER_RT_NET" via "$DOCKER_RT_GW" dev "$DOCKER_RT_DEV" || true 97 | done /dev/null | grep ^ssh | cut -d' ' -f2) 142 | 143 | # Create json for dropbear EPKA module 144 | cat <<_EOE_ >/.runcvm/dropbear/epka.json && chmod 400 /.runcvm/dropbear/epka.json 145 | [ 146 | { 147 | "user": "root", 148 | "keytype": "ssh-rsa", 149 | "key": "$KEY_PUBLIC", 150 | "options":"no-X11-forwarding", 151 | "comments": "" 152 | } 153 | ] 154 | _EOE_ 155 | 156 | # Load choice of console device 157 | read -r CONSOLE_DEVICE /etc/inittab <<_EOE_ 168 | $CONSOLE_DEVICE::respawn:-$RUNCVM_GUEST/scripts/runcvm-vm-start-wrapper 169 | null::respawn:$RUNCVM_GUEST/scripts/runcvm-vm-qemu-ga 170 | null::respawn:$RUNCVM_GUEST/usr/sbin/dropbear -REF -p $SSHD_PORT -A $RUNCVM_GUEST/tmp/dropbear/libepka_file.so,/.runcvm/dropbear/epka.json -P /.runcvm/dropbear/dropbear.pid 171 | null::ctrlaltdel:$RUNCVM_GUEST/bin/poweroff 172 | null::restart:$RUNCVM_GUEST/bin/poweroff 173 | null::shutdown:$RUNCVM_GUEST/bin/poweroff 174 | _EOE_ 175 | 176 | # Allow runcvm-vm-start to run once (and only once) 177 | rm -f /.runcvm/once 178 | 179 | # Clear the environment, and run our own init, disconnecting stdout and stderr from terminal 180 | exec -c $RUNCVM_GUEST/bin/init &>/dev/null 181 | else 182 | # If not, assume the user knows what they're doing: launch qemu-ga and just run their entrypoint. 183 | 184 | # Clean RUNCVM env vars 185 | clean_env 186 | 187 | # Run the qemu guest agent, needed to support future functionality 188 | $RUNCVM/scripts/runcvm-vm-qemu-ga &>/dev/null & 189 | 190 | # Run dropbear SSH server, needed to support 'docker exec' 191 | dropbear -REF -p $SSHD_PORT -A $RUNCVM_GUEST/tmp/dropbear/libepka_file.so,/.runcvm/dropbear/epka.json -P /.runcvm/dropbear/dropbear.pid &>/dev/null & 192 | 193 | # Run init from the image 194 | # Pipe input/output from/to console device 195 | exec /dev/$CONSOLE_DEVICE 196 | 197 | # Invoke runcvm-init with --no-fork purely to create controlling tty, 198 | # then exec runcvm-vm-start 199 | exec -c $RUNCVM_GUEST/sbin/runcvm-init --no-fork $RUNCVM_GUEST/scripts/runcvm-vm-start 200 | fi 201 | -------------------------------------------------------------------------------- /runcvm-scripts/runcvm-vm-qemu-ga: -------------------------------------------------------------------------------- 1 | #!/.runcvm/guest/bin/bash 2 | 3 | # Load config 4 | . /.runcvm/config 5 | 6 | # Load defaults and aliases 7 | . $RUNCVM_GUEST/scripts/runcvm-ctr-defaults 8 | 9 | OPTS=(--retry-path --statedir /.runcvm) 10 | 11 | if [ -f "/dev/virtio-ports/org.qemu.guest_agent.0" ]; then 12 | DEV="/dev/virtio-ports/org.qemu.guest_agent.0" 13 | else 14 | DEV=$(ls /dev/vport* | head -n 1) 15 | 16 | if [ -n "$DEV" ] && [ -c "$DEV" ]; then 17 | OPTS+=(-p "$DEV") 18 | fi 19 | fi 20 | 21 | if [ -z "$DEV" ]; then 22 | exit 0 23 | fi 24 | 25 | exec -c "$(which qemu-ga)" "${OPTS[@]}" 26 | -------------------------------------------------------------------------------- /runcvm-scripts/runcvm-vm-start: -------------------------------------------------------------------------------- 1 | #!/.runcvm/guest/bin/bash 2 | 3 | # Load original environment 4 | . /.runcvm/config 5 | 6 | # Load defaults and aliases 7 | . 
$RUNCVM_GUEST/scripts/runcvm-ctr-defaults 8 | 9 | if [ -f /.runcvm/once ]; then 10 | poweroff 11 | exit 0 12 | else 13 | touch /.runcvm/once 14 | fi 15 | 16 | # Change to saved PWD 17 | cd $(cat /.runcvm/pwd) && unset OLDPWD 18 | 19 | # Reload original environment 20 | . /.runcvm/config 21 | 22 | # Load original entrypoint 23 | mapfile -t ARGS /dev/null) 49 | [ -n "$IP" ] && break 50 | [ $i -eq 1 ] && log "Ingress IP detection for this node failed!" && exit 1 51 | sleep 0.5 52 | done 53 | 54 | echo -n "$IP" >$FILE 55 | } 56 | 57 | cgroupfs_mount 58 | 59 | ulimit -u unlimited 60 | 61 | modprobe ip_vs 62 | 63 | h=$(hostname) 64 | 65 | log "Checking network ..." 66 | read -r DOCKER_IF DOCKER_IF_GW \ 67 | <<< $(ip -json route show | jq -j '.[] | select(.dst == "default") | .dev, " ", .gateway') 68 | 69 | read -r DOCKER_IF_IP DOCKER_IF_MTU <<< \ 70 | $(ip -json addr show eth0 | jq -j '.[0] | .addr_info[0].local, " ", .mtu') 71 | 72 | log "- DOCKER_IF=$DOCKER_IF DOCKER_IF_IP=$DOCKER_IF_IP DOCKER_IF_GW=$DOCKER_IF_GW DOCKER_IF_MTU=$DOCKER_IF_MTU" 73 | 74 | # Start dockerd and keep it running 75 | DOCKER_OPTS=(--mtu=$DOCKER_IF_MTU) 76 | DOCKER_OPTS+=(--add-runtime runcvm=/opt/runcvm/scripts/runcvm-runtime) 77 | 78 | if [ -n "$REGISTRY_MIRROR" ]; then 79 | # Replace localhost with the custom network gateway, to reach a registry running on the host network 80 | DOCKER_OPTS+=(--registry-mirror=$(sed "s|/localhost\b|/$DOCKER_IF_GW|" <<< $REGISTRY_MIRROR)) 81 | fi 82 | 83 | log "Launching 'dockerd ${DOCKER_OPTS[*]}' ..." 84 | while true; do dockerd "${DOCKER_OPTS[@]}" >>/var/log/dockerd.log 2>&1; done & 85 | 86 | for i in $(seq 1 10 | sort -nr) 87 | do 88 | log "Waiting for dockerd to start (#$i) ..." 89 | docker ps >/dev/null 2>&1 && break 90 | [ $i -eq 1 ] && exit 1 91 | sleep 0.5 92 | done 93 | 94 | log "dockerd started" 95 | 96 | docker info 97 | 98 | node_state 99 | log "docker swarm: node state = $NodeState; manager=$IsManager" 100 | 101 | log "Creating docker_gwbridge network with MTU $DOCKER_IF_MTU" 102 | docker network create -d bridge \ 103 | --subnet 172.18.0.0/16 \ 104 | --opt com.docker.network.bridge.name=docker_gwbridge \ 105 | --opt com.docker.network.bridge.enable_icc=false \ 106 | --opt com.docker.network.bridge.enable_ip_masquerade=true \ 107 | --opt com.docker.network.driver.mtu=$DOCKER_IF_MTU \ 108 | docker_gwbridge 109 | 110 | if [ "$NodeState" = "inactive" ] || [ "$NodeState" = "pending" ]; then 111 | 112 | if [ "$NODE" != "1" ]; then 113 | 114 | for i in $(seq 1 20 | sort -nr) 115 | do 116 | log "Waiting for swarm manager startup (#$i) ..." 117 | [ -f /swarm/worker ] && break 118 | [ $i -eq 1 ] && exit 1 119 | sleep 1 120 | done 121 | 122 | log "Swarm manager has started up." 123 | for i in $(seq 1 20 | sort -nr) 124 | do 125 | log "Joining swarm (#$i) ..." 126 | . /swarm/worker && break 127 | [ $i -eq 1 ] && exit 1 128 | sleep 0.5 129 | done 130 | 131 | log "Joined swarm!" 132 | 133 | else 134 | 135 | log "Initialising swarm ..." 136 | if ! docker swarm init >/dev/null; then 137 | log "Swarm initialisation FAILED!" 138 | exit 1 139 | fi 140 | 141 | log "Swarm initialised!" 142 | 143 | if [ -n "$MTU" ] && [ "$MTU" -gt 0 ]; then 144 | 145 | log "Removing default ingress ..." 146 | echo y | docker network rm ingress 147 | 148 | log "Waiting 3s for ingress removal ..."
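# Ingress removal completes asynchronously, hence the fixed delay before the
# network is recreated below. The replacement ingress is created with the node
# MTU because VXLAN encapsulation adds roughly 50 bytes per packet; an overlay
# MTU larger than the underlay's would otherwise cause fragmentation or drops
# on jumbo-frame networks.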
149 | sleep 3 150 | 151 | log "Creating new ingress with MTU $DOCKER_IF_MTU" 152 | docker network create \ 153 | --driver=overlay \ 154 | --ingress \ 155 | --subnet=10.0.0.0/24 \ 156 | --gateway=10.0.0.1 \ 157 | --opt com.docker.network.driver.mtu=$DOCKER_IF_MTU \ 158 | ingress 159 | fi 160 | 161 | log "Writing swarm 'join token' to shared storage and waiting for other nodes ..." 162 | mkdir -p /swarm/nodes && docker swarm join-token worker | grep docker >/swarm/worker 163 | 164 | for i in $(seq 1 30 | sort -nr) 165 | do 166 | nodes=$(docker node ls --format '{{json .}}' | wc -l) 167 | log "Waiting for remaining $((NODES-nodes)) of $NODES nodes to join swarm (#$i) ..." 168 | [ $nodes -eq $NODES ] && break 169 | [ $i -eq 1 ] && log "Swarm failed!" && exit 1 170 | sleep 1 171 | done 172 | 173 | log "Swarm nodes started:" 174 | docker node ls 175 | echo 176 | 177 | fi 178 | 179 | log "Log memory consumption ..." 180 | free 181 | 182 | # Log this trigger line last BUT before (optionally) running DIRD. 183 | # This is because the test script waits for this line to appear before proceeding to launch the service. 184 | # We log multiple times to work around a minor bug whereby the test script sometimes fails to react to the first log line alone. 185 | for i in $(seq 1 5); do log "Swarm complete!"; sleep 0.25; done 186 | 187 | # Optionally run DIRD. 188 | # Do this after logging "Swarm complete" so that test script proceeds to launch the service; 189 | # as, until the service is launched, the nodes' ingress network IPs will not yet be defined. 190 | if [ "$DIRD" = "1" ]; then 191 | 192 | log "Detecting node ingress network IP ..." 193 | detect_ingress_ip /swarm/nodes/$NODE 194 | log "Detected node ingress network IP '$(cat /swarm/nodes/$NODE)'" 195 | 196 | log "Waiting for all nodes' ingress network IPs ..." 197 | for i in $(seq 1 30 | sort -nr) 198 | do 199 | [ $(ls /swarm/nodes/ | wc -l) -eq $NODES ] && break 200 | [ $i -eq 1 ] && log "Ingress IP detection for all nodes failed!" && exit 1 201 | sleep 0.5 202 | done 203 | 204 | for n in $(ls /swarm/nodes) 205 | do 206 | IPs+="$(cat /swarm/nodes/$n)," 207 | echo "$n: '$(cat /swarm/nodes/$n)'" 208 | done 209 | 210 | IPs=$(echo $IPs | sed 's/,$//') 211 | 212 | log "Running docker-ingress-routing-daemon --preexisting --ingress-gateway-ips $IPs --install ..." 213 | while true; do /usr/local/bin/docker-ingress-routing-daemon --preexisting --iptables-wait-seconds 3 --ingress-gateway-ips "$IPs" --install; sleep 1; done & 214 | 215 | fi 216 | 217 | fi 218 | 219 | node_state 220 | if [ "$NodeState" = "active" ] && [ "$IsManager" = "true" ]; then 221 | log "Manager ready" 222 | fi 223 | 224 | log "Looping indefinitely ..." 225 | while true; do sleep infinity; done 226 | -------------------------------------------------------------------------------- /tests/00-http-docker-swarm/test: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | # Load framework functions 4 | . ../framework.sh 5 | 6 | # TEST VARIABLES 7 | NODE=00-http-docker-swarm-node 8 | 9 | # Number of nodes 10 | NODES=${NODES:-3} 11 | 12 | # Network MTU to deploy in Docker network, RunCVM container VM nodes, and on Docker and swarm ingress network running on those nodes. 
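# e.g. to exercise a standard-MTU path instead of jumbo frames:
#   MTU=1500 ./test
# or to skip custom-MTU network creation (the MTU is only applied when >0):
#   MTU=0 ./test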
13 | MTU=${MTU:-9000} 14 | 15 | # Set to "1" to enable installation of https://github.com/newsnowlabs/docker-ingress-routing-daemon on the swarm 16 | DIRD=${DIRD:-0} 17 | 18 | # Set to "1" to disable cleanup of Docker image 19 | NO_CLEAN_IMAGE=${NO_CLEAN_IMAGE:-0} 20 | 21 | # OVERRIDE FRAMEWORK FUNCTIONS 22 | nodes() { seq 1 $NODES | sed "s/^/$NODE/"; } 23 | volumes() { echo swarm $(nodes); } 24 | networks() { echo runcvm-mtu; } 25 | images() { echo $IMAGE; } 26 | 27 | # Run routine cleanup of any preexisting containers, volumes, networks, and images 28 | cleanup 29 | 30 | h=$(hostname) 31 | 32 | if [ -n "$REGISTRY_MIRROR" ]; then 33 | log "REGISTRY_MIRROR '$REGISTRY_MIRROR' detected." 34 | else 35 | log "No REGISTRY_MIRROR detected: recommend setting REGISTRY_MIRROR=http://localhost:5000 and launching:" 36 | log "- docker run -d --name=registry --network=host -e REGISTRY_PROXY_REMOTEURL=https://registry-1.docker.io registry:2" 37 | fi 38 | 39 | log "Build image ..." 40 | docker build --iidfile /tmp/iid -f node/Dockerfile node/ 41 | IMAGE=$(cat /tmp/iid) 42 | 43 | if [ -n "$MTU" ] && [ "$MTU" -gt 0 ]; then 44 | log "Creating network 'runcvm-mtu' with MTU $MTU ..." 45 | docker network create --opt com.docker.network.driver.mtu=$MTU --scope=local runcvm-mtu 46 | else 47 | log "Creating network 'runcvm-mtu' with default (unspecified) MTU ..." 48 | docker network create --scope=local runcvm-mtu 49 | fi 50 | 51 | log "Launching $NODES x RunCVM nodes with image $IMAGE ..." 52 | for n in $(seq 1 $NODES) 53 | do 54 | log -n "Launching RunCVM node $n/$NODES ... " 55 | 56 | # Enables Docker's use of overlay2 storage driver in a file-backed disk stored in a dedicated Docker volume 57 | # diskopt="--mount=type=volume,src=$NODE$n,dst=/disks --env=RUNCVM_DISKS=/disks/disk1,/var/lib/docker,ext4,500M" 58 | 59 | # Docker will fall back to using the vfs storage driver, as it detects /var/lib/docker is an overlay2 fs. 60 | # diskopt="--mount=type=volume,src=$NODE$n,dst=/var/lib/docker" 61 | 62 | # Enables Docker's use of overlay2 storage driver in a file-backed disk stored in the container's overlayfs 63 | diskopt="--env=RUNCVM_DISKS=/disks/disk1,/var/lib/docker,ext4,500M" 64 | 65 | # The swarm volume, mounted at /swarm within the RunCVM VMs, will be used to share swarm info 66 | # among the nodes. 67 | docker run \ 68 | -d \ 69 | --rm \ 70 | --runtime=runcvm \ 71 | --network=runcvm-mtu \ 72 | --publish=$((8080+$n-1)):80 \ 73 | --name=$NODE$n \ 74 | --hostname=$NODE$n \ 75 | --memory=512m \ 76 | --env=NODE=$n \ 77 | --env=NODES=$NODES \ 78 | --env=MTU=$MTU \ 79 | --env=DIRD=$DIRD \ 80 | --env=REGISTRY_MIRROR=$REGISTRY_MIRROR \ 81 | --mount=type=volume,src=swarm,dst=/swarm \ 82 | $diskopt \ 83 | $IMAGE 84 | done 85 | 86 | log "Monitoring ${NODE}1 logs for swarm setup progress ..." 87 | docker logs -f ${NODE}1 -n 0 2>&1 | sed "s/^/> (${NODE}1) > /; /Swarm complete/q0; /Swarm failed/q129;" 88 | log "Finished monitoring ${NODE}1 logs as swarm is set up." 89 | 90 | log "Creating http service (please be patient) ..." 
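# The heredoc below is flattened with tr '\012' ' ' into a single ash -c
# string; the service runs mini_httpd with a CGI index page that reports each
# replica's hostname and the client address. Once replicas are up, a request
# such as:
#   curl -s http://0.0.0.0:8080/
# should return a line like "hostname=... remote_addr=...".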
91 | docker exec ${NODE}1 docker service create \ 92 | -d \ 93 | --name=http --mode=global -p 80:80 --update-parallelism=0 \ 94 | alpine ash -c "$(tr '\012' ' ' <<_EOE_ 95 | apk add --no-cache mini_httpd && 96 | mkdir -p /www && 97 | echo -e "#!/bin/sh\n\necho Content-Type: text/plain\necho\necho hostname=\$HOSTNAME remote_addr=\\\$REMOTE_ADDR\nexit 0\n" >/www/index.cgi && 98 | chmod 755 /www/index.cgi && 99 | mini_httpd -d /www -D -l /dev/stdout -c '**.cgi' 100 | _EOE_ 101 | )" 102 | 103 | for i in $(seq 1 200 | sort -nr) 104 | do 105 | replicas=$(docker exec ${NODE}1 docker service ls --format='{{ .Replicas }}' --filter='Name=http') 106 | log "Waiting for remainder of $replicas replicas to launch (#$i) ..." 107 | [ "$replicas" = "$NODES/$NODES" ] && break 108 | [ $i -eq 1 ] && exit 253 109 | sleep 1 110 | done 111 | log "All $NODES replicas launched." 112 | sleep 1 113 | 114 | # Allow final test to complete, even if we encounter errors 115 | set +e 116 | 117 | if [ "$DIRD" = "1" ]; then 118 | DOCKER_IPV4=$(docker network inspect runcvm-mtu --format='{{(index .IPAM.Config 0).Gateway}}') 119 | else 120 | DOCKER_IPV4="10.0.0." 121 | fi 122 | 123 | log "Running $NODE test looking for '$DOCKER_IPV4' at $(date) ..." 124 | 125 | ERRORS=0 126 | TESTS=0 127 | for loop in $(seq 1 250) 128 | do 129 | i=$((loop % NODES)) 130 | 131 | host=http://0.0.0.0:$((8080+i))/ 132 | 133 | # Uncomment if running inside a Dockside devtainer (which must be preconnected to a precreated runcvm-mtu Docker network). 134 | # host=http://$NODE$((i+1)):80/ 135 | 136 | response=$(curl --max-time 1 -is $host) 137 | ERROR=$? 138 | 139 | if [ $ERROR -eq 0 ]; then 140 | response=$(tr '\012\015' ' ' <<<$response) 141 | grep -q "remote_addr=$DOCKER_IPV4" <<<$response 142 | [ $? -ne 0 ] && log "#$loop Response error: $response" && ERROR=1 143 | else 144 | log "#$loop Response error: curl error $ERROR" 145 | ERROR=1 146 | fi 147 | 148 | ERRORS=$((ERRORS+ERROR)) 149 | TESTS=$((TESTS+1)) 150 | 151 | done 152 | 153 | log "Completed $NODE test $TESTS times, with $ERRORS errors" 154 | 155 | # Uncomment to debug: 156 | # log "Falling to shell, type CTRL+D to exit and clean up"; bash -i 157 | 158 | sleep 1 159 | exit $ERRORS 160 | -------------------------------------------------------------------------------- /tests/01-mariadb/test: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | NODE=01-mariadb 4 | 5 | nodes() { 6 | echo $NODE-mysqld $NODE-mysql 7 | } 8 | 9 | volumes() { 10 | echo '' 11 | } 12 | 13 | networks() { 14 | echo $NODE-network 15 | } 16 | 17 | _cleanup() { 18 | echo "> ($h) Cleaning up nodes ..." 19 | docker rm -f $(nodes) 2>/dev/null 20 | echo 21 | 22 | if [ "$(volumes)" != "" ]; then 23 | echo "> ($h) Cleaning up volumes ..." 24 | docker volume rm -f $(volumes) 25 | fi 26 | echo 27 | 28 | if [ -n "$IMAGE" ]; then 29 | echo "> ($h) Cleaning up temporary image ..." 30 | docker rmi $IMAGE 31 | echo 32 | fi 33 | 34 | rm -f /tmp/iid 35 | 36 | if [ "$(networks)" != "" ]; then 37 | echo "> ($h) Cleaning up networks ..." 
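# (Networks are removed after containers: docker network rm fails while
# containers remain attached, which is why nodes are cleaned up first.)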
38 | docker network rm $(networks) 39 | fi 40 | echo 41 | 42 | echo "> ($h) Cleaned up" 43 | } 44 | 45 | cleanup() { 46 | # Allow this to complete, even if we encounter errors 47 | set +e 48 | 49 | _cleanup 50 | 51 | # Restore setting to fail on error 52 | set -e 53 | } 54 | 55 | quit() { 56 | # Don't run a second time 57 | trap '' TERM INT EXIT 58 | 59 | cleanup 60 | 61 | echo "> ($h) Exiting with code $ERRORS" 62 | } 63 | 64 | trap quit TERM INT EXIT 65 | 66 | h=$(hostname) 67 | 68 | cleanup 69 | 70 | echo "> ($h) Creating network $NODE-network ..." 71 | docker network rm $(networks) 2>/dev/null || true 72 | docker network create $NODE-network 73 | 74 | # Launch a mariadb VM using RunCVM 75 | echo "> ($h) Launch RunCVM mariadb server as $NODE-mysqld ..." 76 | docker run --runtime=runcvm -d --rm --name=$NODE-mysqld --hostname=$NODE-mysqld --network=$NODE-network --cpus=1 --memory=1G --env=MARIADB_ALLOW_EMPTY_ROOT_PASSWORD=1 mariadb 77 | 78 | echo "> ($h) Monitoring mariadb logs ..." 79 | docker logs -f -t -n 0 $NODE-mysqld & 80 | 81 | # Allow final test to complete, even if we encounter errors 82 | set +e 83 | 84 | # Launch standard runc container to test connecting to the mariadb VM 85 | echo "> ($h) Waiting for mariadb startup and running test queries in runc container ..." 86 | docker run --rm --network=$NODE-network --name=$NODE-mysql --hostname=$NODE-mysql --env=host=$NODE-mysqld alpine ash -c 'apk update && apk add mariadb-client && for a in $(seq 40 -1 1); do if mysql -P 3306 -h $host mysql -e ""; then echo "> $(hostname) Connected to mysqld ..."; break; else echo "> $(hostname) Waiting for mysqld (#$a) ..."; sleep 1; fi; done && mysql -P 3306 -h $host mysql -e "select count(*) from user"' 87 | ERRORS=$? 88 | 89 | echo "> ($h) Completed $NODE test with $ERRORS errors" 90 | 91 | # bash -i 92 | 93 | sleep 1 94 | exit $ERRORS 95 | -------------------------------------------------------------------------------- /tests/02-user-workdir/test: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | # Load framework functions 4 | . ../framework.sh 5 | 6 | # TEST VARIABLES 7 | NODE=runcvm-01-test 8 | NETWORK="$NODE-network" 9 | IMAGE="alpine" 10 | RUNTIME="${RUNTIME:-runcvm}" 11 | 12 | # OVERRIDE FRAMEWORK FUNCTIONS 13 | nodes() { echo $NODE; } 14 | networks() { echo $NETWORK; } 15 | 16 | # TEST DETAILS 17 | COMMAND='echo "$(id -u) $(pwd)"' 18 | USER_ID="1000" 19 | WORK_DIR="/tmp" 20 | EXPECTED_OUTPUT="${USER_ID} ${WORK_DIR}" 21 | 22 | # TEST FUNCTIONS 23 | # -------------- 24 | 25 | # Function to test output against expected values 26 | test_output() { 27 | local test_type="$1" 28 | local expected_output="$2" 29 | local output_to_test="$3" 30 | 31 | if [ "$output_to_test" = "$expected_output" ]; then 32 | log "docker $test_type test: expected and received '$output_to_test' - PASS" 33 | return 0 34 | fi 35 | 36 | log "docker $test_type test: expected '$expected_output', but got: '$output_to_test' - FAIL" 37 | return 1 38 | } 39 | 40 | # TEST PROCEDURE 41 | # -------------- 42 | 43 | # Run routine cleanup of any preexisting containers, volumes, networks, and images 44 | cleanup 45 | 46 | # Create custom network 47 | log -n "Creating network '$NETWORK' ..." 48 | docker network create $NETWORK 49 | 50 | # Create and run the container 51 | log -n "Launching runcvm container with command '$COMMAND' ..." 
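# The container runs $COMMAND once, then loops emitting a ===DONE=== marker so
# the 'docker logs -f | sed' pipeline below has a line on which to terminate.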
52 | docker run \ 53 | -d \ 54 | --rm \ 55 | --runtime=$RUNTIME \ 56 | --network=$NETWORK \ 57 | --name=$NODE \ 58 | --hostname=$NODE \ 59 | --user=$USER_ID \ 60 | --workdir=$WORK_DIR \ 61 | $IMAGE \ 62 | sh -c "$COMMAND; while true; do echo ===DONE===; sleep 1; done" 63 | 64 | shopt -s lastpipe 65 | log "Container '$NODE' output ..." 66 | docker logs -f $NODE 2>&1 | sed "s/^/($NODE) > /; /===DONE===/q0; /failed/q129;" 67 | 68 | ERRORS=0 69 | 70 | # Test docker run command: 71 | # - Retrieve first line of logs from container 72 | # - Strip carriage returns for now, as it's unclear why they are present when they are absent from the expected output 73 | test_output "run" "$EXPECTED_OUTPUT" "$(docker logs $NODE | grep -v '===DONE===' | tr -d '\015')" || ERRORS=$((ERRORS+1)) 74 | 75 | # Test docker exec command: 76 | # - Retrieve output from exec command for exec test 77 | test_output "exec" "$EXPECTED_OUTPUT" "$(docker exec $NODE sh -c "$COMMAND")" || ERRORS=$((ERRORS+1)) 78 | 79 | # Final output 80 | log "Tests completed with $ERRORS errors" 81 | exit $ERRORS -------------------------------------------------------------------------------- /tests/03-env/test: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | # Load framework functions 4 | . ../framework.sh 5 | 6 | # TEST VARIABLES 7 | NODE=runcvm-01-test 8 | NETWORK="$NODE-network" 9 | IMAGE="alpine" 10 | RUNTIME="${RUNTIME:-runcvm}" 11 | 12 | # OVERRIDE FRAMEWORK FUNCTIONS 13 | nodes() { echo $NODE; } 14 | networks() { echo $NETWORK; } 15 | 16 | # TEST DETAILS 17 | COMMAND='env | sort' 18 | EXPECTED_OUTPUT="$(echo -e 'HOME=/root\nHOSTNAME=runcvm-01-test\nPATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\nPWD=/\nSHLVL=1\n')" 19 | 20 | # TEST FUNCTIONS 21 | # -------------- 22 | 23 | # Function to test output against expected values 24 | test_output() { 25 | local test_type="$1" 26 | local expected_output="$2" 27 | local output_to_test="$3" 28 | 29 | if [ "$output_to_test" = "$expected_output" ]; then 30 | log "docker $test_type test: expected and received '$output_to_test' - PASS" 31 | return 0 32 | fi 33 | 34 | log "docker $test_type test: expected '$expected_output', but got: '$output_to_test' - FAIL" 35 | return 1 36 | } 37 | 38 | # TEST PROCEDURE 39 | # -------------- 40 | 41 | # Run routine cleanup of any preexisting containers, volumes, networks, and images 42 | cleanup 43 | 44 | # Create custom network 45 | log -n "Creating network '$NETWORK' ..." 46 | docker network create $NETWORK 47 | 48 | # Create and run the container 49 | log -n "Launching runcvm container with command '$COMMAND' ..." 50 | docker run \ 51 | -d \ 52 | --rm \ 53 | --runtime=$RUNTIME \ 54 | --network=$NETWORK \ 55 | --name=$NODE \ 56 | --hostname=$NODE \ 57 | --user=$USER_ID \ 58 | --workdir=$WORK_DIR \ 59 | --init \ 60 | $IMAGE \ 61 | sh -c "$COMMAND; while true; do echo ===DONE===; sleep 1; done" 62 | 63 | shopt -s lastpipe 64 | log "Container '$NODE' output ..." 65 | docker logs -f $NODE 2>&1 | sed "s/^/($NODE) > /; /===DONE===/q0;" 66 | 67 | ERRORS=0 68 | 69 | # Test docker run command: 70 | # - Retrieve first line of logs from container 71 | # - Strip carriage returns for now,
as it's unclear why they are present when they are absent from the expected output 72 | test_output "run" "$EXPECTED_OUTPUT" "$(docker logs $NODE | grep -v '===DONE===' | tr -d '\015')" || ERRORS=$((ERRORS+1)) 73 | 74 | # Test docker exec command: 75 | # - Retrieve output from exec command for exec test 76 | test_output "exec" "$EXPECTED_OUTPUT" "$(docker exec $NODE sh -c "$COMMAND")" || ERRORS=$((ERRORS+1)) 77 | 78 | # Final output 79 | log "Tests completed with $ERRORS errors" 80 | exit $ERRORS -------------------------------------------------------------------------------- /tests/framework.sh: -------------------------------------------------------------------------------- 1 | images() { echo; } 2 | nodes() { echo; } 3 | volumes() { echo; } 4 | networks() { echo; } 5 | 6 | log() { 7 | local opts 8 | if [ "$1" = "-n" ]; then opts="-n"; shift; fi 9 | echo $opts "> $1" 10 | } 11 | 12 | _cleanup() { 13 | 14 | if [ "$(nodes)" != "" ]; then 15 | log -n "Cleaning up nodes ... " 16 | docker rm -f $(nodes) 2>&1 17 | fi 18 | 19 | if [ "$(volumes)" != "" ]; then 20 | log -n "Cleaning up volumes ... " 21 | docker volume rm -f $(volumes) 2>&1 22 | fi 23 | 24 | if [ "$(images)" != "" ] && [ "$NO_CLEAN_IMAGE" != "1" ]; then 25 | log -n "Cleaning up temporary images ... " 26 | docker rmi $(images) 2>&1 27 | fi 28 | 29 | if [ "$(networks)" != "" ]; then 30 | log -n "Cleaning up networks ... " 31 | docker network rm $(networks) 2>&1 32 | fi 33 | 34 | rm -f /tmp/iid 35 | } 36 | 37 | cleanup() { 38 | # Allow this to complete, even if we encounter errors 39 | set +e 40 | 41 | _cleanup 42 | 43 | # Restore setting to fail on error 44 | set -e 45 | } 46 | 47 | quit() { 48 | local code=$? 49 | 50 | # Don't run a second time 51 | trap '' TERM INT EXIT 52 | 53 | cleanup 54 | 55 | log "Exiting with code $code" 56 | } 57 | 58 | term() { 59 | exit 254 60 | } 61 | 62 | # Standard setup 63 | 64 | trap quit EXIT 65 | trap term TERM INT QUIT 66 | 67 | # Cleanup on EXIT is already handled by the 'quit' trap above; do not re-trap 68 | # EXIT here, as 'trap cleanup EXIT' would replace 'quit' and skip its exit-code log -------------------------------------------------------------------------------- /tests/run: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | ERRORS=0 4 | 5 | DIR=$(dirname $0) 6 | 7 | if [ -d "$DIR" ]; then 8 | echo "Running RunCVM integration tests in '$DIR' ..." 9 | cd $DIR 10 | else 11 | echo "$0: Error: RunCVM integration test directory '$DIR' not found; aborting!" 12 | exit 1 13 | fi 14 | 15 | for test in * 16 | do 17 | 18 | [ -d "$test" ] || continue; 19 | 20 | cd $test 21 | ./test 2>&1 | sed "s/^/$test - /" 22 | TEST_ERRORS=${PIPESTATUS[0]} # capture ./test's exit status, not sed's 23 | ERRORS=$((ERRORS+$TEST_ERRORS)) 24 | 25 | cd .. 26 | 27 | echo "RunCVM test $test finished with $TEST_ERRORS errors" 28 | done 29 | 30 | echo "RunCVM integration tests completed with $ERRORS errors" 31 | 32 | exit $ERRORS --------------------------------------------------------------------------------