#! /bin/bash
# Copies a single path out of a Docker image.
#
# Usage: copy_img <image> <path-in-image> <destination>
#
# A temporary container is created from <image> with `docker create`,
# <path-in-image> is copied out of it to <destination> with `docker cp`,
# and the container is then removed.
#
# Exit status: 1 if the container cannot be created, otherwise the exit
# status of `docker cp`.

V=$(docker create "$1") || exit 1

docker cp "$V:$2" "$3"
status=$?

# Always clean up the temporary container, but report the result of the copy:
# without this the script would exit with the status of `docker rm` and a
# failed copy would look like a success to the calling Makefile.
docker rm "$V"

exit "$status"
size 11179352 4 | -------------------------------------------------------------------------------- /runtime/poc/runtime/bios-256k.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:2b864f5b202625b03bdf152322c7ecabcde1413e62bd0987970bded9cc915184 3 | size 262144 4 | -------------------------------------------------------------------------------- /runtime/poc/runtime/efi-virtio.rom: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:26be36901db7f8181c306cc62bd74891d8646528965a78e40cceadba5dd7c8e7 3 | size 160768 4 | -------------------------------------------------------------------------------- /runtime/poc/runtime/linuxboot_dma.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:9c49e255340c78fc12e54ed043462bca02fb7fca29b7cfab62ff88a5344b6950 3 | size 1536 4 | -------------------------------------------------------------------------------- /runtime/poc/runtime/vgabios-stdvga.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:e13dafa0bd55d3d8632f278d87272122db18cbd5675307002c28636d597e2d1e 3 | size 39936 4 | -------------------------------------------------------------------------------- /runtime/src/main.rs: -------------------------------------------------------------------------------- 1 | use ya_runtime_sdk::run; 2 | use ya_runtime_vm::Runtime; 3 | 4 | #[tokio::main] 5 | async fn main() -> anyhow::Result<()> { 6 | run::().await?; 7 | Ok(()) 8 | } 9 | -------------------------------------------------------------------------------- /qemu/Makefile: -------------------------------------------------------------------------------- 1 | all: vmrt 2 | 3 | vmrt: Dockerfile 4 | docker build -t 
build-qemu . 5 | $(SHELL) copy_img build-qemu vmrt . 6 | $(SHELL) copy_img build-qemu /qemu/pc-bios/vgabios-stdvga.bin . 7 | 8 | .PHONY: all 9 | -------------------------------------------------------------------------------- /runtime/init-container/include/init-seccomp.h: -------------------------------------------------------------------------------- 1 | #ifndef GOLEM_INIT_SANDBOX_H 2 | #define GOLEM_INIT_SANDBOX_H GOLEM_INIT_SANDBOX_H 3 | // Prepares for sandbox setup 4 | void setup_sandbox(void); 5 | // Actually enforces the sandbox. 6 | void sandbox_apply(void); 7 | #endif 8 | -------------------------------------------------------------------------------- /runtime/conf/ya-runtime-vm.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "name": "vm", 4 | "version": "0.5.2", 5 | "supervisor-path": "exe-unit", 6 | "runtime-path": "ya-runtime-vm/ya-runtime-vm", 7 | "description": "vm runtime", 8 | "extra-args": ["--cap-handoff"] 9 | } 10 | ] 11 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "runtime/init-container/liburing"] 2 | path = runtime/init-container/liburing 3 | url = https://github.com/axboe/liburing 4 | [submodule "runtime/init-container/libseccomp"] 5 | path = runtime/init-container/libseccomp 6 | url = https://github.com/seccomp/libseccomp.git 7 | -------------------------------------------------------------------------------- /runtime/init-container/busybox/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM alpine:3.14 2 | RUN apk update \ 3 | && apk add git build-base linux-headers \ 4 | && git clone https://github.com/mirror/busybox 5 | COPY config busybox/.config 6 | RUN cd busybox \ 7 | && git checkout 2d4a3d9e6c1493a9520b907e07a41aca90cdfd94 \ 8 | && make oldconfig \ 9 | && CFLAGS="-Os" 
LDFLAGS="--static" ./make_single_applets.sh -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [workspace] 2 | members = [ 3 | "runtime" 4 | ] 5 | resolver = "2" 6 | 7 | [workspace.dependencies] 8 | ya-runtime-sdk = { git = "https://github.com/golemfactory/ya-runtime-sdk.git", rev = "0395b0c704ef644d7f0554ac41e319f03b11c068" } 9 | ya-runtime-api = { git = "https://github.com/golemfactory/yagna.git", rev = "2222cadeed58ecc44295092fcb0bc9ffba8a3722"} 10 | ya-client-model = { git = "https://github.com/golemfactory/ya-client.git", rev = "44746845a559d40faa2a81d41c6f663d38361286" } 11 | -------------------------------------------------------------------------------- /runtime/init-container/include/network.h: -------------------------------------------------------------------------------- 1 | #ifndef _NETWORK_H 2 | #define _NETWORK_H 3 | 4 | int net_create_lo(const char *name); 5 | int net_create_tap(char *name); 6 | 7 | int net_if_up(const char *name, int up); 8 | int net_if_mtu(const char *name, int mtu); 9 | int net_if_addr(const char *name, const char *ip, const char *mask); 10 | int net_if_addr6(const char *name, const char *ip6); 11 | int net_if_hw_addr(const char *name, const char mac[6]); 12 | 13 | int net_route(const char *name, const char *ip, const char *mask, const char *via); 14 | int net_route6(const char *name, const char *ip6, const char *via); 15 | 16 | int net_if_addr_to_hw_addr(const char *ip, char *mac); 17 | int net_if_addr6_to_hw_addr(const char *ip, char *mac); 18 | 19 | #endif // _NETWORK_H 20 | -------------------------------------------------------------------------------- /runtime/init-container/include/communication.h: -------------------------------------------------------------------------------- 1 | #ifndef _COMMUNICATION_H 2 | #define _COMMUNICATION_H 3 | 4 | #include 5 | #include 6 | 7 | #include 
"cyclic_buffer.h" 8 | 9 | int readn(int fd, void* buf, size_t size); 10 | 11 | int recv_u64(int fd, uint64_t* res); 12 | int recv_u32(int fd, uint32_t* res); 13 | int recv_u16(int fd, uint16_t* res); 14 | int recv_u8(int fd, uint8_t* res); 15 | int recv_bytes(int fd, char** buf_ptr, uint64_t* size_ptr, 16 | bool is_cstring); 17 | 18 | int recv_strings_array(int fd, char*** array_ptr); 19 | 20 | void free_strings_array(char** array); 21 | 22 | int writen(int fd, const void* buf, size_t size); 23 | 24 | int send_bytes(int fd, const char* buf, uint64_t size); 25 | 26 | int send_bytes_cyclic_buffer(int fd, struct cyclic_buffer* cb, uint64_t size); 27 | 28 | #endif // _COMMUNICATION_H 29 | -------------------------------------------------------------------------------- /runtime/init-container/include/process_bookkeeping.h: -------------------------------------------------------------------------------- 1 | #ifndef _PROCESS_BOOKKEEPING_H 2 | #define _PROCESS_BOOKKEEPING_H 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #include "cyclic_buffer.h" 9 | #include "proto.h" 10 | 11 | struct redir_fd_desc { 12 | enum REDIRECT_FD_TYPE type; 13 | union { 14 | /* For REDIRECT_FD_FILE */ 15 | char* path; 16 | /* For REDIRECT_FD_PIPE_* */ 17 | struct { 18 | struct cyclic_buffer cb; 19 | int fds[2]; 20 | } buffer; 21 | }; 22 | }; 23 | 24 | struct process_desc { 25 | uint64_t id; 26 | pid_t pid; 27 | bool is_alive; 28 | struct redir_fd_desc redirs[3]; 29 | struct process_desc* prev; 30 | struct process_desc* next; 31 | }; 32 | 33 | void add_process(struct process_desc* proc_desc); 34 | void remove_process(const struct process_desc* proc_desc); 35 | struct process_desc* find_process_by_id(uint64_t id); 36 | struct process_desc* find_process_by_pid(pid_t pid); 37 | 38 | #endif // _PROCESS_BOOKKEEPING_H 39 | -------------------------------------------------------------------------------- /runtime/init-container/src/process_bookkeeping.c: 
-------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "process_bookkeeping.h" 4 | 5 | static struct process_desc* g_all_processes = NULL; 6 | 7 | void add_process(struct process_desc* proc_desc) { 8 | proc_desc->next = g_all_processes; 9 | proc_desc->prev = NULL; 10 | if (g_all_processes) { 11 | g_all_processes->prev = proc_desc; 12 | } 13 | g_all_processes = proc_desc; 14 | } 15 | 16 | void remove_process(const struct process_desc* proc_desc) { 17 | if (g_all_processes == proc_desc) { 18 | g_all_processes = proc_desc->next; 19 | if (g_all_processes) { 20 | g_all_processes->prev = NULL; 21 | } 22 | } else { 23 | if (proc_desc->prev) { 24 | proc_desc->prev->next = proc_desc->next; 25 | } 26 | if (proc_desc->next) { 27 | proc_desc->next->prev = proc_desc->prev; 28 | } 29 | } 30 | } 31 | 32 | struct process_desc* find_process_by_id(const uint64_t id) { 33 | struct process_desc* proc_desc = g_all_processes; 34 | while (proc_desc) { 35 | if (proc_desc->id == id) { 36 | return proc_desc; 37 | } 38 | proc_desc = proc_desc->next; 39 | } 40 | return NULL; 41 | } 42 | 43 | struct process_desc* find_process_by_pid(const pid_t pid) { 44 | struct process_desc* proc_desc = g_all_processes; 45 | while (proc_desc) { 46 | if (proc_desc->pid == pid) { 47 | return proc_desc; 48 | } 49 | proc_desc = proc_desc->next; 50 | } 51 | return NULL; 52 | } 53 | -------------------------------------------------------------------------------- /runtime/build.rs: -------------------------------------------------------------------------------- 1 | use anyhow::Context; 2 | use std::env; 3 | use std::ops::Not; 4 | use std::path::PathBuf; 5 | use std::process::Command; 6 | 7 | static RERUN_IF_CHANGED: &str = "cargo:rerun-if-changed"; 8 | 9 | fn main() -> anyhow::Result<()> { 10 | // skip build for CI 11 | if env::var("CI").is_ok() { 12 | return Ok(()); 13 | } 14 | 15 | let root_dir = PathBuf::from(env::var("CARGO_MANIFEST_DIR").unwrap()); 16 | 
let init_dir = root_dir.join("init-container").canonicalize().unwrap(); 17 | let include_dir = init_dir.join("include"); 18 | let src_dir = init_dir.join("src"); 19 | 20 | let make_result = Command::new("make") 21 | .current_dir(&init_dir) 22 | .status() 23 | .context("error building init")?; 24 | if make_result.success().not() { 25 | if let Some(code) = make_result.code() { 26 | anyhow::bail!("make failed with code {:?}", code) 27 | } else { 28 | anyhow::bail!("make failed") 29 | } 30 | } 31 | 32 | println!( 33 | r#" 34 | {rerun}={root}/Makefile 35 | {rerun}={include}/communication.h 36 | {rerun}={include}/cyclic_buffer.h 37 | {rerun}={include}/forward.h 38 | {rerun}={include}/network.h 39 | {rerun}={include}/process_bookkeeping.h 40 | {rerun}={include}/proto.h 41 | {rerun}={src}/communication.c 42 | {rerun}={src}/cyclic_buffer.c 43 | {rerun}={src}/forward.c 44 | {rerun}={src}/network.c 45 | {rerun}={src}/process_bookkeeping.c 46 | {rerun}={src}/init.c 47 | "#, 48 | rerun = RERUN_IF_CHANGED, 49 | root = init_dir.display(), 50 | include = include_dir.display(), 51 | src = src_dir.display(), 52 | ); 53 | Ok(()) 54 | } 55 | -------------------------------------------------------------------------------- /rustfmt.toml: -------------------------------------------------------------------------------- 1 | max_width = 100 2 | hard_tabs = false 3 | tab_spaces = 4 4 | newline_style = "Auto" 5 | use_small_heuristics = "Default" 6 | #indent_style = "Block" 7 | #wrap_comments = false 8 | #comment_width = 80 9 | #normalize_comments = false 10 | #license_template_path = "" 11 | #format_strings = false 12 | #format_macro_matchers = false 13 | #format_macro_bodies = true 14 | #empty_item_single_line = true 15 | #struct_lit_single_line = true 16 | #fn_single_line = false 17 | #where_single_line = false 18 | #imports_indent = "Block" 19 | #imports_layout = "Mixed" 20 | #merge_imports = false 21 | reorder_imports = true 22 | reorder_modules = true 23 | #reorder_impl_items = false 24 
/*
 * Struct describing a cyclic buffer.
 * `buf` - pointer to the beginning of the buffer,
 * `size` - size of the buffer,
 * `begin` - pointer to the beginning of the currently stored data,
 *           must be in `[buf, buf+size)`,
 * `end` - pointer to the end of the currently stored data,
 *         must be in `[buf, buf+size)`, with the single exception of the
 *         full-buffer case described below, where it equals `buf + size`.
 * If `begin == end` then either:
 * - `begin == buf` - buffer is empty,
 * - `begin != buf` - buffer is full.
 * If the buffer is full and `begin == buf`, then `end == buf + size`
 * (so in that case `begin != end`).
 */
struct cyclic_buffer {
    char* buf;
    size_t size;
    char* begin;
    char* end;
};

/*
 * Initializes the buffer.
 * Returns 0 on success and -1 on error (error code in `errno`).
 */
int cyclic_buffer_init(struct cyclic_buffer* cb, size_t size);
/*
 * Destroys the buffer, freeing all internal resources.
 * Returns 0 on success and -1 on error (error code in `errno`).
 * Errors are most likely unrecoverable (e.g. `munmap` failed).
 */
int cyclic_buffer_deinit(struct cyclic_buffer* cb);

/* Returns the size of the data currently stored in the buffer. */
size_t cyclic_buffer_data_size(const struct cyclic_buffer* cb);
/* Returns the size of the free space in the buffer. */
size_t cyclic_buffer_free_size(const struct cyclic_buffer* cb);

/*
 * Reads at most `count` bytes from `fd` into the buffer.
 * This function has exactly the same semantics as `read`, except that it
 * handles `EINTR` internally.
 * Note that `count` could be greater than the buffer capacity, in which case
 * the least recently read data will be overwritten (if there is enough data
 * to be read).
 * If this `fd` is in blocking mode this might block, even if there is some
 * (but less than `count`) data available, thus non-blocking mode is preferred.
 */
ssize_t cyclic_buffer_read(int fd, struct cyclic_buffer* cb, size_t count);

/*
 * Writes at most `count` bytes from the buffer into `fd`.
 * This function has exactly the same semantics as `write`, except that it
 * handles `EINTR` internally.
 * Might write less data than requested if there is not enough data in the
 * buffer or due to a normal short write.
 * If this `fd` is in blocking mode this might block, even if some bytes (but
 * less than `count`) were already written, thus non-blocking mode is preferred.
 */
ssize_t cyclic_buffer_write(int fd, struct cyclic_buffer* cb, size_t count);
    A tool for converting Docker images into yagna Virtual Machine images and uploading them to a public repository.
    Requires [Docker](https://docs.docker.com/engine/install/ubuntu/) to be installed on your system.

## Building

Prerequisites:

- `rustc`

    Recommendation: use the Rust toolchain installer from [https://rustup.rs/](https://rustup.rs/)

- `musl-gcc` and `gperf`

    On an Ubuntu system, execute in terminal:

    ```bash
    sudo apt install musl musl-tools gperf
    ```

Git checkout:

Initialize the `runtime/init-container/liburing` and `runtime/init-container/libseccomp` submodules.
35 | 36 | ```bash 37 | git submodule init 38 | git submodule update 39 | ``` 40 | 41 | Building: 42 | 43 | ```bash 44 | cd runtime 45 | cargo build 46 | ``` 47 | 48 | ## Installing 49 | 50 | Prerequisites: 51 | 52 | - `cargo-deb` 53 | 54 | Cargo helper command which automatically creates binary Debian packages. With Rust already installed, execute in terminal: 55 | 56 | ```bash 57 | cargo install cargo-deb 58 | ``` 59 | 60 | Installation: 61 | 62 | In terminal, change the working directory to `runtime` and install a freshly minted Debian package. 63 | 64 | ```bash 65 | cd runtime 66 | sudo dpkg -i $(cargo deb | tail -n1) 67 | ``` 68 | 69 | This will install the binary at `/usr/lib/yagna/plugins/ya-runtime-vm/ya-runtime-vm`. 70 | 71 | 72 | ## Command line 73 | 74 | Follow the installation section before executing. 75 | 76 | ``` 77 | ya-runtime-vm 0.2.5 78 | 79 | USAGE: 80 | ya-runtime-vm [OPTIONS] 81 | 82 | FLAGS: 83 | -h, --help Prints help information 84 | -V, --version Prints version information 85 | 86 | OPTIONS: 87 | -w, --workdir 88 | -t, --task-package 89 | --cpu-cores [default: 1] 90 | --mem-gib [default: 0.25] 91 | --storage-gib [default: 0.25] 92 | 93 | SUBCOMMANDS: 94 | test Perform a self-test 95 | offer-template Print the market offer template (JSON) 96 | deploy Deploy an image 97 | start Start a deployed image 98 | help Prints this message or the help of the given subcommand(s) 99 | ``` 100 | 101 | ## Caveats 102 | 103 | - Docker `VOLUME` command 104 | 105 | Directories specified in the `VOLUME` command are a mountpoint for directories on the host filesystem. Contents 106 | of those directories will appear as empty during execution. 107 | 108 | If you need to place static assets inside the image, try not to use the `VOLUME` command for that directory. 
109 | -------------------------------------------------------------------------------- /qemu/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:22.04 2 | 3 | RUN apt update \ 4 | && \ 5 | \ 6 | DEBIAN_FRONTEND=noninteractive \ 7 | apt install --assume-yes \ 8 | build-essential \ 9 | ca-certificates \ 10 | git \ 11 | make \ 12 | python3 \ 13 | python3-pip \ 14 | wget \ 15 | pkg-config \ 16 | libglib2.0-dev \ 17 | libpixman-1-dev \ 18 | libmount-dev \ 19 | libffi-dev \ 20 | libselinux1-dev \ 21 | libcap-ng-dev \ 22 | libattr1-dev \ 23 | ninja-build \ 24 | && \ 25 | \ 26 | rm -rf /var/lib/apt/lists/* 27 | 28 | RUN git clone https://github.com/qemu/qemu.git -b v8.1.1 --recurse-submodules 29 | 30 | RUN echo CONFIG_VIRTIO=y >> /qemu/configs/devices/i386-softmmu/default.mak 31 | RUN echo CONFIG_VIRTIO_SERIAL=y >> /qemu/configs/devices/i386-softmmu/default.mak 32 | RUN echo CONFIG_VIRTIO_PCI=y >> /qemu/configs/devices/i386-softmmu/default.mak 33 | RUN echo CONFIG_VIRTIO_NET=y >> /qemu/configs/devices/i386-softmmu/default.mak 34 | RUN echo CONFIG_VIRTIO_RNG=y >> /qemu/configs/devices/i386-softmmu/default.mak 35 | RUN echo CONFIG_VIRTIO_MMIO=y >> /qemu/configs/devices/i386-softmmu/default.mak 36 | RUN echo CONFIG_VIRTIO_SCSI=y >> /qemu/configs/devices/i386-softmmu/default.mak 37 | RUN echo CONFIG_VIRTIO_BLK=y >> /qemu/configs/devices/i386-softmmu/default.mak 38 | RUN echo CONFIG_VIRTIO_9P=y >> /qemu/configs/devices/i386-softmmu/default.mak 39 | RUN echo CONFIG_PARALLEL=y >> /qemu/configs/devices/i386-softmmu/default.mak 40 | RUN echo CONFIG_FSDEV_9P=y >> /qemu/configs/devices/i386-softmmu/default.mak 41 | RUN echo CONFIG_VFIO=y >> /qemu/configs/devices/i386-softmmu/default.mak 42 | RUN echo CONFIG_VFIO_PCI=y >> /qemu/configs/devices/i386-softmmu/default.mak 43 | RUN echo CONFIG_EDID=y >> /qemu/configs/devices/i386-softmmu/default.mak 44 | RUN echo CONFIG_VGA=y >> /qemu/configs/devices/i386-softmmu/default.mak 45 | RUN 
echo CONFIG_VGA_PCI=y >> /qemu/configs/devices/i386-softmmu/default.mak 46 | RUN echo CONFIG_PCIE_PORT=y >> /qemu/configs/devices/i386-softmmu/default.mak 47 | 48 | # --without-default-devices 49 | RUN mkdir build && \ 50 | cd build && \ 51 | /qemu/configure \ 52 | --target-list=x86_64-softmmu \ 53 | --static \ 54 | --audio-drv-list="" \ 55 | --disable-slirp \ 56 | --disable-tcg-interpreter \ 57 | --disable-containers \ 58 | --disable-gtk \ 59 | --disable-capstone \ 60 | --disable-avx2 \ 61 | --disable-avx512f \ 62 | --disable-replication \ 63 | --disable-parallels \ 64 | --disable-vvfat \ 65 | --disable-qed \ 66 | --disable-vdi \ 67 | --disable-qcow1 \ 68 | --disable-dmg \ 69 | --disable-cloop \ 70 | --disable-bochs \ 71 | --disable-bzip2 \ 72 | --disable-guest-agent \ 73 | --disable-numa \ 74 | --disable-tcg \ 75 | --disable-vnc \ 76 | --disable-live-block-migration \ 77 | --disable-gio \ 78 | --enable-vhost-kernel \ 79 | --enable-virtfs \ 80 | --without-default-devices 81 | 82 | RUN cd build && make V=1 CFLAGS+="-Os -flto" -j4 83 | 84 | RUN cp /build/x86_64-softmmu/qemu-system-x86_64 vmrt 85 | -------------------------------------------------------------------------------- /runtime/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "ya-runtime-vm" 3 | version = "0.5.2" 4 | authors = ["Golem Factory "] 5 | edition = "2021" 6 | license = "GPL-3.0" 7 | 8 | [package.metadata.deb] 9 | assets = [ 10 | [ 11 | "target/release/ya-runtime-vm", 12 | "usr/lib/yagna/plugins/ya-runtime-vm/", 13 | "755", 14 | ], 15 | [ 16 | "conf/ya-runtime-vm.json", 17 | "usr/lib/yagna/plugins/", 18 | "644", 19 | ], 20 | # self-test.gvmi is downloaded by .github/workflows/release.yml from https://github.com/golemfactory/ya-self-test-img/releases 21 | [ 22 | "image/self-test.gvmi", 23 | "usr/lib/yagna/plugins/ya-runtime-vm/runtime/", 24 | "644", 25 | ], 26 | 27 | [ 28 | "poc/runtime/vmrt", 29 | 
"usr/lib/yagna/plugins/ya-runtime-vm/runtime/", 30 | "755", 31 | ], 32 | [ 33 | "poc/runtime/vgabios-stdvga.bin", 34 | "usr/lib/yagna/plugins/ya-runtime-vm/runtime/", 35 | "644", 36 | ], 37 | [ 38 | "poc/runtime/bios-256k.bin", 39 | "usr/lib/yagna/plugins/ya-runtime-vm/runtime/", 40 | "644", 41 | ], 42 | [ 43 | "poc/runtime/kvmvapic.bin", 44 | "usr/lib/yagna/plugins/ya-runtime-vm/runtime/", 45 | "644", 46 | ], 47 | [ 48 | "poc/runtime/linuxboot_dma.bin", 49 | "usr/lib/yagna/plugins/ya-runtime-vm/runtime/", 50 | "644", 51 | ], 52 | [ 53 | "poc/runtime/efi-virtio.rom", 54 | "usr/lib/yagna/plugins/ya-runtime-vm/runtime/", 55 | "644", 56 | ], 57 | 58 | [ 59 | "init-container/initramfs.cpio.gz", 60 | "usr/lib/yagna/plugins/ya-runtime-vm/runtime/", 61 | "644", 62 | ], 63 | [ 64 | "init-container/vmlinuz-virt", 65 | "usr/lib/yagna/plugins/ya-runtime-vm/runtime/", 66 | "644", 67 | ], 68 | ] 69 | 70 | [dependencies.ya-runtime-sdk] 71 | workspace = true 72 | features = ["macros", "logger"] 73 | 74 | [dependencies] 75 | anyhow = "1.0" 76 | bollard-stubs = "1.40.2" 77 | crc = "1.8" 78 | futures = "0.3" 79 | log = "0.4.8" 80 | # "crossbeam-channel" and "macos_fsevent" are default features. 81 | # Remove `macos_fsevent` if `macos` build will get dropped. 
82 | notify = { version = "6.0", features = ["crossbeam-channel", "macos_fsevent"] } 83 | rand = "0.8" 84 | raw-cpuid = "10.7" 85 | serde = { version = "^1.0", features = ["derive"] } 86 | serde_json = "1.0" 87 | strip-ansi-escapes = "0.1.0" 88 | structopt = "0.3" 89 | tokio = { version = "1.21.2", features = [ 90 | "fs", 91 | "io-std", 92 | "io-util", 93 | "macros", 94 | "process", 95 | "rt", 96 | "rt-multi-thread", 97 | "sync", 98 | "time", 99 | "net", 100 | ] } 101 | tokio-byteorder = "0.3" 102 | uuid = { version = "1.1", features = ["v4"] } 103 | url = "2.3" 104 | ya-client-model.workspace = true 105 | 106 | [dev-dependencies] 107 | env_logger = "0.10" 108 | tempdir = "0.3.7" 109 | pnet = "0.33" 110 | 111 | [build-dependencies] 112 | anyhow = "1.0" 113 | 114 | [lib] 115 | name = "ya_runtime_vm" 116 | path = "src/lib.rs" 117 | 118 | [[bin]] 119 | name = "ya-runtime-vm" 120 | path = "src/main.rs" 121 | -------------------------------------------------------------------------------- /runtime/src/qcow2_min/mod.rs: -------------------------------------------------------------------------------- 1 | use std::io::{Result, SeekFrom}; 2 | 3 | use tokio::io::{AsyncSeekExt, AsyncWriteExt}; 4 | 5 | // Almost completely reverse-engineered 6 | // No educational value at all, look elsewhere. 
7 | const QCOW2_HEADER: &[u8] = include_bytes!("10k.header"); 8 | const QCOW2_SIZE_OFFSET: u64 = 24; 9 | 10 | const QCOW2_L1_SIZE_DIV: u64 = 512 * 1024 * 1024; 11 | const QCOW2_L1_ENTRIES_OFFSET: u64 = 32; 12 | 13 | const QCOW2_MYSTERY_CONST1: u64 = 0x0000_0000_0002_0000; 14 | const QCOW2_MYSTERY_CONST1_OFFSET: u64 = 0x1_0000; 15 | 16 | const QCOW2_CLUSTERS_MIN: u64 = 4; 17 | const QCOW2_CLUSTERS_SZ_DIV: u64 = 4 * 1024 * 1024 * 1024 * 1024; 18 | const QCOW2_CLUSTERS_SZ_DIV2: u64 = 1024 * 1024 * 1024 * 1024; 19 | const QCOW2_CLUSTERS_OFFSET: u64 = 0x2_0000; 20 | const QCOW2_CLUSTER_OFFSET: u64 = 0x30000; 21 | const QCOW2_CLUSTER_SIZE: u64 = 16384; 22 | 23 | /// Qcow2 image parameters 24 | pub struct Qcow2Image { 25 | /// Virtual size 26 | pub size: u64, 27 | /// Image file will be no smaller than [`Self::preallocate`] 28 | pub preallocate: u64, 29 | } 30 | 31 | impl Qcow2Image { 32 | pub fn new(size: u64, preallocate: u64) -> Self { 33 | Qcow2Image { size, preallocate } 34 | } 35 | 36 | /// Writes a valid qcow2 image according to the parameters. 
37 | /// 38 | /// Resultant `qemu-img info`: 39 | /// ```plain 40 | /// cluster_size: 65536 41 | /// Format specific information: 42 | /// compat: 1.1 43 | /// compression type: zlib 44 | /// lazy refcounts: false 45 | /// refcount bits: 16 46 | /// corrupt: false 47 | /// extended l2: false 48 | /// ``` 49 | pub async fn write( 50 | &self, 51 | mut writer: W, 52 | ) -> Result<()> { 53 | let clusters = ((self.size - 1) / QCOW2_CLUSTERS_SZ_DIV + 4).max(QCOW2_CLUSTERS_MIN); 54 | let clusters2 = self.size.div_ceil(QCOW2_CLUSTERS_SZ_DIV2); 55 | let mut file_sz = self 56 | .preallocate 57 | .max(clusters2 * QCOW2_CLUSTER_SIZE + QCOW2_CLUSTER_OFFSET); 58 | 59 | const BLOCK_SZ: u64 = 4096; 60 | let block: &'static [u8] = &[0; BLOCK_SZ as usize]; 61 | 62 | while file_sz >= BLOCK_SZ { 63 | writer.write_all(block).await?; 64 | file_sz -= BLOCK_SZ; 65 | } 66 | if file_sz > 0 { 67 | writer.write_all(&block[0..file_sz as usize]).await?; 68 | } 69 | 70 | writer.rewind().await?; 71 | writer.write_all(QCOW2_HEADER).await?; 72 | 73 | writer.seek(SeekFrom::Start(QCOW2_SIZE_OFFSET)).await?; 74 | writer.write_all(&self.size.to_be_bytes()).await?; 75 | 76 | writer 77 | .seek(SeekFrom::Start(QCOW2_L1_ENTRIES_OFFSET)) 78 | .await?; 79 | let l1_entries = self.size.div_ceil(QCOW2_L1_SIZE_DIV); 80 | writer.write_all(&l1_entries.to_be_bytes()).await?; 81 | 82 | writer 83 | .seek(SeekFrom::Start(QCOW2_MYSTERY_CONST1_OFFSET)) 84 | .await?; 85 | writer 86 | .write_all(&QCOW2_MYSTERY_CONST1.to_be_bytes()) 87 | .await?; 88 | 89 | for k in 0..clusters { 90 | let offset = QCOW2_CLUSTERS_OFFSET + 2 * k; 91 | writer.seek(SeekFrom::Start(offset)).await?; 92 | writer.write_all(&[0, 1]).await?; 93 | } 94 | 95 | Ok(()) 96 | } 97 | } 98 | -------------------------------------------------------------------------------- /runtime/poc/gvmkit.sh: -------------------------------------------------------------------------------- 1 | #! 
#! /bin/bash
#
# Golem VMKIT proof-of-concept helper.
#
#   build  - export a Docker image into a squashfs application image
#   repack - internal step of `build`, re-invoked under fakeroot
#   run    - boot an application image with the bundled `vmrt` binary

# Prints command-line help.
usage() {
    local ME
    ME=$(basename "$0")
    echo ""
    echo -e "\tGolem VMKIT"
    echo ""
    echo "Usage:"
    echo ""
    echo "$ME build [output-name]"
    echo ""
    echo "$ME run {-v : } [-m ] [ ... ]"
    echo ""
}

# Prints its arguments to stderr and terminates the script with status 1.
err() {
    echo "$@" >&2
    exit 1
}

# Creates a container from image $1 and repacks its filesystem.
do_build() {
    set -x
    local IMAGE CONTAINER
    IMAGE="$1"
    CONTAINER=$(docker create "${IMAGE}") || err "unable to load image: $IMAGE"
    # The jq filter is quoted so the shell cannot glob-expand `.[0].Image`.
    IMAGE_HASH=$(docker inspect "$CONTAINER" | jq -r '.[0].Image' | cut -c 8-20)
    local OUT_DIR="out-$IMAGE_HASH"
    mkdir "$OUT_DIR"
    # Re-run this script under fakeroot so file ownership survives the export.
    fakeroot "$0" repack "$CONTAINER" "$OUT_DIR"
    rm -fr "$OUT_DIR"
    docker rm "$CONTAINER"
}

# Exports container $1 into directory $2, stores the container metadata next to
# the files and packs everything into "$2.golem-app".
do_repack() {
    local cmd
    local OUT="$2"
    docker export "$1" | tar xf - -C "$OUT"
    docker inspect "$1" | jq '.[0].Config.Env[]' -r > "$OUT/.env"
    # $ep, $cmd and $vols are intentionally left unquoted: word splitting joins
    # the newline-separated jq output into a single space-separated line.
    ep=$(docker inspect "$1" | jq '.[0].Config.Entrypoint[]' -e -r) && echo $ep > "$OUT/.entrypoint"
    cmd=$(docker inspect "$1" | jq '.[0].Config.Cmd[]' -e -r) && echo $cmd > "$OUT/.cmd"
    vols=$(docker inspect "$1" | jq '.[0].Config.Volumes | keys[]' -e -r) && echo $vols > "$OUT/.vols"
    docker inspect "$1" | jq '.[0].Config' > "${OUT}.json"
    mksquashfs "$OUT" "$OUT.golem-app" -comp lzo
    # Append the JSON config followed by its length as 8 zero-padded digits.
    (
        cat "${OUT}.json"
        printf "%08d" "$(stat -c%s "${OUT}.json")"
    ) >> "$OUT.golem-app"

}

SCRIPT_DIR=$(readlink -f "${0%/*}")

# Boots the application image with vmrt.
#   -m MEM       guest memory (default: 200m)
#   -d           debug mode (adds NO_LOADER=1 NO_POWEROFF=1 to the kernel cmdline)
#   -v SRC:DST   share host directory SRC with the guest at DST (repeatable)
# The first positional argument is the image; the rest become `apparg=` entries.
do_run() {
    echo "script_dir=$SCRIPT_DIR"
    local memory="200m"
    # Extra vmrt arguments are kept in an array so paths with spaces survive.
    local -a vm_args=()
    local append=""
    local tag=0
    local o a
    local VOLDEF SRC DST MODE

    while getopts "dm:v:" o; do
        case "${o}" in
            m)
                memory=${OPTARG}
                ;;
            d)
                echo debug
                append="$append NO_LOADER=1 NO_POWEROFF=1"
                ;;
            v)
                IFS=':' read -ra VOLDEF <<< "$OPTARG"
                SRC=$(cd "${VOLDEF[0]}" 2>/dev/null && pwd) || err "invalid path :${VOLDEF[0]}"
                DST="${VOLDEF[1]}"
                # Parsed for completeness; the mode is not applied anywhere yet.
                MODE=${VOLDEF[2]:-rw}
                tag=$((tag + 1))
                vm_args+=(-virtfs "local,path=$SRC,id=vol${tag},mount_tag=vol${tag},security_model=none")
                vm_args+=(-device "virtio-9p-pci,fsdev=vol${tag},mount_tag=vol${tag}")
                append="$append volmnt=vol${tag}:$DST"
                ;;
            *)
                # Unknown option: do not try to boot with a half-parsed command line.
                usage
                exit 1
                ;;
        esac
    done
    shift $((OPTIND-1))
    echo "memory=$memory"
    echo arg= "${vm_args[@]}"

    local VMIMG
    VMIMG="$(cd "$(dirname "$1")" && pwd)/$(basename "$1")"
    test -f "$VMIMG" || err "missing application image: $VMIMG"

    echo "vmkit=$VMIMG"

    cd "$SCRIPT_DIR/runtime" || err "missing runtime directory: $SCRIPT_DIR/runtime"
    shift
    for a in "$@"
    do
        append="$append apparg=\"$a\""
    done
    echo "$@"
    ./vmrt -m "$memory" -nographic -vga none -kernel vmlinuz-virt -initrd initramfs-virt -net none -accel kvm -cpu "host" -smp "$(nproc)" \
        -device virtio-serial,id=ser0 -device virtserialport,chardev=foo,name=org.fedoraproject.port.0 -chardev socket,path=/tmp/foo,server,nowait,id=foo \
        -append "console=ttyS0 panic=1 $append" \
        -drive file="$VMIMG",cache=none,readonly=on,format=raw,if=virtio -no-reboot \
        "${vm_args[@]}"
}

case "$1"
in
    build)
        do_build "$2"
        ;;
    repack)
        shift
        do_repack "$@"
        ;;
    run)
        shift
        do_run "$@"
        ;;
    *)
        usage
        exit 1
esac
/*
 * Reads exactly `size` bytes from `fd` into `buf`.
 *
 * A zero-length read is treated as "host not connected yet": a message is
 * printed and the read is retried after one second. `EINTR` is retried
 * immediately.
 *
 * Returns 0 on success, -1 on error (with `errno` set by `read`).
 */
int readn(const int fd, void* buf, size_t size) {
    while (size) {
        const ssize_t ret = read(fd, buf, size);
        if (ret == 0) {
            puts("Waiting for host connection ...");
            sleep(1);
            continue;
        }
        if (ret < 0) {
            if (errno == EINTR) {
                continue;
            }
            /* `errno` should be set. */
            return -1;
        }
        buf = (char*)buf + ret;
        size -= ret;
    }
    return 0;
}

/* Reads one `uint64_t` (raw bytes, no byte-order conversion) from `fd`. */
int recv_u64(const int fd, uint64_t* res) {
    return readn(fd, res, sizeof(*res));
}

/* Reads one `uint32_t` (raw bytes, no byte-order conversion) from `fd`. */
int recv_u32(const int fd, uint32_t* res) {
    return readn(fd, res, sizeof(*res));
}

/* Reads one `uint16_t` (raw bytes, no byte-order conversion) from `fd`. */
int recv_u16(const int fd, uint16_t* res) {
    return readn(fd, res, sizeof(*res));
}

/* Reads one `uint8_t` from `fd`. */
int recv_u8(const int fd, uint8_t* res) {
    return readn(fd, res, sizeof(*res));
}

/*
 * Receives a length-prefixed byte string: a u64 length followed by that many
 * bytes.
 *
 * On success stores a freshly `malloc`ed buffer in `*buf_ptr` (owned by the
 * caller) and, when `size_ptr` is not NULL, the payload length in `*size_ptr`.
 * With `is_cstring` set, one extra byte is allocated and the buffer is
 * NUL-terminated.
 *
 * Returns 0 on success, -1 on error (with `errno` set); the output arguments
 * are left untouched on error.
 */
int recv_bytes(const int fd, char** buf_ptr, uint64_t* size_ptr,
               const bool is_cstring) {
    uint64_t size = 0;

    if (recv_u64(fd, &size) < 0) {
        return -1;
    }

    /* Reject lengths whose allocation size would wrap around: otherwise a
     * tiny buffer would be allocated and `buf[size]` below would write out
     * of bounds. */
    const size_t extra = is_cstring ? 1 : 0;
    if (size > SIZE_MAX - extra) {
        errno = ENOMEM;
        return -1;
    }

    /* Never request zero bytes: `malloc(0)` may legally return NULL, which
     * would make an empty payload look like an allocation failure. */
    const size_t alloc_size = (size_t)size + extra;
    char* buf = malloc(alloc_size ? alloc_size : 1);
    if (!buf) {
        return -1;
    }

    if (readn(fd, buf, size) < 0) {
        /* Preserve the `errno` of the failed read across `free`. */
        const int tmp_errno = errno;
        free(buf);
        errno = tmp_errno;
        return -1;
    }

    if (is_cstring) {
        buf[size] = '\0';
    }

    *buf_ptr = buf;
    if (size_ptr) {
        *size_ptr = size;
    }

    return 0;
}

/* Frees a NULL-terminated array of strings and the array itself.
 * Accepts NULL. */
void free_strings_array(char** array) {
    if (!array) {
        return;
    }

    for (size_t i = 0; array[i]; ++i) {
        free(array[i]);
    }
    free(array);
}

/*
 * Receives an array of C strings: a u64 element count followed by that many
 * length-prefixed strings.
 *
 * On success stores a NULL-terminated array in `*array_ptr`; release it with
 * `free_strings_array`. Returns 0 on success, -1 on error (with `errno` set).
 */
int recv_strings_array(const int fd, char*** array_ptr) {
    uint64_t size = 0;
    int ret = -1;

    if (recv_u64(fd, &size) < 0) {
        return -1;
    }

    /* `size + 1` slots are needed (NULL terminator); refuse counts for which
     * that sum does not fit in `size_t`. `calloc` itself guards the
     * multiplication by the element size. */
    if (size >= SIZE_MAX) {
        errno = ENOMEM;
        return -1;
    }

    char** array = calloc((size_t)size + 1, sizeof(*array));
    if (!array) {
        return -1;
    }

    for (uint64_t i = 0; i < size; ++i) {
        if (recv_bytes(fd, &array[i], NULL, /*is_cstring=*/true) < 0) {
            goto out;
        }
    }

    *array_ptr = array;
    array = NULL;
    ret = 0;

out:
    if (array) {
        /* Error path: release the partially filled array, keeping `errno`. */
        const int tmp_errno = errno;
        free_strings_array(array);
        errno = tmp_errno;
    }
    return ret;
}

/*
 * Writes exactly `size` bytes from `buf` to `fd`.
 *
 * A zero-length write is treated as "host not connected yet" and retried
 * after one second; `EINTR` is retried immediately.
 *
 * Returns 0 on success, -1 on error (with `errno` set by `write`).
 */
int writen(const int fd, const void* buf, size_t size) {
    while (size) {
        const ssize_t ret = write(fd, buf, size);
        if (ret == 0) {
            puts("Waiting for host connection ...");
            sleep(1);
            continue;
        }
        if (ret < 0) {
            if (errno == EINTR) {
                continue;
            }
            /* `errno` should be set. */
            return -1;
        }
        buf = (const char*)buf + ret;
        size -= ret;
    }
    return 0;
}

/* Sends a length-prefixed byte string: the u64 `size` followed by `size`
 * bytes from `buf`. Returns 0 on success, -1 on error. */
int send_bytes(const int fd, const char* buf, const uint64_t size) {
    if (writen(fd, &size, sizeof(size)) < 0) {
        return -1;
    }

    return writen(fd, buf, size);
}

/*
 * Sends up to `size` bytes taken from the cyclic buffer `cb` as a
 * length-prefixed byte string. The length is clamped to the amount of data
 * currently stored in `cb`, and the sent bytes are consumed from the buffer.
 *
 * Returns 0 on success, -1 on error.
 */
int send_bytes_cyclic_buffer(const int fd, struct cyclic_buffer* cb, uint64_t size) {
    const size_t cb_data_size = cyclic_buffer_data_size(cb);
    if (size > cb_data_size) {
        size = cb_data_size;
    }

    if (writen(fd, &size, sizeof(size)) < 0) {
        return -1;
    }

    while (size) {
        const ssize_t ret = cyclic_buffer_write(fd, cb, size);
        if (ret == 0) {
            puts("Waiting for host connection ...");
            sleep(1);
            continue;
        }
        if (ret < 0) {
            return -1;
        }
        size -= ret;
    }
    return 0;
}
| } 26 | const int ret = munmap(cb->buf, cb->size); 27 | cb->buf = MAP_FAILED; 28 | return ret; 29 | } 30 | 31 | size_t cyclic_buffer_data_size(const struct cyclic_buffer* cb) { 32 | const char* data_begin = cb->begin; 33 | const char* data_end = cb->end; 34 | 35 | if (data_begin < data_end) { 36 | return data_end - data_begin; 37 | } 38 | if (data_begin > data_end) { 39 | return cb->size - (data_begin - data_end); 40 | } 41 | 42 | // data_begin == data_end 43 | if (data_begin == cb->buf) { 44 | /* Buffer is completely empty. */ 45 | return 0; 46 | } 47 | 48 | /* Buffer is completely full. */ 49 | return cb->size; 50 | } 51 | 52 | size_t cyclic_buffer_free_size(const struct cyclic_buffer* cb) { 53 | return cb->size - cyclic_buffer_data_size(cb); 54 | } 55 | 56 | static size_t min(const size_t a, const size_t b) { 57 | return a < b ? a : b; 58 | } 59 | 60 | ssize_t cyclic_buffer_read(const int fd, struct cyclic_buffer* cb, size_t count) { 61 | ssize_t got = 0; 62 | size_t free_space = cyclic_buffer_free_size(cb); 63 | 64 | while (count && free_space) { 65 | bool fixup_end = false; 66 | if (cb->end == cb->buf + cb->size) { 67 | cb->end = cb->buf; 68 | fixup_end = true; 69 | } 70 | 71 | const size_t this_read_size = min(free_space, min(cb->buf + cb->size - cb->end, count)); 72 | const ssize_t ret = read(fd, cb->end, this_read_size); 73 | if (ret <= 0) { 74 | if (fixup_end) { 75 | cb->end = cb->buf + cb->size; 76 | } 77 | 78 | if (ret == 0) { 79 | break; 80 | } 81 | if (errno == EINTR) { 82 | continue; 83 | } 84 | 85 | /* We have just seen an error, but let's ignore it if we have 86 | * already read some data before. */ 87 | if (got == 0) { 88 | got = -1; 89 | } 90 | break; 91 | } 92 | 93 | cb->end += ret; 94 | count -= ret; 95 | got += ret; 96 | free_space = cyclic_buffer_free_size(cb); 97 | 98 | if ((size_t)ret < this_read_size) { 99 | /* Not enough data to fill the whole request. 
*/ 100 | break; 101 | } 102 | } 103 | 104 | return got; 105 | } 106 | 107 | ssize_t cyclic_buffer_write(const int fd, struct cyclic_buffer* cb, size_t count) { 108 | ssize_t wrote = 0; 109 | size_t available_data = cyclic_buffer_data_size(cb); 110 | 111 | while (count && available_data) { 112 | const size_t this_write_size = min(available_data, min(cb->buf + cb->size - cb->begin, count)); 113 | const ssize_t ret = write(fd, cb->begin, this_write_size); 114 | if (ret < 0) { 115 | if (errno == EINTR) { 116 | continue; 117 | } 118 | 119 | /* We have just seen an error, but let's ignore it if we have 120 | * already written some data before. */ 121 | if (wrote == 0) { 122 | wrote = -1; 123 | } 124 | break; 125 | } 126 | 127 | if (ret == 0) { 128 | break; 129 | } 130 | 131 | cb->begin += ret; 132 | if (cb->begin == cb->end) { 133 | // buffer is empty 134 | cb->begin = cb->buf; 135 | cb->end = cb->buf; 136 | } else if (cb->begin == cb->buf + cb->size) { 137 | cb->begin = cb->buf; 138 | } 139 | count -= ret; 140 | wrote += ret; 141 | available_data = cyclic_buffer_data_size(cb); 142 | 143 | if ((size_t)ret < this_write_size) { 144 | /* Short write. 
*/ 145 | break; 146 | } 147 | } 148 | 149 | return wrote; 150 | } 151 | -------------------------------------------------------------------------------- /runtime/src/response_parser.rs: -------------------------------------------------------------------------------- 1 | use std::convert::TryFrom; 2 | use std::io; 3 | use tokio::io::{AsyncRead, AsyncReadExt}; 4 | 5 | #[derive(Debug)] 6 | pub enum Response { 7 | Ok, 8 | OkU64(u64), 9 | OkBytes(Vec), 10 | Err(u32), 11 | } 12 | 13 | #[derive(Debug)] 14 | pub enum ExitType { 15 | Exited, 16 | Killed, 17 | Dumped, 18 | } 19 | 20 | impl TryFrom for ExitType { 21 | type Error = io::Error; 22 | 23 | fn try_from(v: u8) -> Result { 24 | match v { 25 | 0 => Ok(ExitType::Exited), 26 | 1 => Ok(ExitType::Killed), 27 | 2 => Ok(ExitType::Dumped), 28 | _ => Err(io::Error::new( 29 | io::ErrorKind::InvalidData, 30 | "Invalid exit type", 31 | )), 32 | } 33 | } 34 | } 35 | 36 | #[derive(Debug)] 37 | pub struct ExitReason { 38 | pub status: u8, 39 | pub type_: ExitType, 40 | } 41 | 42 | #[derive(Debug)] 43 | pub enum Notification { 44 | OutputAvailable { id: u64, fd: u32 }, 45 | ProcessDied { id: u64, reason: ExitReason }, 46 | } 47 | 48 | #[derive(Debug)] 49 | pub struct ResponseWithId { 50 | pub id: u64, 51 | pub resp: Response, 52 | } 53 | 54 | #[derive(Debug)] 55 | pub enum GuestAgentMessage { 56 | Response(ResponseWithId), 57 | Notification(Notification), 58 | } 59 | 60 | async fn recv_u8(stream: &mut T) -> io::Result { 61 | let mut buf = [0; 1]; 62 | stream.read_exact(&mut buf).await?; 63 | Ok(u8::from_le_bytes(buf)) 64 | } 65 | 66 | async fn recv_u32(stream: &mut T) -> io::Result { 67 | let mut buf = [0; 4]; 68 | stream.read_exact(&mut buf).await?; 69 | Ok(u32::from_le_bytes(buf)) 70 | } 71 | 72 | async fn recv_u64(stream: &mut T) -> io::Result { 73 | let mut buf = [0; 8]; 74 | stream.read_exact(&mut buf).await?; 75 | Ok(u64::from_le_bytes(buf)) 76 | } 77 | 78 | async fn recv_bytes(stream: &mut T) -> io::Result> { 79 | let len = 
recv_u64(stream).await?; 80 | let mut buf = vec![0; len as usize]; 81 | stream.read_exact(buf.as_mut_slice()).await?; 82 | Ok(buf) 83 | } 84 | 85 | pub async fn parse_one_response( 86 | stream: &mut T, 87 | ) -> io::Result { 88 | let id = recv_u64(stream).await?; 89 | 90 | let typ = recv_u8(stream).await?; 91 | match typ { 92 | 0 => Ok(GuestAgentMessage::Response(ResponseWithId { 93 | id, 94 | resp: Response::Ok, 95 | })), 96 | 1 => { 97 | let val = recv_u64(stream).await?; 98 | Ok(GuestAgentMessage::Response(ResponseWithId { 99 | id, 100 | resp: Response::OkU64(val), 101 | })) 102 | } 103 | 2 => { 104 | let buf = recv_bytes(stream).await?; 105 | Ok(GuestAgentMessage::Response(ResponseWithId { 106 | id, 107 | resp: Response::OkBytes(buf), 108 | })) 109 | } 110 | 3 => { 111 | let code = recv_u32(stream).await?; 112 | Ok(GuestAgentMessage::Response(ResponseWithId { 113 | id, 114 | resp: Response::Err(code), 115 | })) 116 | } 117 | 4 => { 118 | if id == 0 { 119 | let proc_id = recv_u64(stream).await?; 120 | let fd = recv_u32(stream).await?; 121 | Ok(GuestAgentMessage::Notification( 122 | Notification::OutputAvailable { id: proc_id, fd }, 123 | )) 124 | } else { 125 | Err(io::Error::new( 126 | io::ErrorKind::InvalidData, 127 | "Invalid response message ID", 128 | )) 129 | } 130 | } 131 | 5 => { 132 | if id == 0 { 133 | let proc_id = recv_u64(stream).await?; 134 | let status = recv_u8(stream).await?; 135 | let type_ = ExitType::try_from(recv_u8(stream).await?)?; 136 | Ok(GuestAgentMessage::Notification(Notification::ProcessDied { 137 | id: proc_id, 138 | reason: ExitReason { status, type_ }, 139 | })) 140 | } else { 141 | Err(io::Error::new( 142 | io::ErrorKind::InvalidData, 143 | "Invalid response message ID", 144 | )) 145 | } 146 | } 147 | _ => Err(io::Error::new( 148 | io::ErrorKind::InvalidData, 149 | "Invalid response type", 150 | )), 151 | } 152 | } 153 | -------------------------------------------------------------------------------- 
/runtime/examples/commands.rs: -------------------------------------------------------------------------------- 1 | use futures::future::BoxFuture; 2 | use futures::FutureExt; 3 | use std::{ 4 | clone::Clone, 5 | collections::HashMap, 6 | env, 7 | path::PathBuf, 8 | sync::{Arc, Mutex}, 9 | }; 10 | use tokio::{process::Command, sync::Notify}; 11 | use ya_runtime_sdk::runtime_api::server::{self, ProcessStatus, RuntimeService, RuntimeStatus}; 12 | 13 | struct ProcessData { 14 | status: Option, 15 | died: Arc, 16 | } 17 | 18 | impl ProcessData { 19 | fn new() -> Self { 20 | Self { 21 | status: None, 22 | died: Arc::new(Notify::new()), 23 | } 24 | } 25 | 26 | fn new_with_status(status: ProcessStatus) -> Self { 27 | Self { 28 | status: Some(status), 29 | died: Arc::new(Notify::new()), 30 | } 31 | } 32 | } 33 | 34 | struct Events(Arc>>); 35 | 36 | impl Events { 37 | fn new() -> Self { 38 | Self(Arc::new(Mutex::new(HashMap::new()))) 39 | } 40 | 41 | fn process_died(&self, pid: u64) -> Arc { 42 | let mut processes = self.0.lock().unwrap(); 43 | match processes.get(&pid) { 44 | None => { 45 | let data = ProcessData::new(); 46 | let died = data.died.clone(); 47 | processes.insert(pid, data); 48 | died 49 | } 50 | Some(data) => data.died.clone(), 51 | } 52 | } 53 | } 54 | 55 | impl server::RuntimeHandler for Events { 56 | fn on_process_status<'a>(&self, status: ProcessStatus) -> BoxFuture<'a, ()> { 57 | log::debug!("event: {:?}", status); 58 | let mut processes = self.0.lock().unwrap(); 59 | let process = processes.get_mut(&status.pid); 60 | match process { 61 | None => { 62 | processes.insert(status.pid, ProcessData::new_with_status(status)); 63 | } 64 | Some(data) => { 65 | let was_running = match &data.status { 66 | None => true, 67 | Some(status) => status.running, 68 | }; 69 | let died = was_running && !status.running; 70 | data.status.replace(status); 71 | if died { 72 | data.died.notify_waiters(); 73 | } 74 | } 75 | } 76 | futures::future::ready(()).boxed() 77 | } 78 | 
79 | fn on_runtime_status<'a>(&self, _status: RuntimeStatus) -> BoxFuture<'a, ()> { 80 | futures::future::ready(()).boxed() 81 | } 82 | } 83 | 84 | impl Clone for Events { 85 | fn clone(&self) -> Self { 86 | Self(self.0.clone()) 87 | } 88 | } 89 | 90 | #[tokio::main] 91 | async fn main() -> anyhow::Result<()> { 92 | env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info")).init(); 93 | let root_dir = PathBuf::from(env::var("CARGO_MANIFEST_DIR").unwrap()) 94 | .join("..") 95 | .canonicalize() 96 | .unwrap(); 97 | let temp_dir = tempdir::TempDir::new("ya-runtime-vm")?; 98 | 99 | let temp_dir_string = temp_dir.path().display().to_string(); 100 | let drive_path_string = root_dir.join("squashfs_drive").display().to_string(); 101 | 102 | let args = [ 103 | "--task-package", 104 | drive_path_string.as_str(), 105 | "--workdir", 106 | temp_dir_string.as_str(), 107 | ]; 108 | 109 | let runtime_path = PathBuf::from("/usr/lib/yagna/plugins/ya-runtime-vm/ya-runtime-vm"); 110 | 111 | let mut cmd = Command::new(&runtime_path); 112 | cmd.env("RUST_LOG", "debug").args(args).arg("deploy"); 113 | let mut child = cmd.spawn()?; 114 | child.wait().await?; 115 | 116 | let mut cmd = Command::new(&runtime_path); 117 | cmd.env("RUST_LOG", "debug").args(args).arg("start"); 118 | 119 | let events = Events::new(); 120 | 121 | let c = server::spawn(cmd, events.clone()).await?; 122 | 123 | { 124 | let result = c.hello("0.0.0x").await; 125 | log::info!("hello_result: {:?}", result); 126 | } 127 | 128 | { 129 | let run = server::RunProcess { 130 | bin: "/bin/ls".to_string(), 131 | args: ["ls", "-al", "."].iter().map(|s| s.to_string()).collect(), 132 | work_dir: "/".to_string(), 133 | stdout: None, 134 | stderr: None, 135 | }; 136 | log::info!("running {:?}", run); 137 | let pid = c 138 | .run_process(run) 139 | .await 140 | .map_err(|e| anyhow::anyhow!("{:?}", e))? 
141 | .pid; 142 | log::info!("pid: {}", pid); 143 | 144 | events.process_died(pid).notified().await; 145 | 146 | // TODO: get output 147 | } 148 | 149 | { 150 | let run = server::RunProcess { 151 | bin: "/bin/sleep".to_string(), 152 | args: vec!["10".to_string()], 153 | work_dir: "/".to_string(), 154 | stdout: None, 155 | stderr: None, 156 | }; 157 | log::info!("running {:?}", run); 158 | let pid = c 159 | .run_process(run) 160 | .await 161 | .map_err(|e| anyhow::anyhow!("{:?}", e))? 162 | .pid; 163 | log::info!("pid: {}", pid); 164 | 165 | let fut = c.kill_process(server::KillProcess { 166 | pid, 167 | signal: 0, // TODO 168 | }); 169 | std::mem::drop(fut); 170 | 171 | events.process_died(pid).notified().await; 172 | } 173 | 174 | c.shutdown().await.map_err(|e| anyhow::anyhow!("{:?}", e))?; 175 | 176 | Ok(()) 177 | } 178 | -------------------------------------------------------------------------------- /runtime/src/cpu.rs: -------------------------------------------------------------------------------- 1 | use std::convert::TryFrom; 2 | 3 | use raw_cpuid::CpuId; 4 | 5 | pub struct CpuInfo { 6 | pub model: CpuModel, 7 | pub capabilities: Vec, 8 | } 9 | 10 | impl CpuInfo { 11 | pub fn try_new() -> anyhow::Result { 12 | let info = raw_cpuid::CpuId::new(); 13 | let model = CpuModel::try_from(&info)?; 14 | let capabilities = cpu_features(&info)?; 15 | 16 | Ok(CpuInfo { 17 | model, 18 | capabilities, 19 | }) 20 | } 21 | } 22 | 23 | pub struct CpuModel { 24 | pub brand: String, 25 | pub vendor: String, 26 | pub stepping: u8, 27 | pub family: u16, 28 | pub model: u16, 29 | } 30 | 31 | impl<'a> TryFrom<&'a CpuId> for CpuModel { 32 | type Error = anyhow::Error; 33 | 34 | fn try_from(info: &'a CpuId) -> Result { 35 | let brand = info 36 | .get_processor_brand_string() 37 | .ok_or_else(|| anyhow::anyhow!("Unable to read CPU brand"))?; 38 | let vendor = info 39 | .get_vendor_info() 40 | .ok_or_else(|| anyhow::anyhow!("Unable to read CPU vendor info"))?; 41 | let features = 
info 42 | .get_feature_info() 43 | .ok_or_else(|| anyhow::anyhow!("Unable to read CPU features"))?; 44 | 45 | Ok(CpuModel { 46 | brand: brand.as_str().to_string(), 47 | vendor: vendor.to_string(), 48 | stepping: features.stepping_id(), 49 | family: (features.extended_family_id() as u16) + (features.family_id() as u16), 50 | model: ((features.extended_model_id() as u16) << 4) + (features.model_id() as u16), 51 | }) 52 | } 53 | } 54 | 55 | macro_rules! flags { 56 | ($cpu_info:ident, $(($has:ident, $lit:tt)),*) => {{ 57 | let mut results = Vec::new(); 58 | $(if ($cpu_info.$has()) { 59 | results.push(stringify!($lit).to_lowercase()); 60 | })* 61 | results 62 | }} 63 | } 64 | 65 | fn cpu_features(info: &CpuId) -> anyhow::Result> { 66 | let features = info 67 | .get_feature_info() 68 | .ok_or_else(|| anyhow::anyhow!("Unable to read CPU features"))?; 69 | let ext_features = info 70 | .get_extended_feature_info() 71 | .ok_or_else(|| anyhow::anyhow!("Unable to read extended CPU features"))?; 72 | 73 | let mut capabilities = flags!( 74 | features, 75 | (has_sse3, SSE3), 76 | (has_pclmulqdq, PCLMULQDQ), 77 | (has_ds_area, DTES64), 78 | (has_monitor_mwait, MONITOR), 79 | (has_cpl, DSCPL), 80 | (has_vmx, VMX), 81 | (has_smx, SMX), 82 | (has_eist, EIST), 83 | (has_tm2, TM2), 84 | (has_ssse3, SSSE3), 85 | (has_cnxtid, CNXTID), 86 | (has_fma, FMA), 87 | (has_cmpxchg16b, CMPXCHG16B), 88 | (has_pdcm, PDCM), 89 | (has_pcid, PCID), 90 | (has_dca, DCA), 91 | (has_sse41, SSE41), 92 | (has_sse42, SSE42), 93 | (has_x2apic, X2APIC), 94 | (has_movbe, MOVBE), 95 | (has_popcnt, POPCNT), 96 | (has_tsc_deadline, TSC_DEADLINE), 97 | (has_aesni, AESNI), 98 | (has_xsave, XSAVE), 99 | (has_oxsave, OSXSAVE), 100 | (has_avx, AVX), 101 | (has_f16c, F16C), 102 | (has_rdrand, RDRAND), 103 | (has_hypervisor, HYPERVISOR), 104 | (has_fpu, FPU), 105 | (has_vme, VME), 106 | (has_de, DE), 107 | (has_pse, PSE), 108 | (has_tsc, TSC), 109 | (has_msr, MSR), 110 | (has_pae, PAE), 111 | (has_mce, MCE), 112 | 
(has_cmpxchg8b, CX8), 113 | (has_apic, APIC), 114 | (has_sysenter_sysexit, SEP), 115 | (has_mtrr, MTRR), 116 | (has_pge, PGE), 117 | (has_mca, MCA), 118 | (has_cmov, CMOV), 119 | (has_pat, PAT), 120 | (has_pse36, PSE36), 121 | (has_psn, PSN), 122 | (has_clflush, CLFSH), 123 | (has_ds, DS), 124 | (has_acpi, ACPI), 125 | (has_mmx, MMX), 126 | (has_fxsave_fxstor, FXSR), 127 | (has_sse, SSE), 128 | (has_sse2, SSE2), 129 | (has_ss, SS), 130 | (has_htt, HTT), 131 | (has_tm, TM), 132 | (has_pbe, PBE) 133 | ); 134 | capabilities.extend(flags!( 135 | ext_features, 136 | (has_fsgsbase, FSGSBASE), 137 | (has_tsc_adjust_msr, ADJUST_MSR), 138 | (has_bmi1, BMI1), 139 | (has_hle, HLE), 140 | (has_avx2, AVX2), 141 | (has_fdp, FDP), 142 | (has_smep, SMEP), 143 | (has_bmi2, BMI2), 144 | (has_rep_movsb_stosb, REP_MOVSB_STOSB), 145 | (has_invpcid, INVPCID), 146 | (has_rtm, RTM), 147 | (has_rdtm, RDTM), 148 | (has_fpu_cs_ds_deprecated, DEPRECATE_FPU_CS_DS), 149 | (has_mpx, MPX), 150 | (has_rdta, RDTA), 151 | (has_rdseed, RDSEED), 152 | (has_adx, ADX), 153 | (has_smap, SMAP), 154 | (has_clflushopt, CLFLUSHOPT), 155 | (has_processor_trace, PROCESSOR_TRACE), 156 | (has_sha, SHA), 157 | (has_sgx, SGX), 158 | (has_avx512f, AVX512F), 159 | (has_avx512dq, AVX512DQ), 160 | (has_avx512_ifma, AVX512_IFMA), 161 | (has_avx512pf, AVX512PF), 162 | (has_avx512er, AVX512ER), 163 | (has_avx512cd, AVX512CD), 164 | (has_avx512bw, AVX512BW), 165 | (has_avx512vl, AVX512VL), 166 | (has_clwb, CLWB), 167 | (has_prefetchwt1, PREFETCHWT1), 168 | (has_umip, UMIP), 169 | (has_pku, PKU), 170 | (has_ospke, OSPKE), 171 | (has_rdpid, RDPID), 172 | (has_sgx_lc, SGX_LC) 173 | )); 174 | 175 | Ok(capabilities) 176 | } 177 | -------------------------------------------------------------------------------- /runtime/src/deploy.rs: -------------------------------------------------------------------------------- 1 | use std::collections::HashMap; 2 | use std::io::SeekFrom; 3 | use std::path::PathBuf; 4 | 5 | use 
bollard_stubs::models::ContainerConfig; 6 | use crc::crc32; 7 | use serde::{Deserialize, Serialize}; 8 | use tokio::io::{AsyncRead, AsyncReadExt, AsyncSeek, AsyncSeekExt}; 9 | use tokio_byteorder::LittleEndian; 10 | use uuid::Uuid; 11 | 12 | use ya_client_model::activity::exe_script_command::VolumeMount; 13 | use ya_runtime_sdk::runtime_api::deploy::ContainerVolume; 14 | 15 | #[derive(Clone, Debug, Deserialize, Serialize)] 16 | pub struct DeploymentMount { 17 | pub name: String, 18 | pub guest_path: String, 19 | pub mount: VolumeMount, 20 | } 21 | 22 | #[derive(Clone, Debug, Default, Deserialize, Serialize)] 23 | pub struct Deployment { 24 | #[serde(default)] 25 | pub cpu_cores: usize, 26 | #[serde(default)] 27 | pub mem_mib: usize, 28 | #[serde(default)] 29 | pub task_packages: Vec, 30 | pub user: (u32, u32), 31 | pub volumes: Vec, 32 | pub mounts: Vec, 33 | pub hostname: String, 34 | pub config: ContainerConfig, 35 | } 36 | 37 | impl Deployment { 38 | pub async fn try_from_input( 39 | mut input: Input, 40 | cpu_cores: usize, 41 | mem_mib: usize, 42 | task_packages: &[PathBuf], 43 | volume_override: HashMap, 44 | hostname: String, 45 | ) -> Result 46 | where 47 | Input: AsyncRead + AsyncSeek + Unpin, 48 | { 49 | let json_len: u32 = { 50 | let mut buf = [0; 8]; 51 | input.seek(SeekFrom::End(-8)).await?; 52 | input.read_exact(&mut buf).await?; 53 | std::str::from_utf8(&buf)?.parse()? 54 | }; 55 | let crc: u32 = { 56 | let offset = 4 + json_len as i64 + 8; 57 | input.seek(SeekFrom::End(-offset)).await?; 58 | tokio_byteorder::AsyncReadBytesExt::read_u32::(&mut input).await? 
59 | }; 60 | let json = { 61 | let mut buf = String::new(); 62 | let pos = -((json_len + 8) as i64); 63 | input.seek(SeekFrom::End(pos)).await?; 64 | input.take(json_len as u64).read_to_string(&mut buf).await?; 65 | buf 66 | }; 67 | if crc32::checksum_ieee(json.as_bytes()) != crc { 68 | return Err(anyhow::anyhow!("Invalid ContainerConfig crc32 sum")); 69 | } 70 | 71 | let config: ContainerConfig = serde_json::from_str(&json)?; 72 | 73 | let mut volumes = parse_volumes(config.volumes.as_ref()); 74 | 75 | // Host mount type is not permitted for rootfs 76 | for (path, mount) in &volume_override { 77 | if let VolumeMount::Host {} = mount { 78 | // catches `/` as well as `` and `///` etc. 79 | if path.bytes().all(|b| b == b'/') { 80 | return Err(anyhow::anyhow!( 81 | r#"Volume of type `host` specified for path="/""# 82 | )); 83 | } 84 | } 85 | } 86 | 87 | let mounts = volume_override 88 | .into_iter() 89 | .filter_map(|(path, vol_mount)| match vol_mount { 90 | VolumeMount::Host {} => { 91 | let volume_present = volumes.iter().any(|vol| vol.path == path); 92 | if !volume_present { 93 | volumes.push(ContainerVolume { 94 | name: format!("vol-{}", Uuid::new_v4()), 95 | path, 96 | }); 97 | } 98 | 99 | None 100 | } 101 | 102 | VolumeMount::Ram { .. } => { 103 | volumes.retain(|vol| vol.path != path); 104 | Some(DeploymentMount { 105 | name: format!("tmpfs-{}", Uuid::new_v4()), 106 | guest_path: path, 107 | mount: vol_mount, 108 | }) 109 | } 110 | 111 | VolumeMount::Storage { .. 
} => { 112 | volumes.retain(|vol| vol.path != path); 113 | Some(DeploymentMount { 114 | name: format!("vol-{}.img", Uuid::new_v4()), 115 | guest_path: path, 116 | mount: vol_mount, 117 | }) 118 | } 119 | }) 120 | .collect(); 121 | 122 | Ok(Deployment { 123 | cpu_cores, 124 | mem_mib, 125 | task_packages: task_packages.into(), 126 | user: parse_user(config.user.as_ref()).unwrap_or((0, 0)), 127 | volumes, 128 | mounts, 129 | hostname, 130 | config, 131 | }) 132 | } 133 | 134 | pub fn env(&self) -> Vec<&str> { 135 | self.config 136 | .env 137 | .as_ref() 138 | .map(|v| v.iter().map(|s| s.as_str()).collect()) 139 | .unwrap_or_default() 140 | } 141 | } 142 | 143 | fn parse_user(user: Option<&String>) -> anyhow::Result<(u32, u32)> { 144 | let user = user 145 | .map(|s| s.trim()) 146 | .ok_or_else(|| anyhow::anyhow!("User field missing"))?; 147 | let mut split = user.splitn(2, ':'); 148 | let uid: u32 = split 149 | .next() 150 | .ok_or_else(|| anyhow::anyhow!("Missing UID"))? 151 | .parse()?; 152 | let gid: u32 = split 153 | .next() 154 | .ok_or_else(|| anyhow::anyhow!("Missing GID"))? 
155 | .parse()?; 156 | Ok((uid, gid)) 157 | } 158 | 159 | fn parse_volumes(volumes: Option<&HashMap>>) -> Vec { 160 | let volumes = match volumes { 161 | Some(v) => v, 162 | _ => return Vec::new(), 163 | }; 164 | volumes 165 | .keys() 166 | .map(|key| ContainerVolume { 167 | name: format!("vol-{}", Uuid::new_v4()), 168 | path: key.to_string(), 169 | }) 170 | .collect() 171 | } 172 | -------------------------------------------------------------------------------- /runtime/init-container/include/proto.h: -------------------------------------------------------------------------------- 1 | #ifndef _PROTO_H 2 | #define _PROTO_H 3 | 4 | #include 5 | 6 | #pragma pack(push, 1) 7 | 8 | /* 9 | * Host -> Guest 10 | * 11 | * - u64 message ID (non-zero), 12 | * - 1 byte type, 13 | * - stream of sub-messages ended with `SUB_MSG_END`; each sub-message 14 | * consists of 1 byte subtype subtype-specific body (described near each 15 | * subtype below) of types: 16 | * BYTES - unsigned 64bit length, followed by exactly that many bytes; 17 | * ARRAY - unsigned 64bit number of BYTES objects, that follow immediately 18 | * (encoded as described above); 19 | * u64 - unsigned 64bit number, 20 | * u32 - unsigned 32bit number. 21 | * 22 | * All numbers are encoded in little-endian format. 23 | */ 24 | 25 | /* 26 | * Guest -> Host 27 | * 28 | * Guest sends two types of messages - response and asynchronous notification: 29 | * - u64: message ID matching the request for response, 0 for notification, 30 | * - 1 byte type, followed by type-specific body. 31 | */ 32 | 33 | typedef uint64_t msg_id_t; 34 | 35 | struct msg_hdr { 36 | msg_id_t msg_id; 37 | uint8_t type; 38 | }; 39 | 40 | /* All the messages can respond with RESP_ERR in addition to what's listed 41 | * below. */ 42 | enum HOST_MSG_TYPE { 43 | /* Expected response: RESP_OK */ 44 | MSG_QUIT = 1, 45 | 46 | /* Expected response: RESP_OK_U64 - process ID. 
/*
 * Sub-message subtypes of MSG_RUN_PROCESS (by naming).
 *
 * All options except binary path and argv are optional.
 *
 * Only SUB_MSG_RUN_PROCESS_END has an explicit value; the remaining
 * enumerators are numbered consecutively in declaration order, so inserting
 * or reordering entries changes the subtype bytes.
 */
enum SUB_MSG_RUN_PROCESS_TYPE {
    /* End of sub-messages. */
    SUB_MSG_RUN_PROCESS_END = 0,
    /* Binary path. (BYTES) */
    SUB_MSG_RUN_PROCESS_BIN,
    /* Argv. (ARRAY) */
    SUB_MSG_RUN_PROCESS_ARG,
    /* Environment variables. (ARRAY) */
    SUB_MSG_RUN_PROCESS_ENV,
    /* Uid to run as. (u32) */
    SUB_MSG_RUN_PROCESS_UID,
    /* Gid to run as. (u32) */
    SUB_MSG_RUN_PROCESS_GID,
    /* Redirect a fd. (u32 + REDIRECT_FD_TYPE (1-byte) + type specific data).
     * Per REDIRECT_FD_TYPE the type specific data is a path (BYTES) for a
     * file, or a buffer size (u64) for either pipe kind.
     * NOTE(review): the leading u32 is presumably the fd number - confirm. */
    SUB_MSG_RUN_PROCESS_RFD,
    /* Path to set as current working directory. (BYTES) */
    SUB_MSG_RUN_PROCESS_CWD,
    /* This process is an entrypoint. (No body) */
    SUB_MSG_RUN_PROCESS_ENT,
};
/*
 * Sub-message subtypes of MSG_QUERY_OUTPUT (by naming).
 *
 * Only SUB_MSG_QUERY_OUTPUT_END has an explicit value; the remaining
 * enumerators are numbered consecutively in declaration order.
 */
enum SUB_MSG_QUERY_OUTPUT_TYPE {
    /* End of sub-messages. */
    SUB_MSG_QUERY_OUTPUT_END = 0,
    /* ID of process. (u64) */
    SUB_MSG_QUERY_OUTPUT_ID,
    /* File descriptor. (u8)
     * NOTE(review): u8 is not among the body encodings listed in the header
     * comment of this file (BYTES, ARRAY, u64, u32), and
     * NOTIFY_OUTPUT_AVAILABLE documents its file descriptor as u32 - confirm
     * the width against the parser. */
    SUB_MSG_QUERY_OUTPUT_FD,
    /* Offset in output (default = 0). (u64) */
    SUB_MSG_QUERY_OUTPUT_OFF,
    /* Requested length. (u64) */
    SUB_MSG_QUERY_OUTPUT_LEN,
};
/* Sub-message subtypes of MSG_NET_HOST (by naming). */
enum SUB_MSG_NET_HOST {
    /* End of sub-messages. */
    SUB_MSG_NET_HOST_END = 0,
    /* One host entry, encoded as two consecutive BYTES values. (BYTES, BYTES)
     * NOTE(review): presumably an address and a host name - confirm the
     * order against the sender. */
    SUB_MSG_NET_HOST_ENTRY,
};
# Builds without DEBUG (unset or empty) compile assert() out.
# The previous test, `ifneq ($(DEBUG), "")`, compared against the two literal
# quote characters, which differs from every ordinary value of DEBUG, so
# -DNDEBUG was added even when DEBUG=1 was requested.
ifeq ($(strip $(DEBUG)),)
CFLAGS += -DNDEBUG
endif

# Print the short "CC <target>" line unless make runs silently (-s) or V is
# set. findstring takes (needle, haystack); single-letter options are grouped
# in the first word of MAKEFLAGS, so only that word is searched.
ifneq ($(findstring s,$(firstword -$(MAKEFLAGS))),s)
ifndef V
  QUIET_CC = @echo ' ' CC $@;
endif
endif
download the kernel && kernel headers apk packages 45 | # - verify checksums of the downloaded packages 46 | # - extract the contents of the downloaded packages 47 | # - copy vmlinuz-virt from the extracted kernel package 48 | # - create a build directory for initramfs 49 | # - copy required kernel modules from the extracted kernel package to initramfs build directory 50 | # - bundle initramfs.cpio.gz 51 | # - build liburing 52 | # - compile init (https://en.wikipedia.org/wiki/Init) from sources 53 | 54 | .DEFAULT_GOAL = all 55 | .PHONY: all 56 | all: vmlinuz-virt initramfs.cpio.gz 57 | 58 | $(SRC_DIR)/network.o: $(SRC_DIR)/network.c 59 | $(QUIET_CC)$(CC) $(CFLAGS) \ 60 | -I"$(CURDIR)/$(UNPACKED_HEADERS)/usr/include" \ 61 | -o $@ -c $< 62 | 63 | $(SRC_DIR)/seccomp.o: $(CURDIR)/$(LIBSECCOMP_SUBMODULE)/include/seccomp.h 64 | 65 | %.o: %.c 66 | $(QUIET_CC)$(CC) $(CFLAGS) -o $@ -c $< 67 | 68 | init: $(UNPACKED_HEADERS) $(OBJECTS) $(OBJECTS_EXT) $(CURDIR)/$(LIBSECCOMP_SUBMODULE)/src/.libs/libseccomp.a 69 | @echo init 70 | $(QUIET_CC)$(CC) $(CFLAGS) -static -o $@ $(wordlist 2, $(words $^), $^) 71 | @# default musl libs on some distros have debug symbols, lets strip them (and everything else) 72 | strip $@ 73 | 74 | $(UNPACKED_HEADERS): 75 | wget -q -O "headers" $(HEADERS_URL) 76 | echo $(HEADERS_SHA256) "headers" | sha256sum -c || (echo "Headers apk checksum verification failed!" && exit 1) 77 | $(RM) -rf $(UNPACKED_HEADERS) 78 | mkdir $(UNPACKED_HEADERS) 79 | tar --warning=no-unknown-keyword -C $(UNPACKED_HEADERS) -vxzf headers >/dev/null 80 | $(RM) headers 81 | 82 | $(UNPACKED_KERNEL): 83 | wget -q -O "kernel" $(KERNEL_URL) 84 | echo $(KERNEL_SHA256) "kernel" | sha256sum -c || (echo "Kernel apk checksum verification failed!" 
# Build the busybox image and copy busybox_MKFS_EXT2 out of it to
# $(BUSYBOX)/$(MKFS_NAME).
# `set -e` aborts at the first failing step; without it a failed build or copy
# was hidden by the exit status of the final `docker rm`. A container left
# behind by an interrupted run is removed first so `docker create` cannot fail
# on the name clash.
mkfs:
	set -e; \
	cd $(BUSYBOX); \
	docker build -t busybox .; \
	docker rm -f busybox-tmp >/dev/null 2>&1 || true; \
	docker create --name busybox-tmp busybox; \
	docker cp busybox-tmp:/busybox/busybox_MKFS_EXT2 $(MKFS_NAME); \
	docker rm -f busybox-tmp
# Kernel modules bundled into the initramfs, relative to
# $(UNPACKED_KERNEL)/lib/modules/$(KERNEL_VER)/kernel/.
# fuse.ko used to be copied from a hard-coded 5.10.29-0-virt directory, which
# broke the build as soon as KERNEL_VER was overridden.
INITRAMFS_MODULES := \
    drivers/virtio/virtio.ko \
    drivers/virtio/virtio_ring.ko \
    drivers/virtio/virtio_pci.ko \
    drivers/char/hw_random/rng-core.ko \
    drivers/char/hw_random/virtio-rng.ko \
    drivers/char/virtio_console.ko \
    drivers/block/virtio_blk.ko \
    drivers/net/tun.ko \
    drivers/net/net_failover.ko \
    drivers/net/virtio_net.ko \
    fs/9p/9p.ko \
    fs/mbcache.ko \
    fs/ext2/ext2.ko \
    fs/squashfs/squashfs.ko \
    fs/overlayfs/overlay.ko \
    fs/fscache/fscache.ko \
    fs/fuse/fuse.ko \
    net/9p/9pnet.ko \
    net/9p/9pnet_virtio.ko \
    net/core/failover.ko \
    net/ipv6/ipv6.ko \
    net/packet/af_packet.ko

# Assemble a scratch `initramfs` directory (init binary, kernel modules,
# mkfs helper, empty new-root mount point), pack it as a gzip-compressed newc
# cpio archive owned by 0:0, then remove the scratch directory.
initramfs.cpio.gz: init mkfs $(UNPACKED_KERNEL)
	$(RM) -rf initramfs
	mkdir initramfs
	cp $< initramfs
	cp $(addprefix $(UNPACKED_KERNEL)/lib/modules/$(KERNEL_VER)/kernel/,$(INITRAMFS_MODULES)) initramfs
	cp $(BUSYBOX)/$(MKFS_NAME) initramfs
	mkdir initramfs/$(NEW_ROOT)
	set -euo pipefail; cd initramfs && find . | cpio --quiet -o -H newc -R 0:0 | gzip -9 > ../$@
	$(RM) -rf initramfs
tag.replace(/^pre-rel-/,''); 26 | try { 27 | let release = await github.request("GET /repos/:owner/:repo/releases/tags/:tag", { 28 | owner: context.repo.owner, 29 | repo: context.repo.repo, 30 | tag: tag 31 | }); 32 | } 33 | catch(e) { 34 | let body = ["TODO"].join("\n"); 35 | 36 | let release = await github.request("POST /repos/:owner/:repo/releases", { 37 | owner: context.repo.owner, 38 | repo: context.repo.repo, 39 | data: { 40 | tag_name: tag, 41 | prerelease: true, 42 | body: body, 43 | name: `${versionName} #${buildNo}` 44 | } 45 | }); 46 | console.log(release.data.upload_url); 47 | } 48 | build-init: 49 | name: Build container Init 50 | runs-on: ubuntu-22.04 51 | steps: 52 | - name: Install Musl 53 | run: sudo apt-get install -y musl-tools musl autoconf gperf libtool automake 54 | - uses: actions/checkout@v1 55 | - name: Make 56 | run: | 57 | musl-gcc -v 58 | git submodule init 59 | git submodule update 60 | cd runtime/init-container 61 | make 62 | - uses: actions/upload-artifact@v4 63 | with: 64 | name: init-container 65 | path: | 66 | runtime/init-container/initramfs.cpio.gz 67 | runtime/init-container/vmlinuz-virt 68 | 69 | build: 70 | name: Build Release 71 | needs: 72 | - create-release 73 | - build-init 74 | runs-on: ubuntu-latest 75 | env: 76 | OPENSSL_STATIC: 1 77 | steps: 78 | - uses: actions/checkout@v1 79 | with: 80 | lfs: true 81 | - uses: actions/download-artifact@v4 82 | with: 83 | name: init-container 84 | path: runtime/init-container/ 85 | - run: | 86 | ls -R 87 | test -f runtime/init-container/initramfs.cpio.gz 88 | - name: Download self-test image 89 | uses: robinraju/release-downloader@v1.8 90 | with: 91 | repository: ${{ env.self-test-img_repository }} 92 | tag: ${{ env.self-test-img_tag }} 93 | fileName: self-test.gvmi 94 | out-file-path: runtime/image/ 95 | tarBall: false 96 | zipBall: false 97 | - name: Install Musl 98 | run: | 99 | sudo apt-get install -y musl-tools musl 100 | - name: Get upload url 101 | id: release_upload_url 102 | 
# Stage the release tree under releases/ and publish the archive name and media
# type as step outputs (read later as steps.pack.outputs.*).
# Outputs are appended to $GITHUB_OUTPUT; the `::set-output` workflow command
# is deprecated. The staging directory is derived from the same name as the
# archive so the two cannot drift apart if OS_NAME changes.
- name: Pack
  id: pack
  shell: bash
  env:
    GITHUB_REF: ${{ github.ref }}
    OS_NAME: linux
  run: |
    TAG_NAME="${GITHUB_REF##*/}"
    PKG_NAME="ya-runtime-vm-${OS_NAME}-${TAG_NAME}"
    TARGET_DIR="releases/${PKG_NAME}"
    mkdir -p "$TARGET_DIR/ya-runtime-vm/runtime"
    strip "target/x86_64-unknown-linux-musl/release/ya-runtime-vm"
    set -x

    cp target/x86_64-unknown-linux-musl/release/ya-runtime-vm "$TARGET_DIR/ya-runtime-vm/"
    cp runtime/conf/ya-runtime-vm.json "$TARGET_DIR/"
    cp -r runtime/poc/runtime "$TARGET_DIR/ya-runtime-vm/"
    cp "runtime/image/self-test.gvmi" "$TARGET_DIR/ya-runtime-vm/runtime/"
    cp "runtime/init-container/initramfs.cpio.gz" "$TARGET_DIR/ya-runtime-vm/runtime/"
    cp "runtime/init-container/vmlinuz-virt" "$TARGET_DIR/ya-runtime-vm/runtime/"
    (cd releases && tar czvf "${PKG_NAME}.tar.gz" "${PKG_NAME}")
    {
      echo "artifact=${PKG_NAME}.tar.gz"
      echo "media=application/tar+gzip"
    } >> "$GITHUB_OUTPUT"
# Derive the package version from the pushed tag: strip a leading `pre-rel-`
# and then a leading `v`. Outputs are appended to $GITHUB_OUTPUT; the
# `::set-output` workflow command is deprecated.
- name: Extract Version
  id: version
  shell: bash
  env:
    GITHUB_REF: ${{ github.ref }}
  run: |
    TAG_NAME="${GITHUB_REF##*/}"
    TAGV_NAME="${TAG_NAME#pre-rel-}"
    VERSION=${TAGV_NAME#v}
    # NOTE(review): `tagv` carries the full tag (including any `pre-rel-`
    # prefix), not TAGV_NAME. Kept as-is because the .deb asset name is built
    # from it - confirm which of the two was intended.
    {
      echo "tagv=${TAG_NAME}"
      echo "version=${VERSION}"
    } >> "$GITHUB_OUTPUT"
/* Returns the smaller of the two sizes. */
static size_t min(size_t a, size_t b) {
    if (b < a) {
        return b;
    }
    return a;
}
expected: %zu, actual: %zu", msg, expected, actual); 25 | } 26 | } 27 | 28 | static void assert_buffers_match(struct test_setup* setup) { 29 | if (memcmp(setup->buf_in, setup->buf_out, BUF_SIZE) != 0) { 30 | err(3, "Output data does not match input buffer data"); 31 | } 32 | } 33 | 34 | static void check_cb_invariants(struct test_setup* setup, size_t expected_data_size) { 35 | struct cyclic_buffer* cb = &setup->cb; 36 | assert_size_equal(expected_data_size, cyclic_buffer_data_size(cb), "Data"); 37 | 38 | size_t expected_free_space = BUF_SIZE - expected_data_size; 39 | assert_size_equal(expected_free_space, cyclic_buffer_free_size(cb), "Free space"); 40 | } 41 | 42 | static ssize_t read_into_cb(int pipe, struct cyclic_buffer* cb, size_t size) { 43 | size_t free_space = cyclic_buffer_free_size(cb); 44 | size_t expected_read = min(free_space, size); 45 | char* begin = cb->begin; 46 | int ret = cyclic_buffer_read(pipe, cb, size); 47 | if (ret < 0) { 48 | errx(4, "'cyclic_buffer_read' failed with error code: %zd", ret); 49 | } 50 | 51 | assert_size_equal(expected_read, ret, "'cyclic_buffer_read' read"); 52 | 53 | if (begin != cb->begin) { 54 | err(5, "Start of available data pointer moved while reading into buffer."); 55 | } 56 | 57 | return ret; 58 | } 59 | 60 | static ssize_t pipe_to_cb(struct test_setup* setup, size_t size) { 61 | int ret = write(setup->p_in[1], setup->buf_in, size); 62 | if (ret < 0) { 63 | errx(6, "Write to pipe failed with error code: %zd", ret); 64 | } 65 | 66 | assert_size_equal(size, ret, "Pipe write"); 67 | 68 | size_t read_size = read_into_cb(setup->p_in[0], &setup->cb, size); 69 | 70 | return read_size; 71 | } 72 | 73 | static ssize_t write_from_cb(int pipe, struct cyclic_buffer* cb, size_t size) { 74 | size_t available_data = cyclic_buffer_data_size(cb); 75 | size_t expected_write = min(available_data, size); 76 | char* end = cb->end; 77 | int ret = cyclic_buffer_write(pipe, cb, size); 78 | if (ret < 0) { 79 | errx(8, "'cyclic_buffer_write' 
failed with error code: %zd", ret); 80 | } 81 | 82 | assert_size_equal(expected_write, ret, "'cyclic_buffer_write' write"); 83 | 84 | if (end != cb->end && (cb->begin != cb->end || cb->begin != cb->buf)) { 85 | err(9, "End of available data pointer moved while writing from buffer."); 86 | } 87 | 88 | return ret; 89 | } 90 | 91 | static ssize_t pipe_from_cb(struct test_setup* setup, size_t size) { 92 | ssize_t write_size = write_from_cb(setup->p_out[1], &setup->cb, size); 93 | if (write_size > 0) { 94 | ssize_t ret = read(setup->p_out[0], setup->buf_out, write_size); 95 | if (ret < 0) { 96 | errx(10, "Read from pipe failed with error code: %zd", ret); 97 | } 98 | 99 | assert_size_equal(write_size, ret, "Pipe read"); 100 | } 101 | 102 | return write_size; 103 | } 104 | 105 | static void close_pipe(int p[2]) { 106 | close(p[1]); 107 | close(p[0]); 108 | } 109 | 110 | static void run_test(char* test_name, void (*test_block)(struct test_setup*)) { 111 | printf("Running test: %s ", test_name); 112 | struct test_setup setup; 113 | if (cyclic_buffer_init(&setup.cb, BUF_SIZE) < 0) { 114 | err(42, "'cyclic_buffer_init' failed"); 115 | } 116 | if (pipe(setup.p_in) < 0) { 117 | err(42, "pipe in"); 118 | } 119 | if (pipe(setup.p_out) < 0) { 120 | err(42, "pipe out"); 121 | } 122 | memset(setup.buf_in, 0, BUF_SIZE); 123 | memset(setup.buf_out, 0, BUF_SIZE); 124 | (*test_block)(&setup); 125 | close_pipe(setup.p_in); 126 | close_pipe(setup.p_out); 127 | if (cyclic_buffer_deinit(&setup.cb) != 0) { 128 | err(42, "'cyclic_buffer_deinit' failed"); 129 | } 130 | printf("... 
/*
 * Writes two buffers' worth of data into the input pipe while the cyclic
 * buffer can hold only one, then works the backlog off in steps, checking the
 * buffer's size accounting and the drained bytes after each step.
 */
void test_more_data_in_pipe_than_capacity(struct test_setup* setup) {
    memset(setup->buf_in, 'a', BUF_SIZE);
    /* The first BUF_SIZE bytes fill the cyclic buffer completely. */
    assert_size_equal(BUF_SIZE, pipe_to_cb(setup, BUF_SIZE), "Write");
    /* Another BUF_SIZE bytes go into the pipe, but the full buffer accepts
     * none of them; they stay queued in the pipe. */
    assert_size_equal(0, pipe_to_cb(setup, BUF_SIZE), "Write");
    check_cb_invariants(setup, BUF_SIZE);

    /* Drain 42 bytes. buf_in is rewritten to the expected image of buf_out:
     * 42 'a's followed by zeros. */
    assert_size_equal(42, pipe_from_cb(setup, 42), "Read");
    check_cb_invariants(setup, BUF_SIZE - 42);
    memset(setup->buf_in, 0, BUF_SIZE);
    memset(setup->buf_in, 'a', 42);
    assert_buffers_match(setup);

    /* Only 42 bytes are free, so a BUF_SIZE request takes just 42 from the
     * pipe and the buffer is full again. */
    assert_size_equal(42, read_into_cb(setup->p_in[0], &setup->cb, BUF_SIZE), "Read remaining from pipe");
    check_cb_invariants(setup, BUF_SIZE);
    /* Drain everything; the output must be BUF_SIZE 'a's. */
    assert_size_equal(BUF_SIZE, pipe_from_cb(setup, BUF_SIZE), "Read");
    check_cb_invariants(setup, 0);
    memset(setup->buf_in, 'a', BUF_SIZE);
    assert_buffers_match(setup);

    /* The pipe still holds the remaining BUF_SIZE - 42 bytes of the second
     * write; pull them through and compare against a zeroed output buffer. */
    memset(setup->buf_out, 0, BUF_SIZE);
    assert_size_equal(BUF_SIZE - 42, read_into_cb(setup->p_in[0], &setup->cb, BUF_SIZE - 42), "Read remaining from pipe");
    check_cb_invariants(setup, BUF_SIZE - 42);
    assert_size_equal(BUF_SIZE - 42, pipe_from_cb(setup, BUF_SIZE - 42), "Read");
    memset(setup->buf_in, 0, BUF_SIZE);
    memset(setup->buf_in, 'a', BUF_SIZE - 42);
    assert_buffers_match(setup);
}
/*
 * Leaves 42 bytes queued in the cyclic buffer at all times and streams
 * batches through it. After every step buf_in is rewritten to the expected
 * image of buf_out and the two are compared in full.
 */
void test_buffer_never_empty(struct test_setup* setup) {
    /* Fill the buffer, then drain it in two reads until 42 bytes remain. */
    memset(setup->buf_in, 'a', BUF_SIZE);
    assert_size_equal(BUF_SIZE, pipe_to_cb(setup, BUF_SIZE), "Write");
    check_cb_invariants(setup, BUF_SIZE);
    assert_size_equal(BUF_SIZE / 2 - 42, pipe_from_cb(setup, BUF_SIZE / 2 - 42), "Read");
    check_cb_invariants(setup, BUF_SIZE / 2 + 42);
    memset(setup->buf_in, 0, BUF_SIZE);
    memset(setup->buf_in, 'a', BUF_SIZE / 2 - 42);
    assert_buffers_match(setup);
    assert_size_equal(BUF_SIZE / 2, pipe_from_cb(setup, BUF_SIZE / 2), "Read");
    check_cb_invariants(setup, 42);
    memset(setup->buf_in, 'a', BUF_SIZE / 2);
    assert_buffers_match(setup);
    // run for 24 rounds, this should force crossing the boundary at different points in the data
    /* Each batch is slightly smaller than the buffer and is filled with a
     * byte value that differs from round to round ('a' + i). */
    int batch_size = BUF_SIZE - BUF_SIZE / 24;
    for(int i = 1; i < 25; i++) {
        memset(setup->buf_in, 'a' + i, batch_size);
        assert_size_equal(batch_size, pipe_to_cb(setup, batch_size), "Batch write");
        check_cb_invariants(setup, batch_size + 42);
        assert_size_equal(batch_size, pipe_from_cb(setup, batch_size), "Batch read");
        check_cb_invariants(setup, 42);
        memset(setup->buf_in, 'a' + i - 1, 42); // the first 42 characters are from the previous batch
        assert_buffers_match(setup);
    }
}
/*
 * Extracts the prefix length from an address written as "addr/len".
 *
 * Returns the length (0..128), or -1 when `ip` contains no '/' or when the
 * text after the '/' is not a plain decimal number in that range. The former
 * atol() call reported 0 for an empty or non-numeric suffix and silently
 * truncated out-of-range values, which callers could not tell apart from a
 * genuine prefix.
 */
int parse_prefix_len(const char *ip) {
    const char *slash = strchr(ip, '/');
    char *end;
    long len;

    /* Require a digit right after the slash: strtol() on its own would also
     * accept leading whitespace and a sign. */
    if (!slash || slash[1] < '0' || slash[1] > '9') {
        return -1;
    }

    /* Overflow saturates at LONG_MAX, which the range check rejects too. */
    len = strtol(slash + 1, &end, 10);
    if (*end != '\0' || len > 128) {
        return -1;
    }

    return (int) len;
}
ifr.ifr_flags = IFF_TAP | IFF_NO_PI; 75 | 76 | if (*name) { 77 | strncpy(ifr.ifr_name, name, sizeof(ifr.ifr_name) - 1); 78 | } 79 | 80 | if ((ret = ioctl(fd, TUNSETIFF, &ifr)) < 0) { 81 | goto err; 82 | } 83 | 84 | strcpy(name, ifr.ifr_name); 85 | return fd; 86 | err: 87 | close(fd); 88 | return ret; 89 | } 90 | 91 | int net_if_up(const char *name, int up) { 92 | struct ifreq ifr; 93 | int fd, ret; 94 | 95 | if ((fd = socket(PF_INET, SOCK_DGRAM, IPPROTO_IP)) < 0) { 96 | return fd; 97 | } 98 | 99 | memset(&ifr, 0, sizeof(ifr)); 100 | strncpy(ifr.ifr_name, name, sizeof(ifr.ifr_name) - 1); 101 | 102 | if (up) { 103 | ifr.ifr_flags |= IFF_UP; 104 | } else { 105 | ifr.ifr_flags &= ~IFF_UP; 106 | } 107 | 108 | if ((ret = ioctl(fd, SIOCSIFFLAGS, &ifr)) < 0) { 109 | goto end; 110 | } 111 | end: 112 | close(fd); 113 | return ret; 114 | } 115 | 116 | int net_if_mtu(const char *name, int mtu) { 117 | struct ifreq ifr; 118 | int fd, ret; 119 | 120 | if ((fd = socket(PF_INET, SOCK_DGRAM, IPPROTO_IP)) < 0) { 121 | return fd; 122 | } 123 | 124 | memset(&ifr, 0, sizeof(ifr)); 125 | strncpy(ifr.ifr_name, name, sizeof(ifr.ifr_name) - 1); 126 | 127 | ifr.ifr_addr.sa_family = AF_INET; 128 | ifr.ifr_mtu = mtu; 129 | if ((ret = ioctl(fd, SIOCSIFMTU, &ifr)) < 0) { 130 | goto end; 131 | } 132 | end: 133 | close(fd); 134 | return ret; 135 | } 136 | 137 | int net_if_addr(const char *name, const char *ip, const char *mask) { 138 | struct ifreq ifr; 139 | int fd, ret; 140 | 141 | if ((fd = socket(PF_INET, SOCK_DGRAM, IPPROTO_IP)) < 0) { 142 | return fd; 143 | } 144 | 145 | memset(&ifr, 0, sizeof(ifr)); 146 | strncpy(ifr.ifr_name, name, sizeof(ifr.ifr_name) - 1); 147 | 148 | if (ioctl(fd, SIOCGIFADDR, &ifr) == 0) { 149 | if ((ret = net_if_alias(&ifr, name)) < 0) { 150 | goto end; 151 | } 152 | } 153 | 154 | struct sockaddr_in* sa = (struct sockaddr_in*) &ifr.ifr_addr; 155 | sa->sin_family = AF_INET; 156 | 157 | if ((ret = inet_pton(AF_INET, ip, &sa->sin_addr)) < 0) { 158 | goto end; 159 | } 
160 | if ((ret = ioctl(fd, SIOCSIFADDR, &ifr)) < 0) { 161 | goto end; 162 | } 163 | if ((ret = inet_pton(AF_INET, mask, &sa->sin_addr)) < 0) { 164 | goto end; 165 | } 166 | if ((ret = ioctl(fd, SIOCSIFNETMASK, &ifr)) < 0) { 167 | goto end; 168 | } 169 | 170 | ifr.ifr_flags = IFF_UP; 171 | ret = ioctl(fd, SIOCSIFFLAGS, &ifr); 172 | 173 | end: 174 | close(fd); 175 | return ret; 176 | } 177 | 178 | int net_if_addr6(const char *name, const char *ip6) { 179 | struct ifreq ifr; 180 | struct ifreq6_stub ifr6; 181 | int fd, ret, pl; 182 | 183 | if ((fd = socket(PF_INET6, SOCK_DGRAM, IPPROTO_IP)) < 0) { 184 | return fd; 185 | } 186 | 187 | memset(&ifr, 0, sizeof(ifr)); 188 | memset(&ifr6, 0, sizeof(ifr6)); 189 | 190 | strncpy(ifr.ifr_name, name, sizeof(ifr.ifr_name) - 1); 191 | if ((ret = ioctl(fd, SIOGIFINDEX, &ifr)) < 0) { 192 | goto end; 193 | } 194 | 195 | if (ioctl(fd, SIOCGIFADDR, &ifr) == 0) { 196 | if ((ret = net_if_alias(&ifr, name)) < 0) { 197 | goto end; 198 | } 199 | } 200 | 201 | if ((pl = parse_prefix_len(ip6)) < 0) { 202 | pl = 128; 203 | } 204 | 205 | ifr6.ifindex = ifr.ifr_ifindex; 206 | ifr6.prefixlen = pl; 207 | 208 | if ((ret = inet_pton(AF_INET6, ip6, &ifr6.addr)) < 0) { 209 | goto end; 210 | } 211 | if ((ret = ioctl(fd, SIOCSIFADDR, &ifr6)) < 0) { 212 | goto end; 213 | } 214 | 215 | ifr.ifr_flags |= IFF_UP; 216 | ret = ioctl(fd, SIOCSIFFLAGS, &ifr); 217 | 218 | end: 219 | close(fd); 220 | return ret; 221 | } 222 | 223 | int net_if_addr_to_hw_addr(const char *ip, char *mac) { 224 | struct ifreq ifr; 225 | int ret; 226 | 227 | struct sockaddr_in* sa = (struct sockaddr_in*) &ifr.ifr_addr; 228 | sa->sin_family = AF_INET; 229 | 230 | if ((ret = inet_pton(AF_INET, ip, &sa->sin_addr)) < 0) { 231 | goto end; 232 | } 233 | 234 | const char *p = (void *) &sa->sin_addr; 235 | 236 | mac[0] = 0x90; 237 | mac[1] = 0x13; 238 | mac[2] = p[0]; 239 | mac[3] = p[1]; 240 | mac[4] = p[2]; 241 | mac[5] = p[3]; 242 | 243 | end: 244 | return ret; 245 | } 246 | 247 | 248 | int 
net_if_addr6_to_hw_addr(const char *ip, char *mac) { 249 | struct ifreq6_stub ifr6; 250 | int ret; 251 | 252 | if ((ret = inet_pton(AF_INET6, ip, &ifr6.addr)) < 0) { 253 | goto end; 254 | } 255 | 256 | const char *p = (char *)(void *) &ifr6.addr + 12; 257 | 258 | mac[0] = 0x90; 259 | mac[1] = 0x13; 260 | mac[2] = p[0]; 261 | mac[3] = p[1]; 262 | mac[4] = p[2]; 263 | mac[5] = p[3]; 264 | 265 | end: 266 | return ret; 267 | } 268 | 269 | int net_if_hw_addr(const char *name, const char mac[6]) { 270 | struct ifreq ifr; 271 | int fd; 272 | 273 | if ((fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL))) < 0) { 274 | return fd; 275 | } 276 | 277 | ifr.ifr_hwaddr.sa_family = ARPHRD_ETHER; 278 | memcpy(ifr.ifr_hwaddr.sa_data, mac, 6); 279 | 280 | strncpy(ifr.ifr_name, name, sizeof(ifr.ifr_name) - 1); 281 | const int ret = ioctl(fd, SIOCSIFHWADDR, &ifr); 282 | 283 | close(fd); 284 | return ret; 285 | } 286 | 287 | int net_route(const char *name, const char *ip, const char *mask, const char *via) { 288 | struct rtentry rt; 289 | int fd, ret; 290 | 291 | if ((fd = socket(AF_INET, SOCK_DGRAM, IPPROTO_IP)) < 0) { 292 | return -1; 293 | } 294 | 295 | memset(&rt, 0, sizeof(rt)); 296 | 297 | rt.rt_flags |= RTF_UP | RTF_GATEWAY; 298 | rt.rt_dev = malloc(strlen(name) + 1); 299 | if (!rt.rt_dev) { 300 | ret = -ENOMEM; 301 | goto end; 302 | } 303 | memcpy(rt.rt_dev, name, strlen(name) + 1); 304 | 305 | struct sockaddr_in *addr = (struct sockaddr_in *) &rt.rt_gateway; 306 | addr->sin_family = AF_INET; 307 | addr->sin_addr.s_addr = inet_addr(via); 308 | 309 | addr = (struct sockaddr_in*) &rt.rt_dst; 310 | addr->sin_family = AF_INET; 311 | 312 | if (!ip) { 313 | addr->sin_addr.s_addr = INADDR_ANY; 314 | rt.rt_metric = 0; 315 | } else { 316 | addr->sin_addr.s_addr = inet_addr(ip); 317 | rt.rt_metric = 101; 318 | } 319 | 320 | addr = (struct sockaddr_in *) &rt.rt_genmask; 321 | addr->sin_family = AF_INET; 322 | 323 | if (!mask) { 324 | addr->sin_addr.s_addr = INADDR_ANY; 325 | } else { 326 | 
addr->sin_addr.s_addr = inet_addr(mask); 327 | } 328 | 329 | ret = ioctl(fd, SIOCADDRT, (void *) &rt); 330 | 331 | end: 332 | if (rt.rt_dev) free(rt.rt_dev); 333 | close(fd); 334 | return ret; 335 | } 336 | 337 | int net_route6(const char *name, const char *ip6, const char *via) { 338 | struct ifreq ifr; 339 | struct in6_rtmsg rt; 340 | int fd, pl, ret; 341 | 342 | if ((fd = socket(AF_INET6, SOCK_DGRAM, 0)) < 0) { 343 | return -1; 344 | } 345 | strncpy(ifr.ifr_name, name, sizeof(ifr.ifr_name) - 1); 346 | if ((ret = ioctl(fd, SIOGIFINDEX, &ifr)) < 0) { 347 | goto end; 348 | } 349 | 350 | memset(&rt, 0, sizeof(rt)); 351 | 352 | if (!ip6) { 353 | ip6 = "0:0:0:0:0:0:0:0"; 354 | } 355 | 356 | if ((pl = parse_prefix_len(ip6)) < 0) { 357 | pl = 128; 358 | } 359 | 360 | rt.rtmsg_dst_len = pl; 361 | rt.rtmsg_metric = 101; 362 | rt.rtmsg_ifindex = ifr.ifr_ifindex; 363 | rt.rtmsg_flags |= RTF_UP | RTF_GATEWAY; 364 | 365 | if ((ret = inet_pton(AF_INET6, via, &rt.rtmsg_gateway)) < 0) { 366 | goto end; 367 | } 368 | 369 | if ((ret = inet_pton(AF_INET6, ip6, &rt.rtmsg_dst)) < 0) { 370 | goto end; 371 | } 372 | 373 | ret = ioctl(fd, SIOCADDRT, (void *) &rt); 374 | 375 | end: 376 | close(fd); 377 | return ret; 378 | } 379 | -------------------------------------------------------------------------------- /runtime/examples/direct.rs: -------------------------------------------------------------------------------- 1 | use futures::FutureExt; 2 | use std::path::{Path, PathBuf}; 3 | use std::{ 4 | env, 5 | io::{self, prelude::*}, 6 | process::Stdio, 7 | sync::Arc, 8 | }; 9 | use tokio::{ 10 | process::{Child, Command}, 11 | sync, 12 | }; 13 | use ya_runtime_vm::guest_agent_comm::{GuestAgent, Notification, RedirectFdType}; 14 | 15 | struct Notifications { 16 | process_died: sync::Notify, 17 | output_available: sync::Notify, 18 | } 19 | 20 | impl Notifications { 21 | fn new() -> Self { 22 | Notifications { 23 | process_died: sync::Notify::new(), 24 | output_available: sync::Notify::new(), 
}
    }

    /// Prints the notification and wakes every task currently waiting on the
    /// matching `Notify`.
    fn handle(&self, notification: Notification) {
        match notification {
            Notification::OutputAvailable { id, fd } => {
                println!("Process {} has output available on fd {}", id, fd);
                self.output_available.notify_waiters();
            }
            Notification::ProcessDied { id, reason } => {
                println!("Process {} died with {:?}", id, reason);
                self.process_died.notify_waiters();
            }
        }
    }
}

/// Runs `bin` with `argv` in the guest, redirecting descriptors 1 and 2 to
/// 0x1000-byte blocking pipes, waits for a process-died notification and then
/// prints whatever can be queried from descriptor 1.
///
/// NOTE(review): the wait is registered only after the process has been
/// started; `notify_waiters` does not store a wake-up, so a process that dies
/// very quickly could be missed - confirm against the `tokio::sync::Notify`
/// guarantees.
async fn run_process_with_output(
    ga: &mut GuestAgent,
    notifications: &Notifications,
    bin: &str,
    argv: &[&str],
) -> io::Result<()> {
    let id = ga
        .run_process(
            bin,
            argv,
            None,
            0,
            0,
            &[
                None,
                Some(RedirectFdType::RedirectFdPipeBlocking(0x1000)),
                Some(RedirectFdType::RedirectFdPipeBlocking(0x1000)),
            ],
            None,
        )
        .await?
        .expect("Run process failed");
    println!("Spawned process with id: {}", id);
    notifications.process_died.notified().await;
    match ga.query_output(id, 1, 0, u64::MAX).await? {
        Ok(out) => {
            println!("Output:");
            io::stdout().write_all(&out)?;
        }
        Err(code) => println!("Output query failed with: {}", code),
    }
    Ok(())
}

/// Canonical path of the crate directory, taken from `CARGO_MANIFEST_DIR`.
fn get_project_dir() -> PathBuf {
    PathBuf::from(env::var("CARGO_MANIFEST_DIR").unwrap())
        .canonicalize()
        .unwrap()
}

/// Canonical path of the parent of the crate directory.
fn get_root_dir() -> PathBuf {
    get_project_dir().join("..").canonicalize().unwrap()
}

/// Joins `file` onto `path`, canonicalizes the result and renders it as a
/// `String`. Panics if the joined path cannot be canonicalized.
fn join_as_string<P: AsRef<Path>>(path: P, file: impl ToString) -> String {
    path.as_ref()
        .join(file.to_string())
        .canonicalize()
        .unwrap()
        .display()
        .to_string()
}

/// Spawns `qemu-system-x86_64` with the init-container kernel and initramfs,
/// a manager socket at `<temp_path>/manager.sock`, and one `-virtfs` share per
/// `(tag, path)` entry of `mount_args`. Panics if the process cannot be
/// spawned.
fn spawn_vm<'a, P: AsRef<Path>>(temp_path: P, mount_args: &'a [(&'a str, impl ToString)]) -> Child {
    let root_dir = get_root_dir();
    let project_dir = get_project_dir();
    let init_dir = project_dir.join("init-container");

    let mut cmd = Command::new("qemu-system-x86_64");
    cmd.current_dir(&init_dir).args([
        "-m",
        "256m",
        "-nographic",
        "-vga",
        "none",
        "-kernel",
        join_as_string(&init_dir, "vmlinuz-virt").as_str(),
        "-initrd",
        join_as_string(&init_dir, "initramfs.cpio.gz").as_str(),
        "-no-reboot",
        "-net",
        "none",
        "-smp",
        "1",
        "-append",
        "console=ttyS0 panic=1",
        "-device",
        "virtio-serial",
        "-device",
        "virtio-rng-pci",
        "-chardev",
        format!(
            "socket,path={},server=true,wait=false,id=manager_cdev",
            temp_path.as_ref().join("manager.sock").display()
        )
        .as_str(),
        "-device",
        "virtserialport,chardev=manager_cdev,name=manager_port",
        "-drive",
        format!(
            "file={},cache=none,readonly=on,format=raw,if=virtio",
            root_dir.join("squashfs_drive").display()
        )
        .as_str(),
    ]);
    for (tag, path) in mount_args.iter() {
        cmd.args([
            "-virtfs",
            &format!(
                "local,id={tag},path={path},security_model=none,mount_tag={tag}",
                tag = tag,
                path = path.to_string()
            ),
        ]);
    }
    cmd.stdin(Stdio::null());
    cmd.spawn().expect("failed to spawn VM")
}

/// Boots a VM, connects to its guest agent and exercises mounting, process
/// execution, output redirection, kill and quit in sequence.
#[tokio::main]
async fn main() -> io::Result<()> {
    let temp_dir = tempdir::TempDir::new("ya-vm-direct").expect("Failed to create temp dir");
    let temp_path = temp_dir.path();
    let inner_path = temp_path.join("inner");

    std::fs::create_dir_all(&inner_path).expect("Failed to create a dir inside temp dir");

    let notifications = Arc::new(Notifications::new());
    let mount_args = [
        ("tag0", temp_path.display()),
        ("tag1", inner_path.display()),
    ];
    let mut child = spawn_vm(temp_path, &mount_args);

    let ns = notifications.clone();
    let ga_mutex = GuestAgent::connected(temp_path.join("manager.sock"), 10, move |n, _g| {
        let notifications = ns.clone();
        async move { notifications.clone().handle(n) }.boxed()
    })
    .await?;
    let mut ga = ga_mutex.lock().await;

    let no_redir = [None, None, None];

    for (i, (tag, _)) in mount_args.iter().enumerate() {
        ga.mount(tag, &format!("/mnt/mnt{}/{}", i, tag))
            .await?
            .expect("Mount failed");
    }

    let id = ga
        .run_process(
            "/bin/ls",
            &["ls", "-al", "."],
            None,
            0,
            0,
            &no_redir,
            Some("/mnt"),
        )
        .await?
        .expect("Run process failed");
    println!("Spawned process with id: {}", id);
    notifications.process_died.notified().await;
    let out = ga
        .query_output(id, 1, 0, u64::MAX)
        .await?
        .expect("Output query failed");
    println!("Output:");
    io::stdout().write_all(&out)?;

    run_process_with_output(
        &mut ga,
        &notifications,
        "/bin/ls",
        &["ls", "-al", "/mnt/mnt1/tag1"],
    )
    .await?;

    run_process_with_output(&mut ga, &notifications, "/bin/mount", &["mount"]).await?;

    let fds = [
        None,
        Some(RedirectFdType::RedirectFdFile(
            "/mnt/mnt1/tag1/write_test".as_bytes(),
        )),
        None,
    ];
    let id = ga
        .run_process("/bin/echo", &["echo", "WRITE TEST"], None, 0, 0, &fds, None)
        .await?
        .expect("Run process failed");
    println!("Spawned process with id: {}", id);
    notifications.process_died.notified().await;

    run_process_with_output(
        &mut ga,
        &notifications,
        "/bin/cat",
        &["cat", "/mnt/mnt1/tag1/write_test"],
    )
    .await?;

    let id = ga
        .run_process("/bin/sleep", &["sleep", "10"], None, 0, 0, &no_redir, None)
        .await?
        .expect("Run process failed");
    println!("Spawned process with id: {}", id);

    ga.kill(id).await?.expect("Kill failed");
    notifications.process_died.notified().await;

    let id = ga
        .run_process(
            "/bin/bash",
            &[
                "bash",
                "-c",
                "for i in {1..8000}; do echo -ne a >> /big; done; cat /big",
            ],
            None,
            0,
            0,
            &[
                None,
                Some(RedirectFdType::RedirectFdPipeBlocking(0x1000)),
                None,
            ],
            None,
        )
        .await?
        .expect("Run process failed");
    println!("Spawned process with id: {}", id);
    notifications.output_available.notified().await;
    let out = ga
        .query_output(id, 1, 0, u64::MAX)
        .await?
        .expect("Output query failed");
    println!(
        "Big output 1: {} {}",
        out.len(),
        out.iter().filter(|x| **x != 0x61).count()
    );
    notifications.output_available.notified().await;
    let out = ga
        .query_output(id, 1, 0, u64::MAX)
        .await?
        .expect("Output query failed");
    println!(
        "Big output 2: {} {}",
        out.len(),
        out.iter().filter(|x| **x != 0x61).count()
    );

    let id = ga
        .run_process(
            "/bin/bash",
            &[
                "bash",
                "-c",
                "echo > /big; for i in {1..4000}; do echo -ne a >> /big; done; for i in {1..4096}; do echo -ne b >> /big; done; cat /big",
            ],
            None,
            0,
            0,
            &[
                None,
                Some(RedirectFdType::RedirectFdPipeCyclic(0x1000)),
                None,
            ],
            None,
        )
        .await?
        .expect("Run process failed");
    println!("Spawned process with id: {}", id);
    notifications.process_died.notified().await;
    notifications.output_available.notified().await;
    let out = ga
        .query_output(id, 1, 0, u64::MAX)
        .await?
        .expect("Output query failed");
    println!(
        "Big output 1: {} {}",
        out.len(),
        out.iter().filter(|x| **x != 0x62).count()
    );
    let out = ga
        .query_output(id, 1, 0, u64::MAX)
        .await?
        .expect("Output query failed");
    println!("Big output 2: {}, expected 0", out.len());

    let id = ga
        .run_entrypoint("/bin/sleep", &["sleep", "100"], None, 0, 0, &no_redir, None)
        .await?
        .expect("Run process failed");
    println!("Spawned process with id: {}", id);
    notifications.process_died.notified().await;

    ga.quit().await?.expect("Quit failed");

    /* VM should quit now. */
    let e = child.wait().await.expect("failed to wait on child");
    println!("{:?}", e);

    Ok(())
}

--------------------------------------------------------------------------------
/runtime/init-container/src/seccomp.c:
--------------------------------------------------------------------------------
#define _GNU_SOURCE
/* NOTE(review): the names of the sixteen system headers below were lost when
 * this listing was produced; restore them from the repository. */
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include "init-seccomp.h"

static const char *allow_syscalls[] = {
    "_llseek",
    "_newselect",
    "accept",
    "accept4",
    "access",
    "adjtimex",
    "alarm",
    "bind",
    "brk",
    "capget",
    "capset",
    "chdir",
    "chmod",
    "chown",
    "chown32",
    "chroot",
    "clock_adjtime",
    "clock_adjtime64",
    "clock_getres",
    "clock_getres_time64",
    "clock_gettime",
    "clock_gettime64",
    "clock_nanosleep",
    "clock_nanosleep_time64",
    "clone",
    "clone3",
    "close",
    "close_range",
    "connect",
    "copy_file_range",
    "creat",
    "dup",
    "dup2",
    "dup3",
    "epoll_create",
    "epoll_create1",
    "epoll_ctl",
    "epoll_ctl_old",
    "epoll_pwait",
    "epoll_pwait2",
    "epoll_wait",
    "epoll_wait_old",
    "eventfd",
    "eventfd2",
    "execve",
    "execveat",
    "exit",
    "exit_group",
    "faccessat",
    "faccessat2",
    "fadvise64",
    "fadvise64_64",
    "fallocate",
    "fanotify_mark",
    "fchdir",
    "fchmod",
    "fchmodat",
    "fchown",
    "fchown32",
    "fchownat",
    "fcntl",
    "fcntl64",
    "fdatasync",
    "fgetxattr",
    "flistxattr",
    "flock",
    "fork",
    "fremovexattr",
    "fsetxattr",
    "fstat",
    "fstat64",
    "fstatat64",
    "fstatfs",
    "fstatfs64",
    "fsync",
    "ftruncate",
"ftruncate64", 98 | "futex", 99 | "futex_time64", 100 | "futimesat", 101 | "get_mempolicy", 102 | "get_robust_list", 103 | "get_thread_area", 104 | "getcpu", 105 | "getcwd", 106 | "getdents", 107 | "getdents64", 108 | "getegid", 109 | "getegid32", 110 | "geteuid", 111 | "geteuid32", 112 | "getgid", 113 | "getgid32", 114 | "getgroups", 115 | "getgroups32", 116 | "getitimer", 117 | "getpeername", 118 | "getpgid", 119 | "getpgrp", 120 | "getpid", 121 | "getppid", 122 | "getpriority", 123 | "getrandom", 124 | "getresgid", 125 | "getresgid32", 126 | "getresuid", 127 | "getresuid32", 128 | "getrlimit", 129 | "getrusage", 130 | "getsid", 131 | "getsockname", 132 | "getsockopt", 133 | "gettid", 134 | "gettimeofday", 135 | "getuid", 136 | "getuid32", 137 | "getxattr", 138 | "inotify_add_watch", 139 | "inotify_init", 140 | "inotify_init1", 141 | "inotify_rm_watch", 142 | "io_cancel", 143 | "io_destroy", 144 | "io_getevents", 145 | "io_setup", 146 | "io_submit", 147 | "ioctl", 148 | "ioprio_get", 149 | "ioprio_set", 150 | "ipc", 151 | "keyctl", 152 | "kill", 153 | "landlock_add_rule", 154 | "landlock_create_ruleset", 155 | "landlock_restrict_self", 156 | "lchown", 157 | "lchown32", 158 | "lgetxattr", 159 | "link", 160 | "linkat", 161 | "listen", 162 | "listxattr", 163 | "llistxattr", 164 | "lremovexattr", 165 | "lseek", 166 | "lsetxattr", 167 | "lstat", 168 | "lstat64", 169 | "madvise", 170 | "mbind", 171 | "membarrier", 172 | "memfd_create", 173 | "memfd_secret", 174 | "mincore", 175 | "mkdir", 176 | "mkdirat", 177 | "mknod", 178 | "mknodat", 179 | "mlock", 180 | "mlock2", 181 | "mlockall", 182 | "mmap", 183 | "mmap2", 184 | "mprotect", 185 | "mq_getsetattr", 186 | "mq_notify", 187 | "mq_open", 188 | "mq_timedreceive", 189 | "mq_timedreceive_time64", 190 | "mq_timedsend", 191 | "mq_timedsend_time64", 192 | "mq_unlink", 193 | "mremap", 194 | #if 0 195 | "msgctl", 196 | "msgget", 197 | "msgrcv", 198 | "msgsnd", 199 | #endif 200 | "msync", 201 | "munlock", 202 | "munlockall", 
203 | "munmap", 204 | "name_to_handle_at", 205 | "nanosleep", 206 | "newfstatat", 207 | "open", 208 | "open_tree", 209 | "openat", 210 | "openat2", 211 | "pause", 212 | "pidfd_getfd", 213 | "pidfd_open", 214 | "pidfd_send_signal", 215 | "pipe", 216 | "pipe2", 217 | "pivot_root", 218 | "pkey_alloc", 219 | "pkey_free", 220 | "pkey_mprotect", 221 | "poll", 222 | "ppoll", 223 | "ppoll_time64", 224 | "prctl", 225 | "pread64", 226 | "preadv", 227 | "preadv2", 228 | "prlimit64", 229 | "process_mrelease", 230 | "process_vm_readv", 231 | "process_vm_writev", 232 | "pselect6", 233 | "pselect6_time64", 234 | "ptrace", 235 | "pwrite64", 236 | "pwritev", 237 | "pwritev2", 238 | "read", 239 | "readahead", 240 | "readdir", 241 | "readlink", 242 | "readlinkat", 243 | "readv", 244 | "reboot", 245 | "recv", 246 | "recvfrom", 247 | "recvmmsg", 248 | "recvmmsg_time64", 249 | "recvmsg", 250 | "remap_file_pages", 251 | "removexattr", 252 | "rename", 253 | "renameat", 254 | "renameat2", 255 | "restart_syscall", 256 | "rmdir", 257 | "rseq", 258 | "rt_sigaction", 259 | "rt_sigpending", 260 | "rt_sigprocmask", 261 | "rt_sigqueueinfo", 262 | "rt_sigreturn", 263 | "rt_sigsuspend", 264 | "rt_sigtimedwait", 265 | "rt_sigtimedwait_time64", 266 | "rt_tgsigqueueinfo", 267 | "sched_get_priority_max", 268 | "sched_get_priority_min", 269 | "sched_getaffinity", 270 | "sched_getattr", 271 | "sched_getparam", 272 | "sched_getscheduler", 273 | "sched_rr_get_interval", 274 | "sched_rr_get_interval_time64", 275 | "sched_setaffinity", 276 | "sched_setattr", 277 | "sched_setparam", 278 | "sched_setscheduler", 279 | "sched_yield", 280 | "seccomp", 281 | "select", 282 | #if 0 283 | "semctl", 284 | "semget", 285 | "semop", 286 | "semtimedop", 287 | "semtimedop_time64", 288 | #endif 289 | "send", 290 | "sendfile", 291 | "sendfile64", 292 | "sendmmsg", 293 | "sendmsg", 294 | "sendto", 295 | "set_mempolicy", 296 | "set_robust_list", 297 | "set_thread_area", 298 | "set_tid_address", 299 | "setfsgid", 300 | 
"setfsgid32", 301 | "setfsuid", 302 | "setfsuid32", 303 | "setgid", 304 | "setgid32", 305 | "setgroups", 306 | "setgroups32", 307 | "setitimer", 308 | "setpgid", 309 | "setpriority", 310 | "setregid", 311 | "setregid32", 312 | "setresgid", 313 | "setresgid32", 314 | "setresuid", 315 | "setresuid32", 316 | "setreuid", 317 | "setreuid32", 318 | "setrlimit", 319 | "setsid", 320 | "setsockopt", 321 | "setuid", 322 | "setuid32", 323 | "setxattr", 324 | "shmat", 325 | "shmctl", 326 | "shmdt", 327 | "shmget", 328 | "shutdown", 329 | "sigaction", 330 | "sigaltstack", 331 | "signal", 332 | "signalfd", 333 | "signalfd4", 334 | "sigpending", 335 | "sigprocmask", 336 | "sigreturn", 337 | "sigsuspend", 338 | "socket", 339 | "socketcall", 340 | "socketpair", 341 | "splice", 342 | "stat", 343 | "stat64", 344 | "statfs", 345 | "statfs64", 346 | "statx", 347 | "symlink", 348 | "symlinkat", 349 | "sync", 350 | "sync_file_range", 351 | "syncfs", 352 | "syscall", 353 | "sysinfo", 354 | "syslog", 355 | "tee", 356 | "tgkill", 357 | "time", 358 | "timer_create", 359 | "timer_delete", 360 | "timer_getoverrun", 361 | "timer_gettime", 362 | "timer_gettime64", 363 | "timer_settime", 364 | "timer_settime64", 365 | "timerfd", 366 | "timerfd_create", 367 | "timerfd_gettime", 368 | "timerfd_gettime64", 369 | "timerfd_settime", 370 | "timerfd_settime64", 371 | "times", 372 | "tkill", 373 | "truncate", 374 | "truncate64", 375 | "ugetrlimit", 376 | "umask", 377 | "uname", 378 | "unlink", 379 | "unlinkat", 380 | "utime", 381 | "utimensat", 382 | "utimensat_time64", 383 | "utimes", 384 | "vfork", 385 | "wait4", 386 | "waitid", 387 | "waitpid", 388 | "write", 389 | "writev", 390 | }; 391 | 392 | static const char *arm_syscalls[] = { 393 | "arm_fadvise64_64", 394 | "arm_sync_file_range", 395 | "breakpoint", 396 | "cacheflush", 397 | "set_tls", 398 | "sync_file_range2", 399 | }; 400 | 401 | static const char *x86_syscalls[] = { 402 | "arch_prctl", 403 | }; 404 | 405 | static const char *eperm_syscalls[] 
= { 406 | "bdflush", 407 | "bpf", 408 | "fanotify_init", 409 | "fsconfig", 410 | "fsmount", 411 | "fsopen", 412 | "fspick", 413 | "io_pgetevents", 414 | "kexec_file_load", 415 | "kexec_load", 416 | "migrate_pages", 417 | "mount", 418 | "mount_setattr", 419 | "move_mount", 420 | "move_pages", 421 | "nfsservctl", 422 | "nice", 423 | "oldfstat", 424 | "oldlstat", 425 | "oldolduname", 426 | "oldstat", 427 | "olduname", 428 | "pciconfig_iobase", 429 | "pciconfig_read", 430 | "pciconfig_write", 431 | "perf_event_open", 432 | "quotactl", 433 | "setdomainname", 434 | "sethostname", 435 | "setns", 436 | "sgetmask", 437 | "ssetmask", 438 | "swapcontext", 439 | "swapoff", 440 | "swapon", 441 | "sysfs", 442 | "umount", 443 | "umount2", 444 | "unshare", 445 | "uselib", 446 | "userfaultfd", 447 | "ustat", 448 | "vm86", 449 | "vm86old", 450 | "vmsplice", 451 | }; 452 | 453 | #define ARRAY_SIZE(x) (sizeof(x)/sizeof(x[0])) 454 | 455 | static void 456 | ya_runtime_add_syscalls(const scmp_filter_ctx ctx, const char *const *syscalls, 457 | const size_t count, const uint32_t arch, const uint32_t action) { 458 | for (size_t i = 0; i < count; ++i) { 459 | const int syscall_number = seccomp_syscall_resolve_name_rewrite(arch, syscalls[i]); 460 | if (syscall_number == __NR_SCMP_ERROR) 461 | abort(); 462 | const int status = seccomp_rule_add(ctx, action, syscall_number, 0); 463 | if (status != 0) 464 | abort(); 465 | } 466 | } 467 | 468 | static scmp_filter_ctx ctx; 469 | 470 | void setup_sandbox(void) { 471 | uint32_t const arch = seccomp_arch_native(); 472 | ctx = seccomp_init(SCMP_ACT_ERRNO(ENOSYS)); 473 | 474 | if (ctx == NULL) 475 | abort(); 476 | 477 | ya_runtime_add_syscalls(ctx, allow_syscalls, ARRAY_SIZE(allow_syscalls), arch, SCMP_ACT_ALLOW); 478 | const int status = seccomp_rule_add(ctx, SCMP_ACT_ALLOW, 479 | SCMP_SYS(personality), 1, SCMP_CMP64(0, SCMP_CMP_EQ, 0, 0)); 480 | if (status != 0) { 481 | abort(); 482 | } 483 | 484 | switch (arch) { 485 | case SCMP_ARCH_ARM: 486 | case 
SCMP_ARCH_AARCH64: 487 | ya_runtime_add_syscalls(ctx, arm_syscalls, ARRAY_SIZE(arm_syscalls), 488 | arch, SCMP_ACT_ALLOW); 489 | break; 490 | case SCMP_ARCH_X86: 491 | case SCMP_ARCH_X86_64: 492 | ya_runtime_add_syscalls(ctx, x86_syscalls, ARRAY_SIZE(x86_syscalls), 493 | arch, SCMP_ACT_ALLOW); 494 | default: 495 | break; 496 | } 497 | 498 | ya_runtime_add_syscalls(ctx, eperm_syscalls, ARRAY_SIZE(eperm_syscalls), arch, SCMP_ACT_ERRNO(EPERM)); 499 | const int fd = memfd_create("fake", MFD_CLOEXEC); 500 | if (fd < 3) 501 | abort(); 502 | if (seccomp_export_bpf(ctx, fd)) 503 | abort(); 504 | } 505 | 506 | void sandbox_apply(void) { 507 | if (seccomp_load(ctx)) 508 | abort(); 509 | } 510 | -------------------------------------------------------------------------------- /runtime/src/self_test.rs: -------------------------------------------------------------------------------- 1 | use anyhow::bail; 2 | use futures::future::BoxFuture; 3 | use futures::lock::Mutex; 4 | use futures::FutureExt; 5 | use notify::event::{AccessKind, AccessMode}; 6 | use notify::{Event, EventKind, RecommendedWatcher, RecursiveMode, Watcher}; 7 | use serde_json::Value; 8 | use std::collections::HashMap; 9 | use std::future; 10 | use std::io::Write; 11 | use std::path::{Path, PathBuf}; 12 | use std::str::FromStr; 13 | use std::sync::Arc; 14 | use std::time::Duration; 15 | use tokio::fs; 16 | use tokio::sync::Notify; 17 | use uuid::Uuid; 18 | use ya_client_model::activity::exe_script_command::VolumeMount; 19 | use ya_runtime_sdk::runtime_api::deploy::ContainerVolume; 20 | use ya_runtime_sdk::runtime_api::server::RuntimeHandler; 21 | use ya_runtime_sdk::{runtime_api::server, Error, ErrorExt, EventEmitter}; 22 | use ya_runtime_sdk::{ProcessStatus, RunProcess, RuntimeStatus}; 23 | 24 | use crate::deploy::{Deployment, DeploymentMount}; 25 | use crate::vmrt::{runtime_dir, RuntimeData}; 26 | use crate::{qcow2_min, Runtime, TestConfig}; 27 | 28 | const FILE_TEST_IMAGE: &str = "self-test.gvmi"; 29 | const 
FILE_TEST_EXECUTABLE: &str = "ya-self-test"; 30 | 31 | struct RaiiDir(PathBuf); 32 | 33 | impl RaiiDir { 34 | pub fn create(path: PathBuf) -> std::io::Result { 35 | std::fs::create_dir(&path)?; 36 | Ok(Self(path)) 37 | } 38 | } 39 | 40 | impl Drop for RaiiDir { 41 | fn drop(&mut self) { 42 | std::fs::remove_dir_all(&self.0) 43 | .unwrap_or_else(|_| panic!("Couldn't remove {}", self.0.display())); 44 | } 45 | } 46 | 47 | pub(crate) async fn test( 48 | pci_device_id: Option>, 49 | test_config: TestConfig, 50 | ) -> Result<(), Error> { 51 | run_self_test(verify_status, pci_device_id, test_config).await; 52 | Ok(()) 53 | } 54 | 55 | pub(crate) fn verify_status(status: anyhow::Result) -> anyhow::Result { 56 | let status = status?; 57 | Ok(serde_json::to_string(&status)?) 58 | } 59 | 60 | pub(crate) async fn run_self_test( 61 | handle_result: HANDLER, 62 | pci_device_id: Option>, 63 | test_config: TestConfig, 64 | ) where 65 | HANDLER: Fn(anyhow::Result) -> anyhow::Result, 66 | { 67 | struct Emitter; 68 | 69 | impl RuntimeHandler for Emitter { 70 | fn on_process_status<'a>(&self, _: ProcessStatus) -> BoxFuture<'a, ()> { 71 | future::ready(()).boxed() 72 | } 73 | 74 | fn on_runtime_status<'a>(&self, _: RuntimeStatus) -> BoxFuture<'a, ()> { 75 | future::ready(()).boxed() 76 | } 77 | } 78 | 79 | let emitter = Emitter; 80 | 81 | let tmp = std::env::temp_dir(); 82 | let work_dir = tmp.join(format!("ya-runtime-vm-self-test-{}", Uuid::new_v4())); 83 | let work_dir_handle = 84 | RaiiDir::create(work_dir.clone()).expect("Failed to create workdir for self-test"); 85 | 86 | let deployment = self_test_deployment(&work_dir, &test_config) 87 | .await 88 | .expect("Prepares self test img deployment"); 89 | 90 | let output_volume = 91 | get_self_test_only_volume(&deployment).expect("Self test image has an output volume"); 92 | let output_file_name = format!("out_{}.json", Uuid::new_v4()); 93 | let output_file_vm = PathBuf::from_str(&output_volume.path) 94 | .expect("Can create self test 
volume path") 95 | .join(&output_file_name); 96 | let output_dir = work_dir.join(output_volume.name); 97 | let output_file = output_dir.join(&output_file_name); 98 | 99 | let runtime = self_test_runtime(deployment, pci_device_id); 100 | 101 | let work_dir = &work_dir; 102 | 103 | log::info!("Starting runtime"); 104 | let start_response = start_runtime(emitter, work_dir.clone(), runtime.data.clone()) 105 | .await 106 | .expect("Starts runtime"); 107 | log::info!("Runtime start response {:?}", start_response); 108 | 109 | log::info!("Runtime: {:?}", runtime.data); 110 | log::info!("Running self test command"); 111 | let timeout = test_config.test_timeout(); 112 | run_self_test_command( 113 | runtime.data.clone(), 114 | &output_dir, 115 | &output_file, 116 | &output_file_vm, 117 | timeout, 118 | ) 119 | .await 120 | .expect("Can run self-test command"); 121 | 122 | log::info!("Stopping runtime"); 123 | crate::stop(runtime.data.clone()) 124 | .await 125 | .expect("Stops runtime"); 126 | 127 | log::info!("Handling result"); 128 | let out_result = read_json(&output_file); 129 | let result = handle_result(out_result).expect("Handles test result"); 130 | if !result.is_empty() { 131 | // the server refuses to stop by itself; print output to stdout 132 | let mut stdout = std::io::stdout().lock(); 133 | stdout.write_all(result.as_bytes()).unwrap(); 134 | stdout.write_all(b"\n").unwrap(); 135 | stdout.flush().unwrap(); 136 | } 137 | 138 | log::debug!("Deleting output files"); 139 | std::fs::remove_dir_all(output_dir).expect("Removes self-test output dir"); 140 | 141 | drop(work_dir_handle); 142 | } 143 | 144 | fn self_test_runtime(deployment: Deployment, pci_device_id: Option>) -> Runtime { 145 | let runtime_data = RuntimeData { 146 | deployment: Some(deployment), 147 | pci_device_id, 148 | ..Default::default() 149 | }; 150 | Runtime { 151 | data: Arc::new(Mutex::new(runtime_data)), 152 | } 153 | } 154 | 155 | /// Builds self test deployment based on `FILE_TEST_IMAGE` from 
path returned by `runtime_dir()` 156 | async fn self_test_deployment( 157 | work_dir: &Path, 158 | test_config: &TestConfig, 159 | ) -> anyhow::Result { 160 | let package_path = runtime_dir() 161 | .expect("Runtime directory not found") 162 | .join(FILE_TEST_IMAGE) 163 | .canonicalize() 164 | .expect("Test image not found"); 165 | 166 | let package_paths = [package_path]; 167 | 168 | let cpu_cores = test_config.test_cpu_cores; 169 | let mem_gib = test_config.test_mem_gib; 170 | log::info!("Task packages:"); 171 | for path in package_paths.iter() { 172 | log::info!("{}", path.display()); 173 | } 174 | let mem_mib = (mem_gib * 1024.) as usize; 175 | let package_file = fs::File::open(package_paths[0].clone()) 176 | .await 177 | .or_err("Error reading package file")?; 178 | let deployment = Deployment::try_from_input( 179 | package_file, 180 | cpu_cores, 181 | mem_mib, 182 | &package_paths, 183 | HashMap::from_iter([ 184 | ( 185 | "/golem/storage".to_string(), 186 | VolumeMount::Storage { 187 | size: "1mi".parse().unwrap(), 188 | preallocate: None, 189 | errors: Some("remount-ro".to_string()), 190 | }, 191 | ), 192 | ( 193 | "/golem/storage2".to_string(), 194 | VolumeMount::Ram { 195 | size: "1gi".parse().unwrap(), 196 | }, 197 | ), 198 | ]), 199 | "golem".to_string(), 200 | ) 201 | .await 202 | .or_err("Error reading package metadata")?; 203 | 204 | for vol in &deployment.volumes { 205 | let vol_dir = work_dir.join(&vol.name); 206 | log::debug!("Creating volume dir: {vol_dir:?} for path {}", vol.path); 207 | fs::create_dir_all(vol_dir) 208 | .await 209 | .or_err("Failed to create volume dir")?; 210 | } 211 | 212 | for DeploymentMount { 213 | name, 214 | mount, 215 | guest_path, 216 | } in &deployment.mounts 217 | { 218 | let VolumeMount::Storage { 219 | size, preallocate, .. 
220 | } = mount 221 | else { 222 | continue; 223 | }; 224 | 225 | let qcow2_dir = work_dir.join(name); 226 | log::debug!( 227 | "Creating qcow2 image: {qcow2_dir:?} for path {guest_path} with parameters {mount:?}" 228 | ); 229 | let file = fs::File::create(qcow2_dir).await?; 230 | let qcow2 = 231 | qcow2_min::Qcow2Image::new(size.as_u64(), preallocate.unwrap_or_default().as_u64()); 232 | qcow2.write(file).await?; 233 | } 234 | 235 | Ok(deployment) 236 | } 237 | 238 | /// Returns the only volume of the self test deployment. 239 | /// Fails if `self_test_deployment` has no volumes or more than one. 240 | fn get_self_test_only_volume(self_test_deployment: &Deployment) -> anyhow::Result { 241 | if self_test_deployment.volumes.len() != 1 { 242 | bail!("Self test image has to have one volume"); 243 | } 244 | Ok(self_test_deployment.volumes.first().unwrap().clone()) 245 | } 246 | 247 | /// Starts runtime with runtime handler wrapped to log process stdout and stderr 248 | async fn start_runtime( 249 | handler: HANDLER, 250 | work_dir: PathBuf, 251 | runtime_data: Arc>, 252 | ) -> anyhow::Result> { 253 | let emitter = ProcessOutputLogger::new(handler); 254 | let emitter = EventEmitter::spawn(emitter); 255 | crate::start(work_dir.clone(), runtime_data, emitter.clone()).await 256 | } 257 | 258 | /// Runs command, monitors `output_dir` looking for `output_file`. 259 | /// Returns an error if the command cannot be run; if `output_file` is not created before `timeout`, only an error is logged.
260 | async fn run_self_test_command( 261 | runtime_data: Arc>, 262 | output_dir: &Path, 263 | output_file: &Path, 264 | output_file_vm: &Path, 265 | timeout: Duration, 266 | ) -> anyhow::Result<()> { 267 | let run_process: RunProcess = server::RunProcess { 268 | bin: format!("/{FILE_TEST_EXECUTABLE}"), 269 | args: vec![ 270 | FILE_TEST_EXECUTABLE.into(), 271 | output_file_vm.to_string_lossy().into(), 272 | ], 273 | ..Default::default() 274 | }; 275 | log::info!("Self test process: {run_process:?}"); 276 | 277 | let output_notification = Arc::new(Notify::new()); 278 | // Keep `_watcher` alive: the watcher shuts down when dropped. 279 | let _watcher = spawn_output_watcher(output_notification.clone(), output_dir, output_file)?; 280 | 281 | if let Err(err) = crate::run_command(runtime_data, run_process).await { 282 | bail!("Code: {}, msg: {}", err.code, err.message); 283 | }; 284 | 285 | if let Err(err) = tokio::time::timeout(timeout, output_notification.notified()).await { 286 | log::error!("File {output_file:?} not created before timeout of {timeout:?}s. Err: {err}."); 287 | }; 288 | Ok(()) 289 | } 290 | 291 | fn spawn_output_watcher( 292 | output_notification: Arc, 293 | output_dir: &Path, 294 | output_file: &Path, 295 | ) -> anyhow::Result { 296 | let output_file = output_file.into(); 297 | let mut watcher = notify::recommended_watcher(move |res| match res { 298 | Ok(Event { 299 | kind: EventKind::Access(AccessKind::Close(AccessMode::Write)), 300 | paths, 301 | ..
302 | }) if paths.contains(&output_file) => output_notification.notify_waiters(), 303 | Ok(event) => { 304 | log::trace!("Output file watch event: {:?}", event); 305 | } 306 | Err(error) => { 307 | log::error!("Output file watch error: {:?}", error); 308 | } 309 | })?; 310 | 311 | watcher.watch(output_dir, RecursiveMode::Recursive)?; 312 | Ok(watcher) 313 | } 314 | 315 | fn read_json(output_file: &Path) -> anyhow::Result { 316 | let output_file = std::fs::File::open(output_file)?; 317 | Ok(serde_json::from_reader(&output_file)?) 318 | } 319 | struct ProcessOutputLogger { 320 | handler: Box, 321 | } 322 | 323 | impl ProcessOutputLogger { 324 | fn new(handler: HANDLER) -> Self { 325 | let handler = Box::new(handler); 326 | Self { handler } 327 | } 328 | } 329 | 330 | impl RuntimeHandler for ProcessOutputLogger { 331 | fn on_process_status<'a>(&self, status: ProcessStatus) -> futures::future::BoxFuture<'a, ()> { 332 | if !status.stdout.is_empty() { 333 | log::debug!( 334 | "PID: {}, stdout: {}", 335 | status.pid, 336 | String::from_utf8_lossy(&status.stdout) 337 | ); 338 | } else if !status.stderr.is_empty() { 339 | log::debug!( 340 | "PID: {}, stderr: {}", 341 | status.pid, 342 | String::from_utf8_lossy(&status.stderr) 343 | ); 344 | } 345 | self.handler.on_process_status(status) 346 | } 347 | fn on_runtime_status<'a>(&self, status: RuntimeStatus) -> futures::future::BoxFuture<'a, ()> { 348 | self.handler.on_runtime_status(status) 349 | } 350 | } 351 | -------------------------------------------------------------------------------- /runtime/src/vmrt.rs: -------------------------------------------------------------------------------- 1 | use std::net::{Ipv4Addr, SocketAddrV4}; 2 | use std::path::PathBuf; 3 | use std::process::Stdio; 4 | use std::sync::atomic::AtomicU32; 5 | use std::sync::atomic::Ordering::Relaxed; 6 | use std::sync::Arc; 7 | 8 | use futures::lock::Mutex; 9 | use futures::FutureExt; 10 | use tokio::io::AsyncBufReadExt; 11 | use tokio::{io, process, 
spawn}; 12 | 13 | use ya_client_model::activity::exe_script_command::VolumeMount; 14 | use ya_runtime_sdk::runtime_api::server; 15 | use ya_runtime_sdk::server::ContainerEndpoint; 16 | use ya_runtime_sdk::{serialize, ErrorExt, EventEmitter}; 17 | 18 | use crate::deploy::{Deployment, DeploymentMount}; 19 | use crate::guest_agent_comm::{GuestAgent, Notification}; 20 | 21 | const DIR_RUNTIME: &str = "runtime"; 22 | const FILE_RUNTIME: &str = "vmrt"; 23 | const FILE_VMLINUZ: &str = "vmlinuz-virt"; 24 | const FILE_INITRAMFS: &str = "initramfs.cpio.gz"; 25 | const FILE_NVIDIA_FILES: &str = "nvidia-files.squashfs"; 26 | 27 | #[derive(Default)] 28 | pub struct RuntimeData { 29 | pub runtime: Option, 30 | pub vpn: Option, 31 | pub inet: Option, 32 | pub deployment: Option, 33 | pub ga: Option>>, 34 | pub pci_device_id: Option>, 35 | } 36 | 37 | impl RuntimeData { 38 | pub fn runtime(&mut self) -> anyhow::Result { 39 | self.runtime 40 | .take() 41 | .ok_or_else(|| anyhow::anyhow!("Runtime process not available")) 42 | } 43 | 44 | pub fn deployment(&self) -> anyhow::Result<&Deployment> { 45 | self.deployment 46 | .as_ref() 47 | .ok_or_else(|| anyhow::anyhow!("Runtime not deployed")) 48 | } 49 | 50 | pub fn ga(&self) -> anyhow::Result>> { 51 | self.ga 52 | .clone() 53 | .ok_or_else(|| anyhow::anyhow!("Runtime not started")) 54 | } 55 | } 56 | 57 | pub async fn start_vmrt( 58 | work_dir: PathBuf, 59 | runtime_data: Arc>, 60 | emitter: EventEmitter, 61 | ) -> anyhow::Result> { 62 | let runtime_dir = runtime_dir().or_err("Unable to resolve current directory")?; 63 | let temp_dir = std::env::temp_dir(); 64 | let uid = uuid::Uuid::new_v4().simple().to_string(); 65 | 66 | let mut data = runtime_data.lock().await; 67 | let deployment = data.deployment.clone().or_err("Missing deployment data")?; 68 | let volumes = deployment.volumes.clone(); 69 | 70 | let manager_sock = temp_dir.join(format!("{}.sock", uid)); 71 | let vpn_remote = data.vpn.clone(); 72 | let inet_remote = 
data.inet.clone(); 73 | 74 | let mut kernel_cmdline = "console=ttyS0 panic=1".to_string(); 75 | 76 | let mut cmd = process::Command::new(runtime_dir.join(FILE_RUNTIME)); 77 | cmd.current_dir(&runtime_dir); 78 | let memory_size = format!("{}M", deployment.mem_mib); 79 | let cpu_cores = deployment.cpu_cores.to_string(); 80 | let manager_sock_path = format!( 81 | "socket,path={},server=on,wait=off,id=manager_cdev", 82 | manager_sock.display() 83 | ); 84 | let mut args = vec![ 85 | "-nographic", 86 | "-no-reboot", 87 | "-m", 88 | memory_size.as_str(), 89 | "-kernel", 90 | FILE_VMLINUZ, 91 | "-initrd", 92 | FILE_INITRAMFS, 93 | "-enable-kvm", 94 | "-cpu", 95 | "host,-sgx", 96 | "-smp", 97 | cpu_cores.as_str(), 98 | "-device", 99 | "virtio-serial", 100 | "-device", 101 | "virtio-rng-pci", 102 | "-chardev", 103 | manager_sock_path.as_str(), 104 | "-device", 105 | "virtserialport,chardev=manager_cdev,name=manager_port", 106 | ]; 107 | 108 | let rootfs_devices: Vec<(String, String)> = deployment 109 | .task_packages 110 | .iter() 111 | .enumerate() 112 | .map(|(i, path)| { 113 | let drive = format!( 114 | "file={},cache=unsafe,readonly=on,format=raw,id=rootfs-{},if=none", 115 | path.display(), 116 | i 117 | ); 118 | let device = format!("virtio-blk-pci,drive=rootfs-{},serial=rootfs-{}", i, i); 119 | (drive, device) 120 | }) 121 | .collect(); 122 | 123 | for (drive, device) in rootfs_devices.iter() { 124 | args.push("-drive"); 125 | args.push(drive); 126 | args.push("-device"); 127 | args.push(device); 128 | } 129 | 130 | cmd.args(args); 131 | 132 | for ( 133 | vol_idx, 134 | DeploymentMount { 135 | name, 136 | guest_path, 137 | mount, 138 | }, 139 | ) in deployment.mounts.iter().enumerate() 140 | { 141 | match mount { 142 | VolumeMount::Host {} => { 143 | log::warn!("DeploymentMount::mount should never be VolumeMount::Host"); 144 | } 145 | VolumeMount::Storage { errors, .. 
} => { 146 | let errors = errors.as_deref().unwrap_or("continue"); 147 | 148 | let img_path = work_dir.join(name); 149 | cmd.args([ 150 | "-drive", 151 | format!( 152 | "file={},format=qcow2,media=disk,id=vol-{vol_idx},if=none", 153 | img_path.display() 154 | ) 155 | .as_str(), 156 | "-device", 157 | format!("virtio-blk-pci,drive=vol-{vol_idx},serial=vol-{vol_idx}").as_ref(), 158 | ]); 159 | kernel_cmdline.push_str(&format!(" vol-{vol_idx}-path={guest_path}")); 160 | kernel_cmdline.push_str(&format!(" vol-{vol_idx}-errors={errors}")); 161 | } 162 | VolumeMount::Ram { size } => { 163 | let size = size.as_u64(); 164 | kernel_cmdline.push_str(&format!(" vol-{vol_idx}-path={guest_path}")); 165 | kernel_cmdline.push_str(&format!(" vol-{vol_idx}-size={size}")); 166 | } 167 | } 168 | } 169 | 170 | if let Some(pci_device_id) = &data.pci_device_id { 171 | for device_id in pci_device_id.iter() { 172 | cmd.arg("-device"); 173 | cmd.arg(format!("vfio-pci,host={}", device_id).as_str()); 174 | } 175 | } else { 176 | cmd.arg("-vga"); 177 | cmd.arg("none"); 178 | } 179 | 180 | if runtime_dir.join(FILE_NVIDIA_FILES).exists() { 181 | cmd.args([ 182 | "-drive", 183 | format!( 184 | "file={},cache=unsafe,readonly=on,format=raw,id=nvidia-files,if=none", 185 | runtime_dir.join(FILE_NVIDIA_FILES).display() 186 | ) 187 | .as_str(), 188 | "-device", 189 | "virtio-blk-pci,drive=nvidia-files,serial=nvidia-files" 190 | .to_string() 191 | .as_ref(), 192 | ]); 193 | } 194 | 195 | kernel_cmdline.push_str(&format!(" hostname={}", deployment.hostname)); 196 | 197 | cmd.args(["-append", &kernel_cmdline]); 198 | 199 | if vpn_remote.is_some() || inet_remote.is_some() { 200 | let mut pair = SocketPairConf::default(); 201 | 202 | pair.probe().await?; 203 | 204 | if let Some(vpn_remote) = vpn_remote { 205 | let vpn = configure_netdev_endpoint(&mut cmd, "vpn", &vpn_remote, pair.first)?; 206 | data.vpn.replace(vpn); 207 | } 208 | 209 | if let Some(inet_remote) = inet_remote { 210 | let inet = 
configure_netdev_endpoint(&mut cmd, "inet", &inet_remote, pair.second)?; 211 | data.inet.replace(inet); 212 | } 213 | } 214 | 215 | for (idx, volume) in volumes.iter().enumerate() { 216 | cmd.arg("-virtfs"); 217 | cmd.arg(format!( 218 | "local,id={tag},path={path},security_model=none,mount_tag={tag}", 219 | tag = format!("mnt{}", idx), 220 | path = work_dir.join(&volume.name).to_string_lossy(), 221 | )); 222 | } 223 | 224 | log::info!("Executing command: {cmd:?}"); 225 | 226 | let mut runtime = cmd 227 | .stdin(Stdio::null()) 228 | .stdout(Stdio::piped()) 229 | .kill_on_drop(true) 230 | .spawn()?; 231 | 232 | let stdout = runtime.stdout.take().unwrap(); 233 | spawn(reader_to_log(stdout)); 234 | 235 | let ga = GuestAgent::connected(manager_sock, 10, move |notification, ga| { 236 | let mut emitter = emitter.clone(); 237 | async move { 238 | let status = notification_into_status(notification, ga).await; 239 | emitter.emit(status).await; 240 | } 241 | .boxed() 242 | }) 243 | .await?; 244 | 245 | { 246 | let mut ga = ga.lock().await; 247 | for (idx, volume) in deployment.volumes.iter().enumerate() { 248 | ga.mount(format!("mnt{}", idx).as_str(), volume.path.as_str()) 249 | .await? 
250 | .expect("Mount failed"); 251 | } 252 | } 253 | 254 | data.runtime.replace(runtime); 255 | data.ga.replace(ga); 256 | 257 | Ok(None) 258 | } 259 | 260 | #[derive(Copy, Clone, Debug)] 261 | struct SocketConf { 262 | ip: Ipv4Addr, 263 | udp: u16, 264 | tcp: u16, 265 | } 266 | 267 | #[derive(Debug)] 268 | struct SocketPairConf { 269 | first: SocketConf, 270 | second: SocketConf, 271 | } 272 | 273 | impl Default for SocketPairConf { 274 | fn default() -> Self { 275 | let ip = Ipv4Addr::new(127, 0, 0, 1); 276 | Self { 277 | first: SocketConf { ip, udp: 0, tcp: 0 }, 278 | second: SocketConf { ip, udp: 0, tcp: 0 }, 279 | } 280 | } 281 | } 282 | 283 | impl SocketPairConf { 284 | // FIXME: TOCTOU race - the probed ports are released when this function returns and may be taken before the VM process binds them 285 | async fn probe(&mut self) -> anyhow::Result<()> { 286 | let first = std::net::UdpSocket::bind((self.first.ip, self.first.udp))?; 287 | let second = std::net::UdpSocket::bind((self.second.ip, self.second.udp))?; 288 | 289 | self.first.udp = first.local_addr()?.port(); 290 | self.second.udp = second.local_addr()?.port(); 291 | 292 | let first = std::net::TcpListener::bind((self.first.ip, self.first.tcp))?; 293 | let second = std::net::TcpListener::bind((self.second.ip, self.second.tcp))?; 294 | 295 | self.first.tcp = first.local_addr()?.port(); 296 | self.second.tcp = second.local_addr()?.port(); 297 | 298 | Ok(()) 299 | } 300 | } 301 | 302 | fn configure_netdev_endpoint( 303 | cmd: &mut process::Command, 304 | id: &str, 305 | endpoint: &ContainerEndpoint, 306 | conf: SocketConf, 307 | ) -> anyhow::Result { 308 | static COUNTER: AtomicU32 = AtomicU32::new(1); 309 | 310 | let ipv4 = conf.ip; 311 | 312 | let endpoint = match endpoint { 313 | ContainerEndpoint::UdpDatagram(remote_addr) => { 314 | let port = conf.udp; 315 | 316 | cmd.arg("-netdev"); 317 | cmd.arg(format!( 318 | "socket,id={id},udp={remote_addr},localaddr={ipv4}:{port}" 319 | )); 320 | 321 | ContainerEndpoint::UdpDatagram(SocketAddrV4::new(ipv4, port).into()) 322 | } 323 |
ContainerEndpoint::TcpStream(remote_addr) => { 324 | cmd.arg("-netdev"); 325 | cmd.arg(format!("socket,id={id},connect={remote_addr}")); 326 | 327 | ContainerEndpoint::TcpStream(*remote_addr) 328 | } 329 | ContainerEndpoint::TcpListener(_) => { 330 | let port = conf.tcp; 331 | 332 | cmd.arg("-netdev"); 333 | cmd.arg(format!("socket,id={id},listen={ipv4}:{port}")); 334 | 335 | ContainerEndpoint::TcpStream(SocketAddrV4::new(ipv4, port).into()) 336 | } 337 | _ => return Err(anyhow::anyhow!("Unsupported remote VPN VM endpoint")), 338 | }; 339 | 340 | let counter = COUNTER.fetch_add(1, Relaxed); 341 | let bytes = counter.to_be_bytes(); 342 | 343 | cmd.arg("-device"); 344 | cmd.arg(format!( 345 | "virtio-net-pci,netdev={id},mac=90:13:{:0x}", 346 | HexWriter(&bytes), 347 | )); 348 | 349 | Ok(endpoint) 350 | } 351 | 352 | struct HexWriter<'a>(&'a [u8]); 353 | 354 | impl<'a> std::fmt::LowerHex for HexWriter<'a> { 355 | fn fmt(&self, fmt: &mut std::fmt::Formatter) -> std::fmt::Result { 356 | for (i, byte) in self.0.iter().enumerate() { 357 | let sep = if i < self.0.len().saturating_sub(1) { 358 | ":" 359 | } else { 360 | "" 361 | }; 362 | fmt.write_fmt(format_args!("{:02x}{}", byte, sep))?; 363 | } 364 | Ok(()) 365 | } 366 | } 367 | 368 | pub fn runtime_dir() -> io::Result { 369 | Ok(std::env::current_exe()? 370 | .parent() 371 | .ok_or_else(|| io::Error::from(io::ErrorKind::NotFound))? 
372 | .to_path_buf() 373 | .join(DIR_RUNTIME)) 374 | } 375 | 376 | async fn reader_to_log(reader: T) { 377 | let mut reader = io::BufReader::new(reader); 378 | let mut buf = Vec::new(); 379 | loop { 380 | match reader.read_until(b'\n', &mut buf).await { 381 | Ok(0) => break, 382 | Ok(_) => { 383 | let bytes = strip_ansi_escapes::strip(&buf).unwrap(); 384 | log::debug!("VM: {}", String::from_utf8_lossy(&bytes).trim_end()); 385 | buf.clear(); 386 | } 387 | Err(e) => log::error!("VM output error: {}", e), 388 | } 389 | } 390 | } 391 | 392 | async fn notification_into_status( 393 | notification: Notification, 394 | ga: Arc>, 395 | ) -> server::ProcessStatus { 396 | match notification { 397 | Notification::OutputAvailable { id, fd } => { 398 | log::debug!("Process {} has output available on fd {}", id, fd); 399 | 400 | let output = { 401 | let result = { 402 | let mut guard = ga.lock().await; 403 | guard.query_output(id, fd as u8, 0, u64::MAX).await 404 | }; 405 | match result { 406 | Ok(Ok(vec)) => vec, 407 | Ok(Err(e)) => { 408 | log::error!("Remote error while querying output: {:?}", e); 409 | Vec::new() 410 | } 411 | Err(e) => { 412 | log::error!("Error querying output: {:?}", e); 413 | Vec::new() 414 | } 415 | } 416 | }; 417 | let (stdout, stderr) = match fd { 418 | 1 => (output, Vec::new()), 419 | _ => (Vec::new(), output), 420 | }; 421 | 422 | server::ProcessStatus { 423 | pid: id, 424 | running: true, 425 | return_code: 0, 426 | stdout, 427 | stderr, 428 | } 429 | } 430 | Notification::ProcessDied { id, reason } => { 431 | log::debug!("Process {} died with {:?}", id, reason); 432 | 433 | // TODO: reason._type ? 
434 | server::ProcessStatus { 435 | pid: id, 436 | running: false, 437 | return_code: reason.status as i32, 438 | stdout: Vec::new(), 439 | stderr: Vec::new(), 440 | } 441 | } 442 | } 443 | } 444 | -------------------------------------------------------------------------------- /runtime/examples/network.rs: -------------------------------------------------------------------------------- 1 | use futures::lock::Mutex; 2 | use futures::FutureExt; 3 | use pnet::packet::arp::{ArpOperations, ArpPacket, MutableArpPacket}; 4 | use pnet::packet::ethernet::{EtherTypes, EthernetPacket, MutableEthernetPacket}; 5 | use pnet::packet::icmp::{echo_reply, echo_request, IcmpPacket, IcmpTypes, MutableIcmpPacket}; 6 | use pnet::packet::ip::{IpNextHeaderProtocol, IpNextHeaderProtocols}; 7 | use pnet::packet::ipv4::{Ipv4Packet, MutableIpv4Packet}; 8 | use pnet::packet::Packet; 9 | use pnet::util::MacAddr; 10 | use std::net::IpAddr; 11 | use std::path::{Path, PathBuf}; 12 | use std::sync::atomic::Ordering::Relaxed; 13 | use std::{ 14 | env, 15 | io::{self, prelude::*}, 16 | process::Stdio, 17 | sync::{atomic::AtomicU16, Arc}, 18 | }; 19 | use tokio::io::{AsyncReadExt, AsyncWriteExt}; 20 | use tokio::net::UnixStream; 21 | use tokio::{ 22 | process::{Child, Command}, 23 | sync, 24 | }; 25 | use ya_runtime_sdk::runtime_api::server; 26 | use ya_runtime_vm::guest_agent_comm::{GuestAgent, Notification, RedirectFdType}; 27 | 28 | #[allow(clippy::declare_interior_mutable_const)] 29 | const IDENTIFICATION: AtomicU16 = AtomicU16::new(42); 30 | const MTU: usize = 1400; 31 | const PREFIX_LEN: usize = 2; 32 | 33 | struct Notifications { 34 | process_died: sync::Notify, 35 | ga: Option>>, 36 | } 37 | 38 | impl Notifications { 39 | fn new() -> Self { 40 | Notifications { 41 | process_died: sync::Notify::new(), 42 | ga: None, 43 | } 44 | } 45 | 46 | fn set_ga(&mut self, ga: Arc>) { 47 | self.ga.replace(ga); 48 | } 49 | 50 | fn handle(&self, notification: Notification) { 51 | match notification { 52 | 
Notification::OutputAvailable { id, fd } => { 53 | let ga = match self.ga.as_ref() { 54 | Some(ga) => ga.clone(), 55 | _ => return, 56 | }; 57 | 58 | tokio::spawn(async move { 59 | match ga 60 | .lock() 61 | .await 62 | .query_output(id, fd as u8, 0u64, u64::MAX) 63 | .await 64 | { 65 | Ok(res) => match res { 66 | Ok(out) => while io::stdout().write_all(&out[..]).is_err() {}, 67 | Err(code) => eprintln!("Output query failed with: {}", code), 68 | }, 69 | Err(code) => eprintln!("Output query failed with: {}", code), 70 | } 71 | }); 72 | } 73 | Notification::ProcessDied { id, reason } => { 74 | eprintln!("Process {} died with {:?}", id, reason); 75 | self.process_died.notify_waiters(); 76 | } 77 | } 78 | } 79 | } 80 | 81 | async fn run_process(ga: &mut GuestAgent, bin: &str, argv: &[&str]) -> io::Result<()> { 82 | let id = ga 83 | .run_process( 84 | bin, 85 | argv, 86 | None, 87 | 0, 88 | 0, 89 | &[ 90 | None, 91 | Some(RedirectFdType::RedirectFdPipeBlocking(0x1000)), 92 | Some(RedirectFdType::RedirectFdPipeBlocking(0x1000)), 93 | ], 94 | None, 95 | ) 96 | .await? 
97 | .expect("Run process failed"); 98 | eprintln!("Spawned process with id: {}", id); 99 | Ok(()) 100 | } 101 | 102 | fn get_project_dir() -> PathBuf { 103 | PathBuf::from(env::var("CARGO_MANIFEST_DIR").unwrap()) 104 | .canonicalize() 105 | .unwrap() 106 | } 107 | 108 | fn get_root_dir() -> PathBuf { 109 | get_project_dir().join("..").canonicalize().unwrap() 110 | } 111 | 112 | fn join_as_string>(path: P, file: impl ToString) -> String { 113 | path.as_ref() 114 | .join(file.to_string()) 115 | .canonicalize() 116 | .unwrap() 117 | .display() 118 | .to_string() 119 | } 120 | 121 | fn spawn_vm>(temp_path: P) -> Child { 122 | let root_dir = get_root_dir(); 123 | let project_dir = get_project_dir(); 124 | let runtime_dir = project_dir.join("poc").join("runtime"); 125 | let init_dir = project_dir.join("init-container"); 126 | 127 | let socket_path = temp_path.as_ref().join("manager.sock"); 128 | let socket_net_path = temp_path.as_ref().join("net.sock"); 129 | 130 | let chardev = 131 | |name, path: &PathBuf| format!("socket,path={},server,nowait,id={}", path.display(), name); 132 | 133 | let mut cmd = Command::new("vmrt"); 134 | cmd.current_dir(runtime_dir).args([ 135 | "-m", 136 | "256m", 137 | "-nographic", 138 | "-vga", 139 | "none", 140 | "-kernel", 141 | join_as_string(&init_dir, "vmlinuz-virt").as_str(), 142 | "-initrd", 143 | join_as_string(&init_dir, "initramfs.cpio.gz").as_str(), 144 | "-no-reboot", 145 | "-net", 146 | "none", 147 | "-enable-kvm", 148 | "-cpu", 149 | "host", 150 | "-smp", 151 | "1", 152 | "-append", 153 | "console=ttyS0 panic=1", 154 | "-device", 155 | "virtio-serial", 156 | "-device", 157 | "virtio-rng-pci", 158 | "-chardev", 159 | chardev("manager_cdev", &socket_path).as_str(), 160 | "-chardev", 161 | chardev("net_cdev", &socket_net_path).as_str(), 162 | "-device", 163 | "virtserialport,chardev=manager_cdev,name=manager_port", 164 | "-device", 165 | "virtserialport,chardev=net_cdev,name=net_port", 166 | "-drive", 167 | format!( 168 | 
"file={},cache=none,readonly=on,format=raw,if=virtio", 169 | root_dir.join("squashfs_drive").display() 170 | ) 171 | .as_str(), 172 | ]); 173 | cmd.stdin(Stdio::null()); 174 | cmd.spawn().expect("failed to spawn VM") 175 | } 176 | 177 | async fn handle_net>(path: P) -> anyhow::Result<()> { 178 | let stream = UnixStream::connect(path.as_ref()).await?; 179 | let (mut read, mut write) = tokio::io::split(stream); 180 | 181 | let fut = async move { 182 | let mut buf: [u8; MTU + 32] = [0u8; MTU + 32]; 183 | loop { 184 | let count = match read.read(&mut buf).await { 185 | Err(_) | Ok(0) => break, 186 | Ok(c) => c, 187 | }; 188 | eprintln!("-> {:?}", &buf[..count]); 189 | if let Some(mut res) = handle_ethernet_packet(&buf[PREFIX_LEN..count]) { 190 | let len_u16 = res.len() as u16; 191 | res.reserve(PREFIX_LEN); 192 | res.splice(0..0, u16::to_ne_bytes(len_u16).to_vec()); 193 | 194 | eprintln!("<- {:?}", &res); 195 | if let Err(e) = write.write_all(&res).await { 196 | eprintln!("Write error: {:?}", e); 197 | } 198 | } 199 | } 200 | }; 201 | 202 | tokio::spawn(fut); 203 | Ok(()) 204 | } 205 | 206 | fn handle_icmp(src: IpAddr, dst: IpAddr, packet: &[u8]) -> Option> { 207 | let icmp_packet = match IcmpPacket::new(packet) { 208 | Some(icmp_packet) => icmp_packet, 209 | None => return None, 210 | }; 211 | 212 | match icmp_packet.get_icmp_type() { 213 | IcmpTypes::EchoReply => { 214 | let reply = echo_reply::EchoReplyPacket::new(packet).unwrap(); 215 | eprintln!( 216 | "-> ICMP echo reply {} -> {} (seq={:?}, id={:?})", 217 | src, 218 | dst, 219 | reply.get_sequence_number(), 220 | reply.get_identifier() 221 | ); 222 | } 223 | IcmpTypes::EchoRequest => { 224 | let request = echo_request::EchoRequestPacket::new(packet).unwrap(); 225 | eprintln!( 226 | "-> ICMP echo request {} -> {} (seq={:?}, id={:?}, size={})", 227 | src, 228 | dst, 229 | request.get_sequence_number(), 230 | request.get_identifier(), 231 | request.packet().len(), 232 | ); 233 | 234 | let mut data: Vec = vec![0u8; 
request.packet().len()]; 235 | { 236 | let mut reply = echo_reply::MutableEchoReplyPacket::new(&mut data[..]).unwrap(); 237 | reply.set_identifier(request.get_identifier()); 238 | reply.set_sequence_number(request.get_sequence_number()); 239 | reply.set_icmp_type(IcmpTypes::EchoReply); 240 | reply.set_icmp_code(request.get_icmp_code()); 241 | reply.set_payload(request.payload()); 242 | } 243 | 244 | let mut reply = 245 | MutableIcmpPacket::new(&mut data[..request.payload().len() + 8]).unwrap(); 246 | let checksum = pnet::packet::icmp::checksum(&reply.to_immutable()); 247 | reply.set_checksum(checksum); 248 | 249 | return Some(reply.packet().to_vec()); 250 | } 251 | _ => eprintln!( 252 | "-> ICMP packet {} -> {} (type={:?})", 253 | src, 254 | dst, 255 | icmp_packet.get_icmp_type() 256 | ), 257 | } 258 | 259 | None 260 | } 261 | 262 | fn handle_transport( 263 | src: IpAddr, 264 | dst: IpAddr, 265 | protocol: IpNextHeaderProtocol, 266 | packet: &[u8], 267 | ) -> Option> { 268 | match protocol { 269 | IpNextHeaderProtocols::Icmp => handle_icmp(src, dst, packet), 270 | _ => None, 271 | } 272 | } 273 | 274 | fn handle_ipv4_packet(data: &[u8]) -> Option> { 275 | if let Some(ip) = Ipv4Packet::new(data) { 276 | let reply = handle_transport( 277 | IpAddr::V4(ip.get_source()), 278 | IpAddr::V4(ip.get_destination()), 279 | ip.get_next_level_protocol(), 280 | ip.payload(), 281 | ); 282 | 283 | reply.map(move |payload| { 284 | let mut data: Vec = vec![0u8; MTU]; 285 | let reply_len = 20 + payload.len(); 286 | 287 | let mut reply = MutableIpv4Packet::new(&mut data[..reply_len]).unwrap(); 288 | reply.set_version(4); 289 | reply.set_header_length(5); 290 | reply.set_total_length(reply_len as u16); 291 | #[allow(clippy::borrow_interior_mutable_const)] 292 | reply.set_identification(IDENTIFICATION.fetch_add(1, Relaxed)); 293 | reply.set_flags(pnet::packet::ipv4::Ipv4Flags::DontFragment); 294 | reply.set_fragment_offset(0); 295 | reply.set_ttl(ip.get_ttl() - 1); 296 | 
reply.set_payload(&payload[..]); 297 | reply.set_dscp(ip.get_dscp()); 298 | reply.set_ecn(ip.get_ecn()); 299 | reply.set_next_level_protocol(ip.get_next_level_protocol()); 300 | reply.set_source(ip.get_destination()); 301 | reply.set_destination(ip.get_source()); 302 | 303 | reply.set_checksum(pnet::packet::ipv4::checksum(&reply.to_immutable())); 304 | reply.packet().to_vec() 305 | }) 306 | } else { 307 | eprintln!("Malformed IPv4 Packet"); 308 | None 309 | } 310 | } 311 | 312 | fn handle_arp_packet(data: &[u8]) -> Option> { 313 | if let Some(arp) = ArpPacket::new(data) { 314 | if arp.get_operation() == ArpOperations::Request { 315 | let mut buffer = [0u8; 28]; 316 | let mut reply = MutableArpPacket::new(&mut buffer).unwrap(); 317 | 318 | reply.set_hardware_type(arp.get_hardware_type()); 319 | reply.set_protocol_type(arp.get_protocol_type()); 320 | reply.set_hw_addr_len(arp.get_hw_addr_len()); 321 | reply.set_proto_addr_len(arp.get_proto_addr_len()); 322 | reply.set_operation(ArpOperations::Reply); 323 | reply.set_sender_hw_addr(MacAddr(1, 2, 3, 4, 5, 6)); 324 | reply.set_sender_proto_addr(arp.get_target_proto_addr()); 325 | reply.set_target_hw_addr(arp.get_sender_hw_addr()); 326 | reply.set_target_proto_addr(arp.get_sender_proto_addr()); 327 | 328 | return Some(reply.packet().to_vec()); 329 | } 330 | } else { 331 | eprintln!("Malformed ARP Packet"); 332 | } 333 | None 334 | } 335 | 336 | fn handle_ethernet_packet(data: &[u8]) -> Option> { 337 | match EthernetPacket::new(data) { 338 | Some(eth) => match eth.get_ethertype() { 339 | EtherTypes::Ipv4 => { 340 | eprintln!("-> IPv4 packet"); 341 | handle_ipv4_packet(eth.payload()) 342 | } 343 | EtherTypes::Arp => { 344 | eprintln!("-> ARP packet"); 345 | handle_arp_packet(eth.payload()) 346 | } 347 | eth_type => { 348 | eprintln!("-> ETH packet: {:?}", eth_type); 349 | None 350 | } 351 | } 352 | .map(move |payload| { 353 | let mut data: Vec = vec![0u8; 14 + payload.len()]; 354 | let mut reply = 
MutableEthernetPacket::new(&mut data).unwrap(); 355 | reply.set_source(eth.get_destination()); 356 | reply.set_destination(eth.get_source()); 357 | reply.set_ethertype(eth.get_ethertype()); 358 | reply.set_payload(&payload); 359 | reply.packet().to_vec() 360 | }), 361 | _ => { 362 | eprintln!("Malformed Ethernet Packet"); 363 | None 364 | } 365 | } 366 | } 367 | 368 | #[tokio::main] 369 | async fn main() -> anyhow::Result<()> { 370 | let temp_dir = tempdir::TempDir::new("ya-vm-direct").expect("Failed to create temp dir"); 371 | let temp_path = temp_dir.path(); 372 | 373 | let notifications = Arc::new(Mutex::new(Notifications::new())); 374 | let mut child = spawn_vm(temp_path); 375 | 376 | let ns = notifications.clone(); 377 | let ga_mutex = GuestAgent::connected(temp_path.join("manager.sock"), 10, move |n, _g| { 378 | let notifications = ns.clone(); 379 | async move { notifications.clone().lock().await.handle(n) }.boxed() 380 | }) 381 | .await?; 382 | 383 | { 384 | notifications.clone().lock().await.set_ga(ga_mutex.clone()); 385 | }; 386 | 387 | handle_net(temp_path.join("net.sock")).await?; 388 | 389 | { 390 | let iface = server::NetworkInterface::Vpn as u16; 391 | let hosts = [("host0", "127.0.0.2"), ("host1", "127.0.0.3")] 392 | .iter() 393 | .map(|(h, i)| (h.to_string(), i.to_string())) 394 | .collect::>(); 395 | 396 | let mut ga = ga_mutex.lock().await; 397 | match ga.add_address("10.0.0.1", "255.255.255.0", iface).await? { 398 | Ok(_) | Err(0) => (), 399 | Err(code) => anyhow::bail!("Unable to set address {}", code), 400 | } 401 | match ga 402 | .create_network("10.0.0.0", "255.255.255.0", "10.0.0.1", iface) 403 | .await? 404 | { 405 | Ok(_) | Err(0) => (), 406 | Err(code) => anyhow::bail!("Unable to join network {}", code), 407 | } 408 | match ga.add_hosts(hosts.into_iter()).await? 
{ 409 | Ok(_) | Err(0) => (), 410 | Err(code) => anyhow::bail!("Unable to add hosts {}", code), 411 | } 412 | } 413 | { 414 | let mut ga = ga_mutex.lock().await; 415 | run_process( 416 | &mut ga, 417 | "/bin/ping", 418 | &["ping", "-v", "-n", "-D", "-c", "3", "10.0.0.2"], 419 | ) 420 | .await?; 421 | } 422 | 423 | /* VM should quit now. */ 424 | let e = child.wait().await.expect("failed to wait on child"); 425 | eprintln!("{:?}", e); 426 | 427 | Ok(()) 428 | } 429 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 2, June 1991 3 | 4 | Copyright (C) 1989, 1991 Free Software Foundation, Inc., 5 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 6 | Everyone is permitted to copy and distribute verbatim copies 7 | of this license document, but changing it is not allowed. 8 | 9 | Preamble 10 | 11 | The licenses for most software are designed to take away your 12 | freedom to share and change it. By contrast, the GNU General Public 13 | License is intended to guarantee your freedom to share and change free 14 | software--to make sure the software is free for all its users. This 15 | General Public License applies to most of the Free Software 16 | Foundation's software and to any other program whose authors commit to 17 | using it. (Some other Free Software Foundation software is covered by 18 | the GNU Lesser General Public License instead.) You can apply it to 19 | your programs, too. 20 | 21 | When we speak of free software, we are referring to freedom, not 22 | price. 
Our General Public Licenses are designed to make sure that you 23 | have the freedom to distribute copies of free software (and charge for 24 | this service if you wish), that you receive source code or can get it 25 | if you want it, that you can change the software or use pieces of it 26 | in new free programs; and that you know you can do these things. 27 | 28 | To protect your rights, we need to make restrictions that forbid 29 | anyone to deny you these rights or to ask you to surrender the rights. 30 | These restrictions translate to certain responsibilities for you if you 31 | distribute copies of the software, or if you modify it. 32 | 33 | For example, if you distribute copies of such a program, whether 34 | gratis or for a fee, you must give the recipients all the rights that 35 | you have. You must make sure that they, too, receive or can get the 36 | source code. And you must show them these terms so they know their 37 | rights. 38 | 39 | We protect your rights with two steps: (1) copyright the software, and 40 | (2) offer you this license which gives you legal permission to copy, 41 | distribute and/or modify the software. 42 | 43 | Also, for each author's protection and ours, we want to make certain 44 | that everyone understands that there is no warranty for this free 45 | software. If the software is modified by someone else and passed on, we 46 | want its recipients to know that what they have is not the original, so 47 | that any problems introduced by others will not reflect on the original 48 | authors' reputations. 49 | 50 | Finally, any free program is threatened constantly by software 51 | patents. We wish to avoid the danger that redistributors of a free 52 | program will individually obtain patent licenses, in effect making the 53 | program proprietary. To prevent this, we have made it clear that any 54 | patent must be licensed for everyone's free use or not licensed at all. 
55 | 56 | The precise terms and conditions for copying, distribution and 57 | modification follow. 58 | 59 | GNU GENERAL PUBLIC LICENSE 60 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 61 | 62 | 0. This License applies to any program or other work which contains 63 | a notice placed by the copyright holder saying it may be distributed 64 | under the terms of this General Public License. The "Program", below, 65 | refers to any such program or work, and a "work based on the Program" 66 | means either the Program or any derivative work under copyright law: 67 | that is to say, a work containing the Program or a portion of it, 68 | either verbatim or with modifications and/or translated into another 69 | language. (Hereinafter, translation is included without limitation in 70 | the term "modification".) Each licensee is addressed as "you". 71 | 72 | Activities other than copying, distribution and modification are not 73 | covered by this License; they are outside its scope. The act of 74 | running the Program is not restricted, and the output from the Program 75 | is covered only if its contents constitute a work based on the 76 | Program (independent of having been made by running the Program). 77 | Whether that is true depends on what the Program does. 78 | 79 | 1. You may copy and distribute verbatim copies of the Program's 80 | source code as you receive it, in any medium, provided that you 81 | conspicuously and appropriately publish on each copy an appropriate 82 | copyright notice and disclaimer of warranty; keep intact all the 83 | notices that refer to this License and to the absence of any warranty; 84 | and give any other recipients of the Program a copy of this License 85 | along with the Program. 86 | 87 | You may charge a fee for the physical act of transferring a copy, and 88 | you may at your option offer warranty protection in exchange for a fee. 89 | 90 | 2. 
You may modify your copy or copies of the Program or any portion 91 | of it, thus forming a work based on the Program, and copy and 92 | distribute such modifications or work under the terms of Section 1 93 | above, provided that you also meet all of these conditions: 94 | 95 | a) You must cause the modified files to carry prominent notices 96 | stating that you changed the files and the date of any change. 97 | 98 | b) You must cause any work that you distribute or publish, that in 99 | whole or in part contains or is derived from the Program or any 100 | part thereof, to be licensed as a whole at no charge to all third 101 | parties under the terms of this License. 102 | 103 | c) If the modified program normally reads commands interactively 104 | when run, you must cause it, when started running for such 105 | interactive use in the most ordinary way, to print or display an 106 | announcement including an appropriate copyright notice and a 107 | notice that there is no warranty (or else, saying that you provide 108 | a warranty) and that users may redistribute the program under 109 | these conditions, and telling the user how to view a copy of this 110 | License. (Exception: if the Program itself is interactive but 111 | does not normally print such an announcement, your work based on 112 | the Program is not required to print an announcement.) 113 | 114 | These requirements apply to the modified work as a whole. If 115 | identifiable sections of that work are not derived from the Program, 116 | and can be reasonably considered independent and separate works in 117 | themselves, then this License, and its terms, do not apply to those 118 | sections when you distribute them as separate works. 
But when you 119 | distribute the same sections as part of a whole which is a work based 120 | on the Program, the distribution of the whole must be on the terms of 121 | this License, whose permissions for other licensees extend to the 122 | entire whole, and thus to each and every part regardless of who wrote it. 123 | 124 | Thus, it is not the intent of this section to claim rights or contest 125 | your rights to work written entirely by you; rather, the intent is to 126 | exercise the right to control the distribution of derivative or 127 | collective works based on the Program. 128 | 129 | In addition, mere aggregation of another work not based on the Program 130 | with the Program (or with a work based on the Program) on a volume of 131 | a storage or distribution medium does not bring the other work under 132 | the scope of this License. 133 | 134 | 3. You may copy and distribute the Program (or a work based on it, 135 | under Section 2) in object code or executable form under the terms of 136 | Sections 1 and 2 above provided that you also do one of the following: 137 | 138 | a) Accompany it with the complete corresponding machine-readable 139 | source code, which must be distributed under the terms of Sections 140 | 1 and 2 above on a medium customarily used for software interchange; or, 141 | 142 | b) Accompany it with a written offer, valid for at least three 143 | years, to give any third party, for a charge no more than your 144 | cost of physically performing source distribution, a complete 145 | machine-readable copy of the corresponding source code, to be 146 | distributed under the terms of Sections 1 and 2 above on a medium 147 | customarily used for software interchange; or, 148 | 149 | c) Accompany it with the information you received as to the offer 150 | to distribute corresponding source code. 
(This alternative is 151 | allowed only for noncommercial distribution and only if you 152 | received the program in object code or executable form with such 153 | an offer, in accord with Subsection b above.) 154 | 155 | The source code for a work means the preferred form of the work for 156 | making modifications to it. For an executable work, complete source 157 | code means all the source code for all modules it contains, plus any 158 | associated interface definition files, plus the scripts used to 159 | control compilation and installation of the executable. However, as a 160 | special exception, the source code distributed need not include 161 | anything that is normally distributed (in either source or binary 162 | form) with the major components (compiler, kernel, and so on) of the 163 | operating system on which the executable runs, unless that component 164 | itself accompanies the executable. 165 | 166 | If distribution of executable or object code is made by offering 167 | access to copy from a designated place, then offering equivalent 168 | access to copy the source code from the same place counts as 169 | distribution of the source code, even though third parties are not 170 | compelled to copy the source along with the object code. 171 | 172 | 4. You may not copy, modify, sublicense, or distribute the Program 173 | except as expressly provided under this License. Any attempt 174 | otherwise to copy, modify, sublicense or distribute the Program is 175 | void, and will automatically terminate your rights under this License. 176 | However, parties who have received copies, or rights, from you under 177 | this License will not have their licenses terminated so long as such 178 | parties remain in full compliance. 179 | 180 | 5. You are not required to accept this License, since you have not 181 | signed it. However, nothing else grants you permission to modify or 182 | distribute the Program or its derivative works. 
These actions are 183 | prohibited by law if you do not accept this License. Therefore, by 184 | modifying or distributing the Program (or any work based on the 185 | Program), you indicate your acceptance of this License to do so, and 186 | all its terms and conditions for copying, distributing or modifying 187 | the Program or works based on it. 188 | 189 | 6. Each time you redistribute the Program (or any work based on the 190 | Program), the recipient automatically receives a license from the 191 | original licensor to copy, distribute or modify the Program subject to 192 | these terms and conditions. You may not impose any further 193 | restrictions on the recipients' exercise of the rights granted herein. 194 | You are not responsible for enforcing compliance by third parties to 195 | this License. 196 | 197 | 7. If, as a consequence of a court judgment or allegation of patent 198 | infringement or for any other reason (not limited to patent issues), 199 | conditions are imposed on you (whether by court order, agreement or 200 | otherwise) that contradict the conditions of this License, they do not 201 | excuse you from the conditions of this License. If you cannot 202 | distribute so as to satisfy simultaneously your obligations under this 203 | License and any other pertinent obligations, then as a consequence you 204 | may not distribute the Program at all. For example, if a patent 205 | license would not permit royalty-free redistribution of the Program by 206 | all those who receive copies directly or indirectly through you, then 207 | the only way you could satisfy both it and this License would be to 208 | refrain entirely from distribution of the Program. 209 | 210 | If any portion of this section is held invalid or unenforceable under 211 | any particular circumstance, the balance of the section is intended to 212 | apply and the section as a whole is intended to apply in other 213 | circumstances. 
214 | 215 | It is not the purpose of this section to induce you to infringe any 216 | patents or other property right claims or to contest validity of any 217 | such claims; this section has the sole purpose of protecting the 218 | integrity of the free software distribution system, which is 219 | implemented by public license practices. Many people have made 220 | generous contributions to the wide range of software distributed 221 | through that system in reliance on consistent application of that 222 | system; it is up to the author/donor to decide if he or she is willing 223 | to distribute software through any other system and a licensee cannot 224 | impose that choice. 225 | 226 | This section is intended to make thoroughly clear what is believed to 227 | be a consequence of the rest of this License. 228 | 229 | 8. If the distribution and/or use of the Program is restricted in 230 | certain countries either by patents or by copyrighted interfaces, the 231 | original copyright holder who places the Program under this License 232 | may add an explicit geographical distribution limitation excluding 233 | those countries, so that distribution is permitted only in or among 234 | countries not thus excluded. In such case, this License incorporates 235 | the limitation as if written in the body of this License. 236 | 237 | 9. The Free Software Foundation may publish revised and/or new versions 238 | of the General Public License from time to time. Such new versions will 239 | be similar in spirit to the present version, but may differ in detail to 240 | address new problems or concerns. 241 | 242 | Each version is given a distinguishing version number. If the Program 243 | specifies a version number of this License which applies to it and "any 244 | later version", you have the option of following the terms and conditions 245 | either of that version or of any later version published by the Free 246 | Software Foundation. 
If the Program does not specify a version number of 247 | this License, you may choose any version ever published by the Free Software 248 | Foundation. 249 | 250 | 10. If you wish to incorporate parts of the Program into other free 251 | programs whose distribution conditions are different, write to the author 252 | to ask for permission. For software which is copyrighted by the Free 253 | Software Foundation, write to the Free Software Foundation; we sometimes 254 | make exceptions for this. Our decision will be guided by the two goals 255 | of preserving the free status of all derivatives of our free software and 256 | of promoting the sharing and reuse of software generally. 257 | 258 | NO WARRANTY 259 | 260 | 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY 261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN 262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES 263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED 264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS 266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE 267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, 268 | REPAIR OR CORRECTION. 269 | 270 | 12. 
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR 272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, 273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING 274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED 275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY 276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER 277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE 278 | POSSIBILITY OF SUCH DAMAGES. 279 | 280 | END OF TERMS AND CONDITIONS 281 | 282 | How to Apply These Terms to Your New Programs 283 | 284 | If you develop a new program, and you want it to be of the greatest 285 | possible use to the public, the best way to achieve this is to make it 286 | free software which everyone can redistribute and change under these terms. 287 | 288 | To do so, attach the following notices to the program. It is safest 289 | to attach them to the start of each source file to most effectively 290 | convey the exclusion of warranty; and each file should have at least 291 | the "copyright" line and a pointer to where the full notice is found. 292 | 293 | <one line to give the program's name and a brief idea of what it does.> 294 | Copyright (C) <year> <name of author> 295 | 296 | This program is free software; you can redistribute it and/or modify 297 | it under the terms of the GNU General Public License as published by 298 | the Free Software Foundation; either version 2 of the License, or 299 | (at your option) any later version. 300 | 301 | This program is distributed in the hope that it will be useful, 302 | but WITHOUT ANY WARRANTY; without even the implied warranty of 303 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 304 | GNU General Public License for more details.
305 | 306 | You should have received a copy of the GNU General Public License along 307 | with this program; if not, write to the Free Software Foundation, Inc., 308 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 309 | 310 | Also add information on how to contact you by electronic and paper mail. 311 | 312 | If the program is interactive, make it output a short notice like this 313 | when it starts in an interactive mode: 314 | 315 | Gnomovision version 69, Copyright (C) year name of author 316 | Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 317 | This is free software, and you are welcome to redistribute it 318 | under certain conditions; type `show c' for details. 319 | 320 | The hypothetical commands `show w' and `show c' should show the appropriate 321 | parts of the General Public License. Of course, the commands you use may 322 | be called something other than `show w' and `show c'; they could even be 323 | mouse-clicks or menu items--whatever suits your program. 324 | 325 | You should also get your employer (if you work as a programmer) or your 326 | school, if any, to sign a "copyright disclaimer" for the program, if 327 | necessary. Here is a sample; alter the names: 328 | 329 | Yoyodyne, Inc., hereby disclaims all copyright interest in the program 330 | `Gnomovision' (which makes passes at compilers) written by James Hacker. 331 | 332 | <signature of Ty Coon>, 1 April 1989 333 | Ty Coon, President of Vice 334 | 335 | This General Public License does not permit incorporating your program into 336 | proprietary programs. If your program is a subroutine library, you may 337 | consider it more useful to permit linking proprietary applications with the 338 | library. If this is what you want to do, use the GNU Lesser General 339 | Public License instead of this License.
340 | -------------------------------------------------------------------------------- /runtime/src/lib.rs: -------------------------------------------------------------------------------- 1 | pub mod cpu; 2 | pub mod deploy; 3 | pub mod guest_agent_comm; 4 | mod qcow2_min; 5 | mod response_parser; 6 | mod self_test; 7 | pub mod vmrt; 8 | 9 | use bollard_stubs::models::ContainerConfig; 10 | use deploy::DeploymentMount; 11 | use futures::future::FutureExt; 12 | use futures::lock::Mutex; 13 | use futures::TryFutureExt; 14 | use std::collections::HashMap; 15 | use std::convert::TryFrom; 16 | use std::env; 17 | use std::path::{Component, Path, PathBuf}; 18 | use std::sync::Arc; 19 | use std::time::Duration; 20 | use structopt::StructOpt; 21 | use tokio::{ 22 | fs, 23 | io::{self, AsyncWriteExt}, 24 | }; 25 | use url::Url; 26 | use ya_client_model::activity::exe_script_command::VolumeMount; 27 | 28 | use crate::{ 29 | cpu::CpuInfo, 30 | deploy::Deployment, 31 | guest_agent_comm::{RedirectFdType, RemoteCommandResult}, 32 | vmrt::{start_vmrt, RuntimeData}, 33 | }; 34 | use ya_runtime_sdk::runtime_api::deploy::ContainerEndpoint; 35 | 36 | use ya_runtime_sdk::{ 37 | runtime_api::{ 38 | deploy::{DeployResult, StartMode}, 39 | server, 40 | }, 41 | serialize, Context, EmptyResponse, EndpointResponse, Error, ErrorExt, EventEmitter, 42 | OutputResponse, ProcessId, ProcessIdResponse, RuntimeMode, 43 | }; 44 | 45 | const FILE_DEPLOYMENT: &str = "deployment.json"; 46 | const DEFAULT_CWD: &str = "/"; 47 | 48 | #[derive(StructOpt, Clone, Default)] 49 | #[structopt(rename_all = "kebab-case")] 50 | pub struct Cli { 51 | /// GVMI image path 52 | #[structopt(short, long, required_ifs( 53 | &[ 54 | ("command", "deploy"), 55 | ("command", "start"), 56 | ("command", "run") 57 | ]) 58 | )] 59 | #[structopt(multiple = true)] 60 | task_package: Option>, 61 | /// Number of logical CPU cores 62 | #[structopt(long, default_value = "1")] 63 | cpu_cores: usize, 64 | /// Amount of RAM [GiB] 65 | 
#[structopt(long, default_value = "0.25")] 66 | mem_gib: f64, 67 | /// Amount of disk storage [GiB] 68 | #[allow(unused)] 69 | #[structopt(long, default_value = "0.25")] 70 | storage_gib: f64, 71 | /// VPN endpoint address 72 | #[structopt(long)] 73 | vpn_endpoint: Option, 74 | /// INET endpoint address 75 | #[structopt(long)] 76 | inet_endpoint: Option, 77 | /// PCI device identifier 78 | #[structopt(long, env = "YA_RUNTIME_VM_PCI_DEVICE")] 79 | pci_device: Option>, 80 | #[structopt(long, env = "YA_RUNTIME_VOLUME_OVERRIDE")] 81 | volume_override: Option, 82 | #[structopt(long, env = "YA_RUNTIME_HOSTNAME", default_value = "golem")] 83 | hostname: String, 84 | #[structopt(flatten)] 85 | test_config: TestConfig, 86 | } 87 | 88 | #[derive(ya_runtime_sdk::RuntimeDef, Default)] 89 | #[cli(Cli)] 90 | pub struct Runtime { 91 | data: Arc>, 92 | } 93 | 94 | #[derive(StructOpt, Clone, Default)] 95 | struct TestConfig { 96 | /// Test process timeout (in sec) 97 | #[structopt(long, env = "YA_RUNTIME_VM_TEST_TIMEOUT", default_value = "10")] 98 | test_timeout: u64, 99 | /// Number of logical CPU cores for test process 100 | #[structopt(long, env = "YA_RUNTIME_VM_TEST_CPU_CORES", default_value = "1")] 101 | test_cpu_cores: usize, 102 | /// Amount of RAM for test process [GiB] 103 | #[structopt(long, env = "YA_RUNTIME_VM_TEST_MEM_GIB", default_value = "0.5")] 104 | test_mem_gib: f64, 105 | } 106 | 107 | impl TestConfig { 108 | fn test_timeout(&self) -> Duration { 109 | Duration::from_secs(self.test_timeout) 110 | } 111 | } 112 | 113 | impl ya_runtime_sdk::Runtime for Runtime { 114 | fn deploy<'a>(&mut self, ctx: &mut Context) -> OutputResponse<'a> { 115 | let workdir = ctx.cli.workdir.clone().expect("Workdir not provided"); 116 | let cli = ctx.cli.runtime.clone(); 117 | 118 | deploy(workdir, cli).map_err(Into::into).boxed_local() 119 | } 120 | 121 | fn start<'a>(&mut self, ctx: &mut Context) -> OutputResponse<'a> { 122 | let emitter = ctx 123 | .emitter 124 | .clone() 125 | 
.expect("Service not running in Server mode"); 126 | 127 | let workdir = ctx.cli.workdir.clone().expect("Workdir not provided"); 128 | 129 | let deployment_file = std::fs::File::open(workdir.join(FILE_DEPLOYMENT)) 130 | .expect("Unable to open the deployment file"); 131 | let deployment: Deployment = serialize::json::from_reader(deployment_file) 132 | .expect("Failed to read the deployment file"); 133 | 134 | log::debug!("Deployment: {deployment:?}"); 135 | 136 | let vpn_endpoint = ctx.cli.runtime.vpn_endpoint.clone(); 137 | let inet_endpoint = ctx.cli.runtime.inet_endpoint.clone(); 138 | let pci_device_id = ctx.cli.runtime.pci_device.clone(); 139 | 140 | log::info!("VPN endpoint: {vpn_endpoint:?}"); 141 | log::info!("INET endpoint: {inet_endpoint:?}"); 142 | 143 | let cmd_args = ctx.cli.command.args(); 144 | log::debug!("Start command parameters: {cmd_args:?}"); 145 | 146 | let entrypoint = if cmd_args.iter().any(|arg| *arg == "start-entrypoint") { 147 | match extract_entrypoint(&deployment.config) { 148 | None => return async { 149 | Err(Error::from_string("'start_entrypoint' flag is set but the container does not define an entrypoint!")) 150 | }.boxed_local(), 151 | entrypoint => entrypoint, 152 | } 153 | } else { 154 | None 155 | }; 156 | 157 | let data = self.data.clone(); 158 | async move { 159 | { 160 | let mut data = data.lock().await; 161 | if let Some(pci_device_id) = pci_device_id { 162 | data.pci_device_id.replace(pci_device_id); 163 | } 164 | if let Some(vpn_endpoint) = vpn_endpoint { 165 | let endpoint = 166 | ContainerEndpoint::try_from(vpn_endpoint).map_err(Error::from)?; 167 | data.vpn.replace(endpoint); 168 | } 169 | if let Some(inet_endpoint) = inet_endpoint { 170 | let endpoint = 171 | ContainerEndpoint::try_from(inet_endpoint).map_err(Error::from)?; 172 | data.inet.replace(endpoint); 173 | } 174 | 175 | data.deployment.replace(deployment); 176 | } 177 | 178 | let start_response = start(workdir, data.clone(), emitter).await?; 179 | 180 | 
Ok(match entrypoint { 181 | Some(entrypoint) => Some(run_entrypoint(start_response, entrypoint, data).await?), 182 | None => start_response, 183 | }) 184 | } 185 | .boxed_local() 186 | } 187 | 188 | fn stop<'a>(&mut self, _: &mut Context) -> EmptyResponse<'a> { 189 | stop(self.data.clone()).map_err(Into::into).boxed_local() 190 | } 191 | 192 | fn run_command<'a>( 193 | &mut self, 194 | command: server::RunProcess, 195 | mode: RuntimeMode, 196 | ctx: &mut Context, 197 | ) -> ProcessIdResponse<'a> { 198 | if let RuntimeMode::Command = mode { 199 | return async move { Err(anyhow::anyhow!("CLI `run` is not supported")) } 200 | .map_err(Into::into) 201 | .boxed_local(); 202 | } 203 | let pci_device_id = ctx.cli.runtime.pci_device.clone(); 204 | let data = self.data.clone(); 205 | async move { 206 | if let Some(pci_device_id) = pci_device_id { 207 | let mut runtime_data = data.lock().await; 208 | runtime_data.pci_device_id.replace(pci_device_id); 209 | } 210 | run_command(data.clone(), command).await 211 | } 212 | .map_err(Into::into) 213 | .boxed_local() 214 | } 215 | 216 | fn kill_command<'a>( 217 | &mut self, 218 | kill: server::KillProcess, 219 | _: &mut Context, 220 | ) -> EmptyResponse<'a> { 221 | kill_command(self.data.clone(), kill) 222 | .map_err(Into::into) 223 | .boxed_local() 224 | } 225 | 226 | fn offer<'a>(&mut self, ctx: &mut Context) -> OutputResponse<'a> { 227 | let pci_device_id = ctx.cli.runtime.pci_device.clone(); 228 | let test_config = ctx.cli.runtime.test_config.clone(); 229 | self_test::run_self_test( 230 | |self_test_result| { 231 | self_test::verify_status(self_test_result) 232 | .and_then(|self_test_result| Ok(serde_json::from_str(&self_test_result)?)) 233 | .and_then(offer) 234 | .map(|offer| serde_json::Value::to_string(&offer)) 235 | }, 236 | pci_device_id, 237 | test_config, 238 | ) 239 | .map(|_| Ok(None)) 240 | .boxed_local() 241 | } 242 | 243 | fn test<'a>(&mut self, ctx: &mut Context) -> EmptyResponse<'a> { 244 | let pci_device_id = 
ctx.cli.runtime.pci_device.clone(); 245 | let test_config = ctx.cli.runtime.test_config.clone(); 246 | self_test::test(pci_device_id, test_config).boxed_local() 247 | } 248 | 249 | fn join_network<'a>( 250 | &mut self, 251 | join: server::CreateNetwork, 252 | _: &mut Context, 253 | ) -> EndpointResponse<'a> { 254 | join_network(self.data.clone(), join) 255 | .map_err(Into::into) 256 | .boxed_local() 257 | } 258 | } 259 | 260 | async fn deploy(workdir: PathBuf, cli: Cli) -> anyhow::Result> { 261 | let work_dir = normalize_path(&workdir).await?; 262 | let task_packages = cli.task_package.unwrap(); 263 | let mut package_paths = Vec::new(); 264 | for path in task_packages.iter() { 265 | let path = normalize_path(&path).await?; 266 | package_paths.push(path); 267 | } 268 | let package_file = fs::File::open(&package_paths[0]).await?; 269 | let volume_override = cli 270 | .volume_override 271 | .map(|vo_str| serde_json::from_str::>(&vo_str)) 272 | .transpose()? 273 | .unwrap_or_default(); 274 | 275 | let deployment = Deployment::try_from_input( 276 | package_file, 277 | cli.cpu_cores, 278 | (cli.mem_gib * 1024.) as usize, 279 | &package_paths, 280 | volume_override, 281 | cli.hostname, 282 | ) 283 | .await 284 | .or_err("Error reading package metadata")?; 285 | 286 | for vol in &deployment.volumes { 287 | fs::create_dir_all(work_dir.join(&vol.name)).await?; 288 | } 289 | 290 | for DeploymentMount { name, mount, .. } in &deployment.mounts { 291 | let VolumeMount::Storage { 292 | size, preallocate, .. 293 | } = mount 294 | else { 295 | continue; 296 | }; 297 | 298 | let file = fs::File::create(work_dir.join(name)).await?; 299 | let qcow2 = 300 | qcow2_min::Qcow2Image::new(size.as_u64(), preallocate.unwrap_or_default().as_u64()); 301 | qcow2.write(file).await?; 302 | } 303 | 304 | fs::OpenOptions::new() 305 | .create(true) 306 | .write(true) 307 | .truncate(true) 308 | .open(work_dir.join(FILE_DEPLOYMENT)) 309 | .await? 
310 | .write_all(serde_json::to_string(&deployment)?.as_bytes()) 311 | .await?; 312 | 313 | Ok(Some(serialize::json::to_value(DeployResult { 314 | valid: Ok(Default::default()), 315 | vols: deployment.volumes, 316 | start_mode: StartMode::Blocking, 317 | })?)) 318 | } 319 | 320 | pub(crate) async fn start( 321 | work_dir: PathBuf, 322 | runtime_data: Arc>, 323 | emitter: EventEmitter, 324 | ) -> anyhow::Result> { 325 | start_vmrt(work_dir, runtime_data, emitter).await 326 | } 327 | 328 | pub(crate) async fn run_command( 329 | runtime_data: Arc>, 330 | run: server::RunProcess, 331 | ) -> Result { 332 | let data = runtime_data.lock().await; 333 | let deployment = data.deployment().expect("Runtime not started"); 334 | 335 | let (uid, gid) = deployment.user; 336 | let env = deployment.env(); 337 | let cwd = deployment 338 | .config 339 | .working_dir 340 | .as_ref() 341 | .filter(|s| !s.trim().is_empty()) 342 | .map(|s| s.as_str()) 343 | .unwrap_or_else(|| DEFAULT_CWD); 344 | 345 | log::debug!("got run process: {:?}", run); 346 | log::debug!("work dir: {:?}", deployment.config.working_dir); 347 | 348 | let result = data 349 | .ga() 350 | .unwrap() 351 | .lock() 352 | .await 353 | .run_process( 354 | &run.bin, 355 | run.args 356 | .iter() 357 | .map(|s| s.as_ref()) 358 | .collect::>() 359 | .as_slice(), 360 | Some(&env[..]), 361 | uid, 362 | gid, 363 | &[ 364 | None, 365 | Some(RedirectFdType::RedirectFdPipeCyclic(0x1000)), 366 | Some(RedirectFdType::RedirectFdPipeCyclic(0x1000)), 367 | ], 368 | Some(cwd), 369 | ) 370 | .await; 371 | 372 | convert_result(result, "Running process") 373 | } 374 | 375 | async fn kill_command( 376 | runtime_data: Arc>, 377 | kill: server::KillProcess, 378 | ) -> Result<(), server::ErrorResponse> { 379 | log::debug!("got kill: {:?}", kill); 380 | // TODO: send signal 381 | let data = runtime_data.lock().await; 382 | let mutex = data.ga().unwrap(); 383 | let result = mutex.lock().await.kill(kill.pid).await; 384 | convert_result(result, 
&format!("Killing process {}", kill.pid))?; 385 | Ok(()) 386 | } 387 | 388 | pub(crate) async fn stop( 389 | runtime_data: Arc>, 390 | ) -> Result<(), server::ErrorResponse> { 391 | log::debug!("got shutdown"); 392 | let mut data = runtime_data.lock().await; 393 | let mut runtime = data.runtime().unwrap(); 394 | 395 | { 396 | let mutex = data.ga().unwrap(); 397 | let mut ga = mutex.lock().await; 398 | convert_result(ga.quit().await, "Sending quit")?; 399 | } 400 | 401 | runtime 402 | .kill() 403 | .await 404 | .expect("Sending kill to runtime failed"); 405 | 406 | runtime 407 | .wait() 408 | .await 409 | .expect("Waiting for runtime stop failed"); 410 | Ok(()) 411 | } 412 | 413 | fn offer(self_test_result: serde_json::Value) -> anyhow::Result { 414 | let cpu = CpuInfo::try_new()?; 415 | let model = format!( 416 | "Stepping {} Family {} Model {}", 417 | cpu.model.stepping, cpu.model.family, cpu.model.model 418 | ); 419 | 420 | let mut runtime_capabilities = vec!["inet", "vpn", "manifest-support", "start-entrypoint"]; 421 | 422 | let mut offer_template = serde_json::json!({ 423 | "properties": { 424 | "golem.inf.cpu.vendor": cpu.model.vendor, 425 | "golem.inf.cpu.brand": cpu.model.brand, 426 | "golem.inf.cpu.model": model, 427 | "golem.inf.cpu.capabilities": cpu.capabilities, 428 | }, 429 | "constraints": "" 430 | }); 431 | 432 | let properties = offer_template 433 | .get_mut("properties") 434 | .and_then(serde_json::Value::as_object_mut) 435 | .or_err("Unable to read offer template as a map")?; 436 | 437 | if is_gpu_supported(&self_test_result) { 438 | properties.insert("golem.inf".into(), self_test_result); 439 | runtime_capabilities.push("gpu"); 440 | } 441 | 442 | properties.insert( 443 | "golem.runtime.capabilities".into(), 444 | serde_json::json!(runtime_capabilities), 445 | ); 446 | 447 | Ok(offer_template) 448 | } 449 | 450 | fn is_gpu_supported(self_test_result: &serde_json::Value) -> bool { 451 | self_test_result 452 | .as_object() 453 | .and_then(|root| 
root.get("gpu")) 454 | .is_some() 455 | } 456 | 457 | async fn join_network( 458 | runtime_data: Arc>, 459 | join: server::CreateNetwork, 460 | ) -> Result { 461 | let hosts = join.hosts; 462 | let networks = join.networks; 463 | let iface = match server::NetworkInterface::from_i32(join.interface) { 464 | Some(iface) => iface, 465 | _ => { 466 | return Err(server::ErrorResponse::msg(format!( 467 | "invalid network interface type: {:?}", 468 | join.interface 469 | ))); 470 | } 471 | }; 472 | 473 | let data = runtime_data.lock().await; 474 | let endpoint = match iface { 475 | server::NetworkInterface::Vpn => data.vpn.as_ref(), 476 | server::NetworkInterface::Inet => data.inet.as_ref(), 477 | } 478 | .cloned() 479 | .expect("No network endpoint"); 480 | 481 | let mutex = data.ga().unwrap(); 482 | let mut ga = mutex.lock().await; 483 | convert_result(ga.add_hosts(hosts.iter()).await, "Updating network hosts")?; 484 | 485 | for net in networks { 486 | let (net_addr, net_mask) = match iface { 487 | server::NetworkInterface::Vpn => (net.addr, net.mask.clone()), 488 | server::NetworkInterface::Inet => Default::default(), 489 | }; 490 | 491 | convert_result( 492 | ga.add_address(&net.if_addr, &net.mask, iface as u16).await, 493 | &format!("Adding interface address {} {}", net.if_addr, net.gateway), 494 | )?; 495 | convert_result( 496 | ga.create_network(&net_addr, &net_mask, &net.gateway, iface as u16) 497 | .await, 498 | &format!( 499 | "Creating route via {} for {} ({:?})", 500 | net.gateway, net_addr, iface 501 | ), 502 | )?; 503 | } 504 | 505 | Ok(endpoint) 506 | } 507 | 508 | async fn normalize_path>(path: P) -> anyhow::Result { 509 | Ok(fs::canonicalize(path) 510 | .await? 
511 | .components() 512 | .filter(|c| !matches!(c, Component::Prefix(_))) 513 | .collect::()) 514 | } 515 | 516 | fn convert_result( 517 | result: io::Result>, 518 | msg: &str, 519 | ) -> Result { 520 | match result { 521 | Ok(Ok(result)) => Ok(result), 522 | Ok(Err(exit_code)) => Err(server::ErrorResponse::msg(format!( 523 | "{} failed, exit code: {}", 524 | msg, exit_code 525 | ))), 526 | Err(error) => Err(server::ErrorResponse::msg(format!( 527 | "{} failed: {}", 528 | msg, error 529 | ))), 530 | } 531 | } 532 | 533 | fn extract_entrypoint(config: &ContainerConfig) -> Option> { 534 | let entrypoint = config 535 | .entrypoint 536 | .clone() 537 | .unwrap_or_default() 538 | .into_iter() 539 | .chain(config.cmd.clone().unwrap_or_default()) 540 | .collect::>(); 541 | if entrypoint.is_empty() { 542 | None 543 | } else { 544 | Some(entrypoint) 545 | } 546 | } 547 | 548 | async fn run_entrypoint( 549 | start_response: Option, 550 | entrypoint: Vec, 551 | data: Arc>, 552 | ) -> Result { 553 | log::debug!("Starting container entrypoint: {entrypoint:?}"); 554 | let mut args = entrypoint.clone(); 555 | let bin_name = Path::new(&args[0]) 556 | .file_name() 557 | .ok_or_else(|| Error::from_string("Invalid binary name for container entrypoint"))? 558 | .to_string_lossy() 559 | .to_string(); 560 | let bin = std::mem::replace(&mut args[0], bin_name); 561 | 562 | run_command( 563 | data, 564 | server::RunProcess { 565 | bin, 566 | args, 567 | ..Default::default() 568 | }, 569 | ) 570 | .await 571 | .map(|pid| { 572 | use serde_json::json; 573 | 574 | json!({ 575 | "start": start_response.unwrap_or(json!(null)), 576 | "entrypoint": json!({ "pid": json!(pid), "command": json!(entrypoint)}), 577 | }) 578 | }) 579 | } 580 | --------------------------------------------------------------------------------