├── .github └── workflows │ ├── cmake.yml │ └── unittests.yml ├── .gitignore ├── .gitmodules ├── CMakeLists.txt ├── CODE_OF_CONDUCT.md ├── CONTRIBUTOR_LICENSE_AGREEMENT.md ├── LICENSE ├── README.md ├── fuzz ├── CMakeLists.txt ├── fuzz.cpp ├── fuzzer.sh └── helpers.cpp ├── guest ├── .gdbinit ├── STREAM │ ├── .gitignore │ ├── build.sh │ ├── hugepage_stream.c │ ├── results.txt │ └── stream.c ├── build.sh ├── glibc │ ├── .gdbinit │ ├── float.c │ ├── float.gdb │ ├── glibc.c │ └── s.gdb ├── go │ ├── main.go │ └── s.gdb ├── mini │ ├── build.sh │ ├── mini.asm │ └── s.gdb ├── musl │ ├── .gdbinit │ ├── build.sh │ ├── glibc.gdb │ ├── musl.c │ ├── s.gdb │ └── simple.c ├── nim │ ├── build.sh │ └── program.nim ├── nim_storage │ ├── .gitignore │ ├── build.sh │ ├── main.c │ ├── main.nim │ └── storage.nim ├── src │ ├── api.hpp │ ├── crc32c.cpp │ ├── guest.cpp │ └── start.cpp ├── storage │ ├── .gitignore │ ├── build.sh │ ├── main.c │ └── storage.c └── tests │ ├── build.sh │ ├── cxx_test.cpp │ ├── debug.sh │ ├── remote.gdb │ └── test.c ├── lib ├── CMakeLists.txt └── tinykvm │ ├── amd64 │ ├── amd64.hpp │ ├── builtin │ │ ├── .gitignore │ │ ├── assembly.sh │ │ ├── disassembly.sh │ │ ├── interrupts.asm │ │ ├── kernel_assembly.h │ │ ├── usercode.asm │ │ ├── usercode_assembly.sh │ │ ├── vsyscall.asm │ │ └── vsyscall_assembly.sh │ ├── gdt.cpp │ ├── gdt.hpp │ ├── idt.cpp │ ├── idt.hpp │ ├── lapic.hpp │ ├── memory_layout.hpp │ ├── paging.cpp │ ├── paging.hpp │ ├── tss.cpp │ ├── tss.hpp │ ├── usercode.cpp │ ├── usercode.hpp │ ├── vdso.cpp │ └── vdso.hpp │ ├── arm64 │ └── memory_layout.hpp │ ├── common.hpp │ ├── forward.hpp │ ├── linux │ ├── fds.cpp │ ├── fds.hpp │ ├── signals.cpp │ ├── signals.hpp │ ├── system_calls.cpp │ ├── threads.cpp │ └── threads.hpp │ ├── machine.cpp │ ├── machine.hpp │ ├── machine_elf.cpp │ ├── machine_env.cpp │ ├── machine_inline.hpp │ ├── machine_utils.cpp │ ├── memory.cpp │ ├── memory.hpp │ ├── memory_bank.cpp │ ├── memory_bank.hpp │ ├── memory_maps.cpp │ ├── 
mmap_cache.hpp │ ├── page_streaming.cpp │ ├── page_streaming.hpp │ ├── remote.cpp │ ├── rsp_client.cpp │ ├── rsp_client.hpp │ ├── smp.cpp │ ├── smp.hpp │ ├── util │ ├── elf.h │ ├── elf.hpp │ ├── function.hpp │ ├── threadpool.h │ └── threadtask.hpp │ ├── vcpu.cpp │ ├── vcpu.hpp │ ├── vcpu_run.cpp │ └── virtual_mem.hpp ├── src ├── assert.hpp ├── bench.cpp ├── load_file.hpp ├── simple.cpp ├── storage.cpp ├── tests.cpp └── timing.hpp └── tests ├── run_unit_tests.sh └── unit ├── .gitignore ├── CMakeLists.txt ├── basic.cpp ├── codebuilder.cpp ├── crc32.hpp ├── fork.cpp ├── mmap.cpp ├── remote.cpp ├── reset.cpp ├── tegridy.cpp └── timeout.cpp /.github/workflows/cmake.yml: -------------------------------------------------------------------------------- 1 | name: CMake Build 2 | 3 | on: 4 | push: 5 | branches: [ master ] 6 | pull_request: 7 | branches: [ master ] 8 | 9 | jobs: 10 | build: 11 | runs-on: ubuntu-latest 12 | env: 13 | BUILD_TYPE: ${{ matrix.buildtype }} 14 | 15 | strategy: 16 | matrix: 17 | compiler: [g++, clang++] 18 | buildtype: [Debug, Release] 19 | steps: 20 | - uses: actions/checkout@v2 21 | 22 | - name: Install dependencies 23 | run: sudo apt-get install -y cmake g++ clang 24 | 25 | - name: Emulator configuration 26 | working-directory: ${{github.workspace}} 27 | env: 28 | CXX: ${{ matrix.compiler }} 29 | run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} 30 | 31 | - name: Emulator build 32 | run: cmake --build ${{github.workspace}}/build 33 | -------------------------------------------------------------------------------- /.github/workflows/unittests.yml: -------------------------------------------------------------------------------- 1 | name: Unit Tests 2 | on: 3 | workflow_dispatch: 4 | 5 | jobs: 6 | build: 7 | runs-on: ubuntu-latest 8 | defaults: 9 | run: 10 | working-directory: ${{github.workspace}}/tests/unit 11 | 12 | steps: 13 | - uses: actions/checkout@v2 14 | 15 | - name: Install dependencies 16 | run: | 17 | git
submodule update --init ${{github.workspace}}/tests/Catch2 18 | 19 | - name: Configure 20 | run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} 21 | 22 | - name: Build the unittests 23 | run: cmake --build ${{github.workspace}}/build 24 | 25 | - name: Run tests 26 | working-directory: ${{github.workspace}}/build 27 | run: ctest --verbose . 28 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | **/build_* 2 | **/build 3 | **/.build 4 | .vscode/ 5 | guest/interrupts 6 | guest/guest.elf 7 | guest/musl/musl 8 | guest/musl/glibc 9 | guest/musl/simple 10 | guest/glibc/glibc 11 | guest/glibc/float 12 | guest/go/go 13 | 14 | guest/tests/glibc_test 15 | guest/tests/musl_test 16 | guest/tests/cxx_test 17 | guest/mini/mini.o 18 | guest/mini/mini 19 | 20 | crash-* 21 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "tests/Catch2"] 2 | path = tests/Catch2 3 | url = https://github.com/catchorg/Catch2.git 4 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.10) 2 | project(tinykvm CXX) 3 | 4 | option(FLTO "Link-time optimizations" OFF) 5 | option(SANITIZE "Enable address and ub sanitizers" OFF) 6 | 7 | set(CMAKE_CXX_FLAGS "-Wall -Wextra -O2 -ggdb3") 8 | 9 | if (FLTO) 10 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -flto=thin") 11 | set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fuse-ld=lld") 12 | endif() 13 | if (SANITIZE) 14 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address,undefined") 15 | endif() 16 | 17 | add_subdirectory(lib tinykvm) 18 | 19 | target_compile_definitions(tinykvm PUBLIC 20 | 
TINYKVM_ARCH_${TINYKVM_ARCH}=1 21 | TINYKVM_ARCH="${TINYKVM_ARCH}") 22 | 23 | add_executable(bench 24 | src/bench.cpp 25 | ) 26 | target_link_libraries(bench tinykvm) 27 | 28 | add_executable(tinytest 29 | src/tests.cpp 30 | ) 31 | target_link_libraries(tinytest tinykvm) 32 | 33 | add_executable(simplekvm 34 | src/simple.cpp 35 | ) 36 | target_link_libraries(simplekvm tinykvm) 37 | 38 | add_executable(storagekvm 39 | src/storage.cpp 40 | ) 41 | target_link_libraries(storagekvm tinykvm) 42 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Code of Conduct 2 | 3 | In the interest of fostering an open and welcoming environment, we as 4 | contributors and maintainers pledge to make participation in our project 5 | and our community a harassment-free experience for everyone. 6 | 7 | ## Expected Behavior 8 | 9 | - Be respectful, considerate, and constructive in all communications. 10 | - Value diverse perspectives and experiences. 11 | - Focus on ideas, not individuals. 12 | - Assume good intentions and engage in positive, open dialogue. 13 | 14 | ## Unacceptable Behavior 15 | 16 | - Harassment, hate speech, or derogatory comments. 17 | - Personal attacks or demeaning remarks. 18 | - Any conduct that could be reasonably perceived as unprofessional. 19 | 20 | ## Reporting Issues 21 | 22 | If you experience or witness unacceptable behavior, please contact 23 | compliance@varnish-software.com. All reports will be handled with confidentiality 24 | and prompt attention. 25 | 26 | *This Code of Conduct is adapted from the Contributor Covenant v2.1* 27 | (see [Contributor Covenant](https://www.contributor-covenant.org/version/2/1/code_of_conduct.html) for more details). 
28 | -------------------------------------------------------------------------------- /CONTRIBUTOR_LICENSE_AGREEMENT.md: -------------------------------------------------------------------------------- 1 | # TinyKVM Contributor License Agreement 2 | 3 | Thank you for your interest in contributing to TinyKVM and VMOD-TinyKVM 4 | (the "Project"). By submitting a Contribution to this Project, you agree to 5 | the following terms. 6 | 7 | ## 1. Definitions 8 | 9 | - **Contribution**: Any code, documentation, or other materials you submit. 10 | - **Project**: The TinyKVM/VMOD-TinyKVM codebase, distributed under GPL‑3.0 for 11 | open source purposes and subject to a commercial license by Varnish Software. 12 | 13 | ## 2. Grant of Rights 14 | 15 | By contributing, you grant Varnish Software and the Project a worldwide, 16 | royalty‑free, non‑exclusive, perpetual, and irrevocable license to use, 17 | reproduce, modify, and distribute your Contribution under the terms of: 18 | - The GNU General Public License (GPL‑3.0), and/or 19 | - A commercial license as determined by Varnish Software. 20 | 21 | ## 3. Representations and Warranties 22 | 23 | You represent that: 24 | - Your Contribution is your original work. 25 | - You have the right to grant the above licenses. 26 | - Your Contribution does not infringe on any third-party rights. 27 | 28 | ## 4. Patent License 29 | 30 | You grant a patent license to any patents you hold that are necessarily infringed 31 | by your Contribution. 32 | 33 | ## 5. Acknowledgement 34 | 35 | By submitting your Contribution, you acknowledge that you have read and agree 36 | to the terms of this Contributor License Agreement. 
37 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | TinyKVM and VMOD-TinyKVM 2 | Copyright (C) 2025 Varnish Software 3 | 4 | This program is free software: you can redistribute it and/or modify 5 | it under the terms of the GNU General Public License as published by 6 | the Free Software Foundation, version 3. 7 | 8 | This program is distributed in the hope that it will be useful, 9 | but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | GNU General Public License for more details. 12 | 13 | You should have received a copy of the GNU General Public License 14 | along with this program. If not, see <https://www.gnu.org/licenses/>. 15 | 16 | -------------------------------------------------------------------- 17 | For commercial licensing inquiries, please contact: 18 | Varnish Software - compliance@varnish-software.com 19 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | TinyKVM userspace emulator library 2 | ============== 3 | 4 | TinyKVM is a simple, slim and specialized userspace emulator library with _native performance_. 5 | 6 | TinyKVM is designed to execute regular Linux programs and also excels at request-based workloads in high-performance HTTP caches and web servers. 7 | 8 | KVM is the most robust, battle-hardened virtualization API that exists right now. It is only 40k LOC in the kernel, and it is the foundation of the modern public cloud. TinyKVM uses only a fraction of the KVM API. 9 | 10 | 11 | ## Userspace Emulation 12 | 13 | Userspace emulation means running userspace programs. You can take a regular Linux program that you just built in your terminal and run it in TinyKVM. It will have the same exact run-time, the same exact CPU features and so on.
14 | 15 | The rule-of-thumb is thus: If you can run it locally on your machine, you can run it in TinyKVM, at the same speed. 16 | 17 | But there are some differences: 18 | 19 | - TinyKVM has an execution timeout feature, allowing automatic stopping of stuck programs 20 | - TinyKVM has memory limits 21 | - TinyKVM can fork an initialized program into hundreds of pre-initialized VMs 22 | - TinyKVM can load programs while preferring hugepages, leading to performance gains 23 | 24 | 25 | ## Hardware Virtualization 26 | 27 | A very understated feature of running directly on the CPU using hardware virtualization is that you don't need fancy toolchains to build programs. This is a most surprising and welcome feature as building and working with other architectures is often a struggle. 28 | 29 | Secondly, as CPUs evolve, so does TinyKVM. It never has to be updated, yet it will continue to run at native speeds on your CPU. 30 | 31 | 32 | ## Licensing 33 | 34 | TinyKVM and VMOD-TinyKVM are released under a dual licensing model: 35 | 36 | - **Open Source License**: GPL‑3.0 (see [LICENSE](LICENSE)). 37 | - **Commercial License**: Available under terms controlled by Varnish Software. 38 | 39 | For commercial licensing inquiries, please contact: 40 | compliance@varnish-software.com. 41 | 42 | ## Contributing 43 | 44 | We welcome contributions! By submitting a pull request or other contribution, 45 | you agree to our [Contributor License Agreement](CONTRIBUTOR_LICENSE_AGREEMENT.md) 46 | and our [Code of Conduct](CODE_OF_CONDUCT.md). 47 | 48 | For details on how to contribute, please refer to this document. 
49 | -------------------------------------------------------------------------------- /fuzz/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.9) 2 | project(riscv CXX) 3 | 4 | #set(FUZZER_MODE "memory,undefined" "-fsanitize-memory-track-origins") 5 | set(FUZZER_MODE "address,undefined") 6 | 7 | add_subdirectory(../lib lib) 8 | target_compile_options(tinykvm PUBLIC "-fsanitize=${FUZZER_MODE},fuzzer-no-link") 9 | target_compile_definitions(tinykvm PUBLIC FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION=1) 10 | 11 | if (NOT CMAKE_CXX_COMPILER_ID STREQUAL "Clang") 12 | message(FATAL_ERROR "libfuzzer is part of the Clang compiler suite.") 13 | endif() 14 | 15 | set(SOURCES 16 | fuzz.cpp 17 | ) 18 | 19 | function(add_fuzzer NAME MODE) 20 | add_executable(${NAME} ${SOURCES}) 21 | target_link_libraries(${NAME} PUBLIC tinykvm) 22 | set_target_properties(${NAME} PROPERTIES CXX_STANDARD 17) 23 | target_link_libraries(${NAME} PUBLIC "-fsanitize=${FUZZER_MODE},fuzzer") 24 | target_link_libraries(${NAME} PUBLIC "-fuse-ld=lld") 25 | target_compile_definitions(${NAME} PRIVATE ${MODE}=1) 26 | endfunction() 27 | 28 | add_fuzzer(elffuzzer FUZZ_ELF) 29 | -------------------------------------------------------------------------------- /fuzz/fuzz.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "helpers.cpp" 3 | 4 | static const std::vector empty; 5 | static constexpr float TIMEOUT = 5.0f; 6 | 7 | const tinykvm::MachineOptions options { 8 | }; 9 | static tinykvm::Machine* machine; 10 | 11 | // In order to be able to inspect a coredump we want to 12 | // crash on every ASAN error. 
13 | extern "C" void __asan_on_error() 14 | { 15 | abort(); 16 | } 17 | extern "C" void __msan_on_error() 18 | { 19 | abort(); 20 | } 21 | 22 | static inline void fuzz_elf_loader(const uint8_t* data, size_t len) 23 | { 24 | using namespace tinykvm; 25 | const std::string_view bin {(const char*) data, len}; 26 | try { 27 | machine->reset_to(bin, options); 28 | //machine->run(TIMEOUT); 29 | } catch (const MachineException& e) { 30 | //printf(">>> Exception: %s\n", e.what()); 31 | } 32 | } 33 | 34 | extern "C" 35 | void LLVMFuzzerTestOneInput(const uint8_t* data, size_t len) 36 | { 37 | if (machine == nullptr) { 38 | tinykvm::Machine::init(); 39 | 40 | machine = new tinykvm::Machine { std::string_view{}, options }; 41 | machine->install_unhandled_syscall_handler([] (auto&, unsigned) {}); 42 | } 43 | #if defined(FUZZ_ELF) 44 | fuzz_elf_loader(data, len); 45 | #else 46 | #error "Unknown fuzzing mode" 47 | #endif 48 | } 49 | -------------------------------------------------------------------------------- /fuzz/fuzzer.sh: -------------------------------------------------------------------------------- 1 | export ASAN_OPTIONS=disable_coredump=0::unmap_shadow_on_exit=1::handle_segv=0::handle_sigfpe=0 2 | export CXX=clang++-14 3 | 4 | set -e 5 | mkdir -p .build 6 | pushd .build 7 | cmake .. -DCMAKE_BUILD_TYPE=Debug 8 | make -j4 9 | popd 10 | 11 | echo "Starting: ./build/elffuzzer -fork=1 -handle_fpe=0" 12 | ./.build/elffuzzer -max_len=8192 -handle_fpe=0 -handle_segv=0 -handle_abrt=0 $@ 13 | -------------------------------------------------------------------------------- /fuzz/helpers.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | /* It is necessary to link with libgcc when fuzzing. 4 | See llvm.org/PR30643 for details. 
*/ 5 | __attribute__((weak, no_sanitize("undefined"))) 6 | extern "C" __int128_t 7 | __muloti4(__int128_t a, __int128_t b, int* overflow) { 8 | const int N = (int)(sizeof(__int128_t) * CHAR_BIT); 9 | const __int128_t MIN = (__int128_t)1 << (N - 1); 10 | const __int128_t MAX = ~MIN; 11 | *overflow = 0; 12 | __int128_t result = a * b; 13 | if (a == MIN) { 14 | if (b != 0 && b != 1) 15 | *overflow = 1; 16 | return result; 17 | } 18 | if (b == MIN) { 19 | if (a != 0 && a != 1) 20 | *overflow = 1; 21 | return result; 22 | } 23 | __int128_t sa = a >> (N - 1); 24 | __int128_t abs_a = (a ^ sa) - sa; 25 | __int128_t sb = b >> (N - 1); 26 | __int128_t abs_b = (b ^ sb) - sb; 27 | if (abs_a < 2 || abs_b < 2) 28 | return result; 29 | if (sa == sb) { 30 | if (abs_a > MAX / abs_b) 31 | *overflow = 1; 32 | } else { 33 | if (abs_a > MIN / -abs_b) 34 | *overflow = 1; 35 | } 36 | return result; 37 | } 38 | -------------------------------------------------------------------------------- /guest/.gdbinit: -------------------------------------------------------------------------------- 1 | file guest.elf 2 | layout next 3 | layout next 4 | target remote localhost:2159 5 | break main 6 | -------------------------------------------------------------------------------- /guest/STREAM/.gitignore: -------------------------------------------------------------------------------- 1 | stream 2 | -------------------------------------------------------------------------------- /guest/STREAM/build.sh: -------------------------------------------------------------------------------- 1 | gcc-11 -static -O3 -march=native stream.c -o stream 2 | -------------------------------------------------------------------------------- /guest/STREAM/results.txt: -------------------------------------------------------------------------------- 1 | Natively run: 2 | ------------------------------------------------------------- 3 | STREAM version $Revision: 5.10 $ 4 | 
------------------------------------------------------------- 5 | This system uses 8 bytes per array element. 6 | ------------------------------------------------------------- 7 | Array size = 8000000 (elements), Offset = 0 (elements) 8 | Memory per array = 61.0 MiB (= 0.1 GiB). 9 | Total memory required = 183.1 MiB (= 0.2 GiB). 10 | Each kernel will be executed 10 times. 11 | The *best* time for each kernel (excluding the first iteration) 12 | will be used to compute the reported bandwidth. 13 | ------------------------------------------------------------- 14 | Your clock granularity/precision appears to be 7 microseconds. 15 | Each test below will take on the order of 8604 microseconds. 16 | (= 1229 clock ticks) 17 | Increase the size of the arrays if this shows that 18 | you are not getting at least 20 clock ticks per test. 19 | ------------------------------------------------------------- 20 | WARNING -- The above is only a rough guideline. 21 | For best results, please be sure you know the 22 | precision of your system timer. 23 | ------------------------------------------------------------- 24 | Function Best Rate MB/s Avg time Min time Max time 25 | Copy: 21727.7 0.006434 0.005891 0.006807 26 | Scale: 13892.0 0.009927 0.009214 0.010536 27 | Add: 14385.4 0.014087 0.013347 0.015086 28 | Triad: 14826.3 0.014147 0.012950 0.015277 29 | ------------------------------------------------------------- 30 | Solution Validates: avg error less than 1.000000e-13 on all three arrays 31 | ------------------------------------------------------------- 32 | 33 | Inside a VM: 34 | ------------------------------------------------------------- 35 | STREAM version $Revision: 5.10 $ 36 | ------------------------------------------------------------- 37 | This system uses 8 bytes per array element. 38 | ------------------------------------------------------------- 39 | Array size = 8000000 (elements), Offset = 0 (elements) 40 | Memory per array = 61.0 MiB (= 0.1 GiB). 
41 | Total memory required = 183.1 MiB (= 0.2 GiB). 42 | Each kernel will be executed 10 times. 43 | The *best* time for each kernel (excluding the first iteration) 44 | will be used to compute the reported bandwidth. 45 | ------------------------------------------------------------- 46 | Your clock granularity/precision appears to be 1 microseconds. 47 | Each test below will take on the order of 8112 microseconds. 48 | (= 8112 clock ticks) 49 | Increase the size of the arrays if this shows that 50 | you are not getting at least 20 clock ticks per test. 51 | ------------------------------------------------------------- 52 | WARNING -- The above is only a rough guideline. 53 | For best results, please be sure you know the 54 | precision of your system timer. 55 | ------------------------------------------------------------- 56 | Function Best Rate MB/s Avg time Min time Max time 57 | Copy: 21783.3 0.006461 0.005876 0.007131 58 | Scale: 13986.1 0.010020 0.009152 0.010908 59 | Add: 14413.4 0.013905 0.013321 0.015244 60 | Triad: 14648.6 0.014018 0.013107 0.014823 61 | ------------------------------------------------------------- 62 | Solution Validates: avg error less than 1.000000e-13 on all three arrays 63 | ------------------------------------------------------------- 64 | 65 | 66 | Calling a function in a forked VM: 67 | ------------------------------------------------------------- 68 | STREAM version $Revision: 5.10 $ 69 | ------------------------------------------------------------- 70 | This system uses 8 bytes per array element. 71 | ------------------------------------------------------------- 72 | Array size = 8000000 (elements), Offset = 0 (elements) 73 | Memory per array = 61.0 MiB (= 0.1 GiB). 74 | Total memory required = 183.1 MiB (= 0.2 GiB). 75 | Each kernel will be executed 10 times. 76 | The *best* time for each kernel (excluding the first iteration) 77 | will be used to compute the reported bandwidth. 
78 | ------------------------------------------------------------- 79 | Your clock granularity/precision appears to be 7 microseconds. 80 | Each test below will take on the order of 7499 microseconds. 81 | (= 1071 clock ticks) 82 | Increase the size of the arrays if this shows that 83 | you are not getting at least 20 clock ticks per test. 84 | ------------------------------------------------------------- 85 | WARNING -- The above is only a rough guideline. 86 | For best results, please be sure you know the 87 | precision of your system timer. 88 | ------------------------------------------------------------- 89 | Function Best Rate MB/s Avg time Min time Max time 90 | Copy: 21473.1 0.007722 0.005961 0.008135 91 | Scale: 13848.3 0.012373 0.009243 0.012904 92 | Add: 14665.4 0.016909 0.013092 0.016773 93 | Triad: 14174.9 0.017208 0.013545 0.016800 94 | ------------------------------------------------------------- 95 | Solution Validates: avg error less than 1.000000e-13 on all three arrays 96 | ------------------------------------------------------------- 97 | -------------------------------------------------------------------------------- /guest/build.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | WARN="-Wall -Wextra" 3 | CUSTOM="-static -ffreestanding -nostdlib -fno-exceptions -fno-rtti" 4 | COMMON="-O2 -ggdb3 -march=native -fno-omit-frame-pointer $CUSTOM" 5 | FILES="src/guest.cpp src/crc32c.cpp src/start.cpp" 6 | SYMS="-Wl,--defsym=syscall_entry=0x2000" 7 | 8 | g++ $WARN $COMMON -Ttext=201000 $SYMS $FILES -o guest.elf 9 | -------------------------------------------------------------------------------- /guest/glibc/.gdbinit: -------------------------------------------------------------------------------- 1 | file glinux 2 | layout next 3 | target remote localhost:2159 4 | break main 5 | -------------------------------------------------------------------------------- /guest/glibc/float.c: 
-------------------------------------------------------------------------------- 1 | #include 2 | 3 | __attribute__((noinline)) 4 | static void call_other_function() 5 | { 6 | } 7 | 8 | __attribute__((noinline)) 9 | static void takes_float(float f) 10 | { 11 | call_other_function(); 12 | printf("Float has value: %f\n", f); 13 | } 14 | 15 | int main() 16 | { 17 | takes_float(111.0f); 18 | } 19 | -------------------------------------------------------------------------------- /guest/glibc/float.gdb: -------------------------------------------------------------------------------- 1 | file float 2 | layout prev 3 | layout next 4 | target remote localhost:2159 5 | 6 | set debug remote 1 7 | 8 | break takes_float 9 | continue 10 | -------------------------------------------------------------------------------- /guest/glibc/glibc.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | static void test_threads(); 6 | extern "C" int gettid(); 7 | 8 | static int threads_test_suite_ok = 0; 9 | int main() 10 | { 11 | char* test = (char *)malloc(14); 12 | strcpy(test, "Hello World!\n"); 13 | printf("%.*s", 13, test); 14 | 15 | test_threads(); 16 | return 0; 17 | } 18 | 19 | extern "C" __attribute__((used)) 20 | void test() 21 | { 22 | /* Verify that the threads test-suite passed */ 23 | assert(threads_test_suite_ok == 1); 24 | } 25 | 26 | #include 27 | #include 28 | #include 29 | #include // C++ threads 30 | #include 31 | 32 | struct testdata 33 | { 34 | int depth = 0; 35 | const int max_depth = 10; 36 | std::vector threads; 37 | }; 38 | static pthread_mutex_t mtx; 39 | 40 | extern "C" { 41 | static void* thread_function1(void* data) 42 | { 43 | printf("Inside thread function1, x = %d\n", *(int*) data); 44 | thread_local int test = 2021; 45 | printf("test @ %p, test = %d\n", &test, test); 46 | assert(test == 2021); 47 | return NULL; 48 | } 49 | static void* thread_function2(void* data) 50 | { 51 
| printf("Inside thread function2, x = %d\n", *(int*) data); 52 | thread_local int test = 2022; 53 | assert(test == 2022); 54 | pthread_mutex_lock(&mtx); 55 | 56 | printf("Yielding from thread2, expecting to be returned to main thread\n"); 57 | sched_yield(); 58 | printf("Returned to thread2, expecting to exit to after main thread yield\n"); 59 | 60 | pthread_mutex_unlock(&mtx); 61 | pthread_exit(NULL); 62 | } 63 | static void* recursive_function(void* tdata) 64 | { 65 | auto* data = (testdata*) tdata; 66 | data->depth++; 67 | printf("%d: Thread depth %d / %d\n", 68 | gettid(), data->depth, data->max_depth); 69 | 70 | if (data->depth < data->max_depth) 71 | { 72 | pthread_t t; 73 | int res = pthread_create(&t, NULL, recursive_function, data); 74 | if (res < 0) { 75 | printf("Failed to create thread!\n"); 76 | return NULL; 77 | } 78 | data->threads.push_back(t); 79 | } 80 | printf("%d: Thread yielding %d / %d\n", 81 | gettid(), data->depth, data->max_depth); 82 | sched_yield(); 83 | 84 | printf("%d: Thread exiting %d / %d\n", 85 | gettid(), data->depth, data->max_depth); 86 | data->depth--; 87 | return NULL; 88 | } 89 | } 90 | 91 | void test_threads() 92 | { 93 | int x = 666; 94 | pthread_t t1; 95 | pthread_t t2; 96 | int res; 97 | pthread_mutex_init(&mtx, NULL); 98 | 99 | //printf("*** Testing pthread_create and sched_yield...\n"); 100 | res = pthread_create(&t1, NULL, thread_function1, &x); 101 | if (res < 0) { 102 | printf("Failed to create thread!\n"); 103 | return; 104 | } 105 | pthread_join(t1, NULL); 106 | 107 | res = pthread_create(&t2, NULL, thread_function2, &x); 108 | if (res < 0) { 109 | printf("Failed to create thread!\n"); 110 | return; 111 | } 112 | 113 | printf("Yielding from main thread, expecting to return to thread2\n"); 114 | // Ride back to thread2 using contested lock 115 | pthread_mutex_lock(&mtx); 116 | pthread_mutex_unlock(&mtx); 117 | printf("After yielding from main thread, looking good!\n"); 118 | // remove the thread 119 | 
pthread_join(t2, NULL); 120 | 121 | printf("*** Now testing recursive threads...\n"); 122 | static testdata rdata; 123 | recursive_function(&rdata); 124 | // now we have to yield until all the detached children also exit 125 | printf("*** Yielding until all children are dead!\n"); 126 | while (rdata.depth > 0) sched_yield(); 127 | 128 | printf("*** Joining until all children are freed!\n"); 129 | for (auto pt : rdata.threads) pthread_join(pt, NULL); 130 | 131 | auto* cpp_thread = new std::thread( 132 | [] (int a, long long b, std::string c) -> void { 133 | printf("Hello from a C++ thread\n"); 134 | assert(a == 1); 135 | assert(b == 2LL); 136 | assert(c == std::string("test")); 137 | printf("C++ thread arguments are OK, yielding...\n"); 138 | std::this_thread::yield(); 139 | printf("C++ thread exiting...\n"); 140 | }, 141 | 1, 2L, std::string("test")); 142 | printf("Returned to main. Yielding back...\n"); 143 | std::this_thread::yield(); 144 | printf("Returned to main. Joining the C++ thread\n"); 145 | cpp_thread->join(); 146 | printf("Deleting the C++ thread\n"); 147 | delete cpp_thread; 148 | 149 | printf("SUCCESS\n"); 150 | threads_test_suite_ok = 1; 151 | } 152 | -------------------------------------------------------------------------------- /guest/glibc/s.gdb: -------------------------------------------------------------------------------- 1 | file glibc 2 | layout next 3 | target remote localhost:2159 4 | continue 5 | -------------------------------------------------------------------------------- /guest/go/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | import "fmt" 3 | 4 | func main() { 5 | fmt.Println("hello world") 6 | } 7 | -------------------------------------------------------------------------------- /guest/go/s.gdb: -------------------------------------------------------------------------------- 1 | file go 2 | #break proc.go:1208 3 | #break asm_amd64.s:272 4 | #break proc.go:113 5 | 
;; Minimal freestanding guest program.
;; Exports three symbols: _start (entry point), test (empty function that
;; returns immediately) and rexit (exits using the value found in RAX).
[BITS 64]
global _start:function
global test:function
global rexit:function

SECTION .text

ALIGN 0x8
_start:
	;; NOTE(review): only the low 16 bits of RDI are written here; the upper
	;; bits keep whatever value they had at entry - presumably zero, confirm.
	mov di, 0x1337

	;; Point the stack at a fixed address and store RDI on it.
	mov rsp, 0x1ff000
	push rdi

	;; NOTE(review): as above, only AX is written, not the full RAX.
	mov ax, 60 ;; exit
	syscall

ALIGN 0x8
test:
	;; Intentionally empty: returns straight to the caller.
	ret

ALIGN 0x8
rexit:
	;; Use the value currently in RAX as the first syscall argument (RDI).
	mov rdi, rax
	mov rax, 60 ;; exit
	syscall
-------------------------------------------------------------------------------- 1 | file glibc 2 | layout next 3 | layout next 4 | target remote localhost:2159 5 | -------------------------------------------------------------------------------- /guest/musl/musl.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | static void test_threads(); 5 | 6 | static long nprimes = 0; 7 | 8 | __attribute__((noinline)) 9 | void test_ud2() 10 | { 11 | asm("ud2"); 12 | } 13 | 14 | int main(int argc, char** argv) 15 | { 16 | printf("Hello musl World!\n"); 17 | 18 | static const int N = 1000000; 19 | char prime[N]; 20 | memset(prime, 1, sizeof(prime)); 21 | for (long n = 2; n < N; n++) 22 | { 23 | if (prime[n]) { 24 | nprimes += 1; 25 | for (long i = n*n; i < N; i += n) 26 | prime[i] = 0; 27 | } 28 | } 29 | 30 | test_threads(); 31 | //test_ud2(); 32 | return 0; 33 | } 34 | 35 | #include 36 | static int t = 0; 37 | 38 | __attribute__((used)) 39 | void bench() 40 | { 41 | //assert(t == 0); 42 | //t = 1; 43 | assert(nprimes == 78498); 44 | } 45 | 46 | __attribute__((used)) 47 | void bench_write() 48 | { 49 | assert(t == 0); 50 | t = 1; 51 | assert(nprimes == 78498); 52 | } 53 | 54 | asm(".global one_vmexit\n" \ 55 | ".type one_vmexit, function\n" \ 56 | "one_vmexit:\n" \ 57 | " out %ax, $1\n" \ 58 | " ret\n"); 59 | extern void one_vmexit(); 60 | 61 | __attribute__((used)) 62 | void bench_vmexits(int count) 63 | { 64 | while (count--) one_vmexit(); 65 | } 66 | 67 | #include 68 | #include 69 | 70 | static void* thread_function1(void* data) 71 | { 72 | printf("Inside thread function1, x = %d\n", *(int*) data); 73 | static __thread int test = 2021; 74 | printf("test @ %p, test = %d\n", &test, test); 75 | assert(test == 2021); 76 | fflush(stdout); 77 | pthread_exit(NULL); 78 | } 79 | static void* thread_function2(void* data) 80 | { 81 | printf("Inside thread function2, x = %d\n", *(int*) data); 82 | static __thread 
int test = 2022; 83 | assert(test == 2022); 84 | 85 | printf("Yielding from thread2, expecting to be returned to main thread\n"); 86 | sched_yield(); 87 | printf("Returned to thread2, expecting to exit to after main thread yield\n"); 88 | 89 | pthread_exit(NULL); 90 | } 91 | 92 | void test_threads() 93 | { 94 | int x = 666; 95 | pthread_t t1; 96 | pthread_t t2; 97 | int res; 98 | 99 | printf("*** Testing pthread_create and sched_yield...\n"); 100 | res = pthread_create(&t1, NULL, thread_function1, &x); 101 | if (res < 0) { 102 | printf("Failed to create thread!\n"); 103 | return; 104 | } 105 | pthread_join(t1, NULL); 106 | 107 | res = pthread_create(&t2, NULL, thread_function2, &x); 108 | if (res < 0) { 109 | printf("Failed to create thread!\n"); 110 | return; 111 | } 112 | 113 | printf("Yielding from main thread, expecting to return to thread2\n"); 114 | // return back to finish thread2 115 | sched_yield(); 116 | printf("After yielding from main thread, looking good!\n"); 117 | // remove the thread 118 | pthread_join(t2, NULL); 119 | 120 | printf("SUCCESS\n"); 121 | } 122 | -------------------------------------------------------------------------------- /guest/musl/s.gdb: -------------------------------------------------------------------------------- 1 | set debuginfod enabled on 2 | file musl 3 | layout next 4 | layout next 5 | target remote localhost:2159 6 | -------------------------------------------------------------------------------- /guest/musl/simple.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int main() 4 | { 5 | return 0; 6 | } 7 | 8 | static int t = 0; 9 | 10 | __attribute__((used)) 11 | void bench() 12 | { 13 | //assert(t == 0); 14 | //t = 1; 15 | } 16 | 17 | __attribute__((used)) 18 | void bench_write() 19 | { 20 | assert(t == 0); 21 | t = 1; 22 | } 23 | 24 | asm(".global one_vmexit\n" \ 25 | ".type one_vmexit, function\n" \ 26 | "one_vmexit:\n" \ 27 | " out %ax, $1\n" \ 28 | " ret\n"); 29 
#!/usr/bin/env bash
# Builds the two Nim guest programs:
#   storage - linked at 0x44000000, provides the remote_* functions
#   main    - linked against the symbols extracted from storage
#
# The script relies on bash-only parameter expansion (${var/pattern/repl}),
# hence the explicit bash shebang.

# Derive the Nim library directory from the location reported by whereis.
NIM_LIBS="$(whereis nim)"
NIM_LIBS="${NIM_LIBS##*: }"
NIM_LIBS="${NIM_LIBS/bin*/lib}"
echo ">>> Nim libs: $NIM_LIBS"

WARN="-Wno-discarded-qualifiers"

set -ev
# Quote the paths: an unquoted $PWD containing whitespace would make
# rm -rf operate on the wrong directories.
rm -rf "$PWD/storage_nimcache"
rm -rf "$PWD/main_nimcache"

nim c --nimcache:"$PWD/storage_nimcache" --colors:on --os:linux --mm:arc --noMain --app:lib -d:release -c storage.nim
musl-gcc -static -O2 -DSTORAGE=1 -Wl,-Ttext-segment=0x44000000 $WARN -I"$NIM_LIBS" main.c storage_nimcache/*.c -o storage

# The patterns are meant for objcopy (-w), not for the shell, so quote them.
objcopy -w --extract-symbol '--strip-symbol=!remote*' '--strip-symbol=*' storage storage.syms

nim c --nimcache:"$PWD/main_nimcache" --colors:on --os:linux --mm:arc --noMain --app:lib -d:release -c main.nim
musl-gcc -static -O2 -Wl,--just-symbols=storage.syms $WARN -I"$NIM_LIBS" main.c main_nimcache/*.c -o main
/*
 * printf-style helper that formats into a stack buffer and emits the
 * result with a single write(2) to file descriptor 1.
 *
 * Returns the result of write(), or the negative vsnprintf() result when
 * formatting fails. Output longer than the buffer is truncated.
 */
static int safeprint(const char* fmt, ...)
{
	char buffer[4096];

	va_list va;
	va_start(va, fmt);
	int len = vsnprintf(buffer, sizeof(buffer), fmt, va);
	va_end(va);

	/* Formatting error: nothing sensible to write. */
	if (len < 0)
		return len;
	/* vsnprintf returns the length the full output WOULD have had; when
	   that exceeds the buffer only sizeof(buffer)-1 characters were
	   stored, so clamp before handing the length to write(). */
	if ((size_t)len >= sizeof(buffer))
		len = sizeof(buffer) - 1;

	return write(1, buffer, len);
}
/**
 * Computes the CRC-32C checksum of a byte range using the SSE4.2 CRC32
 * instruction (initial value 0xFFFFFFFF, final XOR with 0xFFFFFFFF).
 *
 * @param buffer  First byte of the input; may have any alignment.
 * @param len     Number of bytes to process; 0 yields a checksum of 0.
 * @return        The CRC-32C of the input.
 *
 * Multi-byte loads go through memcpy instead of casting the byte pointer
 * to uint32_t* / uint16_t*: the casts dropped const and violated strict
 * aliasing. The target attribute lets this function build even when
 * SSE4.2 is not enabled for the whole translation unit.
 */
__attribute__((target("sse4.2")))
uint32_t crc32c_sse42(const uint8_t* buffer, size_t len)
{
	uint32_t hash = 0xFFFFFFFF;
	// 8-bits until 4-byte aligned
	while ((reinterpret_cast<uintptr_t>(buffer) & 3) != 0 && len > 0) {
		hash = _mm_crc32_u8(hash, *buffer);
		buffer++; len--;
	}
	// 16 bytes at a time
	while (len >= 16) {
		for (int i = 0; i < 16; i += 4) {
			uint32_t word;
			__builtin_memcpy(&word, buffer + i, sizeof(word));
			hash = _mm_crc32_u32(hash, word);
		}
		buffer += 16; len -= 16;
	}
	// 4 bytes at a time
	while (len >= 4) {
		uint32_t word;
		__builtin_memcpy(&word, buffer, sizeof(word));
		hash = _mm_crc32_u32(hash, word);
		buffer += 4; len -= 4;
	}
	// remaining bytes
	if (len & 2) {
		uint16_t half;
		__builtin_memcpy(&half, buffer, sizeof(half));
		hash = _mm_crc32_u16(hash, half);
		buffer += 2;
	}
	if (len & 1) {
		hash = _mm_crc32_u8(hash, *buffer);
	}
	return hash ^ 0xFFFFFFFF;
}
-------------------------------------------------------------------------------- /guest/src/start.cpp: -------------------------------------------------------------------------------- 1 | #include "api.hpp" 2 | 3 | asm(".global syscall\n" 4 | "syscall:\n" 5 | " add $0xffffa000, %edi\n" 6 | " movl $0, (%rdi)\n" 7 | " ret\n"); 8 | 9 | asm(".global native_syscall\n" 10 | "native_syscall:\n" 11 | " mov %rdi, %rax\n" 12 | " mov %rsi, %rdi\n" 13 | " mov %rdx, %rsi\n" 14 | " mov %rcx, %rdx\n" 15 | " syscall\n" 16 | " ret\n"); 17 | 18 | asm(".global rexit\n" 19 | "rexit:\n" 20 | " mov %rax, %rdi\n" 21 | " out %ax, $60\n"); 22 | 23 | asm(".global _start\n" 24 | "_start:\n" 25 | " xor %ebp, %ebp\n" 26 | " pop %rdi\n" 27 | " mov %rsp, %rsi\n" 28 | " call libc_start\n" 29 | " jmp rexit\n"); 30 | 31 | 32 | extern int main(int, char**); 33 | 34 | extern "C" 35 | int libc_start(int argc, char** argv) 36 | { 37 | /* Global constructors */ 38 | extern void(*__init_array_start [])(); 39 | extern void(*__init_array_end [])(); 40 | const int count = __init_array_end - __init_array_start; 41 | for (int i = 0; i < count; i++) { 42 | __init_array_start[i](); 43 | } 44 | 45 | return main(argc, argv); 46 | } 47 | 48 | extern "C" __attribute__((noreturn)) void exit(int code) __THROW { 49 | syscall(0, code); 50 | __builtin_unreachable(); 51 | } 52 | -------------------------------------------------------------------------------- /guest/storage/.gitignore: -------------------------------------------------------------------------------- 1 | *.syms 2 | storage 3 | main 4 | -------------------------------------------------------------------------------- /guest/storage/build.sh: -------------------------------------------------------------------------------- 1 | set -v 2 | gcc-12 -static -O2 -Wl,-Ttext-segment=0x44000000 storage.c -o storage 3 | 4 | objcopy -w --extract-symbol --strip-symbol=!remote* --strip-symbol=* storage storage.syms 5 | gcc-12 -static -O2 -Wl,--just-symbols=storage.syms 
main.c -o main 6 | -------------------------------------------------------------------------------- /guest/storage/main.c: -------------------------------------------------------------------------------- 1 | #include 2 | extern int remote_function(int(*callback)(int), int value); 3 | 4 | static int double_int(int value) 5 | { 6 | return value * 2; 7 | } 8 | 9 | int main() 10 | { 11 | printf("Jumping to %p\n", &remote_function); 12 | fflush(stdout); 13 | return remote_function(double_int, 21); 14 | } 15 | 16 | int do_calculation(int value) 17 | { 18 | return remote_function(double_int, value); 19 | } 20 | 21 | int simple_calculation(int value) 22 | { 23 | return value; 24 | } 25 | -------------------------------------------------------------------------------- /guest/storage/storage.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | extern int remote_function(int (*arg)(int), int value) 4 | { 5 | return arg(value); 6 | } 7 | 8 | int main() 9 | { 10 | printf("Hello from Storage!\n"); 11 | return 0; 12 | } 13 | -------------------------------------------------------------------------------- /guest/tests/build.sh: -------------------------------------------------------------------------------- 1 | musl-gcc -static -O0 -ggdb3 test.c -o musl_test 2 | gcc -static -O0 -ggdb3 test.c -o glibc_test 3 | #g++ -static -O2 cxx_test.cpp -o cxx_test 4 | -------------------------------------------------------------------------------- /guest/tests/cxx_test.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | static long nprimes = 0; 8 | 9 | int main(int argc, char** argv) 10 | { 11 | char* test = (char *)malloc(14); 12 | strcpy(test, argv[1]); 13 | printf("%.*s\n", 13, test); 14 | free(test); 15 | 16 | static const int N = 1000000; 17 | char prime[N]; 18 | memset(prime, 1, sizeof(prime)); 19 | for (long n = 2; n < N; n++) 20 | 
/**
 * Allocates 1 MiB on the heap, stores 44 in its first int and returns the
 * address as an integer. The allocation is intentionally not freed here.
 *
 * @return The address of the allocation, or 0 when malloc fails.
 */
extern "C" __attribute__((used))
long test_malloc()
{
	int *p = (int *)malloc(1024 * 1024 * 1);
	// Do not write through a failed allocation.
	if (p == nullptr)
		return 0;
	*p = 44;
	return (long)p;
}
}; 117 | 118 | inline void encode_color(uint32_t& px, int count, int max_count) 119 | { 120 | px = color_mapping[count & 15]; 121 | } 122 | 123 | using fractalf_t = float; 124 | 125 | // Function to draw mandelbrot set 126 | template 127 | __attribute__((optimize("unroll-loops"))) 128 | std::array 129 | fractal(fractalf_t left, fractalf_t top, fractalf_t xside, fractalf_t yside) 130 | { 131 | std::array bitmap {}; 132 | 133 | // setting up the xscale and yscale 134 | const fractalf_t xscale = xside / DimX; 135 | const fractalf_t yscale = yside / DimY; 136 | 137 | // scanning every point in that rectangular area. 138 | // Each point represents a Complex number (x + yi). 139 | // Iterate that complex number 140 | for (int y = 0; y < DimY / 2; y++) 141 | #pragma GCC unroll(8) 142 | for (int x = 0; x < DimX; x++) 143 | { 144 | fractalf_t c_real = x * xscale + left; 145 | fractalf_t c_imag = y * yscale + top; 146 | fractalf_t z_real = 0; 147 | fractalf_t z_imag = 0; 148 | int count = 0; 149 | 150 | // Calculate whether c(c_real + c_imag) belongs 151 | // to the Mandelbrot set or not and draw a pixel 152 | // at coordinates (x, y) accordingly 153 | // If you reach the Maximum number of iterations 154 | // and If the distance from the origin is 155 | // greater than 2 exit the loop 156 | #pragma GCC unroll 4 157 | while ((z_real * z_real + z_imag * z_imag < 4) 158 | && (count < MaxCount)) 159 | { 160 | // Calculate Mandelbrot function 161 | // z = z*z + c where z is a complex number 162 | fractalf_t tempx = 163 | z_real * z_real - z_imag * z_imag + c_real; 164 | z_imag = 2 * z_real * z_imag + c_imag; 165 | z_real = tempx; 166 | count++; 167 | } 168 | 169 | encode_color(bitmap[x + y * DimX], count, MaxCount); 170 | } 171 | for (int y = 0; y < DimY / 2; y++) { 172 | memcpy(&bitmap[(DimY-1 - y) * DimX], &bitmap[y * DimX], 4 * DimX); 173 | } 174 | return bitmap; 175 | } 176 | 177 | asm(".global backend_response\n" \ 178 | ".type backend_response, function\n" \ 179 | 
"backend_response:\n" \ 180 | " mov $0xFFFF, %eax\n" \ 181 | " out %eax, $0\n"); 182 | 183 | extern "C" void __attribute__((noreturn)) 184 | backend_response(const void *t, uint64_t, const void *c, uint64_t); 185 | 186 | extern "C" __attribute__((used)) 187 | long test_expensive() 188 | { 189 | constexpr int counter = 0; 190 | constexpr size_t width = 512; 191 | constexpr size_t height = 512; 192 | 193 | const fractalf_t factor = powf(2.0, counter * -0.1); 194 | const fractalf_t x1 = -1.5; 195 | const fractalf_t x2 = 2.0 * factor; 196 | const fractalf_t y1 = -1.0 * factor; 197 | const fractalf_t y2 = 2.0 * factor; 198 | 199 | auto bitmap = fractal (x1, y1, x2, y2); 200 | auto* data = (const uint8_t *)bitmap.data(); 201 | 202 | std::vector png; 203 | lodepng::encode(png, data, width, height); 204 | 205 | const char ctype[] = "image/png"; 206 | backend_response(ctype, sizeof(ctype)-1, png.data(), png.size()); 207 | } 208 | -------------------------------------------------------------------------------- /guest/tests/debug.sh: -------------------------------------------------------------------------------- 1 | gdb -x remote.gdb 2 | -------------------------------------------------------------------------------- /guest/tests/remote.gdb: -------------------------------------------------------------------------------- 1 | file glibc_test 2 | target remote localhost:2159 3 | layout next 4 | layout next 5 | #set debug remote 1 6 | break test.c:32 7 | cont 8 | -------------------------------------------------------------------------------- /guest/tests/test.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | static long nprimes = 0; 8 | 9 | int main(int argc, char** argv) 10 | { 11 | char* test = (char *)malloc(14); 12 | strcpy(test, argv[1]); 13 | printf("%.*s\n", 13, test); 14 | free(test); 15 | 16 | static const int N = 1000000; 17 | char prime[N]; 18 | memset(prime, 
1, sizeof(prime)); 19 | for (long n = 2; n < N; n++) 20 | { 21 | if (prime[n]) { 22 | nprimes += 1; 23 | for (long i = n*n; i < N; i += n) 24 | prime[i] = 0; 25 | } 26 | } 27 | return 666; 28 | } 29 | 30 | __attribute__((used)) 31 | int test_return() 32 | { 33 | return 666; 34 | } 35 | 36 | __attribute__((used)) 37 | void test_ud2() 38 | { 39 | asm("ud2"); 40 | } 41 | 42 | __attribute__((used)) 43 | int test_read() 44 | { 45 | assert(nprimes == 78498); 46 | return 200; 47 | } 48 | 49 | static int t = 0; 50 | 51 | __attribute__((used)) 52 | void test_write() 53 | { 54 | asm("" ::: "memory"); 55 | assert(t == 0); 56 | asm("" ::: "memory"); 57 | t = 1; 58 | asm("" ::: "memory"); 59 | assert(t == 1); 60 | } 61 | 62 | static int cow = 0; 63 | 64 | __attribute__((used)) 65 | int test_copy_on_write() 66 | { 67 | assert(cow == 0); 68 | cow = 1; 69 | return 666; 70 | } 71 | 72 | __attribute__((used)) 73 | long test_syscall() 74 | { 75 | register long status asm("rdi") = 555; 76 | long ret = 60; 77 | asm("syscall" : "+a"(ret) : "r"(status) : "rcx", "r11", "memory"); 78 | return ret; 79 | } 80 | 81 | __attribute__((used)) 82 | long test_malloc() 83 | { 84 | 85 | int* p = (int *)malloc(4); 86 | 87 | return (uintptr_t) p; 88 | } 89 | 90 | 91 | __attribute__((used)) 92 | int write_value(int value) 93 | { 94 | cow = value; 95 | return value; 96 | } 97 | __attribute__((used)) 98 | int test_is_value(int value) 99 | { 100 | assert(cow == value); 101 | return 666; 102 | } 103 | 104 | __attribute__((used)) 105 | int test_loop() 106 | { 107 | while(1); 108 | } 109 | 110 | asm(".global vcpuid\n" 111 | ".type vcpuid, @function\n" 112 | "vcpuid:\n" 113 | " mov %gs:(0x0), %eax\n" 114 | " ret\n"); 115 | extern int vcpuid(); 116 | 117 | __attribute__((used)) 118 | int test_vcpu() 119 | { 120 | return vcpuid(); 121 | } 122 | -------------------------------------------------------------------------------- /lib/CMakeLists.txt: 
cmake_minimum_required(VERSION 3.10)
project(tinykvm)
#
# C++20 KVM library
#

option(KVM_EXPERIMENTAL "Enable experimental features" OFF)

# Pick the architecture backend from the target processor.
if (CMAKE_SYSTEM_PROCESSOR MATCHES "(x86)|(X86)|(amd64)|(AMD64)")
	set (TINYKVM_ARCH "AMD64" CACHE STRING "TinyKVM Arch Backend")
else()
	# TODO: Detect ARM properly
	set (TINYKVM_ARCH "ARM64" CACHE STRING "TinyKVM Arch Backend")
endif()

set (SOURCES
	tinykvm/machine.cpp
	tinykvm/machine_elf.cpp
	tinykvm/machine_env.cpp
	tinykvm/machine_utils.cpp
	tinykvm/memory.cpp
	tinykvm/memory_bank.cpp
	tinykvm/memory_maps.cpp
	tinykvm/page_streaming.cpp
	tinykvm/remote.cpp
	tinykvm/smp.cpp
	tinykvm/vcpu.cpp
	tinykvm/vcpu_run.cpp

	tinykvm/linux/fds.cpp
	tinykvm/linux/signals.cpp
	tinykvm/linux/system_calls.cpp
	tinykvm/linux/threads.cpp
)
if (TINYKVM_ARCH STREQUAL "AMD64")
	list(APPEND SOURCES
		tinykvm/amd64/gdt.cpp
		tinykvm/amd64/idt.cpp
		tinykvm/amd64/tss.cpp
		tinykvm/amd64/paging.cpp
		tinykvm/amd64/usercode.cpp
		tinykvm/amd64/vdso.cpp
		tinykvm/rsp_client.cpp
	)
endif()

add_library(tinykvm STATIC ${SOURCES})
target_compile_definitions(tinykvm PUBLIC _GNU_SOURCE=1)
target_include_directories(tinykvm PUBLIC .)
target_compile_features(tinykvm PUBLIC cxx_std_20)
target_link_libraries(tinykvm PUBLIC pthread rt)

# -mavx2 is an x86-only flag; passing it to a non-x86 compiler is an error,
# so only apply it when the AMD64 backend is selected.
if (TINYKVM_ARCH STREQUAL "AMD64")
	set_source_files_properties(
		tinykvm/page_streaming.cpp
		PROPERTIES COMPILE_FLAGS -mavx2)
endif()

if (CMAKE_BUILD_TYPE STREQUAL "Debug")
	target_compile_options(tinykvm PUBLIC -O0 -ggdb3)
else()
	target_compile_options(tinykvm PUBLIC -O2 -g)
endif()
if (KVM_EXPERIMENTAL)
	target_compile_definitions(tinykvm PUBLIC TINYKVM_FAST_EXECUTION_TIMEOUT=1)
endif()
#define PDE64_RW (1UL << 1) 48 | #define PDE64_USER (1UL << 2) 49 | #define PDE64_WRITE_THROUGH (1UL << 3) 50 | #define PDE64_CACHE_DISABLE (1UL << 4) 51 | #define PDE64_ACCESSED (1UL << 5) 52 | #define PDE64_DIRTY (1UL << 6) 53 | #define PDE64_PS (1UL << 7) 54 | #define PDE64_G (1UL << 8) 55 | #define PDE64_NX (1UL << 63) 56 | 57 | #define PDE64_PDPT_SIZE (1ULL << 39) 58 | #define PDE64_PD_SIZE (1ULL << 30) 59 | #define PDE64_PT_SIZE (1ULL << 21) 60 | #define PDE64_PTE_SIZE (1ULL << 12) 61 | 62 | 63 | #define AMD64_MSR_STAR 0xC0000081 64 | #define AMD64_MSR_LSTAR 0xC0000082 65 | 66 | #define AMD64_MSR_FS_BASE 0xC0000100 67 | #define AMD64_MSR_GS_BASE 0xC0000101 68 | -------------------------------------------------------------------------------- /lib/tinykvm/amd64/builtin/.gitignore: -------------------------------------------------------------------------------- 1 | interrupts 2 | vsyscall 3 | usercode 4 | -------------------------------------------------------------------------------- /lib/tinykvm/amd64/builtin/assembly.sh: -------------------------------------------------------------------------------- 1 | nasm -f bin -o interrupts interrupts.asm 2 | xxd -i interrupts > kernel_assembly.h 3 | -------------------------------------------------------------------------------- /lib/tinykvm/amd64/builtin/disassembly.sh: -------------------------------------------------------------------------------- 1 | objdump -b binary -mi386:x86-64 -D interrupts 2 | -------------------------------------------------------------------------------- /lib/tinykvm/amd64/builtin/interrupts.asm: -------------------------------------------------------------------------------- 1 | [BITS 64] 2 | global vm64_exception 3 | 4 | ;; CPU exception frame: 5 | ;; 1. stack rsp+32 6 | ;; 2. rflags rsp+24 7 | ;; 3. cs rsp+16 8 | ;; 4. rip rsp+8 9 | ;; 5. 
;;    code   rsp+0

;; NOTE: the host embeds the assembled form of this file (kernel_assembly.h,
;; produced by assembly.sh). Re-run that script after changing anything here.

;; Stub for an exception without an error code: report the vector on
;; I/O port 128+N, then return from the interrupt.
%macro CPU_EXCEPT 1
ALIGN 0x8
	out 128 + %1, ax
	iretq
%endmacro
;; Stub for an exception that pushes an error code: report the vector,
;; then discard the error code before returning (see .vm64_pop_code).
%macro CPU_EXCEPT_CODE 1
ALIGN 0x8
	out 128 + %1, ax
	jmp .vm64_pop_code
%endmacro
;; Page faults use a dedicated handler; the macro argument is unused.
%macro CPU_EXCEPT_PF 1
ALIGN 0x8
	jmp .vm64_page_fault
%endmacro

;; Offset table at the start of the blob. The field order matches
;; struct iasm_header on the host side.
	dw .vm64_syscall
	dw .vm64_gettimeofday
	dw .vm64_exception
	dw .vm64_except1 - .vm64_exception  ;; size of one exception stub
	dw .vm64_dso

;; Guest-physical addresses of the KVM paravirtual clock structures.
;; The time-info structure is registered with MSR_KVM_SYSTEM_TIME_NEW as
;; (address | 1), i.e. 0x3021, so its fields live at 0x3020 + offset:
;;   +0 version, +8 tsc_timestamp, +16 system_time,
;;   +24 tsc_to_system_mul, +28 tsc_shift
%define PVCLOCK_WALL_CLOCK 0x3000   ;; +4 sec, +8 nsec
%define PVCLOCK_TIME_INFO  0x3020

ALIGN 0x10
.vm64_syscall:
	;; Compare the full 32-bit syscall number: a 16-bit compare would also
	;; match unrelated numbers that merely share the low 16 bits.
	cmp eax, 158 ;; PRCTL
	je .vm64_prctl
	cmp eax, 228 ;; CLOCK_GETTIME
	je .vm64_clock_gettime
	cmp eax, 0x1F777 ;; ENTRY SYSCALL
	je .vm64_entrycall
	cmp eax, 0x1F707 ;; REENTRY SYSCALL
	je .vm64_reentrycall
	;; Everything else is forwarded to the host
	out 0, eax
	o64 sysret

;; arch_prctl: rdi = code, rsi = FS base (SET_FS) or destination pointer (GET_FS)
.vm64_prctl:
	stac
	push rsi
	push rcx
	push rdx
	cmp rdi, 0x1002 ;; PRCTL: SET_FS
	jne .vm64_prctl_get
	;; SET_FS := rsi
	mov ecx, 0xC0000100 ;; FSBASE
	mov eax, esi ;; low-32 FS base
	shr rsi, 32
	mov edx, esi ;; high-32 FS base
	wrmsr
	xor rax, rax ;; return 0
.vm64_prctl_end:
	;; rcx must be restored before sysret, which returns to the address in rcx
	pop rdx
	pop rcx
	pop rsi
	clac
	o64 sysret
.vm64_prctl_get:
	cmp rdi, 0x1003 ;; PRCTL: GET_FS
	jne .vm64_prctl_trap
	;; GET_FS [rsi] := FSBASE
	mov ecx, 0xC0000100 ;; FSBASE
	rdmsr
	shl rdx, 32 ;; lift high-32 FS base
	or rdx, rax ;; low-32 FS base
	;; Store the combined 64-bit value. Storing rax here would write only
	;; the low 32 bits of the FS base.
	mov [rsi], rdx
	xor rax, rax ;; return 0
	jmp .vm64_prctl_end

.vm64_prctl_trap:
	;; PRCTL fallback to host syscall trap
	out 0, ax
	jmp .vm64_prctl_end

;; Returns the KVM system time in nanoseconds in rax.
;; Preserves rbx, rcx and rdx.
.read_system_time:
	push rbx
	push rcx
	push rdx
	;; Check if the system time MSR has already been set
	mov rax, [PVCLOCK_TIME_INFO + 16] ;; system-time nanoseconds
	;; If the system time is zero, we need to set it
	test rax, rax
	jnz .system_time_already_set
	mov ecx, 0x4b564d01 ;; MSR_KVM_SYSTEM_TIME_NEW
	mov eax, PVCLOCK_TIME_INFO | 1 ;; data: structure address, bit 0 set
	mov edx, 0 ;; zero high-32 bits
	wrmsr
.system_time_already_set:
	;; Read TSC
	rdtsc
	;; Merge EDX:EAX into RAX for the full 64-bit TSC value
	shl rdx, 32
	or rax, rdx
	;; Calculate the system time in nanoseconds
	;; time = (current_tsc - tsc_timestamp)
	;; if (tsc_shift >= 0)
	;;     time <<= tsc_shift;
	;; else
	;;     time >>= -tsc_shift;
	;; time = (time * tsc_to_system_mul) >> 32
	;; time = time + system_time
	mov rdx, [PVCLOCK_TIME_INFO + 8] ;; tsc_timestamp
	sub rax, rdx ;; current_tsc - tsc_timestamp
	;; Load the signed 8-bit tsc_shift (offset 28 of the time-info structure)
	mov cl, [PVCLOCK_TIME_INFO + 28] ;; tsc_shift
	test cl, cl
	js .system_time_neg_tsc_shift
	;; tsc_shift >= 0: shift left
	shl rax, cl ;; rax = rax << tsc_shift
	jmp .system_time_tsc_shift_done
.system_time_neg_tsc_shift:
	;; tsc_shift < 0: shift right by its magnitude
	neg ecx
	shr rax, cl ;; rax = rax >> -tsc_shift
.system_time_tsc_shift_done:
	;; Multiply by tsc_to_system_mul
	mov ecx, [PVCLOCK_TIME_INFO + 24] ;; tsc_to_system_mul
	mul rcx ;; 128-bit product in RDX:RAX
	;; Right shift the 128-bit product by 32 bits. A plain shr on rax
	;; would drop the bits that the multiplication carried into rdx.
	shrd rax, rdx, 32
	;; Add the system time base
	mov rdx, [PVCLOCK_TIME_INFO + 16] ;; system_time
	add rax, rdx ;; time = time + system_time

	;; Test version is even (the retry branch below is still disabled)
	mov ebx, [PVCLOCK_TIME_INFO] ;; version
	and ebx, 1
	;;jnp .system_time_already_set ;; read again

	pop rdx
	pop rcx
	pop rbx
	ret

;; Returns the KVM wall clock (sec * 1e9 + nsec) in rax.
;; Preserves rbx, rcx and rdx.
.read_wall_clock:
	push rbx
	push rcx
	push rdx
	;; Check if the wall clock MSR has already been set
	mov eax, [PVCLOCK_WALL_CLOCK + 4] ;; seconds since epoch
	test eax, eax
	jnz .wall_clock_already_set
	;; Ask KVM to fill in the wall clock structure
	mov ecx, 0x4b564d00 ;; MSR_KVM_WALL_CLOCK_NEW
	mov eax, PVCLOCK_WALL_CLOCK ;; data
	mov edx, 0 ;; zero high-32 bits
	wrmsr
.wall_clock_already_set:
	;; Read the wall clock
	mov eax, DWORD [PVCLOCK_WALL_CLOCK + 4] ;; sec
	mov ecx, DWORD [PVCLOCK_WALL_CLOCK + 8] ;; nsec
	;; Convert to nanoseconds
	mov rbx, 1000000000 ;; 1e9
	mov rdx, 0 ;; clear rdx
	mul rbx ;; rax = sec * 1e9
	add rax, rcx ;; rax = sec * 1e9 + nsec
	pop rdx
	pop rcx
	pop rbx
	ret

.vm64_clock_gettime:
	;; rdi = clockid
	;; rsi = timespec
	;; Verify that destination is at least 0x100000. This is done before
	;; anything is pushed so the error path can sysret with a balanced
	;; stack and an untouched rcx.
	cmp rsi, 0x100000
	jb .vm64_clock_gettime_error
	stac
	push rbx
	push rcx
	push rdx
	;; Get system time into rax
	call .read_system_time
	;; Any non-zero clockid is treated as CLOCK_MONOTONIC: we are done
	test rdi, rdi
	jnz .finish_up_clock_gettime
	;; CLOCK_REALTIME: add the wall clock to the system time.
	;; rbx survives the call because .read_wall_clock saves and restores it.
	mov rbx, rax
	call .read_wall_clock
	add rax, rbx
.finish_up_clock_gettime:
	;; RAX now contains the clock time in nanoseconds
	;; Split RAX into seconds and nanoseconds
	mov rdx, 0
	mov rbx, 1000000000 ;; 1e9
	div rbx ;; rax = seconds, rdx = clock_time % 1e9
	;; Store to guest timespec
	mov [rsi], rax ;; Store tv_sec
	mov [rsi + 8], rdx ;; Store tv_nsec
	;; Restore registers
	pop rdx
	pop rcx
	pop rbx
	clac
	;; Return to the caller
	xor eax, eax
	o64 sysret
.vm64_clock_gettime_error:
	mov rax, -14 ;; EFAULT
	o64 sysret

.vm64_gettimeofday:
	mov eax, 96 ;; gettimeofday
	out 0, ax
	ret

.vm64_dso:
	mov eax, .vm64_gettimeofday
	ret

.vm64_entrycall:
	;; Reset pagetables
	mov rax, cr3
	mov cr3, rax
	o64 sysret

.vm64_reentrycall:
	o64 sysret

.vm64_page_fault:
	push rdi
	mov rdi, cr2 ;; faulting address
	out 128 + 14, ax
	invlpg [rdi]
	pop rdi
	;; falls through: a page fault pushes an error code that must be dropped

.vm64_pop_code:
	add rsp, 8 ;; discard the error code
	iretq

.vm64_timeout: 240 | out 128 + 33, ax 241 | iretq 242 | 243 | ALIGN 0x8 244 | .vm64_exception: 245 | CPU_EXCEPT 0 246 | ALIGN 0x8 247 | .vm64_except1: 248 | CPU_EXCEPT 1 249 | CPU_EXCEPT 2 250 | CPU_EXCEPT 3 251 | CPU_EXCEPT 4 252 | CPU_EXCEPT 5 253 | CPU_EXCEPT 6 254 | CPU_EXCEPT 7 255 | CPU_EXCEPT_CODE 8 ;; double fault 256 | CPU_EXCEPT 9 257 | CPU_EXCEPT_CODE 10 258 | CPU_EXCEPT_CODE 11 259 | CPU_EXCEPT_CODE 12 260 | CPU_EXCEPT_CODE 13 261 | CPU_EXCEPT_PF 14 262 | CPU_EXCEPT 15 263 | CPU_EXCEPT 16 264 | CPU_EXCEPT_CODE 17 265 | CPU_EXCEPT 18 266 | CPU_EXCEPT 19 267 | CPU_EXCEPT 20 268 | ALIGN 0x8 ;; timer interrupt 269 | jmp .vm64_timeout 270 | -------------------------------------------------------------------------------- /lib/tinykvm/amd64/builtin/kernel_assembly.h: -------------------------------------------------------------------------------- 1 | unsigned char interrupts[] = { 2 | 0x10, 0x00, 0x82, 0x01, 0xb8, 0x01, 0x08, 0x00, 0x8b, 0x01, 0x90, 0x90, 3 | 0x90, 0x90, 0x90, 0x90, 0x66, 0x3d, 0x9e, 0x00, 0x74, 0x25, 0x66, 0x3d, 4 | 0xe4, 0x00, 0x0f, 0x84, 0x1b, 0x01, 0x00, 0x00, 0x3d, 0x77, 0xf7, 0x01, 5 | 0x00, 0x0f, 0x84, 0x66, 0x01, 0x00, 0x00, 0x3d, 0x07, 0xf7, 0x01, 0x00, 6 | 0x0f, 0x84, 0x64, 0x01, 0x00, 0x00, 0xe7, 0x00, 0x48, 0x0f, 0x07, 0x0f, 7 | 0x01, 0xcb, 0x56, 0x51, 0x52, 0x48, 0x81, 0xff, 0x02, 0x10, 0x00, 0x00, 8 | 0x75, 0x1b, 0xb9, 0x00, 0x01, 0x00, 0xc0, 0x89, 0xf0, 0x48, 0xc1, 0xee, 9 | 0x20, 0x89, 0xf2, 0x0f, 0x30, 0x48, 0x31, 0xc0, 0x5a, 0x59, 0x5e, 0x0f, 10 | 0x01, 0xca, 0x48, 0x0f, 0x07, 0x48, 0x81, 0xff, 0x03, 0x10, 0x00, 0x00, 11 | 0x75, 0x16, 0xb9, 0x00, 0x01, 0x00, 0xc0, 0x0f, 0x32, 0x48, 0xc1, 0xe2, 12 | 0x20, 0x48, 0x09, 0xc2, 0x48, 0x89, 0x06, 0x48, 0x31, 0xc0, 0xeb, 0xd8, 13 | 0x66, 0xe7, 0x00, 0xeb, 0xd3, 0x53, 0x51, 0x52, 0x48, 0x8b, 0x04, 0x25, 14 | 0x30, 0x30, 0x00, 0x00, 0x48, 0x85, 0xc0, 0x75, 0x11, 0xb9, 0x01, 0x4d, 15 | 0x56, 0x4b, 0xb8, 0x21, 0x30, 0x00, 0x00, 0xba, 0x00, 0x00, 0x00, 0x00, 16 | 0x0f, 0x30, 0x0f, 0x31, 
0x48, 0xc1, 0xe2, 0x20, 0x48, 0x09, 0xd0, 0x48, 17 | 0x8b, 0x14, 0x25, 0x28, 0x30, 0x00, 0x00, 0x48, 0x29, 0xd0, 0x8a, 0x0c, 18 | 0x25, 0x4c, 0x30, 0x00, 0x00, 0x84, 0xc9, 0x78, 0x05, 0x48, 0xd3, 0xe0, 19 | 0xeb, 0x05, 0xf7, 0xd9, 0x48, 0xd3, 0xe8, 0x8b, 0x0c, 0x25, 0x38, 0x30, 20 | 0x00, 0x00, 0x48, 0xf7, 0xe1, 0x48, 0xc1, 0xe8, 0x20, 0x48, 0x8b, 0x14, 21 | 0x25, 0x40, 0x30, 0x00, 0x00, 0x48, 0x01, 0xd0, 0x8b, 0x1c, 0x25, 0x30, 22 | 0x30, 0x00, 0x00, 0x83, 0xe3, 0x01, 0x5a, 0x59, 0x5b, 0xc3, 0x53, 0x51, 23 | 0x52, 0x8b, 0x04, 0x25, 0x04, 0x30, 0x00, 0x00, 0x85, 0xc0, 0x75, 0x11, 24 | 0xb9, 0x00, 0x4d, 0x56, 0x4b, 0xb8, 0x00, 0x30, 0x00, 0x00, 0xba, 0x00, 25 | 0x00, 0x00, 0x00, 0x0f, 0x30, 0x8b, 0x04, 0x25, 0x04, 0x30, 0x00, 0x00, 26 | 0x8b, 0x0c, 0x25, 0x08, 0x30, 0x00, 0x00, 0xbb, 0x00, 0xca, 0x9a, 0x3b, 27 | 0xba, 0x00, 0x00, 0x00, 0x00, 0x48, 0xf7, 0xe3, 0x48, 0x01, 0xc8, 0x5a, 28 | 0x59, 0x5b, 0xc3, 0x0f, 0x01, 0xcb, 0x53, 0x51, 0x52, 0x48, 0x81, 0xfe, 29 | 0x00, 0x00, 0x10, 0x00, 0x72, 0x2e, 0xe8, 0x3a, 0xff, 0xff, 0xff, 0x48, 30 | 0x85, 0xff, 0x75, 0x05, 0xe8, 0xa1, 0xff, 0xff, 0xff, 0xba, 0x00, 0x00, 31 | 0x00, 0x00, 0xbb, 0x00, 0xca, 0x9a, 0x3b, 0x48, 0xf7, 0xf3, 0x48, 0x89, 32 | 0x06, 0x48, 0x89, 0x56, 0x08, 0x5a, 0x59, 0x5b, 0x0f, 0x01, 0xca, 0x31, 33 | 0xc0, 0x48, 0x0f, 0x07, 0x48, 0xc7, 0xc0, 0xf2, 0xff, 0xff, 0xff, 0x48, 34 | 0x0f, 0x07, 0xb8, 0x60, 0x00, 0x00, 0x00, 0x66, 0xe7, 0x00, 0xc3, 0xb8, 35 | 0x82, 0x01, 0x00, 0x00, 0xc3, 0x0f, 0x20, 0xd8, 0x0f, 0x22, 0xd8, 0x48, 36 | 0x0f, 0x07, 0x48, 0x0f, 0x07, 0x57, 0x0f, 0x20, 0xd7, 0x66, 0xe7, 0x8e, 37 | 0x0f, 0x01, 0x3f, 0x5f, 0x48, 0x83, 0xc4, 0x08, 0x48, 0xcf, 0x66, 0xe7, 38 | 0xa1, 0x48, 0xcf, 0x90, 0x90, 0x90, 0x90, 0x90, 0x66, 0xe7, 0x80, 0x48, 39 | 0xcf, 0x90, 0x90, 0x90, 0x66, 0xe7, 0x81, 0x48, 0xcf, 0x90, 0x90, 0x90, 40 | 0x66, 0xe7, 0x82, 0x48, 0xcf, 0x90, 0x90, 0x90, 0x66, 0xe7, 0x83, 0x48, 41 | 0xcf, 0x90, 0x90, 0x90, 0x66, 0xe7, 0x84, 0x48, 0xcf, 0x90, 0x90, 0x90, 42 | 0x66, 0xe7, 0x85, 
0x48, 0xcf, 0x90, 0x90, 0x90, 0x66, 0xe7, 0x86, 0x48, 43 | 0xcf, 0x90, 0x90, 0x90, 0x66, 0xe7, 0x87, 0x48, 0xcf, 0x90, 0x90, 0x90, 44 | 0x66, 0xe7, 0x88, 0xeb, 0xab, 0x90, 0x90, 0x90, 0x66, 0xe7, 0x89, 0x48, 45 | 0xcf, 0x90, 0x90, 0x90, 0x66, 0xe7, 0x8a, 0xeb, 0x9b, 0x90, 0x90, 0x90, 46 | 0x66, 0xe7, 0x8b, 0xeb, 0x93, 0x90, 0x90, 0x90, 0x66, 0xe7, 0x8c, 0xeb, 47 | 0x8b, 0x90, 0x90, 0x90, 0x66, 0xe7, 0x8d, 0xeb, 0x83, 0x90, 0x90, 0x90, 48 | 0xe9, 0x70, 0xff, 0xff, 0xff, 0x90, 0x90, 0x90, 0x66, 0xe7, 0x8f, 0x48, 49 | 0xcf, 0x90, 0x90, 0x90, 0x66, 0xe7, 0x90, 0x48, 0xcf, 0x90, 0x90, 0x90, 50 | 0x66, 0xe7, 0x91, 0xe9, 0x60, 0xff, 0xff, 0xff, 0x66, 0xe7, 0x92, 0x48, 51 | 0xcf, 0x90, 0x90, 0x90, 0x66, 0xe7, 0x93, 0x48, 0xcf, 0x90, 0x90, 0x90, 52 | 0x66, 0xe7, 0x94, 0x48, 0xcf, 0x90, 0x90, 0x90, 0xe9, 0x49, 0xff, 0xff, 53 | 0xff 54 | }; 55 | unsigned int interrupts_len = 613; 56 | -------------------------------------------------------------------------------- /lib/tinykvm/amd64/builtin/usercode.asm: -------------------------------------------------------------------------------- 1 | [BITS 64] 2 | 3 | dw .vm64_entry 4 | dw .vm64_rexit 5 | dw .vm64_preserving_entry 6 | dw 0 7 | dd .vm64_cpuid 8 | 9 | ALIGN 0x10 10 | ;; The entry function, jumps to real function 11 | .vm64_entry: 12 | ;; Execute a pagetable flushing system call that 13 | ;; ensures that even if we are entering in kernel mode, 14 | ;; we are calling the user function in usermode. 15 | ;; This cannot realistically be improved upon. 16 | mov r13, rcx 17 | mov rax, 0x1F777 18 | syscall 19 | mov rcx, r13 20 | jmp r15 21 | ;; The exit function (pre-written to stack) 22 | .vm64_rexit: 23 | mov rdi, rax 24 | .vm64_rexit_retry: 25 | mov eax, 0xFFFF 26 | out 0, eax 27 | jmp .vm64_rexit_retry 28 | .vm64_preserving_entry: 29 | ;; This is the entry point for a paused VM where 30 | ;; its in the middle of a user program, so every 31 | ;; register must be preserved. 
We need to flush 32 | ;; the pagetables to ensure the guest can see the 33 | ;; correct memory. Since the guest is potentially 34 | ;; blind here, the host has pushed the registers 35 | ;; necessary to perform the syscall safely. 36 | mov rax, 0x1F777 37 | syscall 38 | pop r11 ;; used by syscall for rflags 39 | pop rcx ;; used by syscall for rip 40 | pop rax 41 | ;; With the registers restored, we can now 42 | ;; return to the guest program. 43 | ret 44 | 45 | 46 | %macro vcputable 1 47 | dd %1 48 | dd 0 49 | dd 0 50 | dd 0 51 | %endmacro 52 | 53 | ALIGN 0x8 54 | .vm64_cpuid: 55 | vcputable 0 56 | vcputable 1 57 | vcputable 2 58 | vcputable 3 59 | vcputable 4 60 | vcputable 5 61 | vcputable 6 62 | vcputable 7 63 | vcputable 8 64 | vcputable 9 65 | vcputable 10 66 | vcputable 11 67 | vcputable 12 68 | vcputable 13 69 | vcputable 14 70 | vcputable 15 71 | vcputable 16 72 | .vm64_cpuid_end: 73 | -------------------------------------------------------------------------------- /lib/tinykvm/amd64/builtin/usercode_assembly.sh: -------------------------------------------------------------------------------- 1 | nasm -f bin -o usercode usercode.asm 2 | xxd -i usercode 3 | -------------------------------------------------------------------------------- /lib/tinykvm/amd64/builtin/vsyscall.asm: -------------------------------------------------------------------------------- 1 | [BITS 64] 2 | 3 | org 0xFFFFFFFFFF600000 4 | .vsyscall_gettimeofday: 5 | mov ax, 96 ;; gettimeofday 6 | out 0, ax 7 | ret 8 | -------------------------------------------------------------------------------- /lib/tinykvm/amd64/builtin/vsyscall_assembly.sh: -------------------------------------------------------------------------------- 1 | nasm -f bin -o vsyscall vsyscall.asm 2 | xxd -i vsyscall 3 | -------------------------------------------------------------------------------- /lib/tinykvm/amd64/gdt.cpp: -------------------------------------------------------------------------------- 1 | #include 
"gdt.hpp" 2 | 3 | #include "../common.hpp" 4 | #include 5 | #include 6 | #include 7 | 8 | #define GDT_ACCESS_DUMMY 0x0 9 | #define GDT_ACCESS_TSS 0x09 10 | #define GDT_ACCESS_CODE 0x9A 11 | #define GDT_ACCESS_DATA 0x92 12 | #define GDT_ACCESS_CODE3 0xFA 13 | #define GDT_ACCESS_DATA3 0xF2 14 | #define GDT_ACCESS_TSS0 0x85 15 | #define GDT_ACCESS_TSS3 0xE5 16 | 17 | #define FLAGS_X32_PAGE 0xC 18 | #define FLAGS_X64_PAGE 0xA 19 | #define FLAGS_X64_TSS 0x4 20 | 21 | struct GDT_desc 22 | { 23 | uint16_t size; 24 | uint64_t offset; 25 | } __attribute__((packed)); 26 | 27 | struct GDT_entry 28 | { 29 | uint32_t limit_lo : 16; 30 | uint32_t base_lo : 24; 31 | uint32_t access : 8; 32 | uint32_t limit_hi : 4; 33 | uint32_t flags : 4; 34 | uint32_t base_hi : 8; 35 | } __attribute__((packed)); 36 | 37 | void GDT_write_segment(void* area, uint8_t flags) 38 | { 39 | auto* entry = (GDT_entry*) area; 40 | entry->limit_lo = 0xFFFF; 41 | entry->base_lo = 0; 42 | entry->access = flags; 43 | entry->limit_hi = 0xF; 44 | entry->flags = FLAGS_X64_PAGE; 45 | entry->base_hi = 0; 46 | } 47 | void GDT_write_TSS_segment(void* area, uint64_t tss_addr, uint32_t size) 48 | { 49 | auto* entry = (GDT_entry*) area; 50 | entry->limit_lo = size; 51 | entry->limit_hi = 0; 52 | entry->access = GDT_ACCESS_TSS; 53 | entry->flags = FLAGS_X64_TSS; 54 | entry->base_lo = tss_addr & 0xFFFFFF; 55 | entry->base_hi = tss_addr >> 24; 56 | } 57 | 58 | void setup_amd64_segments(uint64_t gdt_addr, char* gdt_ptr) 59 | { 60 | (void)gdt_addr; 61 | 62 | /* Null segment */ 63 | memset(gdt_ptr + 0x0, 0, 8); 64 | /* Kernel mode */ 65 | GDT_write_segment(gdt_ptr + 0x8, GDT_ACCESS_CODE); 66 | GDT_write_segment(gdt_ptr + 0x10, GDT_ACCESS_DATA); 67 | /* Null user-base segment */ 68 | memset(gdt_ptr + 0x18, 0, 8); 69 | /* User mode */ 70 | GDT_write_segment(gdt_ptr + 0x20, GDT_ACCESS_DATA3); 71 | GDT_write_segment(gdt_ptr + 0x28, GDT_ACCESS_CODE3); 72 | 73 | /* TSS segment (initialized later) */ 74 | memset(gdt_ptr + 0x30, 0, 
8); 75 | } 76 | 77 | void setup_amd64_segment_regs(struct kvm_sregs& sregs, uint64_t gdt_addr) 78 | { 79 | /* Code segment */ 80 | struct kvm_segment seg = { 81 | .base = 0, 82 | .limit = 0xffffffff, 83 | .selector = 0x2B, 84 | .type = 11, /* Code: execute, read, accessed */ 85 | .present = 1, 86 | .dpl = 3, /* User-mode */ 87 | .db = 0, 88 | .s = 1, /* Code/data */ 89 | .l = 1, /* 64-bit */ 90 | .g = 1, /* 4KB granularity */ 91 | }; 92 | sregs.cs = seg; 93 | 94 | /* Data segment */ 95 | seg.type = 3; /* Data: read/write, accessed */ 96 | seg.selector = 0x23; 97 | sregs.ds = sregs.es = sregs.ss = seg; 98 | 99 | /* GDT dtable */ 100 | sregs.gdt.base = gdt_addr; 101 | sregs.gdt.limit = sizeof(GDT_entry) * 7 - 1; 102 | } 103 | 104 | TINYKVM_COLD() 105 | void print_gdt_entries(const void* area, size_t count) 106 | { 107 | const auto* entry = (const GDT_entry*) area; 108 | for (size_t i = 0; i < count; i++) { 109 | const auto a = entry[i].access; 110 | const auto f = entry[i].flags; 111 | printf("GDT %2zx: P=%u DPL=%u S=%u Ex=%u DC=%u RW=%u G=%u Sz=%u L=%u\n", 112 | 8*i, a >> 7, (a >> 5) & 0x3, (a >> 4) & 1, (a >> 3) & 1, 113 | a & 0x4, a & 0x2, f & 0x8, f & 0x4, f & 0x2); 114 | } 115 | } 116 | -------------------------------------------------------------------------------- /lib/tinykvm/amd64/gdt.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | 5 | extern void setup_amd64_segments(uint64_t gdt_addr, char* gdt_ptr); 6 | extern void setup_amd64_segment_regs(struct kvm_sregs&, uint64_t gdt_addr); 7 | 8 | extern void GDT_write_segment(void* area, uint8_t flags); 9 | extern void GDT_write_TSS_segment(void* area, uint64_t tss_addr, uint32_t size); 10 | extern void GDT_reload(uint16_t); 11 | extern void print_gdt_entries(const void* area, size_t count); 12 | -------------------------------------------------------------------------------- /lib/tinykvm/amd64/idt.cpp: 
-------------------------------------------------------------------------------- 1 | #include "idt.hpp" 2 | 3 | #include "../common.hpp" 4 | #include 5 | #include 6 | #include 7 | #include 8 | struct kvm_sregs; 9 | 10 | namespace tinykvm { 11 | 12 | // 64-bit IDT entry 13 | struct IDTentry { 14 | uint16_t offset_1; // offset bits 0..15 15 | uint16_t selector; // a code segment selector in GDT or LDT 16 | uint8_t ist; // 3-bit interrupt stack table offset 17 | uint8_t type_attr; // type and attributes, see below 18 | uint16_t offset_2; // offset bits 16..31 19 | uint32_t offset_3; // 32..63 20 | uint32_t zero2; 21 | }; 22 | static_assert(sizeof(IDTentry) == 16, "AMD64 IDT entries are 16-bytes"); 23 | 24 | #define IDT_GATE_INTR 0x0e 25 | #define IDT_CPL0 0x00 26 | #define IDT_CPL3 0x60 27 | #define IDT_PRESENT 0x80 28 | 29 | struct IDT 30 | { 31 | /* Just enough for CPU exceptions and 1 timer interrupt */ 32 | std::array entry; 33 | }; 34 | 35 | union addr_helper { 36 | uint64_t whole; 37 | struct { 38 | uint16_t lo16; 39 | uint16_t hi16; 40 | uint32_t top32; 41 | }; 42 | }; 43 | 44 | static void set_entry( 45 | IDTentry& idt_entry, 46 | uint64_t handler, 47 | uint16_t segment_sel, 48 | uint8_t attributes) 49 | { 50 | addr_helper addr { .whole = handler }; 51 | idt_entry.offset_1 = addr.lo16; 52 | idt_entry.offset_2 = addr.hi16; 53 | idt_entry.offset_3 = addr.top32; 54 | idt_entry.selector = segment_sel; 55 | idt_entry.type_attr = attributes; 56 | idt_entry.ist = 1; 57 | idt_entry.zero2 = 0; 58 | } 59 | 60 | void set_exception_handler(void* area, uint8_t vec, uint64_t handler) 61 | { 62 | auto& idt = *(IDT *)area; 63 | set_entry(idt.entry[vec], handler, 0x8, IDT_PRESENT | IDT_CPL0 | IDT_GATE_INTR); 64 | /* Use second IST for double faults */ 65 | //idt.entry[vec].ist = (vec != 8) ? 1 : 2; 66 | } 67 | 68 | /* unsigned interrupts[] = { ... 
} */ 69 | #include "builtin/kernel_assembly.h" 70 | static_assert(sizeof(interrupts) > 10 && sizeof(interrupts) <= 4096, 71 | "Interrupts array must be container within a 4KB page"); 72 | 73 | const iasm_header& interrupt_header() { 74 | return *(const iasm_header*) &interrupts[0]; 75 | } 76 | iasm_header& mutable_interrupt_header() { 77 | return *(iasm_header*) &interrupts[0]; 78 | } 79 | 80 | void setup_amd64_exception_regs(struct kvm_sregs& sregs, uint64_t addr) 81 | { 82 | sregs.idt.base = addr; 83 | sregs.idt.limit = sizeof(IDT) - 1; 84 | } 85 | 86 | void setup_amd64_exceptions(uint64_t addr, void* area, void* except_area) 87 | { 88 | uint64_t offset = addr + interrupt_header().vm64_exception; 89 | for (int i = 0; i <= 20; i++) { 90 | if (i == 15) continue; 91 | //printf("Exception handler %d at 0x%lX\n", i, offset); 92 | set_exception_handler(area, i, offset); 93 | offset += interrupt_header().vm64_except_size; 94 | } 95 | // Program the timer interrupt (which sends NMI) 96 | offset += interrupt_header().vm64_except_size; 97 | set_exception_handler(area, 32, offset); 98 | // Install exception handling code 99 | std::memcpy(except_area, interrupts, sizeof(interrupts)); 100 | } 101 | 102 | TINYKVM_COLD() 103 | void print_exception_handlers(const void* area) 104 | { 105 | auto* idt = (IDT*) area; 106 | for (unsigned i = 0; i < idt->entry.size(); i++) { 107 | const auto& entry = idt->entry[i]; 108 | addr_helper addr; 109 | addr.lo16 = entry.offset_1; 110 | addr.hi16 = entry.offset_2; 111 | addr.top32 = entry.offset_3; 112 | printf("IDT %u: func=0x%lX sel=0x%X p=%d dpl=%d type=0x%X ist=%u\n", 113 | i, addr.whole, entry.selector, entry.type_attr >> 7, 114 | (entry.type_attr >> 5) & 0x3, entry.type_attr & 0xF, entry.ist); 115 | } 116 | } 117 | 118 | struct AMD64_Ex { 119 | const char* name; 120 | bool has_code; 121 | }; 122 | static constexpr std::array exceptions = 123 | { 124 | AMD64_Ex{"Divide-by-zero Error", false}, 125 | AMD64_Ex{"Debug", false}, 126 | 
AMD64_Ex{"Non-Maskable Interrupt", false}, 127 | AMD64_Ex{"Breakpoint", false}, 128 | AMD64_Ex{"Overflow", false}, 129 | AMD64_Ex{"Bound Range Exceeded", false}, 130 | AMD64_Ex{"Invalid Opcode", false}, 131 | AMD64_Ex{"Device Not Available", false}, 132 | AMD64_Ex{"Double Fault", true}, 133 | AMD64_Ex{"Reserved", false}, 134 | AMD64_Ex{"Invalid TSS", true}, 135 | AMD64_Ex{"Segment Not Present", true}, 136 | AMD64_Ex{"Stack-Segment Fault", true}, 137 | AMD64_Ex{"General Protection Fault", true}, 138 | AMD64_Ex{"Page Fault", true}, 139 | AMD64_Ex{"Reserved", false}, 140 | AMD64_Ex{"x87 Floating-point Exception", false}, 141 | AMD64_Ex{"Alignment Check", true}, 142 | AMD64_Ex{"Machine Check", false}, 143 | AMD64_Ex{"SIMD Floating-point Exception", false}, 144 | AMD64_Ex{"Virtualization Exception", false}, 145 | AMD64_Ex{"Reserved", false}, 146 | AMD64_Ex{"Reserved", false}, 147 | AMD64_Ex{"Reserved", false}, 148 | AMD64_Ex{"Reserved", false}, 149 | AMD64_Ex{"Reserved", false}, 150 | AMD64_Ex{"Reserved", false}, 151 | AMD64_Ex{"Reserved", false}, 152 | AMD64_Ex{"Reserved", false}, 153 | AMD64_Ex{"Reserved", false}, 154 | AMD64_Ex{"Security Exception", false}, 155 | AMD64_Ex{"Reserved", false}, 156 | AMD64_Ex{"Reserved", false}, 157 | AMD64_Ex{"Execution Timeout", false}, 158 | }; 159 | 160 | const char* amd64_exception_name(uint8_t intr) { 161 | return exceptions.at(intr).name; 162 | } 163 | bool amd64_exception_code(uint8_t intr) { 164 | return exceptions.at(intr).has_code; 165 | } 166 | 167 | } 168 | -------------------------------------------------------------------------------- /lib/tinykvm/amd64/idt.hpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "../memory.hpp" 3 | #include "memory_layout.hpp" 4 | struct kvm_sregs; 5 | 6 | namespace tinykvm { 7 | extern void setup_amd64_exception_regs(struct kvm_sregs& sregs, uint64_t addr); 8 | extern void setup_amd64_exceptions(uint64_t addr, void* area, void* 
code_area); 9 | 10 | extern void set_exception_handler(void* area, uint8_t vec, uint64_t handler); 11 | extern void print_exception_handlers(const void* area); 12 | 13 | extern const char* amd64_exception_name(uint8_t); 14 | extern bool amd64_exception_code(uint8_t); 15 | 16 | struct iasm_header { 17 | uint16_t vm64_syscall; 18 | uint16_t vm64_gettimeofday; 19 | uint16_t vm64_exception; 20 | uint16_t vm64_except_size; 21 | uint16_t vm64_dso; 22 | 23 | uint64_t translated_vm_syscall(const vMemory& memory) const noexcept 24 | { 25 | return memory.physbase + INTR_ASM_ADDR + vm64_syscall; 26 | } 27 | }; 28 | const iasm_header& interrupt_header(); 29 | iasm_header& mutable_interrupt_header(); 30 | } 31 | -------------------------------------------------------------------------------- /lib/tinykvm/amd64/lapic.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #define AMD64_MSR_APICBASE 0x1B 3 | #define AMD64_MSR_XAPIC_ENABLE 0x800 4 | #define AMD64_MSR_X2APIC_ENABLE 0xC00 5 | 6 | #define AMD64_APIC_MODE_EXTINT 0x7 7 | #define AMD64_APIC_MODE_NMI 0x4 8 | 9 | typedef unsigned int __u32; 10 | 11 | struct local_apic { 12 | 13 | /*000*/ struct { __u32 __reserved[4]; } __reserved_01; 14 | 15 | /*010*/ struct { __u32 __reserved[4]; } __reserved_02; 16 | 17 | /*020*/ struct { /* APIC ID Register */ 18 | __u32 __reserved_1 : 24, 19 | phys_apic_id : 4, 20 | __reserved_2 : 4; 21 | __u32 __reserved[3]; 22 | } id; 23 | 24 | /*030*/ const 25 | struct { /* APIC Version Register */ 26 | __u32 version : 8, 27 | __reserved_1 : 8, 28 | max_lvt : 8, 29 | __reserved_2 : 8; 30 | __u32 __reserved[3]; 31 | } version; 32 | 33 | /*040*/ struct { __u32 __reserved[4]; } __reserved_03; 34 | 35 | /*050*/ struct { __u32 __reserved[4]; } __reserved_04; 36 | 37 | /*060*/ struct { __u32 __reserved[4]; } __reserved_05; 38 | 39 | /*070*/ struct { __u32 __reserved[4]; } __reserved_06; 40 | 41 | /*080*/ struct { /* Task Priority Register */ 42 | __u32 
priority : 8, 43 | __reserved_1 : 24; 44 | __u32 __reserved_2[3]; 45 | } tpr; 46 | 47 | /*090*/ const 48 | struct { /* Arbitration Priority Register */ 49 | __u32 priority : 8, 50 | __reserved_1 : 24; 51 | __u32 __reserved_2[3]; 52 | } apr; 53 | 54 | /*0A0*/ const 55 | struct { /* Processor Priority Register */ 56 | __u32 priority : 8, 57 | __reserved_1 : 24; 58 | __u32 __reserved_2[3]; 59 | } ppr; 60 | 61 | /*0B0*/ struct { /* End Of Interrupt Register */ 62 | __u32 eoi; 63 | __u32 __reserved[3]; 64 | } eoi; 65 | 66 | /*0C0*/ struct { __u32 __reserved[4]; } __reserved_07; 67 | 68 | /*0D0*/ struct { /* Logical Destination Register */ 69 | __u32 __reserved_1 : 24, 70 | logical_dest : 8; 71 | __u32 __reserved_2[3]; 72 | } ldr; 73 | 74 | /*0E0*/ struct { /* Destination Format Register */ 75 | __u32 __reserved_1 : 28, 76 | model : 4; 77 | __u32 __reserved_2[3]; 78 | } dfr; 79 | 80 | /*0F0*/ struct { /* Spurious Interrupt Vector Register */ 81 | __u32 spurious_vector : 8, 82 | apic_enabled : 1, 83 | focus_cpu : 1, 84 | __reserved_2 : 22; 85 | __u32 __reserved_3[3]; 86 | } svr; 87 | 88 | /*100*/ struct { /* In Service Register */ 89 | /*170*/ __u32 bitfield; 90 | __u32 __reserved[3]; 91 | } isr [8]; 92 | 93 | /*180*/ struct { /* Trigger Mode Register */ 94 | /*1F0*/ __u32 bitfield; 95 | __u32 __reserved[3]; 96 | } tmr [8]; 97 | 98 | /*200*/ struct { /* Interrupt Request Register */ 99 | /*270*/ __u32 bitfield; 100 | __u32 __reserved[3]; 101 | } irr [8]; 102 | 103 | /*280*/ union { /* Error Status Register */ 104 | struct { 105 | __u32 send_cs_error : 1, 106 | receive_cs_error : 1, 107 | send_accept_error : 1, 108 | receive_accept_error : 1, 109 | __reserved_1 : 1, 110 | send_illegal_vector : 1, 111 | receive_illegal_vector : 1, 112 | illegal_register_address : 1, 113 | __reserved_2 : 24; 114 | __u32 __reserved_3[3]; 115 | } error_bits; 116 | struct { 117 | __u32 errors; 118 | __u32 __reserved_3[3]; 119 | } all_errors; 120 | } esr; 121 | 122 | /*290*/ struct { __u32 
__reserved[4]; } __reserved_08; 123 | 124 | /*2A0*/ struct { __u32 __reserved[4]; } __reserved_09; 125 | 126 | /*2B0*/ struct { __u32 __reserved[4]; } __reserved_10; 127 | 128 | /*2C0*/ struct { __u32 __reserved[4]; } __reserved_11; 129 | 130 | /*2D0*/ struct { __u32 __reserved[4]; } __reserved_12; 131 | 132 | /*2E0*/ struct { __u32 __reserved[4]; } __reserved_13; 133 | 134 | /*2F0*/ struct { __u32 __reserved[4]; } __reserved_14; 135 | 136 | /*300*/ struct { /* Interrupt Command Register 1 */ 137 | __u32 vector : 8, 138 | delivery_mode : 3, 139 | destination_mode : 1, 140 | delivery_status : 1, 141 | __reserved_1 : 1, 142 | level : 1, 143 | trigger : 1, 144 | __reserved_2 : 2, 145 | shorthand : 2, 146 | __reserved_3 : 12; 147 | __u32 __reserved_4[3]; 148 | } icr1; 149 | 150 | /*310*/ struct { /* Interrupt Command Register 2 */ 151 | union { 152 | __u32 __reserved_1 : 24, 153 | phys_dest : 4, 154 | __reserved_2 : 4; 155 | __u32 __reserved_3 : 24, 156 | logical_dest : 8; 157 | } dest; 158 | __u32 __reserved_4[3]; 159 | } icr2; 160 | 161 | /*320*/ struct { /* LVT - Timer */ 162 | __u32 vector : 8, 163 | __reserved_1 : 4, 164 | delivery_status : 1, 165 | __reserved_2 : 3, 166 | mask : 1, 167 | timer_mode : 1, 168 | __reserved_3 : 14; 169 | __u32 __reserved_4[3]; 170 | } lvt_timer; 171 | 172 | /*330*/ struct { /* LVT - Thermal Sensor */ 173 | __u32 vector : 8, 174 | delivery_mode : 3, 175 | __reserved_1 : 1, 176 | delivery_status : 1, 177 | __reserved_2 : 3, 178 | mask : 1, 179 | __reserved_3 : 15; 180 | __u32 __reserved_4[3]; 181 | } lvt_thermal; 182 | 183 | /*340*/ struct { /* LVT - Performance Counter */ 184 | __u32 vector : 8, 185 | delivery_mode : 3, 186 | __reserved_1 : 1, 187 | delivery_status : 1, 188 | __reserved_2 : 3, 189 | mask : 1, 190 | __reserved_3 : 15; 191 | __u32 __reserved_4[3]; 192 | } lvt_pc; 193 | 194 | /*350*/ struct { /* LVT - LINT0 */ 195 | __u32 vector : 8, 196 | delivery_mode : 3, 197 | __reserved_1 : 1, 198 | delivery_status : 1, 199 | 
polarity : 1, 200 | remote_irr : 1, 201 | trigger : 1, 202 | mask : 1, 203 | __reserved_2 : 15; 204 | __u32 __reserved_3[3]; 205 | } lvt_lint0; 206 | 207 | /*360*/ struct { /* LVT - LINT1 */ 208 | __u32 vector : 8, 209 | delivery_mode : 3, 210 | __reserved_1 : 1, 211 | delivery_status : 1, 212 | polarity : 1, 213 | remote_irr : 1, 214 | trigger : 1, 215 | mask : 1, 216 | __reserved_2 : 15; 217 | __u32 __reserved_3[3]; 218 | } lvt_lint1; 219 | 220 | /*370*/ struct { /* LVT - Error */ 221 | __u32 vector : 8, 222 | __reserved_1 : 4, 223 | delivery_status : 1, 224 | __reserved_2 : 3, 225 | mask : 1, 226 | __reserved_3 : 15; 227 | __u32 __reserved_4[3]; 228 | } lvt_error; 229 | 230 | /*380*/ struct { /* Timer Initial Count Register */ 231 | __u32 initial_count; 232 | __u32 __reserved_2[3]; 233 | } timer_icr; 234 | 235 | /*390*/ struct { /* Timer Current Count Register */ 236 | __u32 curr_count; 237 | __u32 __reserved_2[3]; 238 | } timer_ccr; 239 | 240 | /*3A0*/ struct { __u32 __reserved[4]; } __reserved_16; 241 | 242 | /*3B0*/ struct { __u32 __reserved[4]; } __reserved_17; 243 | 244 | /*3C0*/ struct { __u32 __reserved[4]; } __reserved_18; 245 | 246 | /*3D0*/ struct { __u32 __reserved[4]; } __reserved_19; 247 | 248 | /*3E0*/ struct { /* Timer Divide Configuration Register */ 249 | __u32 divisor : 4, 250 | __reserved_1 : 28; 251 | __u32 __reserved_2[3]; 252 | } timer_dcr; 253 | 254 | /*3F0*/ struct { __u32 __reserved[4]; } __reserved_20; 255 | 256 | } __attribute__ ((packed)); 257 | -------------------------------------------------------------------------------- /lib/tinykvm/amd64/memory_layout.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | namespace tinykvm { 5 | static constexpr uint64_t GDT_ADDR = 0x1600; 6 | static constexpr uint64_t TSS_ADDR = 0x1700; 7 | static constexpr uint64_t IDT_ADDR = 0x1800; 8 | static constexpr uint64_t INTR_ASM_ADDR = 0x2000; 9 | static constexpr uint64_t IST_ADDR 
/**
 * Test whether a 4 KiB page consists entirely of zero bytes.
 *
 * @param page  Pointer to 512 consecutive 64-bit words (one page).
 * @return true when every word is zero, false as soon as a non-zero
 *         group of words is found.
 */
static inline bool page_is_zeroed(const uint64_t* page) {
	constexpr size_t kWordsPerPage = 512;
	constexpr size_t kGroup = 8;
	const uint64_t* const page_end = page + kWordsPerPage;

	// Walk the page one 8-word group at a time, OR-ing the group together
	// so there is a single comparison per group.
	for (const uint64_t* cursor = page; cursor != page_end; cursor += kGroup) {
		uint64_t accum = 0;
		for (size_t k = 0; k < kGroup; k++)
			accum |= cursor[k];
		if (accum != 0)
			return false;
	}
	return true;
}
memory.at(memory.physbase + TSS_SMP_ADDR); 58 | 59 | auto* tss = (AMD64_TSS *)smp_tss_ptr; 60 | for (size_t c = 0; c < 17; c++) { 61 | /** XXX: TSS_SMP_STACK exception stack enough? */ 62 | tss[c].rsp0 = ist_base + TSS_SMP_STACK * (c + 1); 63 | tss[c].rsp1 = 0; 64 | tss[c].rsp2 = 0; 65 | tss[c].ist1 = tss[c].rsp0; 66 | tss[c].iomap_base = 104; // unused 67 | } 68 | } 69 | 70 | void setup_amd64_tss_regs(struct kvm_sregs& sregs, uint64_t tss_addr) 71 | { 72 | struct kvm_segment seg = { 73 | .base = tss_addr, 74 | .limit = sizeof(AMD64_TSS)-1, 75 | .selector = tss_sel, 76 | .type = 11, 77 | .present = 1, 78 | .dpl = 3, /* User-mode */ 79 | .db = 0, 80 | .s = 0, /* Gate */ 81 | .l = 0, /* 64-bit */ 82 | .g = 0, /* Byte granularity */ 83 | }; 84 | sregs.tr = seg; 85 | } 86 | 87 | } 88 | -------------------------------------------------------------------------------- /lib/tinykvm/amd64/tss.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | struct kvm_sregs; 4 | 5 | namespace tinykvm { 6 | struct vMemory; 7 | 8 | extern void setup_amd64_tss(vMemory&); 9 | 10 | extern void setup_amd64_tss_smp(vMemory&); 11 | 12 | extern void setup_amd64_tss_regs(struct kvm_sregs& sregs, uint64_t tss_addr); 13 | 14 | } 15 | -------------------------------------------------------------------------------- /lib/tinykvm/amd64/usercode.cpp: -------------------------------------------------------------------------------- 1 | #include "usercode.hpp" 2 | #include 3 | 4 | namespace tinykvm { 5 | 6 | static const unsigned char usercode[] = { 7 | 0x10, 0x00, 0x20, 0x00, 0x2c, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, 8 | 0x90, 0x90, 0x90, 0x90, 0x49, 0x89, 0xcd, 0xb8, 0x77, 0xf7, 0x01, 0x00, 9 | 0x0f, 0x05, 0x4c, 0x89, 0xe9, 0x41, 0xff, 0xe7, 0x48, 0x89, 0xc7, 0xb8, 10 | 0xff, 0xff, 0x00, 0x00, 0xe7, 0x00, 0xeb, 0xf7, 0xb8, 0x77, 0xf7, 0x01, 11 | 0x00, 0x0f, 0x05, 0x41, 0x5b, 0x59, 0x58, 0xc3, 0x00, 0x00, 0x00, 0x00, 12 | 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 13 | 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 14 | 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 15 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 16 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 17 | 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 18 | 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 19 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 20 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 21 | 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 22 | 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 23 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 24 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 25 | 0x0a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 26 | 0x00, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 27 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 28 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 29 | 0x0d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 30 | 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 31 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 32 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 33 | 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 34 | 0x00, 0x00, 0x00, 0x00 35 | }; 36 | 37 | const user_asm_header &usercode_header() 38 | { 39 | return *(const user_asm_header *)&usercode[0]; 40 | } 41 | 42 | void setup_vm64_usercode(void* usercode_area) 43 | { 44 | std::memcpy(usercode_area, usercode, sizeof(usercode)); 45 | } 46 | 47 | } // tinykvm 48 | 
-------------------------------------------------------------------------------- /lib/tinykvm/amd64/usercode.hpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "../memory.hpp" 3 | #include "memory_layout.hpp" 4 | 5 | namespace tinykvm { 6 | 7 | struct user_asm_header { 8 | uint16_t vm64_entry; 9 | uint16_t vm64_rexit; 10 | uint16_t vm64_preserving_entry; 11 | uint16_t vm64_unused; 12 | uint32_t vm64_cpuid; 13 | 14 | uint64_t translated_vm_entry(const vMemory& memory) const noexcept { 15 | return memory.physbase + USER_ASM_ADDR + vm64_entry; 16 | } 17 | uint64_t translated_vm_rexit(const vMemory& memory) const noexcept { 18 | return memory.physbase + USER_ASM_ADDR + vm64_rexit; 19 | } 20 | uint64_t translated_vm_preserving_entry(const vMemory& memory) const noexcept { 21 | return memory.physbase + USER_ASM_ADDR + vm64_preserving_entry; 22 | } 23 | uint64_t translated_vm_cpuid(const vMemory& memory) const noexcept { 24 | return memory.physbase + USER_ASM_ADDR + vm64_cpuid; 25 | } 26 | }; 27 | extern const user_asm_header& usercode_header(); 28 | 29 | extern void setup_vm64_usercode(void* usercode_area); 30 | 31 | } 32 | -------------------------------------------------------------------------------- /lib/tinykvm/amd64/vdso.cpp: -------------------------------------------------------------------------------- 1 | #include "vdso.hpp" 2 | 3 | namespace tinykvm { 4 | 5 | __attribute__((aligned(4096))) 6 | static const std::array vsys = { 7 | 0x66, 0xb8, 0x60, 0x00, 0x66, 0xe7, 0x00, 0xc3 8 | }; 9 | 10 | const std::array& vsys_page() { 11 | return vsys; 12 | } 13 | 14 | } // tinykvm 15 | -------------------------------------------------------------------------------- /lib/tinykvm/amd64/vdso.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | 5 | namespace tinykvm { 6 | static constexpr uint64_t VSYSCALL_AREA = 0xFFFF600000; 7 | 8 | 
/* Describes one extra virtual-memory mapping requested by the embedder.
   A list of these is carried in MachineOptions::remappings. */
struct VirtualRemapping {
	uint64_t phys;    /* presumably the physical address backing the range -- TODO confirm */
	uint64_t virt;    /* presumably the virtual address the range appears at -- TODO confirm */
	size_t size;      /* extent of the range (units assumed to be bytes -- confirm) */
	bool writable = false;   /* off by default */
	bool executable = false; /* off by default */
	bool blackout = false; /* Unmapped virtual area */
};
*/ 51 | bool master_direct_memory_writes = false; 52 | /* When enabled, split hugepages during page faults. */ 53 | bool split_hugepages = false; 54 | /* When enabled, reset_to() will accept a different 55 | master VM than the original, but at a steep cost. */ 56 | bool allow_reset_to_new_master = false; 57 | /* When enabled, reset_to() will copy all registers 58 | from the master VM to the new VM. */ 59 | bool reset_copy_all_registers = true; 60 | /* When reset_enter_usermode is enabled, the guest will 61 | be forced into usermode after reset_to(). */ 62 | bool reset_enter_usermode = true; 63 | /* When enabled, reset_to() will copy all memory 64 | from the master VM to the forked VM instead of 65 | resetting the memory banks. */ 66 | bool reset_keep_all_work_memory = false; 67 | /* Force-relocate fixed addresses with mmap(). */ 68 | bool relocate_fixed_mmap = true; 69 | /* Make heap executable, to support JIT. */ 70 | bool executable_heap = false; 71 | /* When using hugepages, cover the given size with 72 | hugepages, unless 0, in which case the entire 73 | main memory will be covered. 
/**
 * Base class for errors raised by the machine.
 *
 * Carries a message and one 64-bit datum whose meaning depends on the
 * thrower. The message pointer is stored as-is and is not copied, so it
 * must outlive the exception object.
 */
class MachineException : public std::exception {
public:
	MachineException(const char* msg, uint64_t data = 0)
		: m_msg{msg}, m_data{data}
	{}

	/* The message passed at construction */
	const char* what() const noexcept override {
		return this->m_msg;
	}
	/* The datum passed at construction (0 when omitted) */
	auto data() const noexcept {
		return this->m_data;
	}
protected:
	const char* m_msg;
	uint64_t m_data;
};

/**
 * Timeout flavour of MachineException. seconds() reports data()
 * divided by 1000.
 */
class MachineTimeoutException: public MachineException {
public:
	using MachineException::MachineException;

	float seconds() const noexcept {
		const double thousandths = data();
		return thousandths / 1000.0;
	}
};

/**
 * Memory-related error. The base datum holds the address involved,
 * and the size of the access or region is stored alongside it.
 */
class MemoryException: public MachineException {
public:
	MemoryException(const char* msg, uint64_t addr, uint64_t sz)
		: MachineException{msg, addr}, m_size{sz}
	{}

	const char* what() const noexcept override {
		return MachineException::what();
	}
	auto addr() const noexcept {
		return data();
	}
	auto size() const noexcept {
		return this->m_size;
	}
private:
	uint64_t m_size;
};
#include 7 | 8 | namespace tinykvm { 9 | 10 | #ifndef TINYKVM_ARCH 11 | #define TINYKVM_ARCH_AMD64 12 | #endif 13 | 14 | #if defined(TINYKVM_ARCH_AMD64) 15 | 16 | struct tinykvm_x86regs { 17 | __u64 rax, rbx, rcx, rdx; 18 | __u64 rsi, rdi, rsp, rbp; 19 | __u64 r8, r9, r10, r11; 20 | __u64 r12, r13, r14, r15; 21 | __u64 rip, rflags; 22 | }; 23 | 24 | struct tinykvm_x86fpuregs { 25 | __u8 fpr[8][16]; 26 | __u16 fcw; 27 | __u16 fsw; 28 | __u8 ftwx; /* in fxsave format */ 29 | __u8 pad1; 30 | __u16 last_opcode; 31 | __u64 last_ip; 32 | __u64 last_dp; 33 | __u8 xmm[16][16]; 34 | __u32 mxcsr; 35 | __u32 pad2; 36 | }; 37 | 38 | #define tinykvm_regs tinykvm_x86regs 39 | #define tinykvm_fpuregs tinykvm_x86fpuregs 40 | 41 | #elif defined(TINYKVM_ARCH_ARM64) 42 | 43 | #define tinykvm_regs tinykvm_arm64regs 44 | #define tinykvm_fpuregs tinykvm_arm64fpuregs 45 | 46 | #endif 47 | 48 | struct RSPClient; 49 | } 50 | -------------------------------------------------------------------------------- /lib/tinykvm/linux/signals.cpp: -------------------------------------------------------------------------------- 1 | #include "../machine.hpp" 2 | #include "threads.hpp" 3 | 4 | namespace tinykvm { 5 | 6 | Signals::Signals() {} 7 | Signals::~Signals() {} 8 | 9 | SignalAction& Signals::get(int sig) { 10 | if (sig > 0) 11 | return signals.at(sig-1); 12 | throw MachineException("Signal 0 invoked", sig); 13 | } 14 | 15 | void Signals::enter(vCPU& cpu, int sig) 16 | { 17 | if (sig == 0) return; 18 | auto& regs = cpu.registers(); 19 | 20 | auto& sigact = signals.at(sig); 21 | if (sigact.altstack) { 22 | const int tid = cpu.machine().threads().gettid(); 23 | // Change to alternate per-thread stack 24 | auto& stack = per_thread(tid).stack; 25 | // But only if non-zero 26 | if (stack.ss_sp != 0x0) { 27 | regs.rsp = stack.ss_sp + stack.ss_size; 28 | } 29 | } 30 | 31 | //cpu.machine().enter_usermode(); 32 | regs.rcx = sigact.handler; 33 | cpu.set_registers(regs); 34 | } 35 | 36 | SignalAction& 
/**
 * The action registered for one signal.
 *
 * A freshly constructed action has no handler: `handler` starts out as
 * SIG_UNSET, and a handler value of zero is treated as unset as well.
 */
struct SignalAction {
	static constexpr uint64_t SIG_UNSET = ~0ULL;

	/* True when no usable handler has been installed */
	bool is_unset() const noexcept {
		if (handler == SIG_UNSET)
			return true;
		return handler == 0x0;
	}

	uint64_t handler = SIG_UNSET;
	/* When set, the per-thread alternate stack is preferred on entry */
	bool altstack = false;
	unsigned mask = 0x0;
	uint64_t restorer = 0x0;
};
20 | void resume(); 21 | void exit(); 22 | 23 | Thread(MultiThreading&, int tid, uint64_t tls, uint64_t stack); 24 | Thread(MultiThreading&, const Thread& other); 25 | }; 26 | 27 | struct MultiThreading { 28 | Thread& get_thread(); 29 | Thread* get_thread(int tid); /* or nullptr */ 30 | int gettid() { return get_thread().tid; } 31 | 32 | Thread& create(int flags, uint64_t ctid, uint64_t ptid, 33 | uint64_t stack, uint64_t tls); 34 | bool suspend_and_yield(int64_t result = 0); 35 | void erase_thread(int tid); 36 | void wakeup_next(); 37 | 38 | void reset_to(const MultiThreading& other); 39 | 40 | MultiThreading(Machine&); 41 | Machine& machine; 42 | private: 43 | std::map m_threads; 44 | std::vector m_suspended; 45 | Thread* m_current = nullptr; 46 | int thread_counter = 1; 47 | friend struct Thread; 48 | }; 49 | 50 | } 51 | -------------------------------------------------------------------------------- /lib/tinykvm/machine_env.cpp: -------------------------------------------------------------------------------- 1 | #include "machine.hpp" 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include "util/elf.hpp" 9 | 10 | namespace tinykvm { 11 | using address_t = Machine::address_t; 12 | 13 | template 14 | struct AuxVec 15 | { 16 | T a_type; /* Entry type */ 17 | T a_val; /* Register value */ 18 | }; 19 | 20 | static inline 21 | void push_arg(Machine& m, std::vector& vec, address_t& dst, const std::string& str) 22 | { 23 | dst -= str.size()+1; 24 | dst &= ~0x7LL; // maintain alignment 25 | vec.push_back(dst); 26 | m.copy_to_guest(dst, str.data(), str.size()+1); 27 | } 28 | static inline 29 | void push_aux(std::vector& vec, AuxVec aux) 30 | { 31 | vec.push_back(aux.a_type); 32 | vec.push_back(aux.a_val); 33 | } 34 | static inline 35 | void push_down(Machine& m, address_t& dst, const void* data, size_t size) 36 | { 37 | dst -= size; 38 | dst &= ~0x7LL; // maintain alignment 39 | m.copy_to_guest(dst, data, size); 40 | } 41 | 42 | void 
Machine::setup_argv(__u64& rsp, 43 | const std::vector& args, 44 | const std::vector& env) 45 | { 46 | // Arguments to main() 47 | std::vector argv; 48 | argv.push_back(args.size()); // argc 49 | for (const auto& string : args) { 50 | argv.push_back(stack_push(rsp, string)); 51 | } 52 | argv.push_back(0x0); 53 | for (const auto& string : env) { 54 | argv.push_back(stack_push(rsp, string)); 55 | } 56 | argv.push_back(0x0); 57 | 58 | // Extra aligned SP and copy the arguments over 59 | const size_t argsize = argv.size() * sizeof(argv[0]); 60 | rsp -= argsize; 61 | rsp &= ~0xFLL; // 16-byte stack alignment 62 | 63 | this->copy_to_guest(rsp, argv.data(), argsize); 64 | } 65 | void Machine::setup_argv( 66 | const std::vector& args, 67 | const std::vector& env) 68 | { 69 | struct tinykvm_x86regs regs {}; 70 | this->setup_registers(regs); 71 | this->setup_argv(regs.rsp, args, env); 72 | // Set registers back 73 | this->set_registers(regs); 74 | } 75 | 76 | void Machine::setup_linux(__u64& rsp, 77 | const std::vector& args, 78 | const std::vector& env) 79 | { 80 | address_t dst = rsp; 81 | 82 | /* Generate stack canary */ 83 | auto gen = std::default_random_engine(time(0)); 84 | std::uniform_int_distribution rand(0,256); 85 | 86 | std::array canary; 87 | std::generate(canary.begin(), canary.end(), [&] { return rand(gen); }); 88 | push_down(*this, dst, canary.data(), canary.size()); 89 | const auto canary_addr = dst; 90 | 91 | const char platform[] = "x86_64"; 92 | push_down(*this, dst, platform, sizeof(platform)); 93 | const auto platform_addr = dst; 94 | 95 | /* ELF program headers */ 96 | const auto* binary_ehdr = elf_offset (m_binary, 0); 97 | const auto* binary_phdr = elf_offset (m_binary, binary_ehdr->e_phoff); 98 | const unsigned phdr_count = binary_ehdr->e_phnum; 99 | 100 | /* Check if we have a PT_PHDR program header already loaded into memory */ 101 | address_t phdr_location = 0; 102 | for (unsigned i = 0; i < phdr_count; i++) { 103 | if (binary_phdr[i].p_type == 
PT_PHDR) { 104 | phdr_location = this->m_image_base + binary_phdr[i].p_vaddr; 105 | break; 106 | } 107 | } 108 | if (phdr_location == 0) { 109 | /* Push program headers */ 110 | dst -= phdr_count * sizeof(Elf64_Phdr); 111 | dst &= ~0xFLL; 112 | phdr_location = dst; 113 | this->copy_to_guest(dst, binary_phdr, phdr_count * sizeof(Elf64_Phdr)); 114 | } 115 | 116 | /* Push arguments to main() */ 117 | std::vector argv; 118 | argv.push_back(args.size()); // argc 119 | for (const auto& string : args) { 120 | push_arg(*this, argv, dst, string); 121 | } 122 | argv.push_back(0x0); 123 | 124 | /* Push environment vars */ 125 | for (const auto& string : env) { 126 | push_arg(*this, argv, dst, string); 127 | } 128 | argv.push_back(0x0); 129 | 130 | /* Push auxiliary vector */ 131 | push_aux(argv, {AT_PAGESZ, vMemory::PageSize()}); 132 | push_aux(argv, {AT_CLKTCK, 100}); 133 | 134 | // ELF related 135 | push_aux(argv, {AT_PHDR, phdr_location}); 136 | push_aux(argv, {AT_PHENT, sizeof(Elf64_Phdr)}); 137 | push_aux(argv, {AT_PHNUM, phdr_count}); 138 | 139 | // Misc 140 | const address_t base_address = (this->m_image_base + binary_ehdr->e_entry) & ~0xFFFFFFL; // XXX: Guesstimate! 
141 | const address_t entry_address = this->m_image_base + binary_ehdr->e_entry; 142 | push_aux(argv, {AT_BASE, base_address}); 143 | push_aux(argv, {AT_ENTRY, entry_address}); 144 | push_aux(argv, {AT_HWCAP, getauxval(AT_HWCAP)}); 145 | push_aux(argv, {AT_HWCAP2, getauxval(AT_HWCAP2)}); 146 | #ifdef AT_HWCAP3 147 | push_aux(argv, {AT_HWCAP3, getauxval(AT_HWCAP3)}); 148 | # ifdef AT_HWCAP4 149 | push_aux(argv, {AT_HWCAP4, getauxval(AT_HWCAP4)}); 150 | # endif 151 | #endif 152 | push_aux(argv, {AT_UID, 1000}); 153 | push_aux(argv, {AT_EUID, 0}); 154 | push_aux(argv, {AT_GID, 0}); 155 | push_aux(argv, {AT_EGID, 0}); 156 | push_aux(argv, {AT_SECURE, 0}); 157 | push_aux(argv, {AT_PLATFORM, platform_addr}); 158 | push_aux(argv, {AT_MINSIGSTKSZ, getauxval(AT_MINSIGSTKSZ)}); 159 | 160 | push_aux(argv, {AT_DCACHEBSIZE, getauxval(AT_DCACHEBSIZE)}); 161 | push_aux(argv, {AT_ICACHEBSIZE, getauxval(AT_ICACHEBSIZE)}); 162 | push_aux(argv, {AT_L1D_CACHEGEOMETRY, getauxval(AT_L1D_CACHEGEOMETRY)}); 163 | push_aux(argv, {AT_L1D_CACHESIZE, getauxval(AT_L1D_CACHESIZE)}); 164 | push_aux(argv, {AT_L1I_CACHEGEOMETRY, getauxval(AT_L1I_CACHEGEOMETRY)}); 165 | push_aux(argv, {AT_L1I_CACHESIZE, getauxval(AT_L1I_CACHESIZE)}); 166 | push_aux(argv, {AT_L2_CACHEGEOMETRY, getauxval(AT_L2_CACHEGEOMETRY)}); 167 | push_aux(argv, {AT_L2_CACHESIZE, getauxval(AT_L2_CACHESIZE)}); 168 | push_aux(argv, {AT_L3_CACHEGEOMETRY, getauxval(AT_L3_CACHEGEOMETRY)}); 169 | push_aux(argv, {AT_L3_CACHESIZE, getauxval(AT_L3_CACHESIZE)}); 170 | push_aux(argv, {AT_UCACHEBSIZE, getauxval(AT_UCACHEBSIZE)}); 171 | 172 | // Canary / randomness 173 | push_aux(argv, {AT_RANDOM, canary_addr}); 174 | push_aux(argv, {AT_NULL, 0}); 175 | 176 | // from this point on the stack is starting, pointing @ argc 177 | // install the arg vector 178 | const size_t argsize = argv.size() * sizeof(argv[0]); 179 | dst -= argsize; 180 | dst &= ~0xFLL; // 16-byte stack alignment 181 | this->copy_to_guest(dst, argv.data(), argsize); 182 | // 
/**
 * Convert a timeout in seconds to timer ticks (1000 ticks per second,
 * the inverse of ticks_to_seconds()).
 *
 * The result saturates at both ends: negative inputs yield 0 and inputs
 * too large for 32 bits yield UINT32_MAX. Converting a negative float
 * to an unsigned integer is undefined behaviour, so the lower bound is
 * checked explicitly. A NaN input fails both comparisons and, as
 * before, maps to UINT32_MAX.
 *
 * @param seconds  Timeout in seconds.
 * @return Tick count clamped to [0, UINT32_MAX].
 */
constexpr inline uint32_t to_ticks(float seconds) {
	const float val = seconds * 1000.0;
	if (val < 0.0f)
		return 0;
	return (val < (float)UINT32_MAX) ? (uint32_t)val : UINT32_MAX;
}
| vcpu.set_special_registers(sregs); 47 | } 48 | 49 | 50 | template inline constexpr 51 | void Machine::setup_call(tinykvm_x86regs& regs, 52 | uint64_t addr, uint64_t rsp, 53 | Args&&... args) 54 | { 55 | regs = {}; 56 | /* Set IOPL=3 to allow I/O instructions in usermode */ 57 | regs.rflags = 2 | (3 << 12); 58 | if (this->m_just_reset) { 59 | this->m_just_reset = false; 60 | regs.r15 = addr; 61 | regs.rip = this->entry_address(); 62 | } else { 63 | regs.rip = addr; 64 | } 65 | regs.rsp = rsp; 66 | [[maybe_unused]] unsigned iargs = 0; 67 | ([&] { 68 | auto& reg = [iargs, ®s] () mutable -> unsigned long long& { 69 | if (iargs == 0) 70 | return regs.rdi; 71 | else if (iargs == 1) 72 | return regs.rsi; 73 | else if (iargs == 2) 74 | return regs.rdx; 75 | else if (iargs == 3) 76 | return regs.rcx; 77 | else if (iargs == 4) 78 | return regs.r8; 79 | else if (iargs == 5) 80 | return regs.r9; 81 | throw MachineException("Too many vmcall arguments"); 82 | }(); 83 | if constexpr (std::is_integral_v>) { 84 | reg = args; 85 | iargs ++; 86 | } else if constexpr (is_stdstring::value) { 87 | reg = stack_push(regs.rsp, args.c_str(), args.size()+1); 88 | iargs ++; 89 | } else if constexpr (is_string::value) { 90 | reg = stack_push_cstr(regs.rsp, args); 91 | iargs ++; 92 | } else if constexpr (std::is_pod_v>) { 93 | reg = stack_push(regs.rsp, args); 94 | iargs ++; 95 | } else { 96 | static_assert(always_false, "Unknown vmcall argument type"); 97 | } 98 | }(), ...); 99 | 100 | /* Re-align stack for SSE */ 101 | regs.rsp &= ~(uint64_t) 0xF; 102 | /* Push return value last */ 103 | stack_push (regs.rsp, exit_address()); 104 | /* VM needs to be in user-mode to make a vmcall. 
*/ 105 | this->enter_usermode(); 106 | } 107 | 108 | inline void Machine::vmresume(float timeout) 109 | { 110 | auto& regs = vcpu.registers(); 111 | if (this->m_just_reset) { 112 | this->m_just_reset = false; 113 | // We have to go the long way around using the preserving entry 114 | // point, because the guest cannot see the correct memory now. 115 | // Carefully push RAX, RCX and R11 (used by SYSCALL instruction) 116 | // which will be popped by the preserving entry point. And finally, 117 | // push the old RIP which will be used by the RET instruction. 118 | struct PreservedRegisters { 119 | uint64_t r11; 120 | uint64_t rcx; 121 | uint64_t rax; 122 | uint64_t rip; // for the RET instruction 123 | } pvs; 124 | regs.rsp -= sizeof(pvs); 125 | pvs.rip = regs.rip; 126 | pvs.rax = regs.rax; 127 | pvs.rcx = regs.rcx; 128 | pvs.r11 = regs.r11; 129 | // Push the registers 130 | this->copy_to_guest(regs.rsp, &pvs, sizeof(pvs)); 131 | // Set the new registers 132 | regs.rip = this->preserving_entry_address(); 133 | vcpu.set_registers(regs); 134 | } else { 135 | // Nothing to do as the registers are already set 136 | // and the guest can see the memory. 137 | } 138 | this->run_in_usermode(timeout); 139 | } 140 | 141 | inline void Machine::setup_clone(tinykvm_x86regs& regs, address_t stack) 142 | { 143 | /* Set IOPL=3 to allow I/O instructions */ 144 | regs.rflags = 2 | (3 << 12); 145 | regs.r15 = regs.rip; 146 | regs.rip = this->entry_address(); 147 | regs.rbp = 0; 148 | regs.rsp = stack; 149 | } 150 | 151 | template inline constexpr 152 | void Machine::vmcall(uint64_t addr, Args&&... args) 153 | { 154 | auto& regs = vcpu.registers(); 155 | this->setup_call(regs, addr, this->stack_address(), std::forward (args)...); 156 | vcpu.set_registers(regs); 157 | this->run(); 158 | } 159 | 160 | template inline 161 | void Machine::vmcall(const char* function, Args&&... 
args) 162 | { 163 | auto address = address_of(function); 164 | vmcall(address, std::forward (args)...); 165 | } 166 | 167 | template inline constexpr 168 | void Machine::timed_vmcall(uint64_t addr, float timeout, Args&&... args) 169 | { 170 | auto& regs = vcpu.registers(); 171 | this->setup_call(regs, addr, 172 | this->stack_address(), std::forward (args)...); 173 | vcpu.set_registers(regs); 174 | this->run(timeout); 175 | } 176 | 177 | template inline constexpr 178 | void Machine::timed_vmcall_stack(uint64_t addr, uint64_t stk, float timeout, Args&&... args) 179 | { 180 | auto& regs = vcpu.registers(); 181 | this->setup_call(regs, addr, stk, std::forward (args)...); 182 | vcpu.set_registers(regs); 183 | this->run(timeout); 184 | } 185 | 186 | inline uint64_t Machine::stack_push(__u64& sp, const std::string& string) 187 | { 188 | return stack_push(sp, string.data(), string.size()+1); /* zero */ 189 | } 190 | template 191 | inline uint64_t Machine::stack_push(__u64& sp, const T& type) 192 | { 193 | return stack_push(sp, &type, sizeof(T)); 194 | } 195 | template 196 | inline uint64_t Machine::stack_push_std_array(__u64& sp, const T& type, size_t N) 197 | { 198 | return stack_push(sp, type.data(), N * sizeof(typename T::value_type)); 199 | } 200 | 201 | inline vMemory& Machine::main_memory() noexcept 202 | { 203 | return memory; 204 | } 205 | inline const vMemory& Machine::main_memory() const noexcept 206 | { 207 | return memory; 208 | } 209 | 210 | inline std::string_view Machine::memory_at(uint64_t a, size_t s) const 211 | { 212 | return memory.view(a, s); 213 | } 214 | template 215 | inline T* Machine::rw_memory_at(uint64_t a, size_t s) 216 | { 217 | return (T*) memory.safely_at(a, s); 218 | } 219 | inline bool Machine::memory_safe_at(uint64_t a, size_t s) const 220 | { 221 | return memory.safely_within(a, s); 222 | } 223 | 224 | inline Signals& Machine::signals() { 225 | if (m_signals == nullptr) m_signals.reset(new Signals); 226 | return *m_signals; 227 | } 228 | 
-------------------------------------------------------------------------------- /lib/tinykvm/memory.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "common.hpp" 3 | #include "memory_bank.hpp" 4 | #include "virtual_mem.hpp" 5 | #include 6 | #include 7 | #include 8 | 9 | namespace tinykvm { 10 | struct Machine; 11 | struct MemoryBanks; 12 | 13 | struct vMemory { 14 | static constexpr uint64_t PageSize() { 15 | return 4096u; 16 | } 17 | 18 | Machine& machine; 19 | uint64_t physbase; 20 | uint64_t safebase; 21 | uint64_t page_tables; 22 | /* Optional executable memory range */ 23 | uint64_t vmem_exec_begin = 0; 24 | uint64_t vmem_exec_end = 0; 25 | /* Counter for the number of pages that have been unlocked 26 | in the main memory. */ 27 | size_t unlocked_pages = 0; 28 | /* Linear memory */ 29 | char* ptr; 30 | size_t size; 31 | bool owned = true; 32 | /* Use memory banks only for page tables, write directly 33 | to main memory. Used with is_forkable_master(). */ 34 | bool main_memory_writes = false; 35 | /* Split into small pages (4K) when reaching a leaf hugepage. 
*/ 36 | bool split_hugepages = true; 37 | /* Executable heap */ 38 | bool executable_heap = false; 39 | /* Dynamic page memory */ 40 | MemoryBanks banks; // fault-in memory banks 41 | /* SMP mutex */ 42 | std::mutex mtx_smp; 43 | bool smp_guards_enabled = false; 44 | 45 | /* Unsafe */ 46 | bool within(uint64_t addr, size_t asize) const noexcept { 47 | return (addr >= physbase) && (addr + asize <= physbase + this->size) && (addr <= addr + asize); 48 | } 49 | char* at(uint64_t addr, size_t asize = 8); 50 | const char* at(uint64_t addr, size_t asize = 8) const; 51 | uint64_t* page_at(uint64_t addr) const; 52 | /* Safe */ 53 | bool safely_within(uint64_t addr, size_t asize) const noexcept { 54 | return (addr >= safebase) && (addr + asize <= physbase + this->size); 55 | } 56 | const char* safely_at(uint64_t addr, size_t asize) const; 57 | char* safely_at(uint64_t addr, size_t asize); 58 | std::string_view view(uint64_t addr, size_t asize) const; 59 | 60 | char *get_userpage_at(uint64_t addr) const; 61 | char *get_kernelpage_at(uint64_t addr) const; 62 | char *get_writable_page(uint64_t addr, uint64_t flags, bool zeroes); 63 | MemoryBank::Page new_page(); 64 | MemoryBank::Page new_hugepage(); 65 | 66 | bool compare(const vMemory& other); 67 | /* When a main VM has direct memory writes enabled, it can 68 | write directly to its own memory, but in order to constrain 69 | the memory usage, we need to keep track of the number of 70 | pages that have been unlocked. 
*/ 71 | void increment_unlocked_pages(size_t pages); 72 | size_t unlocked_memory_pages() const noexcept { 73 | return unlocked_pages; 74 | } 75 | 76 | VirtualMem vmem() const; 77 | 78 | [[noreturn]] static void memory_exception(const char*, uint64_t, uint64_t); 79 | bool fork_reset(const Machine&, const MachineOptions&); // Returns true if a full reset was done 80 | void fork_reset(const vMemory& other, const MachineOptions&); 81 | static vMemory New(Machine&, const MachineOptions&, uint64_t phys, uint64_t safe, size_t size); 82 | /* Returns true when this VM uses banking only to make page tables writable 83 | again in order to support itself. It has already been made forkable. */ 84 | bool is_forkable_master() const noexcept; 85 | 86 | uint64_t expectedUsermodeFlags() const noexcept; 87 | 88 | /* Create new identity-mapped memory regions */ 89 | vMemory(Machine&, const MachineOptions&, uint64_t, uint64_t, char*, size_t, bool = true); 90 | /* Loan memory from another machine */ 91 | vMemory(Machine&, const MachineOptions&, const vMemory& other); 92 | ~vMemory(); 93 | 94 | static uint64_t overaligned_memsize(uint64_t size) { 95 | static constexpr uint64_t ALIGN = 1ULL << 21; 96 | return (size + (ALIGN - 1)) & ~(ALIGN - 1); 97 | } 98 | private: 99 | using AllocationResult = std::tuple; 100 | static AllocationResult allocate_mapped_memory(const MachineOptions&, size_t size); 101 | }; 102 | 103 | } 104 | -------------------------------------------------------------------------------- /lib/tinykvm/memory_bank.cpp: -------------------------------------------------------------------------------- 1 | #include "memory_bank.hpp" 2 | 3 | #include "common.hpp" 4 | #include "machine.hpp" 5 | #include "virtual_mem.hpp" 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | namespace tinykvm { 12 | static constexpr bool VERBOSE_MEMORY_BANK = false; 13 | static constexpr bool MADVISE_NOT_DELETE = true; 14 | 15 | MemoryBanks::MemoryBanks(Machine& machine, const 
MachineOptions& options) 16 | : m_machine { machine }, 17 | m_arena_begin { ARENA_BASE_ADDRESS }, 18 | m_arena_next { m_arena_begin }, 19 | m_idx_begin { FIRST_BANK_IDX }, 20 | m_idx { m_idx_begin } 21 | { 22 | this->set_max_pages(options.max_cow_mem / vMemory::PageSize(), 23 | options.hugepages_arena_size / vMemory::PageSize()); 24 | } 25 | void MemoryBanks::set_max_pages(size_t new_max, size_t new_hugepages) 26 | { 27 | this->m_max_pages = new_max; 28 | this->m_hugepage_banks = new_hugepages / MemoryBank::N_PAGES; 29 | //printf("Memory banks: %u pages, %u hugepages\n", 30 | // m_max_pages, m_hugepage_banks * MemoryBank::N_PAGES); 31 | /* Reserve the maximum number of banks possible. 32 | NOTE: DO NOT modify this! Needs deque behavior. */ 33 | m_mem.reserve((m_max_pages + MemoryBank::N_PAGES-1) / MemoryBank::N_PAGES); 34 | } 35 | 36 | char* MemoryBanks::try_alloc(size_t N, bool try_hugepages) 37 | { 38 | char* ptr = (char*)MAP_FAILED; 39 | if (try_hugepages && N == 512) { 40 | ptr = (char*) mmap(NULL, N * vMemory::PageSize(), PROT_READ | PROT_WRITE, 41 | MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE | MAP_HUGETLB, -1, 0); 42 | } 43 | if (ptr == MAP_FAILED) { 44 | return (char*) mmap(NULL, N * vMemory::PageSize(), PROT_READ | PROT_WRITE, 45 | MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE, -1, 0); 46 | } 47 | return ptr; 48 | } 49 | 50 | MemoryBank& MemoryBanks::allocate_new_bank(uint64_t addr) 51 | { 52 | size_t pages = MemoryBank::N_PAGES; 53 | const bool try_hugepages = m_mem.size() < m_hugepage_banks; 54 | char* mem = this->try_alloc(pages, try_hugepages); 55 | if (mem == nullptr) { 56 | pages = 4; 57 | mem = this->try_alloc(pages, false); 58 | } 59 | 60 | const size_t size = pages * vMemory::PageSize(); 61 | if (mem != nullptr) { 62 | m_mem.emplace_back(*this, mem, addr, pages, m_idx); 63 | 64 | VirtualMem vmem { addr, mem, size }; 65 | //printf("Installing memory %u at 0x%lX from 0x%lX, %zu pages\n", 66 | // m_idx, addr, (uintptr_t) mem, N_PAGES); 67 | 
m_machine.install_memory(m_idx++, vmem, false); 68 | 69 | return m_mem.back(); 70 | } 71 | throw MemoryException("Failed to allocate memory bank", 0, size); 72 | } 73 | MemoryBank& MemoryBanks::get_available_bank(size_t pages) 74 | { 75 | /* Hugepages are 512 4k pages, and consume a whole bank, right now. */ 76 | for (unsigned idx = 0; idx < m_mem.size(); idx++) { 77 | auto& bank = m_mem.at(idx); 78 | if (bank.room_for(pages)) { 79 | return bank; 80 | } 81 | } 82 | /* Allocate new memory bank if we are not maxing out memory */ 83 | if (m_num_pages < m_max_pages) { 84 | if constexpr (VERBOSE_MEMORY_BANK) { 85 | printf("Allocating new bank at 0x%lX with total pages %u/%u\n", 86 | m_arena_next, m_num_pages + MemoryBank::N_PAGES, m_max_pages); 87 | } 88 | auto& bank = this->allocate_new_bank(m_arena_next); 89 | m_num_pages += bank.n_pages; 90 | m_arena_next += bank.size(); 91 | return bank; 92 | } 93 | if constexpr (VERBOSE_MEMORY_BANK) { 94 | fprintf(stderr, "Out of working memory requesting %zu pages, %u vs %u max pages\n", 95 | pages, m_num_pages, m_max_pages); 96 | } 97 | throw MemoryException("Out of working memory", 98 | m_num_pages * vMemory::PageSize(), m_max_pages * vMemory::PageSize()); 99 | } 100 | void MemoryBanks::reset(const MachineOptions& options) 101 | { 102 | /* New maximum pages total in banks. */ 103 | this->m_max_pages = options.max_cow_mem / vMemory::PageSize(); 104 | 105 | /* Free memory belonging to banks after the free limit. */ 106 | size_t limit_pages = options.reset_free_work_mem / vMemory::PageSize(); 107 | /* Avoid freeing memory from the first bank, which always has 4k pages. 
*/ 108 | size_t final_banks = std::max(size_t(1u), limit_pages / MemoryBank::N_PAGES); 109 | 110 | if constexpr (MADVISE_NOT_DELETE) 111 | { 112 | /* Instead of removing the banks, give memory back to kernel */ 113 | for (size_t i = final_banks; i < m_mem.size(); i++) { 114 | if (m_mem[i].dirty_size() > 0) 115 | madvise(m_mem[i].mem, m_mem[i].dirty_size(), MADV_FREE); 116 | /* WARNING: MADV_FREE does not immediately free, so we can *not* consider them reclaimed. :( */ 117 | //m_mem[i].n_dirty = 0; 118 | } 119 | } else { 120 | /* Erase the last N elements after final_banks */ 121 | while (final_banks < m_mem.size()) { 122 | this->m_idx--; 123 | this->m_num_pages -= m_mem.back().n_pages; 124 | m_machine.delete_memory(this->m_idx); 125 | m_mem.pop_back(); 126 | } 127 | } 128 | 129 | /* Reset page usage for remaining banks */ 130 | for (auto& bank : m_mem) { 131 | bank.n_used = 0; 132 | } 133 | } 134 | 135 | MemoryBank::MemoryBank(MemoryBanks& b, char* p, uint64_t a, uint16_t np, uint16_t x) 136 | : mem(p), addr(a), n_pages(np), idx(x), banks(b) 137 | {} 138 | MemoryBank::~MemoryBank() 139 | { 140 | munmap(this->mem, this->n_pages * vMemory::PageSize()); 141 | } 142 | 143 | MemoryBank::Page MemoryBank::get_next_page(size_t pages) 144 | { 145 | assert(this->n_used + pages <= this->n_pages); 146 | const uint64_t offset = vMemory::PageSize() * this->n_used; 147 | const bool dirty = this->n_used < this->n_dirty; 148 | this->n_used += pages; 149 | this->n_dirty = std::max(this->n_used, this->n_dirty); 150 | return {(uint64_t *)&mem[offset], addr + offset, pages * vMemory::PageSize(), dirty}; 151 | } 152 | 153 | VirtualMem MemoryBank::to_vmem() const noexcept 154 | { 155 | return VirtualMem {this->addr, this->mem, this->size()}; 156 | } 157 | 158 | } // tinykvm 159 | -------------------------------------------------------------------------------- /lib/tinykvm/memory_bank.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 
| #include 3 | #include 4 | #include "common.hpp" 5 | #include "virtual_mem.hpp" 6 | 7 | namespace tinykvm { 8 | struct Machine; 9 | struct MemoryBanks; 10 | 11 | struct MemoryBank { 12 | // This is 1x 2MB page (second-level amd64 page) 13 | static constexpr unsigned N_PAGES = 1u * 512; 14 | static constexpr unsigned SEARCH_TRESHOLD = 512u; 15 | 16 | char* mem; 17 | uint64_t addr; 18 | uint16_t n_used = 0; 19 | uint16_t n_dirty = 0; 20 | const uint16_t n_pages; 21 | const uint16_t idx; 22 | MemoryBanks& banks; 23 | 24 | bool within(uint64_t a, uint64_t s) const noexcept { 25 | return (a >= addr) && (a + s <= addr + this->size()) && (a <= a + s); 26 | } 27 | char* at(uint64_t paddr) { 28 | return &mem[paddr - this->addr]; 29 | } 30 | const char* at(uint64_t paddr) const { 31 | return &mem[paddr - this->addr]; 32 | } 33 | uint64_t size() const noexcept { return n_pages * 4096; } 34 | uint64_t dirty_size() const noexcept { return n_dirty * 4096; } 35 | bool empty() const noexcept { return n_used == n_pages; } 36 | bool room_for(size_t pages) const noexcept { return n_used + pages <= n_pages; } 37 | struct Page { 38 | uint64_t* pmem; 39 | uint64_t addr; 40 | size_t size; 41 | bool dirty; 42 | }; 43 | Page get_next_page(size_t n_pages); 44 | 45 | VirtualMem to_vmem() const noexcept; 46 | 47 | MemoryBank(MemoryBanks&, char*, uint64_t, uint16_t n, uint16_t idx); 48 | ~MemoryBank(); 49 | }; 50 | 51 | struct MemoryBanks { 52 | static constexpr unsigned FIRST_BANK_IDX = 2; 53 | static constexpr uint64_t ARENA_BASE_ADDRESS = 0x7000000000; 54 | 55 | MemoryBanks(Machine&, const MachineOptions&); 56 | 57 | MemoryBank& get_available_bank(size_t n_pages); 58 | void reset(const MachineOptions&); 59 | void set_max_pages(size_t new_max, size_t new_hugepages); 60 | size_t max_pages() const noexcept { return m_max_pages; } 61 | 62 | bool using_hugepages() const noexcept { return m_hugepage_banks > 0; } 63 | size_t banks_with_hugepages() const noexcept { return m_hugepage_banks; } 64 | 
65 | auto begin() { return m_mem.begin(); } 66 | auto end() { return m_mem.end(); } 67 | auto begin() const { return m_mem.cbegin(); } 68 | auto end() const { return m_mem.cend(); } 69 | 70 | private: 71 | MemoryBank& allocate_new_bank(uint64_t addr); 72 | char* try_alloc(size_t N, bool try_hugepages); 73 | 74 | std::vector m_mem; 75 | Machine& m_machine; 76 | const uint64_t m_arena_begin; 77 | uint64_t m_arena_next; 78 | const uint16_t m_idx_begin; 79 | uint16_t m_idx; 80 | /* Number of initial banks that will allocate backing memory using hugepages */ 81 | uint32_t m_hugepage_banks = 0; 82 | uint32_t m_num_pages = 0; 83 | /* Max number of pages in all the banks */ 84 | uint32_t m_max_pages; 85 | 86 | friend struct MemoryBank; 87 | }; 88 | 89 | } 90 | -------------------------------------------------------------------------------- /lib/tinykvm/memory_maps.cpp: -------------------------------------------------------------------------------- 1 | #include "machine.hpp" 2 | 3 | namespace tinykvm { 4 | static constexpr size_t MMAP_COLLISION_TRESHOLD = 512ULL << 20; // 512MB 5 | static constexpr uint64_t PageMask = vMemory::PageSize()-1; 6 | static constexpr bool VERBOSE_MMAP_CACHE = false; 7 | 8 | MMapCache::Range MMapCache::find(uint64_t size) 9 | { 10 | auto it = m_free_ranges.begin(); 11 | while (it != m_free_ranges.end()) 12 | { 13 | auto& r = *it; 14 | if (r.size >= size) { 15 | const Range result { r.addr, size }; 16 | if (r.size > size) { 17 | r.addr += size; 18 | r.size -= size; 19 | } else { 20 | m_free_ranges.erase(it); 21 | } 22 | if constexpr (VERBOSE_MMAP_CACHE) 23 | printf("MMapCache: Found free range %lx %lx\n", result.addr, result.addr + result.size); 24 | return result; 25 | } 26 | ++it; 27 | } 28 | return Range{}; 29 | } 30 | 31 | const MMapCache::Range* MMapCache::find_collision(const std::vector& ranges, const Range& r) 32 | { 33 | for (auto& line : ranges) 34 | { 35 | if (line.overlaps(r.addr, r.size)) 36 | { 37 | // Collision with another range 38 
| return &line; 39 | } 40 | } 41 | return nullptr; 42 | } 43 | 44 | void MMapCache::insert_free(uint64_t addr, uint64_t size) 45 | { 46 | if (addr + size > current()) 47 | { 48 | throw MemoryException("MMapCache: Invalid free range", addr, size); 49 | } 50 | if constexpr (VERBOSE_MMAP_CACHE) 51 | printf("MMapCache: Inserting free range %lx %lx\n", addr, addr + size); 52 | 53 | // Check for collisions with other ranges 54 | if (find_collision(m_free_ranges, { addr, size })) 55 | { 56 | throw MemoryException("MMapCache: Collision detected inserting free range", addr, size); 57 | } 58 | // Connect existing ranges if they are adjacent 59 | for (Range& free_range : m_free_ranges) 60 | { 61 | if (free_range.addr + free_range.size == addr) 62 | { 63 | if constexpr (VERBOSE_MMAP_CACHE) 64 | printf("MMapCache: Merging free range *above* %lx %lx with result %lx %lx\n", 65 | free_range.addr, free_range.addr + free_range.size, 66 | free_range.addr, free_range.addr + free_range.size + size); 67 | 68 | free_range.size += size; 69 | return; 70 | } 71 | else if (free_range.addr == addr + size) 72 | { 73 | if constexpr (VERBOSE_MMAP_CACHE) 74 | printf("MMapCache: Merging free range *below* %lx %lx with result %lx %lx\n", 75 | free_range.addr, free_range.addr + free_range.size, 76 | addr, addr + size + free_range.size); 77 | free_range.addr = addr; 78 | free_range.size += size; 79 | return; 80 | } 81 | } 82 | 83 | if (m_free_ranges.size() >= m_max_tracked_ranges) { 84 | throw MemoryException("MMapCache: Too many free ranges", addr, size); 85 | } 86 | m_free_ranges.push_back({addr, size}); 87 | } 88 | void MMapCache::insert_used(uint64_t addr, uint64_t size) 89 | { 90 | if (!m_used_ranges.empty()) { 91 | if (m_used_ranges.back().addr + m_used_ranges.back().size == addr) { 92 | m_used_ranges.back().size += size; 93 | return; 94 | } 95 | } 96 | if (m_used_ranges.size() >= m_max_tracked_ranges) { 97 | throw MemoryException("MMapCache: Too many used ranges", addr, size); 98 | } 99 | 
m_used_ranges.push_back({addr, size}); 100 | } 101 | 102 | void MMapCache::remove(uint64_t addr, uint64_t size, std::vector& ranges) 103 | { 104 | for (auto it = ranges.begin(); it != ranges.end();) 105 | { 106 | Range& r = *it; 107 | if (r.overlaps(addr, size)) 108 | { 109 | if (addr <= r.addr && addr + size >= r.addr + r.size) 110 | { 111 | // The range fully overlaps the given range 112 | it = ranges.erase(it); 113 | continue; 114 | } 115 | else if (addr < r.addr) 116 | { 117 | // The removed range is below/before the given range 118 | // since it doesn't cover the whole range, we know that 119 | // the range remains and addr doesn't change 120 | r.size -= (addr + size) - r.addr; 121 | r.addr = addr + size; 122 | ++it; 123 | continue; 124 | } 125 | else // addr >= r.addr 126 | { 127 | // The removed range is above the given range 128 | r.size = addr - r.addr; 129 | if (r.size == 0) 130 | { 131 | it = ranges.erase(it); 132 | } 133 | else 134 | { 135 | ++it; 136 | } 137 | continue; 138 | } 139 | throw MemoryException("Unreachable", addr, size); 140 | } 141 | else ++it; 142 | } 143 | if (find_collision(ranges, { addr, size })) 144 | { 145 | throw MemoryException("MMapCache: Failed to remove range", addr, size); 146 | } 147 | } 148 | void MMapCache::remove_free(uint64_t addr, uint64_t size) 149 | { 150 | remove(addr, size, m_free_ranges); 151 | } 152 | void MMapCache::remove_used(uint64_t addr, uint64_t size) 153 | { 154 | remove(addr, size, m_used_ranges); 155 | } 156 | 157 | Machine::address_t Machine::mmap_allocate(size_t bytes, int prot) 158 | { 159 | (void)prot; 160 | bytes = (bytes + PageMask) & ~PageMask; 161 | 162 | auto range = mmap_cache().find(bytes); 163 | if (!range.empty()) 164 | { 165 | if (UNLIKELY(range.addr < this->mmap_start())) { 166 | throw MemoryException("MMapCache: Invalid range (below mmap_start)", range.addr, range.size); 167 | } 168 | else if (UNLIKELY(range.addr + range.size > this->mmap_cache().current())) { 169 | throw 
MemoryException("MMapCache: Invalid range (exceeds current address)", range.addr, range.size); 170 | } 171 | 172 | if (this->mmap_cache().track_used_ranges()) 173 | { 174 | if (this->mmap_cache().find_collision(this->mmap_cache().used_ranges(), range)) 175 | { 176 | throw MemoryException("MMapCache: Collision detected re-using free range", range.addr, range.size); 177 | } 178 | this->mmap_cache().insert_used(range.addr, range.size); 179 | } 180 | return range.addr; 181 | } 182 | 183 | const address_t result = this->mmap_cache().current(); 184 | /* Bytes rounded up to nearest PAGE_SIZE. */ 185 | this->mmap_cache().current() += bytes; 186 | 187 | if (this->mmap_cache().track_used_ranges()) 188 | { 189 | MMapCache::Range range { result, bytes }; 190 | if (this->mmap_cache().find_collision(this->mmap_cache().used_ranges(), range)) 191 | { 192 | throw MemoryException("MMapCache: Collision detected after incrementing current", result, bytes); 193 | } 194 | this->mmap_cache().insert_used(result, bytes); 195 | } 196 | return result; 197 | } 198 | 199 | Machine::address_t Machine::mmap_fixed_allocate(uint64_t addr, size_t bytes, bool is_fixed, int prot) 200 | { 201 | if (UNLIKELY(addr < this->mmap_start())) { 202 | throw MemoryException("MMapCache: Invalid range (below mmap_start)", addr, bytes); 203 | } else if (UNLIKELY(addr + bytes > this->mmap_cache().current())) { 204 | throw MemoryException("MMapCache: Invalid range (exceeds current address)", addr, bytes); 205 | } 206 | 207 | bytes = (bytes + PageMask) & ~PageMask; 208 | if (!is_fixed) 209 | { 210 | return mmap_allocate(bytes); 211 | } 212 | 213 | // Make sure there is no free range in the way 214 | mmap_cache().remove_free(addr, bytes); 215 | 216 | if (this->mmap_cache().track_used_ranges()) 217 | { 218 | MMapCache::Range range { addr, bytes }; 219 | // Only insert the range if it doesn't collide with any other used ranges 220 | // as this is a fixed mapping, which can be placed anywhere. 
221 | if (this->mmap_cache().find_collision(this->mmap_cache().used_ranges(), range) == nullptr) 222 | { 223 | this->mmap_cache().insert_used(addr, bytes); 224 | } 225 | } 226 | 227 | // If the mapping is within a certain range, we should adjust 228 | // the current mmap address to the end of the new mapping. This is 229 | // to avoid future collisions when allocating. 230 | if (mmap_cache().current() < addr + bytes) 231 | { 232 | if (addr < mmap_cache().current() + MMAP_COLLISION_TRESHOLD) 233 | { 234 | const uint64_t current_addr = mmap_cache().current(); 235 | // Adjust the current mmap address to the end of the new mapping 236 | mmap_cache().current() = addr + bytes; 237 | // Insert the unused area between the current mmap address and the new mapping 238 | const size_t unused_size = addr - current_addr; 239 | if (unused_size > 0) { 240 | mmap_cache().insert_free(current_addr, unused_size); 241 | } 242 | if constexpr (VERBOSE_MMAP_CACHE) 243 | printf("MMapCache: Adjusting current mmap address to %lx\n", mmap_cache().current()); 244 | } 245 | } 246 | 247 | // Simply return the address 248 | return addr; 249 | } 250 | 251 | bool Machine::mmap_unmap(uint64_t addr, size_t size) 252 | { 253 | bool relaxed = false; 254 | if (addr + size == this->mmap_cache().current() && addr < this->mmap_cache().current()) 255 | { 256 | this->mmap_cache().remove_free(addr, size); 257 | this->mmap_cache().current() = (addr + PageMask) & ~PageMask; 258 | relaxed = true; 259 | } 260 | else if (addr >= this->mmap_start()) 261 | { 262 | // If relaxation didn't happen, put in the cache for later. 
263 | this->mmap_cache().insert_free(addr, size); 264 | } 265 | if (this->mmap_cache().track_used_ranges()) 266 | { 267 | this->mmap_cache().remove_used(addr, size); 268 | } 269 | return relaxed; 270 | } 271 | 272 | Machine::address_t Machine::mmap_current() const noexcept 273 | { 274 | return this->mmap_cache().current(); 275 | } 276 | 277 | bool Machine::mmap_relax(uint64_t addr, size_t size, size_t new_size) 278 | { 279 | if (this->mmap_cache().current() == addr + size && new_size <= size) { 280 | this->mmap_cache().current() = (addr + new_size + PageMask) & ~PageMask; 281 | return true; 282 | } 283 | return false; 284 | } 285 | 286 | void Machine::do_mmap_callback(vCPU& cpu, address_t addr, size_t size, 287 | int prot, int flags, int fd, address_t offset) 288 | { 289 | m_mmap_func(cpu, addr, size, prot, flags, fd, offset); 290 | } 291 | 292 | } // tinykvm 293 | -------------------------------------------------------------------------------- /lib/tinykvm/mmap_cache.hpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | namespace tinykvm 5 | { 6 | struct MMapCache 7 | { 8 | struct Range { 9 | uint64_t addr = 0x0; 10 | uint64_t size = 0u; 11 | 12 | constexpr bool empty() const noexcept { return size == 0u; } 13 | // Invalidate if one of the ranges is in the other (both ways!) 
14 | constexpr bool overlaps(uint64_t mem, uint64_t memsize) const noexcept { 15 | return (mem + memsize > this->addr) && (mem < this->addr + this->size); 16 | } 17 | constexpr bool equals(uint64_t mem, uint64_t memsize) const noexcept { 18 | return (this->addr == mem) && (this->addr + this->size == mem + memsize); 19 | } 20 | }; 21 | 22 | uint64_t& current() noexcept { return m_mm; } 23 | const uint64_t& current() const noexcept { return m_mm; } 24 | 25 | Range find(uint64_t size); 26 | 27 | const Range* find_collision(const std::vector& ranges, const Range& r); 28 | 29 | void insert_free(uint64_t addr, uint64_t size); 30 | void insert_used(uint64_t addr, uint64_t size); 31 | void remove_free(uint64_t addr, uint64_t size); 32 | void remove_used(uint64_t addr, uint64_t size); 33 | 34 | bool track_used_ranges() const noexcept { return m_track_used_ranges; } 35 | void set_track_used_ranges(bool track) noexcept { m_track_used_ranges = track; } 36 | 37 | const std::vector& free_ranges() const noexcept { return m_free_ranges; } 38 | const std::vector& used_ranges() const noexcept { return m_used_ranges; } 39 | private: 40 | void remove(uint64_t addr, uint64_t size, std::vector& ranges); 41 | std::vector m_free_ranges; 42 | std::vector m_used_ranges; 43 | uint64_t m_mm = 0x0; 44 | bool m_track_used_ranges = true; 45 | size_t m_max_tracked_ranges = 4096; 46 | }; 47 | } // tinykvm 48 | -------------------------------------------------------------------------------- /lib/tinykvm/page_streaming.cpp: -------------------------------------------------------------------------------- 1 | #include "page_streaming.hpp" 2 | 3 | #include 4 | 5 | namespace tinykvm { 6 | 7 | #ifdef ENABLE_AVX2_PAGE_UTILS 8 | void page_duplicate(uint64_t* dest, const uint64_t* source) 9 | { 10 | #if defined(__AVX2__) 11 | for (size_t i = 0; i < 16; i++) { 12 | auto i0 = _mm256_stream_load_si256((__m256i *)&source[4 * 0]); 13 | auto i1 = _mm256_stream_load_si256((__m256i *)&source[4 * 1]); 14 | auto i2 = 
_mm256_stream_load_si256((__m256i *)&source[4 * 2]); 15 | auto i3 = _mm256_stream_load_si256((__m256i *)&source[4 * 3]); 16 | auto i4 = _mm256_stream_load_si256((__m256i *)&source[4 * 4]); 17 | auto i5 = _mm256_stream_load_si256((__m256i *)&source[4 * 5]); 18 | auto i6 = _mm256_stream_load_si256((__m256i *)&source[4 * 6]); 19 | auto i7 = _mm256_stream_load_si256((__m256i *)&source[4 * 7]); 20 | 21 | _mm256_stream_pd((double *)&dest[4 * 0], *(__m256d *) &i0); 22 | _mm256_stream_pd((double *)&dest[4 * 1], *(__m256d *) &i1); 23 | _mm256_stream_pd((double *)&dest[4 * 2], *(__m256d *) &i2); 24 | _mm256_stream_pd((double *)&dest[4 * 3], *(__m256d *) &i3); 25 | _mm256_stream_pd((double *)&dest[4 * 4], *(__m256d *) &i4); 26 | _mm256_stream_pd((double *)&dest[4 * 5], *(__m256d *) &i5); 27 | _mm256_stream_pd((double *)&dest[4 * 6], *(__m256d *) &i6); 28 | _mm256_stream_pd((double *)&dest[4 * 7], *(__m256d *) &i7); 29 | dest += 4 * 8; 30 | source += 4 * 8; 31 | } 32 | #else 33 | std::memcpy(dest, source, 4096); 34 | #endif 35 | } 36 | 37 | void page_memzero(uint64_t* dest) 38 | { 39 | #if defined(__AVX2__) 40 | auto iz = _mm256_setzero_si256(); 41 | for (size_t i = 0; i < 16; i++) { 42 | _mm256_stream_pd((double *)&dest[4 * 0], *(__m256d *) &iz); 43 | _mm256_stream_pd((double *)&dest[4 * 1], *(__m256d *) &iz); 44 | _mm256_stream_pd((double *)&dest[4 * 2], *(__m256d *) &iz); 45 | _mm256_stream_pd((double *)&dest[4 * 3], *(__m256d *) &iz); 46 | _mm256_stream_pd((double *)&dest[4 * 4], *(__m256d *) &iz); 47 | _mm256_stream_pd((double *)&dest[4 * 5], *(__m256d *) &iz); 48 | _mm256_stream_pd((double *)&dest[4 * 6], *(__m256d *) &iz); 49 | _mm256_stream_pd((double *)&dest[4 * 7], *(__m256d *) &iz); 50 | dest += 4 * 8; 51 | } 52 | #else 53 | std::memset(dest, 0, 4096); 54 | #endif 55 | } 56 | #endif 57 | 58 | void avx2_page_duplicate(uint64_t* dest, const uint64_t* source) 59 | { 60 | for (size_t i = 0; i < 16; i++) { 61 | _mm_prefetch((const char *)&source[4 * 0], _MM_HINT_T0); 62 
| _mm_prefetch((const char *)&source[4 * 1], _MM_HINT_T0); 63 | _mm_prefetch((const char *)&source[4 * 2], _MM_HINT_T0); 64 | _mm_prefetch((const char *)&source[4 * 3], _MM_HINT_T0); 65 | _mm_prefetch((const char *)&source[4 * 4], _MM_HINT_T0); 66 | _mm_prefetch((const char *)&source[4 * 5], _MM_HINT_T0); 67 | _mm_prefetch((const char *)&source[4 * 6], _MM_HINT_T0); 68 | _mm_prefetch((const char *)&source[4 * 7], _MM_HINT_T0); 69 | auto i0 = _mm256_stream_load_si256((__m256i *)&source[4 * 0]); 70 | auto i1 = _mm256_stream_load_si256((__m256i *)&source[4 * 1]); 71 | auto i2 = _mm256_stream_load_si256((__m256i *)&source[4 * 2]); 72 | auto i3 = _mm256_stream_load_si256((__m256i *)&source[4 * 3]); 73 | auto i4 = _mm256_stream_load_si256((__m256i *)&source[4 * 4]); 74 | auto i5 = _mm256_stream_load_si256((__m256i *)&source[4 * 5]); 75 | auto i6 = _mm256_stream_load_si256((__m256i *)&source[4 * 6]); 76 | auto i7 = _mm256_stream_load_si256((__m256i *)&source[4 * 7]); 77 | 78 | _mm256_stream_pd((double *)&dest[4 * 0], *(__m256d *) &i0); 79 | _mm256_stream_pd((double *)&dest[4 * 1], *(__m256d *) &i1); 80 | _mm256_stream_pd((double *)&dest[4 * 2], *(__m256d *) &i2); 81 | _mm256_stream_pd((double *)&dest[4 * 3], *(__m256d *) &i3); 82 | _mm256_stream_pd((double *)&dest[4 * 4], *(__m256d *) &i4); 83 | _mm256_stream_pd((double *)&dest[4 * 5], *(__m256d *) &i5); 84 | _mm256_stream_pd((double *)&dest[4 * 6], *(__m256d *) &i6); 85 | _mm256_stream_pd((double *)&dest[4 * 7], *(__m256d *) &i7); 86 | dest += 4 * 8; 87 | source += 4 * 8; 88 | } 89 | } 90 | void avx2_page_dupliteit(uint64_t* dest, const uint64_t* source) 91 | { 92 | for (size_t i = 0; i < 16; i++) { 93 | #pragma unroll(8) 94 | for (int j = 0; j < 8; j++) { 95 | __m256i zmm = _mm256_load_si256((__m256i *)&source[4 * j]); 96 | int is_zero = _mm256_testz_si256(zmm, zmm); 97 | if (is_zero == 0) 98 | _mm256_store_si256((__m256i *)&dest[4 * j], zmm); 99 | } 100 | dest += 4 * 8; 101 | source += 4 * 8; 102 | } 103 | } 104 | 105 
| } // tinykvm 106 | -------------------------------------------------------------------------------- /lib/tinykvm/page_streaming.hpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | //#define ENABLE_AVX2_PAGE_UTILS 4 | 5 | namespace tinykvm { 6 | extern void avx2_page_duplicate(uint64_t* dest, const uint64_t* source); 7 | extern void avx2_page_dupliteit(uint64_t* dest, const uint64_t* source); 8 | 9 | #ifdef ENABLE_AVX2_PAGE_UTILS 10 | extern void page_duplicate(uint64_t* dest, const uint64_t* source); 11 | extern void page_memzero(uint64_t* dest); 12 | #else 13 | inline void page_duplicate(uint64_t* dest, const uint64_t* source) 14 | { 15 | //std::memcpy(dest, source, 4096); 16 | avx2_page_duplicate(dest, source); 17 | } 18 | 19 | inline void page_memzero(uint64_t* dest) 20 | { 21 | std::memset(dest, 0, 4096); 22 | } 23 | #endif 24 | 25 | } 26 | -------------------------------------------------------------------------------- /lib/tinykvm/remote.cpp: -------------------------------------------------------------------------------- 1 | #include "machine.hpp" 2 | 3 | namespace tinykvm { 4 | 5 | Machine& Machine::remote() 6 | { 7 | if (this->is_remote_connected()) 8 | return *m_remote; 9 | throw MachineException("Remote not enabled"); 10 | } 11 | const Machine& Machine::remote() const 12 | { 13 | if (this->is_remote_connected()) 14 | return *m_remote; 15 | throw MachineException("Remote not enabled"); 16 | } 17 | 18 | void Machine::remote_connect(Machine& remote) 19 | { 20 | // Install the remote memory in this machine 21 | const auto remote_vmem = remote.main_memory().vmem(); 22 | this->install_memory(1, remote_vmem, false); 23 | 24 | // Copy gigabyte pages covered by remote memory into these page tables 25 | static constexpr uint64_t PDE64_ADDR_MASK = ~0x8000000000000FFF; 26 | auto* main_pml4 = this->main_memory().page_at(this->main_memory().page_tables); 27 | auto* main_pdpt = 
this->main_memory().page_at(main_pml4[0] & PDE64_ADDR_MASK); 28 | 29 | auto* remote_pml4 = remote.main_memory().page_at(remote.main_memory().page_tables); 30 | auto* remote_pdpt = remote.main_memory().page_at(remote_pml4[0] & PDE64_ADDR_MASK); 31 | 32 | // Gigabyte starting index and end index (rounded up) 33 | const auto begin = remote_vmem.physbase >> 30; 34 | const auto end = (remote_vmem.physbase + remote_vmem.size + 0x3FFFFFFF) >> 30; 35 | 36 | // Install gigabyte entries from remote VM into this VM 37 | // The VM and page tables technically support 2MB region alignments. 38 | for (size_t i = begin; i < end; i++) 39 | { 40 | main_pdpt[i] = remote_pdpt[i]; // GB-page 41 | } 42 | 43 | // Finalize 44 | this->m_remote = &remote; 45 | } 46 | 47 | } // tinykvm 48 | -------------------------------------------------------------------------------- /lib/tinykvm/rsp_client.hpp: -------------------------------------------------------------------------------- 1 | #include "machine.hpp" 2 | #include 3 | 4 | namespace tinykvm { 5 | struct RSPClient; 6 | 7 | struct RSP 8 | { 9 | // Wait for a connection for @timeout_secs 10 | std::unique_ptr accept(int timeout_secs = 10); 11 | int fd() const noexcept { return server_fd; } 12 | 13 | RSP(vCPU&, uint16_t); 14 | RSP(Machine&, uint16_t); 15 | ~RSP(); 16 | 17 | private: 18 | vCPU& m_cpu; 19 | int server_fd; 20 | }; 21 | 22 | struct RSPClient 23 | { 24 | using StopFunc = void(*)(RSPClient&); 25 | bool is_closed() const noexcept { return m_closed; } 26 | bool is_connected() const noexcept { return !m_closed; } 27 | 28 | bool process_one(); 29 | bool send(const char* str); 30 | bool sendf(const char* fmt, ...); 31 | void reply_ack(); 32 | void reply_ok(); 33 | void interrupt(); 34 | void kill(); 35 | 36 | auto& cpu() { return *m_cpu; } 37 | auto& machine() { return m_cpu->machine(); } 38 | void set_vcpu(vCPU& cpu) { m_cpu = &cpu; } 39 | void set_break_limit(uint64_t limit) { m_breaklimit = limit; } 40 | void set_verbose(bool v) { 
m_verbose = v; } 41 | void on_stopped(StopFunc f) { m_on_stopped = f; } 42 | 43 | RSPClient(vCPU& cpu, int fd); 44 | ~RSPClient(); 45 | 46 | private: 47 | static constexpr char lut[] = "0123456789abcdef"; 48 | static const int PACKET_SIZE = 2000; 49 | template 50 | inline void putreg(char*& d, const char* end, const T& reg); 51 | inline void putreg(char*& d, const char* end, const uint8_t* reg, size_t len); 52 | int forge_packet(char* dst, size_t dstlen, const char*, int); 53 | int forge_packet(char* dst, size_t dstlen, const char*, va_list); 54 | void process_data(); 55 | void handle_query(); 56 | void handle_breakpoint(); 57 | void handle_continue(); 58 | void handle_step(); 59 | void handle_executing(); 60 | void handle_multithread(); 61 | void handle_readreg(); 62 | void handle_writereg(); 63 | void handle_readmem(); 64 | void handle_writemem(); 65 | void report_gprs(); 66 | void report_status(); 67 | void close_now(); 68 | vCPU* m_cpu; 69 | uint64_t m_breaklimit = 1'000; 70 | int sockfd; 71 | bool m_closed = false; 72 | bool m_verbose = false; 73 | std::string buffer; 74 | std::array m_bp = {0}; 75 | size_t bp_iterator = 0; 76 | StopFunc m_on_stopped = nullptr; 77 | }; 78 | 79 | } 80 | -------------------------------------------------------------------------------- /lib/tinykvm/smp.cpp: -------------------------------------------------------------------------------- 1 | #include "smp.hpp" 2 | 3 | #include "machine.hpp" 4 | #include 5 | #include 6 | #include 7 | 8 | namespace tinykvm { 9 | 10 | SMP& Machine::smp() { 11 | if (m_smp == nullptr) 12 | m_smp.reset(new SMP(*this)); 13 | return *m_smp; 14 | } 15 | const SMP& Machine::smp() const { 16 | if (m_smp == nullptr) 17 | m_smp.reset(new SMP(const_cast (*this))); 18 | return *m_smp; 19 | } 20 | bool Machine::smp_active() const noexcept { 21 | if (m_smp == nullptr) return false; 22 | return smp().smp_active() != 0; 23 | } 24 | int Machine::smp_active_count() const noexcept { 25 | if (m_smp == nullptr) return 0; 
26 | return smp().smp_active(); 27 | } 28 | void Machine::smp_wait() { 29 | if (m_smp == nullptr) 30 | return; 31 | smp().wait(); 32 | } 33 | void Machine::smp_vcpu_broadcast(std::function callback) 34 | { 35 | if (m_smp == nullptr) 36 | return; 37 | smp().broadcast(std::move(callback)); 38 | } 39 | 40 | SMP::~SMP() 41 | { 42 | m_cpus.clear(); 43 | } 44 | 45 | 46 | SMP::MPvCPU::MPvCPU(int c, Machine& m) 47 | : thpool(1, 0, false) 48 | { 49 | /* We store the CPU ID in GSBASE register 50 | XXX: We do not make sure that vCPUs stay on a specific 51 | thread here, which will decimate performance. */ 52 | auto f = thpool.enqueue([this, c, &m] { 53 | this->cpu.smp_init(c, m); 54 | }); 55 | } 56 | SMP::MPvCPU::~MPvCPU() {} 57 | 58 | 59 | void SMP::MPvCPU::blocking_message(std::function func) 60 | { 61 | auto res = thpool.enqueue([this, func] { 62 | func(this->cpu); 63 | }); 64 | res.get(); 65 | } 66 | 67 | void SMP::MPvCPU::async_exec(MPvCPU_data& data) 68 | { 69 | /* To get the best performance we do: 70 | 1. Allocate regs on heap. 71 | 2. Set regs and timeout in MP vCPU. 72 | 3. Assign and delete regs at vCPU thread. 73 | 4. Start the vCPU with timeout in vCPU (for SSO). 74 | 75 | This means it is *NOT* possible to schedule more than 76 | one execution at the same time due to regs race. 
77 | */ 78 | thpool.enqueue([&data] { 79 | auto& vcpu = *data.vcpu; 80 | try { 81 | /*printf("Working from vCPU %d, RIP=0x%llX RSP=0x%llX ARG=0x%llX\n", 82 | cpu.cpu_id, regs->rip, regs->rsp, regs->rsi);*/ 83 | vcpu.set_registers(data.regs); 84 | 85 | vcpu.run(data.ticks); 86 | vcpu.decrement_smp_count(); 87 | 88 | } catch (const tinykvm::MemoryException& e) { 89 | printf("SMP memory exception: %s (addr=0x%lX, size=0x%lX)\n", 90 | e.what(), e.addr(), e.size()); 91 | vcpu.decrement_smp_count(); 92 | throw; 93 | } catch (const std::exception& e) { 94 | printf("SMP exception: %s\n", e.what()); 95 | vcpu.decrement_smp_count(); 96 | throw; 97 | } 98 | }); 99 | } 100 | 101 | SMP::MPvCPU_data* SMP::smp_allocate_vcpu_data(size_t num_cpus) 102 | { 103 | auto* data = new MPvCPU_data[num_cpus]; 104 | 105 | std::lock_guard lock(m_smp_data_mtx); 106 | m_smp_data.push_back(data); 107 | 108 | return data; 109 | } 110 | void SMP::prepare_cpus(size_t num_cpus) 111 | { 112 | if (m_cpus.size() < num_cpus) { 113 | while (m_cpus.size() < num_cpus) { 114 | /* NB: The cpu ids start at 1..2..3.. */ 115 | const int c = 1 + m_cpus.size(); 116 | m_cpus.emplace_back(c, machine()); 117 | } 118 | //printf("%zu SMP vCPUs initialized\n", this->m_cpus.size()); 119 | } 120 | } 121 | void vCPU::decrement_smp_count() 122 | { 123 | auto& smp = machine().smp(); 124 | const int v = __sync_fetch_and_sub(&smp.m_smp_active, 1); 125 | /* Check if we are the lucky one to clear out the SMP registers. */ 126 | if (UNLIKELY(v == 1)) 127 | { 128 | /* Create temporary vector and swap in contents. */ 129 | smp.m_smp_data_mtx.lock(); 130 | auto tmp = std::move(smp.m_smp_data); 131 | smp.m_smp_data_mtx.unlock(); 132 | /* Delete registers one by one, then let it destruct. 
*/ 133 | for (auto* regs : tmp) 134 | delete[] regs; 135 | } 136 | } 137 | 138 | void SMP::broadcast(std::function func) 139 | { 140 | for (auto& cpu : this->m_cpus) { 141 | cpu.blocking_message(func); 142 | } 143 | } 144 | /* Start `addr` on the first num_cpus SMP vCPUs; each gets its own stack slice and is passed array + (c+1) * array_isize and array_isize as call arguments. Work is queued on the vCPU threads; this function does not wait for it. */ 145 | void SMP::timed_smpcall_array(size_t num_cpus, 146 | address_t stack_base, uint32_t stack_size, 147 | address_t addr, float timeout, 148 | address_t array, uint32_t array_isize) 149 | { 150 | assert(num_cpus != 0); 151 | this->prepare_cpus(num_cpus); 152 | auto* data = smp_allocate_vcpu_data(num_cpus); 153 | 154 | __sync_fetch_and_add(&m_smp_active, num_cpus); 155 | /* data[] holds exactly num_cpus entries and m_smp_active was raised by num_cpus, while m_cpus only ever grows and may be larger after an earlier, wider call. */ 156 | for (size_t c = 0; c < num_cpus; c++) { 157 | data[c].vcpu = &m_cpus[c].cpu; 158 | data[c].ticks = to_ticks(timeout); 159 | machine().setup_call(data[c].regs, addr, 160 | stack_base + (c+1) * stack_size, 161 | array + (c+1) * array_isize, 162 | array_isize); 163 | m_cpus[c].async_exec(data[c]); 164 | } 165 | } 166 | /* Start a copy of `regs` (adjusted by setup_clone with a per-vCPU stack slice) on the first num_cpus SMP vCPUs. Work is queued on the vCPU threads; this function does not wait for it. */ 167 | void SMP::timed_smpcall_clone(size_t num_cpus, 168 | address_t stack_base, uint32_t stack_size, 169 | float timeout, const tinykvm_x86regs& regs) 170 | { 171 | assert(num_cpus != 0); 172 | this->prepare_cpus(num_cpus); 173 | auto* data = smp_allocate_vcpu_data(num_cpus); 174 | 175 | __sync_fetch_and_add(&m_smp_active, num_cpus); 176 | /* Same bound as above: only num_cpus slots of data[] exist, so never iterate over all of m_cpus. */ 177 | for (size_t c = 0; c < num_cpus; c++) { 178 | data[c].vcpu = &m_cpus[c].cpu; 179 | data[c].ticks = to_ticks(timeout); 180 | data[c].regs = regs; 181 | machine().setup_clone(data[c].regs, 182 | stack_base + (c+1) * stack_size); 183 | 184 | m_cpus[c].async_exec(data[c]); 185 | } 186 | } 187 | 188 | void SMP::wait() 189 | { 190 | for (size_t c = 0; c < m_cpus.size(); c++) { 191 | m_cpus[c].thpool.wait_until_nothing_in_flight(); 192 | } 193 | } 194 | 195 | std::vector SMP::gather_return_values(unsigned cpus) 196 | { 197 | if (cpus == 0 || cpus > m_cpus.size()) 198 | cpus = m_cpus.size(); 199 | 200 | std::vector results; 201 | results.resize(cpus); 202 | for (size_t c = 0; c < cpus; c++) { 203 | 
m_cpus[c].blocking_message([&] (auto& cpu) { 204 | //printf("CPU %zu result: 0x%llu\n", c, cpu.registers().rdi); 205 | results[c] = cpu.registers().rdi; 206 | }); 207 | } 208 | return results; 209 | } 210 | 211 | } 212 | -------------------------------------------------------------------------------- /lib/tinykvm/smp.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "machine.hpp" 3 | #include 4 | #include 5 | #include 6 | #include "util/threadpool.h" 7 | 8 | namespace tinykvm 9 | { 10 | struct SMP { 11 | using address_t = uint64_t; 12 | 13 | template 14 | void timed_smpcall(size_t cpus, 15 | address_t stack, uint32_t stack_size, 16 | address_t addr, float tmo, Args&&...); 17 | void timed_smpcall_array(size_t cpus, 18 | address_t stack, uint32_t stack_size, 19 | address_t addr, float tmo, 20 | address_t array, uint32_t array_item_size); 21 | void timed_smpcall_clone(size_t num_cpus, 22 | address_t stack_base, uint32_t stack_size, 23 | float timeout, const tinykvm_x86regs& regs); 24 | 25 | int smp_active() const noexcept { return m_smp_active; } 26 | void wait(); 27 | /* Retrieve return values from a smpcall */ 28 | std::vector gather_return_values(unsigned cpus = 0); 29 | 30 | void broadcast(std::function); 31 | 32 | Machine& machine() noexcept { return m_machine; } 33 | const Machine& machine() const noexcept { return m_machine; } 34 | 35 | struct MPvCPU_data 36 | { 37 | vCPU* vcpu = nullptr; 38 | uint32_t ticks = 0; 39 | struct tinykvm_x86regs regs; 40 | }; 41 | struct MPvCPU 42 | { 43 | void blocking_message(std::function); 44 | void async_exec(struct MPvCPU_data &); 45 | 46 | MPvCPU(int, Machine &); 47 | ~MPvCPU(); 48 | vCPU cpu; 49 | ThreadPool thpool; 50 | }; 51 | 52 | SMP(Machine& m) : m_machine{m} {} 53 | ~SMP(); 54 | private: 55 | MPvCPU_data* smp_allocate_vcpu_data(size_t); 56 | void prepare_cpus(size_t num_cpus); 57 | vCPU& smp_cpu(size_t idx); 58 | 59 | Machine& m_machine; 60 | std::deque 
m_cpus; 61 | std::vector m_smp_data; 62 | std::mutex m_smp_data_mtx; 63 | int m_smp_active = 0; 64 | 65 | friend struct vCPU; 66 | }; 67 | 68 | template inline 69 | void SMP::timed_smpcall(size_t num_cpus, 70 | address_t stack_base, uint32_t stack_size, 71 | address_t addr, float timeout, Args&&... args) 72 | { 73 | assert(num_cpus != 0); 74 | this->prepare_cpus(num_cpus); 75 | auto* data = smp_allocate_vcpu_data(num_cpus); 76 | 77 | /* XXX: This counter can be wrong when exceptions 78 | happen during setup_call and async_exec. */ 79 | __sync_fetch_and_add(&m_smp_active, num_cpus); 80 | 81 | for (size_t c = 0; c < num_cpus; c++) { 82 | data[c].vcpu = &m_cpus[c].cpu; 83 | data[c].ticks = to_ticks(timeout); 84 | machine().setup_call(data[c].regs, addr, 85 | stack_base + (c+1) * stack_size, 86 | std::forward (args)...); 87 | m_cpus[c].async_exec(data[c]); 88 | } 89 | } 90 | 91 | } // tinykvm 92 | -------------------------------------------------------------------------------- /lib/tinykvm/util/elf.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "../common.hpp" // for MachineException 3 | #include "elf.h" 4 | 5 | namespace tinykvm { 6 | 7 | template 8 | inline const T* elf_offset(std::string_view binary, intptr_t ofs) { 9 | if (ofs < 0 || ofs + sizeof(T) > binary.size()) 10 | throw MachineException("Invalid ELF offset", ofs); 11 | return (const T*) &binary.at(ofs); 12 | } 13 | template 14 | inline const T* elf_offset_array(std::string_view binary, intptr_t ofs, size_t count) { /* Returns a pointer to `count` consecutive T at byte offset `ofs`, or throws MachineException when the range does not fit in `binary`. The check divides instead of multiplying because count * sizeof(T) can wrap around for a hostile count and slip past a sum-based comparison. */ 15 | if (ofs < 0 || size_t(ofs) > binary.size() || count > (binary.size() - size_t(ofs)) / sizeof(T)) 16 | throw MachineException("Invalid ELF offset", ofs); 17 | return (const T*) &binary.at(ofs); 18 | } 19 | inline const auto* elf_header(std::string_view binary) { 20 | return elf_offset (binary, 0); 21 | } 22 | inline bool validate_header(const Elf64_Ehdr* hdr) 23 | { 24 | if (hdr->e_ident[EI_MAG0] != 0x7F || 25 | hdr->e_ident[EI_MAG1] != 'E' || 26 | 
hdr->e_ident[EI_MAG2] != 'L' || 27 | hdr->e_ident[EI_MAG3] != 'F') 28 | return false; 29 | return hdr->e_ident[EI_CLASS] == ELFCLASS64; 30 | } 31 | 32 | } 33 | -------------------------------------------------------------------------------- /lib/tinykvm/util/threadtask.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include "function.hpp" 12 | 13 | namespace tinykvm { 14 | 15 | /** 16 | * Task queue for single thread 17 | */ 18 | template > 19 | class ThreadTask { 20 | public: 21 | explicit ThreadTask(int nice, bool low_prio); 22 | auto enqueue(Function&&) -> std::future; 23 | void wait_until_empty(); 24 | void wait_until_nothing_in_flight(); 25 | void set_nice(int nice) { m_nice = nice; } 26 | void set_low_prio(bool low_prio) { m_prio_low = low_prio; } 27 | ~ThreadTask(); 28 | 29 | size_t racy_queue_size() { 30 | std::unique_lock lock(queue_mutex); 31 | return tasks.size(); 32 | } 33 | 34 | private: 35 | void start_worker(std::unique_lock const& lock); 36 | 37 | std::thread worker; 38 | // the task queue 39 | std::queue< std::packaged_task > tasks; 40 | // stop signal 41 | bool m_stop = false; 42 | // task thread priorities 43 | bool m_prio_low = false; 44 | int m_nice = 0; 45 | 46 | // synchronization 47 | std::mutex queue_mutex; 48 | std::condition_variable condition_producers; 49 | std::condition_variable condition_consumers; 50 | 51 | std::mutex in_flight_mutex; 52 | std::condition_variable in_flight_condition; 53 | std::atomic in_flight; 54 | 55 | struct handle_in_flight_decrement 56 | { 57 | ThreadTask& m_tp; 58 | 59 | handle_in_flight_decrement(ThreadTask& tp) 60 | : m_tp(tp) 61 | { } 62 | 63 | ~handle_in_flight_decrement() 64 | { 65 | std::size_t prev 66 | = std::atomic_fetch_sub_explicit(&m_tp.in_flight, 67 | std::size_t(1), 68 | std::memory_order_acq_rel); 69 | if (prev == 1) 70 
| { 71 | std::unique_lock guard(m_tp.in_flight_mutex); 72 | m_tp.in_flight_condition.notify_all(); 73 | } 74 | } 75 | }; 76 | }; 77 | 78 | // the constructor just launches some amount of workers 79 | template 80 | inline ThreadTask::ThreadTask(int nice, bool low_prio) 81 | : m_prio_low { low_prio }, 82 | m_nice { nice }, 83 | in_flight(0) 84 | { 85 | std::unique_lock lock(this->queue_mutex); 86 | start_worker(lock); 87 | } 88 | 89 | // add new work item to the pool 90 | template 91 | inline auto ThreadTask::enqueue(Function&& func) -> std::future 92 | { 93 | auto task = std::packaged_task(std::move(func)); 94 | std::future res = task.get_future(); 95 | 96 | std::unique_lock lock(queue_mutex); 97 | // don't allow enqueueing after stopping the pool 98 | if (m_stop) 99 | throw std::runtime_error("enqueue on stopped ThreadTask"); 100 | 101 | tasks.push(std::move(task)); 102 | std::atomic_fetch_add_explicit(&in_flight, 103 | std::size_t(1), 104 | std::memory_order_relaxed); 105 | condition_consumers.notify_one(); 106 | 107 | return res; 108 | } 109 | 110 | 111 | // the destructor joins all threads 112 | template 113 | inline ThreadTask::~ThreadTask() 114 | { 115 | std::unique_lock lock(queue_mutex); 116 | m_stop = true; 117 | lock.unlock(); /* Never join while holding queue_mutex: the worker must lock it to observe m_stop. The previous un-predicated wait() could return spuriously and join with the mutex held, deadlocking against the worker. */ 118 | condition_consumers.notify_all(); 119 | condition_producers.notify_all(); 120 | this->worker.join(); /* the worker drains any queued tasks, then returns once it sees m_stop with an empty queue */ 121 | assert(in_flight == 0); 122 | } 123 | 124 | template 125 | inline void ThreadTask::wait_until_empty() 126 | { 127 | std::unique_lock lock(this->queue_mutex); 128 | this->condition_producers.wait(lock, 129 | [this]{ return this->tasks.empty(); }); 130 | } 131 | 132 | template 133 | inline void ThreadTask::wait_until_nothing_in_flight() 134 | { 135 | std::unique_lock lock(this->in_flight_mutex); 136 | this->in_flight_condition.wait(lock, 137 | [this]{ return this->in_flight == 0; }); 138 | } 139 | 140 | template 141 | inline void ThreadTask::start_worker( 142 | 
std::unique_lock const &lock) 143 | { 144 | assert(lock.owns_lock() && lock.mutex() == &this->queue_mutex); 145 | (void)lock; 146 | 147 | this->worker = std::thread([this] { 148 | pthread_setschedprio(pthread_self(), this->m_nice); 149 | for(;;) 150 | { 151 | std::packaged_task task; 152 | bool notify; 153 | 154 | { 155 | std::unique_lock lock(this->queue_mutex); 156 | this->condition_consumers.wait(lock, 157 | [this]{ 158 | return this->m_stop || !this->tasks.empty(); 159 | }); 160 | 161 | // deal with shutdown 162 | if ((this->m_stop && this->tasks.empty())) 163 | { 164 | // detach this worker, effectively marking it stopped 165 | //this->worker.detach(); 166 | this->condition_consumers.notify_all(); 167 | return; 168 | } 169 | else if (!this->tasks.empty()) 170 | { 171 | task = std::move(this->tasks.front()); 172 | this->tasks.pop(); 173 | notify = this->tasks.empty(); 174 | } 175 | else 176 | continue; 177 | } 178 | 179 | handle_in_flight_decrement guard(*this); 180 | 181 | if (notify) 182 | { 183 | std::unique_lock lock(this->queue_mutex); 184 | condition_producers.notify_all(); 185 | } 186 | 187 | task(); 188 | } 189 | }); 190 | } 191 | 192 | } // tinykvm 193 | -------------------------------------------------------------------------------- /lib/tinykvm/vcpu.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "common.hpp" 3 | #include "forward.hpp" 4 | 5 | namespace tinykvm 6 | { 7 | struct Machine; 8 | 9 | struct vCPU 10 | { 11 | void init(int id, Machine&, const MachineOptions&); 12 | void smp_init(int id, Machine &); 13 | void deinit(); 14 | tinykvm_x86regs& registers(); 15 | const tinykvm_x86regs& registers() const; 16 | void set_registers(const struct tinykvm_x86regs &); 17 | tinykvm_fpuregs fpu_registers() const; 18 | void set_fpu_registers(const struct tinykvm_fpuregs &); 19 | const struct kvm_sregs& get_special_registers() const; 20 | struct kvm_sregs& get_special_registers(); 21 | void 
set_special_registers(const struct kvm_sregs &); 22 | 23 | void run(uint32_t tix); 24 | long run_once(); 25 | void stop() { stopped = true; } 26 | void disable_timer(); 27 | std::string_view io_data() const; 28 | 29 | bool is_usermode() const; 30 | bool is_kernelmode() const; 31 | void enter_usermode(); 32 | 33 | void print_registers() const; 34 | void handle_exception(uint8_t intr); 35 | unsigned exception_extra_offset(uint8_t intr); 36 | void decrement_smp_count(); 37 | 38 | auto& machine() { return *m_machine; } 39 | const auto& machine() const { return *m_machine; } 40 | 41 | void set_vcpu_table_at(unsigned index, int value); 42 | bool timed_out() const; 43 | 44 | int fd = -1; 45 | int cpu_id = 0; 46 | bool stopped = true; 47 | uint8_t current_exception = 0; 48 | uint32_t timer_ticks = 0; 49 | void* timer_id = nullptr; 50 | 51 | private: 52 | struct kvm_run* kvm_run = nullptr; 53 | Machine* m_machine = nullptr; 54 | 55 | uint64_t vcpu_table_addr() const noexcept; 56 | }; 57 | 58 | } // namespace tinykvm 59 | -------------------------------------------------------------------------------- /lib/tinykvm/virtual_mem.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | namespace tinykvm { 5 | 6 | struct VirtualMem { 7 | uint64_t physbase; 8 | char * ptr; 9 | uint64_t size; 10 | 11 | VirtualMem(uint64_t phys, char* p, uint64_t s) 12 | : physbase(phys), ptr(p), size(s) {} 13 | 14 | static VirtualMem New(uint64_t physical, char* ptr, uint64_t size) { 15 | return VirtualMem { physical, ptr, size }; 16 | } 17 | }; 18 | 19 | } 20 | -------------------------------------------------------------------------------- /src/assert.hpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | void do_kassert(bool pred, const char* text, int line, const char* file, const char* function) 4 | { 5 | if (pred) return; 6 | fprintf(stderr, "%s:%d assertion failed in 
function %s: %s\n", 7 | file, line, function, text); 8 | std::abort(); 9 | } 10 | 11 | #define KASSERT(pred) \ 12 | do_kassert(pred, #pred, __LINE__, __FILE__, __FUNCTION__) 13 | -------------------------------------------------------------------------------- /src/load_file.hpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | std::vector load_file(const std::string& filename) 4 | { 5 | size_t size = 0; 6 | FILE* f = fopen(filename.c_str(), "rb"); 7 | if (f == NULL) throw std::runtime_error("Could not open file: " + filename); 8 | 9 | fseek(f, 0, SEEK_END); 10 | size = ftell(f); 11 | fseek(f, 0, SEEK_SET); 12 | 13 | std::vector result(size); 14 | if (size != fread(result.data(), 1, size, f)) 15 | { 16 | fclose(f); 17 | throw std::runtime_error("Error when reading from file: " + filename); 18 | } 19 | fclose(f); 20 | return result; 21 | } 22 | -------------------------------------------------------------------------------- /src/simple.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include "assert.hpp" 6 | #include "load_file.hpp" 7 | 8 | #include 9 | #define GUEST_MEMORY 0x80000000 /* 2GB memory */ 10 | #define GUEST_WORK_MEM 1024UL * 1024*1024 /* MB working mem */ 11 | 12 | static uint64_t verify_exists(tinykvm::Machine& vm, const char* name) 13 | { 14 | uint64_t addr = vm.address_of(name); 15 | if (addr == 0x0) { 16 | // fprintf(stderr, "Error: '%s' is missing\n", name); 17 | // exit(1); 18 | } 19 | return addr; 20 | } 21 | 22 | inline timespec time_now(); 23 | inline long nanodiff(timespec start_time, timespec end_time); 24 | 25 | int main(int argc, char** argv) 26 | { 27 | if (argc < 2) { 28 | fprintf(stderr, "Missing argument: 64-bit ELF binary\n"); 29 | exit(1); 30 | } 31 | std::vector binary; 32 | std::vector args; 33 | binary = load_file(argv[1]); 34 | 35 | const tinykvm::DynamicElf dyn_elf = 
tinykvm::is_dynamic_elf( 36 | std::string_view{(const char*)binary.data(), binary.size()}); 37 | if (dyn_elf.is_dynamic) { 38 | // Add ld-linux.so.2 as first argument 39 | static const std::string ld_linux_so = "/lib64/ld-linux-x86-64.so.2"; 40 | binary = load_file(ld_linux_so); 41 | args.push_back(ld_linux_so); 42 | } 43 | 44 | for (int i = 1; i < argc; i++) 45 | { 46 | args.push_back(argv[i]); 47 | } 48 | 49 | tinykvm::Machine::init(); 50 | 51 | tinykvm::Machine::install_unhandled_syscall_handler( 52 | [] (tinykvm::vCPU& cpu, unsigned scall) { 53 | switch (scall) { 54 | case 0x10000: 55 | cpu.stop(); 56 | break; 57 | case 0x10001: 58 | throw "Unimplemented"; 59 | case 0x10707: 60 | throw "Unimplemented"; 61 | default: 62 | printf("Unhandled system call: %u\n", scall); 63 | auto regs = cpu.registers(); 64 | regs.rax = -ENOSYS; 65 | cpu.set_registers(regs); 66 | } 67 | }); 68 | 69 | const std::vector remappings { 70 | { 71 | .phys = 0x0, 72 | .virt = 0xC000000000, 73 | .size = 512ULL << 20, 74 | } 75 | }; 76 | 77 | /* Setup */ 78 | const tinykvm::MachineOptions options { 79 | .max_mem = GUEST_MEMORY, 80 | .max_cow_mem = GUEST_WORK_MEM, 81 | .reset_free_work_mem = 0, 82 | .vmem_base_address = uint64_t(getenv("UPPER") != nullptr ? 
0x40000000 : 0x0), 83 | .remappings {remappings}, 84 | .verbose_loader = true, 85 | .hugepages = (getenv("HUGE") != nullptr), 86 | .relocate_fixed_mmap = (getenv("GO") == nullptr), 87 | .executable_heap = dyn_elf.is_dynamic, 88 | }; 89 | tinykvm::Machine master_vm {binary, options}; 90 | //master_vm.print_pagetables(); 91 | if (dyn_elf.is_dynamic) { 92 | static const std::vector allowed_readable_paths({ 93 | argv[1], 94 | // Add all common standard libraries to the list of allowed readable paths 95 | "/lib64/ld-linux-x86-64.so.2", 96 | "/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2", 97 | "/lib/x86_64-linux-gnu/libgcc_s.so.1", 98 | "/lib/x86_64-linux-gnu/libc.so.6", 99 | "/lib/x86_64-linux-gnu/libm.so.6", 100 | "/lib/x86_64-linux-gnu/libpthread.so.0", 101 | "/lib/x86_64-linux-gnu/libdl.so.2", 102 | "/lib/x86_64-linux-gnu/libstdc++.so.6", 103 | "/lib/x86_64-linux-gnu/librt.so.1", 104 | "/lib/x86_64-linux-gnu/libz.so.1", 105 | "/lib/x86_64-linux-gnu/libexpat.so.1", 106 | "/lib/x86_64-linux-gnu/glibc-hwcaps/x86-64-v2/libstdc++.so.6", 107 | "/lib/x86_64-linux-gnu/glibc-hwcaps/x86-64-v3/libstdc++.so.6", 108 | "/lib/x86_64-linux-gnu/glibc-hwcaps/x86-64-v4/libstdc++.so.6", 109 | }); 110 | master_vm.fds().set_open_readable_callback( 111 | [&] (std::string& path) -> bool { 112 | return std::find(allowed_readable_paths.begin(), 113 | allowed_readable_paths.end(), path) != allowed_readable_paths.end(); 114 | } 115 | ); 116 | } 117 | 118 | master_vm.setup_linux( 119 | args, 120 | {"LC_TYPE=C", "LC_ALL=C", "USER=root"}); 121 | 122 | const auto rsp = master_vm.stack_address(); 123 | 124 | uint64_t call_addr = verify_exists(master_vm, "my_backend"); 125 | 126 | /* Remote debugger session */ 127 | if (getenv("DEBUG")) 128 | { 129 | auto* vm = &master_vm; 130 | tinykvm::tinykvm_x86regs regs; 131 | 132 | if (getenv("VMCALL")) { 133 | master_vm.run(); 134 | } 135 | if (getenv("FORK")) { 136 | master_vm.prepare_copy_on_write(); 137 | vm = new tinykvm::Machine {master_vm, options}; 138 | 
vm->setup_call(regs, call_addr, rsp); 139 | vm->set_registers(regs); 140 | } else if (getenv("VMCALL")) { 141 | master_vm.setup_call(regs, call_addr, rsp); 142 | master_vm.set_registers(regs); 143 | } 144 | 145 | tinykvm::RSP server {*vm, 2159}; 146 | printf("Waiting for connection localhost:2159...\n"); 147 | auto client = server.accept(); 148 | if (client != nullptr) { 149 | /* Debugging session of _start -> main() */ 150 | printf("Connected\n"); 151 | try { 152 | //client->set_verbose(true); 153 | while (client->process_one()); 154 | } catch (const tinykvm::MachineException& e) { 155 | printf("EXCEPTION %s: %lu\n", e.what(), e.data()); 156 | vm->print_registers(); 157 | } 158 | } else { 159 | /* Resume execution normally */ 160 | vm->run(); 161 | } 162 | /* Exit after debugging */ 163 | return 0; 164 | } 165 | 166 | asm("" ::: "memory"); 167 | auto t0 = time_now(); 168 | asm("" ::: "memory"); 169 | 170 | /* Normal execution of _start -> main() */ 171 | try { 172 | master_vm.run(); 173 | } catch (const tinykvm::MachineException& me) { 174 | master_vm.print_registers(); 175 | fprintf(stderr, "Machine exception: %s Data: 0x%lX\n", me.what(), me.data()); 176 | throw; 177 | } catch (...) 
{ 178 | master_vm.print_registers(); 179 | throw; 180 | } 181 | 182 | asm("" ::: "memory"); 183 | auto t1 = time_now(); 184 | asm("" ::: "memory"); 185 | 186 | if (call_addr == 0x0) { 187 | double t = nanodiff(t0, t1) / 1e9; 188 | printf("Time: %fs Return value: %ld\n", t, master_vm.return_value()); 189 | return 0; 190 | } 191 | 192 | /* Fork master VM */ 193 | master_vm.prepare_copy_on_write(); 194 | tinykvm::Machine vm{master_vm, options}; 195 | 196 | /* Make a VM function call */ 197 | tinykvm::tinykvm_regs regs; 198 | vm.setup_call(regs, call_addr, rsp); 199 | //regs.rip = vm.entry_address_if_usermode(); 200 | vm.set_registers(regs); 201 | printf("Calling fork at 0x%lX\n", call_addr); 202 | vm.run(8.0f); 203 | 204 | /* Re-run */ 205 | //vm.reset_to(master_vm, options); 206 | 207 | vm.setup_call(regs, call_addr, rsp); 208 | //regs.rip = vm.entry_address_if_usermode(); 209 | vm.set_registers(regs); 210 | printf("Calling fork at 0x%lX\n", call_addr); 211 | vm.run(8.0f); 212 | } 213 | 214 | timespec time_now() 215 | { 216 | timespec t; 217 | clock_gettime(CLOCK_THREAD_CPUTIME_ID, &t); 218 | return t; 219 | } 220 | long nanodiff(timespec start_time, timespec end_time) 221 | { 222 | return (end_time.tv_sec - start_time.tv_sec) * (long)1e9 + (end_time.tv_nsec - start_time.tv_nsec); 223 | } 224 | -------------------------------------------------------------------------------- /src/storage.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "assert.hpp" 5 | #include "load_file.hpp" 6 | #include "timing.hpp" 7 | 8 | #include 9 | #define GUEST_MEMORY 0x40000000 /* 1024MB memory */ 10 | #define GUEST_WORK_MEM 256UL * 1024*1024 /* MB working mem */ 11 | 12 | static double timed_action(std::function action) 13 | { 14 | asm("" ::: "memory"); 15 | auto t0 = time_now(); 16 | asm("" ::: "memory"); 17 | 18 | action(); 19 | 20 | asm("" ::: "memory"); 21 | auto t1 = time_now(); 22 | asm("" ::: 
"memory"); 23 | 24 | return nanodiff(t0, t1) / 1e9; 25 | } 26 | 27 | int main(int argc, char** argv) 28 | { 29 | if (argc < 3) { 30 | fprintf(stderr, "%s [guest ELF] [storage ELF]\n", argv[0]); 31 | exit(1); 32 | } 33 | const auto guest_binary = load_file(argv[1]); 34 | const auto storage_binary = load_file(argv[2]); 35 | printf(">>> Guest: %s >>> Storage: %s\n", argv[1], argv[2]); 36 | 37 | tinykvm::Machine::init(); 38 | 39 | tinykvm::Machine::install_unhandled_syscall_handler( 40 | [] (tinykvm::vCPU& cpu, unsigned scall) { 41 | switch (scall) { 42 | case 0x10000: 43 | cpu.stop(); 44 | break; 45 | case 0x10001: 46 | throw "Unimplemented"; 47 | case 0x10707: 48 | throw "Unimplemented"; 49 | default: 50 | printf("Unhandled system call: %u\n", scall); 51 | auto regs = cpu.registers(); 52 | regs.rax = -ENOSYS; 53 | cpu.set_registers(regs); 54 | } 55 | }); 56 | 57 | /* Setup */ 58 | const tinykvm::MachineOptions options { 59 | .max_mem = GUEST_MEMORY, 60 | .max_cow_mem = GUEST_WORK_MEM, 61 | .reset_free_work_mem = 0, 62 | .verbose_loader = false, 63 | .hugepages = (getenv("HUGE") != nullptr), 64 | }; 65 | tinykvm::Machine master_vm {guest_binary, options}; 66 | master_vm.setup_linux( 67 | {"main", "Hello Main World!"}, 68 | {"LC_TYPE=C", "LC_ALL=C", "USER=root"}); 69 | //master_vm.print_pagetables(); 70 | 71 | /* Create storage VM */ 72 | const tinykvm::MachineOptions storage_options { 73 | .max_mem = 256ULL << 20, // MB 74 | .vmem_base_address = 1ULL << 30, // 1GB 75 | .verbose_loader = false, 76 | .hugepages = (getenv("HUGE") != nullptr), 77 | }; 78 | tinykvm::Machine storage_vm{storage_binary, storage_options}; 79 | storage_vm.setup_linux( 80 | {"storage", "Hello Storage World!"}, 81 | {"LC_TYPE=C", "LC_ALL=C", "USER=root"}); 82 | storage_vm.run(5.0f); 83 | 84 | master_vm.remote_connect(storage_vm); 85 | 86 | auto tdiff = timed_action([&] { 87 | try { 88 | master_vm.run(); 89 | } catch (const tinykvm::MachineException& e) { 90 | fprintf(stderr, "Exception: %s with 
data 0x%lX\n", 91 | e.what(), e.data()); 92 | } catch (const tinykvm::MemoryException& e) { 93 | fprintf(stderr, "Exception: %s at 0x%lX (size=%lu)\n", 94 | e.what(), e.data(), e.size()); 95 | } 96 | }); 97 | printf("Call time: %fms Return value: %ld\n", tdiff*1e3, master_vm.return_value()); 98 | 99 | /* Allow forking the master VM */ 100 | master_vm.prepare_copy_on_write(GUEST_WORK_MEM, 1ULL << 30); 101 | 102 | /* Fork the master VM, and install remote memory */ 103 | tinykvm::Machine vm{master_vm, options}; 104 | assert(vm.is_remote_connected()); 105 | 106 | /* Call 'do_calculation' with 21 as argument */ 107 | const auto call_addr = vm.address_of("do_calculation"); 108 | auto fork_tdiff = timed_action([&] { 109 | vm.timed_vmcall(call_addr, 5.0f, 21); 110 | }); 111 | printf("Fork call time: %fms Return value: %ld\n", fork_tdiff*1e3, vm.return_value()); 112 | } 113 | -------------------------------------------------------------------------------- /src/tests.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "assert.hpp" 5 | #include "load_file.hpp" 6 | 7 | #include 8 | #include 9 | #define GUEST_MEMORY 0x10000000 /* 256MB memory */ 10 | #define GUEST_WORK_MEM 2*1024*1024 /* 2MB working memory */ 11 | 12 | std::vector load_file(const std::string& filename); 13 | static void test_master_vm(tinykvm::Machine&); 14 | static void test_forking(tinykvm::Machine&); 15 | static void test_copy_on_write(tinykvm::Machine&); 16 | static void test_vcpu(tinykvm::Machine&); 17 | 18 | static void verify_exists(tinykvm::Machine& vm, const char* name) 19 | { 20 | if (vm.address_of(name) == 0x0) { 21 | fprintf(stderr, "Error: '%s' is missing\n", name); 22 | exit(1); 23 | } 24 | } 25 | 26 | int main(int argc, char** argv) 27 | { 28 | if (argc < 2) { 29 | fprintf(stderr, "Missing argument: 64-bit ELF binary\n"); 30 | exit(1); 31 | } 32 | const auto binary = load_file(argv[1]); 33 | 34 | 
tinykvm::Machine::init(); 35 | 36 | /* Setup */ 37 | const tinykvm::MachineOptions options { 38 | .max_mem = GUEST_MEMORY, 39 | .max_cow_mem = GUEST_WORK_MEM, 40 | .verbose_loader = false, 41 | }; 42 | tinykvm::Machine master_vm {binary, options}; 43 | master_vm.setup_linux( 44 | {"kvmtest", "Hello World!\n"}, 45 | {"LC_TYPE=C", "LC_ALL=C", "USER=root"}); 46 | const auto rsp = master_vm.stack_address(); 47 | 48 | verify_exists(master_vm, "test_return"); 49 | verify_exists(master_vm, "test_ud2"); 50 | verify_exists(master_vm, "test_read"); 51 | verify_exists(master_vm, "test_copy_on_write"); 52 | verify_exists(master_vm, "write_value"); 53 | verify_exists(master_vm, "test_is_value"); 54 | verify_exists(master_vm, "test_loop"); 55 | verify_exists(master_vm, "test_vcpu"); 56 | 57 | /* Remote debugger session */ 58 | if (getenv("DEBUG")) 59 | { 60 | auto* vm = &master_vm; 61 | tinykvm::tinykvm_x86regs regs; 62 | 63 | if (getenv("VMCALL")) { 64 | master_vm.run(); 65 | } 66 | if (getenv("FORK")) { 67 | master_vm.prepare_copy_on_write(); 68 | vm = new tinykvm::Machine {master_vm, options}; 69 | vm->setup_call(regs, vm->address_of("test_return"), rsp); 70 | vm->set_registers(regs); 71 | } else { 72 | master_vm.setup_call(regs, master_vm.address_of("test_return"), rsp); 73 | master_vm.set_registers(regs); 74 | } 75 | 76 | tinykvm::RSP server {*vm, 2159}; 77 | printf("Waiting for connection localhost:2159...\n"); 78 | auto client = server.accept(); 79 | if (client != nullptr) { 80 | /* Debugging session of _start -> main() */ 81 | printf("Connected\n"); 82 | try { 83 | //client->set_verbose(true); 84 | while (client->process_one()); 85 | } catch (const tinykvm::MachineException& e) { 86 | printf("EXCEPTION %s: %lu\n", e.what(), e.data()); 87 | vm->print_registers(); 88 | } 89 | } else { 90 | /* Resume execution normally */ 91 | vm->run(); 92 | } 93 | /* Exit after debugging */ 94 | return 0; 95 | } 96 | else { 97 | /* Normal execution of _start -> main() */ 98 | 
master_vm.run(); 99 | } 100 | /* Verify VM exit status */ 101 | auto regs = master_vm.registers(); 102 | KASSERT(regs.rdi == 666); 103 | printf("*** Program startup OK\n"); 104 | 105 | printf("--- Beginning Master VM tests ---\n"); 106 | test_master_vm(master_vm); 107 | printf("*** Master VM OK\n"); 108 | 109 | printf("--- Beginning VM vCPU tests ---\n"); 110 | for (size_t i = 0; i < 100; i++) { 111 | test_vcpu(master_vm); 112 | } 113 | printf("*** VM vCPU OK\n"); 114 | 115 | /* Make the master VM able to mass-produce copies. 116 | Make room for 16 CoW-pages enabling usage afterwards. */ 117 | master_vm.prepare_copy_on_write(65536); 118 | 119 | printf("--- Beginning CoW VM master tests ---\n"); 120 | test_master_vm(master_vm); 121 | printf("*** VM CoW master tests OK\n"); 122 | 123 | printf("--- Beginning VM fork tests ---\n"); 124 | for (size_t i = 0; i < 100; i++) { 125 | test_forking(master_vm); 126 | } 127 | printf("*** VM forking OK\n"); 128 | 129 | printf("--- Beginning VM copy-on-write tests ---\n"); 130 | for (size_t i = 0; i < 100; i++) { 131 | test_copy_on_write(master_vm); 132 | } 133 | printf("*** VM copy-on-write OK\n"); 134 | 135 | printf("Nice! Tests passed.\n"); 136 | return 0; 137 | } 138 | 139 | void test_master_vm(tinykvm::Machine& vm) 140 | { 141 | /* Call into master VM */ 142 | vm.vmcall("test_return"); 143 | KASSERT(vm.return_value() == 666); 144 | try { 145 | vm.vmcall("test_ud2"); 146 | } catch (const tinykvm::MachineException& me) { 147 | /* Allow invalid opcode exception */ 148 | KASSERT(me.data() == 6); 149 | } 150 | vm.vmcall("test_syscall"); 151 | KASSERT(vm.return_value() == 555); 152 | vm.vmcall("test_read"); 153 | KASSERT(vm.return_value() == 200); 154 | vm.vmcall("test_malloc"); 155 | KASSERT(vm.return_value() != 0); 156 | 157 | printf("--- Testing endless loop ---\n"); 158 | /* To test endless loops we rely on the Machine to 159 | throw an exact exception, namely tinykvm::MachineTimeoutException. 
160 | To make sure this happens we throw another exception right 161 | after the timed_vmcall, which throws a runtime_error instead. 162 | The program will fail with an uncaught exception if the timeout 163 | doesn't happen naturally after 1 second. */ 164 | try { 165 | const auto addr = vm.address_of("test_loop"); 166 | vm.timed_vmcall(addr, 1.0); 167 | throw std::runtime_error("Timeout exception failed"); 168 | } catch (const tinykvm::MachineTimeoutException& me) { 169 | KASSERT(me.seconds() == 1.0); 170 | printf("*** Timeout OK\n"); 171 | } 172 | 173 | printf("--- Testing multi-processing ---\n"); 174 | //vm.print_exception_handlers(); 175 | auto tr_addr = vm.address_of("test_read"); 176 | auto tret_addr = vm.address_of("test_return"); 177 | vm.smp().timed_smpcall(20, 0x200000, 0x10000, tr_addr, 2.0f); 178 | auto results = vm.smp().gather_return_values(); 179 | for (const auto res : results) { 180 | KASSERT(res == 200); 181 | } 182 | /* Run SMP vCPUs a 100 times */ 183 | for (int i = 0; i < 100; i++) { 184 | vm.smp().timed_smpcall(2, 0x200000, 0x10000, tret_addr, 2.0f); 185 | } 186 | /* Run test_read (200) */ 187 | vm.smp().timed_smpcall(8, 0x200000, 0x10000, tr_addr, 2.0f); 188 | /* Run test_return (666) */ 189 | vm.smp().timed_smpcall(8, 0x200000, 0x10000, tret_addr, 2.0f); 190 | results = vm.smp().gather_return_values(8); 191 | for (const auto res : results) { 192 | KASSERT(res == 666); 193 | } 194 | printf("*** Multi-processing OK\n"); 195 | } 196 | 197 | void test_forking(tinykvm::Machine& master_vm) 198 | { 199 | /* Create VM fork */ 200 | const tinykvm::MachineOptions options { 201 | .max_mem = GUEST_MEMORY, 202 | .max_cow_mem = GUEST_WORK_MEM, 203 | .verbose_loader = false 204 | }; 205 | tinykvm::Machine vm {master_vm, options}; 206 | 207 | /* Call into VM */ 208 | for (size_t i = 0; i < 20; i++) 209 | { 210 | vm.vmcall("test_return"); 211 | KASSERT(vm.return_value() == 666); 212 | vm.set_printer([] (auto, size_t) {}); 213 | try { 214 | 
vm.vmcall("test_ud2"); 215 | } catch (const tinykvm::MachineException& me) { 216 | /* Allow invalid opcode exception */ 217 | KASSERT(me.data() == 6); 218 | try { 219 | /* Retry exception */ 220 | vm.run(); 221 | } catch (const tinykvm::MachineException& me) { 222 | /* Allow invalid opcode exception */ 223 | KASSERT(me.data() == 6); 224 | } 225 | } 226 | vm.set_printer(); 227 | vm.vmcall("test_syscall"); 228 | KASSERT(vm.return_value() == 555); 229 | vm.vmcall("test_read"); 230 | KASSERT(vm.return_value() == 200); 231 | vm.vmcall("test_malloc"); 232 | KASSERT(vm.return_value() != 0); 233 | static int run_once = 0; 234 | if (run_once++ == 0) try { 235 | const auto addr = vm.address_of("test_loop"); 236 | vm.timed_vmcall(addr, 1.0); 237 | throw std::runtime_error("Timeout exception failed"); 238 | } catch (const tinykvm::MachineTimeoutException& me) { 239 | KASSERT(me.seconds() == 1.0); 240 | } 241 | } 242 | 243 | /* Reset and call into VM */ 244 | for (size_t i = 0; i < 20; i++) 245 | { 246 | vm.reset_to(master_vm, options); 247 | vm.vmcall("test_return"); 248 | KASSERT(vm.return_value() == 666); 249 | vm.set_printer([] (auto, size_t) {}); 250 | try { 251 | vm.vmcall("test_ud2"); 252 | } catch (const tinykvm::MachineException& me) { 253 | /* Allow invalid opcode exception */ 254 | KASSERT(me.data() == 6); 255 | try { 256 | /* Retry exception */ 257 | vm.run(); 258 | } catch (const tinykvm::MachineException& me) { 259 | /* Allow invalid opcode exception */ 260 | KASSERT(me.data() == 6); 261 | } 262 | } 263 | vm.set_printer(); 264 | vm.vmcall("test_syscall"); 265 | KASSERT(vm.return_value() == 555); 266 | vm.vmcall("test_read"); 267 | KASSERT(vm.return_value() == 200); 268 | vm.vmcall("test_malloc"); 269 | KASSERT(vm.return_value() != 0); 270 | /* Timeouts take a second, but should result in exception. 
*/ 271 | static int run_once = 0; 272 | if (run_once++ == 0) try { 273 | printf("Testing forked execution timeout\n"); 274 | const auto addr = vm.address_of("test_loop"); 275 | vm.timed_vmcall(addr, 1.0); 276 | throw std::runtime_error("Timeout exception failed"); 277 | } catch (const tinykvm::MachineTimeoutException& me) { 278 | KASSERT(me.seconds() == 1.0); 279 | printf("Forked execution timeout OK\n"); 280 | } 281 | } 282 | } 283 | 284 | void test_copy_on_write(tinykvm::Machine& master_vm) 285 | { 286 | const tinykvm::MachineOptions options { 287 | .max_mem = GUEST_MEMORY, 288 | .max_cow_mem = GUEST_WORK_MEM, 289 | .verbose_loader = false 290 | }; 291 | tinykvm::Machine vm {master_vm, options}; 292 | 293 | for (size_t i = 0; i < 10; i++) 294 | { 295 | try { 296 | vm.reset_to(master_vm, options); 297 | vm.vmcall("test_copy_on_write"); 298 | KASSERT(vm.return_value() == 666); 299 | vm.vmcall("test_malloc"); 300 | KASSERT(vm.return_value() != 0); 301 | //vm.vmcall("test_expensive"); 302 | //KASSERT(vm.return_value() != 0); 303 | 304 | vm.vmcall("write_value", 10 + i); 305 | KASSERT(vm.return_value() == 10 + i); 306 | vm.vmcall("test_is_value", 10 + i); 307 | KASSERT(vm.return_value() == 666); 308 | } catch (...) { 309 | vm.print_pagetables(); 310 | vm.print_registers(); 311 | fprintf(stderr, "first vm.reset_to(vm) failed\n"); 312 | throw; 313 | } 314 | /* We have to acknowledge that the parent VM for 'vm' 315 | falls out-of-scope here, which is dangerous, but 316 | *must* be supported. */ 317 | } 318 | } 319 | 320 | void test_vcpu(tinykvm::Machine& master_vm) 321 | { 322 | for (size_t i = 0; i < 10; i++) 323 | { 324 | try { 325 | master_vm.cpu().set_vcpu_table_at(0, i); 326 | master_vm.vmcall("test_vcpu"); 327 | KASSERT(master_vm.return_value() == i); 328 | } catch (...) 
/* Signed number of nanoseconds from start_time to end_time.
   The result is negative when end_time lies before start_time. */
inline long nanodiff(timespec start_time, timespec end_time)
{
	/* Whole seconds, scaled to nanoseconds */
	const auto scaled_seconds = (end_time.tv_sec - start_time.tv_sec) * (long)1e9;
	/* Sub-second remainder; may be negative on its own */
	const auto leftover_nanos = end_time.tv_nsec - start_time.tv_nsec;
	return scaled_seconds + leftover_nanos;
}
TEST_CASE("Execution timeout", "[Output]")
{
	// Guest program whose main() never returns
	const auto binary = build_and_load(R"M(
int main() {
	while (1);
})M");

	tinykvm::Machine machine { binary, { .max_mem = MAX_MEMORY } };
	// We need to create a Linux environment for runtimes to work well
	machine.setup_linux({"are we passing this correctly?"}, env);
	// Require the timeout exception specifically: a bare REQUIRE_THROWS
	// would also pass if the guest failed for an unrelated reason.
	REQUIRE_THROWS_AS(machine.run(1.0f), tinykvm::MachineTimeoutException);
}
87 | REQUIRE(output_is_hello_world); 88 | } 89 | -------------------------------------------------------------------------------- /tests/unit/codebuilder.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include "crc32.hpp" 7 | static constexpr bool VERBOSE_COMPILER = true; 8 | 9 | std::string compile_command(const std::string& cc, 10 | const std::string& outfile, const std::string& codefile, 11 | const std::string& arguments) 12 | { 13 | return cc + " -O2 -static -std=c11 " + arguments + " -x c -o " + outfile + " " + codefile; 14 | } 15 | std::string env_with_default(const char* var, const std::string& defval) { 16 | std::string value = defval; 17 | if (const char* envval = getenv(var); envval) value = std::string(envval); 18 | return value; 19 | } 20 | 21 | std::vector load_file(const std::string& filename) 22 | { 23 | size_t size = 0; 24 | FILE* f = fopen(filename.c_str(), "rb"); 25 | if (f == NULL) throw std::runtime_error("Could not open file: " + filename); 26 | 27 | fseek(f, 0, SEEK_END); 28 | size = ftell(f); 29 | fseek(f, 0, SEEK_SET); 30 | 31 | std::vector result(size); 32 | if (size != fread(result.data(), 1, size, f)) 33 | { 34 | fclose(f); 35 | throw std::runtime_error("Error when reading from file: " + filename); 36 | } 37 | fclose(f); 38 | return result; 39 | } 40 | 41 | std::string build(const std::string& code, const std::string& compiler_args) 42 | { 43 | // Create temporary filenames for code and binary 44 | char code_filename[64]; 45 | strncpy(code_filename, "/tmp/builder-XXXXXX", sizeof(code_filename)); 46 | // Open temporary code file with owner privs 47 | const int code_fd = mkstemp(code_filename); 48 | if (code_fd < 0) { 49 | throw std::runtime_error( 50 | "Unable to create temporary file for code: " + std::string(code_filename)); 51 | } 52 | // Write code to temp code file 53 | const ssize_t code_len = write(code_fd, code.c_str(), 
code.size()); 54 | if (code_len < (ssize_t) code.size()) { 55 | unlink(code_filename); 56 | throw std::runtime_error("Unable to write to temporary file"); 57 | } 58 | // Compile code to binary file 59 | char bin_filename[256]; 60 | const uint32_t checksum = crc32(code.c_str(), code.size()); 61 | (void)snprintf(bin_filename, sizeof(bin_filename), 62 | "/tmp/binary-%08X", checksum); 63 | 64 | auto cc = env_with_default("CC", "gcc"); 65 | auto command = compile_command(cc, bin_filename, code_filename, compiler_args); 66 | if constexpr (VERBOSE_COMPILER) { 67 | printf("Command: %s\n", command.c_str()); 68 | } 69 | // Compile program 70 | FILE* f = popen(command.c_str(), "r"); 71 | if (f == nullptr) { 72 | unlink(code_filename); 73 | throw std::runtime_error("Unable to compile code"); 74 | } 75 | pclose(f); 76 | unlink(code_filename); 77 | 78 | return bin_filename; 79 | } 80 | std::vector build_and_load(const std::string& code) 81 | { 82 | return load_file(build(code, "")); 83 | } 84 | std::pair< 85 | std::string, 86 | std::vector 87 | > build_and_load(const std::string& code, const std::string& args) 88 | { 89 | const auto file = build(code, args); 90 | return {file, load_file(file)}; 91 | } 92 | -------------------------------------------------------------------------------- /tests/unit/crc32.hpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | template 5 | inline constexpr auto gen_crc32_table() 6 | { 7 | constexpr auto num_iterations = 8; 8 | auto crc32_table = std::array {}; 9 | 10 | for (auto byte = 0u; byte < crc32_table.size(); ++byte) { 11 | auto crc = byte; 12 | 13 | for (auto i = 0; i < num_iterations; ++i) { 14 | auto mask = -(crc & 1); 15 | crc = (crc >> 1) ^ (POLYNOMIAL & mask); 16 | } 17 | 18 | crc32_table[byte] = crc; 19 | } 20 | return crc32_table; 21 | } 22 | 23 | template 24 | inline constexpr auto crc32(const char* data) 25 | { 26 | constexpr auto crc32_table = gen_crc32_table(); 27 | 
28 | auto crc = 0xFFFFFFFFu; 29 | for (auto i = 0u; auto c = data[i]; ++i) { 30 | crc = crc32_table[(crc ^ c) & 0xFF] ^ (crc >> 8); 31 | } 32 | return ~crc; 33 | } 34 | 35 | template 36 | inline constexpr auto crc32(const void* vdata, const size_t len) 37 | { 38 | constexpr auto crc32_table = gen_crc32_table(); 39 | 40 | auto* data = (const uint8_t*) vdata; 41 | auto crc = 0xFFFFFFFFu; 42 | for (auto i = 0u; i < len; ++i) { 43 | crc = crc32_table[(crc ^ data[i]) & 0xFF] ^ (crc >> 8); 44 | } 45 | return ~crc; 46 | } 47 | -------------------------------------------------------------------------------- /tests/unit/mmap.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | extern std::vector build_and_load(const std::string &code); 5 | static const uint64_t MAX_MEMORY = 8ul << 20; /* 8MB */ 6 | static const std::vector env{ 7 | "LC_TYPE=C", "LC_ALL=C", "USER=root"}; 8 | 9 | TEST_CASE("Initialize KVM", "[Initialize]") 10 | { 11 | // Create KVM file descriptors etc. 
12 | tinykvm::Machine::init(); 13 | } 14 | 15 | TEST_CASE("Basic mmap and munmap", "[MMAP]") 16 | { 17 | const auto binary = build_and_load(R"M( 18 | #include 19 | #include 20 | int main(int argc, char** argv) { 21 | return 666; 22 | } 23 | void* do_mmap(size_t size) { 24 | void *res = mmap(NULL, size, 0x7, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); 25 | //printf("mmap(%zu) = %p\n", size, res); 26 | //fflush(stdout); 27 | return res; 28 | } 29 | int do_munmap(void* addr, size_t size) { 30 | int res = munmap(addr, size); 31 | //printf("munmap(%p, %zu) = %d\n", addr, size, res); 32 | //fflush(stdout); 33 | return res; 34 | } 35 | )M"); 36 | 37 | tinykvm::Machine machine{binary, {.max_mem = MAX_MEMORY}}; 38 | machine.setup_linux({"program"}, env); 39 | //machine.set_verbose_system_calls(true); 40 | machine.run(2.0f); 41 | REQUIRE(machine.return_value() == 666); 42 | 43 | for (int i = 0; i < 10; ++i) 44 | { 45 | // Make a single mmap call 46 | machine.vmcall("do_mmap", 0x1000000); 47 | const uint64_t guest_mmap_addr = machine.return_value(); 48 | REQUIRE(guest_mmap_addr >= machine.mmap_start()); 49 | REQUIRE(guest_mmap_addr != ~0UL); 50 | REQUIRE((guest_mmap_addr & 0xFFF) == 0); 51 | // Since this is a single page, we can use writable_memview 52 | // on a page (which must be sequential in memory) 53 | auto mmap_page = machine.writable_memview(guest_mmap_addr, 0x1000); 54 | REQUIRE(!mmap_page.empty()); 55 | // We can memset the entire page 56 | std::memset(mmap_page.data(), 0xFF, mmap_page.size()); 57 | 58 | // Unmapping and then mapping again should return the same address 59 | machine.vmcall("do_munmap", guest_mmap_addr, 0x1000000); 60 | REQUIRE(machine.return_value() == 0); 61 | 62 | machine.vmcall("do_mmap", 0x1000000); 63 | const uint64_t new_guest_mmap_addr = machine.return_value(); 64 | REQUIRE(new_guest_mmap_addr == guest_mmap_addr); 65 | // Check that the address is still valid 66 | auto mmap_page_after_unmap = machine.writable_memview(new_guest_mmap_addr, 0x1000); 
67 | REQUIRE(!mmap_page_after_unmap.empty()); 68 | 69 | // Unmap the page 70 | machine.vmcall("do_munmap", new_guest_mmap_addr, 0x1000000); 71 | } 72 | } 73 | 74 | TEST_CASE("Randomize mappings avoiding collisions", "[MMAP]") 75 | { 76 | const auto binary = build_and_load(R"M( 77 | #include 78 | #include 79 | int main(int argc, char** argv) { 80 | return 666; 81 | } 82 | void* do_mmap(size_t size) { 83 | void *res = mmap(NULL, size, 0x7, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); 84 | return res; 85 | } 86 | void* do_fixed_mmap(void* m, size_t size) { 87 | void *res = mmap(m, size, 0x7, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); 88 | return res; 89 | } 90 | int do_munmap(void* addr, size_t size) { 91 | int res = munmap(addr, size); 92 | return res; 93 | } 94 | )M"); 95 | 96 | tinykvm::Machine machine{binary, {.max_mem = MAX_MEMORY}}; 97 | machine.setup_linux({"program"}, env); 98 | //machine.set_verbose_system_calls(true); 99 | machine.run(2.0f); 100 | REQUIRE(machine.return_value() == 666); 101 | 102 | struct Mapping 103 | { 104 | uint64_t addr; 105 | size_t size; 106 | 107 | bool exists(const std::vector& mappings) const 108 | { 109 | for (const auto& m : mappings) 110 | { 111 | if (m.addr == addr && m.size == size) 112 | return true; 113 | } 114 | return false; 115 | } 116 | bool within(const std::vector& mappings) const 117 | { 118 | for (const auto& m : mappings) 119 | { 120 | if (addr + size > m.addr && addr < m.addr + m.size) 121 | return true; 122 | } 123 | return false; 124 | } 125 | bool overlaps(const Mapping& other) const 126 | { 127 | return addr < other.addr + other.size && addr + size > other.addr; 128 | } 129 | }; 130 | std::vector mappings; 131 | 132 | // Create a large number of mappings 133 | for (int i = 0; i < 10000; ++i) 134 | { 135 | // Make a random decision to either map or unmap 136 | const int decision = (rand() % 5); 137 | const bool do_mmap = decision == 0; 138 | const bool do_munmap = decision == 1; 139 | const bool do_mmap_within_mapping = 
decision == 2; 140 | const bool do_munmap_lower_half = decision == 3; 141 | const bool do_munmap_upper_half = decision == 4; 142 | if (do_mmap) 143 | { 144 | // Make a random page-aligned size 145 | const size_t size = (rand() % 1000 + 1) * 0x1000; 146 | machine.vmcall("do_mmap", size); 147 | const uint64_t guest_mmap_addr = machine.return_value(); 148 | REQUIRE(guest_mmap_addr >= machine.mmap_start()); 149 | 150 | // Add the mapping to the list 151 | Mapping m{guest_mmap_addr, size}; 152 | const bool collision = m.within(mappings); 153 | if (collision) 154 | { 155 | fprintf(stderr, "Collision detected: %p -> %p (%zu)\n", 156 | (void*)m.addr, (void*)(m.addr + m.size), m.size); 157 | fprintf(stderr, "Collision with: "); 158 | for (const auto& m2 : mappings) 159 | { 160 | if (m2.overlaps(m)) 161 | fprintf(stderr, "%p -> %p (%zu) ", 162 | (void*)m2.addr, (void*)(m2.addr + m2.size), m2.size); 163 | } 164 | } 165 | REQUIRE(!collision); 166 | mappings.push_back(m); 167 | } 168 | else if (do_munmap) 169 | { 170 | // Unmap a random mapping 171 | if (mappings.empty()) 172 | continue; 173 | const size_t index = rand() % mappings.size(); 174 | const auto& m = mappings[index]; 175 | machine.vmcall("do_munmap", m.addr, m.size); 176 | REQUIRE(machine.return_value() == 0); 177 | 178 | // Remove the mapping from the list 179 | mappings.erase(mappings.begin() + index); 180 | } 181 | else if (do_mmap_within_mapping) 182 | { 183 | if (mappings.empty()) 184 | continue; 185 | const size_t index = rand() % mappings.size(); 186 | const auto& m = mappings[index]; 187 | machine.vmcall("do_fixed_mmap", m.addr, m.size); 188 | const uint64_t guest_mmap_addr = machine.return_value(); 189 | REQUIRE(guest_mmap_addr == m.addr); 190 | } 191 | else if (do_munmap_lower_half || do_munmap_upper_half) 192 | { 193 | if (mappings.empty()) 194 | continue; 195 | const size_t index = rand() % mappings.size(); 196 | auto& m = mappings[index]; 197 | size_t remove_size = std::max(m.size / 2, size_t(0x1000)); 
198 | remove_size = (remove_size + 0xFFF) & ~0xFFF; // Align to page size 199 | const size_t new_size = m.size - remove_size; 200 | if (do_munmap_lower_half) 201 | { 202 | machine.vmcall("do_munmap", m.addr, remove_size); 203 | REQUIRE(machine.return_value() == 0); 204 | // Adjust or remove the mapping from the list 205 | if (new_size >= 0x1000) 206 | { 207 | m.addr += remove_size; 208 | m.size = new_size; 209 | } 210 | else 211 | { 212 | // Remove the mapping from the list 213 | mappings.erase(mappings.begin() + index); 214 | } 215 | } 216 | else // Upper half 217 | { 218 | const uint64_t remove_addr = m.addr + new_size; 219 | machine.vmcall("do_munmap", remove_addr, remove_size); 220 | REQUIRE(machine.return_value() == 0); 221 | // Adjust or remove the mapping from the list 222 | if (new_size >= 0x1000) 223 | { 224 | m.size = new_size; 225 | } 226 | else 227 | { 228 | // Remove the mapping from the list 229 | mappings.erase(mappings.begin() + index); 230 | } 231 | } 232 | } 233 | } 234 | } 235 | -------------------------------------------------------------------------------- /tests/unit/remote.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | extern std::pair< 6 | std::string, 7 | std::vector 8 | > build_and_load(const std::string& code, const std::string& args); 9 | static const uint64_t MAX_MEMORY = 8ul << 20; /* 8MB */ 10 | static const uint64_t MAX_COWMEM = 1ul << 20; /* 1MB */ 11 | static const std::vector env { 12 | "LC_TYPE=C", "LC_ALL=C", "USER=root" 13 | }; 14 | 15 | TEST_CASE("Initialize KVM", "[Remote]") 16 | { 17 | // Create KVM file descriptors etc. 
18 | tinykvm::Machine::init(); 19 | } 20 | 21 | TEST_CASE("Print from remote VM", "[Remote]") 22 | { 23 | const auto storage_binary = build_and_load(R"M( 24 | extern long write(int, const void*, unsigned long); 25 | int main() { 26 | } 27 | extern void remote_hello_world() { 28 | write(1, "Hello Remote World!", 19); 29 | })M", "-Wl,-Ttext-segment=0x40400000"); 30 | 31 | // Extract storage remote symbols 32 | const std::string command = "objcopy -w --extract-symbol --strip-symbol=!remote* --strip-symbol=* " + storage_binary.first + " storage.syms"; 33 | FILE* f = popen(command.c_str(), "r"); 34 | if (f == nullptr) { 35 | throw std::runtime_error("Unable to extract remote symbols"); 36 | } 37 | pclose(f); 38 | 39 | const auto main_binary = build_and_load(R"M( 40 | extern void remote_hello_world(); 41 | int main() { 42 | remote_hello_world(); 43 | } 44 | )M", "-Wl,--just-symbols=storage.syms"); 45 | 46 | tinykvm::Machine storage { storage_binary.second, { 47 | .max_mem = 16ULL << 20, // MB 48 | .vmem_base_address = 1ULL << 30, // 1GB 49 | } }; 50 | storage.setup_linux({"storage"}, env); 51 | storage.run(4.0f); 52 | 53 | tinykvm::Machine machine { main_binary.second, { .max_mem = MAX_MEMORY } }; 54 | machine.setup_linux({"main"}, env); 55 | machine.remote_connect(storage); 56 | 57 | bool output_is_hello_world = false; 58 | machine.set_printer([&] (const char* data, size_t size) { 59 | std::string text{data, data + size}; 60 | output_is_hello_world = (text == "Hello Remote World!"); 61 | }); 62 | 63 | machine.run(4.0f); 64 | REQUIRE(output_is_hello_world); 65 | } 66 | -------------------------------------------------------------------------------- /tests/unit/reset.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | extern std::vector build_and_load(const std::string& code); 6 | static const uint64_t MAX_MEMORY = 32ul << 20; /* 32MB */ 7 | static const uint64_t MAX_COWMEM = 8ul << 20; /* 
8MB */ 8 | static const std::vector env { 9 | "LC_TYPE=C", "LC_ALL=C", "USER=root" 10 | }; 11 | 12 | TEST_CASE("Initialize KVM", "[Initialize]") 13 | { 14 | // Create KVM file descriptors etc. 15 | tinykvm::Machine::init(); 16 | } 17 | 18 | TEST_CASE("Execute function in reset VM", "[Reset]") 19 | { 20 | const auto binary = build_and_load(R"M( 21 | static int a = 0; 22 | static int b = 1; 23 | int main() { 24 | } 25 | extern long get_a() { 26 | int ta = a; 27 | a = 333; 28 | return ta; 29 | } 30 | extern long get_b() { 31 | int tb = b; 32 | b = 666; 33 | return tb; 34 | } 35 | extern long get_mmap(int *z) { 36 | int total = z[100] + z[200] + z[300] + z[400]; 37 | z[100] = 22; 38 | z[200] = 44; 39 | z[300] = 66; 40 | z[400] = 88; 41 | return total; 42 | })M"); 43 | 44 | tinykvm::Machine machine { binary, { .max_mem = MAX_MEMORY } }; 45 | // We need to create a Linux environment for runtimes to work well 46 | machine.setup_linux({"reset"}, env); 47 | 48 | // Run for at most 4 seconds before giving up 49 | machine.run(4.0f); 50 | // Make machine forkable (no working memory) 51 | machine.prepare_copy_on_write(0); 52 | 53 | auto maddr = machine.mmap_allocate(0x1000); 54 | 55 | // Create fork 56 | auto fork = tinykvm::Machine { machine, { 57 | .max_mem = MAX_MEMORY, .max_cow_mem = MAX_COWMEM 58 | } }; 59 | 60 | for (size_t i = 0; i < 15; i++) 61 | { 62 | auto& m = fork; 63 | m.timed_vmcall(m.address_of("get_a"), 2.0f); 64 | REQUIRE(m.return_value() == 0); 65 | 66 | m.timed_vmcall(m.address_of("get_b"), 2.0f); 67 | REQUIRE(m.return_value() == 1); 68 | 69 | m.timed_vmcall(m.address_of("get_mmap"), 2.0f, (uint64_t)maddr); 70 | REQUIRE(m.return_value() == 0); 71 | 72 | m.reset_to(machine, { 73 | .max_mem = MAX_MEMORY, 74 | .max_cow_mem = MAX_COWMEM 75 | }); 76 | } 77 | } 78 | 79 | TEST_CASE("Execute function in VM (crash recovery)", "[Reset]") 80 | { 81 | const auto binary = build_and_load(R"M( 82 | #include 83 | #include 84 | int main() { 85 | printf("Main!\n"); 86 | } 87 | 
88 | __asm__(".global some_syscall\n" 89 | ".type some_syscall, @function\n" 90 | "some_syscall:\n" 91 | ".cfi_startproc\n" 92 | " mov $0x10000, %eax\n" 93 | " out %eax, $0\n" 94 | " ret\n" 95 | ".cfi_endproc\n"); 96 | extern long some_syscall(); 97 | 98 | extern long hello_world(const char *arg) { 99 | printf("%s\n", arg); 100 | fflush(stdout); 101 | return some_syscall(); 102 | } 103 | extern void crash(const char *arg) { 104 | some_syscall(); 105 | printf("%s\n", arg); 106 | fflush(stdout); 107 | some_syscall(); 108 | assert(0); 109 | })M"); 110 | 111 | tinykvm::Machine machine { binary, { .max_mem = MAX_MEMORY } }; 112 | // We need to create a Linux environment for runtimes to work well 113 | machine.setup_linux({"reset"}, env); 114 | 115 | // Run for at most 4 seconds before giving up 116 | machine.run(4.0f); 117 | // Make machine forkable (no working memory) 118 | machine.prepare_copy_on_write(0); 119 | 120 | // Create fork 121 | auto fork = tinykvm::Machine { machine, { 122 | .max_mem = MAX_MEMORY, .max_cow_mem = MAX_COWMEM 123 | } }; 124 | 125 | tinykvm::Machine::install_unhandled_syscall_handler( 126 | [] (tinykvm::vCPU& cpu, unsigned scall) { 127 | auto regs = cpu.registers(); 128 | switch (scall) { 129 | case 0x10000: // Some function 130 | regs.rax = 1023; 131 | break; 132 | default: 133 | regs.rax = -ENOSYS; 134 | } 135 | cpu.set_registers(regs); 136 | }); 137 | 138 | bool output_is_hello_world = false; 139 | fork.set_printer([&] (const char* data, size_t size) { 140 | std::string text{data, data + size}; 141 | if (text == "Hello World!") 142 | output_is_hello_world = true; 143 | }); 144 | 145 | // Print and crash, verify recovery after reset 146 | for (size_t i = 0; i < 15; i++) 147 | { 148 | auto& m = fork; 149 | 150 | output_is_hello_world = false; 151 | m.timed_vmcall(m.address_of("hello_world"), 2.0f, "Hello World!"); 152 | REQUIRE(m.return_value() == 1023); 153 | REQUIRE(output_is_hello_world); 154 | 155 | output_is_hello_world = false; 156 | 
m.timed_vmcall(m.address_of("hello_world"), 2.0f, "Hello World!"); 157 | REQUIRE(m.return_value() == 1023); 158 | REQUIRE(output_is_hello_world); 159 | 160 | output_is_hello_world = false; 161 | try { 162 | m.timed_vmcall(m.address_of("crash"), 2.0f, "Hello World!"); 163 | } catch (...) {} 164 | REQUIRE(output_is_hello_world); 165 | 166 | m.reset_to(machine, { 167 | .max_mem = MAX_MEMORY, 168 | .max_cow_mem = MAX_COWMEM 169 | }); 170 | } 171 | } 172 | -------------------------------------------------------------------------------- /tests/unit/tegridy.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | extern std::vector build_and_load(const std::string&); 5 | static const uint64_t MAX_MEMORY = 8ul << 20; /* 8MB */ 6 | static const std::vector env{ 7 | "LC_TYPE=C", "LC_ALL=C", "USER=root"}; 8 | 9 | TEST_CASE("Initialize KVM", "[Initialize]") 10 | { 11 | // Create KVM file descriptors etc. 12 | tinykvm::Machine::init(); 13 | } 14 | 15 | TEST_CASE("Writes to kernel memory", "[Integrity]") 16 | { 17 | const auto binary = build_and_load(R"M( 18 | #include 19 | int main() { 20 | printf("Main!\n"); 21 | return 666; 22 | } 23 | void kwrite(long* area) 24 | { 25 | *area = 0x1234; 26 | __asm__("hlt"); 27 | } 28 | void still_works() 29 | { 30 | printf("Hello World!\n"); 31 | })M"); 32 | 33 | /* Create the machine, set up its Linux environment and run the guest program */ 34 | tinykvm::Machine machine{binary, {.max_mem = MAX_MEMORY}}; 35 | machine.setup_linux({"tegridy"}, env); 36 | machine.run(4.0f); 37 | 38 | const auto func = machine.address_of("kwrite"); 39 | REQUIRE(func != 0x0); 40 | REQUIRE(machine.address_of("still_works") != 0x0); 41 | 42 | bool output_is_hello_world = false; 43 | machine.set_printer([&] (const char* data, size_t size) { 44 | std::string text{data, data + size}; 45 | if (text == "Hello World!") 46 | output_is_hello_world = true; 47 | }); 48 | 49 | machine.timed_vmcall( 50 | machine.address_of("still_works"), 1.0f); 51 | 
52 | REQUIRE(output_is_hello_world); 53 | output_is_hello_world = false; 54 | 55 | /* Have the guest's kwrite() store 0x1234 at every 16th byte of [0x0, 0x12000) */ 56 | for (long addr = 0x0; addr < 0x12000; addr += 0x10) 57 | { 58 | try 59 | { 60 | machine.timed_vmcall(func, 1.0f, addr); 61 | } 62 | catch (const tinykvm::MachineException& me) 63 | { 64 | REQUIRE(std::string(me.what()) != "Halt from kernel space"); 65 | } 66 | } 67 | 68 | machine.timed_vmcall( 69 | machine.address_of("still_works"), 1.0f); 70 | 71 | REQUIRE(output_is_hello_world); 72 | } 73 | 74 | TEST_CASE("Jumps to kernel memory", "[Integrity]") 75 | { 76 | const auto binary = build_and_load(R"M( 77 | #include 78 | int main() { 79 | printf("Main!\n"); 80 | return 666; 81 | } 82 | void still_works() 83 | { 84 | printf("Hello World!\n"); 85 | })M"); 86 | 87 | /* Create the machine, set up its Linux environment and run the guest program */ 88 | tinykvm::Machine machine{binary, {.max_mem = MAX_MEMORY}}; 89 | machine.setup_linux({"tegridy"}, env); 90 | machine.run(4.0f); 91 | 92 | REQUIRE(machine.address_of("still_works") != 0x0); 93 | 94 | bool output_is_hello_world = false; 95 | machine.set_printer([&] (const char* data, size_t size) { 96 | std::string text{data, data + size}; 97 | if (text == "Hello World!") 98 | output_is_hello_world = true; 99 | }); 100 | 101 | machine.timed_vmcall( 102 | machine.address_of("still_works"), 1.0f); 103 | 104 | REQUIRE(output_is_hello_world); 105 | output_is_hello_world = false; 106 | 107 | /* Use every 16th address of [0x0, 0x12000) as the vmcall target; 0x1234 is only the call argument */ 108 | for (long addr = 0x0; addr < 0x12000; addr += 0x10) 109 | { 110 | try 111 | { 112 | machine.timed_vmcall(addr, 1.0f, 0x1234); 113 | } 114 | catch (const std::exception& e) 115 | { 116 | // "Shutdown! Triple fault?" 
117 | } 118 | } 119 | 120 | machine.timed_vmcall( 121 | machine.address_of("still_works"), 1.0f); 122 | 123 | REQUIRE(output_is_hello_world); 124 | } 125 | -------------------------------------------------------------------------------- /tests/unit/timeout.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include 6 | extern std::vector build_and_load(const std::string& code); 7 | static const uint64_t MAX_MEMORY = 32ul << 20; /* 32MB */ 8 | static const uint64_t MAX_COWMEM = 8ul << 20; /* 8MB */ 9 | static const std::vector env { 10 | "LC_TYPE=C", "LC_ALL=C", "USER=root" 11 | }; 12 | 13 | TEST_CASE("Initialize KVM", "[Initialize]") 14 | { 15 | // Create KVM file descriptors etc. 16 | tinykvm::Machine::init(); 17 | } 18 | 19 | TEST_CASE("Multiple timeouts inside guest", "[Timeout]") 20 | { 21 | const auto good_binary = build_and_load(R"M( 22 | int main() { 23 | return 0; 24 | })M"); 25 | const auto bad_binary = build_and_load(R"M( 26 | int main() { 27 | while (1); 28 | })M"); 29 | 30 | std::vector threads; 31 | 32 | for (size_t i = 0; i < 100; i++) 33 | { 34 | // Good program 35 | threads.push_back(std::thread([&] { 36 | tinykvm::Machine machine { good_binary, { .max_mem = MAX_MEMORY } }; 37 | machine.setup_linux({"timeout"}, env); 38 | // This must *NOT* cause a timeout exception 39 | try { 40 | machine.run(1.0f); 41 | } catch (const tinykvm::MachineTimeoutException& e) { 42 | /* NOTE: thrown inside a std::thread body with no handler, so it ends in std::terminate */ throw std::runtime_error("Timeout in good program"); 43 | } 44 | })); 45 | // Bad program 46 | threads.push_back(std::thread([&] { 47 | tinykvm::Machine machine { bad_binary, { .max_mem = MAX_MEMORY } }; 48 | machine.setup_linux({"timeout"}, env); 49 | // This must cause a timeout exception 50 | try { 51 | machine.run(1.0f); 52 | } catch (const tinykvm::MachineTimeoutException& e) { 53 | return; 54 | } 55 | /* NOTE: also escapes the std::thread body, so it ends in std::terminate */ throw std::runtime_error("No timeout"); 56 | })); 57 | } 58 | for (auto& thread : threads) 59 | 
thread.join(); 60 | } 61 | 62 | TEST_CASE("Multiple timeouts in Linux system call", "[Timeout]") 63 | { 64 | const auto binary = build_and_load(R"M( 65 | extern long write(int, const void*, unsigned long); 66 | int main() { 67 | while (1) { 68 | //for (volatile unsigned long i = 0; i < 40000000UL; i++); 69 | write(1, "Hello World!", 12); 70 | } 71 | })M"); 72 | 73 | std::vector threads; 74 | 75 | for (size_t i = 0; i < 100; i++) 76 | { 77 | threads.push_back(std::thread([&] { 78 | tinykvm::Machine machine { binary, { .max_mem = MAX_MEMORY } }; 79 | machine.setup_linux({"timeout"}, env); 80 | // This will cause every write to sleep for 1 second. 81 | machine.set_printer([&] (const char*, size_t) { 82 | std::this_thread::sleep_for(std::chrono::seconds(1)); 83 | }); 84 | // This must cause a timeout exception 85 | try { 86 | machine.run(1.0f); 87 | } catch (const tinykvm::MachineTimeoutException& e) { 88 | return; 89 | } 90 | throw std::runtime_error("No timeout"); 91 | })); 92 | } 93 | for (auto& thread : threads) 94 | thread.join(); 95 | } 96 | --------------------------------------------------------------------------------