├── .github
└── workflows
│ ├── cmake.yml
│ └── unittests.yml
├── .gitignore
├── .gitmodules
├── CMakeLists.txt
├── CODE_OF_CONDUCT.md
├── CONTRIBUTOR_LICENSE_AGREEMENT.md
├── LICENSE
├── README.md
├── fuzz
├── CMakeLists.txt
├── fuzz.cpp
├── fuzzer.sh
└── helpers.cpp
├── guest
├── .gdbinit
├── STREAM
│ ├── .gitignore
│ ├── build.sh
│ ├── hugepage_stream.c
│ ├── results.txt
│ └── stream.c
├── build.sh
├── glibc
│ ├── .gdbinit
│ ├── float.c
│ ├── float.gdb
│ ├── glibc.c
│ └── s.gdb
├── go
│ ├── main.go
│ └── s.gdb
├── mini
│ ├── build.sh
│ ├── mini.asm
│ └── s.gdb
├── musl
│ ├── .gdbinit
│ ├── build.sh
│ ├── glibc.gdb
│ ├── musl.c
│ ├── s.gdb
│ └── simple.c
├── nim
│ ├── build.sh
│ └── program.nim
├── nim_storage
│ ├── .gitignore
│ ├── build.sh
│ ├── main.c
│ ├── main.nim
│ └── storage.nim
├── src
│ ├── api.hpp
│ ├── crc32c.cpp
│ ├── guest.cpp
│ └── start.cpp
├── storage
│ ├── .gitignore
│ ├── build.sh
│ ├── main.c
│ └── storage.c
└── tests
│ ├── build.sh
│ ├── cxx_test.cpp
│ ├── debug.sh
│ ├── remote.gdb
│ └── test.c
├── lib
├── CMakeLists.txt
└── tinykvm
│ ├── amd64
│ ├── amd64.hpp
│ ├── builtin
│ │ ├── .gitignore
│ │ ├── assembly.sh
│ │ ├── disassembly.sh
│ │ ├── interrupts.asm
│ │ ├── kernel_assembly.h
│ │ ├── usercode.asm
│ │ ├── usercode_assembly.sh
│ │ ├── vsyscall.asm
│ │ └── vsyscall_assembly.sh
│ ├── gdt.cpp
│ ├── gdt.hpp
│ ├── idt.cpp
│ ├── idt.hpp
│ ├── lapic.hpp
│ ├── memory_layout.hpp
│ ├── paging.cpp
│ ├── paging.hpp
│ ├── tss.cpp
│ ├── tss.hpp
│ ├── usercode.cpp
│ ├── usercode.hpp
│ ├── vdso.cpp
│ └── vdso.hpp
│ ├── arm64
│ └── memory_layout.hpp
│ ├── common.hpp
│ ├── forward.hpp
│ ├── linux
│ ├── fds.cpp
│ ├── fds.hpp
│ ├── signals.cpp
│ ├── signals.hpp
│ ├── system_calls.cpp
│ ├── threads.cpp
│ └── threads.hpp
│ ├── machine.cpp
│ ├── machine.hpp
│ ├── machine_elf.cpp
│ ├── machine_env.cpp
│ ├── machine_inline.hpp
│ ├── machine_utils.cpp
│ ├── memory.cpp
│ ├── memory.hpp
│ ├── memory_bank.cpp
│ ├── memory_bank.hpp
│ ├── memory_maps.cpp
│ ├── mmap_cache.hpp
│ ├── page_streaming.cpp
│ ├── page_streaming.hpp
│ ├── remote.cpp
│ ├── rsp_client.cpp
│ ├── rsp_client.hpp
│ ├── smp.cpp
│ ├── smp.hpp
│ ├── util
│ ├── elf.h
│ ├── elf.hpp
│ ├── function.hpp
│ ├── threadpool.h
│ └── threadtask.hpp
│ ├── vcpu.cpp
│ ├── vcpu.hpp
│ ├── vcpu_run.cpp
│ └── virtual_mem.hpp
├── src
├── assert.hpp
├── bench.cpp
├── load_file.hpp
├── simple.cpp
├── storage.cpp
├── tests.cpp
└── timing.hpp
└── tests
├── run_unit_tests.sh
└── unit
├── .gitignore
├── CMakeLists.txt
├── basic.cpp
├── codebuilder.cpp
├── crc32.hpp
├── fork.cpp
├── mmap.cpp
├── remote.cpp
├── reset.cpp
├── tegridy.cpp
└── timeout.cpp
/.github/workflows/cmake.yml:
--------------------------------------------------------------------------------
1 | name: CMake Build
2 |
3 | on:
4 | push:
5 | branches: [ master ]
6 | pull_request:
7 | branches: [ master ]
8 |
9 | jobs:
10 | build:
11 | runs-on: ubuntu-latest
12 | env:
13 | BUILD_TYPE: ${{ matrix.buildtype }}  # must match the `buildtype` key under strategy.matrix
14 |
15 | strategy:
16 | matrix:
17 | compiler: [g++, clang++]
18 | buildtype: [Debug, Release]
19 | steps:
20 | - uses: actions/checkout@v2
21 |
22 | - name: Install dependencies
23 | run: sudo apt-get install -y cmake g++ clang
24 |
25 | - name: Emulator configuration
26 | working-directory: ${{github.workspace}}
27 | env:
28 | CXX: ${{ matrix.compiler }}
29 | run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}}
30 |
31 | - name: Emulator build
32 | run: cmake --build ${{github.workspace}}/build
33 |
--------------------------------------------------------------------------------
/.github/workflows/unittests.yml:
--------------------------------------------------------------------------------
1 | name: Unit Tests
2 | on:
3 | workflow_dispatch:
4 |
5 | jobs:
6 | build:
7 | runs-on: ubuntu-latest
8 | defaults:
9 | run:
10 | working-directory: ${{github.workspace}}/tests/unit
11 |
12 | steps:
13 | - uses: actions/checkout@v2
14 |
15 | - name: Install dependencies
16 | run: |
17 | git submodule update --init ${{github.workspace}}/tests/Catch2
18 |
19 | - name: Configure  # BUILD_TYPE is not defined anywhere in this workflow, so fall back to Debug
20 | run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{ env.BUILD_TYPE || 'Debug' }}
21 |
22 | - name: Build the unittests
23 | run: cmake --build ${{github.workspace}}/build
24 |
25 | - name: Run tests
26 | working-directory: ${{github.workspace}}/build
27 | run: ctest --verbose .
28 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | **/build_*
2 | **/build
3 | **/.build
4 | .vscode/
5 | guest/interrupts
6 | guest/guest.elf
7 | guest/musl/musl
8 | guest/musl/glibc
9 | guest/musl/simple
10 | guest/glibc/glibc
11 | guest/glibc/float
12 | guest/go/go
13 |
14 | guest/tests/glibc_test
15 | guest/tests/musl_test
16 | guest/tests/cxx_test
17 | guest/mini/mini.o
18 | guest/mini/mini
19 |
20 | crash-*
21 |
--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "tests/Catch2"]
2 | path = tests/Catch2
3 | url = https://github.com/catchorg/Catch2.git
4 |
--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | cmake_minimum_required(VERSION 3.10)
2 | project(tinykvm CXX)
3 |
4 | # User-facing build switches; both are off by default.
5 | option(FLTO "Link-time optimizations" OFF)
6 | option(SANITIZE "Enable address and ub sanitizers" OFF)
7 |
8 | # Append the project defaults instead of discarding flags supplied through
9 | # CXXFLAGS or -DCMAKE_CXX_FLAGS.
10 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -O2 -ggdb3")
11 |
12 | if (FLTO)
13 |   # -flto=thin is Clang's ThinLTO mode; executables are linked with lld.
14 |   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -flto=thin")
15 |   set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fuse-ld=lld")
16 | endif()
17 | if (SANITIZE)
18 |   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address,undefined")
19 | endif()
20 |
21 | # Configure lib/ into the tinykvm/ binary directory.
22 | add_subdirectory(lib tinykvm)
23 |
24 | # Publish the architecture both as a per-architecture flag and as a string.
25 | # NOTE(review): TINYKVM_ARCH is not set in this file; presumably lib/ provides it - confirm.
26 | target_compile_definitions(tinykvm PUBLIC
27 |   TINYKVM_ARCH_${TINYKVM_ARCH}=1
28 |   TINYKVM_ARCH="${TINYKVM_ARCH}")
29 |
30 | # Executables built from src/, each linked against the tinykvm library.
31 | add_executable(bench
32 |   src/bench.cpp
33 | )
34 | target_link_libraries(bench PRIVATE tinykvm)
35 |
36 | add_executable(tinytest
37 |   src/tests.cpp
38 | )
39 | target_link_libraries(tinytest PRIVATE tinykvm)
40 |
41 | add_executable(simplekvm
42 |   src/simple.cpp
43 | )
44 | target_link_libraries(simplekvm PRIVATE tinykvm)
45 |
46 | add_executable(storagekvm
47 |   src/storage.cpp
48 | )
49 | target_link_libraries(storagekvm PRIVATE tinykvm)
50 |
--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | # Code of Conduct
2 |
3 | In the interest of fostering an open and welcoming environment, we as
4 | contributors and maintainers pledge to make participation in our project
5 | and our community a harassment-free experience for everyone.
6 |
7 | ## Expected Behavior
8 |
9 | - Be respectful, considerate, and constructive in all communications.
10 | - Value diverse perspectives and experiences.
11 | - Focus on ideas, not individuals.
12 | - Assume good intentions and engage in positive, open dialogue.
13 |
14 | ## Unacceptable Behavior
15 |
16 | - Harassment, hate speech, or derogatory comments.
17 | - Personal attacks or demeaning remarks.
18 | - Any conduct that could be reasonably perceived as unprofessional.
19 |
20 | ## Reporting Issues
21 |
22 | If you experience or witness unacceptable behavior, please contact
23 | compliance@varnish-software.com. All reports will be handled with confidentiality
24 | and prompt attention.
25 |
26 | *This Code of Conduct is adapted from the Contributor Covenant v2.1*
27 | (see [Contributor Covenant](https://www.contributor-covenant.org/version/2/1/code_of_conduct.html) for more details).
28 |
--------------------------------------------------------------------------------
/CONTRIBUTOR_LICENSE_AGREEMENT.md:
--------------------------------------------------------------------------------
1 | # TinyKVM Contributor License Agreement
2 |
3 | Thank you for your interest in contributing to TinyKVM and VMOD-TinyKVM
4 | (the "Project"). By submitting a Contribution to this Project, you agree to
5 | the following terms.
6 |
7 | ## 1. Definitions
8 |
9 | - **Contribution**: Any code, documentation, or other materials you submit.
10 | - **Project**: The TinyKVM/VMOD-TinyKVM codebase, distributed under GPL‑3.0 for
11 | open source purposes and subject to a commercial license by Varnish Software.
12 |
13 | ## 2. Grant of Rights
14 |
15 | By contributing, you grant Varnish Software and the Project a worldwide,
16 | royalty‑free, non‑exclusive, perpetual, and irrevocable license to use,
17 | reproduce, modify, and distribute your Contribution under the terms of:
18 | - The GNU General Public License (GPL‑3.0), and/or
19 | - A commercial license as determined by Varnish Software.
20 |
21 | ## 3. Representations and Warranties
22 |
23 | You represent that:
24 | - Your Contribution is your original work.
25 | - You have the right to grant the above licenses.
26 | - Your Contribution does not infringe on any third-party rights.
27 |
28 | ## 4. Patent License
29 |
30 | You grant a patent license to any patents you hold that are necessarily infringed
31 | by your Contribution.
32 |
33 | ## 5. Acknowledgement
34 |
35 | By submitting your Contribution, you acknowledge that you have read and agree
36 | to the terms of this Contributor License Agreement.
37 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | TinyKVM and VMOD-TinyKVM
2 | Copyright (C) 2025 Varnish Software
3 |
4 | This program is free software: you can redistribute it and/or modify
5 | it under the terms of the GNU General Public License as published by
6 | the Free Software Foundation, version 3.
7 |
8 | This program is distributed in the hope that it will be useful,
9 | but WITHOUT ANY WARRANTY; without even the implied warranty of
10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 | GNU General Public License for more details.
12 |
13 | You should have received a copy of the GNU General Public License
14 | along with this program. If not, see <https://www.gnu.org/licenses/>.
15 |
16 | --------------------------------------------------------------------
17 | For commercial licensing inquiries, please contact:
18 | Varnish Software - compliance@varnish-software.com
19 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | TinyKVM userspace emulator library
2 | ==============
3 |
4 | TinyKVM is a simple, slim and specialized userspace emulator library with _native performance_.
5 |
6 | TinyKVM is designed to execute regular Linux programs and also excels at request-based workloads in high-performance HTTP caches and web servers.
7 |
8 | KVM is the most robust, battle-hardened virtualization API that exists right now. It is only 40k LOC in the kernel, and it is the foundation of the modern public cloud. TinyKVM uses only a fraction of the KVM API.
9 |
10 |
11 | ## Userspace Emulation
12 |
13 | Userspace emulation means running userspace programs. You can take a regular Linux program that you just built in your terminal and run it in TinyKVM. It will have the same exact run-time, the same exact CPU features and so on.
14 |
15 | The rule-of-thumb is thus: If you can run it locally on your machine, you can run it in TinyKVM, at the same speed.
16 |
17 | But there are some differences:
18 |
19 | - TinyKVM has an execution timeout feature, allowing automatic stopping of stuck programs
20 | - TinyKVM has memory limits
21 | - TinyKVM can fork an initialized program into hundreds of pre-initialized VMs
22 | - TinyKVM can load programs while preferring hugepages, leading to performance gains
23 |
24 |
25 | ## Hardware Virtualization
26 |
27 | A very understated feature of running directly on the CPU using hardware virtualization is that you don't need fancy toolchains to build programs. This is a most surprising and welcome feature as building and working with other architectures is often a struggle.
28 |
29 | Secondly, as CPUs evolve, so does TinyKVM. It never has to be updated, yet it will continue to run at native speeds on your CPU.
30 |
31 |
32 | ## Licensing
33 |
34 | TinyKVM and VMOD-TinyKVM are released under a dual licensing model:
35 |
36 | - **Open Source License**: GPL‑3.0 (see [LICENSE](LICENSE)).
37 | - **Commercial License**: Available under terms controlled by Varnish Software.
38 |
39 | For commercial licensing inquiries, please contact:
40 | compliance@varnish-software.com.
41 |
42 | ## Contributing
43 |
44 | We welcome contributions! By submitting a pull request or other contribution,
45 | you agree to our [Contributor License Agreement](CONTRIBUTOR_LICENSE_AGREEMENT.md)
46 | and our [Code of Conduct](CODE_OF_CONDUCT.md).
47 |
48 | For details on how to contribute, please refer to the two documents linked above.
49 |
--------------------------------------------------------------------------------
/fuzz/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | cmake_minimum_required(VERSION 3.9)
2 | project(tinykvm_fuzz CXX)
3 |
4 | # libFuzzer ships with Clang, so reject other compilers before configuring the library.
5 | if (NOT CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
6 |   message(FATAL_ERROR "libfuzzer is part of the Clang compiler suite.")
7 | endif()
8 |
9 | # Sanitizers combined with the fuzzer instrumentation.
10 | # Alternative: "memory,undefined" together with "-fsanitize-memory-track-origins".
11 | set(FUZZER_MODE "address,undefined")
12 |
13 | # Build the library with sanitizer and fuzzer instrumentation; fuzzer-no-link
14 | # keeps the fuzzer runtime out of the library itself.
15 | add_subdirectory(../lib lib)
16 | target_compile_options(tinykvm PUBLIC "-fsanitize=${FUZZER_MODE},fuzzer-no-link")
17 | target_compile_definitions(tinykvm PUBLIC FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION=1)
18 |
19 | set(SOURCES
20 |   fuzz.cpp
21 | )
22 |
23 | # add_fuzzer(<NAME> <MODE>)
24 | #   NAME - name of the fuzzer executable, built from ${SOURCES}.
25 | #   MODE - preprocessor symbol defined to 1 for this target (e.g. FUZZ_ELF);
26 | #          fuzz.cpp uses it to select the fuzz target.
27 | function(add_fuzzer NAME MODE)
28 |   add_executable(${NAME} ${SOURCES})
29 |   target_link_libraries(${NAME} PUBLIC tinykvm)
30 |   set_target_properties(${NAME} PROPERTIES CXX_STANDARD 17)
31 |   # Link flags are passed through target_link_libraries because
32 |   # target_link_options is not available in CMake 3.9.
33 |   target_link_libraries(${NAME} PUBLIC "-fsanitize=${FUZZER_MODE},fuzzer")
34 |   target_link_libraries(${NAME} PUBLIC "-fuse-ld=lld")
35 |   target_compile_definitions(${NAME} PRIVATE ${MODE}=1)
36 | endfunction()
37 |
38 | add_fuzzer(elffuzzer FUZZ_ELF)
39 |
--------------------------------------------------------------------------------
/fuzz/fuzz.cpp:
--------------------------------------------------------------------------------
1 | #include <tinykvm/machine.hpp> // NOTE(review): header name reconstructed from the lib/tinykvm layout - confirm
2 | #include "helpers.cpp"
3 |
4 | static const std::vector<uint8_t> empty; // NOTE(review): element type reconstructed - confirm
5 | static constexpr float TIMEOUT = 5.0f;
6 |
7 | const tinykvm::MachineOptions options {
8 | };
9 | static tinykvm::Machine* machine;
10 |
11 | // In order to be able to inspect a coredump we want to
12 | // crash on every ASAN error.
13 | extern "C" void __asan_on_error()
14 | {
15 | abort();
16 | }
17 | extern "C" void __msan_on_error()
18 | {
19 | abort();
20 | }
21 |
22 | static inline void fuzz_elf_loader(const uint8_t* data, size_t len)
23 | {
24 | using namespace tinykvm;
25 | const std::string_view bin {(const char*) data, len};
26 | try {
27 | machine->reset_to(bin, options);
28 | //machine->run(TIMEOUT);
29 | } catch (const MachineException& e) {
30 | //printf(">>> Exception: %s\n", e.what());
31 | }
32 | }
33 |
34 | // libFuzzer entry point. Creates the shared machine on the first call, then
35 | // hands the input to the fuzz target selected at compile time.
36 | // libFuzzer declares this callback as returning int; 0 means the input was processed.
37 | extern "C"
38 | int LLVMFuzzerTestOneInput(const uint8_t* data, size_t len)
39 | {
40 |     if (machine == nullptr) {
41 |         tinykvm::Machine::init();
42 |
43 |         machine = new tinykvm::Machine { std::string_view{}, options };
44 |         // Unhandled system calls are ignored while fuzzing.
45 |         machine->install_unhandled_syscall_handler([] (auto&, unsigned) {});
46 |     }
47 | #if defined(FUZZ_ELF)
48 |     fuzz_elf_loader(data, len);
49 | #else
50 |     #error "Unknown fuzzing mode"
51 | #endif
52 |     return 0;
53 | }
54 |
--------------------------------------------------------------------------------
/fuzz/fuzzer.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | # Builds the ELF fuzzer into .build/ and runs it.
3 | # Any extra arguments are forwarded to the fuzzer binary.
4 | set -e
5 |
6 | export ASAN_OPTIONS=disable_coredump=0::unmap_shadow_on_exit=1::handle_segv=0::handle_sigfpe=0
7 | export CXX=clang++-14
8 |
9 | mkdir -p .build
10 | pushd .build
11 | cmake .. -DCMAKE_BUILD_TYPE=Debug
12 | make -j4
13 | popd
14 |
15 | # Keep the options in one place so the echoed command matches what is run.
16 | FUZZER_ARGS=(-max_len=8192 -handle_fpe=0 -handle_segv=0 -handle_abrt=0)
17 | echo "Starting: ./.build/elffuzzer ${FUZZER_ARGS[*]} $*"
18 | ./.build/elffuzzer "${FUZZER_ARGS[@]}" "$@"
19 |
--------------------------------------------------------------------------------
/fuzz/helpers.cpp:
--------------------------------------------------------------------------------
1 | #include <climits> // CHAR_BIT
2 |
3 | /* Weak definition of __muloti4: returns a * b and sets *overflow to 1 when the signed 128-bit product overflows, else 0.
4 | Original note: it is necessary to link with libgcc when fuzzing. See llvm.org/PR30643 for details. */
5 | __attribute__((weak, no_sanitize("undefined")))
6 | extern "C" __int128_t
7 | __muloti4(__int128_t a, __int128_t b, int* overflow) {
8 | const int N = (int)(sizeof(__int128_t) * CHAR_BIT); // width of the type in bits
9 | const __int128_t MIN = (__int128_t)1 << (N - 1); // only the sign bit set
10 | const __int128_t MAX = ~MIN; // every bit except the sign bit set
11 | *overflow = 0;
12 | __int128_t result = a * b; // computed up front; the checks below only decide the overflow flag
13 | if (a == MIN) { // MIN * b is flagged unless b is 0 or 1
14 | if (b != 0 && b != 1)
15 | *overflow = 1;
16 | return result;
17 | }
18 | if (b == MIN) { // symmetric case for b
19 | if (a != 0 && a != 1)
20 | *overflow = 1;
21 | return result;
22 | }
23 | __int128_t sa = a >> (N - 1); // sign mask of a; presumably 0 or -1 with an arithmetic shift
24 | __int128_t abs_a = (a ^ sa) - sa; // absolute value of a when sa is 0 or -1
25 | __int128_t sb = b >> (N - 1); // sign mask of b
26 | __int128_t abs_b = (b ^ sb) - sb; // absolute value of b
27 | if (abs_a < 2 || abs_b < 2) // a factor of magnitude 0 or 1 cannot overflow
28 | return result;
29 | if (sa == sb) { // same sign: the product is positive and must not exceed MAX
30 | if (abs_a > MAX / abs_b)
31 | *overflow = 1;
32 | } else { // opposite signs: the product is negative and must not go below MIN
33 | if (abs_a > MIN / -abs_b)
34 | *overflow = 1;
35 | }
36 | return result;
37 | }
38 |
--------------------------------------------------------------------------------
/guest/.gdbinit:
--------------------------------------------------------------------------------
1 | file guest.elf
2 | layout next
3 | layout next
4 | target remote localhost:2159
5 | break main
6 |
--------------------------------------------------------------------------------
/guest/STREAM/.gitignore:
--------------------------------------------------------------------------------
1 | stream
2 |
--------------------------------------------------------------------------------
/guest/STREAM/build.sh:
--------------------------------------------------------------------------------
1 | gcc-11 -static -O3 -march=native stream.c -o stream
2 |
--------------------------------------------------------------------------------
/guest/STREAM/results.txt:
--------------------------------------------------------------------------------
1 | Natively run:
2 | -------------------------------------------------------------
3 | STREAM version $Revision: 5.10 $
4 | -------------------------------------------------------------
5 | This system uses 8 bytes per array element.
6 | -------------------------------------------------------------
7 | Array size = 8000000 (elements), Offset = 0 (elements)
8 | Memory per array = 61.0 MiB (= 0.1 GiB).
9 | Total memory required = 183.1 MiB (= 0.2 GiB).
10 | Each kernel will be executed 10 times.
11 | The *best* time for each kernel (excluding the first iteration)
12 | will be used to compute the reported bandwidth.
13 | -------------------------------------------------------------
14 | Your clock granularity/precision appears to be 7 microseconds.
15 | Each test below will take on the order of 8604 microseconds.
16 | (= 1229 clock ticks)
17 | Increase the size of the arrays if this shows that
18 | you are not getting at least 20 clock ticks per test.
19 | -------------------------------------------------------------
20 | WARNING -- The above is only a rough guideline.
21 | For best results, please be sure you know the
22 | precision of your system timer.
23 | -------------------------------------------------------------
24 | Function Best Rate MB/s Avg time Min time Max time
25 | Copy: 21727.7 0.006434 0.005891 0.006807
26 | Scale: 13892.0 0.009927 0.009214 0.010536
27 | Add: 14385.4 0.014087 0.013347 0.015086
28 | Triad: 14826.3 0.014147 0.012950 0.015277
29 | -------------------------------------------------------------
30 | Solution Validates: avg error less than 1.000000e-13 on all three arrays
31 | -------------------------------------------------------------
32 |
33 | Inside a VM:
34 | -------------------------------------------------------------
35 | STREAM version $Revision: 5.10 $
36 | -------------------------------------------------------------
37 | This system uses 8 bytes per array element.
38 | -------------------------------------------------------------
39 | Array size = 8000000 (elements), Offset = 0 (elements)
40 | Memory per array = 61.0 MiB (= 0.1 GiB).
41 | Total memory required = 183.1 MiB (= 0.2 GiB).
42 | Each kernel will be executed 10 times.
43 | The *best* time for each kernel (excluding the first iteration)
44 | will be used to compute the reported bandwidth.
45 | -------------------------------------------------------------
46 | Your clock granularity/precision appears to be 1 microseconds.
47 | Each test below will take on the order of 8112 microseconds.
48 | (= 8112 clock ticks)
49 | Increase the size of the arrays if this shows that
50 | you are not getting at least 20 clock ticks per test.
51 | -------------------------------------------------------------
52 | WARNING -- The above is only a rough guideline.
53 | For best results, please be sure you know the
54 | precision of your system timer.
55 | -------------------------------------------------------------
56 | Function Best Rate MB/s Avg time Min time Max time
57 | Copy: 21783.3 0.006461 0.005876 0.007131
58 | Scale: 13986.1 0.010020 0.009152 0.010908
59 | Add: 14413.4 0.013905 0.013321 0.015244
60 | Triad: 14648.6 0.014018 0.013107 0.014823
61 | -------------------------------------------------------------
62 | Solution Validates: avg error less than 1.000000e-13 on all three arrays
63 | -------------------------------------------------------------
64 |
65 |
66 | Calling a function in a forked VM:
67 | -------------------------------------------------------------
68 | STREAM version $Revision: 5.10 $
69 | -------------------------------------------------------------
70 | This system uses 8 bytes per array element.
71 | -------------------------------------------------------------
72 | Array size = 8000000 (elements), Offset = 0 (elements)
73 | Memory per array = 61.0 MiB (= 0.1 GiB).
74 | Total memory required = 183.1 MiB (= 0.2 GiB).
75 | Each kernel will be executed 10 times.
76 | The *best* time for each kernel (excluding the first iteration)
77 | will be used to compute the reported bandwidth.
78 | -------------------------------------------------------------
79 | Your clock granularity/precision appears to be 7 microseconds.
80 | Each test below will take on the order of 7499 microseconds.
81 | (= 1071 clock ticks)
82 | Increase the size of the arrays if this shows that
83 | you are not getting at least 20 clock ticks per test.
84 | -------------------------------------------------------------
85 | WARNING -- The above is only a rough guideline.
86 | For best results, please be sure you know the
87 | precision of your system timer.
88 | -------------------------------------------------------------
89 | Function Best Rate MB/s Avg time Min time Max time
90 | Copy: 21473.1 0.007722 0.005961 0.008135
91 | Scale: 13848.3 0.012373 0.009243 0.012904
92 | Add: 14665.4 0.016909 0.013092 0.016773
93 | Triad: 14174.9 0.017208 0.013545 0.016800
94 | -------------------------------------------------------------
95 | Solution Validates: avg error less than 1.000000e-13 on all three arrays
96 | -------------------------------------------------------------
97 |
--------------------------------------------------------------------------------
/guest/build.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | WARN="-Wall -Wextra"
3 | CUSTOM="-static -ffreestanding -nostdlib -fno-exceptions -fno-rtti"
4 | COMMON="-O2 -ggdb3 -march=native -fno-omit-frame-pointer $CUSTOM"
5 | FILES="src/guest.cpp src/crc32c.cpp src/start.cpp"
6 | SYMS="-Wl,--defsym=syscall_entry=0x2000"
7 |
8 | g++ $WARN $COMMON -Ttext=201000 $SYMS $FILES -o guest.elf
9 |
--------------------------------------------------------------------------------
/guest/glibc/.gdbinit:
--------------------------------------------------------------------------------
1 | file glibc
2 | layout next
3 | target remote localhost:2159
4 | break main
5 |
--------------------------------------------------------------------------------
/guest/glibc/float.c:
--------------------------------------------------------------------------------
1 | #include <stdio.h>
2 |
3 | __attribute__((noinline))
4 | static void call_other_function()
5 | {
6 | }
7 |
8 | __attribute__((noinline))
9 | static void takes_float(float f)
10 | {
11 | call_other_function();
12 | printf("Float has value: %f\n", f);
13 | }
14 |
15 | int main()
16 | {
17 | takes_float(111.0f);
18 | }
19 |
--------------------------------------------------------------------------------
/guest/glibc/float.gdb:
--------------------------------------------------------------------------------
1 | file float
2 | layout prev
3 | layout next
4 | target remote localhost:2159
5 |
6 | set debug remote 1
7 |
8 | break takes_float
9 | continue
10 |
--------------------------------------------------------------------------------
/guest/glibc/glibc.c:
--------------------------------------------------------------------------------
1 | #include <cassert> // NOTE(review): header names on lines 1-4 are reconstructed from the functions used below - confirm
2 | #include <cstdio>
3 | #include <cstdlib>
4 | #include <cstring>
5 | static void test_threads();
6 | extern "C" int gettid();
7 |
8 | static int threads_test_suite_ok = 0;
9 | int main()
10 | {
11 | char* test = (char *)malloc(14);
12 | strcpy(test, "Hello World!\n");
13 | printf("%.*s", 13, test);
14 |
15 | test_threads();
16 | return 0;
17 | }
18 |
19 | extern "C" __attribute__((used))
20 | void test()
21 | {
22 | /* Verify that the threads test-suite passed */
23 | assert(threads_test_suite_ok == 1);
24 | }
25 |
26 | #include <pthread.h> // NOTE(review): lines 26-28 and 30 are reconstructed from the names used below - confirm
27 | #include <sched.h>
28 | #include <string>
29 | #include <thread> // C++ threads
30 | #include <vector>
31 |
32 | struct testdata
33 | {
34 | int depth = 0;
35 | const int max_depth = 10;
36 | std::vector threads;
37 | };
38 | static pthread_mutex_t mtx;
39 |
40 | extern "C" {
41 | static void* thread_function1(void* data)
42 | {
43 | printf("Inside thread function1, x = %d\n", *(int*) data);
44 | thread_local int test = 2021;
45 | printf("test @ %p, test = %d\n", &test, test);
46 | assert(test == 2021);
47 | return NULL;
48 | }
49 | static void* thread_function2(void* data)
50 | {
51 | printf("Inside thread function2, x = %d\n", *(int*) data);
52 | thread_local int test = 2022;
53 | assert(test == 2022);
54 | pthread_mutex_lock(&mtx);
55 |
56 | printf("Yielding from thread2, expecting to be returned to main thread\n");
57 | sched_yield();
58 | printf("Returned to thread2, expecting to exit to after main thread yield\n");
59 |
60 | pthread_mutex_unlock(&mtx);
61 | pthread_exit(NULL);
62 | }
63 | static void* recursive_function(void* tdata) // tdata is a testdata*; re-spawns itself as a thread until max_depth
64 | {
65 |     auto* data = (testdata*) tdata;
66 |     data->depth++;
67 |     printf("%d: Thread depth %d / %d\n",
68 |         gettid(), data->depth, data->max_depth);
69 |     // Spawn the next level and record its handle in data->threads.
70 |     if (data->depth < data->max_depth)
71 |     {
72 |         pthread_t t;
73 |         int res = pthread_create(&t, NULL, recursive_function, data);
74 |         if (res != 0) { // pthread_create returns a positive error number on failure
75 |             printf("Failed to create thread!\n");
76 |             return NULL;
77 |         }
78 |         data->threads.push_back(t);
79 |     }
80 |     printf("%d: Thread yielding %d / %d\n",
81 |         gettid(), data->depth, data->max_depth);
82 |     sched_yield();
83 |     // Undo this level's depth increment before returning.
84 |     printf("%d: Thread exiting %d / %d\n",
85 |         gettid(), data->depth, data->max_depth);
86 |     data->depth--;
87 |     return NULL;
88 | }
89 | }
90 |
91 | void test_threads()
92 | {
93 |     int x = 666;
94 |     pthread_t t1;
95 |     pthread_t t2;
96 |     int res;
97 |     pthread_mutex_init(&mtx, NULL);
98 |     // Runs the pthread checks, then a C++ std::thread check; sets threads_test_suite_ok on success.
99 |     //printf("*** Testing pthread_create and sched_yield...\n");
100 |     res = pthread_create(&t1, NULL, thread_function1, &x);
101 |     if (res != 0) { // pthread_create returns a positive error number on failure
102 |         printf("Failed to create thread!\n");
103 |         return;
104 |     }
105 |     pthread_join(t1, NULL);
106 |
107 |     res = pthread_create(&t2, NULL, thread_function2, &x);
108 |     if (res != 0) { // same convention as above
109 |         printf("Failed to create thread!\n");
110 |         return;
111 |     }
112 |
113 |     printf("Yielding from main thread, expecting to return to thread2\n");
114 |     // Ride back to thread2 using contested lock
115 |     pthread_mutex_lock(&mtx);
116 |     pthread_mutex_unlock(&mtx);
117 |     printf("After yielding from main thread, looking good!\n");
118 |     // remove the thread
119 |     pthread_join(t2, NULL);
120 |
121 |     printf("*** Now testing recursive threads...\n");
122 |     static testdata rdata;
123 |     recursive_function(&rdata);
124 |     // now we have to yield until all the detached children also exit
125 |     printf("*** Yielding until all children are dead!\n");
126 |     while (rdata.depth > 0) sched_yield();
127 |
128 |     printf("*** Joining until all children are freed!\n");
129 |     for (auto pt : rdata.threads) pthread_join(pt, NULL);
130 |     // Finally check that a C++ std::thread receives its arguments intact.
131 |     auto* cpp_thread = new std::thread(
132 |         [] (int a, long long b, std::string c) -> void {
133 |             printf("Hello from a C++ thread\n");
134 |             assert(a == 1);
135 |             assert(b == 2LL);
136 |             assert(c == std::string("test"));
137 |             printf("C++ thread arguments are OK, yielding...\n");
138 |             std::this_thread::yield();
139 |             printf("C++ thread exiting...\n");
140 |         },
141 |         1, 2L, std::string("test"));
142 |     printf("Returned to main. Yielding back...\n");
143 |     std::this_thread::yield();
144 |     printf("Returned to main. Joining the C++ thread\n");
145 |     cpp_thread->join();
146 |     printf("Deleting the C++ thread\n");
147 |     delete cpp_thread;
148 |
149 |     printf("SUCCESS\n");
150 |     threads_test_suite_ok = 1;
151 | }
152 |
--------------------------------------------------------------------------------
/guest/glibc/s.gdb:
--------------------------------------------------------------------------------
1 | file glibc
2 | layout next
3 | target remote localhost:2159
4 | continue
5 |
--------------------------------------------------------------------------------
/guest/go/main.go:
--------------------------------------------------------------------------------
1 | package main
2 | import "fmt"
3 |
4 | func main() {
5 | fmt.Println("hello world")
6 | }
7 |
--------------------------------------------------------------------------------
/guest/go/s.gdb:
--------------------------------------------------------------------------------
1 | file go
2 | #break proc.go:1208
3 | #break asm_amd64.s:272
4 | #break proc.go:113
5 | #break asm_amd64.s:368
6 | break proc.go:4297
7 | layout next
8 | target remote localhost:2159
9 |
--------------------------------------------------------------------------------
/guest/mini/build.sh:
--------------------------------------------------------------------------------
1 | nasm -f elf64 mini.asm -o mini.o
2 | gcc -static -Wall -nostartfiles -Ttext=0x200000 -Wl,-utest mini.o -o mini
3 |
--------------------------------------------------------------------------------
/guest/mini/mini.asm:
--------------------------------------------------------------------------------
1 | [BITS 64]
2 | global _start:function
3 | global test:function
4 | global rexit:function
5 |
6 | SECTION .text
7 |
8 | ALIGN 0x8
9 | _start:
10 | mov di, 0x1337
11 |
12 | mov rsp, 0x1ff000
13 | push rdi
14 |
15 | mov ax, 60 ;; exit
16 | syscall
17 |
18 | ALIGN 0x8
19 | test:
20 | ret
21 |
22 | ALIGN 0x8
23 | rexit:
24 | mov rdi, rax
25 | mov rax, 60 ;; exit
26 | syscall
27 |
--------------------------------------------------------------------------------
/guest/mini/s.gdb:
--------------------------------------------------------------------------------
1 | file mini
2 | layout next
3 | layout next
4 | target remote localhost:2159
5 |
--------------------------------------------------------------------------------
/guest/musl/.gdbinit:
--------------------------------------------------------------------------------
1 | file musl
2 | layout next
3 | target remote localhost:2159
4 | #break __init_tls
5 | break main
6 |
--------------------------------------------------------------------------------
/guest/musl/build.sh:
--------------------------------------------------------------------------------
1 | musl-gcc -static -O2 -ggdb3 musl.c -o musl
2 | musl-gcc -static -O2 -ggdb3 simple.c -o simple
3 | gcc -static -O2 -ggdb3 musl.c -o glibc
4 |
--------------------------------------------------------------------------------
/guest/musl/glibc.gdb:
--------------------------------------------------------------------------------
1 | file glibc
2 | layout next
3 | layout next
4 | target remote localhost:2159
5 |
--------------------------------------------------------------------------------
/guest/musl/musl.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 | static void test_threads();
5 |
6 | static long nprimes = 0;
7 |
8 | __attribute__((noinline))
9 | void test_ud2()
10 | {
11 | asm("ud2");
12 | }
13 |
14 | int main(int argc, char** argv)
15 | {
16 | printf("Hello musl World!\n");
17 |
18 | static const int N = 1000000;
19 | char prime[N];
20 | memset(prime, 1, sizeof(prime));
21 | for (long n = 2; n < N; n++)
22 | {
23 | if (prime[n]) {
24 | nprimes += 1;
25 | for (long i = n*n; i < N; i += n)
26 | prime[i] = 0;
27 | }
28 | }
29 |
30 | test_threads();
31 | //test_ud2();
32 | return 0;
33 | }
34 |
35 | #include
36 | static int t = 0;
37 |
38 | __attribute__((used))
39 | void bench()
40 | {
41 | //assert(t == 0);
42 | //t = 1;
43 | assert(nprimes == 78498);
44 | }
45 |
46 | __attribute__((used))
47 | void bench_write()
48 | {
49 | assert(t == 0);
50 | t = 1;
51 | assert(nprimes == 78498);
52 | }
53 |
54 | asm(".global one_vmexit\n" \
55 | ".type one_vmexit, function\n" \
56 | "one_vmexit:\n" \
57 | " out %ax, $1\n" \
58 | " ret\n");
59 | extern void one_vmexit();
60 |
61 | __attribute__((used))
62 | void bench_vmexits(int count)
63 | {
64 | while (count--) one_vmexit();
65 | }
66 |
67 | #include
68 | #include
69 |
70 | static void* thread_function1(void* data)
71 | {
72 | printf("Inside thread function1, x = %d\n", *(int*) data);
73 | static __thread int test = 2021;
74 | printf("test @ %p, test = %d\n", &test, test);
75 | assert(test == 2021);
76 | fflush(stdout);
77 | pthread_exit(NULL);
78 | }
79 | static void* thread_function2(void* data)
80 | {
81 | printf("Inside thread function2, x = %d\n", *(int*) data);
82 | static __thread int test = 2022;
83 | assert(test == 2022);
84 |
85 | printf("Yielding from thread2, expecting to be returned to main thread\n");
86 | sched_yield();
87 | printf("Returned to thread2, expecting to exit to after main thread yield\n");
88 |
89 | pthread_exit(NULL);
90 | }
91 | /* Creates and joins thread 1, then creates thread 2 and yields once before joining it; prints SUCCESS at the end. */
92 | void test_threads()
93 | {
94 | int x = 666;
95 | pthread_t t1;
96 | pthread_t t2;
97 | int res;
98 |
99 | printf("*** Testing pthread_create and sched_yield...\n");
100 | res = pthread_create(&t1, NULL, thread_function1, &x);
101 | if (res != 0) { /* pthread_create reports failure as a positive error number, never a negative value */
102 | printf("Failed to create thread!\n");
103 | return;
104 | }
105 | pthread_join(t1, NULL);
106 |
107 | res = pthread_create(&t2, NULL, thread_function2, &x);
108 | if (res != 0) { /* same convention as above */
109 | printf("Failed to create thread!\n");
110 | return;
111 | }
112 |
113 | printf("Yielding from main thread, expecting to return to thread2\n");
114 | // return back to finish thread2
115 | sched_yield();
116 | printf("After yielding from main thread, looking good!\n");
117 | // remove the thread
118 | pthread_join(t2, NULL);
119 |
120 | printf("SUCCESS\n");
121 | }
122 |
--------------------------------------------------------------------------------
/guest/musl/s.gdb:
--------------------------------------------------------------------------------
1 | set debuginfod enabled on
2 | file musl
3 | layout next
4 | layout next
5 | target remote localhost:2159
6 |
--------------------------------------------------------------------------------
/guest/musl/simple.c:
--------------------------------------------------------------------------------
1 | #include
2 |
3 | int main()
4 | {
5 | return 0;
6 | }
7 |
8 | static int t = 0;
9 |
10 | __attribute__((used))
11 | void bench()
12 | {
13 | //assert(t == 0);
14 | //t = 1;
15 | }
16 |
17 | __attribute__((used))
18 | void bench_write()
19 | {
20 | assert(t == 0);
21 | t = 1;
22 | }
23 |
24 | asm(".global one_vmexit\n" \
25 | ".type one_vmexit, function\n" \
26 | "one_vmexit:\n" \
27 | " out %ax, $1\n" \
28 | " ret\n");
29 | extern void one_vmexit();
30 |
31 | __attribute__((used))
32 | void bench_vmexits(int count)
33 | {
34 | while (count--) one_vmexit();
35 | }
36 |
--------------------------------------------------------------------------------
/guest/nim/build.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
--------------------------------------------------------------------------------
/guest/nim/program.nim:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/varnish/tinykvm/8e9d2497a2c6f84cb539af00aaa69af146af309e/guest/nim/program.nim
--------------------------------------------------------------------------------
/guest/nim_storage/.gitignore:
--------------------------------------------------------------------------------
1 | *.syms
2 | storage
3 | main
4 | *_nimcache/
5 |
--------------------------------------------------------------------------------
/guest/nim_storage/build.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | # Builds the two Nim guests: `storage` (text segment at 0x44000000) and `main`,
3 | # which links against storage's remote* symbols through storage.syms.
4 |
5 | # Stop before deleting any caches if nim is not available.
6 | command -v nim >/dev/null || { echo "nim not found in PATH" >&2; exit 1; }
7 |
8 | # Take the text after the last ": " in the `whereis nim` output, then replace everything from the first "bin" onward with "lib".
9 | NIM_LIBS="$(whereis nim)"
10 | NIM_LIBS="${NIM_LIBS##*: }"
11 | NIM_LIBS="${NIM_LIBS/bin*/lib}"
12 | echo ">>> Nim libs: $NIM_LIBS"
13 |
14 | WARN="-Wno-discarded-qualifiers"
15 |
16 | set -ev
17 | rm -rf "$PWD/storage_nimcache"
18 | rm -rf "$PWD/main_nimcache"
19 |
20 | nim c --nimcache:"$PWD/storage_nimcache" --colors:on --os:linux --mm:arc --noMain --app:lib -d:release -c storage.nim
21 | musl-gcc -static -O2 -DSTORAGE=1 -Wl,-Ttext-segment=0x44000000 $WARN -I"$NIM_LIBS" main.c storage_nimcache/*.c -o storage
22 |
23 | # The wildcard patterns are for objcopy (-w), so they are quoted to keep the shell from globbing them.
24 | objcopy -w --extract-symbol '--strip-symbol=!remote*' '--strip-symbol=*' storage storage.syms
25 |
26 | nim c --nimcache:"$PWD/main_nimcache" --colors:on --os:linux --mm:arc --noMain --app:lib -d:release -c main.nim
27 | musl-gcc -static -O2 -Wl,--just-symbols=storage.syms $WARN -I"$NIM_LIBS" main.c main_nimcache/*.c -o main
28 |
--------------------------------------------------------------------------------
/guest/nim_storage/main.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #define ARCH_SET_FS 0x1002
4 | #define ARCH_GET_FS 0x1003
5 | extern void arch_prctl(unsigned, ...);
6 | extern void NimMain();
7 | extern long write(int, const void*, size_t);
8 | extern void _exit(int);
9 |
10 | static long stored_fs;
11 |
12 | static inline long get_fs()
13 | {
14 | long fs;
15 | arch_prctl(ARCH_GET_FS, &fs);
16 | return fs;
17 | }
18 |
19 | static int safeprint(const char* fmt, ...)
20 | {
21 | char buffer[4096];
22 |
23 | va_list va;
24 | va_start(va, fmt);
25 | int len = vsnprintf(buffer, sizeof(buffer), fmt, va);
26 | va_end(va);
27 |
28 | return write(1, buffer, len);
29 | }
30 |
31 | void restore_fs()
32 | {
33 | // XXX: Don't try to print here. WONT WORK!
34 | long old_fs = get_fs();
35 | arch_prctl(ARCH_SET_FS, stored_fs);
36 | safeprint("Restored FS 0x%lX\n", stored_fs);
37 | stored_fs = old_fs;
38 | }
39 |
40 | void quick_exit(int code)
41 | {
42 | stored_fs = get_fs();
43 | _exit(code);
44 | }
45 |
46 | int main()
47 | {
48 | // Provoke proper stdio
49 | fflush(stdout);
50 | stored_fs = get_fs();
51 |
52 | NimMain();
53 | _exit(0);
54 | }
55 |
--------------------------------------------------------------------------------
/guest/nim_storage/main.nim:
--------------------------------------------------------------------------------
1 | proc quick_exit(code: int) {.importc.}
2 | proc remote_calc(v: int): int {.importc.}
3 | proc remote_string(): string {.importc.}
4 | proc do_calculation() {.cdecl, exportc.}
5 | import json
6 |
7 | var j = %* {
8 | "name": "Hello",
9 | "email": "World",
10 | "books": ["Foundation"]
11 | }
12 |
13 | # Executed by fork of master VM
14 | proc do_calculation() =
15 | echo "Hello Nim World!\n" & j.pretty()
16 | echo "Remote calculation of 21 is " & $remote_calc(21)
17 |
18 | # Executed by master VM
19 | echo "Remote calculation of 21 is " & $remote_calc(21)
20 | echo "Remote string is " & remote_string()
21 | quick_exit(remote_calc(21))
22 |
--------------------------------------------------------------------------------
/guest/nim_storage/storage.nim:
--------------------------------------------------------------------------------
1 | proc quick_exit(code: int) {.importc.}
2 | proc remote_calc(v: int): int {.cdecl, exportc.}
3 | proc remote_string(): string {.cdecl, exportc.}
4 | import json
5 |
6 | var jj = %* {
7 | "name": "Hello",
8 | "email": "World",
9 | "books": ["Foundation"]
10 | }
11 |
12 | proc remote_calc(v: int): int =
13 | echo "Nim calculation!"
14 | return v * 2
15 |
16 | proc remote_string(): string =
17 | return jj.pretty()
18 |
19 | echo "Hello Nim Storage World!\njj: " & jj.pretty()
20 | quick_exit(0)
21 |
--------------------------------------------------------------------------------
/guest/src/api.hpp:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include
3 | #include
4 |
5 | extern "C" long syscall(int scall, ...);
6 | extern "C" long native_syscall(int scall, ...);
7 | extern "C" __attribute__((noreturn)) void exit(int code) __THROW;
8 |
9 | #define PUBLIC(x) extern "C" __attribute__((used)) x
10 |
11 |
12 | extern uint32_t crc32c_sse42(const uint8_t* buffer, size_t len);
13 |
14 | inline uint32_t crc32c_sse42(const char* buffer, size_t len) {
15 | return crc32c_sse42((const uint8_t *)buffer, len);
16 | }
17 |
--------------------------------------------------------------------------------
/guest/src/crc32c.cpp:
--------------------------------------------------------------------------------
1 | #include "api.hpp"
2 |
3 | #include
4 |
5 | // Reports whether the address of `buffer` has no bits set under the (align-1) mask, i.e. align is expected to be a power of two.
6 | inline bool ____is__aligned(const void* buffer, const int align) noexcept {
7 | const uintptr_t address = (uintptr_t) buffer;
8 | return (address & (align-1)) == 0;
9 | }
10 |
11 | // CRC32-C of buffer[0, len) via the SSE4.2 crc32 intrinsics; the seed and the final XOR are both 0xFFFFFFFF.
12 | uint32_t crc32c_sse42(const uint8_t* buffer, size_t len)
13 | {
14 | uint32_t crc = 0xFFFFFFFF;
15 | // Single bytes until the pointer is 4-byte aligned or the input runs out
16 | for (; len > 0 && !____is__aligned(buffer, 4); buffer++, len--) {
17 | crc = _mm_crc32_u8(crc, *buffer);
18 | }
19 | // Bulk: four 32-bit words (16 bytes) per iteration
20 | for (; len >= 16; buffer += 16, len -= 16) {
21 | const uint32_t* words = (const uint32_t*) buffer;
22 | for (int w = 0; w < 4; w++)
23 | crc = _mm_crc32_u32(crc, words[w]);
24 | }
25 | // Then one 32-bit word at a time
26 | for (; len >= 4; buffer += 4, len -= 4) {
27 | crc = _mm_crc32_u32(crc, *(const uint32_t*) buffer);
28 | }
29 | // Tail of at most 3 bytes: an optional 16-bit piece, then an optional byte
30 | if (len & 2) {
31 | crc = _mm_crc32_u16(crc, *(const uint16_t*) buffer);
32 | buffer += 2;
33 | }
34 | if (len & 1) {
35 | crc = _mm_crc32_u8(crc, *buffer);
36 | }
37 | return ~crc;
38 | }
39 |
--------------------------------------------------------------------------------
/guest/src/guest.cpp:
--------------------------------------------------------------------------------
1 | #include "api.hpp"
2 |
3 | size_t strlen(const char *str)
4 | {
5 | const char *s = str;
6 | while (*s) s++;
7 | return s - str;
8 | }
9 |
10 | inline void kprint(const char* string, size_t len) {
11 | syscall(1, string, len);
12 | }
13 | inline void kprint(const char* string) {
14 | kprint(string, strlen(string));
15 | }
16 |
17 | int main(int argc, char** argv)
18 | {
19 | /* for (int i = 0; i < argc; i++) {
20 | kprint(argv[i]);
21 | }*/
22 |
23 | //asm("hlt");
24 | //syscall(158, 0x1003, 0x5678);
25 | //native_syscall(158, 0x1003, 0x5678);
26 |
27 | return 0x123;
28 | }
29 |
30 | struct Data {
31 | char buffer[128];
32 | size_t len;
33 | };
34 |
35 | #include
36 | PUBLIC(uint32_t empty(const Data& data))
37 | {
38 | volatile __m256i xmm0;
39 | xmm0 = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
40 |
41 | kprint(data.buffer, data.len);
42 |
43 | return crc32c_sse42(data.buffer, data.len);;
44 | }
45 |
--------------------------------------------------------------------------------
/guest/src/start.cpp:
--------------------------------------------------------------------------------
1 | #include "api.hpp"
2 |
3 | asm(".global syscall\n"
4 | "syscall:\n"
5 | " add $0xffffa000, %edi\n"
6 | " movl $0, (%rdi)\n"
7 | " ret\n");
8 |
9 | asm(".global native_syscall\n"
10 | "native_syscall:\n"
11 | " mov %rdi, %rax\n"
12 | " mov %rsi, %rdi\n"
13 | " mov %rdx, %rsi\n"
14 | " mov %rcx, %rdx\n"
15 | " syscall\n"
16 | " ret\n");
17 |
18 | asm(".global rexit\n"
19 | "rexit:\n"
20 | " mov %rax, %rdi\n"
21 | " out %ax, $60\n");
22 |
23 | asm(".global _start\n"
24 | "_start:\n"
25 | " xor %ebp, %ebp\n"
26 | " pop %rdi\n"
27 | " mov %rsp, %rsi\n"
28 | " call libc_start\n"
29 | " jmp rexit\n");
30 |
31 |
32 | extern int main(int, char**);
33 | // Runtime entry called from _start with argc/argv: invokes every __init_array entry in order, then returns main()'s result.
34 | extern "C"
35 | int libc_start(int argc, char** argv)
36 | {
37 | /* Global constructors */
38 | extern void(*__init_array_start [])();
39 | extern void(*__init_array_end [])();
40 | const int count = __init_array_end - __init_array_start;
41 | for (int i = 0; i < count; i++) {
42 | __init_array_start[i]();
43 | }
44 |
45 | return main(argc, argv);
46 | }
47 |
48 | extern "C" __attribute__((noreturn)) void exit(int code) __THROW {
49 | syscall(0, code);
50 | __builtin_unreachable();
51 | }
52 |
--------------------------------------------------------------------------------
/guest/storage/.gitignore:
--------------------------------------------------------------------------------
1 | *.syms
2 | storage
3 | main
4 |
--------------------------------------------------------------------------------
/guest/storage/build.sh:
--------------------------------------------------------------------------------
1 | set -v
2 | gcc-12 -static -O2 -Wl,-Ttext-segment=0x44000000 storage.c -o storage
3 |
4 | objcopy -w --extract-symbol --strip-symbol=!remote* --strip-symbol=* storage storage.syms
5 | gcc-12 -static -O2 -Wl,--just-symbols=storage.syms main.c -o main
6 |
--------------------------------------------------------------------------------
/guest/storage/main.c:
--------------------------------------------------------------------------------
1 | #include
2 | extern int remote_function(int(*callback)(int), int value);
3 |
4 | static int double_int(int value)
5 | {
6 | return value * 2;
7 | }
8 |
9 | int main()
10 | {
11 | printf("Jumping to %p\n", &remote_function);
12 | fflush(stdout);
13 | return remote_function(double_int, 21);
14 | }
15 |
16 | int do_calculation(int value)
17 | {
18 | return remote_function(double_int, value);
19 | }
20 |
21 | int simple_calculation(int value)
22 | {
23 | return value;
24 | }
25 |
--------------------------------------------------------------------------------
/guest/storage/storage.c:
--------------------------------------------------------------------------------
1 | #include
2 |
3 | extern int remote_function(int (*arg)(int), int value)
4 | {
5 | return arg(value);
6 | }
7 |
8 | int main()
9 | {
10 | printf("Hello from Storage!\n");
11 | return 0;
12 | }
13 |
--------------------------------------------------------------------------------
/guest/tests/build.sh:
--------------------------------------------------------------------------------
1 | musl-gcc -static -O0 -ggdb3 test.c -o musl_test
2 | gcc -static -O0 -ggdb3 test.c -o glibc_test
3 | #g++ -static -O2 cxx_test.cpp -o cxx_test
4 |
--------------------------------------------------------------------------------
/guest/tests/cxx_test.cpp:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 | #include
5 | #include
6 |
7 | static long nprimes = 0;
8 | // Echoes at most 13 characters of argv[1], then sieves the primes below N and leaves their count in nprimes. Returns 666.
9 | int main(int argc, char** argv)
10 | {
11 | char* test = (char *)malloc(14);
12 | snprintf(test, 14, "%s", argc > 1 ? argv[1] : ""); // bounded copy: strcpy overran the 14 bytes on long input and read NULL with no argument
13 | printf("%.*s\n", 13, test);
14 | free(test);
15 |
16 | static const int N = 1000000;
17 | char prime[N];
18 | memset(prime, 1, sizeof(prime));
19 | for (long n = 2; n < N; n++)
20 | {
21 | if (prime[n]) {
22 | nprimes += 1;
23 | for (long i = n*n; i < N; i += n)
24 | prime[i] = 0;
25 | }
26 | }
27 | return 666;
28 | }
29 |
30 | extern "C" __attribute__((used))
31 | int test_return()
32 | {
33 | return 666;
34 | }
35 |
36 | extern "C" __attribute__((used))
37 | void test_ud2()
38 | {
39 | asm("ud2");
40 | }
41 |
42 | extern "C" __attribute__((used))
43 | int test_read()
44 | {
45 | assert(nprimes == 78498);
46 | return 200;
47 | }
48 |
49 | static int t = 0;
50 |
51 | extern "C" __attribute__((used))
52 | void test_write()
53 | {
54 | asm("" ::: "memory");
55 | assert(t == 0);
56 | asm("" ::: "memory");
57 | t = 1;
58 | asm("" ::: "memory");
59 | assert(t == 1);
60 | }
61 |
62 | static int cow = 0;
63 |
64 | extern "C" __attribute__((used))
65 | int test_copy_on_write()
66 | {
67 | assert(cow == 0);
68 | cow = 1;
69 | return 666;
70 | }
71 |
72 | extern "C" __attribute__((used))
73 | long test_syscall()
74 | {
75 | register long status asm("rdi") = 555;
76 | long ret = 60;
77 | asm("syscall" : "+a"(ret) : "r"(status) : "rcx", "r11", "memory");
78 | return ret;
79 | }
80 |
81 | extern "C" __attribute__((used))
82 | long test_malloc()
83 | {
84 | int *p = (int *)malloc(1024 * 1024 * 1);
85 | *p = 44;
86 | return (long)p;
87 | }
88 |
89 |
90 | #include
91 | #include
92 | #include
93 | #include "/home/gonzo/git/vmprograms/examples/lodepng/lodepng.h"
94 |
95 | inline constexpr uint32_t bgr24(uint32_t r, uint32_t g, uint32_t b) {
96 | return r | (g << 8) | (b << 16) | (255 << 24);
97 | }
98 |
99 | static constexpr std::array color_mapping {
100 | bgr24(66, 30, 15),
101 | bgr24(25, 7, 26),
102 | bgr24(9, 1, 47),
103 | bgr24(4, 4, 73),
104 | bgr24(0, 7, 100),
105 | bgr24(12, 44, 138),
106 | bgr24(24, 82, 177),
107 | bgr24(57, 125, 209),
108 | bgr24(134, 181, 229),
109 | bgr24(211, 236, 248),
110 | bgr24(241, 233, 191),
111 | bgr24(248, 201, 95),
112 | bgr24(255, 170, 0),
113 | bgr24(204, 128, 0),
114 | bgr24(153, 87, 0),
115 | bgr24(106, 52, 3),
116 | };
117 |
118 | inline void encode_color(uint32_t& px, int count, int max_count)
119 | {
120 | px = color_mapping[count & 15];
121 | }
122 |
123 | using fractalf_t = float;
124 |
125 | // Function to draw mandelbrot set
126 | template
127 | __attribute__((optimize("unroll-loops")))
128 | std::array
129 | fractal(fractalf_t left, fractalf_t top, fractalf_t xside, fractalf_t yside)
130 | {
131 | std::array bitmap {};
132 |
133 | // setting up the xscale and yscale
134 | const fractalf_t xscale = xside / DimX;
135 | const fractalf_t yscale = yside / DimY;
136 |
137 | // scanning every point in that rectangular area.
138 | // Each point represents a Complex number (x + yi).
139 | // Iterate that complex number
140 | for (int y = 0; y < DimY / 2; y++)
141 | #pragma GCC unroll(8)
142 | for (int x = 0; x < DimX; x++)
143 | {
144 | fractalf_t c_real = x * xscale + left;
145 | fractalf_t c_imag = y * yscale + top;
146 | fractalf_t z_real = 0;
147 | fractalf_t z_imag = 0;
148 | int count = 0;
149 |
150 | // Calculate whether c(c_real + c_imag) belongs
151 | // to the Mandelbrot set or not and draw a pixel
152 | // at coordinates (x, y) accordingly
153 | // If you reach the Maximum number of iterations
154 | // and If the distance from the origin is
155 | // greater than 2 exit the loop
156 | #pragma GCC unroll 4
157 | while ((z_real * z_real + z_imag * z_imag < 4)
158 | && (count < MaxCount))
159 | {
160 | // Calculate Mandelbrot function
161 | // z = z*z + c where z is a complex number
162 | fractalf_t tempx =
163 | z_real * z_real - z_imag * z_imag + c_real;
164 | z_imag = 2 * z_real * z_imag + c_imag;
165 | z_real = tempx;
166 | count++;
167 | }
168 |
169 | encode_color(bitmap[x + y * DimX], count, MaxCount);
170 | }
171 | for (int y = 0; y < DimY / 2; y++) {
172 | memcpy(&bitmap[(DimY-1 - y) * DimX], &bitmap[y * DimX], 4 * DimX);
173 | }
174 | return bitmap;
175 | }
176 |
177 | asm(".global backend_response\n" \
178 | ".type backend_response, function\n" \
179 | "backend_response:\n" \
180 | " mov $0xFFFF, %eax\n" \
181 | " out %eax, $0\n");
182 |
183 | extern "C" void __attribute__((noreturn))
184 | backend_response(const void *t, uint64_t, const void *c, uint64_t);
185 |
186 | extern "C" __attribute__((used))
187 | long test_expensive()
188 | {
189 | constexpr int counter = 0;
190 | constexpr size_t width = 512;
191 | constexpr size_t height = 512;
192 |
193 | const fractalf_t factor = powf(2.0, counter * -0.1);
194 | const fractalf_t x1 = -1.5;
195 | const fractalf_t x2 = 2.0 * factor;
196 | const fractalf_t y1 = -1.0 * factor;
197 | const fractalf_t y2 = 2.0 * factor;
198 |
199 | auto bitmap = fractal (x1, y1, x2, y2);
200 | auto* data = (const uint8_t *)bitmap.data();
201 |
202 | std::vector png;
203 | lodepng::encode(png, data, width, height);
204 |
205 | const char ctype[] = "image/png";
206 | backend_response(ctype, sizeof(ctype)-1, png.data(), png.size());
207 | }
208 |
--------------------------------------------------------------------------------
/guest/tests/debug.sh:
--------------------------------------------------------------------------------
1 | gdb -x remote.gdb
2 |
--------------------------------------------------------------------------------
/guest/tests/remote.gdb:
--------------------------------------------------------------------------------
1 | file glibc_test
2 | target remote localhost:2159
3 | layout next
4 | layout next
5 | #set debug remote 1
6 | break test.c:32
7 | cont
8 |
--------------------------------------------------------------------------------
/guest/tests/test.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 | #include
5 | #include
6 |
7 | static long nprimes = 0;
8 | /* Echoes at most 13 characters of argv[1], then sieves the primes below N and leaves their count in nprimes. Returns 666. */
9 | int main(int argc, char** argv)
10 | {
11 | char* test = (char *)malloc(14);
12 | snprintf(test, 14, "%s", argc > 1 ? argv[1] : ""); /* bounded copy: strcpy overran the 14 bytes on long input and read NULL with no argument */
13 | printf("%.*s\n", 13, test);
14 | free(test);
15 |
16 | static const int N = 1000000;
17 | char prime[N];
18 | memset(prime, 1, sizeof(prime));
19 | for (long n = 2; n < N; n++)
20 | {
21 | if (prime[n]) {
22 | nprimes += 1;
23 | for (long i = n*n; i < N; i += n)
24 | prime[i] = 0;
25 | }
26 | }
27 | return 666;
28 | }
29 |
30 | __attribute__((used))
31 | int test_return()
32 | {
33 | return 666;
34 | }
35 |
36 | __attribute__((used))
37 | void test_ud2()
38 | {
39 | asm("ud2");
40 | }
41 |
42 | __attribute__((used))
43 | int test_read()
44 | {
45 | assert(nprimes == 78498);
46 | return 200;
47 | }
48 |
49 | static int t = 0;
50 |
51 | __attribute__((used))
52 | void test_write()
53 | {
54 | asm("" ::: "memory");
55 | assert(t == 0);
56 | asm("" ::: "memory");
57 | t = 1;
58 | asm("" ::: "memory");
59 | assert(t == 1);
60 | }
61 |
62 | static int cow = 0;
63 |
64 | __attribute__((used))
65 | int test_copy_on_write()
66 | {
67 | assert(cow == 0);
68 | cow = 1;
69 | return 666;
70 | }
71 |
72 | __attribute__((used))
73 | long test_syscall()
74 | {
75 | register long status asm("rdi") = 555;
76 | long ret = 60;
77 | asm("syscall" : "+a"(ret) : "r"(status) : "rcx", "r11", "memory");
78 | return ret;
79 | }
80 |
81 | __attribute__((used))
82 | long test_malloc()
83 | {
84 |
85 | int* p = (int *)malloc(4);
86 |
87 | return (uintptr_t) p;
88 | }
89 |
90 |
91 | __attribute__((used))
92 | int write_value(int value)
93 | {
94 | cow = value;
95 | return value;
96 | }
97 | __attribute__((used))
98 | int test_is_value(int value)
99 | {
100 | assert(cow == value);
101 | return 666;
102 | }
103 |
104 | __attribute__((used))
105 | int test_loop()
106 | {
107 | while(1);
108 | }
109 |
110 | asm(".global vcpuid\n"
111 | ".type vcpuid, @function\n"
112 | "vcpuid:\n"
113 | " mov %gs:(0x0), %eax\n"
114 | " ret\n");
115 | extern int vcpuid();
116 |
117 | __attribute__((used))
118 | int test_vcpu()
119 | {
120 | return vcpuid();
121 | }
122 |
--------------------------------------------------------------------------------
/lib/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | cmake_minimum_required(VERSION 3.10)
2 | project(tinykvm)
3 | #
4 | # C++20 KVM library
5 | #
6 |
7 | # Opt-in switch; when ON the library is built with TINYKVM_FAST_EXECUTION_TIMEOUT=1.
8 | option(KVM_EXPERIMENTAL "Enable experimental features" OFF)
9 |
10 | # Default the architecture backend from the target processor; it is a cache entry, so -DTINYKVM_ARCH=... overrides it.
11 | if (CMAKE_SYSTEM_PROCESSOR MATCHES "(x86)|(X86)|(amd64)|(AMD64)")
12 | set (TINYKVM_ARCH "AMD64" CACHE STRING "TinyKVM Arch Backend")
13 | else()
14 | # TODO: Detect ARM properly
15 | set (TINYKVM_ARCH "ARM64" CACHE STRING "TinyKVM Arch Backend")
16 | endif()
17 |
18 | # Sources built for every backend.
19 | set (SOURCES
20 | tinykvm/machine.cpp
21 | tinykvm/machine_elf.cpp
22 | tinykvm/machine_env.cpp
23 | tinykvm/machine_utils.cpp
24 | tinykvm/memory.cpp
25 | tinykvm/memory_bank.cpp
26 | tinykvm/memory_maps.cpp
27 | tinykvm/page_streaming.cpp
28 | tinykvm/remote.cpp
29 | tinykvm/smp.cpp
30 | tinykvm/vcpu.cpp
31 | tinykvm/vcpu_run.cpp
32 |
33 | tinykvm/linux/fds.cpp
34 | tinykvm/linux/signals.cpp
35 | tinykvm/linux/system_calls.cpp
36 | tinykvm/linux/threads.cpp
37 | )
38 | # Sources that only exist for the AMD64 backend.
39 | if ("${TINYKVM_ARCH}" STREQUAL "AMD64")
40 | list(APPEND SOURCES
41 | tinykvm/amd64/gdt.cpp
42 | tinykvm/amd64/idt.cpp
43 | tinykvm/amd64/tss.cpp
44 | tinykvm/amd64/paging.cpp
45 | tinykvm/amd64/usercode.cpp
46 | tinykvm/amd64/vdso.cpp
47 | tinykvm/rsp_client.cpp
48 | )
49 | endif()
50 |
51 | # Provides the imported Threads::Threads target, used below in place of a raw "pthread" library name.
52 | find_package(Threads REQUIRED)
53 |
54 | add_library(tinykvm STATIC ${SOURCES})
55 | target_compile_definitions(tinykvm PUBLIC _GNU_SOURCE=1)
56 | target_include_directories(tinykvm PUBLIC .)
57 | target_compile_features(tinykvm PUBLIC cxx_std_20)
58 | target_link_libraries(tinykvm PUBLIC Threads::Threads rt)
59 |
60 | # -mavx2 is an x86-only compiler flag, so it is applied only when the AMD64 backend is selected.
61 | if ("${TINYKVM_ARCH}" STREQUAL "AMD64")
62 | set_source_files_properties(
63 | tinykvm/page_streaming.cpp
64 | PROPERTIES COMPILE_FLAGS -mavx2)
65 | endif()
66 |
67 | # Per-configuration flags as generator expressions, so multi-config generators (where CMAKE_BUILD_TYPE is empty) pick the right set.
68 | # NOTE(review): PUBLIC is kept from the original, so these flags also reach consumers of tinykvm - confirm that is intended.
69 | target_compile_options(tinykvm PUBLIC
70 | "$<$<CONFIG:Debug>:-O0;-ggdb3>"
71 | "$<$<NOT:$<CONFIG:Debug>>:-O2;-g>"
72 | )
73 | if (KVM_EXPERIMENTAL)
74 | target_compile_definitions(tinykvm PUBLIC TINYKVM_FAST_EXECUTION_TIMEOUT=1)
75 | endif()
76 |
--------------------------------------------------------------------------------
/lib/tinykvm/amd64/amd64.hpp:
--------------------------------------------------------------------------------
1 | #ifndef PAGE_SIZE
2 | #define PAGE_SIZE 4096
3 | #endif
4 |
5 | /* CR0 bits */
6 | #define CR0_PE 1u
7 | #define CR0_MP (1U << 1)
8 | #define CR0_EM (1U << 2)
9 | #define CR0_TS (1U << 3)
10 | #define CR0_ET (1U << 4)
11 | #define CR0_NE (1U << 5)
12 | #define CR0_WP (1U << 16)
13 | #define CR0_AM (1U << 18)
14 | #define CR0_NW (1U << 29)
15 | #define CR0_CD (1U << 30)
16 | #define CR0_PG (1U << 31)
17 |
18 | /* CR4 bits */
19 | #define CR4_VME 1
20 | #define CR4_PVI (1U << 1)
21 | #define CR4_TSD (1U << 2)
22 | #define CR4_DE (1U << 3)
23 | #define CR4_PSE (1U << 4)
24 | #define CR4_PAE (1U << 5)
25 | #define CR4_MCE (1U << 6)
26 | #define CR4_PGE (1U << 7)
27 | #define CR4_PCE (1U << 8)
28 | #define CR4_OSFXSR (1U << 9)
29 | #define CR4_OSXMMEXCPT (1U << 10)
30 | #define CR4_UMIP (1U << 11)
31 | #define CR4_VMXE (1U << 13)
32 | #define CR4_SMXE (1U << 14)
33 | #define CR4_FSGSBASE (1U << 16)
34 | #define CR4_PCIDE (1U << 17)
35 | #define CR4_OSXSAVE (1U << 18)
36 | #define CR4_SMEP (1U << 20)
37 | #define CR4_SMAP (1U << 21)
38 | #define CR4_CET (1U << 23)
39 |
40 | #define EFER_SCE 1
41 | #define EFER_LME (1U << 8)
42 | #define EFER_LMA (1U << 10)
43 | #define EFER_NXE (1U << 11)
44 |
45 | /* 64-bit page * entry bits */
46 | #define PDE64_PRESENT 1UL
47 | #define PDE64_RW (1UL << 1)
48 | #define PDE64_USER (1UL << 2)
49 | #define PDE64_WRITE_THROUGH (1UL << 3)
50 | #define PDE64_CACHE_DISABLE (1UL << 4)
51 | #define PDE64_ACCESSED (1UL << 5)
52 | #define PDE64_DIRTY (1UL << 6)
53 | #define PDE64_PS (1UL << 7)
54 | #define PDE64_G (1UL << 8)
55 | #define PDE64_NX (1UL << 63)
56 |
57 | #define PDE64_PDPT_SIZE (1ULL << 39)
58 | #define PDE64_PD_SIZE (1ULL << 30)
59 | #define PDE64_PT_SIZE (1ULL << 21)
60 | #define PDE64_PTE_SIZE (1ULL << 12)
61 |
62 |
63 | #define AMD64_MSR_STAR 0xC0000081
64 | #define AMD64_MSR_LSTAR 0xC0000082
65 |
66 | #define AMD64_MSR_FS_BASE 0xC0000100
67 | #define AMD64_MSR_GS_BASE 0xC0000101
68 |
--------------------------------------------------------------------------------
/lib/tinykvm/amd64/builtin/.gitignore:
--------------------------------------------------------------------------------
1 | interrupts
2 | vsyscall
3 | usercode
4 |
--------------------------------------------------------------------------------
/lib/tinykvm/amd64/builtin/assembly.sh:
--------------------------------------------------------------------------------
1 | nasm -f bin -o interrupts interrupts.asm
2 | xxd -i interrupts > kernel_assembly.h
3 |
--------------------------------------------------------------------------------
/lib/tinykvm/amd64/builtin/disassembly.sh:
--------------------------------------------------------------------------------
1 | objdump -b binary -mi386:x86-64 -D interrupts
2 |
--------------------------------------------------------------------------------
/lib/tinykvm/amd64/builtin/interrupts.asm:
--------------------------------------------------------------------------------
1 | [BITS 64]
2 | global vm64_exception
3 |
4 | ;; CPU exception frame:
5 | ;; 1. stack rsp+32
6 | ;; 2. rflags rsp+24
7 | ;; 3. cs rsp+16
8 | ;; 4. rip rsp+8
9 | ;; 5. code rsp+0
10 | %macro CPU_EXCEPT 1
11 | ALIGN 0x8
12 | out 128 + %1, ax
13 | iretq
14 | %endmacro
15 | %macro CPU_EXCEPT_CODE 1
16 | ALIGN 0x8
17 | out 128 + %1, ax
18 | jmp .vm64_pop_code
19 | %endmacro
20 | %macro CPU_EXCEPT_PF 1
21 | ALIGN 0x8
22 | jmp .vm64_page_fault
23 | %endmacro
24 |
25 | dw .vm64_syscall
26 | dw .vm64_gettimeofday
27 | dw .vm64_exception
28 | dw .vm64_except1 - .vm64_exception
29 | dw .vm64_dso
30 |
31 | ALIGN 0x10
32 | .vm64_syscall:
33 | cmp ax, 158 ;; PRCTL
34 | je .vm64_prctl
35 | cmp ax, 228 ;; CLOCK_GETTIME
36 | je .vm64_clock_gettime
37 | cmp eax, 0x1F777 ;; ENTRY SYSCALL
38 | je .vm64_entrycall
39 | cmp eax, 0x1F707 ;; REENTRY SYSCALL
40 | je .vm64_reentrycall
41 | out 0, eax
42 | o64 sysret
43 |
44 | .vm64_prctl:
45 | stac
46 | push rsi
47 | push rcx
48 | push rdx
49 | cmp rdi, 0x1002 ;; PRCTL: SET_FS
50 | jne .vm64_prctl_get
51 | ;; SET_FS := rsi
52 | mov ecx, 0xC0000100 ;; FSBASE
53 | mov eax, esi ;; low-32 FS base
54 | shr rsi, 32
55 | mov edx, esi ;; high-32 FS base
56 | wrmsr
57 | xor rax, rax ;; return 0
58 | .vm64_prctl_end:
59 | pop rdx
60 | pop rcx
61 | pop rsi
62 | clac
63 | o64 sysret
64 | .vm64_prctl_get:
65 | cmp rdi, 0x1003 ;; PRCTL: GET_FS
66 | jne .vm64_prctl_trap
67 | ;; GET_FS [rsi] := FSBASE (RDMSR returns the value split across EDX:EAX)
68 | mov ecx, 0xC0000100 ;; FSBASE
69 | rdmsr
70 | shl rdx, 32 ;; lift high-32 FS base
71 | or rdx, rax ;; merge low-32 FS base; rdx now holds all 64 bits
72 | mov [rsi], rdx ;; store the combined value (rax alone holds only the low half)
73 | xor rax, rax ;; return 0
74 | jmp .vm64_prctl_end
75 |
76 | .vm64_prctl_trap:
77 | ;; PRCTL fallback to host syscall trap
78 | out 0, ax
79 | jmp .vm64_prctl_end
80 | ;; .read_system_time: computes the TSC-derived system time into rax; rbx, rcx and rdx are saved and restored.
81 | .read_system_time:
82 | push rbx
83 | push rcx
84 | push rdx
85 | ;; Check if the system time MSR has already been set
86 | mov rax, [0x3030] ;; system-time nanoseconds
87 | ;; If the system time is zero, we need to set it
88 | test rax, rax
89 | jnz .system_time_already_set
90 | ;; 0x4b564d01 MSR_KVM_SYSTEM_TIME_NEW
91 | mov ecx, 0x4b564d01 ;; MSR_KVM_SYSTEM_TIME_NEW
92 | mov eax, 0x3021 ;; data
93 | mov edx, 0 ;; zero high-32 bits
94 | wrmsr
95 | .system_time_already_set:
96 | ;; Read TSC
97 | rdtsc
98 | ;; Add EDX to RAX for full 64-bit TSC value
99 | shl rdx, 32
100 | or rax, rdx
101 | ;; Calculate the system time in nanoseconds
102 | ;; time = (current_tsc - tsc_timestamp)
103 | ;; if (tsc_shift >= 0)
104 | ;; time <<= tsc_shift;
105 | ;; else
106 | ;; time >>= -tsc_shift;
107 | ;; time = (time * tsc_to_system_mul) >> 32
108 | ;; time = time + system_time
109 | mov rdx, [0x3028] ;; tsc_timestamp
110 | sub rax, rdx ;; current_tsc - tsc_timestamp
111 | ;; Load the 8-bit signed tsc_shift and branch on its sign
112 | ;; NOTE(review): if the time struct starts at 0x3020 (MSR data 0x3021), the pvclock layout would put tsc_shift at 0x303C, not 0x3030+28 - confirm
113 | mov cl, [0x3030 + 28] ;; tsc_shift
114 | ;; Non-negative shift falls through to the left shift below
115 | test cl, cl
116 | js .system_time_neg_tsc_shift
117 | ;; If tsc_shift is positive, shift left
118 | shl rax, cl ;; rax = rax << tsc_shift
119 | jmp .system_time_tsc_shift_done
120 | .system_time_neg_tsc_shift:
121 | ;; If tsc_shift is negative, shift right
122 | neg ecx
123 | shr rax, cl ;; rax = rax >> -tsc_shift
124 | .system_time_tsc_shift_done:
125 | ;; Multiply by tsc_to_system_mul
126 | mov ecx, [0x3038] ;; tsc_to_system_mul
127 | mul rcx ;; 128-bit product in RDX:RAX
128 | ;; Right shift the full product by 32 bits, pulling the high half in from RDX
129 | shrd rax, rdx, 32 ;; a plain shr on rax dropped RDX and kept only 32 result bits
130 | ;; Add the system time base (NOTE(review): system_time was read from 0x3030 above, yet the base is taken from 0x3030+16 - confirm)
131 | mov rdx, [0x3030 + 16] ;; system_time_base
132 | add rax, rdx ;; time = time + system_time_base
133 |
134 | ;; Test version is even (result currently unused: the retry jump below is disabled)
135 | mov ebx, [0x3030] ;; version
136 | and ebx, 1
137 | ;;jnp .system_time_already_set ;; read again
138 |
139 | pop rdx
140 | pop rcx
141 | pop rbx
142 | ret
143 |
;; Returns the KVM wall clock in nanoseconds (sec * 1e9 + nsec) in RAX.
;; rbx, rcx and rdx are preserved.
;; The wall clock structure is registered at 0x3000; its seconds field
;; is read from 0x3004 and its nanoseconds field from 0x3008.
.read_wall_clock:
	push rbx
	push rcx
	push rdx
	;; A non-zero seconds field means the structure is already filled in
	cmp dword [0x3004], 0
	jne .wall_clock_already_set
	;; Otherwise register the structure through the wall clock MSR
	xor edx, edx            ;; zero high-32 bits
	mov eax, 0x3000         ;; data
	mov ecx, 0x4b564d00     ;; MSR_KVM_WALL_CLOCK_NEW
	wrmsr
.wall_clock_already_set:
	;; Fetch both halves of the wall clock (32-bit loads zero-extend)
	mov ecx, dword [0x3008] ;; nsec
	mov eax, dword [0x3004] ;; sec
	;; rax = sec * 1e9 + nsec
	mov ebx, 1000000000     ;; 1e9
	mul rbx
	add rax, rcx
	pop rdx
	pop rcx
	pop rbx
	ret
170 |
;; clock_gettime(rdi = clockid, rsi = timespec*)
;; Writes tv_sec to [rsi] and tv_nsec to [rsi + 8] and returns 0, or
;; returns -14 (EFAULT) when rsi is below 0x100000.
;; clockid 0 is served as wall clock + system time; every other
;; clockid is served from the system time alone.
.vm64_clock_gettime:
	;; Verify that destination is at least 0x100000. This is done
	;; before anything is pushed so that the error path can return
	;; without leaving the caller's stack pointer displaced.
	cmp rsi, 0x100000
	jb .vm64_clock_gettime_error
	stac
	push rbx
	push rcx
	push rdx
	;; Get system time into rax
	call .read_system_time
	;; Any non-zero clockid is answered with the system time
	test rdi, rdi
	jnz .finish_up_clock_gettime
	;; clockid 0: add the wall clock to the system time.
	;; .read_wall_clock returns in rax and preserves rbx.
	mov rbx, rax
	call .read_wall_clock
	add rax, rbx
.finish_up_clock_gettime:
	;; RAX now contains the clock time in nanoseconds
	;; Split RAX into seconds and nanoseconds
	xor edx, edx
	mov rbx, 1000000000 ;; 1e9
	div rbx             ;; rax = seconds, rdx = clock_time % 1e9
	;; Store to guest timespec
	mov [rsi], rax      ;; Store tv_sec
	mov [rsi + 8], rdx  ;; Store tv_nsec
	;; Restore registers
	pop rdx
	pop rcx
	pop rbx
	clac
	;; Return to the caller
	xor eax, eax
	o64 sysret
.vm64_clock_gettime_error:
	mov rax, -14 ;; EFAULT
	o64 sysret
209 |
;; gettimeofday stub: loads syscall number 96 and hands it to the host
;; by writing ax to port 0. It ends in `ret`, so it is entered with a
;; call rather than through the syscall path.
.vm64_gettimeofday:
	mov eax, 96 ;; gettimeofday
	out 0, ax
	ret

;; Returns the assembled offset of .vm64_gettimeofday in eax.
.vm64_dso:
	mov eax, .vm64_gettimeofday
	ret
218 |
;; Entry system call (eax = 0x1F777): rewrites CR3 with its current
;; value and returns to the caller with sysret.
.vm64_entrycall:
	;; Reset pagetables
	mov rax, cr3
	mov cr3, rax
	o64 sysret

;; Re-entry system call (eax = 0x1F707): returns to the caller right
;; away without touching CR3.
.vm64_reentrycall:
	o64 sysret
227 |
;; Page fault handler: passes the faulting address (CR2) to the host in
;; rdi via port 128 + 14, invalidates the TLB entry for that address
;; once the port write returns, then falls through to .vm64_pop_code.
;; NOTE(review): presumably the host repairs the mapping while it
;; handles the port write - confirm against the host-side handler.
.vm64_page_fault:
	push rdi
	mov rdi, cr2
	out 128 + 14, ax
	invlpg [rdi]
	pop rdi

;; Shared tail for exceptions that push an error code: drops the
;; 8-byte error code from the stack and returns from the interrupt.
.vm64_pop_code:
	add rsp, 8
	iretq

;; Timer interrupt handler: reports the event to the host on
;; port 128 + 33 and returns from the interrupt.
.vm64_timeout:
	out 128 + 33, ax
	iretq
242 |
;; Exception stub table: one stub per CPU exception vector 0..20, in
;; vector order and including vector 15, followed by the timer stub.
;; .vm64_except1 marks the start of the second stub, so the distance
;; between the two labels gives the size of one slot.
;; CPU_EXCEPT, CPU_EXCEPT_CODE and CPU_EXCEPT_PF are macros defined
;; earlier in this file. NOTE(review): judging by the vectors they are
;; used for, the _CODE variant is for exceptions that push an error
;; code and the _PF variant is the page fault - confirm at the macro
;; definitions.
ALIGN 0x8
.vm64_exception:
	CPU_EXCEPT 0
ALIGN 0x8
.vm64_except1:
	CPU_EXCEPT 1
	CPU_EXCEPT 2
	CPU_EXCEPT 3
	CPU_EXCEPT 4
	CPU_EXCEPT 5
	CPU_EXCEPT 6
	CPU_EXCEPT 7
	CPU_EXCEPT_CODE 8 ;; double fault
	CPU_EXCEPT 9
	CPU_EXCEPT_CODE 10
	CPU_EXCEPT_CODE 11
	CPU_EXCEPT_CODE 12
	CPU_EXCEPT_CODE 13
	CPU_EXCEPT_PF 14
	CPU_EXCEPT 15
	CPU_EXCEPT 16
	CPU_EXCEPT_CODE 17
	CPU_EXCEPT 18
	CPU_EXCEPT 19
	CPU_EXCEPT 20
ALIGN 0x8 ;; timer interrupt
	jmp .vm64_timeout
270 |
--------------------------------------------------------------------------------
/lib/tinykvm/amd64/builtin/kernel_assembly.h:
--------------------------------------------------------------------------------
/* Assembled image of the guest interrupt/syscall stubs.
 * idt.cpp includes this header, copies the whole array into guest
 * memory and reads the leading bytes through struct iasm_header.
 * NOTE(review): the layout looks like `xxd -i` output of the assembled
 * interrupts.asm; if so, regenerate this file whenever the assembly
 * changes instead of editing bytes by hand - confirm with assembly.sh. */
unsigned char interrupts[] = {
  0x10, 0x00, 0x82, 0x01, 0xb8, 0x01, 0x08, 0x00, 0x8b, 0x01, 0x90, 0x90,
  0x90, 0x90, 0x90, 0x90, 0x66, 0x3d, 0x9e, 0x00, 0x74, 0x25, 0x66, 0x3d,
  0xe4, 0x00, 0x0f, 0x84, 0x1b, 0x01, 0x00, 0x00, 0x3d, 0x77, 0xf7, 0x01,
  0x00, 0x0f, 0x84, 0x66, 0x01, 0x00, 0x00, 0x3d, 0x07, 0xf7, 0x01, 0x00,
  0x0f, 0x84, 0x64, 0x01, 0x00, 0x00, 0xe7, 0x00, 0x48, 0x0f, 0x07, 0x0f,
  0x01, 0xcb, 0x56, 0x51, 0x52, 0x48, 0x81, 0xff, 0x02, 0x10, 0x00, 0x00,
  0x75, 0x1b, 0xb9, 0x00, 0x01, 0x00, 0xc0, 0x89, 0xf0, 0x48, 0xc1, 0xee,
  0x20, 0x89, 0xf2, 0x0f, 0x30, 0x48, 0x31, 0xc0, 0x5a, 0x59, 0x5e, 0x0f,
  0x01, 0xca, 0x48, 0x0f, 0x07, 0x48, 0x81, 0xff, 0x03, 0x10, 0x00, 0x00,
  0x75, 0x16, 0xb9, 0x00, 0x01, 0x00, 0xc0, 0x0f, 0x32, 0x48, 0xc1, 0xe2,
  0x20, 0x48, 0x09, 0xc2, 0x48, 0x89, 0x06, 0x48, 0x31, 0xc0, 0xeb, 0xd8,
  0x66, 0xe7, 0x00, 0xeb, 0xd3, 0x53, 0x51, 0x52, 0x48, 0x8b, 0x04, 0x25,
  0x30, 0x30, 0x00, 0x00, 0x48, 0x85, 0xc0, 0x75, 0x11, 0xb9, 0x01, 0x4d,
  0x56, 0x4b, 0xb8, 0x21, 0x30, 0x00, 0x00, 0xba, 0x00, 0x00, 0x00, 0x00,
  0x0f, 0x30, 0x0f, 0x31, 0x48, 0xc1, 0xe2, 0x20, 0x48, 0x09, 0xd0, 0x48,
  0x8b, 0x14, 0x25, 0x28, 0x30, 0x00, 0x00, 0x48, 0x29, 0xd0, 0x8a, 0x0c,
  0x25, 0x4c, 0x30, 0x00, 0x00, 0x84, 0xc9, 0x78, 0x05, 0x48, 0xd3, 0xe0,
  0xeb, 0x05, 0xf7, 0xd9, 0x48, 0xd3, 0xe8, 0x8b, 0x0c, 0x25, 0x38, 0x30,
  0x00, 0x00, 0x48, 0xf7, 0xe1, 0x48, 0xc1, 0xe8, 0x20, 0x48, 0x8b, 0x14,
  0x25, 0x40, 0x30, 0x00, 0x00, 0x48, 0x01, 0xd0, 0x8b, 0x1c, 0x25, 0x30,
  0x30, 0x00, 0x00, 0x83, 0xe3, 0x01, 0x5a, 0x59, 0x5b, 0xc3, 0x53, 0x51,
  0x52, 0x8b, 0x04, 0x25, 0x04, 0x30, 0x00, 0x00, 0x85, 0xc0, 0x75, 0x11,
  0xb9, 0x00, 0x4d, 0x56, 0x4b, 0xb8, 0x00, 0x30, 0x00, 0x00, 0xba, 0x00,
  0x00, 0x00, 0x00, 0x0f, 0x30, 0x8b, 0x04, 0x25, 0x04, 0x30, 0x00, 0x00,
  0x8b, 0x0c, 0x25, 0x08, 0x30, 0x00, 0x00, 0xbb, 0x00, 0xca, 0x9a, 0x3b,
  0xba, 0x00, 0x00, 0x00, 0x00, 0x48, 0xf7, 0xe3, 0x48, 0x01, 0xc8, 0x5a,
  0x59, 0x5b, 0xc3, 0x0f, 0x01, 0xcb, 0x53, 0x51, 0x52, 0x48, 0x81, 0xfe,
  0x00, 0x00, 0x10, 0x00, 0x72, 0x2e, 0xe8, 0x3a, 0xff, 0xff, 0xff, 0x48,
  0x85, 0xff, 0x75, 0x05, 0xe8, 0xa1, 0xff, 0xff, 0xff, 0xba, 0x00, 0x00,
  0x00, 0x00, 0xbb, 0x00, 0xca, 0x9a, 0x3b, 0x48, 0xf7, 0xf3, 0x48, 0x89,
  0x06, 0x48, 0x89, 0x56, 0x08, 0x5a, 0x59, 0x5b, 0x0f, 0x01, 0xca, 0x31,
  0xc0, 0x48, 0x0f, 0x07, 0x48, 0xc7, 0xc0, 0xf2, 0xff, 0xff, 0xff, 0x48,
  0x0f, 0x07, 0xb8, 0x60, 0x00, 0x00, 0x00, 0x66, 0xe7, 0x00, 0xc3, 0xb8,
  0x82, 0x01, 0x00, 0x00, 0xc3, 0x0f, 0x20, 0xd8, 0x0f, 0x22, 0xd8, 0x48,
  0x0f, 0x07, 0x48, 0x0f, 0x07, 0x57, 0x0f, 0x20, 0xd7, 0x66, 0xe7, 0x8e,
  0x0f, 0x01, 0x3f, 0x5f, 0x48, 0x83, 0xc4, 0x08, 0x48, 0xcf, 0x66, 0xe7,
  0xa1, 0x48, 0xcf, 0x90, 0x90, 0x90, 0x90, 0x90, 0x66, 0xe7, 0x80, 0x48,
  0xcf, 0x90, 0x90, 0x90, 0x66, 0xe7, 0x81, 0x48, 0xcf, 0x90, 0x90, 0x90,
  0x66, 0xe7, 0x82, 0x48, 0xcf, 0x90, 0x90, 0x90, 0x66, 0xe7, 0x83, 0x48,
  0xcf, 0x90, 0x90, 0x90, 0x66, 0xe7, 0x84, 0x48, 0xcf, 0x90, 0x90, 0x90,
  0x66, 0xe7, 0x85, 0x48, 0xcf, 0x90, 0x90, 0x90, 0x66, 0xe7, 0x86, 0x48,
  0xcf, 0x90, 0x90, 0x90, 0x66, 0xe7, 0x87, 0x48, 0xcf, 0x90, 0x90, 0x90,
  0x66, 0xe7, 0x88, 0xeb, 0xab, 0x90, 0x90, 0x90, 0x66, 0xe7, 0x89, 0x48,
  0xcf, 0x90, 0x90, 0x90, 0x66, 0xe7, 0x8a, 0xeb, 0x9b, 0x90, 0x90, 0x90,
  0x66, 0xe7, 0x8b, 0xeb, 0x93, 0x90, 0x90, 0x90, 0x66, 0xe7, 0x8c, 0xeb,
  0x8b, 0x90, 0x90, 0x90, 0x66, 0xe7, 0x8d, 0xeb, 0x83, 0x90, 0x90, 0x90,
  0xe9, 0x70, 0xff, 0xff, 0xff, 0x90, 0x90, 0x90, 0x66, 0xe7, 0x8f, 0x48,
  0xcf, 0x90, 0x90, 0x90, 0x66, 0xe7, 0x90, 0x48, 0xcf, 0x90, 0x90, 0x90,
  0x66, 0xe7, 0x91, 0xe9, 0x60, 0xff, 0xff, 0xff, 0x66, 0xe7, 0x92, 0x48,
  0xcf, 0x90, 0x90, 0x90, 0x66, 0xe7, 0x93, 0x48, 0xcf, 0x90, 0x90, 0x90,
  0x66, 0xe7, 0x94, 0x48, 0xcf, 0x90, 0x90, 0x90, 0xe9, 0x49, 0xff, 0xff,
  0xff
};
/* Number of bytes in interrupts[] */
unsigned int interrupts_len = 613;
56 |
--------------------------------------------------------------------------------
/lib/tinykvm/amd64/builtin/usercode.asm:
--------------------------------------------------------------------------------
1 | [BITS 64]
2 |
;; Header table: 16-bit offsets of the three entry stubs, one zero
;; word, then the 32-bit offset of the per-vCPU table (.vm64_cpuid).
	dw .vm64_entry
	dw .vm64_rexit
	dw .vm64_preserving_entry
	dw 0
	dd .vm64_cpuid

ALIGN 0x10
;; The entry function, jumps to real function
;; The target address is taken from r15. rcx is parked in r13 across
;; the system call because `syscall` overwrites rcx.
.vm64_entry:
	;; Execute a pagetable flushing system call that
	;; ensures that even if we are entering in kernel mode,
	;; we are calling the user function in usermode.
	;; This cannot realistically be improved upon.
	mov r13, rcx
	mov rax, 0x1F777
	syscall
	mov rcx, r13
	jmp r15
;; The exit function (pre-written to stack)
;; Moves the return value (rax) into rdi, then writes 0xFFFF to port 0
;; in a loop, so the write is repeated if execution ever resumes here.
.vm64_rexit:
	mov rdi, rax
.vm64_rexit_retry:
	mov eax, 0xFFFF
	out 0, eax
	jmp .vm64_rexit_retry
.vm64_preserving_entry:
	;; This is the entry point for a paused VM where
	;; its in the middle of a user program, so every
	;; register must be preserved. We need to flush
	;; the pagetables to ensure the guest can see the
	;; correct memory. Since the guest is potentially
	;; blind here, the host has pushed the registers
	;; necessary to perform the syscall safely.
	mov rax, 0x1F777
	syscall
	;; Pop order: r11, rcx, rax, and finally the resume address
	;; that `ret` consumes.
	pop r11 ;; used by syscall for rflags
	pop rcx ;; used by syscall for rip
	pop rax
	;; With the registers restored, we can now
	;; return to the guest program.
	ret
44 |
45 |
;; One 16-byte per-vCPU record: the vCPU index in the first dword,
;; followed by three zeroed dwords.
%macro vcputable 1
	dd %1
	times 3 dd 0
%endmacro

;; Table of 17 per-vCPU records, indices 0..16, 8-byte aligned.
ALIGN 0x8
.vm64_cpuid:
%assign vcpu_index 0
%rep 17
	vcputable vcpu_index
%assign vcpu_index vcpu_index+1
%endrep
.vm64_cpuid_end:
73 |
--------------------------------------------------------------------------------
/lib/tinykvm/amd64/builtin/usercode_assembly.sh:
--------------------------------------------------------------------------------
#!/bin/sh
# Assembles usercode.asm into a flat binary named `usercode` and prints
# it to stdout as a C array (xxd -i).
# Abort on the first failure so that xxd never dumps a stale or missing
# binary after nasm has reported an error.
set -e
nasm -f bin -o usercode usercode.asm
xxd -i usercode
3 |
--------------------------------------------------------------------------------
/lib/tinykvm/amd64/builtin/vsyscall.asm:
--------------------------------------------------------------------------------
[BITS 64]

;; Code assembled for the fixed address 0xFFFFFFFFFF600000.
org 0xFFFFFFFFFF600000
;; gettimeofday entry at the very start of the page: loads syscall
;; number 96 into ax, hands it to the host through port 0 and returns
;; to the caller.
.vsyscall_gettimeofday:
	mov ax, 96 ;; gettimeofday
	out 0, ax
	ret
8 |
--------------------------------------------------------------------------------
/lib/tinykvm/amd64/builtin/vsyscall_assembly.sh:
--------------------------------------------------------------------------------
#!/bin/sh
# Assembles vsyscall.asm into a flat binary named `vsyscall` and prints
# it to stdout as a C array (xxd -i).
# Abort on the first failure so that xxd never dumps a stale or missing
# binary after nasm has reported an error.
set -e
nasm -f bin -o vsyscall vsyscall.asm
xxd -i vsyscall
3 |
--------------------------------------------------------------------------------
/lib/tinykvm/amd64/gdt.cpp:
--------------------------------------------------------------------------------
1 | #include "gdt.hpp"
2 |
3 | #include "../common.hpp"
4 | #include
5 | #include
6 | #include
7 |
/* Access bytes of GDT descriptors. print_gdt_entries() decodes them as
 * bit 7 = P, bits 5-6 = DPL, bit 4 = S, bit 3 = Ex, bit 2 = DC, bit 1 = RW. */
#define GDT_ACCESS_DUMMY 0x0
/* Written by GDT_write_TSS_segment().
 * NOTE(review): bit 7 (P) is clear in this value - confirm that a
 * non-present TSS descriptor is intended. */
#define GDT_ACCESS_TSS 0x09
#define GDT_ACCESS_CODE 0x9A
#define GDT_ACCESS_DATA 0x92
#define GDT_ACCESS_CODE3 0xFA
#define GDT_ACCESS_DATA3 0xF2
#define GDT_ACCESS_TSS0 0x85
#define GDT_ACCESS_TSS3 0xE5

/* Flags nibble of GDT descriptors. print_gdt_entries() decodes it as
 * 0x8 = G, 0x4 = Sz, 0x2 = L. */
#define FLAGS_X32_PAGE 0xC
#define FLAGS_X64_PAGE 0xA
#define FLAGS_X64_TSS 0x4
20 |
/* Packed pair of a 16-bit size and a 64-bit offset (10 bytes). */
struct GDT_desc
{
	uint16_t size;
	uint64_t offset;
} __attribute__((packed));

/* One GDT descriptor. The bit-fields add up to 64 bits:
 * limit 0..15, base 0..23, access byte, limit 16..19, flags nibble,
 * base 24..31. */
struct GDT_entry
{
	uint32_t limit_lo : 16;
	uint32_t base_lo : 24;
	uint32_t access : 8;
	uint32_t limit_hi : 4;
	uint32_t flags : 4;
	uint32_t base_hi : 8;
} __attribute__((packed));
36 |
37 | void GDT_write_segment(void* area, uint8_t flags)
38 | {
39 | auto* entry = (GDT_entry*) area;
40 | entry->limit_lo = 0xFFFF;
41 | entry->base_lo = 0;
42 | entry->access = flags;
43 | entry->limit_hi = 0xF;
44 | entry->flags = FLAGS_X64_PAGE;
45 | entry->base_hi = 0;
46 | }
47 | void GDT_write_TSS_segment(void* area, uint64_t tss_addr, uint32_t size)
48 | {
49 | auto* entry = (GDT_entry*) area;
50 | entry->limit_lo = size;
51 | entry->limit_hi = 0;
52 | entry->access = GDT_ACCESS_TSS;
53 | entry->flags = FLAGS_X64_TSS;
54 | entry->base_lo = tss_addr & 0xFFFFFF;
55 | entry->base_hi = tss_addr >> 24;
56 | }
57 |
58 | void setup_amd64_segments(uint64_t gdt_addr, char* gdt_ptr)
59 | {
60 | (void)gdt_addr;
61 |
62 | /* Null segment */
63 | memset(gdt_ptr + 0x0, 0, 8);
64 | /* Kernel mode */
65 | GDT_write_segment(gdt_ptr + 0x8, GDT_ACCESS_CODE);
66 | GDT_write_segment(gdt_ptr + 0x10, GDT_ACCESS_DATA);
67 | /* Null user-base segment */
68 | memset(gdt_ptr + 0x18, 0, 8);
69 | /* User mode */
70 | GDT_write_segment(gdt_ptr + 0x20, GDT_ACCESS_DATA3);
71 | GDT_write_segment(gdt_ptr + 0x28, GDT_ACCESS_CODE3);
72 |
73 | /* TSS segment (initialized later) */
74 | memset(gdt_ptr + 0x30, 0, 8);
75 | }
76 |
77 | void setup_amd64_segment_regs(struct kvm_sregs& sregs, uint64_t gdt_addr)
78 | {
79 | /* Code segment */
80 | struct kvm_segment seg = {
81 | .base = 0,
82 | .limit = 0xffffffff,
83 | .selector = 0x2B,
84 | .type = 11, /* Code: execute, read, accessed */
85 | .present = 1,
86 | .dpl = 3, /* User-mode */
87 | .db = 0,
88 | .s = 1, /* Code/data */
89 | .l = 1, /* 64-bit */
90 | .g = 1, /* 4KB granularity */
91 | };
92 | sregs.cs = seg;
93 |
94 | /* Data segment */
95 | seg.type = 3; /* Data: read/write, accessed */
96 | seg.selector = 0x23;
97 | sregs.ds = sregs.es = sregs.ss = seg;
98 |
99 | /* GDT dtable */
100 | sregs.gdt.base = gdt_addr;
101 | sregs.gdt.limit = sizeof(GDT_entry) * 7 - 1;
102 | }
103 |
104 | TINYKVM_COLD()
105 | void print_gdt_entries(const void* area, size_t count)
106 | {
107 | const auto* entry = (const GDT_entry*) area;
108 | for (size_t i = 0; i < count; i++) {
109 | const auto a = entry[i].access;
110 | const auto f = entry[i].flags;
111 | printf("GDT %2zx: P=%u DPL=%u S=%u Ex=%u DC=%u RW=%u G=%u Sz=%u L=%u\n",
112 | 8*i, a >> 7, (a >> 5) & 0x3, (a >> 4) & 1, (a >> 3) & 1,
113 | a & 0x4, a & 0x2, f & 0x8, f & 0x4, f & 0x2);
114 | }
115 | }
116 |
--------------------------------------------------------------------------------
/lib/tinykvm/amd64/gdt.hpp:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include
3 | #include
4 |
5 | extern void setup_amd64_segments(uint64_t gdt_addr, char* gdt_ptr);
6 | extern void setup_amd64_segment_regs(struct kvm_sregs&, uint64_t gdt_addr);
7 |
8 | extern void GDT_write_segment(void* area, uint8_t flags);
9 | extern void GDT_write_TSS_segment(void* area, uint64_t tss_addr, uint32_t size);
10 | extern void GDT_reload(uint16_t);
11 | extern void print_gdt_entries(const void* area, size_t count);
12 |
--------------------------------------------------------------------------------
/lib/tinykvm/amd64/idt.cpp:
--------------------------------------------------------------------------------
1 | #include "idt.hpp"
2 |
3 | #include "../common.hpp"
4 | #include
5 | #include
6 | #include
7 | #include
8 | struct kvm_sregs;
9 |
10 | namespace tinykvm {
11 |
// 64-bit IDT entry
// The handler address is split over offset_1 (bits 0..15),
// offset_2 (bits 16..31) and offset_3 (bits 32..63).
struct IDTentry {
	uint16_t offset_1; // offset bits 0..15
	uint16_t selector; // a code segment selector in GDT or LDT
	uint8_t ist; // 3-bit interrupt stack table offset
	uint8_t type_attr; // type and attributes, see below
	uint16_t offset_2; // offset bits 16..31
	uint32_t offset_3; // 32..63
	uint32_t zero2;
};
static_assert(sizeof(IDTentry) == 16, "AMD64 IDT entries are 16-bytes");

// Values OR-ed together to form IDTentry::type_attr:
// gate type in the low bits, privilege level in bits 5..6, present in bit 7.
#define IDT_GATE_INTR 0x0e
#define IDT_CPL0 0x00
#define IDT_CPL3 0x60
#define IDT_PRESENT 0x80

// The interrupt descriptor table as laid out in guest memory.
// The highest vector installed in this file is 32 (the timer), so the
// table has to hold at least 33 entries.
struct IDT
{
	/* Just enough for CPU exceptions and 1 timer interrupt */
	std::array entry;
};

// Overlays a 64-bit address with the three pieces stored in an IDTentry.
union addr_helper {
	uint64_t whole;
	struct {
		uint16_t lo16;
		uint16_t hi16;
		uint32_t top32;
	};
};
43 |
44 | static void set_entry(
45 | IDTentry& idt_entry,
46 | uint64_t handler,
47 | uint16_t segment_sel,
48 | uint8_t attributes)
49 | {
50 | addr_helper addr { .whole = handler };
51 | idt_entry.offset_1 = addr.lo16;
52 | idt_entry.offset_2 = addr.hi16;
53 | idt_entry.offset_3 = addr.top32;
54 | idt_entry.selector = segment_sel;
55 | idt_entry.type_attr = attributes;
56 | idt_entry.ist = 1;
57 | idt_entry.zero2 = 0;
58 | }
59 |
60 | void set_exception_handler(void* area, uint8_t vec, uint64_t handler)
61 | {
62 | auto& idt = *(IDT *)area;
63 | set_entry(idt.entry[vec], handler, 0x8, IDT_PRESENT | IDT_CPL0 | IDT_GATE_INTR);
64 | /* Use second IST for double faults */
65 | //idt.entry[vec].ist = (vec != 8) ? 1 : 2;
66 | }
67 |
68 | /* unsigned interrupts[] = { ... } */
69 | #include "builtin/kernel_assembly.h"
70 | static_assert(sizeof(interrupts) > 10 && sizeof(interrupts) <= 4096,
71 | "Interrupts array must be container within a 4KB page");
72 |
73 | const iasm_header& interrupt_header() {
74 | return *(const iasm_header*) &interrupts[0];
75 | }
76 | iasm_header& mutable_interrupt_header() {
77 | return *(iasm_header*) &interrupts[0];
78 | }
79 |
80 | void setup_amd64_exception_regs(struct kvm_sregs& sregs, uint64_t addr)
81 | {
82 | sregs.idt.base = addr;
83 | sregs.idt.limit = sizeof(IDT) - 1;
84 | }
85 |
86 | void setup_amd64_exceptions(uint64_t addr, void* area, void* except_area)
87 | {
88 | uint64_t offset = addr + interrupt_header().vm64_exception;
89 | for (int i = 0; i <= 20; i++) {
90 | if (i == 15) continue;
91 | //printf("Exception handler %d at 0x%lX\n", i, offset);
92 | set_exception_handler(area, i, offset);
93 | offset += interrupt_header().vm64_except_size;
94 | }
95 | // Program the timer interrupt (which sends NMI)
96 | offset += interrupt_header().vm64_except_size;
97 | set_exception_handler(area, 32, offset);
98 | // Install exception handling code
99 | std::memcpy(except_area, interrupts, sizeof(interrupts));
100 | }
101 |
102 | TINYKVM_COLD()
103 | void print_exception_handlers(const void* area)
104 | {
105 | auto* idt = (IDT*) area;
106 | for (unsigned i = 0; i < idt->entry.size(); i++) {
107 | const auto& entry = idt->entry[i];
108 | addr_helper addr;
109 | addr.lo16 = entry.offset_1;
110 | addr.hi16 = entry.offset_2;
111 | addr.top32 = entry.offset_3;
112 | printf("IDT %u: func=0x%lX sel=0x%X p=%d dpl=%d type=0x%X ist=%u\n",
113 | i, addr.whole, entry.selector, entry.type_attr >> 7,
114 | (entry.type_attr >> 5) & 0x3, entry.type_attr & 0xF, entry.ist);
115 | }
116 | }
117 |
/* Description of one interrupt vector. */
struct AMD64_Ex {
	const char* name;  // human-readable name
	bool has_code;     // whether an error code accompanies the exception
};
/* Indexed by vector number: 34 entries, covering vectors 0..33.
 * Index 33 is the "Execution Timeout" entry. */
static constexpr std::array exceptions =
{
	AMD64_Ex{"Divide-by-zero Error", false},
	AMD64_Ex{"Debug", false},
	AMD64_Ex{"Non-Maskable Interrupt", false},
	AMD64_Ex{"Breakpoint", false},
	AMD64_Ex{"Overflow", false},
	AMD64_Ex{"Bound Range Exceeded", false},
	AMD64_Ex{"Invalid Opcode", false},
	AMD64_Ex{"Device Not Available", false},
	AMD64_Ex{"Double Fault", true},
	AMD64_Ex{"Reserved", false},
	AMD64_Ex{"Invalid TSS", true},
	AMD64_Ex{"Segment Not Present", true},
	AMD64_Ex{"Stack-Segment Fault", true},
	AMD64_Ex{"General Protection Fault", true},
	AMD64_Ex{"Page Fault", true},
	AMD64_Ex{"Reserved", false},
	AMD64_Ex{"x87 Floating-point Exception", false},
	AMD64_Ex{"Alignment Check", true},
	AMD64_Ex{"Machine Check", false},
	AMD64_Ex{"SIMD Floating-point Exception", false},
	AMD64_Ex{"Virtualization Exception", false},
	AMD64_Ex{"Reserved", false},
	AMD64_Ex{"Reserved", false},
	AMD64_Ex{"Reserved", false},
	AMD64_Ex{"Reserved", false},
	AMD64_Ex{"Reserved", false},
	AMD64_Ex{"Reserved", false},
	AMD64_Ex{"Reserved", false},
	AMD64_Ex{"Reserved", false},
	AMD64_Ex{"Reserved", false},
	AMD64_Ex{"Security Exception", false},
	AMD64_Ex{"Reserved", false},
	AMD64_Ex{"Reserved", false},
	AMD64_Ex{"Execution Timeout", false},
};

/* Name of vector `intr`. Uses at(), so a vector beyond the table
 * throws std::out_of_range. */
const char* amd64_exception_name(uint8_t intr) {
	return exceptions.at(intr).name;
}
/* Whether vector `intr` carries an error code. Uses at(), so a vector
 * beyond the table throws std::out_of_range. */
bool amd64_exception_code(uint8_t intr) {
	return exceptions.at(intr).has_code;
}
166 |
167 | }
168 |
--------------------------------------------------------------------------------
/lib/tinykvm/amd64/idt.hpp:
--------------------------------------------------------------------------------
1 | #include
2 | #include "../memory.hpp"
3 | #include "memory_layout.hpp"
4 | struct kvm_sregs;
5 |
6 | namespace tinykvm {
7 | extern void setup_amd64_exception_regs(struct kvm_sregs& sregs, uint64_t addr);
8 | extern void setup_amd64_exceptions(uint64_t addr, void* area, void* code_area);
9 |
10 | extern void set_exception_handler(void* area, uint8_t vec, uint64_t handler);
11 | extern void print_exception_handlers(const void* area);
12 |
13 | extern const char* amd64_exception_name(uint8_t);
14 | extern bool amd64_exception_code(uint8_t);
15 |
/* Header at the start of the assembled interrupts[] image: five 16-bit
 * offsets, each relative to the start of the image.
 * NOTE(review): this header defines a type but shows no include guard
 * or `#pragma once`, unlike the sibling headers - confirm. */
struct iasm_header {
	uint16_t vm64_syscall;       // offset of the syscall entry
	uint16_t vm64_gettimeofday;  // offset of the gettimeofday stub
	uint16_t vm64_exception;     // offset of the first exception stub
	uint16_t vm64_except_size;   // size of one exception stub
	uint16_t vm64_dso;           // offset of the dso stub

	/* Address of the syscall entry: the memory's physical base plus
	 * INTR_ASM_ADDR plus the syscall offset. */
	uint64_t translated_vm_syscall(const vMemory& memory) const noexcept
	{
		return memory.physbase + INTR_ASM_ADDR + vm64_syscall;
	}
};
28 | const iasm_header& interrupt_header();
29 | iasm_header& mutable_interrupt_header();
30 | }
31 |
--------------------------------------------------------------------------------
/lib/tinykvm/amd64/lapic.hpp:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #define AMD64_MSR_APICBASE 0x1B
3 | #define AMD64_MSR_XAPIC_ENABLE 0x800
4 | #define AMD64_MSR_X2APIC_ENABLE 0xC00
5 |
6 | #define AMD64_APIC_MODE_EXTINT 0x7
7 | #define AMD64_APIC_MODE_NMI 0x4
8 |
9 | typedef unsigned int __u32;
10 |
/* Register layout of the local APIC. Every register occupies a 16-byte
 * slot; the hexadecimal marker in front of each member is the slot's
 * byte offset, running from 0x000 to 0x3F0. */
struct local_apic {

/*000*/	struct { __u32 __reserved[4]; } __reserved_01;

/*010*/	struct { __u32 __reserved[4]; } __reserved_02;

/*020*/	struct { /* APIC ID Register */
		__u32 __reserved_1 : 24,
			phys_apic_id : 4,
			__reserved_2 : 4;
		__u32 __reserved[3];
	} id;

/*030*/	const
	struct { /* APIC Version Register */
		__u32 version : 8,
			__reserved_1 : 8,
			max_lvt : 8,
			__reserved_2 : 8;
		__u32 __reserved[3];
	} version;

/*040*/	struct { __u32 __reserved[4]; } __reserved_03;

/*050*/	struct { __u32 __reserved[4]; } __reserved_04;

/*060*/	struct { __u32 __reserved[4]; } __reserved_05;

/*070*/	struct { __u32 __reserved[4]; } __reserved_06;

/*080*/	struct { /* Task Priority Register */
		__u32 priority : 8,
			__reserved_1 : 24;
		__u32 __reserved_2[3];
	} tpr;

/*090*/	const
	struct { /* Arbitration Priority Register */
		__u32 priority : 8,
			__reserved_1 : 24;
		__u32 __reserved_2[3];
	} apr;

/*0A0*/	const
	struct { /* Processor Priority Register */
		__u32 priority : 8,
			__reserved_1 : 24;
		__u32 __reserved_2[3];
	} ppr;

/*0B0*/	struct { /* End Of Interrupt Register */
		__u32 eoi;
		__u32 __reserved[3];
	} eoi;

/*0C0*/	struct { __u32 __reserved[4]; } __reserved_07;

/*0D0*/	struct { /* Logical Destination Register */
		__u32 __reserved_1 : 24,
			logical_dest : 8;
		__u32 __reserved_2[3];
	} ldr;

/*0E0*/	struct { /* Destination Format Register */
		__u32 __reserved_1 : 28,
			model : 4;
		__u32 __reserved_2[3];
	} dfr;

/*0F0*/	struct { /* Spurious Interrupt Vector Register */
		__u32 spurious_vector : 8,
			apic_enabled : 1,
			focus_cpu : 1,
			__reserved_2 : 22;
		__u32 __reserved_3[3];
	} svr;

/*100*/	struct { /* In Service Register */
/*170*/		__u32 bitfield;
		__u32 __reserved[3];
	} isr [8];

/*180*/	struct { /* Trigger Mode Register */
/*1F0*/		__u32 bitfield;
		__u32 __reserved[3];
	} tmr [8];

/*200*/	struct { /* Interrupt Request Register */
/*270*/		__u32 bitfield;
		__u32 __reserved[3];
	} irr [8];

/*280*/	union { /* Error Status Register */
		struct {
			__u32 send_cs_error : 1,
				receive_cs_error : 1,
				send_accept_error : 1,
				receive_accept_error : 1,
				__reserved_1 : 1,
				send_illegal_vector : 1,
				receive_illegal_vector : 1,
				illegal_register_address : 1,
				__reserved_2 : 24;
			__u32 __reserved_3[3];
		} error_bits;
		struct {
			__u32 errors;
			__u32 __reserved_3[3];
		} all_errors;
	} esr;

/*290*/	struct { __u32 __reserved[4]; } __reserved_08;

/*2A0*/	struct { __u32 __reserved[4]; } __reserved_09;

/*2B0*/	struct { __u32 __reserved[4]; } __reserved_10;

/*2C0*/	struct { __u32 __reserved[4]; } __reserved_11;

/*2D0*/	struct { __u32 __reserved[4]; } __reserved_12;

/*2E0*/	struct { __u32 __reserved[4]; } __reserved_13;

/*2F0*/	struct { __u32 __reserved[4]; } __reserved_14;

/*300*/	struct { /* Interrupt Command Register 1 */
		__u32 vector : 8,
			delivery_mode : 3,
			destination_mode : 1,
			delivery_status : 1,
			__reserved_1 : 1,
			level : 1,
			trigger : 1,
			__reserved_2 : 2,
			shorthand : 2,
			__reserved_3 : 12;
		__u32 __reserved_4[3];
	} icr1;

/*310*/	struct { /* Interrupt Command Register 2 */
		union {
			__u32 __reserved_1 : 24,
				phys_dest : 4,
				__reserved_2 : 4;
			__u32 __reserved_3 : 24,
				logical_dest : 8;
		} dest;
		__u32 __reserved_4[3];
	} icr2;

/*320*/	struct { /* LVT - Timer */
		__u32 vector : 8,
			__reserved_1 : 4,
			delivery_status : 1,
			__reserved_2 : 3,
			mask : 1,
			timer_mode : 1,
			__reserved_3 : 14;
		__u32 __reserved_4[3];
	} lvt_timer;

/*330*/	struct { /* LVT - Thermal Sensor */
		__u32 vector : 8,
			delivery_mode : 3,
			__reserved_1 : 1,
			delivery_status : 1,
			__reserved_2 : 3,
			mask : 1,
			__reserved_3 : 15;
		__u32 __reserved_4[3];
	} lvt_thermal;

/*340*/	struct { /* LVT - Performance Counter */
		__u32 vector : 8,
			delivery_mode : 3,
			__reserved_1 : 1,
			delivery_status : 1,
			__reserved_2 : 3,
			mask : 1,
			__reserved_3 : 15;
		__u32 __reserved_4[3];
	} lvt_pc;

/*350*/	struct { /* LVT - LINT0 */
		__u32 vector : 8,
			delivery_mode : 3,
			__reserved_1 : 1,
			delivery_status : 1,
			polarity : 1,
			remote_irr : 1,
			trigger : 1,
			mask : 1,
			__reserved_2 : 15;
		__u32 __reserved_3[3];
	} lvt_lint0;

/*360*/	struct { /* LVT - LINT1 */
		__u32 vector : 8,
			delivery_mode : 3,
			__reserved_1 : 1,
			delivery_status : 1,
			polarity : 1,
			remote_irr : 1,
			trigger : 1,
			mask : 1,
			__reserved_2 : 15;
		__u32 __reserved_3[3];
	} lvt_lint1;

/*370*/	struct { /* LVT - Error */
		__u32 vector : 8,
			__reserved_1 : 4,
			delivery_status : 1,
			__reserved_2 : 3,
			mask : 1,
			__reserved_3 : 15;
		__u32 __reserved_4[3];
	} lvt_error;

/*380*/	struct { /* Timer Initial Count Register */
		__u32 initial_count;
		__u32 __reserved_2[3];
	} timer_icr;

/*390*/	struct { /* Timer Current Count Register */
		__u32 curr_count;
		__u32 __reserved_2[3];
	} timer_ccr;

/*3A0*/	struct { __u32 __reserved[4]; } __reserved_16;

/*3B0*/	struct { __u32 __reserved[4]; } __reserved_17;

/*3C0*/	struct { __u32 __reserved[4]; } __reserved_18;

/*3D0*/	struct { __u32 __reserved[4]; } __reserved_19;

/*3E0*/	struct { /* Timer Divide Configuration Register */
		__u32 divisor : 4,
			__reserved_1 : 28;
		__u32 __reserved_2[3];
	} timer_dcr;

/*3F0*/	struct { __u32 __reserved[4]; } __reserved_20;

} __attribute__ ((packed));
257 |
--------------------------------------------------------------------------------
/lib/tinykvm/amd64/memory_layout.hpp:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include
3 |
namespace tinykvm {
	// Fixed guest addresses of the low-memory system structures.
	// They are added to the memory's physical base where they are used.
	static constexpr uint64_t GDT_ADDR = 0x1600;
	static constexpr uint64_t TSS_ADDR = 0x1700;
	static constexpr uint64_t IDT_ADDR = 0x1800;
	static constexpr uint64_t INTR_ASM_ADDR = 0x2000;
	// NOTE(review): the interrupt assembly hard-codes 0x3000 and 0x3020
	// for its paravirtual clock structures, which is the bottom of this
	// page when the physical base is zero - confirm the interrupt
	// stacks can never grow down into them.
	static constexpr uint64_t IST_ADDR = 0x3000;
	static constexpr uint64_t IST2_ADDR = 0x4000;
	static constexpr uint64_t IST_END_ADDR = 0x5000;
	static constexpr uint64_t USER_ASM_ADDR = 0x5000;
	static constexpr uint64_t VSYS_ADDR = 0x6000;
	static constexpr uint64_t TSS_SMP_ADDR = 0x7000;
	static constexpr uint64_t TSS_SMP2_ADDR = 0x8000;
	// After the last fixed page, every page after
	// is a fixed page table directory. Any further
	// allocations happen using memory banks.
	static constexpr uint64_t PT_ADDR = 0x9000;

	// The size of the interrupt stacks on each SMP
	// vCPU, offset from IST_ADDR. We allow 17 vCPUs.
	// (17 * 240 = 4080 bytes, which fits in one 4KB page.)
	static constexpr uint64_t TSS_SMP_STACK = 240;
	// Maximum size of interrupt and exception frame
	static constexpr uint64_t INTR_STACK_FRAME = 48;
}
27 |
--------------------------------------------------------------------------------
/lib/tinykvm/amd64/paging.hpp:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include "../memory.hpp"
3 | #include
4 |
5 | namespace tinykvm {
6 |
7 | extern uint64_t setup_amd64_paging(vMemory&,
8 | std::string_view binary,
9 | const std::vector& remappings,
10 | bool split_hugepages);
11 | extern void print_pagetables(const vMemory&);
12 |
13 | using foreach_page_t = std::function;
14 | extern void foreach_page(vMemory&, foreach_page_t callback, bool skip_oob_addresses = true);
15 | extern void foreach_page(const vMemory&, foreach_page_t callback, bool skip_oob_addresses = true);
16 | extern void foreach_page_makecow(vMemory&, uint64_t kernel_end, uint64_t shared_memory_boundary);
17 |
18 | extern void page_at(vMemory&, uint64_t addr, foreach_page_t, bool ignore_missing = false);
19 | extern char * writable_page_at(vMemory&, uint64_t addr, uint64_t flags, bool zeroes = false);
20 | extern char * readable_page_at(const vMemory&, uint64_t addr, uint64_t flags);
21 |
/**
 * Tells whether the 512 64-bit words starting at `page` are all zero.
 * The words are examined in groups of eight; the scan stops at the
 * first group that contains a set bit.
 */
static inline bool page_is_zeroed(const uint64_t* page) {
	constexpr size_t kWordsPerPage = 512;
	const uint64_t* const end = page + kWordsPerPage;
	for (const uint64_t* group = page; group != end; group += 8) {
		const uint64_t lower = group[0] | group[1] | group[2] | group[3];
		const uint64_t upper = group[4] | group[5] | group[6] | group[7];
		if ((lower | upper) != 0)
			return false;
	}
	return true;
}
30 |
/* Bit mask equal to the page size minus one. */
static constexpr inline uint64_t PageMask() {
	return vMemory::PageSize() - 1UL;
}
34 |
35 | } // tinykvm
36 |
--------------------------------------------------------------------------------
/lib/tinykvm/amd64/tss.cpp:
--------------------------------------------------------------------------------
1 | #include "tss.hpp"
2 |
3 | #include
4 | #include
5 | #include "../memory.hpp"
6 | #include "memory_layout.hpp"
7 | #include "gdt.hpp"
8 |
9 | namespace tinykvm {
10 |
11 | struct AMD64_TSS // TSS image as it is written into guest memory; packed, 104 bytes in total
12 | {
13 | uint32_t ign; // offset 0, ends at 4
14 | uint64_t rsp0; // offset 4, ends at 12
15 | uint64_t rsp1; // offset 12, ends at 20
16 | uint64_t rsp2; // offset 20, ends at 28
17 | uint32_t ign2; // offset 28, ends at 32
18 | uint32_t ign3; // offset 32, ends at 36
19 | uint64_t ist1; // offset 36
20 | uint64_t ist2; // offset 44
21 | uint64_t ist3; // offset 52
22 | uint64_t ist4; // offset 60
23 | uint64_t ist5; // offset 68
24 | uint64_t ist6; // offset 76
25 | uint64_t ist7; // offset 84, ends at 92 (0x5C)
26 | uint32_t ign4; // offset 92
27 | uint32_t ign5; // offset 96
28 | uint16_t ign6; // offset 100
29 | uint16_t iomap_base; // offset 102, ends at 104; the setup functions store 104 here (marked "unused")
30 | } __attribute__((packed)); // packed: no padding is inserted, so the 64-bit fields sit at 4-byte-aligned offsets
31 |
32 | static constexpr uint16_t tss_sel = 0x30; // TSS selector: stored in sregs.tr.selector and added to the GDT pointer to reach the TSS descriptor slot
33 |
34 |
35 | void setup_amd64_tss(vMemory& memory)
36 | {
37 |     // Addresses of the TSS and of the interrupt-stack area, both relative to memory.physbase.
38 |     const auto tss_phys = memory.physbase + TSS_ADDR;
39 |     const auto stacks = memory.physbase + IST_ADDR;
40 |     // Start from an all-zero TSS, then fill in only the stack pointers that are used.
41 |     auto* tss = (AMD64_TSS *)memory.at(tss_phys);
42 |     std::memset(tss, 0, sizeof(AMD64_TSS));
43 |     tss->rsp0 = stacks + 0x1000;
44 |     tss->rsp1 = 0;
45 |     tss->rsp2 = 0;
46 |     tss->ist1 = stacks + 0x1000; // same top-of-stack value as rsp0
47 |     tss->ist2 = stacks + 0x800;
48 |     tss->iomap_base = 104; // unused
49 |     // Write the TSS descriptor into the GDT at byte offset tss_sel.
50 |     auto* gdt = memory.at(memory.physbase + GDT_ADDR);
51 |     GDT_write_TSS_segment(gdt + tss_sel, tss_phys, sizeof(AMD64_TSS)-1);
52 | }
53 |
54 | void setup_amd64_tss_smp(vMemory& memory)
55 | {
56 |     // Fills the table of per-CPU TSS entries located at physbase + TSS_SMP_ADDR.
57 |     static constexpr size_t SMP_TSS_ENTRIES = 17; // entry count, taken from the original loop bound
58 |     const auto ist_base = memory.physbase + IST_ADDR;
59 |     auto* tss = (AMD64_TSS *)memory.at(memory.physbase + TSS_SMP_ADDR);
60 |     for (size_t c = 0; c < SMP_TSS_ENTRIES; c++) {
61 |         // Build each entry from an all-zero image (as setup_amd64_tss does) so ist2..ist7 and the ignored fields never keep stale memory contents.
62 |         AMD64_TSS entry {};
63 |         /** XXX: TSS_SMP_STACK exception stack enough? */
64 |         entry.rsp0 = ist_base + TSS_SMP_STACK * (c + 1);
65 |         entry.ist1 = entry.rsp0;
66 |         entry.iomap_base = 104; // unused
67 |         tss[c] = entry; }
68 | }
69 |
70 | void setup_amd64_tss_regs(struct kvm_sregs& sregs, uint64_t tss_addr)
71 | {
72 |     // Describe the task register for the TSS at tss_addr: value-initialise, then set each field in turn.
73 |     struct kvm_segment tr {};
74 |     tr.base = tss_addr;
75 |     tr.limit = sizeof(AMD64_TSS)-1;
76 |     tr.selector = tss_sel;
77 |     tr.type = 11;
78 |     tr.present = 1;
79 |     tr.dpl = 3; /* User-mode */
80 |     tr.db = 0;
81 |     tr.s = 0; /* Gate */
82 |     tr.l = 0; /* 64-bit */
83 |     tr.g = 0; /* Byte granularity */
84 |     sregs.tr = tr;
85 | }
86 |
87 | }
88 |
--------------------------------------------------------------------------------
/lib/tinykvm/amd64/tss.hpp:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include
3 | struct kvm_sregs;
4 |
5 | namespace tinykvm {
6 | struct vMemory;
7 |
8 | extern void setup_amd64_tss(vMemory&); // zeroes and fills the TSS at physbase + TSS_ADDR, then writes its descriptor into the GDT
9 | // Fills the table of 17 TSS entries at physbase + TSS_SMP_ADDR (rsp0, ist1 and iomap_base of each entry).
10 | extern void setup_amd64_tss_smp(vMemory&);
11 | // Stores a task-register segment (base = tss_addr, limit = TSS size - 1) into sregs.tr.
12 | extern void setup_amd64_tss_regs(struct kvm_sregs& sregs, uint64_t tss_addr);
13 |
14 | }
15 |
--------------------------------------------------------------------------------
/lib/tinykvm/amd64/usercode.cpp:
--------------------------------------------------------------------------------
1 | #include "usercode.hpp"
2 | #include
3 |
4 | namespace tinykvm {
5 |
6 | static const unsigned char usercode[] = { // pre-assembled blob copied into the guest by setup_vm64_usercode(); NOTE(review): presumably generated from builtin/usercode.asm — confirm
7 | 0x10, 0x00, 0x20, 0x00, 0x2c, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, // bytes 0-11: the user_asm_header fields returned by usercode_header()
8 | 0x90, 0x90, 0x90, 0x90, 0x49, 0x89, 0xcd, 0xb8, 0x77, 0xf7, 0x01, 0x00,
9 | 0x0f, 0x05, 0x4c, 0x89, 0xe9, 0x41, 0xff, 0xe7, 0x48, 0x89, 0xc7, 0xb8,
10 | 0xff, 0xff, 0x00, 0x00, 0xe7, 0x00, 0xeb, 0xf7, 0xb8, 0x77, 0xf7, 0x01,
11 | 0x00, 0x0f, 0x05, 0x41, 0x5b, 0x59, 0x58, 0xc3, 0x00, 0x00, 0x00, 0x00,
12 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
13 | 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
14 | 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
15 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
16 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
17 | 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
18 | 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
19 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
20 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
21 | 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
22 | 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
23 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00,
24 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
25 | 0x0a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
26 | 0x00, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
27 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
28 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
29 | 0x0d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
30 | 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
31 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00,
32 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
33 | 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
34 | 0x00, 0x00, 0x00, 0x00
35 | };
36 |
37 | const user_asm_header &usercode_header()
38 | {
39 |     // The blob is an unsigned char array with no alignment guarantee for user_asm_header, so it must not be
40 |     // read through a casted pointer. Copy the leading bytes into a properly typed object once instead.
41 |     static_assert(sizeof(usercode) >= sizeof(user_asm_header), "usercode blob is smaller than its header");
42 |     static const user_asm_header header = [] {
43 |         user_asm_header parsed;
44 |         std::memcpy(&parsed, usercode, sizeof(parsed));
45 |         return parsed;
46 |     }();
47 |     return header;
48 | }
41 |
42 | void setup_vm64_usercode(void* usercode_area)
43 | {   // Copies the whole blob; usercode_area must have room for sizeof(usercode) bytes.
44 |     std::memcpy(usercode_area, &usercode[0], sizeof usercode);
45 | }
46 |
47 | } // tinykvm
48 |
--------------------------------------------------------------------------------
/lib/tinykvm/amd64/usercode.hpp:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include
3 | #include "../memory.hpp"
4 | #include "memory_layout.hpp"
5 |
6 | namespace tinykvm {
7 | // Offsets found at the start of the pre-assembled usercode blob; each one is relative to USER_ASM_ADDR.
8 | struct user_asm_header {
9 |     uint16_t vm64_entry;
10 |     uint16_t vm64_rexit;
11 |     uint16_t vm64_preserving_entry;
12 |     uint16_t vm64_unused;
13 |     uint32_t vm64_cpuid;
14 |     // Each translated_*() helper returns memory.physbase + USER_ASM_ADDR + the matching offset above.
15 |     uint64_t translated_vm_entry(const vMemory& memory) const noexcept {
16 |         return memory.physbase + USER_ASM_ADDR + vm64_entry;
17 |     }
18 |     uint64_t translated_vm_rexit(const vMemory& memory) const noexcept {
19 |         return memory.physbase + USER_ASM_ADDR + vm64_rexit;
20 |     }
21 |     uint64_t translated_vm_preserving_entry(const vMemory& memory) const noexcept {
22 |         return memory.physbase + USER_ASM_ADDR + vm64_preserving_entry;
23 |     }
24 |     uint64_t translated_vm_cpuid(const vMemory& memory) const noexcept {
25 |         return memory.physbase + USER_ASM_ADDR + vm64_cpuid;
26 |     }
27 | };
28 | extern const user_asm_header& usercode_header(); // header of the built-in usercode blob
29 |
30 | extern void setup_vm64_usercode(void* usercode_area); // copies the built-in blob into usercode_area
31 | }
32 |
--------------------------------------------------------------------------------
/lib/tinykvm/amd64/vdso.cpp:
--------------------------------------------------------------------------------
1 | #include "vdso.hpp"
2 |
3 | namespace tinykvm {
4 |
5 | __attribute__((aligned(4096))) // the array object itself is aligned to 4096 bytes
6 | static const std::array vsys = { // code bytes handed out by vsys_page()
7 | 0x66, 0xb8, 0x60, 0x00, 0x66, 0xe7, 0x00, 0xc3 // NOTE(review): hand-decoded as x86 `mov ax, 0x60; out 0x00, ax; ret` — confirm against builtin/vsyscall.asm
8 | };
9 |
10 | const std::array& vsys_page() { // returns a reference to the file-local vsys array (no copy)
11 | return vsys;
12 | }
13 |
14 | } // tinykvm
15 |
--------------------------------------------------------------------------------
/lib/tinykvm/amd64/vdso.hpp:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include
3 | #include
4 |
5 | namespace tinykvm {
6 | static constexpr uint64_t VSYSCALL_AREA = 0xFFFF600000; // NOTE(review): presumably the guest address where the vsyscall page is mapped — confirm against the paging setup
7 | // Accessor for the vsyscall code bytes; the definition lives in vdso.cpp.
8 | const std::array& vsys_page();
9 | }
10 |
--------------------------------------------------------------------------------
/lib/tinykvm/arm64/memory_layout.hpp:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include
3 |
4 | namespace tinykvm {
5 | static constexpr uint64_t PT_ADDR = 0x9000; // only constant in the arm64 layout; NOTE(review): presumably the page-table base address — confirm
6 |
7 | }
8 |
--------------------------------------------------------------------------------
/lib/tinykvm/common.hpp:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #ifndef LIKELY // branch-prediction hints; only defined when the including code has not already defined them
4 | #define LIKELY(x) __builtin_expect((x), 1)
5 | #endif
6 | #ifndef UNLIKELY
7 | #define UNLIKELY(x) __builtin_expect((x), 0)
8 | #endif
9 | // Overridable at build time: define TINYKVM_MAX_SYSCALLS before this header to replace the default of 512.
10 | #ifndef TINYKVM_MAX_SYSCALLS
11 | #define TINYKVM_MAX_SYSCALLS 512
12 | #endif
13 | // Expands to the GCC/Clang `cold` function attribute.
14 | #define TINYKVM_COLD() __attribute__ ((cold))
15 |
16 | #include
17 | #include
18 | #include
19 | #include
20 | #include
21 |
22 | namespace tinykvm
23 | {
24 | struct VirtualRemapping { // NOTE(review): presumably one physical-to-virtual mapping request plus its permissions — confirm against setup_amd64_paging
25 | uint64_t phys;
26 | uint64_t virt;
27 | size_t size; // NOTE(review): presumably a length in bytes — confirm
28 | bool writable = false; // the three flags default to false
29 | bool executable = false;
30 | bool blackout = false; /* Unmapped virtual area */
31 | };
32 |
33 | struct MachineOptions { // option bundle; every field has a default initializer, so `MachineOptions{}` is usable as-is
34 | uint64_t max_mem = 16ULL << 20; /* 16MB */
35 | uint32_t max_cow_mem = 0;
36 | uint32_t stack_size = 1600UL << 10; /* 1600KB */
37 | uint32_t reset_free_work_mem = 0; /* reset_to() */
38 | uint32_t dylink_address_hint = 0x200000; /* 2MB */
39 | uint32_t heap_address_hint = 0;
40 | uint64_t vmem_base_address = 0;
41 | std::string_view binary = {}; // non-owning view: the viewed bytes are not copied into this struct
42 | std::vector remappings {}; // NOTE(review): presumably VirtualRemapping entries (element type not visible here) — confirm
43 | // Boolean switches; each defaults to false unless initialised to true below.
44 | bool verbose_loader = false;
45 | bool short_lived = false;
46 | bool hugepages = false;
47 | bool transparent_hugepages = false;
48 | /* When enabled, master VMs will write directly
49 | to their own main memory instead of memory banks,
50 | allowing forks to immediately see changes. */
51 | bool master_direct_memory_writes = false;
52 | /* When enabled, split hugepages during page faults. */
53 | bool split_hugepages = false;
54 | /* When enabled, reset_to() will accept a different
55 | master VM than the original, but at a steep cost. */
56 | bool allow_reset_to_new_master = false;
57 | /* When enabled, reset_to() will copy all registers
58 | from the master VM to the new VM. */
59 | bool reset_copy_all_registers = true;
60 | /* When reset_enter_usermode is enabled, the guest will
61 | be forced into usermode after reset_to(). */
62 | bool reset_enter_usermode = true;
63 | /* When enabled, reset_to() will copy all memory
64 | from the master VM to the forked VM instead of
65 | resetting the memory banks. */
66 | bool reset_keep_all_work_memory = false;
67 | /* Force-relocate fixed addresses with mmap(). */
68 | bool relocate_fixed_mmap = true;
69 | /* Make heap executable, to support JIT. */
70 | bool executable_heap = false;
71 | /* When using hugepages, cover the given size with
72 | hugepages, unless 0, in which case the entire
73 | main memory will be covered. */
74 | size_t hugepages_arena_size = 0UL;
75 | };
76 |
77 | class MachineException : public std::exception {
78 | public:
79 |     // Keeps the message pointer as given (the text is not copied) together with an optional data word.
80 |     MachineException(const char* msg, uint64_t data = 0) : m_msg{msg}, m_data{data} {}
81 |     // Returns the pointer that was passed to the constructor.
82 |     const char* what() const noexcept override { return this->m_msg; }
83 |     // Extra value attached to the exception; derived classes expose it under other names.
84 |     auto data() const noexcept { return this->m_data; }
85 | protected:
86 |     const char* m_msg;
87 |     uint64_t m_data;
88 | };
89 |
90 | class MachineTimeoutException: public MachineException {
91 | public:
92 |     using MachineException::MachineException;
93 |     float seconds() const noexcept { return static_cast<float>(data() / 1000.0); } // data() scaled down by 1000 (NOTE(review): presumably milliseconds — confirm)
94 | };
95 |
96 | class MemoryException: public MachineException {
97 | public:
98 |     // msg and addr are forwarded to MachineException (addr becomes its data word); sz is stored here.
99 |     MemoryException(const char* msg, uint64_t addr, uint64_t sz) : MachineException(msg, addr), m_size{sz} {}
100 |     const char* what() const noexcept override { return this->m_msg; }
101 |     // The address given at construction, read back through data().
102 |     auto addr() const noexcept { return this->data(); }
103 |     // The size given at construction.
104 |     auto size() const noexcept { return this->m_size; }
105 | private:
106 |     uint64_t m_size;
107 | };
108 |
109 | template constexpr std::false_type always_false {}; // a std::false_type value; NOTE(review): presumably used for type-dependent static_assert failures — confirm at the use sites
110 | // is_string: true when either of the two std::is_same comparisons below holds (std::disjunction).
111 | template
112 | struct is_string
113 | : public std::disjunction<
114 | std::is_same::type>,
115 | std::is_same::type>
116 | > {};
117 | // is_stdstring: the result of a single std::is_same comparison.
118 | template
119 | struct is_stdstring : public std::is_same> {};
120 |
121 | struct PerVCPUTable { // four ints; NOTE(review): presumably one record per vCPU, as the name suggests — confirm
122 | int cpuid;
123 | int userval1;
124 | int userval2;
125 | int userval3;
126 | };
127 |
128 | struct DynamicElf {
129 |     std::string interpreter;
130 |     bool is_dynamic;
131 |     // True when `interpreter` is a non-empty string.
132 |     bool has_interpreter() const noexcept {
133 |         return interpreter.size() != 0;
134 |     }
135 | };
136 | extern DynamicElf is_dynamic_elf(std::string_view bin);
137 | }
138 |
--------------------------------------------------------------------------------
/lib/tinykvm/forward.hpp:
--------------------------------------------------------------------------------
1 | #pragma once
2 | struct kvm_run;
3 | struct kvm_regs;
4 | struct kvm_sregs;
5 | struct kvm_lapic_state;
6 | #include
7 |
8 | namespace tinykvm {
9 |
10 | #ifndef TINYKVM_ARCH // default to AMD64; NOTE(review): the guard tests TINYKVM_ARCH but the define is TINYKVM_ARCH_AMD64 — confirm the build always sets both together
11 | #define TINYKVM_ARCH_AMD64
12 | #endif
13 |
14 | #if defined(TINYKVM_ARCH_AMD64)
15 | // General-purpose register file. NOTE(review): the field order presumably mirrors struct kvm_regs — confirm before reordering.
16 | struct tinykvm_x86regs {
17 | __u64 rax, rbx, rcx, rdx;
18 | __u64 rsi, rdi, rsp, rbp;
19 | __u64 r8, r9, r10, r11;
20 | __u64 r12, r13, r14, r15;
21 | __u64 rip, rflags;
22 | };
23 | // FPU/SSE state. NOTE(review): the field order presumably mirrors struct kvm_fpu — confirm before reordering.
24 | struct tinykvm_x86fpuregs {
25 | __u8 fpr[8][16];
26 | __u16 fcw;
27 | __u16 fsw;
28 | __u8 ftwx; /* in fxsave format */
29 | __u8 pad1;
30 | __u16 last_opcode;
31 | __u64 last_ip;
32 | __u64 last_dp;
33 | __u8 xmm[16][16];
34 | __u32 mxcsr;
35 | __u32 pad2;
36 | };
37 | // Architecture-neutral names used by the rest of the library.
38 | #define tinykvm_regs tinykvm_x86regs
39 | #define tinykvm_fpuregs tinykvm_x86fpuregs
40 |
41 | #elif defined(TINYKVM_ARCH_ARM64)
42 | // The arm64 register structs named below are not defined in this header.
43 | #define tinykvm_regs tinykvm_arm64regs
44 | #define tinykvm_fpuregs tinykvm_arm64fpuregs
45 |
46 | #endif
47 |
48 | struct RSPClient;
49 | }
50 |
--------------------------------------------------------------------------------
/lib/tinykvm/linux/signals.cpp:
--------------------------------------------------------------------------------
1 | #include "../machine.hpp"
2 | #include "threads.hpp"
3 |
4 | namespace tinykvm {
5 |
6 | Signals::Signals() = default; // nothing beyond member default-initialisation
7 | Signals::~Signals() = default;
8 |
9 | SignalAction& Signals::get(int sig) { // signal numbers are 1-based: the action for `sig` is stored in slot sig-1
10 |     if (sig <= 0)
11 |         throw MachineException("Signal 0 invoked", sig);
12 |     return signals.at(sig-1);
13 | }
14 |
15 | void Signals::enter(vCPU& cpu, int sig) // redirects `cpu` to the handler registered for `sig`; sig == 0 is a no-op
16 | {
17 |     if (sig == 0) return;
18 |     auto& regs = cpu.registers();
19 |     // Actions are stored 1-based (slot sig-1), the same slot Signals::get() and Machine::sigaction() use.
20 |     auto& sigact = signals.at(sig - 1);
21 |     if (sigact.altstack) {
22 |         const int tid = cpu.machine().threads().gettid();
23 |         // Change to alternate per-thread stack
24 |         auto& stack = per_thread(tid).stack;
25 |         // But only if non-zero
26 |         if (stack.ss_sp != 0x0) {
27 |             regs.rsp = stack.ss_sp + stack.ss_size;
28 |         }
29 |     }
30 |     // The handler address goes into RCX. NOTE(review): presumably RCX is consumed as the return address on the way back to usermode — confirm.
31 |     //cpu.machine().enter_usermode();
32 |     regs.rcx = sigact.handler;
33 |     cpu.set_registers(regs);
34 | }
35 |
36 | SignalAction& Machine::sigaction(int sig)
37 | {   // Forwards to Signals::get(), which throws MachineException when sig <= 0.
38 |     return this->signals().get(sig);
39 | }
40 |
41 | } // tinykvm
42 |
--------------------------------------------------------------------------------
/lib/tinykvm/linux/signals.hpp:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include "../forward.hpp"
3 | #include
4 | #include