├── .gitignore ├── Cargo.toml ├── .cargo └── config ├── link-script ├── src_c ├── hello.c ├── build.sh ├── syscall.asm ├── guess.c └── bytes.c ├── self.json ├── src ├── bin │ ├── hello.rs │ ├── bytes.rs │ └── guess.rs └── calls.asm ├── README.md └── mini_elf.py /.gitignore: -------------------------------------------------------------------------------- 1 | Cargo.lock 2 | binaries/ 3 | src_c/build/ 4 | target/ 5 | release/ 6 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "rust-asm" 3 | version = "0.1.0" 4 | 5 | [dependencies] 6 | -------------------------------------------------------------------------------- /.cargo/config: -------------------------------------------------------------------------------- 1 | [build] 2 | target = "self" 3 | 4 | [target.self] 5 | rustflags=["-C", "lto", "-C", "opt-level=z"] 6 | -------------------------------------------------------------------------------- /link-script: -------------------------------------------------------------------------------- 1 | ENTRY(_start) 2 | 3 | SECTIONS { 4 | . = 0x400078; 5 | .text . 
: AT(0x400078) ALIGN(1) SUBALIGN(1) { 6 | *(*.*) 7 | } 8 | } 9 | -------------------------------------------------------------------------------- /src_c/hello.c: -------------------------------------------------------------------------------- 1 | void asm_exit(unsigned long long r); 2 | long long asm_write(unsigned long long fd, void *buf, unsigned long long bytes); 3 | 4 | void _start() { 5 | asm_write(1, "Hello, world!\n", 14); 6 | asm_exit(0); 7 | } 8 | -------------------------------------------------------------------------------- /src_c/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | ASMFILE="syscall.asm" 4 | CFILES="hello.c guess.c bytes.c" 5 | LINKSCRIPT="../link-script" 6 | MINIFIER="../mini_elf.py" 7 | 8 | rm -rf build 9 | mkdir build 10 | objfile_asm=build/$(basename $ASMFILE .asm).o 11 | nasm -f elf64 -o $objfile_asm $ASMFILE 12 | for cfile in $CFILES; do 13 | execfile=build/$(basename $cfile .c) 14 | objfile=build/$(basename $cfile .c).o 15 | gcc -Os -c -fomit-frame-pointer -fno-exceptions \ 16 | -fno-asynchronous-unwind-tables -o $objfile $cfile 17 | ld -T $LINKSCRIPT --omagic -o $execfile $objfile $objfile_asm 18 | done 19 | -------------------------------------------------------------------------------- /self.json: -------------------------------------------------------------------------------- 1 | { 2 | "llvm-target": "x86_64-unknown-linux-gnu", 3 | "data-layout": "e-m:e-i64:64-f80:128-n8:16:32:64-S128", 4 | "arch": "x86_64", 5 | "target-endian": "little", 6 | "target-pointer-width": "64", 7 | "target-c-int-width": "32", 8 | "executables": true, 9 | "linker-flavor": "gcc", 10 | "linker": "gcc", 11 | "pre-link-args": { 12 | "gcc": [ 13 | "-Wl,--script=link-script", 14 | "-Wl,--omagic", 15 | "-s", 16 | "-static", 17 | "-nostartfiles" 18 | ] 19 | }, 20 | "panic-strategy": "abort", 21 | "os": "linux" 22 | } 23 | 
-------------------------------------------------------------------------------- /src_c/syscall.asm: -------------------------------------------------------------------------------- 1 | global asm_exit 2 | global asm_open 3 | global asm_write 4 | global asm_read 5 | 6 | ; rdi - exitcode 7 | asm_exit: 8 | mov rax, 60 9 | syscall 10 | 11 | ; rdi - pathname (null-terminated string) 12 | ; rsi - flags, rdx - accessrights 13 | ; returns: file descriptor or error code (negative) 14 | asm_open: 15 | mov rax, 2 16 | syscall 17 | ret 18 | 19 | ; rdi - fd, rsi - buf (destination buffer), rdx - nbytes (max bytes to read) 20 | ; returns: number of bytes read or error code (negative) 21 | asm_read: 22 | mov rax, 0 23 | syscall 24 | ret 25 | 26 | ; rdi - fd, rsi - buf (source buffer), rdx - nbytes (bytes to write) 27 | ; returns: number of bytes written or error code (negative) 28 | asm_write: 29 | mov rax, 1 30 | syscall 31 | ret 32 | 33 | -------------------------------------------------------------------------------- /src/bin/hello.rs: -------------------------------------------------------------------------------- 1 | #![no_std] 2 | #![no_main] 3 | 4 | use core::panic::PanicInfo; 5 | 6 | extern { 7 | fn asm_exit(exit_code: isize); 8 | fn asm_write(fd: usize, buf: *const u8, nbytes: usize) -> isize; 9 | fn asm_panic(); 10 | } 11 | 12 | fn exit(exit_code: isize) { 13 | unsafe { asm_exit(exit_code); } 14 | } 15 | 16 | fn print_str(s: &str) { 17 | unsafe { asm_write(1, s.as_ptr(), s.len()); } 18 | } 19 | 20 | #[no_mangle] 21 | fn main() { 22 | print_str("Hello, world!\n"); 23 | exit(0); 24 | } 25 | 26 | #[panic_handler] 27 | fn panic(_info: &PanicInfo) -> ! { 28 | // When using LTO optimization, this panic handler would 29 | // be optimized away if it only contained the loop. 30 | // This leads to undefined behavior in case of a panic. 31 | // We can prevent this by calling external (assembly) 32 | // code from the panic handler. 
33 | unsafe { asm_panic(); } 34 | loop {} 35 | } 36 | -------------------------------------------------------------------------------- /src/calls.asm: -------------------------------------------------------------------------------- 1 | global asm_exit 2 | global asm_open 3 | global asm_write 4 | global asm_read 5 | global asm_panic 6 | global _start 7 | extern main 8 | 9 | ; rdi - exitcode 10 | asm_exit: 11 | mov rax, 60 12 | syscall 13 | 14 | ; rdi - pathname (null-terminated string) 15 | ; rsi - flags, rdx - accessrights 16 | ; returns: file descriptor or error code (negative) 17 | asm_open: 18 | mov rax, 2 19 | syscall 20 | ret 21 | 22 | ; rdi - fd, rsi - buf (destination buffer), rdx - nbytes (max bytes to read) 23 | ; returns: number of bytes read or error code (negative) 24 | asm_read: 25 | mov rax, 0 26 | syscall 27 | ret 28 | 29 | ; rdi - fd, rsi - buf (source buffer), rdx - nbytes (bytes to write) 30 | ; returns: number of bytes written or error code (negative) 31 | asm_write: 32 | mov rax, 1 33 | syscall 34 | ret 35 | 36 | ; 16-byte align the stack (prevents segfaults with SSE instructions) 37 | ; then call main 38 | _start: 39 | and rsp, 0xfffffffffffffff0 40 | call main 41 | 42 | ; infinite loop for the rust panic handler 43 | asm_panic: 44 | jmp asm_panic 45 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # tiny-rust-binaries: Tiny amd64 ELF executables using nightly Rust 2 | 3 | This shows a way to create executables of minimal binary size for various 4 | toy programs written in Rust: 5 | 6 | | **Program** | **Description** | 7 | | -- | -- | 8 | | hello | prints `Hello, world!` | 9 | | guess | Guess a random number from 1 to 100 | 10 | | bytes | Print the number of occurrences for all 256 possible byte values, sorted by ascending frequency | 11 | 12 | Various techniques are employed to minimize the executable size: 13 | 14 | - Not using the Rust standard library, only Rust core. 
15 | Interaction with the system happens by syscalls from assembly code 16 | (see `src/calls.asm`). 17 | - Using codegen options `lto, opt-level=z`. 18 | - A linker script that discards unnecessary sections and packs all code 19 | into one executable segment. This segment is aligned to be compatible 20 | with the next step, which is the reason why the binaries generated by the 21 | Rust compiler (in `target/self/release`) will segfault when trying to 22 | directly execute them. 23 | - A minifier program that extracts the code segment from the ELF file 24 | generated by the Rust compiler and puts it into a minimal ELF file with 25 | just one program header and no section headers (only 120 bytes of header 26 | overhead). 27 | 28 | ## Sizes 29 | 30 | Each of the toy programs was implemented in Rust (`src/bin/*.rs`), 31 | and also in C (`src_c/*.c`), trying to make the code as similar as possible. 32 | Both programs were subjected to the same linking and size minimization process. 33 | These are the executable and source file sizes when building on my system: 34 | 35 | | **Program** | **Executable size for C (bytes)** | **Executable size for Rust (bytes)** | **Rust is larger by ...** | 36 | | --: | --: | --: | --: | 37 | | hello | 225 | 239 | 6.2% | 38 | | guess | 935 | 1068 | 14.2% | 39 | | bytes | 1004 | 1183 | 17.8% | 40 | | **Program** | **Source size for C (bytes)** | **Source size for Rust (bytes)** | **Rust is larger by ...** | 41 | | hello | 196 | 775 | 295.4% | 42 | | guess | 2869 | 3780 | 31.8% | 43 | | bytes | 2916 | 3560 | 22.1% | 44 | 45 | ## Prerequisites 46 | 47 | - amd64 system running Linux 48 | - gcc toolchain 49 | - nightly Rust toolchain 50 | - cargo-xbuild 51 | - nasm 52 | 53 | ## How to build 54 | 55 | Just run `./build.sh`, the binaries are then found in the `binaries/` directory. 56 | For the Rust programs they are named `binaries/rust_*`, for the C programs 57 | `binaries/c_*`. 
58 | 59 | For the `bytes` program, you need to give a file as input to get statistics. 60 | You could run it on its own binary file, like this: 61 | `binaries/rust_bytes < binaries/rust_bytes`. 62 | -------------------------------------------------------------------------------- /src_c/guess.c: -------------------------------------------------------------------------------- 1 | void asm_exit(int exitcode); 2 | int asm_open(char *path, long long flags, long long mode); 3 | int asm_write(long long fd, char *buf, long long nbytes); 4 | int asm_read(long long fd, char *buf, long long nbytes); 5 | 6 | void print_byte(char c) { 7 | asm_write(1, &c, 1); 8 | } 9 | 10 | void println() { 11 | print_byte('\n'); 12 | } 13 | 14 | void print_str(char *str, int len) { 15 | asm_write(1, str, len); 16 | } 17 | 18 | void print_int(unsigned long long i) { 19 | char chars[20]; 20 | int k; 21 | 22 | if (i == 0) { 23 | print_byte('0'); 24 | return; 25 | } 26 | k = 0; 27 | while (i > 0) { 28 | chars[k] = '0' + (i % 10); 29 | i /= 10; 30 | k += 1; 31 | } 32 | while (k > 0) { 33 | print_byte(chars[k - 1]); 34 | k -= 1; 35 | } 36 | } 37 | 38 | int read_byte(int fd) { 39 | unsigned char c; 40 | if (asm_read(fd, &c, 1) == 1) { 41 | return c; 42 | } else { 43 | return -1; 44 | } 45 | } 46 | 47 | int read_line(char* line, int len) { 48 | int bytes_read = 0; 49 | int too_long = 0; 50 | int c; 51 | while (1) { 52 | c = read_byte(1); 53 | if (c == -1) { 54 | return -1; 55 | } 56 | if (bytes_read >= len) { 57 | too_long = 1; 58 | } else { 59 | line[bytes_read] = c; 60 | bytes_read++; 61 | } 62 | if (c == '\n') { 63 | break; 64 | } 65 | } 66 | if (too_long) { 67 | return -1; 68 | } else { 69 | return bytes_read; 70 | } 71 | } 72 | 73 | int is_digit(char c) { 74 | return ('0' <= c && c <= '9'); 75 | } 76 | 77 | long long read_int_line() { 78 | char line[11]; 79 | int bytes_read; 80 | int i; 81 | long long x; 82 | 83 | bytes_read = read_line(line, 11); 84 | if (bytes_read < 2) { 85 | return -1; 86 | } 
87 | x = 0; 88 | for (i = 0; i < bytes_read - 1; i++) { 89 | if (is_digit(line[i])) { 90 | x = 10 * x + line[i] - '0'; 91 | } else { 92 | return -1; 93 | } 94 | } 95 | if (x < 4294967296) { 96 | return x; 97 | } else { 98 | return -1; 99 | } 100 | } 101 | 102 | int random_int(int min, int max) { 103 | unsigned char bytes[64]; 104 | int fd; 105 | int i; 106 | int diff; 107 | int x; 108 | 109 | fd = asm_open("/dev/urandom", 0, 0); 110 | for (i = 0; i < 64; i++) { 111 | bytes[i] = read_byte(fd); 112 | } 113 | diff = max - min; 114 | x = 0; 115 | for (i = 0; i < 64; i++) { 116 | x = (256 * x + bytes[i]) % diff; 117 | } 118 | return min + x; 119 | } 120 | 121 | void _start() { 122 | int secret_number; 123 | long long guess; 124 | 125 | print_str("Guess the number!\n", 18); 126 | 127 | secret_number = random_int(1, 100 + 1); 128 | while (1) { 129 | print_str("Please input your guess.\n", 25); 130 | guess = read_int_line(); 131 | if (guess == -1) { 132 | continue; 133 | } 134 | print_str("You guessed: ", 13); print_int(guess); println(); 135 | 136 | if (guess < secret_number) { 137 | print_str("Too small!\n", 11); 138 | } else if (guess > secret_number) { 139 | print_str("Too big!\n", 9); 140 | } else { 141 | print_str("You win!\n", 9); 142 | break; 143 | } 144 | } 145 | asm_exit(0); 146 | } 147 | -------------------------------------------------------------------------------- /src_c/bytes.c: -------------------------------------------------------------------------------- 1 | void asm_exit(int exitcode); 2 | int asm_write(long long fd, char *buf, long long nbytes); 3 | int asm_read(long long fd, char *buf, long long nbytes); 4 | 5 | void print_byte(char c) { 6 | asm_write(1, &c, 1); 7 | } 8 | 9 | void println() { 10 | print_byte('\n'); 11 | } 12 | 13 | void print_str(char *str, int len) { 14 | asm_write(1, str, len); 15 | } 16 | 17 | void print_int(unsigned long long n, int base, int min_chars, char pad_char) { 18 | char chars[64]; 19 | int i, digit; 20 | 21 | for (i = 0; 
i < 64; i++) { 22 | chars[i] = 0; 23 | } 24 | chars[0] = '0'; 25 | for (i = 1; i < min_chars; i++) { 26 | chars[i] = pad_char; 27 | } 28 | 29 | i = 0; 30 | while (n > 0) { 31 | digit = n % base; 32 | if (digit >= 10) { 33 | chars[i] = 'a' + digit - 10; 34 | } else { 35 | chars[i] = '0' + digit; 36 | } 37 | n /= base; 38 | i++; 39 | } 40 | 41 | for (i = 63; i >= 0; i--) { 42 | if (chars[i] > 0) { 43 | print_byte(chars[i]); 44 | } 45 | } 46 | } 47 | 48 | int read_byte(int fd) { 49 | unsigned char c; 50 | if (asm_read(fd, &c, 1) == 1) { 51 | return c; 52 | } else { 53 | return -1; 54 | } 55 | } 56 | 57 | typedef struct { 58 | unsigned char byte; 59 | long long count; 60 | } CountForByte; 61 | 62 | int is_less(CountForByte a, CountForByte b) { 63 | if (a.count < b.count) { 64 | return 1; 65 | } else if (a.count > b.count) { 66 | return 0; 67 | } else { 68 | return (a.byte < b.byte); 69 | } 70 | } 71 | 72 | void insertion_sort(CountForByte *stats, int len) { 73 | int i, j; 74 | CountForByte tmp; 75 | 76 | for (i = 1; i < len; i++) { 77 | for (j = i - 1; j >= 0; j--) { 78 | if (is_less(stats[j + 1], stats[j])) { 79 | tmp = stats[j]; 80 | stats[j] = stats[j + 1]; 81 | stats[j + 1] = tmp; 82 | } 83 | } 84 | } 85 | } 86 | 87 | void print_stats(CountForByte *stats, int len) { 88 | int i; 89 | long long total_count, divisor, rel_frequency; 90 | 91 | total_count = 0; 92 | for (i = 0; i < len; i++) { 93 | total_count += stats[i].count; 94 | } 95 | if (total_count > 0) { 96 | divisor = total_count; 97 | } else { 98 | divisor = 1; 99 | } 100 | 101 | for (i = 0; i < len; i++) { 102 | rel_frequency = stats[i].count * 1000 / divisor; 103 | print_int(stats[i].byte, 16, 2, '0'); 104 | print_str(": ", 2); 105 | print_int(stats[i].count, 10, 10, ' '); 106 | print_str(" (", 2); 107 | print_int(rel_frequency / 10, 10, 2, ' '); 108 | print_str(".", 1); 109 | print_int(rel_frequency % 10, 10, 1, ' '); 110 | print_str("%)", 2); 111 | println(); 112 | } 113 | 114 | print_int(total_count, 10, 
1, ' '); 115 | print_str(" bytes", 6); 116 | println(); 117 | } 118 | 119 | void _start() { 120 | int i; 121 | int c; 122 | 123 | CountForByte stats[256]; 124 | for (i = 0; i < 256; i++) { 125 | stats[i].byte = i; 126 | stats[i].count = 0; 127 | } 128 | while (1) { 129 | c = read_byte(0); 130 | if (c == -1) { 131 | break; 132 | } 133 | stats[c].count++; 134 | } 135 | 136 | insertion_sort(stats, 256); 137 | print_stats(stats, 256); 138 | asm_exit(0); 139 | } 140 | -------------------------------------------------------------------------------- /src/bin/bytes.rs: -------------------------------------------------------------------------------- 1 | #![no_std] 2 | #![no_main] 3 | 4 | use core::panic::PanicInfo; 5 | 6 | extern { 7 | fn asm_exit(exit_code: isize); 8 | fn asm_write(fd: usize, buf: *const u8, nbytes: usize) -> isize; 9 | fn asm_read(fd: usize, buf: *const u8, nbytes: usize) -> isize; 10 | fn asm_panic(); 11 | } 12 | 13 | fn exit(exit_code: isize) { 14 | unsafe { asm_exit(exit_code); } 15 | } 16 | 17 | fn print_byte(c: u8) { 18 | unsafe { asm_write(1, &c, 1); } 19 | } 20 | 21 | fn println() { 22 | print_byte('\n' as u8); 23 | } 24 | 25 | fn print_str(s: &str) { 26 | unsafe { asm_write(1, s.as_ptr(), s.len()); } 27 | } 28 | 29 | fn print_int(n: u64, base: u8, min_chars: u8, pad_char: u8) { 30 | let mut digits:[u8; 64] = [0; 64]; 31 | digits[0] = '0' as u8; 32 | for i in 1..(min_chars as usize) { 33 | digits[i] = pad_char; 34 | } 35 | let mut x = n; 36 | let mut i = 0; 37 | while x > 0 { 38 | let value = (x % (base as u64)) as u8; 39 | digits[i] = if value >= 10 { 40 | value - 10 + ('a' as u8) 41 | } else { 42 | value + ('0' as u8) 43 | }; 44 | x /= base as u64; 45 | i += 1; 46 | } 47 | for digit in digits.iter().filter(|&&d| d > 0).rev() { 48 | print_byte(*digit); 49 | } 50 | } 51 | 52 | fn read_byte(fd: usize) -> Option<u8> { 53 | let c: u8 = 0; 54 | if unsafe { asm_read(fd, &c, 1) } == 1 { 55 | Some(c) 56 | } else { 57 | None 58 | } 59 | } 60 | 61 | //~ fn 
insertion_sort(v: &mut [T], is_less: F) 62 | //~ where F: Fn(&T, &T) -> bool, 63 | //~ { 64 | //~ for i in 1..v.len() { 65 | //~ for j in (0..i).rev() { 66 | //~ if is_less(&v[j + 1], &v[j]) { 67 | //~ v.swap(j, j + 1); 68 | //~ } 69 | //~ } 70 | //~ } 71 | //~ } 72 | 73 | fn insertion_sort(stats: &mut [CountForByte]) { 74 | for i in 1..stats.len() { 75 | for j in (0..i).rev() { 76 | if is_less(stats[j + 1], stats[j]) { 77 | stats.swap(j, j + 1); 78 | } 79 | } 80 | } 81 | } 82 | 83 | #[derive(Copy, Clone)] 84 | struct CountForByte { 85 | byte: u8, 86 | count: u64 87 | } 88 | 89 | // primary: ascending order on count 90 | // secondary: ascending order on byte 91 | fn is_less(a: CountForByte, b: CountForByte) -> bool { 92 | if a.count < b.count { 93 | true 94 | } else if a.count > b.count { 95 | false 96 | } else { 97 | a.byte < b.byte 98 | } 99 | } 100 | 101 | #[no_mangle] 102 | fn main() { 103 | let mut stats = [CountForByte {byte: 0, count: 0}; 256]; 104 | for i in 0 .. stats.len() { 105 | stats[i].byte = i as u8; 106 | } 107 | loop { 108 | match read_byte(0) { 109 | Some(c) => stats[c as usize].count += 1, 110 | None => break 111 | } 112 | } 113 | insertion_sort(&mut stats); 114 | print_stats(&stats); 115 | exit(0); 116 | } 117 | 118 | fn print_stats(stats: &[CountForByte]) { 119 | let total_count: u64 = stats.iter().map(|bs| bs.count).sum(); 120 | let divisor = if total_count == 0 {1} else {total_count}; 121 | for byte_stats in stats { 122 | let rel_frequency = byte_stats.count * 1000 / divisor; 123 | print_int(byte_stats.byte as u64, 16, 2, '0' as u8); 124 | print_str(": "); 125 | print_int(byte_stats.count as u64, 10, 10, ' ' as u8); 126 | print_str(" ("); 127 | print_int(rel_frequency / 10, 10, 2, ' ' as u8); 128 | print_str("."); 129 | print_int(rel_frequency % 10, 10, 1, ' ' as u8); 130 | print_str("%)"); 131 | println(); 132 | } 133 | print_int(total_count, 10, 1, ' ' as u8); 134 | print_str(" bytes"); 135 | println(); 136 | } 137 | 138 | #[panic_handler] 
139 | fn panic(_info: &PanicInfo) -> ! { 140 | // When using LTO optimization, this panic handler would 141 | // be optimized away if it only contained the loop. 142 | // This leads to undefined behavior in case of a panic. 143 | // We can prevent this by calling external (assembly) 144 | // code from the panic handler. 145 | unsafe { asm_panic(); } 146 | loop {} 147 | } 148 | -------------------------------------------------------------------------------- /src/bin/guess.rs: -------------------------------------------------------------------------------- 1 | #![no_std] 2 | #![no_main] 3 | 4 | use core::panic::PanicInfo; 5 | use core::cmp::Ordering; 6 | 7 | extern { 8 | fn asm_exit(exit_code: isize); 9 | fn asm_open(path: *const u8, flags: usize, mode: usize) -> isize; 10 | fn asm_write(fd: usize, buf: *const u8, nbytes: usize) -> isize; 11 | fn asm_read(fd: usize, buf: *const u8, nbytes: usize) -> isize; 12 | fn asm_panic(); 13 | } 14 | 15 | fn exit(exit_code: isize) { 16 | unsafe { asm_exit(exit_code); } 17 | } 18 | 19 | fn print_byte(c: u8) { 20 | unsafe { asm_write(1, &c, 1); } 21 | } 22 | 23 | fn println() { 24 | print_byte('\n' as u8); 25 | } 26 | 27 | fn print_str(s: &str) { 28 | unsafe { asm_write(1, s.as_ptr(), s.len()); } 29 | } 30 | 31 | fn print_int(n: u64) { 32 | let mut digits:[u8; 20] = [0; 20]; 33 | digits[0] = '0' as u8; 34 | let mut x = n; 35 | let mut i = 0; 36 | while x > 0 { 37 | digits[i] = ((x % 10) as u8) + ('0' as u8); 38 | x /= 10; 39 | i += 1; 40 | } 41 | for digit in digits.iter().filter(|&&d| d > 0).rev() { 42 | print_byte(*digit); 43 | } 44 | } 45 | 46 | fn read_byte(fd: usize) -> Option<u8> { 47 | let c: u8 = 0; 48 | if unsafe { asm_read(fd, &c, 1) } == 1 { 49 | Some(c) 50 | } else { 51 | None 52 | } 53 | } 54 | 55 | fn read_line(line: &mut [u8]) -> Option<usize> { 56 | let mut bytes_read = 0; 57 | let mut too_long = false; 58 | loop { 59 | match read_byte(1) { 60 | Some(c) => { 61 | if bytes_read >= line.len() { 62 | too_long = true; 63 | } 
else { 64 | line[bytes_read] = c; 65 | bytes_read += 1; 66 | } 67 | if c == '\n' as u8 { 68 | break; 69 | } 70 | }, 71 | None => return None, 72 | } 73 | } 74 | if too_long { 75 | None 76 | } else { 77 | Some(bytes_read) 78 | } 79 | } 80 | 81 | fn is_digit(c: u8) -> bool { 82 | '0' as u8 <= c && c <= '9' as u8 83 | } 84 | 85 | fn read_int_line() -> Option<u32> { 86 | let mut line: [u8; 11] = [0; 11]; 87 | let bytes_read = match read_line(&mut line) { 88 | Some(v) if v >= 2 => v, 89 | Some(_) => return None, 90 | None => return None 91 | }; 92 | let mut x = 0u64; 93 | for i in 0..(bytes_read - 1) { 94 | if is_digit(line[i]) { 95 | x = 10 * x + ((line[i] - ('0' as u8)) as u64); 96 | } else { 97 | return None; 98 | } 99 | } 100 | if x <= u32::max_value() as u64 { 101 | Some(x as u32) 102 | } else { 103 | None 104 | } 105 | } 106 | 107 | fn random_int(min: u32, max: u32) -> u32 { 108 | let path = "/dev/urandom\0"; 109 | let fd = unsafe { asm_open(path.as_ptr(), 0, 0) as usize }; 110 | let mut random_bytes: [u8; 64] = [0; 64]; 111 | for i in 0..random_bytes.len() { 112 | random_bytes[i] = read_byte(fd).unwrap(); 113 | } 114 | let diff = max - min; 115 | let mut x = 0; 116 | for i in 0..random_bytes.len() { 117 | x = ((256 * x) + (random_bytes[i] as u64)) % (diff as u64); 118 | } 119 | min + (x as u32) 120 | } 121 | 122 | #[no_mangle] 123 | fn main() { 124 | print_str("Guess the number!\n"); 125 | 126 | let secret_number = random_int(1, 100 + 1); 127 | 128 | loop { 129 | print_str("Please input your guess.\n"); 130 | 131 | let guess: u32 = match read_int_line() { 132 | Some(num) => num, 133 | None => continue, 134 | }; 135 | 136 | print_str("You guessed: "); print_int(guess as u64); println(); 137 | 138 | match guess.cmp(&secret_number) { 139 | Ordering::Less => print_str("Too small!\n"), 140 | Ordering::Greater => print_str("Too big!\n"), 141 | Ordering::Equal => { 142 | print_str("You win!\n"); 143 | break; 144 | } 145 | } 146 | } 147 | exit(0); 148 | } 149 | 150 | 
#[panic_handler] 151 | fn panic(_info: &PanicInfo) -> ! { 152 | // When using LTO optimization, this panic handler would 153 | // be optimized away if it only contained the loop. 154 | // This leads to undefined behavior in case of a panic. 155 | // We can prevent this by calling external (assembly) 156 | // code from the panic handler. 157 | unsafe { asm_panic(); } 158 | loop {} 159 | } 160 | 161 | -------------------------------------------------------------------------------- /mini_elf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | """ 3 | Create a minimal executable ELF file: 4 | This takes an input ELF file, extracts the .text segment 5 | and compiles it into a new, minimal ELF file, 6 | containing only the ELF header, one program header 7 | and the text segment. 8 | For the program not to segfault, the .text segment 9 | of the original program must be linked so it works 10 | when mapped to start at virtual address 0x400078. 
11 | """ 12 | 13 | from sys import stdin, stdout, stderr 14 | pdbg = lambda msg: print(msg, file=stderr) 15 | 16 | 17 | def from_little_endian(raw_bytes): 18 | x = 0 19 | for b in raw_bytes[::-1]: 20 | x = 0x100 * x + b 21 | return x 22 | 23 | def to_little_endian(number, nbytes): 24 | bytestr = bytes() 25 | while number > 0: 26 | bytestr += bytes([number % 256]) 27 | number //= 256 28 | if len(bytestr) > nbytes: 29 | raise Exception("number too big") 30 | bytestr += bytes([0x00] * (nbytes - len(bytestr))) 31 | return bytestr 32 | 33 | 34 | # read ELF header, parse entry point and program header start, 35 | # number of program headers 36 | indata = stdin.buffer.read() 37 | elf_header = indata[:64] 38 | entry_point_raw = elf_header[0x18:0x20] 39 | pdbg("entry point: " + str(from_little_endian(entry_point_raw))) 40 | phdr_offset = from_little_endian(elf_header[0x20:0x28]) 41 | pdbg("phdr offset: " + str(phdr_offset)) 42 | num_phdrs = from_little_endian(elf_header[0x38:0x3a]) 43 | pdbg("number of phdrs: " + str(num_phdrs)) 44 | 45 | # iterate over program headers, find the first one 46 | # which has a nonzero size and the executable flag set 47 | # this contains the boundaries of the .text segment 48 | program_start, program_size = None, None 49 | for i in range(num_phdrs): 50 | # ELF64 - program headers are 56 bytes long 51 | phdr_start = phdr_offset + 56 * i 52 | phdr_end = phdr_offset + 56 * (i + 1) 53 | phdr = indata[phdr_start:phdr_end] 54 | phdr_flags = from_little_endian(phdr[4:8]) 55 | phdr_program_start = from_little_endian(phdr[8:16]) 56 | phdr_program_size = from_little_endian(phdr[40:48]) 57 | # 0x4 - read, 0x2 - write, 0x1 - exec 58 | if phdr_flags & 0x1 and phdr_program_size > 0: 59 | program_start = phdr_program_start 60 | program_size = phdr_program_size 61 | break 62 | 63 | if program_start is None: 64 | raise Exception("found no suitable program header") 65 | 66 | pdbg("program start: " + str(program_start) 67 | + ", program size: " + 
str(program_size)) 68 | # extract the .text segment 69 | program_data = indata[program_start:program_start + program_size] 70 | 71 | 72 | # write ELF header 73 | data = bytes() 74 | data += bytes([0x7f, 0x45, 0x4c, 0x46]) # magic 4 bytes 75 | data += bytes([0x02]) # 64 bit format 76 | data += bytes([0x01]) # little endian 77 | data += bytes([0x01]) # current ELF version 78 | data += bytes([0x00]) # OS ABI - System V 79 | data += bytes([0x00]) # ABI version 80 | data += bytes([0x00, 0x00, 0x00, 0x00, # padding 81 | 0x00, 0x00, 0x00]) 82 | data += bytes([0x02, 0x00]) # object file type - EXEC 83 | data += bytes([0x3e, 0x00]) # target architecture - AMD64 84 | data += bytes([0x01, 0x00, 0x00, 0x00]) # ELF version 85 | data += entry_point_raw # entry point of the original program (8 bytes) 86 | data += bytes([0x40, 0x00, 0x00, 0x00, # program header offset 87 | 0x00, 0x00, 0x00, 0x00]) # starts right after the 88 | # ELF header 89 | data += bytes([0x00, 0x00, 0x00, 0x00, # section header offset 90 | 0x00, 0x00, 0x00, 0x00]) # there are no section headers 91 | data += bytes([0x00, 0x00, 0x00, 0x00]) # machine specific flags 92 | data += bytes([0x40, 0x00]) # size of this header - 64 bytes 93 | data += bytes([0x38, 0x00]) # program header size - 56 bytes 94 | data += bytes([0x01, 0x00]) # number of program headers - 1 95 | data += bytes([0x40, 0x00]) # section header size - 64 bytes 96 | data += bytes([0x00, 0x00]) # number of section headers - 0 97 | data += bytes([0x00, 0x00]) # index of section name table 98 | 99 | # write program header 100 | data += bytes([0x01, 0x00, 0x00, 0x00]) # segment type - LOAD 101 | data += bytes([0x07, 0x00, 0x00, 0x00]) # flags - RWX 102 | data += bytes([0x78, 0x00, 0x00, 0x00, 103 | 0x00, 0x00, 0x00, 0x00]) # offset of loadable data 104 | # within the ELF file 105 | # data starts after the ELF header 106 | # and program header (120 bytes) 107 | data += bytes([0x78, 0x00, 0x40, 0x00, # virtual address of 108 | 0x00, 0x00, 0x00, 0x00]) # loadable 
data, aligned 109 | # with offset (mod 4096) 110 | data += bytes([0x78, 0x00, 0x40, 0x00, # physical address of 111 | 0x00, 0x00, 0x00, 0x00]) # loadable data 112 | # use same as virtual 113 | data += to_little_endian(program_size, 8) # size of loadable data 114 | # within the ELF file 115 | # same as for original program 116 | data += to_little_endian(program_size, 8) # size of loadable data 117 | # when in memory, same value 118 | data += bytes([0x00, 0x10, 0x00, 0x00, # alignment of segment 119 | 0x00, 0x00, 0x00, 0x00]) # in memory - 4KB 120 | 121 | # finally, paste the code of the original program 122 | data += program_data 123 | stdout.buffer.write(data) 124 | --------------------------------------------------------------------------------