├── data ├── .gitignore ├── Makefile ├── test_dyn.c └── test_exec.c ├── .gitignore ├── Cargo.toml ├── shim ├── link.x └── main.rs ├── LICENSE-0BSD.txt ├── src ├── main.rs ├── repr.rs ├── parse.rs └── emit.rs ├── Cargo.lock └── README.md /data/.gitignore: -------------------------------------------------------------------------------- 1 | *.elf 2 | *.so 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /*.elf 3 | /*.so 4 | -------------------------------------------------------------------------------- /data/Makefile: -------------------------------------------------------------------------------- 1 | test_exec.elf: test_exec.c libtest_dyn.so 2 | musl-gcc -fPIC -fPIE -o $@ $^ 3 | libtest_dyn.so: test_dyn.c 4 | musl-gcc -fPIC -shared -o $@ $^ 5 | clean: 6 | rm -f *.elf *.so 7 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "superlinker" 3 | version = "0.1.0" 4 | edition = "2021" 5 | 6 | [dependencies] 7 | elf = "0.7.4" 8 | object = { version = "0.36.5", default-features = false, features = ["elf", "write_std"] } 9 | 10 | [build-dependencies] 11 | elf = "0.7.4" 12 | -------------------------------------------------------------------------------- /data/test_dyn.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int def_in_dyn = 1234; 4 | extern int def_in_exec; 5 | 6 | void dyn_main() { 7 | puts("hello from dyn_main()!"); 8 | printf("dyn: def_in_exec=%d def_in_dyn=%d\n", def_in_exec, def_in_dyn); 9 | def_in_dyn = 2; 10 | def_in_exec = 4; 11 | printf("dyn: def_in_exec=%d def_in_dyn=%d\n", def_in_exec, def_in_dyn); 12 | puts("goodbye from dyn_main()!"); 13 | } 14 | 
-------------------------------------------------------------------------------- /shim/link.x: -------------------------------------------------------------------------------- 1 | ENTRY(_start) 2 | 3 | PHDRS { 4 | load PT_LOAD; 5 | dynamic PT_DYNAMIC; 6 | } 7 | 8 | SECTIONS { 9 | shim_base = .; 10 | .text : { 11 | *(.text.entry) 12 | *(.text .text.*) 13 | *(.rodata .rodata.*) 14 | . = ALIGN(8); 15 | } : load 16 | shim_data = .; 17 | .dynamic : { *(.dynamic) } : dynamic 18 | /DISCARD/ : { *(.dynsym .gnu.hash .hash .dynstr) } 19 | } 20 | -------------------------------------------------------------------------------- /data/test_exec.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | extern int def_in_dyn; 4 | int def_in_exec = 5678; 5 | 6 | void dyn_main(); 7 | 8 | int main() { 9 | puts("hello from main()!"); 10 | printf("exec: def_in_exec=%d def_in_dyn=%d\n", def_in_exec, def_in_dyn); 11 | def_in_dyn = 1; 12 | def_in_exec = 3; 13 | printf("exec: def_in_exec=%d def_in_dyn=%d\n", def_in_exec, def_in_dyn); 14 | dyn_main(); 15 | printf("exec: def_in_exec=%d def_in_dyn=%d\n", def_in_exec, def_in_dyn); 16 | puts("goodbye from main()!"); 17 | } 18 | -------------------------------------------------------------------------------- /LICENSE-0BSD.txt: -------------------------------------------------------------------------------- 1 | Permission to use, copy, modify, and/or distribute this software for 2 | any purpose with or without fee is hereby granted. 3 | 4 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 5 | WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 6 | MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 7 | ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 8 | WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN 9 | AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT 10 | OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 11 | 12 | -------------------------------------------------------------------------------- /src/main.rs: -------------------------------------------------------------------------------- 
use elf::endian::AnyEndian;

mod repr;
mod parse;
mod emit;

/// Sets the permission bits of `path` to `0o755` so the emitted image can be run directly.
///
/// Any error from reading the metadata or from changing the permissions is returned.
#[cfg(unix)]
fn make_executable<P: AsRef<std::path::Path>>(path: P) -> std::io::Result<()> {
    use std::os::unix::fs::PermissionsExt;

    let mut perms = std::fs::metadata(&path)?.permissions();
    perms.set_mode(0o755);
    std::fs::set_permissions(&path, perms)?;
    Ok(())
}

/// On targets without Unix permission bits there is nothing to change, so this is a no-op.
/// It exists so that the tool still builds there.
#[cfg(not(unix))]
fn make_executable<P: AsRef<std::path::Path>>(_path: P) -> std::io::Result<()> {
    Ok(())
}

/// Command-line entry point: `superlinker <output> <input> [<merge> ...]`.
///
/// Parses `<input>`, merges every `<merge>` image into it in command-line order, then writes
/// the result to `<output>` and marks it executable. Prints a usage line and exits with
/// status 1 when fewer than two arguments are given. I/O and parse failures still panic with
/// the same messages as before.
fn main() {
    use std::path::PathBuf;

    // `args_os` rather than `args`: file names are not required to be valid UTF-8, and `args`
    // panics on the first argument that is not.
    let mut args = std::env::args_os();
    let program = args.next().unwrap_or_else(|| "superlinker".into());
    let (output_filename, input_filename) = match (args.next(), args.next()) {
        (Some(output), Some(input)) => (PathBuf::from(output), PathBuf::from(input)),
        _ => {
            eprintln!(
                "Usage: {} <output> <input> [<merge> [<merge> ...]]",
                program.to_string_lossy()
            );
            std::process::exit(1);
        }
    };

    let input_data = std::fs::read(&input_filename).expect("Could not read input file");
    // The file name doubles as the image name; it is `None` if it is not valid UTF-8.
    let input_soname = input_filename.file_name().and_then(|name| name.to_str());
    let mut input_image = parse::parse_elf::<AnyEndian>(&input_data[..], input_soname)
        .expect("Could not parse input file");

    // Every remaining argument is an image to merge, in command-line order.
    for merge_filename in args.map(PathBuf::from) {
        let merge_data = std::fs::read(&merge_filename).expect("Could not read merge file");
        let merge_soname = merge_filename.file_name().and_then(|name| name.to_str());
        let merge_image = parse::parse_elf::<AnyEndian>(&merge_data[..], merge_soname)
            .expect("Could not parse merge file");
        merge_image.merge_into(&mut input_image);
    }

    let output_data = emit::emit_elf(&input_image).expect("Could not emit output file");
    std::fs::write(&output_filename, output_data).expect("Could not write output file");
    make_executable(&output_filename).expect("Could not make output file executable");
}
-------------------------------------------------------------------------------- /Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 3 | version = 3 4 | 5 | [[package]] 6 | name = "cfg-if" 7 | version = "1.0.0" 8 | source = "registry+https://github.com/rust-lang/crates.io-index" 9 | checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" 10 | 11 | [[package]] 12 | name = "crc32fast" 13 | version = "1.4.2" 14 | source = "registry+https://github.com/rust-lang/crates.io-index" 15 | checksum = "a97769d94ddab943e4510d138150169a2758b5ef3eb191a9ee688de3e23ef7b3" 16 | dependencies = [ 17 | "cfg-if", 18 | ] 19 | 20 | [[package]] 21 | name = "elf" 22 | version = "0.7.4" 23 | source = "registry+https://github.com/rust-lang/crates.io-index" 24 | checksum = "4445909572dbd556c457c849c4ca58623d84b27c8fff1e74b0b4227d8b90d17b" 25 | 26 | [[package]] 27 | name = "equivalent" 28 | version = "1.0.1" 29 | source = "registry+https://github.com/rust-lang/crates.io-index" 30 | checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" 31 | 32 | [[package]] 33 | name = "foldhash" 34 | version = "0.1.3" 35 | source = "registry+https://github.com/rust-lang/crates.io-index" 36 | checksum = "f81ec6369c545a7d40e4589b5597581fa1c441fe1cce96dd1de43159910a36a2" 37 | 38 | [[package]] 39 | name = "hashbrown" 40 | version = "0.15.0" 41 | source = "registry+https://github.com/rust-lang/crates.io-index" 42 | checksum = "1e087f84d4f86bf4b218b927129862374b72199ae7d8657835f1e89000eea4fb" 43 | dependencies = [ 44 | "foldhash", 45 | ] 46 | 47 | [[package]] 48 | name = "indexmap" 49 | version = "2.6.0" 50 | source = "registry+https://github.com/rust-lang/crates.io-index" 51 | 
checksum = "707907fe3c25f5424cce2cb7e1cbcafee6bdbe735ca90ef77c29e84591e5b9da" 52 | dependencies = [ 53 | "equivalent", 54 | "hashbrown", 55 | ] 56 | 57 | [[package]] 58 | name = "memchr" 59 | version = "2.7.4" 60 | source = "registry+https://github.com/rust-lang/crates.io-index" 61 | checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" 62 | 63 | [[package]] 64 | name = "object" 65 | version = "0.36.5" 66 | source = "registry+https://github.com/rust-lang/crates.io-index" 67 | checksum = "aedf0a2d09c573ed1d8d85b30c119153926a2b36dce0ab28322c09a117a4683e" 68 | dependencies = [ 69 | "crc32fast", 70 | "hashbrown", 71 | "indexmap", 72 | "memchr", 73 | ] 74 | 75 | [[package]] 76 | name = "superlinker" 77 | version = "0.1.0" 78 | dependencies = [ 79 | "elf", 80 | "object", 81 | ] 82 | -------------------------------------------------------------------------------- /shim/main.rs: -------------------------------------------------------------------------------- 1 | #![feature(auto_traits)] 2 | #![feature(decl_macro)] 3 | #![feature(intrinsics)] 4 | #![feature(lang_items)] 5 | #![feature(no_core)] 6 | #![feature(rustc_attrs)] 7 | #![allow(internal_features)] 8 | #![no_core] 9 | #![no_std] 10 | #![no_main] 11 | #![no_builtins] 12 | 13 | #[repr(C)] 14 | struct ShimData { 15 | user_entry_rel: usize, 16 | interp_entry_rel: usize, 17 | interp_base_rel: usize, 18 | interp_phnum: usize, 19 | } 20 | 21 | #[repr(C)] 22 | struct Auxv { 23 | tag: usize, 24 | value: usize, 25 | } 26 | 27 | const AT_BASE: usize = 0x7; 28 | const AT_ENTRY: usize = 0x9; 29 | const AT_PHNUM: usize = 0x5; 30 | 31 | unsafe fn find_auxv(mut stack: *mut usize) -> *mut Auxv { 32 | let argc = *stack; 33 | 34 | // Skip argc, arguments, terminating NULL 35 | stack = offset(stack, wrapping_add(argc, 2)); 36 | 37 | // Skip environment 38 | while *stack != 0 { 39 | stack = offset(stack, 1); 40 | } 41 | 42 | // Skip terminating NULL 43 | stack = offset(stack, 1); 44 | 45 | stack as _ 46 | } 47 | 
48 | #[no_mangle] // Make disassembly slightly easier to read 49 | #[deny(dead_code, reason = "If you see this it means the architecture is unsupported")] 50 | unsafe extern "C" fn shim_main(stack: *mut usize, data: &ShimData, base: usize) -> usize { 51 | let mut auxv = find_auxv(stack); 52 | 53 | loop { 54 | let a: &mut Auxv = &mut *auxv; 55 | auxv = offset(auxv, 1); 56 | 57 | match a.tag { 58 | 0 => break, 59 | AT_BASE => a.value = wrapping_add(data.interp_base_rel, base), 60 | AT_ENTRY => a.value = wrapping_add(data.user_entry_rel, base), 61 | AT_PHNUM => a.value = data.interp_phnum, 62 | _ => {} 63 | } 64 | } 65 | 66 | wrapping_add(data.interp_entry_rel, base) 67 | } 68 | 69 | #[cfg(target_arch = "x86_64")] 70 | global_asm!( 71 | r#" 72 | .pushsection .text.entry 73 | 74 | .global _start 75 | _start: 76 | mov rbp, rsp // Save initial stack pointer 77 | 78 | // Args for shim_main 79 | mov rdi, rsp 80 | lea rsi, [rip + shim_data] 81 | lea rdx, [rip + shim_base] 82 | 83 | and rsp, -16 // Align stack for function call 84 | call {shim_main} 85 | 86 | mov rsp, rbp // Restore stack 87 | jmp rax // Jump to interp_entry 88 | 89 | .popsection 90 | "#, 91 | shim_main = sym shim_main 92 | ); 93 | 94 | // >>> Here be dragons <<< 95 | 96 | mod intrinsic { 97 | use super::*; 98 | 99 | extern "rust-intrinsic" { 100 | #[rustc_safe_intrinsic] 101 | #[rustc_nounwind] 102 | pub(super) fn wrapping_add(a: T, b: T) -> T; 103 | 104 | #[rustc_nounwind] 105 | pub(super) fn offset(dst: Ptr, offset: Delta) -> Ptr; 106 | } 107 | } 108 | 109 | unsafe fn offset(dst: *mut T, offset: usize) -> *mut T { 110 | intrinsic::offset(dst, offset) 111 | } 112 | 113 | fn wrapping_add(a: usize, b: usize) -> usize { 114 | intrinsic::wrapping_add(a, b) 115 | } 116 | 117 | #[rustc_builtin_macro] 118 | pub macro global_asm() {} 119 | 120 | #[lang = "sized"] 121 | trait Sized {} 122 | 123 | #[lang = "receiver"] 124 | trait Receiver {} 125 | impl Receiver for &T {} 126 | 127 | #[lang = "freeze"] 128 | auto 
trait Freeze {} 129 | 130 | #[lang = "copy"] 131 | trait Copy {} 132 | impl Copy for *mut T {} 133 | impl Copy for usize {} 134 | impl Copy for bool {} 135 | 136 | #[allow(dead_code)] // Spurious 137 | #[lang = "eq"] 138 | trait PartialEq { 139 | fn eq(&self, other: &Rhs) -> bool; 140 | fn ne(&self, other: &Rhs) -> bool; 141 | } 142 | 143 | impl PartialEq for usize { 144 | fn eq(&self, other: &usize) -> bool { 145 | *self == *other 146 | } 147 | fn ne(&self, other: &usize) -> bool { 148 | *self != *other 149 | } 150 | } 151 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # superlinker 2 | 3 | Superlinker is a tool that can combine executables and shared libraries into even larger products, just like object files are combined into executables and shared libraries. It works well enough for [building a self-contained Python distribution](#python) from off-the-shelf packages. 4 | 5 | ## Why? 6 | 7 | Wouldn't it be funny if your entire OS image consisted of only one shared object that was *really*, ***really*** large? 8 | 9 | ## How? 10 | 11 | Superlinker is structured essentially like a compiler whose inputs and outputs are interpreted programs (ELF `ET_DYN` PIE executables or shared libraries). Its frontend lifts an ELF `ET_DYN` object into an abstract and simple intermediate representation, and its backend lowers this representation back to an ELF `ET_DYN` object. While memory mappings (`PT_LOAD` segments) are retained essentially intact, none of the ELF headers are copied from the inputs to the outputs. Of the many possible transformations, the currently implemented one rebases and merges several ELF objects. This approach is quite robust. 12 | 13 | Additionally, Superlinker is able to merge the dynamic linker itself into an executable, which enables transforming system-dependent executables into executables that run anywhere. 
(The resulting executable is still an `ET_DYN` object to retain the benefits of ASLR, but it has no load-time dependencies.) This is implemented with an executable shim that emulates the kernel ABI for `PT_INTERP` loaded objects, and so is not tied to a specific libc, but it is currently only tested with [musl libc][]. 14 | 15 | The intermediate representation features an architecture-, target-, and (somewhat) format-independent model of loadable segments, relocations, symbols, and image interpreters, biased towards ELF without directly requiring it. The frontend and backend are currently ported to `amd64` only. Although not strictly required for functioning, section headers are emitted as a courtesy for `libbfd` based tools (e.g. `objdump`). 16 | 17 | [musl libc]: https://musl-libc.org 18 | 19 | ## Use? 20 | 21 | First, install [Rust][] and run `cargo build`. 22 | 23 | ``` 24 | Usage: ./target/debug/superlinker <output> <input> [<merge> [<merge> ...]] 25 | ``` 26 | 27 | [rust]: https://rust-lang.org/ 28 | 29 | ## Show? 30 | 31 | ``` 32 | $ make -C data # prepare test files 33 | $ ./data/test_exec.elf 34 | Error loading shared library libtest_dyn.so: No such file or directory (needed by ./data/test_exec.elf) 35 | Error relocating ./data/test_exec.elf: dyn_main: symbol not found 36 | $ readelf -d ./data/test_exec.elf 37 | 38 | Dynamic section at offset 0x2e00 contains 25 entries: 39 | Tag Type Name/Value 40 | 0x0000000000000001 (NEEDED) Shared library: [libtest_dyn.so] 41 | 0x0000000000000001 (NEEDED) Shared library: [libc.so] 42 | 0x000000000000000c (INIT) 0x1000 43 | ... 
44 | $ ./target/debug/superlinker merged.elf data/test_exec.elf data/libtest_dyn.so /lib/x86_64-linux-musl/libc.so 45 | merge_into: merging source image "libtest_dyn.so" into target image "test_exec.elf" 46 | merge_into: rebasing source image by +0x5000 47 | merge_into: ignoring source special symbol _init 48 | merge_into: using source global symbol dyn_main to resolve target import 49 | merge_into: ignoring source special symbol _fini 50 | merge_into: removing extinguished dependency "libtest_dyn.so" 51 | merge_into: merging source image "libc.so" into target image "test_exec.elf" 52 | merge_into: rebasing source image by +0xa000 53 | merge_into: using source global symbol puts to resolve target import 54 | merge_into: forcing target special symbol _init to come from libc 55 | merge_into: forcing target special symbol _fini to come from libc 56 | merge_into: using source global symbol __cxa_finalize to resolve target missing weak symbol 57 | merge_into: using source global symbol __libc_start_main to resolve target import 58 | merge_into: removing extinguished dependency "libc.so" 59 | merge_into: embedding the source image into target object as its interpreter 60 | $ ./merged.elf 61 | hello from main()! 62 | hello from dyn_main()! 63 | $ readelf -d ./merged.elf 64 | 65 | Dynamic section at offset 0x2000 contains 9 entries: 66 | Tag Type Name/Value 67 | 0x0000000000000005 (STRTAB) 0x20a0 68 | 0x000000000000000a (STRSZ) 15767 (bytes) 69 | 0x000000000000000b (SYMENT) 24 (bytes) 70 | 0x0000000000000006 (SYMTAB) 0x5e38 71 | 0x0000000000000004 (HASH) 0xfe70 72 | 0x0000000000000007 (RELA) 0x11940 73 | 0x0000000000000008 (RELASZ) 2640 (bytes) 74 | 0x0000000000000009 (RELAENT) 24 (bytes) 75 | 0x0000000000000000 (NULL) 0x0 76 | ``` 77 | 78 | ## Flaws? 79 | 80 | Although the core approach is sound, this implementation has flaws, most of which are fixable: 81 | 82 | - All of the code continues to use the dynamic linking ABI, i.e. 
procedure calls go through PLT and global accesses go through GOT. This is the only flaw inherent to the approach. 83 | - Executable and shared object formats are notoriously complex and this implementation is bound to have bugs. 84 | - Moreover, some of the more obscure features are not implemented rigorously or at all (e.g. symbol scoping, visibility, and versioning). 85 | - All GOT and PLT optimizations are disabled. (This means that `DT_JMPREL`, `DT_PLTREL`, and `DT_PLTRELSZ` entries are stripped.) 86 | - PLT optimizations at least could be added back with additional work. 87 | - Only the `global-dynamic` TLS model is supported. 88 | - Only "Rela" relocations are implemented and tested, though "Rel" relocations would be trivial to add. 89 | - `DT_GNU_HASH` is not supported, and the number of `DT_HASH` buckets is randomly fixed at 4. 90 | - Although ASLR is supported (Superlinker only produces position independent executables), `PT_GNU_STACK` and `PT_GNU_RELRO` are not supported and stripped. 91 | - Exception handling currently isn't supported, and `PT_GNU_EH_FRAME` is stripped. 92 | - Some of the internal book-keeping probably has O(n²) complexity. 93 | 94 | The implementation is less than a thousand lines long, written with portability in mind, and extensively commented, so it should not be too difficult to address most of these flaws. It should even run on Windows! 95 | 96 | ## Python? 97 | 98 | Although tedious, it is possible to use Superlinker to build a fully self-contained Python distribution without source modifications or, in fact, touching source at all. First, link the combination of the Python executable, its dependencies, and essential modules. 
Using Alpine Linux 3.20 as the base distribution, run: 99 | 100 | ``` 101 | # apk add python3 102 | $ ./superlinker py.elf /usr/bin/python3.12 /usr/lib/libpython3.12.so.1.0 \ 103 | /usr/lib/python3.12/lib-dynload/math.cpython-312-x86_64-linux-musl.so \ 104 | /usr/lib/python3.12/lib-dynload/binascii.cpython-312-x86_64-linux-musl.so \ 105 | /usr/lib/python3.12/lib-dynload/zlib.cpython-312-x86_64-linux-musl.so \ 106 | /usr/lib/python3.12/lib-dynload/array.cpython-312-x86_64-linux-musl.so \ 107 | /usr/lib/python3.12/lib-dynload/_struct.cpython-312-x86_64-linux-musl.so \ 108 | /usr/lib/python3.12/lib-dynload/_ctypes.cpython-312-x86_64-linux-musl.so \ 109 | /usr/lib/python3.12/lib-dynload/readline.cpython-312-x86_64-linux-musl.so \ 110 | /usr/lib/libreadline.so.8 /usr/lib/libncursesw.so.6 /usr/lib/libffi.so.8 \ 111 | /lib/libz.so.1 /lib/ld-musl-x86_64.so.1 112 | ``` 113 | 114 | Python has a little-known feature where it can [treat a zip archive as if it were a directory][zipimport], which will come in handy when packaging the (portable subset of) standard library modules: 115 | 116 | ``` 117 | # apk add fastjar 118 | $ fastjar 0cvf py.zip -C /usr/lib/python3.12/ . 119 | ``` 120 | 121 | Note the `0` (that's a zero) option for `fastjar`; Python loads compressed zip archives using its own `zipimport` standard library module, which means that it cannot be compressed when it is a part of a zip archive itself. 122 | 123 | Even though Python has all of these modules linked into it, it's currently unaware of that, and an attempt to import any of them will fail. 
This can be solved with a little bit of Python code: 124 | 125 | ``` 126 | $ cat >sitecustomize.py <py.run 150 | $ chmod +x py.run 151 | ``` 152 | 153 | The final touch this distribution needs is the `PYTHONPATH` environment variable: 154 | 155 | ``` 156 | $ PYTHONPATH=$(pwd)/py.run ./py.run 157 | Could not find platform independent libraries 158 | Could not find platform dependent libraries 159 | Python 3.12.7 (main, Oct 7 2024, 11:30:19) [GCC 13.2.1 20240309] on linux 160 | Type "help", "copyright", "credits" or "license" for more information. 161 | >>> import sys, zipfile 162 | >>> print([zi.filename for zi in zipfile.ZipFile(sys.executable).filelist][:10]) 163 | ['META-INF/', 'META-INF/MANIFEST.MF', './', '_collections_abc.py', 'socket.py', '__pycache__/', '__pycache__/heapq.cpython-312.pyc', '__pycache__/codecs.cpython-312.pyc', '__pycache__/shutil.cpython-312.pyc', '__pycache__/ssl.cpython-312.pyc'] 164 | >>> import zlib 165 | >>> zlib.crc32(b"spam") 166 | 1138425661 167 | ``` 168 | 169 | [zipimport]: https://docs.python.org/3/library/zipimport.html 170 | 171 | ## Past? 172 | 173 | If you like Superlinker, you might also enjoy [unfork][]. 174 | 175 | [unfork]: https://github.com/whitequark/unfork 176 | 177 | ## License? 178 | 179 | [0-clause BSD](LICENSE-0BSD.txt). 
180 | -------------------------------------------------------------------------------- /src/repr.rs: -------------------------------------------------------------------------------- 1 | use std::collections::{HashMap, HashSet}; 2 | 3 | #[derive(Debug, Clone, Copy, PartialEq, Eq)] 4 | pub enum LoadMode { 5 | ReadOnly, 6 | ReadWrite, 7 | ReadExecute, 8 | } 9 | 10 | #[derive(Debug, Clone)] 11 | pub struct LoadSegment { 12 | pub addr: u64, // virtual address, relative to object base 13 | pub size: u64, // size in virtual memory 14 | pub data: Vec, // data to load at [addr..addr+size); can be smaller than size in virtual memory 15 | pub mode: LoadMode, 16 | } 17 | 18 | #[derive(Debug, Clone, Copy, PartialEq, Eq)] 19 | pub enum SymbolKind { 20 | Code, 21 | Data, 22 | Unknown, 23 | } 24 | 25 | #[derive(Debug, Clone, Copy, PartialEq, Eq)] 26 | pub enum SymbolScope { 27 | Local, 28 | Global, 29 | Import, 30 | Weak, 31 | } 32 | 33 | #[derive(Debug, Clone, PartialEq, Eq)] 34 | pub struct Symbol { 35 | pub name: String, 36 | pub kind: SymbolKind, 37 | pub scope: SymbolScope, 38 | pub value: u64, 39 | pub size: u64, 40 | pub abs: bool, 41 | } 42 | 43 | #[derive(Debug, Clone)] 44 | pub enum RelocationTarget { 45 | // R_X86_64_64 46 | // R_X86_64_GLOB_DAT 47 | // R_X86_64_JUMP_SLOT 48 | // = S + A 49 | Symbol { symbol: String, addend: i64 }, 50 | // R_X86_64_RELATIVE 51 | // = B + A 52 | Base { addend: i64 }, 53 | // R_X86_64_COPY 54 | Copy { symbol: String }, 55 | // R_X86_64_NONE 56 | None, 57 | // ... to be continued? 
58 | 59 | ElfSpecific(u32), // any that doesn't need and/or can't be portably processed 60 | } 61 | 62 | #[derive(Debug, Clone)] 63 | pub struct Relocation { 64 | pub offset: u64, 65 | pub target: RelocationTarget, 66 | } 67 | 68 | #[derive(Debug, Clone)] 69 | pub enum Interpreter { 70 | Absent, 71 | External(String), 72 | Internal { base: u64, entry: u64, segments: usize }, 73 | } 74 | 75 | #[derive(Debug, Clone)] 76 | pub struct Image { 77 | pub machine: u16, // ELF machine 78 | pub alignment: u64, // integer that is a power of 2 79 | pub segments: Vec, // sorted in ascending order 80 | pub tls_image: Option>, 81 | pub symbols: Vec, 82 | pub relocations: Vec, 83 | pub initializers: Vec, 84 | pub finalizers: Vec, 85 | pub dependencies: Vec, // requests images by name 86 | pub image_names: Vec, // requested via dependencies 87 | pub interpreter: Interpreter, 88 | pub entry: u64, 89 | } 90 | 91 | impl Image { 92 | pub fn display_image_name(&self) -> &str { 93 | self.image_names.first().map(|name| &name[..]).unwrap_or("") 94 | } 95 | 96 | pub fn segment_bounds(&self) -> (u64, u64) { 97 | match (self.segments.first(), self.segments.last()) { 98 | (Some(first), Some(last)) => 99 | (first.addr, ((last.addr + last.size - 1) | (self.alignment - 1)) + 1), 100 | _ => (0, 0) 101 | } 102 | } 103 | 104 | pub fn rebase(&mut self, offset: u64) { 105 | assert!(offset % self.alignment == 0, "Rebase offset must be aligned"); 106 | for segment in self.segments.iter_mut() { 107 | segment.addr += offset; 108 | } 109 | for symbol in self.symbols.iter_mut() { 110 | // The intermediate representation currently doesn't include absolute symbols. 111 | if symbol.value != 0 { 112 | symbol.value += offset; 113 | } 114 | } 115 | for relocation in self.relocations.iter_mut() { 116 | relocation.offset += offset; 117 | match relocation.target { 118 | RelocationTarget::Base { ref mut addend } => 119 | *addend += offset as i64, 120 | RelocationTarget::Symbol { .. 
} | 121 | RelocationTarget::Copy { .. } | 122 | RelocationTarget::None | 123 | RelocationTarget::ElfSpecific(_) => () 124 | } 125 | } 126 | for initializer in self.initializers.iter_mut() { 127 | *initializer += offset; 128 | } 129 | for finalizer in self.finalizers.iter_mut() { 130 | *finalizer += offset; 131 | } 132 | match self.interpreter { 133 | Interpreter::Absent | Interpreter::External(_) => (), 134 | Interpreter::Internal { ref mut base, ref mut entry, .. } => { 135 | *base += offset; 136 | *entry += offset; 137 | }, 138 | } 139 | self.entry += offset; 140 | } 141 | 142 | pub fn merge_into(mut self, target: &mut Image) { 143 | // Check that the two images can be merged. 144 | assert!(self.machine == target.machine); 145 | assert!(self.alignment == target.alignment); 146 | eprintln!("merge_into: merging source image {} into target image {}", 147 | self.display_image_name(), target.display_image_name()); 148 | // Relocate this image to be fully above the target. 149 | let (_target_begin, target_end) = target.segment_bounds(); 150 | eprintln!("merge_into: rebasing source image by +{:#x}", target_end); 151 | self.rebase(target_end); 152 | // Merge this image's segments. 153 | target.segments.append(&mut self.segments); 154 | if self.tls_image.is_some() { 155 | if target.tls_image.is_none() { 156 | target.tls_image = self.tls_image.take(); 157 | } else { 158 | panic!("Merging TLS images is not implemented"); 159 | } 160 | } 161 | match (&self.interpreter, &mut target.interpreter) { 162 | (Interpreter::Absent, Interpreter::Absent) | 163 | (Interpreter::Absent, Interpreter::External(..)) => { 164 | // Merging executable + library or library + library 165 | self.merge_dynamic(target); 166 | } 167 | (source_interpreter @ Interpreter::Internal { .. 
}, 168 | target_interpreter @ Interpreter::External(_)) => { 169 | // Merging interpreter + executable 170 | eprintln!("merge_into: embedding the source image into target object as its interpreter"); 171 | *target_interpreter = source_interpreter.clone(); 172 | } 173 | (source_interpreter, target_interpreter) => 174 | panic!("Cannot merge source object with interpreter {:?} into target object with interpreter {:?}", 175 | source_interpreter, target_interpreter) 176 | } 177 | } 178 | 179 | fn merge_dynamic(mut self, target: &mut Image) { 180 | // Index the target image's symbol table. 181 | let mut target_symbol_map = HashMap::new(); 182 | for (symbol_index, symbol) in target.symbols.iter().enumerate() { 183 | if target_symbol_map.insert(symbol.name.clone(), symbol_index).is_some() { 184 | panic!("Duplicate symbol {:?} in target image", symbol.name.as_str()); 185 | } 186 | } 187 | // Merge symbols. 188 | let mut apply_copy_relocs_later = Vec::new(); 189 | for source_symbol in self.symbols.into_iter() { 190 | let symbol_name = source_symbol.name.to_owned(); 191 | let target_symbol = target_symbol_map.get(&symbol_name).map(|index| &mut target.symbols[*index]); 192 | match (source_symbol, target_symbol) { 193 | (source_symbol, None) => { 194 | // eprintln!("merge_into: adding new symbol {:?}", &symbol_name); 195 | target_symbol_map.insert(symbol_name.clone(), target.symbols.len()); 196 | target.symbols.push(source_symbol); 197 | } 198 | (_source_symbol @ Symbol { scope: SymbolScope::Weak, value: 0, .. }, 199 | Some(_target_symbol @ &mut Symbol { scope: SymbolScope::Weak, value: 0, .. })) => (), 200 | (_source_symbol @ Symbol { scope: SymbolScope::Weak, value: 0, .. }, 201 | Some(_target_symbol @ &mut Symbol { scope: SymbolScope::Weak, .. })) => { 202 | eprintln!("merge_into: replacing source weak symbol {:?} with target weak symbol", &symbol_name); 203 | } 204 | (source_symbol @ Symbol { scope: SymbolScope::Weak, .. 
}, 205 | Some(target_symbol @ &mut Symbol { scope: SymbolScope::Weak, value: 0, .. })) => { 206 | eprintln!("merge_into: using source weak symbol {:?} to resolve target missing weak symbol", &symbol_name); 207 | target_symbol.scope = source_symbol.scope; 208 | target_symbol.kind = source_symbol.kind; 209 | target_symbol.value = source_symbol.value; 210 | } 211 | (source_symbol @ Symbol { scope: SymbolScope::Weak, .. }, 212 | Some(target_symbol @ &mut Symbol { scope: SymbolScope::Weak, .. })) => { 213 | eprintln!("merge_into: using source weak symbol {:?} to resolve target missing weak symbol", &symbol_name); 214 | target_symbol.scope = source_symbol.scope; 215 | target_symbol.kind = source_symbol.kind; 216 | target_symbol.value = source_symbol.value; 217 | } 218 | (source_symbol @ Symbol { scope: SymbolScope::Global | SymbolScope::Weak, .. }, 219 | Some(target_symbol @ &mut Symbol { scope: SymbolScope::Import, .. })) => { 220 | eprintln!("merge_into: using source symbol {:?} to resolve target import", &symbol_name); 221 | target_symbol.scope = source_symbol.scope; 222 | target_symbol.kind = source_symbol.kind; 223 | target_symbol.value = source_symbol.value; 224 | }, 225 | (_source_symbol @ Symbol { scope: SymbolScope::Import, .. }, 226 | Some(_target_symbol @ &mut Symbol { scope: SymbolScope::Global | SymbolScope::Weak, .. })) => { 227 | eprintln!("merge_into: using target symbol {:?} to resolve source import", &symbol_name); 228 | }, 229 | (source_symbol @ Symbol { scope: SymbolScope::Global, .. }, 230 | Some(target_symbol @ &mut Symbol { scope: SymbolScope::Weak, value: 0, .. })) => { 231 | eprintln!("merge_into: using source global symbol {:?} to resolve target missing weak symbol", &symbol_name); 232 | target_symbol.scope = source_symbol.scope; 233 | target_symbol.kind = source_symbol.kind; 234 | target_symbol.value = source_symbol.value; 235 | }, 236 | (Symbol { scope: SymbolScope::Weak, value: 0, .. }, 237 | Some(&mut Symbol { scope: SymbolScope::Global, .. 
})) => { 238 | eprintln!("merge_into: using target global symbol {:?} to resolve source missing weak symbol", &symbol_name); 239 | }, 240 | (source_symbol, Some(target_symbol @ &mut Symbol { .. })) 241 | if symbol_name == "_init" || symbol_name == "_fini" => { 242 | if self.image_names.iter().find(|name| **name == "libc.so").is_some() { 243 | eprintln!("merge_into: forcing target special symbol {:?} to come from libc", &symbol_name); 244 | target_symbol.scope = SymbolScope::Global; 245 | target_symbol.kind = source_symbol.kind; 246 | target_symbol.value = source_symbol.value; 247 | } else { 248 | eprintln!("merge_into: ignoring source special symbol {:?}", &symbol_name) 249 | } 250 | } 251 | (source_symbol @ Symbol { scope: SymbolScope::Global, kind: SymbolKind::Data, .. }, 252 | Some(target_symbol @ &mut Symbol { scope: SymbolScope::Global, kind: SymbolKind::Data, .. })) 253 | if source_symbol.size == target_symbol.size => { 254 | eprintln!("merge_into: replacing source global data symbol {:?} with the same target global data symbol", &symbol_name); 255 | for (reloc_index, reloc) in target.relocations.iter().enumerate() { 256 | if let Relocation { target: RelocationTarget::Copy { symbol: copy_symbol_name }, .. } = &reloc { 257 | if symbol_name == *copy_symbol_name { 258 | apply_copy_relocs_later.push((reloc_index, source_symbol.clone())); 259 | } 260 | } 261 | } 262 | }, 263 | (source_symbol, Some(target_symbol)) if &source_symbol == target_symbol => (), 264 | (source_symbol, Some(target_symbol)) => { 265 | panic!("Cannot merge source symbol {:?} into target symbol {:?}", 266 | source_symbol, target_symbol) 267 | } 268 | } 269 | } 270 | // Apply copy relocations, if any were triggered. 
271 | for (reloc_index, source_symbol) in apply_copy_relocs_later.into_iter() { 272 | let target_reloc = &mut target.relocations[reloc_index]; 273 | eprintln!("merge_into: applying copy relocation for symbol {:?}: copying {:#x}{:+#x} => {:#x}", 274 | &source_symbol.name, source_symbol.value, source_symbol.size, target_reloc.offset); 275 | let source_data = target.segments.iter().find_map(|segment| { 276 | if source_symbol.value >= segment.addr && 277 | source_symbol.value + source_symbol.size <= segment.addr + segment.size { 278 | let range_begin = (source_symbol.value - segment.addr) as usize; 279 | let range_end = (source_symbol.value - segment.addr + source_symbol.size) as usize; 280 | if let Some(data) = segment.data.get(range_begin..range_end) { 281 | Some(data.to_owned()) 282 | } else { 283 | Some(vec![0; source_symbol.size as usize]) 284 | } 285 | } else { 286 | None 287 | } 288 | }).expect("Failed to find source segment for copy relocation"); 289 | for segment in target.segments.iter_mut() { 290 | if target_reloc.offset >= segment.addr && 291 | target_reloc.offset + source_symbol.size <= segment.addr + segment.size { 292 | let range_begin = (target_reloc.offset - segment.addr) as usize; 293 | let range_end = (target_reloc.offset - segment.addr + source_symbol.size) as usize; 294 | if segment.data.len() < range_end { 295 | segment.data.resize(range_end, 0); 296 | } 297 | segment.data.get_mut(range_begin..range_end) 298 | .expect("Failed to slice target data for copy relocation") 299 | .copy_from_slice(&source_data); 300 | } 301 | } 302 | target_reloc.target = RelocationTarget::None; 303 | } 304 | // Merge relocations. Relocations can never be removed, even if they refer to the self. 305 | target.relocations.append(&mut self.relocations); 306 | // Merge initializers and finalizers. 307 | target.initializers.append(&mut self.initializers); 308 | // Merge dependencies. 
309 | let mut target_dependency_set = HashSet::new(); 310 | for target_dependency in target.dependencies.iter() { 311 | target_dependency_set.insert(target_dependency.clone()); 312 | } 313 | for source_dependency in self.dependencies.into_iter() { 314 | if target.image_names.iter().find(|&image_name| *image_name == source_dependency).is_some() { continue } 315 | if target_dependency_set.insert(source_dependency.clone()) { 316 | eprintln!("merge_into: adding new dependency {:?}", source_dependency); 317 | } 318 | } 319 | for source_image_name in self.image_names.iter() { 320 | if target_dependency_set.remove(source_image_name) { 321 | eprintln!("merge_into: removing extinguished dependency {:?}", &source_image_name); 322 | } 323 | } 324 | target.dependencies = target_dependency_set.into_iter().collect::>(); 325 | // Merge image names. 326 | target.image_names.append(&mut self.image_names); 327 | } 328 | } 329 | -------------------------------------------------------------------------------- /src/parse.rs: -------------------------------------------------------------------------------- 1 | use elf::abi::*; 2 | use elf::endian::EndianParse; 3 | use elf::relocation::RelaIterator; 4 | use elf::ElfBytes; 5 | 6 | use crate::repr::*; 7 | 8 | pub const DT_RELR: i64 = 36; 9 | pub const DT_RELRSZ: i64 = 35; 10 | 11 | fn elf_vaddr_to_offset(elf_data: &[u8], addr: u64) -> Result { 12 | let elf_file = ElfBytes::::minimal_parse(elf_data).expect("Cannot parse"); 13 | let elf_segments = elf_file.segments().expect("No segments"); 14 | elf_segments 15 | .iter() 16 | .find(|segment| addr >= segment.p_vaddr && addr <= segment.p_vaddr + segment.p_memsz) 17 | .map(|segment| (addr + segment.p_offset - segment.p_vaddr) as usize) 18 | .ok_or(()) 19 | } 20 | 21 | fn elf_vaddr_size_to_offset_range(elf_data: &[u8], addr: u64, size: u64) 22 | -> Result, ()> { 23 | match (elf_vaddr_to_offset::(elf_data, addr), elf_vaddr_to_offset::(elf_data, addr + size)) { 24 | (Ok(offset_start), 
Ok(offset_end)) => Ok(offset_start..offset_end), 25 | _ => Err(()) 26 | } 27 | } 28 | 29 | pub fn parse_elf(elf_data: &[u8], soname: Option<&str>) -> Result { 30 | let elf_file = ElfBytes::::minimal_parse(elf_data)?; 31 | let machine = elf_file.ehdr.e_machine; 32 | let elf_common = elf_file.find_common_data()?; 33 | let elf_segments = elf_file.segments().expect("No segments"); 34 | let alignment = elf_segments 35 | .iter() 36 | .filter_map(|elf_segment| { 37 | if elf_segment.p_type == PT_LOAD { Some(elf_segment.p_align) } else { None } 38 | }) 39 | .max() 40 | .unwrap_or(1); 41 | let segments = elf_segments 42 | .iter() 43 | .filter_map(|elf_segment| { 44 | if elf_segment.p_type == PT_LOAD { 45 | let addr = elf_segment.p_vaddr; 46 | let size = elf_segment.p_memsz; 47 | let data = elf_file.segment_data(&elf_segment) 48 | .expect("No data for PT_LOAD") 49 | .to_owned(); 50 | let mode = if elf_segment.p_flags == PF_R { 51 | LoadMode::ReadOnly 52 | } else if elf_segment.p_flags == PF_R | PF_W { 53 | LoadMode::ReadWrite 54 | } else if elf_segment.p_flags == PF_R | PF_X { 55 | LoadMode::ReadExecute 56 | } else { 57 | panic!("Unknown segment flags: {}", 58 | elf::to_str::p_flags_to_string(elf_segment.p_flags)) 59 | }; 60 | Some(LoadSegment { addr, size, data, mode }) 61 | } else { 62 | None 63 | } 64 | 65 | }) 66 | .collect::>(); 67 | let tls_image = elf_segments 68 | .iter() 69 | .find_map(|elf_segment| { 70 | if elf_segment.p_type == PT_TLS { 71 | let mut tls_image = vec![0; elf_segment.p_memsz as usize]; 72 | let data = elf_file.segment_data(&elf_segment) 73 | .expect("No data for PT_TLS") 74 | .to_owned(); 75 | tls_image[..data.len()].copy_from_slice(&data[..]); 76 | Some(tls_image) 77 | } else { 78 | None 79 | } 80 | }); 81 | let elf_dynsyms = elf_common.dynsyms.as_ref().expect("No dynamic symbol table"); 82 | let elf_dynsyms_strs = elf_common.dynsyms_strs.as_ref().expect("No dynamic symbol string table"); 83 | let symbols = elf_dynsyms 84 | .clone() 85 | 
.into_iter() 86 | .skip(1) 87 | .filter_map(|elf_symbol| { 88 | // The type of the symbol can be `STT_NOTYPE` if it is a reference to a symbol that the static linker could 89 | // not discover at link time. This is independent of how the symbol was declared in C, i.e. `extern int a;`, 90 | // `extern int a(void);`, and `extern double a;` all become `STT_NOTYPE` when the symbol isn't resolved. 91 | // Weak symbols generally end up as `STT_NOTYPE`, unless defined in the same object. 92 | let elf_symtype = elf_symbol.st_symtype(); 93 | if elf_symtype == STT_FUNC || elf_symtype == STT_OBJECT || elf_symtype == STT_NOTYPE { 94 | let name = elf_dynsyms_strs 95 | .get(elf_symbol.st_name as usize) 96 | .expect("Invalid symbol name") 97 | .to_owned(); 98 | let kind = if elf_symtype == STT_FUNC { 99 | SymbolKind::Code 100 | } else if elf_symtype == STT_OBJECT { 101 | SymbolKind::Data 102 | } else { 103 | SymbolKind::Unknown 104 | }; 105 | let value = elf_symbol.st_value; 106 | let scope = if elf_symbol.st_bind() == STB_GLOBAL { 107 | if elf_symbol.is_undefined() { 108 | SymbolScope::Import 109 | } else { 110 | SymbolScope::Global 111 | } 112 | } else if elf_symbol.st_bind() == STB_WEAK { 113 | SymbolScope::Weak 114 | } else if elf_symbol.st_bind() == STB_LOCAL { 115 | SymbolScope::Local 116 | } else { 117 | panic!("Unhandled symbol visibility: {}", 118 | elf::to_str::st_bind_to_str(elf_symbol.st_bind()).unwrap_or("")) 119 | }; 120 | if elf_symbol.st_shndx == SHN_COMMON { 121 | panic!("Unhandled special shndx {:#x}", elf_symbol.st_shndx); 122 | } 123 | let size = elf_symbol.st_size; 124 | Some(Symbol { name, kind, scope, value, size, abs: (elf_symbol.st_shndx == SHN_ABS) }) 125 | } else if elf_symtype == STT_TLS { 126 | panic!("Unhangled STT_TLS symbol"); 127 | } else { 128 | None 129 | } 130 | }) 131 | .collect::>(); 132 | let elf_dynamic = elf_common.dynamic.map(|elf_dynamic| { 133 | elf_dynamic.into_iter().collect::>() 134 | }).unwrap_or(Vec::new()); 135 | let 
parse_elf_rela = |elf_rela_data| { 136 | RelaIterator::new(elf_file.ehdr.endianness, elf_file.ehdr.class, elf_rela_data) 137 | .map(|elf_rela| { 138 | let offset = elf_rela.r_offset; 139 | let target = if elf_file.ehdr.e_machine == EM_X86_64 { 140 | let symbol = if elf_rela.r_sym == 0 { None } else { 141 | let elf_symbol = elf_dynsyms 142 | .get(elf_rela.r_sym as usize) 143 | .expect("Invalid symbol reference in relocation"); 144 | let elf_symbol_name = elf_dynsyms_strs 145 | .get(elf_symbol.st_name as usize) 146 | .expect("Invalid symbol name in relocation"); 147 | Some(elf_symbol_name.to_owned()) 148 | }; 149 | // Both `R_X86_64_GLOB_DAT` and `R_X86_64_JUMP_SLOT` relocations can be expressed in terms of 150 | // the more general and less optimized `R_X86_64_64` relocation, which is what the emitter is using. 151 | if elf_rela.r_type == R_X86_64_64 { 152 | RelocationTarget::Symbol { 153 | symbol: symbol.expect("R_X86_64_64 requires a symbol"), 154 | addend: elf_rela.r_addend 155 | } 156 | } else if elf_rela.r_type == R_X86_64_GLOB_DAT { 157 | RelocationTarget::Symbol { 158 | symbol: symbol.expect("R_X86_64_GLOB_DAT requires a symbol"), 159 | addend: elf_rela.r_addend 160 | } 161 | } else if elf_rela.r_type == R_X86_64_JUMP_SLOT { 162 | RelocationTarget::Symbol { 163 | symbol: symbol.expect("R_X86_64_JUMP_SLOT requires a symbol"), 164 | addend: elf_rela.r_addend 165 | } 166 | } else if elf_rela.r_type == R_X86_64_RELATIVE { 167 | assert!(elf_rela.r_sym == 0, "R_X86_64_RELATIVE accepts no symbol"); 168 | RelocationTarget::Base { addend: elf_rela.r_addend } 169 | } else if elf_rela.r_type == R_X86_64_COPY { 170 | RelocationTarget::Copy { 171 | symbol: symbol.expect("R_X86_64_COPY requires a symbol"), 172 | } 173 | } else if [R_X86_64_DTPMOD64].contains(&elf_rela.r_type) { 174 | assert!(elf_rela.r_sym == 0, "Generic relocation mechanism accepts no symbol"); 175 | assert!(elf_rela.r_addend == 0, "Generic relocation mechanism accepts no addend"); 176 | 
RelocationTarget::ElfSpecific(elf_rela.r_type) 177 | } else { 178 | panic!("Unhandled relocation type: {}", elf_rela.r_type) 179 | } 180 | } else { 181 | panic!("Unhandled machine for RELA relocations: {}", 182 | elf::to_str::e_machine_to_str(elf_file.ehdr.e_machine) 183 | .unwrap_or("")) 184 | }; 185 | Relocation { offset, target } 186 | }) 187 | .collect::>() 188 | }; 189 | let elf_dynamic_rela = elf_dynamic.iter().find_map(|elf_dyn| { 190 | if elf_dyn.d_tag == DT_RELA { Some(elf_dyn.clone().d_val()) } else { None } 191 | }); 192 | let elf_dynamic_relasz = elf_dynamic.iter().find_map(|elf_dyn| { 193 | if elf_dyn.d_tag == DT_RELASZ { Some(elf_dyn.clone().d_val()) } else { None } 194 | }); 195 | let elf_dynamic_pltrel = elf_dynamic.iter().find_map(|elf_dyn| { 196 | if elf_dyn.d_tag == DT_PLTREL { Some(elf_dyn.clone().d_val() as i64) } else { None } 197 | }); 198 | let elf_dynamic_jmprel = elf_dynamic.iter().find_map(|elf_dyn| { 199 | if elf_dyn.d_tag == DT_JMPREL { Some(elf_dyn.clone().d_val()) } else { None } 200 | }); 201 | let elf_dynamic_pltrelsz = elf_dynamic.iter().find_map(|elf_dyn| { 202 | if elf_dyn.d_tag == DT_PLTRELSZ { Some(elf_dyn.clone().d_val()) } else { None } 203 | }); 204 | let elf_dynamic_relr = elf_dynamic.iter().find_map(|elf_dyn| { 205 | if elf_dyn.d_tag == DT_RELR { Some(elf_dyn.clone().d_val()) } else { None } 206 | }); 207 | let elf_dynamic_relrsz = elf_dynamic.iter().find_map(|elf_dyn| { 208 | if elf_dyn.d_tag == DT_RELRSZ { Some(elf_dyn.clone().d_val()) } else { None } 209 | }); 210 | let mut data_relocations = match (elf_dynamic_rela, elf_dynamic_relasz) { 211 | (Some(elf_dynamic_rela), Some(elf_dynamic_relasz)) => { 212 | let rela_range = 213 | elf_vaddr_size_to_offset_range::(elf_data, elf_dynamic_rela, elf_dynamic_relasz) 214 | .expect("Rela data out of bounds"); 215 | parse_elf_rela(&elf_data[rela_range]) 216 | } 217 | (None, None) => Vec::new(), 218 | _ => panic!("Expected dynamic table to have both or neither of PT_RELA and 
PT_RELASZ") 219 | }; 220 | let mut code_relocations = match (elf_dynamic_pltrel, elf_dynamic_jmprel, elf_dynamic_pltrelsz) { 221 | (Some(elf_dynamic_pltrel), Some(elf_dynamic_jmprel), Some(elf_dynamic_pltrelsz)) => { 222 | let jmprel_range = 223 | elf_vaddr_size_to_offset_range::(elf_data, elf_dynamic_jmprel, elf_dynamic_pltrelsz) 224 | .expect("Jmprel data out of bounds"); 225 | if elf_dynamic_pltrel == DT_RELA { 226 | parse_elf_rela(&elf_data[jmprel_range]) 227 | // } else if elf_dynamic_pltrel == DT_REL { 228 | // parse_elf_rel(elf_pltrel_data) 229 | } else { 230 | panic!("Unhandled PLT relocation type: {}", 231 | elf::to_str::d_tag_to_str(elf_dynamic_pltrel) 232 | .unwrap_or("")); 233 | } 234 | } 235 | (None, None, None) => Vec::new(), 236 | _ => panic!("Expected dynamic table to have all or none of PT_PLTREL, PT_JMPREL, and PT_PLTRELSZ") 237 | }; 238 | let mut relr_relocations = Vec::new(); 239 | match (elf_dynamic_relr, elf_dynamic_relrsz) { 240 | (Some(elf_dynamic_relr), Some(elf_dynamic_relrsz)) => { 241 | let relr_range = 242 | elf_vaddr_size_to_offset_range::(elf_data, elf_dynamic_relr, elf_dynamic_relrsz) 243 | .expect("Relr data out of bounds"); 244 | let elf_relr_data = &elf_data[relr_range]; 245 | let parse = E::from_ei_data(elf_data[EI_DATA]).unwrap(); 246 | let get_addend = |addr| { 247 | let mut file_offset = elf_vaddr_to_offset::(elf_data, addr) 248 | .expect("Relr target out of bounds"); 249 | parse.parse_i64_at(&mut file_offset, elf_data).unwrap() 250 | }; 251 | let mut push_relr = |addr| 252 | relr_relocations.push(Relocation { 253 | offset: addr, 254 | target: RelocationTarget::Base { addend: get_addend(addr) } 255 | }); 256 | let mut offset = 0; 257 | let mut next_rel = 0; 258 | while offset < elf_relr_data.len() { 259 | let mut entry = parse.parse_u64_at(&mut offset, elf_relr_data).unwrap(); 260 | if (entry & 1) == 0 { 261 | push_relr(entry as u64); 262 | next_rel = entry + 8; 263 | } else { 264 | let mut iter_rel = next_rel; 265 | while 
(entry & !1) != 0 { 266 | entry >>= 1; 267 | if entry & 1 == 1 { 268 | push_relr(iter_rel as u64); 269 | } 270 | iter_rel += 8; 271 | } 272 | next_rel = next_rel + 8 * 63; 273 | } 274 | } 275 | } 276 | (None, None) => (), 277 | _ => panic!("Expected dynamic table to have all or none of DT_RELR and DT_RELRSZ") 278 | }; 279 | let mut relocations = Vec::new(); 280 | relocations.append(&mut relr_relocations); // ABI suggests processing Relr first 281 | relocations.append(&mut data_relocations); 282 | relocations.append(&mut code_relocations); 283 | let dependencies = elf_dynamic.iter().filter_map(|elf_dyn| { 284 | if elf_dyn.d_tag == DT_NEEDED { 285 | Some(elf_dynsyms_strs 286 | .get(elf_dyn.clone().d_val() as usize) 287 | .expect("Invalid DT_NEEDED name") 288 | .to_owned()) 289 | } else { 290 | None 291 | } 292 | }).collect::>(); 293 | let elf_dynamic_init = elf_dynamic.iter().find_map(|elf_dyn| { 294 | if elf_dyn.d_tag == DT_INIT { Some(elf_dyn.clone().d_val() as usize) } else { None } 295 | }); 296 | let elf_dynamic_init_array = elf_dynamic.iter().find_map(|elf_dyn| { 297 | if elf_dyn.d_tag == DT_INIT_ARRAY { Some(elf_dyn.clone().d_val()) } else { None } 298 | }); 299 | let elf_dynamic_init_arraysz = elf_dynamic.iter().find_map(|elf_dyn| { 300 | if elf_dyn.d_tag == DT_INIT_ARRAYSZ { Some(elf_dyn.clone().d_val()) } else { None } 301 | }); 302 | let elf_dynamic_fini = elf_dynamic.iter().find_map(|elf_dyn| { 303 | if elf_dyn.d_tag == DT_FINI { Some(elf_dyn.clone().d_val() as usize) } else { None } 304 | }); 305 | let elf_dynamic_fini_array = elf_dynamic.iter().find_map(|elf_dyn| { 306 | if elf_dyn.d_tag == DT_FINI_ARRAY { Some(elf_dyn.clone().d_val()) } else { None } 307 | }); 308 | let elf_dynamic_fini_arraysz = elf_dynamic.iter().find_map(|elf_dyn| { 309 | if elf_dyn.d_tag == DT_FINI_ARRAYSZ { Some(elf_dyn.clone().d_val()) } else { None } 310 | }); 311 | let mut initializers = Vec::new(); 312 | if let Some(init_func) = elf_dynamic_init { initializers.push(init_func 
as u64) } 313 | match (elf_dynamic_init_array, elf_dynamic_init_arraysz) { 314 | (Some(init_func_array), Some(init_func_array_sz)) => { 315 | let init_func_range = 316 | elf_vaddr_size_to_offset_range::(elf_data, init_func_array, init_func_array_sz) 317 | .expect("Init array data out of bounds"); 318 | let elf_init_funcs = &elf_data[init_func_range]; 319 | let parse = E::from_ei_data(elf_data[EI_DATA]).unwrap(); 320 | let mut offset = 0; 321 | while offset < elf_init_funcs.len() { 322 | initializers.push(parse.parse_u64_at(&mut offset, elf_init_funcs).unwrap()) 323 | } 324 | } 325 | (None, None) => (), 326 | _ => panic!("Expected dynamic table to have both or neither of DT_INIT_ARRAY and DT_INIT_ARRAYSZ") 327 | } 328 | let mut finalizers = Vec::new(); 329 | match (elf_dynamic_fini_array, elf_dynamic_fini_arraysz) { 330 | (Some(fini_func_array), Some(fini_func_array_sz)) => { 331 | let fini_func_range = 332 | elf_vaddr_size_to_offset_range::(elf_data, fini_func_array, fini_func_array_sz) 333 | .expect("Fini array data out of bounds"); 334 | let elf_fini_funcs = &elf_data[fini_func_range]; 335 | let parse = E::from_ei_data(elf_data[EI_DATA]).unwrap(); 336 | let mut offset = 0; 337 | while offset < elf_fini_funcs.len() { 338 | finalizers.push(parse.parse_u64_at(&mut offset, elf_fini_funcs).unwrap()) 339 | } 340 | } 341 | (None, None) => (), 342 | _ => panic!("Expected dynamic table to have both or neither of DT_FINI_ARRAY and DT_FINI_ARRAYSZ") 343 | } 344 | if let Some(init_func) = elf_dynamic_fini { finalizers.push(init_func as u64) } 345 | let mut image_names = elf_dynamic.iter().filter_map(|elf_dyn| { 346 | if elf_dyn.d_tag == DT_SONAME { 347 | Some(elf_dynsyms_strs 348 | .get(elf_dyn.clone().d_val() as usize) 349 | .expect("Invalid DT_SONAME name") 350 | .to_owned()) 351 | } else { 352 | None 353 | } 354 | }).collect::>(); 355 | if image_names.is_empty() { // DT_SONAMEs take priority 356 | if let Some(name) = soname { image_names.push(name.to_owned()) }; 357 | } 
358 | let interpreter = elf_segments.iter().find_map(|elf_segment| { 359 | // If PT_INTERP exists, it specifies a path to the external interpreter. 360 | if elf_segment.p_type == PT_INTERP { 361 | let path = elf_file.segment_data(&elf_segment).ok() 362 | .and_then(|data| String::from_utf8(data[..data.len() - 1].to_owned()).ok()) 363 | .expect("Invalid PT_INTERP path"); 364 | Some(Interpreter::External(path)) 365 | } else { 366 | None 367 | } 368 | }).unwrap_or_else(|| { 369 | if elf_file.ehdr.e_entry != 0 { 370 | // If PT_INTERP does not exist (and this is an ET_DYN), but there is an entry point, then this object is 371 | // its own interpreter. Record the values required to invoke it according to the kernel ABI later, once 372 | // we combine it with something to load. 373 | Interpreter::Internal { 374 | // Assume the PIE isn't prelinked to a weird address, which really shouldn't happen; it's a real pain 375 | // to try and figure out exactly what the base is supposed to be, since it doesn't explicitly appear in 376 | // any of the ELF structures. 377 | base: 0, 378 | entry: elf_file.ehdr.e_entry, 379 | // musl libc does some hair-raising manipulations with segments; namely, it uses padding around segment 380 | // data as Free Real Estate™ for its malloc, in both the dynamic linker itself (ld.so) as well as 381 | // whatever it's loading. While this works fine with normal kernel PT_INTERP logic, ours is pecularly 382 | // different in that the image of the interpreter overlaps the image of the loadee. As a result, musl's 383 | // dynamic loader causes its malloc to perform a 'double alloc', which is somehow even more destructive 384 | // than a double free. To avoid this, we're hiding the interpreter from itself by reducing the amount 385 | // of program headers available via `auxv[AT_PHNUM]`, which is the ABI-prescribed mechanism for ld.so 386 | // to find out how many it needs to relocate. 
The success of this requires ld.so to not look at 387 | // the actual ELF header for our binary, but since it doesn't even have a pointer to it, this should 388 | // all work just fine. 389 | segments: segments.len(), 390 | } 391 | } else { 392 | // Probably just a shared library. 393 | Interpreter::Absent 394 | } 395 | }); 396 | let entry = elf_file.ehdr.e_entry; 397 | Ok(Image { 398 | machine, 399 | alignment, 400 | segments, 401 | tls_image, 402 | symbols, 403 | relocations, 404 | initializers, 405 | finalizers, 406 | dependencies, 407 | image_names, 408 | interpreter, 409 | entry, 410 | }) 411 | } 412 | -------------------------------------------------------------------------------- /src/emit.rs: -------------------------------------------------------------------------------- 1 | use object::{elf::*, Endianness}; 2 | use object::write::elf::{Class, FileHeader, ProgramHeader, Rel, SectionHeader, Sym, Writer}; 3 | 4 | use crate::repr::*; 5 | 6 | fn make_static_str(s: impl AsRef) -> &'static str { 7 | s.as_ref().to_owned().leak() 8 | } 9 | 10 | fn make_shim( 11 | machine: u16, 12 | base: u64, 13 | interp_base: u64, 14 | interp_phdrs: usize, 15 | interp_entry: u64, 16 | user_entry: u64 17 | ) -> Vec { 18 | // When the interpreter is loaded by the kernel, the kernel communicates several key parameters to it through 19 | // the auxiliary vector; most importantly, AT_BASE, AT_ENTRY, and AT_PH*. For the dynamic loader to function, 20 | // AT_BASE must be set to its own ELF header (to which it maintains an internal PC-relative reference). 21 | // For the dynamic loader to jump to the executable after loading, AT_ENTRY must be set to the user entry point 22 | // (`e_entry` of the PIE). For the dynamic loader to relocate the executable, AT_PH* must be pointing to the user 23 | // executable's entry point. 24 | // 25 | // All of these parameters are configured by the kernel when it's launching an interpreter via the PT_INTERP 26 | // mechanism. 
However, if we link the interpreter in, the kernel will instead point these parameters to our 27 | // combined executable. Luckily, AT_PH* already have the right values, so the only modifications needed are 28 | // to AT_BASE (which *must* point to the `\x7FELF` of the interpreter) and AT_ENTRY (which must point to 29 | // the PIE entry point). Since we interpose this shim using the `e_entry` file header field, we must restore 30 | // the original `e_entry` by modifying `AT_ENTRY`. 31 | // 32 | // The shim consists of a code part (blob built from shim/ in build.rs and included) and data part (built here). 33 | // The code part must be completely position independent (no relocations) and padding to align the data part 34 | // must be included in the blob. The code part reads the data part using pc-relative addresses. 35 | 36 | macro_rules! shim_blob { 37 | ($target:expr) => { 38 | include_bytes!(concat!(env!("OUT_DIR"), "/shim/", $target, "/shim.bin")) 39 | }; 40 | } 41 | 42 | let code = if machine == EM_X86_64 { 43 | shim_blob!("x86_64-unknown-none").to_vec() 44 | } else { 45 | panic!("Shim not implemented for machine: {:?}", machine) 46 | }; 47 | 48 | let mut code = code.to_vec(); 49 | 50 | // Append data part of shim 51 | 52 | // Keep in sync with shim/main.rs 53 | // TODO: Handle 32-bit stuff 54 | code.extend(user_entry.wrapping_sub(base).to_le_bytes()); 55 | code.extend(interp_entry.wrapping_sub(base).to_le_bytes()); 56 | code.extend(interp_base.wrapping_sub(base).to_le_bytes()); 57 | code.extend((interp_phdrs as u64).to_le_bytes()); 58 | 59 | code.resize(((code.len() - 1) | 0xff) + 1, 0); // pad to make it easier to edit in binja 60 | code 61 | } 62 | 63 | pub fn emit_elf(image: &Image) -> object::write::Result> { 64 | #[derive(Debug)] 65 | enum InterpreterOut { 66 | Path { bytes: Vec }, 67 | Shim { base: u64, entry: u64, phdrs: usize, code_len: usize }, 68 | None, 69 | } 70 | 71 | #[derive(Debug)] 72 | struct LoadSectionOut { 73 | index: 
object::write::elf::SectionIndex, 74 | name: object::write::StringId, 75 | mode: LoadMode, 76 | size: u64, 77 | addr: u64, 78 | load: bool, 79 | } 80 | 81 | #[derive(Debug)] 82 | #[allow(unused)] 83 | struct DynamicSymbolOut { 84 | index: object::write::elf::SymbolIndex, 85 | name: object::write::StringId, 86 | hash: u32, 87 | } 88 | 89 | let (endian, class, is_rela); 90 | if image.machine == object::elf::EM_X86_64 { 91 | endian = Endianness::Little; 92 | class = Class { is_64: true }; 93 | is_rela = true; 94 | } else { 95 | panic!("Unhandled machine: {}", image.machine) 96 | } 97 | 98 | let out_interp = match image.interpreter { 99 | Interpreter::External(ref path) => { 100 | let mut bytes = path.as_bytes().to_vec(); 101 | bytes.push(0); 102 | InterpreterOut::Path { bytes } 103 | } 104 | Interpreter::Internal { base, entry, segments: phdrs } => { 105 | let code = make_shim(image.machine, 0, 0, 0, 0, 0); // can't resolve references yet 106 | InterpreterOut::Shim { base, entry, phdrs, code_len: code.len() } 107 | }, 108 | Interpreter::Absent => 109 | InterpreterOut::None 110 | }; 111 | 112 | let mut elf_data = Vec::new(); 113 | let mut obj_writer = Writer::new(endian, class.is_64, &mut elf_data); 114 | 115 | // Reserve space for file and program headers. 116 | // These are the things the dynamic linker cares about. 117 | obj_writer.reserve_file_header(); 118 | let obj_phdr_offset = obj_writer.reserved_len(); 119 | let interp_phdr_count = match &out_interp { 120 | InterpreterOut::Path { .. } => /* PT_INTERP */1, 121 | InterpreterOut::Shim { .. 
} => /* PT_LOAD */1, 122 | InterpreterOut::None => 0, 123 | }; 124 | let tls_phdr_count = if image.tls_image.is_some() { 1 } else { 0 }; 125 | let phdr_count = 126 | /* PT_PHDR */1 127 | + /* PT_LOAD for ELF file and program headers */1 128 | + /* PT_INTERP or PT_LOAD for interpreter thunk */interp_phdr_count 129 | + /* PT_DYNAMIC */1 130 | + /* PT_TLS if needed */tls_phdr_count 131 | + /* PT_LOAD for PT_DYNAMIC, PT_TLS, etc */1 132 | + /* PT_LOAD[..] */image.segments.len(); 133 | obj_writer.reserve_program_headers(phdr_count as u32); 134 | let obj_interp_offset = if let InterpreterOut::Path { bytes } = &out_interp { 135 | obj_writer.reserve(bytes.len(), 1) 136 | } else { 0 }; 137 | let obj_headers_end = obj_writer.reserved_len(); 138 | let obj_shim_offset = if let InterpreterOut::Shim { code_len, .. } = out_interp { 139 | obj_writer.reserve(code_len, image.alignment as usize) 140 | } else { 0 }; 141 | 142 | // Reserve space for dynamic linker information. 143 | // This is the stuff the dynamic linker *really* cares about. 
144 | let mut out_sonames = Vec::new(); 145 | for image_name in image.image_names.iter() { 146 | out_sonames.push(obj_writer.add_dynamic_string(image_name.as_ref())); 147 | } 148 | let mut out_needful = Vec::new(); 149 | for dependency in image.dependencies.iter() { 150 | out_needful.push(obj_writer.add_dynamic_string(dependency.as_ref())); 151 | } 152 | let mut out_dynsyms = Vec::new(); 153 | for symbol in image.symbols.iter() { 154 | let index = obj_writer.reserve_dynamic_symbol_index(); 155 | let name = obj_writer.add_dynamic_string(symbol.name.as_ref()); 156 | let hash = object::elf::hash(symbol.name.as_ref()); 157 | out_dynsyms.push(DynamicSymbolOut { index, name, hash }); 158 | } 159 | obj_writer.reserve(0, image.alignment as usize); 160 | let dynamic_count = 161 | /* DT_SONAME */image.image_names.len() 162 | + /* DT_NEEDED */image.dependencies.len() 163 | + /* DT_STRTAB */1 164 | + /* DT_STRSZ */1 165 | + /* DT_SYMENT */1 166 | + /* DT_SYMTAB */1 167 | + /* DT_HASH */1 168 | + /* DT_REL(A) */1 169 | + /* DT_REL(A)SZ */1 170 | + /* DT_REL(A)ENT */1 171 | + /* DT_INIT_ARRAY */1 172 | + /* DT_INIT_ARRAYSZ */1 173 | + /* DT_FINI_ARRAY */1 174 | + /* DT_FINI_ARRAYSZ */1 175 | + /* DT_NULL */1; 176 | let obj_dynamic_offset = obj_writer.reserve_dynamic(dynamic_count); 177 | let obj_dynstr_offset = obj_writer.reserve_dynstr(); 178 | let obj_dynstr_length = obj_writer.dynstr_len(); 179 | let obj_dynsym_offset = obj_writer.reserve_dynsym(); 180 | let hash_bucket_count = 4; // TODO: chosen at random 181 | let hash_index_base = 1; // null symbol 182 | let hash_chain_count = hash_index_base + out_dynsyms.len() as u32; 183 | let obj_hash_offset = obj_writer.reserve_hash(hash_bucket_count, hash_chain_count); 184 | let relocation_count = image.relocations.len() + image.initializers.len() + image.finalizers.len(); 185 | let obj_reloc_offset = obj_writer.reserve_relocations(relocation_count, is_rela); 186 | let obj_dt_init_array_length = image.initializers.len() * if 
class.is_64 { 8 } else { 4 }; 187 | let obj_dt_init_array_offset = obj_writer.reserve(obj_dt_init_array_length, class.align() as usize); 188 | let obj_dt_fini_array_length = image.finalizers.len() * if class.is_64 { 8 } else { 4 }; 189 | let obj_dt_fini_array_offset = obj_writer.reserve(obj_dt_fini_array_length, class.align() as usize); 190 | let obj_tls_offset = if let Some(ref tls_data) = image.tls_image { 191 | obj_writer.reserve(tls_data.len(), class.align() as usize) 192 | } else { 0 }; 193 | let obj_dynamic_end = obj_writer.reserved_len(); 194 | 195 | // Reserve space for section headers. 196 | // This is the stuff that `objdump` cares about. Yes, even if there is a perfectly valid PT_DYNAMIC, it will look 197 | // for `.dynamic`/`.dynsym`/etc. 198 | obj_writer.reserve_null_section_index(); 199 | obj_writer.reserve_shstrtab_section_index(); 200 | obj_writer.reserve_dynamic_section_index(); 201 | obj_writer.reserve_dynstr_section_index(); 202 | let obj_dynsym_section_index = obj_writer.reserve_dynsym_section_index(); 203 | obj_writer.reserve_hash_section_index(); 204 | let _obj_reloc_dyn_section_index = obj_writer.reserve_section_index(); 205 | let obj_reloc_dyn_section_name = obj_writer.add_section_name(if is_rela { b".rela.dyn" } else { b".rel.dyn" }); 206 | let obj_shim_section_index_name = if let InterpreterOut::Shim { .. } = out_interp { 207 | Some((obj_writer.reserve_section_index(), obj_writer.add_section_name(b"shim"))) 208 | } else { None }; 209 | let mut out_load_sections = Vec::new(); 210 | for (segment_index, segment) in image.segments.iter().enumerate() { 211 | let mut make_section = |name, size, addr, load| { 212 | let index = obj_writer.reserve_section_index(); 213 | let name = obj_writer.add_section_name(make_static_str(name).as_ref()); 214 | out_load_sections.push(LoadSectionOut { index, name, mode: segment.mode, size, addr, load }) 215 | }; 216 | // A segment can be only partially mapped from disk, i.e. 
in the case of `p_filesz != 0 && p_filesz < p_memsz`. 217 | // Sections are either fully mapped or fully unmapped. Thus, we need to split the segment into two sections 218 | // to make this case work. (Remember that this is _still_ only for objdump.) 219 | let dataful_name = format!("image.{}.{}", segment_index, match segment.mode { 220 | LoadMode::ReadOnly => "ro", 221 | LoadMode::ReadWrite => "rw", 222 | LoadMode::ReadExecute => "rx", 223 | }); 224 | let dataless_name = format!("image.{}.rwz", segment_index); 225 | if segment.data.len() as u64 == segment.size { 226 | make_section(dataful_name, segment.data.len() as u64, segment.addr, /*load=*/true); 227 | } else if segment.data.len() == 0 { 228 | make_section(dataless_name, segment.size, segment.addr, /*load=*/false); 229 | } else { 230 | make_section(dataful_name, segment.data.len() as u64, segment.addr, /*load=*/true); 231 | make_section(dataless_name, segment.size - segment.data.len() as u64, 232 | segment.addr + segment.data.len() as u64, /*load=*/false); 233 | } 234 | } 235 | obj_writer.reserve_shstrtab(); 236 | obj_writer.reserve_section_headers(); 237 | 238 | // Reserve space for image segments. 239 | let image_file_offset = obj_writer.reserve(0, image.alignment as usize); 240 | eprintln!("emit_elf: emitting images at offset {:+#x}", image_file_offset); 241 | for segment in image.segments.iter() { 242 | assert!(segment.data.len() as u64 <= segment.size); 243 | obj_writer.reserve_until(image_file_offset + segment.addr as usize + segment.size as usize); 244 | } 245 | 246 | // Write file and program headers. 247 | let entry = match &out_interp { 248 | InterpreterOut::Path { .. } => image_file_offset as u64 + image.entry, 249 | InterpreterOut::Shim { .. 
} => obj_shim_offset as u64, 250 | InterpreterOut::None => 0, 251 | }; 252 | obj_writer.write_file_header(&FileHeader { 253 | os_abi: 0, 254 | abi_version: 0, 255 | e_type: ET_DYN, 256 | e_machine: image.machine, 257 | e_entry: entry, 258 | e_flags: 0, 259 | })?; 260 | // We use a 1:1 mapping between file offsets and virtual addresses (before rebasing). This is already how many 261 | // shared objects are laid out. It also simplifies both internal bookkeeping and debugging. 262 | let mut write_program_header = |type_, flags, offset, size, align| { 263 | obj_writer.write_program_header(&ProgramHeader { 264 | p_type: type_, 265 | p_flags: flags, 266 | p_offset: offset as u64, 267 | p_vaddr: offset as u64, 268 | p_paddr: offset as u64, 269 | p_filesz: size as u64, 270 | p_memsz: size as u64, 271 | p_align: align, 272 | }) 273 | }; 274 | // musl uses the difference between AT_PHDR and PT_PHDR to find out where the application is loaded, if it 275 | // is mapped by the kernel. Omitting this program header causes it to explode in a really amusing way. 276 | // As of Linux 6.10, the kernel always maps the application, and then if it has an interpreter, maps that too 277 | // and runs its entry point instead of the application's. 278 | write_program_header(PT_PHDR, PF_R, 279 | obj_phdr_offset, class.program_header_size() * phdr_count, class.align() as u64); 280 | // The ELF program headers must be loaded in order for the interpreter to be able to parse the file. Although 281 | // it is not required by the ABI to load the file headers, it's easier to do that anyway. (Most Linux binaries 282 | // do load them.) 283 | write_program_header(PT_LOAD, PF_R, 284 | 0, obj_headers_end, image.alignment); 285 | match &out_interp { 286 | InterpreterOut::Path { bytes } => 287 | // Kernel uses PT_INTERP to find out which interpreter to load. 288 | write_program_header(PT_INTERP, PF_R, 289 | obj_interp_offset, bytes.len(), /*align=*/1), 290 | InterpreterOut::Shim { code_len, .. 
} => 291 | // Shim uses kernel ABI to bootstrap the built-in interpreter. 292 | write_program_header(PT_LOAD, PF_R | PF_X, 293 | obj_shim_offset, *code_len, /*align=*/image.alignment), 294 | InterpreterOut::None => () 295 | } 296 | // The ELF dynamic information must be loaded too, for the same reasons. The PT_DYNAMIC program header points 297 | // to the beginning of this information, which contains the dynamic table, and is followed by the entities 298 | // that are referenced by the table. These are mapped read-write since the interpreter modifies them in-place. 299 | write_program_header(PT_DYNAMIC, PF_R | PF_W, 300 | obj_dynamic_offset, class.dyn_size() * dynamic_count, class.align() as u64); 301 | if let Some(ref tls_data) = image.tls_image { 302 | // The TLS section piggybacks on the PT_DYNAMIC's PT_LOAD. This isn't how it's usually done but it should be 303 | // fine, for now at least. 304 | write_program_header(PT_TLS, PF_R, 305 | obj_tls_offset, tls_data.len(), class.align() as u64); 306 | } 307 | write_program_header(PT_LOAD, PF_R | PF_W, 308 | obj_dynamic_offset, obj_dynamic_end - obj_dynamic_offset, class.align() as u64); 309 | // The image segments are loaded as-is. In the segments, `segment.size` could be bigger than `segment.data`, with 310 | // the remainder zeroed on load. Such a segment would be typically the last one. For our purposes this is 311 | // undesirable and we pad everything to the memory size. 312 | for segment in image.segments.iter() { 313 | let obj_flags = match segment.mode { 314 | LoadMode::ReadOnly => PF_R, 315 | LoadMode::ReadWrite => PF_R | PF_W, 316 | LoadMode::ReadExecute => PF_R | PF_X, 317 | }; 318 | write_program_header(PT_LOAD, obj_flags, 319 | image_file_offset + segment.addr as usize, segment.size as usize, image.alignment); 320 | } 321 | 322 | // Write dynamic linker information. 
323 | match &out_interp { 324 | InterpreterOut::Path { bytes } => { 325 | obj_writer.pad_until(obj_interp_offset); 326 | obj_writer.write(&bytes); 327 | } 328 | InterpreterOut::Shim { base: interp_base, entry: interp_entry, phdrs: interp_phdrs, code_len } => { 329 | let code = make_shim( 330 | image.machine, 331 | obj_shim_offset as u64, 332 | image_file_offset as u64 + *interp_base, 333 | *interp_phdrs, 334 | image_file_offset as u64 + *interp_entry, 335 | image_file_offset as u64 + image.entry 336 | ); 337 | assert_eq!(code.len(), *code_len); 338 | obj_writer.pad_until(obj_shim_offset); 339 | obj_writer.write(&code); 340 | } 341 | InterpreterOut::None => (), 342 | } 343 | obj_writer.pad_until(obj_dynamic_offset); 344 | for out_soname in out_sonames { 345 | obj_writer.write_dynamic_string(DT_SONAME, out_soname); 346 | } 347 | for out_needed in out_needful { 348 | obj_writer.write_dynamic_string(DT_NEEDED, out_needed); // do the needful 349 | } 350 | obj_writer.write_dynamic(DT_STRTAB, obj_dynstr_offset as u64); 351 | obj_writer.write_dynamic(DT_STRSZ, obj_dynstr_length as u64); 352 | obj_writer.write_dynamic(DT_SYMENT, class.sym_size() as u64); 353 | obj_writer.write_dynamic(DT_SYMTAB, obj_dynsym_offset as u64); 354 | obj_writer.write_dynamic(DT_HASH, obj_hash_offset as u64); 355 | obj_writer.write_dynamic(if is_rela { DT_RELA } else { DT_REL }, 356 | obj_reloc_offset as u64); 357 | obj_writer.write_dynamic(if is_rela { DT_RELASZ } else { DT_RELSZ }, 358 | (class.rel_size(is_rela) * relocation_count) as u64); 359 | obj_writer.write_dynamic(if is_rela { DT_RELAENT } else { DT_RELENT }, 360 | class.rel_size(is_rela) as u64); 361 | obj_writer.write_dynamic(DT_INIT_ARRAY, obj_dt_init_array_offset as u64); 362 | obj_writer.write_dynamic(DT_INIT_ARRAYSZ, obj_dt_init_array_length as u64); 363 | obj_writer.write_dynamic(DT_FINI_ARRAY, obj_dt_fini_array_offset as u64); 364 | obj_writer.write_dynamic(DT_FINI_ARRAYSZ, obj_dt_fini_array_length as u64); 365 | 
obj_writer.write_dynamic(DT_NULL, 0); 366 | obj_writer.write_dynstr(); 367 | obj_writer.write_null_dynamic_symbol(); 368 | for symbol in image.symbols.iter() { 369 | let obj_symtype = match symbol.kind { 370 | SymbolKind::Code => STT_FUNC, 371 | SymbolKind::Data => STT_OBJECT, 372 | SymbolKind::Unknown => STT_NOTYPE, 373 | }; 374 | let obj_bind = match symbol.scope { 375 | SymbolScope::Local => STB_LOCAL, 376 | SymbolScope::Global => STB_GLOBAL, 377 | SymbolScope::Import => STB_GLOBAL, 378 | SymbolScope::Weak => STB_WEAK, 379 | }; 380 | // In symbol tables, relocations must be associated with a section, even in an executable or shared object 381 | // where the address of the section is unimportant. Nevertheless, find which section they belong to. 382 | let (obj_value, obj_section, obj_shndx); 383 | if symbol.abs { 384 | obj_value = symbol.value; 385 | obj_section = None; 386 | obj_shndx = SHN_ABS; 387 | } else if symbol.value == 0 { 388 | obj_value = 0; 389 | obj_section = None; 390 | obj_shndx = 0; 391 | } else { 392 | obj_value = image_file_offset as u64 + symbol.value; 393 | obj_section = out_load_sections.iter().find_map(|&LoadSectionOut { addr, size, index, .. }| { 394 | // Neither `symbol` nor `out_load_sections` are relocated by `image_file_offset` here. 395 | if symbol.value >= addr && symbol.value < addr + size { Some(index) } else { None } 396 | }); 397 | obj_shndx = 0; 398 | }; 399 | obj_writer.write_dynamic_symbol(&Sym { 400 | name: Some(obj_writer.get_dynamic_string(symbol.name.as_ref())), 401 | section: obj_section, 402 | st_info: (obj_bind << 4) | obj_symtype, 403 | st_other: 0, 404 | st_shndx: obj_shndx, // automatically filled in if `section` is specified 405 | st_value: obj_value, 406 | st_size: symbol.size, 407 | }); 408 | } 409 | obj_writer.write_hash(hash_bucket_count, hash_chain_count, |index| { 410 | Some(out_dynsyms.get(index.checked_sub(hash_index_base)? 
as usize)?.hash) 411 | }); 412 | obj_writer.write_align_relocation(); 413 | let find_symbol = |name| 414 | image.symbols.iter().position(|symbol| symbol.name == name).map(|index| index + 1).unwrap_or(0) as u32; 415 | for relocation in image.relocations.iter() { 416 | let (obj_reltype, obj_relsym, obj_addend); 417 | if image.machine == object::elf::EM_X86_64 { 418 | match relocation.target.clone() { 419 | RelocationTarget::Symbol { symbol: symbol_name, addend } => { 420 | obj_reltype = R_X86_64_64; 421 | obj_relsym = find_symbol(symbol_name); 422 | obj_addend = addend; 423 | }, 424 | RelocationTarget::Base { addend } => { 425 | obj_reltype = R_X86_64_RELATIVE; 426 | obj_relsym = 0; 427 | obj_addend = image_file_offset as i64 + addend; 428 | }, 429 | RelocationTarget::Copy { symbol: symbol_name } => { 430 | obj_reltype = R_X86_64_COPY; 431 | obj_relsym = find_symbol(symbol_name); 432 | obj_addend = 0; 433 | }, 434 | RelocationTarget::None => { 435 | obj_reltype = R_X86_64_NONE; 436 | obj_relsym = 0; 437 | obj_addend = 0; 438 | } 439 | RelocationTarget::ElfSpecific(reltype) => { 440 | obj_reltype = reltype; 441 | obj_relsym = 0; 442 | obj_addend = 0; 443 | } 444 | } 445 | } else { 446 | unreachable!() 447 | } 448 | obj_writer.write_relocation(is_rela, &Rel { 449 | // In executables and shared libraries, relocations are applied at a virtual address. 450 | r_offset: image_file_offset as u64 + relocation.offset, 451 | r_sym: obj_relsym, 452 | r_type: obj_reltype, 453 | r_addend: obj_addend, 454 | }); 455 | } 456 | for (index, lifetimizer) in image.initializers.iter().chain(image.finalizers.iter()).enumerate() { 457 | obj_writer.write_relocation(is_rela, &Rel { 458 | // All DT_INIT/DT_INIT_ARRAY/DT_FINI_ARRAY/DT_FINI addresses must be relocated with the object. 
459 | r_offset: (obj_dt_init_array_offset + index * 8) as u64, 460 | r_sym: 0, 461 | r_type: R_X86_64_RELATIVE, 462 | r_addend: (image_file_offset as u64 + *lifetimizer) as i64, 463 | }); 464 | } 465 | obj_writer.pad_until(obj_dt_fini_array_offset + obj_dt_fini_array_length); 466 | if let Some(ref tls_data) = image.tls_image { 467 | obj_writer.write(&tls_data); 468 | } 469 | 470 | // Write section headers. 471 | obj_writer.write_shstrtab(); 472 | obj_writer.write_null_section_header(); 473 | obj_writer.write_shstrtab_section_header(); 474 | obj_writer.write_dynamic_section_header(obj_dynamic_offset as u64); 475 | obj_writer.write_dynstr_section_header(obj_dynstr_offset as u64); 476 | obj_writer.write_dynsym_section_header(obj_dynsym_offset as u64, 1); 477 | obj_writer.write_hash_section_header(obj_hash_offset as u64); 478 | obj_writer.write_section_header(&SectionHeader { 479 | name: Some(obj_reloc_dyn_section_name), 480 | sh_type: if is_rela { SHT_RELA } else { SHT_REL }, 481 | sh_flags: SHF_ALLOC as u64, 482 | sh_addr: obj_reloc_offset as u64, 483 | sh_offset: obj_reloc_offset as u64, 484 | sh_size: (class.rel_size(is_rela) * relocation_count) as u64, 485 | sh_link: obj_dynsym_section_index.0, 486 | sh_info: 0, 487 | sh_addralign: class.align() as u64, 488 | sh_entsize: class.rel_size(is_rela) as u64, 489 | }); 490 | if let InterpreterOut::Shim { code_len, .. 
} = out_interp { 491 | if let Some((_obj_shim_section_index, obj_shim_section_name)) = obj_shim_section_index_name { 492 | obj_writer.write_section_header(&SectionHeader { 493 | name: Some(obj_shim_section_name), 494 | sh_type: SHT_PROGBITS, 495 | sh_flags: SHF_ALLOC as u64, 496 | sh_addr: obj_shim_offset as u64, 497 | sh_offset: obj_shim_offset as u64, 498 | sh_size: code_len as u64, 499 | sh_link: 0, 500 | sh_info: 0, 501 | sh_addralign: class.align() as u64, 502 | sh_entsize: class.align() as u64, 503 | }); 504 | } else { unreachable!() } 505 | } 506 | for out_load_section in out_load_sections { 507 | let sh_flags = match out_load_section.mode { 508 | LoadMode::ReadOnly => SHF_ALLOC, 509 | LoadMode::ReadWrite => SHF_ALLOC | SHF_WRITE, 510 | LoadMode::ReadExecute => SHF_ALLOC | SHF_EXECINSTR, 511 | }; 512 | obj_writer.write_section_header(&SectionHeader { 513 | name: Some(out_load_section.name), 514 | sh_type: if out_load_section.load { SHT_PROGBITS } else { SHT_NOBITS }, 515 | sh_flags: sh_flags as u64, 516 | sh_addr: image_file_offset as u64 + out_load_section.addr, 517 | sh_offset: image_file_offset as u64 + out_load_section.addr, 518 | sh_size: out_load_section.size, 519 | sh_link: SHN_UNDEF as u32, 520 | sh_info: 0, 521 | sh_addralign: image.alignment, 522 | sh_entsize: 0, 523 | }); 524 | } 525 | 526 | // Write image segments. 527 | for segment in image.segments.iter() { 528 | obj_writer.pad_until(image_file_offset + segment.addr as usize); 529 | obj_writer.write(segment.data.as_ref()); 530 | obj_writer.pad_until(image_file_offset + segment.addr as usize + segment.size as usize); 531 | } 532 | 533 | // If the reserved amount and written amount are the same, the file is probably good. 534 | assert_eq!(obj_writer.reserved_len(), obj_writer.len()); 535 | 536 | Ok(elf_data) 537 | } 538 | --------------------------------------------------------------------------------