├── .gitignore ├── .ignore ├── README.md ├── bin ├── ez-aarch64 └── ez-x86_64 ├── build-and-check.rb ├── default.nix ├── lib ├── arch │ ├── aarch64.nix │ ├── default.nix │ └── x86_64.nix ├── attrs.nix ├── binary.nix ├── bits.nix ├── buildProgram.nix ├── ctypes.nix ├── default.nix ├── dsl.nix ├── elf │ ├── constants.nix │ └── default.nix ├── functions.nix ├── linux │ ├── aarch64.nix │ ├── default.nix │ ├── generic.nix │ └── x86_64.nix ├── lists.nix ├── maths.nix ├── nix.nix └── strings.nix ├── run-tests.rb ├── shell.nix ├── support ├── generate-aarch64-opcode-tests.rb ├── generate-x86_64-opcode-tests.rb └── lib │ ├── elf.rb │ ├── refinements.rb │ └── shared.rb └── tests ├── default.nix ├── lib ├── arch │ ├── aarch64.instructions.nix │ ├── default.nix │ └── x86_64.instructions.nix ├── attrs.nix ├── binary.nix ├── ctypes.nix ├── default.nix ├── functions.nix ├── lists.nix ├── maths.nix ├── nix.nix └── strings.nix └── self-tests.nix /.gitignore: -------------------------------------------------------------------------------- 1 | result 2 | result-* 3 | *.bin 4 | *.xxd 5 | *.hex 6 | temp.asm 7 | temp.asm.* 8 | -------------------------------------------------------------------------------- /.ignore: -------------------------------------------------------------------------------- 1 | tests/lib/arch/aarch64.instructions.nix 2 | tests/lib/arch/x86_64.instructions.nix 3 | 4 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Nix is Hard 2 | =========== 3 | 4 | A futile attempt at building binaries using Nix... 5 | 6 | * * * 7 | 8 | syscalls, in my Nix? 9 | -------------------- 10 | 11 | No, not really. But too close for comfort, too. 12 | 13 | This set of expression allows producing binaries for some platforms, directly from the Nix expression. 
14 | 15 | These binaries are useful as building blocks for a sort of bootstrap, or as an exercise in futility. 16 | 17 | With these expressions, building some higher level abstractions that produce *complex outputs* is possible. 18 | 19 | 20 | Okay, but why this instead of bundling binaries? 21 | ------------------------------------------------ 22 | 23 | After all, it's kind of like having a binary, but with more steps, right? 24 | 25 | Well, morally yes. In practice, this is a bit more subtle. 26 | 27 | The way it's been built, a set of *higher order functions* allows pasting useful bits to build programs, in a much more agnostic way than if they had been authored using assembly. 28 | 29 | In a weird way, this is closer to a "syscall-oriented assembly", than pure assembly. 30 | 31 | What does that mean? 32 | 33 | That a lot of the hard parts of writing assembly are waved away. 34 | 35 | The syscall ABI is handled transparently. 36 | Even between the different platforms. 37 | 38 | What you're left with, is a way to paste syscalls together into useful building blocks. 39 | 40 | 41 | What can I do with this? 42 | ------------------------ 43 | 44 | The simplest useful thing this allows is building a single-purpose binary that does one syscall. 45 | 46 | In turn, that syscall allows building higher level semantics. 47 | 48 | Why is this useful? 49 | Think about the build sandbox with a bare `derivation {}`. 50 | 51 | There's nothing but a `busybox sh` in there. 52 | There's no `chmod`, no `cp`, so there's no way to get an executable binary from a bare `derivation {}` without involving a binary seed. 53 | 54 | Well, with these expressions, your binary seed is easier to reason about, 55 | *and* a chunk of the architecture-specific details are handled for you. 56 | 57 | > [!NOTE] 58 | > To be more accurate, this tooling *is* a binary seed. 
59 | > 60 | > The Nix evaluator and build sandbox could be considered part of the binary seed, 61 | > though the design presumes the binaries are *coincidentally* built using Nix, 62 | > and they are meant to be usable outside of Nix builds. 63 | > 64 | > This is hoping to produce a somewhat more auditable binary seed, 65 | > compared to using a pre-built archive of binaries built using some toolchain. 66 | > 67 | > The implementation details of the ELF files are self-contained. 68 | > 69 | > > Tangentially, this does not allow *trusting trust*, 70 | > > as the Nix build environment could still produce different binaries into the sandbox, 71 | > > and give you back the intended binaries in the output. 72 | > > 73 | > > Without trusting your Nix evaluator, and then your builders, the binaries cannot be trusted. 74 | > > 75 | > > An implicit trust in the Nix evaluator and builders is expected here. 76 | > > 77 | > > The same implicit trust must exist as long as the starting point includes Nix, 78 | > > for any other binary seed in use. 79 | 80 | 81 | * * * 82 | 83 | Rough edges 84 | ----------- 85 | 86 | ### Incomplete unsigned 64 bit integers 87 | 88 | Nix uses 64 bit signed integers for numbers. 89 | 90 | We don't have the ability to *represent* numbers with the most significant bit set as unsigned. 91 | 92 | Either use the negative equivalent representation (for constants as numbers), or implement directly as bytes (a list of 0~255 integers). 
93 | -------------------------------------------------------------------------------- /bin/ez-aarch64: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -e 4 | set -u 5 | # Strip only a trailing .asm extension (a substitution would remove the first .asm found anywhere in the path). 6 | f="${1%.asm}" 7 | 8 | AS_FLAGS=( 9 | --gen-debug 10 | -no-pad-sections 11 | ) 12 | LD_FLAGS=( 13 | --omagic 14 | --no-apply-dynamic-relocs 15 | ) 16 | 17 | ( 18 | PS4=" $ " 19 | set -x 20 | aarch64-unknown-linux-gnu-as "${AS_FLAGS[@]}" -o "$f".o "$f".asm 21 | aarch64-unknown-linux-gnu-ld "${LD_FLAGS[@]}" -o "$f" "$f".o 22 | 23 | aarch64-unknown-linux-gnu-objdump --disassembler-color=on --disassemble --insn-width=12 --visualize-jumps=color ./"$f" 24 | ) 25 | set +e 26 | ( 27 | PS4=" $ " 28 | set -x 29 | qemu-aarch64 "$f" 30 | ) 31 | printf ' → exit status: %s\n' "$?" 32 | -------------------------------------------------------------------------------- /bin/ez-x86_64: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -e 4 | set -u 5 | # Strip only a trailing .asm extension (a substitution would remove the first .asm found anywhere in the path). 6 | f="${1%.asm}" 7 | 8 | ( 9 | PS4=" $ " 10 | set -x 11 | nasm -O0 -f elf64 "$f".asm 12 | ld "$f.o" -o "$f" 13 | objdump --architecture=i386 -Mintel,x86-64 --disassembler-color=on --disassemble --wide --insn-width=12 --visualize-jumps=color ./"$f" 14 | ) 15 | set +e 16 | ( 17 | PS4=" $ " 18 | set -x 19 | "./$f" 20 | ) 21 | printf ' → exit status: %s\n' "$?" 
22 | -------------------------------------------------------------------------------- /build-and-check.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env nix-shell 2 | #!nix-shell -p ruby xxd 3 | #!nix-shell -i ruby 4 | 5 | require "open3" 6 | require "shellwords" 7 | 8 | def run(*cmd) 9 | $stdout.puts "" 10 | $stdout.puts " $ #{cmd.shelljoin}" 11 | system(*cmd) 12 | end 13 | 14 | def build_attr(attr) 15 | cmd = ["nix-build", "-A", attr] 16 | $stdout.puts "" 17 | $stdout.puts " $ #{cmd.shelljoin}" 18 | stdout, status = Open3.capture2(*cmd) 19 | unless status.success? 20 | exit status.exitstatus 21 | end 22 | 23 | $stdout.write(stdout) 24 | stdout 25 | end 26 | 27 | params, attrs = ARGV.partition { _1.match(/^--/) } 28 | 29 | if attrs.length == 0 30 | $stderr.puts "Usage: ./build-and-check.rb [--verbose] " 31 | # TODO: list attrs from expression 32 | exit 0 33 | end 34 | 35 | VERBOSE = params.include?("--verbose") 36 | 37 | CMDS = [ 38 | %w(readelf --program-headers --section-headers), 39 | %w(objdump --architecture=i386 -Mintel,x86-64 --wide --disassembler-color=on --insn-width=10) 40 | .tap do |cmd| 41 | if VERBOSE then 42 | cmd << "--disassemble-all" 43 | else 44 | cmd << "--disassemble" 45 | end 46 | end, 47 | if VERBOSE then %w(xxd) end, 48 | ].compact 49 | 50 | failed = false 51 | 52 | attrs.each do |attr| 53 | result = build_attr(attr).strip() 54 | 55 | if result == "" 56 | $stderr.puts "No paths returned by `nix-build`..." 57 | $stderr.puts "Make sure a derivation is being produced... I guess..." 58 | exit 3 59 | end 60 | 61 | result_bin = 62 | Dir.glob(File.join(result, "*")).tap do |paths| 63 | if paths.length != 1 64 | $stderr.puts "NOTE: unexpected layout in result path. 
Expected 1, found #{paths.length}" 65 | $stderr.puts "Paths:" 66 | paths.each do |path| 67 | $stderr.puts " - #{path}" 68 | end 69 | exit 1 70 | end 71 | end 72 | .first 73 | 74 | CMDS.each do |cmd| 75 | run(*cmd, result_bin) 76 | end 77 | 78 | if run(result_bin) 79 | puts "" 80 | puts " ✅ output of #{attr} ran successfully!" 81 | puts "" 82 | else 83 | puts "" 84 | if $?.signaled? 85 | puts " ❌ output of #{attr} exited on signal #{Signal.signame($?.termsig)} (#{$?.termsig.inspect})!" 86 | if [ 4, 11 ].include?($?.termsig) 87 | puts " (note: this is almost guaranteed to be an error...)" 88 | else 89 | puts " (note: this might not be an error, if the program is expected to fail...)" 90 | end 91 | else 92 | puts " ⚠️ output of #{attr} returned #{$?.exitstatus.inspect}!" 93 | puts " (note: this might not be an error, if the program is expected to return non-zero...)" 94 | end 95 | puts "" 96 | failed = true 97 | end 98 | end 99 | 100 | exit 1 if failed 101 | -------------------------------------------------------------------------------- /default.nix: -------------------------------------------------------------------------------- 1 | let 2 | lib = import ./lib; 3 | 4 | inherit (lib) 5 | buildProgram 6 | ; 7 | inherit (lib.linux.select builtins.currentSystem) 8 | arch 9 | dsl 10 | STDOUT 11 | ; 12 | 13 | simple = buildProgram { 14 | inherit arch; 15 | name = "simple"; 16 | code = dsl.syscall.exit 42; 17 | }; 18 | 19 | hello = buildProgram { 20 | inherit arch; 21 | name = "hello"; 22 | code = 23 | { getString, ... }: 24 | builtins.concatLists [ 25 | (dsl.syscall.write STDOUT (getString "hello").addr (getString "hello").length) 26 | (dsl.syscall.exit 0) 27 | ] 28 | ; 29 | strings = 30 | { 31 | hello = '' 32 | @ ${builtins.nixVersion} 33 | @ ${builtins.currentSystem} 34 | Etiam mauris nulla, suscipit ut gravida eget, tincidunt ac orci. Suspendisse 35 | aliquet lacus semper imperdiet elementum. Cras sodales lorem nec mauris 36 | bibendum elementum. 
Phasellus sed metus non orci posuere pretium feugiat at 37 | nisl. Suspendisse facilisis eget est non facilisis. Curabitur ac enim iaculis, 38 | vehicula purus ut, sodales ante. Nunc finibus imperdiet tellus in pharetra. 39 | ''; 40 | } 41 | ; 42 | }; 43 | 44 | "chmod+x" = chmod_x; 45 | "chmod_x" = buildProgram { 46 | inherit arch; 47 | name = "chmod+x"; 48 | code = 49 | { getString, ... }: 50 | builtins.concatLists [ 51 | (dsl.argv1_to_reg { 52 | register = "ARG1"; 53 | errorMessage = getString "no_argv1"; 54 | }) 55 | (dsl.syscall.fchmodat lib.linux.generic.AT_FDCWD null 511 /* 0777 */) 56 | # Move return value from previous syscall into ARG0 57 | # Making the tool return the return value of the syscall 58 | (dsl.copy_reg "ARG0" "RETURN") 59 | (dsl.syscall.exit null) 60 | ] 61 | ; 62 | strings = 63 | { 64 | # Note: no argv0 support yet 65 | no_argv1 = "Usage: chmod+x \n"; 66 | } 67 | ; 68 | }; 69 | in 70 | { 71 | inherit lib; 72 | inherit simple; 73 | inherit hello; 74 | inherit chmod_x; 75 | inherit "chmod+x"; 76 | } 77 | -------------------------------------------------------------------------------- /lib/arch/aarch64.nix: -------------------------------------------------------------------------------- 1 | { lib }: 2 | 3 | let 4 | inherit (lib) 5 | optional 6 | optionals 7 | ; 8 | self = lib.arch.aarch64; 9 | inherit (self) 10 | registers 11 | instructionToBytes 12 | ; 13 | in 14 | { 15 | arch = { 16 | aarch64 = { 17 | name = "aarch64"; 18 | synonymousArches = [ "arm64" ]; 19 | 20 | ELF = { 21 | EM = lib.ELF.constants.EM_AARCH64; 22 | bits = 64; 23 | }; 24 | 25 | # Transforms the 32 bit unsigned value representing an instruction into its bytes constituents 26 | instructionToBytes = 27 | # Let's borrow this handy function 28 | lib.ctypes.toUint32 29 | ; 30 | 31 | # The 32 AArch64 registers 32 | registers = 33 | let 34 | prefixWidth = { x = 64; w = 32; r = 64 /* Makes `sp` to `sp` work at 64 bit. 
*/; }; 35 | attrsFor = 36 | offset: prefix: 37 | rec { 38 | name = 39 | if offset == 31 40 | then if prefix == "r" then "sp" else "${prefix}zr" 41 | else "${prefix}${toString offset}" 42 | ; 43 | value = { 44 | inherit name; 45 | inherit offset; 46 | width = prefixWidth."${prefix}"; 47 | is32 = value.width == 32; 48 | is64 = value.width == 64; 49 | }; 50 | } 51 | ; 52 | in 53 | builtins.listToAttrs ( 54 | builtins.concatLists ( 55 | (builtins.genList (offset: 56 | builtins.map (attrsFor offset) (builtins.attrNames prefixWidth) 57 | ) 31) 58 | ) ++ [ (attrsFor 31 "r") (attrsFor 31 "w") (attrsFor 31 "x") ] 59 | ) 60 | ; 61 | 62 | instructions = { 63 | NOP = 64 | [ (lib.comment "aarch64: NOP") 31 32 3 213 ] 65 | ; 66 | 67 | syscall = 68 | [ (lib.comment "aarch64: syscall (svc 0)") 1 0 0 212 ] 69 | ; 70 | 71 | ADD_imm = 72 | # 73 | # ╒══════╤══════╤══════╤══════╤══════╤══════╤══════╤══════╦══════╤══════╤══════╤══════╤══════╤══════╤══════╤══════╕ 74 | # ╒══════╡ 31 │ 30 │ 29 │ 28 │ 27 │ 26 │ 25 │ 24 ║ 23 │ 22 │ 21 │ 20 │ 19 │ 18 │ 17 │ 16 │ 75 | # │ ADDim│░░░░░░│ 0 │ 0 │ 1 │ 0 │ 0 │ 0 │ 1 ║ 0 │ 0 │░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░│ 76 | # ╘══════╡ sf │ op │ S │ │ sh │ imm12 ... │ 77 | # ├──────┬──────┬──────┬──────┬──────┬──────┬──────┬──────╥──────┬──────┬──────┬──────┬──────┬──────┬──────┬──────┤ 78 | # ╒══════╡ 15 │ 14 │ 13 │ 12 │ 11 │ 10 │ 9 │ 8 ║ 7 │ 6 │ 5 │ 4 │ 3 │ 2 │ 1 │ 0 │ 79 | # │ ADDim│░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░│░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░│░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░│ 80 | # ╘══════╡ ... 
imm12 │ Rn (source) │ Rd (destination) │ 81 | # ╘═══════════════════════════════════════════════════════════════════════════════════════════════════════════════╛ 82 | # See: https://developer.arm.com/documentation/ddi0602/2024-12/Base-Instructions/ADD--immediate---Add-immediate-value- 83 | # AKA: ADD_64_addsub_imm 84 | # AKA: ADD_32_addsub_imm 85 | # Encoded operation: 100100010xxxxxxxxxxxxxxxxxxxxxxx 86 | # 87 | reg: value: 88 | let value' = value; in # break infrec 89 | let 90 | value = 91 | if builtins.isInt value' then value' else 92 | if builtins.isList value' then (lib.bytesUnsignedToNumber value') else 93 | (throw "ADD_imm called with value of unexpected type (${builtins.typeOf value'}); expected integer or list.") 94 | ; 95 | in 96 | let 97 | reg' = registers."${reg}"; 98 | valueBytes = lib.ctypes.toUint64 value; 99 | instruction = 100 | builtins.foldl' builtins.add 0 ([] 101 | ++ optional reg'.is64 (lib.bitShiftLeft 1 31) # bit[31] (size) 102 | ++ [ 103 | (lib.bitShiftLeft 34/* 0b0010_0010 */ 23) # bit[23:30] 104 | (lib.bitShiftLeft value 10) # bit[10:21] (imm12) 105 | (lib.bitShiftLeft reg'.offset 5) # bit[5:9] (Rn) 106 | (reg'.offset) # bit[5:9] (Rd) 107 | ] 108 | ) 109 | ; 110 | in 111 | [ (lib.comment "aarch64: ADD_imm ${reg} ${toString value}") ] 112 | ++ (instructionToBytes instruction) 113 | ; 114 | 115 | # TODO: The bits table 116 | # TODO: Generate all conditions, for B and BC 117 | B.NE = 118 | rel: 119 | # See: https://developer.arm.com/documentation/ddi0602/2024-12/Base-Instructions/B-cond--Branch-conditionally-?lang=en 120 | # `B_cond` (B_only_condbranch) 121 | # Encoded operation: 01010100xxxxxxxxxxxxxxxxxxx0xxxx 122 | # `BC_cond` (BC_only_condbranch) 123 | # Encoded operation: 01010100xxxxxxxxxxxxxxxxxxx1xxxx 124 | # B.__ and BC.__ are implemented from the same base, with o0 set diffently 125 | # BC o0 == 1 126 | # B o0 == 0 127 | let 128 | NE = 1; # 0b0001 129 | instruction = 130 | builtins.foldl' builtins.add 0 ([] 131 | ++ [ 132 | 
(lib.bitShiftLeft 84/* 0b0101_0100 */ 24) # bit[24:31] 133 | (lib.bitShiftLeft (rel / 4) 5) # bit[5:23] (imm19) 134 | # # bit[4] (o0) 135 | NE # bit[0:4] (cond) 136 | ] 137 | ) 138 | ; 139 | in 140 | if (lib.mod rel 4) != 0 then (throw "B.NE called with relative offset not divisible by 4.") else 141 | [ (lib.comment "aarch64: B.NE PC+${toString rel}") ] 142 | ++ instructionToBytes instruction 143 | ; 144 | 145 | CMP_imm = 146 | # ╒══════╤══════╤══════╤══════╤══════╤══════╤══════╤══════╦══════╤══════╤══════╤══════╤══════╤══════╤══════╤══════╕ 147 | # ╒══════╡ 31 │ 30 │ 29 │ 28 │ 27 │ 26 │ 25 │ 24 ║ 23 │ 22 │ 21 │ 20 │ 19 │ 18 │ 17 │ 16 │ 148 | # │ CMP │░░░░░░│ 1 │ 1 │ 1 │ 0 │ 0 │ 0 │ 1 ║ 0 │ 0 │░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░│ 149 | # ╘══════╡ sf │ op │ S │ ║ │ sh │ imm12 ... │ 150 | # ├──────┬──────┬──────┬──────┬──────┬──────┬──────┬──────╥──────┬──────┬──────┬──────┬──────┬──────┬──────┬──────┤ 151 | # ╒══════╡ 15 │ 14 │ 13 │ 12 │ 11 │ 10 │ 9 │ 8 ║ 7 │ 6 │ 5 │ 4 │ 3 │ 2 │ 1 │ 0 │ 152 | # │ CMP │░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░│░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░│ 1 │ 1 │ 1 │ 1 │ 1 │ 153 | # ╘══════╡ ... 
imm12 │ Rn │ Rd │ 154 | # ╘═══════════════════════════════════════════════════════════════════════════════════════════════════════════════╛ 155 | # See: https://developer.arm.com/documentation/ddi0602/2024-12/Base-Instructions/CMP--immediate---Compare--immediate---an-alias-of-SUBS--immediate-- 156 | # See: https://developer.arm.com/documentation/ddi0602/2024-12/Base-Instructions/SUBS--immediate---Subtract-immediate-value--setting-flags- 157 | # AKA: SUBS_64S_addsub_imm 158 | # AKA: SUBS_32S_addsub_imm 159 | # Encoded operation: 11111010010xxxxxxxxx10xxxxx0xxxx 160 | 161 | reg: value: 162 | let value' = value; in # break infrec 163 | let 164 | value = 165 | if builtins.isInt value' then value' else 166 | if builtins.isList value' then (lib.bytesUnsignedToNumber value') else 167 | (throw "CMP_imm called with value of unexpected type (${builtins.typeOf value'}); expected integer or list.") 168 | ; 169 | in 170 | let 171 | reg' = registers."${reg}"; 172 | valueBytes = lib.ctypes.toUint64 value; 173 | instruction = 174 | builtins.foldl' builtins.add 0 ([] 175 | ++ optional reg'.is64 (lib.bitShiftLeft 1 31) # bit[31] (size) 176 | ++ [ 177 | (lib.bitShiftLeft 226/* 0b1110_0010 */ 23) # bit[23:30] 178 | (lib.bitShiftLeft value 10) # bit[10:21] (imm12) 179 | (lib.bitShiftLeft reg'.offset 5) # bit[5:9] (Rn) 180 | (31 /* 0b1_1111 */) # bit[0:4] (Rd) 181 | ] 182 | ) 183 | ; 184 | in 185 | [ (lib.comment "aarch64: CMP_imm ${reg} ${toString value}") ] 186 | ++ (instructionToBytes instruction) 187 | ; 188 | 189 | LDR_mem = 190 | # ╒══════╤══════╤══════╤══════╤══════╤══════╤══════╤══════╦══════╤══════╤══════╤══════╤══════╤══════╤══════╤══════╕ 191 | # ╒══════╡ 31 │ 30 │ 29 │ 28 │ 27 │ 26 │ 25 │ 24 ║ 23 │ 22 │ 21 │ 20 │ 19 │ 18 │ 17 │ 16 │ 192 | # │ LDR │ 1 │░░░░░░│ 1 │ 1 │ 1 │ 0 │ 0 │ 1 ║ 0 │ 1 │░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░│ 193 | # ╘══════╡ size │ │ VR │ ║ opc │ imm12 ... 
│ 194 | # ├──────┬──────┬──────┬──────┬──────┬──────┬──────┬──────╥──────┬──────┬──────┬──────┬──────┬──────┬──────┬──────┤ 195 | # ╒══════╡ 15 │ 14 │ 13 │ 12 │ 11 │ 10 │ 9 │ 8 ║ 7 │ 6 │ 5 │ 4 │ 3 │ 2 │ 1 │ 0 │ 196 | # │ LDR │░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░│░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░│░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░│ 197 | # ╘══════╡ ... imm12 │ Rn │ Rt │ 198 | # ╘═══════════════════════════════════════════════════════════════════════════════════════════════════════════════╛ 199 | # See: 200 | # AKA: LDR_64_ldst_pos 201 | # AKA: LDR_32_ldst_pos 202 | # 203 | # Encoded operation: 1x11100101xxxxxxxxxxxxxxxxxxxxxx 204 | # 205 | # `dest` is the destination register name. 206 | # `src` is the address's source register name. 207 | # 208 | # We are not implementing the `imm12` offset. 209 | dest: src: 210 | let 211 | dest' = registers."${dest}"; 212 | src' = registers."${src}"; 213 | instruction = 214 | builtins.foldl' builtins.add 0 ([] 215 | ++ [ (lib.bitShiftLeft 1 31) ] # bit[31] (size) 216 | ++ optional dest'.is64 (lib.bitShiftLeft 1 30) # bit[30] (size) 217 | ++ [ 218 | (lib.bitShiftLeft 229/* 0b11100101 */ 22) # bit[22:29] 219 | # No `imm12` value 220 | (lib.bitShiftLeft src'.offset 5) # bit[5:9] (Rn) 221 | (dest'.offset) # bit[0:4] (Rt) 222 | ] 223 | ) 224 | ; 225 | in 226 | [ (lib.comment "aarch64: LDR_mem ${dest} ${src}") ] 227 | ++ (instructionToBytes instruction) 228 | ; 229 | 230 | MOV_reg = 231 | # NOTE: MOV operations are aliases of ORR. 
232 | # 233 | # ╒══════╤══════╤══════╤══════╤══════╤══════╤══════╤══════╦══════╤══════╤══════╤══════╤══════╤══════╤══════╤══════╕ 234 | # ╒══════╡ 31 │ 30 │ 29 │ 28 │ 27 │ 26 │ 25 │ 24 ║ 23 │ 22 │ 21 │ 20 │ 19 │ 18 │ 17 │ 16 │ 235 | # │ ORRsr│░░░░░░│ 0 │ 1 │ 0 │ 1 │ 0 │ 1 │ 0 ║ 0 │ 0 │ 0 │░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░│ 236 | # ╘══════╡ sf │ opc ║ shift │ N │ Rm (source) │ 237 | # ├──────┬──────┬──────┬──────┬──────┬──────┬──────┬──────╥──────┬──────┬──────┬──────┬──────┬──────┬──────┬──────┤ 238 | # ╒══════╡ 15 │ 14 │ 13 │ 12 │ 11 │ 10 │ 9 │ 8 ║ 7 │ 6 │ 5 │ 4 │ 3 │ 2 │ 1 │ 0 │ 239 | # │ ORRsr│ 0 │ 0 │ 0 │ 0 │ 0 │ 0 │ 1 │ 1 ║ 1 │ 1 │ 1 │░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░│ 240 | # ╘══════╡ imm6 │ Rn │ Rd (destination) │ 241 | # ╘═══════════════════════════════════════════════════════════════════════════════════════════════════════════════╛ 242 | # See: https://developer.arm.com/documentation/ddi0602/2024-12/Base-Instructions/MOV--register---Move-register-value--an-alias-of-ORR--shifted-register-- 243 | # See: https://developer.arm.com/documentation/ddi0602/2024-12/Base-Instructions/ORR--shifted-register---Bitwise-OR--shifted-register-- 244 | # AKA: ORR_64_log_shift 245 | # AKA: ORR_32_log_shift 246 | # Encoded operation: 10101010xx0xxxxxxxxxxxxxxxxxxxxx 247 | # 248 | # ╒══════╤══════╤══════╤══════╤══════╤══════╤══════╤══════╦══════╤══════╤══════╤══════╤══════╤══════╤══════╤══════╕ 249 | # ╒══════╡ 31 │ 30 │ 29 │ 28 │ 27 │ 26 │ 25 │ 24 ║ 23 │ 22 │ 21 │ 20 │ 19 │ 18 │ 17 │ 16 │ 250 | # │ ADDim│░░░░░░│ 0 │ 0 │ 1 │ 0 │ 0 │ 0 │ 1 ║ 0 │ 0 │ 0 │ 0 │ 0 │ 0 │ 0 │ 0 │ 251 | # ╘══════╡ sf │ op │ S │ │ sh │ imm12 ... │ 252 | # ├──────┬──────┬──────┬──────┬──────┬──────┬──────┬──────╥──────┬──────┬──────┬──────┬──────┬──────┬──────┬──────┤ 253 | # ╒══════╡ 15 │ 14 │ 13 │ 12 │ 11 │ 10 │ 9 │ 8 ║ 7 │ 6 │ 5 │ 4 │ 3 │ 2 │ 1 │ 0 │ 254 | # │ ADDim│ 0 │ 0 │ 0 │ 0 │ 0 │ 0 │░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░│░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░│ 255 | # ╘══════╡ ... 
imm12 │ Rn (source) │ Rd (destination) │ 256 | # ╘═══════════════════════════════════════════════════════════════════════════════════════════════════════════════╛ 257 | # See: https://developer.arm.com/documentation/ddi0602/2024-12/Base-Instructions/MOV--to-from-SP---Move-register-value-to-or-from-SP--an-alias-of-ADD--immediate-- 258 | # See: https://developer.arm.com/documentation/ddi0602/2024-12/Base-Instructions/ADD--immediate---Add-immediate-value- 259 | # AKA: ADD_64_addsub_imm 260 | # AKA: ADD_32_addsub_imm 261 | # Encoded operation: 100100010xxxxxxxxxxxxxxxxxxxxxxx 262 | # 263 | # 264 | # NOTE: ORR (shifted register) cannot operate on SP. 265 | # The convention is to use ADD (immediate) when dealing with SP. 266 | # 267 | # `dest` is the destination register name. 268 | # `src` is the source register name. 269 | dest: src: 270 | let 271 | dest' = registers."${dest}"; 272 | src' = registers."${src}"; 273 | instruction = 274 | if src == "sp" || dest == "sp" 275 | then 276 | builtins.foldl' builtins.add 0 ([] 277 | ++ optional dest'.is64 (lib.bitShiftLeft 1 31) # bit[31] (sf) 278 | ++ [ 279 | (lib.bitShiftLeft 34 /* 0b0010_0010 */ 23) # bit[23:30] (shift) 280 | (lib.bitShiftLeft src'.offset 5) # bit[5:9] (Rn) 281 | (dest'.offset) # bit[0:4] (Rd) 282 | ] 283 | ) 284 | else 285 | builtins.foldl' builtins.add 0 ([] 286 | ++ optional dest'.is64 (lib.bitShiftLeft 1 31) # bit[31] (sf) 287 | ++ [ 288 | (lib.bitShiftLeft 42 /* 0b010_1010 */ 24) # bit[24:30] (shift) 289 | (0) # bit[21:23] (shift) 290 | (lib.bitShiftLeft src'.offset 16) # bit[16:20] (Rm) 291 | (0) # bit[10:15] (imm6) 292 | (lib.bitShiftLeft 31 /* 0b1_1111 */ 5) # bit[5:9] (Rn) 293 | (dest'.offset) # bit[0:4] (Rd) 294 | ] 295 | ) 296 | ; 297 | in 298 | [ (lib.comment "aarch64: MOV_reg ${dest} ${src}") ] 299 | ++ (instructionToBytes instruction) 300 | ; 301 | 302 | MOV_imm = 303 | # NOTE: MOVK and MOVZ are combined to form this synthetic MOV_imm. 
304 | # ╒══════╤══════╤══════╤══════╤══════╤══════╤══════╤══════╦══════╤══════╤══════╤══════╤══════╤══════╤══════╤══════╕ 305 | # ╒══════╡ 31 │ 30 │ 29 │ 28 │ 27 │ 26 │ 25 │ 24 ║ 23 │ 22 │ 21 │ 20 │ 19 │ 18 │ 17 │ 16 │ 306 | # │ MOVZ │░░░░░░│ 1 │ 0 │ 1 │ 0 │ 0 │ 1 │ 0 ║ 1 │░░░░░░░░░░░░░│░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░│ 307 | # │ MOVK │░░░░░░│ 1 │ 1 │ 1 │ 0 │ 0 │ 1 │ 0 ║ 1 │░░░░░░░░░░░░░│░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░│ 308 | # ╘══════╡ sf │ opc ^^ │ hw │ imm16 │ 309 | # ├──────┬──────┬──────┬──────┬──────┬──────┬──────┬──────╥──────┬──────┬──────┬──────┬──────┬──────┬──────┬──────┤ 310 | # ╒══════╡ 15 │ 14 │ 13 │ 12 │ 11 │ 10 │ 9 │ 8 ║ 7 │ 6 │ 5 │ 4 │ 3 │ 2 │ 1 │ 0 │ 311 | # │ MOVZ │░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░│░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░│ 312 | # │ MOVK │░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░│░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░│ 313 | # ╘══════╡ ... imm16 │ Rd (destination) │ 314 | # ╘═══════════════════════════════════════════════════════════════════════════════════════════════════════════════╛ 315 | # See: https://developer.arm.com/documentation/ddi0602/2024-12/Base-Instructions/MOVZ--Move-wide-with-zero- 316 | # See: https://developer.arm.com/documentation/ddi0602/2024-12/Base-Instructions/MOVK--Move-wide-with-keep- 317 | # 318 | # `reg` is the destination register name 319 | # `value` is either the numeric representation or the list of bytes to place in destination `reg`. 
320 | reg: value: 321 | let value' = value; in # break infrec 322 | let 323 | reg' = registers."${reg}"; 324 | value = 325 | if builtins.isInt value' 326 | then 327 | if reg'.is64 328 | then if value' < 0 then lib.ctypes.toInt64 value' else lib.ctypes.toUint64 value' 329 | else if value' < 0 then lib.ctypes.toInt32 value' else lib.ctypes.toUint32 value' 330 | else 331 | if builtins.isList value' then value' else 332 | (throw "MOV_imm called with value of unexpected type (${builtins.typeOf value'}); expected integer or list.") 333 | ; 334 | in 335 | let 336 | oneInstruction = 337 | value: shift: 338 | let 339 | # NOTE: handles LSB only 340 | at = 341 | i: 342 | let offset = shift*2+i; in 343 | if offset < (builtins.length value) 344 | then 345 | builtins.elemAt value offset 346 | else 347 | 0 348 | ; 349 | imm16 = 350 | lib.bytesUnsignedToNumber [ 351 | (at 0) 352 | (at 1) 353 | ] 354 | ; 355 | in 356 | builtins.foldl' builtins.add 0 ([] 357 | ++ optional reg'.is64 (lib.bitShiftLeft 1 31) # bit[31] (sf) 358 | ++ [ 359 | ( 360 | # Zero register with first move. 361 | if shift == 0 362 | then (1384120320) # bit[23:30] 10100101 MOVZ 363 | else (1920991232) # bit[23:30] 11100101 MOVK 364 | ) 365 | (lib.bitShiftLeft shift 21) # bit[21:22] (hw) 366 | (lib.bitShiftLeft imm16 5) # bit[5:20] (imm16) 367 | (reg'.offset) # bit[0:4] (Rd) 368 | ] 369 | ) 370 | ; 371 | in 372 | [ (lib.comment "aarch64: MOV_imm ${reg} ${toString value}") ] 373 | # The different MOV instructions are hard. 374 | # Let's make this simple, and always do the pedantic movk/movz grouping. 375 | # We *could* (later) optimize this and skip imm16 parts, past the first, that are zero. 
376 | ++ (instructionToBytes (oneInstruction value 0)) 377 | ++ (instructionToBytes (oneInstruction value 1)) 378 | ++ optionals (reg'.is64) (instructionToBytes (oneInstruction value 2)) 379 | ++ optionals (reg'.is64) (instructionToBytes (oneInstruction value 3)) 380 | ; 381 | }; 382 | }; 383 | }; 384 | } 385 | -------------------------------------------------------------------------------- /lib/arch/default.nix: -------------------------------------------------------------------------------- 1 | { lib }: 2 | 3 | let 4 | # Help out users in some situations with slightly wrong names being used: every name in `arch.synonymousArches` maps to a `throw` that names the canonical architecture. 5 | architectureHintsFor = 6 | arch: 7 | builtins.listToAttrs ( 8 | builtins.map ( 9 | name: 10 | { 11 | inherit name; 12 | value = builtins.throw "architecture '${name}' is named '${arch.name}'."; 13 | } 14 | ) arch.synonymousArches 15 | ) 16 | ; 17 | synonyms = {} 18 | // (architectureHintsFor lib.arch.aarch64) // (architectureHintsFor lib.arch.x86_64) 19 | ; 20 | in 21 | { 22 | arch = { 23 | select = 24 | system: 25 | let 26 | parsed' = lib.parseSystem system; 27 | parsed = 28 | if builtins.isNull parsed' 29 | then system 30 | else parsed'.arch 31 | ; 32 | found = lib.arch."${parsed}" or null; 33 | synonym = synonyms."${parsed}" or null; 34 | in 35 | if !builtins.isNull found then found else 36 | if !builtins.isNull synonym then synonym else 37 | throw "Unknown or unimplemented architecture '${parsed}' in `arch`." 
38 | ; 39 | } 40 | // (import ./aarch64.nix { inherit lib; }).arch 41 | // (import ./x86_64.nix { inherit lib; }).arch 42 | ; 43 | } 44 | -------------------------------------------------------------------------------- /lib/arch/x86_64.nix: -------------------------------------------------------------------------------- 1 | { lib }: 2 | 3 | let 4 | inherit (lib) 5 | bitShiftRight 6 | chars 7 | join 8 | mod 9 | numberToBytes 10 | optional 11 | optionals 12 | padBytesRight 13 | ; 14 | inherit (lib.arch.x86_64) 15 | registers 16 | prefix 17 | ; 18 | 19 | # Binary numbers 20 | b0100_0000 = 64; # 0x40 21 | b1100_0000 = 192; # 0xC0 22 | b100 = 4; 23 | b101 = 5; 24 | b110 = 6; 25 | b111 = 7; 26 | 27 | synonymousArches = 28 | [ 29 | "x86-64" 30 | "amd64" 31 | ] 32 | ; 33 | 34 | # The power of two length of an operand, in bytes. 35 | # NOTE: implementation is not great, but works. NOTE(review): a byte count of 3 yields 2 here (and 5 to 7 yield 4), i.e. it rounds down; confirm `numberToBytes` never produces such lengths. 36 | operandWidth = 37 | value: 38 | let 39 | bytesCount = lib.bytesCount (numberToBytes value); 40 | in 41 | # NOTE: Nix currently doesn't handle integer types bigger than 64 bit... 42 | # We are cheating a bit here by assuming this. 43 | if bytesCount >= 8 then 8 else 44 | if bytesCount >= 4 then 4 else 45 | if bytesCount >= 2 then 2 else 46 | 1 47 | ; 48 | in 49 | { 50 | arch = { 51 | x86_64 = { 52 | name = "x86_64"; 53 | inherit synonymousArches; 54 | ELF = { 55 | EM = lib.ELF.constants.EM_X86_64; 56 | bits = 64; 57 | }; 58 | 59 | # https://wiki.osdev.org/X86-64_Instruction_Encoding#Registers 60 | # NOTE: these values are not necessarily sufficient to encode in opcode operands. 61 | # NOTE: extended registers may require using the R or B REX flags. 62 | registers = 63 | let 64 | bits = [ 8 16 32 64 ]; 65 | at = builtins.elemAt; 66 | # The four following lists are combined to form register names. 
67 | _classic_8A = chars "acdbacdb"; 68 | _classic_8L = chars "llllhhhh"; 69 | _classic_16A = chars "acdbsbsd"; 70 | _classic_16L = chars "xxxxppii"; 71 | # The classic 8 bit registers can't neatly be named like the following ones. 72 | _classic_8 = 73 | builtins.genList 74 | (i: rec { 75 | name = "${at _classic_8A i}${at _classic_8L i}"; 76 | value = { 77 | width = 8; 78 | reg = i; 79 | extended = false; 80 | offset = i; 81 | isAX = value.offset == 0; 82 | }; 83 | }) 8 84 | ; 85 | # The 32 and 64 bit variant are prefixed. 86 | _classic_16P = [ (throw "(shouldn't happen)") "" "e" "r" ]; 87 | _classic_16 = 88 | width: 89 | builtins.genList 90 | (i: rec { 91 | name = "${at _classic_16P width}${at _classic_16A i}${at _classic_16L i}"; 92 | value = { 93 | width = at bits width; 94 | reg = i; 95 | extended = false; 96 | offset = i; 97 | isAX = value.offset == 0; 98 | }; 99 | }) 8 100 | ; 101 | # The 8, 16 and 32 bit variants are suffixed. 102 | _extended_S = [ "b" "w" "d" "" ]; 103 | _extended = 104 | width: 105 | builtins.genList (i: rec { 106 | name = "r${toString (value.reg)}${at _extended_S width}"; 107 | value = { 108 | width = at bits width; 109 | reg = i + 8; 110 | extended = true; 111 | offset = i; 112 | isAX = false; 113 | }; 114 | }) 8 115 | ; 116 | self = (builtins.listToAttrs ( 117 | builtins.concatLists [ 118 | (_classic_8) 119 | (_classic_16 1) # 16 bit 120 | (_classic_16 2) # 32 bit 121 | (_classic_16 3) # 64 bit 122 | (_extended 0) # 8 bit 123 | (_extended 1) # 16 bit 124 | (_extended 2) # 32 bit 125 | (_extended 3) # 64 bit 126 | ] 127 | )) // { 128 | spl = self.ah; 129 | bpl = self.ch; 130 | sil = self.dh; 131 | dil = self.bh; 132 | }; 133 | in 134 | self 135 | ; 136 | prefix = { 137 | # Table 2-4. 
REX Prefix Fields [BITS: 0100WRXB] 138 | REX = 139 | flags: 140 | let 141 | # Values 142 | flagValues = { 143 | W = 8; # 0b1000 → 64 bit operands 144 | R = 4; # 0b0100 → Extension of the **ModR/M reg field** 145 | X = 2; # 0b0010 → Extension of the SIB index field 146 | B = 1; # 0b0001 → Extension of the **ModR/M r/m field**, SIB base field, or Opcode reg field 147 | }; 148 | valueList = 149 | builtins.map ( 150 | char: 151 | flagValues."${char}" 152 | or (throw "REX flags invalid (used: '${flags}') the '${char}' flag was the first invalid one.") 153 | ) (chars flags) 154 | ; 155 | in 156 | b0100_0000 + ( 157 | builtins.foldl' (a: b: a + b) 0 (valueList) 158 | ) 159 | ; 160 | }; 161 | MODRM = { 162 | mod = { 163 | indirect = 0; # For pedantic completeness; [r/m] 164 | # NOTE: 01 and 10 not supported yet. 165 | direct = b1100_0000; # r/m 166 | }; 167 | }; 168 | instructions = 169 | let 170 | inherit (lib.arch.x86_64) 171 | MODRM 172 | ; 173 | in 174 | { 175 | MOV_reg = 176 | into: from: 177 | let 178 | into' = registers."${into}"; 179 | from' = registers."${from}"; 180 | rex_flags = [] 181 | ++ (optional (into'.width == 64) "W") 182 | ++ (optional (into'.extended) "B") 183 | ++ (optional (from'.extended) "R") 184 | ; 185 | rex_value = 186 | prefix.REX (join rex_flags) 187 | ; 188 | operand = [( 189 | (MODRM.mod.direct) 190 | + (into'.offset) 191 | + (from'.offset * 8) 192 | )]; 193 | opcode = [( 194 | if into'.width == 8 195 | then 136 # 0x88 196 | else 137 # 0x89 197 | )]; 198 | opcode_prefix = 199 | if into'.width == 16 then [ 102 /* 0x66 */ ] else 200 | [] 201 | ; 202 | in 203 | if into'.width != from'.width 204 | then throw "'MOV_reg ${into},${from} ...' 
used with different size operands (${toString into'.width},${toString from'.width})" 205 | else [] 206 | ++ opcode_prefix 207 | ++ (optional (rex_value != b0100_0000) rex_value) 208 | ++ opcode 209 | ++ operand 210 | ; 211 | 212 | MOV_imm = 213 | reg: value: 214 | let 215 | reg' = registers."${reg}"; 216 | rex_flags = [] 217 | ++ (optional (reg'.width == 64) "W") 218 | ++ (optional (reg'.extended) "B") 219 | ; 220 | rex_value = 221 | prefix.REX (join rex_flags) 222 | ; 223 | regLength = bitShiftRight reg'.width (4-1); 224 | valueLength = lib.bytesCount value; 225 | opcode = [( 226 | if reg'.width == 8 227 | then (176 + reg'.offset) # 0xB0 228 | else (184 + reg'.offset) # 0xB8 229 | )]; 230 | opcode_prefix = 231 | if reg'.width == 16 then [ 102 /* 0x66 */ ] else 232 | [] 233 | ; 234 | in 235 | if valueLength > regLength 236 | then throw "'MOV_imm ${reg} ...' used with immediate value too large. Expected at most ${toString regLength} bytes, got ${toString valueLength}" 237 | else [] 238 | ++ opcode_prefix 239 | ++ (optional (rex_value != b0100_0000) rex_value) 240 | ++ opcode 241 | ++ (padBytesRight regLength value) 242 | ; 243 | 244 | MOV_from_mem = 245 | into: from: 246 | let 247 | into' = registers."${into}"; 248 | from' = registers."${from}"; 249 | rex_flags = [] 250 | ++ (optional (into'.width == 64) "W") 251 | ++ (optional (from'.extended) "B") 252 | ++ (optional (into'.extended) "R") 253 | ; 254 | rex_value = 255 | prefix.REX (join rex_flags) 256 | ; 257 | opcode = [( 258 | if into'.width == 8 259 | then 138 # 0x8A 260 | else 139 # 0x8B 261 | )]; 262 | operand = 263 | ( 264 | if from'.offset == b101 265 | then (lib.bitShiftLeft 1 6) # 01.___.___ 266 | else MODRM.mod.indirect # 00.___.___ 267 | ) 268 | + (into'.offset * 8) 269 | + (from'.offset) 270 | ; 271 | additional_byte = 272 | if from'.offset == b100 273 | then [ 36 ] # SIB to 00.100.100; 0x24 274 | else 275 | if from'.offset == b101 # RBP/R13 276 | then [ 0 ] # Displacement of zero 277 | else null 278 | 
; 279 | in 280 | 281 | if into'.width != 64 282 | then (throw "FIXME: MOV_from_mem only implements 64 bit operands at the moment.") else 283 | if into'.width != from'.width 284 | then (throw "'MOV_from_mem ${into},${from} ...' used with different size operands (${toString into'.width},${toString from'.width})") else 285 | 286 | (optional (rex_value != b0100_0000) rex_value) 287 | ++ opcode 288 | ++ [ operand ] 289 | ++ (optionals (additional_byte != null) additional_byte) 290 | ; 291 | 292 | # 0F 05 293 | syscall = [ 15 5 ]; 294 | 295 | # 296 | # Control flow 297 | # 298 | 299 | # Comparison 300 | CMP_imm = 301 | reg: value: 302 | let 303 | reg' = registers."${reg}"; 304 | rex_flags = [] 305 | ++ (optional (reg'.width == 64) "W") 306 | ++ (optional (reg'.extended) "B") 307 | ; 308 | rex_value = 309 | prefix.REX (join rex_flags) 310 | ; 311 | regLength = 312 | let val = bitShiftRight reg'.width (4-1); in 313 | # Fixup for imm32 on 64 bit registers 314 | if val == 8 then 4 else val 315 | ; 316 | valueLength = lib.bytesCount value; 317 | opcode = [( 318 | if reg'.isAX 319 | then ( 320 | 60 /* 0x3c */ 321 | + ( if reg'.width == 8 then 0 else 1 ) 322 | ) else ( 323 | 128 /* 0x80 */ 324 | + ( if reg'.width == 8 then 0 else 1 ) 325 | ) 326 | )]; 327 | operand = 328 | (MODRM.mod.direct) 329 | + (b111 * 8) # /7 330 | + (reg'.offset) 331 | ; 332 | opcode_prefix = 333 | if reg'.width == 16 then [ 102 /* 0x66 */ ] else 334 | [] 335 | ; 336 | in 337 | if valueLength > regLength 338 | then throw "'CMP_imm ${reg} ...' used with immediate value too large. Expected at most ${toString regLength} bytes, got ${toString valueLength}" 339 | else [] 340 | ++ opcode_prefix 341 | ++ (optional (rex_value != b0100_0000) rex_value) 342 | # NOTE: no imm64!!!! 
343 | ++ opcode 344 | ++ (optional (!reg'.isAX) operand) 345 | ++ (padBytesRight regLength value) 346 | ; 347 | 348 | ADD_imm = 349 | reg: value: 350 | let 351 | reg' = registers."${reg}"; 352 | rex_flags = [] 353 | ++ (optional (reg'.width == 64) "W") 354 | ++ (optional (reg'.extended) "B") 355 | ; 356 | rex_value = 357 | prefix.REX (join rex_flags) 358 | ; 359 | regLength = 360 | let val = bitShiftRight reg'.width (4-1); in 361 | # Fixup for imm32 on 64 bit registers 362 | if val == 8 then 4 else val 363 | ; 364 | valueLength = lib.bytesCount value; 365 | opcode = [( 366 | if reg'.isAX 367 | then ( 368 | 4 /* 0x04 */ 369 | + ( if reg'.width == 8 then 0 else 1 ) 370 | ) else ( 371 | 128 /* 0x80 */ 372 | + ( if reg'.width == 8 then 0 else 1 ) 373 | ) 374 | )]; 375 | operand = 376 | (MODRM.mod.direct) 377 | + (0 * 8) # /0 378 | + (reg'.offset) 379 | ; 380 | opcode_prefix = 381 | if reg'.width == 16 then [ 102 /* 0x66 */ ] else 382 | [] 383 | ; 384 | in 385 | if valueLength > regLength 386 | then throw "'ADD_imm ${reg} ...' used with immediate value too large. Expected at most ${toString regLength} bytes, got ${toString valueLength}" 387 | else [] 388 | ++ opcode_prefix 389 | ++ (optional (rex_value != b0100_0000) rex_value) 390 | # NOTE: no imm64!!!! 391 | ++ opcode 392 | ++ (optional (!reg'.isAX) operand) 393 | ++ (padBytesRight regLength value) 394 | ; 395 | 396 | # Jump if equal; All are relative jumps. Emits 0F 84 followed by a 4 byte operand holding (value - 4). 397 | # (Conditional absolute jumps are not a thing in x86) 398 | JE = 399 | value: 400 | let 401 | byteWidth = operandWidth value; 402 | operandLength = 4; 403 | in 404 | [(lib.comment "x86_64: JE ${toString value}")] ++ 405 | ( 406 | if byteWidth <= 4 # operandWidth is a width in bytes (1, 2, 4 or 8), not in bits 407 | then ( 408 | [ 15 132 ] # 0x0f 0x84; JE rel32 409 | ++ (padBytesRight operandLength (numberToBytes (value - operandLength))) 410 | ) 411 | else 412 | throw "64 bit operands not supported for JE." 413 | ) 414 | ; 415 | 416 | # Jump if not equal; All are relative jumps.
417 | # (Conditional absolute jumps are not a thing in x86) 418 | JNE = 419 | value: 420 | let 421 | byteWidth = operandWidth value; 422 | operandLength = 4; 423 | in 424 | [(lib.comment "x86_64: JNE ${toString value}")] ++ 425 | ( 426 | if byteWidth <= 4 # operandWidth is a width in bytes (1, 2, 4 or 8), not in bits 427 | then ( 428 | [ 15 133 ] # 0x0f 0x85; JNE rel32 429 | ++ (padBytesRight operandLength (numberToBytes (value - operandLength))) 430 | ) 431 | else 432 | throw "64 bit operands not supported for JNE." 433 | ) 434 | ; 435 | } 436 | ; 437 | }; 438 | }; 439 | } 440 | -------------------------------------------------------------------------------- /lib/attrs.nix: -------------------------------------------------------------------------------- 1 | { lib }: 2 | 3 | rec { 4 | merge = a: b: a // b; 5 | concatAttrs = 6 | list: 7 | builtins.foldl' merge {} list 8 | ; 9 | } 10 | -------------------------------------------------------------------------------- /lib/binary.nix: -------------------------------------------------------------------------------- 1 | { lib }: 2 | 3 | let 4 | tarnix = import ((builtins.fetchTarball { 5 | url = "https://github.com/puckipedia/tarnix/archive/140d8d6f9e4d566213d00edff04ac22c2e0c2590.tar.gz"; 6 | sha256 = "sha256:0ck9y6dqn45gi28agijfmbhdhdrk7xq37qlxg895xv1l5342p9fn"; 7 | }) + "/ustar.nix"); 8 | in 9 | rec 10 | { 11 | mkBinary = 12 | { 13 | name, 14 | bytes, 15 | executable ? false, 16 | }: 17 | let 18 | tar = builtins.toFile "${name}.tar" (with tarnix; 19 | makeTar [ 20 | (file name executable bytes) 21 | ] 22 | ); 23 | in 24 | derivation { 25 | inherit name; 26 | channelName = name; 27 | system = "(${name})"; 28 | builder = "builtin:unpack-channel"; 29 | src = "${tar}"; 30 | } 31 | ; 32 | 33 | # https://en.wikipedia.org/wiki/Basic_Latin_(Unicode_block) 34 | basicLatinChars = 35 | " !"
+ ''"'' + "#$%&'()*+,-./" + 36 | "0123456789" + 37 | ":;<=>?@" + 38 | "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + 39 | "[\\]^_`" + 40 | "abcdefghijklmnopqrstuvwxyz" + 41 | "{|}~" 42 | ; 43 | # Maps every character of a string to its byte value, using basicLatinChars (plus the newline character). 44 | basicLatinToBytes = 45 | let 46 | space = 32; 47 | tilde = 126; 48 | lut = (builtins.listToAttrs ( 49 | builtins.genList (i: { 50 | value = i + space; 51 | name = builtins.substring i 1 basicLatinChars; 52 | }) (tilde - space + 1) # The range is inclusive on both ends: 95 characters, so the tilde is mapped too 53 | )) // { 54 | "\n" = 10; 55 | }; 56 | in 57 | str: 58 | builtins.map 59 | (x: lut."${x}") 60 | (lib.chars str) 61 | ; 62 | 63 | # LSB only (the first byte of the list is the least significant one) 64 | bytesUnsignedToNumber = 65 | bytes: 66 | if ((builtins.filter (byte: byte >= 256) bytes) != []) 67 | then throw "bytesUnsignedToNumber called with overflowed byte value (>= 256)" 68 | else 69 | ( 70 | builtins.foldl' 71 | (state: byte: { 72 | offset = state.offset + 1; 73 | value = state.value 74 | + (lib.bitShiftLeft byte (8*state.offset)) 75 | ; 76 | }) 77 | { value = 0; offset = 0; } 78 | bytes 79 | ).value 80 | ; 81 | 82 | # NOTE: range: 9223372036854775807 to -9223372036854775808 83 | # NOTE: order is LSB first. 84 | numberToBytes = 85 | value: 86 | if value < 0 87 | then (throw "numberToBytes given a negative argument. Not supported yet.") 88 | else 89 | let 90 | splitBytes = 91 | { value, coll ? [] }: 92 | if value < 256 93 | then coll ++ [ value ] 94 | else (splitBytes { 95 | value = value / 256; 96 | coll = coll ++ [ (value - (value / 256 * 256)) ]; 97 | }) 98 | ; 99 | in 100 | splitBytes { inherit value; } 101 | ; 102 | padBytesLeft = 103 | amount: bytes: 104 | let 105 | len = lib.bytesCount bytes; 106 | todo = 107 | if len <= amount 108 | then amount - len 109 | else throw "padBytesLeft for ${toString amount} bytes called with list ${toString len} long."
110 | ; 111 | in 112 | (builtins.genList (x: 0) todo) ++ bytes 113 | ; 114 | padBytesRight = 115 | amount: bytes: 116 | let 117 | len = lib.bytesCount bytes; 118 | todo = 119 | if len <= amount 120 | then amount - len 121 | else throw "padBytesRight for ${toString amount} bytes called with list ${toString len} long." 122 | ; 123 | in 124 | bytes ++ (builtins.genList (x: 0) todo) 125 | ; 126 | 127 | twosComplement = 128 | width: value: 129 | if value >= 0 130 | then padBytesRight width (numberToBytes value) 131 | else 132 | lib.mapWithIndex 133 | (i: byte: 134 | builtins.bitOr # (then) Set the sign bit 135 | (if i == (width - 1) then lib.b10000000 else 0) 136 | (builtins.bitXor lib.b11111111 byte) # (first) Invert every bit 137 | ) 138 | (padBytesRight width (numberToBytes ((lib.abs value)-1))) 139 | ; 140 | 141 | getAlignedLength = 142 | alignment: length: 143 | if alignment == 0 144 | then length 145 | else 146 | if length <= 0 147 | then 0 148 | else 149 | length + 150 | (alignment - (lib.mod (length - 1) alignment) - 1) 151 | ; 152 | 153 | padToAlignment = 154 | alignment: bytes: 155 | let 156 | length = builtins.length bytes; 157 | alignedLength = getAlignedLength alignment length; 158 | padding = alignedLength - length; 159 | in 160 | bytes ++ 161 | (builtins.genList (_: 0) padding) 162 | ; 163 | } 164 | -------------------------------------------------------------------------------- /lib/bits.nix: -------------------------------------------------------------------------------- 1 | { lib }: 2 | 3 | # This file presents aliases for the common useful bit patterns. 4 | { 5 | # Single bit set. 
6 | b00000001 = 1; # 0x1 7 | b00000010 = 2; # 0x2 8 | b00000100 = 4; # 0x4 9 | b00001000 = 8; # 0x8 10 | b00010000 = 16; # 0x10 11 | b00100000 = 32; # 0x20 12 | b01000000 = 64; # 0x40 13 | b10000000 = 128; # 0x80 14 | b00000001_00000000 = 256; # 0x100 15 | b00000010_00000000 = 512; # 0x200 16 | b00000100_00000000 = 1024; # 0x400 17 | b00001000_00000000 = 2048; # 0x800 18 | b00010000_00000000 = 4096; # 0x1000 19 | b00100000_00000000 = 8192; # 0x2000 20 | b01000000_00000000 = 16384; # 0x4000 21 | b10000000_00000000 = 32768; # 0x8000 22 | b00000001_00000000_00000000 = 65536; # 0x10000 23 | b00000010_00000000_00000000 = 131072; # 0x20000 24 | b00000100_00000000_00000000 = 262144; # 0x40000 25 | b00001000_00000000_00000000 = 524288; # 0x80000 26 | b00010000_00000000_00000000 = 1048576; # 0x100000 27 | b00100000_00000000_00000000 = 2097152; # 0x200000 28 | b01000000_00000000_00000000 = 4194304; # 0x400000 29 | b10000000_00000000_00000000 = 8388608; # 0x800000 30 | b00000001_00000000_00000000_00000000 = 16777216; # 0x1000000 31 | b00000010_00000000_00000000_00000000 = 33554432; # 0x2000000 32 | b00000100_00000000_00000000_00000000 = 67108864; # 0x4000000 33 | b00001000_00000000_00000000_00000000 = 134217728; # 0x8000000 34 | b00010000_00000000_00000000_00000000 = 268435456; # 0x10000000 35 | b00100000_00000000_00000000_00000000 = 536870912; # 0x20000000 36 | b01000000_00000000_00000000_00000000 = 1073741824; # 0x40000000 37 | b10000000_00000000_00000000_00000000 = 2147483648; # 0x80000000 38 | 39 | # All bits set. 40 | b11111111 = 255; # 0xff 41 | b11111111_11111111 = 65535; # 0xffff 42 | b11111111_11111111_11111111_11111111 = 4294967295; # 0xffffffff 43 | # ... 
except for 64 bit (not representable) 44 | b01111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111 45 | = 9223372036854775807; # 0x7fffffffffffffff 46 | } 47 | -------------------------------------------------------------------------------- /lib/buildProgram.nix: -------------------------------------------------------------------------------- 1 | { lib, ... }: 2 | 3 | let 4 | inherit (lib) 5 | mkBinary 6 | mkElf 7 | ; 8 | in 9 | { 10 | # 11 | # Wrapper to streamline building a program. 12 | # 13 | # The `code` argument takes the usual list of bytes, or function returning one. 14 | # Using the DSL helpers to produce the list of bytes is expected. 15 | # Data can be provided with either a list of bytes with `data`, or a `mkCStrings` input with `strings`. 16 | # 17 | # An helper function (`getString`) can be used to get the attrset for the string, 18 | # augmented with the `addr` for the string, when `strings` is provided. 19 | # It takes the attrname of the string as a parameter (or the full string, when using a list for `mkCStrings`). 20 | # 21 | buildProgram = 22 | { name # Name of the program, and the binary 23 | , arch # Architecture for the program 24 | , code 25 | , data ? null 26 | , strings ? null 27 | }: 28 | 29 | if data != null && strings != null 30 | then throw "Calling buildProgram with both `data` and `strings` is invalid." 
31 | else 32 | let data' = data; in # Break reference cycle 33 | let strings' = strings; in # Break reference cycle 34 | let 35 | strings = if strings' != null then lib.cstrings.mkCStrings strings' else null; 36 | data = 37 | if !builtins.isNull strings 38 | then strings.bytes 39 | else data' 40 | ; 41 | elf = mkElf { 42 | inherit arch; 43 | sections = 44 | { mkElfSection }: 45 | [ 46 | (mkElfSection { 47 | name = ".text"; 48 | type = "SHT_PROGBITS"; 49 | bytes = 50 | if builtins.isFunction code && !builtins.isNull strings 51 | then (args: code (args // { 52 | getString = 53 | name: 54 | { 55 | addr = (args.sections.".rodata".addr or 0) + strings.offsets."${name}"; 56 | offset = strings.offsets."${name}"; 57 | length = let s = strings.strings."${name}"; in if builtins.isList s then lib.bytesCount s else builtins.stringLength s; # mkCStrings also accepts lists of bytes, which stringLength rejects 58 | } 59 | ; 60 | })) 61 | else code 62 | ; 63 | }) 64 | ] 65 | ++ 66 | (lib.optional (data != null) 67 | (mkElfSection { 68 | name = ".rodata"; 69 | type = "SHT_PROGBITS"; 70 | bytes = data; 71 | }) 72 | ) 73 | ; 74 | }; 75 | in 76 | (mkBinary { 77 | inherit name; 78 | inherit (elf) bytes; 79 | executable = true; 80 | }) // { 81 | inherit elf; 82 | inherit code; 83 | inherit strings; 84 | } 85 | ; 86 | } 87 | -------------------------------------------------------------------------------- /lib/ctypes.nix: -------------------------------------------------------------------------------- 1 | { lib }: 2 | 3 | let 4 | inherit (lib) 5 | abs 6 | mod 7 | padBytesRight 8 | numberToBytes 9 | collapseSpaces 10 | ; 11 | in 12 | 13 | # NOTE: All types are LSB right now. 14 | # It's fine for the moment. 15 | # All current and planned relevant platforms are LSB.
16 | # - x86 17 | # - x86_64 18 | # - armv7l 19 | # - aarch64 20 | # - riscv64 21 | { 22 | ctypes = rec { 23 | mkUint = 24 | size: 25 | if size <= 0 26 | then (throw "mkUint called with a zero or negative bits width (${toString size} <= 0)") 27 | else 28 | let bytes_count = size / 8; in 29 | if (mod size 8) != 0 30 | then (throw "mkUint called with a non-byte-aligned bits width (${toString size} % 8 != 0)") 31 | else 32 | let 33 | max = 34 | (builtins.foldl' builtins.add 0 (builtins.genList (i: lib.bitShiftLeft 1 i) (size))) 35 | ; 36 | min = 0; 37 | # can't check 64 bit values for over/underflow. 38 | doCheck = size < 64; 39 | in 40 | value: 41 | if doCheck && value > max 42 | then (throw "Unsigned value ${toString value} is bigger than the unsigned ${toString size} bits type allows.") 43 | else 44 | if doCheck && value < min 45 | then (throw "Unsigned value ${toString value} is smaller than the unsigned ${toString size} bits type allows.") 46 | else 47 | padBytesRight bytes_count (numberToBytes value) 48 | ; 49 | toUint8 = mkUint 8; 50 | toUint16 = mkUint 16; 51 | toUint32 = mkUint 32; 52 | # NOTE: Nix uses signed 64 bit integers. The most significant bit will cause issues when representing large values. 53 | toUint64 = mkUint 64; 54 | 55 | mkInt = 56 | size: 57 | if size <= 0 58 | then (throw "mkInt called with a zero or negative bits width (${toString size} <= 0)") 59 | else 60 | let bytes_count = size / 8; in 61 | if (mod size 8) != 0 62 | then (throw "mkInt called with a non-byte-aligned bits width (${toString size} % 8 != 0)") 63 | else 64 | let 65 | max = 66 | (builtins.foldl' builtins.add 0 (builtins.genList (i: lib.bitShiftLeft 1 i) (size - 1))) 67 | ; 68 | min = max * (-1) - 1; 69 | # can't check 64 bit values for over/underflow. 
70 | doCheck = size < 64; 71 | in 72 | value: 73 | if doCheck && value > max 74 | then (throw "Signed value ${toString value} is bigger than the signed ${toString size} bits type allows.") 75 | else 76 | if doCheck && value < min 77 | then (throw "Signed value ${toString value} is smaller than the signed ${toString size} bits type allows.") 78 | else (lib.twosComplement bytes_count value) 79 | ; 80 | 81 | toInt8 = mkInt 8; 82 | toInt16 = mkInt 16; 83 | toInt32 = mkInt 32; 84 | toInt64 = mkInt 64; 85 | 86 | # https://en.cppreference.com/w/cpp/language/types#Data_models 87 | dataModels = { 88 | _all = { 89 | # https://pubs.opengroup.org/onlinepubs/9799919799/basedefs/stdint.h.html 90 | # stdint 91 | int8_t = toInt8; 92 | int16_t = toInt16; 93 | int32_t = toInt32; 94 | int64_t = toInt64; 95 | uint8_t = toUint8; 96 | uint16_t = toUint16; 97 | uint32_t = toUint32; 98 | uint64_t = toUint64; 99 | }; 100 | ILP32 = throw "TODO: ILP32 data model"; 101 | LP64 = dataModels._all // { 102 | "signed char" = toInt8; 103 | "unsigned char" = toUint8; 104 | 105 | "short" = toInt16; 106 | "short int" = toInt16; 107 | "signed short" = toInt16; 108 | "signed short int" = toInt16; 109 | "unsigned short" = toUint16; 110 | "unsigned short int" = toUint16; 111 | 112 | "int" = toInt32; 113 | "signed" = toInt32; 114 | "signed int" = toInt32; 115 | "unsigned" = toUint32; 116 | "unsigned int" = toUint32; 117 | 118 | "long" = toInt64; 119 | "long int" = toInt64; 120 | "signed long" = toInt64; 121 | "signed long int" = toInt64; 122 | "unsigned long" = toUint64; 123 | "unsigned long int" = toUint64; 124 | 125 | "long long" = toInt64; 126 | "long long int" = toInt64; 127 | "signed long long" = toInt64; 128 | "signed long long int" = toInt64; 129 | "unsigned long long" = toUint64; 130 | "unsigned long long int" = toUint64; 131 | 132 | # 133 | # 134 | # 135 | 136 | # For 64 bit 137 | "intptr_t" = dataModels.LP64."long int"; 138 | "uintptr_t" = dataModels.LP64."unsigned long int"; 139 | "ptrdiff_t" = 
dataModels.LP64.intptr_t; # ptrdiff_t is a signed type in C; same bytes as before for non-negative values 140 | }; 141 | }; 142 | parseDecl = 143 | dataModel: 144 | decl: 145 | let 146 | fields = 147 | builtins.match "^([^*]*)([* ]+)([^ ]+)$" ( 148 | # Collapse all spaces 149 | collapseSpaces ( 150 | # Make sure pointers are well identifiable 151 | builtins.replaceStrings [ "*" ] [ " * " ] decl 152 | ) 153 | ) 154 | ; 155 | in 156 | if fields == null 157 | then (throw "Could not parse declaration '${decl}'.") 158 | else 159 | let 160 | dataModel' = 161 | if builtins.isString dataModel 162 | then dataModels."${dataModel}" 163 | else dataModel 164 | ; 165 | type = collapseSpaces (builtins.elemAt fields 0); 166 | pointer'= collapseSpaces (builtins.elemAt fields 1); 167 | name = collapseSpaces (builtins.elemAt fields 2); 168 | pointer = pointer' == "*"; 169 | # NOTE: Pointers are converted as uintptr_t, it's only for machine code interfacing. 170 | # This is not producing an API. 171 | convert = 172 | if pointer 173 | then dataModel'."uintptr_t" 174 | else 175 | dataModel'."${type}" 176 | or (throw "No type found for '${type}'") 177 | ; 178 | in 179 | { 180 | inherit 181 | type 182 | name 183 | pointer 184 | convert 185 | ; 186 | } 187 | ; 188 | }; 189 | cstrings = { 190 | # Given an attrset of `{ $name = "string"; }`, 191 | # returns an attrset with the following shape: 192 | # ``` 193 | # { 194 | # bytes = [ /*bytes*/ ]; # NUL-terminated strings concatenated together 195 | # offsets = { $name = offset; }; # Offset zero-indexed from the start 196 | # strings = {/* original input */}; # The original input 197 | # } 198 | # ``` 199 | # Note that lists of bytes are also accepted as inputs, in addition to strings. 200 | # This enables involving more complex strings than possible with basicLatinToBytes (by i.e. pre-processing them). 201 | # 202 | # The attribute names are explicitly lexicographically sorted. 203 | # 204 | # Given a list of strings, it is transformed to an attrset keyed by the string values.
205 | mkCStrings = 206 | strings': 207 | let 208 | strings = 209 | if builtins.isList strings' 210 | then (lib.listToAttrsStrings strings') 211 | else if builtins.isAttrs strings' 212 | then strings' 213 | else (builtins.throw "Argument to cstrings.mkCStrings not a list or an attrset (was ${builtins.typeOf strings'}).") 214 | ; 215 | data = 216 | builtins.foldl' 217 | (prev: name: 218 | let 219 | value = strings."${name}"; 220 | in 221 | { 222 | offsets = prev.offsets // { 223 | "${name}" = lib.bytesCount prev.bytes; 224 | }; 225 | bytes = prev.bytes 226 | ++ ( 227 | if builtins.isString value 228 | then (lib.basicLatinToBytes value) 229 | else if builtins.isList value 230 | then value 231 | else (throw "Element of unexpected type (${builtins.typeOf value}) for ${name} given to mkCStrings") 232 | ) 233 | ++ [ 0 ] 234 | ; 235 | } 236 | ) 237 | { offsets = {}; bytes = []; } 238 | (builtins.sort builtins.lessThan (builtins.attrNames strings)) 239 | ; 240 | length = lib.bytesCount data.bytes; 241 | in 242 | { 243 | inherit (data) 244 | offsets 245 | bytes 246 | ; 247 | inherit 248 | length 249 | strings 250 | ; 251 | } 252 | ; 253 | }; 254 | } 255 | -------------------------------------------------------------------------------- /lib/default.nix: -------------------------------------------------------------------------------- 1 | let 2 | inherit (import ./attrs.nix { lib = {}; }) 3 | concatAttrs 4 | ; 5 | lib = 6 | (concatAttrs [ 7 | # Nix 8 | (import ./attrs.nix { inherit lib; }) 9 | (import ./lists.nix { inherit lib; }) 10 | (import ./bits.nix { inherit lib; }) 11 | (import ./maths.nix { inherit lib; }) 12 | (import ./strings.nix { inherit lib; }) 13 | (import ./functions.nix { inherit lib; }) 14 | (import ./nix.nix { inherit lib; }) 15 | 16 | # This nonsense 17 | (import ./binary.nix { inherit lib; }) 18 | (import ./dsl.nix { inherit lib; }) 19 | (import ./ctypes.nix { inherit lib; }) 20 | (import ./elf { inherit lib; }) 21 | (import ./arch { inherit lib; }) 22 | 
(import ./linux { inherit lib; }) 23 | (import ./buildProgram.nix { inherit lib; }) 24 | ]) 25 | ; 26 | in 27 | lib 28 | -------------------------------------------------------------------------------- /lib/dsl.nix: -------------------------------------------------------------------------------- 1 | { lib }: 2 | 3 | { 4 | comment = 5 | comment: 6 | { 7 | __dsl = true; 8 | __type = "comment"; 9 | inherit comment; 10 | } 11 | ; 12 | # Only strip 13 | stripComments = 14 | builtins.filter 15 | (el: !builtins.isList el && !(el.__dsl or false) && el.__type or "" != "comment") 16 | ; 17 | 18 | bytesCount = 19 | bytes: 20 | (builtins.length (lib.stripComments bytes)) 21 | ; 22 | } 23 | -------------------------------------------------------------------------------- /lib/elf/constants.nix: -------------------------------------------------------------------------------- 1 | # NOTE: no `lib`, only numeric (and list) constants here. 2 | { 3 | # Identification 4 | EI_MAG = [ 5 | 127 69 76 70 # | 7f 45 4c 46 | .ELF 6 | ]; 7 | EI_CLASS = { 8 | ELFCLASSNONE = 0; 9 | ELFCLASS32 = 1; 10 | ELFCLASS64 = 2; 11 | }; 12 | EI_DATA = { 13 | ELFDATANONE = 0; # Invalid data encoding 14 | ELFDATA2LSB = 1; # Encoding ELFDATA2LSB specifies 2's complement values, with the least significant byte occupying the lowest address. 15 | ELFDATA2MSB = 2; # Encoding ELFDATA2MSB specifies 2's complement values, with the most significant byte occupying the lowest address. 
16 | }; 17 | EI_VERSION = { 18 | EV_NONE = 0; 19 | EV_CURRENT = 1; 20 | }; 21 | EI_OSABI = { 22 | ELFOSABI_NONE = 0; # No extensions or unspecified 23 | ELFOSABI_HPUX = 1; # Hewlett-Packard HP-UX 24 | ELFOSABI_NETBSD = 2; # NetBSD 25 | ELFOSABI_LINUX = 3; # Linux 26 | ELFOSABI_SOLARIS = 6; # Sun Solaris 27 | ELFOSABI_AIX = 7; # AIX 28 | ELFOSABI_IRIX = 8; # IRIX 29 | ELFOSABI_FREEBSD = 9; # FreeBSD 30 | ELFOSABI_TRU64 = 10; # Compaq TRU64 UNIX 31 | ELFOSABI_MODESTO = 11; # Novell Modesto 32 | ELFOSABI_OPENBSD = 12; # Open BSD 33 | ELFOSABI_OPENVMS = 13; # Open VMS 34 | ELFOSABI_NSK = 14; # Hewlett-Packard Non-Stop Kernel 35 | # 64-255 # Architecture-specific value range 36 | }; 37 | EI_ABIVERSION = { 38 | unspecified = 0; 39 | }; 40 | # Computed, but trivial. 41 | EI_PAD = builtins.genList (_: 0) (16 - 9); 42 | 43 | # Type 44 | ET_NONE = 0; # No file type 45 | ET_REL = 1; # Relocatable file 46 | ET_EXEC = 2; # Executable file 47 | ET_DYN = 3; # Shared object file 48 | ET_CORE = 4; # Core file 49 | #ET_LOOS = 0xfe00; # Operating system-specific 50 | #ET_HIOS = 0xfeff; # Operating system-specific 51 | #ET_LOPROC = 0xff00; # Processor-specific 52 | #ET_HIPROC = 0xffff; # Processor-specific 53 | 54 | # Machine 55 | EM_NONE = 0; # No machine 56 | EM_M32 = 1; # AT&T WE 32100 57 | EM_SPARC = 2; # SPARC 58 | EM_386 = 3; # Intel 80386 59 | EM_68K = 4; # Motorola 68000 60 | EM_88K = 5; # Motorola 88000 61 | EM_860 = 7; # Intel 80860 62 | EM_MIPS = 8; # MIPS I Architecture 63 | EM_S370 = 9; # IBM System/370 Processor 64 | EM_MIPS_RS3_LE = 10; # MIPS RS3000 Little-endian 65 | EM_PARISC = 15; # Hewlett-Packard PA-RISC 66 | EM_VPP500 = 17; # Fujitsu VPP500 67 | EM_SPARC32PLUS = 18; # Enhanced instruction set SPARC 68 | EM_960 = 19; # Intel 80960 69 | EM_PPC = 20; # PowerPC 70 | EM_PPC64 = 21; # 64-bit PowerPC 71 | EM_S390 = 22; # IBM System/390 Processor 72 | EM_V800 = 36; # NEC V800 73 | EM_FR20 = 37; # Fujitsu FR20 74 | EM_RH32 = 38; # TRW RH-32 75 | EM_RCE = 39; # Motorola 
RCE 76 | EM_ARM = 40; # Advanced RISC Machines ARM 77 | EM_ALPHA = 41; # Digital Alpha 78 | EM_SH = 42; # Hitachi SH 79 | EM_SPARCV9 = 43; # SPARC Version 9 80 | EM_TRICORE = 44; # Siemens TriCore embedded processor 81 | EM_ARC = 45; # Argonaut RISC Core, Argonaut Technologies Inc. 82 | EM_H8_300 = 46; # Hitachi H8/300 83 | EM_H8_300H = 47; # Hitachi H8/300H 84 | EM_H8S = 48; # Hitachi H8S 85 | EM_H8_500 = 49; # Hitachi H8/500 86 | EM_IA_64 = 50; # Intel IA-64 processor architecture 87 | EM_MIPS_X = 51; # Stanford MIPS-X 88 | EM_COLDFIRE = 52; # Motorola ColdFire 89 | EM_68HC12 = 53; # Motorola M68HC12 90 | EM_MMA = 54; # Fujitsu MMA Multimedia Accelerator 91 | EM_PCP = 55; # Siemens PCP 92 | EM_NCPU = 56; # Sony nCPU embedded RISC processor 93 | EM_NDR1 = 57; # Denso NDR1 microprocessor 94 | EM_STARCORE = 58; # Motorola Star*Core processor 95 | EM_ME16 = 59; # Toyota ME16 processor 96 | EM_ST100 = 60; # STMicroelectronics ST100 processor 97 | EM_TINYJ = 61; # Advanced Logic Corp. TinyJ embedded processor family 98 | EM_X86_64 = 62; # AMD x86-64 architecture 99 | EM_PDSP = 63; # Sony DSP Processor 100 | EM_PDP10 = 64; # Digital Equipment Corp. PDP-10 101 | EM_PDP11 = 65; # Digital Equipment Corp. 
PDP-11 102 | EM_FX66 = 66; # Siemens FX66 microcontroller 103 | EM_ST9PLUS = 67; # STMicroelectronics ST9+ 8/16 bit microcontroller 104 | EM_ST7 = 68; # STMicroelectronics ST7 8-bit microcontroller 105 | EM_68HC16 = 69; # Motorola MC68HC16 Microcontroller 106 | EM_68HC11 = 70; # Motorola MC68HC11 Microcontroller 107 | EM_68HC08 = 71; # Motorola MC68HC08 Microcontroller 108 | EM_68HC05 = 72; # Motorola MC68HC05 Microcontroller 109 | EM_SVX = 73; # Silicon Graphics SVx 110 | EM_ST19 = 74; # STMicroelectronics ST19 8-bit microcontroller 111 | EM_VAX = 75; # Digital VAX 112 | EM_CRIS = 76; # Axis Communications 32-bit embedded processor 113 | EM_JAVELIN = 77; # Infineon Technologies 32-bit embedded processor 114 | EM_FIREPATH = 78; # Element 14 64-bit DSP Processor 115 | EM_ZSP = 79; # LSI Logic 16-bit DSP Processor 116 | EM_MMIX = 80; # Donald Knuth's educational 64-bit processor 117 | EM_HUANY = 81; # Harvard University machine-independent object files 118 | EM_PRISM = 82; # SiTera Prism 119 | EM_AVR = 83; # Atmel AVR 8-bit microcontroller 120 | EM_FR30 = 84; # Fujitsu FR30 121 | EM_D10V = 85; # Mitsubishi D10V 122 | EM_D30V = 86; # Mitsubishi D30V 123 | EM_V850 = 87; # NEC v850 124 | EM_M32R = 88; # Mitsubishi M32R 125 | EM_MN10300 = 89; # Matsushita MN10300 126 | EM_MN10200 = 90; # Matsushita MN10200 127 | EM_PJ = 91; # picoJava 128 | EM_OPENRISC = 92; # OpenRISC 32-bit embedded processor 129 | EM_ARC_A5 = 93; # ARC Cores Tangent-A5 130 | EM_XTENSA = 94; # Tensilica Xtensa Architecture 131 | EM_VIDEOCORE = 95; # Alphamosaic VideoCore processor 132 | EM_TMM_GPP = 96; # Thompson Multimedia General Purpose Processor 133 | EM_NS32K = 97; # National Semiconductor 32000 series 134 | EM_TPC = 98; # Tenor Network TPC processor 135 | EM_SNP1K = 99; # Trebia SNP 1000 processor 136 | EM_ST200 = 100; # STMicroelectronics (www.st.com) ST200 microcontroller 137 | EM_IP2K = 101; # Ubicom IP2022 micro controller 138 | EM_MAX = 102; # MAX Processor 139 | EM_CR = 103; # National 
Semiconductor CompactRISC 140 | EM_F2MC16 = 104; # Fujitsu F2MC16 141 | EM_MSP430 = 105; # TI msp430 micro controller 142 | EM_BLACKFIN = 106; # ADI Blackfin 143 | EM_SE_C33 = 107; # S1C33 Family of Seiko Epson processors 144 | EM_SEP = 108; # Sharp embedded microprocessor 145 | EM_ARCA = 109; # Arca RISC Microprocessor 146 | EM_UNICORE = 110; # Microprocessor series from PKU-Unity Ltd. and MPRC of Peking University 147 | EM_EXCESS = 111; # eXcess: 16/32/64-bit configurable embedded CPU 148 | EM_DXP = 112; # Icera Semiconductor Inc. Deep Execution Processor 149 | EM_ALTERA_NIOS2 = 113; # Altera Nios II soft-core processor 150 | EM_CRX = 114; # National Semiconductor CRX 151 | EM_XGATE = 115; # Motorola XGATE embedded processor 152 | EM_C166 = 116; # Infineon C16x/XC16x processor 153 | EM_M16C = 117; # Renesas M16C series microprocessors 154 | EM_DSPIC30F = 118; # Microchip Technology dsPIC30F Digital Signal Controller 155 | EM_CE = 119; # Freescale Communication Engine RISC core 156 | EM_M32C = 120; # Renesas M32C series microprocessors 157 | EM_TSK3000 = 131; # Altium TSK3000 core 158 | EM_RS08 = 132; # Freescale RS08 embedded processor 159 | EM_ECOG2 = 134; # Cyan Technology eCOG2 microprocessor 160 | EM_SCORE = 135; # Sunplus Score 161 | EM_SCORE7 = 135; # Sunplus S+core7 RISC processor 162 | EM_DSP24 = 136; # New Japan Radio (NJR) 24-bit DSP Processor 163 | EM_VIDEOCORE3 = 137; # Broadcom VideoCore III processor 164 | EM_LATTICEMICO32 = 138; # RISC processor for Lattice FPGA architecture 165 | EM_SE_C17 = 139; # Seiko Epson C17 family 166 | EM_TI_C6000 = 140; # Texas Instruments TMS320C6000 DSP family 167 | EM_TI_C2000 = 141; # Texas Instruments TMS320C2000 DSP family 168 | EM_TI_C5500 = 142; # Texas Instruments TMS320C55x DSP family 169 | EM_MMDSP_PLUS = 160; # STMicroelectronics 64bit VLIW Data Signal Processor 170 | EM_CYPRESS_M8C = 161; # Cypress M8C microprocessor 171 | EM_R32C = 162; # Renesas R32C series microprocessors 172 | EM_TRIMEDIA = 163; # NXP 
Semiconductors TriMedia architecture family 173 | EM_QDSP6 = 164; # QUALCOMM DSP6 Processor 174 | EM_8051 = 165; # Intel 8051 and variants 175 | EM_STXP7X = 166; # STMicroelectronics STxP7x family 176 | EM_NDS32 = 167; # Andes Technology compact code size embedded RISC processor family 177 | EM_ECOG1 = 168; # Cyan Technology eCOG1X family 178 | EM_ECOG1X = 168; # Cyan Technology eCOG1X family 179 | EM_MAXQ30 = 169; # Dallas Semiconductor MAXQ30 Core Micro-controllers 180 | EM_XIMO16 = 170; # New Japan Radio (NJR) 16-bit DSP Processor 181 | EM_MANIK = 171; # M2000 Reconfigurable RISC Microprocessor 182 | EM_CRAYNV2 = 172; # Cray Inc. NV2 vector architecture 183 | EM_RX = 173; # Renesas RX family 184 | EM_METAG = 174; # Imagination Technologies Meta processor architecture 185 | EM_MCST_ELBRUS = 175; # MCST Elbrus general purpose hardware architecture 186 | EM_ECOG16 = 176; # Cyan Technology eCOG16 family 187 | EM_CR16 = 177; # National Semiconductor CompactRISC 16-bit processor 188 | EM_ETPU = 178; # Freescale Extended Time Processing Unit 189 | EM_SLE9X = 179; # Infineon Technologies SLE9X core 190 | EM_L1OM = 180; # Intel L1OM 191 | EM_K1OM = 181; # Intel K1OM 192 | EM_AARCH64 = 183; # ARM 64-bit architecture 193 | EM_AVR32 = 185; # Atmel Corporation 32-bit microprocessor family 194 | EM_STM8 = 186; # STMicroelectronics STM8 8-bit microcontroller 195 | EM_TILE64 = 187; # Tilera TILE64 multicore architecture family 196 | EM_TILEPRO = 188; # Tilera TILEPro multicore architecture family 197 | EM_MICROBLAZE = 189; # Xilinx MicroBlaze 32-bit RISC soft processor core 198 | EM_CUDA = 190; # NVIDIA CUDA architecture 199 | EM_TILEGX = 191; # Tilera TILE-Gx multicore architecture family 200 | EM_RL78 = 197; # Renesas RL78 family. 201 | EM_78K0R = 199; # Renesas 78K0R. 202 | 203 | # Version 204 | EV_NONE = 0; 205 | EV_CURRENT = 1; 206 | 207 | ELF_PHDR = { 208 | # p_type 209 | PT_NULL = 0; # Unused 210 | PT_LOAD = 1; # Loadable segment (i.e. 
an executable) 211 | PT_DYNAMIC = 2; # Dynamic linking information 212 | PT_INTERP = 3; # Pathname of interpreter 213 | PT_NOTE = 4; # Auxiliary information 214 | PT_SHLIB = 5; # (not used) 215 | PT_PHDR = 6; # Location of the program header 216 | PT_TLS = 7; # Thread local storage 217 | #PT_LOOS = 0x60000000; 218 | #PT_HIOS = 0x6fffffff; 219 | #PT_LOPROC = 0x70000000; 220 | #PT_HIPROC = 0x7fffffff; 221 | # p_flags 222 | PF_X = 1; # Execute 223 | PF_W = 2; # Write 224 | PF_R = 4; # Read 225 | #PF_MASKOS = 0x0ff00000 # Unspecified 226 | #PF_MASKPROC = 0xf0000000 # Unspecified 227 | }; 228 | 229 | ELF_SHDR = { 230 | # sh_type 231 | SHT_NULL = 0; # inactive 232 | SHT_PROGBITS = 1; # program defined information 233 | SHT_SYMTAB = 2; # symbol table section 234 | SHT_STRTAB = 3; # string table section 235 | SHT_RELA = 4; # relocation section with addends 236 | SHT_HASH = 5; # symbol hash table section 237 | SHT_DYNAMIC = 6; # dynamic section 238 | SHT_NOTE = 7; # note section 239 | SHT_NOBITS = 8; # no space section 240 | SHT_REL = 9; # relocation section - no addends 241 | SHT_SHLIB = 10; # reserved - purpose unknown 242 | SHT_DYNSYM = 11; # dynamic symbol table section 243 | SHT_INIT_ARRAY = 14; # Initialization function pointers. 244 | SHT_FINI_ARRAY = 15; # Termination function pointers. 245 | SHT_PREINIT_ARRAY = 16; # Pre-initialization function ptrs. 246 | SHT_GROUP = 17; # Section group. 247 | SHT_SYMTAB_SHNDX = 18; # Section indexes (see SHN_XINDEX). 
248 | #SHT_LOOS = 0x60000000; # First of OS specific semantics 249 | #SHT_LOSUNW = 0x6ffffff4; 250 | #SHT_SUNW_dof = 0x6ffffff4; 251 | #SHT_SUNW_cap = 0x6ffffff5; 252 | #SHT_GNU_ATTRIBUTES = 0x6ffffff5; 253 | #SHT_SUNW_SIGNATURE = 0x6ffffff6; 254 | #SHT_GNU_HASH = 0x6ffffff6; 255 | #SHT_GNU_LIBLIST = 0x6ffffff7; 256 | #SHT_SUNW_ANNOTATE = 0x6ffffff7; 257 | #SHT_SUNW_DEBUGSTR = 0x6ffffff8; 258 | #SHT_SUNW_DEBUG = 0x6ffffff9; 259 | #SHT_SUNW_move = 0x6ffffffa; 260 | #SHT_SUNW_COMDAT = 0x6ffffffb; 261 | #SHT_SUNW_syminfo = 0x6ffffffc; 262 | #SHT_SUNW_verdef = 0x6ffffffd; 263 | #SHT_GNU_verdef = 0x6ffffffd; # Symbol versions provided 264 | #SHT_SUNW_verneed = 0x6ffffffe; 265 | #SHT_GNU_verneed = 0x6ffffffe; # Symbol versions required 266 | #SHT_SUNW_versym = 0x6fffffff; 267 | #SHT_GNU_versym = 0x6fffffff; # Symbol version table 268 | #SHT_HISUNW = 0x6fffffff; 269 | #SHT_HIOS = 0x6fffffff; # Last of OS specific semantics 270 | #SHT_LOPROC = 0x70000000; # reserved range for processor 271 | #SHT_X86_64_UNWIND = 0x70000001; # unwind information 272 | #SHT_AMD64_UNWIND = SHT_X86_64_UNWIND; 273 | 274 | SHN_UNDEF = 0; 275 | 276 | SHF_WRITE = 1; # Section contains writable data. 277 | SHF_ALLOC = 2; # Section occupies memory. 278 | SHF_EXECINSTR = 4; # Section contains instructions. 279 | SHF_MERGE = 16; # Section may be merged. 280 | SHF_STRINGS = 32; # Section contains strings. 281 | SHF_INFO_LINK = 64; # sh_info holds section index. 282 | SHF_LINK_ORDER = 128; # Special ordering requirements. 283 | SHF_OS_NONCONFORMING = 256; # OS-specific processing required. 284 | SHF_GROUP = 512; # Member of section group. 285 | SHF_TLS = 1024; # Section contains TLS data. 286 | SHF_COMPRESSED = 2048; # Section contains compressed data. 287 | #SHF_MASKOS = 0x0ff00000; # OS-specific semantics. 288 | #SHF_MASKPROC = 0xf0000000; # Processor-specific semantics. 
289 | }; 290 | } 291 | -------------------------------------------------------------------------------- /lib/elf/default.nix: -------------------------------------------------------------------------------- 1 | { lib }: 2 | 3 | let 4 | constants = import ./constants.nix; 5 | K = constants; 6 | 7 | # FIXME figure out better or appropriate defaults to use. 8 | DEFAULT_LOAD_ADDR = 10 * 16 * 1024; 9 | # (Default alignment to be on the safe side; power of two, and "relatively large".) 10 | DEFAULT_ALIGNMENT = 16; 11 | 12 | elfCtypes = with lib.ctypes; { 13 | Elf32_Addr = toUint32; 14 | Elf32_Half = toUint16; 15 | Elf32_Off = toUint32; 16 | Elf32_Sword = toInt32; 17 | Elf32_Word = toUint32; 18 | Elf32_Lword = toUint64; 19 | 20 | Elf64_Addr = toUint64; 21 | Elf64_Half = toUint16; 22 | Elf64_Off = toUint64; 23 | Elf64_Sword = toInt32; 24 | Elf64_Sxword = toInt64; 25 | Elf64_Word = toUint32; 26 | Elf64_Lword = toUint64; 27 | Elf64_Xword = toUint64; 28 | }; 29 | 30 | sized_types = { 31 | "64" = with elfCtypes; { 32 | ElfMach_Addr = Elf64_Addr; 33 | ElfMach_Half = Elf64_Half; 34 | ElfMach_Off = Elf64_Off; 35 | #ElfMach_ = Elf64_Sword; 36 | #ElfMach_ = Elf64_Sxword; 37 | ElfMach_Word = Elf64_Word; 38 | ElfMach_Lword = Elf64_Lword; 39 | ElfMach_Xword = Elf64_Xword; 40 | }; 41 | "32" = with elfCtypes; { 42 | ElfMach_Addr = Elf32_Addr; 43 | ElfMach_Half = Elf32_Half; 44 | ElfMach_Off = Elf32_Off; 45 | #ElfMach_Sword = Elf32_Sword; 46 | ElfMach_Sxword = throw "No Elf32_Sxword"; 47 | ElfMach_Word = Elf32_Word; 48 | ElfMach_Lword = Elf32_Lword; 49 | ElfMach_Xword = throw "No Elf32_Xword"; 50 | }; 51 | }; 52 | in 53 | rec { 54 | ELF = { 55 | inherit constants; 56 | 57 | # Size of the types defined afterward. 
58 | sizeof = { 59 | Elf64_Ehdr = lib.bytesCount (ELF.mkElfHeader { bits = 64; e_entry = 0; e_ehsize = 0; e_machine = 0; }); 60 | Elf64_Phdr = lib.bytesCount (ELF.mkProgramHeader { bits = 64; load_addr = 0; p_filesz = 0; }); 61 | Elf64_Shdr = lib.bytesCount (ELF.mkSectionHeader { bits = 64; }); 62 | Elf32_Ehdr = lib.bytesCount (ELF.mkElfHeader { bits = 32; e_entry = 0; e_ehsize = 0; e_machine = 0; }); 63 | Elf32_Phdr = lib.bytesCount (ELF.mkProgramHeader { bits = 32; load_addr = 0; p_filesz = 0; }); 64 | Elf32_Shdr = lib.bytesCount (ELF.mkSectionHeader { bits = 32; }); 65 | }; 66 | 67 | # ELF header, first thing in an ELF file. 68 | mkElfHeader = 69 | { bits 70 | , e_entry 71 | , e_flags ? 0 72 | 73 | , e_ehsize ? ELF.sizeof."Elf${toString bits}_Ehdr" 74 | 75 | , e_phoff ? e_ehsize 76 | , e_phentsize ? ELF.sizeof."Elf${toString bits}_Phdr" 77 | 78 | , e_shoff ? (e_phoff + e_phentsize) 79 | , e_shentsize ? ELF.sizeof."Elf${toString bits}_Shdr" 80 | , e_shnum ? 0 81 | , e_shstrndx ? K.ELF_SHDR.SHN_UNDEF 82 | 83 | , e_type ? K.ET_EXEC 84 | , e_machine 85 | }: 86 | let 87 | inherit (sized_types."${toString bits}") 88 | ElfMach_Addr 89 | ElfMach_Off 90 | ElfMach_Word 91 | ElfMach_Half 92 | ; 93 | in 94 | builtins.concatLists [ 95 | # https://refspecs.linuxfoundation.org/elf/gabi4+/ch4.eheader.html 96 | /* 0x0000 */ 97 | K.EI_MAG 98 | [ 99 | /* 0x04 */ (if bits == 64 then K.EI_CLASS.ELFCLASS64 else K.EI_CLASS.ELFCLASS32) 100 | /* 0x05 */ K.EI_DATA.ELFDATA2LSB 101 | /* 0x06 */ K.EI_VERSION.EV_CURRENT 102 | /* 0x07 */ K.EI_OSABI.ELFOSABI_NONE 103 | /* 0x08 */ K.EI_ABIVERSION.unspecified 104 | ] 105 | /* 0x0009 */ 106 | K.EI_PAD 107 | 108 | /* 0x000010 */ 109 | (ElfMach_Half e_type) # e_type 110 | (ElfMach_Half e_machine) # e_machine 111 | (ElfMach_Word K.EV_CURRENT) # e_version 112 | 113 | /* 0x000018 */ 114 | # e_entry Entry point. 115 | (ElfMach_Addr e_entry) 116 | /* 0x000020 */ 117 | (ElfMach_Off e_phoff) # e_phoff Program header file offset. 
118 | /* 0x000028 */ 119 | (ElfMach_Off e_shoff) # e_shoff Section header file offset. 120 | 121 | (ElfMach_Word e_flags) # e_flags Architecture-specific flags. 122 | (ElfMach_Half e_ehsize) # e_ehsize Size of ELF header in bytes. 123 | (ElfMach_Half e_phentsize) # e_phentsize Size of program header entry. 124 | (ElfMach_Half 1) # e_phnum Number of program header entries. 125 | (ElfMach_Half e_shentsize) # e_shentsize Size of section header entry. 126 | (ElfMach_Half e_shnum) # e_shnum Number of section header entries. 127 | (ElfMach_Half e_shstrndx) # e_shstrndx String table section header index. (NOTE: 0 == undefined, missing, irrelevant) 128 | ] 129 | ; 130 | mkProgramHeader = 131 | { load_addr 132 | , bits 133 | , type ? "PT_LOAD" 134 | , p_filesz 135 | , p_memsz ? p_filesz 136 | , p_align ? 0 137 | }: 138 | let 139 | inherit (constants) ELF_PHDR; 140 | # TODO: when type != PT_LOAD, some other flags? 141 | flags = with constants.ELF_PHDR; PF_X + PF_R; 142 | in 143 | builtins.concatLists ( 144 | let 145 | inherit (sized_types."${toString bits}") 146 | ElfMach_Addr 147 | ElfMach_Off 148 | ElfMach_Word 149 | ElfMach_Xword 150 | ; 151 | in 152 | [ 153 | (ElfMach_Word ELF_PHDR.${type}) # p_type Entry type. 154 | (ElfMach_Word flags) # p_flags Access permission flags. 155 | (ElfMach_Off 0) # p_offset File offset of contents. 156 | (ElfMach_Addr load_addr) # p_vaddr Virtual address in memory image. 157 | (ElfMach_Addr load_addr) # p_paddr Physical address (not used). 158 | (ElfMach_Xword p_filesz) # p_filesz Size of contents in file. 159 | (ElfMach_Xword p_memsz) # p_memsz Size of contents in memory. 160 | (ElfMach_Xword p_align) # p_align Alignment in memory and file. 161 | ] 162 | ) 163 | ; 164 | mkSectionHeader = 165 | { bits 166 | , sh_name ? 0 167 | , sh_type ? K.ELF_SHDR.SHT_NULL 168 | , sh_offset ? 0 169 | , sh_addr ? 0 170 | , sh_size ? 0 171 | , sh_flags ? 0 172 | , sh_addralign ? 
0 173 | }: 174 | builtins.concatLists ( 175 | let 176 | inherit (sized_types."${toString bits}") 177 | ElfMach_Addr 178 | ElfMach_Off 179 | ElfMach_Word 180 | ElfMach_Xword 181 | ; 182 | in 183 | [ 184 | (ElfMach_Word sh_name) # sh_name Name index in string table 185 | (ElfMach_Word sh_type) # sh_type Section content and semantics 186 | (ElfMach_Xword sh_flags) # sh_flags Misc section attributes 187 | (ElfMach_Addr sh_addr) # sh_addr Address of first byte, or 0 if not in memory 188 | (ElfMach_Off sh_offset) # sh_offset Section offset from beginning of file 189 | (ElfMach_Xword sh_size) # sh_size Length of section in file 190 | (ElfMach_Word 0) # sh_link (Type-dependent data) 191 | (ElfMach_Word 0) # sh_info (Type-dependent info) 192 | (ElfMach_Xword sh_addralign) # sh_addralign Alignment constraints (powers of two) 193 | (ElfMach_Xword 0) # sh_entsize Entry size for fixed-size entries, or 0 194 | ] 195 | ) 196 | ; 197 | mkShFlags = 198 | let 199 | # Same as `readelf` uses. 200 | # W (write), A (alloc), X (execute), M (merge), S (strings), I (info), 201 | # L (link order), O (extra OS processing required), G (group), T (TLS), 202 | # C (compressed), x (unknown), o (OS specific), E (exclude), 203 | # D (mbind), l (large), p (processor specific) 204 | letterValues = { 205 | "W" = K.ELF_SHDR.SHF_WRITE; 206 | "A" = K.ELF_SHDR.SHF_ALLOC; 207 | "X" = K.ELF_SHDR.SHF_EXECINSTR; 208 | "M" = K.ELF_SHDR.SHF_MERGE; 209 | "S" = K.ELF_SHDR.SHF_STRINGS; 210 | "I" = K.ELF_SHDR.SHF_INFO_LINK; 211 | 212 | "L" = K.ELF_SHDR.SHF_LINK_ORDER; 213 | "O" = K.ELF_SHDR.SHF_OS_NONCONFORMING; 214 | "G" = K.ELF_SHDR.SHF_GROUP; 215 | "T" = K.ELF_SHDR.SHF_TLS; 216 | 217 | "C" = K.ELF_SHDR.SHF_COMPRESSED; 218 | }; 219 | in 220 | flags: 221 | if builtins.isInt flags 222 | then flags 223 | else if builtins.isString flags 224 | then ( 225 | builtins.foldl' 226 | ( 227 | flags: letter: 228 | flags + letterValues."${letter}" 229 | ) 230 | 0 231 | (lib.chars flags) 232 | ) else (throw "argument of 
unexpected type (${builtins.typeOf flags}) given to mkShFlags") 233 | ; 234 | defaultSectionFlags = { 235 | # Some default flags... 236 | ".text" = "AX"; 237 | ".rodata" = "A"; 238 | }; 239 | }; 240 | 241 | # Given a `bytes` function for a section... 242 | # - When a list, returns it 243 | # - When a function, calls it with all required arguments set to `0`. 244 | bogusSectionBytes = 245 | bytes: 246 | if builtins.isList bytes 247 | then bytes 248 | else if builtins.isFunction bytes 249 | then ( 250 | let 251 | fnArgs = builtins.functionArgs bytes; 252 | requiredArgs = 253 | builtins.filter 254 | (name: fnArgs."${name}" == false) 255 | (builtins.attrNames fnArgs) 256 | ; 257 | args = 258 | builtins.listToAttrs ( 259 | builtins.map 260 | (name: { inherit name; value = 0; }) 261 | (requiredArgs) 262 | ) 263 | ; 264 | in 265 | bytes args 266 | ) else (throw "`bytes` attribute of unexpected type (${builtins.typeOf bytes})") 267 | ; 268 | 269 | # 270 | # NOTE: section names *must be unique*, or else expect unexpected behaviour. 271 | # (repeating null section bytes is fine.) 272 | # 273 | mkElf = 274 | { bits ? arch.ELF.bits 275 | , machine ? arch.ELF.EM 276 | , arch ? null 277 | , load_addr ? DEFAULT_LOAD_ADDR 278 | , type ? "PT_LOAD" # An executable 279 | , sections # ***list*** of sections 280 | , alignment ? DEFAULT_ALIGNMENT # Sections will start on this alignment boundary. 281 | }: 282 | # Algorithm: 283 | # - Pick all sections. 284 | # - Collect offsets into attrs. 285 | # - Collect section headers. 286 | # - Collect program bytes by calling bytes section appropriately. 287 | let 288 | # This is within `mkElf` as it needs to know about the whole Elf alignment. 289 | mkElfSection = 290 | { name ? null # Name of the section, or null for unnamed section. 291 | , type # Type, either the constant name (SHT_PROGBITS) or the value (1). 292 | , bytes # bytes, either a list of bytes, or a set-pattern function returning a list of bytes. 293 | , flags ? 
null # Flags for the section 294 | }: 295 | let type' = type; in # Break reference 296 | let 297 | # Eagerly resolve to integer type 298 | self = 299 | { 300 | # 301 | # Header data 302 | # 303 | inherit name; 304 | type = 305 | if builtins.isInt type' 306 | then type' 307 | else K.ELF_SHDR."${type'}" 308 | ; 309 | # Pick appropriate default flag for some well-known sections 310 | flags = 311 | ELF.mkShFlags ( 312 | if !builtins.isNull flags 313 | then flags 314 | else 315 | if self.type == K.ELF_SHDR."SHT_PROGBITS" 316 | then ELF.defaultSectionFlags."${name}" or 0 317 | else 0 318 | ) 319 | ; 320 | # 321 | # Section properties 322 | # 323 | inherit bytes; 324 | length = lib.bytesCount (bogusSectionBytes bytes); 325 | alignedLength = lib.getAlignedLength alignment self.length; 326 | } 327 | ; 328 | in 329 | self 330 | ; 331 | # Provide the mkElfSection function now that we know the alignment. 332 | sections' = sections { 333 | inherit mkElfSection; 334 | }; 335 | in 336 | let 337 | dot_text_section = 338 | sections_by_name.".text" or null 339 | ; 340 | 341 | entry_point = 342 | if builtins.isNull dot_text_section 343 | then 0 344 | else dot_text_section.addr 345 | ; 346 | 347 | # NOTE: elfHeadersBytes depends on this value to be set, so let's just use the (aligned) static type sizes. 348 | elf_headers_length = lib.getAlignedLength alignment ( 349 | ELF.sizeof."Elf${toString bits}_Ehdr" 350 | + ELF.sizeof."Elf${toString bits}_Phdr" 351 | ); 352 | 353 | 354 | # The Elf header *and* Program header 355 | # NOTE: the header pair needs to be aligned here. 
356 | elfHeadersBytes = lib.padToAlignment alignment (builtins.concatLists [ 357 | (ELF.mkElfHeader { 358 | inherit bits; 359 | e_entry = entry_point; 360 | e_shoff = elf_headers_length; 361 | e_shnum = sections_count; 362 | e_machine = machine; # Honour the `machine` argument (it defaults to arch.ELF.EM). 363 | e_shstrndx = 1; # hardcoded since we know .shstrtab is first (after the null section) 364 | }) 365 | (ELF.mkProgramHeader { 366 | inherit bits; 367 | inherit load_addr; 368 | p_align = alignment; 369 | p_filesz = 370 | let 371 | last = lib.last sections; 372 | in 373 | last.offset + last.alignedLength 374 | ; 375 | }) 376 | ]); 377 | 378 | section_names = 379 | lib.cstrings.mkCStrings ( 380 | [ 381 | "(unnamed)" 382 | ".shstrtab" 383 | ] ++ 384 | ( 385 | builtins.filter 386 | (name: name != null) 387 | (builtins.map (section: section.name) sections') 388 | ) 389 | ) 390 | ; 391 | 392 | shstrtab = 393 | mkElfSection { 394 | name = ".shstrtab"; 395 | type = "SHT_STRTAB"; 396 | bytes = section_names.bytes; 397 | } 398 | ; 399 | 400 | # FIXME: find doc about whether this is needed or just a useful workaround for e_shstrndx 401 | null_section = 402 | mkElfSection { 403 | type = "SHT_NULL"; 404 | bytes = []; 405 | } 406 | ; 407 | 408 | innate_sections = [ null_section shstrtab ]; 409 | 410 | sections = 411 | ( 412 | builtins.foldl' 413 | (prev: section: 414 | { 415 | offset = prev.offset + section.alignedLength; 416 | sections = prev.sections ++ [ 417 | (section // { 418 | # FIXME: this is not right for things that want to be loaded elsewhere 419 | addr = load_addr + prev.offset; 420 | inherit (prev) offset; 421 | }) 422 | ]; 423 | } 424 | ) 425 | { 426 | offset = total_headers_length; 427 | sections = []; 428 | } 429 | (innate_sections ++ sections') 430 | ).sections 431 | ; 432 | 433 | sections_by_name = 434 | builtins.listToAttrs ( 435 | lib.mapReverse (section: { inherit (section) name; value = section; }) ( 436 | builtins.filter 437 | (section: section.name != null) 438 | sections 439 | ) 440 | ) 441 | ; 442 | 443 | 
sections_count = 444 | (lib.bytesCount sections') + (lib.bytesCount innate_sections) 445 | ; 446 | 447 | # NOTE: we cannot rely on `section_headers` since it needs this length to compute offsets. 448 | section_headers_length = lib.getAlignedLength alignment ( 449 | ELF.sizeof."Elf${toString bits}_Shdr" * sections_count 450 | ); 451 | 452 | total_headers_length = 453 | elf_headers_length + section_headers_length 454 | ; 455 | 456 | # NOTE: the header group needs to be aligned here. 457 | section_headers = 458 | let 459 | headers = 460 | builtins.map ( 461 | section: 462 | ELF.mkSectionHeader { 463 | sh_name = 464 | if builtins.isString section.name 465 | then section_names.offsets."${section.name}" 466 | else 0 467 | ; 468 | inherit bits; 469 | sh_type = section.type; 470 | sh_offset = section.offset; 471 | sh_size = section.length; 472 | sh_flags = section.flags; 473 | sh_addr = section.addr; 474 | sh_addralign = alignment; 475 | } 476 | ) sections 477 | ; 478 | in 479 | rec { 480 | bytes = lib.padToAlignment alignment (builtins.concatLists headers); 481 | length = lib.getAlignedLength alignment (lib.bytesCount bytes); 482 | } 483 | ; 484 | 485 | # NOTE: the sections need to be individually aligned here. 
486 | sections_bytes = builtins.concatLists ( 487 | builtins.map ( 488 | section: 489 | let 490 | bytes = lib.stripComments ( 491 | if builtins.isList section.bytes 492 | then section.bytes 493 | else (section.bytes { 494 | load_addr = load_addr; 495 | section_addr = section.addr; 496 | sections = sections_by_name; 497 | }) 498 | ); 499 | actualLength = lib.bytesCount bytes; 500 | in 501 | if actualLength != section.length 502 | then throw "Section ${section.name} length (${toString section.length}) did not match actual length (${toString actualLength})" 503 | else 504 | lib.padToAlignment alignment bytes 505 | ) sections 506 | ); 507 | in 508 | { 509 | inherit section_names; 510 | inherit section_headers; 511 | inherit section_headers_length; 512 | inherit total_headers_length; 513 | bytes = builtins.concatLists [ 514 | elfHeadersBytes 515 | section_headers.bytes 516 | sections_bytes 517 | ]; 518 | inherit sections; 519 | } 520 | ; 521 | } 522 | -------------------------------------------------------------------------------- /lib/functions.nix: -------------------------------------------------------------------------------- 1 | { lib }: 2 | 3 | { 4 | identity = a: a; 5 | # 6 | # Given a list of functions, will provide lambda that applies them in order, returning a list of results. 7 | # 8 | # let 9 | # example = generateListLambda { 10 | # functions = [ 11 | # (arg: "A: ${toString arg}") 12 | # (arg: "B: ${toString arg}") 13 | # (arg: "C: ${toString arg}") 14 | # ]; 15 | # }; 16 | # in 17 | # example 100 20 3 18 | # => [ "A: 100" "B: 20" "C: 3" ] 19 | # 20 | # The `initial`, and `merge` arguments can be used to work with different types than lists. 21 | # 22 | # The `finally` argument can be used to transform the result. 23 | # 24 | generateListLambda = 25 | { functions 26 | , initial ? [] 27 | , merge ? (a: b: a ++ [ b ]) 28 | , finally ? lib.identity 29 | }: 30 | if functions == [] 31 | then throw "generateListLambda: empty functions parameter provided." 
32 | else 33 | let 34 | len = builtins.length functions; 35 | apply = 36 | { curr 37 | , arg 38 | , coll ? initial 39 | }: 40 | let 41 | result = 42 | merge 43 | coll 44 | ((builtins.elemAt functions curr) arg) 45 | ; in 46 | if curr == len - 1 47 | then (finally result) 48 | else (newArg: apply { curr = curr + 1; coll = result; arg = newArg; }) 49 | ; 50 | in 51 | arg: apply { curr = 0; inherit arg; } 52 | ; 53 | } 54 | -------------------------------------------------------------------------------- /lib/linux/aarch64.nix: -------------------------------------------------------------------------------- 1 | { lib }: 2 | 3 | let 4 | self = lib.linux.aarch64; 5 | in 6 | { 7 | linux = { 8 | aarch64 = with lib.linux.generic; (lib.linux.generic // rec { 9 | arch = lib.arch.aarch64; 10 | # Kernel data types for aarch64 11 | dataModel = lib.linux.generic.dataModels.LP64; 12 | dsl = { 13 | parseLogicalReg = 14 | name: 15 | let 16 | numMatch = builtins.match "ARG([0-5])" name; # Only logical "ARGn" names map to argument registers; architecture names such as "x20" pass through unchanged. 17 | num = lib.toInt (builtins.head numMatch); 18 | in 19 | if !builtins.isString name 20 | then (throw "A string must be provided to parseLogicalReg") 21 | else 22 | if name == "RETURN" 23 | then RETURN_REGISTER 24 | else 25 | if numMatch != null 26 | then builtins.elemAt ARG_REGISTER num 27 | else 28 | name 29 | ; 30 | mkSyscall = 31 | syscall_name: 32 | args: 33 | lib.generateListLambda { 34 | finally = 35 | args: 36 | [(lib.comment "linux aarch64 syscall: ${syscall_name}")] 37 | ++ (syscall syscall_name args) 38 | ; 39 | functions = 40 | lib.mapWithIndex 41 | (arg_pos: syscall_arg: 42 | let 43 | inherit (lib.ctypes.parseDecl dataModel syscall_arg) 44 | name 45 | type 46 | convert 47 | ; 48 | expected_size = lib.bytesCount (convert 0); 49 | in 50 | value: 51 | if builtins.isList value 52 | then 53 | let len = (lib.bytesCount value); in 54 | if len > expected_size 55 | then throw ( 56 | "passed a ${toString len} bytes value to" 57 | + " argument '${name}' (#${toString (arg_pos + 1)})" 58 | + " of 
syscall '${syscall_name}';" 59 | + " a '${type}' (expecting at most ${toString expected_size} bytes)" 60 | ) 61 | else value 62 | else 63 | if builtins.isNull value 64 | then null 65 | else 66 | (convert value) 67 | ) 68 | args 69 | ; 70 | } 71 | ; 72 | # Copies the content of a *logical* register to another one. 73 | # i.e. (copy_reg "ARG0" "RETURN") 74 | # Copying to an architecture-specific register is possible (copy_reg "x20" "RETURN") but not inherently portable. 75 | copy_reg = 76 | # TODO: consider adding 'SCRATCH0~N' for non-syscall portable registers? 77 | into: from: 78 | (lib.arch.aarch64.instructions.MOV_reg (dsl.parseLogicalReg into) (dsl.parseLogicalReg from)) 79 | ; 80 | syscall = 81 | builtins.listToAttrs ( 82 | builtins.map ( 83 | data: 84 | let 85 | name = builtins.elemAt data 0; 86 | args = builtins.elemAt data 1; 87 | in 88 | { 89 | inherit name; 90 | value = dsl.mkSyscall name args; 91 | } 92 | ) [ 93 | ["write" [ 94 | "unsigned int fd" 95 | "char *buf" 96 | "size_t count" 97 | ]] 98 | ["fchmodat" [ 99 | "int dfd" 100 | "const char *filename" 101 | "umode_t mode" 102 | ]] 103 | ["exit" [ 104 | "int error_code" 105 | ]] 106 | ] 107 | ) 108 | ; 109 | argv1_to_reg = 110 | { register # Logical register names accepted (i.e. ARG0) 111 | , errorMessage ? null # An attrset with `addr` and `length` for a given string. 112 | }: 113 | 114 | let 115 | register' = dsl.parseLogicalReg register; 116 | errorFragment = builtins.concatLists [ 117 | (lib.optionals (!builtins.isNull errorMessage) 118 | (dsl.syscall.write STDOUT errorMessage.addr errorMessage.length) 119 | ) 120 | (dsl.syscall.exit 1) 121 | ]; 122 | # NOTE: we have to compensate for the jump's operand length too... 
123 | relAfter_errorFragment = 0 124 | + 4 125 | + (lib.bytesCount errorFragment) 126 | ; 127 | in 128 | builtins.concatLists [ 129 | [(lib.comment " argv1 to register (${toString register})")] 130 | 131 | # We're using the output register as scratch to test presence of argv1 132 | # Get the *value* of argc 133 | (lib.arch.aarch64.instructions.LDR_mem register' "sp") 134 | 135 | # We're checking *strictly* for argc == 1 136 | (lib.arch.aarch64.instructions.CMP_imm register' (lib.ctypes.toUint32 1)) 137 | /* */ # When not 1, branch to after errorFragment 138 | /* */ (lib.arch.aarch64.instructions.B.NE relAfter_errorFragment) 139 | /* */ # else (argc == 1, no argv1), error out 140 | /* */ errorFragment 141 | 142 | # Get the argc pointer 143 | (lib.arch.aarch64.instructions.MOV_reg register' "sp") 144 | # Skip over argc and argv0 145 | (lib.arch.aarch64.instructions.ADD_imm register' (lib.ctypes.toUint32 (2 * 8))) 146 | # Here we copy into the register (effectively (char*)argv[1]). 147 | (lib.arch.aarch64.instructions.LDR_mem register' register') 148 | 149 | [(lib.comment " argv1 to register (${toString register})")] 150 | ] 151 | ; 152 | }; 153 | # Syscall numbers 154 | _syscalls = { 155 | "fchmodat" = 53; 156 | "write" = 64; 157 | "exit" = 93; 158 | }; 159 | # Registers in argument position order 160 | ARG_REGISTER = [ 161 | "x0" 162 | "x1" 163 | "x2" 164 | "x3" 165 | "x4" 166 | "x5" 167 | ]; 168 | RETURN_REGISTER = "x0"; 169 | NR_REGISTER = "x8"; 170 | syscall = 171 | let 172 | inherit (lib.arch.aarch64) 173 | instructions 174 | ; 175 | in 176 | name: args: 177 | builtins.concatLists [ 178 | # Args 179 | ( 180 | builtins.concatLists ( 181 | builtins.genList 182 | (i: 183 | let 184 | arg = builtins.elemAt args i; 185 | in 186 | if arg == null then [] # Skip externally handled arg. 
187 | else 188 | builtins.concatLists [ 189 | ( 190 | instructions.MOV_imm 191 | (builtins.elemAt ARG_REGISTER i) 192 | arg 193 | ) 194 | ] 195 | ) 196 | (builtins.length args) 197 | ) 198 | ) 199 | # We're setting up the syscall number last. 200 | # By passing null, a field is skipped from the args, and as such 201 | # any previous value set can be used. See `copy_reg`. 202 | ( 203 | instructions.MOV_imm 204 | NR_REGISTER 205 | _syscalls."${name}" 206 | ) 207 | instructions.syscall 208 | ] 209 | ; 210 | }); 211 | }; 212 | } 213 | -------------------------------------------------------------------------------- /lib/linux/default.nix: -------------------------------------------------------------------------------- 1 | { lib }: 2 | 3 | { 4 | linux = { 5 | select = 6 | system: 7 | let 8 | arch = (lib.arch.select system).name; 9 | in 10 | lib.linux."${arch}" 11 | or (throw "Unknown or unimplemented architecture '${arch}' for `linux`.") 12 | ; 13 | } 14 | // (import ./generic.nix { inherit lib; }).linux 15 | // (import ./aarch64.nix { inherit lib; }).linux 16 | // (import ./x86_64.nix { inherit lib; }).linux 17 | ; 18 | } 19 | -------------------------------------------------------------------------------- /lib/linux/generic.nix: -------------------------------------------------------------------------------- 1 | { lib }: 2 | 3 | { 4 | linux = { 5 | # Architecture-independent constants and implementation details. 6 | generic = { 7 | STDIN = 0; 8 | STDOUT = 1; 9 | STDERR = 2; 10 | # Special value used to indicate openat should use the current working directory. 11 | AT_FDCWD = -100; 12 | 13 | dataModels = { 14 | LP64 = 15 | let 16 | dataModel = lib.ctypes.dataModels.LP64; 17 | in 18 | dataModel // { 19 | # Most 32 bit architectures use "unsigned int" size_t, 20 | # and all 64 bit architectures use "unsigned long" size_t. 
21 | "size_t" = dataModel."unsigned long"; 22 | "umode_t" = dataModel."unsigned short"; 23 | } 24 | ; 25 | }; 26 | }; 27 | }; 28 | } 29 | -------------------------------------------------------------------------------- /lib/linux/x86_64.nix: -------------------------------------------------------------------------------- 1 | { lib }: 2 | 3 | { 4 | linux = { 5 | x86_64 = with lib.linux.generic; (lib.linux.generic // rec { 6 | arch = lib.arch.x86_64; 7 | # Kernel data types for x86_64 8 | dataModel = lib.linux.generic.dataModels.LP64; 9 | dsl = { 10 | parseLogicalReg = 11 | name: 12 | let 13 | numMatch = builtins.match "ARG([0-5])" name; # Only logical "ARGn" names map to argument registers; architecture names such as "r10" or "r15" pass through unchanged. 14 | num = lib.toInt (builtins.head numMatch); 15 | in 16 | if !builtins.isString name 17 | then (throw "A string must be provided to parseLogicalReg") 18 | else 19 | if name == "RETURN" 20 | then RETURN_REGISTER 21 | else 22 | if numMatch != null 23 | then builtins.elemAt ARG_REGISTER num 24 | else 25 | name 26 | ; 27 | mkSyscall = 28 | syscall_name: 29 | args: 30 | lib.generateListLambda { 31 | finally = 32 | args: 33 | [(lib.comment "linux x86_64 syscall: ${syscall_name}")] 34 | ++ (syscall syscall_name args) 35 | ; 36 | functions = 37 | lib.mapWithIndex 38 | (arg_pos: syscall_arg: 39 | let 40 | inherit (lib.ctypes.parseDecl dataModel syscall_arg) 41 | name 42 | type 43 | convert 44 | ; 45 | expected_size = lib.bytesCount (convert 0); 46 | in 47 | value: 48 | if builtins.isList value 49 | then 50 | let len = (lib.bytesCount value); in 51 | if len > expected_size 52 | then throw ( 53 | "passed a ${toString len} bytes value to" 54 | + " argument '${name}' (#${toString (arg_pos + 1)})" 55 | + " of syscall '${syscall_name}';" 56 | + " a '${type}' (expecting at most ${toString expected_size} bytes)" 57 | ) 58 | else value 59 | else 60 | if builtins.isNull value 61 | then null 62 | else 63 | (convert value) 64 | ) 65 | args 66 | ; 67 | } 68 | ; 69 | # Copies the content of a *logical* register to another one. 70 | # i.e. 
(copy_reg "ARG0" "RETURN") 71 | # Copying to a system register is possible too (copy_reg "r15" "RETURN") but not inherently portable. 72 | copy_reg = 73 | # TODO: consider adding 'SCRATCH0~N' for non-syscall registers? 74 | into: from: 75 | (lib.arch.x86_64.instructions.MOV_reg (dsl.parseLogicalReg into) (dsl.parseLogicalReg from)) 76 | ; 77 | syscall = 78 | builtins.listToAttrs ( 79 | builtins.map ( 80 | data: 81 | let 82 | name = builtins.elemAt data 0; 83 | args = builtins.elemAt data 1; 84 | in 85 | { 86 | inherit name; 87 | value = dsl.mkSyscall name args; 88 | } 89 | ) [ 90 | ["write" [ 91 | "unsigned int fd" 92 | "char *buf" 93 | "size_t count" 94 | ]] 95 | ["fchmodat" [ 96 | "int dfd" 97 | "const char *filename" 98 | "umode_t mode" 99 | ]] 100 | ["exit" [ 101 | "int error_code" 102 | ]] 103 | ] 104 | ) 105 | ; 106 | argv1_to_reg = 107 | { register # Logical register names accepted (i.e. ARG0) 108 | , errorMessage ? null # An attrset with `addr` and `length` for a given string. 109 | }: 110 | 111 | let 112 | register' = dsl.parseLogicalReg register; 113 | errorFragment = builtins.concatLists [ 114 | (lib.optionals (!builtins.isNull errorMessage) 115 | (dsl.syscall.write STDOUT errorMessage.addr errorMessage.length) 116 | ) 117 | (dsl.syscall.exit 1) 118 | ]; 119 | # NOTE: we have to compensate for JNE's operand length too... 
120 | relAfter_errorFragment = 0 121 | + 4 122 | + (lib.bytesCount errorFragment) 123 | ; 124 | in 125 | builtins.concatLists [ 126 | [(lib.comment " argv1 to register (${toString register})")] 127 | 128 | # We're using the output register as scratch to test presence of argv1 129 | # Get the *value* of argc 130 | (lib.arch.x86_64.instructions.MOV_from_mem register' "rsp") 131 | 132 | # We're checking *strictly* for argc == 1 133 | (lib.arch.x86_64.instructions.CMP_imm register' (lib.ctypes.toUint32 1)) 134 | /* */ # When argc != 1, jump to after errorFragment 135 | /* */ (lib.arch.x86_64.instructions.JNE relAfter_errorFragment) 136 | /* */ # else (argc == 1, i.e. no argv[1]), error out 137 | /* */ errorFragment 138 | 139 | # Get the argc pointer 140 | (lib.arch.x86_64.instructions.MOV_reg register' "rsp") 141 | # Skip over argc and argv0 142 | (lib.arch.x86_64.instructions.ADD_imm register' (lib.ctypes.toUint32 (2 * 8))) 143 | # Here we copy into the register (effectively (char*)argv[1]). 144 | (lib.arch.x86_64.instructions.MOV_from_mem register' register') 145 | 146 | [(lib.comment " argv1 to register (${toString register})")] 147 | ] 148 | ; 149 | }; 150 | # Syscall numbers 151 | _syscalls = { 152 | "write" = 1; 153 | "open" = 2; 154 | "close" = 3; 155 | "exit" = 60; 156 | "fchmodat" = 268; 157 | }; 158 | # Registers in argument position order 159 | ARG_REGISTER = [ 160 | "rdi" 161 | "rsi" 162 | "rdx" 163 | "r10" 164 | "r8" 165 | "r9" 166 | ]; 167 | RETURN_REGISTER = "rax"; 168 | NR_REGISTER = "rax"; 169 | syscall = 170 | let 171 | inherit (lib.arch.x86_64) 172 | instructions 173 | ; 174 | in 175 | name: args: 176 | builtins.concatLists [ 177 | # Args 178 | ( 179 | builtins.concatLists ( 180 | builtins.genList 181 | (i: 182 | let 183 | arg = builtins.elemAt args i; 184 | is64 = (builtins.length arg) > 4; 185 | pad = 186 | if is64 187 | then 8 188 | else 4 189 | ; 190 | in 191 | if arg == null then [] # Skip externally handled arg.
192 | else 193 | builtins.concatLists [ 194 | ( 195 | instructions.MOV_imm 196 | (builtins.elemAt ARG_REGISTER i) 197 | (lib.padBytesRight pad arg) 198 | ) 199 | ] 200 | ) 201 | (builtins.length args) 202 | ) 203 | ) 204 | # We're setting up the syscall number last. 205 | # By passing null, a field is skipped from the args, and as such 206 | # any previous value set can be used. See `copy_reg`. 207 | ( 208 | instructions.MOV_imm 209 | NR_REGISTER 210 | (lib.ctypes.toUint32 _syscalls."${name}") 211 | ) 212 | instructions.syscall 213 | ] 214 | ; 215 | }); 216 | }; 217 | } 218 | -------------------------------------------------------------------------------- /lib/lists.nix: -------------------------------------------------------------------------------- 1 | { lib }: 2 | 3 | { 4 | optional = 5 | condition: el: 6 | if condition then [ el ] else [] 7 | ; 8 | optionals = 9 | condition: list: 10 | if condition then list else [] 11 | ; 12 | mapReverse = 13 | fn: list: 14 | let 15 | len = builtins.length list; 16 | in 17 | builtins.genList ( 18 | i: 19 | let 20 | el = builtins.elemAt list (len - i - 1); 21 | in 22 | fn el 23 | ) len 24 | ; 25 | mapWithIndex = 26 | fn: list: 27 | let 28 | len = builtins.length list; 29 | in 30 | builtins.genList ( 31 | i: 32 | let el = builtins.elemAt list i; in 33 | fn i el 34 | ) len 35 | ; 36 | # Given a list of strings, returns an attrset of the same strings, key and values matching.
37 | listToAttrsStrings = 38 | strings: 39 | builtins.listToAttrs ( 40 | builtins.map 41 | (str: { name = str; value = str; }) 42 | strings 43 | ) 44 | ; 45 | # Given a predicate, and a list, find the first element matching and returns, or null 46 | listFind = 47 | pred: list: 48 | builtins.foldl' 49 | ( 50 | found: candidate: 51 | if !(builtins.isNull found) then found 52 | else 53 | if (pred candidate) 54 | then candidate 55 | else null 56 | ) 57 | null 58 | list 59 | ; 60 | last = 61 | list: 62 | builtins.head (lib.mapReverse lib.identity list) 63 | ; 64 | } 65 | -------------------------------------------------------------------------------- /lib/maths.nix: -------------------------------------------------------------------------------- 1 | { lib }: 2 | 3 | let 4 | # 5 | # - Sign of return value is sign of the first operand. 6 | # - Negative magnitude (second operand) invert the shift operation. 7 | # 8 | _bitShift = 9 | op: value: magnitudes: 10 | let 11 | invOp = 12 | if op == "mul" 13 | then builtins.div 14 | else builtins.mul 15 | ; 16 | op' = 17 | if magnitudes < 0 18 | then invOp 19 | else builtins.${op} 20 | ; 21 | absRet = 22 | builtins.foldl' 23 | (curr: _: op' curr 2) 24 | (lib.abs value) 25 | (builtins.genList (_: 2) (lib.abs magnitudes)) 26 | ; 27 | sign = 28 | if value < 0 29 | then -1 30 | else 1 31 | ; 32 | in 33 | absRet * sign 34 | ; 35 | in 36 | { 37 | abs = 38 | num: 39 | if num < 0 40 | then (num * -1) 41 | else num 42 | ; 43 | 44 | # 45 | # Notes about negative values 46 | # --------------------------- 47 | # 48 | # The sign of the *divisor* will be used in the return value. 
49 | # 50 | mod = 51 | dividend: divisor: 52 | if divisor == 0 53 | then (builtins.throw "mod called with zero divisor (mod ${toString dividend} ${toString divisor})") 54 | else 55 | let 56 | quotient = dividend / divisor; 57 | remainder = dividend - (quotient * divisor); 58 | sign = if divisor < 0 then -1 else 1; 59 | in 60 | (lib.abs remainder) * sign 61 | ; 62 | 63 | bitShiftLeft = _bitShift "mul"; 64 | bitShiftRight = _bitShift "div"; 65 | 66 | toInt = 67 | str: 68 | if !builtins.isString str then (throw "toInt can only operate on strings.") else 69 | if builtins.isNull (builtins.match "[0-9]+" str) then (throw "toInt can only convert strings containing decimal digits") else 70 | builtins.fromJSON str 71 | ; 72 | } 73 | -------------------------------------------------------------------------------- /lib/nix.nix: -------------------------------------------------------------------------------- 1 | { lib }: 2 | 3 | { 4 | # Given a Nix-flavoured system (i.e. `builtins.currentSystem`) 5 | # parse the system in semantics this tooling understands. 6 | # NOTE: an invalid "complete system" returns `null`. 7 | # This allows consumers of this function to accept an architecture 8 | # (i.e. `x86_64`) by checking the return value. 
9 | parseSystem = 10 | system: 11 | let 12 | parts = 13 | (builtins.match "(.+)-([^-]+)" system) 14 | ; 15 | in 16 | if parts == null then null else 17 | builtins.listToAttrs ( 18 | lib.mapWithIndex 19 | (i: value: 20 | { 21 | name = 22 | if i == 0 23 | then "arch" 24 | else "os" 25 | ; 26 | inherit value; 27 | } 28 | ) 29 | parts 30 | ) 31 | ; 32 | } 33 | -------------------------------------------------------------------------------- /lib/strings.nix: -------------------------------------------------------------------------------- 1 | { lib }: 2 | 3 | { 4 | collapseSpaces = 5 | str: 6 | builtins.concatStringsSep " " ( 7 | builtins.filter 8 | (s: s != "" && s != []) 9 | (builtins.split " " str) 10 | ) 11 | ; 12 | chars = 13 | str: 14 | builtins.filter 15 | (el: el != "" && el != []) 16 | (builtins.split "" str) 17 | ; 18 | join = 19 | list: 20 | builtins.concatStringsSep "" list 21 | ; 22 | } 23 | -------------------------------------------------------------------------------- /run-tests.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env nix-shell 2 | #!nix-shell -p ruby 3 | #!nix-shell -i ruby 4 | 5 | require "json" 6 | require "shellwords" 7 | 8 | # TODO: dry evaluate attributes, then eval each attribute independently? 9 | cmd = %W(nix-instantiate --strict --eval --json #{__dir__()}/tests --argstr output results) 10 | results = JSON.parse(`#{cmd.shelljoin}`) 11 | 12 | # TODO: print useful report instead of raw results. 13 | # For now it's fine since the order of the JSON object puts the useful info at the bottom. 14 | puts JSON.pretty_generate(results) 15 | -------------------------------------------------------------------------------- /shell.nix: -------------------------------------------------------------------------------- 1 | { pkgs ? import <nixpkgs> {} 2 | , additionalPackages ?
[] 3 | }: pkgs.callPackage ( 4 | 5 | { mkShell 6 | , nasm 7 | , gdb 8 | , xxd 9 | , ruby 10 | , jq 11 | , qemu 12 | , pkgsCross 13 | }: 14 | 15 | mkShell { 16 | nativeBuildInputs = 17 | [ 18 | nasm 19 | gdb 20 | xxd 21 | ruby 22 | jq 23 | qemu 24 | pkgsCross.aarch64-multiplatform.stdenv.cc 25 | ] 26 | ++ ( 27 | builtins.map 28 | (attr: pkgs."${attr}") 29 | additionalPackages 30 | ) 31 | ; 32 | } 33 | 34 | ) {} 35 | -------------------------------------------------------------------------------- /support/generate-aarch64-opcode-tests.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | #!nix-shell -p ruby 3 | #!nix-shell -p pkgsCross.aarch64-multiplatform.stdenv.cc 4 | #!nix-shell -i ruby 5 | 6 | require_relative "lib/shared" 7 | using GeneratorRefinements 8 | 9 | module ASM_aarch64 10 | extend self 11 | 12 | def asm(instrs) 13 | <<~EOF 14 | .global _start 15 | .align 4 16 | .section .text 17 | _start: 18 | #{instrs} 19 | EOF 20 | end 21 | 22 | def asm_bytes(code) 23 | File.write(TEMP, asm(code)) 24 | 25 | $stderr.puts "→ #{code}" 26 | run( 27 | "aarch64-unknown-linux-gnu-as", 28 | "-no-pad-sections", 29 | "-o", TEMP_OBJ, 30 | TEMP, 31 | ) 32 | exit 1 unless $?.success? 
33 | 34 | text_section_to_bytes(TEMP_OBJ) 35 | end 36 | 37 | def imm_max(width) 38 | ("1"*width).to_i(2) 39 | end 40 | 41 | TEMP = File.join(TOP, "temp.asm") 42 | TEMP_OBJ = "#{TEMP}.o" 43 | 44 | REGISTERS = 45 | begin 46 | registers = [] 47 | (0..30).each do |i| 48 | [ 49 | # We're not generating 32 bit registers instructions 50 | #[32, "w"], 51 | [64, "x"], 52 | ].each do |width, prefix| 53 | registers << { 54 | width: width, 55 | name: [prefix, i].join(""), 56 | } 57 | end 58 | end 59 | registers << { 60 | width: 64, 61 | name: "sp", 62 | } 63 | registers 64 | end.freeze 65 | 66 | INSTRUCTIONS = [ 67 | ["add", "ADD_imm", [:reg2, :imm12]], 68 | ["b.ne", "B.NE", [:off19]], 69 | ["cmp", "CMP_imm", [:reg, :off12]], 70 | ["ldr", "LDR_mem", [:reg_no_sp, :mem]], 71 | ["mov", "MOV_reg", [:reg, :reg]], 72 | # XXX this is not correct, we're synthesizing four instructions, which is not the convention. 73 | #["mov", "MOV_imm", [:reg, :imm64]], 74 | # syscall pseudo-instruction not tested; no operands, trivial and tested otherwise. 75 | ] 76 | 77 | REGISTER_OPERAND_TYPES = [ 78 | :reg, 79 | :reg2, # the same register, repeated twice, only in asm input 80 | :reg_no_sp, # but not sp 81 | :mem, 82 | ] 83 | 84 | def generate() 85 | file = File.open(File.join(TOP, "tests/lib/arch/aarch64.instructions.nix"), "w") 86 | file.puts <<~EOF 87 | { lib 88 | , expect 89 | , ... 90 | }: 91 | 92 | # 93 | # NOTE: tests generated using support/generate-aarch64-opcode-tests.rb 94 | # 95 | # The content of the tests is created from `aarch64-unknown-linux-gnu-as` output. 
96 | # 97 | { 98 | EOF 99 | 100 | INSTRUCTIONS.each do |instruction_data| 101 | instruction, implementation_instruction, types = instruction_data 102 | instruction_tests = StringIO.new() 103 | instruction_tests.puts <<~EOF 104 | # 105 | # Test generation input: 106 | #{instruction_data.to_json().gsub(/^/, "# ")} 107 | # 108 | #{implementation_instruction.to_nix} = { 109 | EOF 110 | type_cases = types.map do |type| 111 | case type 112 | when :reg, :reg2, :reg_no_sp 113 | REGISTERS 114 | .select do |reg| 115 | case type 116 | when :reg_no_sp 117 | reg[:name] != "sp" 118 | else 119 | true 120 | end 121 | end 122 | .map do |reg| 123 | type = :reg if type == :reg_no_sp 124 | [type, reg] 125 | end 126 | when :mem 127 | REGISTERS.select{ _1[:width] == 64 }.map do |reg| 128 | [:mem, reg] 129 | end 130 | when /^imm\d+/ 131 | # TODO: all *n* bits set 132 | [ 133 | 0, 134 | 1, 135 | imm_max(type.to_s().scan(/\d+/).first.to_i()), 136 | ].map do |imm| 137 | [:imm, imm] 138 | end 139 | when /off\d+/ 140 | # Instructions are 4 bytes wide. 141 | # Must be divisibly by 4. 142 | [ 143 | 0, 144 | 4, 145 | imm_max(type.to_s().scan(/\d+/).first.to_i())/4*4, 146 | ].map do |imm| 147 | [:off, imm] 148 | end 149 | end 150 | end 151 | 152 | type_cases = 153 | case type_cases.length 154 | when 0 155 | throw "TODO: implement operands-less type cases..." 156 | when 1 157 | type_cases.first.map do |c| 158 | [c] 159 | end 160 | else 161 | type_cases.first.product(*type_cases[1..]) 162 | end 163 | 164 | type_cases.each do |operands| 165 | register_operands = operands.select { REGISTER_OPERAND_TYPES.include?(_1.first) } 166 | 167 | # Skip register pairings of different sizes 168 | next unless register_operands == [] || register_operands.map { _1[1][:width] }.inject(:==) 169 | 170 | # Some register names differ when extended registers are used 171 | extended = register_operands.any? 
{ _1[1][:type] == :extended } 172 | 173 | # Transform operands into nasm operands 174 | asm_operands = operands.map do |type, operand| 175 | case type 176 | when :reg 177 | operand[:name] 178 | when :reg2 179 | [ 180 | operand[:name], 181 | operand[:name], 182 | ].join(", ") 183 | when :mem 184 | "[" + operand[:name] + "]" 185 | when :imm 186 | "##{operand}" 187 | when :off 188 | operand 189 | end 190 | end 191 | 192 | # Operands for our Nix implementation 193 | nix_operands = operands.map do |type, operand| 194 | case type 195 | when :reg, :reg2, :mem 196 | operand[:name] 197 | when :imm 198 | operand 199 | when :off 200 | operand 201 | end 202 | end 203 | 204 | # Compile... 205 | code = [instruction, asm_operands.join(", ")].join(" ") 206 | bytes = asm_bytes(code) 207 | instruction_tests.puts <<~EOF 208 | #{code.to_nix()} = expect ''#{code.to_nix} to be correct [#{bytes.map{_1.to_s(16)}.join(" ")}]'' 209 | (lib.stripComments (lib.arch.aarch64.instructions.#{implementation_instruction} #{nix_operands.map(&:to_nix).join(" ")})) 210 | #{bytes.to_nix} 211 | ; 212 | EOF 213 | .indent(2) 214 | end 215 | instruction_tests.puts <<~EOF 216 | }; 217 | EOF 218 | instruction_tests.puts() 219 | 220 | file.puts(instruction_tests.string().indent(2)) 221 | end 222 | 223 | file.puts("}") 224 | 225 | File.delete(TEMP) if File.exist?(TEMP) 226 | File.delete(TEMP_OBJ) if File.exist?(TEMP_OBJ) 227 | end 228 | end 229 | 230 | ASM_aarch64.generate() 231 | -------------------------------------------------------------------------------- /support/generate-x86_64-opcode-tests.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | #!nix-shell -p ruby nasm binutils 3 | #!nix-shell -i ruby 4 | 5 | require_relative "lib/shared" 6 | using GeneratorRefinements 7 | 8 | module ASM_x86_64 9 | extend self 10 | 11 | def asm(instrs) 12 | <<~EOF 13 | global _start 14 | section .text 15 | _start: 16 | #{instrs} 17 | EOF 18 | end 19 | 20 | def 
asm_bytes(code) 21 | File.write(TEMP, asm(code)) 22 | 23 | $stderr.puts "→ #{code}" 24 | run("nasm", "-O0", "-f", "elf64", TEMP, "-o", TEMP_OBJ) 25 | exit 1 unless $?.success? 26 | 27 | text_section_to_bytes(TEMP_OBJ) 28 | end 29 | 30 | REGISTERS = 31 | begin 32 | registers = [] 33 | 34 | classic_8A = "acdbacdb".split(""); 35 | classic_8L = "llllhhhh".split(""); 36 | classic_16A = "acdbsbsd".split(""); 37 | classic_16L = "xxxxppii".split(""); 38 | 39 | (0..7).each do |i| 40 | registers << { 41 | width: 8, 42 | name: [classic_8A[i], classic_8L[i]].join(""), 43 | } 44 | end 45 | [[16, ""], [32, "e"], [64, "r"]].each do |width, prefix| 46 | (0..7).each do |i| 47 | registers << { 48 | width: width, 49 | name: [prefix, classic_16A[i], classic_16L[i]].join(""), 50 | type: :classic, 51 | } 52 | end 53 | end 54 | 55 | (8..15).each do |i| 56 | [[8,"b"], [16,"w"], [32,"d"], [64,""]].each do |width, suffix| 57 | registers << { 58 | width: width, 59 | name: "r#{i}#{suffix}", 60 | type: :extended, 61 | } 62 | end 63 | end 64 | 65 | registers 66 | end.freeze 67 | 68 | def extended_classic(reg_data, extended) 69 | name = reg_data[:name] 70 | return name unless extended 71 | case name 72 | when "ah" 73 | "spl" 74 | when "ch" 75 | "bpl" 76 | when "dh" 77 | "sil" 78 | when "bh" 79 | "dil" 80 | else 81 | name 82 | end 83 | end 84 | 85 | TEMP = File.join(TOP, "temp.asm") 86 | TEMP_OBJ = "#{TEMP}.o" 87 | 88 | INSTRUCTIONS = [ 89 | ["mov", "MOV_reg", [:reg, :reg]], 90 | ["mov", "MOV_from_mem", [:reg, :mem]], 91 | ["mov", "MOV_imm", [:reg, :imm]], 92 | ["je", "JE", [:off]], 93 | ["jne", "JNE", [:off]], 94 | ["cmp", "CMP_imm", [:reg, :imm]], 95 | ["add", "ADD_imm", [:reg, :imm]], 96 | # syscall instruction not tested; no operands, trivial and tested otherwise. 
97 | ] 98 | 99 | IMMEDIATES = [ 100 | # FIXME: Nix-based DSL uses a list of bytes currently (and pads it) 101 | # TODO: Use integer types as a valid immediate, for negative values support 102 | #-1, 103 | 0, 104 | 1, 105 | ] 106 | 107 | REGISTER_OPERAND_TYPES = [:reg, :mem] 108 | 109 | def generate() 110 | file = File.open(File.join(TOP, "tests/lib/arch/x86_64.instructions.nix"), "w") 111 | file.puts <<~EOF 112 | { lib 113 | , expect 114 | , ... 115 | }: 116 | 117 | # 118 | # NOTE: tests generated using support/generate-x86_64-opcode-tests.rb 119 | # 120 | # The content of the tests is created from `nasm` output. 121 | # 122 | { 123 | EOF 124 | 125 | INSTRUCTIONS.each do |instruction_data| 126 | instruction, implementation_instruction, types = instruction_data 127 | instruction_tests = StringIO.new() 128 | instruction_tests.puts <<~EOF 129 | # 130 | # Test generation input: 131 | #{instruction_data.to_json().gsub(/^/, "# ")} 132 | # 133 | #{implementation_instruction.to_nix} = { 134 | EOF 135 | type_cases = types.map do |type| 136 | case type 137 | when :reg 138 | REGISTERS.map do |reg| 139 | [:reg, reg] 140 | end 141 | when :mem 142 | REGISTERS.select{ _1[:width] == 64 }.map do |reg| 143 | [:mem, reg] 144 | end 145 | when :imm 146 | IMMEDIATES.map do |imm| 147 | [:imm, imm] 148 | end 149 | when :off 150 | [:off, 42] 151 | end 152 | end 153 | 154 | type_cases = 155 | case type_cases.length 156 | when 0 157 | throw "TODO: implement operands-less type cases..." 158 | when 1 159 | [ type_cases ] 160 | else 161 | type_cases.inject(:product) 162 | end 163 | 164 | type_cases.each do |operands| 165 | register_operands = operands.select { REGISTER_OPERAND_TYPES.include?(_1.first) } 166 | 167 | # Skip register pairings of different sizes 168 | next unless register_operands == [] || register_operands.map { _1[1][:width] }.inject(:==) 169 | 170 | # Some register names differ when extended registers are used 171 | extended = register_operands.any? 
{ _1[1][:type] == :extended } 172 | 173 | # Transform operands into nasm operands 174 | asm_operands = operands.map do |type, operand| 175 | case type 176 | when :reg 177 | extended_classic(operand, extended) 178 | when :mem 179 | "[#{extended_classic(operand, extended)}]" 180 | when :imm 181 | operand 182 | when :off 183 | operand 184 | end 185 | end 186 | 187 | # Operands for our Nix implementation 188 | #{reg1.to_nix} #{reg2.to_nix} 189 | nix_operands = operands.map do |type, operand| 190 | case type 191 | when :reg, :mem 192 | extended_classic(operand, extended) 193 | when :imm 194 | [ operand ] 195 | when :off 196 | operand 197 | end 198 | end 199 | 200 | # Compile... 201 | code = [instruction, asm_operands.join(",")].join(" ") 202 | bytes = asm_bytes(code) 203 | instruction_tests.puts <<~EOF 204 | #{code.to_nix()} = expect ''#{code.to_nix} to be correct [#{bytes.map{_1.to_s(16)}.join(" ")}]'' 205 | (lib.stripComments (lib.arch.x86_64.instructions.#{implementation_instruction} #{nix_operands.map(&:to_nix).join(" ")})) 206 | #{bytes.to_nix} 207 | ; 208 | EOF 209 | .indent(2) 210 | end 211 | instruction_tests.puts <<~EOF 212 | }; 213 | EOF 214 | instruction_tests.puts() 215 | 216 | file.puts(instruction_tests.string().indent(2)) 217 | end 218 | 219 | file.puts("}") 220 | 221 | File.delete(TEMP) if File.exist?(TEMP) 222 | File.delete(TEMP_OBJ) if File.exist?(TEMP_OBJ) 223 | end 224 | end 225 | 226 | ASM_x86_64.generate() 227 | -------------------------------------------------------------------------------- /support/lib/elf.rb: -------------------------------------------------------------------------------- 1 | def text_section_to_bytes(file) 2 | slurp("objdump", "--full-contents", "--section=.text", file) 3 | .split("Contents of section .text:\n").last 4 | .split(/\n+/) 5 | .map { _1.strip().split(/\s+/, 2).last[0..(2*4*4+2)] } 6 | .join("") 7 | .gsub(/\s+/, "") 8 | .scan(/../) 9 | .map { _1.to_i(16) } 10 | end 11 | 
-------------------------------------------------------------------------------- /support/lib/refinements.rb: -------------------------------------------------------------------------------- 1 | module GeneratorRefinements 2 | refine Integer do 3 | def to_nix() 4 | if self < 0 then 5 | "(-"+self.abs.to_nix()+")" 6 | else 7 | to_json() 8 | end 9 | end 10 | end 11 | 12 | refine Array do 13 | def to_nix() 14 | "[ " + map(&:to_nix).join(" ") + " ]" 15 | end 16 | end 17 | 18 | refine String do 19 | def indent(n, char = " ") 20 | gsub(/^/, char * n) 21 | end 22 | 23 | def to_nix() 24 | to_json() # close enough 25 | end 26 | end 27 | end 28 | -------------------------------------------------------------------------------- /support/lib/shared.rb: -------------------------------------------------------------------------------- 1 | require "json" 2 | require "shellwords" 3 | require "stringio" 4 | require_relative "refinements" 5 | require_relative "elf" 6 | 7 | # Top of the project 8 | TOP = File.realpath(File.join(__dir__(), "../..")) 9 | 10 | def run(*cmd) 11 | $stderr.puts "" 12 | $stderr.puts " $ #{cmd.shelljoin}" 13 | system(*cmd) 14 | end 15 | 16 | def slurp(*cmd) 17 | $stderr.puts "" 18 | $stderr.puts " $ #{cmd.shelljoin}" 19 | `#{cmd.shelljoin}` 20 | end 21 | -------------------------------------------------------------------------------- /tests/default.nix: -------------------------------------------------------------------------------- 1 | { output ? "results" }: 2 | let 3 | isTest = attrs: attrs.__test or false; 4 | expect' = 5 | { name 6 | , value 7 | , expected 8 | , expectedResult ? "ok" 9 | }: 10 | let 11 | name' = 12 | name 13 | + (if expectedResult == "fail" then " to fail..." else "") 14 | + (if expectedResult == "throw" then " to throw..." 
else "") 15 | ; 16 | attempt = builtins.tryEval value; 17 | result = 18 | if !attempt.success 19 | then "throw" 20 | else 21 | if value == expected 22 | then "ok" 23 | else "fail" 24 | ; 25 | in 26 | { 27 | __test = true; 28 | name = name'; 29 | result = 30 | if result == expectedResult 31 | then "ok" 32 | else "fail(${result})" 33 | ; 34 | inherit 35 | expected 36 | ; 37 | value = 38 | if attempt.success 39 | then value 40 | else null 41 | ; 42 | } 43 | ; 44 | fns = { 45 | expect = 46 | name: value: expected: 47 | expect' { 48 | inherit 49 | name 50 | value 51 | expected 52 | ; 53 | } 54 | ; 55 | expectFail = 56 | name: value: expected: 57 | expect' { 58 | expectedResult = "fail"; 59 | inherit 60 | name 61 | value 62 | expected 63 | ; 64 | } 65 | ; 66 | expectThrow = 67 | name: value: 68 | expect' { 69 | expectedResult = "throw"; 70 | inherit 71 | name 72 | value 73 | ; 74 | expected = null; 75 | } 76 | ; 77 | }; 78 | 79 | collectTests' = 80 | { attrs, path ? [] }: 81 | 82 | builtins.concatLists ( 83 | builtins.map ( 84 | name: 85 | let 86 | value = attrs."${name}"; 87 | attrPath = path ++ [ name ]; 88 | in 89 | if builtins.isAttrs value && !(isTest value) 90 | then collectTests' { attrs = value; path = attrPath; } 91 | else [{ 92 | name = builtins.concatStringsSep "." 
attrPath; 93 | inherit value; 94 | }] 95 | ) (builtins.attrNames attrs) 96 | ) 97 | ; 98 | collectTests = attrs: collectTests' { inherit attrs; }; 99 | 100 | suite = collectTests { 101 | _selfTests = import ./self-tests.nix (fns); 102 | lib = import ./lib (fns); 103 | }; 104 | 105 | results = 106 | let 107 | filtered = 108 | builtins.partition 109 | (test: test.value.result == "ok") 110 | suite 111 | ; 112 | in 113 | { 114 | _tests = { 115 | _success = builtins.listToAttrs filtered.right; 116 | failure = builtins.listToAttrs filtered.wrong; 117 | }; 118 | failure = builtins.length filtered.wrong; 119 | success = builtins.length filtered.right; 120 | } 121 | ; 122 | in 123 | { 124 | inherit 125 | results 126 | suite 127 | isTest 128 | ; 129 | }."${output}" 130 | -------------------------------------------------------------------------------- /tests/lib/arch/default.nix: -------------------------------------------------------------------------------- 1 | fns: 2 | { 3 | aarch64.instructions = import ./aarch64.instructions.nix (fns); 4 | x86_64.instructions = import ./x86_64.instructions.nix (fns); 5 | } 6 | -------------------------------------------------------------------------------- /tests/lib/attrs.nix: -------------------------------------------------------------------------------- 1 | { lib 2 | , expect 3 | , ... }: 4 | 5 | { 6 | merge = { 7 | works = expect "that merge works" 8 | (lib.merge { a = "1"; } { b = "2"; }) 9 | { a = "1"; b = "2"; } 10 | ; 11 | }; 12 | concatAttrs = { 13 | works = expect "that concatAttrs works" 14 | (lib.concatAttrs [ { a = 1; } { b = 9; } { c = 3; } { b = 2; } ]) 15 | { a = 1; b = 2; c = 3; } 16 | ; 17 | }; 18 | } 19 | -------------------------------------------------------------------------------- /tests/lib/binary.nix: -------------------------------------------------------------------------------- 1 | { lib 2 | , expect 3 | , expectThrow 4 | , ... 
}: 5 | 6 | { 7 | basicLatinToBytes = { 8 | basic = expect "basicLatinToBytes to produce a list of bytes" 9 | (lib.basicLatinToBytes "Hello") 10 | [ 72 101 108 108 111 ] 11 | ; 12 | }; 13 | bytesUnsignedToNumber = { 14 | i_0 = expect "bytesUnsignedToNumber to produce 0 for [ 0 ]" 15 | (lib.bytesUnsignedToNumber [ 0 ]) 16 | 0 17 | ; 18 | i_255 = expect "bytesUnsignedToNumber to produce 255 for [ 255 ]" 19 | (lib.bytesUnsignedToNumber [ 255 ]) 20 | 255 21 | ; 22 | i_256 = expect "bytesUnsignedToNumber to produce 256 for [ 0 1 ]" 23 | (lib.bytesUnsignedToNumber [ 0 1 ]) 24 | 256 25 | ; 26 | i_65535 = expect "bytesUnsignedToNumber to produce 65535 for [ 255 255 ]" 27 | (lib.bytesUnsignedToNumber [ 255 255 ]) 28 | 65535 29 | ; 30 | }; 31 | numberToBytes = { 32 | singleByte = { 33 | i_0 = expect "numberToBytes to produce a single byte for (0)" (lib.numberToBytes 0) [ 0 ]; 34 | i_1 = expect "numberToBytes to produce a single byte for (1)" (lib.numberToBytes 1) [ 1 ]; 35 | i_255 = expect "numberToBytes to produce a single byte for (255)" (lib.numberToBytes 255) [ 255 ]; 36 | }; 37 | twoBytes = { 38 | # Confirms output is LSB. 
39 | i_256 = expect "numberToBytes to produce two bytes for (256)" (lib.numberToBytes 256) [ 0 1 ]; 40 | i_65535 = expect "numberToBytes to produce two bytes for (65535)" (lib.numberToBytes 65535) [ 255 255 ]; 41 | }; 42 | }; 43 | padBytesLeft = { 44 | noop = { 45 | for_1 = expect "padBytesLeft to be a no-op with the same size input (1 byte)" 46 | (lib.padBytesLeft 1 [ 1 ]) 47 | [ 1 ] 48 | ; 49 | for_8 = expect "padBytesLeft to be a no-op with the same size input (8 bytes)" 50 | (lib.padBytesLeft 8 [ 1 2 3 4 5 6 7 8 ]) 51 | [ 1 2 3 4 5 6 7 8 ] 52 | ; 53 | }; 54 | toPad = { 55 | with_0 = expect "padBytesLeft to pad fully when given an empty list" 56 | (lib.padBytesLeft 3 []) 57 | [ 0 0 0 ] 58 | ; 59 | with_1 = expect "padBytesLeft to pad fully when given an empty list" 60 | (lib.padBytesLeft 6 [ 8 ]) 61 | [ 0 0 0 0 0 8 ] 62 | ; 63 | }; 64 | toError = expectThrow "padBytesLeft to throw on overflow" 65 | (lib.padBytesLeft 4 [ 1 2 3 4 5 ]) 66 | ; 67 | }; 68 | padBytesRight = { 69 | noop = { 70 | for_1 = expect "padBytesRight to be a no-op with the same size input (1 byte)" 71 | (lib.padBytesRight 1 [ 1 ]) 72 | [ 1 ] 73 | ; 74 | for_8 = expect "padBytesRight to be a no-op with the same size input (8 bytes)" 75 | (lib.padBytesRight 8 [ 1 2 3 4 5 6 7 8 ]) 76 | [ 1 2 3 4 5 6 7 8 ] 77 | ; 78 | }; 79 | toPad = { 80 | with_0 = expect "padBytesRight to pad fully when given an empty list" 81 | (lib.padBytesRight 3 []) 82 | [ 0 0 0 ] 83 | ; 84 | with_1 = expect "padBytesRight to pad fully when given an empty list" 85 | (lib.padBytesRight 6 [ 8 ]) 86 | [ 8 0 0 0 0 0 ] 87 | ; 88 | }; 89 | toError = expectThrow "padBytesRight to throw on overflow" 90 | (lib.padBytesRight 4 [ 1 2 3 4 5 ]) 91 | ; 92 | }; 93 | twosComplement = { 94 | # Tested through `ctypes.mkInt` 95 | }; 96 | getAlignedLength = { 97 | alignment_0 = { 98 | length_1 = 99 | expect "getAlignedLength with 0 alignment to be a no-op (1)" 100 | (lib.getAlignedLength 0 1) 101 | (1) 102 | ; 103 | length_255 = 104 | expect 
"getAlignedLength with 0 alignment to be a no-op (255)" 105 | (lib.getAlignedLength 0 255) 106 | (255) 107 | ; 108 | }; 109 | value_0 = { 110 | alignment_0 = 111 | expect "getAlignedLength (alignment 0) with value 0 to be 0" 112 | (lib.getAlignedLength 0 0) 113 | (0) 114 | ; 115 | alignment_8 = 116 | expect "getAlignedLength (alignment 8) with value 0 to be 0" 117 | (lib.getAlignedLength 8 0) 118 | (0) 119 | ; 120 | }; 121 | checks = { 122 | align_8_1 = 123 | expect "getAlignedLength 8 1" 124 | (lib.getAlignedLength 8 1) 125 | (8) 126 | ; 127 | align_8_7 = 128 | expect "getAlignedLength 8 7" 129 | (lib.getAlignedLength 8 7) 130 | (8) 131 | ; 132 | align_8_8 = 133 | expect "getAlignedLength 8 8" 134 | (lib.getAlignedLength 8 8) 135 | (8) 136 | ; 137 | align_8_9 = 138 | expect "getAlignedLength 8 9" 139 | (lib.getAlignedLength 8 9) 140 | (16) 141 | ; 142 | }; 143 | }; 144 | padToAlignment = { 145 | align_no-ops = { 146 | empty_0 = expect "padToAlignment with empty list to return an empty list (0)" 147 | (lib.padToAlignment 0 []) 148 | [] 149 | ; 150 | empty_1 = expect "padToAlignment with empty list to return an empty list (1)" 151 | (lib.padToAlignment 1 []) 152 | [] 153 | ; 154 | empty_4 = expect "padToAlignment with empty list to return an empty list (4)" 155 | (lib.padToAlignment 4 []) 156 | [] 157 | ; 158 | alignment_0_length_1 = expect "padToAlignment with alignment 0 to be a no-op ([ 1 ])" 159 | (lib.padToAlignment 0 [ 1 ]) 160 | [ 1 ] 161 | ; 162 | alignment_0_length_2 = expect "padToAlignment with alignment 0 to be a no-op ([ 1 2 ])" 163 | (lib.padToAlignment 0 [ 1 2 ]) 164 | [ 1 2 ] 165 | ; 166 | alignment_1_length_1 = expect "padToAlignment with alignment 1 to be a no-op ([ 1 ])" 167 | (lib.padToAlignment 1 [ 1 ]) 168 | [ 1 ] 169 | ; 170 | alignment_1_length_2 = expect "padToAlignment with alignment 1 to be a no-op ([ 1 2 ])" 171 | (lib.padToAlignment 1 [ 1 2 ]) 172 | [ 1 2 ] 173 | ; 174 | }; 175 | alignment = { 176 | alignment_4_length_1 = expect 
"padToAlignment 4 [ 1 ]" 177 | (lib.padToAlignment 4 [ 1 ]) 178 | [ 1 0 0 0 ] 179 | ; 180 | alignment_4_length_2 = expect "padToAlignment 4 [ 1 2 ]" 181 | (lib.padToAlignment 4 [ 1 2 ]) 182 | [ 1 2 0 0 ] 183 | ; 184 | }; 185 | }; 186 | } 187 | -------------------------------------------------------------------------------- /tests/lib/ctypes.nix: -------------------------------------------------------------------------------- 1 | { lib 2 | , expect 3 | , expectThrow 4 | , ... }: 5 | 6 | { 7 | parseDecl = { 8 | LP64 = { 9 | pointerType = expect "pointer types to be parsed" 10 | ((lib.ctypes.parseDecl "LP64" "char * varname") // { convert = null; }) 11 | { 12 | pointer = true; 13 | convert = null; 14 | name = "varname"; 15 | type = "char"; 16 | } 17 | ; 18 | pointerTypeSize = expect "pointer types to be equivalent to `uintptr_t`" 19 | ((lib.ctypes.parseDecl "LP64" "unsigned char * varname").convert 0) 20 | (lib.ctypes.dataModels.LP64."uintptr_t" 0) 21 | ; 22 | usignedChar = expect "parseDecl to handle a type" 23 | ((lib.ctypes.parseDecl "LP64" "unsigned char boop").convert 255) 24 | [ 255 ] 25 | ; 26 | unrecognizedType = expectThrow "parseDecl to not work with an unrecognized type" 27 | ((lib.ctypes.parseDecl "LP64" "unrecognized type").convert 42) 28 | ; 29 | incompleteDecl = expectThrow "parseDecl to not work with incomplete type" 30 | ((lib.ctypes.parseDecl "LP64" "char").convert 42) 31 | ; 32 | }; 33 | }; 34 | mkInt = { 35 | bits_40 = expect "A 40 bits wide uint to work" 36 | ((lib.ctypes.mkInt 40) 4096) 37 | [ 0 16 0 0 0 ] 38 | ; 39 | bits_7 = expectThrow "A bit width non-divisible by eight to fail" 40 | (lib.ctypes.mkInt 7) 41 | ; 42 | bits_0 = expectThrow "A zero bit width to fail" 43 | (lib.ctypes.mkInt 0) 44 | ; 45 | overflow = { 46 | w_8_v_128 = expectThrow "A signed overflow (positive) to fail" 47 | ((lib.ctypes.mkInt 8) (128)) 48 | ; 49 | w_8_v_-129 = expectThrow "A signed overflow (negative) to fail" 50 | ((lib.ctypes.mkInt 8) (-129)) 51 | ; 52 | }; 53 | 
negativeValues = { # expected bytes are two's complement, least-significant byte first 54 | bits_8_val_-1 = expect "8 bit representation of -1" 55 | (lib.ctypes.mkInt 8 (-1)) 56 | [ 255 ] 57 | ; 58 | bits_8_val_-128 = expect "8 bit representation of -128" 59 | (lib.ctypes.mkInt 8 (-128)) 60 | [ 128 ] 61 | ; 62 | bits_8_val_-127 = expect "8 bit representation of -127" 63 | (lib.ctypes.mkInt 8 (-127)) 64 | [ 129 ] 65 | ; 66 | bits_16_val_-1 = expect "16 bit representation of -1" 67 | (lib.ctypes.mkInt 16 (-1)) 68 | [ 255 255 ] 69 | ; 70 | bits_16_val_-129 = expect "16 bit representation of -129" 71 | (lib.ctypes.mkInt 16 (-129)) 72 | [ 127 255 ] 73 | ; 74 | bits_16_val_-32768 = expect "16 bit representation of -32768" 75 | (lib.ctypes.mkInt 16 (-32768)) 76 | [ 0 128 ] 77 | ; 78 | }; 79 | }; 80 | mkUint = { 81 | bits_40 = expect "A 40 bits wide uint to work" 82 | ((lib.ctypes.mkUint 40) 4096) 83 | [ 0 16 0 0 0 ] 84 | ; 85 | bits_7 = expectThrow "A bit width non-divisible by eight to fail" 86 | (lib.ctypes.mkUint 7) 87 | ; 88 | bits_0 = expectThrow "A zero bit width to fail" 89 | (lib.ctypes.mkUint 0) 90 | ; 91 | negativeFail = expectThrow "A negative value provided to fail" 92 | ((lib.ctypes.mkUint 8) (-1)) 93 | ; 94 | overflow = { 95 | w_8_v_256 = expectThrow "An unsigned overflow to fail" 96 | ((lib.ctypes.mkUint 8) (256)) # must exercise mkUint: 256 is one past the largest value 8 bits can hold 97 | ; 98 | }; 99 | }; 100 | dataModels = { 101 | LP64 = { 102 | pointer = expect "pointer type to be 64 bit wide" 103 | (lib.ctypes.dataModels.LP64."uintptr_t" 0) 104 | [ 0 0 0 0 0 0 0 0 ] 105 | ; 106 | unsigned_char = expect "unsigned char to be 8 bit wide" 107 | (lib.ctypes.dataModels.LP64."unsigned char" 0) 108 | [ 0 ] 109 | ; 110 | }; 111 | }; 112 | 113 | cstrings = { 114 | mkCStrings = { 115 | worksWithAttrsSingle = expect "to work with attrs (single value)" 116 | (lib.cstrings.mkCStrings { hello = "Hello, World!"; }) 117 | { 118 | offsets = { hello = 0; }; 119 | bytes = [ 120 | 72 101 108 108 111 44 32 87 111 114 108 100 33 0 # "Hello, World!\0" 121 | ]; 122 | strings = { hello = "Hello, World!"; }; 123 | 
length = 14; # 13 characters plus the terminating NUL byte 124 | } 125 | ; 126 | worksWithAttrsMany = expect "to work with attrs (multiple value)" 127 | (lib.cstrings.mkCStrings { z = "last"; hello = "Hello, World!"; }) 128 | { 129 | offsets = { hello = 0; z = 14; }; # "last" begins right after the 14 bytes of "Hello, World!\0" 130 | bytes = [ 131 | 72 101 108 108 111 44 32 87 111 114 108 100 33 0 # "Hello, World!\0" 132 | 108 97 115 116 0 # "last\0" 133 | ]; 134 | strings = { hello = "Hello, World!"; z = "last"; }; 135 | length = 19; 136 | } 137 | ; 138 | worksWithLists = expect "to work with lists" 139 | (lib.cstrings.mkCStrings [ "z_last" "Hello, World!" ]) 140 | { 141 | offsets = { "Hello, World!" = 0; z_last = 14; }; # keys are the strings themselves; "Hello, World!" is expected first although it is listed second 142 | bytes = [ 143 | 72 101 108 108 111 44 32 87 111 114 108 100 33 0 # "Hello, World!\0" 144 | 122 95 108 97 115 116 0 # "z_last\0" 145 | ]; 146 | strings = { "Hello, World!" = "Hello, World!"; z_last = "z_last"; }; 147 | length = 21; 148 | } 149 | ; 150 | }; 151 | }; 152 | } 153 | -------------------------------------------------------------------------------- /tests/lib/default.nix: -------------------------------------------------------------------------------- 1 | fns': # test helpers supplied by the caller; the test files take `expect` and `expectThrow` from this set 2 | let 3 | lib = import ../../lib; # the library under test 4 | fns = fns' // { inherit lib; }; # caller's helpers extended with `lib` 5 | in 6 | { 7 | maths = import ./maths.nix (fns); 8 | strings = import ./strings.nix (fns); 9 | lists = import ./lists.nix (fns); 10 | attrs = import ./attrs.nix (fns); 11 | functions = import ./functions.nix (fns); 12 | nix = import ./nix.nix (fns); 13 | binary = import ./binary.nix (fns); 14 | ctypes = import ./ctypes.nix (fns); 15 | arch = import ./arch (fns); 16 | } 17 | -------------------------------------------------------------------------------- /tests/lib/functions.nix: -------------------------------------------------------------------------------- 1 | { lib 2 | , expect 3 | , expectThrow 4 | , ... }: 5 | 6 | { 7 | identity = { 8 | works = expect "that identity works" (lib.identity "test") "test"; 9 | }; 10 | generateListLambda = { 11 | # TODO: write more tests... If this interface is not wrong. 
12 | noEmpty = expectThrow "that generateListLambda fails with an empty list" 13 | (lib.generateListLambda { functions = []; }) 14 | ; 15 | basic = expect "that generateListLambda produces a single function from a list of one function" 16 | ((lib.generateListLambda { functions = [ lib.identity ]; }) null) 17 | [ null ] 18 | ; 19 | basicTwo = expect "that generateListLambda produces a 'two-args function' from a list of two functions" 20 | ((lib.generateListLambda { functions = [ lib.identity (i: i*2) ]; }) null 2) # one argument per listed function, in list order 21 | [ null 4 ] 22 | ; 23 | }; 24 | } 25 | -------------------------------------------------------------------------------- /tests/lib/lists.nix: -------------------------------------------------------------------------------- 1 | { lib 2 | , expect 3 | , ... 4 | }: 5 | 6 | { 7 | optional = { 8 | true = expect "(optional true 1) == [ 1 ]" (lib.optional true 1) [ 1 ]; 9 | false = expect "(optional false 1) == [ ]" (lib.optional false 1) [ ]; 10 | list = expect "(optional true [ 1 ]) == [ [ 1 ] ]" (lib.optional true [ 1 ]) [ [ 1 ] ]; # a list value is wrapped, not spliced 11 | }; 12 | optionals = { 13 | true = expect "(optionals true [ 2 3 ]) == [ 2 3 ]" (lib.optionals true [ 2 3 ]) [ 2 3 ]; 14 | false = expect "(optionals false [ 2 3 ]) == [ ]" (lib.optionals false [ 2 3 ]) [ ]; 15 | }; 16 | mapReverse = { 17 | works = expect "that map reverse works" 18 | (lib.mapReverse (i: toString i) [ 1 2 3 ]) 19 | [ "3" "2" "1" ] 20 | ; 21 | }; 22 | mapWithIndex = { 23 | works = expect "that map with index works" 24 | (lib.mapWithIndex (i: v: [ i v ]) [ "a" "b" "c" ]) # the callback receives the zero-based index first, then the element 25 | [ 26 | [ 0 "a" ] 27 | [ 1 "b" ] 28 | [ 2 "c" ] 29 | ] 30 | ; 31 | }; 32 | listToAttrsStrings = { 33 | works = expect "that listToAttrsStrings works" 34 | (lib.listToAttrsStrings [ "a" "b" "c" ]) 35 | { a = "a"; b = "b"; c = "c"; } 36 | ; 37 | repeatedWorks = expect "that repeated strings works" 38 | (lib.listToAttrsStrings [ "a" "b" "c" "b" "a" ]) # duplicate entries collapse into a single key 39 | { a = "a"; b = "b"; c = "c"; } 40 | ; 41 | }; 42 | listFind = { 43 | findsThings = 
expect "that listFind finds the first element matching predicate" 44 | (lib.listFind (el: el.a == 2) [ { a = 1; b = 9; } { a = 2; b = 8; } { a = 2; b = 7; } ]) 45 | { a = 2; b = 8; } # the earlier of the two matching elements 46 | ; 47 | findsNothin = expect "that listFind finds nothing when nothing matches" 48 | (lib.listFind (el: el == 9) [ 1 2 3 4 ]) 49 | null 50 | ; 51 | }; 52 | last = { 53 | getsLastElement = expect "that last gets the last element" 54 | (lib.last [ 0 1 2 3 4 ]) 55 | 4 56 | ; 57 | }; 58 | } 59 | -------------------------------------------------------------------------------- /tests/lib/maths.nix: -------------------------------------------------------------------------------- 1 | { lib 2 | , expect 3 | , expectThrow 4 | , ... 5 | }: 6 | 7 | { 8 | abs = { 9 | zero = expect "absolute 0 to be 0" (lib.abs 0) 0; 10 | positive = expect "positive values to be positive" (lib.abs 5) 5; 11 | negative = expect "negative values to be positive" (lib.abs (- 5)) 5; 12 | }; 13 | mod = { 14 | zeroDivisor = { 15 | i_0 = expectThrow "0 % 0 == " (lib.mod 0 0); 16 | i_1 = expectThrow "1 % 0 == " (lib.mod 1 0); 17 | i_2 = expectThrow "2 % 0 == " (lib.mod 2 0); 18 | }; 19 | oneDivisor = { 20 | i_0 = expect "0 % 1 == 0" (lib.mod 0 1) 0; 21 | i_1 = expect "1 % 1 == 0" (lib.mod 1 1) 0; 22 | i_2 = expect "2 % 1 == 0" (lib.mod 2 1) 0; 23 | }; 24 | twoDivisor = { 25 | i_0 = expect "0 % 2 == 0" (lib.mod 0 2) 0; 26 | i_1 = expect "1 % 2 == 1" (lib.mod 1 2) 1; 27 | i_2 = expect "2 % 2 == 0" (lib.mod 2 2) 0; 28 | i_3 = expect "3 % 2 == 1" (lib.mod 3 2) 1; 29 | }; 30 | threeDivisor = { 31 | i_0 = expect "0 % 3 == 0" (lib.mod 0 3) 0; 32 | i_1 = expect "1 % 3 == 1" (lib.mod 1 3) 1; 33 | i_2 = expect "2 % 3 == 2" (lib.mod 2 3) 2; 34 | i_3 = expect "3 % 3 == 0" (lib.mod 3 3) 0; 35 | i_4 = expect "4 % 3 == 1" (lib.mod 4 3) 1; 36 | i_5 = expect "5 % 3 == 2" (lib.mod 5 3) 2; 37 | }; 38 | negativeDividend = { # expected results are non-negative for a positive divisor 39 | i_-1_mod_2 = expect "-1 % 2 == 1" (lib.mod (-1) 2) 1; 40 | i_-2_mod_2 = expect "-2 % 2 == 0" (lib.mod (-2) 2) 0; 
41 | }; 42 | negativeDivisor = { # expected results are zero or negative for a negative divisor 43 | i_0_mod_-2 = expect "0 % -2 == 0" (lib.mod 0 (-2)) (0); 44 | 45 | i_1_mod_-2 = expect "1 % -2 == -1" (lib.mod 1 (-2)) (-1); 46 | i_2_mod_-2 = expect "2 % -2 == 0" (lib.mod 2 (-2)) (0); 47 | 48 | i_-1_mod_-2 = expect "-1 % -2 == -1" (lib.mod (-1) (-2)) (-1); 49 | i_-2_mod_-2 = expect "-2 % -2 == 0" (lib.mod (-2) (-2)) (0); 50 | }; 51 | }; 52 | bitShiftLeft = { 53 | i_0_sl_1 = expect "0 << 1 == 0" (lib.bitShiftLeft 0 1) 0; 54 | i_1_sl_0 = expect "1 << 0 == 1" (lib.bitShiftLeft 1 0) 1; 55 | 56 | i_2_sl_1 = expect "2 << 1 == 4" (lib.bitShiftLeft 2 1) 4; 57 | i_1_sl_8 = expect "1 << 8 == 256" (lib.bitShiftLeft 1 8) 256; 58 | i_3_sl_7 = expect "3 << 7 == 384" (lib.bitShiftLeft 3 7) 384; 59 | i_1_sl_62 = expect "1 << 62 == 4611686018427387904" (lib.bitShiftLeft 1 62) 4611686018427387904; 60 | # Negative values 61 | i_-1_sl_2 = expect "-1 << 2 == -4" (lib.bitShiftLeft (-1) 2) (-4); 62 | # Negative magnitudes 63 | i_16_sl_-2 = expect "16 << -2 == 4" (lib.bitShiftLeft 16 (-2)) 4; 64 | 65 | max = expect "0b01111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111 to be representable" 66 | (builtins.foldl' builtins.add 0 (builtins.genList (i: lib.bitShiftLeft 1 i) 63)) 67 | 9223372036854775807 68 | ; 69 | }; 70 | bitShiftRight = { 71 | i_0_sr_1 = expect "0 >> 1 == 0" (lib.bitShiftRight 0 1) 0; 72 | i_1_sr_0 = expect "1 >> 0 == 1" (lib.bitShiftRight 1 0) 1; 73 | 74 | i_2_sr_1 = expect "2 >> 1 == 1" (lib.bitShiftRight 2 1) 1; 75 | i_2_sr_3 = expect "2 >> 3 == 0" (lib.bitShiftRight 2 3) 0; 76 | i_1_sr_8 = expect "1 >> 8 == 0" (lib.bitShiftRight 1 8) 0; 77 | 78 | i_big_sr_62 = expect "4611686018427387904 >> 62 == 1" (lib.bitShiftRight 4611686018427387904 62) 1; 79 | 80 | # Negative values 81 | i_-64_sr_1 = expect "-64 >> 1 == -32" (lib.bitShiftRight (-64) 1) (-32); 82 | i_-2_sr_3 = expect "-2 >> 3 == -1" (lib.bitShiftRight (-2) 3) (-1); # shifts by 3, as the test name and description state 83 | 84 | # Negative magnitudes 85 | i_16_sr_-2 = expect "16 >> -2 == 64" (lib.bitShiftRight 
16 (-2)) 64; # a negative shift amount is expected to shift in the opposite direction 86 | }; 87 | 88 | toInt = { 89 | works = expect "toInt to convert numbers" 90 | (lib.toInt "1234") 91 | 1234 92 | ; 93 | failsNonDecimal = expectThrow "toInt to fail when non-decimal digits are used" 94 | (lib.toInt "12a34") 95 | ; 96 | failsNonString = expectThrow "toInt to fail when a non-string input is used" 97 | (lib.toInt 1) # an integer rather than a string 98 | ; 99 | }; 100 | } 101 | -------------------------------------------------------------------------------- /tests/lib/nix.nix: -------------------------------------------------------------------------------- 1 | { lib 2 | , expect 3 | , ... 4 | }: 5 | 6 | { 7 | parseSystem = { 8 | parse_x86_64-linux = expect "parseSystem to parse x86_64-linux" 9 | (lib.parseSystem "x86_64-linux") 10 | { arch = "x86_64"; os = "linux"; } 11 | ; 12 | parse_aarch64-linux = expect "parseSystem to parse aarch64-linux" 13 | (lib.parseSystem "aarch64-linux") 14 | { arch = "aarch64"; os = "linux"; } 15 | ; 16 | nullForArch_x86_64 = expect "parseSystem to return null for a likely architecture names" 17 | (lib.parseSystem "x86_64") # architecture only, no "-linux" part 18 | null 19 | ; 20 | nullForArch_aarch64 = expect "parseSystem to return null for a likely architecture names" 21 | (lib.parseSystem "aarch64") 22 | null 23 | ; 24 | }; 25 | } 26 | -------------------------------------------------------------------------------- /tests/lib/strings.nix: -------------------------------------------------------------------------------- 1 | { lib 2 | , expect 3 | , ... 
4 | }: 5 | 6 | { 7 | collapseSpaces = { 8 | emptyString = 9 | expect "empty strings to work" (lib.collapseSpaces "") "" 10 | ; 11 | noSpaces = 12 | expect "no spaces to be a no-op" (lib.collapseSpaces "abcd") "abcd" 13 | ; 14 | startSingle = 15 | expect "space at start to be removed" (lib.collapseSpaces " single") "single" 16 | ; 17 | startMulti = 18 | expect "spaces at start to be removed" (lib.collapseSpaces " multi") "multi" 19 | ; 20 | endSingle = 21 | expect "space at end to be removed" (lib.collapseSpaces "single ") "single" # input must carry the trailing space this case is about 22 | ; 23 | endMulti = 24 | expect "spaces at end to be removed" (lib.collapseSpaces "multi ") "multi" 25 | ; 26 | singleSpaces = 27 | expect "string with only single spaces are no-ops" (lib.collapseSpaces "a simple test") "a simple test" 28 | ; 29 | multipleSpaces = 30 | expect "string with spans of spaces to be collapsed into a single space" (lib.collapseSpaces "this is too spacious") "this is too spacious" 31 | ; 32 | }; 33 | 34 | chars = { 35 | emptyString = 36 | expect "empty strings to return empty list" (lib.chars "") [] 37 | ; 38 | singleChar = 39 | expect "single chars to return a single element" (lib.chars "x") [ "x" ] 40 | ; 41 | multiChars = 42 | expect "multi chars to return all chars" (lib.chars "simple test") [ "s" "i" "m" "p" "l" "e" " " "t" "e" "s" "t" ] 43 | ; 44 | }; 45 | 46 | join = { 47 | emptyString = 48 | expect "empty lists to return empty strings" (lib.join []) "" 49 | ; 50 | singleChar = 51 | expect "single char lists to return a single char" (lib.join [ "a" ]) "a" 52 | ; 53 | multiChars = 54 | expect "multi chars lists to return the equivalent string" (lib.join [ "a" "b" " " "c" "d" ]) "ab cd" 55 | ; 56 | }; 57 | } 58 | -------------------------------------------------------------------------------- /tests/self-tests.nix: -------------------------------------------------------------------------------- 1 | { expect, ... }: 2 | # Self tests... 
3 | { 4 | _expect = # checks the record `expect` yields when the value equals the expectation 5 | let 6 | expected = 7 | { 8 | __test = true; 9 | result = "ok"; 10 | name = "check"; 11 | expected = 42; 12 | value = 42; 13 | } 14 | ; 15 | in 16 | expect "that expect works" 17 | (expect "check" 42 42) 18 | expected 19 | ; 20 | _expect_throw = # checks that a throwing value is reported as "fail(throw)" with a null value 21 | let 22 | expected = 23 | { 24 | __test = true; 25 | result = "fail(throw)"; 26 | name = "check"; 27 | expected = "irrelevant"; 28 | value = null; 29 | } 30 | ; 31 | in 32 | expect "that expect will not fail on throw" 33 | (expect "check" (throw "ok") "irrelevant") 34 | expected 35 | ; 36 | } 37 | --------------------------------------------------------------------------------