├── lib ├── jit │ ├── version.rb │ ├── compiler.rb │ └── assembler.rb └── jit.rb ├── test ├── none.rb ├── plus.rb ├── minus.rb ├── local.rb ├── lt.rb ├── send.rb ├── fib.rb ├── branch.rb └── jit │ └── compiler_test.rb ├── bin ├── docker ├── setup ├── ruby ├── console └── bench ├── .gitignore ├── Gemfile ├── benchmark.yml ├── Rakefile ├── Dockerfile ├── jit.gemspec ├── LICENSE.txt └── README.md /lib/jit/version.rb: -------------------------------------------------------------------------------- 1 | module JIT 2 | VERSION = '0.1.0' 3 | end 4 | -------------------------------------------------------------------------------- /test/none.rb: -------------------------------------------------------------------------------- 1 | def none 2 | nil 3 | end 4 | 5 | none 6 | none 7 | p none 8 | -------------------------------------------------------------------------------- /test/plus.rb: -------------------------------------------------------------------------------- 1 | def plus 2 | 1 + 2 3 | end 4 | 5 | plus 6 | plus 7 | p plus 8 | -------------------------------------------------------------------------------- /test/minus.rb: -------------------------------------------------------------------------------- 1 | def minus 2 | 3 - 1 3 | end 4 | 5 | minus 6 | minus 7 | p minus 8 | -------------------------------------------------------------------------------- /test/local.rb: -------------------------------------------------------------------------------- 1 | def local(n) 2 | n 3 | end 4 | 5 | local(1) 6 | local(1) 7 | p local(2) 8 | -------------------------------------------------------------------------------- /test/lt.rb: -------------------------------------------------------------------------------- 1 | def lt(n) 2 | n < 2 3 | end 4 | 5 | lt(1) 6 | lt(1) 7 | p lt(1) 8 | p lt(2) 9 | -------------------------------------------------------------------------------- /bin/docker: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env 
bash 2 | exec docker run --rm -it -v "$(pwd):/app" k0kubun/rjit bash 3 | -------------------------------------------------------------------------------- /test/send.rb: -------------------------------------------------------------------------------- 1 | def foo(a) 2 | 1 + a 3 | end 4 | 5 | def bar 6 | foo(1) 7 | end 8 | 9 | bar 10 | bar 11 | p bar 12 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /.bundle/ 2 | /.yardoc 3 | /_yardoc/ 4 | /coverage/ 5 | /doc/ 6 | /pkg/ 7 | /spec/reports/ 8 | /tmp/ 9 | /Gemfile.lock 10 | -------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- 1 | source 'https://rubygems.org' 2 | 3 | # Specify your gem's dependencies in jit.gemspec 4 | gemspec 5 | 6 | gem 'minitest' 7 | gem 'rake' 8 | -------------------------------------------------------------------------------- /test/fib.rb: -------------------------------------------------------------------------------- 1 | def fib(n) 2 | if n < 2 3 | return n 4 | end 5 | return fib(n-1) + fib(n-2) 6 | end 7 | 8 | fib(2) 9 | p fib(32) 10 | -------------------------------------------------------------------------------- /bin/setup: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -euo pipefail 3 | IFS=$'\n\t' 4 | set -vx 5 | 6 | bundle install 7 | 8 | # Do any other automated setup that you need to do here 9 | -------------------------------------------------------------------------------- /test/branch.rb: -------------------------------------------------------------------------------- 1 | def branch(flag) 2 | if flag 3 | 1 4 | else 5 | 0 6 | end 7 | end 8 | 9 | branch(true) 10 | branch(true) 11 | p branch(true) 12 | p branch(false) 13 | 
-------------------------------------------------------------------------------- /bin/ruby: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | repo_root="$(cd "$(dirname "$0")"; cd ..; pwd)" 3 | ruby="${RJIT_RUBY:-"ruby"}" 4 | exec "$ruby" "-r${repo_root}/lib/jit.rb" --rjit=pause --rjit-call-threshold=3 "$@" 5 | -------------------------------------------------------------------------------- /benchmark.yml: -------------------------------------------------------------------------------- 1 | prelude: | 2 | def fib(n) 3 | if n < 2 4 | return n 5 | end 6 | return fib(n-1) + fib(n-2) 7 | end 8 | 9 | fib(2) 10 | fib(2) 11 | benchmark: fib(32) 12 | loop_count: 15 13 | -------------------------------------------------------------------------------- /Rakefile: -------------------------------------------------------------------------------- 1 | require 'bundler/gem_tasks' 2 | require 'rake/testtask' 3 | 4 | Rake::TestTask.new(:test) do |t| 5 | t.libs << 'lib' << 'test' 6 | t.test_files = %w[test/jit/*_test.rb] 7 | t.verbose = true 8 | end 9 | 10 | task default: :test 11 | -------------------------------------------------------------------------------- /bin/console: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | 3 | require 'bundler/setup' 4 | require 'jit' 5 | 6 | # You can add fixtures and/or initialization code here to make experimenting 7 | # with your gem easier. You can also use a different console, if you like. 8 | 9 | require 'irb' 10 | IRB.start(__FILE__) 11 | -------------------------------------------------------------------------------- /lib/jit.rb: -------------------------------------------------------------------------------- 1 | require_relative 'jit/version' 2 | require_relative 'jit/compiler' 3 | 4 | return unless RubyVM::RJIT.enabled? 
5 | 6 | # Replace RJIT with JIT::Compiler 7 | RubyVM::RJIT::Compiler.prepend(Module.new { 8 | def compile(iseq, _) 9 | @compiler ||= JIT::Compiler.new 10 | @compiler.compile(iseq) 11 | end 12 | }) 13 | 14 | # Enable JIT compilation (paused by --rjit=pause) 15 | RubyVM::RJIT.resume 16 | -------------------------------------------------------------------------------- /bin/bench: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | repo_root="$(cd "$(dirname "$0")"; cd ..; pwd)" 3 | ruby="$(which ruby)" 4 | 5 | if ! which benchmark-driver > /dev/null; then 6 | gem install benchmark_driver 7 | fi 8 | 9 | benchmark-driver "${repo_root}/benchmark.yml" \ 10 | -e "no-jit::${ruby}" \ 11 | -e "rjit::${ruby} --rjit-call-threshold=3" \ 12 | -e "yjit::${ruby} --yjit-call-threshold=3" \ 13 | -e "ruby-jit::${ruby} --rjit=pause -r${repo_root}/lib/jit.rb --rjit-call-threshold=3" 14 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:22.04 2 | 3 | RUN apt-get update && apt-get install -y \ 4 | autoconf bison patch build-essential rustc libssl-dev libyaml-dev libreadline6-dev \ 5 | zlib1g-dev libgmp-dev libncurses5-dev libffi-dev libgdbm6 libgdbm-dev libdb-dev uuid-dev \ 6 | ruby git libcapstone-dev \ 7 | && rm -rf /var/lib/apt/lists/* 8 | 9 | ENV RUBY_REVISION=f2c367734f847a7277f09c583a0476086313fdc9 10 | RUN git clone --depth=1 https://github.com/ruby/ruby /ruby && cd /ruby && \ 11 | git fetch origin $RUBY_REVISION && git reset --hard $RUBY_REVISION && \ 12 | ./autogen.sh && \ 13 | ./configure --disable-install-doc --prefix=/usr/local --enable-yjit --enable-rjit=disasm && \ 14 | make -j8 && make install && apt-get remove -y ruby && rm -rf /ruby 15 | 16 | RUN mkdir /app 17 | WORKDIR /app 18 | -------------------------------------------------------------------------------- /jit.gemspec: 
-------------------------------------------------------------------------------- 1 | require_relative 'lib/jit/version' 2 | 3 | Gem::Specification.new do |spec| 4 | spec.name = 'jit' 5 | spec.version = JIT::VERSION 6 | spec.authors = ['Takashi Kokubun'] 7 | spec.email = ['takashikkbn@gmail.com'] 8 | 9 | spec.summary = 'Ruby JIT Challenge' 10 | spec.description = 'Ruby JIT Challenge' 11 | spec.homepage = 'https://github.com/k0kubun/ruby-jit-challenge' 12 | spec.required_ruby_version = '>= 3.3.0.dev' 13 | 14 | spec.files = Dir.chdir(__dir__) do 15 | `git ls-files -z`.split("\x0").reject do |f| 16 | (f == __FILE__) || f.match(%r{\A(?:(?:bin|test|spec|features)/|\.(?:git|circleci)|appveyor)}) 17 | end 18 | end 19 | spec.bindir = 'exe' 20 | spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) } 21 | spec.require_paths = ['lib'] 22 | end 23 | -------------------------------------------------------------------------------- /test/jit/compiler_test.rb: -------------------------------------------------------------------------------- 1 | require 'minitest/autorun' 2 | require 'open3' 3 | 4 | class JITCompilerTest < Minitest::Test 5 | REPO_ROOT = File.expand_path('../..', __dir__) 6 | 7 | def test_none 8 | assert_jit('test/none.rb', 'nil') 9 | end 10 | 11 | def test_plus 12 | assert_jit('test/plus.rb', '3') 13 | end 14 | 15 | def test_minus 16 | assert_jit('test/minus.rb', '2') 17 | end 18 | 19 | def test_local 20 | assert_jit('test/local.rb', '2') 21 | end 22 | 23 | def test_lt 24 | assert_jit('test/lt.rb', "true\nfalse") 25 | end 26 | 27 | def test_branch 28 | assert_jit('test/branch.rb', "1\n0") 29 | end 30 | 31 | def test_send 32 | assert_jit('test/send.rb', '2') 33 | end 34 | 35 | def test_fib 36 | assert_jit('test/fib.rb', '2178309') 37 | end 38 | 39 | private 40 | 41 | def assert_jit(path, expected) 42 | stdout, stderr, status = with_unbundled_env do 43 | Open3.capture3( 44 | RbConfig.ruby, "-r#{REPO_ROOT}/lib/jit.rb", '--rjit=pause', 45 | 
'--rjit-call-threshold=3', File.expand_path(path, REPO_ROOT) 46 | ) 47 | end 48 | assert_equal 0, status.exitstatus, 49 | "stdout:\n```\n#{stdout}```\n\nstderr:\n```\n#{stderr}```" 50 | assert_equal '', stderr 51 | assert_equal "#{expected}\n", stdout 52 | end 53 | 54 | def with_unbundled_env(&block) 55 | if defined?(Bundler) 56 | Bundler.with_unbundled_env { block.call } 57 | else 58 | block.call 59 | end 60 | end 61 | end 62 | -------------------------------------------------------------------------------- /lib/jit/compiler.rb: -------------------------------------------------------------------------------- 1 | require_relative 'assembler' 2 | 3 | module JIT 4 | class Compiler 5 | # Utilities to call C functions and interact with the Ruby VM. 6 | # See: https://github.com/ruby/ruby/blob/master/rjit_c.rb 7 | C = RubyVM::RJIT::C 8 | 9 | # Metadata for each YARV instruction. 10 | INSNS = RubyVM::RJIT::INSNS 11 | 12 | # Size of the JIT buffer in bytes (1MiB) 13 | JIT_BUF_SIZE = 1024 * 1024 14 | 15 | # Initialize a JIT buffer. Called only once. 16 | def initialize 17 | # Allocate JIT_BUF_SIZE bytes (1MiB) of memory. This returns the memory address. 18 | @jit_buf = C.mmap(JIT_BUF_SIZE) 19 | # The number of bytes that have been written to @jit_buf. 20 | @jit_pos = 0 21 | end 22 | 23 | # Compile a method. Called after --rjit-call-threshold calls. 24 | def compile(iseq) 25 | # Write machine code to this assembler. 26 | asm = Assembler.new 27 | 28 | # Iterate over each YARV instruction. 29 | insn_index = 0 30 | while insn_index < iseq.body.iseq_size 31 | insn = INSNS.fetch(C.rb_vm_insn_decode(iseq.body.iseq_encoded[insn_index])) 32 | case insn.name 33 | in :nop 34 | # none 35 | end 36 | insn_index += insn.len 37 | end 38 | 39 | # Write machine code into memory and use it as a JIT function. 40 | iseq.body.jit_func = write(asm) 41 | rescue Exception => e 42 | abort e.full_message 43 | end 44 | 45 | private 46 | 47 | # Write bytes in a given assembler into @jit_buf and return the address where they start. 
48 | # @param asm [JIT::Assembler] 49 | def write(asm) 50 | jit_addr = @jit_buf + @jit_pos 51 | 52 | # Append machine code to the JIT buffer 53 | C.mprotect_write(@jit_buf, JIT_BUF_SIZE) # make @jit_buf writable 54 | @jit_pos += asm.assemble(jit_addr) 55 | C.mprotect_exec(@jit_buf, JIT_BUF_SIZE) # make @jit_buf executable 56 | 57 | # Dump disassembly if --rjit-dump-disasm 58 | if C.rjit_opts.dump_disasm 59 | C.dump_disasm(jit_addr, @jit_buf + @jit_pos).each do |address, mnemonic, op_str| 60 | puts " 0x#{format("%x", address)}: #{mnemonic} #{op_str}" 61 | end 62 | puts 63 | end 64 | 65 | jit_addr 66 | end 67 | end 68 | end 69 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Ruby is copyrighted free software by Yukihiro Matsumoto . 2 | You can redistribute it and/or modify it under either the terms of the 3 | 2-clause BSDL (see the file BSDL), or the conditions below: 4 | 5 | 1. You may make and give away verbatim copies of the source form of the 6 | software without restriction, provided that you duplicate all of the 7 | original copyright notices and associated disclaimers. 8 | 9 | 2. You may modify your copy of the software in any way, provided that 10 | you do at least ONE of the following: 11 | 12 | a. place your modifications in the Public Domain or otherwise 13 | make them Freely Available, such as by posting said 14 | modifications to Usenet or an equivalent medium, or by allowing 15 | the author to include your modifications in the software. 16 | 17 | b. use the modified software only within your corporation or 18 | organization. 19 | 20 | c. give non-standard binaries non-standard names, with 21 | instructions on where to get the original software distribution. 22 | 23 | d. make other distribution arrangements with the author. 24 | 25 | 3. 
You may distribute the software in object code or binary form, 26 | provided that you do at least ONE of the following: 27 | 28 | a. distribute the binaries and library files of the software, 29 | together with instructions (in the manual page or equivalent) 30 | on where to get the original distribution. 31 | 32 | b. accompany the distribution with the machine-readable source of 33 | the software. 34 | 35 | c. give non-standard binaries non-standard names, with 36 | instructions on where to get the original software distribution. 37 | 38 | d. make other distribution arrangements with the author. 39 | 40 | 4. You may modify and include the part of the software into any other 41 | software (possibly commercial). But some files in the distribution 42 | are not written by the author, so that they are not under these terms. 43 | 44 | For the list of those files and their copying conditions, see the 45 | file LEGAL. 46 | 47 | 5. The scripts and library files supplied as input to or produced as 48 | output from the software do not automatically fall under the 49 | copyright of the software, but belong to whomever generated them, 50 | and may be sold commercially, and may be aggregated with this 51 | software. 52 | 53 | 6. THIS SOFTWARE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR 54 | IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED 55 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 56 | PURPOSE. 57 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Ruby JIT Challenge 2 | 3 | Supplemental material to [Ruby JIT Hacking Guide](https://rubykaigi.org/2023/presentations/k0kubun.html) for RubyKaigi 2023 4 | 5 | ## Introduction 6 | 7 | This is a small tutorial to write a JIT compiler in Ruby. 8 | We don't expect any prior experience in compilers or assembly languages. 
9 | It's supposed to take only several minutes if you read all hints, but challenging if you don't. 10 | 11 | You'll write a JIT that can compile a Fibonacci benchmark. 12 | With relaxed implementation requirements, you'll hopefully create a JIT faster than existing Ruby JITs with ease. 13 | 14 | The goal of this repository is to make you feel comfortable using and/or contributing to Ruby JIT. 15 | More importantly, enjoy writing a compiler in Ruby. 16 | 17 | ## Setup 18 | 19 | This repository assumes an `x86_64-linux` environment. 20 | It also requires a Ruby master build to leverage RJIT's interface to integrate a custom JIT. 21 | 22 | It's recommended to use the following Docker container environment. 23 | There's also [bin/docker](./bin/docker) as a shorthand. 24 | 25 | ```bash 26 | $ docker run -it -v "$(pwd):/app" k0kubun/rjit bash 27 | ``` 28 | 29 | See [Dockerfile](./Dockerfile) if you want to prepare the same environment locally. 30 | 31 | ## Testing 32 | 33 | You'll build a JIT in multiple steps. 34 | Test scripts in `test/*.rb` will help you test them one by one. 35 | You can run them with your JIT enabled with [bin/ruby](./bin/ruby). 36 | 37 | ``` 38 | bin/ruby test/none.rb 39 | ``` 40 | 41 | You can also dump compiled code with `bin/ruby --rjit-dump-disasm test/none.rb`. 42 | 43 | For your convenience, `rake test` ([test/jit/compiler\_test.rb](./test/jit/compiler_test.rb)) 44 | runs all test scripts with your JIT enabled. 45 | 46 | ## 1. Compile nil 47 | 48 | First, we'll compile the following simple method that just returns nil. 49 | 50 | ```rb 51 | def none 52 | nil 53 | end 54 | ``` 55 | 56 | ### --dump=insns 57 | 58 | In CRuby, each Ruby method is internally compiled into an "Instruction Sequence", also known as ISeq. 59 | The CRuby interpreter executes Ruby code by looping over instructions in this sequence. 
60 | 61 | Typically, a CRuby JIT takes an ISeq as input to the JIT compiler and outputs machine code 62 | that works in the same way as the ISeq. In this exercise, it's the only input you'll need to take care of. 63 | 64 | You can dump the ISeqs in a file with the `ruby --dump=insns` option. 65 | Let's have a look at the ISeq of the `none` method. 66 | 67 | ``` 68 | $ ruby --dump=insns test/none.rb 69 | ... 70 | == disasm: #<ISeq:none@test/none.rb:1 (1,0)-(3,3)> 71 | 0000 putnil ( 1)[Ca] 72 | 0001 leave ( 3)[Re] 73 | ``` 74 | 75 | This means that `none` consists of two instructions: `putnil` and `leave`. 76 | 77 | `putnil` instruction puts nil on the "stack" of the Ruby interpreter. Imagine `stack = []; stack << nil`. 78 | 79 | `leave` instruction is like `return`. It pops the stack top value and uses it as a return value of the method. 80 | Imagine `return stack.pop`. 81 | 82 | NOTE: Click ▼ to open hints. 83 | 84 |
85 | Assembler 86 | 87 | ### Assembler 88 | 89 | [lib/jit/assembler.rb](./lib/jit/assembler.rb) has an x86\_64 assembler that was copied from RJIT and then simplified. 90 | Feel free to remove it and write it from scratch, but this tutorial will not cover how to encode x86\_64 instructions. 91 | 92 | Here's example code using `Assembler`. 93 | 94 | ```rb 95 | asm = Assembler.new 96 | asm.mov(:rax, [:rsi, 8]) 97 | asm.add(:rax, 2) 98 | write(asm) 99 | ``` 100 | 101 | This writes the following machine code into memory. 102 | 103 | ```asm 104 | mov rax, [rsi + 8] 105 | add rax, 2 106 | ``` 107 | 108 | `rax` and `rsi` are registers. 109 | `[rsi + 8]` is memory access based off of a register, which reads memory 8 bytes after the address in `rsi`. 110 | `2` is an immediate value. 111 | 112 | See [lib/jit/assembler.rb](./lib/jit/assembler.rb) for what kind of input it can handle. 113 | 114 |
115 |
116 | Instructions 117 | 118 | ### Instructions 119 | 120 | There are various x86\_64 instructions. 121 | However, it's enough to use only the following instructions to pass tests in this tutorial. 122 | 123 | For `test/none.rb`, only `mov`, `add`, and `ret` are necessary. 124 | 125 | | Instruction | Description | Example | Effect | 126 | |:------------|:--------------------------------------------|:-------------|:-----------| 127 | | mov | Assign a value. | `mov rax, 1` | `rax = 1` | 128 | | add | Add a value. | `add rax, 1` | `rax += 1` | 129 | | sub | Subtract a value. | `sub rax, 1` | `rax -= 1` | 130 | | cmp | Compare values. Use it with cmovl. | `cmp rdi, rsi` | `rdi < rsi` | 131 | | cmovl | Assign a value if left < right. | `cmovl rax, rcx` | `rax = rcx if rdi < rsi` | 132 | | test | Compare values. Use it with jz. | `test rax, 1` | `rax & 1` | 133 | | jz | Jump if left and right have no common bits. | `jz 0x1234` | `goto 0x1234 if rax & 1 == 0` | 134 | | jmp | Jump to an address. | `jmp 0x1234` | `goto 0x1234` | 135 | | call | Call a function. | `call 0x1234` | `func()` | 136 | | ret | Return a value. | `ret` | `return rax` | 137 | 138 |
139 |
140 | Registers 141 | 142 | ### Registers 143 | 144 | Registers are like variables in machine code. 145 | You're free to use registers in whatever way, but a [reference implementation](https://github.com/k0kubun/ruby-jit-challenge/blob/k0kubun/lib/jit/compiler.rb) 146 | used only the following registers. 147 | 148 | | Register | Purpose | 149 | |:---------|:--------| 150 | | rdi | `ec` (execution context) is set when a JIT function is called. It represents a Ruby thread. Used when you push/pop a stack frame. | 151 | | rsi | `cfp` (control frame pointer) is set when a JIT function is called. It represents a stack frame. Used when you fetch a local variable or a receiver. | 152 | | rax | A JIT function return value to be set before `ret` instruction. It can be also used as a "scratch register" to hold temporary values. | 153 | | r8 | A general-purpose register. The reference implementation used this for the 1st slot of the Ruby VM stack, `stack[0]`. | 154 | | r9 | A general-purpose register. The reference implementation used this for the 2nd slot of the Ruby VM stack, `stack[1]`. | 155 | | r10 | A general-purpose register. The reference implementation used this for the 3rd slot of the Ruby VM stack, `stack[2]`. | 156 | | r11 | A general-purpose register. The reference implementation used this for the 4th slot of the Ruby VM stack, `stack[3]`. | 157 | 158 |
159 |
160 | Compiling putnil 161 | 162 | ### Compiling putnil 163 | 164 | Open [lib/jit/compiler.rb](./lib/jit/compiler.rb) and add a case for `putnil`. 165 | 166 | ```diff 167 | # Iterate over each YARV instruction. 168 | insn_index = 0 169 | while insn_index < iseq.body.iseq_size 170 | insn = INSNS.fetch(C.rb_vm_insn_decode(iseq.body.iseq_encoded[insn_index])) 171 | case insn.name 172 | in :nop 173 | # none 174 | + in :putnil 175 | + # ... 176 | end 177 | insn_index += insn.len 178 | end 179 | ``` 180 | 181 | Let's push `nil` onto the stack. 182 | In the scope of this tutorial, it's enough to use a random register as a replacement for a stack slot. 183 | 184 | Let's say you decided to use `r8` for `stack[0]`, you could write the code as follows, for example. 185 | 186 | ```diff 187 | + STACK = [:r8] 188 | 189 | # Iterate over each YARV instruction. 190 | insn_index = 0 191 | + stack_size = 0 192 | while insn_index < iseq.body.iseq_size 193 | insn = INSNS.fetch(C.rb_vm_insn_decode(iseq.body.iseq_encoded[insn_index])) 194 | case insn.name 195 | in :nop 196 | # none 197 | in :putnil 198 | + asm.mov(STACK[stack_size], C.to_value(nil)) 199 | + stack_size += 1 200 | end 201 | insn_index += insn.len 202 | end 203 | ``` 204 | 205 | `C` is a module with useful helpers to write a JIT. 206 | `C.to_value` converts any Ruby object into its representation in the C language (and machine code). 207 | 208 | `C.to_value(nil)` is 4, so this does `asm.mov(:r8, 4)`, which means `stack[0] = nil`. 209 | This value in `r8` should be then handled by subsequent instructions like `leave`. 210 | 211 |
212 |
213 | Compiling leave 214 | 215 | ### Compiling leave 216 | 217 | `leave` instruction needs to do two things. 218 | 219 | 1. Pop a stack frame 220 | 2. Return a value 221 | 222 | A JIT function is called after a corresponding stack frame is pushed. 223 | However, the Ruby VM is not responsible for popping the stack frame after calling the JIT function. 224 | So a JIT function needs to pop it on `leave` instruction. 225 | 226 | A stack frame `cfp` is in `rsi`. The interpreter reads `ec->cfp` to fetch the current stack frame and `ec` is in `rdi`. 227 | Therefore, you can generate code to pop a stack frame as follows. 228 | 229 | ```diff 230 | STACK = [:r8] 231 | + EC = :rdi 232 | + CFP = :rsi 233 | 234 | # Iterate over each YARV instruction. 235 | insn_index = 0 236 | stack_size = 0 237 | while insn_index < iseq.body.iseq_size 238 | insn = INSNS.fetch(C.rb_vm_insn_decode(iseq.body.iseq_encoded[insn_index])) 239 | case insn.name 240 | in :nop 241 | # none 242 | in :putnil 243 | asm.mov(STACK[stack_size], C.to_value(nil)) 244 | stack_size += 1 245 | + in :leave 246 | + asm.add(CFP, C.rb_control_frame_t.size) 247 | + asm.mov([EC, C.rb_execution_context_t.offsetof(:cfp)], CFP) 248 | end 249 | insn_index += insn.len 250 | end 251 | ``` 252 | 253 | The `cfp` grows downward; `cfp -= 1` pushes a frame, and `cfp += 1` pops a frame. 254 | Here, we want to pop a frame, so we do `cfp += 1`. 255 | When we increment a pointer, `1` actually means the size of what it points to. 256 | `cfp` is called `rb_control_frame_t` in the Ruby VM, and you can get its size by `C.rb_control_frame_t.size`. 257 | 258 | To set that to `ec->cfp`, you need to get a memory address based off of `ec`. 259 | The offset of `ec->cfp` relative to the head of `ec` is in `C.rb_execution_context_t.offsetof(:cfp)`. 260 | So you can use `[EC, C.rb_execution_context_t.offsetof(:cfp)]` to get `ec->cfp`. 261 | 262 | Finally, we'll return a value from the JIT function. 
263 | You should set a stack-top value to `rax` and then put `ret` instruction. 264 | 265 | ```diff 266 | # Iterate over each YARV instruction. 267 | insn_index = 0 268 | stack_size = 0 269 | while insn_index < iseq.body.iseq_size 270 | insn = INSNS.fetch(C.rb_vm_insn_decode(iseq.body.iseq_encoded[insn_index])) 271 | case insn.name 272 | in :nop 273 | # none 274 | in :putnil 275 | asm.mov(STACK[stack_size], C.to_value(nil)) 276 | stack_size += 1 277 | in :leave 278 | asm.add(CFP, C.rb_control_frame_t.size) 279 | asm.mov([EC, C.rb_execution_context_t.offsetof(:cfp)], CFP) 280 | + asm.mov(:rax, STACK[stack_size - 1]) 281 | + asm.ret 282 | end 283 | insn_index += insn.len 284 | end 285 | ``` 286 | 287 | Now you should be able to execute `test/none.rb`. Test it as follows. 288 | 289 | ``` 290 | $ bin/ruby --rjit-dump-disasm test/none.rb 291 | 0x564e87d2c000: mov r8, 4 292 | 0x564e87d2c007: add rsi, 0x40 293 | 0x564e87d2c00b: mov qword ptr [rdi + 0x10], rsi 294 | 0x564e87d2c00f: mov rax, r8 295 | 0x564e87d2c012: ret 296 | 297 | nil 298 | ``` 299 | 300 | `rake test` should pass one test that runs `test/none.rb`. 301 | 302 | Also try changing what you're giving to `C.to_value` in `putnil` to double-check 303 | the interpreter is calling the JIT function you generated. 304 | 305 |
306 | 307 | ## 2. Compile 1 + 2 308 | 309 | Next, we'll compile something more interesting: `Integer#+`. 310 | 311 | ```rb 312 | def plus 313 | 1 + 2 314 | end 315 | ``` 316 | 317 | ### --dump=insns 318 | 319 | ``` 320 | $ ruby --dump=insns test/plus.rb 321 | ... 322 | == disasm: # 323 | 0000 putobject_INT2FIX_1_ ( 2)[LiCa] 324 | 0001 putobject 2 325 | 0003 opt_plus [CcCr] 326 | 0005 leave ( 3)[Re] 327 | ``` 328 | 329 | `plus` has four instructions: `putobject_INT2FIX_1_`, `putobject`, `opt_plus`, and `leave`. 330 | 331 | `putobject_INT2FIX_1_` is "operand unification" of `putobject 1`. 332 | `putnil` and `leave` didn't take any arguments, but `putobject` does. 333 | We call an argument of instructions an operand. 334 | At `0001`, there's `putobject` instruction, and its operand `2` is at `0002` before `opt_plus` at `0003`. 335 | At `0000`, there's `putobject_INT2FIX_1_` instruction, and its operand `INT2FIX(1)` is unified with `putobject`, 336 | so it doesn't take an operand, which makes the ISeq shorter. 337 | 338 | `putobject` (and `putobject_INT2FIX_1_`) pushes an operand to the stack. 339 | Both instructions and operands are in `iseq.body.iseq_encoded`. 340 | To get an operand for `0001 putobject` which is at `0002`, you need to look at `iseq.body.iseq_encoded[2]`. 341 | So that works like `stack << iseq.body.iseq_encoded[2]`. 342 | 343 | `opt_plus` pops two objects from the stack, calls `#+`, and pushes the result onto the stack. 344 | So it's `stack << stack.pop + stack.pop`. 345 | 346 |
347 | Compiling putobject 348 | 349 | ### Compiling putobject 350 | 351 | For `putobject_INT2FIX_1_`, you need to hard-code the operand as `1`. 352 | Instead of `INT2FIX(1)` that is used in C, you can use `C.to_value(1)` instead. 353 | So it can be: 354 | 355 | ```rb 356 | STACK = [:r8, :r9] 357 | 358 | in :putobject_INT2FIX_1_ 359 | asm.mov(STACK[stack_size], C.to_value(1)) 360 | stack_size += 1 361 | ``` 362 | 363 | For `putobject`, you need to get an operand from `iseq.body.iseq_encoded` as explained above. 364 | You could write: 365 | 366 | ```rb 367 | in :putobject 368 | operand = iseq.body.iseq_encoded[insn_index + 1] 369 | asm.mov(STACK[stack_size], operand) 370 | stack_size += 1 371 | ``` 372 | 373 |
374 | 375 |
376 | Compiling opt_plus 377 | 378 | ### Compiling opt\_plus 379 | 380 | `opt_plus` is capable of handling any `#+` method, but specifically optimizes a few methods such as `Integer#+`. 381 | In this tutorial, we're going to handle only `Integer`s. It's okay to assume operands are all `Integer`s. 382 | 383 | In CRuby, a small-enough `Integer` is expressed as `(num << 1) + 1`. 384 | So an `Integer` object `1` is expressed as `(1 << 1) + 1`, which is `3`. 385 | 386 | You'll take `(num1 << 1) + 1` and `(num2 << 1) + 1` as operands. 387 | If you just add them, the result will be `((num1 + num2) << 1) + 2`. 388 | The actual representation for `num1 + num2` is `((num1 + num2) << 1) + 1`, 389 | so you'll need to subtract 1 from it. 390 | 391 | Here's an example implementation. 392 | 393 | ```rb 394 | in :opt_plus 395 | recv = STACK[stack_size - 2] 396 | obj = STACK[stack_size - 1] 397 | 398 | asm.add(recv, obj) 399 | asm.sub(recv, 1) 400 | 401 | stack_size -= 1 402 | ``` 403 | 404 | Test those instructions with `bin/ruby --rjit-dump-disasm test/plus.rb`. 405 | 406 |
407 | 408 | ## 3. Compile fibonacci 409 | 410 | Finally, we'll have a look at the benchmark target, Fibonacci. 411 | 412 | ```rb 413 | def fib(n) 414 | if n < 2 415 | return n 416 | end 417 | return fib(n-1) + fib(n-2) 418 | end 419 | ``` 420 | 421 | ### --dump=insns 422 | 423 | ``` 424 | $ ruby --dump=insns test/fib.rb 425 | ... 426 | == disasm: # 427 | local table (size: 1, argc: 1 [opts: 0, rest: -1, post: 0, block: -1, kw: -1@-1, kwrest: -1]) 428 | [ 1] n@0 429 | 0000 getlocal_WC_0 n@0 ( 2)[LiCa] 430 | 0002 putobject 2 431 | 0004 opt_lt [CcCr] 432 | 0006 branchunless 11 433 | 0008 getlocal_WC_0 n@0 ( 3)[Li] 434 | 0010 leave [Re] 435 | 0011 putself ( 5)[Li] 436 | 0012 getlocal_WC_0 n@0 437 | 0014 putobject_INT2FIX_1_ 438 | 0015 opt_minus [CcCr] 439 | 0017 opt_send_without_block 440 | 0019 putself 441 | 0020 getlocal_WC_0 n@0 442 | 0022 putobject 2 443 | 0024 opt_minus [CcCr] 444 | 0026 opt_send_without_block 445 | 0028 opt_plus [CcCr] 446 | 0030 leave ( 6)[Re] 447 | ``` 448 | 449 | `fib` has many more instructions. 450 | 451 | `opt_minus` and `opt_lt` are like `opt_plus` except it performs `#-` and `#<` respectively. 452 | 453 | `getlocal_WC_0` is operand unification of `getlocal *, 0` where `WC` stands for a wildcard. 454 | It pushes a local variable onto the stack. 455 | 456 | `branchunless` jumps to a destination specified by an operand when a stack-top value is 457 | false or nil. 458 | 459 | `putself` pushes a receiver onto the stack. 460 | 461 | `opt_send_without_block` calls a method with a receiver and arguments on the stack. 462 | 463 |
464 | Compiling opt_minus 465 | 466 | ### Compiling opt\_minus 467 | 468 | Remember `opt_plus`. 469 | You'll take `(num1 << 1) + 1` and `(num2 << 1) + 1` as operands. 470 | If you subtract the second from the first, the result will be `((num1 - num2) << 1)`. 471 | But the actual representation for `num1 - num2` is `((num1 - num2) << 1) + 1`. 472 | So you'll need to add 1 to it. 473 | 474 | Here's an example implementation. 475 | 476 | ```rb 477 | STACK = [:r8, :r9, :r10, :r11] 478 | 479 | in :opt_minus 480 | recv = STACK[stack_size - 2] 481 | obj = STACK[stack_size - 1] 482 | 483 | asm.sub(recv, obj) 484 | asm.add(recv, 1) 485 | 486 | stack_size -= 1 487 | ``` 488 | 489 | Test the instruction with `bin/ruby --rjit-dump-disasm test/minus.rb`. 490 | 491 |
492 | 493 |
494 | Compiling getlocal 495 | 496 | ### Compiling getlocal 497 | 498 | `getlocal_WC_0` means `getlocal *, 0`. The `*` part is an operand and it has an index to the local variable from an "environment pointer" (EP). 499 | The `0` part is a "level", which shows how many levels of EPs you need to go deeper to get a local variable. 500 | This is needed when a local variable environment is nested, e.g. a block inside a method. 501 | Since it's `0` this time, you will not need to worry about digging EPs. You'll need to get the EP of the current "control frame" (`cfp`). 502 | 503 | `cfp` is in `rsi` and you can get the offset to `cfp->ep` from `C.rb_control_frame_t.offsetof(:ep)`. 504 | So `[:rsi, C.rb_control_frame_t.offsetof(:ep)]` can be used to get an EP. 505 | 506 | Once you get an EP, you need to find a local variable. The index is an operand, which can be fetched with `iseq.body.iseq_encoded[insn_index + 1]`. 507 | The index is a positive number but local variables actually live "below" the EP. So you have to negate the index. 508 | Besides, the unit of indexes is a `VALUE` type in C, which represents a Ruby object. So the index to a local variable from an EP is 509 | `-iseq.body.iseq_encoded[insn_index + 1] * C.VALUE.size`. 510 | 511 | All in all, an example implementation looks like this. 512 | 513 | ```rb 514 | in :getlocal_WC_0 515 | # Get EP 516 | asm.mov(:rax, [CFP, C.rb_control_frame_t.offsetof(:ep)]) 517 | 518 | # Load the local variable 519 | idx = iseq.body.iseq_encoded[insn_index + 1] 520 | asm.mov(STACK[stack_size], [:rax, -idx * C.VALUE.size]) 521 | 522 | stack_size += 1 523 | ``` 524 | 525 | Test the instruction with `bin/ruby --rjit-dump-disasm test/local.rb`. 526 | 527 |
528 | 529 |
530 | Compiling opt_lt 531 | 532 | ### Compiling opt\_lt 533 | 534 | Again, assume operands are `Integer`s. 535 | Comparing `(num1 << 1) + 1` and `(num2 << 1) + 1` would return the same result as comparing `num1` and `num2`. 536 | You'll use a `cmp` instruction that compares them. 537 | 538 | Once you compare the values, you'll need to generate code that conditionally returns something. 539 | `Integer#<` returns `true` or `false`. 540 | There's a family of instructions that conditionally set a value based on a prior `cmp` (or `test`). 541 | To conditionally set a value if `num1 < num2` holds based on the previous `cmp`, 542 | you can use `cmovl` (conditionally move if less). 543 | 544 | An example implementation is as follows. 545 | 546 | ```rb 547 | in :opt_lt 548 | recv = STACK[stack_size - 2] 549 | obj = STACK[stack_size - 1] 550 | 551 | asm.cmp(recv, obj) 552 | asm.mov(recv, C.to_value(false)) 553 | asm.mov(:rax, C.to_value(true)) 554 | asm.cmovl(recv, :rax) 555 | 556 | stack_size -= 1 557 | ``` 558 | 559 | Test the instruction with `bin/ruby --rjit-dump-disasm test/lt.rb`. 560 | 561 |
562 | 563 |
564 | Compiling putself 565 | 566 | ### Compiling putself 567 | 568 | The `fib` method is called without an explicit receiver. In that case, Ruby implicitly uses the receiver of the current frame (`cfp`). 569 | `cfp` is in `rsi`, and the offset to `cfp->self` (receiver) is available as `C.rb_control_frame_t.offsetof(:self)`. 570 | So `[:rsi, C.rb_control_frame_t.offsetof(:self)]` can be used to fetch a receiver. 571 | 572 | An example implementation looks like this. 573 | 574 | ```rb 575 | in :putself 576 | asm.mov(STACK[stack_size], [CFP, C.rb_control_frame_t.offsetof(:self)]) 577 | stack_size += 1 578 | ``` 579 | 580 |
581 | 582 |
583 | Compiling opt_send_without_block 584 | 585 | ### Compiling opt\_send\_without\_block 586 | 587 | Congratulations on making it to this stage. You've accomplished a lot already. 588 | I hope you've enjoyed your journey. 589 | We're going to tackle a couple of instructions that may be the most challenging part in this tutorial. 590 | If you get lost, consider just copying the code that is shown later and playing with it. 591 | 592 | `opt_send_without_block` supports various method calls. 593 | However, in this tutorial, it's okay to assume any method call is a Ruby method call. 594 | 595 | As long as you use `--rjit-call-threshold=3` (compile methods that have been called three times), 596 | the cache of all `opt_send_without_block` instructions is "warmed up" in all test scripts. 597 | It means that the cache has a reference to an ISeq. For simplicity in this tutorial, 598 | assume that it's not gonna change and you won't need to invalidate it. 599 | 600 | `opt_send_without_block` takes a "call data" operand, which is a pair of "call info" and "call cache". 601 | A call data object can be instantiated with `cd = C.rb_call_data.new(iseq.body.iseq_encoded[insn_index + 1])`. 602 | 603 | A call info is in `cd.ci`, which has information like the number of arguments. 604 | `ci` has a packed data structure which cannot be accessed like a normal struct. 605 | So you need to get the number of arguments using a special helper, `C.vm_ci_argc(ci)`. 606 | 607 | A call cache has a reference to an ISeq. `cd.cc.cme_.def.body.iseq.iseqptr` has a callee ISeq. 608 | For better performance, we want to compile everything and directly jump to an already-compiled address. 609 | You can call `compile(callee_iseq)` if `callee_iseq.body.jit_func` is still `0` (NULL in C). 610 | 611 | Once a callee function becomes ready, we need to prepare for calling a method. 
612 | Since our `getlocal` implementation gets a local variable on the stack relative to an EP, 613 | we have to write the arguments to the stack, where they become local variables of the callee. 614 | 615 | The VM stack looks like this when you call a method. 616 | 617 | ``` 618 | | locals | cme | block_handler | frame type (callee EP) | stack bottom (callee SP) | 619 | ``` 620 | 621 | For locals, we want to put arguments. There's a "stack pointer" in `SP` which points to 622 | a free stack slot above the stack top. You could write values to it and keep bumping the SP until you finish writing all arguments. 623 | Once it's done, SP needs to be bumped three more times to accommodate a "cme" (callable method entry), a block handler, and a frame type. 624 | You don't need to use them in this tutorial. Just bump SP by 3 to get a callee SP. EP is one slot below that. 625 | 626 | Set those `sp` and `ep` fields on the callee `cfp` after bumping `cfp`. 627 | Remember what you did at the `leave` instruction; pushing a frame means subtracting `C.rb_control_frame_t.size` from `cfp`. 628 | Since `putself` refers to it, you may set `cfp->self` as well, using `C.rb_control_frame_t.offsetof(:self)`. 629 | Note, however, that we don't actually use the receiver in `cfp` for method dispatch. You may just skip it. 630 | 631 | Before and after calling a callee function, you have to save and restore registers you're using for the stack 632 | so that the callee function can use them. 633 | We've used `r8`, `r9`, `r10`, and `r11` as `STACK`. You can use `push` instruction to push a register to the machine stack, 634 | and then use `pop` instruction in the reverse order to restore a register from the machine stack. 635 | 636 | An example implementation looks like this. 
637 | 638 | ```rb 639 | in :opt_send_without_block 640 | # Compile the callee ISEQ 641 | cd = C.rb_call_data.new(iseq.body.iseq_encoded[insn_index + 1]) 642 | callee_iseq = cd.cc.cme_.def.body.iseq.iseqptr 643 | if callee_iseq.body.jit_func == 0 644 | compile(callee_iseq) 645 | end 646 | 647 | # Get SP 648 | asm.mov(:rax, [CFP, C.rb_control_frame_t.offsetof(:sp)]) 649 | # Spill arguments 650 | C.vm_ci_argc(cd.ci).times do |i| 651 | asm.mov([:rax, C.VALUE.size * i], STACK[stack_size - C.vm_ci_argc(cd.ci) + i]) 652 | end 653 | 654 | # Push cfp: ec->cfp = cfp - 1 655 | asm.sub(CFP, C.rb_control_frame_t.size) 656 | asm.mov([EC, C.rb_execution_context_t.offsetof(:cfp)], CFP) 657 | # Set SP 658 | asm.add(:rax, C.VALUE.size * (C.vm_ci_argc(cd.ci) + 3)) 659 | asm.mov([CFP, C.rb_control_frame_t.offsetof(:sp)], :rax) 660 | # Set EP 661 | asm.sub(:rax, C.VALUE.size) 662 | asm.mov([CFP, C.rb_control_frame_t.offsetof(:ep)], :rax) 663 | # Set receiver 664 | asm.mov(:rax, STACK[stack_size - C.vm_ci_argc(cd.ci) - 1]) 665 | asm.mov([CFP, C.rb_control_frame_t.offsetof(:self)], :rax) 666 | 667 | # Save stack registers 668 | STACK.each do |reg| 669 | asm.push(reg) 670 | end 671 | 672 | # Call the JIT func 673 | asm.call(callee_iseq.body.jit_func) 674 | 675 | # Pop stack registers 676 | STACK.reverse_each do |reg| 677 | asm.pop(reg) 678 | end 679 | 680 | # Set a return value 681 | asm.mov(STACK[stack_size - C.vm_ci_argc(cd.ci) - 1], :rax) 682 | 683 | stack_size -= C.vm_ci_argc(cd.ci) 684 | ``` 685 | 686 | Test the instruction with `bin/ruby --rjit-dump-disasm test/send.rb`. 687 | 688 | This code has some optimization opportunities when you need to support only `fib`. 689 | In fact, my [reference implementation](https://github.com/k0kubun/ruby-jit-challenge/blob/k0kubun/lib/jit/compiler.rb) 690 | is already a bit faster than that. It could be even faster, for example, if you use registers for local variables. 691 | 692 |
693 | 694 |
695 | Compiling branchunless 696 | 697 | ### Compiling branchunless 698 | 699 | It's almost there. This will be the last instruction you'll compile to run `fib`. 700 | This is probably the most interesting and challenging part of this tutorial. 701 | 702 | Supporting this instruction requires a major refactoring on the boilerplate code. 703 | It's because past test scripts run instructions from top to bottom whereas 704 | you need to jump to different instruction indexes based on runtime values. 705 | 706 | There's not only the jump support, but also complexity in dependencies. 707 | Let's have a look at `ruby --dump=insns test/branch.rb`. 708 | 709 | ``` 710 | == disasm: # 711 | local table (size: 1, argc: 1 [opts: 0, rest: -1, post: 0, block: -1, kw: -1@-1, kwrest: -1]) 712 | [ 1] flag@0 713 | 0000 getlocal_WC_0 flag@0 ( 2)[LiCa] 714 | 0002 branchunless 6 715 | 716 | 0004 putobject_INT2FIX_1_ ( 3)[Li] 717 | 0005 leave ( 7)[Re] 718 | 719 | 0006 putobject_INT2FIX_0_ ( 5)[Li] 720 | 0007 leave ( 7)[Re] 721 | ``` 722 | 723 | I inserted newlines into the actual output to indicate "basic block" boundaries. 724 | There are three blocks: the first block from `0000`, the second block from `0004`, and the third block from `0006`. 725 | 726 | Let's say you start compiling the first block, you'll need to generate code to jump to the second block or the third block. 727 | However, the second block and the third block have not been compiled yet. You cannot compile it from top to bottom as before. 728 | 729 | Then, why not compile it from the second block and the third block, and then compile the first block? 730 | Sure, it works for this example. But what if the second block calls the first block? 731 | It's a circular dependency. And it's exactly what `fib` does. 732 | So you have to design the compiler in a way that it supports circular dependencies. 733 | 734 | One suggested solution is to write out dummy addresses first, and then rewrite them after all blocks are compiled. 
735 | Rewriting a past address requires you to figure out the address that `Assembler` used. 736 | The `Assembler` in the boilerplate doesn't have such an interface, so you have to define it yourself. 737 | 738 | For example, you could add this kind of interface. 739 | 740 | ```diff 741 | --- a/lib/jit/assembler.rb 742 | +++ b/lib/jit/assembler.rb 743 | @@ -50,6 +50,7 @@ module JIT 744 | end 745 | 746 | def assemble(addr) 747 | + set_start_addrs(addr) 748 | resolve_rel32(addr) 749 | resolve_labels 750 | 751 | @@ -905,6 +876,12 @@ module JIT 752 | @labels[label] = @bytes.size 753 | end 754 | 755 | + # Mark the starting addresses of a branch 756 | + def branch(branch) 757 | + @branches[@bytes.size] << branch 758 | + yield 759 | + end 760 | + 761 | private 762 | 763 | def insn(prefix: 0, opcode:, rd: nil, mod_rm: nil, disp: nil, imm: nil) 764 | @@ -1010,6 +987,14 @@ module JIT 765 | [Rel32.new(addr), Rel32Pad, Rel32Pad, Rel32Pad] 766 | end 767 | 768 | + def set_start_addrs(write_addr) 769 | + (@bytes.size + 1).times do |index| 770 | + @branches.fetch(index, []).each do |branch| 771 | + branch.start_addr = write_addr + index 772 | + end 773 | + end 774 | + end 775 | ``` 776 | 777 | Then whatever object you pass to `#branch` will get `start_addr` assigned. 778 | If the object also has a Proc to re-compile a branch, you can just buffer those objects 779 | and call them later. 780 | 781 | To simplify the problem, you could split an ISeq into basic blocks, and just compile 782 | each block as before. Here's an example logic that works for the test scripts in this tutorial. 
783 | 784 | ```rb 785 | # Get a list of basic blocks in a method 786 | def split_blocks(iseq, insn_index: 0, stack_size: 0, split_indexes: []) 787 | return [] if split_indexes.include?(insn_index) 788 | split_indexes << insn_index 789 | 790 | block = { start_index: insn_index, end_index: nil, stack_size: } 791 | blocks = [block] 792 | 793 | while insn_index < iseq.body.iseq_size 794 | insn = INSNS.fetch(C.rb_vm_insn_decode(iseq.body.iseq_encoded[insn_index])) 795 | case insn.name 796 | when :branchunless 797 | block[:end_index] = insn_index 798 | stack_size += sp_inc(iseq, insn_index) 799 | next_index = insn_index + insn.len 800 | blocks += split_blocks(iseq, insn_index: next_index, stack_size:, split_indexes:) 801 | blocks += split_blocks(iseq, insn_index: next_index + iseq.body.iseq_encoded[insn_index + 1], stack_size:, split_indexes:) 802 | break 803 | when :leave 804 | block[:end_index] = insn_index 805 | break 806 | else 807 | stack_size += sp_inc(iseq, insn_index) 808 | insn_index += insn.len 809 | end 810 | end 811 | 812 | blocks 813 | end 814 | 815 | # Get a stack size increase for a YARV instruction. 816 | def sp_inc(iseq, insn_index) 817 | insn = INSNS.fetch(C.rb_vm_insn_decode(iseq.body.iseq_encoded[insn_index])) 818 | case insn.name 819 | in :opt_plus | :opt_minus | :opt_lt | :leave | :branchunless 820 | -1 821 | in :nop 822 | 0 823 | in :putnil | :putobject_INT2FIX_0_ | :putobject_INT2FIX_1_ | :putobject | :putself | :getlocal_WC_0 824 | 1 825 | in :opt_send_without_block 826 | cd = C.rb_call_data.new(iseq.body.iseq_encoded[insn_index + 1]) 827 | -C.vm_ci_argc(cd.ci) 828 | end 829 | end 830 | ``` 831 | 832 | Each block is represented as a Hash that has `start_index`, `end_index`, and an initial `stack_size`. 833 | The first block's first address should be set to `iseq.body.jit_func`. 834 | 835 | Finally, let's compile `branchunless`. With `blocks` made by `split_blocks` and `branches = []`, an example implementation 836 | looks like this. 
837 | 838 | ```rb 839 | Branch = Struct.new(:start_addr, :compile) 840 | 841 | in :branchunless 842 | next_index = insn_index + insn.len 843 | next_block = blocks.find { |block| block[:start_index] == next_index } 844 | 845 | jump_index = next_index + iseq.body.iseq_encoded[insn_index + 1] 846 | jump_block = blocks.find { |block| block[:start_index] == jump_index } 847 | 848 | # This `test` sets ZF only for Qnil and Qfalse, which lets jz jump. 849 | asm.test(STACK[stack_size - 1], ~C.to_value(nil)) 850 | 851 | branch = Branch.new 852 | branch.compile = proc do |asm| 853 | dummy_addr = @jit_buf + JIT_BUF_SIZE 854 | asm.jz(jump_block.fetch(:start_addr, dummy_addr)) 855 | asm.jmp(next_block.fetch(:start_addr, dummy_addr)) 856 | end 857 | asm.branch(branch) do 858 | branch.compile.call(asm) 859 | end 860 | branches << branch 861 | ``` 862 | 863 | The `branches` are then re-compiled with: 864 | 865 | ```rb 866 | branches.each do |branch| 867 | with_addr(branch[:start_addr]) do 868 | asm = Assembler.new 869 | branch.compile.call(asm) 870 | write(asm) 871 | end 872 | end 873 | ``` 874 | 875 | ```rb 876 | def with_addr(addr) 877 | jit_pos = @jit_pos 878 | @jit_pos = addr - @jit_buf 879 | yield 880 | ensure 881 | @jit_pos = jit_pos 882 | end 883 | ``` 884 | 885 | That's all. Test it with `bin/ruby --rjit-dump-disasm test/branch.rb`. 886 | If everything is done correctly, `bin/ruby test/fib.rb` should also work. 887 | 888 |
889 | 890 | ## 4. Benchmark 891 | 892 | Let's measure the performance. 893 | [bin/bench](./bin/bench) allows you to compare your JIT (ruby-jit) and other CRuby JITs. 894 | 895 | ``` 896 | $ bin/bench 897 | Calculating ------------------------------------- 898 | no-jit rjit yjit ruby-jit 899 | fib(32) 5.250 19.481 32.841 58.145 i/s 900 | 901 | Comparison: 902 | fib(32) 903 | ruby-jit: 58.1 i/s 904 | yjit: 32.8 i/s - 1.77x slower 905 | rjit: 19.5 i/s - 2.98x slower 906 | no-jit: 5.2 i/s - 11.08x slower 907 | ``` 908 | -------------------------------------------------------------------------------- /lib/jit/assembler.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | module JIT 3 | # 8-bit memory access 4 | class BytePtr < Data.define(:reg, :disp); end 5 | 6 | # 32-bit memory access 7 | class DwordPtr < Data.define(:reg, :disp); end 8 | 9 | # 64-bit memory access 10 | QwordPtr = Array 11 | 12 | # SystemV x64 calling convention 13 | C_ARGS = [:rdi, :rsi, :rdx, :rcx, :r8, :r9] 14 | C_RET = :rax 15 | 16 | # https://cdrdv2.intel.com/v1/dl/getContent/671110 17 | # Mostly an x86_64 assembler, but this also has some stuff that is useful for any architecture. 18 | class Assembler 19 | # rel8 jumps are made with labels 20 | class Label < Data.define(:id, :name); end 21 | 22 | # rel32 is inserted as [Rel32, Rel32Pad..] 
and converted on #resolve_rel32 23 | class Rel32 < Data.define(:addr); end 24 | Rel32Pad = Object.new 25 | 26 | # A set of ModR/M values encoded on #insn 27 | class ModRM < Data.define(:mod, :reg, :rm); end 28 | Mod00 = 0b00 # Mod 00: [reg] 29 | Mod01 = 0b01 # Mod 01: [reg]+disp8 30 | Mod10 = 0b10 # Mod 10: [reg]+disp32 31 | Mod11 = 0b11 # Mod 11: reg 32 | 33 | # REX = 0100WR0B 34 | REX_B = 0b01000001 35 | REX_R = 0b01000100 36 | REX_W = 0b01001000 37 | 38 | # Operand matchers 39 | R32 = -> (op) { op.is_a?(Symbol) && r32?(op) } 40 | R64 = -> (op) { op.is_a?(Symbol) && r64?(op) } 41 | IMM8 = -> (op) { op.is_a?(Integer) && imm8?(op) } 42 | IMM32 = -> (op) { op.is_a?(Integer) && imm32?(op) } 43 | IMM64 = -> (op) { op.is_a?(Integer) && imm64?(op) } 44 | 45 | def initialize 46 | @bytes = [] 47 | @labels = {} 48 | @label_id = 0 49 | @branches = Hash.new { |h, k| h[k] = [] } 50 | end 51 | 52 | def assemble(addr) 53 | resolve_rel32(addr) 54 | resolve_labels 55 | 56 | write_bytes(addr) 57 | 58 | @bytes.size 59 | ensure 60 | @bytes.clear 61 | end 62 | 63 | def size 64 | @bytes.size 65 | end 66 | 67 | # 68 | # Instructions 69 | # 70 | 71 | # ADD: dst = dst + src 72 | def add(dst, src) 73 | case [dst, src] 74 | # ADD r/m64, imm8 (Mod 00: [reg]) 75 | in [QwordPtr[R64 => dst_reg], IMM8 => src_imm] 76 | # REX.W + 83 /0 ib 77 | # MI: Operand 1: ModRM:r/m (r, w), Operand 2: imm8/16/32 78 | insn( 79 | prefix: REX_W, 80 | opcode: 0x83, 81 | mod_rm: ModRM[mod: Mod00, reg: 0, rm: dst_reg], 82 | imm: imm8(src_imm), 83 | ) 84 | # ADD r/m64, imm8 (Mod 11: reg) 85 | in [R64 => dst_reg, IMM8 => src_imm] 86 | # REX.W + 83 /0 ib 87 | # MI: Operand 1: ModRM:r/m (r, w), Operand 2: imm8/16/32 88 | insn( 89 | prefix: REX_W, 90 | opcode: 0x83, 91 | mod_rm: ModRM[mod: Mod11, reg: 0, rm: dst_reg], 92 | imm: imm8(src_imm), 93 | ) 94 | # ADD r/m64 imm32 (Mod 11: reg) 95 | in [R64 => dst_reg, IMM32 => src_imm] 96 | # REX.W + 81 /0 id 97 | # MI: Operand 1: ModRM:r/m (r, w), Operand 2: imm8/16/32 98 | 
insn( 99 | prefix: REX_W, 100 | opcode: 0x81, 101 | mod_rm: ModRM[mod: Mod11, reg: 0, rm: dst_reg], 102 | imm: imm32(src_imm), 103 | ) 104 | # ADD r/m64, r64 (Mod 11: reg) 105 | in [R64 => dst_reg, R64 => src_reg] 106 | # REX.W + 01 /r 107 | # MR: Operand 1: ModRM:r/m (r, w), Operand 2: ModRM:reg (r) 108 | insn( 109 | prefix: REX_W, 110 | opcode: 0x01, 111 | mod_rm: ModRM[mod: Mod11, reg: src_reg, rm: dst_reg], 112 | ) 113 | end 114 | end 115 | 116 | # AND: dst = dst & src 117 | def and(dst, src) 118 | case [dst, src] 119 | # AND r/m64, imm8 (Mod 11: reg) 120 | in [R64 => dst_reg, IMM8 => src_imm] 121 | # REX.W + 83 /4 ib 122 | # MI: Operand 1: ModRM:r/m (r, w), Operand 2: imm8/16/32 123 | insn( 124 | prefix: REX_W, 125 | opcode: 0x83, 126 | mod_rm: ModRM[mod: Mod11, reg: 4, rm: dst_reg], 127 | imm: imm8(src_imm), 128 | ) 129 | # AND r/m64, imm32 (Mod 11: reg) 130 | in [R64 => dst_reg, IMM32 => src_imm] 131 | # REX.W + 81 /4 id 132 | # MI: Operand 1: ModRM:r/m (r, w), Operand 2: imm8/16/32 133 | insn( 134 | prefix: REX_W, 135 | opcode: 0x81, 136 | mod_rm: ModRM[mod: Mod11, reg: 4, rm: dst_reg], 137 | imm: imm32(src_imm), 138 | ) 139 | # AND r64, r/m64 (Mod 01: [reg]+disp8) 140 | in [R64 => dst_reg, QwordPtr[R64 => src_reg, IMM8 => src_disp]] 141 | # REX.W + 23 /r 142 | # RM: Operand 1: ModRM:reg (r, w), Operand 2: ModRM:r/m (r) 143 | insn( 144 | prefix: REX_W, 145 | opcode: 0x23, 146 | mod_rm: ModRM[mod: Mod01, reg: dst_reg, rm: src_reg], 147 | disp: imm8(src_disp), 148 | ) 149 | end 150 | end 151 | 152 | # CALL: dst() 153 | def call(dst) 154 | case dst 155 | # CALL rel32 156 | in Integer => dst_addr 157 | # E8 cd 158 | # D: Operand 1: Offset 159 | insn(opcode: 0xe8, imm: rel32(dst_addr)) 160 | # CALL r/m64 (Mod 11: reg) 161 | in R64 => dst_reg 162 | # FF /2 163 | # M: Operand 1: ModRM:r/m (r) 164 | insn( 165 | opcode: 0xff, 166 | mod_rm: ModRM[mod: Mod11, reg: 2, rm: dst_reg], 167 | ) 168 | end 169 | end 170 | 171 | # CMOVE: dst = src if left == right 172 | def 
cmove(dst, src) 173 | case [dst, src] 174 | # CMOVE r64, r/m64 (Mod 11: reg) 175 | in [R64 => dst_reg, R64 => src_reg] 176 | # REX.W + 0F 44 /r 177 | # RM: Operand 1: ModRM:reg (r, w), Operand 2: ModRM:r/m (r) 178 | insn( 179 | prefix: REX_W, 180 | opcode: [0x0f, 0x44], 181 | mod_rm: ModRM[mod: Mod11, reg: dst_reg, rm: src_reg], 182 | ) 183 | end 184 | end 185 | 186 | # CMOVG: dst = src if left > right 187 | def cmovg(dst, src) 188 | case [dst, src] 189 | # CMOVG r64, r/m64 (Mod 11: reg) 190 | in [R64 => dst_reg, R64 => src_reg] 191 | # REX.W + 0F 4F /r 192 | # RM: Operand 1: ModRM:reg (r, w), Operand 2: ModRM:r/m (r) 193 | insn( 194 | prefix: REX_W, 195 | opcode: [0x0f, 0x4f], 196 | mod_rm: ModRM[mod: Mod11, reg: dst_reg, rm: src_reg], 197 | ) 198 | end 199 | end 200 | 201 | # CMOVGE: dst = src if left >= right 202 | def cmovge(dst, src) 203 | case [dst, src] 204 | # CMOVGE r64, r/m64 (Mod 11: reg) 205 | in [R64 => dst_reg, R64 => src_reg] 206 | # REX.W + 0F 4D /r 207 | # RM: Operand 1: ModRM:reg (r, w), Operand 2: ModRM:r/m (r) 208 | insn( 209 | prefix: REX_W, 210 | opcode: [0x0f, 0x4d], 211 | mod_rm: ModRM[mod: Mod11, reg: dst_reg, rm: src_reg], 212 | ) 213 | end 214 | end 215 | 216 | # CMOVL: dst = src if left < right 217 | def cmovl(dst, src) 218 | case [dst, src] 219 | # CMOVL r64, r/m64 (Mod 11: reg) 220 | in [R64 => dst_reg, R64 => src_reg] 221 | # REX.W + 0F 4C /r 222 | # RM: Operand 1: ModRM:reg (r, w), Operand 2: ModRM:r/m (r) 223 | insn( 224 | prefix: REX_W, 225 | opcode: [0x0f, 0x4c], 226 | mod_rm: ModRM[mod: Mod11, reg: dst_reg, rm: src_reg], 227 | ) 228 | end 229 | end 230 | 231 | # CMOVLE: dst = src if left <= right 232 | def cmovle(dst, src) 233 | case [dst, src] 234 | # CMOVLE r64, r/m64 (Mod 11: reg) 235 | in [R64 => dst_reg, R64 => src_reg] 236 | # REX.W + 0F 4E /r 237 | # RM: Operand 1: ModRM:reg (r, w), Operand 2: ModRM:r/m (r) 238 | insn( 239 | prefix: REX_W, 240 | opcode: [0x0f, 0x4e], 241 | mod_rm: ModRM[mod: Mod11, reg: dst_reg, rm: 
src_reg], 242 | ) 243 | end 244 | end 245 | 246 | # CMOVNE: dst = src if left != right 247 | def cmovne(dst, src) 248 | case [dst, src] 249 | # CMOVNE r64, r/m64 (Mod 11: reg) 250 | in [R64 => dst_reg, R64 => src_reg] 251 | # REX.W + 0F 45 /r 252 | # RM: Operand 1: ModRM:reg (r, w), Operand 2: ModRM:r/m (r) 253 | insn( 254 | prefix: REX_W, 255 | opcode: [0x0f, 0x45], 256 | mod_rm: ModRM[mod: Mod11, reg: dst_reg, rm: src_reg], 257 | ) 258 | end 259 | end 260 | 261 | # CMOVNZ: dst = src if left != 0 262 | def cmovnz(dst, src) 263 | case [dst, src] 264 | # CMOVNZ r64, r/m64 (Mod 11: reg) 265 | in [R64 => dst_reg, R64 => src_reg] 266 | # REX.W + 0F 45 /r 267 | # RM: Operand 1: ModRM:reg (r, w), Operand 2: ModRM:r/m (r) 268 | insn( 269 | prefix: REX_W, 270 | opcode: [0x0f, 0x45], 271 | mod_rm: ModRM[mod: Mod11, reg: dst_reg, rm: src_reg], 272 | ) 273 | end 274 | end 275 | 276 | # CMOVZ: dst = src if left == 0 277 | def cmovz(dst, src) 278 | case [dst, src] 279 | # CMOVZ r64, r/m64 (Mod 11: reg) 280 | in [R64 => dst_reg, R64 => src_reg] 281 | # REX.W + 0F 44 /r 282 | # RM: Operand 1: ModRM:reg (r, w), Operand 2: ModRM:r/m (r) 283 | insn( 284 | prefix: REX_W, 285 | opcode: [0x0f, 0x44], 286 | mod_rm: ModRM[mod: Mod11, reg: dst_reg, rm: src_reg], 287 | ) 288 | # CMOVZ r64, r/m64 (Mod 01: [reg]+disp8) 289 | in [R64 => dst_reg, QwordPtr[R64 => src_reg, IMM8 => src_disp]] 290 | # REX.W + 0F 44 /r 291 | # RM: Operand 1: ModRM:reg (r, w), Operand 2: ModRM:r/m (r) 292 | insn( 293 | prefix: REX_W, 294 | opcode: [0x0f, 0x44], 295 | mod_rm: ModRM[mod: Mod01, reg: dst_reg, rm: src_reg], 296 | disp: imm8(src_disp), 297 | ) 298 | end 299 | end 300 | 301 | # CMP: Compare left and right 302 | def cmp(left, right) 303 | case [left, right] 304 | # CMP r/m8, imm8 (Mod 01: [reg]+disp8) 305 | in [BytePtr[R64 => left_reg, IMM8 => left_disp], IMM8 => right_imm] 306 | # 80 /7 ib 307 | # MI: Operand 1: ModRM:r/m (r), Operand 2: imm8/16/32 308 | insn( 309 | opcode: 0x80, 310 | mod_rm: ModRM[mod: 
Mod01, reg: 7, rm: left_reg], 311 | disp: left_disp, 312 | imm: imm8(right_imm), 313 | ) 314 | # CMP r/m32, imm32 (Mod 01: [reg]+disp8) 315 | in [DwordPtr[R64 => left_reg, IMM8 => left_disp], IMM32 => right_imm] 316 | # 81 /7 id 317 | # MI: Operand 1: ModRM:r/m (r), Operand 2: imm8/16/32 318 | insn( 319 | opcode: 0x81, 320 | mod_rm: ModRM[mod: Mod01, reg: 7, rm: left_reg], 321 | disp: left_disp, 322 | imm: imm32(right_imm), 323 | ) 324 | # CMP r/m64, imm8 (Mod 01: [reg]+disp8) 325 | in [QwordPtr[R64 => left_reg, IMM8 => left_disp], IMM8 => right_imm] 326 | # REX.W + 83 /7 ib 327 | # MI: Operand 1: ModRM:r/m (r), Operand 2: imm8/16/32 328 | insn( 329 | prefix: REX_W, 330 | opcode: 0x83, 331 | mod_rm: ModRM[mod: Mod01, reg: 7, rm: left_reg], 332 | disp: left_disp, 333 | imm: imm8(right_imm), 334 | ) 335 | # CMP r/m64, imm8 (Mod 10: [reg]+disp32) 336 | in [QwordPtr[R64 => left_reg, IMM32 => left_disp], IMM8 => right_imm] 337 | # REX.W + 83 /7 ib 338 | # MI: Operand 1: ModRM:r/m (r), Operand 2: imm8/16/32 339 | insn( 340 | prefix: REX_W, 341 | opcode: 0x83, 342 | mod_rm: ModRM[mod: Mod10, reg: 7, rm: left_reg], 343 | disp: imm32(left_disp), 344 | imm: imm8(right_imm), 345 | ) 346 | # CMP r/m64, imm8 (Mod 11: reg) 347 | in [R64 => left_reg, IMM8 => right_imm] 348 | # REX.W + 83 /7 ib 349 | # MI: Operand 1: ModRM:r/m (r), Operand 2: imm8/16/32 350 | insn( 351 | prefix: REX_W, 352 | opcode: 0x83, 353 | mod_rm: ModRM[mod: Mod11, reg: 7, rm: left_reg], 354 | imm: imm8(right_imm), 355 | ) 356 | # CMP r/m64, imm32 (Mod 11: reg) 357 | in [R64 => left_reg, IMM32 => right_imm] 358 | # REX.W + 81 /7 id 359 | # MI: Operand 1: ModRM:r/m (r), Operand 2: imm8/16/32 360 | insn( 361 | prefix: REX_W, 362 | opcode: 0x81, 363 | mod_rm: ModRM[mod: Mod11, reg: 7, rm: left_reg], 364 | imm: imm32(right_imm), 365 | ) 366 | # CMP r/m64, r64 (Mod 01: [reg]+disp8) 367 | in [QwordPtr[R64 => left_reg, IMM8 => left_disp], R64 => right_reg] 368 | # REX.W + 39 /r 369 | # MR: Operand 1: ModRM:r/m (r), 
Operand 2: ModRM:reg (r) 370 | insn( 371 | prefix: REX_W, 372 | opcode: 0x39, 373 | mod_rm: ModRM[mod: Mod01, reg: right_reg, rm: left_reg], 374 | disp: left_disp, 375 | ) 376 | # CMP r/m64, r64 (Mod 10: [reg]+disp32) 377 | in [QwordPtr[R64 => left_reg, IMM32 => left_disp], R64 => right_reg] 378 | # REX.W + 39 /r 379 | # MR: Operand 1: ModRM:r/m (r), Operand 2: ModRM:reg (r) 380 | insn( 381 | prefix: REX_W, 382 | opcode: 0x39, 383 | mod_rm: ModRM[mod: Mod10, reg: right_reg, rm: left_reg], 384 | disp: imm32(left_disp), 385 | ) 386 | # CMP r/m64, r64 (Mod 11: reg) 387 | in [R64 => left_reg, R64 => right_reg] 388 | # REX.W + 39 /r 389 | # MR: Operand 1: ModRM:r/m (r), Operand 2: ModRM:reg (r) 390 | insn( 391 | prefix: REX_W, 392 | opcode: 0x39, 393 | mod_rm: ModRM[mod: Mod11, reg: right_reg, rm: left_reg], 394 | ) 395 | end 396 | end 397 | 398 | # JBE: Jump to dst if left <= right (unsigned) 399 | def jbe(dst) 400 | case dst 401 | # JBE rel8 402 | in Label => dst_label 403 | # 76 cb 404 | insn(opcode: 0x76, imm: dst_label) 405 | # JBE rel32 406 | in Integer => dst_addr 407 | # 0F 86 cd 408 | insn(opcode: [0x0f, 0x86], imm: rel32(dst_addr)) 409 | end 410 | end 411 | 412 | # JE: Jump to dst if left == right 413 | def je(dst) 414 | case dst 415 | # JE rel8 416 | in Label => dst_label 417 | # 74 cb 418 | insn(opcode: 0x74, imm: dst_label) 419 | # JE rel32 420 | in Integer => dst_addr 421 | # 0F 84 cd 422 | insn(opcode: [0x0f, 0x84], imm: rel32(dst_addr)) 423 | end 424 | end 425 | 426 | # JL: Jump to dst if left < right 427 | def jl(dst) 428 | case dst 429 | # JL rel32 430 | in Integer => dst_addr 431 | # 0F 8C cd 432 | insn(opcode: [0x0f, 0x8c], imm: rel32(dst_addr)) 433 | end 434 | end 435 | 436 | # JMP: Jump to dst 437 | def jmp(dst) 438 | case dst 439 | # JMP rel8 440 | in Label => dst_label 441 | # EB cb 442 | insn(opcode: 0xeb, imm: dst_label) 443 | # JMP rel32 444 | in Integer => dst_addr 445 | # E9 cd 446 | insn(opcode: 0xe9, imm: rel32(dst_addr)) 447 | # JMP r/m64 (Mod 01: 
[reg]+disp8) 448 | in QwordPtr[R64 => dst_reg, IMM8 => dst_disp] 449 | # FF /4 450 | insn(opcode: 0xff, mod_rm: ModRM[mod: Mod01, reg: 4, rm: dst_reg], disp: dst_disp) 451 | # JMP r/m64 (Mod 11: reg) 452 | in R64 => dst_reg 453 | # FF /4 454 | insn(opcode: 0xff, mod_rm: ModRM[mod: Mod11, reg: 4, rm: dst_reg]) 455 | end 456 | end 457 | 458 | # JNE: Jump to dst if left != right 459 | def jne(dst) 460 | case dst 461 | # JNE rel8 462 | in Label => dst_label 463 | # 75 cb 464 | insn(opcode: 0x75, imm: dst_label) 465 | # JNE rel32 466 | in Integer => dst_addr 467 | # 0F 85 cd 468 | insn(opcode: [0x0f, 0x85], imm: rel32(dst_addr)) 469 | end 470 | end 471 | 472 | # JNZ: Jump to dst if left != 0 473 | def jnz(dst) 474 | case dst 475 | # JNZ rel8 476 | in Label => dst_label 477 | # 75 cb 478 | insn(opcode: 0x75, imm: dst_label) 479 | # JNZ rel32 480 | in Integer => dst_addr 481 | # 0F 85 cd 482 | insn(opcode: [0x0f, 0x85], imm: rel32(dst_addr)) 483 | end 484 | end 485 | 486 | # JO: Jump to dst if overflow 487 | def jo(dst) 488 | case dst 489 | # JO rel32 490 | in Integer => dst_addr 491 | # 0F 80 cd 492 | insn(opcode: [0x0f, 0x80], imm: rel32(dst_addr)) 493 | end 494 | end 495 | 496 | # JZ: Jump to dst if left == 0 497 | def jz(dst) 498 | case dst 499 | # JZ rel8 500 | in Label => dst_label 501 | # 74 cb 502 | insn(opcode: 0x74, imm: dst_label) 503 | # JZ rel32 504 | in Integer => dst_addr 505 | # 0F 84 cd 506 | insn(opcode: [0x0f, 0x84], imm: rel32(dst_addr)) 507 | end 508 | end 509 | 510 | # LEA: dst = &src 511 | def lea(dst, src) 512 | case [dst, src] 513 | # LEA r64,m (Mod 01: [reg]+disp8) 514 | in [R64 => dst_reg, QwordPtr[R64 => src_reg, IMM8 => src_disp]] 515 | # REX.W + 8D /r 516 | # RM: Operand 1: ModRM:reg (w), Operand 2: ModRM:r/m (r) 517 | insn( 518 | prefix: REX_W, 519 | opcode: 0x8d, 520 | mod_rm: ModRM[mod: Mod01, reg: dst_reg, rm: src_reg], 521 | disp: imm8(src_disp), 522 | ) 523 | # LEA r64,m (Mod 10: [reg]+disp32) 524 | in [R64 => dst_reg, QwordPtr[R64 => 
# MOV: dst = src
#
# Picks an encoding by pattern-matching first on +dst+ and then on +src+, and
# hands the pieces to #insn. Supported combinations:
#
#   dst R32                  <- DwordPtr[R64, IMM8] | IMM32
#   dst R64                  <- QwordPtr[R64] | QwordPtr[R64, IMM8] |
#                               QwordPtr[R64, IMM32] | R64 | IMM32 | IMM64
#   dst DwordPtr[R64, IMM8]  <- IMM32
#   dst QwordPtr[R64]        <- IMM32 | R64
#   dst QwordPtr[R64, IMM8]  <- IMM32 | R64
#   dst QwordPtr[R64, IMM32] <- IMM32 | R64
#
# Pattern order matters: narrower immediates/displacements are tried before
# wider ones so the shortest encoding wins.
#
# @param dst destination operand (register or memory operand)
# @param src source operand (register, memory operand or immediate)
# @raise [NoMatchingPatternError] when the operand combination is unsupported
def mov(dst, src)
  case dst
  in R32 => dst_reg
    case src
    # MOV r32 r/m32 (Mod 01: [reg]+disp8)
    in DwordPtr[R64 => src_reg, IMM8 => src_disp]
      # 8B /r
      # RM: Operand 1: ModRM:reg (w), Operand 2: ModRM:r/m (r)
      insn(
        opcode: 0x8b,
        mod_rm: ModRM[mod: Mod01, reg: dst_reg, rm: src_reg],
        disp: src_disp,
      )
    # MOV r32, imm32 (Mod 11: reg)
    in IMM32 => src_imm
      # B8+ rd id
      # OI: Operand 1: opcode + rd (w), Operand 2: imm8/16/32/64
      insn(
        opcode: 0xb8,
        rd: dst_reg,
        imm: imm32(src_imm),
      )
    end
  in R64 => dst_reg
    case src
    # MOV r64, r/m64 (Mod 00: [reg])
    in QwordPtr[R64 => src_reg]
      # REX.W + 8B /r
      # RM: Operand 1: ModRM:reg (w), Operand 2: ModRM:r/m (r)
      insn(
        prefix: REX_W,
        opcode: 0x8b,
        mod_rm: ModRM[mod: Mod00, reg: dst_reg, rm: src_reg],
      )
    # MOV r64, r/m64 (Mod 01: [reg]+disp8)
    in QwordPtr[R64 => src_reg, IMM8 => src_disp]
      # REX.W + 8B /r
      # RM: Operand 1: ModRM:reg (w), Operand 2: ModRM:r/m (r)
      insn(
        prefix: REX_W,
        opcode: 0x8b,
        mod_rm: ModRM[mod: Mod01, reg: dst_reg, rm: src_reg],
        disp: src_disp,
      )
    # MOV r64, r/m64 (Mod 10: [reg]+disp32)
    in QwordPtr[R64 => src_reg, IMM32 => src_disp]
      # REX.W + 8B /r
      # RM: Operand 1: ModRM:reg (w), Operand 2: ModRM:r/m (r)
      insn(
        prefix: REX_W,
        opcode: 0x8b,
        mod_rm: ModRM[mod: Mod10, reg: dst_reg, rm: src_reg],
        disp: imm32(src_disp),
      )
    # MOV r64, r/m64 (Mod 11: reg)
    in R64 => src_reg
      # REX.W + 8B /r
      # RM: Operand 1: ModRM:reg (w), Operand 2: ModRM:r/m (r)
      insn(
        prefix: REX_W,
        opcode: 0x8b,
        mod_rm: ModRM[mod: Mod11, reg: dst_reg, rm: src_reg],
      )
    # MOV r/m64, imm32 (Mod 11: reg)
    in IMM32 => src_imm
      # REX.W + C7 /0 id
      # MI: Operand 1: ModRM:r/m (w), Operand 2: imm8/16/32/64
      insn(
        prefix: REX_W,
        opcode: 0xc7,
        mod_rm: ModRM[mod: Mod11, reg: 0, rm: dst_reg],
        imm: imm32(src_imm),
      )
    # MOV r64, imm64
    in IMM64 => src_imm
      # REX.W + B8+ rd io
      # OI: Operand 1: opcode + rd (w), Operand 2: imm8/16/32/64
      insn(
        prefix: REX_W,
        opcode: 0xb8,
        rd: dst_reg,
        imm: imm64(src_imm),
      )
    end
  in DwordPtr[R64 => dst_reg, IMM8 => dst_disp]
    case src
    # MOV r/m32, imm32 (Mod 01: [reg]+disp8)
    in IMM32 => src_imm
      # C7 /0 id
      # MI: Operand 1: ModRM:r/m (w), Operand 2: imm8/16/32/64
      insn(
        opcode: 0xc7,
        mod_rm: ModRM[mod: Mod01, reg: 0, rm: dst_reg],
        disp: dst_disp,
        imm: imm32(src_imm),
      )
    end
  in QwordPtr[R64 => dst_reg]
    case src
    # MOV r/m64, imm32 (Mod 00: [reg])
    in IMM32 => src_imm
      # REX.W + C7 /0 id
      # MI: Operand 1: ModRM:r/m (w), Operand 2: imm8/16/32/64
      insn(
        prefix: REX_W,
        opcode: 0xc7,
        mod_rm: ModRM[mod: Mod00, reg: 0, rm: dst_reg],
        imm: imm32(src_imm),
      )
    # MOV r/m64, r64 (Mod 00: [reg])
    in R64 => src_reg
      # REX.W + 89 /r
      # MR: Operand 1: ModRM:r/m (w), Operand 2: ModRM:reg (r)
      insn(
        prefix: REX_W,
        opcode: 0x89,
        mod_rm: ModRM[mod: Mod00, reg: src_reg, rm: dst_reg],
      )
    end
  in QwordPtr[R64 => dst_reg, IMM8 => dst_disp]
    # Optimize encoding when disp is 0: Mod 00 drops the displacement byte.
    # This is skipped when the base register encodes as r/m = 0b101
    # (rbp / r13): with Mod 00 that r/m value selects disp32 / RIP-relative
    # addressing rather than [reg], so those bases must keep Mod 01 with an
    # explicit zero disp8.
    if dst_disp == 0 && reg_code(dst_reg) != 0b101
      return mov([dst_reg], src)
    end

    case src
    # MOV r/m64, imm32 (Mod 01: [reg]+disp8)
    in IMM32 => src_imm
      # REX.W + C7 /0 id
      # MI: Operand 1: ModRM:r/m (w), Operand 2: imm8/16/32/64
      insn(
        prefix: REX_W,
        opcode: 0xc7,
        mod_rm: ModRM[mod: Mod01, reg: 0, rm: dst_reg],
        disp: dst_disp,
        imm: imm32(src_imm),
      )
    # MOV r/m64, r64 (Mod 01: [reg]+disp8)
    in R64 => src_reg
      # REX.W + 89 /r
      # MR: Operand 1: ModRM:r/m (w), Operand 2: ModRM:reg (r)
      insn(
        prefix: REX_W,
        opcode: 0x89,
        mod_rm: ModRM[mod: Mod01, reg: src_reg, rm: dst_reg],
        disp: dst_disp,
      )
    end
  in QwordPtr[R64 => dst_reg, IMM32 => dst_disp]
    case src
    # MOV r/m64, imm32 (Mod 10: [reg]+disp32)
    in IMM32 => src_imm
      # REX.W + C7 /0 id
      # MI: Operand 1: ModRM:r/m (w), Operand 2: imm8/16/32/64
      insn(
        prefix: REX_W,
        opcode: 0xc7,
        mod_rm: ModRM[mod: Mod10, reg: 0, rm: dst_reg],
        disp: imm32(dst_disp),
        imm: imm32(src_imm),
      )
    # MOV r/m64, r64 (Mod 10: [reg]+disp32)
    in R64 => src_reg
      # REX.W + 89 /r
      # MR: Operand 1: ModRM:r/m (w), Operand 2: ModRM:reg (r)
      insn(
        prefix: REX_W,
        opcode: 0x89,
        mod_rm: ModRM[mod: Mod10, reg: src_reg, rm: dst_reg],
        disp: imm32(dst_disp),
      )
    end
  end
end
# OR: dst = dst | src
#
# Supports a 64-bit register destination combined with an 8-bit immediate,
# a 32-bit immediate, or a [reg]+disp8 memory source. Other operand
# combinations raise NoMatchingPatternError.
def or(dst, src)
  case [dst, src]
  # OR r/m64, imm8 (Mod 11: reg)
  in [R64 => target, IMM8 => value]
    # REX.W + 83 /1 ib
    # MI: Operand 1: ModRM:r/m (r, w), Operand 2: imm8/16/32
    operand = ModRM[mod: Mod11, reg: 1, rm: target]
    insn(prefix: REX_W, opcode: 0x83, mod_rm: operand, imm: imm8(value))
  # OR r/m64, imm32 (Mod 11: reg)
  in [R64 => target, IMM32 => value]
    # REX.W + 81 /1 id
    # MI: Operand 1: ModRM:r/m (r, w), Operand 2: imm8/16/32
    operand = ModRM[mod: Mod11, reg: 1, rm: target]
    insn(prefix: REX_W, opcode: 0x81, mod_rm: operand, imm: imm32(value))
  # OR r64, r/m64 (Mod 01: [reg]+disp8)
  in [R64 => target, QwordPtr[R64 => base, IMM8 => offset]]
    # REX.W + 0B /r
    # RM: Operand 1: ModRM:reg (r, w), Operand 2: ModRM:r/m (r)
    operand = ModRM[mod: Mod01, reg: target, rm: base]
    insn(prefix: REX_W, opcode: 0x0b, mod_rm: operand, disp: imm8(offset))
  end
end

# PUSH: Push src onto the stack
#
# Only a 64-bit register source is supported.
def push(src)
  case src
  # PUSH r64
  in R64 => reg
    # 50+rd
    # O: Operand 1: opcode + rd (r)
    insn(opcode: 0x50, rd: reg)
  end
end

# POP: Pop from the stack to dst
#
# Only a 64-bit register destination is supported.
def pop(dst)
  case dst
  # POP r64
  in R64 => reg
    # 58+ rd
    # O: Operand 1: opcode + rd (r)
    insn(opcode: 0x58, rd: reg)
  end
end

# RET: Return
def ret
  # RET (C3)
  # Near return: A return to a procedure within the current code segment
  insn(opcode: 0xc3)
end

# SAR: dst = dst >> src
#
# Only "64-bit register, imm8 shift count" is supported.
def sar(dst, src)
  case [dst, src]
  in [R64 => target, IMM8 => count]
    # REX.W + C1 /7 ib
    # MI: Operand 1: ModRM:r/m (r, w), Operand 2: imm8
    operand = ModRM[mod: Mod11, reg: 7, rm: target]
    insn(prefix: REX_W, opcode: 0xc1, mod_rm: operand, imm: imm8(count))
  end
end
# SUB: dst = dst - src
#
# Supports a 64-bit register destination with either an 8-bit immediate or
# another 64-bit register as the source. Other operand combinations raise
# NoMatchingPatternError.
def sub(dst, src)
  case [dst, src]
  # SUB r/m64, imm8 (Mod 11: reg)
  in [R64 => minuend, IMM8 => amount]
    # REX.W + 83 /5 ib
    # MI: Operand 1: ModRM:r/m (r, w), Operand 2: imm8/16/32
    operand = ModRM[mod: Mod11, reg: 5, rm: minuend]
    insn(prefix: REX_W, opcode: 0x83, mod_rm: operand, imm: imm8(amount))
  # SUB r/m64, r64 (Mod 11: reg)
  in [R64 => minuend, R64 => subtrahend]
    # REX.W + 29 /r
    # MR: Operand 1: ModRM:r/m (r, w), Operand 2: ModRM:reg (r)
    operand = ModRM[mod: Mod11, reg: subtrahend, rm: minuend]
    insn(prefix: REX_W, opcode: 0x29, mod_rm: operand)
  end
end
# TEST: Compare left and right
#
# Supported operand pairs, tried in this order:
#   BytePtr[R64, IMM8] / IMM8, QwordPtr[R64, IMM8] / IMM32,
#   QwordPtr[R64, IMM32] / IMM32, R64 / IMM32, R32 / R32, R64 / R64.
# Other operand combinations raise NoMatchingPatternError.
def test(left, right)
  case [left, right]
  # TEST r/m8*, imm8 (Mod 01: [reg]+disp8)
  in [BytePtr[R64 => base, IMM8 => offset], IMM8 => mask]
    # REX + F6 /0 ib
    # MI: Operand 1: ModRM:r/m (r), Operand 2: imm8/16/32
    operand = ModRM[mod: Mod01, reg: 0, rm: base]
    insn(opcode: 0xf6, mod_rm: operand, disp: offset, imm: imm8(mask))
  # TEST r/m64, imm32 (Mod 01: [reg]+disp8)
  in [QwordPtr[R64 => base, IMM8 => offset], IMM32 => mask]
    # REX.W + F7 /0 id
    # MI: Operand 1: ModRM:r/m (r), Operand 2: imm8/16/32
    operand = ModRM[mod: Mod01, reg: 0, rm: base]
    insn(prefix: REX_W, opcode: 0xf7, mod_rm: operand, disp: offset, imm: imm32(mask))
  # TEST r/m64, imm32 (Mod 10: [reg]+disp32)
  in [QwordPtr[R64 => base, IMM32 => offset], IMM32 => mask]
    # REX.W + F7 /0 id
    # MI: Operand 1: ModRM:r/m (r), Operand 2: imm8/16/32
    operand = ModRM[mod: Mod10, reg: 0, rm: base]
    insn(prefix: REX_W, opcode: 0xf7, mod_rm: operand, disp: imm32(offset), imm: imm32(mask))
  # TEST r/m64, imm32 (Mod 11: reg)
  in [R64 => lhs, IMM32 => mask]
    # REX.W + F7 /0 id
    # MI: Operand 1: ModRM:r/m (r), Operand 2: imm8/16/32
    operand = ModRM[mod: Mod11, reg: 0, rm: lhs]
    insn(prefix: REX_W, opcode: 0xf7, mod_rm: operand, imm: imm32(mask))
  # TEST r/m32, r32 (Mod 11: reg)
  in [R32 => lhs, R32 => rhs]
    # 85 /r
    # MR: Operand 1: ModRM:r/m (r), Operand 2: ModRM:reg (r)
    operand = ModRM[mod: Mod11, reg: rhs, rm: lhs]
    insn(opcode: 0x85, mod_rm: operand)
  # TEST r/m64, r64 (Mod 11: reg)
  in [R64 => lhs, R64 => rhs]
    # REX.W + 85 /r
    # MR: Operand 1: ModRM:r/m (r), Operand 2: ModRM:reg (r)
    operand = ModRM[mod: Mod11, reg: rhs, rm: lhs]
    insn(prefix: REX_W, opcode: 0x85, mod_rm: operand)
  end
end

# XOR: dst = dst ^ src
#
# Only the "64-bit register, 64-bit register" form is supported.
def xor(dst, src)
  case [dst, src]
  # XOR r/m64, r64 (Mod 11: reg)
  in [R64 => target, R64 => other]
    # REX.W + 31 /r
    # MR: Operand 1: ModRM:r/m (r, w), Operand 2: ModRM:reg (r)
    operand = ModRM[mod: Mod11, reg: other, rm: target]
    insn(prefix: REX_W, opcode: 0x31, mod_rm: operand)
  end
end

#
# Utilities
#

# Create a Label with the next sequential id and the given name.
def new_label(name)
  @label_id += 1
  Label.new(id: @label_id, name: name)
end

# Record the current end of the byte buffer as the position of +label+.
#
# @param [RubyVM::RJIT::Assembler::Label] label
def write_label(label)
  @labels.store(label, @bytes.size)
end

private

# Append one encoded instruction to @bytes in the order:
# prefix (only when non-zero), opcode, ModR/M byte, displacement, immediate.
#
# @param prefix [Integer] initial prefix bits; REX_B / REX_R are OR-ed in
#   when +rd+ or the ModR/M operands are extended registers
# @param opcode [Integer, Array<Integer>] opcode byte(s); the register code
#   of +rd+ is added to it when +rd+ is given
# @param rd [Symbol, nil] register encoded into the opcode itself
# @param mod_rm [ModRM, nil] mod / reg / rm fields; reg and rm may be
#   register Symbols or raw Integers
# @param disp [Integer, Array<Integer>, nil] displacement byte(s)
# @param imm [Array<Integer>, nil] immediate bytes
def insn(prefix: 0, opcode:, rd: nil, mod_rm: nil, disp: nil, imm: nil)
  # Work out the prefix (and resolve register symbols) before emitting anything
  if rd
    prefix |= REX_B if extended_reg?(rd)
    opcode += reg_code(rd)
  end

  reg_field = rm_field = nil
  if mod_rm
    reg_field = mod_rm.reg
    rm_field = mod_rm.rm
    if reg_field.is_a?(Symbol)
      prefix |= REX_R if extended_reg?(reg_field)
      reg_field = reg_code(reg_field)
    end
    if rm_field.is_a?(Symbol)
      prefix |= REX_B if extended_reg?(rm_field)
      rm_field = reg_code(rm_field)
    end
  end

  # Emit the bytes
  @bytes << prefix if prefix > 0
  @bytes.concat(Array(opcode))
  @bytes << encode_mod_rm(mod: mod_rm.mod, reg: reg_field, rm: rm_field) if mod_rm
  @bytes.concat(Array(disp)) if disp
  @bytes.push(*imm) if imm
end

# The 3-bit register number of +reg+, without the "extended" flag.
def reg_code(reg)
  code, _extended = reg_code_extended(reg)
  code
end
# Table 2-2. 32-Bit Addressing Forms with the ModR/M Byte
#
#  7  6  5  4  3  2  1  0
# +--+--+--+--+--+--+--+--+
# | Mod |   Reg/   | R/M  |
# |     |  Opcode  |      |
# +--+--+--+--+--+--+--+--+
#
# The r/m field can specify a register as an operand or it can be combined
# with the mod field to encode an addressing mode.
#
# /0../7: the Reg/Opcode field carries an opcode extension digit
# /r:     the Reg/Opcode field carries a register operand
#
# @param mod [Integer] 2-bit addressing mode
# @param reg [Integer] 3-bit register code or opcode extension
# @param rm [Integer] 3-bit register code
# @return [Integer] the packed ModR/M byte
# @raise [ArgumentError] when a field does not fit in its bit width
def encode_mod_rm(mod:, reg: 0, rm: 0)
  raise ArgumentError, "too large Mod: #{mod}" if mod > 0b11
  raise ArgumentError, "too large Reg/Opcode: #{reg}" if reg > 0b111
  raise ArgumentError, "too large R/M: #{rm}" if rm > 0b111

  (mod << 6) + (reg << 3) + rm
end

# ib: 1 byte
#
# @return [Array<Integer>] one signed byte
# @raise [ArgumentError] when imm is rejected by imm8?
def imm8(imm)
  unless imm8?(imm)
    raise ArgumentError, "unexpected imm8: #{imm}"
  end
  [imm].pack('c').unpack('c*') # TODO: consider uimm
end

# id: 4 bytes
#
# @return [Array<Integer>] four signed bytes, least significant first
# @raise [ArgumentError] when imm is rejected by imm32?
def imm32(imm)
  unless imm32?(imm)
    raise ArgumentError, "unexpected imm32: #{imm}"
  end
  # 'l<' pins the byte order to little-endian instead of relying on the host.
  [imm].pack('l<').unpack('c*') # TODO: consider uimm
end

# io: 8 bytes
#
# @return [Array<Integer>] eight unsigned bytes, least significant first
# @raise [ArgumentError] when imm is rejected by imm64?
def imm64(imm)
  unless imm64?(imm)
    raise ArgumentError, "unexpected imm64: #{imm}"
  end
  imm_bytes(imm, 8)
end

# Split an integer into +num_bytes+ little-endian bytes (each 0..255).
#
# Accepts both readings of the bit pattern: unsigned values up to
# 2**(8 * num_bytes) - 1 and signed values down to -2**(8 * num_bytes - 1).
# Negative values are emitted in two's complement, so every value allowed by
# imm64? can be encoded.
#
# @raise [ArgumentError] when imm does not fit in num_bytes bytes
def imm_bytes(imm, num_bytes)
  bit_width = num_bytes * 8
  lowest = -(1 << (bit_width - 1))
  highest = (1 << bit_width) - 1
  unless imm.between?(lowest, highest)
    raise ArgumentError, "unexpected imm with #{num_bytes} bytes: #{imm}"
  end
  # Masking after an arithmetic shift yields two's complement for negatives.
  Array.new(num_bytes) { |index| (imm >> (8 * index)) & 0xff }
end

# Placeholder for a 4-byte relative address: a Rel32 marker followed by three
# pads, so the buffer already has its final length. The marker is turned into
# real bytes by #resolve_rel32.
def rel32(addr)
  [Rel32.new(addr), Rel32Pad, Rel32Pad, Rel32Pad]
end
# Replace every Rel32 placeholder in @bytes with the 4-byte offset from the
# end of that placeholder to the absolute address it targets.
#
# @param write_addr [Integer] address at which @bytes[0] will be written
# @raise [RuntimeError] when an offset is rejected by imm32?
def resolve_rel32(write_addr)
  @bytes.each_with_index do |entry, position|
    next unless entry.is_a?(Rel32)

    # The offset is measured from just past the 4-byte rel32 field
    following_addr = write_addr + position + 4
    offset = entry.addr - following_addr
    raise "unexpected offset: #{offset}" unless imm32?(offset)

    # Overwrites the marker and the pads that follow it
    @bytes[position, 4] = imm32(offset)
  end
end

# Replace every Label placeholder in @bytes with the 1-byte offset from the
# end of that placeholder to the index recorded for the label in @labels.
#
# @raise [KeyError] when a label has no recorded position
# @raise [RuntimeError] when an offset is rejected by imm8?
def resolve_labels
  @bytes.each_with_index do |entry, position|
    next unless entry.is_a?(Label)

    # The offset is measured from just past the 1-byte rel8 field
    target = @labels.fetch(entry)
    offset = target - (position + 1)
    raise "unexpected offset: #{offset}" unless imm8?(offset)

    @bytes[position] = offset
  end
end

# Copy the assembled bytes to memory starting at +addr+.
#
# @param addr [Integer] destination address; must be writable for
#   @bytes.size bytes
def write_bytes(addr)
  machine_code = @bytes.pack('c*')
  destination = Fiddle::Pointer.new(addr)
  destination[0, @bytes.size] = machine_code
end
# Predicates for classifying assembler operands (immediate widths and
# register names). Mixed into Assembler both as instance and class methods.
module OperandMatcher
  # Register names that need no REX extension bit; the row index is the
  # 3-bit register code.
  LEGACY_REG_NAMES = [
    %i[al ax eax rax],
    %i[cl cx ecx rcx],
    %i[dl dx edx rdx],
    %i[bl bx ebx rbx],
    %i[ah sp esp rsp],
    %i[ch bp ebp rbp],
    %i[dh si esi rsi],
    %i[bh di edi rdi],
  ].freeze

  # Register names that need a REX extension bit; the row index is the
  # 3-bit register code.
  EXTENDED_REG_NAMES = [
    %i[r8b r8w r8d r8],
    %i[r9b r9w r9d r9],
    %i[r10b r10w r10d r10],
    %i[r11b r11w r11d r11],
    %i[r12b r12w r12d r12],
    %i[r13b r13w r13d r13],
    %i[r14b r14w r14d r14],
    %i[r15b r15w r15d r15],
  ].freeze

  # Does imm fit in a signed byte?
  def imm8?(imm)
    (-0x80..0x7f).cover?(imm)
  end

  # Does imm fit in a signed 32-bit integer?
  def imm32?(imm)
    (-0x8000_0000..0x7fff_ffff).cover?(imm) # TODO: consider uimm
  end

  # Does imm fit in 64 bits, read either as signed or as unsigned?
  def imm64?(imm)
    (-0x8000_0000_0000_0000..0xffff_ffff_ffff_ffff).cover?(imm)
  end

  # Is reg a 32-bit register name (e.g. :eax, :r8d)?
  #
  # @raise [ArgumentError] when reg is not a known register name
  def r32?(reg)
    extended_reg?(reg) ? reg.end_with?('d') : reg.start_with?('e')
  end

  # Is reg a 64-bit register name (e.g. :rax, :r8)?
  #
  # @raise [ArgumentError] when reg is not a known register name
  def r64?(reg)
    extended_reg?(reg) ? reg.match?(/\Ar\d+\z/) : reg.start_with?('r')
  end

  # Does reg require a REX extension bit?
  #
  # @raise [ArgumentError] when reg is not a known register name
  def extended_reg?(reg)
    _code, extended = reg_code_extended(reg)
    extended
  end

  # Look up the encoding of a register name.
  #
  # @return [Array(Integer, Boolean)] the 3-bit register code and whether the
  #   register is an extended one
  # @raise [ArgumentError] when reg is not a known register name
  def reg_code_extended(reg)
    if (code = LEGACY_REG_NAMES.index { |names| names.include?(reg) })
      [code, false]
    elsif (code = EXTENDED_REG_NAMES.index { |names| names.include?(reg) })
      [code, true]
    else
      raise ArgumentError, "unexpected reg: #{reg.inspect}"
    end
  end
end

# Make the matchers callable on Assembler instances and on the class itself.
class Assembler
  include OperandMatcher
  extend OperandMatcher
end