├── Makefile ├── v.mod ├── .editorconfig ├── error └── error.v ├── examples ├── nop.s ├── puts.s ├── hello.s ├── mem_ref.s ├── call.s ├── loop.s ├── print_int.s ├── fibonacci.s ├── fizzbuzz.s ├── rule110.s └── gol.s ├── Dockerfile ├── .gitignore ├── main.s ├── token └── token.v ├── .github ├── workflows │ └── ci.yml └── ISSUE_TEMPLATE │ ├── feature-request.yml │ └── bug_report.yml ├── LICENSE.md ├── example.sh ├── vas.v ├── README.md ├── encoder ├── addr.v ├── stack_op.v ├── directive.v ├── indir_helper.v ├── sse_instr.v ├── register.v ├── arith.v └── encoder.v ├── CODE_OF_CONDUCT.md ├── lexer └── lexer.v └── elf └── elf.v /Makefile: -------------------------------------------------------------------------------- 1 | 2 | build: 3 | v . -o vas 4 | 5 | clean: 6 | rm *.o *.out ./vas examples/*.o 7 | 8 | -------------------------------------------------------------------------------- /v.mod: -------------------------------------------------------------------------------- 1 | Module { 2 | name: 'vas' 3 | description: 'Assembler written in V' 4 | version: '0.0.0' 5 | license: 'MIT' 6 | dependencies: [] 7 | } 8 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | [*] 2 | charset = utf-8 3 | end_of_line = lf 4 | insert_final_newline = true 5 | trim_trailing_whitespace = true 6 | 7 | [*.v] 8 | indent_style = tab 9 | indent_size = 4 10 | -------------------------------------------------------------------------------- /error/error.v: -------------------------------------------------------------------------------- 1 | module error 2 | 3 | import token 4 | 5 | pub fn print(pos token.Position, msg string) { 6 | err := '\u001b[1m$pos.file_name:$pos.line: \x1b[91merror\x1b[0m\u001b[1m: $msg \033[0m' 7 | eprintln(err) 8 | } 9 | 10 | -------------------------------------------------------------------------------- /examples/nop.s: 
-------------------------------------------------------------------------------- 1 | # ../vas nop.s && ld -o nop.out nop.o && ./nop.out 2 | # echo $? 3 | # > 0 4 | 5 | .global _start 6 | 7 | _start: 8 | nopq 9 | nopq 10 | nopq 11 | movq $60, %rax 12 | movq $0, %rdi 13 | syscall 14 | 15 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | 2 | FROM --platform=linux/x86_64 thevlang/vlang:debian-dev 3 | 4 | RUN apt-get update 5 | RUN apt-get -y install sudo && \ 6 | sudo apt-get -y install build-essential && \ 7 | sudo apt-get -y install git 8 | 9 | VOLUME /root/env 10 | WORKDIR /root/env 11 | 12 | 13 | -------------------------------------------------------------------------------- /examples/puts.s: -------------------------------------------------------------------------------- 1 | # ../vas puts.s && gcc -o puts.out puts.o && ./puts.out 2 | # > Hello world! 3 | 4 | .data 5 | msg: 6 | .string "Hello world!" 7 | 8 | .text 9 | .global main 10 | main: 11 | leaq msg(%rip), %rdi 12 | callq puts 13 | movq $0, %rax 14 | retq 15 | 16 | .section .note.GNU-stack, "" 17 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Binaries for programs and plugins 2 | main 3 | vas 4 | *.exe 5 | *.exe~ 6 | *.so 7 | *.dylib 8 | *.dll 9 | *.out 10 | *.o 11 | 12 | 13 | # Ignore binary output folders 14 | bin/ 15 | *.py 16 | test.s 17 | nasm.s 18 | label_test.s 19 | 20 | # Ignore common editor/system specific metadata 21 | .DS_Store 22 | .idea/ 23 | .vscode/ 24 | *.iml 25 | -------------------------------------------------------------------------------- /main.s: -------------------------------------------------------------------------------- 1 | # Hello world! 
2 | 3 | .global _start 4 | 5 | .data 6 | msg: 7 | .string "Hello, world!\n" 8 | 9 | .text 10 | _start: 11 | pushq %rbp 12 | movq %rsp, %rbp 13 | subq $16, %rsp 14 | 15 | movq $1, %rax 16 | movq $1, %rdi 17 | movq $msg, %rsi 18 | movq $14, %rdx 19 | syscall 20 | 21 | movq $60, %rax 22 | movq $0, %rdi 23 | syscall 24 | 25 | -------------------------------------------------------------------------------- /examples/hello.s: -------------------------------------------------------------------------------- 1 | # ../vas hello.s && ld -o hello.out hello.o && ./hello.out 2 | # > Hello, world! 3 | 4 | .global _start 5 | 6 | .text 7 | _start: 8 | pushq %rbp 9 | movq %rsp, %rbp 10 | subq $16, %rsp 11 | 12 | movq $1, %rax 13 | movq $1, %rdi 14 | leaq msg(%rip), %rsi 15 | movq $14, %rdx 16 | syscall 17 | 18 | movq $60, %rax 19 | movq $0, %rdi 20 | syscall 21 | 22 | .data 23 | msg: 24 | .string "Hello, world!\n" 25 | 26 | -------------------------------------------------------------------------------- /token/token.v: -------------------------------------------------------------------------------- 1 | module token 2 | 3 | pub struct Position { 4 | pub mut: 5 | file_name string 6 | line int 7 | } 8 | 9 | pub enum TokenKind { 10 | ident 11 | number 12 | string 13 | comma 14 | colon 15 | lpar 16 | rpar 17 | plus 18 | minus 19 | mul 20 | div 21 | percent 22 | dolor 23 | eol // end of line 24 | eof // end of file 25 | } 26 | 27 | pub struct Token { 28 | pub: 29 | kind TokenKind 30 | pos Position 31 | lit string 32 | } 33 | 34 | -------------------------------------------------------------------------------- /examples/mem_ref.s: -------------------------------------------------------------------------------- 1 | # ../vas mem_ref.s && ld -o mem_ref.out mem_ref.o && ./mem_ref.out 2 | # > world! 3 | 4 | .global _start 5 | 6 | .data 7 | msg: 8 | .string "Hello, world!\n" 9 | 10 | .text 11 | _start: 12 | pushq %rbp 13 | movq %rsp, %rbp 14 | subq $16 + 1, %rsp 15 | 16 | # print "world!" 
17 | movq $1, %rax 18 | movq $1, %rdi 19 | leaq msg+7(%rip), %rsi 20 | movq $8, %rdx 21 | syscall 22 | 23 | movq $60, %rax 24 | movq $0, %rdi 25 | syscall 26 | 27 | -------------------------------------------------------------------------------- /examples/call.s: -------------------------------------------------------------------------------- 1 | # ../vas call.s && ld -o call.out call.o && ./call.out 2 | # > Hello, world! 3 | 4 | .global _start 5 | 6 | .data 7 | msg: 8 | .string "Hello, world!\n" 9 | 10 | .text 11 | print_hello: 12 | movq $1, %rax 13 | movq $1, %rdi 14 | leaq msg(%rip), %rsi 15 | movq $14, %rdx 16 | syscall 17 | retq 18 | 19 | _start: 20 | pushq %rbp 21 | movq %rsp, %rbp 22 | subq $16, %rsp 23 | 24 | callq print_hello 25 | 26 | movq $60, %rax 27 | movq $0, %rdi 28 | syscall 29 | 30 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | branches: [main] 6 | jobs: 7 | build: 8 | strategy: 9 | matrix: 10 | os: ["ubuntu-latest"] 11 | runs-on: ${{ matrix.os }} 12 | 13 | steps: 14 | - name: Install V 15 | uses: vlang/setup-v@v1 16 | with: 17 | check-latest: true 18 | 19 | - name: Checkout ${{ github.event.repository.name }} 20 | uses: actions/checkout@v2 21 | 22 | - name: Build ${{ github.event.repository.name }} 23 | run: v . 
-enable-globals 24 | 25 | - name: Run Tests 26 | run: ./vas main.s -o main.o && ld -o main main.o && ./main 27 | 28 | 29 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | Copyright 2022 Ibuki Yoshida 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-------------------------------------------------------------------------------- /examples/loop.s: -------------------------------------------------------------------------------- 1 | # ../vas -o loop.o loop.s && gcc -o loop.out loop.o && ./loop.out 2 | # message 1 3 | # message 2 4 | # message 3 5 | # ------------------- 6 | # message 1 7 | # message 2 8 | # message 3 9 | # ------------------- 10 | # message 1 11 | # message 2 12 | # message 3 13 | # ------------------- 14 | # message 1 15 | # message 2 16 | # message 3 17 | # ------------------- 18 | # message 1 19 | # message 2 20 | # message 3 21 | # ------------------- 22 | 23 | .global main 24 | 25 | .data 26 | msg1: 27 | .string "message 1" 28 | msg2: 29 | .string "message 2" 30 | line: 31 | .string "-------------------" 32 | 33 | .text 34 | foo: 35 | retq 36 | 37 | main: 38 | pushq %rbp 39 | movq %rsp, %rbp 40 | subq $16, %rsp 41 | 42 | # int a = 0 43 | movq $0, 0-4(%rbp) 44 | 45 | loop_main: 46 | # a == 5 -> jmp loop_end 47 | cmpq $5, 0-4(%rbp) 48 | je loop_end 49 | 50 | leaq msg1(%rip), %rdi 51 | callq puts 52 | 53 | leaq msg2(%rip), %rdi 54 | callq puts 55 | 56 | leaq msg3(%rip), %rdi 57 | callq puts 58 | 59 | leaq line(%rip), %rdi 60 | callq puts 61 | 62 | # a = a + 1 63 | addq $1, 0-4(%rbp) 64 | 65 | # jmp loop_main 66 | jmp loop_main 67 | 68 | loop_end: 69 | movq $0, %rdi 70 | callq exit 71 | 72 | .section .note.GNU-stack, "" 73 | 74 | .data 75 | msg3: 76 | .string "message 3" 77 | 78 | -------------------------------------------------------------------------------- /example.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | 5 | ./vas -o ./examples/fibonacci.o ./examples/fibonacci.s && gcc -o ./examples/fibonacci.out ./examples/fibonacci.o && ./examples/fibonacci.out 6 | 7 | ./vas -o ./examples/fizzbuzz.o ./examples/fizzbuzz.s && gcc -o ./examples/fizzbuzz.out ./examples/fizzbuzz.o && ./examples/fizzbuzz.out 8 | 9 | ./vas -o 
./examples/loop.o ./examples/loop.s && gcc -o ./examples/loop.out ./examples/loop.o && ./examples/loop.out 10 | 11 | ./vas -o ./examples/call.o ./examples/call.s && ld -o ./examples/call.out ./examples/call.o && ./examples/call.out 12 | 13 | ./vas -o ./examples/mem_ref.o ./examples/mem_ref.s && ld -o ./examples/mem_ref.out ./examples/mem_ref.o && ./examples/mem_ref.out 14 | 15 | ./vas -o ./examples/puts.o ./examples/puts.s && gcc -o ./examples/puts.out ./examples/puts.o && ./examples/puts.out 16 | 17 | ./vas -o ./examples/hello.o ./examples/hello.s && ld -o ./examples/hello.out ./examples/hello.o && ./examples/hello.out 18 | 19 | ./vas -o ./examples/nop.o ./examples/nop.s && ld -o ./examples/nop.out ./examples/nop.o && ./examples/nop.out && echo $? 20 | 21 | ./vas -o ./examples/print_int.o examples/print_int.s && gcc -o examples/print_int.out examples/print_int.o && ./examples/print_int.out 22 | 23 | ./vas -o ./examples/rule110.o examples/rule110.s && gcc -o examples/rule110.out examples/rule110.o && ./examples/rule110.out 24 | 25 | -------------------------------------------------------------------------------- /examples/print_int.s: -------------------------------------------------------------------------------- 1 | 2 | .text 3 | .global main 4 | dump: 5 | pushq %rbp 6 | movq %rsp, %rbp 7 | subq $64, %rsp 8 | movq %rdi, -8(%rbp) 9 | movq $1, -56(%rbp) 10 | movl $32, %eax 11 | subq -56(%rbp), %rax 12 | movb $10, -48(%rbp,%rax) 13 | .LBB0_1: 14 | movq -8(%rbp), %rax 15 | movl $10, %ecx 16 | xorl %edx, %edx 17 | divq %rcx 18 | addq $48, %rdx 19 | movb %dl, %cl 20 | movl $32, %eax 21 | subq -56(%rbp), %rax 22 | subq $1, %rax 23 | movb %cl, -48(%rbp,%rax) 24 | movq -56(%rbp), %rax 25 | addq $1, %rax 26 | movq %rax, -56(%rbp) 27 | movq -8(%rbp), %rax 28 | movl $10, %ecx 29 | xorl %edx, %edx 30 | divq %rcx 31 | movq %rax, -8(%rbp) 32 | cmpq $0, -8(%rbp) 33 | jne .LBB0_1 34 | movl $32, %eax 35 | subq -56(%rbp), %rax 36 | leaq -48(%rbp), %rsi 37 | addq %rax, %rsi 38 | 
movq -56(%rbp), %rdx 39 | movl $1, %edi 40 | callq write 41 | addq $64, %rsp 42 | popq %rbp 43 | retq 44 | main: 45 | pushq %rbp 46 | movq %rsp, %rbp 47 | subq $16, %rsp 48 | movl $0, -4(%rbp) 49 | movl $12345, %edi 50 | callq dump 51 | xorl %eax, %eax 52 | addq $16, %rsp 53 | popq %rbp 54 | retq 55 | 56 | .section .note.GNU-stack, "" 57 | 58 | -------------------------------------------------------------------------------- /examples/fibonacci.s: -------------------------------------------------------------------------------- 1 | # ../vas -o fibonacci.o fibonacci.s && gcc -o fibonacci.out fibonacci.o && ./fibonacci.out 2 | # 0 3 | # 1 4 | # 1 5 | # 2 6 | # 3 7 | # 5 8 | # 8 9 | # 13 10 | # 21 11 | # 34 12 | # 55 13 | # 89 14 | # 144 15 | 16 | .text 17 | .global main 18 | fib: 19 | pushq %rbp 20 | movq %rsp, %rbp 21 | pushq %rbx 22 | subq $24, %rsp 23 | movl %edi, -20(%rbp) 24 | cmpl $1, -20(%rbp) 25 | jne .L2 26 | movl $0, %eax 27 | jmp .L3 28 | .L2: 29 | cmpl $2, -20(%rbp) 30 | jne .L4 31 | movl $1, %eax 32 | jmp .L3 33 | .L4: 34 | movl -20(%rbp), %eax 35 | subl $1, %eax 36 | movl %eax, %edi 37 | call fib 38 | movl %eax, %ebx 39 | movl -20(%rbp), %eax 40 | subl $2, %eax 41 | movl %eax, %edi 42 | call fib 43 | addl %ebx, %eax 44 | .L3: 45 | movq -8(%rbp), %rbx 46 | leave 47 | ret 48 | .LC0: 49 | .string "%d\n" 50 | main: 51 | pushq %rbp 52 | movq %rsp, %rbp 53 | subq $16, %rsp 54 | movl $1, -4(%rbp) 55 | jmp .L6 56 | .L7: 57 | movl -4(%rbp), %eax 58 | movl %eax, %edi 59 | call fib 60 | movl %eax, %esi 61 | leaq .LC0(%rip), %rdi 62 | movl $0, %eax 63 | call printf 64 | addl $1, -4(%rbp) 65 | .L6: 66 | cmpl $14, -4(%rbp) 67 | jne .L7 68 | movl $0, %eax 69 | leave 70 | ret 71 | 72 | .section .note.GNU-stack, "" 73 | 74 | -------------------------------------------------------------------------------- /examples/fizzbuzz.s: -------------------------------------------------------------------------------- 1 | # Fizz Buzz 2 | # ../vas -o fizzbuzz.o fizzbuzz.s && gcc -o 
fizzbuzz.out fizzbuzz.o && ./fizzbuzz.out 3 | 4 | .global main 5 | 6 | .data 7 | fizz: 8 | .string "fizz\n" 9 | buzz: 10 | .string "buzz\n" 11 | fizzbuzz: 12 | .string "Fizz Buzz\n" 13 | fmt: 14 | .string "%d\n" 15 | 16 | .text 17 | main: 18 | pushq %rbp 19 | movq %rsp, %rbp 20 | subq $16, %rsp 21 | 22 | # int a = 1 23 | movq $1, -4(%rbp) 24 | 25 | loop_start: 26 | cmpq $101, -4(%rbp) 27 | je loop_end 28 | 29 | # check if it's multiple of 15 30 | movq -4(%rbp), %rax 31 | movq $15, %rbx 32 | cqto 33 | idivq %rbx 34 | 35 | movq $0, %rax 36 | cmpq %rdx, %rax 37 | jne .L1 38 | 39 | # print fizz buzz 40 | leaq fizzbuzz(%rip), %rdi 41 | call printf 42 | jmp .L2 43 | 44 | .L1: 45 | # check if it's multiple of 3 46 | movq -4(%rbp), %rax 47 | movq $3, %rbx 48 | cqto 49 | idivq %rbx 50 | movq $0, %rax 51 | cmpq %rdx, %rax 52 | jne .L3 53 | leaq fizz(%rip), %rdi 54 | call printf 55 | jmp .L2 56 | 57 | .L3: 58 | # check if it's multiple of 5 59 | movq -4(%rbp), %rax 60 | movq $5, %rbx 61 | cqto 62 | idivq %rbx 63 | movq $0, %rax 64 | cmpq %rdx, %rax 65 | jne .L4 66 | leaq buzz(%rip), %rdi 67 | call printf 68 | jmp .L2 69 | .L4: 70 | # print number 71 | leaq fmt(%rip), %rdi 72 | movq -4(%rbp), %rsi 73 | callq printf 74 | 75 | .L2: 76 | # increment 77 | movq -4(%rbp), %rax 78 | addq $1, %rax 79 | movq %rax, -4(%rbp) 80 | jmp loop_start 81 | 82 | loop_end: 83 | movq $0, %rdi 84 | callq exit 85 | 86 | .section .note.GNU-stack, "" 87 | 88 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature-request.yml: -------------------------------------------------------------------------------- 1 | name: Feature Request 2 | description: Suggest an idea for this project 3 | title: (feature request summary) 4 | labels: Feature Request 5 | body: 6 | - type: textarea 7 | id: description 8 | attributes: 9 | label: Describe the feature 10 | description: A clear and concise description of the feature you are proposing. 
11 | validations: 12 | required: true 13 | 14 | - type: textarea 15 | id: use-case 16 | attributes: 17 | label: Use Case 18 | description: | 19 | Why do you need this feature? For example: "I'm always frustrated when..." 20 | validations: 21 | required: true 22 | 23 | - type: textarea 24 | id: solution 25 | attributes: 26 | label: Proposed Solution 27 | description: Suggest how to implement the addition or change. Please include prototype/workaround/sketch/reference implementation. 28 | validations: 29 | required: false 30 | 31 | - type: textarea 32 | id: other 33 | attributes: 34 | label: Other Information 35 | description: Any alternative solutions or features you considered, a more detailed explanation, stack traces, related issues, links for context, etc. 36 | validations: 37 | required: false 38 | 39 | - type: checkboxes 40 | id: ack 41 | attributes: 42 | label: Acknowledgements 43 | options: 44 | - label: I may be able to implement this feature request 45 | required: false 46 | 47 | - label: This feature might incur a breaking change 48 | required: false 49 | -------------------------------------------------------------------------------- /vas.v: -------------------------------------------------------------------------------- 1 | module main 2 | 3 | import os 4 | import flag 5 | import lexer 6 | import encoder 7 | import elf 8 | 9 | fn file_name_without_ext(file_name string) string { 10 | ext_len := os.file_ext(file_name).len 11 | bytes := file_name.bytes() 12 | return if file_name == '-' { 13 | 'main' 14 | } else { 15 | bytes[..bytes.len - ext_len].bytestr() 16 | } 17 | } 18 | 19 | fn main() { 20 | mut fp := flag.new_flag_parser(os.args) 21 | fp.application('vas') 22 | fp.version('v0.0.0') 23 | fp.skip_executable() 24 | mut out_file := fp.string('o', `o`, 'out_file_none', 'set output file name') 25 | keep_locals := fp.bool('keep-locals', 0, false, 'keeps local symbols (e.g., those starting with `.L`)') 26 | 27 | additional_args := fp.finalize() or { 28 | 
println(fp.usage()) 29 | return 30 | } 31 | 32 | if additional_args.len < 1 { 33 | println(fp.usage()) 34 | return 35 | } 36 | 37 | file_name := additional_args[0] 38 | 39 | if out_file == 'out_file_none' { 40 | out_file = file_name_without_ext(file_name) + '.o' 41 | } 42 | 43 | program := if file_name == '-' { 44 | os.get_raw_lines_joined() 45 | } else { 46 | os.read_file(file_name) or { 47 | eprintln('error: reading file `${file_name}`') 48 | exit(1) 49 | } 50 | } 51 | 52 | mut l := lexer.new(file_name, program) 53 | 54 | mut en := encoder.new(mut l, file_name) 55 | en.encode() 56 | en.assign_addresses() 57 | 58 | mut e := elf.new(out_file, keep_locals, en.rela_text_users, en.user_defined_sections, en.user_defined_symbols) 59 | e.collect_rela_symbols() 60 | e.build_symtab_strtab() 61 | e.rela_text_users() 62 | e.build_shstrtab() 63 | e.build_headers() 64 | e.write_elf() 65 | } 66 | 67 | -------------------------------------------------------------------------------- /examples/rule110.s: -------------------------------------------------------------------------------- 1 | # ../vas -o rule110.o rule110.s && gcc -o rule110.out rule110.o && ./rule110.out 2 | 3 | .global board 4 | .bss 5 | board: 6 | .zero 120 7 | 8 | .data 9 | .LC0: 10 | .string " *" 11 | 12 | .text 13 | .global main 14 | main: 15 | .LFB0: 16 | pushq %rbp 17 | movq %rsp, %rbp 18 | subq $32, %rsp 19 | movl $1, 112+board(%rip) 20 | movq $0, -8(%rbp) 21 | jmp .L2 22 | .L7: 23 | movq $0, -16(%rbp) 24 | jmp .L3 25 | .L4: 26 | movq stdout(%rip), %rdx 27 | movq -16(%rbp), %rax 28 | leaq 0(,%rax,4), %rcx 29 | leaq board(%rip), %rax 30 | movl (%rcx,%rax), %eax 31 | cltq 32 | leaq .LC0(%rip), %rcx 33 | movzbl (%rax,%rcx), %eax 34 | movsbl %al, %eax 35 | movq %rdx, %rsi 36 | movl %eax, %edi 37 | call fputc 38 | addq $1, -16(%rbp) 39 | .L3: 40 | cmpq $29, -16(%rbp) 41 | jbe .L4 42 | movq stdout(%rip), %rax 43 | movq %rax, %rsi 44 | movl $10, %edi 45 | call fputc 46 | movl board(%rip), %eax 47 | leal (%rax,%rax), 
%edx 48 | movl 4+board(%rip), %eax 49 | orl %edx, %eax 50 | movl %eax, -20(%rbp) 51 | movq $1, -32(%rbp) 52 | jmp .L5 53 | .L6: 54 | movl -20(%rbp), %eax 55 | addl %eax, %eax 56 | andl $7, %eax 57 | movl %eax, %ecx 58 | movq -32(%rbp), %rax 59 | addq $1, %rax 60 | leaq 0(,%rax,4), %rdx 61 | leaq board(%rip), %rax 62 | movl (%rdx,%rax), %eax 63 | orl %ecx, %eax 64 | movl %eax, -20(%rbp) 65 | movl -20(%rbp), %eax 66 | movl $110, %edx 67 | movl %eax, %ecx 68 | sarl %cl, %edx 69 | movl %edx, %eax 70 | andl $1, %eax 71 | movl %eax, %ecx 72 | movq -32(%rbp), %rax 73 | leaq 0(,%rax,4), %rdx 74 | leaq board(%rip), %rax 75 | movl %ecx, (%rdx,%rax) 76 | addq $1, -32(%rbp) 77 | .L5: 78 | cmpq $28, -32(%rbp) 79 | jbe .L6 80 | addq $1, -8(%rbp) 81 | .L2: 82 | cmpq $27, -8(%rbp) 83 | jbe .L7 84 | movl $0, %eax 85 | leave 86 | ret 87 | .LFE0: 88 | 89 | .section .note.GNU-stack,"" 90 | 91 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.yml: -------------------------------------------------------------------------------- 1 | name: Bug Report 2 | description: Report a bug 3 | title: (bug report summary) 4 | labels: Bug 5 | body: 6 | - type: textarea 7 | id: description 8 | attributes: 9 | label: Describe the bug 10 | description: What is the problem? A clear and concise description of the bug. 11 | validations: 12 | required: true 13 | 14 | - type: textarea 15 | id: expected 16 | attributes: 17 | label: Expected Behavior 18 | description: What did you expect to happen? 19 | validations: 20 | required: true 21 | 22 | - type: textarea 23 | id: current 24 | attributes: 25 | label: Current Behavior 26 | description: | 27 | What actually happened? 28 | 29 | Please include full errors, uncaught exceptions, stack traces, and relevant logs. 30 | If service/functions responses are relevant, please include wire logs. 
31 | validations: 32 | required: true 33 | 34 | - type: textarea 35 | id: reproduction 36 | attributes: 37 | label: Reproduction Steps 38 | description: | 39 | Provide a self-contained, concise snippet of code that can be used to reproduce the issue. 40 | For more complex issues provide a repo with the smallest sample that reproduces the bug. 41 | validations: 42 | required: true 43 | 44 | - type: textarea 45 | id: solution 46 | attributes: 47 | label: Possible Solution 48 | description: Suggest a fix/reason for the bug 49 | validations: 50 | required: false 51 | 52 | - type: textarea 53 | id: context 54 | attributes: 55 | label: Additional Information/Context 56 | description: | 57 | Anything else that might be relevant for troubleshooting this bug. 58 | Providing context helps us come up with a solution that is most useful in the real world. 59 | validations: 60 | required: false 61 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # vas - x86-64 Assembler written in V 2 | 3 | [![CI](https://github.com/v420v/vas/actions/workflows/ci.yml/badge.svg)](https://github.com/v420v/vas/actions/workflows/ci.yml) 4 | 5 | ## Installation 6 | 7 | ### Docker setup 8 | 9 | ```sh 10 | # Build the Docker image 11 | docker build ./ -t vas 12 | 13 | # Run the container 14 | # Linux/MacOS: 15 | docker run --rm -it -v "$(pwd)":/root/env vas 16 | 17 | # Windows (CMD): 18 | docker run --rm -it -v "%cd%":/root/env vas 19 | 20 | # Windows (PowerShell): 21 | docker run --rm -it -v "${pwd}:/root/env" vas 22 | ``` 23 | 24 | ### Build 25 | 26 | Requires the V compiler to be installed. 27 | 28 | ```sh 29 | v . 
-prod 30 | ``` 31 | 32 | ## Usage 33 | 34 | Basic usage: 35 | ```sh 36 | vas [options] <input_file>.s 37 | ``` 38 | 39 | Options: 40 | - `-o <file>`: Set output file name (default: input_file.o) 41 | - `--keep-locals`: Keep local symbols (e.g., those starting with `.L`) 42 | 43 | ### Example 44 | 45 | 1. Create an assembly file (hello.s): 46 | ```asm 47 | # Hello world example 48 | 49 | .global _start 50 | 51 | .section .data, "aw" 52 | msg: 53 | .string "Hello, world!\n" 54 | 55 | .section .text, "ax" 56 | _start: 57 | pushq %rbp 58 | movq %rsp, %rbp 59 | subq $16, %rsp 60 | 61 | movq $1, %rax # write syscall 62 | movq $1, %rdi # stdout 63 | movq $msg, %rsi # message 64 | movq $14, %rdx # length 65 | syscall 66 | 67 | movq $60, %rax # exit syscall 68 | movq $0, %rdi # status code 0 69 | syscall 70 | ``` 71 | 72 | 2. Assemble the file: 73 | ```sh 74 | vas hello.s 75 | ``` 76 | 77 | 3. Link the object file: 78 | ```sh 79 | ld hello.o 80 | ``` 81 | 82 | 4. Run the executable: 83 | ```sh 84 | ./a.out 85 | ``` 86 | 87 | Output: 88 | ``` 89 | Hello, world! 90 | ``` 91 | 92 | ## Star History 93 | 94 | 95 | 96 | 97 | Star History Chart 98 | 99 | 100 | 101 | - https://x.com/v_language/status/1643642842214957061 102 | 103 | ## License 104 | 105 | This project is licensed under the MIT License - see the LICENSE.md file for details. 
106 | -------------------------------------------------------------------------------- /encoder/addr.v: -------------------------------------------------------------------------------- 1 | module encoder 2 | 3 | import encoding.binary 4 | import error 5 | 6 | fn section_flags(flags string) int { 7 | mut val := 0 8 | for c in flags { 9 | match c { 10 | `a` { 11 | val |= encoder.shf_alloc 12 | } 13 | `x` { 14 | val |= encoder.shf_execinstr 15 | } 16 | `w` { 17 | val |= encoder.shf_write 18 | } else { 19 | panic('unkown attribute $c') 20 | } 21 | } 22 | } 23 | return val 24 | } 25 | 26 | fn (mut e Encoder) change_symbol_binding(instr Instr, binding u8) { 27 | mut s := e.user_defined_symbols[instr.symbol_name] or { 28 | error.print(instr.pos, 'undefined symbol `$instr.symbol_name`') 29 | exit(1) 30 | } 31 | 32 | if binding == encoder.stb_global && s.kind == .section { 33 | error.print(instr.pos, 'sections cannot be global') 34 | exit(1) 35 | } 36 | 37 | s.binding = binding 38 | } 39 | 40 | fn (mut e Encoder) change_symbol_visibility(instr Instr, visibility u8) { 41 | mut s := e.user_defined_symbols[instr.symbol_name] or { 42 | error.print(instr.pos, 'undefined symbol `$instr.symbol_name`') 43 | exit(1) 44 | } 45 | 46 | s.visibility = visibility 47 | } 48 | 49 | fn (mut e Encoder) fix_same_section_relocations() { 50 | for mut rela in e.rela_text_users { 51 | if symbol := e.user_defined_symbols[rela.uses] { 52 | if symbol.section_name != rela.instr.section_name { 53 | continue 54 | } 55 | if symbol.binding == encoder.stb_global { 56 | continue 57 | } 58 | 59 | if !rela.instr.is_jmp_or_call && rela.rtype != encoder.r_x86_64_pc32 { 60 | continue 61 | } 62 | 63 | num := ((symbol.addr - rela.instr.addr) - rela.instr.code.len) + rela.adjust 64 | 65 | mut hex := [u8(0), 0, 0, 0] 66 | binary.little_endian_put_u32(mut &hex, u32(num)) 67 | 68 | mut section := e.user_defined_sections[rela.instr.section_name] or { 69 | panic('this should not happen') 70 | } 71 | 72 | 
section.code[rela.instr.addr + rela.offset] = hex[0] 73 | section.code[rela.instr.addr + rela.offset+1] = hex[1] 74 | section.code[rela.instr.addr + rela.offset+2] = hex[2] 75 | section.code[rela.instr.addr + rela.offset+3] = hex[3] 76 | 77 | rela.is_already_resolved = true 78 | } 79 | } 80 | } 81 | 82 | pub fn (mut e Encoder) assign_addresses() { 83 | for mut instr in e.instrs { 84 | if instr.kind == .section && instr.section_name !in e.user_defined_sections { 85 | e.user_defined_sections[instr.section_name] = &UserDefinedSection{ 86 | flags: section_flags(instr.flags) 87 | } 88 | } 89 | 90 | mut section := e.user_defined_sections[instr.section_name] or { 91 | panic('this should not happen') 92 | } 93 | 94 | match instr.kind { 95 | .global { 96 | e.change_symbol_binding(*instr, encoder.stb_global) 97 | continue 98 | } 99 | .local { 100 | e.change_symbol_binding(*instr, encoder.stb_local) 101 | continue 102 | } 103 | .hidden { 104 | e.change_symbol_visibility(*instr, encoder.stv_hidden) 105 | continue 106 | } 107 | .internal { 108 | e.change_symbol_visibility(*instr, encoder.stv_internal) 109 | continue 110 | } 111 | .protected { 112 | e.change_symbol_visibility(*instr, encoder.stv_protected) 113 | continue 114 | } else {} 115 | } 116 | 117 | instr.addr = section.addr 118 | section.addr += instr.code.len 119 | section.code << instr.code 120 | } 121 | 122 | e.fix_same_section_relocations() 123 | } 124 | 125 | -------------------------------------------------------------------------------- /encoder/stack_op.v: -------------------------------------------------------------------------------- 1 | module encoder 2 | 3 | import error 4 | 5 | fn (mut e Encoder) pop() { 6 | e.set_current_instr(.pop) 7 | 8 | source := e.parse_operand() 9 | 10 | if source is Register { 11 | source.check_regi_size(.suffix_quad) 12 | e.add_prefix(Empty{}, Empty{}, source, []) 13 | e.current_instr.code << [0x58 + source.base_offset%8] 14 | return 15 | } 16 | if source is Indirection { 17 | 
e.add_segment_override_prefix(source) 18 | e.add_prefix(Empty{}, source.index, source.base, []) 19 | e.current_instr.code << 0x8f // op_code 20 | e.add_modrm_sib_disp(source, encoder.slash_0) 21 | return 22 | } 23 | 24 | error.print(source.pos, 'invalid operand for instruction') 25 | exit(1) 26 | } 27 | 28 | fn (mut e Encoder) push() { 29 | e.set_current_instr(.push) 30 | 31 | source := e.parse_operand() 32 | 33 | if source is Register { 34 | source.check_regi_size(.suffix_quad) 35 | e.add_prefix(Empty{}, Empty{}, source, []) 36 | source.check_regi_size(.suffix_quad) 37 | e.current_instr.code << [0x50 + source.base_offset%8] 38 | return 39 | } 40 | if source is Indirection { 41 | e.add_segment_override_prefix(source) 42 | e.add_prefix(Empty{}, source.index, source.base, []) 43 | e.current_instr.code << 0xff // op_code 44 | e.add_modrm_sib_disp(source, encoder.slash_6) 45 | return 46 | } 47 | if source is Immediate { 48 | mut used_symbols := []string{} 49 | imm_val := int(eval_expr_get_symbol_64(source, mut used_symbols)) 50 | if used_symbols.len >= 2 { 51 | error.print(source.pos, 'invalid immediate operand') 52 | exit(1) 53 | } 54 | 55 | e.current_instr.code << if is_in_i8_range(imm_val) { 56 | u8(0x6A) 57 | } else { 58 | u8(0x68) 59 | } 60 | 61 | imm_need_rela := used_symbols.len == 1 62 | 63 | if imm_need_rela { 64 | e.add_imm_rela(used_symbols[0], imm_val, DataSize.suffix_quad) 65 | } else { 66 | e.add_imm_value(imm_val, DataSize.suffix_quad) 67 | } 68 | return 69 | } 70 | 71 | error.print(source.pos, 'invalid operand for instruction') 72 | exit(1) 73 | } 74 | 75 | fn (mut e Encoder) jmp_instr(kind InstrKind, rel32_code []u8, rel32_offset i64) { 76 | e.set_current_instr(kind) 77 | e.current_instr.is_jmp_or_call = true 78 | 79 | desti := e.parse_operand() 80 | 81 | if desti is Ident { 82 | e.current_instr.code << rel32_code 83 | e.rela_text_users << &Rela{ 84 | uses: desti.lit, 85 | instr: e.current_instr, 86 | offset: rel32_offset, 87 | rtype: 
encoder.r_x86_64_32s, 88 | adjust: 0, 89 | } 90 | return 91 | } 92 | if desti is Star { 93 | desti.regi.check_regi_size(DataSize.suffix_quad) 94 | e.add_prefix(Empty{}, Empty{}, desti.regi, []) 95 | e.current_instr.code << [u8(0xFF), 0xE0 + desti.regi.base_offset%8] 96 | return 97 | } 98 | 99 | error.print(desti.pos, 'invalid operand for instruction') 100 | exit(1) 101 | } 102 | 103 | fn (mut e Encoder) call() { 104 | e.set_current_instr(.call) 105 | e.current_instr.is_jmp_or_call = true 106 | 107 | desti := e.parse_operand() 108 | 109 | if desti is Star { 110 | desti.regi.check_regi_size(DataSize.suffix_quad) 111 | e.add_prefix(Empty{}, Empty{}, desti.regi, []) 112 | e.current_instr.code << [u8(0xFF), 0xD0 + desti.regi.base_offset%8] 113 | } else { 114 | e.current_instr.code << [u8(0xe8), 0, 0, 0, 0] 115 | mut used_symbols := []string{} 116 | adjust := int(eval_expr_get_symbol_64(desti, mut used_symbols)) 117 | if used_symbols.len >= 2 { 118 | error.print(desti.pos, 'invalid operand for instruction') 119 | exit(1) 120 | } 121 | 122 | e.rela_text_users << encoder.Rela{ 123 | instr: e.current_instr, 124 | offset: 1, 125 | uses: used_symbols[0], 126 | adjust: adjust, 127 | rtype: encoder.r_x86_64_plt32 128 | } 129 | } 130 | } 131 | 132 | -------------------------------------------------------------------------------- /encoder/directive.v: -------------------------------------------------------------------------------- 1 | module encoder 2 | 3 | import token 4 | import error 5 | import encoding.binary 6 | 7 | fn (mut e Encoder) add_section(section_name string, flag string, pos token.Position) { 8 | e.current_section_name = section_name 9 | 10 | instr := Instr{kind: .section, pos: pos, section_name: section_name, symbol_type: encoder.stt_section, flags: flag} 11 | e.instrs << &instr 12 | 13 | if s := e.user_defined_symbols[section_name] { 14 | if s.kind == .label { 15 | error.print(pos, 'symbol `$section_name` is already defined') 16 | exit(1) 17 | } 18 | } else { 19 | 
// byte handles the `.byte` directive: it parses one operand and emits a
// single byte for it.
//
// * operand without symbols: the evaluated value, truncated to u8, is
//   appended to the current instruction's code.
// * operand with one symbol: the code becomes a single zero byte and a
//   relocation of type r_x86_64_8 is queued in rela_text_users, carrying the
//   evaluated value as its adjust.
// * operand with two or more symbols: 'invalid operand' is reported and the
//   process exits with status 1.
fn (mut e Encoder) byte() {
	e.set_current_instr(.byte)

	operand := e.parse_operand()

	mut symbols := []string{}
	value := int(eval_expr_get_symbol_64(operand, mut symbols))

	match symbols.len {
		0 {
			// plain constant: emit it directly
			e.current_instr.code << u8(value)
		}
		1 {
			// symbolic value: leave a placeholder and queue a relocation
			e.rela_text_users << &Rela{
				uses: symbols[0]
				instr: e.current_instr
				adjust: value
				rtype: encoder.r_x86_64_8
			}
			e.current_instr.code = [u8(0)]
		}
		else {
			error.print(operand.pos, 'invalid operand')
			exit(1)
		}
	}
}
// quad handles the `.quad` directive: it parses one operand and emits eight
// bytes for it in little-endian order.
//
// * operand without symbols: the evaluated value is written as a u64.
// * operand with one symbol: eight zero bytes are emitted and a relocation of
//   type r_x86_64_64 is queued in rela_text_users.
// * operand with two or more symbols: 'invalid operand' is reported and the
//   process exits with status 1.
fn (mut e Encoder) quad() {
	e.set_current_instr(.quad)

	desti := e.parse_operand()

	mut used_symbols := []string{}
	// Keep the evaluator's result at full width. Narrowing it to `int` before
	// the u64 conversion (as was done before) discards the upper 32 bits, so
	// a constant such as 0x100000000 was emitted as 0.
	// NOTE(review): assumes eval_expr_get_symbol_64 returns a 64-bit integer,
	// as its name suggests - confirm against its definition.
	value := eval_expr_get_symbol_64(desti, mut used_symbols)
	if used_symbols.len >= 2 {
		error.print(desti.pos, 'invalid operand')
		exit(1)
	}

	if used_symbols.len == 1 {
		rela := &Rela{
			uses: used_symbols[0],
			instr: e.current_instr,
			// the relocation record keeps the same int-sized adjust as before
			adjust: int(value),
			rtype: encoder.r_x86_64_64
		}
		e.current_instr.code = [u8(0), 0, 0, 0, 0, 0, 0, 0]
		e.rela_text_users << rela
	} else {
		mut hex := [u8(0), 0, 0, 0, 0, 0, 0, 0]
		binary.little_endian_put_u64(mut &hex, u64(value))
		e.current_instr.code = hex
	}
}
// add_modrm_sib_disp appends the addressing bytes for the memory operand
// `indir` to the current instruction: a ModRM byte, an optional SIB byte and
// an optional displacement (1 or 4 bytes), in that order.
//
// `index` is forwarded unchanged as the middle argument of compose_mod_rm
// (presumably the ModRM reg field - callers pass a register number modulo 8).
//
// When the displacement expression references a symbol, four zero bytes are
// emitted instead of a value and a relocation is queued in rela_text_users:
// r_x86_64_pc32 for an instruction-pointer base, r_x86_64_32s for a 64-bit
// base, r_x86_64_32 otherwise.
//
// Errors (reported through error.print, then exit status 1):
//   * neither a base nor an index/scale is present
//   * the displacement references two or more symbols
//   * an index register is combined with %rip/%eip as base
//   * base and index registers differ in size
//   * the scale factor is not 1, 2, 4 or 8
// A displacement outside the 32-bit range panics.
fn (mut e Encoder) add_modrm_sib_disp(indir Indirection, index u8) {
	if !indir.has_base && !indir.has_index_scale {
		error.print(indir.pos, 'syntax not supported yet. `disp(,,)`')
		exit(1)
	}

	base_is_ip, base_is_sp, base_is_bp := indir.check_base_register()

	// With mod=00 the rm/base value 0b101 does not select a register (it
	// means RIP-relative in ModRM and "no base" in SIB). check_base_register
	// only reports RBP/EBP, but R13/R13D have the same low three bits, so any
	// base whose number is 5 modulo 8 must be given an explicit displacement.
	base_needs_disp := base_is_bp || (indir.has_base && !base_is_ip && indir.base.base_offset%8 == 5)

	mut used_symbols := []string{}
	disp := int(eval_expr_get_symbol_64(indir.disp, mut used_symbols))
	if used_symbols.len >= 2 {
		error.print(indir.disp.pos, 'invalid operand')
		exit(1)
	}
	disp_need_rela := used_symbols.len == 1

	// ModRM byte
	if indir.has_index_scale {
		if base_is_ip {
			error.print(indir.index.pos, '%$indir.base.lit.to_lower() as base register can not have an index register')
			exit(1)
		}

		if indir.base.size != indir.index.size && indir.has_base {
			error.print(indir.base.pos, 'base register is $indir.base.size-bit, but index register is not')
			exit(1)
		}

		// rm = 0b100 announces that a SIB byte follows
		if !indir.has_base {
			e.current_instr.code << compose_mod_rm(mod_indirection_with_no_disp, index, 0b100)
		} else if disp_need_rela {
			e.current_instr.code << compose_mod_rm(mod_indirection_with_disp32, index, 0b100)
		} else if disp == 0 && !base_needs_disp {
			e.current_instr.code << compose_mod_rm(mod_indirection_with_no_disp, index, 0b100)
		} else if is_in_i8_range(disp) {
			e.current_instr.code << compose_mod_rm(mod_indirection_with_disp8, index, 0b100)
		} else if is_in_i32_range(disp) {
			e.current_instr.code << compose_mod_rm(mod_indirection_with_disp32, index, 0b100)
		} else {
			panic('disp out range!')
		}
	} else {
		if base_is_ip {
			e.current_instr.code << compose_mod_rm(mod_indirection_with_no_disp, index, 0b101) // rip relative
		} else if disp_need_rela {
			e.current_instr.code << compose_mod_rm(mod_indirection_with_disp32, index, indir.base.base_offset%8)
		} else if disp == 0 && !base_needs_disp {
			e.current_instr.code << compose_mod_rm(mod_indirection_with_no_disp, index, indir.base.base_offset%8)
		} else if is_in_i8_range(disp) {
			e.current_instr.code << compose_mod_rm(mod_indirection_with_disp8, index, indir.base.base_offset%8)
		} else if is_in_i32_range(disp) {
			e.current_instr.code << compose_mod_rm(mod_indirection_with_disp32, index, indir.base.base_offset%8)
		} else {
			panic('disp out range!')
		}
	}

	// add sib byte
	if indir.has_index_scale {
		scale_num := u8(eval_expr(indir.scale))
		if scale_num !in [1, 2, 4, 8] {
			error.print(indir.scale.pos, 'scale factor in address must be 1, 2, 4 or 8')
			// this is an error path: report failure to the caller's shell
			// (the previous exit(0) signalled success)
			exit(1)
		}
		if indir.has_base {
			e.current_instr.code << indir.base.base_offset%8 + (indir.index.base_offset%8 << 3) + (scale(scale_num) << 6)
		} else {
			// base field 0b101 together with mod=00: no base register, disp32 follows
			e.current_instr.code << 0x5 + (indir.index.base_offset%8 << 3) + (scale(scale_num) << 6)
		}
	} else if base_is_sp || indir.base.base_offset == 12 {
		// rm=0b100 (rsp/esp, and register number 12) always needs a SIB byte;
		// 0x24 is scale=0, index=0b100, base=0b100
		e.current_instr.code << 0x24
	}

	// disp
	if disp_need_rela {
		rtype := if base_is_ip {
			encoder.r_x86_64_pc32
		} else if indir.base.size == .suffix_quad {
			encoder.r_x86_64_32s
		} else {
			encoder.r_x86_64_32
		}
		rela := encoder.Rela{
			instr: e.current_instr
			uses: used_symbols[0]
			offset: e.current_instr.code.len
			rtype: u64(rtype)
			adjust: eval_expr(indir.disp)
		}
		e.rela_text_users << rela
		e.current_instr.code << [u8(0), 0, 0, 0]
	} else {
		if !indir.has_base {
			// index-only addressing always carries a 4-byte displacement
			mut hex := [u8(0), 0, 0, 0]
			binary.little_endian_put_u32(mut &hex, u32(disp))
			e.current_instr.code << hex
		} else if disp != 0 || base_is_ip || base_needs_disp {
			if base_is_ip {
				mut hex := [u8(0), 0, 0, 0]
				binary.little_endian_put_u32(mut &hex, u32(disp))
				e.current_instr.code << hex
			} else if is_in_i8_range(disp) {
				e.current_instr.code << u8(disp)
			} else if is_in_i32_range(disp) {
				mut hex := [u8(0), 0, 0, 0]
				binary.little_endian_put_u32(mut &hex, u32(disp))
				e.current_instr.code << hex
			}
		}
	}
}
14 | 15 | ## Our Standards 16 | 17 | Examples of behavior that contributes to a positive environment for our 18 | community include: 19 | 20 | * Demonstrating empathy and kindness toward other people 21 | * Being respectful of differing opinions, viewpoints, and experiences 22 | * Giving and gracefully accepting constructive feedback 23 | * Accepting responsibility and apologizing to those affected by our mistakes, 24 | and learning from the experience 25 | * Focusing on what is best not just for us as individuals, but for the 26 | overall community 27 | 28 | Examples of unacceptable behavior include: 29 | 30 | * The use of sexualized language or imagery, and sexual attention or 31 | advances of any kind 32 | * Trolling, insulting or derogatory comments, and personal or political attacks 33 | * Public or private harassment 34 | * Publishing others' private information, such as a physical or email 35 | address, without their explicit permission 36 | * Other conduct which could reasonably be considered inappropriate in a 37 | professional setting 38 | 39 | ## Enforcement Responsibilities 40 | 41 | Community leaders are responsible for clarifying and enforcing our standards of 42 | acceptable behavior and will take appropriate and fair corrective action in 43 | response to any behavior that they deem inappropriate, threatening, offensive, 44 | or harmful. 45 | 46 | Community leaders have the right and responsibility to remove, edit, or reject 47 | comments, commits, code, wiki edits, issues, and other contributions that are 48 | not aligned to this Code of Conduct, and will communicate reasons for moderation 49 | decisions when appropriate. 50 | 51 | ## Scope 52 | 53 | This Code of Conduct applies within all community spaces, and also applies when 54 | an individual is officially representing the community in public spaces. 
55 | Examples of representing our community include using an official e-mail address, 56 | posting via an official social media account, or acting as an appointed 57 | representative at an online or offline event. 58 | 59 | ## Enforcement 60 | 61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 62 | reported to the community leaders responsible for enforcement at 63 | . 64 | All complaints will be reviewed and investigated promptly and fairly. 65 | 66 | All community leaders are obligated to respect the privacy and security of the 67 | reporter of any incident. 68 | 69 | ## Enforcement Guidelines 70 | 71 | Community leaders will follow these Community Impact Guidelines in determining 72 | the consequences for any action they deem in violation of this Code of Conduct: 73 | 74 | ### 1. Correction 75 | 76 | **Community Impact**: Use of inappropriate language or other behavior deemed 77 | unprofessional or unwelcome in the community. 78 | 79 | **Consequence**: A private, written warning from community leaders, providing 80 | clarity around the nature of the violation and an explanation of why the 81 | behavior was inappropriate. A public apology may be requested. 82 | 83 | ### 2. Warning 84 | 85 | **Community Impact**: A violation through a single incident or series 86 | of actions. 87 | 88 | **Consequence**: A warning with consequences for continued behavior. No 89 | interaction with the people involved, including unsolicited interaction with 90 | those enforcing the Code of Conduct, for a specified period of time. This 91 | includes avoiding interactions in community spaces as well as external channels 92 | like social media. Violating these terms may lead to a temporary or 93 | permanent ban. 94 | 95 | ### 3. Temporary Ban 96 | 97 | **Community Impact**: A serious violation of community standards, including 98 | sustained inappropriate behavior. 
99 | 100 | **Consequence**: A temporary ban from any sort of interaction or public 101 | communication with the community for a specified period of time. No public or 102 | private interaction with the people involved, including unsolicited interaction 103 | with those enforcing the Code of Conduct, is allowed during this period. 104 | Violating these terms may lead to a permanent ban. 105 | 106 | ### 4. Permanent Ban 107 | 108 | **Community Impact**: Demonstrating a pattern of violation of community 109 | standards, including sustained inappropriate behavior, harassment of an 110 | individual, or aggression toward or disparagement of classes of individuals. 111 | 112 | **Consequence**: A permanent ban from any sort of public interaction within 113 | the community. 114 | 115 | ## Attribution 116 | 117 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], 118 | version 2.0, available at 119 | https://www.contributor-covenant.org/version/2/0/code_of_conduct.html. 120 | 121 | Community Impact Guidelines were inspired by [Mozilla's code of conduct 122 | enforcement ladder](https://github.com/mozilla/diversity). 123 | 124 | [homepage]: https://www.contributor-covenant.org 125 | 126 | For answers to common questions about this code of conduct, see the FAQ at 127 | https://www.contributor-covenant.org/faq. Translations are available at 128 | https://www.contributor-covenant.org/translations. 
// cvttss2sil encodes `cvttss2sil %xmm, %r32`.
//
// The source must be an XMM register and the destination a 32-bit general
// register. The emitted bytes are whatever add_prefix produces for the
// suffix_single size, then 0x0F 0x2C, then a register-direct ModRM byte built
// from the destination and source register numbers (each modulo 8).
// Any other operand combination is reported as
// 'invalid operand for instruction' and the process exits with status 1.
fn (mut e Encoder) cvttss2sil() {
	e.set_current_instr(.cvttss2sil)

	source, desti := e.parse_two_operand()

	if source is Xmm {
		if desti is Register {
			desti.check_regi_size(DataSize.suffix_long)
			e.add_prefix(desti, Empty{}, source, [DataSize.suffix_single])
			mod_rm := compose_mod_rm(encoder.mod_regi, desti.base_offset%8, source.base_offset%8)
			e.current_instr.code << [u8(0x0F), 0x2C]
			e.current_instr.code << mod_rm
			return
		}
	}

	error.print(source.pos, 'invalid operand for instruction')
	exit(1)
}
// sse_arith_instr encodes a two-operand SSE instruction whose destination is
// an XMM register.
//
// kind    - instruction kind recorded via set_current_instr
// op_code - opcode bytes appended after the prefix
// sizes   - data sizes forwarded to add_prefix
//
// Supported operand shapes:
//   * xmm, xmm    - prefix, op_code, register-direct ModRM byte
//   * memory, xmm - address-size prefix if needed, prefix, op_code, then the
//     ModRM/SIB/displacement bytes produced by add_modrm_sib_disp
// Anything else is reported as 'invalid operand for instruction' and the
// process exits with status 1.
fn (mut e Encoder) sse_arith_instr(kind InstrKind, op_code []u8, sizes []DataSize) {
	e.set_current_instr(kind)

	source, desti := e.parse_two_operand()

	if desti is Xmm {
		if source is Xmm {
			// register to register
			e.add_prefix(desti, Empty{}, source, sizes)
			mod_rm := compose_mod_rm(encoder.mod_regi, desti.base_offset%8, source.base_offset%8)
			e.current_instr.code << op_code
			e.current_instr.code << mod_rm
			return
		}

		if source is Indirection {
			// memory to register
			e.add_segment_override_prefix(source)
			e.add_prefix(desti, source.index, source.base, sizes)
			e.current_instr.code << op_code
			e.add_modrm_sib_disp(source, desti.base_offset%8)
			return
		}
	}

	error.print(source.pos, 'invalid operand for instruction')
	exit(1)
}
e.add_modrm_sib_disp(source, desti.base_offset%8) 116 | return 117 | } 118 | 119 | if source is Xmm && desti is Indirection { 120 | e.add_segment_override_prefix(desti) 121 | e.add_prefix(source, desti.index, desti.base, sizes) 122 | e.current_instr.code << 0x0F 123 | e.current_instr.code << op_code_base + 1 124 | e.add_modrm_sib_disp(desti, source.base_offset%8) 125 | return 126 | } 127 | 128 | error.print(source.pos, 'invalid operand for instruction') 129 | exit(1) 130 | } 131 | 132 | fn (mut e Encoder) movd() { 133 | e.set_current_instr(.movd) 134 | 135 | source, desti := e.parse_two_operand() 136 | 137 | if source is Register && desti is Xmm { 138 | source.check_regi_size(DataSize.suffix_long) 139 | e.add_prefix(desti, Empty{}, source, [DataSize.suffix_word]) 140 | mod_rm := compose_mod_rm(encoder.mod_regi, desti.base_offset%8, source.base_offset%8) 141 | e.current_instr.code << [u8(0x0F), 0x6e] 142 | e.current_instr.code << mod_rm 143 | return 144 | } 145 | 146 | if source is Xmm && desti is Register { 147 | desti.check_regi_size(DataSize.suffix_long) 148 | e.add_prefix(source, Empty{}, desti, [DataSize.suffix_word]) 149 | mod_rm := compose_mod_rm(encoder.mod_regi, source.base_offset%8, desti.base_offset%8) 150 | e.current_instr.code << [u8(0x0F), 0x7e] 151 | e.current_instr.code << mod_rm 152 | return 153 | } 154 | 155 | if source is Indirection && desti is Xmm { 156 | e.add_segment_override_prefix(source) 157 | e.add_prefix(desti, source.index, source.base, [DataSize.suffix_word]) 158 | e.current_instr.code << [u8(0x0F), 0x6e] 159 | e.add_modrm_sib_disp(source, desti.base_offset%8) 160 | return 161 | } 162 | 163 | if source is Xmm && desti is Indirection { 164 | e.add_segment_override_prefix(desti) 165 | e.add_prefix(source, desti.index, desti.base, [DataSize.suffix_word]) 166 | e.current_instr.code << [u8(0x0F), 0x7e] 167 | e.add_modrm_sib_disp(desti, source.base_offset%8) 168 | return 169 | } 170 | 171 | error.print(source.pos, 'invalid operand for 
// general_registers maps an upper-case register name to its description:
// `base_offset` is the register number (0-15; encoders use it modulo 8) and
// `size` is the operand size the name implies. `rex_required` is set on the
// byte registers SPL, SIL, BPL and DIL.
//
// BP is a 16-bit register: it is listed with the other word registers and
// carries .suffix_word (it was previously filed among the byte registers
// with .suffix_byte).
const general_registers = {
	// 64-bit
	'RAX': Register{lit: 'RAX', base_offset: 0, size: .suffix_quad}
	'RCX': Register{lit: 'RCX', base_offset: 1, size: .suffix_quad}
	'RDX': Register{lit: 'RDX', base_offset: 2, size: .suffix_quad}
	'RBX': Register{lit: 'RBX', base_offset: 3, size: .suffix_quad}
	'RSP': Register{lit: 'RSP', base_offset: 4, size: .suffix_quad}
	'RBP': Register{lit: 'RBP', base_offset: 5, size: .suffix_quad}
	'RSI': Register{lit: 'RSI', base_offset: 6, size: .suffix_quad}
	'RDI': Register{lit: 'RDI', base_offset: 7, size: .suffix_quad}
	'R8': Register{lit: 'R8', base_offset: 8, size: .suffix_quad}
	'R9': Register{lit: 'R9', base_offset: 9, size: .suffix_quad}
	'R10': Register{lit: 'R10', base_offset: 10, size: .suffix_quad}
	'R11': Register{lit: 'R11', base_offset: 11, size: .suffix_quad}
	'R12': Register{lit: 'R12', base_offset: 12, size: .suffix_quad}
	'R13': Register{lit: 'R13', base_offset: 13, size: .suffix_quad}
	'R14': Register{lit: 'R14', base_offset: 14, size: .suffix_quad}
	'R15': Register{lit: 'R15', base_offset: 15, size: .suffix_quad}

	// 32-bit
	'EAX': Register{lit: 'EAX', base_offset: 0, size: .suffix_long}
	'ECX': Register{lit: 'ECX', base_offset: 1, size: .suffix_long}
	'EDX': Register{lit: 'EDX', base_offset: 2, size: .suffix_long}
	'EBX': Register{lit: 'EBX', base_offset: 3, size: .suffix_long}
	'ESP': Register{lit: 'ESP', base_offset: 4, size: .suffix_long}
	'EBP': Register{lit: 'EBP', base_offset: 5, size: .suffix_long}
	'ESI': Register{lit: 'ESI', base_offset: 6, size: .suffix_long}
	'EDI': Register{lit: 'EDI', base_offset: 7, size: .suffix_long}
	'R8D': Register{lit: 'R8D', base_offset: 8, size: .suffix_long}
	'R9D': Register{lit: 'R9D', base_offset: 9, size: .suffix_long}
	'R10D': Register{lit: 'R10D', base_offset: 10, size: .suffix_long}
	'R11D': Register{lit: 'R11D', base_offset: 11, size: .suffix_long}
	'R12D': Register{lit: 'R12D', base_offset: 12, size: .suffix_long}
	'R13D': Register{lit: 'R13D', base_offset: 13, size: .suffix_long}
	'R14D': Register{lit: 'R14D', base_offset: 14, size: .suffix_long}
	'R15D': Register{lit: 'R15D', base_offset: 15, size: .suffix_long}

	// 16-bit
	'AX': Register{lit: 'AX', base_offset: 0, size: .suffix_word}
	'CX': Register{lit: 'CX', base_offset: 1, size: .suffix_word}
	'DX': Register{lit: 'DX', base_offset: 2, size: .suffix_word}
	'BX': Register{lit: 'BX', base_offset: 3, size: .suffix_word}
	'SP': Register{lit: 'SP', base_offset: 4, size: .suffix_word}
	'BP': Register{lit: 'BP', base_offset: 5, size: .suffix_word}
	'SI': Register{lit: 'SI', base_offset: 6, size: .suffix_word}
	'DI': Register{lit: 'DI', base_offset: 7, size: .suffix_word}
	'R8W': Register{lit: 'R8W', base_offset: 8, size: .suffix_word}
	'R9W': Register{lit: 'R9W', base_offset: 9, size: .suffix_word}
	'R10W': Register{lit: 'R10W', base_offset: 10, size: .suffix_word}
	'R11W': Register{lit: 'R11W', base_offset: 11, size: .suffix_word}
	'R12W': Register{lit: 'R12W', base_offset: 12, size: .suffix_word}
	'R13W': Register{lit: 'R13W', base_offset: 13, size: .suffix_word}
	'R14W': Register{lit: 'R14W', base_offset: 14, size: .suffix_word}
	'R15W': Register{lit: 'R15W', base_offset: 15, size: .suffix_word}

	// 8-bit
	'AL': Register{lit: 'AL', base_offset: 0, size: .suffix_byte}
	'CL': Register{lit: 'CL', base_offset: 1, size: .suffix_byte}
	'DL': Register{lit: 'DL', base_offset: 2, size: .suffix_byte}
	'BL': Register{lit: 'BL', base_offset: 3, size: .suffix_byte}
	'AH': Register{lit: 'AH', base_offset: 4, size: .suffix_byte}
	'CH': Register{lit: 'CH', base_offset: 5, size: .suffix_byte}
	'DH': Register{lit: 'DH', base_offset: 6, size: .suffix_byte}
	'BH': Register{lit: 'BH', base_offset: 7, size: .suffix_byte}
	'SPL': Register{lit: 'SPL', base_offset: 4, size: .suffix_byte, rex_required: true}
	'SIL': Register{lit: 'SIL', base_offset: 6, size: .suffix_byte, rex_required: true}
	'BPL': Register{lit: 'BPL', base_offset: 5, size: .suffix_byte, rex_required: true}
	'DIL': Register{lit: 'DIL', base_offset: 7, size: .suffix_byte, rex_required: true}
	'R8B': Register{lit: 'R8B', base_offset: 8, size: .suffix_byte}
	'R9B': Register{lit: 'R9B', base_offset: 9, size: .suffix_byte}
	'R10B': Register{lit: 'R10B', base_offset: 10, size: .suffix_byte}
	'R11B': Register{lit: 'R11B', base_offset: 11, size: .suffix_byte}
	'R12B': Register{lit: 'R12B', base_offset: 12, size: .suffix_byte}
	'R13B': Register{lit: 'R13B', base_offset: 13, size: .suffix_byte}
	'R14B': Register{lit: 'R14B', base_offset: 14, size: .suffix_byte}
	'R15B': Register{lit: 'R15B', base_offset: 15, size: .suffix_byte}

	// instruction pointer (recognised by name in check_base_register)
	'RIP': Register{lit: 'RIP', base_offset: 0, size: .suffix_quad}
	'EIP': Register{lit: 'EIP', base_offset: 0, size: .suffix_long}
	'IP': Register{lit: 'IP', base_offset: 0, size: .suffix_word}
}
// peak returns the character `n` positions away from the current index
// without moving the lexer, or `\0` when that position lies outside the
// text.
//
// The previous check only recognised the position exactly one past the end
// (`len == idx+n`); any position further out indexed the text out of range.
fn (mut l Lexer) peak(n int) u8 {
	target := l.idx + n
	if target < 0 || target >= l.text.len {
		return `\0`
	}
	return l.text[target]
}
// read_escaped_char decodes one backslash escape inside a string literal and
// returns the byte it stands for.
//
// On entry the current character is the backslash. On return the current
// character is the LAST character of the escape, because the caller
// (read_string) advances once more after each escape.
//
// Supported forms:
//   * one to three octal digits (`\0` ... `\377`)
//   * `\x` followed by one or more hex digits (only the low 8 bits are kept)
//   * `\a \b \t \n \v \f \r`
//   * any other character stands for itself (e.g. `\\`, `\"`)
// A `\x` with no hex digit after it prints an error and exits with status 1.
fn (mut l Lexer) read_escaped_char() u8 {
	mut c := u8(0)
	l.advance() // skip '\\'

	// octal: consume up to two further digits, looking ahead so the cursor
	// stops on the last digit
	if `0` <= l.c && l.c <= `7` {
		c = l.c - `0`
		if `0` <= l.peak(1) && l.peak(1) <= `7` {
			l.advance()
			c = (c << 3) + (l.c - `0`)
			if `0` <= l.peak(1) && l.peak(1) <= `7` {
				l.advance()
				c = (c << 3) + (l.c - `0`)
			}
		}
		return c
	}

	// hex: same look-ahead scheme as the octal case. Previously the loop ran
	// one character past the escape and the decoded value was never returned,
	// so `\x41` yielded the character after it and the caller then skipped
	// that character as well.
	if l.c == `x` {
		if !is_hex_digit(l.peak(1)) {
			eprintln('error: invalid hex escape sequence')
			exit(1)
		}

		for is_hex_digit(l.peak(1)) {
			l.advance()
			c = (c << 4) + from_hex(l.c)
		}
		return c
	}

	match l.c {
		`a` {
			return `\a`
		}
		`b` {
			return `\b`
		}
		`t` {
			return `\t`
		}
		`n` {
			return `\n`
		}
		`v` {
			return `\v`
		}
		`f` {
			return `\f`
		}
		`r` {
			return `\r`
		}
		else {
			// unknown escape: the character stands for itself
			return l.c
		}
	}
}
l.single_letter_token('/', .div) 271 | } 272 | `$` { 273 | return l.single_letter_token('$', .dolor) 274 | } 275 | `%` { 276 | return l.single_letter_token('%', .percent) 277 | } 278 | `,` { 279 | return l.single_letter_token(',', .comma) 280 | } 281 | else { 282 | c := [l.c].bytestr() 283 | error.print(pos, 'unexpected token `${c}`') 284 | exit(1) 285 | } 286 | } 287 | } 288 | } 289 | 290 | return token.Token{ 291 | lit: '\0' 292 | kind: token.TokenKind.eof 293 | pos: l.current_pos() 294 | } // end of file 295 | } 296 | 297 | -------------------------------------------------------------------------------- /examples/gol.s: -------------------------------------------------------------------------------- 1 | # ../vas -o gol.o gol.s && gcc -o gol.out gol.o 2 | 3 | .text 4 | .section .rodata, "a" 5 | .LC0: 6 | .string "\033[H" 7 | .LC1: 8 | .string "\033[07m \033[m" 9 | .LC2: 10 | .string " " 11 | .LC3: 12 | .string "\033[E" 13 | .text 14 | .global show 15 | show: 16 | pushq %rbp 17 | movq %rsp, %rbp 18 | pushq %rbx 19 | subq $56, %rsp 20 | movq %rdi, -56(%rbp) 21 | movl %esi, -60(%rbp) 22 | movl %edx, -64(%rbp) 23 | movl -60(%rbp), %ebx 24 | movslq %ebx, %rax 25 | subq $1, %rax 26 | movq %rax, -32(%rbp) 27 | movslq %ebx, %rax 28 | movq %rax, %r8 29 | movl $0, %r9d 30 | movq -56(%rbp), %rax 31 | movq %rax, -40(%rbp) 32 | leaq .LC0(%rip), %rax 33 | movq %rax, %rdi 34 | movl $0, %eax 35 | call printf 36 | movl $0, -20(%rbp) 37 | jmp .L2 38 | .L7: 39 | movl $0, -24(%rbp) 40 | jmp .L3 41 | .L6: 42 | movl -20(%rbp), %eax 43 | movslq %eax, %rdx 44 | movslq %ebx, %rax 45 | imulq %rdx, %rax 46 | leaq 0(,%rax,4), %rdx 47 | movq -40(%rbp), %rax 48 | addq %rax, %rdx 49 | movl -24(%rbp), %eax 50 | cltq 51 | movl (%rdx,%rax,4), %eax 52 | testl %eax, %eax 53 | je .L4 54 | leaq .LC1(%rip), %rax 55 | jmp .L5 56 | .L4: 57 | leaq .LC2(%rip), %rax 58 | .L5: 59 | movq %rax, %rdi 60 | movl $0, %eax 61 | call printf 62 | addl $1, -24(%rbp) 63 | .L3: 64 | movl -24(%rbp), %eax 65 | cmpl 
-60(%rbp), %eax 66 | jl .L6 67 | leaq .LC3(%rip), %rax 68 | movq %rax, %rdi 69 | movl $0, %eax 70 | call printf 71 | addl $1, -20(%rbp) 72 | .L2: 73 | movl -20(%rbp), %eax 74 | cmpl -64(%rbp), %eax 75 | jl .L7 76 | movq stdout(%rip), %rax 77 | movq %rax, %rdi 78 | call fflush 79 | nop 80 | movq -8(%rbp), %rbx 81 | leave 82 | ret 83 | .global evolve 84 | evolve: 85 | pushq %rbp 86 | movq %rsp, %rbp 87 | pushq %r15 88 | pushq %r14 89 | pushq %r13 90 | pushq %r12 91 | pushq %rbx 92 | subq $152, %rsp 93 | movq %rdi, -136(%rbp) 94 | movl %esi, -140(%rbp) 95 | movl %edx, -144(%rbp) 96 | movq %rsp, %rax 97 | movq %rax, -184(%rbp) 98 | movl -140(%rbp), %r9d 99 | movslq %r9d, %rax 100 | subq $1, %rax 101 | movq %rax, -88(%rbp) 102 | movslq %r9d, %rax 103 | movq %rax, %rcx 104 | movl $0, %ebx 105 | movq -136(%rbp), %rax 106 | movq %rax, -96(%rbp) 107 | movl -140(%rbp), %edi 108 | movl -144(%rbp), %r8d 109 | movslq %edi, %rax 110 | subq $1, %rax 111 | movq %rax, -104(%rbp) 112 | movslq %edi, %rax 113 | movq %rax, -176(%rbp) 114 | movq $0, -168(%rbp) 115 | movslq %edi, %rax 116 | leaq 0(,%rax,4), %rcx 117 | movslq %r8d, %rax 118 | subq $1, %rax 119 | movq %rax, -112(%rbp) 120 | movslq %edi, %rax 121 | movq %rax, -160(%rbp) 122 | movq $0, -152(%rbp) 123 | movslq %r8d, %rax 124 | movq %rax, %r14 125 | movl $0, %r15d 126 | movq -160(%rbp), %rbx 127 | movq -152(%rbp), %rsi 128 | movq %rsi, %rdx 129 | imulq %r14, %rdx 130 | movq %rbx, -160(%rbp) 131 | movq %rsi, -152(%rbp) 132 | movq %rbx, %rax 133 | imulq %r15, %rax 134 | leaq (%rdx,%rax), %rsi 135 | movq -160(%rbp), %rax 136 | mulq %r14 137 | addq %rdx, %rsi 138 | movq %rsi, %rdx 139 | movslq %edi, %rax 140 | movq %rax, %r12 141 | movl $0, %r13d 142 | movslq %r8d, %rax 143 | movq %rax, %r10 144 | movl $0, %r11d 145 | movq %r13, %rdx 146 | imulq %r10, %rdx 147 | movq %r11, %rax 148 | imulq %r12, %rax 149 | leaq (%rdx,%rax), %rsi 150 | movq %r12, %rax 151 | mulq %r10 152 | addq %rdx, %rsi 153 | movq %rsi, %rdx 154 | movslq %edi, 
%rdx 155 | movslq %r8d, %rax 156 | imulq %rdx, %rax 157 | leaq 0(,%rax,4), %rdx 158 | movl $16, %eax 159 | subq $1, %rax 160 | addq %rdx, %rax 161 | movl $16, %ebx 162 | movl $0, %edx 163 | divq %rbx 164 | imulq $16, %rax, %rax 165 | subq %rax, %rsp 166 | movq %rsp, %rax 167 | addq $3, %rax 168 | shrq $2, %rax 169 | salq $2, %rax 170 | movq %rax, -120(%rbp) 171 | movl $0, -52(%rbp) 172 | jmp .L9 173 | .L21: 174 | movl $0, -56(%rbp) 175 | jmp .L10 176 | .L20: 177 | movl $0, -60(%rbp) 178 | movl -52(%rbp), %eax 179 | subl $1, %eax 180 | movl %eax, -64(%rbp) 181 | jmp .L11 182 | .L15: 183 | movl -56(%rbp), %eax 184 | subl $1, %eax 185 | movl %eax, -68(%rbp) 186 | jmp .L12 187 | .L14: 188 | movl -64(%rbp), %edx 189 | movl -144(%rbp), %eax 190 | addl %edx, %eax 191 | cltd 192 | idivl -144(%rbp) 193 | movl %edx, %eax 194 | movslq %eax, %rdx 195 | movslq %r9d, %rax 196 | imulq %rdx, %rax 197 | leaq 0(,%rax,4), %rdx 198 | movq -96(%rbp), %rax 199 | leaq (%rdx,%rax), %rsi 200 | movl -68(%rbp), %edx 201 | movl -140(%rbp), %eax 202 | addl %edx, %eax 203 | cltd 204 | idivl -140(%rbp) 205 | movl %edx, %eax 206 | cltq 207 | movl (%rsi,%rax,4), %eax 208 | testl %eax, %eax 209 | je .L13 210 | addl $1, -60(%rbp) 211 | .L13: 212 | addl $1, -68(%rbp) 213 | .L12: 214 | movl -56(%rbp), %eax 215 | addl $1, %eax 216 | cmpl %eax, -68(%rbp) 217 | jle .L14 218 | addl $1, -64(%rbp) 219 | .L11: 220 | movl -52(%rbp), %eax 221 | addl $1, %eax 222 | cmpl %eax, -64(%rbp) 223 | jle .L15 224 | movl -52(%rbp), %eax 225 | movslq %eax, %rdx 226 | movslq %r9d, %rax 227 | imulq %rdx, %rax 228 | leaq 0(,%rax,4), %rdx 229 | movq -96(%rbp), %rax 230 | addq %rax, %rdx 231 | movl -56(%rbp), %eax 232 | cltq 233 | movl (%rdx,%rax,4), %eax 234 | testl %eax, %eax 235 | je .L16 236 | subl $1, -60(%rbp) 237 | .L16: 238 | cmpl $3, -60(%rbp) 239 | je .L17 240 | cmpl $2, -60(%rbp) 241 | jne .L18 242 | movl -52(%rbp), %eax 243 | movslq %eax, %rdx 244 | movslq %r9d, %rax 245 | imulq %rdx, %rax 246 | leaq 0(,%rax,4), 
%rdx 247 | movq -96(%rbp), %rax 248 | addq %rax, %rdx 249 | movl -56(%rbp), %eax 250 | cltq 251 | movl (%rdx,%rax,4), %eax 252 | testl %eax, %eax 253 | je .L18 254 | .L17: 255 | movl $1, %eax 256 | jmp .L19 257 | .L18: 258 | movl $0, %eax 259 | .L19: 260 | movq %rcx, %rdi 261 | shrq $2, %rdi 262 | movl %eax, %r8d 263 | movq -120(%rbp), %rax 264 | movl -56(%rbp), %edx 265 | movslq %edx, %rsi 266 | movl -52(%rbp), %edx 267 | movslq %edx, %rdx 268 | imulq %rdi, %rdx 269 | addq %rsi, %rdx 270 | movl %r8d, (%rax,%rdx,4) 271 | addl $1, -56(%rbp) 272 | .L10: 273 | movl -56(%rbp), %eax 274 | cmpl -140(%rbp), %eax 275 | jl .L20 276 | addl $1, -52(%rbp) 277 | .L9: 278 | movl -52(%rbp), %eax 279 | cmpl -144(%rbp), %eax 280 | jl .L21 281 | movl $0, -72(%rbp) 282 | jmp .L22 283 | .L25: 284 | movl $0, -76(%rbp) 285 | jmp .L23 286 | .L24: 287 | movq %rcx, %r8 288 | shrq $2, %r8 289 | movl -72(%rbp), %eax 290 | movslq %eax, %rdx 291 | movslq %r9d, %rax 292 | imulq %rdx, %rax 293 | leaq 0(,%rax,4), %rdx 294 | movq -96(%rbp), %rax 295 | leaq (%rdx,%rax), %rsi 296 | movq -120(%rbp), %rax 297 | movl -76(%rbp), %edx 298 | movslq %edx, %rdi 299 | movl -72(%rbp), %edx 300 | movslq %edx, %rdx 301 | imulq %r8, %rdx 302 | addq %rdi, %rdx 303 | movl (%rax,%rdx,4), %edx 304 | movl -76(%rbp), %eax 305 | cltq 306 | movl %edx, (%rsi,%rax,4) 307 | addl $1, -76(%rbp) 308 | .L23: 309 | movl -76(%rbp), %eax 310 | cmpl -140(%rbp), %eax 311 | jl .L24 312 | addl $1, -72(%rbp) 313 | .L22: 314 | movl -72(%rbp), %eax 315 | cmpl -144(%rbp), %eax 316 | jl .L25 317 | movq -184(%rbp), %rsp 318 | nop 319 | leaq -40(%rbp), %rsp 320 | popq %rbx 321 | popq %r12 322 | popq %r13 323 | popq %r14 324 | popq %r15 325 | popq %rbp 326 | ret 327 | .global game 328 | game: 329 | pushq %rbp 330 | movq %rsp, %rbp 331 | pushq %r15 332 | pushq %r14 333 | pushq %r13 334 | pushq %r12 335 | pushq %rbx 336 | subq $56, %rsp 337 | movl %edi, -84(%rbp) 338 | movl %esi, -88(%rbp) 339 | movl -84(%rbp), %esi 340 | movl -88(%rbp), %edi 
341 | movslq %esi, %r12 342 | subq $1, %r12 343 | movq %r12, -64(%rbp) 344 | movslq %esi, %r12 345 | movq %r12, %r14 346 | movl $0, %r15d 347 | movslq %esi, %r12 348 | leaq 0(,%r12,4), %r14 349 | movslq %edi, %r12 350 | subq $1, %r12 351 | movq %r12, -72(%rbp) 352 | movslq %esi, %r12 353 | movq %r12, %r10 354 | movl $0, %r11d 355 | movslq %edi, %r12 356 | movq %r12, %rax 357 | movl $0, %edx 358 | movq %r11, %r13 359 | imulq %rax, %r13 360 | movq %rdx, %r12 361 | imulq %r10, %r12 362 | addq %r13, %r12 363 | mulq %r10 364 | leaq (%r12,%rdx), %r10 365 | movq %r10, %rdx 366 | movslq %esi, %rax 367 | movq %rax, %r8 368 | movl $0, %r9d 369 | movslq %edi, %rax 370 | movq %rax, %rcx 371 | movl $0, %ebx 372 | movq %r9, %rdx 373 | imulq %rcx, %rdx 374 | movq %rbx, %rax 375 | imulq %r8, %rax 376 | leaq (%rdx,%rax), %r10 377 | movq %r8, %rax 378 | mulq %rcx 379 | leaq (%r10,%rdx), %rcx 380 | movq %rcx, %rdx 381 | movslq %esi, %rdx 382 | movslq %edi, %rax 383 | imulq %rdx, %rax 384 | leaq 0(,%rax,4), %rdx 385 | movl $16, %eax 386 | subq $1, %rax 387 | addq %rdx, %rax 388 | movl $16, %ebx 389 | movl $0, %edx 390 | divq %rbx 391 | imulq $16, %rax, %rax 392 | subq %rax, %rsp 393 | movq %rsp, %rax 394 | addq $3, %rax 395 | shrq $2, %rax 396 | salq $2, %rax 397 | movq %rax, -80(%rbp) 398 | movl $0, -52(%rbp) 399 | jmp .L27 400 | .L30: 401 | movl $0, -56(%rbp) 402 | jmp .L28 403 | .L29: 404 | call rand 405 | cmpl $214748363, %eax 406 | setle %al 407 | movq %r14, %rdi 408 | shrq $2, %rdi 409 | movzbl %al, %edx 410 | movq -80(%rbp), %rax 411 | movl -52(%rbp), %ecx 412 | movslq %ecx, %rsi 413 | movl -56(%rbp), %ecx 414 | movslq %ecx, %rcx 415 | imulq %rdi, %rcx 416 | addq %rsi, %rcx 417 | movl %edx, (%rax,%rcx,4) 418 | addl $1, -56(%rbp) 419 | .L28: 420 | movl -56(%rbp), %eax 421 | cmpl -88(%rbp), %eax 422 | jl .L29 423 | addl $1, -52(%rbp) 424 | .L27: 425 | movl -52(%rbp), %eax 426 | cmpl -84(%rbp), %eax 427 | jl .L30 428 | .L31: 429 | movl -88(%rbp), %edx 430 | movl -84(%rbp), %ecx 
431 | movq -80(%rbp), %rax 432 | movl %ecx, %esi 433 | movq %rax, %rdi 434 | call show 435 | movl -88(%rbp), %edx 436 | movl -84(%rbp), %ecx 437 | movq -80(%rbp), %rax 438 | movl %ecx, %esi 439 | movq %rax, %rdi 440 | call evolve 441 | movl $200000, %edi 442 | call usleep 443 | jmp .L31 444 | .global main 445 | main: 446 | pushq %rbp 447 | movq %rsp, %rbp 448 | subq $32, %rsp 449 | movl %edi, -20(%rbp) 450 | movq %rsi, -32(%rbp) 451 | movl $0, -4(%rbp) 452 | movl $0, -8(%rbp) 453 | cmpl $1, -20(%rbp) 454 | jle .L33 455 | movq -32(%rbp), %rax 456 | addq $8, %rax 457 | movq (%rax), %rax 458 | movq %rax, %rdi 459 | call atoi 460 | movl %eax, -4(%rbp) 461 | .L33: 462 | cmpl $2, -20(%rbp) 463 | jle .L34 464 | movq -32(%rbp), %rax 465 | addq $16, %rax 466 | movq (%rax), %rax 467 | movq %rax, %rdi 468 | call atoi 469 | movl %eax, -8(%rbp) 470 | .L34: 471 | cmpl $0, -4(%rbp) 472 | jg .L35 473 | movl $30, -4(%rbp) 474 | .L35: 475 | cmpl $0, -8(%rbp) 476 | jg .L36 477 | movl $30, -8(%rbp) 478 | .L36: 479 | movl -8(%rbp), %edx 480 | movl -4(%rbp), %eax 481 | movl %edx, %esi 482 | movl %eax, %edi 483 | call game 484 | movl $0, %eax 485 | leave 486 | ret 487 | 488 | .section .note.GNU-stack,"" 489 | 490 | -------------------------------------------------------------------------------- /elf/elf.v: -------------------------------------------------------------------------------- 1 | module elf 2 | 3 | import encoder 4 | import os 5 | 6 | pub struct Elf { 7 | out_file string 8 | rela_text_users []encoder.Rela 9 | user_defined_symbols map[string]&encoder.Instr 10 | user_defined_sections map[string]&encoder.UserDefinedSection 11 | mut: 12 | keep_locals bool // flag to keep local labels. 
labels that start from `.L` 13 | ehdr Elf64_Ehdr // Elf header 14 | symtab_symbol_indexs map[string]int // symtab symbol index 15 | local_symbols_count int // used in .symtab section header 16 | rela_symbols []string // symbols that are not defined 17 | user_defined_section_names []string // list of user-defined section names 18 | user_defined_section_idx map[string]int // user-defined sections index 19 | section_name_offs map[string]int 20 | strtab []u8 21 | symtab []Elf64_Sym 22 | rela_section_names []string 23 | rela map[string][]Elf64_Rela 24 | shstrtab []u8 25 | section_headers []Elf64_Shdr 26 | } 27 | 28 | pub struct Elf64_Ehdr { 29 | e_ident [16]u8 30 | e_type u16 31 | e_machine u16 32 | e_version u32 33 | e_entry voidptr 34 | e_phoff voidptr 35 | e_shoff voidptr 36 | e_flags u32 37 | e_ehsize u16 38 | e_phentsize u16 39 | e_phnum u16 40 | e_shentsize u16 41 | e_shnum u16 42 | e_shstrndx u16 43 | } 44 | 45 | pub struct Elf64_Sym { 46 | st_name u32 47 | st_info u8 48 | st_other u8 49 | st_shndx u16 50 | st_value voidptr 51 | st_size u64 52 | } 53 | 54 | pub struct Elf64_Shdr { 55 | sh_name u32 56 | sh_type u32 57 | sh_flags voidptr 58 | sh_addr voidptr 59 | sh_offset voidptr 60 | sh_size voidptr 61 | sh_link u32 62 | sh_info u32 63 | sh_addralign voidptr 64 | sh_entsize voidptr 65 | } 66 | 67 | pub struct Elf64_Rela { 68 | r_offset u64 69 | r_info u64 70 | r_addend i64 71 | } 72 | 73 | pub struct Elf64_Phdr { 74 | ph_type u32 75 | ph_flags u32 76 | ph_off u64 77 | ph_vaddr u64 78 | ph_paddr u64 79 | ph_filesz u64 80 | ph_memsz u64 81 | ph_align u64 82 | } 83 | 84 | const stb_local = 0 85 | const stb_global = 1 86 | const stt_notype = 0 87 | const stt_object = 1 88 | const stt_func = 2 89 | const stt_section = 3 90 | const stt_file = 4 91 | const stt_common = 5 92 | const stt_tls = 6 93 | const stt_relc = 8 94 | const stt_srelc = 9 95 | const stt_loos = 10 96 | const stt_hios = 12 97 | const stt_loproc = 13 98 | const stt_hiproc = 14 99 | const sht_null = 0 
// Section header types (Elf64_Shdr.sh_type), continued.
const sht_progbits = 1
const sht_symtab = 2
const sht_strtab = 3
const sht_rela = 4

// Section attribute flags (Elf64_Shdr.sh_flags).
const shf_write = 0x1
const shf_alloc = 0x2
const shf_execinstr = 0x4
const shf_merge = 0x10
const shf_strings = 0x20
const shf_info_link = 0x40
const shf_link_order = 0x80
const shf_os_nonconforming = 0x100
const shf_group = 0x200
const shf_tls = 0x400

// x86-64 relocation types. They are u64 because they are added directly
// into the 64-bit Elf64_Rela.r_info word (symbol index in the upper 32 bits).
const r_x86_64_none = u64(0)
const r_x86_64_64 = u64(1)
const r_x86_64_pc32 = u64(2)
const r_x86_64_got32 = u64(3)
const r_x86_64_plt32 = u64(4)
const r_x86_64_copy = u64(5)
const r_x86_64_glob_dat = u64(6)
const r_x86_64_jump_slot = u64(7)
const r_x86_64_relative = u64(8)
const r_x86_64_gotpcrel = u64(9)
const r_x86_64_32 = u64(10)
const r_x86_64_32s = u64(11)
const r_x86_64_16 = u64(12)
const r_x86_64_pc16 = u64(13)
const r_x86_64_8 = u64(14)
const r_x86_64_pc8 = u64(15)
const r_x86_64_pc64 = u64(24)

// Symbol visibility values (stored in Elf64_Sym.st_other).
const stv_default = 0
const stv_internal = 1
const stv_hidden = 2
const stv_protected = 3

// new creates an Elf writer for `out_file` from the encoder's output:
// the relocation requests, the user-defined sections and the user-defined symbols.
// `keep_locals` controls whether `.L`-prefixed local labels are kept in the symbol table.
pub fn new(out_file string, keep_locals bool, rela_text_users []encoder.Rela, user_defined_sections map[string]&encoder.UserDefinedSection, user_defined_symbols map[string]&encoder.Instr) &Elf {
	mut e := &Elf{
		out_file: out_file
		keep_locals: keep_locals
		rela_text_users: rela_text_users
		user_defined_symbols: user_defined_symbols
		user_defined_sections: user_defined_sections
	}

	// Number the user-defined sections starting at 1; index 0 is left for the
	// null section header that build_headers() emits first.
	for name, _ in e.user_defined_sections {
		e.user_defined_section_names << name
		e.user_defined_section_idx[name] = e.user_defined_section_idx.len + 1
	}

	return e
}

// align_to rounds `n` up to the next multiple of `align`.
// `align` must be non-zero (it is used as a divisor).
pub fn align_to(n int, align int) int {
	return (n + align - 1) / align * align
}

// add_padding appends zero bytes to `code` until its length is a multiple of 16.
fn add_padding(mut code []u8) {
	padding := align_to(code.len, 16) - code.len
	for _ in 0 .. padding {
		code << 0
	}
}

// elf_symbol appends every user-defined symbol whose binding equals
// `symbol_binding` to `.symtab`, and its name to `.strtab`.
//
// `off` and `str` carry state between calls: `str` is the name most recently
// appended to `.strtab` and `off` is the offset at which that name starts.
// Advancing `off` by `str.len + 1` (name plus NUL terminator) therefore gives
// the offset of the name that is about to be appended.
fn (mut e Elf) elf_symbol(symbol_binding int, mut off &int, mut str &string) {
	for name, symbol in e.user_defined_symbols {
		if symbol.binding != symbol_binding {
			continue
		}

		if symbol.binding == stb_local {
			// Assembler-local labels are identified by the case-sensitive `.L`
			// prefix. A label such as `.loop` is an ordinary local symbol and
			// must stay in the symbol table.
			if !e.keep_locals && name.starts_with('.L') {
				continue
			}
			e.local_symbols_count++
		}

		e.symtab_symbol_indexs[name] = e.symtab_symbol_indexs.len

		unsafe {
			*off += str.len + 1
		}
		st_shndx := u16(e.user_defined_section_idx[symbol.section_name])

		// Section symbols are emitted with an empty name (st_name = 0), although
		// their name is still appended to .strtab below so that `off` stays in sync.
		st_name := if symbol.symbol_type == stt_section { u32(0) } else { u32(*off) }

		e.symtab << Elf64_Sym{
			st_name: st_name
			// binding in the high nibble, symbol type in the low nibble
			st_info: u8((symbol.binding << 4) + (symbol.symbol_type & 0xf))
			st_other: symbol.visibility
			st_shndx: st_shndx
			st_value: voidptr(symbol.addr)
		}

		e.strtab << name.bytes()
		e.strtab << 0x00
		str = name
	}
}

// Add rela symbol to symtab and strtab
// This function should be called after processing local symbols.
205 | fn (mut e Elf) elf_rela_symbol(mut off &int, mut str &string) { 206 | for symbol_name in e.rela_symbols { 207 | unsafe {*off += str.len + 1} 208 | e.symtab_symbol_indexs[symbol_name] = e.symtab_symbol_indexs.len 209 | 210 | e.symtab << Elf64_Sym{ 211 | st_name: u32(*off) 212 | st_info: u8((stb_global << 4) + (stt_notype & 0xf)) 213 | st_shndx: 0 214 | } 215 | e.strtab << symbol_name.bytes() 216 | e.strtab << 0x00 217 | str = symbol_name 218 | } 219 | } 220 | 221 | // relocation table 222 | pub fn (mut e Elf) rela_text_users() { 223 | /* 224 | x86: 再配置型 225 | https://docs.oracle.com/cd/E19683-01/817-4912/6mkdg542u/index.html#chapter6-26 226 | */ 227 | for r in e.rela_text_users { 228 | mut index := 0 229 | 230 | mut r_addend := if r.rtype in [r_x86_64_32s, r_x86_64_32, r_x86_64_64, r_x86_64_16, r_x86_64_8] { 231 | i64(0) 232 | } else if r.rtype in [r_x86_64_pc32] { 233 | i64(r.offset - r.instr.code.len) 234 | } else { 235 | i64(0-4) 236 | } 237 | 238 | // already resolved instruction. 
239 | if r.is_already_resolved { 240 | continue 241 | } 242 | 243 | if s := e.user_defined_symbols[r.uses] { 244 | if s.binding == stb_global { 245 | index = e.symtab_symbol_indexs[r.uses] 246 | } else { 247 | r_addend += s.addr 248 | index = e.symtab_symbol_indexs[s.section_name] 249 | } 250 | } else { 251 | index = e.symtab_symbol_indexs[r.uses] 252 | } 253 | 254 | rela_section_name := '.rela' + r.instr.section_name 255 | e.rela[rela_section_name] << Elf64_Rela{ 256 | r_offset: u64(r.instr.addr + r.offset), 257 | r_info: (u64(index) << 32) + r.rtype, 258 | r_addend: r_addend + r.adjust, 259 | } 260 | 261 | if rela_section_name !in e.rela_section_names { 262 | e.rela_section_names << rela_section_name 263 | } 264 | } 265 | } 266 | 267 | pub fn (mut e Elf) collect_rela_symbols() { 268 | for rela in e.rela_text_users { 269 | if rela.uses !in e.rela_symbols { 270 | if rela.uses in e.user_defined_symbols { 271 | continue 272 | } 273 | e.rela_symbols << rela.uses 274 | } 275 | } 276 | } 277 | 278 | pub fn (mut e Elf) build_symtab_strtab() { 279 | // null 280 | e.strtab << [u8(0x00)] 281 | e.symtab << Elf64_Sym{ 282 | st_name: 0 283 | st_info: u8((stb_local << 4) + (stt_notype & 0xf)) 284 | } 285 | e.symtab_symbol_indexs[''] = e.symtab_symbol_indexs.len // null symbol 286 | e.local_symbols_count++ 287 | 288 | mut off := 0 289 | mut str := '' 290 | 291 | e.elf_symbol(stb_local, mut &off, mut &str) // local 292 | e.elf_rela_symbol(mut &off, mut &str) // rela local 293 | e.elf_symbol(stb_global, mut &off, mut &str) // global 294 | 295 | add_padding(mut e.strtab) 296 | } 297 | 298 | pub fn (mut e Elf) build_shstrtab() { 299 | // null 300 | e.shstrtab << [u8(0x00)] 301 | e.section_name_offs[''] = 0 302 | 303 | // custom sections 304 | mut name_offs := ''.len + 1 305 | for name in e.user_defined_section_names { 306 | e.section_name_offs[name] = name_offs 307 | name_offs += name.len + 1 308 | 309 | e.shstrtab << name.bytes() 310 | e.shstrtab << 0x00 311 | } 312 | 313 | for 
name in ['.strtab', '.symtab', '.shstrtab'] { 314 | e.section_name_offs[name] = name_offs 315 | name_offs += name.len + 1 316 | 317 | e.shstrtab << name.bytes() 318 | e.shstrtab << 0x00 319 | } 320 | 321 | for name in e.rela.keys() { 322 | e.section_name_offs[name] = name_offs 323 | name_offs += name.len + 1 324 | 325 | e.shstrtab << name.bytes() 326 | e.shstrtab << 0x00 327 | } 328 | 329 | add_padding(mut e.shstrtab) 330 | } 331 | 332 | // TODO: refactor later... 333 | pub fn (mut e Elf) build_headers() { 334 | mut section_offs := sizeof(Elf64_Ehdr) 335 | mut section_idx := {'': 0} 336 | 337 | // null section 338 | e.section_headers << Elf64_Shdr{ 339 | sh_name: u32(e.section_name_offs['']) 340 | sh_type: sht_null 341 | } 342 | 343 | // user-defined sections 344 | for name in e.user_defined_section_names { 345 | section := e.user_defined_sections[name] or { 346 | panic('[internal error] unkown section `$name`') 347 | } 348 | e.section_headers << Elf64_Shdr{ 349 | sh_name: u32(e.section_name_offs[name]) 350 | sh_type: sht_progbits 351 | sh_flags: voidptr(section.flags) 352 | sh_addr: 0 353 | sh_offset: voidptr(section_offs) 354 | sh_size: voidptr(section.code.len) 355 | sh_link: 0 356 | sh_info: 0 357 | sh_addralign: voidptr(1) 358 | sh_entsize: 0 359 | } 360 | section_offs += u32(section.code.len) 361 | section_idx[name] = section_idx.len 362 | } 363 | 364 | strtab_ofs := section_offs 365 | strtab_size := u32(e.strtab.len) 366 | section_idx['.strtab'] = section_idx.len 367 | 368 | symtab_ofs := strtab_ofs + strtab_size 369 | symtab_size := sizeof(Elf64_Sym) * u32(e.symtab.len) 370 | section_idx['.symtab'] = section_idx.len 371 | 372 | e.section_headers << [ 373 | // .strtab 374 | Elf64_Shdr{ 375 | sh_name: u32(e.section_name_offs['.strtab']) 376 | sh_type: sht_strtab 377 | sh_flags: 0 378 | sh_addr: 0 379 | sh_offset: voidptr(strtab_ofs) 380 | sh_size: voidptr(strtab_size) 381 | sh_link: 0 382 | sh_info: 0 383 | sh_addralign: voidptr(1) 384 | sh_entsize: 0 385 | 
}, 386 | // .symtab 387 | Elf64_Shdr{ 388 | sh_name: u32(e.section_name_offs['.symtab']) 389 | sh_type: sht_symtab 390 | sh_flags: 0 391 | sh_addr: 0 392 | sh_offset: voidptr(symtab_ofs) 393 | sh_size: voidptr(symtab_size) 394 | sh_link: u32(section_idx['.strtab']) // section number of .strtab 395 | sh_info: u32(e.local_symbols_count) // Number of local symbols 396 | sh_addralign: voidptr(8) 397 | sh_entsize: voidptr(sizeof(Elf64_Sym)) 398 | } 399 | ] 400 | 401 | section_offs = symtab_ofs + symtab_size 402 | 403 | // add rela ... to section headers 404 | for name in e.rela_section_names { 405 | size := u32(e.rela[name].len) * sizeof(Elf64_Rela) 406 | e.section_headers << Elf64_Shdr{ 407 | sh_name: u32(e.section_name_offs[name]) 408 | sh_type: sht_rela, 409 | sh_flags: voidptr(shf_info_link) 410 | sh_addr: 0 411 | sh_offset: voidptr(section_offs) 412 | sh_size: voidptr(size) 413 | sh_link: u32(section_idx['.symtab']) 414 | sh_info: u32(section_idx[name[5..]]) // target section index. if `.rela.text` the target will be `.text` 415 | sh_addralign: voidptr(8) 416 | sh_entsize: voidptr(sizeof(Elf64_Rela)) 417 | } 418 | section_offs += size 419 | } 420 | 421 | // .shstrtab 422 | e.section_headers << Elf64_Shdr{ 423 | sh_name: u32(e.section_name_offs['.shstrtab']) 424 | sh_type: sht_strtab 425 | sh_flags: 0 426 | sh_addr: 0 427 | sh_offset: voidptr(section_offs) 428 | sh_size: voidptr(e.shstrtab.len) 429 | sh_link: 0 430 | sh_info: 0 431 | sh_addralign: voidptr(1) 432 | sh_entsize: 0 433 | } 434 | 435 | sectionheader_ofs := section_offs + u32(e.shstrtab.len) 436 | 437 | // elf header 438 | e.ehdr = Elf64_Ehdr{ 439 | e_ident: [ 440 | u8(0x7f), 0x45, 0x4c, 0x46, // Magic number ' ELF' in ascii format 441 | 0x02, // 2 = 64-bit 442 | 0x01, // 1 = little endian 443 | 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 444 | ]! 
445 | e_type: 1 // 1 = realocatable 446 | e_machine: 0x3e 447 | e_version: 1 448 | e_entry: 0 449 | e_phoff: 0 450 | e_shoff: voidptr(sectionheader_ofs) 451 | e_flags: 0x0 452 | e_ehsize: u16(sizeof(Elf64_Ehdr)) 453 | e_phentsize: u16(sizeof(Elf64_Phdr)) 454 | e_phnum: 0 455 | e_shentsize: u16(sizeof(Elf64_Shdr)) 456 | e_shnum: u16(e.section_headers.len) 457 | e_shstrndx: u16(e.section_headers.len - 1) 458 | } 459 | } 460 | 461 | pub fn (mut e Elf) write_elf() { 462 | mut fp := os.open_file(e.out_file, 'w') or { panic('error opening file `${e.out_file}`') } 463 | 464 | defer { 465 | fp.close() 466 | } 467 | 468 | fp.write_struct(e.ehdr) or { 469 | panic('error writing `elf header`') 470 | } 471 | 472 | for name in e.user_defined_section_names { 473 | section := e.user_defined_sections[name] or { 474 | panic('unkown section $name') 475 | } 476 | fp.write(section.code) or { 477 | panic('error writing `$name`') 478 | } 479 | } 480 | 481 | fp.write(e.strtab) or { 482 | panic('error writing `.strtab`') 483 | } 484 | 485 | for s in e.symtab { 486 | fp.write_struct(s) or { 487 | panic('error writing `.symtab`') 488 | } 489 | } 490 | 491 | for name in e.rela_section_names { 492 | for r in e.rela[name] { 493 | fp.write_struct(r) or { 494 | panic('error writing `.rela.text`') 495 | } 496 | } 497 | } 498 | 499 | fp.write(e.shstrtab) or { 500 | panic('error writing `.shstrtab`') 501 | } 502 | 503 | for sh in e.section_headers { 504 | fp.write_struct(sh) or { 505 | panic('error writing `section_headers`') 506 | } 507 | } 508 | } 509 | 510 | 511 | -------------------------------------------------------------------------------- /encoder/arith.v: -------------------------------------------------------------------------------- 1 | module encoder 2 | 3 | import error 4 | import encoding.binary 5 | 6 | fn (mut e Encoder) add_imm_rela(symbol string, imm_val int, size DataSize) { 7 | mut rela := Rela{ uses: symbol, instr: e.current_instr, adjust: imm_val, offset: 
e.current_instr.code.len } 8 | match size { 9 | .suffix_byte { 10 | rela.rtype = encoder.r_x86_64_8 11 | e.current_instr.code << u8(0) 12 | } 13 | .suffix_word { 14 | rela.rtype = encoder.r_x86_64_16 15 | e.current_instr.code << [u8(0), 0] 16 | } 17 | .suffix_long { 18 | rela.rtype = encoder.r_x86_64_32 19 | e.current_instr.code << [u8(0), 0, 0, 0] 20 | } 21 | .suffix_quad { 22 | rela.rtype = encoder.r_x86_64_32s 23 | e.current_instr.code << [u8(0), 0, 0, 0] 24 | } else {} 25 | } 26 | e.rela_text_users << rela 27 | } 28 | 29 | fn (mut e Encoder) add_imm_value(imm_val int, size DataSize) { 30 | if is_in_i8_range(imm_val) || size == DataSize.suffix_byte { 31 | e.current_instr.code << u8(imm_val) 32 | } else if size == DataSize.suffix_word { 33 | mut hex := [u8(0), 0] 34 | binary.little_endian_put_u16(mut &hex, u16(imm_val)) 35 | e.current_instr.code << hex 36 | } else if is_in_i32_range(imm_val) { 37 | mut hex := [u8(0), 0, 0, 0] 38 | binary.little_endian_put_u32(mut &hex, u32(imm_val)) 39 | e.current_instr.code << hex 40 | } else { 41 | panic('unreachable') 42 | } 43 | } 44 | 45 | fn (mut e Encoder) add_imm_value2(imm_val int, size DataSize) { 46 | if size == DataSize.suffix_byte { 47 | e.current_instr.code << [u8(imm_val)] 48 | } else if size == DataSize.suffix_word { 49 | mut hex := [u8(0), 0] 50 | binary.little_endian_put_u16(mut &hex, u16(imm_val)) 51 | e.current_instr.code << [hex[0], hex[1]] 52 | } else { 53 | mut hex := [u8(0), 0, 0, 0] 54 | binary.little_endian_put_u32(mut &hex, u32(imm_val)) 55 | e.current_instr.code << [hex[0], hex[1], hex[2], hex[3]] 56 | } 57 | } 58 | 59 | fn (mut e Encoder) cmov(kind InstrKind, op_code []u8, size DataSize) { 60 | e.set_current_instr(kind) 61 | 62 | source, desti := e.parse_two_operand() 63 | 64 | if source is Register && desti is Register { 65 | e.add_prefix(desti, Empty{}, source, [size]) 66 | e.current_instr.code << op_code 67 | e.current_instr.code << compose_mod_rm(encoder.mod_regi, desti.base_offset%8, 
source.base_offset%8) 68 | return 69 | } 70 | 71 | error.print(source.pos, 'invalid operand for instruction') 72 | exit(1) 73 | } 74 | 75 | fn (mut e Encoder) mov(size DataSize) { 76 | e.set_current_instr(.mov) 77 | 78 | source, desti := e.parse_two_operand() 79 | 80 | if source is Xmm && desti is Register { 81 | desti.check_regi_size(DataSize.suffix_quad) 82 | e.add_prefix(source, Empty{}, desti, [DataSize.suffix_word, DataSize.suffix_quad]) 83 | e.current_instr.code << [u8(0x0F), 0x7E] 84 | e.current_instr.code << compose_mod_rm(encoder.mod_regi, source.base_offset%8, desti.base_offset%8) 85 | return 86 | } 87 | if source is Register && desti is Xmm { 88 | source.check_regi_size(DataSize.suffix_quad) 89 | e.add_prefix(desti, Empty{}, source, [DataSize.suffix_word, DataSize.suffix_quad]) 90 | e.current_instr.code << [u8(0x0F), 0x6E] 91 | e.current_instr.code << compose_mod_rm(encoder.mod_regi, desti.base_offset%8, source.base_offset%8) 92 | return 93 | } 94 | if source is Indirection && desti is Xmm { 95 | e.add_segment_override_prefix(source) 96 | e.add_prefix(desti, source.index, source.base, [DataSize.suffix_single]) 97 | e.current_instr.code << [u8(0x0F), 0x7E] 98 | e.add_modrm_sib_disp(source, desti.base_offset%8) 99 | return 100 | } 101 | if source is Xmm && desti is Indirection { 102 | e.add_segment_override_prefix(desti) 103 | e.add_prefix(source, desti.index, desti.base, [DataSize.suffix_word]) 104 | e.current_instr.code << [u8(0x0F), 0xD6] 105 | e.add_modrm_sib_disp(desti, source.base_offset%8) 106 | return 107 | } 108 | 109 | if source is Register { 110 | op_code := if size == .suffix_byte { 111 | [u8(0x88)] 112 | } else { 113 | [u8(0x89)] 114 | } 115 | source.check_regi_size(size) 116 | 117 | if desti is Register { 118 | desti.check_regi_size(size) 119 | e.add_prefix(source, Empty{}, desti, [size]) 120 | e.current_instr.code << op_code 121 | e.current_instr.code << compose_mod_rm(encoder.mod_regi, source.base_offset%8, desti.base_offset%8) 122 | return 
123 | } 124 | if desti is Indirection { 125 | e.add_segment_override_prefix(desti) 126 | e.add_prefix(source, desti.index, desti.base, [size]) 127 | e.current_instr.code << op_code 128 | e.add_modrm_sib_disp(desti, source.base_offset%8) 129 | return 130 | } 131 | } 132 | 133 | if source is Indirection && desti is Register { 134 | op_code := if size == .suffix_byte { 135 | [u8(0x8a)] 136 | } else { 137 | [u8(0x8b)] 138 | } 139 | desti.check_regi_size(size) 140 | e.add_segment_override_prefix(source) 141 | e.add_prefix(desti, source.index, source.base, [size]) 142 | e.current_instr.code << op_code 143 | e.add_modrm_sib_disp(source, desti.base_offset%8) 144 | return 145 | } 146 | 147 | if source is Immediate { 148 | match desti { 149 | Register { 150 | desti.check_regi_size(size) 151 | e.add_prefix(Empty{}, Empty{}, desti, [size]) 152 | e.current_instr.code << if size == .suffix_quad { 153 | e.current_instr.code << 0xc7 154 | 0xc0 + desti.base_offset%8 155 | } else if size == .suffix_byte { 156 | 0xB0 + desti.base_offset%8 157 | } else { 158 | 0xB8 + desti.base_offset%8 159 | } 160 | } 161 | Indirection { 162 | e.add_segment_override_prefix(desti) 163 | e.add_prefix(Empty{}, desti.index, desti.base, [size]) 164 | e.current_instr.code << if size == .suffix_byte { 165 | u8(0xc6) 166 | } else { 167 | u8(0xc7) 168 | } 169 | e.add_modrm_sib_disp(desti, encoder.slash_0) 170 | } else { 171 | error.print(desti.pos, 'invalid operand for instruction') 172 | exit(1) 173 | } 174 | } 175 | 176 | mut imm_used_symbols := []string{} 177 | imm_val := int(eval_expr_get_symbol_64(source.expr, mut imm_used_symbols)) 178 | if imm_used_symbols.len >= 2 { 179 | error.print(source.pos, 'invalid immediate operand') 180 | exit(1) 181 | } 182 | imm_need_rela := imm_used_symbols.len == 1 183 | 184 | if imm_need_rela { 185 | e.add_imm_rela(imm_used_symbols[0], int(imm_val), size) 186 | } else { 187 | e.add_imm_value2(imm_val, size) 188 | } 189 | return 190 | } 191 | 192 | error.print(source.pos, 
'invalid operand for instruction')
	exit(1)
}

// rep encodes `rep movsq` and `rep stosq`.
// The string instruction following `rep` is parsed as an identifier operand;
// anything else is reported as an invalid operand and the process exits.
fn (mut e Encoder) rep() {
	e.set_current_instr(.rep)

	source := e.parse_operand()

	if source is Ident {
		// encode_instr upper-cases mnemonics before matching them, so the
		// instruction after `rep` is matched case-insensitively as well.
		match source.lit.to_lower() {
			'movsq' {
				e.current_instr.code << [u8(0xF3), 0x48, 0xA5]
				return
			}
			'stosq' {
				e.current_instr.code << [u8(0xF3), 0x48, 0xAB]
				return
			}
			else {}
		}
	}

	error.print(source.pos, 'invalid operand for instruction')
	exit(1)
}

// movabsq encodes `movabsq $imm64, %reg` as opcode B8+r followed by an
// 8-byte immediate. When the immediate expression references exactly one
// symbol, eight zero bytes are emitted and an r_x86_64_64 relocation is queued
// in rela_text_users; more than one symbol is an error.
fn (mut e Encoder) movabsq() {
	e.set_current_instr(.movabsq)

	source, desti := e.parse_two_operand()

	if source is Immediate && desti is Register {
		desti.check_regi_size(DataSize.suffix_quad)
		e.add_prefix(Empty{}, Empty{}, desti, [DataSize.suffix_quad])
		e.current_instr.code << u8(0xB8) + desti.base_offset%8

		mut imm_used_symbols := []string{}
		imm_val := eval_expr_get_symbol_64(source.expr, mut imm_used_symbols)
		if imm_used_symbols.len >= 2 {
			error.print(source.pos, 'invalid immediate operand')
			exit(1)
		}
		imm_need_rela := imm_used_symbols.len == 1

		if imm_need_rela {
			// NOTE(review): Rela.adjust is an `int`, so a 64-bit addend is truncated here.
			mut rela := Rela{
				uses: imm_used_symbols[0],
				instr: e.current_instr,
				adjust: int(imm_val),
				offset: e.current_instr.code.len,
			}
			rela.rtype = encoder.r_x86_64_64
			e.current_instr.code << [u8(0), 0, 0, 0, 0, 0, 0, 0]
			e.rela_text_users << rela
		} else {
			mut hex := [u8(0), 0, 0, 0, 0, 0, 0, 0]
			binary.little_endian_put_u64(mut &hex, u64(imm_val))
			e.current_instr.code << hex
		}

		return
	}

	error.print(source.pos, 'invalid operand for instruction')
	exit(1)
}

// mul encodes the one-operand `mul` (F6 /4 for byte operands, F7 /4 otherwise)
// with either a register or a memory operand. Any other operand shape,
// including a trailing comma, is reported as an invalid operand.
fn (mut e Encoder) mul(size DataSize) {
	e.set_current_instr(.mul)

	source := e.parse_operand()

	if e.tok.kind != .comma {
		op_code := if size == DataSize.suffix_byte {
			[u8(0xf6)]
		} else {
			[u8(0xf7)]
		}
		if source is Register {
			source.check_regi_size(size)
			e.add_prefix(Empty{}, Empty{}, source, [size])
			e.current_instr.code << op_code
			e.current_instr.code << compose_mod_rm(encoder.mod_regi, encoder.slash_4, source.base_offset%8)
			return
		}
		if source is Indirection {
			e.add_segment_override_prefix(source)
			e.add_prefix(Empty{}, source.index, source.base, [size])
			e.current_instr.code << op_code
			e.add_modrm_sib_disp(source, encoder.slash_4)
			return
		}
	}
	error.print(source.pos, 'invalid operand for instruction')
	exit(1)
}

// mov_zero_or_sign_extend encodes the MOVZX / MOVSX / MOVSXD family.
// `op_code` is the opcode byte sequence chosen by the caller, `source_size`
// is the width the source register must have and `desti_size` the width of
// the destination register. The source may be a register or a memory operand.
fn (mut e Encoder) mov_zero_or_sign_extend(op_code []u8, source_size DataSize, desti_size DataSize) {
	// Callers pass 0F BE / 0F BF (MOVSX) and 63 (MOVSXD) for the sign-extending
	// forms; every other opcode routed here is a zero-extending MOVZX.
	is_sign_extend := op_code.len > 0 && op_code[op_code.len - 1] in [u8(0xBE), 0xBF, 0x63]
	if is_sign_extend {
		e.set_current_instr(.movsx)
	} else {
		e.set_current_instr(.movzx)
	}

	source, desti := e.parse_two_operand()

	if source is Register && desti is Register {
		// TODO: The following check should be done at add_prefix()
		if desti_size == .suffix_quad && source.lit in ['AH','CH','DH','BH'] {
			error.print(source.pos, 'can\'t encode `%$source.lit` in an instruction requiring REX prefix')
			exit(1)
		}

		source.check_regi_size(source_size)
		desti.check_regi_size(desti_size)
		e.add_prefix(desti, Empty{}, source, [desti_size])
		e.current_instr.code << op_code
		e.current_instr.code << compose_mod_rm(encoder.mod_regi, desti.base_offset%8, source.base_offset%8)
		return
	}
	if source is Indirection && desti is Register {
		desti.check_regi_size(desti_size)
		e.add_segment_override_prefix(source)
		e.add_prefix(desti, source.index, source.base, [desti_size])
		e.current_instr.code << op_code
		e.add_modrm_sib_disp(source, desti.base_offset%8)
		return
	}
	error.print(source.pos, 'invalid operand for instruction')
	exit(1)
}

// test encodes `test` for register, memory and immediate sources.
fn (mut e Encoder) test(size DataSize) {
e.set_current_instr(.test)

	source, desti := e.parse_two_operand()

	// Opcode shared by every register/memory form: 0x84 for byte operands,
	// 0x85 for wider ones.
	reg_form_op_code := if size == DataSize.suffix_byte {
		[u8(0x84)]
	} else {
		[u8(0x85)]
	}

	if source is Register {
		source.check_regi_size(size)
		if desti is Register {
			desti.check_regi_size(size)
			e.add_prefix(source, Empty{}, desti, [size])
			e.current_instr.code << reg_form_op_code
			e.current_instr.code << compose_mod_rm(encoder.mod_regi, source.base_offset%8, desti.base_offset%8)
			return
		}
		if desti is Indirection {
			e.add_segment_override_prefix(desti)
			e.add_prefix(source, desti.index, desti.base, [size])
			e.current_instr.code << reg_form_op_code
			e.add_modrm_sib_disp(desti, source.base_offset%8)
			return
		}
	}
	if source is Indirection && desti is Register {
		desti.check_regi_size(size)
		e.add_segment_override_prefix(source)
		e.add_prefix(desti, source.index, source.base, [size])
		e.current_instr.code << reg_form_op_code
		e.add_modrm_sib_disp(source, desti.base_offset%8)
		return
	}
	if source is Immediate {
		mut symbols := []string{}
		imm_val := int(eval_expr_get_symbol_64(source.expr, mut symbols))
		if symbols.len >= 2 {
			error.print(source.pos, 'invalid immediate operand')
			exit(1)
		}
		imm_need_rela := symbols.len == 1

		// Immediate form: F6 /0 for byte operands, F7 /0 otherwise.
		imm_op_code := if size == DataSize.suffix_byte {
			u8(0xF6)
		} else {
			u8(0xF7)
		}

		match desti {
			Register {
				desti.check_regi_size(size)
				e.add_prefix(Empty{}, Empty{}, desti, [size])
				e.current_instr.code << imm_op_code
				e.current_instr.code << compose_mod_rm(mod_regi, encoder.slash_0, desti.base_offset%8)
			}
			Indirection {
				e.add_segment_override_prefix(desti)
				e.add_prefix(Empty{}, desti.index, desti.base, [size])
				e.current_instr.code << imm_op_code
				e.add_modrm_sib_disp(desti, encoder.slash_0)
			}
			else {
				error.print(source.pos, 'invalid operand for instruction')
				exit(1)
			}
		}

		// A single referenced symbol becomes a relocation; a plain value is emitted inline.
		if imm_need_rela {
			e.add_imm_rela(symbols[0], int(imm_val), size)
		} else {
			e.add_imm_value2(imm_val, size)
		}

		return
	}
	error.print(source.pos, 'invalid operand for instruction')
	exit(1)
}

// arith_instr encodes the two-operand arithmetic/logic group that shares one
// opcode layout. `op_code_base` is the opcode of the byte-sized
// "register -> r/m" form; +1 selects the wider form and +2/+3 the
// "r/m -> register" forms. `slash` is the ModR/M opcode extension used by the
// immediate forms (0x80 / 0x81 / 0x83).
fn (mut e Encoder) arith_instr(kind InstrKind, op_code_base u8, slash u8, size DataSize) {
	e.set_current_instr(kind)

	source, desti := e.parse_two_operand()

	// Low opcode bit: 0 for byte operands, 1 for every wider size.
	width_bit := if size == DataSize.suffix_byte {
		u8(0)
	} else {
		u8(1)
	}

	if source is Register {
		store_op_code := op_code_base + width_bit
		source.check_regi_size(size)
		if desti is Register {
			desti.check_regi_size(size)
			e.add_prefix(source, Empty{}, desti, [size])
			e.current_instr.code << store_op_code
			e.current_instr.code << compose_mod_rm(encoder.mod_regi, source.base_offset%8, desti.base_offset%8)
			return
		}
		if desti is Indirection {
			e.add_segment_override_prefix(desti)
			e.add_prefix(source, desti.index, desti.base, [size])
			e.current_instr.code << store_op_code
			e.add_modrm_sib_disp(desti, source.base_offset%8)
			return
		}
	}
	if source is Indirection && desti is Register {
		load_op_code := op_code_base + 2 + width_bit
		desti.check_regi_size(size)
		e.add_segment_override_prefix(source)
		e.add_prefix(desti, source.index, source.base, [size])
		e.current_instr.code << load_op_code
		e.add_modrm_sib_disp(source, desti.base_offset%8)
		return
	}
	if source is Immediate {
		mut symbols := []string{}
		imm_val := int(eval_expr_get_symbol_64(source.expr, mut symbols))
		if symbols.len >= 2 {
			error.print(source.pos, 'invalid immediate operand')
			exit(1)
		}
		imm_need_rela := symbols.len == 1

		// 0x83 is used only when the value is known now (no relocation) and
		// is_in_i8_range accepts it.
		imm_op_code := if size == DataSize.suffix_byte {
			u8(0x80)
		} else if !imm_need_rela && is_in_i8_range(int(imm_val)) {
			u8(0x83)
		} else {
			u8(0x81)
		}

		match desti {
			Register {
				desti.check_regi_size(size)
				e.add_prefix(Empty{}, Empty{}, desti, [size])
				e.current_instr.code << imm_op_code
				e.current_instr.code << compose_mod_rm(mod_regi, slash, desti.base_offset%8)
			}
			Indirection {
				e.add_segment_override_prefix(desti)
				e.add_prefix(Empty{}, desti.index, desti.base, [size])
				e.current_instr.code << imm_op_code
				e.add_modrm_sib_disp(desti, slash)
			}
			else {
				error.print(source.pos, 'invalid operand for instruction')
				exit(1)
			}
		}

		if imm_need_rela {
			e.add_imm_rela(symbols[0], imm_val, size)
		} else {
			e.add_imm_value(imm_val, size)
		}

		return
	}
	error.print(source.pos, 'invalid operand for instruction')
	exit(1)
}

// imul encodes the three shapes of `imul`:
//   - one operand (F6/F7 /5) with a register or memory operand,
//   - two operands (0F AF /r) with a register or memory source and a register
//     destination,
//   - an immediate source (6B / 69) with one or two register operands.
fn (mut e Encoder) imul(size DataSize) {
	e.set_current_instr(.imul)

	source := e.parse_operand()

	// One-operand form: no comma follows the first operand.
	if e.tok.kind != .comma {
		single_op_code := if size == DataSize.suffix_byte {
			[u8(0xf6)]
		} else {
			[u8(0xf7)]
		}
		if source is Register {
			source.check_regi_size(size)
			e.add_prefix(Empty{}, Empty{}, source, [size])
			e.current_instr.code << single_op_code
			e.current_instr.code << compose_mod_rm(encoder.mod_regi, encoder.slash_5, source.base_offset%8)
			return
		}
		if source is Indirection {
			e.add_segment_override_prefix(source)
			e.add_prefix(Empty{}, source.index, source.base, [size])
			e.current_instr.code << single_op_code
			e.add_modrm_sib_disp(source, encoder.slash_5)
			return
		}
	}

	e.expect(.comma)
	second := e.parse_operand()

	// Two-operand form: the destination must be a register.
	if second is Register {
		if source is Indirection {
			second.check_regi_size(size)
			e.add_segment_override_prefix(source)
			e.add_prefix(second, source.index, source.base, [size])
			e.current_instr.code << [u8(0x0f), 0xaf]
			e.add_modrm_sib_disp(source, second.base_offset%8)
			return
		}
		if source is Register {
			source.check_regi_size(size)
			second.check_regi_size(size)
			e.add_prefix(second, Empty{}, source, [size])
			e.current_instr.code << [u8(0x0f), 0xaf]
			e.current_instr.code << compose_mod_rm(encoder.mod_regi, second.base_offset%8, source.base_offset%8)
			return
		}
	}

	// Immediate form: when no third operand is written, the second operand is
	// used as both source and destination.
	third := if e.tok.kind == .comma {
		e.expect(.comma)
		e.parse_operand()
	} else {
		second
	}

	if source is Immediate && second is Register && third is Register {
		mut symbols := []string{}
		imm_val := int(eval_expr_get_symbol_64(source.expr, mut symbols))
		if symbols.len >= 2 {
			error.print(source.pos, 'invalid immediate operand')
			exit(1)
		}
		imm_need_rela := symbols.len == 1

		imm_op_code := if !imm_need_rela && is_in_i8_range(imm_val) {
			u8(0x6b)
		} else {
			u8(0x69)
		}

		second.check_regi_size(size)
		third.check_regi_size(size)
		e.add_prefix(third, Empty{}, second, [size])
		e.current_instr.code << imm_op_code
		e.current_instr.code << compose_mod_rm(mod_regi, third.base_offset%8, second.base_offset%8)

		if imm_need_rela {
			e.add_imm_rela(symbols[0], imm_val, size)
		} else {
			e.add_imm_value(imm_val, size)
		}

		return
	}
	error.print(source.pos, 'invalid operand for instruction')
	exit(1)
}

// one_operand_arith encodes the F6/F7 group instructions that take a single
// register or memory operand; `slash` is the ModR/M opcode extension that
// selects the instruction.
fn (mut e Encoder) one_operand_arith(kind InstrKind, slash u8, size DataSize) {
	e.set_current_instr(kind)

	operand := e.parse_operand()

	group_op_code := if size == DataSize.suffix_byte {
		[u8(0xF6)]
	} else {
		[u8(0xF7)]
	}

	match operand {
		Register {
			operand.check_regi_size(size)
			e.add_prefix(Empty{}, Empty{}, operand, [size])
			e.current_instr.code << group_op_code
			e.current_instr.code << compose_mod_rm(encoder.mod_regi, slash, operand.base_offset%8)
			return
		}
		Indirection {
			e.add_segment_override_prefix(operand)
			e.add_prefix(Empty{}, operand.index, operand.base, [size])
			e.current_instr.code << group_op_code
			e.add_modrm_sib_disp(operand, slash)
			return
		}
		else {}
	}

	error.print(operand.pos, 'invalid operand for instruction')
	exit(1)
}

// lea encodes `lea mem, %reg` (opcode 8D). The operand size is taken from the
// mnemonic suffix in `instr_name_upper`.
fn (mut e Encoder) lea(instr_name_upper string) {
	e.set_current_instr(.lea)

	operand_size := get_size_by_suffix(instr_name_upper)

	source, desti := e.parse_two_operand()

	if source is Indirection {
		if desti is Register {
			desti.check_regi_size(operand_size)
			e.add_segment_override_prefix(source)
			e.add_prefix(desti, source.index, source.base, [operand_size])
			e.current_instr.code << u8(0x8D)
			e.add_modrm_sib_disp(source, desti.base_offset%8)
			return
		}
	}
	error.print(source.pos, 'invalid operand for instruction')
	exit(1)
}

// set encodes a SETcc instruction: `op_code` followed by a ModR/M byte that
// names a byte-sized register operand.
fn (mut e Encoder) set(kind InstrKind, op_code []u8) {
	e.set_current_instr(kind)

	regi := e.parse_operand()

	if regi is Register {
		regi.check_regi_size(DataSize.suffix_byte)
		e.add_prefix(Empty{}, Empty{}, regi, [DataSize.suffix_byte])
		e.current_instr.code << op_code
		e.current_instr.code << compose_mod_rm(encoder.mod_regi, encoder.slash_0, regi.base_offset%8)
return
	}

	error.print(regi.pos, 'invalid operand for instruction')
	exit(1)
}

// shift encodes the shift group selected by `slash`. Three source shapes are
// accepted:
//   - no source operand (shift by one): D0/D1,
//   - `%cl` as the count: D2/D3,
//   - an immediate count: D0/D1 when the count is the constant 1, otherwise
//     C0/C1 followed by a one-byte immediate.
// The shifted operand may be a register or a memory operand.
fn (mut e Encoder) shift(kind InstrKind, slash u8, size DataSize) {
	e.set_current_instr(kind)

	source := e.parse_operand()

	// single operand
	if e.tok.kind != .comma {
		op_code := if size == DataSize.suffix_byte {
			u8(0xD0)
		} else {
			u8(0xD1)
		}

		if source is Register {
			source.check_regi_size(size)
			e.add_prefix(Empty{}, Empty{}, source, [size])
			e.current_instr.code << op_code
			e.current_instr.code << compose_mod_rm(encoder.mod_regi, slash, source.base_offset%8)
			return
		} else if source is Indirection {
			e.add_segment_override_prefix(source)
			e.add_prefix(Empty{}, source.index, source.base, [size])
			e.current_instr.code << op_code
			e.add_modrm_sib_disp(source, slash)
			return
		}
	}

	e.expect(.comma)
	desti := e.parse_operand()

	if source is Register {
		// Only %cl may be used as a register shift count.
		if source.lit != 'CL' {
			error.print(source.pos, 'invalid operand for instruction')
			exit(1)
		}
		op_code := if size == DataSize.suffix_byte {
			u8(0xD2)
		} else {
			u8(0xD3)
		}
		if desti is Register {
			desti.check_regi_size(size)
			e.add_prefix(source, Empty{}, desti, [size])
			e.current_instr.code << op_code
			e.current_instr.code << compose_mod_rm(encoder.mod_regi, slash, desti.base_offset%8)
			return
		} else if desti is Indirection {
			e.add_segment_override_prefix(desti)
			e.add_prefix(source, desti.index, desti.base, [size])
			e.current_instr.code << op_code
			e.add_modrm_sib_disp(desti, slash)
			return
		}
	}

	if source is Immediate {
		mut used_symbols := []string{}
		imm_val := int(eval_expr_get_symbol_64(source.expr, mut used_symbols))

		if used_symbols.len >= 2 {
			error.print(source.expr.pos, 'invalid immediate operand')
			exit(1)
		}

		imm_need_rela := used_symbols.len == 1
		// The short "shift by one" encoding is only valid when the count is
		// known at assembly time.
		op_code := if imm_val == 1 && !imm_need_rela {
			if size == DataSize.suffix_byte {
				u8(0xD0)
			} else {
				u8(0xD1)
			}
		} else {
			if size == DataSize.suffix_byte {
				u8(0xC0)
			} else {
				u8(0xC1)
			}
		}

		match desti {
			Register {
				desti.check_regi_size(size)
				e.add_prefix(Empty{}, Empty{}, desti, [size])
				e.current_instr.code << op_code
				e.current_instr.code << compose_mod_rm(encoder.mod_regi, slash, desti.base_offset%8)
			}
			Indirection {
				e.add_segment_override_prefix(desti)
				e.add_prefix(Empty{}, desti.index, desti.base, [size])
				e.current_instr.code << op_code
				e.add_modrm_sib_disp(desti, slash)
			}
			else {
				error.print(desti.pos, 'invalid operand for instruction')
				exit(1)
			}
		}

		if imm_need_rela {
			// Pass the constant part of `sym + n` as the addend, as the other
			// immediate encoders in this file do, instead of dropping it.
			e.add_imm_rela(used_symbols[0], imm_val, DataSize.suffix_byte)
		} else if imm_val != 1 {
			e.add_imm_value(u8(imm_val), DataSize.suffix_byte)
		}
		return
	}
	error.print(source.pos, 'invalid operand for instruction')
	exit(1)
}

--------------------------------------------------------------------------------
/encoder/encoder.v:
--------------------------------------------------------------------------------
module encoder

import error
import token
import lexer
import strconv

// Encoder holds the parser state (lexer and current token) together with
// everything collected while encoding: the instruction list, pending
// relocations, and the user-defined symbols and sections.
pub struct Encoder {
mut:
	tok token.Token // current token
	l   lexer.Lexer // lexer
pub mut:
	current_section_name  string
	current_instr         &Instr
	instrs                []&Instr // All instructions, sections, symbols, directives
	rela_text_users       []Rela
	user_defined_symbols  map[string]&Instr
	user_defined_sections map[string]&UserDefinedSection
}

// InstrKind tags every entry of Encoder.instrs: directives, labels and
// machine instructions.
pub enum InstrKind {
	@none
	section
global
	local
	hidden
	internal
	protected
	string
	byte
	word
	long
	quad
	zero
	add
	sub
	instr_or
	adc
	sbb
	xor
	and
	imul
	idiv
	div
	neg
	mul
	lea
	mov
	movabsq
	rep
	test
	movzx
	movsx
	not
	cqto
	cltq
	cltd
	cwtl
	cmp
	shl
	shr
	sar
	sal
	pop
	push
	call
	seto
	setno
	setb
	setnb
	setae
	setbe
	seta
	setpo
	setl
	setg
	setle
	setge
	sete
	setne
	jmp
	jne
	je
	jl
	jg
	jle
	jge
	jbe
	jnb
	jnbe
	jp
	ja
	js
	jb
	jns
	ret
	syscall
	nop
	hlt
	leave
	cmovs
	cmovns
	cmovg
	cmovge
	cmovl
	cmovle
	cvttss2sil
	cvtsi2ssq
	cvtsi2sdq
	cvtsd2ss
	cvtss2sd
	movss
	movsd
	movd
	ucomiss
	ucomisd
	comisd
	comiss
	subss
	subsd
	addss
	addsd
	mulss
	mulsd
	divss
	divsd
	movaps
	movups
	xorpd
	xorps
	pxor
	label
}

// ELF symbol binding (STB_*).
const stb_local = 0
const stb_global = 1
// ELF symbol type (STT_*).
const stt_notype = 0
const stt_object = 1
const stt_func = 2
const stt_section = 3
const stt_file = 4
const stt_common = 5
const stt_tls = 6
const stt_relc = 8
const stt_srelc = 9
const stt_loos = 10
const stt_hios = 12
const stt_loproc = 13
// The ELF specification defines STT_HIPROC as 15 (the processor-specific range is 13..15).
const stt_hiproc = 15
// ELF section type (SHT_*).
const sht_null = 0
const sht_progbits = 1
const sht_symtab = 2
const sht_strtab = 3
const sht_rela = 4
// ELF section flags (SHF_*).
const shf_write = 0x1
const shf_alloc = 0x2
const shf_execinstr = 0x4
const shf_merge = 0x10
const shf_strings = 0x20
const shf_info_link = 0x40
const shf_link_order = 0x80
const shf_os_nonconforming = 0x100
const shf_group = 0x200
const shf_tls = 0x400
// x86-64 relocation types (R_X86_64_*).
const r_x86_64_none = u64(0)
const r_x86_64_64 = u64(1)
const r_x86_64_pc32 = u64(2)
const r_x86_64_got32 = u64(3)
const r_x86_64_plt32 = u64(4)
const r_x86_64_copy = u64(5)
const r_x86_64_glob_dat = u64(6)
const r_x86_64_jump_slot = u64(7)
const r_x86_64_relative = u64(8)
const r_x86_64_gotpcrel = u64(9)
const r_x86_64_32 = u64(10)
const r_x86_64_32s = u64(11)
const r_x86_64_16 = u64(12)
const r_x86_64_pc16 = u64(13)
const r_x86_64_8 = u64(14)
const r_x86_64_pc8 = u64(15)
const r_x86_64_pc64 = u64(24)
// ELF symbol visibility (STV_*).
const stv_default = 0
const stv_internal = 1
const stv_hidden = 2
const stv_protected = 3

// Instr is one entry of the encoder output: an instruction, a directive or a
// label, together with its encoded bytes and symbol attributes.
pub struct Instr {
pub mut:
	kind           InstrKind      @[required]
	code           []u8 = []u8{cap: 16}
	symbol_name    string
	flags          string
	addr           i64
	binding        u8
	visibility     u8 // STV_DEFAULT, STV_INTERNAL, STV_HIDDEN, STV_PROTECTED
	symbol_type    u8
	section_name   string         @[required]
	is_jmp_or_call bool
	pos            token.Position @[required]
}

// Rela describes a pending relocation: the symbol it uses, the instruction
// and byte offset it applies to, the relocation type and the addend.
pub struct Rela {
pub mut:
	uses                string
	instr               &Instr
	offset              i64
	rtype               u64
	adjust              int
	is_already_resolved bool
}

// Expr is any operand or sub-expression produced by the operand parser.
pub type Expr = Binop | Ident | Immediate | Indirection | Neg | Number | Register | Star | Xmm

// Number is a numeric literal, kept as its source text.
pub struct Number {
pub:
	lit string
	pos token.Position
}

// Star is a `*%reg` operand.
pub struct Star {
pub:
	regi Register
	pos  token.Position
}

// Binop is a binary expression; `op` is the operator token kind.
pub struct Binop {
pub:
	left_hs  Expr
	right_hs Expr
	op       token.TokenKind
	pos      token.Position
}

// Neg is a unary minus applied to `expr`.
pub struct Neg {
pub:
	expr Expr
	pos  token.Position
}

// RegiAll is what add_prefix accepts for each register slot; Empty marks an
// unused slot.
type RegiAll = Register | Xmm | Empty

// Register is a general-purpose register operand.
pub struct Register {
pub mut:
	lit          string
	size         DataSize
	base_offset  u8
	rex_required bool
	pos          token.Position
}

// Xmm is an XMM register operand.
pub struct Xmm {
pub mut:
	lit          string
	size         DataSize
	base_offset  u8
	rex_required bool
	pos          token.Position
}

// Empty is the placeholder passed where no register takes part in the encoding.
struct Empty {
pub mut:
	lit          string
	size         DataSize
	rex_required bool
	base_offset  u8
}

// Immediate is a `$expr` operand.
pub struct Immediate {
pub:
	expr Expr
	pos  token.Position
}

// Indirection is a memory operand of the form `disp(base, index, scale)`.
pub struct Indirection {
pub mut:
	disp            Expr
	base            Register
	index           Register
	scale           Expr
	pos             token.Position
	has_base        bool
	has_index_scale bool
}

// Ident is a bare identifier (symbol name).
pub struct Ident {
pub:
	lit string
	pos token.Position
}

// UserDefinedSection accumulates the bytes, address and flags of one section.
pub struct UserDefinedSection {
pub mut:
	code  []u8
	addr  int
	flags int
}

// DataSize is the operand width selected by an instruction suffix or register.
enum DataSize {
	suffix_byte
	suffix_word
	suffix_long
	suffix_quad
	suffix_single
	suffix_double
	suffix_unkown
}

// ModR/M `mod` field values.
const mod_indirection_with_no_disp = u8(0)
const mod_indirection_with_disp8 = u8(1)
const mod_indirection_with_disp32 = u8(2)
const mod_regi = u8(3)
const rex_w = u8(0x48)
const operand_size_prefix16 = u8(0x66)
// ModR/M opcode extensions.
const slash_0 = 0 // /0
const slash_1 = 1 // /1
const slash_2 = 2 // /2
const slash_3 = 3 // /3
const slash_4 = 4 // /4
const slash_5 = 5 // /5
const slash_6 = 6 // /6
const slash_7 = 7 // /7

// new creates an Encoder that reads tokens from `l`. The first token is lexed
// immediately and the `.bss`, `.data` and `.text` sections are registered,
// with `.text` as the initial current section name.
// `file_name` is currently unused.
pub fn new(mut l lexer.Lexer, file_name string) &Encoder {
	tok := l.lex()
	mut e := &Encoder{
		tok: tok
		l: l
		current_section_name: '.text'
		instrs: []&Instr{cap: 1500000}
		current_instr: unsafe { nil }
	}
	e.add_section('.bss', 'aw', tok.pos)
	e.add_section('.data', 'aw', tok.pos)
	e.add_section('.text', 'ax', tok.pos)
	return e
}

// set_current_instr starts a new Instr of the given kind in the current
// section, makes it the current instruction and appends it to e.instrs.
fn (mut e Encoder) set_current_instr(kind InstrKind) {
instr := &Instr{
		pos: e.tok.pos
		kind: kind
		section_name: e.current_section_name
	}
	e.current_instr = instr
	e.instrs << instr
}

// next advances to the next token from the lexer.
fn (mut e Encoder) next() {
	e.tok = e.l.lex()
}

// expect consumes the current token if it has kind `exp`; otherwise it reports
// an "unexpected token" error and exits.
fn (mut e Encoder) expect(exp token.TokenKind) {
	if e.tok.kind != exp {
		error.print(e.tok.pos, 'unexpected token `${e.tok.lit}`')
		exit(1)
	}
	e.next()
}

// parse_register parses `%name` and returns the matching XMM or general
// register with its position filled in. The name is looked up
// case-insensitively; an unknown name is a fatal error.
fn (mut e Encoder) parse_register() Expr {
	e.expect(.percent)

	register_name := e.tok.lit.to_upper()

	if mut xmm_register := xmm_registers[register_name] {
		xmm_register.pos = e.tok.pos
		e.next()
		return xmm_register
	}

	if mut general_register := general_registers[register_name] {
		general_register.pos = e.tok.pos
		e.next()
		return general_register
	}

	error.print(e.tok.pos, 'unkown register `${e.tok.lit}`')
	exit(1)
}

// parse_factor parses a number, an identifier or a unary minus applied to
// another factor. The returned node carries the position of its own first
// token.
fn (mut e Encoder) parse_factor() Expr {
	// Capture the position before consuming anything; after e.next() the
	// current token is already the one that follows this factor.
	pos := e.tok.pos
	match e.tok.kind {
		.number {
			lit := e.tok.lit
			e.next()
			return Number{
				pos: pos
				lit: lit
			}
		}
		.ident {
			lit := e.tok.lit
			e.next()
			return Ident{
				pos: pos
				lit: lit
			}
		}
		.minus {
			e.next()
			expr := e.parse_factor()
			return Neg{
				pos: pos
				expr: expr
			}
		}
		else {
			error.print(e.tok.pos, 'unexpected token `${e.tok.lit}`')
			exit(1)
		}
	}
}

// parse_expr parses an arithmetic expression made of `+`, `-`, `*` and `/`.
// Operators are left-associative and `*` / `/` bind tighter than `+` / `-`,
// so `1 - 2 - 3` is `(1 - 2) - 3` and `2 * 3 + 4` is `(2 * 3) + 4`.
fn (mut e Encoder) parse_expr() Expr {
	mut expr := e.parse_term()
	for e.tok.kind in [.plus, .minus] {
		op := e.tok.kind
		pos := e.tok.pos
		e.next()
		right_hs := e.parse_term()
		expr = Expr(Binop{
			left_hs: expr
			right_hs: right_hs
			op: op
			pos: pos
		})
	}
	return expr
}

// parse_term parses a left-associative chain of factors joined by `*` or `/`.
fn (mut e Encoder) parse_term() Expr {
	mut expr := e.parse_factor()
	for e.tok.kind in [.mul, .div] {
		op := e.tok.kind
		pos := e.tok.pos
		e.next()
		right_hs := e.parse_factor()
		expr = Expr(Binop{
			left_hs: expr
			right_hs: right_hs
			op: op
			pos: pos
		})
	}
	return expr
}

// parse_two_operand parses `source, destination` and returns both operands.
fn (mut e Encoder) parse_two_operand() (Expr, Expr) {
source := e.parse_operand()
	e.expect(.comma)
	desti := e.parse_operand()
	return source, desti
}

// parse_operand parses one operand:
//   - `$expr`  -> Immediate
//   - `%reg`   -> Register or Xmm
//   - `*%reg`  -> Star
//   - anything else is parsed as an expression, optionally followed by
//     `(base, index, scale)`, which yields an Indirection. Without the
//     parenthesised part the bare expression is returned.
fn (mut e Encoder) parse_operand() Expr {
	pos := e.tok.pos

	match e.tok.kind {
		.dolor {
			e.next()
			return Immediate{
				expr: e.parse_expr()
				pos: pos
			}
		}
		.percent {
			return e.parse_register()
		}
		.mul {
			e.expect(.mul)
			regi := e.parse_register() as Register
			return Star{
				regi: regi
				pos: pos
			}
		}
		else {
			// parse indirect
			// An operand starting with `(` has an implicit displacement of 0.
			expr := if e.tok.kind == .lpar {
				Expr(Number{
					lit: '0'
					pos: pos
				})
			} else {
				e.parse_expr()
			}
			if e.tok.kind != .lpar {
				return expr
			}
			e.next()
			mut indirection := Indirection{
				disp: expr
				pos: pos
			}
			if e.tok.kind != .comma {
				indirection.has_base = true
				indirection.base = e.parse_register() as Register
			}
			// has index and scale
			if e.tok.kind == .comma {
				indirection.has_index_scale = true
				e.next()
				indirection.index = e.parse_register() as Register
				// A missing scale defaults to 1.
				indirection.scale = if e.tok.kind == .comma {
					e.expect(.comma)
					e.parse_expr()
				} else {
					Expr(Number{
						lit: '1'
						pos: pos
					})
				}
			}
			e.expect(.rpar)
			return indirection
		}
	}
	error.print(e.tok.pos, 'unexpected token `${e.tok.lit}`')
	exit(1)
}

// eval_expr_get_symbol_64 evaluates `expr` to a 64-bit integer.
// Every identifier encountered is appended to `arr` and contributes 0 to the
// result, so callers can tell from `arr.len` whether a relocation is needed.
// Invalid number literals and division by zero are reported as errors and
// terminate the process; unsupported node kinds panic.
fn eval_expr_get_symbol_64(expr Expr, mut arr []string) i64 {
	return match expr {
		Number {
			strconv.parse_int(expr.lit, 0, 64) or {
				error.print(expr.pos, 'invalid number `${expr.lit}`')
				exit(1)
			}
		}
		Binop {
			match expr.op {
				.plus {
					eval_expr_get_symbol_64(expr.left_hs, mut arr) +
						eval_expr_get_symbol_64(expr.right_hs, mut arr)
				}
				.minus {
					eval_expr_get_symbol_64(expr.left_hs, mut arr) - eval_expr_get_symbol_64(expr.right_hs, mut
						arr)
				}
				.mul {
					eval_expr_get_symbol_64(expr.left_hs, mut arr) * eval_expr_get_symbol_64(expr.right_hs, mut
						arr)
				}
				.div {
					// Evaluate left before right so symbols are collected in source order.
					dividend := eval_expr_get_symbol_64(expr.left_hs, mut arr)
					divisor := eval_expr_get_symbol_64(expr.right_hs, mut arr)
					if divisor == 0 {
						error.print(expr.pos, 'division by zero')
						exit(1)
					}
					dividend / divisor
				}
				else {
					panic('not implemented yet')
				}
			}
		}
		Ident {
			arr << expr.lit
			0
		}
		Neg {
			eval_expr_get_symbol_64(expr.expr, mut arr) * -1
		}
		Immediate {
			eval_expr_get_symbol_64(expr.expr, mut arr)
		}
		else {
			panic('not implmented yet')
		}
	}
}

// eval_expr evaluates `expr` and returns the result truncated to `int`,
// discarding any symbols it references.
fn eval_expr(expr Expr) int {
	mut empty := []string{}
	return int(eval_expr_get_symbol_64(expr, mut empty))
}

// get_size_by_suffix maps the last letter of an instruction name
// (Q, L, W or B, case-insensitive) to its DataSize. An empty name or any other
// suffix panics.
fn get_size_by_suffix(name string) DataSize {
	if name.len == 0 {
		panic('unkown DataSize')
	}
	return match name.to_upper()[name.len - 1] {
		`Q` {
			DataSize.suffix_quad
		}
		`L` {
			DataSize.suffix_long
		}
		`W` {
			DataSize.suffix_word
		}
		`B` {
			DataSize.suffix_byte
		}
		else {
			panic('unkown DataSize')
		}
	}
}

// check_regi_size reports an error and exits with a failure status when the
// register's size differs from the size the instruction requires.
fn (regi Register) check_regi_size(size DataSize) {
	if regi.size != size {
		error.print(regi.pos, 'invalid size of register for instruction.')
		// Every other error path in this file exits with status 1.
		exit(1)
	}
}

// rex builds a REX prefix byte (0100WRXB) from its four one-bit fields.
fn rex(w u8, r u8, x u8, b u8) u8 {
	return 64 | (w << 3) | (r << 2) | (x << 1) | b
}

// add_prefix emits the legacy and REX prefixes for the current instruction.
// `regi_r`, `regi_i` and `regi_b` are the registers that occupy the ModR/M
// reg field, the SIB index and the r/m-or-base slot; `sizes` lists the
// operand sizes involved.
fn (mut e Encoder) add_prefix(regi_r RegiAll, regi_i RegiAll, regi_b RegiAll, sizes []DataSize) {
	mut w, mut r, mut x, mut b := u8(0), u8(0), u8(0), u8(0)

	if regi_r.base_offset >= 8 {
		r = 1
	}

	if regi_i.base_offset >= 8 {
		x = 1
	}

	if regi_b.base_offset >= 8 {
		b = 1
	}

	if DataSize.suffix_word in sizes {
		e.current_instr.code << encoder.operand_size_prefix16
	}
	if DataSize.suffix_single in sizes {
e.current_instr.code << 0xF3
	}
	if DataSize.suffix_double in sizes {
		e.current_instr.code << 0xF2
	}
	if DataSize.suffix_quad in sizes {
		w = 1
	}

	if w != 0 || r != 0 || b != 0 || x != 0 || regi_r.rex_required || regi_b.rex_required {
		e.current_instr.code << rex(w, r, x, b)
	}
}

// align_to rounds `n` up to the next multiple of `align`.
fn align_to(n int, align int) int {
	padded := n + align - 1
	return padded / align * align
}

// is_in_i8_range reports whether `n` fits in a signed 8-bit integer.
fn is_in_i8_range(n int) bool {
	return n >= -128 && n <= 127
}

// is_in_i32_range reports whether `n` is below 1 << 31.
// NOTE(review): there is no lower-bound check and `n` is already an `int`;
// confirm against the callers what this predicate is meant to guard.
fn is_in_i32_range(n int) bool {
	return n < (1 << 31)
}

// compose_mod_rm packs the `mod`, `reg/opcode` and `r/m` fields into one
// ModR/M byte.
fn compose_mod_rm(mod u8, reg_op u8, rm u8) u8 {
	mod_bits := mod << 6
	reg_bits := reg_op << 3
	return mod_bits + reg_bits + rm
}

fn (mut e Encoder) encode_instr() {
	pos := e.tok.pos

	instr_name := e.tok.lit
	instr_name_upper := instr_name.to_upper()
	e.next()

	if e.tok.kind == .colon {
		instr := Instr{
			kind: .label
			pos: pos
			section_name: e.current_section_name
			symbol_name: instr_name
		}
		e.expect(.colon)

		if instr_name in e.user_defined_symbols {
			error.print(pos, 'symbol `${instr_name}` is already defined')
			exit(1)
		}

		e.user_defined_symbols[instr_name] = &instr
		e.instrs << &instr
		return
	}

	match instr_name_upper {
		'.SECTION' {
			e.section()
		}
		'.TEXT' {
			e.add_section('.text', 'ax', pos)
		}
		'.DATA' {
			e.add_section('.data', 'wa', pos)
		}
		'.BSS' {
			e.add_section('.bss', 'wa', pos)
		}
		'.GLOBAL', '.GLOBL' {
			e.instrs << &Instr{
				kind: .global
				pos: pos
				section_name: e.current_section_name
				symbol_name: e.tok.lit
			}
			e.next()
		}
		'.LOCAL' {
			e.instrs << &Instr{
				kind: .local
				pos: pos
				section_name: e.current_section_name
				symbol_name: e.tok.lit
			}
			e.next()
		}
'.HIDDEN' { 695 | e.instrs << &Instr{ 696 | kind: .hidden 697 | pos: pos 698 | section_name: e.current_section_name 699 | symbol_name: e.tok.lit 700 | } 701 | e.next() 702 | } 703 | '.INTERNAL' { 704 | e.instrs << &Instr{ 705 | kind: .internal 706 | pos: pos 707 | section_name: e.current_section_name 708 | symbol_name: e.tok.lit 709 | } 710 | e.next() 711 | } 712 | '.PROTECTED' { 713 | e.instrs << &Instr{ 714 | kind: .protected 715 | pos: pos 716 | section_name: e.current_section_name 717 | symbol_name: e.tok.lit 718 | } 719 | e.next() 720 | } 721 | '.STRING' { 722 | e.string() 723 | } 724 | '.BYTE' { 725 | e.byte() 726 | } 727 | '.WORD' { 728 | e.word() 729 | } 730 | '.LONG' { 731 | e.long() 732 | } 733 | '.QUAD' { 734 | e.quad() 735 | } 736 | '.ZERO' { 737 | e.zero() 738 | } 739 | 'POP', 'POPQ' { 740 | e.pop() 741 | } 742 | 'PUSHQ', 'PUSH' { 743 | e.push() 744 | } 745 | 'CALLQ', 'CALL' { 746 | e.call() 747 | } 748 | 'LEAQ', 'LEAL', 'LEAW' { 749 | e.lea(instr_name_upper) 750 | } 751 | 'NOTQ', 'NOTL', 'NOTW', 'NOTB' { 752 | e.one_operand_arith(.not, encoder.slash_2, get_size_by_suffix(instr_name_upper)) 753 | } 754 | 'NEGQ', 'NEGL', 'NEGW', 'NEGB' { 755 | e.one_operand_arith(.neg, encoder.slash_3, get_size_by_suffix(instr_name_upper)) 756 | } 757 | 'DIVQ', 'DIVL', 'DIVW', 'DIVB' { 758 | e.one_operand_arith(.div, encoder.slash_6, get_size_by_suffix(instr_name_upper)) 759 | } 760 | 'IDIVQ', 'IDIVL', 'IDIVW', 'IDIVB' { 761 | e.one_operand_arith(.idiv, encoder.slash_7, get_size_by_suffix(instr_name_upper)) 762 | } 763 | 'IMULQ', 'IMULL', 'IMULW' { 764 | e.imul(get_size_by_suffix(instr_name_upper)) 765 | } 766 | 'MULQ', 'MULL', 'MULW', 'MULB' { 767 | e.mul(get_size_by_suffix(instr_name_upper)) 768 | } 769 | 'MOVQ', 'MOVL', 'MOVW', 'MOVB' { 770 | e.mov(get_size_by_suffix(instr_name_upper)) 771 | } 772 | 'MOVZBW' { 773 | e.mov_zero_or_sign_extend([u8(0x0F), 0xB6], DataSize.suffix_byte, DataSize.suffix_word) 774 | } 775 | 'MOVZBL' { 776 | 
e.mov_zero_or_sign_extend([u8(0x0F), 0xB6], DataSize.suffix_byte, DataSize.suffix_long) 777 | } 778 | 'MOVZBQ' { 779 | e.mov_zero_or_sign_extend([u8(0x0F), 0xB6], DataSize.suffix_byte, DataSize.suffix_quad) 780 | } 781 | 'MOVZWQ' { 782 | e.mov_zero_or_sign_extend([u8(0x0F), 0xB7], DataSize.suffix_word, DataSize.suffix_quad) 783 | } 784 | 'MOVZWL' { 785 | e.mov_zero_or_sign_extend([u8(0x0F), 0xB7], DataSize.suffix_word, DataSize.suffix_long) 786 | } 787 | 'MOVSBL' { 788 | e.mov_zero_or_sign_extend([u8(0x0F), 0xBE], DataSize.suffix_byte, DataSize.suffix_long) 789 | } 790 | 'MOVSBW' { 791 | e.mov_zero_or_sign_extend([u8(0x0F), 0xBE], DataSize.suffix_byte, DataSize.suffix_word) 792 | } 793 | 'MOVSBQ' { 794 | e.mov_zero_or_sign_extend([u8(0x0F), 0xBE], DataSize.suffix_byte, DataSize.suffix_quad) 795 | } 796 | 'MOVSWL' { 797 | e.mov_zero_or_sign_extend([u8(0x0F), 0xBF], DataSize.suffix_word, DataSize.suffix_long) 798 | } 799 | 'MOVSWQ' { 800 | e.mov_zero_or_sign_extend([u8(0x0F), 0xBF], DataSize.suffix_word, DataSize.suffix_quad) 801 | } 802 | 'MOVSLQ' { 803 | e.mov_zero_or_sign_extend([u8(0x63)], DataSize.suffix_long, DataSize.suffix_quad) 804 | } 805 | 'MOVABSQ' { 806 | e.movabsq() 807 | } 808 | 'TESTQ', 'TESTL', 'TESTW', 'TESTB' { 809 | e.test(get_size_by_suffix(instr_name_upper)) 810 | } 811 | 'ADDQ', 'ADDL', 'ADDW', 'ADDB' { 812 | e.arith_instr(.add, 0, encoder.slash_0, get_size_by_suffix(instr_name_upper)) 813 | } 814 | 'ORQ', 'ORL', 'ORW', 'ORB' { 815 | e.arith_instr(.instr_or, 0x8, encoder.slash_1, get_size_by_suffix(instr_name_upper)) 816 | } 817 | 'ADCQ', 'ADCL', 'ADCW', 'ADCB' { 818 | e.arith_instr(.adc, 0x10, encoder.slash_2, get_size_by_suffix(instr_name_upper)) 819 | } 820 | 'SBBQ', 'SBBL', 'SBBW', 'SBBB' { 821 | e.arith_instr(.sbb, 0x18, encoder.slash_3, get_size_by_suffix(instr_name_upper)) 822 | } 823 | 'ANDQ', 'ANDL', 'ANDW', 'ANDB' { 824 | e.arith_instr(.and, 0x20, encoder.slash_4, get_size_by_suffix(instr_name_upper)) 825 | } 826 | 'SUBQ', 'SUBL', 
'SUBW', 'SUBB' { 827 | e.arith_instr(.sub, 0x28, encoder.slash_5, get_size_by_suffix(instr_name_upper)) 828 | } 829 | 'XORQ', 'XORL', 'XORW', 'XORB' { 830 | e.arith_instr(.xor, 0x30, encoder.slash_6, get_size_by_suffix(instr_name_upper)) 831 | } 832 | 'CMPQ', 'CMPL', 'CMPW', 'CMPB' { 833 | e.arith_instr(.cmp, 0x38, encoder.slash_7, get_size_by_suffix(instr_name_upper)) 834 | } 835 | 'SHLQ', 'SHLL', 'SHLW', 'SHLB' { 836 | e.shift(.shl, encoder.slash_4, get_size_by_suffix(instr_name_upper)) 837 | } 838 | 'SHRQ', 'SHRL', 'SHRW', 'SHRB' { 839 | e.shift(.shr, encoder.slash_5, get_size_by_suffix(instr_name_upper)) 840 | } 841 | 'SARQ', 'SARL', 'SARW', 'SARB' { 842 | e.shift(.sar, encoder.slash_7, get_size_by_suffix(instr_name_upper)) 843 | } 844 | 'SALQ', 'SALL', 'SALW', 'SALB' { 845 | e.shift(.sal, encoder.slash_4, get_size_by_suffix(instr_name_upper)) 846 | } 847 | 'SETO' { 848 | e.set(.seto, [u8(0x0F), 0x90]) 849 | } 850 | 'SETNO' { 851 | e.set(.setno, [u8(0x0F), 0x91]) 852 | } 853 | 'SETB' { 854 | e.set(.setb, [u8(0x0F), 0x92]) 855 | } 856 | 'SETAE' { 857 | e.set(.setae, [u8(0x0F), 0x93]) 858 | } 859 | 'SETE' { 860 | e.set(.sete, [u8(0x0F), 0x94]) 861 | } 862 | 'SETNE' { 863 | e.set(.setne, [u8(0x0F), 0x95]) 864 | } 865 | 'SETNB' { 866 | e.set(.setnb, [u8(0x0F), 0x93]) 867 | } 868 | 'SETBE' { 869 | e.set(.setbe, [u8(0x0F), 0x96]) 870 | } 871 | 'SETA' { 872 | e.set(.seta, [u8(0x0F), 0x97]) 873 | } 874 | 'SETPO' { 875 | e.set(.setpo, [u8(0x0F), 0x9B]) 876 | } 877 | 'SETL' { 878 | e.set(.setl, [u8(0x0F), 0x9C]) 879 | } 880 | 'SETG' { 881 | e.set(.setg, [u8(0x0F), 0x9F]) 882 | } 883 | 'SETLE' { 884 | e.set(.setle, [u8(0x0F), 0x9E]) 885 | } 886 | 'SETGE' { 887 | e.set(.setge, [u8(0x0F), 0x9D]) 888 | } 889 | 'JMP' { 890 | e.jmp_instr(.jmp, [u8(0xE9), 0, 0, 0, 0], 1) 891 | } 892 | 'JNE' { 893 | e.jmp_instr(.jne, [u8(0x0F), 0x85, 0, 0, 0, 0], 2) 894 | } 895 | 'JE' { 896 | e.jmp_instr(.je, [u8(0x0F), 0x84, 0, 0, 0, 0], 2) 897 | } 898 | 'JL' { 899 | e.jmp_instr(.jl, [u8(0x0f), 
0x8C, 0, 0, 0, 0], 2) 900 | } 901 | 'JG' { 902 | e.jmp_instr(.jg, [u8(0x0F), 0x8F, 0, 0, 0, 0], 2) 903 | } 904 | 'JLE' { 905 | e.jmp_instr(.jle, [u8(0x0F), 0x8E, 0, 0, 0, 0], 2) 906 | } 907 | 'JGE' { 908 | e.jmp_instr(.jge, [u8(0x0F), 0x8D, 0, 0, 0, 0], 2) 909 | } 910 | 'JNB' { 911 | e.jmp_instr(.jnb, [u8(0x0F), 0x83, 0, 0, 0, 0], 2) 912 | } 913 | 'JBE' { 914 | e.jmp_instr(.jbe, [u8(0x0F), 0x86, 0, 0, 0, 0], 2) 915 | } 916 | 'JNBE' { 917 | e.jmp_instr(.jnbe, [u8(0x0F), 0x87, 0, 0, 0, 0], 2) 918 | } 919 | 'JP' { 920 | e.jmp_instr(.jp, [u8(0x0F), 0x8A, 0, 0, 0, 0], 2) 921 | } 922 | 'JA' { 923 | e.jmp_instr(.ja, [u8(0x0F), 0x87, 0, 0, 0, 0], 2) 924 | } 925 | 'JB' { 926 | e.jmp_instr(.jb, [u8(0x0F), 0x82, 0, 0, 0, 0], 2) 927 | } 928 | 'JS' { 929 | e.jmp_instr(.js, [u8(0x0F), 0x88, 0, 0, 0, 0], 2) 930 | } 931 | 'JNS' { 932 | e.jmp_instr(.jns, [u8(0x0F), 0x89, 0, 0, 0, 0], 2) 933 | } 934 | 'REP' { 935 | e.rep() 936 | } 937 | 'CVTTSS2SIL' { 938 | e.cvttss2sil() 939 | } 940 | 'CVTSI2SSQ' { 941 | e.cvtsi2ssq() 942 | } 943 | 'CVTSI2SDQ' { 944 | e.cvtsi2sdq() 945 | } 946 | 'MOVD' { 947 | e.movd() 948 | } 949 | 'XORPD' { 950 | e.xorp(.xorpd, [DataSize.suffix_word]) 951 | } 952 | 'XORPS' { 953 | e.xorp(.xorps, []) 954 | } 955 | 'MOVSS' { 956 | e.sse_data_transfer_instr(.movss, 0x10, [DataSize.suffix_single]) 957 | } 958 | 'MOVSD' { 959 | e.sse_data_transfer_instr(.movss, 0x10, [DataSize.suffix_double]) 960 | } 961 | 'MOVAPS' { 962 | e.sse_data_transfer_instr(.movaps, 0x28, []) 963 | } 964 | 'MOVUPS' { 965 | e.sse_data_transfer_instr(.movups, 0x10, []) 966 | } 967 | 'PXOR' { 968 | e.sse_arith_instr(.pxor, [u8(0x0F), 0xEF], [DataSize.suffix_word]) 969 | } 970 | 'CVTSD2SS' { 971 | e.sse_arith_instr(.cvtsd2ss, [u8(0x0F), 0x5A], [DataSize.suffix_double]) 972 | } 973 | 'CVTSS2SD' { 974 | e.sse_arith_instr(.cvtss2sd, [u8(0x0F), 0x5A], [DataSize.suffix_single]) 975 | } 976 | 'UCOMISS' { 977 | e.sse_arith_instr(.ucomiss, [u8(0x0F), 0x2E], []) 978 | } 979 | 'UCOMISD' { 980 | 
e.sse_arith_instr(.ucomisd, [u8(0x0F), 0x2E], [DataSize.suffix_word]) 981 | } 982 | 'COMISS' { 983 | e.sse_arith_instr(.comiss, [u8(0x0F), 0x2F], []) 984 | } 985 | 'COMISD' { 986 | e.sse_arith_instr(.ucomisd, [u8(0x0F), 0x2F], [DataSize.suffix_word]) 987 | } 988 | 'SUBSS' { 989 | e.sse_arith_instr(.subss, [u8(0x0F), 0x5C], [DataSize.suffix_single]) 990 | } 991 | 'SUBSD' { 992 | e.sse_arith_instr(.subss, [u8(0x0F), 0x5C], [DataSize.suffix_double]) 993 | } 994 | 'ADDSS' { 995 | e.sse_arith_instr(.addss, [u8(0x0F), 0x58], [DataSize.suffix_single]) 996 | } 997 | 'ADDSD' { 998 | e.sse_arith_instr(.addsd, [u8(0x0F), 0x58], [DataSize.suffix_double]) 999 | } 1000 | 'MULSS' { 1001 | e.sse_arith_instr(.mulss, [u8(0x0F), 0x59], [DataSize.suffix_single]) 1002 | } 1003 | 'MULSD' { 1004 | e.sse_arith_instr(.mulsd, [u8(0x0F), 0x59], [DataSize.suffix_double]) 1005 | } 1006 | 'DIVSS' { 1007 | e.sse_arith_instr(.divss, [u8(0x0F), 0x5E], [DataSize.suffix_single]) 1008 | } 1009 | 'DIVSD' { 1010 | e.sse_arith_instr(.divsd, [u8(0x0F), 0x5E], [DataSize.suffix_double]) 1011 | } 1012 | 'CMOVNEQ', 'CMOVNEL', 'CMOVNEW' { 1013 | e.cmov(.cmovs, [u8(0x0F), 0x45], get_size_by_suffix(instr_name_upper)) 1014 | } 1015 | 'CMOVSQ', 'CMOVSL', 'CMOVSW' { 1016 | e.cmov(.cmovs, [u8(0x0F), 0x48], get_size_by_suffix(instr_name_upper)) 1017 | } 1018 | 'CMOVNSQ', 'CMOVNSL', 'CMOVNSW' { 1019 | e.cmov(.cmovns, [u8(0x0F), 0x49], get_size_by_suffix(instr_name_upper)) 1020 | } 1021 | 'CMOVLQ', 'CMOVLL', 'CMOVLW' { 1022 | e.cmov(.cmovl, [u8(0x0F), 0x4C], get_size_by_suffix(instr_name_upper)) 1023 | } 1024 | 'CMOVGEQ', 'CMOVGEL', 'CMOVGEW' { 1025 | e.cmov(.cmovge, [u8(0x0F), 0x4D], get_size_by_suffix(instr_name_upper)) 1026 | } 1027 | 'CMOVLEQ', 'CMOVLEL', 'CMOVLEW' { 1028 | e.cmov(.cmovle, [u8(0x0F), 0x4E], get_size_by_suffix(instr_name_upper)) 1029 | } 1030 | 'CMOVGQ', 'CMOVGL', 'CMOVGW' { 1031 | e.cmov(.cmovg, [u8(0x0F), 0x4F], get_size_by_suffix(instr_name_upper)) 1032 | } 1033 | 'RETQ', 'RET' { 1034 | e.instrs 
<< &Instr{
				kind: .ret
				pos: pos
				section_name: e.current_section_name
				code: [u8(0xc3)]
			}
		}
		// The remaining arms take no operands here: each one appends an Instr
		// carrying a fixed byte sequence for the current section.
		'SYSCALL' {
			e.instrs << &Instr{
				kind: .syscall
				pos: pos
				section_name: e.current_section_name
				code: [u8(0x0f), 0x05]
			}
		}
		'NOPQ', 'NOP' {
			e.instrs << &Instr{
				kind: .nop
				pos: pos
				section_name: e.current_section_name
				code: [u8(0x90)]
			}
		}
		'HLT' {
			e.instrs << &Instr{
				kind: .hlt
				pos: pos
				section_name: e.current_section_name
				code: [u8(0xf4)]
			}
		}
		'LEAVE' {
			e.instrs << &Instr{
				kind: .leave
				pos: pos
				section_name: e.current_section_name
				code: [u8(0xc9)]
			}
		}
		'CLTQ' {
			e.instrs << &Instr{
				kind: .cltq
				pos: pos
				section_name: e.current_section_name
				code: [u8(0x48), 0x98]
			}
		}
		'CLTD' {
			e.instrs << &Instr{
				kind: .cltd
				pos: pos
				section_name: e.current_section_name
				code: [u8(0x99)]
			}
		}
		'CQTO' {
			e.instrs << &Instr{
				kind: .cqto
				pos: pos
				section_name: e.current_section_name
				code: [u8(0x48), 0x99]
			}
		}
		'CWTL' {
			e.instrs << &Instr{
				kind: .cwtl
				pos: pos
				section_name: e.current_section_name
				code: [u8(0x98)]
			}
		}
		else {
			// No arm matched: report the mnemonic as written in the source
			// (instr_name, not the upper-cased copy) and abort assembly.
			error.print(pos, 'unknown instruction `${instr_name}`')
			exit(1)
		}
	}
}

// encode drives the encoder over the whole token stream: it calls
// encode_instr repeatedly until the current token is `.eof`.
// NOTE(review): termination relies on encode_instr advancing e.tok on every
// call (or exiting on error) - confirm against encode_instr's prologue.
pub fn (mut e Encoder) encode() {
	for e.tok.kind != .eof {
		e.encode_instr()
	}
}