├── .gitignore ├── .travis.yml ├── 8086-pinout.gif ├── Makefile ├── README.md ├── Vagrantfile ├── assemblers.md ├── bibliography.md ├── binutils.md ├── branch-prediction.md ├── cache.md ├── calling-conventions.md ├── cdecl.asm ├── cdecl.md ├── compiler-generated ├── .gitignore ├── Makefile ├── README.md ├── atomic.cpp ├── block.c ├── bound_simplification.c ├── bss.c ├── bss_static.c ├── builtin_inline.c ├── builtin_no_side_effect.c ├── callee_saved_registers.c ├── cast_int_long.c ├── char_array_init.c ├── char_array_init_global.c ├── char_array_init_long.c ├── char_pointer_init.c ├── char_pointer_init_global.c ├── class.cpp ├── common.c ├── common_large.c ├── const.c ├── const_cast.c ├── constexpr.cpp ├── cse.c ├── cse_function.c ├── cse_large_function.c ├── data.c ├── double_assign_constant.c ├── double_sum.c ├── enum.c ├── float2int.c ├── float2int_direct_vs_variable.c ├── float_assign_constant.c ├── float_sum.c ├── for_empty.c ├── function_call_another.c ├── function_int_int_int.c ├── function_many_arguments.c ├── function_many_arguments_2calls.c ├── function_many_arguments_64.c ├── gcc.md ├── gcc │ ├── Makefile │ ├── Makefile_params │ ├── builtin_expect.c │ ├── builtin_expect_off.c │ ├── interactive │ │ ├── Makefile │ │ ├── builtin_expect_off_perf.c │ │ └── builtin_expect_perf.c │ └── restrict.cpp ├── global_int.c ├── hello_world.c ├── if.c ├── int_assign_constant.c ├── ld.c ├── long_double_assign_constant.c ├── long_double_sum.c ├── memcmp.c ├── modulo.c ├── precalculate_loop.c ├── precalculate_loop_unknown_init.c ├── register.c ├── restrict.c ├── return_struct.c ├── sqrt.c ├── static_local.c ├── switch3.c ├── switch6.c ├── switch6_if.c ├── switch6_spread.c ├── switch6_very_spread.c ├── tail_call.c ├── template.cpp ├── use_rax.c ├── use_rax_return.c ├── virtual.cpp └── while_infinite.c ├── containers.md ├── cpu-benchmarks.md ├── cpu-bugs.md ├── cpu-hardware-design.md ├── cpu-optimizations.md ├── cpu-pins.md ├── elf.md ├── endianess.asm ├── extensions.md ├── 
flynns-taxonomy.md ├── fpu.md ├── gas ├── Makefile ├── README.md ├── altmacro.S ├── ascii.S ├── asciz.S ├── bibliography.md ├── char_literal.S ├── current_address.S ├── equ.S ├── extern.md ├── gasversion.S ├── global.S ├── irp.S ├── lib ├── linux │ ├── Makefile │ ├── README.md │ └── hello_world.S ├── local_label.S ├── local_symbol.S ├── macro.S ├── params.makefile ├── print.S ├── push.S ├── section.S ├── size.S └── type.S ├── getting-started.md ├── how-to-learn.md ├── implementations.md ├── instruction-level-parallelism.md ├── instruction-sets.md ├── intel-processor-history.md ├── intel-vs-atet.md ├── intel2gas.md ├── interactive ├── Makefile ├── README.md ├── int3.asm ├── int4.asm ├── lib └── test ├── ld-linux-so.md ├── ld.md ├── ldd.md ├── lib ├── Makefile ├── README.md ├── common.asm ├── common_gas.h ├── common_nasm.inc └── driver.c ├── linker-scripts ├── Makefile ├── README.md ├── TODO.md ├── common.h ├── data.S ├── data.ld ├── edata.S ├── edata.ld ├── flags.S ├── flags.ld ├── hello_world.S ├── hello_world.ld ├── keep.S ├── keep.ld ├── min.S ├── min.ld ├── symbol.S └── symbol.ld ├── linux ├── Makefile ├── README.md ├── c-from-assembly │ ├── README.md │ ├── main │ │ ├── Makefile │ │ ├── README.md │ │ └── main.asm │ ├── start-gcc │ │ ├── Makefile │ │ └── main.asm │ └── start │ │ ├── Makefile │ │ └── main.asm ├── custom-entry-gcc │ ├── Makefile │ ├── README.md │ └── main.c ├── custom-entry │ ├── Makefile │ └── hello_world.asm ├── hello_world.asm ├── hello_world_min.asm ├── interactive │ ├── Makefile │ ├── memory_layout.asm │ └── no_exit.asm ├── min.asm ├── stack_top.asm └── system_call.md ├── microcode.md ├── microcontrollers.md ├── nasm ├── Makefile ├── comment.asm ├── current_address.asm ├── define.asm ├── equ.asm ├── if.asm ├── include.asm ├── included.inc ├── introduction.md ├── lib ├── local_labels.asm ├── ptr.asm ├── ram.asm └── symbol_colon.asm ├── objcopy.md ├── objdump.md ├── other-architectures.md ├── params.makefile ├── pipeline.md ├── 
pros-and-cons-of-assembly.md ├── provision ├── pusha.asm ├── readelf.md ├── registers.asm ├── risc-vs-cisc.md ├── segment ├── segment_registers.asm ├── size.md ├── stdcall.md ├── string-instructions.md ├── superscalar.md ├── synchronization.md ├── system-vs-application-programming.md ├── vliw.md └── x86-64 ├── Makefile ├── README.md ├── Vagrantfile ├── calling-convention.md ├── cmp-sign-extend.asm ├── gas ├── Makefile ├── lib ├── movabs.S └── params.makefile ├── lib ├── Makefile ├── common.asm ├── common_gas.h ├── common_nasm.inc └── driver.c ├── linux ├── Makefile ├── README.md ├── c-from-assembly │ ├── Makefile │ ├── README.md │ ├── hello.asm │ └── printf.asm ├── common.inc ├── interactive │ ├── Makefile │ ├── common.inc │ ├── initial_state.asm │ ├── min_dummy.asm │ ├── min_empty.asm │ └── stack_top.asm └── zero-extend-32bit-memory │ ├── Makefile │ ├── README.md │ ├── link.ld │ └── main.asm ├── main.asm ├── movabs.asm ├── params.makefile └── provision /.gitignore: -------------------------------------------------------------------------------- 1 | _out/ 2 | _tmp/ 3 | *.tmp 4 | tmp.* 5 | 6 | # Compiled Object files 7 | *.slo 8 | *.lo 9 | *.out 10 | *.o 11 | *.so 12 | *.dylib 13 | *.lai 14 | *.la 15 | *.a 16 | 17 | .vagrant 18 | 19 | # Image files 20 | *.bin 21 | *.img 22 | *.iso 23 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | before_install: 2 | - sudo apt-get update -q 3 | - sudo apt-get install build-essential gcc-multilib nasm 4 | script: 5 | - uname -a 6 | - make TRAVIS=y test 7 | -------------------------------------------------------------------------------- /8086-pinout.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cirosantilli/x86-assembly-cheat/0c64fc5961bbdcbf442cec9bb98d70e889b1984f/8086-pinout.gif 
-------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .POSIX: 2 | 3 | BITS = 32 4 | CC = gcc 5 | CFLAGS = -ggdb3 -m$(BITS) -O0 -pedantic-errors -std=c99 -Wall -Wextra -pedantic 6 | # To match nasm. 7 | DISAS_FLAVOR = intel 8 | GAS_EXT = .S 9 | LIB_DIR = lib/ 10 | NASM = nasm -f elf$(BITS) -gdwarf -w+all 11 | NASM_EXT = .asm 12 | OBJ_EXT = .o 13 | OUT_EXT = .out 14 | PHONY_MAKES = 15 | RUN = main 16 | TEST = test 17 | export 18 | 19 | ifeq ($(TRAVIS),) 20 | CFLAGS += -fno-pie -no-pie 21 | endif 22 | 23 | -include params.makefile 24 | 25 | OUTS := $(foreach IN_EXT,$(GAS_EXT) $(NASM_EXT),$(patsubst %$(IN_EXT),%$(OUT_EXT),$(wildcard *$(IN_EXT)))) 26 | 27 | .PRECIOUS: %$(OBJ_EXT) 28 | .PHONY: all clean driver run test $(PHONY_MAKES) 29 | 30 | all: driver $(OUTS) $(PHONY_MAKES) 31 | for phony in $(PHONY_MAKES); do \ 32 | $(MAKE) -C $${phony}; \ 33 | done 34 | 35 | %$(OUT_EXT): %$(OBJ_EXT) 36 | $(CC) $(CFLAGS) -o '$@' '$<' $(LIB_DIR)*$(OBJ_EXT) 37 | 38 | %$(OBJ_EXT): %$(NASM_EXT) 39 | $(NASM) -o '$@' '$<' 40 | 41 | %$(OBJ_EXT): %$(GAS_EXT) 42 | $(CC) $(CFLAGS) -c -o '$@' '$<' 43 | 44 | clean: 45 | rm -f *$(OBJ_EXT) *$(OUT_EXT) 46 | $(MAKE) -C $(LIB_DIR) '$@' 47 | for phony in $(PHONY_MAKES); do \ 48 | $(MAKE) -C $${phony} clean; \ 49 | done 50 | 51 | driver: 52 | $(MAKE) -C $(LIB_DIR) 53 | 54 | gdb-%: %$(OUT_EXT) 55 | gdb -q --nh -ex 'set disassembly-flavor $(DISAS_FLAVOR)' -ex 'layout split' -ex 'break asm_main' -ex 'run' '$<' 56 | 57 | gdb2-%: %$(OUT_EXT) 58 | gdb -q -ex 'break asm_main' -ex 'run' '$<' 59 | 60 | run-%: %$(OUT_EXT) 61 | ./'$<' 62 | 63 | test: all 64 | @\ 65 | if [ -x $(TEST) ]; then \ 66 | ./$(TEST) '$(OUT_EXT)' ;\ 67 | else\ 68 | fail=false ;\ 69 | for t in *"$(OUT_EXT)"; do\ 70 | if ! 
./"$$t"; then \ 71 | fail=true ;\ 72 | break ;\ 73 | fi ;\ 74 | done ;\ 75 | if $$fail; then \ 76 | echo "TEST FAILED: $$t" ;\ 77 | exit 1 ;\ 78 | fi ;\ 79 | fi; \ 80 | for phony in $(PHONY_MAKES); do \ 81 | $(MAKE) -C $${phony} test; \ 82 | done; \ 83 | echo 'ALL TESTS PASSED' 84 | -------------------------------------------------------------------------------- /Vagrantfile: -------------------------------------------------------------------------------- 1 | Vagrant.configure(2) do |config| 2 | config.vm.box = 'hashicorp/precise32' 3 | config.vm.provision :shell, privileged: false, path: 'provision' 4 | config.vm.provider "virtualbox" do |v| 5 | v.customize [ 6 | 'modifyvm', :id, 7 | '--cpus', 1, 8 | '--memory', 128, 9 | ] 10 | end 11 | end 12 | -------------------------------------------------------------------------------- /assemblers.md: -------------------------------------------------------------------------------- 1 | # Assemblers 2 | 3 | Programs that transform text in computer code. 4 | 5 | Most of this tutorial is written in NASM, and some parts in GAS. 6 | 7 | ## MASM 8 | 9 | Microsoft. 10 | 11 | x86. 12 | 13 | ## TASM 14 | 15 | Borland. 16 | 17 | Stands for "turbo assembler". LOL, why not "BASM" instead? 18 | 19 | Windows only. 20 | -------------------------------------------------------------------------------- /bibliography.md: -------------------------------------------------------------------------------- 1 | # Bibliography 2 | 3 | ## Manuals 4 | 5 | - [IA-32 manual](http://www.intel.com/content/www/us/en/processors/architectures-software-developer-manuals.html) 6 | 7 | Intel man pages, *the* official source. 8 | 9 | Also includes Intel 64. 10 | 11 | Interesting sections: 12 | 13 | - Vol 1 5: instruction overview. In particular, groups instructions logically. 14 | - Vol 2 3: full instruction listing and API. 
15 | 16 | - [AMD64 manuals](http://developer.amd.com/resources/documentation-articles/developer-guides-manuals/) 17 | 18 | Under the manuals section. 19 | 20 | Original specification of x86-64, which Intel then implemented as well. 21 | 22 | - [Intel 64 manual](http://www.intel.com/content/www/us/en/processors/architectures-software-developer-manuals.html) 23 | 24 | Intel man pages, *the* official source. 25 | 26 | Also includes IA-32. 27 | 28 | Get yourself the 3 volume IA documentation and the optimization manual and sleep with them. 29 | 30 | Interesting chapters: 31 | 32 | - Volume 1 chapter 5: list of instructions grouped by type, good to find new instructions to play with 33 | 34 | - Volume 2 chapter 3: full alphabetic list of all instructions 35 | 36 | The version is given on the front page, e.g.: 37 | 38 | Order Number: 325383-053US 39 | January 2015 40 | 41 | `325383` is the document (different for volumes 1-3), and `053` the version (TODO US is the language? Are there translations?) 42 | 43 | - [System V Application Binary Interface v0.99.7][System V ABI AMD64] 44 | 45 | - open source machine readable definition of instructions (non-PDF), only published in 2016 46 | 47 | ## Tutorials 48 | 49 | - 50 | 51 | - 52 | 53 | - Programming from the ground up. 54 | 55 | 56 | 57 | Linux GAS, for complete programming beginners. 58 | 59 | - PC assembly language. 60 | 61 | 62 | 63 | NASM. 64 | 65 | - 66 | 67 | Reverse engineering for beginners. 68 | 69 | Lots of info on what C code maps to in assembly, and how to look for what matters to reverse engineer stuff. 70 | 71 | - 72 | 73 | Highly regarded optimization guidelines for x86 and C++. 74 | 75 | - 76 | 77 | Washington university course that covers branch prediction, cache and other CPU internals. 78 | 79 | With runnable source code: 80 | 81 | - https://github.com/0xAX/asm not extensive, but clean, and highly commented. 82 | 83 | Without runnable source code: 84 | 85 | - http://www.egr.unlv.edu/~ed/assembly64.pdf by Ed Jorgensen. 
86 | 87 | A professional Creative Commons book that covers a lot of the basics. 88 | 89 | ## Unofficial API references 90 | 91 | - [Wikipedia Instruction List](http://en.wikipedia.org/wiki/X86_instruction_listings) 92 | 93 | Instruction list. 94 | 95 | ## Libraries 96 | 97 | - 98 | 99 | Looks like the best free disassembly library out there. 100 | 101 | Binutils cannot be used as a library to disassemble? Sad. 102 | 103 | ## Cool projects 104 | 105 | Useless but cool and educational: 106 | 107 | - 108 | 109 | [Itanium C++ ABI]: http://mentorembedded.github.io/cxx-abi/abi.html 110 | [System V ABI AMD64]: http://www.x86-64.org/documentation_folder/abi-0.99.pdf 111 | -------------------------------------------------------------------------------- /binutils.md: -------------------------------------------------------------------------------- 1 | # binutils 2 | 3 | GNU set of utilities to assemble, link, view and modify compiled code. 4 | 5 | Official site: <https://www.gnu.org/software/binutils/>. 6 | 7 | As stated there, the two main utilities are `ld`, the GNU linker, and `as`, the GNU assembler. 8 | 9 | Binutils is a dependency of GCC: GCC deals with the high level to assembler source translation, and Binutils does the rest. 10 | 11 | Also contains GDB. 12 | 13 | ## Makefile 14 | 15 | sudo apt-get build-dep gdb 16 | mkdir binutils-gdb 17 | cd binutils-gdb 18 | git clone git://sourceware.org/git/binutils-gdb.git src 19 | cd src 20 | git checkout binutils-2_25 21 | cd .. 22 | mkdir build 23 | cd build 24 | ../src/configure --with-sysroot=/ --prefix="$(pwd)/../install" 25 | time make -j5 26 | make install 27 | 28 | Try it out on Linux: 29 | 30 | cd ../install/bin 31 | 32 | cat <<'EOF' >main.S 33 | .data 34 | s: 35 | .ascii "Hello world!\n" 36 | len = . 
- s 37 | .text 38 | .global _start 39 | _start: 40 | movl $4, %eax 41 | movl $1, %ebx 42 | movl $s, %ecx 43 | movl $len, %edx 44 | int $0x80 45 | movl $1, %eax 46 | movl $0, %ebx 47 | int $0x80 48 | EOF 49 | ./as -o main.o main.S 50 | ./ld -o main.out main.o 51 | ./main.out 52 | 53 | Took 3 minutes in a 2013 computer. 54 | 55 | Why `--with-sysroot` is needed . Why not make it the default?! 56 | 57 | ## Source tree 58 | 59 | - `gas`: assembler 60 | 61 | - `ld`: linker 62 | 63 | - `gold`: new faster linker by Google 64 | 65 | - `gdb`: GDB 66 | 67 | - `bfd`: Binary File Descriptor? 68 | 69 | Contains a generic model that supports multiple output formats: ELF, Mach-O, PE, etc. 70 | 71 | Used by most utilities. 72 | 73 | - `opcodes`: low-level CPU instruction descriptions. 74 | 75 | Key to assembly and disassembly, used by many utilities. 76 | 77 | ### GAS 78 | 79 | Source tree: 80 | 81 | - `read.c`: the main parser 82 | 83 | Vocabulary: 84 | 85 | - `po`: Pseudo-op, same as assembly directive, e.g. `.macro`. 86 | 87 | - `poc`: Pseudo-op Code 88 | 89 | - `TC`: TODO? Tool Chain? E.g. `TC_START_LABEL` in `read.c`, `config/tc-XXX` for different architectures. 90 | 91 | - `md`: Machine Description. Each one is encoded in a files under `config/` that encode the binary representation of structures. 92 | 93 | Many identifiers also get this prefix. 94 | 95 | GAS compiles for many targets (arch + container), which have many (often unstandardized) syntaxes. The parsing is well-factored however. For this reason, make sure to specify your target whenever considering a concept. 
E.g., even things like the line-comment character, and the case sensitivity directives may change across architectures. 96 | -------------------------------------------------------------------------------- /branch-prediction.md: -------------------------------------------------------------------------------- 1 | # Branch prediction 2 | 3 | Only makes sense in pipelined processors: at a branch, it tries to guess which side will be taken, and puts those instructions in the pipeline. 4 | 5 | - 6 | - 7 | - 8 | -------------------------------------------------------------------------------- /cache.md: -------------------------------------------------------------------------------- 1 | # Cache 2 | 3 | - 4 | - 5 | - 6 | - 7 | - 8 | - 9 | - 10 | 11 | ## Example 12 | 13 | Shut up and do an ASCII art example, first direct mapped then set associative. 14 | 15 | ## Direct mapped 16 | 17 | One possible cache location per memory address. 18 | 19 | Upside: simple circuit, fast to find which entry it is, and small area. 20 | 21 | Downside: you might invalidate an entry that was recently accessed, even if all other entries are old. 22 | 23 | ## Fully associative 24 | 25 | One cache entry for every memory address, so would be perfect because no conflicts. 26 | 27 | But of course, requires a cache as large as main memory, thus useless. 28 | 29 | ## Set associative 30 | 31 | Middle ground between direct mapped and fully associative. 32 | 33 | - 2 way associative example 34 | 35 | Now the address specifies the set where it might be, and the tag can be anywhere in that set. 36 | 37 | Unlike direct mapped, you now have the fun choice of which entry to evict when a set is full: 38 | 39 | ## Bits 40 | 41 | Forgetting SMP coherency. 42 | 43 | ### Validity bit 44 | 45 | If false, indicates that the given data is invalid, and must be re-fetched. 46 | 47 | When it is set to invalid: 48 | 49 | - at startup, everything is set invalid, otherwise we wouldn't be able to differentiate between valid data and the noise present at startup. 
This is the major use case. 50 | - another processor modifies main memory for a cache that we hold https://en.wikipedia.org/wiki/Bus_snooping 51 | 52 | ### Dirty bit 53 | 54 | Set whenever the CPU writes to cache, unset when cache is written to main memory. 55 | 56 | ## Tag 57 | 58 | Part of the original address (MSB), stored in the cache, to disambiguate if a cache line is a hit or not. 59 | 60 | ## Virtual or physical memory 61 | 62 | Four possibilities: virtual or physical addressed or tagged. 63 | 64 | TODO: which one is best / most common and why? 65 | 66 | ## Cache coherency 67 | 68 | You have many CPUs modifying memory. How to keep caches up to date. 69 | 70 | - 71 | - 72 | - 73 | - 74 | - 75 | - 76 | - 77 | - 78 | - 79 | - 80 | - 81 | - 82 | - ARM AMBA 4 ACE 83 | 84 | ## Register vs cache 85 | 86 | TODO physically different? Apparently not, both made of flip-flops (SRAM), registers can be seen as an L0 cache. 87 | 88 | - 89 | - 90 | - 91 | - 92 | -------------------------------------------------------------------------------- /calling-conventions.md: -------------------------------------------------------------------------------- 1 | # Calling convention 2 | 3 | 4 | 5 | The calling convention specifies how exactly function call and return are implemented in assembly. 6 | 7 | ## C calling conventions 8 | 9 | ANSI C does not specify assembly level calling convention to be used by implementations nor means to control actual calling conventions. 10 | 11 | Some compilers do however contain extensions that allow to fix a given calling convention. 12 | 13 | GCC allows to specify calling convention explicitly as: 14 | 15 | void f (int i) __attribute__ ((cdecl)); 16 | 17 | where `cdecl` is the name of the calling convention. 18 | 19 | In IA32, cdecl is the most popular in Linux, and stdcall the most popular in Windows, but there exist many others. 
20 | 21 | ## Call C function from assembly 22 | 23 | To call a C function from assembly you need to: 24 | 25 | - use the same C calling convention that the C code was compiled with. 26 | 27 | Remember that ANSI C does not specify this. 28 | 29 | - use the correct naming convention of the libraries you want to link to. 30 | 31 | For example, `exit` may be called `_exit` on certain compilers such as GCC elf format. 32 | 33 | This is not specified by ANSI C, so the only portable alternative is via macros. 34 | -------------------------------------------------------------------------------- /cdecl.asm: -------------------------------------------------------------------------------- 1 | ; # cdecl 2 | 3 | %include "lib/common_nasm.inc" 4 | 5 | ENTRY 6 | 7 | ; Recursive factorial. 8 | 9 | push dword 5 10 | call factorial_rec_cdecl 11 | ; We must clean up the argument stack ourselves. 12 | ; This allows for varargs like `printf`. 13 | add esp, 4 14 | ASSERT_EQ eax, 120 15 | 16 | push dword 1 17 | call factorial_rec_cdecl 18 | add esp, 4 19 | ASSERT_EQ eax, 1 20 | 21 | ; Non-recursive factorial. 22 | 23 | push dword 5 24 | call factorial_norec_cdecl 25 | add esp, 4 26 | ASSERT_EQ eax, 120 27 | 28 | push dword 1 29 | call factorial_norec_cdecl 30 | add esp, 4 31 | ASSERT_EQ eax, 1 32 | 33 | EXIT 34 | 35 | ; Recursive factorial. cdecl calling convention. 36 | factorial_rec_cdecl: 37 | enter 0, 0 38 | ; - `[ebp - 4]` the first local variable 39 | ; - `[ebp]` is the old `ebp` pushed by `enter` before more local variables are pushed 40 | ; - `[ebp + 4]` is the address to return to pushed by `call` 41 | ; - `[ebp + 8]` first argument passed. For this to be true for any number of arguments, 42 | ; they must be pushed right-to-left. 
43 | ; - `[ebp + 12]` second argument passed 44 | cmp dword [ebp + 8], 1 45 | je factorial_rec_cdecl_base 46 | mov ebx, [ebp + 8] 47 | dec ebx 48 | push ebx 49 | call factorial_rec_cdecl 50 | add esp, 4 51 | mul dword [ebp + 8] 52 | jmp factorial_rec_cdecl_return 53 | factorial_rec_cdecl_base: 54 | mov eax, 1 55 | factorial_rec_cdecl_return: 56 | leave 57 | ret 58 | 59 | ; Non-recursive factorial. cdecl calling convention. 60 | factorial_norec_cdecl: 61 | ; No need for `enter` since we are non-recursive. 62 | ; This does not affect in any way the cdecl calling convention: 63 | ; enter and leave are invisible from the outside. 64 | mov ecx, [esp + 4] 65 | mov eax, 1 66 | factorial_norec_cdecl_loop: 67 | mul ecx 68 | loop factorial_norec_cdecl_loop 69 | ret 70 | -------------------------------------------------------------------------------- /cdecl.md: -------------------------------------------------------------------------------- 1 | # cdecl 2 | 3 | TODO move in with the code. 4 | 5 | De facto standard C calling convention used by GCC. 6 | 7 | Does not have a formal standard, so different compilers may have incompatible versions of cdecl. 8 | 9 | - return address is put on the stack (usually via `call`/`ret` instructions) 10 | 11 | - arguments are passed on stack 12 | 13 | - always pass `dword` arguments 14 | 15 | If you want to pass a single byte, first convert to dword 16 | 17 | - arguments are put on stack in inverse order from declaration 18 | 19 | This allows for functions with arbitrary numbers of args such as `printf`: 20 | 21 | This way the format string is always at the same position: it is pushed last, so it sits right above the return address. 22 | 23 | - arguments are not popped before return. 24 | 25 | If you want to use their values, use ESP pointer 26 | 27 | - return value is put in: 28 | 29 | - EAX if integer or pointer. 30 | 31 | If 64-bit, put in edx:eax. 32 | 33 | - ST0 x87 register if floating point. 34 | 35 | - for structures, there is some divergence between different compilers. 
36 | 37 | For small structures, some compilers may choose to return the struct on multiple registers such as EAX and EDX. 38 | 39 | For larger structures which do not fit into registers, a common technique is for the compiler to automatically add a hidden pointer parameter to the function and automatically make the caller allocate memory and pass a pointer to that memory. 40 | 41 | - EAX, ECX and EDX may change after the call and must be saved by the caller if it wants to keep them. 42 | 43 | All other registers are guaranteed not to change after the call. They may be temporarily changed inside the callee, but must be restored before it returns. 44 | 45 | - the caller clears the argument stack 46 | 47 | This generates more code, since programs usually make several calls for each definition. 48 | 49 | This allows for functions with variable number of arguments such as `printf`, because then only the caller knows how many args it put on the stack. 50 | 51 | PASCAL for example follows a convention in which the called function clears the argument stack. 52 | -------------------------------------------------------------------------------- /compiler-generated/.gitignore: -------------------------------------------------------------------------------- 1 | *.objdump 2 | *.readelf 3 | *.s 4 | 5 | # -fdump-tree-all -fdump-rtl-all -fdump-class-hierarchy 6 | *.alias 7 | *.alignments 8 | *.asmcons 9 | *.barriers 10 | *.bbro 11 | *.bswap 12 | *.ccp1 13 | *.ccp2 14 | *.ccp3 15 | *.cdce 16 | *.cddce1 17 | *.cddce2 18 | *.ce1 19 | *.ce2 20 | *.ce3 21 | *.cfg 22 | *.ch 23 | *.class 24 | *.combine 25 | *.compgotos 26 | *.copyprop1 27 | *.copyprop2 28 | *.copyprop3 29 | *.copyprop4 30 | *.copyprop5 31 | *.copyrename1 32 | *.copyrename2 33 | *.copyrename3 34 | *.copyrename4 35 | *.cplxlower0 36 | *.cplxlower1 37 | *.cprop1 38 | *.cprop2 39 | *.cprop3 40 | *.cprop_hardreg 41 | *.crited1 42 | *.csa 43 | *.cse_local 44 | *.cse1 45 | *.cse2 46 | *.cselim 47 | *.cunroll 48 | *.cunrolli 49 | *.dce1 50 | *.dce2 51 | *.dceloop1 52 | *.dceloop3 53 | *.dfinish 54 | *.dfinit 55 
| *.dom1 56 | *.dom2 57 | *.dse1 58 | *.dse2 59 | *.dwarf2 60 | *.ealias 61 | *.early_optimizations 62 | *.eh 63 | *.eh_ranges 64 | *.ehcleanup2 65 | *.ehdisp 66 | *.ehopt 67 | *.einline 68 | *.eipa_sra 69 | *.esra 70 | *.expand 71 | *.fab1 72 | *.final 73 | *.fnsplit 74 | *.forwprop1 75 | *.forwprop2 76 | *.forwprop3 77 | *.forwprop4 78 | *.fre1 79 | *.fre2 80 | *.fwprop1 81 | *.fwprop2 82 | *.gcse2 83 | *.gimple 84 | *.gkd 85 | *.ifcombine 86 | *.ifcvt 87 | *.init-regs 88 | *.inline_param1 89 | *.inline_param2 90 | *.into_cfglayout 91 | *.ira 92 | *.ivcanon 93 | *.ivopts 94 | *.jump 95 | *.jump2 96 | *.ldist 97 | *.lim1 98 | *.lim3 99 | *.local-pure-const1 100 | *.local-pure-const2 101 | *.loop 102 | *.loop2 103 | *.loop2_done 104 | *.loop2_init 105 | *.loop2_invariant 106 | *.loop2_unswitch 107 | *.loopdone 108 | *.loopinit 109 | *.lower 110 | *.mach 111 | *.mergephi1 112 | *.mergephi2 113 | *.mode_sw 114 | *.nothrow 115 | *.nrv 116 | *.objsz1 117 | *.omplower 118 | *.optimized 119 | *.original 120 | *.outof_cfglayout 121 | *.pcom 122 | *.peephole2 123 | *.phicprop1 124 | *.phicprop2 125 | *.phiopt1 126 | *.phiopt2 127 | *.phiopt3 128 | *.phiprop 129 | *.postreload 130 | *.pre 131 | *.pro_and_epilogue 132 | *.profile_estimate 133 | *.reassoc1 134 | *.reassoc2 135 | *.ree 136 | *.reginfo 137 | *.regmove 138 | *.release_ssa 139 | *.reload 140 | *.resx 141 | *.retslot 142 | *.rtl_dce 143 | *.sccp 144 | *.sched2 145 | *.shorten 146 | *.sincos 147 | *.sink 148 | *.slp 149 | *.slsr 150 | *.split1 151 | *.split2 152 | *.split4 153 | *.sra 154 | *.ssa 155 | *.stack 156 | *.statistics 157 | *.strlen 158 | *.subreg1 159 | *.subreg2 160 | *.switchconv 161 | *.tailc 162 | *.tailr1 163 | *.tailr2 164 | *.tu 165 | *.ud_dce 166 | *.uncprop1 167 | *.uninit1 168 | *.unswitch 169 | *.vartrack 170 | *.veclower 171 | *.veclower21 172 | *.vect 173 | *.vregs 174 | *.vrp1 175 | *.vrp2 176 | *.widening_mul 177 | 
-------------------------------------------------------------------------------- /compiler-generated/README.md: -------------------------------------------------------------------------------- 1 | # Compiler generated 2 | 3 | Let's see what our compilers are compiling to. 4 | 5 | 1. General compiler remarks 6 | 1. [GCC](gcc.md) 7 | 1. C 8 | 1. [hello_world.c](hello_world.c) 9 | 1. Functions 10 | 1. [function_int_int_int.c](function_int_int_int.c) 11 | 1. [return_struct.c](return_struct.c) 12 | 1. switch 13 | 1. [switch3.c](switch3.c) 14 | 1. [switch6.c](switch6.c) 15 | 1. [switch6_spread.c](switch6_spread.c) 16 | 1. [switch6_very_spread.c](switch6_very_spread.c) 17 | 1. [Cast int to long](cast_int_long.c) 18 | 1. Floating point 19 | 1. [float_sum.c](float_sum.c) 20 | 1. [float2int.c](float2int.c) 21 | 1. [sqrt.c](sqrt.c) 22 | 1. Optimization 23 | 1. [for_empty.c](for_empty.c) 24 | 1. [while_infinite.c](while_infinite.c) 25 | 1. [use_rax_return.c](use_rax_return.c) 26 | 1. [restrict.c](restrict.c) 27 | 1. [builtin_no_side_effect.c](builtin_no_side_effect.c) 28 | 1. [builtin_inline.c](builtin_inline.c) 29 | 1. [CSE](cse.c) 30 | 1. [CSE function](cse_function.c) 31 | 1. [CSE large function](cse_large_function.c) 32 | 1. Branch prediction 33 | 1. [__builtin_expect](gcc/builtin_expect.c) 34 | 1. [__builtin_expect off](gcc/builtin_expect_off.c) 35 | 1. [Precalculate loop](precalculate_loop.c) 36 | 1. [Precalculate loop unknown init](precalculate_loop_unknown_init.c) 37 | 1. [Bound simplification](bound_simplification.c) 38 | 1. C++ 39 | 1. [class](class.cpp) 40 | 1. [template](template.cpp) 41 | 1. [constexpr.cpp](constexpr.cpp) 42 | 1. [atomic.cpp](atomic.cpp) 43 | 1. [this_method.cpp](this_method.cpp) 44 | 1. [virtual.cpp](virtual.cpp) 45 | 1. libc 46 | 1. [memcmp.c](memcmp.c) 47 | 1. ELF sections 48 | 1. [bss.c](bss.c) 49 | 1. [bss_static.c](bss_static.c) 50 | 1. [common.c](common.c) 51 | 1. [data.c](data.c) 52 | 53 | TODO 54 | 55 | 1. Optimizations 56 | 1. 
Strength reduction, like multiplication to lea 57 | -------------------------------------------------------------------------------- /compiler-generated/atomic.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | GCC 4.8 compiles to xadd through the __atomic_fetch_add built-in. 3 | */ 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | #if __cplusplus >= 201103L 10 | const int NUM_THREADS = 1000; 11 | const int NUM_ITERS = 1000; 12 | 13 | std::atomic_int global(0); 14 | 15 | void threadMain() { 16 | for (int i = 0; i < NUM_ITERS; ++i) 17 | global++; 18 | } 19 | #endif 20 | 21 | int main() { 22 | #if __cplusplus >= 201103L 23 | std::thread threads[NUM_THREADS]; 24 | int i; 25 | for (i = 0; i < NUM_THREADS; ++i) 26 | threads[i] = std::thread(threadMain); 27 | for (i = 0; i < NUM_THREADS; ++i) 28 | threads[i].join(); 29 | assert(global.load() == NUM_THREADS * NUM_ITERS); 30 | #endif 31 | } 32 | -------------------------------------------------------------------------------- /compiler-generated/block.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int main() { 4 | int i = 0; 5 | { 6 | int i = 1; 7 | printf("%d\n", i); 8 | } 9 | return 0; 10 | } 11 | -------------------------------------------------------------------------------- /compiler-generated/bound_simplification.c: -------------------------------------------------------------------------------- 1 | /* 2 | # Module upper bound 3 | */ 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | int main() { 10 | int i = time(NULL); 11 | srand(i); 12 | double d = i; 13 | unsigned int u = i; 14 | unsigned int u01 = i % 2U; 15 | 16 | /* -O3 */ 17 | if (u01 == 2U) exit(0x1); 18 | if (u01 > 1U) exit(0x2); 19 | if (u01 + 1 > 2U) exit(0x3); 20 | if (i > 0) 21 | if (i < 0) 22 | exit(0x4); 23 | if (d > 0.0) 24 | if (d < 0.0) 25 | exit(0x5); 26 | 27 | /* No. 
*/ 28 | if (rand() < 0) 29 | exit(0x6); 30 | if (sin(i) > 1.1) exit(0x80); 31 | if (sqrt(i) < 0.0) exit(0x81); 32 | 33 | return 0; 34 | } 35 | -------------------------------------------------------------------------------- /compiler-generated/bss.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | /* Contrast with the Common and Data section outputs. */ 4 | int my_var = 0; 5 | 6 | int main() { 7 | printf("%d\n", my_var + 1); 8 | return 0; 9 | } 10 | -------------------------------------------------------------------------------- /compiler-generated/bss_static.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | /* 4 | Since this is not visible from the outside because static, 5 | GCC can just ignore the label and inline it everywhere. 6 | */ 7 | static int my_var = 0; 8 | 9 | int main() { 10 | printf("%d\n", my_var + 1); 11 | return 0; 12 | } 13 | -------------------------------------------------------------------------------- /compiler-generated/builtin_inline.c: -------------------------------------------------------------------------------- 1 | /* 2 | Will `abs(-1)` be inlined, or precalculated? 3 | The compiler could know that it is deterministic and has no side effects. 4 | 5 | GCC 4.8: yes, even at -O0. 6 | */ 7 | 8 | #include 9 | #include 10 | 11 | int main() { 12 | printf("%d\n", abs(-1)); 13 | printf("%d\n", abs(-1)); 14 | return 0; 15 | } 16 | -------------------------------------------------------------------------------- /compiler-generated/builtin_no_side_effect.c: -------------------------------------------------------------------------------- 1 | /* 2 | Will `abs(-1)` be optimized away? 3 | The compiler must know that it has no internal side effects. 4 | 5 | GCC 4.8: yes, even at -O0. 
6 | */ 7 | 8 | #include 9 | #include 10 | 11 | int main() { 12 | abs(-1); 13 | return 0; 14 | } 15 | -------------------------------------------------------------------------------- /compiler-generated/callee_saved_registers.c: -------------------------------------------------------------------------------- 1 | /* https://stackoverflow.com/questions/18024672/what-registers-are-preserved-through-a-linux-x86-64-function-call/55207335#55207335 */ 2 | 3 | #include 4 | 5 | uint64_t inc(uint64_t i) { 6 | __asm__ __volatile__( 7 | "" 8 | : "+m" (i) 9 | : 10 | : "rax", 11 | "rbx", 12 | "rcx", 13 | "rdx", 14 | "rsi", 15 | "rdi", 16 | /* GCC 8.2: compiles in -O3, fails in -O0 with: 17 | * error: bp cannot be used in asm here 18 | */ 19 | /*"rbp",*/ 20 | "rsp", 21 | "r8", 22 | "r9", 23 | "r10", 24 | "r11", 25 | "r12", 26 | "r13", 27 | "r14", 28 | "r15", 29 | "ymm0", 30 | "ymm1", 31 | "ymm2", 32 | "ymm3", 33 | "ymm4", 34 | "ymm5", 35 | "ymm6", 36 | "ymm7", 37 | "ymm8", 38 | "ymm9", 39 | "ymm10", 40 | "ymm11", 41 | "ymm12", 42 | "ymm13", 43 | "ymm14", 44 | "ymm15" 45 | ); 46 | return i + 1; 47 | } 48 | 49 | int main(int argc, char **argv) { 50 | (void)argv; 51 | return inc(argc); 52 | } 53 | -------------------------------------------------------------------------------- /compiler-generated/cast_int_long.c: -------------------------------------------------------------------------------- 1 | /* 2 | # Cast int to long. 
3 | 4 | GCC 4.8 -O0: CDQE: http://stackoverflow.com/questions/6555094/what-does-cltq-do-in-assembly/31114310#31114310 5 | GCC 4.8 -O3: MOVSX, which extends and moves in one go: http://stackoverflow.com/questions/7861095/what-does-movsbl-instruction-do 6 | */ 7 | 8 | #include 9 | #include 10 | 11 | int main() { 12 | int i; 13 | long l; 14 | i = time(NULL); 15 | l = i; 16 | printf("%ld", l); 17 | return 0; 18 | } 19 | -------------------------------------------------------------------------------- /compiler-generated/char_array_init.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int main() { 4 | /* Moved stack. */ 5 | char s[] = "abc"; 6 | printf("%s\n", s); 7 | return 0; 8 | } 9 | -------------------------------------------------------------------------------- /compiler-generated/char_array_init_global.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | /* .data */ 4 | char s[] = "abc"; 5 | 6 | int main() { 7 | printf("%s\n", s); 8 | return 0; 9 | } 10 | -------------------------------------------------------------------------------- /compiler-generated/char_array_init_long.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int main() { 4 | /* Multiple movs to stack. 
*/ 5 | char s[] = "abcdefghiabcdefghiabcdefghiabcdefghiabcdefghiabcdefghiabcdefghiabcdefghiabcdefghiabcdefghi"; 6 | printf("%s\n", s); 7 | return 0; 8 | } 9 | -------------------------------------------------------------------------------- /compiler-generated/char_pointer_init.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int main() { 4 | char *s = "abc"; 5 | printf("%s\n", s); 6 | return 0; 7 | } 8 | -------------------------------------------------------------------------------- /compiler-generated/char_pointer_init_global.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | /* .data */ 4 | char *s = "abc"; 5 | 6 | int main() { 7 | printf("%s\n", s); 8 | return 0; 9 | } 10 | -------------------------------------------------------------------------------- /compiler-generated/class.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | Basic class concepts: 3 | 4 | - constructor 5 | - field 6 | - method 7 | */ 8 | 9 | #include 10 | #include 11 | 12 | class C { 13 | public: 14 | int i; 15 | 16 | C(int i) : i(i) {} 17 | 18 | /* 19 | this is passed as an extra argument. 20 | 21 | Like for struct, `this` points directly to the class data. 22 | */ 23 | int f(int j) { 24 | int r = std::time(NULL); 25 | r += i; 26 | r += j; 27 | return r; 28 | } 29 | }; 30 | 31 | int main() { 32 | /* 33 | Passes a hidden argument to this which the constructor initializes. 34 | 35 | Just like returning struct from a function in C. 
36 | */ 37 | C c(1); 38 | int i = c.f(2); 39 | std::cout << i << std::endl; 40 | } 41 | -------------------------------------------------------------------------------- /compiler-generated/common.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | /* 4 | Goes to `.common` on the `.o`, and `.bss` on the `.out` 5 | if not defined in any other file linked to (the case here). 6 | 7 | GCC 4.8 does not use weak symbols by default. TODO why? 8 | 9 | `.common` seems to be treated magically by the linker. 10 | */ 11 | int my_var; 12 | 13 | int main() { 14 | my_var = 0; 15 | printf("%d\n", my_var + 1); 16 | return 0; 17 | } 18 | -------------------------------------------------------------------------------- /compiler-generated/common_large.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | /* 4 | Does not occupy space on the binaries, only when the program loads: 5 | http://stackoverflow.com/questions/610682/do-bss-section-zero-initialized-variables-occupy-space-in-elf-file 6 | */ 7 | int my_var[1000000]; 8 | 9 | int main() { 10 | my_var[0] = 1; 11 | printf("%d\n", my_var[0]); 12 | return 0; 13 | } 14 | -------------------------------------------------------------------------------- /compiler-generated/const.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | /* Goes to .rodata. */ 4 | const int my_var = 1; 5 | 6 | int main() { 7 | /* Gets inlined. */ 8 | printf("%d\n", my_var); 9 | return 0; 10 | } 11 | -------------------------------------------------------------------------------- /compiler-generated/const_cast.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | const int my_var = 1; 4 | 5 | int main() { 6 | int* ip = (int*)&my_var; 7 | /* 8 | Segfaults, since my_var is in rodata. 9 | 10 | Fine since this is C undefined behaviour. 
11 | */ 12 | *ip = 2; 13 | return 0; 14 | } 15 | -------------------------------------------------------------------------------- /compiler-generated/constexpr.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | /* .rodata, not SHN_ABS. */ 4 | constexpr int my_var = 1; 5 | 6 | int main() { 7 | std::cout << my_var << std::endl; 8 | } 9 | -------------------------------------------------------------------------------- /compiler-generated/cse.c: -------------------------------------------------------------------------------- 1 | /* 2 | # CSE 3 | 4 | # Common subexpression elimination 5 | 6 | https://en.wikipedia.org/wiki/Common_subexpression_elimination 7 | 8 | Will `a * b` be calculated only once? 9 | 10 | GCC 4.8: yes, at `-O3`, no at `-O0`. 11 | */ 12 | 13 | #include 14 | #include 15 | #include 16 | 17 | int main() { 18 | int a, b; 19 | a = time(NULL); 20 | b = time(NULL); 21 | printf("%d\n", a * b + 1); 22 | printf("%d\n", a * b * 2); 23 | return 0; 24 | } 25 | -------------------------------------------------------------------------------- /compiler-generated/cse_function.c: -------------------------------------------------------------------------------- 1 | /* 2 | # CSE function 3 | 4 | # Common subexpression elimination function 5 | 6 | http://stackoverflow.com/questions/1982131/is-loop-hoisting-still-a-valid-manual-optimization-for-c-code 7 | 8 | Will `f(a)` be calculated only once? 9 | 10 | The compiler must prove that the function has no side effects. 11 | 12 | In GCC, the attribute `const` tells that to the compiler, but it alone does not guarantee the hoisting: 13 | you might also have to play with the hoisting parameters if the function is complex. 14 | 15 | GCC 4.8: yes, at `-O3`, no at `-O0`. 
16 | */ 17 | 18 | #include 19 | #include 20 | #include 21 | 22 | int f(int x) { 23 | return x * x; 24 | } 25 | 26 | int main() { 27 | int a; 28 | a = time(NULL); 29 | printf("%d\n", f(a) + 1); 30 | printf("%d\n", f(a) * 2); 31 | return 0; 32 | } 33 | -------------------------------------------------------------------------------- /compiler-generated/cse_large_function.c: -------------------------------------------------------------------------------- 1 | /* 2 | # CSE large function 3 | Why can't GCC 4.8 -O3 with ramped up CSE `--param`s factorize 4 | the expensive `countPrimesLessThan` call? 5 | 6 | Origin: http://stackoverflow.com/a/1983492/895245 7 | */ 8 | 9 | #include 10 | 11 | /*int isPrime(int) __attribute__((const));*/ 12 | int isPrime(int p) { 13 | for (int i = 2; i*i <= p; ++i) { 14 | if ((p % i) == 0) return 0; 15 | } 16 | return 1; 17 | } 18 | 19 | /*int countPrimesLessThan(int) __attribute__((const));*/ 20 | int countPrimesLessThan(int max) { 21 | int count = 0; 22 | for (int i = 2; i < max; ++i) { 23 | if (isPrime(i)) ++count; 24 | } 25 | return count; 26 | } 27 | 28 | int main() { 29 | for (int i = 0; i < 5; ++i) { 30 | printf("%d\n", countPrimesLessThan(1000*1000)); 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /compiler-generated/data.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | /* If this were 0, it would fall on the BSS. 
*/ 4 | int my_var = 1; 5 | 6 | int main() { 7 | printf("%d\n", my_var + 1); 8 | return 0; 9 | } 10 | -------------------------------------------------------------------------------- /compiler-generated/double_assign_constant.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int main() { 4 | double f; 5 | f = 0.0; 6 | f = 1.0; 7 | printf("%f\n", f); 8 | return 0; 9 | } 10 | -------------------------------------------------------------------------------- /compiler-generated/double_sum.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main() { 5 | double f; 6 | f = (double)(time(NULL) % 3); 7 | /* 8 | GCC 4.8: 9 | 10 | - fpmath=sse: addsd (default) 11 | - fpmath=387: faddp 12 | */ 13 | f += 0.5; 14 | printf("%f\n", f); 15 | return 0; 16 | } 17 | -------------------------------------------------------------------------------- /compiler-generated/enum.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | /* 4 | enums in C need not be represented in object files, and are omitted from them. 5 | 6 | To use an enum across files, you must put it in a header file and include it. 
7 | */ 8 | enum my_enum { 9 | my_enum_a = 0, 10 | my_enum_b = 1 11 | }; 12 | 13 | int main() { 14 | printf("%d\n", my_enum_a); 15 | return 0; 16 | } 17 | -------------------------------------------------------------------------------- /compiler-generated/float2int.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main() { 5 | int i; 6 | float f; 7 | f = (float)(time(NULL) % 3) + 0.5; 8 | /* 9 | - sse: cvttss2si 10 | */ 11 | i = (int)f; 12 | printf("%d\n", i); 13 | return 0; 14 | } 15 | -------------------------------------------------------------------------------- /compiler-generated/float2int_direct_vs_variable.c: -------------------------------------------------------------------------------- 1 | /* 2 | http://stackoverflow.com/questions/17220787/c-casting-from-double-to-int 3 | 4 | Don't reproduce him. 5 | */ 6 | 7 | #include 8 | #include 9 | 10 | int main() { 11 | int a; 12 | int d; 13 | double b = 0.41; 14 | 15 | /* Cast from variable. */ 16 | double c = b * 100.0; 17 | a = (int)(c); 18 | 19 | /* Cast expression directly. */ 20 | d = (int)(b * 100.0); 21 | 22 | printf("c = %f \n", c); 23 | printf("a = %d \n", a); 24 | printf("d = %d \n", d); 25 | 26 | return 0; 27 | } 28 | -------------------------------------------------------------------------------- /compiler-generated/float_assign_constant.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int main() { 4 | float f; 5 | /* 6 | GCC 4.8: Unlike ints, float literals are stored in memory. 7 | TODO why? double just uses immediates. To save space because 8 | not possible to have 4 byte immediates? 
9 | */ 10 | f = 0.0f; 11 | f = 1.0f; 12 | printf("%f\n", f); 13 | return 0; 14 | } 15 | -------------------------------------------------------------------------------- /compiler-generated/float_sum.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main() { 5 | float f; 6 | f = (float)(time(NULL) % 3); 7 | /* 8 | GCC 4.8: 9 | 10 | - fpmath=sse: addss (default) 11 | - fpmath=387: faddp 12 | */ 13 | f += 0.5f; 14 | printf("%f\n", f); 15 | return 0; 16 | } 17 | -------------------------------------------------------------------------------- /compiler-generated/for_empty.c: -------------------------------------------------------------------------------- 1 | /* 2 | # for empty 3 | 4 | GCC 4.8 -O0: stays 5 | GCC 4.8 -O3: optimized away 6 | */ 7 | 8 | int main() { 9 | int i; 10 | for (i = 0; i < 16; i++) 11 | ; 12 | return 0; 13 | } 14 | -------------------------------------------------------------------------------- /compiler-generated/function_call_another.c: -------------------------------------------------------------------------------- 1 | /* 2 | A function that calls another. 3 | */ 4 | 5 | #include 6 | #include 7 | 8 | int func1(int i, int j, int k) { 9 | int l; 10 | l = i + j + k; 11 | return l; 12 | } 13 | 14 | /* 15 | This function calls another function. 16 | 17 | Here, rsp must be moved to: 18 | 19 | - store all arguments and local variables. 20 | - align to 0x10 in x86-64 21 | */ 22 | int func0(int i, int j, int k) { 23 | int l; 24 | l = func1(i, j, k); 25 | return l; 26 | } 27 | 28 | int main() { 29 | printf("%d\n", func0(1, 2, 3)); 30 | return 0; 31 | } 32 | -------------------------------------------------------------------------------- /compiler-generated/function_int_int_int.c: -------------------------------------------------------------------------------- 1 | /* 2 | Simple function that takes two ints and returns a third. 
3 | */ 4 | 5 | #include 6 | 7 | /* 8 | GCC 4.8 -O3 reduces this to a single lea. 9 | 10 | At `-O0`, when there is no function call inside this function, `rsp` is not changed. 11 | Such function is called a leaf function. 12 | 13 | TODO why does the first variable go to `rbp - 0x14` instead of `rbp - 0x4`? 14 | */ 15 | int func0(int i, int j) { 16 | int k; 17 | k = i + j; 18 | return k; 19 | } 20 | 21 | int main() { 22 | /* 23 | GCC 4.8 -O3 resolves the function call into a constant. 24 | */ 25 | printf("%d\n", func0(1, 2)); 26 | return 0; 27 | } 28 | -------------------------------------------------------------------------------- /compiler-generated/function_many_arguments.c: -------------------------------------------------------------------------------- 1 | /* Let's force some stack arguments. */ 2 | 3 | /* for i in `seq 0 3`; do echo "int i_$((i*4 + 0)), int i_$((i*4 + 1)), int i_$((i*4 + 2)), int i_$((i*4 + 3)),"; done */ 4 | int func( 5 | int i_0, int i_1, int i_2, int i_3, 6 | int i_4, int i_5, int i_6, int i_7, 7 | int i_8, int i_9, int i_10, int i_11, 8 | int i_12, int i_13, int i_14, int i_15 9 | ) { 10 | return 11 | /* for i in `seq 0 3`; do echo "i_$((i*4 + 0)) + i_$((i*4 + 1)) + i_$((i*4 + 2)) + i_$((i*4 + 3)) +"; done */ 12 | i_0 + i_1 + i_2 + i_3 + 13 | i_4 + i_5 + i_6 + i_7 + 14 | i_8 + i_9 + i_10 + i_11 + 15 | i_12 + i_13 + i_14 + i_15 16 | ; 17 | } 18 | 19 | int main(int argc, char **argv) { 20 | (void)argv; 21 | return func( 22 | argc, argc, argc, argc, 23 | argc, argc, argc, argc, 24 | argc, argc, argc, argc, 25 | argc, argc, argc, argc 26 | ); 27 | } 28 | -------------------------------------------------------------------------------- /compiler-generated/function_many_arguments_2calls.c: -------------------------------------------------------------------------------- 1 | /* Trying to force out tail call optimization. 
*/ 2 | 3 | int func2( 4 | int i_0, int i_1, int i_2, int i_3, 5 | int i_4, int i_5, int i_6, int i_7, 6 | int i_8, int i_9, int i_10, int i_11, 7 | int i_12, int i_13, int i_14, int i_15 8 | ) { 9 | return 10 | i_0 + i_1 + i_2 + i_3 + 11 | i_4 + i_5 + i_6 + i_7 + 12 | i_8 + i_9 + i_10 + i_11 + 13 | i_12 + i_13 + i_14 + i_15 14 | ; 15 | } 16 | 17 | int func( 18 | int i_0, int i_1, int i_2, int i_3, 19 | int i_4, int i_5, int i_6, int i_7, 20 | int i_8, int i_9, int i_10, int i_11, 21 | int i_12, int i_13, int i_14, int i_15 22 | ) { 23 | return func2( 24 | i_12 , i_13 , i_14 , i_15 , 25 | i_8 , i_9 , i_10 , i_11 , 26 | i_4 , i_5 , i_6 , i_7 , 27 | i_0 , i_1 , i_2 , i_3 28 | ); 29 | } 30 | 31 | int main(int argc, char **argv) { 32 | (void)argv; 33 | return func( 34 | argc, argc, argc, argc, 35 | argc, argc, argc, argc, 36 | argc, argc, argc, argc, 37 | argc, argc, argc, argc 38 | ); 39 | } 40 | -------------------------------------------------------------------------------- /compiler-generated/function_many_arguments_64.c: -------------------------------------------------------------------------------- 1 | /* Now with 64 arguments, trying to break the red zone and force the stack to move. 2 | * But TODO why: fails in GCC 8.2, stack is still fixed. 
3 | */ 4 | 5 | /* for i in `seq 0 15`; do echo "int i_$((i*4 + 0)), int i_$((i*4 + 1)), int i_$((i*4 + 2)), int i_$((i*4 + 3)),"; done */ 6 | int func( 7 | int i_0, int i_1, int i_2, int i_3, 8 | int i_4, int i_5, int i_6, int i_7, 9 | int i_8, int i_9, int i_10, int i_11, 10 | int i_12, int i_13, int i_14, int i_15, 11 | 12 | int i_16, int i_17, int i_18, int i_19, 13 | int i_20, int i_21, int i_22, int i_23, 14 | int i_24, int i_25, int i_26, int i_27, 15 | int i_28, int i_29, int i_30, int i_31, 16 | 17 | int i_32, int i_33, int i_34, int i_35, 18 | int i_36, int i_37, int i_38, int i_39, 19 | int i_40, int i_41, int i_42, int i_43, 20 | int i_44, int i_45, int i_46, int i_47, 21 | 22 | int i_48, int i_49, int i_50, int i_51, 23 | int i_52, int i_53, int i_54, int i_55, 24 | int i_56, int i_57, int i_58, int i_59, 25 | int i_60, int i_61, int i_62, int i_63 26 | ) { 27 | return 28 | /* for i in `seq 0 15`; do echo "i_$((i*4 + 0)) + i_$((i*4 + 1)) + i_$((i*4 + 2)) + i_$((i*4 + 3)) +"; done */ 29 | i_0 + i_1 + i_2 + i_3 + 30 | i_4 + i_5 + i_6 + i_7 + 31 | i_8 + i_9 + i_10 + i_11 + 32 | i_12 + i_13 + i_14 + i_15 + 33 | 34 | i_16 + i_17 + i_18 + i_19 + 35 | i_20 + i_21 + i_22 + i_23 + 36 | i_24 + i_25 + i_26 + i_27 + 37 | i_28 + i_29 + i_30 + i_31 + 38 | 39 | i_32 + i_33 + i_34 + i_35 + 40 | i_36 + i_37 + i_38 + i_39 + 41 | i_40 + i_41 + i_42 + i_43 + 42 | i_44 + i_45 + i_46 + i_47 + 43 | 44 | i_48 + i_49 + i_50 + i_51 + 45 | i_52 + i_53 + i_54 + i_55 + 46 | i_56 + i_57 + i_58 + i_59 + 47 | i_60 + i_61 + i_62 + i_63 48 | ; 49 | } 50 | 51 | int main(int argc, char **argv) { 52 | (void)argv; 53 | return func( 54 | argc, argc, argc, argc, 55 | argc, argc, argc, argc, 56 | argc, argc, argc, argc, 57 | argc, argc, argc, argc, 58 | 59 | argc, argc, argc, argc, 60 | argc, argc, argc, argc, 61 | argc, argc, argc, argc, 62 | argc, argc, argc, argc, 63 | 64 | argc, argc, argc, argc, 65 | argc, argc, argc, argc, 66 | argc, argc, argc, argc, 67 | argc, argc, argc, argc, 68 | 69 
| argc, argc, argc, argc, 70 | argc, argc, argc, argc, 71 | argc, argc, argc, argc, 72 | argc, argc, argc, argc 73 | ); 74 | } 75 | -------------------------------------------------------------------------------- /compiler-generated/gcc.md: -------------------------------------------------------------------------------- 1 | # GCC 2 | 3 | Tested with: GCC 4.8. 4 | 5 | ## Optimizations 6 | 7 | Without any flags, GCC will do several minor optimizations that make the code hard to read for beginners. Don't be afraid :-) 8 | 9 | Common ones: 10 | 11 | - `lea`: `add` and `mov` 12 | - `xor eax, eax`: `mov $0` 13 | - function inlining 14 | - RIP addressing as it is shorter 15 | 16 | ## Label names 17 | 18 | 19 | 20 | ## Optimizations 21 | 22 | - `RBP` optimized out `-fomit-frame-pointer` 23 | 24 | - operate on the correct register for the return value 25 | 26 | int f(i, j) { return i + j; } 27 | 28 | is doable in a single `lea`. 29 | -------------------------------------------------------------------------------- /compiler-generated/gcc/Makefile: -------------------------------------------------------------------------------- 1 | ../Makefile -------------------------------------------------------------------------------- /compiler-generated/gcc/Makefile_params: -------------------------------------------------------------------------------- 1 | STD := gnu11 2 | -------------------------------------------------------------------------------- /compiler-generated/gcc/builtin_expect.c: -------------------------------------------------------------------------------- 1 | #include "stdio.h" 2 | #include "time.h" 3 | 4 | int main() { 5 | int i; 6 | i = !time(NULL); 7 | if (__builtin_expect(i, 0)) 8 | puts("a"); 9 | return 0; 10 | } 11 | -------------------------------------------------------------------------------- /compiler-generated/gcc/builtin_expect_off.c: -------------------------------------------------------------------------------- 1 | #include "stdio.h" 2 | #include 
"time.h" 3 | 4 | int main() { 5 | int i; 6 | i = !time(NULL); 7 | if (i) 8 | puts("a"); 9 | return 0; 10 | } 11 | -------------------------------------------------------------------------------- /compiler-generated/gcc/interactive/Makefile: -------------------------------------------------------------------------------- 1 | ../Makefile -------------------------------------------------------------------------------- /compiler-generated/gcc/interactive/builtin_expect_off_perf.c: -------------------------------------------------------------------------------- 1 | /* https://stackoverflow.com/questions/7346929/what-is-the-advantage-of-gccs-builtin-expect-in-if-else-statements/31540623#31540623 */ 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | int main() { 8 | enum N { N = 8 }; 9 | unsigned int i, x[N], y; 10 | srand(time(NULL)); 11 | y = rand(); 12 | for (i = 0; i < N; i++) { 13 | x[i] = rand() & 4; 14 | } 15 | for (i = 0; i < 100000000; i++) { 16 | if (!x[0]) y++; 17 | if (x[1]) y++; 18 | if (x[2]) y++; 19 | if (!x[3]) y++; 20 | if (!x[4]) y++; 21 | if (x[5]) y++; 22 | if (!x[6]) y++; 23 | if (x[7]) y++; 24 | y = 2*y*y + 3*y + 5; 25 | } 26 | printf("%u\n", y); 27 | return 0; 28 | } 29 | -------------------------------------------------------------------------------- /compiler-generated/gcc/interactive/builtin_expect_perf.c: -------------------------------------------------------------------------------- 1 | /* https://stackoverflow.com/questions/7346929/what-is-the-advantage-of-gccs-builtin-expect-in-if-else-statements/31540623#31540623 */ 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | int main() { 8 | enum N { N = 8 }; 9 | unsigned int i, x[N], y; 10 | srand(time(NULL)); 11 | y = rand(); 12 | for (i = 0; i < N; i++) { 13 | x[i] = rand() & 4; 14 | } 15 | for (i = 0; i < 100000000; i++) { 16 | if (__builtin_expect( !x[0], 0)) y++; 17 | if (__builtin_expect(!!x[1], 1)) y++; 18 | if (__builtin_expect(!!x[2], 1)) y++; 19 | if (__builtin_expect( !x[3], 0)) y++; 20 
| if (__builtin_expect( !x[4], 0)) y++; 21 | if (__builtin_expect(!!x[5], 1)) y++; 22 | if (__builtin_expect( !x[6], 0)) y++; 23 | if (__builtin_expect(!!x[7], 1)) y++; 24 | y = 2*y*y + 3*y + 5; 25 | } 26 | printf("%u\n", y); 27 | return 0; 28 | } 29 | -------------------------------------------------------------------------------- /compiler-generated/gcc/restrict.cpp: -------------------------------------------------------------------------------- 1 | void f(int *a, int *b, int *x) { 2 | *a += *x; 3 | *b += *x; 4 | } 5 | 6 | void fr(int *__restrict__ a, int *__restrict__ b, int *__restrict__ x) { 7 | *a += *x; 8 | *b += *x; 9 | } 10 | 11 | int main() {} 12 | -------------------------------------------------------------------------------- /compiler-generated/global_int.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int myint = 0x1234; 4 | 5 | int main() { 6 | return 0; 7 | } 8 | -------------------------------------------------------------------------------- /compiler-generated/hello_world.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int main() { 4 | /* 5 | GCC 4.8 uses __printf_chk by default, 6 | which is part of the LSB and safer than printf: 7 | http://refspecs.linuxbase.org/LSB_4.1.0/LSB-Core-generic/LSB-Core-generic/libc---printf-chk-1.html 8 | */ 9 | puts("Hello world!"); 10 | return 0; 11 | } 12 | -------------------------------------------------------------------------------- /compiler-generated/if.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main() { 5 | if ((int)time(NULL)) { 6 | puts("1"); 7 | } else { 8 | puts("0"); 9 | } 10 | return 0; 11 | } 12 | -------------------------------------------------------------------------------- /compiler-generated/int_assign_constant.c: -------------------------------------------------------------------------------- 1 | 
#include 2 | 3 | int main() { 4 | int i; 5 | /* Immediates */ 6 | i = 0; 7 | i = 1; 8 | printf("%d\n", i); 9 | return 0; 10 | } 11 | -------------------------------------------------------------------------------- /compiler-generated/ld.c: -------------------------------------------------------------------------------- 1 | /* Stuff that ld might define. */ 2 | 3 | #include 4 | 5 | #define F(x) extern char x; \ 6 | printf(#x " = %p\n", &x); 7 | 8 | int main() { 9 | F(etext) 10 | F(edata) 11 | F(end) 12 | F(__data_start) 13 | F(_GLOBAL_OFFSET_TABLE_) 14 | return 0; 15 | } 16 | -------------------------------------------------------------------------------- /compiler-generated/long_double_assign_constant.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int main() { 4 | long double f; 5 | /* Uses rax and rdx for the long double. */ 6 | f = 0.0L; 7 | f = 1.0L; 8 | printf("%Lf\n", f); 9 | return 0; 10 | } 11 | -------------------------------------------------------------------------------- /compiler-generated/long_double_sum.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main() { 5 | long double f; 6 | f = (long double)(time(NULL) % 3); 7 | /* 8 | GCC 4.8: faddp by default. Contrast with double which uses SSE. 9 | 10 | Reason: FPU has the 80-bit operations which GCC uses for `long double`. 11 | There is no 128-bit instruction in SSE. 12 | */ 13 | f += 0.5L; 14 | printf("%Lf\n", f); 15 | return 0; 16 | } 17 | -------------------------------------------------------------------------------- /compiler-generated/memcmp.c: -------------------------------------------------------------------------------- 1 | /* 2 | # memcmp 3 | 4 | GCC 4.8 calls glibc always. 5 | 6 | glibc has lengthy SIMD implementations, so it is not viable to inline it. 
7 | 8 | http://stackoverflow.com/questions/21106801/why-is-memcmp-so-much-faster-than-a-for-loop-check 9 | */ 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | int main() { 18 | size_t i; 19 | int out; 20 | enum N { N = 2 }; 21 | char s[N]; 22 | char s2[N]; 23 | srand(time(NULL)); 24 | for (i = 0; i < N; ++i) { 25 | s[i] = rand() % SCHAR_MAX; 26 | s2[i] = rand() % SCHAR_MAX; 27 | } 28 | out = memcmp(s, s2, N); 29 | printf("%d\n", out == 0); 30 | return 0; 31 | } 32 | -------------------------------------------------------------------------------- /compiler-generated/modulo.c: -------------------------------------------------------------------------------- 1 | /* 2 | GCC 4.8 uses a complicated set of instructions to avoid div which is a very slow instruction. 3 | 4 | http://stackoverflow.com/questions/4361979/how-does-the-gcc-implementation-of-module-work-and-why-does-it-not-use-the 5 | */ 6 | 7 | #include 8 | #include 9 | 10 | int main() { 11 | int i = (int)(time(NULL) % 3); 12 | printf("%d\n", i); 13 | return 0; 14 | } 15 | -------------------------------------------------------------------------------- /compiler-generated/precalculate_loop.c: -------------------------------------------------------------------------------- 1 | /* 2 | # Precalculate loop 3 | 4 | Will the compiler precalculate a loop without side effects, and with deterministic inputs? 5 | 6 | GCC 4.8 -O3: yes. Without this optimization, it takes forever. 7 | 8 | TODO what is the name for this? Is it related to loop unrolling? 9 | */ 10 | 11 | #include 12 | 13 | int main() { 14 | unsigned long i; 15 | for (i = 0; i < 0xFFFFFFFFFFFFFFFFu; i++) 16 | ; 17 | printf("%lx\n", i); 18 | return 0; 19 | } 20 | -------------------------------------------------------------------------------- /compiler-generated/precalculate_loop_unknown_init.c: -------------------------------------------------------------------------------- 1 | /* 2 | # Precalculate loop 3 | 4 | Yup. 
5 | */ 6 | 7 | #include 8 | #include 9 | 10 | int main() { 11 | unsigned long i, j; 12 | j = time(NULL); 13 | for (i = 0; i < 0xFFFFFFFFFFFFFFFFu; i++, j++) 14 | ; 15 | printf("%lx\n", j); 16 | return 0; 17 | } 18 | -------------------------------------------------------------------------------- /compiler-generated/register.c: -------------------------------------------------------------------------------- 1 | /* TODO unable to see any difference with or without register. */ 2 | 3 | #include 4 | 5 | int f() { 6 | int i = 0; 7 | /* 8 | TODO: GCC 4.8 -O3 seems to ignore the possibility that this might change i, 9 | and always returns 0. 10 | 11 | So we don't observe any difference when using `register`. 12 | */ 13 | (*((int*)time(NULL)))++; 14 | return i; 15 | } 16 | 17 | int g() { 18 | register int i = 0; 19 | (*((int*)time(NULL)))++; 20 | return i; 21 | } 22 | 23 | int main() { 24 | return 0; 25 | } 26 | -------------------------------------------------------------------------------- /compiler-generated/restrict.c: -------------------------------------------------------------------------------- 1 | void f(int *a, int *b, int *x) { 2 | *a += *x; 3 | *b += *x; 4 | } 5 | 6 | void fr(int *restrict a, int *restrict b, int *restrict x) { 7 | *a += *x; 8 | *b += *x; 9 | } 10 | 11 | int main() { 12 | return 0; 13 | } 14 | -------------------------------------------------------------------------------- /compiler-generated/return_struct.c: -------------------------------------------------------------------------------- 1 | /* 2 | Return values normally go on `rax`. 3 | 4 | But what happens when returning a large struct that does not fit into registers? 
5 | */ 6 | 7 | #include 8 | #include 9 | 10 | struct S { 11 | int i0; 12 | int i1; 13 | int i2; 14 | int i3; 15 | int i4; 16 | int i5; 17 | int i6; 18 | int i7; 19 | int i8; 20 | int i9; 21 | int i10; 22 | int i11; 23 | int i12; 24 | int i13; 25 | int i14; 26 | int i15; 27 | }; 28 | 29 | struct S f() { 30 | struct S s = { 31 | time(NULL), time(NULL), time(NULL), time(NULL), 32 | time(NULL), time(NULL), time(NULL), time(NULL), 33 | time(NULL), time(NULL), time(NULL), time(NULL), 34 | time(NULL), time(NULL), time(NULL), time(NULL) 35 | }; 36 | return s; 37 | } 38 | 39 | int main() { 40 | /* 41 | GCC 4.8 allocates stack space and passes a pointer to it as a hidden parameter. 42 | 43 | f() then initializes that parameter's location. 44 | */ 45 | struct S s = f(); 46 | int n = 47 | s.i0 + 48 | s.i1 + 49 | s.i2 + 50 | s.i3 + 51 | s.i4 + 52 | s.i5 + 53 | s.i6 + 54 | s.i7 + 55 | s.i8 + 56 | s.i9 + 57 | s.i10 + 58 | s.i11 + 59 | s.i12 + 60 | s.i13 + 61 | s.i14 + 62 | s.i15; 63 | printf("%d\n", n); 64 | return 0; 65 | } 66 | -------------------------------------------------------------------------------- /compiler-generated/sqrt.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | int main() { 6 | float f; 7 | f = time(NULL) % 3; 8 | /* Calls glibc, which uses `sqrtsd` */ 9 | f = sqrt(f); 10 | printf("%f\n", f); 11 | return 0; 12 | } 13 | -------------------------------------------------------------------------------- /compiler-generated/static_local.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int f() { 4 | static int i = 1; 5 | i++; 6 | return i; 7 | } 8 | 9 | int main() { 10 | printf("%d\n", f()); 11 | printf("%d\n", f()); 12 | return 0; 13 | } 14 | -------------------------------------------------------------------------------- /compiler-generated/switch3.c: 
-------------------------------------------------------------------------------- 1 | /* 2 | GCC 4.8: if else 3 | */ 4 | 5 | #include 6 | #include 7 | 8 | int main() { 9 | int i, j; 10 | i = time(NULL) % 3; 11 | switch (i) { 12 | case 0: 13 | j = 0; 14 | break; 15 | case 1: 16 | j = 1; 17 | break; 18 | case 2: 19 | j = 2; 20 | break; 21 | }; 22 | printf("%d\n", j); 23 | return 0; 24 | } 25 | -------------------------------------------------------------------------------- /compiler-generated/switch6.c: -------------------------------------------------------------------------------- 1 | /* 2 | GCC 4.8: jump table 3 | 4 | - https://en.wikipedia.org/wiki/Indirect_branch 5 | - http://stackoverflow.com/questions/9223756/what-does-an-asterisk-before-an-address-mean-in-x86-64-att-assembly 6 | 7 | Basically becomes something like: 8 | 9 | 400661: 83 7d fc 05 cmpl $0x5,-0x4(%rbp) 10 | 400665: 77 42 ja 4006a9 11 | 400667: 8b 45 fc mov -0x4(%rbp),%eax 12 | 40066a: 48 8b 04 c5 60 07 40 mov 0x400760(,%rax,8),%rax 13 | 400671: 00 14 | 400672: ff e0 jmpq *%rax 15 | 400674: c7 45 f8 00 00 00 00 movl $0x0,-0x8(%rbp) 16 | 40067b: eb 2c jmp 4006a9 17 | 40067d: c7 45 f8 01 00 00 00 movl $0x1,-0x8(%rbp) 18 | 400684: eb 23 jmp 4006a9 19 | 20 | which checks if `i > 5`, and if not uses a jump table stored at 21 | `400760` in the `.rodata`, that contains the address of each case in an array. 22 | 23 | The `.rodata` is relocated to contain text addresses. 
24 | */ 25 | 26 | #include 27 | #include 28 | 29 | int main() { 30 | int i, j; 31 | i = time(NULL) % 6; 32 | switch (i) { 33 | case 0: 34 | j = 0; 35 | break; 36 | case 1: 37 | j = 1; 38 | break; 39 | case 2: 40 | j = 2; 41 | break; 42 | case 3: 43 | j = 3; 44 | break; 45 | case 4: 46 | j = 4; 47 | break; 48 | case 5: 49 | j = 5; 50 | break; 51 | }; 52 | printf("%d\n", j); 53 | return 0; 54 | } 55 | -------------------------------------------------------------------------------- /compiler-generated/switch6_if.c: -------------------------------------------------------------------------------- 1 | /* 2 | Identical to switch 6 but with if else, 3 | so see if the compiler can also optimize it with the jump table. 4 | 5 | GCC 4.8 -O0: does not use the jump table. 6 | */ 7 | 8 | #include 9 | #include 10 | 11 | int main() { 12 | int i, j; 13 | i = time(NULL) % 6; 14 | if (i == 0) { 15 | j = 0; 16 | } else if (i == 1) { 17 | j = 1; 18 | } else if (i == 2) { 19 | j = 2; 20 | } else if (i == 3) { 21 | j = 3; 22 | } else if (i == 4) { 23 | j = 4; 24 | } else if (i == 5) { 25 | j = 5; 26 | }; 27 | printf("%d\n", j); 28 | return 0; 29 | } 30 | -------------------------------------------------------------------------------- /compiler-generated/switch6_spread.c: -------------------------------------------------------------------------------- 1 | /* 2 | Spread out case numbers exponentially to see if the jump table table is still used. 3 | 4 | GCC 4.8 -O0: creates 32 entries on the jump table, 5 | such that each entry that does not have a case jumps to the end of the switch. 
6 | */ 7 | 8 | #include 9 | #include 10 | 11 | int main() { 12 | int i, j; 13 | i = time(NULL) % 6; 14 | j = -1; 15 | switch (i) { 16 | case 1: 17 | j = 0; 18 | break; 19 | case 2: 20 | j = 1; 21 | break; 22 | case 4: 23 | j = 2; 24 | break; 25 | case 8: 26 | j = 3; 27 | break; 28 | case 16: 29 | j = 4; 30 | break; 31 | case 32: 32 | j = 5; 33 | break; 34 | }; 35 | printf("%d\n", j); 36 | return 0; 37 | } 38 | -------------------------------------------------------------------------------- /compiler-generated/switch6_very_spread.c: -------------------------------------------------------------------------------- 1 | /* 2 | Spread switch numbers even more. 3 | 4 | GCC 4.8 -O0: Seems to do binary search from some point onwards. 5 | */ 6 | 7 | #include 8 | #include 9 | 10 | int main() { 11 | int i, j; 12 | i = time(NULL) % 1000000; 13 | j = -1; 14 | switch (i) { 15 | case 0: 16 | j = 0; 17 | break; 18 | case 1: 19 | j = 1; 20 | break; 21 | case 0x10: 22 | j = 2; 23 | break; 24 | case 0x100: 25 | j = 3; 26 | break; 27 | case 0x1000: 28 | j = 4; 29 | break; 30 | case 0xFFFFFFF: 31 | j = 5; 32 | break; 33 | }; 34 | printf("%d\n", j); 35 | return 0; 36 | } 37 | -------------------------------------------------------------------------------- /compiler-generated/tail_call.c: -------------------------------------------------------------------------------- 1 | /* https://stackoverflow.com/questions/310974/what-is-tail-call-optimization/55230417#55230417 */ 2 | 3 | #include 4 | #include 5 | 6 | unsigned factorial(unsigned n) { 7 | if (n == 1) { 8 | return 1; 9 | } 10 | return n * factorial(n - 1); 11 | } 12 | 13 | int main(int argc, char **argv) { 14 | int input; 15 | if (argc > 1) { 16 | input = strtoul(argv[1], NULL, 0); 17 | } else { 18 | input = 5; 19 | } 20 | printf("%u\n", factorial(input)); 21 | return EXIT_SUCCESS; 22 | } 23 | -------------------------------------------------------------------------------- /compiler-generated/template.cpp: 
-------------------------------------------------------------------------------- 1 | /* 2 | # template 3 | */ 4 | 5 | #include 6 | 7 | template 8 | C f(C i) { return i; } 9 | 10 | int main() { 11 | f(1); 12 | f(1.5); 13 | } 14 | -------------------------------------------------------------------------------- /compiler-generated/use_rax.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int global = 0; 4 | 5 | int my_function() { 6 | /* 7 | Is the compiler smart enough to move global into `rax` to increment it, 8 | to reuse that for the return value? 9 | 10 | For GCC 4.8: 11 | 12 | - without `-O3`: rax used by default, but an extra mov from global to eax is done 13 | - with `O3`: no extra move. TODO find exact flag that enables this. 14 | */ 15 | global++; 16 | return global; 17 | } 18 | 19 | int main() { 20 | printf("%d\n", my_function()); 21 | printf("%d\n", my_function()); 22 | return 0; 23 | } 24 | -------------------------------------------------------------------------------- /compiler-generated/use_rax_return.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int global = 0; 4 | 5 | int my_function() { 6 | /* 7 | Is the compiler smart enough to move global into `rax` to increment it, 8 | to reuse that for the return value? 9 | 10 | Yes for GCC 4.8 -O3 11 | */ 12 | global++; 13 | return global; 14 | } 15 | 16 | int main() { 17 | printf("%d\n", my_function()); 18 | printf("%d\n", my_function()); 19 | return 0; 20 | } 21 | -------------------------------------------------------------------------------- /compiler-generated/virtual.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | # virtual 3 | 4 | Let's look at the C++ vtable. 
5 | 6 | - http://stackoverflow.com/questions/5712808/understanding-the-vtable-entries 7 | - http://stackoverflow.com/questions/1963926/when-is-a-vtable-created-in-c 8 | 9 | Every virtual class has: 10 | 11 | - a vtable: an array of addresses of the methods of the class stored in `.rodata` 12 | - an extra hidden field which points to the vtable of this class 13 | - extra initialization code on the constructor that sets this hidden field 14 | 15 | Whenever a virtual method call is done, it: 16 | 17 | - dereferences the hidden field to find the vtable 18 | - adds a known offset depending on which method is being called. 19 | Each method must have a fixed position in the vtable for all classes. 20 | - dereferences that address and calls it with `callq *%rax` 21 | 22 | # typeinfo 23 | 24 | There is a single typeinfo object for each virtual class, and it is stored 25 | 8 bytes before the first function. 26 | 27 | # TODO 28 | 29 | And there is yet another field on each vtable: 30 | 31 | http://stackoverflow.com/questions/5712808 32 | 33 | TODO: observe it. 34 | */ 35 | 36 | #include 37 | #include 38 | #include 39 | 40 | int main() { 41 | class Base { 42 | public: 43 | int i; 44 | Base(int i) : i(i) {} 45 | virtual int f() { return this->i; } 46 | virtual int g() { return this->i + 10; } 47 | }; 48 | 49 | class Derived1 : public Base { 50 | public: 51 | Derived1(int i) : Base(i) {} 52 | virtual int f() { return this->i + 1; } 53 | virtual int g() { return this->i + 11; } 54 | }; 55 | 56 | class Derived2 : public Base { 57 | public: 58 | Derived2(int i) : Base(i) {} 59 | virtual int f() { return this->i + 2; } 60 | virtual int g() { return this->i + 12; } 61 | }; 62 | 63 | Base *bp; 64 | int i; 65 | 66 | // For each object of a virtual class, store the call table address 67 | // on the stack for it. This address falls in `.rodata`.
68 | Base b(0); 69 | bp = &b; 70 | i = bp->f(); 71 | assert(i == 0); 72 | i = bp->g(); 73 | assert(i == 10); 74 | std::cout << typeid(*bp).name() << std::endl; 75 | 76 | Derived1 d1(0); 77 | bp = &d1; 78 | i = bp->f(); 79 | assert(i == 1); 80 | i = bp->g(); 81 | assert(i == 11); 82 | std::cout << typeid(*bp).name() << std::endl; 83 | 84 | Derived2 d2(0); 85 | bp = &d2; 86 | i = bp->f(); 87 | assert(i == 2); 88 | i = bp->g(); 89 | assert(i == 12); 90 | std::cout << typeid(*bp).name() << std::endl; 91 | } 92 | -------------------------------------------------------------------------------- /compiler-generated/while_infinite.c: -------------------------------------------------------------------------------- 1 | /* 2 | # while infinite 3 | 4 | http://stackoverflow.com/questions/2178115/are-compilers-allowed-to-eliminate-infinite-loops 5 | */ 6 | 7 | int main() { 8 | while (1); 9 | return 0; 10 | } 11 | -------------------------------------------------------------------------------- /containers.md: -------------------------------------------------------------------------------- 1 | # Containers 2 | 3 | Formats that contain assembly. 4 | 5 | Elf is the most popular format on Linux. 6 | 7 | ### bin 8 | 9 | 10 | 11 | ### Mach-O 12 | 13 | 14 | 15 | MAC OS. 16 | 17 | ### PE 18 | 19 | 20 | 21 | Windows `.exe` as of 2013. 22 | -------------------------------------------------------------------------------- /cpu-benchmarks.md: -------------------------------------------------------------------------------- 1 | # CPU benchmarks 2 | 3 | - 4 | - Useless metric because of CISC vs RISC 5 | - lists a few: 6 | - parallel: 7 | - 8 | - 9 | - 10 | - Multicore focus 11 | - TODO focus? 12 | - TODO focus? Canonical source? Not to be confused with the C# benchmark: 13 | 14 | Low level: 15 | 16 | - DMIPS 17 | 18 | - specially DMIPS / Watt. 19 | 20 | - Description: 21 | 22 | > lmbench is a suite of simple, portable, ANSI/C microbenchmarks for UNIX/POSIX. 
In general, it measures two key features: latency and bandwidth. 23 | 24 | GitHub mirror: 25 | -------------------------------------------------------------------------------- /cpu-bugs.md: -------------------------------------------------------------------------------- 1 | # CPU Bugs 2 | 3 | CPU bugs are very serious for the manufacturer, since users may ask for refunds on an expensive product. 4 | 5 | ## 2015 bug festival 6 | 7 | 8 | 9 | ## Halt and catch fire 10 | 11 | 12 | 13 | ## FDIV bug 14 | 15 | Found out by a mathematician doing numerical calculations. 16 | 17 | 18 | 19 | ## F00F bug 20 | 21 | 22 | 23 | ## Cyrix bug 24 | 25 | 26 | -------------------------------------------------------------------------------- /cpu-hardware-design.md: -------------------------------------------------------------------------------- 1 | # CPU architecture 2 | 3 | Clock limits: 4 | 5 | - 6 | - 7 | 8 | Misc: 9 | 10 | - 11 | - 12 | -------------------------------------------------------------------------------- /cpu-optimizations.md: -------------------------------------------------------------------------------- 1 | # CPU Optimizations 2 | 3 | CPU semi-internals that can be used to write more efficient code. 4 | 5 | Compilers take most of those into consideration. 6 | 7 | ## Instruction level parallelism 8 | 9 | ### Out of order processing 10 | 11 | 12 | 13 | - 14 | 15 | ## Megahertz myth 16 | 17 | - 18 | -------------------------------------------------------------------------------- /cpu-pins.md: -------------------------------------------------------------------------------- 1 | # CPU pins 2 | 3 | ## Simplified CPU 4 | 5 | For many purposes, you can picture a CPU as having only the following pins: 6 | 7 | - memory selector 8 | - data 9 | - clock 10 | - ground and 5V 11 | 12 | ## Read or write two memory slots at once 13 | 14 | 15 | 16 | Impossible, since there is only one memory selector at a time.
17 | 18 | So instructions can never operate at two different memory locations at once, nor read and write at a single memory location at once. 19 | 20 | ## Pinout 21 | 22 | It is a good idea to understand what the physical pins of the processor are and what they do, in order to understand what is actually going on when you execute instructions. 23 | 24 | ![8086 pinout a](8086-pinout.gif "8086 pinout") 25 | 26 | ![80387 pinout a](80387-pinout.gif "80387 pinout") 27 | 28 | TODO understand 29 | 30 | ## Real CPUs 31 | 32 | 33 | -------------------------------------------------------------------------------- /elf.md: -------------------------------------------------------------------------------- 1 | # ELF 2 | 3 | ### Linux 4 | 5 | Interesting files in v4.0: 6 | 7 | - `include/uapi/linux/elf.h`: contains most of the format, which is natural since the format is needed from userland to implement compilers, and most of it is not arch dependent 8 | - `include/linux/elf.h` 9 | - `arch/x86/include/asm/elf.h`: x86 specifics described in the AMD64 ABI extension, e.g. the `R_X86_64_64` type 10 | - `fs/binfmt_elf.c`: this is where the real action happens. `do_execve` in `src/fs/exec.c` from the system call calls `load_elf_binary`, which prepares everything and culminates in a call to `start_thread` 11 | 12 | ### GCC 13 | 14 | In 5.1, definitions are under various `config/` files. 15 | 16 | ## C ABI 17 | 18 | ### _start 19 | 20 | ### Entry point 21 | 22 | [System V ABI AMD64][] says that `main` is the entry point of a C program, and it calls `_start` which is the ELF entry point as mentioned in the [System V ABI AMD64][]. `main` is not special in pure assembly 23 | 24 | > The initial state of the process stack, i.e. when _start is called 25 | 26 | describes the call sequence that actually happens on Linux. 27 | 28 | TODO where is it mentioned in the arch agnostic standards? 29 | 30 | ## C++ ABI 31 | 32 | The [System V ABI AMD64][] links to the [Itanium C++ ABI][].
33 | 34 | ## Relocation 35 | 36 | 37 | 38 | ### R_386_32 39 | 40 | To test this one out, try using: 41 | 42 | a: .long s 43 | b: .long s + 0x12345678 44 | s: 45 | 46 | then: 47 | 48 | as --32 -o main.o main.S 49 | objdump -dzr main.o 50 | 51 | on Binutils 2.24 gives: 52 | 53 | 00000000 : 54 | 0: 08 00 or %al,(%eax) 55 | 0: R_386_32 .text 56 | 2: 00 00 add %al,(%eax) 57 | 58 | 00000004 : 59 | 4: 80 56 34 12 adcb $0x12,0x34(%esi) 60 | 4: R_386_32 .text 61 | 62 | This makes it really clear how: 63 | 64 | - `a` gets 8 added, which is the length of `a` + `b` (to reach `s`) 65 | - `b` gets `0x12345680` == `0x12345678 + 8`, where `8` is once again the position of `s` 66 | 67 | ### R_X86_64_PC32 68 | 69 | This is another common relocation method that does: 70 | 71 | S + A - P 72 | 73 | on 4 bytes, where `P` is the current position of the Program Counter on the text segment, thus the `PC` on the name, A.K.A. the `RIP` register. 74 | 75 | This method is common in x86-64 because `RIP` relative addressing is very popular, and for it to work, the relocation must subtract the position `P`. 76 | 77 | Note that `%RIP` points to the *next* instruction: so it is common to use `A = -4` to remove the offset of the 4 byte address which is at the end of the instruction encoding: 78 | 79 | X Y A A A A 80 | Next instruction <-- RIP 81 | 82 | ### Value of the relocated memory before relocation 83 | 84 | Does the value of the address to be overwritten before linking matter at all? 85 | 86 | ### R_X86_64_16 87 | 88 | ### 8 and 16 bit 89 | 90 | GNU adds 8 and 16 bit relocations as an extension to the ELF standard, which it calls with names like `R_X86_64_16`. 91 | 92 | ### Segment vs segment section 93 | 94 | TODO: confirm: each program header represents either a segment, or a segment section, which is a subdivision of segments.
95 | 96 | For instance, a C hello world contains the lines (order changed): 97 | 98 | LOAD 0x000000 0x0000000000400000 0x0000000000400000 0x0006fc 0x0006fc R E 0x200000 99 | PHDR 0x000040 0x0000000000400040 0x0000000000400040 0x0001f8 0x0001f8 R E 0x8 100 | INTERP 0x000238 0x0000000000400238 0x0000000000400238 0x00001c 0x00001c R 0x1 101 | NOTE 0x000254 0x0000000000400254 0x0000000000400254 0x000044 0x000044 R 0x4 102 | GNU_EH_FRAME 0x0005d0 0x00000000004005d0 0x00000000004005d0 0x000034 0x000034 R 0x4 103 | 104 | so that the `LOAD` segment contains multiple segment sections `PHDR, INTERP, etc.` 105 | 106 | ## Alignment of global symbols 107 | 108 | TODO what are the alignment constraints of global symbols? I observe that 4 byte relocations align at 4 *bits*... what is going on? 109 | -------------------------------------------------------------------------------- /endianess.asm: -------------------------------------------------------------------------------- 1 | ; # Endianess 2 | 3 | ; x86 is little endian. 4 | 5 | ; http://stackoverflow.com/questions/5185551/why-is-x86-little-endian 6 | 7 | %include "lib/common_nasm.inc" 8 | 9 | section .data 10 | x dw 0x0102 11 | ENTRY 12 | ASSERT_EQ [x], 2, byte 13 | EXIT 14 | -------------------------------------------------------------------------------- /extensions.md: -------------------------------------------------------------------------------- 1 | # Extensions 2 | 3 | Whacky extensions that did not fit anywhere else: 4 | 5 | - 6 | 7 | 8 | 9 | 10 | -------------------------------------------------------------------------------- /flynns-taxonomy.md: -------------------------------------------------------------------------------- 1 | # Flynn's taxonomy 2 | 3 | 4 | 5 | - SISD 6 | - SIMD 7 | - MIMD 8 | 9 | ## SIMD 10 | 11 | Out of Flynn's taxonomy, this is the model that promises the greatest advances currently. 12 | 13 | Single instruction, multiple data, for example, making 4 multiplications on a single CPU cycle. 
14 | 15 | More and more, SIMD instructions are being added on different processors. 16 | 17 | Intel has been grouping those instructions under the SSE name, but previously used other names like MMX. 18 | 19 | GPGPUs are also another hot SIMD implementation. 20 | 21 | See also: 22 | 23 | ## SIMT 24 | 25 | GPUs tend strongly to SIMD systems, but NVIDIA prefers to call them SIMT, because they feel they are more flexible than pure SIMD. 26 | 27 | 28 | 29 | It is a flexibility / throughput trade-off. 30 | -------------------------------------------------------------------------------- /fpu.md: -------------------------------------------------------------------------------- 1 | # FPU 2 | 3 | Used to be a separate optional processor called the Floating Point Unit, later integrated into the CPU. 4 | 5 | Has 8 registers: `st0` to `st7`, which form a stack. 6 | 7 | `st0` is always the top of the stack. 8 | 9 | You can only communicate with `stX` from memory, not directly from registers. 10 | -------------------------------------------------------------------------------- /gas/Makefile: -------------------------------------------------------------------------------- 1 | ../Makefile -------------------------------------------------------------------------------- /gas/README.md: -------------------------------------------------------------------------------- 1 | # Introduction 2 | 3 | ## Why this tutorial uses NASM 4 | 5 | I started this tutorial a long time ago, and I made the mistake of using `nasm` for it by default. 6 | 7 | `gas` is the GNU assembler, backend to GCC, and used in the Linux kernel and glibc, and therefore the only true assembler. 8 | 9 | Oh, also it is cross ISA, in the sense that most of its directives can be used on all ISA, which is awesome. 10 | 11 | Just pipe it through GCC's C preprocessor to gain some missing functionality that `nasm` provides natively, and we are golden. 12 | 13 | ## Actual intro to GAS 14 | 15 | GNU assembler syntax cheatsheet.
16 | 17 | Part of the GNU Binutils projects, which also includes other lower-level toolchain tools like `ld` (but not `gcc`). 18 | 19 | Supposes that you already know IA-32 and NASM. 20 | 21 | Documentation: Many questions can be answered directly via the index: 22 | 23 | Executable name: `as`. 24 | 25 | Used by the kernel, GCC default backend and glibc... so just learn it. 26 | 27 | Part of Binutils. 28 | 29 | ## Vs NASM 30 | 31 | Advantages over NASM: 32 | 33 | - can handle multiple architectures, not just x86, including: 34 | 35 | - i386 36 | - SPARC 37 | - ARM 38 | 39 | and more. 40 | 41 | - NASM is slightly more convenient to write by hand, maybe because it is more focused as a direct input language, rather than a GCC backend. 42 | 43 | But given the huge Linux support for GAS, it is my preferred assembler. 44 | 45 | ### GCC as a GAS front-end 46 | 47 | GCC can be used as a frontend for GAS: 48 | 49 | gcc -S a.c -o a.s 50 | gcc -masm=att -S a.c -o a.s 51 | gcc -masm=intel -S a.c -o a.s 52 | 53 | ## Comments 54 | 55 | Multiline comments are like C comments. 56 | 57 | Single line comments vary with language. 58 | 59 | For x86, they are `# ` and **not** `;`! 60 | 61 | ## Registers 62 | 63 | Registers are prefixed by a percent sign `%`. E.g.: 64 | 65 | movl %eax, %ebx 66 | 67 | ## Integer constants 68 | 69 | Constants are prefixed by a dollar sign `$`. E.g.: 70 | 71 | movl $1, %eax 72 | 73 | ## Order 74 | 75 | Operator order is different from Intel syntax. 
76 | 77 | For example, the following moves `eax` to `ebx`: 78 | 79 | movl %eax, %ebx 80 | 81 | This is kind of logical because it is the order in which we say it, `move eax to ebx`; however, it is different from the usual C `=` operator, which would be something like: 82 | 83 | ebx = eax 84 | 85 | ## Instruction lengths 86 | 87 | Suffixes are added to instructions to specify length: 88 | 89 | - `b` = byte (8 bit) 90 | - `s` = short (16 bit integer) or single (32-bit floating point) 91 | - `w` = word (16 bit) 92 | - `l` = long (32 bit integer or 64-bit floating point) 93 | - `q` = quad (64 bit) 94 | - `dq` = double quad (128 bit) 95 | - `t` = ten bytes (80-bit floating point) 96 | 97 | These are separate words in NASM. TODO can those be specified in macros? in NASM yes because they are separate words 98 | 99 | If `mov` takes an extension letter followed by two size suffixes, the source is extended to the larger destination size: `movzbl` zero extends a byte to a long, and `movsbl` sign extends it. 100 | 101 | In Intel syntax, these are called `MOVZX` and `MOVSX` respectively. 102 | 103 | ## Directives 104 | 105 | Stuff that starts with a dot `.` and does not specify machine instructions directly but rather gives information to GAS. 106 | 107 | - `.text`: text segment 108 | - `.data`: data segment 109 | - `.global`: makes a symbol visible to the linker 110 | - `.globl`: same as global 111 | - `.loc`: debugging source line info, only generated with `-ggdb` 112 | - `.align`: 113 | - `.zero`: 114 | - `.macro`: macros 115 | 116 | ### cfi directives 117 | 118 | Call frame information. 119 | 120 | Appear all over GCC generated code for C sources. 121 | 122 | 123 | -------------------------------------------------------------------------------- /gas/altmacro.S: -------------------------------------------------------------------------------- 1 | /* 2 | # altmacro 3 | 4 | Enables the alternate macro mode from now on. 5 | 6 | Can also be set as a command line option `--alternate`. 7 | 8 | This adds extra capabilities to macros: 9 | 10 | - LOCAL 11 | - % string evaluation 12 | 13 | # noaltmacro 14 | 15 | Turns off `.altmacro`.
16 | */ 17 | 18 | #include "lib/common_gas.h" 19 | 20 | ENTRY 21 | 22 | .altmacro 23 | 24 | /* 25 | With altmacro, \ is not needed to expand the arguments. 26 | 27 | TODO undocumented? 28 | */ 29 | 30 | .macro STRING_ARG x 31 | mov x, %eax 32 | .endm 33 | mov $0, %eax 34 | STRING_ARG $1 35 | ASSERT_EQ($1, %eax) 36 | 37 | /* But escaping it with a backslash still works: invoke the backslash variant to check. */ 38 | .macro STRING_ARG_BACKSLASH x 39 | mov \x, %eax 40 | .endm 41 | mov $0, %eax 42 | STRING_ARG_BACKSLASH $1 43 | ASSERT_EQ($1, %eax) 44 | 45 | /* TODO how to avoid expansion? */ 46 | 47 | /* 48 | # LOCAL 49 | 50 | Generates local labels that are impossible to clash. 51 | */ 52 | 53 | .macro LOCAL_KEYWORD a="%eax" 54 | LOCAL ok 55 | mov $1, %eax 56 | jmp ok 57 | call assert_fail 58 | ok: 59 | .endm 60 | 61 | LOCAL_KEYWORD 62 | LOCAL_KEYWORD 63 | 64 | /* 65 | # % 66 | 67 | # Percent 68 | 69 | Appears to work only on arguments passed or their default values. 70 | */ 71 | 72 | .macro PERCENT x 73 | mov $\x, %eax 74 | .endm 75 | mov $0, %eax 76 | PERCENT %1+1 77 | ASSERT_EQ($2, %eax) 78 | 79 | .macro PERCENT_DEFAULT x=%1+1 80 | mov $\x, %eax 81 | .endm 82 | mov $0, %eax 83 | PERCENT_DEFAULT 1 84 | ASSERT_EQ($1, %eax) 85 | PERCENT_DEFAULT 86 | ASSERT_EQ($2, %eax) 87 | 88 | /* 89 | This is a horrible feature as it makes it impossible to pass registers 90 | as arguments to altmacros without special escaping care...
91 | 92 | http://stackoverflow.com/questions/19776992/gas-altmacro-macro-with-a-percent-sign-in-a-default-parameter-fails-with-oper 93 | */ 94 | 95 | .macro PERCENT_ESCAPE x 96 | mov \x, %eax 97 | .endm 98 | mov $0, %eax 99 | mov $1, %ebx 100 | PERCENT_ESCAPE <%ebx> 101 | ASSERT_EQ($1, %eax) 102 | 103 | .macro PERCENT_ESCAPE_DEFAULT x=<%ebx> 104 | mov \x, %eax 105 | .endm 106 | mov $0, %eax 107 | mov $1, %ebx 108 | PERCENT_ESCAPE_DEFAULT 109 | ASSERT_EQ($1, %eax) 110 | 111 | /* 112 | One alternative if we are sure that the argument is a register, 113 | is to put the percent inside the macro: 114 | */ 115 | 116 | .macro PERCENT_ESCAPE_REG x 117 | mov %x, %eax 118 | .endm 119 | mov $0, %eax 120 | mov $1, %ebx 121 | PERCENT_ESCAPE_REG ebx 122 | ASSERT_EQ($1, %eax) 123 | 124 | /* But this has the downside that we cannot pass immediates like `$1` anymore */ 125 | 126 | /*PERCENT_ESCAPE_REG $1*/ 127 | 128 | /* TODO application? */ 129 | 130 | .noaltmacro 131 | 132 | EXIT 133 | -------------------------------------------------------------------------------- /gas/ascii.S: -------------------------------------------------------------------------------- 1 | /* 2 | # ascii 3 | 4 | https://sourceware.org/binutils/docs/as/Ascii.html#Ascii 5 | 6 | Create a byte string from ASCII characters. 7 | 8 | Not null terminated. 9 | 10 | TODO vs string? https://sourceware.org/binutils/docs/as/String.html 11 | */ 12 | 13 | #include "lib/common_gas.h" 14 | 15 | .data 16 | s: 17 | .ascii "abc\0" 18 | s_len = . - s 19 | cat: 20 | .ascii "ab" "cd" 21 | 22 | ENTRY 23 | mov s, %eax 24 | ASSERT_EQ($0x00636261, %eax) 25 | 26 | /* Concatenates literals like C does. 
*/ 27 | mov cat, %eax 28 | ASSERT_EQ($0x64636261, %eax) 29 | 30 | mov $s_len, %eax 31 | ASSERT_EQ($4, %eax) 32 | 33 | mov $0, %edx 34 | mov $30, %eax 35 | mov $25, %ecx 36 | div %ecx 37 | mov %edx, %eax 38 | ASSERT_EQ($5, %eax) 39 | 40 | EXIT 41 | -------------------------------------------------------------------------------- /gas/asciz.S: -------------------------------------------------------------------------------- 1 | /* 2 | # asciz 3 | 4 | Like `.ascii` but adds null char to string 5 | 6 | Same as: 7 | 8 | .ascii "assert failed\n\x00" 9 | .ascii "assert failed\n\000" 10 | */ 11 | 12 | #include "lib/common_gas.h" 13 | 14 | .data 15 | s: 16 | .asciz "abc" 17 | s_len = . - s 18 | cat: 19 | .asciz "a" "b" 20 | 21 | ENTRY 22 | mov s, %eax 23 | ASSERT_EQ($0x00636261, %eax) 24 | 25 | /* 26 | Concatenates literals somewhat like C does, 27 | BUT adds a null after each one. 28 | */ 29 | mov cat, %eax 30 | ASSERT_EQ($0x00620061, %eax) 31 | 32 | mov $s_len, %eax 33 | ASSERT_EQ($4, %eax) 34 | EXIT 35 | -------------------------------------------------------------------------------- /gas/bibliography.md: -------------------------------------------------------------------------------- 1 | # Bibliography 2 | 3 | - : 4 | 5 | gas differences from nasm. Great intro if you already know nasm. 6 | 7 | - : 8 | -------------------------------------------------------------------------------- /gas/char_literal.S: -------------------------------------------------------------------------------- 1 | /* 2 | http://stackoverflow.com/questions/33246811/how-to-use-character-literals-in-gnu-gas-to-replace-numbers 3 | 4 | This syntax plays horribly with the C preprocessor: 5 | 6 | MACRO($'a) 7 | 8 | fails because cpp treats string and char literals magically. 9 | 10 | GAS macros work better here. 11 | 12 | And it generates trailing whitespace... bad move from GAS devs. They should just have closed it like C. 13 | */ 14 | 15 | #include "lib/common_gas.h" 16 | 17 | ENTRY 18 | 19 | /* Memory. 
*/ 20 | mov c, %al 21 | ASSERT_EQ($0x61, %al) 22 | 23 | /* Immediate. Without the `$`, does a memory access, and segfaults! */ 24 | mov $'b, %al 25 | ASSERT_EQ($0x62, %al) 26 | 27 | /* Space character works. */ 28 | mov $' , %al 29 | ASSERT_EQ($0x20, %al) 30 | 31 | /* Backslash escapes work. */ 32 | mov $'\n , %al 33 | ASSERT_EQ($0x0A, %al) 34 | 35 | EXIT 36 | 37 | c: 38 | .byte 'a 39 | -------------------------------------------------------------------------------- /gas/current_address.S: -------------------------------------------------------------------------------- 1 | /* 2 | # Current address 3 | 4 | # dot 5 | 6 | # . 7 | 8 | http://stackoverflow.com/questions/8987767/is-there-a-symbol-that-represents-the-current-address-in-gnu-gas-assembly 9 | */ 10 | 11 | #include "lib/common_gas.h" 12 | 13 | /* x points to its own address. */ 14 | x: .long . 15 | ENTRY 16 | mov x, %eax 17 | mov $x, %ebx 18 | ASSERT_EQ(%ebx, %eax) 19 | EXIT 20 | -------------------------------------------------------------------------------- /gas/equ.S: -------------------------------------------------------------------------------- 1 | /* # equ 2 | * 3 | * # set 4 | * 5 | * `.equ` and `.set` are the same. 6 | * 7 | * http://stackoverflow.com/questions/21624155/difference-between-equ-and-word-in-arm-assembly 8 | * 9 | * - https://sourceware.org/binutils/docs/as/Equ.html 10 | * - https://sourceware.org/binutils/docs/as/Set.html 11 | */ 12 | 13 | #include "lib/common_gas.h" 14 | 15 | .data 16 | 17 | .equ asdf, 0x1234567 18 | 19 | ENTRY 20 | 21 | .equ x, 0x1234567 22 | mov $0, %eax 23 | mov $x, %eax 24 | ASSERT_EQ($0x1234567, %eax) 25 | 26 | .set y, 0x1234567 27 | mov $0, %eax 28 | mov $y, %eax 29 | ASSERT_EQ($0x1234567, %eax) 30 | 31 | 32 | /* TODO: vs macro? 33 | * 34 | * One difference is that expressions can be used. Is that the main one? 
35 | */ 36 | 37 | .set y, 2 * 1 + 1 38 | mov $0, %eax 39 | mov $y, %eax 40 | ASSERT_EQ($0x3, %eax) 41 | 42 | EXIT 43 | -------------------------------------------------------------------------------- /gas/extern.md: -------------------------------------------------------------------------------- 1 | # extern 2 | 3 | Unlike `nasm`, GAS has no extern. Every undefined symbol shows as an undefined symbol in the output automatically: no need to declare them. 4 | 5 | 6 | -------------------------------------------------------------------------------- /gas/gasversion.S: -------------------------------------------------------------------------------- 1 | /* https://stackoverflow.com/questions/51008231/how-to-check-the-version-of-binutils-on-gnu-gas-assembly-code-at-compile-time */ 2 | 3 | #include "lib/common_gas.h" 4 | 5 | ENTRY 6 | .if .gasversion. >= 22800 && .gasversion. < 22900 7 | PRINT_STRING_LITERAL("bug") 8 | .else 9 | PRINT_STRING_LITERAL("no bug") 10 | .endif 11 | EXIT 12 | -------------------------------------------------------------------------------- /gas/global.S: -------------------------------------------------------------------------------- 1 | /* 2 | # .global 3 | 4 | By default, ELF symbols are STB_LOCAL, and not visible by the linker. 5 | 6 | This makes them STB_GLOBAL. 7 | 8 | Observe this with: 9 | 10 | readelf -s object-file.o 11 | 12 | The entry symbol, notably, must be global. 13 | 14 | Local symbols still appear on the output object file. To remove them completely, use local symbol `.L`. 15 | 16 | # .local 17 | 18 | TODO what is the effect of this, since variables are already local by default?
19 | */ 20 | 21 | #include "lib/common_gas.h" 22 | 23 | .data 24 | 25 | not_global: 26 | .long 1 27 | 28 | .global yes_global 29 | yes_global: 30 | .long 2 31 | 32 | .local yes_local 33 | yes_local: 34 | .long 3 35 | 36 | ENTRY 37 | mov not_global, %eax 38 | ASSERT_EQ($1, %eax) 39 | 40 | mov yes_global, %eax 41 | ASSERT_EQ($2, %eax) 42 | 43 | mov yes_local, %eax 44 | ASSERT_EQ($3, %eax) 45 | 46 | EXIT 47 | -------------------------------------------------------------------------------- /gas/irp.S: -------------------------------------------------------------------------------- 1 | /* 2 | # irp 3 | 4 | Macro for in loop. 5 | 6 | TODO: mnemonic? 7 | 8 | See also: .rept 9 | */ 10 | 11 | #include "lib/common_gas.h" 12 | 13 | ENTRY 14 | mov $0, %eax 15 | /* 16 | Generate: 17 | 18 | add $3, %eax 19 | add $1, %eax 20 | add $2, %eax 21 | */ 22 | .irp i, 3, 1, 2 23 | add $\i, %eax 24 | .endr 25 | ASSERT_EQ($6, %eax) 26 | EXIT 27 | -------------------------------------------------------------------------------- /gas/lib: -------------------------------------------------------------------------------- 1 | ../lib -------------------------------------------------------------------------------- /gas/linux/Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: clean run 2 | 3 | hello_world.out: hello_world.S 4 | as --32 -o hello_world.o hello_world.S 5 | ld -m elf_i386 -o hello_world.out hello_world.o 6 | 7 | clean: 8 | rm -f *.o *.out 9 | 10 | run: hello_world.out 11 | test: hello_world.out 12 | ./hello_world.out 13 | -------------------------------------------------------------------------------- /gas/linux/README.md: -------------------------------------------------------------------------------- 1 | # Linux hello world 2 | 3 | Minimal and rather sane Linux hello world. 
4 | -------------------------------------------------------------------------------- /gas/linux/hello_world.S: -------------------------------------------------------------------------------- 1 | .data 2 | s: 3 | .ascii "hello world\n" 4 | len = . - s 5 | .text 6 | .global _start 7 | _start: 8 | /* write */ 9 | mov $4, %eax 10 | mov $1, %ebx 11 | mov $s, %ecx 12 | mov $len, %edx 13 | int $0x80 14 | 15 | /* exit */ 16 | mov $1, %eax 17 | mov $0, %ebx 18 | int $0x80 19 | -------------------------------------------------------------------------------- /gas/local_label.S: -------------------------------------------------------------------------------- 1 | /* 2 | # Local label 3 | 4 | https://sourceware.org/binutils/docs/as/Symbol-Names.html#Symbol-Names 5 | 6 | Labels that contain only digits are magic and can be defined multiple times: 7 | 8 | You can then write `1b` and `1f` to refer to the latest and next definition of such labels. 9 | 10 | Such labels don't appear on the output file. 11 | */ 12 | 13 | #include "lib/common_gas.h" 14 | 15 | ENTRY 16 | /* Basic forward local jump. */ 17 | 1: 18 | jmp 1f 19 | ASSERT_FAIL 20 | 1: 21 | 22 | /* Example with a backwards jump. */ 23 | jmp 2f 24 | ASSERT_FAIL 25 | 1: 26 | jmp 3f 27 | ASSERT_FAIL 28 | 2: 29 | jmp 1b 30 | ASSERT_FAIL 31 | 3: 32 | 33 | EXIT 34 | -------------------------------------------------------------------------------- /gas/local_symbol.S: -------------------------------------------------------------------------------- 1 | /* 2 | # local symbol 3 | 4 | # .L 5 | 6 | Symbols that start with `.L` don't appear at all on the ELF output. 7 | 8 | By default, symbols do appear on ELF, but are local. 9 | 10 | Unlike NASM's `.label`, such labels are visible on the entire file, 11 | and cannot be redefined. 
12 | */ 13 | 14 | #include "lib/common_gas.h" 15 | 16 | .data 17 | not_local: 18 | .long 1 19 | 20 | .Lyes_local: 21 | .long 2 22 | ENTRY 23 | mov not_local, %eax 24 | ASSERT_EQ($1, %eax) 25 | 26 | mov .Lyes_local, %eax 27 | ASSERT_EQ($2, %eax) 28 | 29 | EXIT 30 | -------------------------------------------------------------------------------- /gas/macro.S: -------------------------------------------------------------------------------- 1 | /* 2 | # macro 3 | 4 | Avoid it, use `cpp` instead which is more powerful. 5 | */ 6 | 7 | #include "lib/common_gas.h" 8 | 9 | ENTRY 10 | 11 | /* 12 | # Labels in macros 13 | 14 | If you are going to use a macro with a label inside it many times 15 | you need some way of ensuring that this label will be unique 16 | for each macro invocation. 17 | */ 18 | 19 | /* # \@ technique. Not 100% safe, but good enough. */ 20 | 21 | .macro LOCAL_AT 22 | jmp _local_\@_ok 23 | call assert_fail 24 | _local_\@_ok: 25 | .endm 26 | 27 | LOCAL_AT 28 | LOCAL_AT 29 | 30 | /* The best technique is to use LOCAL from .altmacro. */ 31 | 32 | /* 33 | Unlike the C preprocessor, GAS macros don't treat string literals 34 | magically and expand inside them as well. 35 | */ 36 | 37 | .macro STRING_EXPAND x 38 | .ascii "\x" 39 | .endm 40 | mov s, %al 41 | ASSERT_EQ($0x61, %al) 42 | 43 | EXIT 44 | 45 | .data 46 | s: 47 | STRING_EXPAND a 48 | -------------------------------------------------------------------------------- /gas/params.makefile: -------------------------------------------------------------------------------- 1 | DISAS_FLAVOR = att 2 | PHONY_MAKES = linux 3 | -------------------------------------------------------------------------------- /gas/print.S: -------------------------------------------------------------------------------- 1 | /* 2 | # print 3 | 4 | Just print a string. 5 | 6 | # warn 7 | 8 | Print the warning. 9 | 10 | # error 11 | 12 | Print it and stop compilation. 
13 | 14 | # Expected output 15 | 16 | print 17 | print.S: Assembler messages: 18 | print.S:5: Warning: warn 19 | print.S:6: Error: error 20 | */ 21 | 22 | #include "lib/common_gas.h" 23 | 24 | ENTRY 25 | /* 26 | .print "print" 27 | .warning "warn" 28 | .error "error" 29 | */ 30 | EXIT 31 | -------------------------------------------------------------------------------- /gas/push.S: -------------------------------------------------------------------------------- 1 | #include "lib/common_gas.h" 2 | 3 | .data 4 | val1: 5 | .long 1 6 | val2: 7 | .long 2 8 | orig: 9 | .skip 4 10 | ENTRY 11 | 12 | mov %esp, orig 13 | 14 | mov $val2, %esp 15 | push $3 16 | mov %esp, %eax 17 | 18 | /* Restore esp to not break our IO. */ 19 | mov orig, %esp 20 | 21 | /* val1 was modified by the push. */ 22 | ASSERT_EQ($3, val1) 23 | 24 | /* esp was reduced by 4. */ 25 | ASSERT_EQ($val1, %eax) 26 | 27 | mov $val1, %esp 28 | pop %ebx 29 | mov %esp, %eax 30 | mov orig, %esp 31 | ASSERT_EQ($3, %ebx) 32 | ASSERT_EQ($val2, %eax) 33 | 34 | EXIT 35 | -------------------------------------------------------------------------------- /gas/section.S: -------------------------------------------------------------------------------- 1 | /* 2 | # .section 3 | 4 | Create ELF sections. 5 | 6 | # .segment 7 | 8 | Same as `.section`. 9 | 10 | So don't use, since what we actually create are sections, 11 | not segments, which are created by the linker. 12 | 13 | # .text 14 | 15 | TODO vs `.section .text` 16 | 17 | # .data 18 | 19 | Creates a data section. 20 | 21 | TODO vs `.section .data` 22 | http://stackoverflow.com/questions/21606690/whats-difference-between-section-data-and-just-data-in-gas-coding 23 | 24 | Actually looks like the exact same: even with `.section .data`, 25 | the section is treated magically, e.g. gets W flag. 26 | 27 | # Section flags 28 | 29 | Specified as: 30 | 31 | .section .custom, "awx" 32 | 33 | This would be an Allocatable Writable and eXecutable section. 
34 | */ 35 | 36 | #include "lib/common_gas.h" 37 | 38 | .section .data 39 | i: .long 1 40 | .data 41 | j: .long 1 42 | ENTRY 43 | mov i, %eax 44 | ASSERT_EQ($1, %eax) 45 | EXIT 46 | -------------------------------------------------------------------------------- /gas/size.S: -------------------------------------------------------------------------------- 1 | /* 2 | # size 3 | 4 | https://sourceware.org/binutils/docs/as/Size.html#Size 5 | 6 | Observe the generated object file with `readelf -s`. 7 | */ 8 | 9 | #include "lib/common_gas.h" 10 | 11 | ENTRY 12 | EXIT 13 | 14 | /* 15 | In ELF, adds st_size attribute of a symbol. 16 | GCC emmits those automatically for C functions, even in `-g0`, as: .size main, .-main 17 | */ 18 | .type myfunc, @function 19 | .global myfunc 20 | myfunc: 21 | nop 22 | .size myfunc, . - myfunc 23 | 24 | /* 25 | Without it, it just gets set to 0, it means unknown size. 26 | */ 27 | .type myfunc_nosize, @function 28 | .global myfunc_nosize 29 | myfunc_nosize: 30 | nop 31 | 32 | .global myfunc_nosize2 33 | myfunc_nosize2: 34 | nop 35 | -------------------------------------------------------------------------------- /gas/type.S: -------------------------------------------------------------------------------- 1 | /* # type 2 | 3 | https://sourceware.org/binutils/docs/as/Type.html#Type 4 | 5 | Observe the generated object file with `readelf -s`. 6 | */ 7 | 8 | #include "lib/common_gas.h" 9 | 10 | ENTRY 11 | EXIT 12 | 13 | /* 14 | In ELF, adds the STT_FUNC type to the symbol. 15 | GCC emmits those automatically for C functions, even in `-g0`. 16 | 17 | The at sign `@` is mandatory and has no meaning. 
18 | */ 19 | .type myfunc, @function 20 | .global myfunc 21 | myfunc: 22 | 23 | .global myfunc_notype 24 | myfunc_notype: 25 | -------------------------------------------------------------------------------- /how-to-learn.md: -------------------------------------------------------------------------------- 1 | # How to learn 2 | 3 | Cycle between the following until you are a master: 4 | 5 | 1. write assembly 6 | 1. read Intel manual pages 7 | 1. decompile and interpret what compilers compiled: simple examples, Kernel, and hardcore libraries like glibc 8 | 9 | Clearly separate OS specifics from hardware specifics. 10 | -------------------------------------------------------------------------------- /implementations.md: -------------------------------------------------------------------------------- 1 | # Implementations 2 | 3 | Current: 4 | 5 | - Intel, created and holds most copyrights for IA-32 6 | - AMD, created and holds copyrights for x86-64 7 | 8 | Past: 9 | 10 | - VIA https://en.wikipedia.org/wiki/VIA_Technologies . Bought: 11 | 12 | - Cyrix https://en.wikipedia.org/wiki/Cyrix from National Semiconductor 13 | - Centaur https://en.wikipedia.org/wiki/Centaur_Technology 14 | 15 | Intel and AMD are the major ones. 16 | 17 | Cyrix was an implementor before being bought by National Semiconductor, and VIA continue to produce them after buying Cyrix from NS. 
18 | 19 | ## x86 licensing 20 | 21 | You need a license to implement x86, at least the parts which are newer than 20 years ago: 22 | 23 | - 24 | - Cyrix got into trouble for not licensing it https://en.wikipedia.org/wiki/Cyrix#Legal_troubles 25 | 26 | Intel and AMD have cross-licensing agreements: 27 | 28 | - http://www.kitguru.net/components/cpu/anton-shilov/amd-clarifies-cross-license-with-intel-change-of-control-terminates-agreement-for-both/ 29 | - http://www.sec.gov/Archives/edgar/data/2488/000119312509236705/dex102.htm 30 | - https://www.reddit.com/r/hardware/comments/3b0ytk/discussion_what_is_an_x86_license_and_why_do/ 31 | - http://www.anandtech.com/show/2873 32 | - http://www.eetimes.com/document.asp?doc_id=1129690 33 | - Interview with W. Jerry Sanders http://silicongenesis.stanford.edu/transcripts/sanders.htm 34 | 35 | ## Open implementations 36 | 37 | TODO legal? 38 | -------------------------------------------------------------------------------- /instruction-level-parallelism.md: -------------------------------------------------------------------------------- 1 | # Instruction level parallelism 2 | 3 | 4 | -------------------------------------------------------------------------------- /instruction-sets.md: -------------------------------------------------------------------------------- 1 | # Instruction sets 2 | 3 | ## Instruction set architecture 4 | 5 | ## ISA 6 | 7 | List of operations the processor can do. Obviously ultra processor dependant. 8 | 9 | - 10 | - 11 | 12 | x86 is family of ISAs. 13 | -------------------------------------------------------------------------------- /intel-processor-history.md: -------------------------------------------------------------------------------- 1 | # Intel processor history 2 | 3 | Intel is highly backwards compatible and hugely influential, so understanding their processor history helps to understand terms and concepts which are still relevant. 
4 | 5 | ## Intel 4004 6 | 7 | 8 | 9 | The first commercial integrated CPU in the world. Huge breakthrough. 10 | 11 | ## Intel 8080 12 | 13 | 1974, 8 bit, 2MHz. Hit a price / performance point that kick-started the PC revolution: Apple, Microsoft, etc. 14 | 15 | Before it, computers were huge machines for large corporations only. 16 | 17 | ## Intel 8086 18 | 19 | 1978, 16 bit, very popular, base to x86 IA. 20 | 21 | TODO why `86`? 22 | 23 | ## Intel 80386 24 | 25 | aka i386 26 | 27 | 1985, 32 bit word register 28 | 29 | ## Intel 8087 30 | 31 | 1980 32 | 33 | External floating point co-processor for the 8086. 34 | 35 | In other words: this is not a CPU, but something external to the CPU, which the CPU could interface with. 36 | 37 | Included inside CPUs starting from the 80486 38 | 39 | x87 is often used to describe the floating point operations inside the processors 40 | 41 | Instructions include: 42 | 43 | - `FSQRT` 44 | - `FSIN` 45 | 46 | ## Intel 80486 47 | 48 | 1989 49 | 50 | Includes floating point unit inside of it 51 | 52 | ## Intel Core 53 | 54 | Generic Intel branding since 2007. 55 | 56 | ## i3 57 | 58 | ## i5 59 | 60 | ## i7 61 | 62 | Generic branding by Intel since 2010. 63 | 64 | They just mean low, mid and high end, nothing else is implied, not even the number of cores. 65 | 66 | A much more meaningful way to group the processors is by the architecture, e.g. Ivy Bridge, Haswell, etc. 67 | 68 | Each architecture is also referred to as a generation, which started counting with the Nehalem, so for example Ivy Bridge is the 3rd generation. 69 | 70 | To be really precise, you have to talk about the model ID for each processor. Those are of the form: 71 | 72 | iA-BXXXL 73 | 74 | for example: 75 | 76 | i5-3210M 77 | 78 | where: 79 | 80 | - `A`: is a digit, as in `i3`, `i5`, `i7`: the low, mid and high level marker 81 | - `B`: is usually the generation, e.g. 3 for Ivy Bridge. 
82 | - `XXX`: further specifies the model 83 | - `L`: zero or more letters that further specify the type of the CPU, e.g. `M` is often used for mobile targeted CPUs 84 | -------------------------------------------------------------------------------- /intel-vs-atet.md: -------------------------------------------------------------------------------- 1 | # Intel vs AT&T syntax 2 | 3 | There is no standard for x86 assembly syntax notation. 4 | 5 | Two notations dominate: Intel and AT&T. 6 | 7 | Neither is a proper standard, but rather "implementation defined". 8 | 9 | ## Intel 10 | 11 | Used in Intel manuals, and likely Intel-made tools. 12 | 13 | More popular on Windows. 14 | 15 | Used by NASM and MASM. 16 | 17 | This tutorial uses Intel syntax by default because that is what most tutorials I've used to learn also use. But I ended up regretting that, because I'm a Linux groupie. 18 | 19 | ## AT&T 20 | 21 | More popular for Linux, since UNIX was created at Bell Labs. 22 | 23 | Implemented by GAS, the GNU assembler. 24 | 25 | Used by GCC by default, the Linux kernel, glibc... 26 | 27 | Advantages: 28 | 29 | - registers are clearly marked with `%`. More verbose, but more precise: allows to differentiate label `ax` from register `ax`. 30 | 31 | Downsides: 32 | 33 | - addressing is less self-documenting. Compare: 34 | 35 | [eax + 2*ebx + 4] 36 | 37 | with: 38 | 39 | 4(%eax,%ebx,2) 40 | 41 | - Labels are not consistently marked with `$`: it depends on the instruction. E.g.: 42 | 43 | mov $label, %eax 44 | 45 | but: 46 | 47 | jmp label 48 | 49 | instead of the uniform: 50 | 51 | jmp $label 52 | 53 | -------------------------------------------------------------------------------- /intel2gas.md: -------------------------------------------------------------------------------- 1 | # intel2gas 2 | 3 | Convert Intel (NASM) syntax to GAS (AT&T) syntax. 4 | 5 | intel2gas a.asm 6 | 7 | Output the converted output to stdout. 
8 | -------------------------------------------------------------------------------- /interactive/Makefile: -------------------------------------------------------------------------------- 1 | ../Makefile -------------------------------------------------------------------------------- /interactive/README.md: -------------------------------------------------------------------------------- 1 | # Interactive 2 | 3 | Examples that are meant to exit non zero, or take too long, or print control characters to your terminal, or do other evil things. 4 | 5 | 1. [int3](int3.asm) 6 | 1. [int 4](int4.asm) 7 | -------------------------------------------------------------------------------- /interactive/int3.asm: -------------------------------------------------------------------------------- 1 | ; # int3 2 | 3 | ; http://unix.stackexchange.com/questions/131044/dialog-trap-and-sigtrap 4 | 5 | %include "lib/common_nasm.inc" 6 | 7 | ENTRY 8 | int3 9 | ; 2 byte encoding. 10 | int 3 11 | EXIT 12 | -------------------------------------------------------------------------------- /interactive/int4.asm: -------------------------------------------------------------------------------- 1 | ; # int 4 2 | 3 | ; Mostly to compare with int 3. 4 | 5 | ; Segfaults. TODO why? 6 | 7 | %include "lib/common_nasm.inc" 8 | 9 | ENTRY 10 | int 4 11 | EXIT 12 | -------------------------------------------------------------------------------- /interactive/lib: -------------------------------------------------------------------------------- 1 | ../lib -------------------------------------------------------------------------------- /interactive/test: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | -------------------------------------------------------------------------------- /ld-linux-so.md: -------------------------------------------------------------------------------- 1 | # ld-linux.so 2 | 3 | TODO what does it do exactly. How is it called? 
By the kernel? 4 | 5 | man ld.so 6 | 7 | `/lib64/ld-linux-x86-64.so.2` is the usual 64-bit version. 8 | 9 | `ld-linux.so*` is the executable that does the dynamic loading for every executable. 10 | 11 | As such, it cannot have any dependencies. 12 | 13 | Its path is specified in the `.interp` section of ELF files, which Linux reads and uses to call `ld-linux`. 14 | 15 | The default on Ubuntu 14.04 is `/lib64/ld-linux-x86-64.so.2`. 16 | 17 | The program to be run is passed as an argument to `ld-linux`: 18 | 19 | /lib64/ld-linux-x86-64.so.2 a.out 20 | 21 | Then: 22 | 23 | man execve 24 | 25 | says that the path of the loader is stored in the elf file, and `readelf -a` shows a section devoted to it: 26 | 27 | INTERP 0x0000000000000238 0x0000000000400238 0x0000000000400238 28 | 0x000000000000001c 0x000000000000001c R 1 29 | [Requesting program interpreter: /lib64/ld-linux-x86-64.so.2] 30 | 31 | On Ubuntu 16.04, provided by the `libc6` package. 32 | 33 | ## LD_DEBUG 34 | 35 | TODO 36 | 37 | ## LD_LIBRARY_PATH 38 | 39 | You can also add to path with environment variables. 40 | 41 | Don't rely on this method for production. 42 | 43 | export LD_LIBRARY_PATH='/path/to/link' 44 | 45 | ## ld.so 46 | 47 | TODO what is it? 48 | 49 | ## ld.so.conf 50 | 51 | TODO. E.g. Ubuntu 16.04 mesa: 52 | 53 | /usr/lib/x86_64-linux-gnu/mesa/ld.so.conf 54 | 55 | ## /etc/ld.so.conf 56 | 57 | TODO. 58 | 59 | ## ldconfig 60 | 61 | TODO 62 | -------------------------------------------------------------------------------- /ld.md: -------------------------------------------------------------------------------- 1 | # ld 2 | 3 | 4 | 5 | Does the linking process, which converts object files into executables. 
6 | 7 | Basic usage: 8 | 9 | nasm -o hello_world.o hello_world.asm 10 | ld -o hello_world.out hello_world.o 11 | ./hello_world.out 12 | 13 | ## Order of libraries passed 14 | 15 | The order matters for static libraries: 16 | 17 | - 18 | 19 | Undefined references in earlier objects or libraries can only be resolved by libraries that come later on the command line. 20 | 21 | ### start-group 22 | 23 | ### end-group 24 | 25 | Those weird options act on the `-l` libraries that appear in between them, e.g.: 26 | 27 | ld --start-group lib1 lib2 lib3 --end-group 28 | 29 | With them, `ld` loops between the given libraries until all references have been resolved. 30 | 31 | This has a performance impact. 32 | 33 | ## Standard library 34 | 35 | If your program is compiled with `gcc`, it expects a few things from the linker which `ld` does not do by default: 36 | 37 | ld -dynamic-linker /lib64/ld-linux-x86-64.so.2 \ 38 | /usr/lib/x86_64-linux-gnu/crt1.o \ 39 | /usr/lib/x86_64-linux-gnu/crti.o \ 40 | -lc hello_world.o \ 41 | /usr/lib/x86_64-linux-gnu/crtn.o 42 | 43 | ## m 44 | 45 | ## Cross compile 46 | 47 | 48 | 49 | `-m <emulation>`, where `<emulation>` can be found with `ld -V`, sample output: 50 | 51 | elf_x86_64 52 | elf32_x86_64 53 | elf_i386 54 | i386linux 55 | elf_l1om 56 | elf_k1om 57 | i386pep 58 | i386pe 59 | 60 | TODO meaning of each type? 61 | 62 | ## verbose 63 | 64 | Print `ld` information, including the linker script used: 65 | 66 | ld --verbose 67 | 68 | ## e 69 | 70 | Sets the ELF entry point to a given symbol or address. 71 | 72 | This determines `e_entry` of the ELF header. 73 | 74 | You can set it to any address, but keep in mind that sections have alignment constraints. 75 | 76 | So for example if you have something like: 77 | 78 | . 
= 0x4FFFFF; 79 | .text : 80 | { 81 | *(*) 82 | } 83 | 84 | and the page size is 2M, then your executable will be about 2M in size: 85 | 86 | - `.text` segment will go to `0x4FFFFF` 87 | - all bytes up to `0x4FFFFE` will be filled with zeroes 88 | - `e_entry` is set to `0x4FFFFF` 89 | 90 | ## Library and weak TODOs 91 | 92 | - 93 | - 94 | -------------------------------------------------------------------------------- /ldd.md: -------------------------------------------------------------------------------- 1 | # ldd 2 | 3 | Print information about the configuration of the dynamic loader `ld-linux.so`. 4 | 5 | List required shared libraries of an executable and if they can be found. 6 | 7 | `binutils` package. 8 | 9 | man ldd 10 | man ld.so 11 | 12 | Usage: 13 | 14 | ldd /bin/ls 15 | 16 | Sample output: 17 | 18 | linux-vdso.so.1 => (0x00007ffd3cbdc000) 19 | libselinux.so.1 => /lib/x86_64-linux-gnu/libselinux.so.1 (0x00007effc013d000) 20 | libacl.so.1 => /lib/x86_64-linux-gnu/libacl.so.1 (0x00007effbff35000) 21 | libc.so.6 => /lib/x86_64-linux-gnu/libc.so.6 (0x00007effbfb70000) 22 | libpcre.so.3 => /lib/x86_64-linux-gnu/libpcre.so.3 (0x00007effbf932000) 23 | libdl.so.2 => /lib/x86_64-linux-gnu/libdl.so.2 (0x00007effbf72e000) 24 | /lib64/ld-linux-x86-64.so.2 (0x00007effc0360000) 25 | libattr.so.1 => /lib/x86_64-linux-gnu/libattr.so.1 (0x00007effbf529000) 26 | 27 | Quick reminder: 28 | 29 | - `linux-vdso`: magically loaded by the kernel to make system calls faster 30 | - `libselinux`, `libacl`: access control stuff 31 | - `libc`: C standard library 32 | - `libpcre`: Perl Compatible Regular expressions. 
33 | 34 | Possible outputs: 35 | 36 | - `Not a dynamic executable` 37 | - `liba.1.so => /lib/liba.1.so` 38 | - `liba.1.so => not found` 39 | 40 | ## vs readelf -d (NEEDED) 41 | 42 | I think `readelf` does not take into account dependencies of dependencies: 43 | 44 | - 45 | - 46 | 47 | For example: 48 | 49 | readelf -d /bin/ls | grep NEEDED 50 | 51 | Gives: 52 | 53 | 0x0000000000000001 (NEEDED) Shared library: [libselinux.so.1] 54 | 0x0000000000000001 (NEEDED) Shared library: [libacl.so.1] 55 | 0x0000000000000001 (NEEDED) Shared library: [libc.so.6] 56 | 57 | Then: 58 | 59 | readelf -d /lib/x86_64-linux-gnu/libselinux.so.1 | grep NEEDED 60 | 61 | Gives: 62 | 63 | 0x0000000000000001 (NEEDED) Shared library: [libpcre.so.3] 64 | 0x0000000000000001 (NEEDED) Shared library: [libdl.so.2] 65 | 0x0000000000000001 (NEEDED) Shared library: [libc.so.6] 66 | 0x0000000000000001 (NEEDED) Shared library: [ld-linux-x86-64.so.2] 67 | 68 | so this is where `libpcre` came from. 69 | 70 | ## Verbose 71 | 72 | ## Show linked symbols 73 | 74 | LD_DEBUG=bindings ldd -r /bin/date 75 | 76 | Shows a huge amount of debug information! 77 | 78 | But also consider `ld -y` and `info symbol &printf` inside GDB: 79 | -------------------------------------------------------------------------------- /lib/Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: all clean 2 | 3 | all: 4 | $(NASM) -o common$(OBJ_EXT) common.asm 5 | $(CC) $(CFLAGS) -o driver$(OBJ_EXT) -c driver.c 6 | 7 | clean: 8 | rm -f *$(OBJ_EXT) *$(OUT_EXT) 9 | -------------------------------------------------------------------------------- /lib/README.md: -------------------------------------------------------------------------------- 1 | # Lib 2 | 3 | Written by Paul A. Carter, with minor modifications. 4 | 5 | This is a simple library for learning purposes. 6 | 7 | Encapsulates system calls, making code OS portable. 8 | 9 | Interfaces with the C stdlib. 
So it is also an example of how to do so. 10 | -------------------------------------------------------------------------------- /lib/common.asm: -------------------------------------------------------------------------------- 1 | ; Helper functions. Gets linked with every executable. 2 | ; 3 | ; Vs macros on included headers: 4 | ; 5 | ; - less NASM / GAS specific, and helps with portability across assemblers 6 | ; - more debug friendly, as you can properly step into the functions and see source 7 | 8 | %define STRING_TERMINATOR 0 9 | %define NEWLINE 10 10 | 11 | extern printf, exit 12 | 13 | ; Print error message and exit program with status 1. 14 | ; 15 | ; Usually called with the `assert_fail` macro, 16 | ; which also prepares the line number. 17 | ; 18 | ; The line number must be set in a macro 19 | ; otherwise it would always point to this function. 20 | ; 21 | ; eax: line of the failure 22 | ; 23 | global assert_fail 24 | assert_fail: 25 | mov ebx, eax 26 | mov eax, .message 27 | call print_string 28 | mov eax, ebx 29 | call print_int 30 | ; Call libc exit with exit status 1: 31 | push dword 1 32 | call exit 33 | .message db 10, 'ASSERT FAILED AT LINE: ', 0 34 | 35 | ; Print 32 bit integer in decimal. 36 | global print_int 37 | print_int: 38 | ; Prologue. 39 | enter 0,0 40 | pusha 41 | pushf 42 | ; Body. 43 | push eax 44 | push dword .format 45 | call printf 46 | pop ecx 47 | pop ecx 48 | ; Epilogue. 49 | popf 50 | popa 51 | leave 52 | ret 53 | .format db "%i", NEWLINE, STRING_TERMINATOR 54 | 55 | ; Print a NUL terminated string. 56 | global print_string 57 | print_string: 58 | ; Prologue. 59 | enter 0,0 60 | pusha 61 | pushf 62 | ; Body. 63 | push eax 64 | push dword .format 65 | call printf 66 | pop ecx 67 | pop ecx 68 | ; Epilogue. 
69 | popf 70 | popa 71 | leave 72 | ret 73 | .format db "%s", NEWLINE, STRING_TERMINATOR 74 | -------------------------------------------------------------------------------- /lib/common_gas.h: -------------------------------------------------------------------------------- 1 | /* GAS macros. 2 | * 3 | * Use cpp because GAS macros are too limted. 4 | */ 5 | 6 | #define ENTRY \ 7 | .text ;\ 8 | .global asm_main ;\ 9 | asm_main: \ 10 | enter $0, $0 11 | 12 | #define EXIT \ 13 | leave ;\ 14 | mov $0, %eax ;\ 15 | ret 16 | 17 | #define PRINT_INT(x) \ 18 | mov x, %eax ;\ 19 | call print_int 20 | 21 | #define PRINT_STRING(x) \ 22 | mov x, %eax ;\ 23 | call print_string 24 | 25 | /* Usage: 26 | * 27 | * PRINT_STRING_LITERAL("My string literal!") 28 | * 29 | * A trailing newline and zero terminator are automatically added. 30 | */ 31 | #define PRINT_STRING_LITERAL(x) \ 32 | .section .rodata ;\ 33 | 1: ;\ 34 | .asciz x ;\ 35 | .text ;\ 36 | PRINT_STRING($1b) 37 | 38 | /* Must be defined with `#define` because of __LINE__: 39 | * http://stackoverflow.com/questions/30958595/is-there-a-line-macro-for-gas-assembly-that-expands-to-the-current-source-li 40 | */ 41 | #define ASSERT_FAIL \ 42 | mov $__LINE__, %eax ;\ 43 | call assert_fail 44 | 45 | #define ASSERT_FLAG(flag) \ 46 | flag 1f ;\ 47 | ASSERT_FAIL ;\ 48 | 1: 49 | 50 | /* TODO define in terms of ASSERT_EQ_SIZE, need empty macro argument. */ 51 | #define ASSERT_EQ(x, y) \ 52 | cmp x, y ;\ 53 | je 1f ;\ 54 | ASSERT_FAIL ;\ 55 | 1: 56 | 57 | #define ASSERT_EQ_SIZE(x, y, size) \ 58 | cmp ## size x, y ;\ 59 | je 1f ;\ 60 | ASSERT_FAIL ;\ 61 | 1: 62 | -------------------------------------------------------------------------------- /lib/common_nasm.inc: -------------------------------------------------------------------------------- 1 | ; Extern definitions and macros to be included in NASM source files. 2 | ; 3 | ; Many macros are just helpers to call library functions more easily. 4 | ; Many of them set-up arguments, e.g. 
prepare EAX for a call. 5 | ; 6 | ; Macros here do not preserve registers as their contract. 7 | 8 | extern \ 9 | assert_fail, \ 10 | print_int, \ 11 | print_string 12 | 13 | ; Structure macros 14 | 15 | %macro TEXT 0 16 | section .text 17 | %endmacro 18 | 19 | %macro DATA 0 20 | section .data 21 | %endmacro 22 | 23 | %macro RODATA 0 24 | section .rodata 25 | %endmacro 26 | 27 | %macro GLOBAL 1 28 | global %1 29 | %1: 30 | %endmacro 31 | 32 | ; Enter program that uses the C driver. 33 | ; The entry function is `asm_main`. 34 | %macro ENTRY 0 35 | TEXT 36 | GLOBAL asm_main 37 | enter 0, 0 38 | %endmacro 39 | 40 | ; Exit program entered by `ENTRY`. 41 | %macro EXIT 0 42 | leave 43 | mov eax, 0 44 | ret 45 | %endmacro 46 | 47 | ; Print macros. 48 | 49 | %macro PRINT_INT 1 50 | mov eax, %1 51 | call print_int 52 | %endmacro 53 | 54 | %macro PRINT_STRING 1 55 | mov eax, %1 56 | call print_string 57 | %endmacro 58 | 59 | ; Usage: PRINT_STRING_LITERAL 'My string literal!' 60 | %macro PRINT_STRING_LITERAL 1 61 | RODATA 62 | %%str db %1, 0 63 | TEXT 64 | PRINT_STRING %%str 65 | %endmacro 66 | 67 | %macro dump_regs 1 68 | push dword %1 69 | call sub_dump_regs 70 | %endmacro 71 | 72 | ; Usage: dump_mem label, start-address, 73 | %macro dump_mem 3 74 | push dword %1 75 | push dword %2 76 | push dword %3 77 | call sub_dump_mem 78 | %endmacro 79 | 80 | %macro dump_math 1 81 | push dword %1 82 | call sub_dump_math 83 | %endmacro 84 | 85 | %macro dump_stack 3 86 | push dword %3 87 | push dword %2 88 | push dword %1 89 | call sub_dump_stack 90 | %endmacro 91 | 92 | ; Assert macros 93 | 94 | ; Assert eax eq %1 95 | %macro ASSERT_EQ 3 96 | pushf 97 | cmp %3 %1, %2 98 | je %%ok 99 | ASSERT_FAIL 100 | %%ok: 101 | popf 102 | %endmacro 103 | 104 | %macro ASSERT_EQ 2 105 | ASSERT_EQ %1, %2, %3 106 | %endmacro 107 | 108 | ; asserts eax neq %1 109 | %macro ASSERT_NEQ 2 110 | pushfd 111 | pusha 112 | cmp %1, %2 113 | jne %%ok 114 | ASSERT_FAIL 115 | %%ok: 116 | popa 117 | popfd 118 | 
%endmacro 119 | 120 | ; assert jX or jnX jumps 121 | %macro ASSERT_FLAG 1 122 | %1 %%ok 123 | ASSERT_FAIL 124 | %%ok: 125 | %endmacro 126 | 127 | %macro ASSERT_FAIL 0 128 | mov eax, __LINE__ 129 | call assert_fail 130 | %endmacro 131 | 132 | ; SIMD macros 133 | 134 | %macro ASSERT_EQ4 5 135 | ASSERT_EQ [%1], %2, dword 136 | ASSERT_EQ [%1 + 4], %3, dword 137 | ASSERT_EQ [%1 + 8], %4, dword 138 | ASSERT_EQ [%1 + 12], %5, dword 139 | %endmacro 140 | 141 | ; Move 4 given dwords into the given memory location. 142 | ; The memory location should normally be 8 dwords long. 143 | %macro MOV4 5 144 | mov dword [%1], %2 145 | mov dword [%1 + 4], %3 146 | mov dword [%1 + 8], %4 147 | mov dword [%1 + 12], %5 148 | %endmacro 149 | -------------------------------------------------------------------------------- /lib/driver.c: -------------------------------------------------------------------------------- 1 | #if defined(__GNUC__) 2 | # define PRE_CDECL 3 | # define POST_CDECL __attribute__((cdecl)) 4 | #else 5 | # define PRE_CDECL __cdecl 6 | # define POST_CDECL 7 | #endif 8 | 9 | /** Fix the calling convention so that we can implement it in assembly. */ 10 | int PRE_CDECL asm_main(void) POST_CDECL; 11 | 12 | int main(void) { 13 | return asm_main(); 14 | } 15 | -------------------------------------------------------------------------------- /linker-scripts/Makefile: -------------------------------------------------------------------------------- 1 | .POSIX: 2 | 3 | IN_EXT ?= .S 4 | LD_EXT ?= .ld 5 | LD_FLAGS ?= -T $(@:$(OUT_EXT)=.ld) 6 | # Use gcc so that the preprocessor will run first. 
7 | OBJ_EXT ?= .o 8 | OUT_EXT ?= .out 9 | RUN ?= bios_hello_world 10 | 11 | OUTS := $(patsubst %,%$(OUT_EXT),$(basename $(wildcard *$(IN_EXT)))) 12 | 13 | .PRECIOUS: %$(OBJ_EXT) 14 | .PHONY: all clean run 15 | 16 | all: $(OUTS) 17 | 18 | %$(OUT_EXT): %$(OBJ_EXT) %$(LD_EXT) 19 | ld -m elf_i386 -o '$@' -T $(@:$(OUT_EXT)=$(LD_EXT)) '$<' 20 | 21 | %$(OBJ_EXT): %$(IN_EXT) common.h 22 | gcc -c -m32 -o '$@' '$<' 23 | 24 | clean: 25 | rm -f *$(OBJ_EXT) *$(OUT_EXT) 26 | 27 | test: all 28 | # Lazy. 29 | 30 | run: all 31 | qemu-system-i386 '$(RUN)$(OUT_EXT)' 32 | -------------------------------------------------------------------------------- /linker-scripts/README.md: -------------------------------------------------------------------------------- 1 | # Linker scripts 2 | 3 | 1. [Hello world](hello_world.ld) 4 | 1. [min](min.ld) 5 | 1. [Symbol](symbol.ld) 6 | 1. [edata](edata.ld) 7 | 1. [FLAGS](flags.ld) 8 | 1. [KEEP](keep.ld) 9 | 10 | ## Introduction 11 | 12 | This directory must be run in Linux IA-32. 13 | 14 | Linker scripts determine how exactly `ld` is going to transform object files into the executable. 15 | 16 | In particular, how the sections are going to be mashed up into segments. 17 | 18 | The default linker script operation is determined by the default linker scripts. 19 | 20 | Those are hardcoded into the `ld` executable. 21 | 22 | `make install` also generates an informational copy of the scripts at: 23 | 24 | x86_64-unknown-linux-gnu/lib/ldscripts 25 | 26 | which some distributions provide. But those are only informational. The usual location is: 27 | 28 | /usr/lib/ldscripts 29 | 30 | The actual script being used is determined by the `ld` options, and can be viewed with: 31 | 32 | ld --verbose 33 | 34 | which spits out the entire script, but not its name. You can then match this output to the scripts under `/usr/lib/ldscripts`. 35 | 36 | Minimal example: TODO make into a repo.
37 | 38 | The main case where linker scripts are needed is when building more "exotic" executables, typically boot sectors or a multiboot file. E.g.: 39 | 40 | ## T 41 | 42 | Replace the linker script with a custom one: 43 | 44 | ld -T script a.o 45 | 46 | You will often use an existing script as the basis for this. 47 | 48 | ## Ttext-segment 49 | 50 | Change the address of some predefined segments: 51 | 52 | -Ttext-segment=0x80000 53 | 54 | The default linker script reads: 55 | 56 | . = SEGMENT_START("text-segment", 0x400000) 57 | 58 | which takes the value passed, or 4M by default. 59 | 60 | ## Directives 61 | 62 | ### SECTIONS 63 | 64 | Most important directive: contains the section to segment mappings. 65 | 66 | #### Select by object file 67 | 68 | You can select sections by object file. 69 | 70 | For example, for a multiboot elf file, we must put the multiboot header before everything else. 71 | 72 | So if we had our header in the `.text` section of the `boot.o` file (bad practice, it would be better to have a specially named section), we could write: 73 | 74 | .text 0x100000 : 75 | { 76 | boot.o(.text) 77 | *(.text) 78 | } 79 | 80 | This way, `boot.o(.text)` is guaranteed to put `boot.o` first, and `*(.text)` then matches the others. 81 | 82 | ### BYTE 83 | 84 | ### SHORT 85 | 86 | ### LONG 87 | 88 | ### QUAD 89 | 90 | Used to insert raw bytes directly into the output. 91 | 92 | Application: insert boot sector magic bytes: https://github.com/cirosantilli/x86-bare-metal-examples/blob/2b79ac21df801fbf4619d009411be6b9cd10e6e0/a.ld#L14 93 | 94 | ### SQUAD 95 | 96 | Like `QUAD` but sign extends. 97 | 98 | ### Assignment 99 | 100 | #### PROVIDE 101 | 102 | 103 | 104 | Make provided symbols weak: if they are defined by the object files, that definition wins. 105 | 106 | Without `PROVIDE`, a multiple definition error occurs.
107 | 108 | E.g.: 109 | 110 | PROVIDE(__symbol_weak = .); 111 | __symbol = .; 112 | 113 | Note that: 114 | 115 | __symbol = .; 116 | 117 | sets the **address** of `__symbol`, relative to the current segment. `.` in this context is also relative to the current segment. 118 | 119 | To set the value of the symbol, you'd need: 120 | 121 | __symbol = .; 122 | DWORD 123 | 124 | Used on the default userland script for section position symbols like `etext` and `edata`: 125 | 126 | Those symbols are placed on the output right where they are defined, relative to other sections and symbols. 127 | 128 | #### ABSOLUTE 129 | 130 | TODO 131 | 132 | ### ALIGN 133 | 134 | Returns the location counter aligned up to the next multiple of the given value. 135 | 136 | Applications: 137 | 138 | Extend the output to be a multiple of 512 bytes: 139 | 140 | . = ALIGN(512) 141 | 142 | This can be useful to generate boot sectors with multiple stages. 143 | 144 | ## Segment flags 145 | 146 | ## Segment permissions 147 | 148 | Some segment names are magic: `.text` and `.data`, and will have fixed permissions no matter what. 149 | 150 | If you use a non-magic name like `.blurb`, the permission is the sum of the permissions of all sections that make up the segment. 151 | 152 | It is also possible to set the segment permissions with the `FLAGS` option of the `PHDRS` linker script command. 153 | -------------------------------------------------------------------------------- /linker-scripts/TODO.md: -------------------------------------------------------------------------------- 1 | # TODO 2 | 3 | - Factor out examples in this repository.
4 | 5 | The major pain point is: 6 | 7 | - using the C standard library (through the ia-32 directory) requires GCC (TODO true?), which I cannot get to work without the default script 8 | - not using the C library loses all portability so I'd have to reimplement a bunch of things 9 | 10 | One possible alternative is to limit ourselves to two system calls only: write and exit, and to unit testing from an external script. 11 | -------------------------------------------------------------------------------- /linker-scripts/common.h: -------------------------------------------------------------------------------- 1 | #define GLOBAL(symbol) \ 2 | .global symbol;\ 3 | symbol: 4 | 5 | /* Create a string that is not NUL-terminated, and a symbol `symbol ## _len` for its length. */ 6 | #define STRING(symbol, value) \ 7 | symbol: \ 8 | .ascii value;\ 9 | symbol ## _len = . - symbol 10 | 11 | /* Exit syscall. */ 12 | #define EXIT(status) \ 13 | mov $1, %eax;\ 14 | mov status, %ebx;\ 15 | int $0x80 16 | 17 | /* 18 | Write syscall. 19 | 20 | The length of `string_address` should be stored in a symbol named 21 | `string_address ## _len`. 22 | 23 | - string_address: a symbol that points to the start of the string (without $) 24 | */ 25 | #define WRITE(string_address) \ 26 | mov $4, %eax;\ 27 | mov $1, %ebx;\ 28 | mov $string_address, %ecx;\ 29 | mov $string_address ## _len, %edx;\ 30 | int $0x80 31 | -------------------------------------------------------------------------------- /linker-scripts/data.S: -------------------------------------------------------------------------------- 1 | .section .text 2 | movb $0, x 3 | mov $1, %eax 4 | mov $0, %ebx 5 | mov x, %bl 6 | int $0x80 7 | .section .data 8 | x: .byte 1 9 | -------------------------------------------------------------------------------- /linker-scripts/data.ld: -------------------------------------------------------------------------------- 1 | /* 2 | Make a writable data segment. 3 | */ 4 | SECTIONS 5 | { 6 | .
= 0x400000; 7 | .text : 8 | { 9 | *(.text) 10 | } 11 | .data : 12 | { 13 | *(.data) 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /linker-scripts/edata.S: -------------------------------------------------------------------------------- 1 | .section .text 2 | mov $1, %eax 3 | mov sdata, %ebx 4 | int $0x80 5 | .section .data 6 | .byte 1 7 | -------------------------------------------------------------------------------- /linker-scripts/edata.ld: -------------------------------------------------------------------------------- 1 | /* 2 | To answer: 3 | http://stackoverflow.com/questions/1765969/where-are-the-symbols-etext-edata-and-end-defined 4 | */ 5 | SECTIONS 6 | { 7 | . = 0x400000; 8 | .text : 9 | { 10 | *(.text) 11 | sdata = .; 12 | *(.data) 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /linker-scripts/flags.S: -------------------------------------------------------------------------------- 1 | /* 2 | This is also minimal as it uses the fact that 3 | the default section is `.text`. 4 | */ 5 | 6 | #include "common.h" 7 | .section .mytext, "a" 8 | GLOBAL(_start) 9 | EXIT($0) 10 | -------------------------------------------------------------------------------- /linker-scripts/flags.ld: -------------------------------------------------------------------------------- 1 | /* 2 | How to alter segment flags (permissions). 3 | 4 | Expected outcome: segfault, because the segment is not executable. 5 | */ 6 | ENTRY(_start) 7 | PHDRS 8 | { 9 | /* 10 | The FLAGS number is the same as the 777 filesystem 11 | permission bits: RWX. 12 | 13 | Try changing this to 2 to get a segfault. 14 | 15 | TODO why does it not segfault with just 4 (R)? 16 | */ 17 | .mytext PT_LOAD FLAGS(5); 18 | } 19 | SECTIONS 20 | { 21 | .
= 0x400000; 22 | .mytext : { 23 | *(*) 24 | } :.mytext 25 | } 26 | -------------------------------------------------------------------------------- /linker-scripts/hello_world.S: -------------------------------------------------------------------------------- 1 | #include "common.h" 2 | .section .mydata, "a" 3 | STRING(s, "hello world\n") 4 | .section .mytext, "ax" 5 | .global _start 6 | _start: 7 | WRITE(s) 8 | EXIT($0) 9 | -------------------------------------------------------------------------------- /linker-scripts/hello_world.ld: -------------------------------------------------------------------------------- 1 | /* 2 | Small very explicit example. 3 | 4 | Expected output: `hello world\n` 5 | 6 | We could make this much much smaller by using default names, 7 | but this contains the minimum amount of detail 8 | that you should know about to get started. 9 | */ 10 | 11 | /* 12 | If `ENTRY` is not present, a series of defaults are used: 13 | 14 | - the value of a target specific symbol, if it is defined. For many targets this is `start`. 15 | - the address of the first byte of the `.text` section, if present 16 | - address 0 17 | */ 18 | ENTRY(_start); 19 | 20 | /* Program headers, each of which specifies a segment. */ 21 | PHDRS 22 | { 23 | /* 24 | If we don't set the flags here, 25 | the permissions are the minimum possible for the sections inside this segment. 26 | */ 27 | mytext_segment PT_LOAD; 28 | mydata_segment PT_LOAD; 29 | } 30 | 31 | SECTIONS 32 | { 33 | /* 34 | Set where the text segment will be loaded into memory by Linux. 35 | 36 | Linux segfaults us if the .text is too low. 37 | 38 | If this were not present, it would be 0, and that is too low and segfaults. See also: 39 | 40 | - http://stackoverflow.com/questions/2187484/why-is-the-elf-execution-entry-point-virtual-address-of-the-form-0x80xxxxx-and-n 41 | - http://stackoverflow.com/questions/2966426/why-do-virtual-memory-addresses-for-linux-binaries-start-at-0x8048000?lq=1 42 | */ 43 | .
= 0x400000; 44 | 45 | /* Create an output section. */ 46 | .mytext_output_section : 47 | { 48 | /* Crump every `.mytext` section from every object file (`*`) into here. */ 49 | *(.mytext) 50 | /* If we don't set a segment here, `.text` segment is the default. */ 51 | } :mytext_segment 52 | 53 | . = 0x600000; 54 | .mydata_output_section : 55 | { 56 | *(.mydata) 57 | } :mydata_segment 58 | } 59 | -------------------------------------------------------------------------------- /linker-scripts/keep.S: -------------------------------------------------------------------------------- 1 | .section .text 2 | .global _start 3 | _start: 4 | /* Dummy access so that after will be referenced and kept. */ 5 | mov after, %eax 6 | /*mov keep, %eax*/ 7 | /* Exit system call. */ 8 | mov $1, %eax 9 | /* Take the exit status 4 bytes after before. */ 10 | mov $4, %ebx 11 | mov before(%ebx), %ebx 12 | int $0x80 13 | .section .before 14 | before: .long 0 15 | .section .keep, "a" 16 | keep: .long 1 17 | .section .after 18 | after: .long 2 19 | -------------------------------------------------------------------------------- /linker-scripts/keep.ld: -------------------------------------------------------------------------------- 1 | /* 2 | http://stackoverflow.com/questions/9827157/what-does-keep-mean-in-a-linker-script 3 | */ 4 | ENTRY(_start) 5 | SECTIONS 6 | { 7 | . = 0x400000; 8 | .text : 9 | { 10 | *(.text) 11 | *(.before) 12 | /*KEEP(*(.keep));*/ 13 | *(.keep) 14 | *(.after) 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /linker-scripts/min.S: -------------------------------------------------------------------------------- 1 | /* 2 | This is also minimal as it uses the fact that 3 | the default section is `.text`. 
4 | */ 5 | 6 | #include "common.h" 7 | EXIT($0) 8 | -------------------------------------------------------------------------------- /linker-scripts/min.ld: -------------------------------------------------------------------------------- 1 | /* 2 | Minimal linker script possible. 3 | 4 | Expected outcome: program exit status is 0. 5 | 6 | Several defaults are used: 7 | 8 | - entry on `.text` segment 9 | - put every section that does not match any rule into the `.text` segment 10 | */ 11 | SECTIONS 12 | { 13 | /* If we don't use this, it gets set to 0 and segfaults. */ 14 | . = 0x400000; 15 | } 16 | -------------------------------------------------------------------------------- /linker-scripts/symbol.S: -------------------------------------------------------------------------------- 1 | mov $4, %eax 2 | mov $1, %ebx 3 | mov $s, %ecx 4 | mov s_len, %edx 5 | int $0x80 6 | 7 | mov $1, %eax 8 | mov $0, %ebx 9 | int $0x80 10 | -------------------------------------------------------------------------------- /linker-scripts/symbol.ld: -------------------------------------------------------------------------------- 1 | /* 2 | Define symbols and give them values on the linker script. 3 | 4 | Expected output: `hello world\n` 5 | 6 | Used on many default linker scripts to say where sections start and end: 7 | http://stackoverflow.com/questions/1765969/where-are-the-symbols-etext-edata-and-end-defined 8 | */ 9 | SECTIONS 10 | { 11 | . = 0x400000; 12 | .text : 13 | { 14 | *(*) 15 | 16 | /* 17 | This defines the POSITION of s RELATIVE TO THE CURRENT SECTION, 18 | *not* its value. 19 | 20 | This works because the value of the `.` (location counter) 21 | is also relative to the current segment: 22 | https://sourceware.org/binutils/docs/ld/Location-Counter.html 23 | */ 24 | s = .; 25 | 26 | /* 27 | Immediately after comes the value.
28 | 29 | Obtained from: printf 'hello world' | rev | hd 30 | */ 31 | QUAD(0x6f77206f6c6c6568) 32 | LONG(0x0a646c72) 33 | 34 | /* 35 | The name `len` cannot be used, because it is a keyword! 36 | https://sourceware.org/binutils/docs/ld/MEMORY.html 37 | TODO possible to escape it? 38 | */ 39 | s_len = .; 40 | LONG(. - s) 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /linux/Makefile: -------------------------------------------------------------------------------- 1 | IN_EXT ?= .asm 2 | OUT_EXT ?= .out 3 | TMP_EXT ?= .o 4 | RUN ?= hello_world 5 | 6 | INS := $(wildcard *$(IN_EXT)) 7 | OUTS := $(patsubst %$(IN_EXT),%$(OUT_EXT),$(INS)) 8 | 9 | .PRECIOUS: %$(TMP_EXT) 10 | .PHONY: all clean run test 11 | 12 | all: $(OUTS) 13 | 14 | %$(OUT_EXT): $(OUT_DIR)%$(TMP_EXT) 15 | ld -m elf_i386 -o '$@' '$<' 16 | 17 | %.o: %.asm 18 | nasm -w+all -f elf32 -g -o '$@' '$<' 19 | 20 | clean: 21 | rm -f *$(TMP_EXT) *$(OUT_EXT) 22 | 23 | run: all 24 | ./$(RUN)$(OUT_EXT) 25 | 26 | test: all 27 | for f in *$(OUT_EXT); do echo $$f; ./$$f; done 28 | -------------------------------------------------------------------------------- /linux/README.md: -------------------------------------------------------------------------------- 1 | # Linux 2 | 3 | ## Introduction 4 | 5 | Linux-specific IA-32 examples without external libraries like libc. 6 | 7 | Use a libc for OS-portable code instead. 8 | 9 | You can also compile and run this in Linux x86-64, which is able to run 32-bit executables. TODO: check that. Seems to work. 10 | 11 | This probably works because Linux x86-64 uses compatibility mode to run the instructions, and the old IA-32 `int 0x80` system calls work directly on `x86-64`. 12 | 13 | When using external libraries like libc, you also have to take care to link to the right version of the library. 14 | 15 | ## System calls 16 | 17 | We mostly use `int 0x80` system calls here for their simplicity.
18 | 19 | However, the best way to do system calls in IA-32 Linux is through `sysenter` as it is faster. But user applications don't use `sysenter` directly, but rather `VDSO` should be used. 20 | 21 | ## VDSO 22 | 23 | ## vsyscall 24 | 25 | The kernel just maps and maintains a magic memory area to every process which contains fast versions of certain functions. 26 | 27 | 28 | -------------------------------------------------------------------------------- /linux/c-from-assembly/README.md: -------------------------------------------------------------------------------- 1 | # C from assembly 2 | 3 | Call the C standard library from assembly. 4 | 5 | This is not OS portable because ANSI C does not specify the ABI. 6 | 7 | Things which may vary include: 8 | 9 | - calling conventions 10 | - method names. E.g. Watcom uses `puts_` instead of `puts` 11 | 12 | The only way to deal with that is by treating each implementation differently with macros. 13 | 14 | The following methods are covered: 15 | 16 | - `main/`: `main` function in assembly 17 | - `start/`: `_start` function in assembly, and link with `ld` 18 | - `start-gcc/`: `_start` function in assembly, and link with `gcc` 19 | -------------------------------------------------------------------------------- /linux/c-from-assembly/main/Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: clean test 2 | 3 | main.out: main.asm 4 | nasm -w+all -f elf32 -o main.o main.asm 5 | gcc -m32 -o main.out main.o 6 | 7 | clean: 8 | rm -f *.o *.out 9 | 10 | test: main.out 11 | ./main.out 12 | -------------------------------------------------------------------------------- /linux/c-from-assembly/main/README.md: -------------------------------------------------------------------------------- 1 | # C from assembly 2 | 3 | Call C from assembly. 4 | 5 | This is not portable because ANSI C does not specify the ABI. 
6 | 7 | Things which may vary include: 8 | 9 | - calling conventions 10 | - method names. E.g. Watcom uses `puts_` instead of `puts` 11 | 12 | The only way to deal with that is by treating each implementation differently with macros. 13 | -------------------------------------------------------------------------------- /linux/c-from-assembly/main/main.asm: -------------------------------------------------------------------------------- 1 | extern puts, exit 2 | section .data 3 | hello_world db "hello world", 0 4 | section .text 5 | global main 6 | main: 7 | push hello_world 8 | call puts 9 | pop eax 10 | 11 | push 0 12 | call exit 13 | -------------------------------------------------------------------------------- /linux/c-from-assembly/start-gcc/Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: clean test 2 | 3 | main.out: main.asm 4 | nasm -w+all -f elf32 -o main.o main.asm 5 | # -nostartfiles is the key here, or else we will have two _start symbols: 6 | # one on main.asm, and another on crt1.o. 7 | gcc -nostartfiles -m32 -o main.out main.o 8 | 9 | clean: 10 | rm -f *.o *.out 11 | 12 | test: main.out 13 | ./main.out 14 | -------------------------------------------------------------------------------- /linux/c-from-assembly/start-gcc/main.asm: -------------------------------------------------------------------------------- 1 | extern puts, exit 2 | section .data 3 | hello_world db "hello world", 0 4 | section .text 5 | global _start 6 | _start: 7 | push hello_world 8 | call puts 9 | pop eax 10 | push 0 11 | call exit 12 | -------------------------------------------------------------------------------- /linux/c-from-assembly/start/Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: clean test 2 | 3 | main.out: main.asm 4 | nasm -w+all -f elf32 -o main.o main.asm 5 | # -dynamic-linker is put into the .interp section 6 | # so that the stdlib can be found. 
7 | # 8 | # gcc would fail here because it requires a `main` method 9 | # which is called from crt1.o 10 | ld -m elf_i386 -dynamic-linker /lib/ld-linux.so.2 -o main.out -lc main.o 11 | 12 | clean: 13 | rm -f *.o *.out 14 | 15 | test: main.out 16 | ./main.out 17 | -------------------------------------------------------------------------------- /linux/c-from-assembly/start/main.asm: -------------------------------------------------------------------------------- 1 | extern puts, exit 2 | section .data 3 | hello_world db "hello world", 0 4 | section .text 5 | global _start 6 | _start: 7 | push hello_world 8 | call puts 9 | pop eax 10 | push 0 11 | call exit 12 | -------------------------------------------------------------------------------- /linux/custom-entry-gcc/Makefile: -------------------------------------------------------------------------------- 1 | .POSIX: 2 | .PHONY: clean test 3 | 4 | main.out: main.c 5 | gcc -nostartfiles -Wl,-etest -o '$@' '$<' 6 | 7 | clean: 8 | rm -f main.out 9 | 10 | test: main.out 11 | ./main.out 12 | -------------------------------------------------------------------------------- /linux/custom-entry-gcc/README.md: -------------------------------------------------------------------------------- 1 | # Custom entry GCC 2 | 3 | Custom entry point in C file different than `main`. 4 | 5 | 6 | -------------------------------------------------------------------------------- /linux/custom-entry-gcc/main.c: -------------------------------------------------------------------------------- 1 | #include <stdio.h> 2 | #include <stdlib.h> 3 | 4 | extern char **environ; 5 | 6 | int test(int argc, char **argv) { 7 | /* Trash. */ 8 | printf("argc = %d\n", argc); 9 | /* Setup.
*/ 10 | printf("environ[0] = %s\n", environ[0]); 11 | exit(0); 12 | } 13 | -------------------------------------------------------------------------------- /linux/custom-entry/Makefile: -------------------------------------------------------------------------------- 1 | IN_EXT ?= .asm 2 | OUT_EXT ?= .out 3 | TMP_EXT ?= .o 4 | RUN ?= hello_world 5 | 6 | INS := $(wildcard *$(IN_EXT)) 7 | OUTS := $(patsubst %$(IN_EXT),%$(OUT_EXT),$(INS)) 8 | 9 | .PRECIOUS: %$(TMP_EXT) 10 | .PHONY: all clean run 11 | 12 | all: $(OUTS) 13 | 14 | %$(OUT_EXT): $(OUT_DIR)%$(TMP_EXT) 15 | ld --verbose -e _custom -m elf_i386 -o '$@' '$<' 16 | 17 | %.o: %.asm 18 | nasm -w+all -f elf32 -o '$@' '$<' 19 | 20 | clean: 21 | rm -f *$(TMP_EXT) *$(OUT_EXT) 22 | 23 | run: all 24 | ./$(RUN)$(OUT_EXT) 25 | -------------------------------------------------------------------------------- /linux/custom-entry/hello_world.asm: -------------------------------------------------------------------------------- 1 | section .data 2 | hello_world db "hello world", 10 3 | hello_world_len equ $ - hello_world 4 | section .text 5 | global _custom 6 | _custom: 7 | mov eax, 4 8 | mov ebx, 1 9 | mov ecx, hello_world 10 | mov edx, hello_world_len 11 | int 0x80 12 | mov eax, 1 13 | mov ebx, 0 14 | int 0x80 15 | -------------------------------------------------------------------------------- /linux/hello_world.asm: -------------------------------------------------------------------------------- 1 | section .rodata 2 | hello_world db "hello world", 10 3 | hello_world_len equ $ - hello_world 4 | section .text 5 | global _start 6 | _start: 7 | mov eax, 4 8 | mov ebx, 1 9 | mov ecx, hello_world 10 | mov edx, hello_world_len 11 | int 0x80 12 | mov eax, 1 13 | mov ebx, 0 14 | int 0x80 15 | -------------------------------------------------------------------------------- /linux/hello_world_min.asm: -------------------------------------------------------------------------------- 1 | ; Minimized hello world. 
2 | ; 3 | ; - put the string directly at the end of the `.text` section. Generates ugly `objdump`, but works fine. 4 | ; - remove explicit `section .text` as NASM defaults to it when no section is specified 5 | ; 6 | ; We could also: 7 | ; 8 | ; - remove `_start` as `ld` defaults to `.text` when the `ENTRY` symbol is not specified 9 | ; 10 | ; But that would generate warnings. 11 | 12 | global _start 13 | _start: 14 | mov eax, 4 15 | mov ebx, 1 16 | mov ecx, hello_world 17 | mov edx, hello_world_len 18 | int 0x80 19 | mov eax, 1 20 | mov ebx, 0 21 | int 0x80 22 | hello_world db "hello world", 10 23 | hello_world_len equ $ - hello_world 24 | -------------------------------------------------------------------------------- /linux/interactive/Makefile: -------------------------------------------------------------------------------- 1 | ../Makefile -------------------------------------------------------------------------------- /linux/interactive/memory_layout.asm: -------------------------------------------------------------------------------- 1 | ; See what `esp` is worth at initialization. 2 | ; 3 | ; Pass the output of this through `hd` to see it. 4 | ; 5 | ; Expected outcome: two ints, 4 bytes apart, close to 4Gb. 6 | ; 7 | ; A little less than 4Gb and random because of ASLR. 8 | ; 9 | ; The kernel sets esp to it when the program is started: 10 | ; this is how it communicates it to the application. 11 | ; 12 | ; This is specified by the AMD64 ABI. 13 | 14 | %macro print_address 1 15 | mov eax, 4 16 | mov ebx, 1 17 | mov ecx, %1 18 | ; Swap the bytes so that the most significant byte is written first (big endian), which is easier for humans to read.
19 | bswap ecx 20 | mov [resd0], ecx 21 | mov ecx, resd0 22 | mov edx, 4 23 | int 80h 24 | %endmacro 25 | 26 | section .bss 27 | 28 | resd0 resd 1 29 | 30 | section .text 31 | 32 | global _start 33 | _start: 34 | 35 | print_address _start 36 | 37 | print_address esp 38 | push 1 39 | print_address esp 40 | 41 | mov eax, 1 42 | mov ebx, 0 43 | int 0x80 44 | -------------------------------------------------------------------------------- /linux/interactive/no_exit.asm: -------------------------------------------------------------------------------- 1 | ; Without the exit system call, the instruction pointer 2 | ; just keeps increasing until segfault. 3 | section .data 4 | hello_world db "Hello world!", 10 5 | hello_world_len equ $ - hello_world 6 | section .text 7 | global _start 8 | _start: 9 | mov eax, 4 10 | mov ebx, 1 11 | mov ecx, hello_world 12 | mov edx, hello_world_len 13 | int 80h 14 | -------------------------------------------------------------------------------- /linux/min.asm: -------------------------------------------------------------------------------- 1 | section .text 2 | global _start 3 | _start: 4 | mov eax, 1 5 | mov ebx, 0 6 | int 0x80 7 | -------------------------------------------------------------------------------- /linux/stack_top.asm: -------------------------------------------------------------------------------- 1 | ; See what `esp` is worth at initialization. 2 | ; 3 | ; Pass the output of this through hd to see it. 4 | ; 5 | ; Expected outcome: two ints, 4 bytes apart, close to 4Gb. 6 | ; 7 | ; A little less than 4Gb and random because of ASLR. 8 | ; 9 | ; The kernel sets esp to it when the program is started: 10 | ; this is how it communicates it to the application. 11 | ; 12 | ; This is specified by the AMD64 ABI.
13 | 14 | %macro print_address 1 15 | mov eax, 4 16 | mov ebx, 1 17 | mov ecx, %1 18 | bswap ecx 19 | mov [resd0], ecx 20 | mov ecx, resd0 21 | mov edx, 4 22 | int 80h 23 | %endmacro 24 | 25 | section .bss 26 | 27 | resd0 resd 1 28 | 29 | section .text 30 | 31 | global _start 32 | _start: 33 | 34 | print_address esp 35 | push 1 36 | print_address esp 37 | 38 | print_address _start 39 | 40 | mov eax, 1 41 | mov ebx, 0 42 | int 0x80 43 | -------------------------------------------------------------------------------- /linux/system_call.md: -------------------------------------------------------------------------------- 1 | # System call 2 | 3 | 4 | 5 | The following methods are possible, in order of decreasing preference: 6 | 7 | - VDSO. Used by GCC by default. 8 | - `sysenter` 9 | - `int 0x80` 10 | -------------------------------------------------------------------------------- /microcode.md: -------------------------------------------------------------------------------- 1 | # Microcode 2 | 3 | Intel processors have a large front-end that decodes every instruction to a simpler internal undocumented and inaccessible instruction set. 4 | 5 | The undocumented internal instructions are also known as microcode. 6 | 7 | The internal microcode interpreter is known as the RISC core. 8 | 9 | It also seems that microcode can be changed with firmware updates. 10 | 11 | - 12 | - 13 | -------------------------------------------------------------------------------- /microcontrollers.md: -------------------------------------------------------------------------------- 1 | # Microcontrollers 2 | 3 | ## Vs microprocessor 4 | 5 | There is a spectrum, so it is difficult to say. 6 | 7 | Microcontrollers are small SoCs... memory is inside the chip. 8 | 9 | They have dedicated IO pins. 10 | 11 | TODO: response time is smaller. 12 | 13 | TODO: most have predictable instruction times, and are better for hard real time.
14 | 15 | TODO: the latency between signal and result is smaller 16 | 17 | TODO: often programmed to run on bare metal, while microprocessors are more often programmed to run on top of an OS (e.g. Linux). But there are also OSes for microcontrollers, mostly real time ones: , e.g. QNX. Those OSes can even implement the POSIX C API. 18 | 19 | Microcontrollers can have either internal or external clock sources: TODO CPUs? 20 | 21 | 22 | -------------------------------------------------------------------------------- /nasm/Makefile: -------------------------------------------------------------------------------- 1 | ../Makefile -------------------------------------------------------------------------------- /nasm/comment.asm: -------------------------------------------------------------------------------- 1 | ; # Comments 2 | 3 | ; Single line with semicolon `;` 4 | 5 | ; Multi-line: TODO `%comment` was added but then removed? 6 | 7 | ;%comment 8 | ;Any thing. 9 | ;%endcomment 10 | 11 | %include "lib/common_nasm.inc" 12 | 13 | ENTRY 14 | mov eax, 1 15 | ; mov eax, 2 16 | ASSERT_EQ eax, 1 17 | EXIT 18 | -------------------------------------------------------------------------------- /nasm/current_address.asm: -------------------------------------------------------------------------------- 1 | ; # Current address 2 | 3 | ; # $ 4 | 5 | %include "lib/common_nasm.inc" 6 | 7 | ENTRY 8 | 9 | PRINT_INT $ 10 | PRINT_INT $ 11 | mov eax, $ + 1 12 | 13 | ; Infinite loop. 
14 | ;jmp $ 15 | 16 | EXIT 17 | -------------------------------------------------------------------------------- /nasm/define.asm: -------------------------------------------------------------------------------- 1 | ; # define 2 | 3 | %include "lib/common_nasm.inc" 4 | 5 | ENTRY 6 | %define SIZE 10 7 | mov eax, SIZE 8 | ASSERT_EQ eax, 10 9 | EXIT 10 | -------------------------------------------------------------------------------- /nasm/equ.asm: -------------------------------------------------------------------------------- 1 | ; # equ 2 | 3 | ; http://stackoverflow.com/a/33148242/895245 4 | 5 | %include "lib/common_nasm.inc" 6 | 7 | DATA 8 | global x 9 | x: equ 1 + 1 10 | y: dd 1 + 1 11 | ENTRY 12 | mov eax, x 13 | ASSERT_EQ eax, 2 14 | mov eax, [y] 15 | ASSERT_EQ eax, 2 16 | EXIT 17 | -------------------------------------------------------------------------------- /nasm/if.asm: -------------------------------------------------------------------------------- 1 | %include "lib/common_nasm.inc" 2 | 3 | ENTRY 4 | %if 0 5 | ASSERT_FAIL 6 | %endif 7 | EXIT 8 | -------------------------------------------------------------------------------- /nasm/include.asm: -------------------------------------------------------------------------------- 1 | ; # include 2 | 3 | ; Same as C #include: copy pastes file data. 4 | 5 | ; The common standard extension is `.inc`. 6 | 7 | %include "lib/common_nasm.inc" 8 | 9 | ENTRY 10 | mov eax, 1 11 | %include "included.inc" 12 | ASSERT_EQ eax, 2 13 | EXIT 14 | -------------------------------------------------------------------------------- /nasm/included.inc: -------------------------------------------------------------------------------- 1 | mov eax, 2 2 | -------------------------------------------------------------------------------- /nasm/introduction.md: -------------------------------------------------------------------------------- 1 | # Introduction 2 | 3 | Netwide assembler. TODO what does Netwide mean?
4 | 5 | One of the most popular for Linux. 6 | 7 | Open source. 8 | 9 | Intel like syntax. 10 | 11 | Input formats: 12 | 13 | - `.asm`, Intel like syntax 14 | 15 | Most useful command line options: 16 | 17 | - `-w+all`: enable all warnings 18 | 19 | ## Downsides 20 | 21 | NASM is way too gentle on input validation, even on `-w+all`. 22 | 23 | In particular, anything unknown is happily treated as a label, even if it does not have a colon `:`. 24 | 25 | ## f 26 | 27 | Output format: 28 | 29 | nasm -f elf32 main.asm 30 | 31 | List all formats: 32 | 33 | nasm -hf 34 | 35 | You should always use this flag. 36 | 37 | The default format of a default compilation (and that of Ubuntu 14.04) is `bin`, which generates raw instructions without any ELF metadata. 38 | 39 | That default is not very useful. 40 | 41 | There seems to be no way to automatically decide a better choice: the default is a compilation option. 42 | 43 | The following formats work: 44 | 45 | - `elf32`: Linux 32-bit 46 | - `elf64`: Linux 64-bit 47 | - `bin`: raw 16-bit. Useful to create boot sectors. 32-bit operands and memory addresses are encodable with the `66h` and `67h` prefixes respectively. 48 | 49 | ## Source code 50 | 51 | git clone git://repo.or.cz/nasm.git 52 | sh autogen.sh 53 | ./configure 54 | make 55 | make install 56 | 57 | -------------------------------------------------------------------------------- /nasm/lib: -------------------------------------------------------------------------------- 1 | ../lib -------------------------------------------------------------------------------- /nasm/local_labels.asm: -------------------------------------------------------------------------------- 1 | ; # Local labels 2 | 3 | ; http://www.nasm.us/doc/nasmdoc3.html#section-3.9 4 | 5 | ; Labels that start with a period `.` get the previous non-local label 6 | ; prepended to them, which may give them uniqueness. 7 | 8 | ; Those are still present on the output `.o` however.
9 | 10 | ; GAS has a related convention which hides `.L` prefixed labels 11 | ; completely from the output `.o`. 12 | 13 | %include "lib/common_nasm.inc" 14 | 15 | ENTRY 16 | outside_label: 17 | 18 | ; This should be not done in practice, 19 | ; but shows how it works under the hood. 20 | jmp outside_label.inside_label 21 | ASSERT_FAIL 22 | .inside_label: 23 | 24 | ; This is what you should do in practice. 25 | ; Labels also get appended when used as arguments. 26 | jmp .inside_label2 27 | ASSERT_FAIL 28 | .inside_label2: 29 | 30 | EXIT 31 | -------------------------------------------------------------------------------- /nasm/ptr.asm: -------------------------------------------------------------------------------- 1 | ; http://stackoverflow.com/questions/13790146/x86-difference-between-byte-and-byte-ptr 2 | ; 3 | ; It's just a syntax error for NASM. 4 | 5 | %include "lib/common_nasm.inc" 6 | 7 | ENTRY 8 | ; mov byte [ecx], 0 9 | EXIT 10 | -------------------------------------------------------------------------------- /nasm/ram.asm: -------------------------------------------------------------------------------- 1 | ; Examples of how to access RAM in NASM. 2 | 3 | %include "lib/common_nasm.inc" 4 | 5 | section .bss 6 | 7 | resd0 resd 1 8 | resd1 resd 1 9 | 10 | ENTRY 11 | 12 | ; Square brackets `[]` are needed to access the *data*, 13 | ; otherwise the address of the label is understood: 14 | 15 | ; Data: 16 | 17 | mov dword [resd0], 1 18 | ASSERT_EQ [resd0], 1, dword 19 | 20 | ; Address: 21 | 22 | mov dword eax, resd1 23 | sub eax, resd0 24 | ASSERT_EQ eax, 4 25 | 26 | ; # Memory size 27 | 28 | ; TODO examples. 29 | 30 | ; When operations involve registers it is not necessary to specify 31 | ; the number of bytes to move around since that can be deduced from the 32 | ; register form: `eax` = 4 bytes, `ax` == 2 bytes, etc. 33 | 34 | ; But when moving or comparing literals to RAM 35 | ; it is mandatory to specify the number of bytes to move. 
36 | 37 | ; Error: size not specified. 38 | ;mov [resd0], 1 39 | 40 | ; NASM knows it is 4 bytes because we are using eax: 41 | 42 | mov eax, 1 43 | mov [resd0], eax 44 | 45 | ; List of all size keywords: 46 | ; http://stackoverflow.com/questions/12063840/what-are-the-sizes-of-tword-oword-and-yword-operands 47 | 48 | ; # byte 49 | 50 | ; 1 51 | 52 | ; # word 53 | 54 | ; 2 55 | 56 | ; # dword 57 | 58 | ; 4 59 | 60 | ; Double word. 61 | 62 | ; # qword 63 | 64 | ; 8 65 | 66 | ; Quadruple word. 67 | 68 | ; Only possible in 64-bit. 69 | 70 | ; # tword 71 | 72 | ; 10 73 | 74 | ; Ten. 75 | 76 | ; Used for x87 floating point formats. TODO example 77 | 78 | ; # oword 79 | 80 | ; 16 81 | 82 | ; TODO example 83 | 84 | ; # yword 85 | 86 | ; 32 87 | 88 | ; # zword 89 | 90 | ; 64 91 | 92 | EXIT 93 | -------------------------------------------------------------------------------- /nasm/symbol_colon.asm: -------------------------------------------------------------------------------- 1 | ; The colon after a symbol name is optional in NASM. 2 | ; 3 | ; http://stackoverflow.com/questions/8006365/is-there-a-difference-between-using-or-not-a-colon-after-symbol-name-definitions 4 | 5 | %include "lib/common_nasm.inc" 6 | 7 | DATA 8 | ; With colon. 9 | x: db 1 10 | ; Without colon.
11 | y db 2 12 | ENTRY 13 | mov eax, 0 14 | mov al, [x] 15 | ASSERT_EQ eax, 1 16 | mov al, [y] 17 | ASSERT_EQ eax, 2 18 | EXIT 19 | -------------------------------------------------------------------------------- /objcopy.md: -------------------------------------------------------------------------------- 1 | # objcopy 2 | 3 | ## Extract content from a section 4 | 5 | 6 | 7 | ## Separate debug information from executable 8 | 9 | 10 | 11 | objcopy --only-keep-debug "${from}" "${to}" 12 | strip --strip-debug --strip-unneeded "${from}" 13 | objcopy --add-gnu-debuglink="${to}" "${from}" 14 | -------------------------------------------------------------------------------- /objdump.md: -------------------------------------------------------------------------------- 1 | # objdump 2 | 3 | `objdump` shows a weird view of ELF files: use `readelf` whenever possible: 4 | 5 | - 6 | - 7 | 8 | Things which it can do that `readelf` cannot: 9 | 10 | - disassemble 11 | 12 | Most useful command `objdump` command: 13 | 14 | objdump -Sr a.o 15 | 16 | ## d 17 | 18 | See assembler instructions of object file or executable: 19 | 20 | objdump -d a.o 21 | 22 | Only disassembles sections which should contain assembly code like `.text`. 23 | 24 | Sample output on a x86_64 Linux hello world: 25 | 26 | hello_world.o: file format elf64-x86-64 27 | 28 | 29 | Disassembly of section .text: 30 | 31 | 0000000000000000 <_start>: 32 | 0: b8 01 00 00 00 mov $0x1,%eax 33 | 5: bf 01 00 00 00 mov $0x1,%edi 34 | a: 48 be 00 00 00 00 00 movabs $0x0,%rsi 35 | 11: 00 00 00 36 | 14: ba 0d 00 00 00 mov $0xd,%edx 37 | 19: 0f 05 syscall 38 | 1b: b8 3c 00 00 00 mov $0x3c,%eax 39 | 20: bf 00 00 00 00 mov $0x0,%edi 40 | 25: 0f 05 syscall 41 | 42 | For executable files, it still writes `Disassembly of section .text:`, but it means *segment* `.text`, it is purely wrong. 
43 | 44 | ## S 45 | 46 | Similar to `-d`, but also intermingle original C code with disassembly if there is debug information available for that: 47 | 48 | gcc -ggdb3 -o a.o a.c 49 | objdump -S a.o 50 | 51 | ## j 52 | 53 | Select which section to act on. 54 | 55 | ## Disassemble a single function 56 | 57 | Seems impossible, use GDB instead. 58 | 59 | 60 | 61 | ## M 62 | 63 | Disassemble with Intel syntax: 64 | 65 | objdump -M intel -d main.o 66 | -------------------------------------------------------------------------------- /other-architectures.md: -------------------------------------------------------------------------------- 1 | # Other architectures 2 | 3 | This tutorial focuses only on the x86 architecture families. 4 | 5 | Here are some other popular architectures which you should have in mind as well: 6 | 7 | ## PowerPC 8 | 9 | 10 | 11 | Created by IBM, Motorola and Apple. 12 | 13 | But IBM and Motorola are dying, Apple moved to Intel in 2005 for power efficiency, Xbox One and PlayStation 4 moved to x86-64 in 2013. 14 | 15 | Used on Nintendo GameCube (2001) up to Wii U (2012), PlayStation 3 and Xbox 360. 16 | 17 | ## MIPS 18 | 19 | 20 | 21 | RISC. Was really hot in 80s, bought by SGI. More recently bought by Imagination Technologies . 22 | 23 | Used mostly on embedded systems, and notably video game consoles: Nintendo 64, Sony PlayStation, PlayStation 2. 24 | 25 | Officially supported on Android. 26 | 27 | 28 | 29 | Lextra story: It is not possible to patent an entire ISA as there is too much prior art, but companies patent instead specific novel instructions, and the compatibility break kills off reimplementors. 30 | 31 | ## SPARC 32 | 33 | 34 | 35 | Created by Sun for its operating system Solaris. Now owned and used by Oracle. Not used much by anyone else. 36 | 37 | Was meant to be the ultimate RISC beats Intel and Windows coalition, but it failed. 
38 | -------------------------------------------------------------------------------- /params.makefile: -------------------------------------------------------------------------------- 1 | PHONY_MAKES = gas linux interactive linker-scripts nasm x86-64 2 | -------------------------------------------------------------------------------- /pipeline.md: -------------------------------------------------------------------------------- 1 | # Pipeline 2 | 3 | Throughput latency tradeoff. Hugely advantageous on throughput side, so always used today. 4 | 5 | - 6 | - 7 | - 8 | - 9 | - 10 | - (AKA pipeline stall) 11 | 12 | Tutorials: 13 | 14 | - 15 | - 16 | - talks about the classic 5 stage pipeline 17 | - good structural diagram 18 | 19 | ## Hazards 20 | 21 | Types: 22 | 23 | - data 24 | - structural: same pipeline stage used by two instructions at the same time 25 | - control: branches 26 | 27 | - 28 | 29 | ## Classic RISC pipeline 30 | 31 | Most tutorials cover this, so it is likely a good idea to learn this one real well. 32 | 33 | Seems to come from MIPS, so basic MIPS assembly will help you. 34 | 35 | 36 | 37 | ## Implementations 38 | 39 | Vendors document pipeline length. 40 | 41 | Intel: 42 | -------------------------------------------------------------------------------- /pros-and-cons-of-assembly.md: -------------------------------------------------------------------------------- 1 | # Pros and cons of assembly 2 | 3 | Assembly vs. higher level languages like C. 4 | 5 | ## Pros 6 | 7 | ### Speed 8 | 9 | - Use instructions that are so CPU specific and useful for your needs that compilers don't implement. 10 | 11 | - Use instructions in a way that is smarter than any compiler is likely to do. 12 | 13 | Note however that it gets harder and harder to make code more efficient using assembler because compilers are smarter and smarter. 14 | 15 | ### Do the impossible 16 | 17 | Access low level hardware which is so hardware specific it is not implemented in standard C. 
18 | 19 | ## Cons 20 | 21 | ### OS/processor dependent 22 | 23 | You write for a single CPU architecture only. 24 | 25 | ### Hard to read/write 26 | 27 | Other things are much more likely to speed up your code if that's what you want, namely: 28 | 29 | - better algorithms 30 | - better cache usage 31 | 32 | and only then, using assembly may stand a chance of speeding things up. 33 | -------------------------------------------------------------------------------- /provision: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | sudo apt-get install make 4 | sudo apt-get install nasm 5 | -------------------------------------------------------------------------------- /pusha.asm: -------------------------------------------------------------------------------- 1 | ; # pusha 2 | 3 | ; # popa 4 | 5 | ; Push and restore the following registers using the stack: 6 | ; EAX, ECX, EDX, EBX, ESP, EBP, ESI, EDI 7 | 8 | ; This is important for example in the C calling convention, 9 | ; where certain registers must not be changed by functions. 10 | 11 | %include "lib/common_nasm.inc" 12 | 13 | ENTRY 14 | 15 | mov ebx, 0 16 | mov ecx, 0 17 | 18 | mov eax, esp 19 | pusha 20 | sub eax, esp 21 | ; 8x 4 bytes 22 | ASSERT_EQ eax, 32 23 | 24 | mov ebx, 1 25 | mov ecx, 1 26 | popa 27 | ASSERT_EQ ebx, 0 28 | ASSERT_EQ ecx, 0 29 | 30 | EXIT 31 | -------------------------------------------------------------------------------- /readelf.md: -------------------------------------------------------------------------------- 1 | # readelf 2 | 3 | Get information stored inside executable files in a human readable way. 4 | 5 | See all information: 6 | 7 | readelf -a a.out 8 | 9 | TODO understand all the information, and therefore the entire elf format. 10 | 11 | To show only specific information: 12 | 13 | ## s 14 | 15 | Symtable of elf, `.o` or `.so`.
16 | 17 | ## d 18 | 19 | Show only dependencies of an executable (symbols and shared libraries). 20 | 21 | - `NEEDED`: direct `.so` dependencies . `.so` can also depend on other `.so`. To recursively list all dependencies, use `ldd`. 22 | 23 | ## x 24 | 25 | Hexdump section data for given section: 26 | 27 | readelf -x .data hello_world.o 28 | 29 | Sample output: 30 | 31 | Hex dump of section '.data': 32 | 0x00000000 48656c6c 6f20776f 726c6421 0a Hello world!. 33 | 34 | ## W 35 | 36 | Don't limit the output width to 80 columns which is the default, and reduce the amount of `0` padding. 37 | 38 | Makes things much more readable is your terminal is wide enough. 39 | 40 | Without `-W`: 41 | 42 | Section Headers: 43 | [Nr] Name Type Address Offset 44 | Size EntSize Flags Link Info Align 45 | [ 0] NULL 0000000000000000 00000000 46 | 0000000000000000 0000000000000000 0 0 0 47 | [ 1] .text PROGBITS 0000000000000000 00000040 48 | 000000000000005c 0000000000000000 AX 0 0 1 49 | [ 2] .rela.text RELA 0000000000000000 000002f0 50 | 00000000000000d8 0000000000000018 I 11 1 8 51 | [ 3] .data PROGBITS 0000000000000000 0000009c 52 | 0000000000000004 0000000000000000 WA 0 0 4 53 | [ 4] .bss NOBITS 0000000000000000 000000a0 54 | 0000000000000000 0000000000000000 WA 0 0 1 55 | [ 5] .rodata PROGBITS 0000000000000000 000000a0 56 | 0000000000000004 0000000000000000 A 0 0 1 57 | [ 6] .comment PROGBITS 0000000000000000 000000a4 58 | 0000000000000025 0000000000000001 MS 0 0 1 59 | [ 7] .note.GNU-stack PROGBITS 0000000000000000 000000c9 60 | 0000000000000000 0000000000000000 0 0 1 61 | [ 8] .eh_frame PROGBITS 0000000000000000 000000d0 62 | 0000000000000058 0000000000000000 A 0 0 8 63 | [ 9] .rela.eh_frame RELA 0000000000000000 000003c8 64 | 0000000000000030 0000000000000018 I 11 8 8 65 | [10] .shstrtab STRTAB 0000000000000000 00000128 66 | 0000000000000061 0000000000000000 0 0 1 67 | [11] .symtab SYMTAB 0000000000000000 00000190 68 | 0000000000000138 0000000000000018 12 10 8 69 | [12] 
.strtab STRTAB 0000000000000000 000002c8 70 | 0000000000000025 0000000000000000 0 0 1 71 | 72 | With `-W`: 73 | 74 | Section Headers: 75 | [Nr] Name Type Address Off Size ES Flg Lk Inf Al 76 | [ 0] NULL 0000000000000000 000000 000000 00 0 0 0 77 | [ 1] .text PROGBITS 0000000000000000 000040 00005c 00 AX 0 0 1 78 | [ 2] .rela.text RELA 0000000000000000 0002f0 0000d8 18 I 11 1 8 79 | [ 3] .data PROGBITS 0000000000000000 00009c 000004 00 WA 0 0 4 80 | [ 4] .bss NOBITS 0000000000000000 0000a0 000000 00 WA 0 0 1 81 | [ 5] .rodata PROGBITS 0000000000000000 0000a0 000004 00 A 0 0 1 82 | [ 6] .comment PROGBITS 0000000000000000 0000a4 000025 01 MS 0 0 1 83 | [ 7] .note.GNU-stack PROGBITS 0000000000000000 0000c9 000000 00 0 0 1 84 | [ 8] .eh_frame PROGBITS 0000000000000000 0000d0 000058 00 A 0 0 8 85 | [ 9] .rela.eh_frame RELA 0000000000000000 0003c8 000030 18 I 11 8 8 86 | [10] .shstrtab STRTAB 0000000000000000 000128 000061 00 0 0 1 87 | [11] .symtab SYMTAB 0000000000000000 000190 000138 18 12 10 8 88 | [12] .strtab STRTAB 0000000000000000 0002c8 000025 00 0 0 1 89 | -------------------------------------------------------------------------------- /registers.asm: -------------------------------------------------------------------------------- 1 | ; Intel puts the registers in the following groups: 2 | 3 | ; - General-purpose registers 4 | ; - Segment registers 5 | ; - EFLAGS (program status and control) register 6 | 7 | %include "lib/common_nasm.inc" 8 | 9 | ENTRY 10 | 11 | ; # General purpose registers 12 | 13 | ; # ebp 14 | 15 | ; Stack base. 16 | 17 | ; Why it exists: 18 | ; http://stackoverflow.com/questions/579262/what-is-the-purpose-of-the-ebp-frame-pointer-register 19 | 20 | ; - tell the debugger where the current frame starts 21 | ; - facilitate VLA and alloca 22 | 23 | ; Modified by `enter` and `leave`. 24 | 25 | ; You can almost never use it as a general purpose register because of that. 
26 | 27 | ; Its usage in high level languages can be optimized away 28 | ; e.g. with `-fomit-frame-pointer` in GCC 4.8. 29 | 30 | ; # esp 31 | 32 | ; Stack Pointer. 33 | 34 | ; Notably modified by `push`, `pop`, `enter`, `leave`, `call` and `ret`. 35 | 36 | ; You can almost never use it as a general purpose register because of that. 37 | 38 | ; # eip 39 | 40 | ; Instruction Pointer. 41 | 42 | ; Address of the current instruction to be executed. 43 | 44 | ; Cannot be retrieved directly: `mov` cannot be encoded to output to EIP: 45 | ; http://stackoverflow.com/questions/599968/reading-program-counter-directly 46 | 47 | ; There are however indirect techniques, and NASM offers `$`. 48 | 49 | ; # pc 50 | 51 | ; Another name for the IP. 52 | 53 | ; https://en.wikipedia.org/wiki/Program_counter 54 | 55 | ; # Initial register state 56 | 57 | ; Finally, no more programming languages getting in our way with definite assignment: 58 | 59 | ; - http://stackoverflow.com/questions/1802783/initial-state-of-program-registers-and-stack-on-linux-arm 60 | ; - http://stackoverflow.com/questions/9147455/what-is-default-register-state-when-program-launches-asm-linux 61 | 62 | ; Mentioned on major ABI specs, e.g. AMD64: http://www.x86-64.org/documentation/abi-0.99.pdf 63 | 64 | ; # Flag registers and instructions 65 | 66 | ; Most flags are identified by one letter, and commonly denoted `XF` for "flag X". 67 | 68 | ; Flags may be set or reset explicitly by dedicated instructions, 69 | ; and also as side effects of other instructions, e.g. `cmp` for ZF and CF.
70 | 71 | ; The flags are: 72 | 73 | ; - Type: Name (Identifier) 74 | ; - X: ID Flag (ID) 75 | ; - X: Virtual Interrupt Pending (VIP) 76 | ; - X: Virtual Interrupt Flag (VIF) 77 | ; - X: Alignment Check / Access Control (AC) 78 | ; - X: Virtual-8086 Mode (VM) 79 | ; - X: Resume Flag (RF) 80 | ; - X: Nested Task (NT) 81 | ; - X: I/O Privilege Level (IOPL) 82 | ; - S: Overflow Flag (OF) 83 | ; - C: Direction Flag (DF) 84 | ; - X: Interrupt Enable Flag (IF) 85 | ; - X: Trap Flag (TF) 86 | ; - S: Sign Flag (SF) 87 | ; - S: Zero Flag (ZF) 88 | ; - S: Auxiliary Carry Flag (AF) 89 | ; - S: Parity Flag (PF) 90 | ; - S: Carry Flag (CF) 91 | 92 | ; The types are: 93 | 94 | ; - `S`: Status Flag. Has no side effects. 95 | ; - `C`: Control Flag. Has side effects. 96 | ; - `X`: System Flag. Should not be modified from applications, only OS. 97 | 98 | ; The bit number of each flag is fixed. 99 | 100 | ; All the non-used are reserved for future use. 101 | 102 | ; # FLAGS 103 | 104 | ; 16-bit register that contains bit flags. 105 | 106 | ; Same as the lower bits of EFLAGS, which was added later. 107 | 108 | ; # EFLAGS 109 | 110 | ; 32-bit register that contains bit flags. TODO: added in x86-64? 111 | 112 | ; Lower 32 bits of RFLAGS. 113 | 114 | ; # RFLAGS 115 | 116 | ; 64-bit register that contains bit flags. Added in x86-64. 117 | 118 | ; TODO check: none of the new flags are used so far. 119 | 120 | ; # How to read and write flags 121 | 122 | ; http://stackoverflow.com/questions/1406783/flags-registers-can-we-read-or-write-them-directly 123 | 124 | ; - for a few flags: `jxx` 125 | ; - for the 8 lower bits: `lahf` and `sahf` 126 | ; - for all others: `pushf` and `popf` 127 | 128 | ; # Individual flag instructions 129 | 130 | ; Some, but not all, individual flags have dedicated instructions to operate on them. 131 | 132 | ; # stc 133 | 134 | ; # sti 135 | 136 | ; # std 137 | 138 | ; Set flag X (set it to 1). 
139 | 140 | ; # clc 141 | 142 | ; # cli 143 | 144 | ; # cld 145 | 146 | ; Clear flag X (set it to 0). 147 | 148 | ; # cmc 149 | 150 | ; Complement flag X (boolean negation). 151 | 152 | ; This is the only flag that has the complement instruction. 153 | 154 | stc 155 | ASSERT_FLAG jc 156 | clc 157 | ASSERT_FLAG jnc 158 | cmc 159 | ASSERT_FLAG jc 160 | cmc 161 | ASSERT_FLAG jnc 162 | EXIT 163 | -------------------------------------------------------------------------------- /risc-vs-cisc.md: -------------------------------------------------------------------------------- 1 | # RISC vs CISC 2 | 3 | The distinction between RISC and CISC is not a precise one, only a subjective quantitative description. 4 | 5 | ## RISC 6 | 7 | Reduced Instruction set. 8 | 9 | Has few operations, but performs each one very fast. 10 | 11 | ### One instruction set compiler 12 | 13 | Takes it to the limit: 14 | 15 | 16 | 17 | Funny example implementation for C in x86 `mov`: 18 | 19 | ## CISC 20 | 21 | Complex instruction set. 22 | 23 | Lots of operations. 24 | 25 | x86 is considered CISC, although it is known that the processor has a RISC core. 26 | 27 | ARM is considered RISC. 
28 | 29 | ## RISC core 30 | 31 | The distinction between RISC and CISC is further blurred by the fact that Intel X86, widely considered CISC, is actually composed of: 32 | 33 | - a complex instruction decoder, which transforms complex input instructions into a simpler subset called microcode 34 | - a RISC core which understands the microcode and actually runs it 35 | -------------------------------------------------------------------------------- /segment: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cirosantilli/x86-assembly-cheat/0c64fc5961bbdcbf442cec9bb98d70e889b1984f/segment -------------------------------------------------------------------------------- /segment_registers.asm: -------------------------------------------------------------------------------- 1 | ; # Segment registers 2 | 3 | ; # CS register 4 | 5 | ; # DS register 6 | 7 | ; # ES register 8 | 9 | ; # FS register 10 | 11 | ; # GS register 12 | 13 | ; # SS register 14 | 15 | ; They cannot be tested easily in userland. 16 | 17 | ; Simple test using them in real mode: 18 | ; https://github.com/cirosantilli/x86-bare-metal-examples/blob/01a5e64efaac653429ee24e502f611fea7850abb/segment_registers_real_mode.S 19 | 20 | ; TODO what are their initial values in Linux?
21 | 22 | %include "lib/common_nasm.inc" 23 | 24 | ENTRY 25 | PRINT_STRING_LITERAL 'CS' 26 | PRINT_INT cs 27 | 28 | PRINT_STRING_LITERAL 'DS' 29 | PRINT_INT ds 30 | 31 | PRINT_STRING_LITERAL 'ES' 32 | PRINT_INT es 33 | 34 | PRINT_STRING_LITERAL 'FS' 35 | PRINT_INT fs 36 | 37 | PRINT_STRING_LITERAL 'GS' 38 | PRINT_INT gs 39 | 40 | PRINT_STRING_LITERAL 'SS' 41 | PRINT_INT ss 42 | 43 | ; SEGFAULT: can't write to them in user mode: 44 | ;mov eax, 0 45 | ;mov ss, eax 46 | 47 | EXIT 48 | -------------------------------------------------------------------------------- /size.md: -------------------------------------------------------------------------------- 1 | # size 2 | 3 | Shows size of each memory part of a executable: 4 | 5 | gcc -c a.c 6 | gcc a.o 7 | size a.out a.o 8 | 9 | - text: instructions 10 | - data: init and `uinit` data 11 | - `dec` and `hex`: size of executable in decimal and hexadecimal 12 | -------------------------------------------------------------------------------- /stdcall.md: -------------------------------------------------------------------------------- 1 | # stdcall 2 | 3 | A C calling convention, less used than cdecl. 4 | 5 | Used on the Win32 API. 6 | 7 | Advantages: 8 | 9 | - generates slightly smaller code 10 | - potentially faster. 11 | 12 | Disadvantages: 13 | 14 | - it is not possible to pass variable number of arguments. 15 | -------------------------------------------------------------------------------- /string-instructions.md: -------------------------------------------------------------------------------- 1 | # String instruction 2 | 3 | Instructions to efficiently loop over multiple bytes. 4 | -------------------------------------------------------------------------------- /superscalar.md: -------------------------------------------------------------------------------- 1 | # Superscalar architecture 2 | 3 | TODO vs pipeline and instruction level parallelism? 
4 | 5 | 6 | -------------------------------------------------------------------------------- /synchronization.md: -------------------------------------------------------------------------------- 1 | # Synchronization 2 | 3 | TODO understand better and make multithreaded examples that fail. 4 | 5 | TODO Other sync instructions: 6 | 7 | - MFENCE 8 | - LFENCE 9 | - SFENCE 10 | 11 | Likely should be done with inline assembly. TODO: is it possible to test those things here (see failures) e.g. under the Linux kernel, or do we need to go bare metal? 12 | 13 | - http://stackoverflow.com/questions/4725676/how-does-x86-pause-instruction-work-in-spinlock-and-can-it-be-used-in-other-sc 14 | - http://stackoverflow.com/questions/11959374/fastest-inline-assembly-spinlock 15 | - http://wiki.osdev.org/Spinlock 16 | 17 | ## Lock prefix 18 | 19 | - http://stackoverflow.com/questions/11065675/lock-prefix-of-intel-instruction-what-is-the-point-its-function-is-so-limited 20 | - http://stackoverflow.com/questions/3339141/x86-lock-question-on-multi-core-cpus 21 | - http://stackoverflow.com/questions/8891067/what-does-the-lock-instruction-mean-in-x86-assembly 22 | - http://stackoverflow.com/questions/3343589/how-do-i-use-the-lock-asm-prefix-to-read-a-value 23 | 24 | Can be used with: ADD, ADC, AND, BTC, BTR, BTS, CMPXCHG, CMPXCH8B, DEC, INC, NEG, NOT, OR, SBB, SUB, XOR, XADD, and XCHG 25 | -------------------------------------------------------------------------------- /system-vs-application-programming.md: -------------------------------------------------------------------------------- 1 | # System programming vs application programming 2 | 3 | Most architectures provide two general sets of instructions: 4 | 5 | - system programming: instructions that only operating system writers will need. 6 | 7 | Application programs are generally forbidden from even using such instructions by security measures. 
8 | 9 | This allows for example for process separation: preventing one process from seeing the other's memory, reducing the impact of bugs and malicious attacks. 10 | 11 | - application programming. Instructions that any program can use. 12 | 13 | Those will be usually packaged inside executable wrappers of some sort, e.g. ELF, which add required metadata to the instructions, e.g. where to find shared libraries. 14 | 15 | This distinction is made so that application programs will run inside some sort of "protected mode", where the damage they can do to other applications and the OS is reduced. 16 | 17 | It is common for manuals to separate those into separate chapters. E.g., Intel puts all system programming specifics on Volume 3. 18 | 19 | This tutorial only covers application programming. For system programming see: 20 | 21 | System programming concepts not covered here include: 22 | 23 | - instructions: 24 | - `cli` and `sti` 25 | - `hlt` 26 | - `inb` 27 | - `outb` 28 | - special registers: 29 | - `ds` 30 | - `cs` 31 | - `ss` 32 | - paging 33 | - segmentation 34 | - rings 35 | 36 | Those concepts cannot be tried out while an OS is running without disrupting the OS, so testing them requires substantially different techniques. 37 | -------------------------------------------------------------------------------- /vliw.md: -------------------------------------------------------------------------------- 1 | # VLIW 2 | 3 | Wiki has the perfect example: 4 | 5 | > For example, the following is an instruction for the Super Harvard Architecture Single-Chip Computer (SHARC). In one cycle, it does a floating-point multiply, a floating-point add, and two autoincrement loads. All of this fits in one 48-bit instruction: 6 | 7 | > f12 = f0 * f4, f8 = f8 + f12, f0 = dm(i0, m3), f4 = pm(i8, m9); 8 | 9 | TODO implementations?
10 | -------------------------------------------------------------------------------- /x86-64/Makefile: -------------------------------------------------------------------------------- 1 | ../Makefile -------------------------------------------------------------------------------- /x86-64/README.md: -------------------------------------------------------------------------------- 1 | # x86-64 2 | 3 | This section highlights differences from x86: learn that first. 4 | 5 | x86_64 is a 64-bit architecture, originally by AMD, and later implemented with great (but not complete) compatibility by Intel. 6 | 7 | Intel calls their implementation "Intel 64" but it used "IA-32e" and "EM64T" before, AMD says AMD64, Apple x86-64 and x86_64, and Microsoft and Oracle x64. Notice the great naming uniformity. 8 | 9 | Do not confuse with IA-64, which is the old name for the Itanium architecture, a completely different architecture developed by Intel, which largely lost to the x86-64 alternative that AMD first proposed. 10 | 11 | ## x86-64 vs IA-32 12 | 13 | x86-64 has a compatibility mode which allows running IA32 code directly on it. 14 | 15 | In 64-bit mode, outside of compatibility mode, most instructions are analogous, but a few have been removed. 16 | 17 | The major difference added in x86-64 is the possibility of making 8 byte operations instead of 4. 18 | 19 | Most operations still exist and are analogous but some have changed or been removed. But if you turn on compatibility mode, it works with IA32 code. 20 | 21 | Lots of new registers were added, in particular `r8` - `r15` and extra XMM. 22 | 23 | SSE and SSE2 are required in x86-64. This is why, for example, the AMD64 calling convention can use XMM registers to pass floats. 24 | 25 | RIP addressing mode. 26 | 27 | ## Incompatibilities between Intel and AMD 28 | 29 | 30 | 31 | 32 | 33 | Mostly on the systems programming instructions, not application.
34 | -------------------------------------------------------------------------------- /x86-64/Vagrantfile: -------------------------------------------------------------------------------- 1 | Vagrant.configure(2) do |config| 2 | config.vm.box = 'hashicorp/precise64' 3 | config.vm.provision :shell, privileged: false, path: 'provision' 4 | end 5 | -------------------------------------------------------------------------------- /x86-64/calling-convention.md: -------------------------------------------------------------------------------- 1 | # Calling convention 2 | 3 | In x86-64, the number of calling conventions was greatly reduced. 4 | 5 | ## System V AMD64 ABI 6 | 7 | [System V AMD64 ABI][] dominating in Linux and other UNIX systems, and Mac OS X. 8 | 9 | A simplified version of the most important points to keep in mind about the [System V AMD64 ABI](http://www.x86-64.org/documentation/abi.pdf), which both Mac and Linux use, are: 10 | 11 | - before call 12 | 13 | - the stack pointer *must* be aligned to a multiple of 16 bytes immediately before `call`, i.e. before `call` pushes the return address to the stack. 14 | 15 | This often fails by default without explicitly changing `%rsp` because the `call` that entered the current function stored an 8 byte return address on the stack, so you usually need to do: 16 | 17 | sub 8, rsp 18 | call f 19 | 20 | If this fails, a segfault is a common outcome. 21 | 22 | The most common action then is to just store `rsp` on the stack to even things out. That also allows for recursion. 23 | 24 | 25 | 26 | - parameter order is: %rdi, %rsi, %rdx, %rcx, %r8, %r9, then push the rest on the stack in reverse order 27 | 28 | Floating point arguments are passed through the %xmm0 to %xmm7 registers present in Intel's SSE2, which must be part of every x86-64 implementation.
29 | 30 | Linux system calls [use R10 instead of RCX](http://stackoverflow.com/questions/2535989/what-are-the-calling-conventions-for-unix-linux-system-calls-on-x86-64), Mac's [are unchanged](https://developer.apple.com/library/mac/documentation/DeveloperTools/Conceptual/LowLevelABI/140-x86-64_Function_Calling_Conventions/x86_64.html) 31 | 32 | Parameters only need to be pushed to the stack to allow for recursion or calling other functions, and this is done in the callee. So compilers can optimize and only use the registers if possible. 33 | 34 | - for variable argument functions like `printf`, `al` (the low byte of `rax`) must contain an upper bound on the number of vector (SSE / AVX) registers used to pass arguments: 35 | 36 | This includes for example `float` and `double` arguments, but not `int`. 37 | 38 | TODO why is this needed? 39 | 40 | - after return 41 | 42 | - unchanged registers: RBX, RBP, RSP and R12-R15. Others may have changed. 43 | 44 | - return value (may need multiple quadwords for structs): 45 | 46 | - integers go in: %rax and %rdx 47 | 48 | - %xmm0 and %xmm1 are used for floats up to 8-byte wide (`double`) 49 | 50 | - %st0 holds x87 80-bit floats (`long double`) 51 | 52 | - %rax holds the address of return values that went to memory (the caller passes that address in %rdi as a hidden first argument) 53 | 54 | Other surprising ABI points: 55 | 56 | - You can use up to 128 bytes below `RSP` without fear that it will be corrupted by signal or interrupt handlers: this area is called the red zone. This is useful for leaf functions, which don't need to store arguments for further calls. 57 | 58 | Good article: 59 | 60 | ## Microsoft x64 61 | 62 | Obviously they could not be compatible with the rest. 63 | 64 | Microsoft x64 calling convention and its 2013 extension `__vectorcall` for Windows, 65 | -------------------------------------------------------------------------------- /x86-64/cmp-sign-extend.asm: -------------------------------------------------------------------------------- 1 | ; cmp (and most other instructions) cannot encode 64-bit immediates. 2 | ; 3 | ; mov however can.
4 | ; 5 | ; This can be seen in the Intel manual: cmp has no imm64 encoding. 6 | ; So in general, we have to move values to registers before we can operate on them. 7 | 8 | %include "lib/common_nasm.inc" 9 | 10 | ENTRY 11 | ; Sanity check: mov can encode imm64, and cmp r64, r64 works as expected. 12 | mov rax, 0x12345678_80000000 13 | mov rbx, 0x00000001_00000000 14 | add rax, rbx 15 | mov rbx, 0x12345679_80000000 16 | cmp rax, rbx 17 | je reg_reg 18 | ASSERT_FAIL 19 | reg_reg: 20 | 21 | ; The r64 imm32 encoding sign extends the immediate. 22 | ; 23 | ; This works, but it is kind of a lie: 24 | ; the actual encoded immediate is just 0x80000000. 25 | ; 26 | ; NASM does not give warnings however, so we prefer this syntax. 27 | mov rax, 0xFFFFFFFF_80000000 28 | cmp rax, 0xFFFFFFFF_80000000 29 | je reg_imm_sign_1 30 | ASSERT_FAIL 31 | reg_imm_sign_1: 32 | 33 | ; Positive sign extend. 34 | mov rax, 0x00000000_40000000 35 | cmp rax, 0x40000000 36 | je reg_imm_sign_0 37 | ASSERT_FAIL 38 | reg_imm_sign_0: 39 | 40 | ; Negative sign extend without leading 1's. 41 | ; Works but NASM warns, so we don't use this form. 42 | ; even though I think it is saner. 43 | ;mov rax, 0xFFFFFFFF_80000000 44 | ;cmp rax, 0x80000000 45 | ;je reg_imm_sign_1 46 | ;ASSERT_FAIL 47 | ;reg_imm_sign_1: 48 | 49 | ; Impossible negative sign extend. 50 | ; NASM warns and that is good, but it assembles to the same as above 51 | ; and I'd rather have an error! 52 | ;mov rax, 0xFFFFFFFF_80000000 53 | ;cmp rax, 0x1_80000000 54 | ;je reg_imm_sign_nasm 55 | ;ASSERT_FAIL 56 | ;reg_imm_sign_nasm: 57 | 58 | ; Don't forget that our macro ASSERT_EQ uses cmp, and won't work as expected 59 | ; if the immediate cannot be encoded by sign extension. 60 | ; So just move to a register before comparing.
61 | mov rax, 0x12345678_80000000 62 | mov rbx, 0x00000001_00000000 63 | add rax, rbx 64 | mov rbx, 0x12345679_80000000 65 | ASSERT_EQ rax, rbx 66 | 67 | EXIT 68 | -------------------------------------------------------------------------------- /x86-64/gas/Makefile: -------------------------------------------------------------------------------- 1 | ../Makefile -------------------------------------------------------------------------------- /x86-64/gas/lib: -------------------------------------------------------------------------------- 1 | ../lib -------------------------------------------------------------------------------- /x86-64/gas/movabs.S: -------------------------------------------------------------------------------- 1 | # TODO get working 2 | # http://stackoverflow.com/questions/19415184/load-from-a-64-bit-address-into-other-register-than-rax/19416331 3 | # http://reverseengineering.stackexchange.com/questions/2627/what-is-the-meaning-of-movabs-in-gas-x86-att-syntax 4 | 5 | .text 6 | 7 | .global asm_main 8 | asm_main: 9 | 10 | #movabs 0x8000000000000000, %rax 11 | 12 | # Error: operand size mismatch 13 | # Only possible for `rax`. 
14 | #movabs 0x8000000000000000, %rbx 15 | 16 | mov $0, %rax 17 | ret 18 | -------------------------------------------------------------------------------- /x86-64/gas/params.makefile: -------------------------------------------------------------------------------- 1 | BITS := 64 2 | DISAS_FLAVOR = att 3 | -------------------------------------------------------------------------------- /x86-64/lib/Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: all clean 2 | 3 | all: 4 | $(NASM) -o common$(OBJ_EXT) common.asm 5 | $(CC) $(CFLAGS) -o driver$(OBJ_EXT) -c driver.c 6 | 7 | clean: 8 | rm -f *$(OBJ_EXT) *$(OUT_EXT) 9 | -------------------------------------------------------------------------------- /x86-64/lib/common.asm: -------------------------------------------------------------------------------- 1 | ; System V calling convention used for all functions. 2 | 3 | extern printf, exit 4 | 5 | segment .text 6 | ; Print the 64-bit integer passed in rdi with printf("%ld\n"). 7 | global print_long 8 | print_long: 9 | sub rsp, 8 ; re-align the stack to 16 bytes for the call 10 | mov rsi, rdi 11 | mov rdi, .format 12 | xor eax, eax ; AL = 0: no vector (XMM) registers carry printf arguments 13 | call printf 14 | add rsp, 8 15 | ret 16 | .format db "%ld", 10, 0 17 | ; Print the NUL-terminated string pointed to by rdi with printf("%s\n"). 18 | global print_string 19 | print_string: 20 | sub rsp, 8 ; re-align the stack to 16 bytes for the call 21 | mov rsi, rdi 22 | mov rdi, .format 23 | xor eax, eax ; AL = 0: no vector (XMM) registers carry printf arguments 24 | call printf 25 | add rsp, 8 26 | ret 27 | .format db "%s", 10, 0 28 | ; Print the failure message and the line number passed in rdi, then exit with status 1. 29 | global assert_fail 30 | assert_fail: 31 | sub rsp, 8 32 | mov rbx, rdi 33 | mov rdi, .message 34 | call print_string 35 | mov rdi, rbx 36 | call print_long 37 | 38 | ; Call libc exit with exit status 1.
39 | mov rdi, 1 40 | call exit 41 | .message db 10, 'ASSERT FAILED AT LINE: ', 0 42 | -------------------------------------------------------------------------------- /x86-64/lib/common_gas.h: -------------------------------------------------------------------------------- 1 | .extern \ 2 | assert_fail, \ 3 | print_long, \ 4 | print_long_hex, \ 5 | print_string, \ 6 | ; 7 | /* ENTRY opens asm_main and sets up a frame pointer inside it; pushing %rbp also leaves the stack 16-byte aligned for calls. EXIT undoes the frame with leave. */ 8 | #define ENTRY \ 9 | .text ;\ 10 | .global asm_main ;\ 11 | asm_main: \ 12 | push %rbp ;\ 13 | mov %rsp, %rbp 14 | 15 | #define EXIT \ 16 | leave ;\ 17 | mov $0, %eax ;\ 18 | ret 19 | 20 | #define ASSERT_FAIL \ 21 | mov $__LINE__, %rdi ;\ 22 | call assert_fail 23 | 24 | #define ASSERT_EQ(x, y) \ 25 | cmp x, y ;\ 26 | je 1f ;\ 27 | ASSERT_FAIL ;\ 28 | 1: 29 | -------------------------------------------------------------------------------- /x86-64/lib/common_nasm.inc: -------------------------------------------------------------------------------- 1 | ; TODO: factor out with IA-32. 2 | ; Basically replace eax with rax. 3 | 4 | extern \ 5 | assert_fail, \ 6 | print_long, \ 7 | print_long_hex, \ 8 | print_string 9 | 10 | ; Structure macros 11 | 12 | %macro TEXT 0 13 | section .text 14 | %endmacro 15 | 16 | %macro DATA 0 17 | section .data 18 | %endmacro 19 | 20 | %macro RODATA 0 21 | section .rodata 22 | %endmacro 23 | 24 | %macro GLOBAL 1 25 | global %1 26 | %1: 27 | %endmacro 28 | 29 | ; Enter program that uses the C driver. 30 | ; The entry function is `asm_main`. 31 | %macro ENTRY 0 32 | TEXT 33 | GLOBAL asm_main 34 | ; To align stack so that calls can be made directly. 35 | sub rsp, 8 36 | %endmacro 37 | 38 | ; Exit program entered by `ENTRY`.
39 | %macro EXIT 0 40 | add rsp, 8 41 | mov rax, 0 42 | ret 43 | %endmacro 44 | 45 | ; Assert macros 46 | 47 | ; Assert %1 eq %2. %3 is emitted before the cmp operands (presumably a size specifier such as `qword` - confirm). 48 | %macro ASSERT_EQ 3 49 | cmp %3 %1, %2 50 | je %%ok 51 | ASSERT_FAIL 52 | %%ok: 53 | %endmacro 54 | ; NOTE(review): this overload takes only 2 parameters, so %3 below is not defined here; presumably it expands to nothing - confirm. 55 | %macro ASSERT_EQ 2 56 | ASSERT_EQ %1, %2, %3 57 | %endmacro 58 | ; Assert rax eq %1 59 | %macro ASSERT_EQ 1 60 | ASSERT_EQ rax, %1 61 | %endmacro 62 | ; Assert rax eq rbx 63 | %macro ASSERT_EQ 0 64 | ASSERT_EQ rax, rbx 65 | %endmacro 66 | 67 | ; Assert %1 neq %2 68 | %macro ASSERT_NEQ 2 69 | cmp %1, %2 70 | jne %%ok 71 | ASSERT_FAIL 72 | %%ok: 73 | %endmacro 74 | 75 | ; Assert that the jump instruction %1 (jX or jnX) is taken 76 | %macro ASSERT_FLAG 1 77 | %1 %%ok 78 | ASSERT_FAIL 79 | %%ok: 80 | %endmacro 81 | ; Call assert_fail with the current source line number in rdi 82 | %macro ASSERT_FAIL 0 83 | mov rdi, __LINE__ 84 | call assert_fail 85 | %endmacro 86 | -------------------------------------------------------------------------------- /x86-64/lib/driver.c: -------------------------------------------------------------------------------- 1 | int asm_main(void); 2 | 3 | int main() { 4 | return asm_main(); 5 | } 6 | -------------------------------------------------------------------------------- /x86-64/linux/Makefile: -------------------------------------------------------------------------------- 1 | # TODO Dry this out with 32-bit code.
2 | 3 | .POSIX: 4 | 5 | IN_EXT ?= .asm 6 | INC_EXT ?= .inc 7 | OUT_EXT ?= .out 8 | TMP_EXT ?= .o 9 | RUN ?= hello_world 10 | 11 | OUTS := $(patsubst %$(IN_EXT),%$(OUT_EXT),$(wildcard *$(IN_EXT))) 12 | INCS := $(wildcard *$(INC_EXT)) 13 | # Every target that does not produce a file of its own name is phony, `test` included. 14 | .PRECIOUS: %$(TMP_EXT) 15 | .PHONY: all clean run test 16 | 17 | all: $(OUTS) 18 | 19 | %$(OUT_EXT): %$(TMP_EXT) 20 | ld -o '$@' '$<' 21 | 22 | %$(TMP_EXT): %$(IN_EXT) $(INCS) 23 | nasm -w+all -f elf64 -o '$@' '$<' 24 | 25 | clean: 26 | rm -f *$(TMP_EXT) *$(OUT_EXT) 27 | 28 | run: all 29 | ./$(RUN)$(OUT_EXT) 30 | 31 | test: all 32 | for f in *$(OUT_EXT); do echo $$f; ./$$f; done 33 | -------------------------------------------------------------------------------- /x86-64/linux/README.md: -------------------------------------------------------------------------------- 1 | # Linux 2 | 3 | 1. [C from assembly](c-from-assembly/) 4 | 5 | ## System calls 6 | 7 | Linux 64-bit accepts both the old IA-32 `int 0x80` system calls and also the new `syscall` instruction, which should be used instead as it is faster. Support for the old calls likely exists so that you can run IA-32 elf files directly. 8 | 9 | `syscall` system calls have different numbers than the old IA-32, so watch out. 10 | 11 | Syscall parameters are passed as follows: 12 | 13 | - `rax`: system call number 14 | - `rdi`: parameter 1 15 | - `rsi`: parameter 2 16 | - `rdx` 17 | - `r10` 18 | - `r8` 19 | - `r9` 20 | 21 | The return value is stored in `rax`.
22 | -------------------------------------------------------------------------------- /x86-64/linux/c-from-assembly/Makefile: -------------------------------------------------------------------------------- 1 | .POSIX: 2 | 3 | IN_EXT ?= .asm 4 | INC_EXT ?= .inc 5 | OUT_EXT ?= .out 6 | TMP_EXT ?= .o 7 | RUN ?= main 8 | 9 | OUTS := $(patsubst %$(IN_EXT),%$(OUT_EXT),$(wildcard *$(IN_EXT))) 10 | INCS := $(wildcard *$(INC_EXT)) 11 | 12 | .PRECIOUS: %$(TMP_EXT) 13 | .PHONY: all clean run test 14 | 15 | all: $(OUTS) 16 | 17 | %$(OUT_EXT): %$(TMP_EXT) 18 | gcc -fno-pie -m64 -no-pie -pedantic-errors -o '$@' '$<' 19 | 20 | %$(TMP_EXT): %$(IN_EXT) $(INCS) 21 | nasm -w+all -f elf64 -o '$@' '$<' 22 | 23 | clean: 24 | rm -f *$(TMP_EXT) *$(OUT_EXT) 25 | 26 | run: all 27 | ./$(RUN)$(OUT_EXT) 28 | 29 | test: all 30 | for f in *$(OUT_EXT); do echo $$f; ./$$f; done 31 | -------------------------------------------------------------------------------- /x86-64/linux/c-from-assembly/README.md: -------------------------------------------------------------------------------- 1 | # C from assembly 2 | 3 | Call C from assembly. 4 | 5 | This is not portable because ANSI C does not specify the ABI. 6 | 7 | Things which may vary include: 8 | 9 | - calling conventions 10 | - method names. E.g. Watcom uses `puts_` instead of `puts` 11 | 12 | The only way to deal with that is by treating each implementation differently with macros. 
13 | -------------------------------------------------------------------------------- /x86-64/linux/c-from-assembly/hello.asm: -------------------------------------------------------------------------------- 1 | default rel 2 | extern puts 3 | section .rodata 4 | message db "hello", 0 5 | section .text 6 | global main 7 | main: 8 | sub rsp, 8 9 | lea rdi, [rel message] 10 | call puts 11 | xor eax, eax 12 | add rsp, 8 13 | ret 14 | -------------------------------------------------------------------------------- /x86-64/linux/c-from-assembly/printf.asm: -------------------------------------------------------------------------------- 1 | ; printf is a bit harder as it requires varargs setup. 2 | ; https://stackoverflow.com/questions/8194141/how-to-print-a-number-in-assembly-nasm/32853546#32853546 3 | default rel ; make [rel format] the default, you always want this. 4 | extern printf ; NASM requires declarations of external symbols, unlike GAS 5 | section .rodata 6 | format db "%#x", 10, 0 ; C 0-terminated string: "%#x\n" 7 | section .text 8 | global main 9 | main: 10 | sub rsp, 8 ; re-align the stack to 16, ready for another call 11 | 12 | ; Call printf. 13 | mov esi, 0x12345678 ; "%x" takes a 32-bit unsigned int 14 | lea rdi, [rel format] 15 | xor eax, eax ; AL=0 no FP args in XMM regs 16 | call printf 17 | 18 | ; Return from main. 19 | xor eax, eax 20 | add rsp, 8 21 | ret 22 | -------------------------------------------------------------------------------- /x86-64/linux/common.inc: -------------------------------------------------------------------------------- 1 | %macro save_syscall 0 2 | push rax 3 | push rdi 4 | push rsi 5 | push rdx 6 | push r10 7 | push r8 8 | push r9 9 | %endmacro 10 | ; Restore what save_syscall pushed: the pops must be in exactly the reverse order of the pushes. 11 | %macro load_syscall 0 12 | pop r9 13 | pop r8 14 | pop r10 15 | pop rdx 16 | pop rsi 17 | pop rdi 18 | pop rax 19 | %endmacro 20 | 21 | ; Write 64-bit register or address to stdout, most significant byte first (big endian). 22 | ; 23 | ; Registers are preserved, except rcx and r11, which the syscall instruction itself overwrites.
24 | ; 25 | ; Sample calls: 26 | ; 27 | ; write64 rax 28 | ; write64 _start 29 | ; 30 | ; @param 1 the value to write 31 | ; 32 | %macro write64 1 33 | save_syscall 34 | ; Must store the value in case it was passed 35 | ; as one of the registers used in the syscall. 36 | push rbx 37 | mov rbx, %1 38 | bswap rbx ; reverse the bytes so that the most significant one is written first 39 | push rbx ; dedicated 8-byte buffer on the stack: the saved registers are not overwritten 40 | mov rax, 1 41 | mov rdi, 1 42 | mov rsi, rsp 43 | mov rdx, 8 44 | syscall 45 | add rsp, 8 ; drop the buffer 46 | pop rbx 47 | load_syscall 48 | %endmacro 49 | 50 | %macro sys_exit 1 51 | mov rax, 60 52 | mov rdi, %1 53 | syscall 54 | %endmacro 55 | 56 | %macro sys_exit 0 57 | sys_exit 0 58 | %endmacro 59 | -------------------------------------------------------------------------------- /x86-64/linux/interactive/Makefile: -------------------------------------------------------------------------------- 1 | ../Makefile -------------------------------------------------------------------------------- /x86-64/linux/interactive/common.inc: -------------------------------------------------------------------------------- 1 | ../common.inc -------------------------------------------------------------------------------- /x86-64/linux/interactive/initial_state.asm: -------------------------------------------------------------------------------- 1 | ; Initial register state. 2 | 3 | ; Pass output through hd.
4 | 5 | %include "common.inc" 6 | 7 | section .text 8 | 9 | global _start 10 | _start: 11 | 12 | write64 rax 13 | write64 rbp 14 | write64 rbx 15 | write64 rcx 16 | write64 rdi 17 | write64 rdx 18 | write64 rsi 19 | write64 rsp 20 | write64 r8 21 | write64 r9 22 | write64 r10 23 | write64 r11 24 | write64 r12 25 | write64 r13 26 | write64 r14 27 | write64 r15 28 | 29 | sys_exit 30 | -------------------------------------------------------------------------------- /x86-64/linux/interactive/min_dummy.asm: -------------------------------------------------------------------------------- 1 | ; Contains a dummy instruction to differentiate from `min_empty`. 2 | section .text 3 | global _start 4 | _start: 5 | mov rax, 0 6 | -------------------------------------------------------------------------------- /x86-64/linux/interactive/min_empty.asm: -------------------------------------------------------------------------------- 1 | ; bash: ./min_empty.out: cannot execute binary file: Exec format error 2 | ; TODO why? Looks like text cannot be empty. 3 | section .text 4 | global _start 5 | _start: 6 | -------------------------------------------------------------------------------- /x86-64/linux/interactive/stack_top.asm: -------------------------------------------------------------------------------- 1 | ; See what `rsp` is worth at initialization. 2 | ; 3 | ; Similar to IA-32, except that this time we are at around 2^48 = 256 TiB. 4 | ; 5 | ; Should be piped into hd. 
6 | 7 | %include "common.inc" 8 | 9 | section .text 10 | 11 | global _start 12 | _start: 13 | 14 | write64 rsp 15 | push 1 16 | write64 rsp 17 | 18 | sys_exit 19 | -------------------------------------------------------------------------------- /x86-64/linux/zero-extend-32bit-memory/Makefile: -------------------------------------------------------------------------------- 1 | .POSIX: 2 | 3 | main.out: main.asm link.ld 4 | nasm -felf64 -o main.o main.asm 5 | ld -Tlink.ld -o main.out main.o 6 | 7 | clean: 8 | rm -f *.o *.out 9 | 10 | test: main.out 11 | ./main.out 12 | -------------------------------------------------------------------------------- /x86-64/linux/zero-extend-32bit-memory/README.md: -------------------------------------------------------------------------------- 1 | # Zero extend 32-bit memory 2 | 3 | TODO get working. 4 | 5 | Attempt to answer: 6 | http://stackoverflow.com/questions/33318342/when-is-it-better-for-an-assembler-to-use-sign-extended-relocation-like-r-x86-64 7 | 8 | The goal is to make 2 memory accesses: 9 | 10 | - one truncated sign extended to end in 1's 11 | - one with the full address (`movabs`) 12 | 13 | Current outcome: as soon as I put a single byte into `.data2`, segfault. Why? Linux does not want to play ball with the high address? 14 | -------------------------------------------------------------------------------- /x86-64/linux/zero-extend-32bit-memory/link.ld: -------------------------------------------------------------------------------- 1 | SECTIONS 2 | { 3 | . = 0x0000000000400000; 4 | .text : 5 | { 6 | *(.text) 7 | } 8 | .data : 9 | { 10 | *(.data) 11 | } 12 | . 
= 0xFFFFFFFF80000000; 13 | .data2 : 14 | { 15 | *(.data2) 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /x86-64/linux/zero-extend-32bit-memory/main.asm: -------------------------------------------------------------------------------- 1 | section .text 2 | global _start 3 | _start: 4 | 5 | ; This mov gets the 64-bit address truncated to 32-bits. 6 | ; The CPU sign extends it. 7 | mov byte al, 0xFFFFFFFF80000000 8 | mov [output], al 9 | 10 | mov rax, 1 11 | mov rdi, 1 12 | mov rsi, output 13 | mov rdx, 1 14 | syscall 15 | 16 | ; TODO do another move with the full explicit 64-bit address. 17 | ; Should give the same output. 18 | mov byte al, 0xFFFFFFFF80000000 19 | mov [output], al 20 | 21 | mov rax, 1 22 | mov rdi, 1 23 | mov rsi, output 24 | mov rdx, 1 25 | syscall 26 | 27 | mov rax, 60 28 | mov rdi, 0 29 | syscall 30 | section .data 31 | output: 32 | db 'a' 33 | section .data2 34 | input: 35 | db 'b' 36 | 37 | -------------------------------------------------------------------------------- /x86-64/main.asm: -------------------------------------------------------------------------------- 1 | %include "lib/common_nasm.inc" 2 | 3 | ENTRY 4 | 5 | ; # RIP relative addressing 6 | 7 | ; New addressing mode in which addresses are encoded relative to the RIP. 8 | 9 | ; Advantages: 10 | 11 | ; - easier to write position-independent code for shared libraries: with RIP you can just dump 12 | ; the `.text` and `.data` anywhere without rewriting all data accesses beforehand 13 | 14 | ; - generates shorter instructions, using 4 bytes to encode the address instead of 8. 15 | 16 | ; This seems to be the main reason why GCC 4.8 uses it by default everywhere. 17 | 18 | ; Downside of this: `.data` and `.text` must be within 4Gb of each other. 19 | ; But that should be fine.
20 | 21 | ; It does not seem to be possible to use it with Intel syntax directly: 22 | 23 | ;lea rax, [rip] 24 | ;lea rax, [rip + 4] 25 | 26 | ; Instead, it seems that you must use the `rel` or `abs` keyword, 27 | ; and possibly set their defaults http://www.nasm.us/doc/nasmdoc6.html#section-6.2.1 : 28 | 29 | ;lea rax, [rel _start] 30 | ;lea rax, [abs _start] 31 | ;lea rax, $ 32 | 33 | ; # pop only works with quadwords 34 | 35 | ; Some instructions do not work with operand sizes for which they work in 32 bit mode. 36 | ; E.g., you cannot do `pop eax`, only `pop rax`. 37 | 38 | ; ERROR 39 | ;pop eax 40 | 41 | ;mov rax, 1 42 | ;mov rdi, 1 43 | ;mov rsi, asserts_passed_str 44 | ;mov rdx, asserts_passed_str_len 45 | ;syscall 46 | 47 | EXIT 48 | -------------------------------------------------------------------------------- /x86-64/movabs.asm: -------------------------------------------------------------------------------- 1 | ; TODO review this, it is a mess. 2 | ; 3 | ; Most instructions cannot deal with 64-bit addresses: 4 | ; they simply use 32 bits and *sign* extend them to 64 bits. 5 | ; 6 | ; http://stackoverflow.com/questions/33318342/when-is-it-better-for-an-assembler-to-use-sign-extended-relocation-like-r-x86-64 7 | ; http://stackoverflow.com/questions/3623899/nasm-64-bit-immediate-address-for-movlps-gives-dword-data-exceeds-bounds 8 | ; https://reverseengineering.stackexchange.com/questions/2627/what-is-the-meaning-of-movabs-in-gas-x86-att-syntax 9 | ; 10 | ; For mov, rax is the only register that accepts it. 11 | ; TODO see other instructions. 12 | ; 13 | ; I think this is encoded as `MOV moffs64*,RAX`. TODO better understand the `moffs` thing. 14 | ; 15 | ; movabs is the AT&T name of the special form that accepts 64-bit immediates. 16 | ; 17 | ; Intel just puts it under MOV, and NASM just uses `mov`. 18 | ; 19 | ; NASM only gives warnings in case an impossible mov is coded, 20 | ; and silently truncates the address.
21 | ; 22 | ; http://stackoverflow.com/questions/19415184/load-from-a-64-bit-address-into-other-register-than-rax 23 | 24 | %include "lib/common_nasm.inc" 25 | 26 | ENTRY 27 | ; TODO what is the case where only rax works? 28 | ; All of the below give the same error. 29 | ; Error: dword exceeds bounds. 30 | ;mov [0x8000_0000_0000_0000], rax 31 | ;mov [0x8000_0000_0000_0000], rbx 32 | ;mov rax, [0x8000_0000_0000_0000] 33 | ;mov rbx, [0x8000_0000_0000_0000] 34 | 35 | ; These assemble. TODO create a minimal example well defined example that does 36 | ; not access random memory addresses. 37 | ; mov rax, 0x8000_0000_0000_0000 38 | ; mov qword [rax], rbx 39 | ; mov rbx, 0x8000_0000_0000_0000 40 | ; mov qword [rbx], rcx 41 | 42 | EXIT 43 | -------------------------------------------------------------------------------- /x86-64/params.makefile: -------------------------------------------------------------------------------- 1 | BITS := 64 2 | PHONY_MAKES = gas linux linux/c-from-assembly 3 | # TODO get working. 4 | # linux/zero-extend-32bit-memory 5 | -------------------------------------------------------------------------------- /x86-64/provision: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | sudo apt-get install make 4 | sudo apt-get install nasm 5 | --------------------------------------------------------------------------------