├── .gitignore
├── .travis.yml
├── 8086-pinout.gif
├── Makefile
├── README.md
├── Vagrantfile
├── assemblers.md
├── bibliography.md
├── binutils.md
├── branch-prediction.md
├── cache.md
├── calling-conventions.md
├── cdecl.asm
├── cdecl.md
├── compiler-generated
    ├── .gitignore
    ├── Makefile
    ├── README.md
    ├── atomic.cpp
    ├── block.c
    ├── bound_simplification.c
    ├── bss.c
    ├── bss_static.c
    ├── builtin_inline.c
    ├── builtin_no_side_effect.c
    ├── callee_saved_registers.c
    ├── cast_int_long.c
    ├── char_array_init.c
    ├── char_array_init_global.c
    ├── char_array_init_long.c
    ├── char_pointer_init.c
    ├── char_pointer_init_global.c
    ├── class.cpp
    ├── common.c
    ├── common_large.c
    ├── const.c
    ├── const_cast.c
    ├── constexpr.cpp
    ├── cse.c
    ├── cse_function.c
    ├── cse_large_function.c
    ├── data.c
    ├── double_assign_constant.c
    ├── double_sum.c
    ├── enum.c
    ├── float2int.c
    ├── float2int_direct_vs_variable.c
    ├── float_assign_constant.c
    ├── float_sum.c
    ├── for_empty.c
    ├── function_call_another.c
    ├── function_int_int_int.c
    ├── function_many_arguments.c
    ├── function_many_arguments_2calls.c
    ├── function_many_arguments_64.c
    ├── gcc.md
    ├── gcc
    │   ├── Makefile
    │   ├── Makefile_params
    │   ├── builtin_expect.c
    │   ├── builtin_expect_off.c
    │   ├── interactive
    │   │   ├── Makefile
    │   │   ├── builtin_expect_off_perf.c
    │   │   └── builtin_expect_perf.c
    │   └── restrict.cpp
    ├── global_int.c
    ├── hello_world.c
    ├── if.c
    ├── int_assign_constant.c
    ├── ld.c
    ├── long_double_assign_constant.c
    ├── long_double_sum.c
    ├── memcmp.c
    ├── modulo.c
    ├── precalculate_loop.c
    ├── precalculate_loop_unknown_init.c
    ├── register.c
    ├── restrict.c
    ├── return_struct.c
    ├── sqrt.c
    ├── static_local.c
    ├── switch3.c
    ├── switch6.c
    ├── switch6_if.c
    ├── switch6_spread.c
    ├── switch6_very_spread.c
    ├── tail_call.c
    ├── template.cpp
    ├── use_rax.c
    ├── use_rax_return.c
    ├── virtual.cpp
    └── while_infinite.c
├── containers.md
├── cpu-benchmarks.md
├── cpu-bugs.md
├── cpu-hardware-design.md
├── cpu-optimizations.md
├── cpu-pins.md
├── elf.md
├── endianess.asm
├── extensions.md
├── flynns-taxonomy.md
├── fpu.md
├── gas
    ├── Makefile
    ├── README.md
    ├── altmacro.S
    ├── ascii.S
    ├── asciz.S
    ├── bibliography.md
    ├── char_literal.S
    ├── current_address.S
    ├── equ.S
    ├── extern.md
    ├── gasversion.S
    ├── global.S
    ├── irp.S
    ├── lib
    ├── linux
    │   ├── Makefile
    │   ├── README.md
    │   └── hello_world.S
    ├── local_label.S
    ├── local_symbol.S
    ├── macro.S
    ├── params.makefile
    ├── print.S
    ├── push.S
    ├── section.S
    ├── size.S
    └── type.S
├── getting-started.md
├── how-to-learn.md
├── implementations.md
├── instruction-level-parallelism.md
├── instruction-sets.md
├── intel-processor-history.md
├── intel-vs-atet.md
├── intel2gas.md
├── interactive
    ├── Makefile
    ├── README.md
    ├── int3.asm
    ├── int4.asm
    ├── lib
    └── test
├── ld-linux-so.md
├── ld.md
├── ldd.md
├── lib
    ├── Makefile
    ├── README.md
    ├── common.asm
    ├── common_gas.h
    ├── common_nasm.inc
    └── driver.c
├── linker-scripts
    ├── Makefile
    ├── README.md
    ├── TODO.md
    ├── common.h
    ├── data.S
    ├── data.ld
    ├── edata.S
    ├── edata.ld
    ├── flags.S
    ├── flags.ld
    ├── hello_world.S
    ├── hello_world.ld
    ├── keep.S
    ├── keep.ld
    ├── min.S
    ├── min.ld
    ├── symbol.S
    └── symbol.ld
├── linux
    ├── Makefile
    ├── README.md
    ├── c-from-assembly
    │   ├── README.md
    │   ├── main
    │   │   ├── Makefile
    │   │   ├── README.md
    │   │   └── main.asm
    │   ├── start-gcc
    │   │   ├── Makefile
    │   │   └── main.asm
    │   └── start
    │   │   ├── Makefile
    │   │   └── main.asm
    ├── custom-entry-gcc
    │   ├── Makefile
    │   ├── README.md
    │   └── main.c
    ├── custom-entry
    │   ├── Makefile
    │   └── hello_world.asm
    ├── hello_world.asm
    ├── hello_world_min.asm
    ├── interactive
    │   ├── Makefile
    │   ├── memory_layout.asm
    │   └── no_exit.asm
    ├── min.asm
    ├── stack_top.asm
    └── system_call.md
├── microcode.md
├── microcontrollers.md
├── nasm
    ├── Makefile
    ├── comment.asm
    ├── current_address.asm
    ├── define.asm
    ├── equ.asm
    ├── if.asm
    ├── include.asm
    ├── included.inc
    ├── introduction.md
    ├── lib
    ├── local_labels.asm
    ├── ptr.asm
    ├── ram.asm
    └── symbol_colon.asm
├── objcopy.md
├── objdump.md
├── other-architectures.md
├── params.makefile
├── pipeline.md
├── pros-and-cons-of-assembly.md
├── provision
├── pusha.asm
├── readelf.md
├── registers.asm
├── risc-vs-cisc.md
├── segment
├── segment_registers.asm
├── size.md
├── stdcall.md
├── string-instructions.md
├── superscalar.md
├── synchronization.md
├── system-vs-application-programming.md
├── vliw.md
└── x86-64
    ├── Makefile
    ├── README.md
    ├── Vagrantfile
    ├── calling-convention.md
    ├── cmp-sign-extend.asm
    ├── gas
        ├── Makefile
        ├── lib
        ├── movabs.S
        └── params.makefile
    ├── lib
        ├── Makefile
        ├── common.asm
        ├── common_gas.h
        ├── common_nasm.inc
        └── driver.c
    ├── linux
        ├── Makefile
        ├── README.md
        ├── c-from-assembly
        │   ├── Makefile
        │   ├── README.md
        │   ├── hello.asm
        │   └── printf.asm
        ├── common.inc
        ├── interactive
        │   ├── Makefile
        │   ├── common.inc
        │   ├── initial_state.asm
        │   ├── min_dummy.asm
        │   ├── min_empty.asm
        │   └── stack_top.asm
        └── zero-extend-32bit-memory
        │   ├── Makefile
        │   ├── README.md
        │   ├── link.ld
        │   └── main.asm
    ├── main.asm
    ├── movabs.asm
    ├── params.makefile
    └── provision


/.gitignore:
--------------------------------------------------------------------------------
 1 | _out/
 2 | _tmp/
 3 | *.tmp
 4 | tmp.*
 5 | 
 6 | # Compiled Object files
 7 | *.slo
 8 | *.lo
 9 | *.out
10 | *.o
11 | *.so
12 | *.dylib
13 | *.lai
14 | *.la
15 | *.a
16 | 
17 | .vagrant
18 | 
19 | # Image files
20 | *.bin
21 | *.img
22 | *.iso
23 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | before_install:
2 |   - sudo apt-get update -q
3 |   - sudo apt-get install build-essential gcc-multilib nasm
4 | script:
5 |   - uname -a
6 |   - make TRAVIS=y test
7 | 


--------------------------------------------------------------------------------
/8086-pinout.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cirosantilli/x86-assembly-cheat/0c64fc5961bbdcbf442cec9bb98d70e889b1984f/8086-pinout.gif


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | .POSIX:
 2 | 
 3 | BITS = 32
 4 | CC = gcc
 5 | CFLAGS = -ggdb3 -m$(BITS) -O0 -pedantic-errors -std=c99 -Wall -Wextra -pedantic
 6 | # To match nasm.
 7 | DISAS_FLAVOR = intel
 8 | GAS_EXT = .S
 9 | LIB_DIR = lib/
10 | NASM = nasm -f elf$(BITS) -gdwarf -w+all
11 | NASM_EXT = .asm
12 | OBJ_EXT = .o
13 | OUT_EXT = .out
14 | PHONY_MAKES =
15 | RUN = main
16 | TEST = test
17 | export
18 | # When TRAVIS is empty, also disable PIE at compile and link time (presumably the Travis toolchain rejects these flags; TODO confirm).
19 | ifeq ($(TRAVIS),)
20 |   CFLAGS += -fno-pie -no-pie
21 | endif
22 | # Optional per-directory settings, read after the defaults above; the leading `-` ignores a missing file.
23 | -include params.makefile
24 | # One $(OUT_EXT) executable for every GAS or NASM source file found in this directory.
25 | OUTS := $(foreach IN_EXT,$(GAS_EXT) $(NASM_EXT),$(patsubst %$(IN_EXT),%$(OUT_EXT),$(wildcard *$(IN_EXT))))
26 | # Keep the intermediate object files, which make would otherwise delete after linking.
27 | .PRECIOUS: %$(OBJ_EXT)
28 | .PHONY: all clean driver run test $(PHONY_MAKES)
29 | # Build everything in this directory, then every PHONY_MAKES subdirectory; stop at the first sub-make that fails.
30 | all: driver $(OUTS) $(PHONY_MAKES)
31 | 	for phony in $(PHONY_MAKES); do \
32 | 	  $(MAKE) -C $${phony} || exit 1; \
33 | 	done
34 | # Link an object file together with every object found in $(LIB_DIR).
35 | %$(OUT_EXT): %$(OBJ_EXT)
36 | 	$(CC) $(CFLAGS) -o '$@' '$<' $(LIB_DIR)*$(OBJ_EXT)
37 | # Assemble NASM sources.
38 | %$(OBJ_EXT): %$(NASM_EXT)
39 | 	$(NASM) -o '$@' '$<'
40 | # Assemble GAS sources through the C compiler driver.
41 | %$(OBJ_EXT): %$(GAS_EXT)
42 | 	$(CC) $(CFLAGS) -c -o '$@' '$<'
43 | # Remove build outputs here, in $(LIB_DIR) and in every PHONY_MAKES subdirectory.
44 | clean:
45 | 	rm -f *$(OBJ_EXT) *$(OUT_EXT)
46 | 	$(MAKE) -C $(LIB_DIR) '$@'
47 | 	for phony in $(PHONY_MAKES); do \
48 | 	  $(MAKE) -C $${phony} clean; \
49 | 	done
50 | # Run the default target in $(LIB_DIR), whose objects are linked into every executable above.
51 | driver:
52 | 	$(MAKE) -C $(LIB_DIR)
53 | # `make gdb-X`: debug X$(OUT_EXT) with ~/.gdbinit skipped (--nh), the split layout, and a breakpoint on asm_main.
54 | gdb-%: %$(OUT_EXT)
55 | 	gdb -q --nh -ex 'set disassembly-flavor $(DISAS_FLAVOR)' -ex 'layout split' -ex 'break asm_main' -ex 'run' '$<'
56 | # `make gdb2-X`: same breakpoint, but with the user's own GDB configuration and the default layout.
57 | gdb2-%: %$(OUT_EXT)
58 | 	gdb -q -ex 'break asm_main' -ex 'run' '$<'
59 | # `make run-X`: build and run X$(OUT_EXT).
60 | run-%: %$(OUT_EXT)
61 | 	./'$<'
62 | # Run ./$(TEST) if it is executable, else every *$(OUT_EXT) here; then test each PHONY_MAKES subdirectory. Any failure fails the target.
63 | test: all
64 | 	@\
65 | 	if [ -x $(TEST) ]; then \
66 | 	  ./$(TEST) '$(OUT_EXT)' || exit 1 ;\
67 | 	else\
68 | 	  fail=false ;\
69 | 	  for t in *"$(OUT_EXT)"; do\
70 | 	    if ! ./"$$t"; then \
71 | 	      fail=true ;\
72 | 	      break ;\
73 | 	    fi ;\
74 | 	  done ;\
75 | 	  if $$fail; then \
76 | 	    echo "TEST FAILED: $$t" ;\
77 | 	    exit 1 ;\
78 | 	  fi ;\
79 | 	fi; \
80 | 	for phony in $(PHONY_MAKES); do \
81 | 	  $(MAKE) -C $${phony} test || exit 1; \
82 | 	done; \
83 | 	echo 'ALL TESTS PASSED'
84 | 


--------------------------------------------------------------------------------
/Vagrantfile:
--------------------------------------------------------------------------------
 1 | Vagrant.configure(2) do |config|
 2 |   config.vm.box = 'hashicorp/precise32'
 3 |   config.vm.provision :shell, privileged: false, path: 'provision'
 4 |   config.vm.provider "virtualbox" do |v|
 5 |     v.customize [
 6 |       'modifyvm', :id,
 7 |       '--cpus',   1,
 8 |       '--memory', 128,
 9 |     ]
10 |   end
11 | end
12 | 


--------------------------------------------------------------------------------
/assemblers.md:
--------------------------------------------------------------------------------
 1 | # Assemblers
 2 | 
 3 | Programs that transform assembly source text into machine code.
 4 | 
 5 | Most of this tutorial is written in NASM, and some parts in GAS.
 6 | 
 7 | ## MASM
 8 | 
 9 | Microsoft.
10 | 
11 | x86.
12 | 
13 | ## TASM
14 | 
15 | Borland.
16 | 
17 | Stands for "turbo assembler". LOL, why not "BASM" instead?
18 | 
19 | Windows only.
20 | 


--------------------------------------------------------------------------------
/bibliography.md:
--------------------------------------------------------------------------------
  1 | # Bibliography
  2 | 
  3 | ## Manuals
  4 | 
  5 | -   [IA-32 manual](http://www.intel.com/content/www/us/en/processors/architectures-software-developer-manuals.html)
  6 | 
  7 |     Intel man pages, *the* official source.
  8 | 
  9 |     Also includes Intel 64.
 10 | 
 11 |     Interesting sections:
 12 | 
 13 |     - Vol 1 5: instruction overview. In particular, groups instructions logically.
 14 |     - Vol 2 3: full instruction listing and API.
 15 | 
 16 | -   [AMD64 manuals](http://developer.amd.com/resources/documentation-articles/developer-guides-manuals/)
 17 | 
 18 |     Under the manuals section.
 19 | 
 20 |     Original specification of x86-64, which Intel then implemented as well.
 21 | 
 22 | -   [Intel 64 manual](http://www.intel.com/content/www/us/en/processors/architectures-software-developer-manuals.html)
 23 | 
 24 |     Intel man pages, *the* official source.
 25 | 
 26 |     Also includes IA-32.
 27 | 
 28 |     Get yourself the 3 volume IA documentation and the optimization manual and sleep with them.
 29 | 
 30 |     Interesting chapters:
 31 | 
 32 |     -   Volume 1 chapter 5: list of instructions grouped by type, good to find new instructions to play with
 33 | 
 34 |     -   Volume 2 chapter 3: full alphabetic list of all instructions
 35 | 
 36 |     The version is given on the front page, e.g.:
 37 | 
 38 |         Order Number: 325383-053US
 39 |         January 2015
 40 | 
 41 |     `325383` is the document (different for volumes 1-3), and 053 the version (TODO US is the language? Are there translations?)
 42 | 
 43 | -   [System V Application Binary Interface v0.99.7][System V ABI AMD64]
 44 | 
 45 | -   <https://github.com/intelxed/xed/tree/master/datafiles> open source machine readable definition of instructions (non-PDF), only published in 2016
 46 | 
 47 | ## Tutorials
 48 | 
 49 | -   <http://www.tldp.org/HOWTO/Assembly-HOWTO/index.html>
 50 | 
 51 | -   <http://en.wikibooks.org/wiki/X86_Assembly/X86_Architecture>
 52 | 
 53 | -   Programming from the ground up.
 54 | 
 55 |     <http://download.savannah.gnu.org/releases/pgubook/>
 56 | 
 57 |     Linux GAS, for complete programming beginners.
 58 | 
 59 | -   PC assembly language.
 60 | 
 61 |     <http://www.drpaulcarter.com/pcasm/>
 62 | 
 63 |     NASM.
 64 | 
 65 | -   <https://github.com/dennis714/RE-for-beginners>
 66 | 
 67 |     Reverse engineering for beginners.
 68 | 
 69 |     Lots of info on what C code maps to in assembly, and how to look for what matters to reverse engineer stuff.
 70 | 
 71 | -   <http://www.agner.org/optimize/>
 72 | 
 73 |     Highly regarded optimization guidelines for x86 and C++.
 74 | 
 75 | -   <http://courses.cs.washington.edu/courses/cse471/12sp/lectures.html>
 76 | 
 77 |     University of Washington course that covers branch prediction, caches and other CPU internals.
 78 | 
 79 | With runnable source code:
 80 | 
 81 | -   <https://github.com/0xAX/asm>: not extensive, but clean and highly commented.
 82 | 
 83 | Without runnable source code:
 84 | 
 85 | -   <http://www.egr.unlv.edu/~ed/assembly64.pdf> by Ed Jorgensen.
 86 | 
 87 |     A professional Creative Commons book that covers a lot of the basics.
 88 | 
 89 | ## Unofficial API references
 90 | 
 91 | -   [Wikipedia Instruction List](http://en.wikipedia.org/wiki/X86_instruction_listings)
 92 | 
 93 |     Instruction list.
 94 | 
 95 | ## Libraries
 96 | 
 97 | -   <https://github.com/aquynh/capstone>
 98 | 
 99 |     Looks like the best free disassembly library out there.
100 | 
101 |     Binutils cannot be used as a library to disassemble? Sad.
102 | 
103 | ## Cool projects
104 | 
105 | Useless but cool and educational:
106 | 
107 | -   <https://github.com/flouthoc/calc.asm>
108 | 
109 | [Itanium C++ ABI]: http://mentorembedded.github.io/cxx-abi/abi.html
110 | [System V ABI AMD64]: http://www.x86-64.org/documentation_folder/abi-0.99.pdf
111 | 


--------------------------------------------------------------------------------
/binutils.md:
--------------------------------------------------------------------------------
 1 | # binutils
 2 | 
 3 | GNU set of utilities to assemble, link, inspect and modify compiled code.
 4 | 
 5 | Official site: <http://sourceware.org/binutils/>.
 6 | 
 7 | As stated there, the two main utilities are `ld`, the GNU linker, and `as`, the GNU assembler.
 8 | 
 9 | Binutils is a dependency of GCC: GCC deals with the high level to assembler source translation, and Binutils does the rest.
10 | 
11 | Also contains GDB.
12 | 
13 | ## Makefile
14 | 
15 |     sudo apt-get build-dep gdb
16 |     mkdir binutils-gdb
17 |     cd binutils-gdb
18 |     git clone git://sourceware.org/git/binutils-gdb.git src
19 |     cd src
20 |     git checkout binutils-2_25
21 |     cd ..
22 |     mkdir build
23 |     cd build
24 |     ../src/configure --with-sysroot=/ --prefix="$(pwd)/../install"
25 |     time make -j5
26 |     make install
27 | 
28 | Try it out on Linux:
29 | 
30 |     cd ../install/bin
31 | 
32 |     cat <<EOF >main.S
33 |     .data
34 |         s:
35 |             .ascii "Hello world!\n"
36 |             len = . - s
37 |     .text
38 |         .global _start
39 |         _start:
40 |             movl $4, %eax
41 |             movl $1, %ebx
42 |             movl $s, %ecx
43 |             movl $len, %edx
44 |             int $0x80
45 |             movl $1, %eax
46 |             movl $0, %ebx
47 |             int $0x80
48 |     EOF
49 |     ./as -o main.o main.S
50 |     ./ld -o main.out main.o
51 |     ./main.out
52 | 
53 | Took 3 minutes on a 2013 computer.
54 | 
55 | Why `--with-sysroot` is needed <https://sourceware.org/ml/binutils/2007-02/msg00274.html>. Why not make it the default?!
56 | 
57 | ## Source tree
58 | 
59 | -   `gas`: assembler
60 | 
61 | -   `ld`: linker
62 | 
63 | -   `gold`: new faster linker by Google
64 | 
65 | -   `gdb`: GDB
66 | 
67 | -   `bfd`: Binary File Descriptor?
68 | 
69 |     Contains a generic model that supports multiple output formats: ELF, Mach-O, PE, etc.
70 | 
71 |     Used by most utilities.
72 | 
73 | -   `opcodes`: low-level CPU instruction descriptions.
74 | 
75 |     Key to assembly and disassembly, used by many utilities.
76 | 
77 | ### GAS
78 | 
79 | Source tree:
80 | 
81 | - `read.c`: the main parser
82 | 
83 | Vocabulary:
84 | 
85 | -   `po`: Pseudo-op, same as assembly directive, e.g. `.macro`.
86 | 
87 | -   `poc`: Pseudo-op Code
88 | 
89 | -   `TC`: TODO? Tool Chain? E.g. `TC_START_LABEL` in `read.c`, `config/tc-XXX` for different architectures.
90 | 
91 | -   `md`: Machine Description. Each one is encoded in files under `config/` that encode the binary representation of structures.
92 | 
93 |     Many identifiers also get this prefix.
94 | 
95 | GAS compiles for many targets (arch + container), which have many (often unstandardized) syntaxes. The parsing is well-factored however. For this reason, make sure to specify your target whenever considering a concept. E.g., even things like the line-comment character and the case sensitivity of directives may change across architectures.
96 | 


--------------------------------------------------------------------------------
/branch-prediction.md:
--------------------------------------------------------------------------------
1 | # Branch prediction
2 | 
3 | Only makes sense in pipelined processors: at a branch, it tries to guess which side will be taken, and puts those instructions in the pipeline.
4 | 
5 | - <http://en.wikipedia.org/wiki/Speculative_execution>
6 | - <http://en.wikipedia.org/wiki/Branch_predictor>
7 | - <http://en.wikipedia.org/wiki/Memory_dependence_prediction>
8 | 


--------------------------------------------------------------------------------
/cache.md:
--------------------------------------------------------------------------------
 1 | # Cache
 2 | 
 3 | - <https://en.wikipedia.org/wiki/CPU_cache>
 4 | - <http://stackoverflow.com/questions/16699247/what-is-cache-friendly-code>
 5 | - <http://stackoverflow.com/questions/9936132/why-does-the-order-of-the-loops-affect-performance-when-iterating-over-a-2d-arra>
 6 | - <http://stackoverflow.com/questions/8469427/how-and-when-to-align-to-cache-line-size>
 7 | - <http://stackoverflow.com/questions/763262/how-does-one-write-code-that-best-utilizes-the-cpu-cache-to-improve-performance>
 8 | - <http://stackoverflow.com/questions/7905760/matrix-multiplication-small-difference-in-matrix-size-large-difference-in-timi>
 9 | - <http://stackoverflow.com/questions/8547778/why-is-one-loop-so-much-slower-than-two-loops>
10 | 
11 | ## Example
12 | 
13 | Shut up and do an ASCII art example, first direct mapped then set.
14 | 
15 | ## Direct mapped
16 | 
17 | One possible cache location per memory address.
18 | 
19 | Upside: simple circuit, fast to find which entry it is, and small area.
20 | 
21 | Downside: you might invalidate an entry that was recently accessed, even if all other entries are old.
22 | 
23 | ## Fully associative
24 | 
25 | Any memory address can be stored in any cache entry, so there are no conflict misses.
26 | 
27 | But of course, every lookup must compare the tag against all entries at once, which is expensive in hardware, so this is only practical for small caches.
28 | 
29 | ## Set associative
30 | 
31 | Middle ground between direct mapped and fully associative.
32 | 
33 | - 2 way associative example <https://www.youtube.com/watch?v=mCF5XNn_xfA>
34 | 
35 | Now the address specifies the set where the data might be, and the tag can be anywhere in that set.
36 | 
37 | Unlike direct mapped, you now have the fun choice of which entry to evict when a set is full: <https://en.wikipedia.org/wiki/Cache_replacement_policies>
38 | 
39 | ## Bits
40 | 
41 | Forgetting SMP coherency.
42 | 
43 | ### Validity bit
44 | 
45 | If false, indicates that the given data is invalid, and must be re-fetched.
46 | 
47 | When it is set to invalid:
48 | 
49 | - at startup, everything is set invalid, otherwise we wouldn't be able to differentiate between valid data and the noise present at startup. This is the major use case.
50 | - another processor modifies main memory for a cache line that we hold: <https://en.wikipedia.org/wiki/Bus_snooping>
51 | 
52 | ### Dirty bit
53 | 
54 | Set whenever the CPU writes to cache, unset when cache is written to main memory.
55 | 
56 | ## Tag
57 | 
58 | Part of the original address (MSB), stored in the cache, to disambiguate if a cache line is a hit or not.
59 | 
60 | ## Virtual or physical memory
61 | 
62 | Four possibilities: virtual or physical addressed or tagged.
63 | 
64 | TODO: which one is best / most common and why?
65 | 
66 | ## Cache coherency
67 | 
68 | You have many CPUs modifying memory. How do you keep their caches up to date?
69 | 
70 | - <https://en.wikipedia.org/wiki/Cache_coherence>
71 | - <https://en.wikipedia.org/wiki/Snarfing>
72 | - <https://en.wikipedia.org/wiki/Bus_snooping>
73 | - <https://en.wikipedia.org/wiki/Dragon_protocol>
74 | - <https://en.wikipedia.org/wiki/Firefly_(cache_coherence_protocol)>
75 | - <https://en.wikipedia.org/wiki/Write-once_(cache_coherence)>
76 | - <https://en.wikipedia.org/wiki/MESIF_protocol>
77 | - <https://en.wikipedia.org/wiki/MERSI_protocol>
78 | - <https://en.wikipedia.org/wiki/MOESI_protocol>
79 | - <https://en.wikipedia.org/wiki/MOSI_protocol>
80 | - <https://en.wikipedia.org/wiki/MESI_protocol>
81 | - <https://en.wikipedia.org/wiki/MSI_protocol>
82 | - ARM AMBA 4 ACE
83 | 
84 | ## Register vs cache
85 | 
86 | TODO physically different? Apparently not, both made of flip-flops (SRAM), registers can be seen as an L0 cache.
87 | 
88 | - <http://stackoverflow.com/questions/3500491/are-cpu-registers-and-cpu-cache-different>
89 | - <http://stackoverflow.com/questions/14504734/cache-or-registers-which-is-faster>
90 | - <http://superuser.com/questions/208932/difference-between-cache-memory-and-register>
91 | - <https://www.quora.com/How-does-a-cache-memory-differ-from-registers>
92 | 


--------------------------------------------------------------------------------
/calling-conventions.md:
--------------------------------------------------------------------------------
 1 | # Calling convention
 2 | 
 3 | <https://en.wikipedia.org/wiki/X86_calling_conventions>
 4 | 
 5 | The calling convention specifies how exactly function call and return are implemented in assembly.
 6 | 
 7 | ## C calling conventions
 8 | 
 9 | ANSI C does not specify the assembly-level calling convention to be used by implementations, nor any means to control it.
10 | 
11 | Some compilers do however contain extensions that allow fixing a given calling convention.
12 | 
13 | GCC allows specifying the calling convention explicitly as:
14 | 
15 |     void f (int i) __attribute__ ((cdecl));
16 | 
17 | where `cdecl` is the name of the calling convention.
18 | 
19 | In IA32, cdecl is the most popular in Linux, and stdcall the most popular in Windows, but there exist many others.
20 | 
21 | ## Call C function from assembly
22 | 
23 | To call a C function from assembly you need to:
24 | 
25 | -   use the same calling convention that the C code was compiled with.
26 | 
27 |     Remember that ANSI C does not specify this.
28 | 
29 | -   use the correct naming convention of the libraries you want to link to.
30 | 
31 |     For example, `exit` may be called `_exit` on certain compilers such as GCC elf format.
32 | 
33 |     This is not specified by ANSI C, so the only portable alternative is via macros.
34 | 


--------------------------------------------------------------------------------
/cdecl.asm:
--------------------------------------------------------------------------------
 1 | ; # cdecl
 2 | ; ENTRY, EXIT and ASSERT_EQ are not defined in this file; presumably macros from the include below (TODO confirm).
 3 | %include "lib/common_nasm.inc"
 4 | 
 5 | ENTRY
 6 | 
 7 |     ; Recursive factorial: the argument is pushed on the stack and the result is read from eax.
 8 | 
 9 |         push dword 5
10 |         call factorial_rec_cdecl
11 |         ; cdecl: the caller removes the arguments from the stack after the call.
12 |         ; This allows for varargs like `printf`.
13 |         add esp, 4
14 |         ASSERT_EQ eax, 120
15 | 
16 |         push dword 1
17 |         call factorial_rec_cdecl
18 |         add esp, 4
19 |         ASSERT_EQ eax, 1
20 | 
21 |     ; Non-recursive factorial: same calling sequence as above.
22 | 
23 |         push dword 5
24 |         call factorial_norec_cdecl
25 |         add esp, 4
26 |         ASSERT_EQ eax, 120
27 | 
28 |         push dword 1
29 |         call factorial_norec_cdecl
30 |         add esp, 4
31 |         ASSERT_EQ eax, 1
32 | 
33 | EXIT
34 | 
35 | ; Recursive factorial. cdecl calling convention: one dword argument on the stack, result in eax.
36 | factorial_rec_cdecl:
37 |     enter 0, 0
38 |     ; -  `[ebp - 4]` would be the first local variable (none is allocated here)
39 |     ; -  `[ebp]` is the caller's `ebp`, saved by `enter`
40 |     ; -  `[ebp + 4]` is the address to return to pushed by `call`
41 |     ; -  `[ebp + 8]` first argument passed. For this to be true for any number of arguments,
42 |     ;    they must be pushed right-to-left.
43 |     ; -  `[ebp + 12]` second argument passed
44 |     cmp dword [ebp + 8], 1
45 |     jbe factorial_rec_cdecl_base    ; n <= 1 (unsigned): 0! = 1! = 1, so 0 no longer recurses forever
46 |         mov eax, [ebp + 8]          ; eax is caller-saved scratch; ebx is callee-saved and must not be clobbered
47 |         dec eax
48 |         push eax
49 |         call factorial_rec_cdecl
50 |         add esp, 4
51 |         mul dword [ebp + 8]         ; edx:eax = (n - 1)! * n; edx is caller-saved as well
52 |     jmp factorial_rec_cdecl_return
53 |     factorial_rec_cdecl_base:
54 |         mov eax, 1
55 |     factorial_rec_cdecl_return:
56 |     leave
57 |     ret
58 | 
59 | ; Non-recursive factorial. cdecl calling convention: one dword argument on the stack, result in eax.
60 | factorial_norec_cdecl:
61 |     ; No need for `enter` since we are non-recursive.
62 |     ; This does not affect in any way the cdecl calling convention:
63 |     ; enter and leave are invisible from the outside.
64 |     mov ecx, [esp + 4]
65 |     mov eax, 1
66 |     ; 0! = 1. Without this guard `loop` would decrement ecx from 0 to 0xFFFFFFFF and iterate 2^32 times.
67 |     jecxz factorial_norec_cdecl_return
68 |     factorial_norec_cdecl_loop:
69 |         mul ecx
70 |     loop factorial_norec_cdecl_loop
71 |     factorial_norec_cdecl_return:
72 |     ret
73 | 


--------------------------------------------------------------------------------
/cdecl.md:
--------------------------------------------------------------------------------
 1 | # cdecl
 2 | 
 3 | TODO move in with the code.
 4 | 
 5 | De facto standard C calling convention used by GCC.
 6 | 
 7 | Does not have a formal standard, so different compilers may have incompatible versions of cdecl.
 8 | 
 9 | -   return address is put on the stack (usually via `call`/`ret` instructions)
10 | 
11 | -   arguments are passed on stack
12 | 
13 |     -   always pass `dword` arguments
14 | 
15 |         If you want to pass a single byte, first convert to dword
16 | 
17 |     -   arguments are put on stack in inverse order from declaration
18 | 
19 |         This allows for functions with arbitrary numbers of args such as `printf`:
20 | 
21 |         This way the format string is always at the same position: it is pushed last, so it sits right above the return address.
22 | 
23 |     -   arguments are not popped before return.
24 | 
25 |         If you want to use their values, use ESP pointer
26 | 
27 | -   return value is put in:
28 | 
29 |     -   EAX if integer or pointer.
30 | 
31 |         If 64-bit, put in edx:eax.
32 | 
33 |     -   ST0 x87 register if floating point.
34 | 
35 |     -   for structures, there is some divergence between different compilers.
36 | 
37 |         For small structures, some compilers may choose to return the struct on multiple registers such as EAX and EDX.
38 | 
39 |         For larger structures which do not fit into registers, a common technique is for the compiler to automatically add a hidden pointer parameter to the function and automatically make the caller allocate memory and pass a pointer to that memory.
40 | 
41 | -   EAX, ECX and EDX may change after the call and must be saved by the caller if he wants to keep those.
42 | 
43 |     All other registers are guaranteed not to change after the call. They may be temporarily changed inside the callee, as long as it restores them before returning.
44 | 
45 | -   the caller clears the argument stack
46 | 
47 |     This generates more code since programs usually make several calls for each function definition.
48 | 
49 |     This allows for functions with a variable number of arguments such as `printf`, because only the caller knows how many args it put on the stack.
50 | 
51 |     PASCAL for example follows a convention in which the called function clears the argument stack.
52 | 


--------------------------------------------------------------------------------
/compiler-generated/.gitignore:
--------------------------------------------------------------------------------
  1 | *.objdump
  2 | *.readelf
  3 | *.s
  4 | 
  5 | # -fdump-tree-all -fdump-rtl-all -fdump-class-hierarchy
  6 | *.alias
  7 | *.alignments
  8 | *.asmcons
  9 | *.barriers
 10 | *.bbro
 11 | *.bswap
 12 | *.ccp1
 13 | *.ccp2
 14 | *.ccp3
 15 | *.cdce
 16 | *.cddce1
 17 | *.cddce2
 18 | *.ce1
 19 | *.ce2
 20 | *.ce3
 21 | *.cfg
 22 | *.ch
 23 | *.class
 24 | *.combine
 25 | *.compgotos
 26 | *.copyprop1
 27 | *.copyprop2
 28 | *.copyprop3
 29 | *.copyprop4
 30 | *.copyprop5
 31 | *.copyrename1
 32 | *.copyrename2
 33 | *.copyrename3
 34 | *.copyrename4
 35 | *.cplxlower0
 36 | *.cplxlower1
 37 | *.cprop1
 38 | *.cprop2
 39 | *.cprop3
 40 | *.cprop_hardreg
 41 | *.crited1
 42 | *.csa
 43 | *.cse_local
 44 | *.cse1
 45 | *.cse2
 46 | *.cselim
 47 | *.cunroll
 48 | *.cunrolli
 49 | *.dce1
 50 | *.dce2
 51 | *.dceloop1
 52 | *.dceloop3
 53 | *.dfinish
 54 | *.dfinit
 55 | *.dom1
 56 | *.dom2
 57 | *.dse1
 58 | *.dse2
 59 | *.dwarf2
 60 | *.ealias
 61 | *.early_optimizations
 62 | *.eh
 63 | *.eh_ranges
 64 | *.ehcleanup2
 65 | *.ehdisp
 66 | *.ehopt
 67 | *.einline
 68 | *.eipa_sra
 69 | *.esra
 70 | *.expand
 71 | *.fab1
 72 | *.final
 73 | *.fnsplit
 74 | *.forwprop1
 75 | *.forwprop2
 76 | *.forwprop3
 77 | *.forwprop4
 78 | *.fre1
 79 | *.fre2
 80 | *.fwprop1
 81 | *.fwprop2
 82 | *.gcse2
 83 | *.gimple
 84 | *.gkd
 85 | *.ifcombine
 86 | *.ifcvt
 87 | *.init-regs
 88 | *.inline_param1
 89 | *.inline_param2
 90 | *.into_cfglayout
 91 | *.ira
 92 | *.ivcanon
 93 | *.ivopts
 94 | *.jump
 95 | *.jump2
 96 | *.ldist
 97 | *.lim1
 98 | *.lim3
 99 | *.local-pure-const1
100 | *.local-pure-const2
101 | *.loop
102 | *.loop2
103 | *.loop2_done
104 | *.loop2_init
105 | *.loop2_invariant
106 | *.loop2_unswitch
107 | *.loopdone
108 | *.loopinit
109 | *.lower
110 | *.mach
111 | *.mergephi1
112 | *.mergephi2
113 | *.mode_sw
114 | *.nothrow
115 | *.nrv
116 | *.objsz1
117 | *.omplower
118 | *.optimized
119 | *.original
120 | *.outof_cfglayout
121 | *.pcom
122 | *.peephole2
123 | *.phicprop1
124 | *.phicprop2
125 | *.phiopt1
126 | *.phiopt2
127 | *.phiopt3
128 | *.phiprop
129 | *.postreload
130 | *.pre
131 | *.pro_and_epilogue
132 | *.profile_estimate
133 | *.reassoc1
134 | *.reassoc2
135 | *.ree
136 | *.reginfo
137 | *.regmove
138 | *.release_ssa
139 | *.reload
140 | *.resx
141 | *.retslot
142 | *.rtl_dce
143 | *.sccp
144 | *.sched2
145 | *.shorten
146 | *.sincos
147 | *.sink
148 | *.slp
149 | *.slsr
150 | *.split1
151 | *.split2
152 | *.split4
153 | *.sra
154 | *.ssa
155 | *.stack
156 | *.statistics
157 | *.strlen
158 | *.subreg1
159 | *.subreg2
160 | *.switchconv
161 | *.tailc
162 | *.tailr1
163 | *.tailr2
164 | *.tu
165 | *.ud_dce
166 | *.uncprop1
167 | *.uninit1
168 | *.unswitch
169 | *.vartrack
170 | *.veclower
171 | *.veclower21
172 | *.vect
173 | *.vregs
174 | *.vrp1
175 | *.vrp2
176 | *.widening_mul
177 | 


--------------------------------------------------------------------------------
/compiler-generated/README.md:
--------------------------------------------------------------------------------
 1 | # Compiler generated
 2 | 
 3 | Let's see what our compilers are compiling to.
 4 | 
 5 | 1.  General compiler remarks
 6 |     1. [GCC](gcc.md)
 7 | 1.  C
 8 |     1.  [hello_world.c](hello_world.c)
 9 |     1.  Functions
10 |         1.  [function_int_int_int.c](function_int_int_int.c)
11 |     1.  [return_struct.c](return_struct.c)
12 |     1.  switch
13 |         1.  [switch3.c](switch3.c)
14 |         1.  [switch6.c](switch6.c)
15 |         1.  [switch6_spread.c](switch6_spread.c)
16 |         1.  [switch6_very_spread.c](switch6_very_spread.c)
17 |     1.  [Cast int to long](cast_int_long.c)
18 |     1.  Floating point
19 |         1.  [float_sum.c](float_sum.c)
20 |         1.  [float2int.c](float2int.c)
21 |         1.  [sqrt.c](sqrt.c)
22 |     1.  Optimization
23 |         1.  [for_empty.c](for_empty.c)
24 |         1.  [while_infinite.c](while_infinite.c)
25 |         1.  [use_rax_return.c](use_rax_return.c)
26 |         1.  [restrict.c](restrict.c)
27 |         1.  [builtin_no_side_effect.c](builtin_no_side_effect.c)
28 |         1.  [builtin_inline.c](builtin_inline.c)
29 |         1.  [CSE](cse.c)
30 |         1.  [CSE function](cse_function.c)
31 |         1.  [CSE large function](cse_large_function.c)
32 |         1.  Branch prediction
33 |             1.   [__builtin_expect](gcc/builtin_expect.c)
34 |             1.   [__builtin_expect off](gcc/builtin_expect_off.c)
35 |         1.  [Precalculate loop](precalculate_loop.c)
36 |         1.  [Precalculate loop without init](precalculate_loop_unknown_init.c)
37 |         1.  [Bound simplification](bound_simplification.c)
38 | 1.  C++
39 |     1. [class](class.cpp)
40 |     1. [template](template.cpp)
41 |     1. [constexpr.cpp](constexpr.cpp)
42 |     1. [atomic.cpp](atomic.cpp)
43 |     1. [this_method.cpp](this_method.cpp)
44 |     1. [virtual.cpp](virtual.cpp)
45 | 1.  libc
46 |     1. [memcmp.c](memcmp.c)
47 | 1.  ELF sections
48 |     1. [bss.c](bss.c)
49 |     1. [bss_static.c](bss_static.c)
50 |     1. [common.c](common.c)
51 |     1. [data.c](data.c)
52 | 
53 | TODO
54 | 
55 | 1.  Optimizations
56 |     1.  Strength reduction, like multiplication to lea <https://en.wikipedia.org/wiki/Strength_reduction>
57 | 


--------------------------------------------------------------------------------
/compiler-generated/atomic.cpp:
--------------------------------------------------------------------------------
 1 | /*
 2 | GCC 4.8 compiles to xadd through the __atomic_fetch_add built-in.
 3 | */
 4 | 
 5 | #include <atomic>
 6 | #include <cassert>
 7 | #include <thread>
 8 | 
 9 | #if __cplusplus >= 201103L
10 | const int NUM_THREADS = 1000;
11 | const int NUM_ITERS = 1000;
12 | 
13 | std::atomic_int global(0);
14 | 
15 | void threadMain() {
16 |     for (int i = 0; i < NUM_ITERS; ++i)
17 |         global++;
18 | }
19 | #endif
20 | 
21 | int main() {
22 | #if __cplusplus >= 201103L
23 |     std::thread threads[NUM_THREADS];
24 |     int i;
25 |     for (i = 0; i < NUM_THREADS; ++i)
26 |         threads[i] = std::thread(threadMain);
27 |     for (i = 0; i < NUM_THREADS; ++i)
28 |         threads[i].join();
29 |     assert(global.load() == NUM_THREADS * NUM_ITERS);
30 | #endif
31 | }
32 | 


--------------------------------------------------------------------------------
/compiler-generated/block.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | 
 3 | int main() {
 4 |     int i = 0;
 5 |     {
 6 |         int i = 1;
 7 |         printf("%d\n", i);
 8 |     }
 9 |     return 0;
10 | }
11 | 


--------------------------------------------------------------------------------
/compiler-generated/bound_simplification.c:
--------------------------------------------------------------------------------
 1 | /*
 2 | # Modulo upper bound
 3 | */
 4 | 
 5 | #include <time.h>
 6 | #include <math.h>
 7 | #include <stdlib.h>
 8 | 
 9 | int main() {
10 |     int i = time(NULL);
11 |     srand(i);
12 |     double d = i;
13 |     unsigned int u = i;
14 |     unsigned int u01 = i % 2U;
15 | 
16 |     /* -O3 */
17 |     if (u01 == 2U) exit(0x1);
18 |     if (u01 > 1U) exit(0x2);
19 |     if (u01 + 1 > 2U) exit(0x3);
20 |     if (i > 0)
21 |         if (i < 0)
22 |             exit(0x4);
23 |     if (d > 0.0)
24 |         if (d < 0.0)
25 |             exit(0x5);
26 | 
27 |     /* No. */
28 |     if (rand() < 0)
29 |         exit(0x6);
30 |     if (sin(i) > 1.1) exit(0x80);
31 |     if (sqrt(i) < 0.0) exit(0x81);
32 | 
33 |     return 0;
34 | }
35 | 


--------------------------------------------------------------------------------
/compiler-generated/bss.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | 
 3 | /* Contrast with the Common and Data section outputs. */
 4 | int my_var = 0;
 5 | 
 6 | int main() {
 7 |     printf("%d\n", my_var + 1);
 8 |     return 0;
 9 | }
10 | 


--------------------------------------------------------------------------------
/compiler-generated/bss_static.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | 
 3 | /*
 4 | Since this is not visible from the outside because static,
 5 | GCC can just ignore the label and inline it everywhere.
 6 | */
 7 | static int my_var = 0;
 8 | 
 9 | int main() {
10 |     printf("%d\n", my_var + 1);
11 |     return 0;
12 | }
13 | 


--------------------------------------------------------------------------------
/compiler-generated/builtin_inline.c:
--------------------------------------------------------------------------------
 1 | /*
 2 | Will `abs(-1)` be inlined, or precalculated?
 3 | The compiler could know that it is deterministic and has no side effects.
 4 | 
 5 | GCC 4.8: yes, even at -O0.
 6 | */
 7 | 
 8 | #include <stdio.h>
 9 | #include <stdlib.h>
10 | 
11 | int main() {
12 |     printf("%d\n", abs(-1));
13 |     printf("%d\n", abs(-1));
14 |     return 0;
15 | }
16 | 


--------------------------------------------------------------------------------
/compiler-generated/builtin_no_side_effect.c:
--------------------------------------------------------------------------------
 1 | /*
 2 | Will `abs(-1)` be optimized away?
 3 | The compiler must know that it has no internal side effects.
 4 | 
 5 | GCC 4.8: yes, even at -O0.
 6 | */
 7 | 
 8 | #include <stdio.h>
 9 | #include <stdlib.h>
10 | 
11 | int main() {
12 |     abs(-1);
13 |     return 0;
14 | }
15 | 


--------------------------------------------------------------------------------
/compiler-generated/callee_saved_registers.c:
--------------------------------------------------------------------------------
 1 | /* https://stackoverflow.com/questions/18024672/what-registers-are-preserved-through-a-linux-x86-64-function-call/55207335#55207335 */
 2 | 
 3 | #include <inttypes.h>
 4 | 
 5 | uint64_t inc(uint64_t i) {
 6 |     __asm__ __volatile__(
 7 |         ""
 8 |         : "+m" (i)
 9 |         :
10 |         : "rax",
11 |           "rbx",
12 |           "rcx",
13 |           "rdx",
14 |           "rsi",
15 |           "rdi",
16 |           /* GCC 8.2: compiles in -O3, fails in -O0 with:
17 |            * error: bp cannot be used in asm here
18 |            */
19 |           /*"rbp",*/
20 |           "rsp",
21 |           "r8",
22 |           "r9",
23 |           "r10",
24 |           "r11",
25 |           "r12",
26 |           "r13",
27 |           "r14",
28 |           "r15",
29 |           "ymm0",
30 |           "ymm1",
31 |           "ymm2",
32 |           "ymm3",
33 |           "ymm4",
34 |           "ymm5",
35 |           "ymm6",
36 |           "ymm7",
37 |           "ymm8",
38 |           "ymm9",
39 |           "ymm10",
40 |           "ymm11",
41 |           "ymm12",
42 |           "ymm13",
43 |           "ymm14",
44 |           "ymm15"
45 |     );
46 |     return i + 1;
47 | }
48 | 
49 | int main(int argc, char **argv) {
50 |     (void)argv;
51 |     return inc(argc);
52 | }
53 | 


--------------------------------------------------------------------------------
/compiler-generated/cast_int_long.c:
--------------------------------------------------------------------------------
 1 | /*
 2 | # Cast int to long.
 3 | 
 4 | GCC 4.8 -O0: CDQE: http://stackoverflow.com/questions/6555094/what-does-cltq-do-in-assembly/31114310#31114310
 5 | GCC 4.8 -O3: MOVSX, which extends and moves in one go: http://stackoverflow.com/questions/7861095/what-does-movsbl-instruction-do
 6 | */
 7 | 
 8 | #include <stdio.h>
 9 | #include <time.h>
10 | 
11 | int main() {
12 |     int i;
13 |     long l;
14 |     i = time(NULL);
15 |     l = i;
16 |     printf("%ld", l);
17 |     return 0;
18 | }
19 | 


--------------------------------------------------------------------------------
/compiler-generated/char_array_init.c:
--------------------------------------------------------------------------------
1 | #include <stdio.h>
2 | 
3 | int main() {
4 |     /* Moved to the stack. */
5 |     char s[] = "abc";
6 |     printf("%s\n", s);
7 |     return 0;
8 | }
9 | 


--------------------------------------------------------------------------------
/compiler-generated/char_array_init_global.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | 
 3 | /* .data */
 4 | char s[] = "abc";
 5 | 
 6 | int main() {
 7 |     printf("%s\n", s);
 8 |     return 0;
 9 | }
10 | 


--------------------------------------------------------------------------------
/compiler-generated/char_array_init_long.c:
--------------------------------------------------------------------------------
1 | #include <stdio.h>
2 | 
3 | int main() {
4 |     /* Multiple movs to stack. */
5 |     char s[] = "abcdefghiabcdefghiabcdefghiabcdefghiabcdefghiabcdefghiabcdefghiabcdefghiabcdefghiabcdefghi";
6 |     printf("%s\n", s);
7 |     return 0;
8 | }
9 | 


--------------------------------------------------------------------------------
/compiler-generated/char_pointer_init.c:
--------------------------------------------------------------------------------
1 | #include <stdio.h>
2 | 
3 | int main() {
4 |     char *s = "abc";
5 |     printf("%s\n", s);
6 |     return 0;
7 | }
8 | 


--------------------------------------------------------------------------------
/compiler-generated/char_pointer_init_global.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | 
 3 | /* .data */
 4 | char *s = "abc";
 5 | 
 6 | int main() {
 7 |     printf("%s\n", s);
 8 |     return 0;
 9 | }
10 | 


--------------------------------------------------------------------------------
/compiler-generated/class.cpp:
--------------------------------------------------------------------------------
 1 | /*
 2 | Basic class concepts:
 3 | 
 4 | - constructor
 5 | - field
 6 | - method
 7 | */
 8 | 
 9 | #include <ctime>
10 | #include <iostream>
11 | 
12 | class C {
13 |     public:
14 |         int i;
15 | 
16 |         C(int i) : i(i) {}
17 | 
18 |         /*
19 |         this is passed as an extra argument.
20 | 
21 |         Like for struct, `this` points directly to the class data.
22 |         */
23 |         int f(int j) {
24 |             int r = std::time(NULL);
25 |             r += i;
26 |             r += j;
27 |             return r;
28 |         }
29 | };
30 | 
31 | int main() {
32 |     /*
33 |     Passes a hidden `this` argument, which the constructor initializes.
34 | 
35 |     Just like returning a struct from a function in C.
36 |     */
37 |     C c(1);
38 |     int i = c.f(2);
39 |     std::cout << i << std::endl;
40 | }
41 | 


--------------------------------------------------------------------------------
/compiler-generated/common.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | 
 3 | /*
 4 | Goes to `.common` on the `.o`, and `.bss` on the `.out`
 5 | if not defined in any other file linked to (the case here).
 6 | 
 7 | GCC 4.8 does not use weak symbols by default. TODO why?
 8 | 
 9 | `.common` seems to be treated magically by the linker.
10 | */
11 | int my_var;
12 | 
13 | int main() {
14 |     my_var = 0;
15 |     printf("%d\n", my_var + 1);
16 |     return 0;
17 | }
18 | 


--------------------------------------------------------------------------------
/compiler-generated/common_large.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | 
 3 | /*
 4 | Does not occupy space on the binaries, only when the program loads:
 5 | http://stackoverflow.com/questions/610682/do-bss-section-zero-initialized-variables-occupy-space-in-elf-file
 6 | */
 7 | int my_var[1000000];
 8 | 
 9 | int main() {
10 |     my_var[0] = 1;
11 |     printf("%d\n", my_var[0]);
12 |     return 0;
13 | }
14 | 


--------------------------------------------------------------------------------
/compiler-generated/const.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | 
 3 | /* Goes to .rodata. */
 4 | const int my_var = 1;
 5 | 
 6 | int main() {
 7 |     /* Gets inlined. */
 8 |     printf("%d\n", my_var);
 9 |     return 0;
10 | }
11 | 


--------------------------------------------------------------------------------
/compiler-generated/const_cast.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | 
 3 | const int my_var = 1;
 4 | 
 5 | int main() {
 6 |     int* ip = (int*)&my_var;
 7 |     /*
 8 |     Segfaults, since my_var is in rodata.
 9 | 
10 |     A crash is acceptable, since writing to a const object is undefined behaviour in C.
11 |     */
12 |     *ip = 2;
13 |     return 0;
14 | }
15 | 


--------------------------------------------------------------------------------
/compiler-generated/constexpr.cpp:
--------------------------------------------------------------------------------
1 | #include <iostream>
2 | 
3 | /* .rodata, not SHN_ABS. */
4 | constexpr int my_var = 1;
5 | 
6 | int main() {
7 |     std::cout << my_var << std::endl;
8 | }
9 | 


--------------------------------------------------------------------------------
/compiler-generated/cse.c:
--------------------------------------------------------------------------------
 1 | /*
 2 | # CSE
 3 | 
 4 | # Common subexpression elimination
 5 | 
 6 |     https://en.wikipedia.org/wiki/Common_subexpression_elimination
 7 | 
 8 |     Will `a * b` be calculated only once?
 9 | 
10 |     GCC 4.8: yes, at `-O3`, no at `-O0`.
11 | */
12 | 
13 | #include <stdio.h>
14 | #include <stdlib.h>
15 | #include <time.h>
16 | 
17 | int main() {
18 |     int a, b;
19 |     a = time(NULL);
20 |     b = time(NULL);
21 |     printf("%d\n", a * b + 1);
22 |     printf("%d\n", a * b * 2);
23 |     return 0;
24 | }
25 | 


--------------------------------------------------------------------------------
/compiler-generated/cse_function.c:
--------------------------------------------------------------------------------
 1 | /*
 2 | # CSE function
 3 | 
 4 | # Common subexpression elimination function
 5 | 
 6 |     http://stackoverflow.com/questions/1982131/is-loop-hoisting-still-a-valid-manual-optimization-for-c-code
 7 | 
 8 |     Will `f(a)` be calculated only once?
 9 | 
10 |     The compiler must prove that the function has no side effects.
11 | 
12 |     In GCC, the attribute `const` tells that to the compiler, but it alone does not guarantee the hoisting:
13 |     you might also have to play with the hoisting parameters if the function is complex.
14 | 
15 |     GCC 4.8: yes, at `-O3`, no at `-O0`.
16 | */
17 | 
18 | #include <stdio.h>
19 | #include <stdlib.h>
20 | #include <time.h>
21 | 
22 | int f(int x) {
23 |     return x * x;
24 | }
25 | 
26 | int main() {
27 |     int a;
28 |     a = time(NULL);
29 |     printf("%d\n", f(a) + 1);
30 |     printf("%d\n", f(a) * 2);
31 |     return 0;
32 | }
33 | 


--------------------------------------------------------------------------------
/compiler-generated/cse_large_function.c:
--------------------------------------------------------------------------------
 1 | /*
 2 | # CSE large function
 3 | Why can't GCC 4.8 -O3, even with vamped up CSE `--param`s, factorize
 4 | the expensive `countPrimesLessThan` call?
 5 | 
 6 | Origin: http://stackoverflow.com/a/1983492/895245
 7 | */
 8 | 
 9 | #include <stdio.h>
10 | 
11 | /*int isPrime(int) __attribute__((const));*/
12 | int isPrime(int p)  {
13 |     for (int i = 2; i*i <= p; ++i)  {
14 |         if ((p % i) == 0) return 0;
15 |     }
16 |     return 1;
17 | }
18 | 
19 | /*int countPrimesLessThan(int) __attribute__((const));*/
20 | int countPrimesLessThan(int max)  {
21 |     int count = 0;
22 |     for (int i = 2; i < max; ++i) {
23 |         if (isPrime(i)) ++count;
24 |     }
25 |     return count;
26 | }
27 | 
28 | int main() {
29 |     for (int i = 0; i < 5; ++i) {
30 |         printf("%d\n", countPrimesLessThan(1000*1000));
31 |     }
32 | }
33 | 


--------------------------------------------------------------------------------
/compiler-generated/data.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | 
 3 | /* If this were 0, it would fall on the BSS. */
 4 | int my_var = 1;
 5 | 
 6 | int main() {
 7 |     printf("%d\n", my_var + 1);
 8 |     return 0;
 9 | }
10 | 


--------------------------------------------------------------------------------
/compiler-generated/double_assign_constant.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | 
 3 | int main() {
 4 |     double f;
 5 |     f = 0.0;
 6 |     f = 1.0;
 7 |     printf("%f\n", f);
 8 |     return 0;
 9 | }
10 | 


--------------------------------------------------------------------------------
/compiler-generated/double_sum.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <time.h>
 3 | 
 4 | int main() {
 5 |     double f;
 6 |     f = (double)(time(NULL) % 3);
 7 |     /*
 8 |     GCC 4.8:
 9 | 
10 |     - fpmath=sse: addsd (default)
11 |     - fpmath=387: faddp
12 |     */
13 |     f += 0.5;
14 |     printf("%f\n", f);
15 |     return 0;
16 | }
17 | 


--------------------------------------------------------------------------------
/compiler-generated/enum.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | 
 3 | /*
 4 | enums in C need not be represented in object files, and are omitted from them.
 5 | 
 6 | To use an enum across files, you must put it in a header file and include.
 7 | */
 8 | enum my_enum {
 9 |     my_enum_a = 0,
10 |     my_enum_b = 1
11 | };
12 | 
13 | int main() {
14 |     printf("%d\n", my_enum_a);
15 |     return 0;
16 | }
17 | 


--------------------------------------------------------------------------------
/compiler-generated/float2int.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <time.h>
 3 | 
 4 | int main() {
 5 |     int i;
 6 |     float f;
 7 |     f = (float)(time(NULL) % 3) + 0.5;
 8 |     /*
 9 |     - sse: cvttss2si
10 |     */
11 |     i = (int)f;
12 |     printf("%d\n", i);
13 |     return 0;
14 | }
15 | 


--------------------------------------------------------------------------------
/compiler-generated/float2int_direct_vs_variable.c:
--------------------------------------------------------------------------------
 1 | /*
 2 | http://stackoverflow.com/questions/17220787/c-casting-from-double-to-int
 3 | 
 4 | Don't reproduce him.
 5 | */
 6 | 
 7 | #include <stdio.h>
 8 | #include <stdarg.h>
 9 | 
10 | int main() {
11 |     int a;
12 |     int d;
13 |     double b = 0.41;
14 | 
15 |     /* Cast from variable. */
16 |     double c = b * 100.0;
17 |     a = (int)(c);
18 | 
19 |     /* Cast expression directly. */
20 |     d = (int)(b * 100.0);
21 | 
22 |     printf("c = %f \n", c);
23 |     printf("a = %d \n", a);
24 |     printf("d = %d \n", d);
25 | 
26 |     return 0;
27 | }
28 | 


--------------------------------------------------------------------------------
/compiler-generated/float_assign_constant.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | 
 3 | int main() {
 4 |     float f;
 5 |     /*
 6 |     GCC 4.8: Unlike ints, float literals are stored in memory.
 7 |     TODO why? double just uses immediates. To save space because
 8 |     not possible to have 4 byte immediates?
 9 |     */
10 |     f = 0.0f;
11 |     f = 1.0f;
12 |     printf("%f\n", f);
13 |     return 0;
14 | }
15 | 


--------------------------------------------------------------------------------
/compiler-generated/float_sum.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <time.h>
 3 | 
 4 | int main() {
 5 |     float f;
 6 |     f = (float)(time(NULL) % 3);
 7 |     /*
 8 |     GCC 4.8:
 9 | 
10 |     - fpmath=sse: addss (default)
11 |     - fpmath=387: faddp
12 |     */
13 |     f += 0.5f;
14 |     printf("%f\n", f);
15 |     return 0;
16 | }
17 | 


--------------------------------------------------------------------------------
/compiler-generated/for_empty.c:
--------------------------------------------------------------------------------
 1 | /*
 2 | # for empty
 3 | 
 4 | GCC 4.8 -O0: stays
 5 | GCC 4.8 -O3: optimized away
 6 | */
 7 | 
 8 | int main() {
 9 |     int i;
10 |     for (i = 0; i < 16; i++)
11 |         ;
12 |     return 0;
13 | }
14 | 


--------------------------------------------------------------------------------
/compiler-generated/function_call_another.c:
--------------------------------------------------------------------------------
 1 | /*
 2 | A function that calls another.
 3 | */
 4 | 
 5 | #include <stdio.h>
 6 | #include <time.h>
 7 | 
 8 | int func1(int i, int j, int k) {
 9 |     int l;
10 |     l = i + j + k;
11 |     return l;
12 | }
13 | 
14 | /*
15 | This function calls another function.
16 | 
17 | `rsp` must be moved in order to:
18 | 
19 | - store all arguments and local variables.
20 | - align to 0x10 in x86-64
21 | */
22 | int func0(int i, int j, int k) {
23 |     int l;
24 |     l = func1(i, j, k);
25 |     return l;
26 | }
27 | 
28 | int main() {
29 |     printf("%d\n", func0(1, 2, 3));
30 |     return 0;
31 | }
32 | 


--------------------------------------------------------------------------------
/compiler-generated/function_int_int_int.c:
--------------------------------------------------------------------------------
 1 | /*
 2 | Simple function that takes two ints and returns a third.
 3 | */
 4 | 
 5 | #include <stdio.h>
 6 | 
 7 | /*
 8 | GCC 4.8 -O3 reduces this to a single lea.
 9 | 
10 | At `-O0`, when there is no function call inside this function, `rsp` is not changed.
11 | Such function is called a leaf function.
12 | 
13 | TODO why does the first variable go to `rbp - 0x14` instead of `rbp - 0x4`?
14 | */
15 | int func0(int i, int j) {
16 |     int k;
17 |     k = i + j;
18 |     return k;
19 | }
20 | 
21 | int main() {
22 |     /*
23 |     GCC 4.8 -O3 resolves the function call into a constant.
24 |     */
25 |     printf("%d\n", func0(1, 2));
26 |     return 0;
27 | }
28 | 


--------------------------------------------------------------------------------
/compiler-generated/function_many_arguments.c:
--------------------------------------------------------------------------------
 1 | /* Let's force some stack arguments. */
 2 | 
 3 | /* for i in `seq 0 3`; do echo "int i_$((i*4 + 0)), int i_$((i*4 + 1)), int i_$((i*4 + 2)), int i_$((i*4 + 3)),"; done */
 4 | int func(
 5 |     int i_0,  int i_1,  int i_2,  int i_3,
 6 |     int i_4,  int i_5,  int i_6,  int i_7,
 7 |     int i_8,  int i_9,  int i_10, int i_11,
 8 |     int i_12, int i_13, int i_14, int i_15
 9 | ) {
10 |     return
11 |         /* for i in `seq 0 3`; do echo "i_$((i*4 + 0)) + i_$((i*4 + 1)) + i_$((i*4 + 2)) + i_$((i*4 + 3)) +"; done */
12 |         i_0  + i_1  + i_2  + i_3  +
13 |         i_4  + i_5  + i_6  + i_7  +
14 |         i_8  + i_9  + i_10 + i_11 +
15 |         i_12 + i_13 + i_14 + i_15
16 |     ;
17 | }
18 | 
19 | int main(int argc, char **argv) {
20 |     (void)argv;
21 |     return func(
22 |         argc, argc, argc, argc,
23 |         argc, argc, argc, argc,
24 |         argc, argc, argc, argc,
25 |         argc, argc, argc, argc
26 |     );
27 | }
28 | 


--------------------------------------------------------------------------------
/compiler-generated/function_many_arguments_2calls.c:
--------------------------------------------------------------------------------
 1 | /* Trying to force out tail call optimization. */
 2 | 
 3 | int func2(
 4 |     int i_0,  int i_1,  int i_2,  int i_3,
 5 |     int i_4,  int i_5,  int i_6,  int i_7,
 6 |     int i_8,  int i_9,  int i_10, int i_11,
 7 |     int i_12, int i_13, int i_14, int i_15
 8 | ) {
 9 |     return
10 |         i_0  + i_1  + i_2  + i_3  +
11 |         i_4  + i_5  + i_6  + i_7  +
12 |         i_8  + i_9  + i_10 + i_11 +
13 |         i_12 + i_13 + i_14 + i_15
14 |     ;
15 | }
16 | 
17 | int func(
18 |     int i_0,  int i_1,  int i_2,  int i_3,
19 |     int i_4,  int i_5,  int i_6,  int i_7,
20 |     int i_8,  int i_9,  int i_10, int i_11,
21 |     int i_12, int i_13, int i_14, int i_15
22 | ) {
23 |     return func2(
24 |         i_12 , i_13 , i_14 , i_15 ,
25 |         i_8  , i_9  , i_10 , i_11 ,
26 |         i_4  , i_5  , i_6  , i_7  ,
27 |         i_0  , i_1  , i_2  , i_3
28 |     );
29 | }
30 | 
31 | int main(int argc, char **argv) {
32 |     (void)argv;
33 |     return func(
34 |         argc, argc, argc, argc,
35 |         argc, argc, argc, argc,
36 |         argc, argc, argc, argc,
37 |         argc, argc, argc, argc
38 |     );
39 | }
40 | 


--------------------------------------------------------------------------------
/compiler-generated/function_many_arguments_64.c:
--------------------------------------------------------------------------------
 1 | /* Now with 64 arguments, trying to break the red zone and force the stack to move.
 2 |  * But TODO why: fails in GCC 8.2, stack is still fixed.
 3 |  */
 4 | 
 5 | /* for i in `seq 0 15`; do echo "int i_$((i*4 + 0)), int i_$((i*4 + 1)), int i_$((i*4 + 2)), int i_$((i*4 + 3)),"; done */
 6 | int func(
 7 |     int i_0,  int i_1,  int i_2,  int i_3,
 8 |     int i_4,  int i_5,  int i_6,  int i_7,
 9 |     int i_8,  int i_9,  int i_10, int i_11,
10 |     int i_12, int i_13, int i_14, int i_15,
11 | 
12 |     int i_16, int i_17, int i_18, int i_19,
13 |     int i_20, int i_21, int i_22, int i_23,
14 |     int i_24, int i_25, int i_26, int i_27,
15 |     int i_28, int i_29, int i_30, int i_31,
16 | 
17 |     int i_32, int i_33, int i_34, int i_35,
18 |     int i_36, int i_37, int i_38, int i_39,
19 |     int i_40, int i_41, int i_42, int i_43,
20 |     int i_44, int i_45, int i_46, int i_47,
21 | 
22 |     int i_48, int i_49, int i_50, int i_51,
23 |     int i_52, int i_53, int i_54, int i_55,
24 |     int i_56, int i_57, int i_58, int i_59,
25 |     int i_60, int i_61, int i_62, int i_63
26 | ) {
27 |     return
28 |         /* for i in `seq 0 15`; do echo "i_$((i*4 + 0)) + i_$((i*4 + 1)) + i_$((i*4 + 2)) + i_$((i*4 + 3)) +"; done */
29 |         i_0  + i_1  + i_2  + i_3  +
30 |         i_4  + i_5  + i_6  + i_7  +
31 |         i_8  + i_9  + i_10 + i_11 +
32 |         i_12 + i_13 + i_14 + i_15 +
33 | 
34 |         i_16 + i_17 + i_18 + i_19 +
35 |         i_20 + i_21 + i_22 + i_23 +
36 |         i_24 + i_25 + i_26 + i_27 +
37 |         i_28 + i_29 + i_30 + i_31 +
38 | 
39 |         i_32 + i_33 + i_34 + i_35 +
40 |         i_36 + i_37 + i_38 + i_39 +
41 |         i_40 + i_41 + i_42 + i_43 +
42 |         i_44 + i_45 + i_46 + i_47 +
43 | 
44 |         i_48 + i_49 + i_50 + i_51 +
45 |         i_52 + i_53 + i_54 + i_55 +
46 |         i_56 + i_57 + i_58 + i_59 +
47 |         i_60 + i_61 + i_62 + i_63
48 |     ;
49 | }
50 | 
51 | int main(int argc, char **argv) {
52 |     (void)argv;
53 |     return func(
54 |         argc, argc, argc, argc,
55 |         argc, argc, argc, argc,
56 |         argc, argc, argc, argc,
57 |         argc, argc, argc, argc,
58 | 
59 |         argc, argc, argc, argc,
60 |         argc, argc, argc, argc,
61 |         argc, argc, argc, argc,
62 |         argc, argc, argc, argc,
63 | 
64 |         argc, argc, argc, argc,
65 |         argc, argc, argc, argc,
66 |         argc, argc, argc, argc,
67 |         argc, argc, argc, argc,
68 | 
69 |         argc, argc, argc, argc,
70 |         argc, argc, argc, argc,
71 |         argc, argc, argc, argc,
72 |         argc, argc, argc, argc
73 |     );
74 | }
75 | 


--------------------------------------------------------------------------------
/compiler-generated/gcc.md:
--------------------------------------------------------------------------------
 1 | # GCC
 2 | 
 3 | Tested with: GCC 4.8.
 4 | 
 5 | ## Optimizations
 6 | 
 7 | Without any flags, GCC will do several minor optimizations that make the code hard to read for beginners. Don't be afraid :-)
 8 | 
 9 | Common ones:
10 | 
11 | - `lea` instead of `add` and `mov`
12 | - `xor eax, eax` instead of `mov $0`
13 | - function inlining
14 | - RIP addressing as it is shorter
15 | 
16 | ## Label names
17 | 
18 | <http://stackoverflow.com/a/30212164/895245>
19 | 
20 | ## Optimizations
21 | 
22 | -   `RBP` optimized out `-fomit-frame-pointer`
23 | 
24 | -   operate on the correct register for the return value
25 | 
26 |         int f(int i, int j) { return i + j; }
27 | 
28 |     is doable in a single `lea`.
29 | 


--------------------------------------------------------------------------------
/compiler-generated/gcc/Makefile:
--------------------------------------------------------------------------------
1 | ../Makefile


--------------------------------------------------------------------------------
/compiler-generated/gcc/Makefile_params:
--------------------------------------------------------------------------------
1 | STD := gnu11
2 | 


--------------------------------------------------------------------------------
/compiler-generated/gcc/builtin_expect.c:
--------------------------------------------------------------------------------
 1 | #include "stdio.h"
 2 | #include "time.h"
 3 | 
 4 | int main() {
 5 |     int i;
 6 |     i = !time(NULL);
 7 |     if (__builtin_expect(i, 0))
 8 |         puts("a");
 9 |     return 0;
10 | }
11 | 


--------------------------------------------------------------------------------
/compiler-generated/gcc/builtin_expect_off.c:
--------------------------------------------------------------------------------
 1 | #include "stdio.h"
 2 | #include "time.h"
 3 | 
 4 | int main() {
 5 |     int i;
 6 |     i = !time(NULL);
 7 |     if (i)
 8 |         puts("a");
 9 |     return 0;
10 | }
11 | 


--------------------------------------------------------------------------------
/compiler-generated/gcc/interactive/Makefile:
--------------------------------------------------------------------------------
1 | ../Makefile


--------------------------------------------------------------------------------
/compiler-generated/gcc/interactive/builtin_expect_off_perf.c:
--------------------------------------------------------------------------------
 1 | /* https://stackoverflow.com/questions/7346929/what-is-the-advantage-of-gccs-builtin-expect-in-if-else-statements/31540623#31540623 */
 2 | 
 3 | #include <stdlib.h>
 4 | #include <stdio.h>
 5 | #include <time.h>
 6 | 
 7 | int main() {
 8 |     enum N { N = 8 };
 9 |     unsigned int i, x[N], y;
10 |     srand(time(NULL));
11 |     y = rand();
12 |     for (i = 0; i < N; i++) {
13 |         x[i] = rand() & 4;
14 |     }
15 |     for (i = 0; i < 100000000; i++) {
16 |         if (!x[0]) y++;
17 |         if (x[1]) y++;
18 |         if (x[2]) y++;
19 |         if (!x[3]) y++;
20 |         if (!x[4]) y++;
21 |         if (x[5]) y++;
22 |         if (!x[6]) y++;
23 |         if (x[7]) y++;
24 |         y =  2*y*y + 3*y + 5;
25 |     }
26 |     printf("%u\n", y);
27 |     return 0;
28 | }
29 | 


--------------------------------------------------------------------------------
/compiler-generated/gcc/interactive/builtin_expect_perf.c:
--------------------------------------------------------------------------------
 1 | /* https://stackoverflow.com/questions/7346929/what-is-the-advantage-of-gccs-builtin-expect-in-if-else-statements/31540623#31540623 */
 2 | 
 3 | #include <stdlib.h>
 4 | #include <stdio.h>
 5 | #include <time.h>
 6 | 
 7 | int main() {
 8 |     enum N { N = 8 };
 9 |     unsigned int i, x[N], y;
10 |     srand(time(NULL));
11 |     y = rand();
12 |     for (i = 0; i < N; i++) {
13 |         x[i] = rand() & 4;
14 |     }
15 |     for (i = 0; i < 100000000; i++) {
16 |         if (__builtin_expect( !x[0], 0)) y++;
17 |         if (__builtin_expect(!!x[1], 1)) y++;
18 |         if (__builtin_expect(!!x[2], 1)) y++;
19 |         if (__builtin_expect( !x[3], 0)) y++;
20 |         if (__builtin_expect( !x[4], 0)) y++;
21 |         if (__builtin_expect(!!x[5], 1)) y++;
22 |         if (__builtin_expect( !x[6], 0)) y++;
23 |         if (__builtin_expect(!!x[7], 1)) y++;
24 |         y =  2*y*y + 3*y + 5;
25 |     }
26 |     printf("%u\n", y);
27 |     return 0;
28 | }
29 | 


--------------------------------------------------------------------------------
/compiler-generated/gcc/restrict.cpp:
--------------------------------------------------------------------------------
 1 | void f(int *a, int *b, int *x) {
 2 |     *a += *x;
 3 |     *b += *x;
 4 | }
 5 | 
 6 | void fr(int *__restrict__ a, int *__restrict__ b, int *__restrict__ x) {
 7 |     *a += *x;
 8 |     *b += *x;
 9 | }
10 | 
11 | int main() {}
12 | 


--------------------------------------------------------------------------------
/compiler-generated/global_int.c:
--------------------------------------------------------------------------------
1 | #include <stdio.h>
2 | 
3 | int myint = 0x1234;
4 | 
5 | int main() {
6 |     return 0;
7 | }
8 | 


--------------------------------------------------------------------------------
/compiler-generated/hello_world.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | 
 3 | int main() {
 4 |     /*
 5 |     GCC 4.8 uses __printf_chk by default,
 6 |     which is part of the LSB and safer than printf:
 7 |     http://refspecs.linuxbase.org/LSB_4.1.0/LSB-Core-generic/LSB-Core-generic/libc---printf-chk-1.html
 8 |     */
 9 |     puts("Hello world!");
10 |     return 0;
11 | }
12 | 


--------------------------------------------------------------------------------
/compiler-generated/if.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <time.h>
 3 | 
 4 | int main() {
 5 |     if ((int)time(NULL)) {
 6 |         puts("1");
 7 |     } else {
 8 |         puts("0");
 9 |     }
10 |     return 0;
11 | }
12 | 


--------------------------------------------------------------------------------
/compiler-generated/int_assign_constant.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | 
 3 | int main() {
 4 |     int i;
 5 |     /* Immediates */
 6 |     i = 0;
 7 |     i = 1;
 8 |     printf("%d\n", i);
 9 |     return 0;
10 | }
11 | 


--------------------------------------------------------------------------------
/compiler-generated/ld.c:
--------------------------------------------------------------------------------
 1 | /* Stuff that ld might define. */
 2 | 
 3 | #include <stdio.h>
 4 | 
 5 | #define F(x) extern char x; \
 6 |     printf(#x " = %p\n", &x);
 7 | 
 8 | int main() {
 9 |     F(etext)
10 |     F(edata)
11 |     F(end)
12 |     F(__data_start)
13 |     F(_GLOBAL_OFFSET_TABLE_)
14 |     return 0;
15 | }
16 | 


--------------------------------------------------------------------------------
/compiler-generated/long_double_assign_constant.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | 
 3 | int main() {
 4 |     long double f;
 5 |     /* Uses rax and rdx for the long double. */
 6 |     f = 0.0L;
 7 |     f = 1.0L;
 8 |     printf("%Lf\n", f);
 9 |     return 0;
10 | }
11 | 


--------------------------------------------------------------------------------
/compiler-generated/long_double_sum.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <time.h>
 3 | 
 4 | int main() {
 5 |     long double f;
 6 |     f = (long double)(time(NULL) % 3);
 7 |     /*
 8 |     GCC 4.8: faddp by default. Contrast with double which uses SSE.
 9 | 
10 |     Reason: FPU has the 80-bit operations which GCC uses for `long double`.
11 |     There is no 128-bit instruction in SSE.
12 |     */
13 |     f += 0.5L;
14 |     printf("%Lf\n", f);
15 |     return 0;
16 | }
17 | 


--------------------------------------------------------------------------------
/compiler-generated/memcmp.c:
--------------------------------------------------------------------------------
 1 | /*
 2 | # memcmp
 3 | 
 4 |     GCC 4.8 calls glibc always.
 5 | 
 6 |     glibc has lengthy SIMD implementations, so it is not viable to inline it.
 7 | 
 8 |     http://stackoverflow.com/questions/21106801/why-is-memcmp-so-much-faster-than-a-for-loop-check
 9 | */
10 | 
11 | #include <limits.h>
12 | #include <stdio.h>
13 | #include <stdlib.h>
14 | #include <string.h>
15 | #include <time.h>
16 | 
17 | int main() {
18 |     size_t i;
19 |     int out;
20 |     enum N { N = 2 };
21 |     char s[N];
22 |     char s2[N];
23 |     srand(time(NULL));
24 |     for (i = 0; i < N; ++i) {
25 |         s[i] = rand() % SCHAR_MAX;
26 |         s2[i] = rand() % SCHAR_MAX;
27 |     }
28 |     out = memcmp(s, s2, N);
29 |     printf("%d\n", out == 0);
30 |     return 0;
31 | }
32 | 


--------------------------------------------------------------------------------
/compiler-generated/modulo.c:
--------------------------------------------------------------------------------
 1 | /*
 2 | GCC 4.8 uses a complicated set of instructions to avoid div which is a very slow instruction.
 3 | 
 4 | http://stackoverflow.com/questions/4361979/how-does-the-gcc-implementation-of-module-work-and-why-does-it-not-use-the
 5 | */
 6 | 
 7 | #include <stdio.h>
 8 | #include <time.h>
 9 | 
10 | int main() {
11 |     int i = (int)(time(NULL) % 3);
12 |     printf("%d\n", i);
13 |     return 0;
14 | }
15 | 


--------------------------------------------------------------------------------
/compiler-generated/precalculate_loop.c:
--------------------------------------------------------------------------------
 1 | /*
 2 | # Precalculate loop
 3 | 
 4 |     Will the compiler precalculate a loop without side effects, and with deterministic inputs?
 5 | 
 6 |     GCC 4.8 -O3: yes. Without this optimization, it takes forever.
 7 | 
 8 |     TODO what is the name for this? Is it related to loop unrolling?
 9 | */
10 | 
11 | #include <stdio.h>
12 | 
13 | int main() {
14 |     unsigned long i;
15 |     for (i = 0; i < 0xFFFFFFFFFFFFFFFFu; i++)
16 |         ;
17 |     printf("%lx\n", i);
18 |     return 0;
19 | }
20 | 


--------------------------------------------------------------------------------
/compiler-generated/precalculate_loop_unknown_init.c:
--------------------------------------------------------------------------------
 1 | /*
 2 | # Precalculate loop
 3 | 
 4 |     Yup.
 5 | */
 6 | 
 7 | #include <stdio.h>
 8 | #include <time.h>
 9 | 
10 | int main() {
11 |     unsigned long i, j;
12 |     j = time(NULL);
13 |     for (i = 0; i < 0xFFFFFFFFFFFFFFFFu; i++, j++)
14 |         ;
15 |     printf("%lx\n", j);
16 |     return 0;
17 | }
18 | 


--------------------------------------------------------------------------------
/compiler-generated/register.c:
--------------------------------------------------------------------------------
 1 | /* TODO unable to see any difference with or without register. */
 2 | 
 3 | #include <time.h>
 4 | 
 5 | int f() {
 6 |     int i = 0;
 7 |     /*
 8 |     TODO: GCC 4.8 -O3 seems to ignore the possibility that this might change i,
 9 |     and always returns 0.
10 | 
11 |     So we don't observe any difference when using `register`.
12 |     */
13 |     (*((int*)time(NULL)))++;
14 |     return i;
15 | }
16 | 
17 | int g() {
18 |     register int i = 0;
19 |     (*((int*)time(NULL)))++;
20 |     return i;
21 | }
22 | 
23 | int main() {
24 |     return 0;
25 | }
26 | 


--------------------------------------------------------------------------------
/compiler-generated/restrict.c:
--------------------------------------------------------------------------------
 1 | void f(int *a, int *b, int *x) {
 2 |     *a += *x;
 3 |     *b += *x;
 4 | }
 5 | 
 6 | void fr(int *restrict a, int *restrict b, int *restrict x) {
 7 |     *a += *x;
 8 |     *b += *x;
 9 | }
10 | 
11 | int main() {
12 |     return 0;
13 | }
14 | 


--------------------------------------------------------------------------------
/compiler-generated/return_struct.c:
--------------------------------------------------------------------------------
 1 | /*
 2 | Return values normally go on `rax`.
 3 | 
 4 | But what happens when returning a large struct that does not fit into registers?
 5 | */
 6 | 
 7 | #include <stdio.h>
 8 | #include <time.h>
 9 | 
10 | struct S {
11 |     int i0;
12 |     int i1;
13 |     int i2;
14 |     int i3;
15 |     int i4;
16 |     int i5;
17 |     int i6;
18 |     int i7;
19 |     int i8;
20 |     int i9;
21 |     int i10;
22 |     int i11;
23 |     int i12;
24 |     int i13;
25 |     int i14;
26 |     int i15;
27 | };
28 | 
29 | struct S f() {
30 |     struct S s = {
31 |         time(NULL), time(NULL), time(NULL), time(NULL),
32 |         time(NULL), time(NULL), time(NULL), time(NULL),
33 |         time(NULL), time(NULL), time(NULL), time(NULL),
34 |         time(NULL), time(NULL), time(NULL), time(NULL)
35 |     };
36 |     return s;
37 | }
38 | 
39 | int main() {
40 |     /*
41 |     GCC 4.8 allocates stack space and passes a pointer to it as a hidden parameter.
42 | 
43 |     f() then initializes that parameter's location.
44 |     */
45 |     struct S s = f();
46 |     int n =
47 |         s.i0 +
48 |         s.i1 +
49 |         s.i2 +
50 |         s.i3 +
51 |         s.i4 +
52 |         s.i5 +
53 |         s.i6 +
54 |         s.i7 +
55 |         s.i8 +
56 |         s.i9 +
57 |         s.i10 +
58 |         s.i11 +
59 |         s.i12 +
60 |         s.i13 +
61 |         s.i14 +
62 |         s.i15;
63 |     printf("%d\n", n);
64 |     return 0;
65 | }
66 | 


--------------------------------------------------------------------------------
/compiler-generated/sqrt.c:
--------------------------------------------------------------------------------
 1 | #include <math.h>
 2 | #include <stdio.h>
 3 | #include <time.h>
 4 | 
 5 | int main() {
 6 |     float f;
 7 |     f = time(NULL) % 3;
 8 |     /* Calls glibc, which uses `sqrtsd` */
 9 |     f = sqrt(f);
10 |     printf("%f\n", f);
11 |     return 0;
12 | }
13 | 


--------------------------------------------------------------------------------
/compiler-generated/static_local.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | 
 3 | int f() {
 4 |     static int i = 1;
 5 |     i++;
 6 |     return i;
 7 | }
 8 | 
 9 | int main() {
10 |     printf("%d\n", f());
11 |     printf("%d\n", f());
12 |     return 0;
13 | }
14 | 


--------------------------------------------------------------------------------
/compiler-generated/switch3.c:
--------------------------------------------------------------------------------
 1 | /*
 2 | GCC 4.8: if else
 3 | */
 4 | 
 5 | #include <stdio.h>
 6 | #include <time.h>
 7 | 
 8 | int main() { /* Map time(NULL) % 3 to j through a 3-case switch and print j. */
 9 |     int i, j = -1; /* -1 == "no case matched" (e.g. time() failed and returned (time_t)-1); avoids reading j uninitialized. */
10 |     i = time(NULL) % 3;
11 |     switch (i) {
12 |         case 0:
13 |             j = 0;
14 |         break;
15 |         case 1:
16 |             j = 1;
17 |         break;
18 |         case 2:
19 |             j = 2;
20 |         break;
21 |     };
22 |     printf("%d\n", j);
23 |     return 0;
24 | }
25 | 


--------------------------------------------------------------------------------
/compiler-generated/switch6.c:
--------------------------------------------------------------------------------
 1 | /*
 2 | GCC 4.8: jump table
 3 | 
 4 | - https://en.wikipedia.org/wiki/Indirect_branch
 5 | - http://stackoverflow.com/questions/9223756/what-does-an-asterisk-before-an-address-mean-in-x86-64-att-assembly
 6 | 
 7 | Basically becomes something like:
 8 | 
 9 |     400661:       83 7d fc 05             cmpl   $0x5,-0x4(%rbp)
10 |     400665:       77 42                   ja     4006a9 <main+0x8c>
11 |     400667:       8b 45 fc                mov    -0x4(%rbp),%eax
12 |     40066a:       48 8b 04 c5 60 07 40    mov    0x400760(,%rax,8),%rax
13 |     400671:       00 
14 |     400672:       ff e0                   jmpq   *%rax
15 |     400674:       c7 45 f8 00 00 00 00    movl   $0x0,-0x8(%rbp)
16 |     40067b:       eb 2c                   jmp    4006a9 <main+0x8c>
17 |     40067d:       c7 45 f8 01 00 00 00    movl   $0x1,-0x8(%rbp)
18 |     400684:       eb 23                   jmp    4006a9 <main+0x8c>
19 | 
20 | which checks if `i > 5`, and if not uses a jump table stored at
21 | `400760` in the `.rodata`, that contains the address of each case in an array.
22 | 
23 | The `.rodata` is relocated to contain text addresses.
24 | */
25 | 
26 | #include <stdio.h>
27 | #include <time.h>
28 | 
29 | int main() { /* Map time(NULL) % 6 to j through a 6-case switch and print j. */
30 |     int i, j = -1; /* -1 == "no case matched" (e.g. time() failed and returned (time_t)-1); avoids reading j uninitialized. */
31 |     i = time(NULL) % 6;
32 |     switch (i) {
33 |         case 0:
34 |             j = 0;
35 |         break;
36 |         case 1:
37 |             j = 1;
38 |         break;
39 |         case 2:
40 |             j = 2;
41 |         break;
42 |         case 3:
43 |             j = 3;
44 |         break;
45 |         case 4:
46 |             j = 4;
47 |         break;
48 |         case 5:
49 |             j = 5;
50 |         break;
51 |     };
52 |     printf("%d\n", j);
53 |     return 0;
54 | }
55 | 


--------------------------------------------------------------------------------
/compiler-generated/switch6_if.c:
--------------------------------------------------------------------------------
 1 | /*
 2 | Identical to switch6 but with if else,
 3 | to see if the compiler can also optimize it with the jump table.
 4 | 
 5 | GCC 4.8 -O0: does not use the jump table.
 6 | */
 7 | 
 8 | #include <stdio.h>
 9 | #include <time.h>
10 | 
11 | int main() { /* Map time(NULL) % 6 to j through an if / else if chain and print j. */
12 |     int i, j = -1; /* -1 == "no branch matched" (e.g. time() failed and returned (time_t)-1); avoids reading j uninitialized. */
13 |     i = time(NULL) % 6;
14 |     if (i == 0) {
15 |         j = 0;
16 |     } else if (i == 1) {
17 |         j = 1;
18 |     } else if (i == 2) {
19 |         j = 2;
20 |     } else if (i == 3) {
21 |         j = 3;
22 |     } else if (i == 4) {
23 |         j = 4;
24 |     } else if (i == 5) {
25 |         j = 5;
26 |     };
27 |     printf("%d\n", j);
28 |     return 0;
29 | }
30 | 


--------------------------------------------------------------------------------
/compiler-generated/switch6_spread.c:
--------------------------------------------------------------------------------
 1 | /*
 2 | Spread out case numbers exponentially to see if the jump table is still used.
 3 | 
 4 | GCC 4.8 -O0: creates 32 entries on the jump table,
 5 | such that each entry that does not have a case jumps to the end of the switch.
 6 | */
 7 | 
 8 | #include <stdio.h>
 9 | #include <time.h>
10 | 
11 | int main() {
12 |     int i, j;
13 |     i = time(NULL) % 6;
14 |     j = -1;
15 |     switch (i) {
16 |         case 1:
17 |             j = 0;
18 |         break;
19 |         case 2:
20 |             j = 1;
21 |         break;
22 |         case 4:
23 |             j = 2;
24 |         break;
25 |         case 8:
26 |             j = 3;
27 |         break;
28 |         case 16:
29 |             j = 4;
30 |         break;
31 |         case 32:
32 |             j = 5;
33 |         break;
34 |     };
35 |     printf("%d\n", j);
36 |     return 0;
37 | }
38 | 


--------------------------------------------------------------------------------
/compiler-generated/switch6_very_spread.c:
--------------------------------------------------------------------------------
 1 | /*
 2 | Spread switch numbers even more.
 3 | 
 4 | GCC 4.8 -O0: Seems to do binary search from some point onwards.
 5 | */
 6 | 
 7 | #include <stdio.h>
 8 | #include <time.h>
 9 | 
10 | int main() {
11 |     int i, j;
12 |     i = time(NULL) % 1000000;
13 |     j = -1;
14 |     switch (i) {
15 |         case 0:
16 |             j = 0;
17 |         break;
18 |         case 1:
19 |             j = 1;
20 |         break;
21 |         case 0x10:
22 |             j = 2;
23 |         break;
24 |         case 0x100:
25 |             j = 3;
26 |         break;
27 |         case 0x1000:
28 |             j = 4;
29 |         break;
30 |         case 0xFFFFFFF:
31 |             j = 5;
32 |         break;
33 |     };
34 |     printf("%d\n", j);
35 |     return 0;
36 | }
37 | 


--------------------------------------------------------------------------------
/compiler-generated/tail_call.c:
--------------------------------------------------------------------------------
 1 | /* https://stackoverflow.com/questions/310974/what-is-tail-call-optimization/55230417#55230417 */
 2 | 
 3 | #include <stdio.h>
 4 | #include <stdlib.h>
 5 | 
 6 | unsigned factorial(unsigned n) {
 7 |     if (n <= 1) { /* Base case: 0! == 1! == 1. Testing only n == 1 would let n == 0 wrap to UINT_MAX and recurse ~2^32 times. */
 8 |         return 1;
 9 |     }
10 |     return n * factorial(n - 1); /* n! == n * (n - 1)!, with unsigned wrap-around on overflow. */
11 | }
12 | 
13 | int main(int argc, char **argv) {
14 |     int input;
15 |     if (argc > 1) {
16 |         input = strtoul(argv[1], NULL, 0);
17 |     } else {
18 |         input = 5;
19 |     }
20 |     printf("%u\n", factorial(input));
21 |     return EXIT_SUCCESS;
22 | }
23 | 


--------------------------------------------------------------------------------
/compiler-generated/template.cpp:
--------------------------------------------------------------------------------
 1 | /*
 2 | # template
 3 | */
 4 | 
 5 | #include <cassert>
 6 | 
 7 | template <class C>
 8 | C f(C i) { return i; }
 9 | 
10 | int main() {
11 |     f<int>(1);
12 |     f<double>(1.5);
13 | }
14 | 


--------------------------------------------------------------------------------
/compiler-generated/use_rax.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | 
 3 | int global = 0;
 4 | 
 5 | int my_function() {
 6 |     /*
 7 |     Is the compiler smart enough to move global into `rax` to increment it,
 8 |     to reuse that for the return value?
 9 | 
10 |     For GCC 4.8:
11 | 
12 |     - without `-O3`: rax used by default, but an extra mov from global to eax is done
13 |     - with `O3`: no extra move. TODO find exact flag that enables this.
14 |     */
15 |     global++;
16 |     return global;
17 | }
18 | 
19 | int main() {
20 |     printf("%d\n", my_function());
21 |     printf("%d\n", my_function());
22 |     return 0;
23 | }
24 | 


--------------------------------------------------------------------------------
/compiler-generated/use_rax_return.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | 
 3 | int global = 0;
 4 | 
 5 | int my_function() {
 6 |     /*
 7 |     Is the compiler smart enough to move global into `rax` to increment it,
 8 |     to reuse that for the return value?
 9 | 
10 |     Yes for GCC 4.8 -O3
11 |     */
12 |     global++;
13 |     return global;
14 | }
15 | 
16 | int main() {
17 |     printf("%d\n", my_function());
18 |     printf("%d\n", my_function());
19 |     return 0;
20 | }
21 | 


--------------------------------------------------------------------------------
/compiler-generated/virtual.cpp:
--------------------------------------------------------------------------------
 1 | /*
 2 | # virtual
 3 | 
 4 |     Let's look at the C++ vtable.
 5 | 
 6 |     - http://stackoverflow.com/questions/5712808/understanding-the-vtable-entries
 7 |     - http://stackoverflow.com/questions/1963926/when-is-a-vtable-created-in-c
 8 | 
 9 |     Every virtual class has:
10 | 
11 |     - a vtable: an array of addresses of the methods of the class stored in `.rodata`
12 |     - an extra hidden field which points to the vtable of this class
13 |     - extra initialization code on the constructor that sets this hidden field
14 | 
15 |     Whenever a virtual method call is done, it:
16 | 
17 |     - dereferences the hidden field to find the vtable
18 |     - adds a known offset depending on which method is being called.
19 |         Each method must have a fixed position in the vtable for all classes.
20 |     - dereferences that address and calls it with `callq *%rax`
21 | 
22 | # typeinfo
23 | 
24 |     There is a single typeinfo object for each virtual class, and it is stored
25 |     8 bytes before the first function.
26 | 
27 | # TODO
28 | 
29 |     And there is yet another field on each vtable:
30 | 
31 |     http://stackoverflow.com/questions/5712808
32 | 
33 |     TODO: observe it.
34 | */
35 | 
36 | #include <cassert>
37 | #include <typeinfo>
38 | #include <iostream>
39 | 
40 | int main() {
41 |     class Base {
42 |         public:
43 |             int i;
44 |             Base(int i) : i(i) {}
45 |             virtual int f() { return this->i; }
46 |             virtual int g() { return this->i + 10; }
47 |     };
48 | 
49 |     class Derived1 : public Base {
50 |         public:
51 |             Derived1(int i) : Base(i) {}
52 |             virtual int f() { return this->i + 1; }
53 |             virtual int g() { return this->i + 11; }
54 |     };
55 | 
56 |     class Derived2 : public Base {
57 |         public:
58 |             Derived2(int i) : Base(i) {}
59 |             virtual int f() { return this->i + 2; }
60 |             virtual int g() { return this->i + 12; }
61 |     };
62 | 
63 |     Base *bp;
64 |     int i;
65 | 
66 |     // For each object of a virtual class, store the call table address
67 |     // on the stack for it. This address falls in `.rodata`.
68 |     Base b(0);
69 |     bp = &b;
70 |     i = bp->f();
71 |     assert(i == 0);
72 |     i = bp->g();
73 |     assert(i == 10);
74 |     std::cout << typeid(*bp).name() << std::endl;
75 | 
76 |     Derived1 d1(0);
77 |     bp = &d1;
78 |     i = bp->f();
79 |     assert(i == 1);
80 |     i = bp->g();
81 |     assert(i == 11);
82 |     std::cout << typeid(*bp).name() << std::endl;
83 | 
84 |     Derived2 d2(0);
85 |     bp = &d2;
86 |     i = bp->f();
87 |     assert(i == 2);
88 |     i = bp->g();
89 |     assert(i == 12);
90 |     std::cout << typeid(*bp).name() << std::endl;
91 | }
92 | 


--------------------------------------------------------------------------------
/compiler-generated/while_infinite.c:
--------------------------------------------------------------------------------
 1 | /*
 2 | # while infinite
 3 | 
 4 |     http://stackoverflow.com/questions/2178115/are-compilers-allowed-to-eliminate-infinite-loops
 5 | */
 6 | 
 7 | int main() {
 8 |     while (1);
 9 |     return 0;
10 | }
11 | 


--------------------------------------------------------------------------------
/containers.md:
--------------------------------------------------------------------------------
 1 | # Containers
 2 | 
 3 | Formats that contain assembly.
 4 | 
 5 | ELF is the most popular format on Linux.
 6 | 
 7 | ### bin
 8 | 
 9 | <http://stackoverflow.com/questions/2427011/what-is-the-difference-between-elf-files-and-bin-files>
10 | 
11 | ### Mach-O
12 | 
13 | <https://en.wikipedia.org/wiki/Mach-O>
14 | 
15 | macOS (Mac OS X).
16 | 
17 | ### PE
18 | 
19 | <https://en.wikipedia.org/wiki/Portable_Executable>
20 | 
21 | Windows `.exe` as of 2013.
22 | 


--------------------------------------------------------------------------------
/cpu-benchmarks.md:
--------------------------------------------------------------------------------
 1 | # CPU benchmarks
 2 | 
 3 | -   <https://en.wikipedia.org/wiki/Instructions_per_cycle>
 4 |     - Useless metric because of CISC vs RISC
 5 | -   <http://cpuboss.com/> lists a few:
 6 |     - parallel:
 7 |         - <https://compubench.com/result.jsp>
 8 |         - <http://www.passmark.com/>
 9 | -   <http://www.eembc.org/ulpbench/>
10 | -   <http://parsec.cs.princeton.edu/> Multicore focus
11 | -   <http://gem5.org/Splash_benchmarks> TODO focus?
12 | -   <https://en.wikipedia.org/wiki/NBench> TODO focus? Canonical source? Not to be confused with the C# benchmark: <https://github.com/petabridge/NBench>
13 | 
14 | Low level:
15 | 
16 | -   DMIPS <https://en.wikipedia.org/wiki/Dhrystone>
17 | 
18 |     - especially DMIPS / Watt.
19 | 
20 | -   <http://lmbench.sourceforge.net/> Description:
21 | 
22 |     > lmbench is a suite of simple, portable, ANSI/C microbenchmarks for UNIX/POSIX. In general, it measures two key features: latency and bandwidth.
23 | 
24 |     GitHub mirror: <https://github.com/dmonakhov/lmbench>
25 | 


--------------------------------------------------------------------------------
/cpu-bugs.md:
--------------------------------------------------------------------------------
 1 | # CPU Bugs
 2 | 
 3 | CPU bugs are obviously very serious for the manufacturer, since users may ask for refunds on an expensive product.
 4 | 
 5 | ## 2015 bug festival
 6 | 
 7 | <http://danluu.com/cpu-bugs/>
 8 | 
 9 | ## Halt and catch fire
10 | 
11 | <http://en.wikipedia.org/wiki/Halt_and_Catch_Fire>
12 | 
13 | ## FDIV bug
14 | 
15 | Found out by a mathematician doing numerical calculations.
16 | 
17 | <http://en.wikipedia.org/wiki/Pentium_FDIV_bug>
18 | 
19 | ## F00F bug
20 | 
21 | <http://en.wikipedia.org/wiki/Pentium_F00F_bug>
22 | 
23 | ## Cyrix bug
24 | 
25 | <http://en.wikipedia.org/wiki/Cyrix_coma_bug>
26 | 


--------------------------------------------------------------------------------
/cpu-hardware-design.md:
--------------------------------------------------------------------------------
 1 | # CPU architecture
 2 | 
 3 | Clock limits:
 4 | 
 5 | - <http://electronics.stackexchange.com/questions/122050/what-limits-cpu-speed>
 6 | - <https://www.quora.com/Will-10Ghz-processors-be-possible-by-2020>
 7 | 
 8 | Misc:
 9 | 
10 | - <http://cs.stackexchange.com/questions/22589/why-does-a-processor-have-32-registers>
11 | - <http://electronics.stackexchange.com/questions/158053/what-components-or-circuitry-exist-that-can-provide-extremely-high-speed-accura>
12 | 


--------------------------------------------------------------------------------
/cpu-optimizations.md:
--------------------------------------------------------------------------------
 1 | # CPU Optimizations
 2 | 
 3 | CPU semi-internals that can be used to write more efficient code.
 4 | 
 5 | Compilers take most of those into consideration.
 6 | 
 7 | ## Instruction level parallelism
 8 | 
 9 | ### Out of order processing
10 | 
11 | <http://en.wikipedia.org/wiki/Out-of-order_execution>
12 | 
13 | - <http://en.wikipedia.org/wiki/Register_renaming>
14 | 
15 | ## Megahertz myth
16 | 
17 | - <http://en.wikipedia.org/wiki/Megahertz_myth>
18 | 


--------------------------------------------------------------------------------
/cpu-pins.md:
--------------------------------------------------------------------------------
 1 | # CPU pins
 2 | 
 3 | ## Simplified CPU
 4 | 
 5 | For many purposes, you can picture a CPU as having only the following pins:
 6 | 
 7 | - memory selector
 8 | - data
 9 | - clock
10 | - ground and 5V
11 | 
12 | ## Read or write two memory slots at once
13 | 
14 | <http://stackoverflow.com/questions/11953352/why-ia32-does-not-allow-memory-to-memory-mov>
15 | 
16 | Impossible, since there is only one memory selector at a time.
17 | 
18 | So instructions can never operate at two different memory locations at once, nor read and write at a single memory location at once.
19 | 
20 | ## Pinout
21 | 
22 | It is a good idea to understand what the physical pins of the processor are and what they do, in order to understand what is actually going on when you execute instructions.
23 | 
24 | ![8086 pinout a](8086-pinout.gif "8086 pinout")
25 | 
26 | ![80387 pinout a](80387-pinout.gif "80387 pinout")
27 | 
28 | TODO understand
29 | 
30 | ## Real CPUs
31 | 
32 | <https://en.wikipedia.org/wiki/CPU_socket>
33 | 


--------------------------------------------------------------------------------
/elf.md:
--------------------------------------------------------------------------------
  1 | # ELF
  2 | 
  3 | ### Linux
  4 | 
  5 | Interesting files in v4.0:
  6 | 
  7 | - `include/uapi/linux/elf.h`: contains most of the format, which is natural since the format is needed from userland to implement compilers, and for the most part is not arch dependent
  8 | - `include/linux/elf.h`
  9 | - `arch/x86/include/asm/elf.h`: x86 specifics described in the AMD64 ABI extension, e.g. the `R_X86_64_64` type
 10 | - `fs/binfmt_elf.c`: this is where the real action happens. `do_execve` in `src/fs/exec.c` from the system call calls `load_elf_binary`, which prepares everything and culminates in a call to `start_thread`
 11 | 
 12 | ### GCC
 13 | 
 14 | In 5.1, definitions are under various `config/` files.
 15 | 
 16 | ## C ABI
 17 | 
 18 | ### _start
 19 | 
 20 | ### Entry point
 21 | 
 22 | [System V ABI AMD64][] says that `main` is the entry point of a C program, and it is called from `_start`, which is the ELF entry point as mentioned in the [System V ABI AMD64][]. `main` is not special in pure assembly.
 23 | 
 24 | > The initial state of the process stack, i.e. when _start is called
 25 | 
 26 | <http://dbp-consulting.com/tutorials/debugging/linuxProgramStartup.html> describes the call sequence that actually happens on Linux.
 27 | 
 28 | TODO where is it mentioned in the arch agnostic standards?
 29 | 
 30 | ## C++ ABI
 31 | 
 32 | The [System V ABI AMD64][] links to the [Itanium C++ ABI][].
 33 | 
 34 | ## Relocation
 35 | 
 36 | <http://stackoverflow.com/questions/12122446/how-does-c-linking-work-in-practice/30507725#30507725>
 37 | 
 38 | ### R_386_32
 39 | 
 40 | To test this one out, try use:
 41 | 
 42 |     a: .long s
 43 |     b: .long s + 0x12345678
 44 |     s:
 45 | 
 46 | then:
 47 | 
 48 |     as --32 -o main.o main.S
 49 |     objdump -dzr main.o
 50 | 
 51 | on Binutils 2.24 gives:
 52 | 
 53 |     00000000 <a>:
 54 |     0:  08 00                   or     %al,(%eax)
 55 |                 0: R_386_32 .text
 56 |     2:  00 00                   add    %al,(%eax)
 57 | 
 58 |     00000004 <b>:
 59 |     4:  80 56 34 12             adcb   $0x12,0x34(%esi)
 60 |                 4: R_386_32 .text
 61 | 
 62 | This makes it really clear how:
 63 | 
 64 | - `a` gets 8 added, which is the length of `a` + `b` (to reach `s`)
 65 | - `b` gets `0x12345680` == `0x12345678 + 8`, where `8` is once again the position of `s`
 66 | 
 67 | ### R_X86_64_PC32
 68 | 
 69 | This is another common relocation method that does:
 70 | 
 71 |     S + A - P
 72 | 
 73 | on 4 bytes, where `P` is the current position of the Program Counter on the text segment, thus the `PC` on the name, A.K.A. the `RIP` register.
 74 | 
 75 | This method is common in x86-64 because `RIP` relative addressing is very popular, and for it to work, the relocation must subtract the position `P`.
 76 | 
 77 | Note that `%RIP` points to the *next* instruction: so it is common to use `A = -4` to remove the offset of the 4 byte address which is at the end of the instruction encoding:
 78 | 
 79 |     X Y A A A A
 80 |     Next instruction <-- RIP
 81 | 
 82 | ### Value of the relocated memory before relocation
 83 | 
 84 | Does the value of the address to be overwritten before linking matter at all?
 85 | 
 86 | ### R_X86_64_16
 87 | 
 88 | ### 8 and 16 bit
 89 | 
 90 | GNU adds 8 and 16 bit relocations as an extension to the ELF standard, which it calls with names like `R_X86_64_16`.
 91 | 
 92 | ### Segment vs segment section
 93 | 
 94 | TODO: confirm: each program header represents either a segment, or a segment section, which is a subdivision of segments.
 95 | 
 96 | For instance, a C hello world contains the lines (order changed):
 97 | 
 98 |     LOAD           0x000000 0x0000000000400000 0x0000000000400000 0x0006fc 0x0006fc R E 0x200000
 99 |     PHDR           0x000040 0x0000000000400040 0x0000000000400040 0x0001f8 0x0001f8 R E 0x8
100 |     INTERP         0x000238 0x0000000000400238 0x0000000000400238 0x00001c 0x00001c R   0x1
101 |     NOTE           0x000254 0x0000000000400254 0x0000000000400254 0x000044 0x000044 R   0x4
102 |     GNU_EH_FRAME   0x0005d0 0x00000000004005d0 0x00000000004005d0 0x000034 0x000034 R   0x4
103 | 
104 | so that the `LOAD` segment contains multiple segment sections `PHDR, INTERP, etc.`
105 | 
106 | ## Alignment of global symbols
107 | 
108 | TODO what are the alignment constraints of global symbols? I observe that 4 byte relocations align at 4 *bits*... what is going on?
109 | 


--------------------------------------------------------------------------------
/endianess.asm:
--------------------------------------------------------------------------------
 1 | ; # Endianess
 2 | 
 3 | ; x86 is little endian.
 4 | 
 5 | ; http://stackoverflow.com/questions/5185551/why-is-x86-little-endian
 6 | 
 7 | %include "lib/common_nasm.inc"
 8 | 
 9 | section .data
10 |     x dw 0x0102
11 | ENTRY
12 |     ASSERT_EQ [x], 2, byte
13 | EXIT
14 | 


--------------------------------------------------------------------------------
/extensions.md:
--------------------------------------------------------------------------------
 1 | # Extensions
 2 | 
 3 | Whacky extensions that did not fit anywhere else:
 4 | 
 5 | -   <http://www.intel.com/content/www/us/en/architecture-and-technology/vpro/vpro-technology-general.html>
 6 | 
 7 |     <https://en.wikipedia.org/wiki/Intel_vPro>
 8 | 
 9 |     <https://en.wikipedia.org/wiki/Intel_Active_Management_Technology>
10 | 


--------------------------------------------------------------------------------
/flynns-taxonomy.md:
--------------------------------------------------------------------------------
 1 | # Flynn's taxonomy
 2 | 
 3 | <http://en.wikipedia.org/wiki/Flynn%27s_taxonomy>
 4 | 
 5 | - SISD
 6 | - SIMD
 7 | - MIMD
 8 | 
 9 | ## SIMD
10 | 
11 | Out of Flynn's taxonomy, this is the model that promises the greatest advances currently.
12 | 
13 | Single instruction, multiple data, for example, making 4 multiplications on a single CPU cycle.
14 | 
15 | More and more, SIMD instructions are being added on different processors.
16 | 
17 | Intel has been grouping those instructions under the SSE name, but previously used other names like MMX.
18 | 
19 | GPGPUs are also another hot SIMD implementation.
20 | 
21 | See also: <http://neilkemp.us/src/sse_tutorial/sse_tutorial.html>
22 | 
23 | ## SIMT
24 | 
25 | GPUs tend strongly to SIMD systems, but NVIDIA prefers to call them SIMT, because they feel they are more flexible than pure SIMD.
26 | 
27 | <http://yosefk.com/blog/simd-simt-smt-parallelism-in-nvidia-gpus.html>
28 | 
29 | It is a flexibility / throughput trade-off.
30 | 


--------------------------------------------------------------------------------
/fpu.md:
--------------------------------------------------------------------------------
 1 | # FPU
 2 | 
 3 | Used to be a separate optional processor called the Floating Point Unit, later integrated into the CPU.
 4 | 
 5 | Has 8 registers: `st0` to `st7`, which form a stack.
 6 | 
 7 | `st0` is always the top of the stack.
 8 | 
 9 | You can only communicate with `stX` from memory, not directly from registers.
10 | 


--------------------------------------------------------------------------------
/gas/Makefile:
--------------------------------------------------------------------------------
1 | ../Makefile


--------------------------------------------------------------------------------
/gas/README.md:
--------------------------------------------------------------------------------
  1 | # Introduction
  2 | 
  3 | ## Why this tutorial uses NASM
  4 | 
  5 | I started this tutorial a long time ago, and I made the mistake of using `nasm` for it by default.
  6 | 
  7 | `gas` is the GNU assembler, backend to GCC, and used in the Linux kernel and glibc, and therefore the only true assembler.
  8 | 
  9 | Oh, also it is cross ISA, in the sense that most of its directives can be used on all ISA, which is awesome.
 10 | 
 11 | Just pipe it through GCC's C preprocessor to gain some missing functionality that `nasm` provides natively, and we are golden.
 12 | 
 13 | ## Actual intro to GAS
 14 | 
 15 | GNU assembler syntax cheatsheet.
 16 | 
 17 | Part of the GNU Binutils projects, which also includes other lower-level toolchain tools like `ld` (but not `gcc`).
 18 | 
 19 | Supposes that you already know IA-32 and NASM.
 20 | 
 21 | Documentation: <https://sourceware.org/binutils/docs-2.25/as/> Many questions can be answered directly via the index: <https://sourceware.org/binutils/docs/as/index.html>
 22 | 
 23 | Executable name: `as`.
 24 | 
 25 | Used by the kernel, GCC default backend and glibc... so just learn it.
 26 | 
 27 | Part of Binutils.
 28 | 
 29 | ## Vs NASM
 30 | 
 31 | Advantages over NASM:
 32 | 
 33 | -   can handle multiple architectures, not just x86, including:
 34 | 
 35 |     - i386
 36 |     - SPARC
 37 |     - ARM
 38 | 
 39 |     and more.
 40 | 
 41 | -   on the other hand, NASM is slightly more convenient to write by hand, maybe because it is more focused on being a direct input language, rather than a GCC backend.
 42 | 
 43 | But given the huge Linux support for GAS, it is my preferred assembler.
 44 | 
 45 | ### GCC as a GAS front-end
 46 | 
 47 | GCC can be used as a frontend for GAS:
 48 | 
 49 |     gcc -S a.c -o a.s
 50 |     gcc -masm=att -S a.c -o a.s
 51 |     gcc -masm=intel -S a.c -o a.s
 52 | 
 53 | ## Comments
 54 | 
 55 | Multiline comments are like C comments.
 56 | 
 57 | Single line comments vary with language.
 58 | 
 59 | For x86, they are `# ` and **not** `;`!
 60 | 
 61 | ## Registers
 62 | 
 63 | Registers are prefixed by a percent sign `%`. E.g.:
 64 | 
 65 |     movl %eax, %ebx
 66 | 
 67 | ## Integer constants
 68 | 
 69 | Constants are prefixed by a dollar sign `$`. E.g.:
 70 | 
 71 |     movl $1, %eax
 72 | 
 73 | ## Order
 74 | 
 75 | Operand order is different from Intel syntax: the source comes first and the destination last.
 76 | 
 77 | For example, the following moves `eax` to `ebx`:
 78 | 
 79 |     movl %eax, %ebx
 80 | 
 81 | Which is kind of logical because it is the order in which we say it: `move eax to ebx`. It is however different from the usual C `=` operator, which would be something like:
 82 | 
 83 |     ebx = eax
 84 | 
 85 | ## Instruction lengths
 86 | 
 87 | Suffixes are added to instructions to specify length:
 88 | 
 89 | - `b` = byte (8 bit)
 90 | - `s` = short (16 bit integer) or single (32-bit floating point)
 91 | - `w` = word (16 bit)
 92 | - `l` = long (32 bit integer or 64-bit floating point)
 93 | - `q` = quad (64 bit)
 94 | - `dq` = double quad (128 bit)
 95 | - `t` = ten bytes (80-bit floating point)
 96 | 
 97 | These are separate words in NASM. TODO can those be specified in macros? in NASM yes because they are separate words
 98 | 
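 99 | If you add two suffixes to an instruction, they give the source and destination sizes of an extending move, e.g.: `movzbl` zero extends a byte into a long, and `movsbl` sign extends it.
100 | 
101 | In Intel syntax, those are called `MOVZX` and `MOVSX` respectively.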
102 | 
103 | ## Directives
104 | 
105 | Stuff that starts with a dot `.` and does not specify machine instructions directly but rather gives information to GAS.
106 | 
107 | - `.text`: text segment
108 | - `.data`: data segment
109 | - `.global`: make a symbol visible to the linker (`STB_GLOBAL` in ELF)
110 | - `.globl`: same as global
111 | - `.loc`: debugging source line info, only generated when debug information is requested, e.g. with `-g` or `-ggdb`
112 | - `.align`:
113 | - `.zero`:
114 | - `.macro`: macros
115 | 
116 | ### cfi directives
117 | 
118 | Call frame information.
119 | 
120 | Appear all over GCC generated code for C sources.
121 | 
122 | <http://stackoverflow.com/questions/2529185/what-are-cfi-directives-in-gnu-assembler-gas-used-for>
123 | 


--------------------------------------------------------------------------------
/gas/altmacro.S:
--------------------------------------------------------------------------------
  1 | /*
  2 | # altmacro
  3 | 
  4 |     Enables the alternate macro mode from now on.
  5 | 
  6 |     Can also be set as a command line option `--alternate`.
  7 | 
  8 |     This adds extra capabilities to macros:
  9 | 
 10 |     - LOCAL
 11 |     - % string evaluation
 12 | 
 13 | # noaltmacro
 14 | 
 15 |     Turns off `.altmacro`.
 16 | */
 17 | 
 18 | #include "lib/common_gas.h"
 19 | 
 20 | ENTRY
 21 | 
 22 |     .altmacro
 23 | 
 24 |     /*
 25 |     With altmacro, \ is not needed to expand the arguments.
 26 | 
 27 |     TODO undocumented?
 28 |     */
 29 | 
 30 |         .macro STRING_ARG x
 31 |             mov x, %eax
 32 |         .endm
 33 |         mov $0, %eax
 34 |         STRING_ARG $1
 35 |         ASSERT_EQ($1, %eax)
 36 | 
 37 |         /* But escaping the argument with a backslash still works: invoke the backslash variant to check it. */
 38 |         .macro STRING_ARG_BACKSLASH x
 39 |             mov \x, %eax
 40 |         .endm
 41 |         mov $0, %eax
 42 |         STRING_ARG_BACKSLASH $1
 43 |         ASSERT_EQ($1, %eax)
 44 | 
 45 |         /* TODO how to avoid expansion? */
 46 | 
 47 |     /*
 48 |     # LOCAL
 49 | 
 50 |         Generates local labels that are impossible to clash.
 51 |     */
 52 | 
 53 |         .macro LOCAL_KEYWORD a="%eax"
 54 |             LOCAL ok
 55 |             mov $1, %eax
 56 |             jmp ok
 57 |                 call assert_fail
 58 |             ok:
 59 |         .endm
 60 | 
 61 |         LOCAL_KEYWORD
 62 |         LOCAL_KEYWORD
 63 | 
 64 |     /*
 65 |     # %
 66 | 
 67 |     # Percent
 68 | 
 69 |         Appears to work only on arguments passed or their default values.
 70 |     */
 71 | 
 72 |             .macro PERCENT x
 73 |                 mov $\x, %eax
 74 |             .endm
 75 |             mov $0, %eax
 76 |             PERCENT %1+1
 77 |             ASSERT_EQ($2, %eax)
 78 | 
 79 |             .macro PERCENT_DEFAULT x=%1+1
 80 |                 mov $\x, %eax
 81 |             .endm
 82 |             mov $0, %eax
 83 |             PERCENT_DEFAULT 1
 84 |             ASSERT_EQ($1, %eax)
 85 |             PERCENT_DEFAULT
 86 |             ASSERT_EQ($2, %eax)
 87 | 
 88 |         /*
 89 |         This is a horrible feature as it makes it impossible to pass registers
 90 |         as arguments to altmacros without special escaping care...
 91 | 
 92 |         http://stackoverflow.com/questions/19776992/gas-altmacro-macro-with-a-percent-sign-in-a-default-parameter-fails-with-oper
 93 |         */
 94 | 
 95 |             .macro PERCENT_ESCAPE x
 96 |                 mov \x, %eax
 97 |             .endm
 98 |             mov $0, %eax
 99 |             mov $1, %ebx
100 |             PERCENT_ESCAPE <%ebx>
101 |             ASSERT_EQ($1, %eax)
102 | 
103 |             .macro PERCENT_ESCAPE_DEFAULT x=<%ebx>
104 |                 mov \x, %eax
105 |             .endm
106 |             mov $0, %eax
107 |             mov $1, %ebx
108 |             PERCENT_ESCAPE_DEFAULT
109 |             ASSERT_EQ($1, %eax)
110 | 
111 |         /*
112 |         One alternative if we are sure that the argument is a register,
113 |         is to put the percent inside the macro:
114 |         */
115 | 
116 |             .macro PERCENT_ESCAPE_REG x
117 |                 mov %x, %eax
118 |             .endm
119 |             mov $0, %eax
120 |             mov $1, %ebx
121 |             PERCENT_ESCAPE_REG ebx
122 |             ASSERT_EQ($1, %eax)
123 | 
124 |         /* But this has the downside that we cannot pass immediates like `$1` anymore */
125 | 
126 |             /*PERCENT_ESCAPE_REG $1*/
127 | 
128 |         /* TODO application? */
129 | 
130 |     .noaltmacro
131 | 
132 | EXIT
133 | 


--------------------------------------------------------------------------------
/gas/ascii.S:
--------------------------------------------------------------------------------
 1 | /*
 2 | # ascii
 3 | 
 4 | https://sourceware.org/binutils/docs/as/Ascii.html#Ascii
 5 | 
 6 | Create a byte string from ASCII characters.
 7 | 
 8 | Not null terminated.
 9 | 
10 | TODO vs string? https://sourceware.org/binutils/docs/as/String.html
11 | */
12 | 
13 | #include "lib/common_gas.h"
14 | 
15 | .data
16 | s:
17 |     .ascii "abc\0"
18 |     s_len = . - s
19 | cat:
20 |     .ascii  "ab" "cd"
21 | 
22 | ENTRY
23 |     mov s, %eax
24 |     ASSERT_EQ($0x00636261, %eax)
25 | 
26 |     /* Concatenates literals like C does. */
27 |     mov cat, %eax
28 |     ASSERT_EQ($0x64636261, %eax)
29 | 
30 |     mov $s_len, %eax
31 |     ASSERT_EQ($4, %eax)
32 | 
33 |     mov $0, %edx
34 |     mov $30, %eax
35 |     mov $25, %ecx
36 |     div %ecx
37 |     mov %edx, %eax
38 |     ASSERT_EQ($5, %eax)
39 | 
40 | EXIT
41 | 


--------------------------------------------------------------------------------
/gas/asciz.S:
--------------------------------------------------------------------------------
 1 | /*
 2 | # asciz
 3 | 
 4 |     Like `.ascii` but adds null char to string
 5 | 
 6 |     Same as:
 7 | 
 8 |         .ascii "assert failed\n\x00"
 9 |         .ascii "assert failed\n\000"
10 | */
11 | 
12 | #include "lib/common_gas.h"
13 | 
14 | .data
15 | s:
16 |     .asciz "abc"
17 |     s_len = . - s
18 | cat:
19 |     .asciz "a" "b"
20 | 
21 | ENTRY
22 |     mov s, %eax
23 |     ASSERT_EQ($0x00636261, %eax)
24 | 
25 |     /*
26 |     Concatenates literals somewhat like C does,
27 |     BUT adds a null after each one.
28 |     */
29 |     mov cat, %eax
30 |     ASSERT_EQ($0x00620061, %eax)
31 | 
32 |     mov $s_len, %eax
33 |     ASSERT_EQ($4, %eax)
34 | EXIT
35 | 


--------------------------------------------------------------------------------
/gas/bibliography.md:
--------------------------------------------------------------------------------
1 | # Bibliography
2 | 
3 | -   <http://www.ibm.com/developerworks/library/l-gas-nasm/>:
4 | 
5 |     gas differences from nasm. Great intro if you already know nasm.
6 | 
7 | -   <http://sourceware.org/binutils/docs/as/>:
8 | 


--------------------------------------------------------------------------------
/gas/char_literal.S:
--------------------------------------------------------------------------------
 1 | /*
 2 | http://stackoverflow.com/questions/33246811/how-to-use-character-literals-in-gnu-gas-to-replace-numbers
 3 | 
 4 | This syntax plays horribly with the C preprocessor:
 5 | 
 6 |     MACRO($'a)
 7 | 
 8 | fails because cpp treats string and char literals magically.
 9 | 
10 | GAS macros work better here.
11 | 
12 | And it generates trailing whitespace... bad move from GAS devs. They should just have closed it like C.
13 | */
14 | 
15 | #include "lib/common_gas.h"
16 | 
17 | ENTRY
18 | 
19 |     /* Memory. */
20 |     mov c, %al
21 |     ASSERT_EQ($0x61, %al)
22 | 
23 |     /* Immediate. Without the `$`, does a memory access, and segfaults! */
24 |     mov $'b, %al
25 |     ASSERT_EQ($0x62, %al)
26 | 
27 |     /* Space character works. */
28 |     mov $' , %al
29 |     ASSERT_EQ($0x20, %al)
30 | 
31 |     /* Backslash escapes work. */
32 |     mov $'\n , %al
33 |     ASSERT_EQ($0x0A, %al)
34 | 
35 | EXIT
36 | 
37 | c:
38 |     .byte 'a
39 | 


--------------------------------------------------------------------------------
/gas/current_address.S:
--------------------------------------------------------------------------------
 1 | /*
 2 | # Current address
 3 | 
 4 | # dot
 5 | 
 6 | # .
 7 | 
 8 |     http://stackoverflow.com/questions/8987767/is-there-a-symbol-that-represents-the-current-address-in-gnu-gas-assembly
 9 | */
10 | 
11 | #include "lib/common_gas.h"
12 | 
13 | /* x points to its own address. */
14 | x: .long .
15 | ENTRY
16 |     mov x, %eax
17 |     mov $x, %ebx
18 |     ASSERT_EQ(%ebx, %eax)
19 | EXIT
20 | 


--------------------------------------------------------------------------------
/gas/equ.S:
--------------------------------------------------------------------------------
 1 | /* # equ
 2 |  *
 3 |  * # set
 4 |  *
 5 |  *     `.equ` and `.set` are the same.
 6 |  *
 7 |  *     http://stackoverflow.com/questions/21624155/difference-between-equ-and-word-in-arm-assembly
 8 |  *
 9 |  *     - https://sourceware.org/binutils/docs/as/Equ.html
10 |  *     - https://sourceware.org/binutils/docs/as/Set.html
11 |  */
12 | 
13 | #include "lib/common_gas.h"
14 | 
15 | .data
16 | 
17 |     .equ asdf, 0x1234567
18 | 
19 | ENTRY
20 | 
21 |     .equ x, 0x1234567
22 |     mov $0, %eax
23 |     mov $x, %eax
24 |     ASSERT_EQ($0x1234567, %eax)
25 | 
26 |     .set y, 0x1234567
27 |     mov $0, %eax
28 |     mov $y, %eax
29 |     ASSERT_EQ($0x1234567, %eax)
30 | 
31 | 
32 |     /* TODO: vs macro?
33 |      *
34 |      * One difference is that expressions can be used. Is that the main one?
35 |      */
36 | 
37 |     .set y, 2 * 1 + 1
38 |     mov $0, %eax
39 |     mov $y, %eax
40 |     ASSERT_EQ($0x3, %eax)
41 | 
42 | EXIT
43 | 


--------------------------------------------------------------------------------
/gas/extern.md:
--------------------------------------------------------------------------------
1 | # extern
2 | 
3 | Unlike `nasm`, GAS has no extern. Every undefined symbol shows as an undefined symbol in the output automatically: no need to declare them.
4 | 
5 | <https://sourceware.org/binutils/docs/as/Extern.html#Extern>
6 | 


--------------------------------------------------------------------------------
/gas/gasversion.S:
--------------------------------------------------------------------------------
 1 | /* https://stackoverflow.com/questions/51008231/how-to-check-the-version-of-binutils-on-gnu-gas-assembly-code-at-compile-time */
 2 | 
 3 | #include "lib/common_gas.h"
 4 | 
 5 | ENTRY
 6 | .if .gasversion. >= 22800 && .gasversion. < 22900
 7 |     PRINT_STRING_LITERAL("bug")
 8 | .else
 9 |     PRINT_STRING_LITERAL("no bug")
10 | .endif
11 | EXIT
12 | 


--------------------------------------------------------------------------------
/gas/global.S:
--------------------------------------------------------------------------------
 1 | /*
 2 | # .global
 3 | 
 4 | By default, ELF symbols are STB_LOCAL, and not visible by the linker.
 5 | 
 6 | This makes them STB_GLOBAL.
 7 | 
 8 | Observe this with:
 9 | 
10 |     readelf -s object-file.o
11 | 
12 | The entry symbol, notably, must be global.
13 | 
14 | Local symbols still appear on the output object file. To remove them completely, use local symbol `.L`.
15 | 
16 | # .local
17 | 
18 | TODO what is the effect of this, since variables are already local by default?
19 | */
20 | 
21 | #include "lib/common_gas.h"
22 | 
23 | .data
24 | 
25 |     not_global:
26 |         .long 1
27 | 
28 |     .global yes_global
29 |     yes_global:
30 |         .long 2
31 | 
32 |     .local yes_local
33 |     yes_local:
34 |         .long 3
35 | 
36 | ENTRY
37 |     mov not_global, %eax
38 |     ASSERT_EQ($1, %eax)
39 | 
40 |     mov yes_global, %eax
41 |     ASSERT_EQ($2, %eax)
42 | 
43 |     mov yes_local, %eax
44 |     ASSERT_EQ($3, %eax)
45 | 
46 | EXIT
47 | 


--------------------------------------------------------------------------------
/gas/irp.S:
--------------------------------------------------------------------------------
 1 | /*
 2 | # irp
 3 | 
 4 | Macro for in loop.
 5 | 
 6 | TODO: mnemonic?
 7 | 
 8 | See also: .rept
 9 | */
10 | 
11 | #include "lib/common_gas.h"
12 | 
13 | ENTRY
14 |     mov $0, %eax
15 |     /*
16 |     Generate:
17 | 
18 |         add $3, %eax
19 |         add $1, %eax
20 |         add $2, %eax
21 |     */
22 |     .irp i, 3, 1, 2
23 |         add $\i, %eax
24 |     .endr
25 |     ASSERT_EQ($6, %eax)
26 | EXIT
27 | 


--------------------------------------------------------------------------------
/gas/lib:
--------------------------------------------------------------------------------
1 | ../lib


--------------------------------------------------------------------------------
/gas/linux/Makefile:
--------------------------------------------------------------------------------
 1 | .PHONY: clean run test
 2 | 
 3 | hello_world.out: hello_world.S
 4 | 	as --32 -o hello_world.o hello_world.S
 5 | 	ld -m elf_i386 -o hello_world.out hello_world.o
 6 | 
 7 | clean:
 8 | 	rm -f *.o *.out
 9 | 
10 | # `run` and `test` both build the executable if needed and then execute it.
11 | run test: hello_world.out
12 | 	./hello_world.out
13 | 


--------------------------------------------------------------------------------
/gas/linux/README.md:
--------------------------------------------------------------------------------
1 | # Linux hello world
2 | 
3 | Minimal and rather sane Linux hello world.
4 | 


--------------------------------------------------------------------------------
/gas/linux/hello_world.S:
--------------------------------------------------------------------------------
 1 | .data
 2 | s:
 3 |     .ascii "hello world\n"
 4 |     len = . - s
 5 | .text
 6 | .global _start
 7 | _start:
 8 |     /* write */
 9 |     mov $4, %eax
10 |     mov $1, %ebx
11 |     mov $s, %ecx
12 |     mov $len, %edx
13 |     int $0x80
14 | 
15 |     /* exit */
16 |     mov $1, %eax
17 |     mov $0, %ebx
18 |     int $0x80
19 | 


--------------------------------------------------------------------------------
/gas/local_label.S:
--------------------------------------------------------------------------------
 1 | /*
 2 | # Local label
 3 | 
 4 | https://sourceware.org/binutils/docs/as/Symbol-Names.html#Symbol-Names
 5 | 
 6 | Labels that contain only digits are magic and can be defined multiple times:
 7 | 
 8 | You can then write `1b` and `1f` to refer to the latest and next definition of such labels.
 9 | 
10 | Such labels don't appear on the output file.
11 | */
12 | 
13 | #include "lib/common_gas.h"
14 | 
15 | ENTRY
16 |     /* Basic forward local jump. */
17 |     1:
18 |     jmp 1f
19 |     ASSERT_FAIL
20 |     1:
21 | 
22 |     /* Example with a backwards jump. */
23 |     jmp 2f
24 |     ASSERT_FAIL
25 |     1:
26 |     jmp 3f
27 |     ASSERT_FAIL
28 |     2:
29 |     jmp 1b
30 |     ASSERT_FAIL
31 |     3:
32 | 
33 | EXIT
34 | 


--------------------------------------------------------------------------------
/gas/local_symbol.S:
--------------------------------------------------------------------------------
 1 | /*
 2 | # local symbol
 3 | 
 4 | # .L
 5 | 
 6 | Symbols that start with `.L` don't appear at all on the ELF output.
 7 | 
 8 | By default, symbols do appear on ELF, but are local.
 9 | 
10 | Unlike NASM's `.label`, such labels are visible on the entire file,
11 | and cannot be redefined.
12 | */
13 | 
14 | #include "lib/common_gas.h"
15 | 
16 | .data
17 |     not_local:
18 |         .long 1
19 | 
20 |     .Lyes_local:
21 |         .long 2
22 | ENTRY
23 |     mov not_local, %eax
24 |     ASSERT_EQ($1, %eax)
25 | 
26 |     mov .Lyes_local, %eax
27 |     ASSERT_EQ($2, %eax)
28 | 
29 | EXIT
30 | 


--------------------------------------------------------------------------------
/gas/macro.S:
--------------------------------------------------------------------------------
 1 | /*
 2 | # macro
 3 | 
 4 |     Avoid it, use `cpp` instead which is more powerful.
 5 | */
 6 | 
 7 | #include "lib/common_gas.h"
 8 | 
 9 | ENTRY
10 | 
11 |     /*
12 |     # Labels in macros
13 | 
14 |         If you are going to use a macro with a label inside it many times
15 |         you need some way of ensuring that this label will be unique
16 |         for each macro invocation.
17 |     */
18 | 
19 |         /* # \@ technique. Not 100% safe, but good enough. */
20 | 
21 |             .macro LOCAL_AT
22 |                 jmp _local_\@_ok
23 |                     call assert_fail
24 |                 _local_\@_ok:
25 |             .endm
26 | 
27 |             LOCAL_AT
28 |             LOCAL_AT
29 | 
30 |         /* The best technique is to use LOCAL from .altmacro. */
31 | 
32 |     /*
33 |     Unlike the C preprocessor, GAS macros don't treat string literals
34 |     magically and expand inside them as well.
35 |     */
36 | 
37 |         .macro STRING_EXPAND x
38 |             .ascii "\x"
39 |         .endm
40 |         mov s, %al
41 |         ASSERT_EQ($0x61, %al)
42 | 
43 | EXIT
44 | 
45 | .data
46 |     s:
47 |         STRING_EXPAND a
48 | 


--------------------------------------------------------------------------------
/gas/params.makefile:
--------------------------------------------------------------------------------
1 | DISAS_FLAVOR = att
2 | PHONY_MAKES = linux
3 | 


--------------------------------------------------------------------------------
/gas/print.S:
--------------------------------------------------------------------------------
 1 | /*
 2 | # print
 3 | 
 4 |     Just print a string.
 5 | 
 6 | # warn
 7 | 
 8 |     Print the warning.
 9 | 
10 | # error
11 | 
12 |     Print it and stop compilation.
13 | 
14 | # Expected output
15 | 
16 |     print
17 |     print.S: Assembler messages:
18 |     print.S:5: Warning: warn
19 |     print.S:6: Error: error
20 | */
21 | 
22 | #include "lib/common_gas.h"
23 | 
24 | ENTRY
25 |     /*
26 |     .print "print"
27 |     .warning "warn"
28 |     .error "error"
29 |     */
30 | EXIT
31 | 


--------------------------------------------------------------------------------
/gas/push.S:
--------------------------------------------------------------------------------
 1 | #include "lib/common_gas.h"
 2 | /* # push, # pop: point esp at our own data to observe what push and pop do to memory and to esp. */
 3 | .data
 4 |     val1:
 5 |         .long 1
 6 |     val2:
 7 |         .long 2
 8 |     orig:
 9 |         .skip 4
10 | ENTRY
11 |     /* Save the real stack pointer so that it can be restored later. */
12 |     mov %esp, orig
13 |     /* Use val2 as the stack top: the push then stores its 4 bytes just below it, on val1. */
14 |     mov $val2, %esp
15 |     push $3
16 |     mov %esp, %eax
17 | 
18 |     /* Restore esp to not break our IO. */
19 |     mov orig, %esp
20 | 
21 |     /* val1 was modified by the push. */
22 |     ASSERT_EQ($3, val1)
23 | 
24 |     /* esp was reduced by 4. */
25 |     ASSERT_EQ($val1, %eax)
26 |     /* pop does the reverse: it reads val1 back into ebx and moves esp up by 4, to val2. */
27 |     mov $val1, %esp
28 |     pop %ebx
29 |     mov %esp, %eax
30 |     mov orig, %esp
31 |     ASSERT_EQ($3, %ebx)
32 |     ASSERT_EQ($val2, %eax)
33 | 
34 | EXIT
35 | 


--------------------------------------------------------------------------------
/gas/section.S:
--------------------------------------------------------------------------------
 1 | /*
 2 | # .section
 3 | 
 4 |     Create ELF sections.
 5 | 
 6 | # .segment
 7 | 
 8 |     Same as `.section`.
 9 | 
10 |     So don't use, since what we actually create are sections,
11 |     not segments, which are created by the linker.
12 | 
13 | # .text
14 | 
15 |     TODO vs `.section .text`
16 | 
17 | # .data
18 | 
19 |     Creates a data section.
20 | 
21 |     TODO vs `.section .data`
22 |     http://stackoverflow.com/questions/21606690/whats-difference-between-section-data-and-just-data-in-gas-coding
23 | 
24 |     Actually looks like the exact same: even with `.section .data`,
25 |     the section is treated magically, e.g. gets W flag.
26 | 
27 | # Section flags
28 | 
29 |     Specified as:
30 | 
31 |         .section .custom, "awx"
32 | 
33 |     This would be an Allocatable Writable and eXecutable section.
34 | */
35 | 
36 | #include "lib/common_gas.h"
37 | 
38 | .section .data
39 |     i: .long 1
40 | .data
41 |     j: .long 1
42 | ENTRY
43 |     mov i, %eax
44 |     ASSERT_EQ($1, %eax)
45 | EXIT
46 | 


--------------------------------------------------------------------------------
/gas/size.S:
--------------------------------------------------------------------------------
 1 | /*
 2 | # size
 3 | 
 4 | https://sourceware.org/binutils/docs/as/Size.html#Size
 5 | 
 6 | Observe the generated object file with `readelf -s`.
 7 | */
 8 | 
 9 | #include "lib/common_gas.h"
10 | 
11 | ENTRY
12 | EXIT
13 | 
14 | /*
15 | In ELF, adds st_size attribute of a symbol.
16 | GCC emits those automatically for C functions, even in `-g0`, as: .size main, .-main
17 | */
18 | .type myfunc, @function
19 | .global myfunc
20 | myfunc:
21 |     nop
22 | .size myfunc, . - myfunc
23 | 
24 | /*
25 | Without it, the size just gets set to 0, which means unknown size.
26 | */
27 | .type myfunc_nosize, @function
28 | .global myfunc_nosize
29 | myfunc_nosize:
30 |     nop
31 | 
32 | .global myfunc_nosize2
33 | myfunc_nosize2:
34 |     nop
35 | 


--------------------------------------------------------------------------------
/gas/type.S:
--------------------------------------------------------------------------------
 1 | /* # type
 2 | 
 3 | https://sourceware.org/binutils/docs/as/Type.html#Type
 4 | 
 5 | Observe the generated object file with `readelf -s`.
 6 | */
 7 | 
 8 | #include "lib/common_gas.h"
 9 | 
10 | ENTRY
11 | EXIT
12 | 
13 | /*
14 | In ELF, adds the STT_FUNC type to the symbol.
15 | GCC emits those automatically for C functions, even in `-g0`.
16 | 
17 | The at sign `@` is mandatory and has no meaning.
18 | */
19 | .type myfunc, @function
20 | .global myfunc
21 | myfunc:
22 | 
23 | .global myfunc_notype
24 | myfunc_notype:
25 | 


--------------------------------------------------------------------------------
/how-to-learn.md:
--------------------------------------------------------------------------------
 1 | # How to learn
 2 | 
 3 | Cycle between the following until you are a master:
 4 | 
 5 | 1. write assembly
 6 | 1. read Intel manual pages
 7 | 1. decompile and interpret what compilers compiled: simple examples, Kernel, and hardcore libraries like glibc
 8 | 
 9 | Clearly separate OS specifics from hardware specifics.
10 | 


--------------------------------------------------------------------------------
/implementations.md:
--------------------------------------------------------------------------------
 1 | # Implementations
 2 | 
 3 | Current:
 4 | 
 5 | - Intel, created and holds most copyrights for IA-32
 6 | - AMD, created and holds copyrights for x86-64
 7 | 
 8 | Past:
 9 | 
10 | -   VIA https://en.wikipedia.org/wiki/VIA_Technologies . Bought:
11 | 
12 |     -   Cyrix https://en.wikipedia.org/wiki/Cyrix from National Semiconductor
13 |     -   Centaur https://en.wikipedia.org/wiki/Centaur_Technology
14 | 
15 | Intel and AMD are the major ones.
16 | 
17 | Cyrix was an implementor before being bought by National Semiconductor, and VIA continued to produce x86 processors after buying Cyrix from NS.
18 | 
19 | ## x86 licensing
20 | 
21 | You need a license to implement x86, at least the parts which are newer than 20 years ago:
22 | 
23 | - <http://arstechnica.com/civis/viewtopic.php?f=8&t=852658>
24 | - Cyrix got into trouble for not licensing it https://en.wikipedia.org/wiki/Cyrix#Legal_troubles
25 | 
26 | Intel and AMD have cross-licensing agreements:
27 | 
28 | - http://www.kitguru.net/components/cpu/anton-shilov/amd-clarifies-cross-license-with-intel-change-of-control-terminates-agreement-for-both/
29 | - http://www.sec.gov/Archives/edgar/data/2488/000119312509236705/dex102.htm
30 | - https://www.reddit.com/r/hardware/comments/3b0ytk/discussion_what_is_an_x86_license_and_why_do/
31 | - http://www.anandtech.com/show/2873
32 | - http://www.eetimes.com/document.asp?doc_id=1129690
33 | - Interview with W. Jerry Sanders http://silicongenesis.stanford.edu/transcripts/sanders.htm
34 | 
35 | ## Open implementations
36 | 
37 | <https://github.com/marmolejo/zet> TODO legal?
38 | 


--------------------------------------------------------------------------------
/instruction-level-parallelism.md:
--------------------------------------------------------------------------------
1 | # Instruction level parallelism
2 | 
3 | <https://en.wikipedia.org/wiki/Instruction-level_parallelism>
4 | 


--------------------------------------------------------------------------------
/instruction-sets.md:
--------------------------------------------------------------------------------
 1 | # Instruction sets
 2 | 
 3 | ## Instruction set architecture
 4 | 
 5 | ## ISA
 6 | 
 7 | List of operations the processor can do. Obviously ultra processor dependent.
 8 | 
 9 | - <http://en.wikipedia.org/wiki/Comparison_of_CPU_architectures>
10 | - <http://en.wikipedia.org/wiki/Instruction_set>
11 | 
12 | x86 is a family of ISAs.
13 | 


--------------------------------------------------------------------------------
/intel-processor-history.md:
--------------------------------------------------------------------------------
 1 | # Intel processor history
 2 | 
 3 | Intel is highly backwards compatible and hugely influential, so understanding their processor history helps to understand terms and concepts which are still relevant.
 4 | 
 5 | ## Intel 4004
 6 | 
 7 | <https://en.wikipedia.org/wiki/Intel_4004>
 8 | 
 9 | The first commercial integrated CPU in the world. Huge breakthrough.
10 | 
11 | ## Intel 8080
12 | 
13 | 1974, 8 bit, 2MHz. Hit a price / performance point that kick-started the PC revolution: Apple, Microsoft, etc.
14 | 
15 | Before it, computers were huge machines for large corporations only.
16 | 
17 | ## Intel 8086
18 | 
19 | 1978, 16 bit, very popular, base of the x86 IA (Intel Architecture).
20 | 
21 | TODO why `86`?
22 | 
23 | ## Intel 80386
24 | 
25 | aka i386
26 | 
27 | 1985, 32 bit word register
28 | 
29 | ## Intel 8087
30 | 
31 | 1980
32 | 
33 | External floating point co-processor for the 8086.
34 | 
35 | In other words: this is not a CPU, but something external to the CPU, which the CPU could interface with.
36 | 
37 | Included inside CPUs starting from the 80486.
38 | 
39 | x87 often used to describe the floating point operations inside the processors
40 | 
41 | Instructions include:
42 | 
43 | - `FSQRT`
44 | - `FSIN`
45 | 
46 | ## Intel 80486
47 | 
48 | 1989
49 | 
50 | Includes a floating point unit inside of it.
51 | 
52 | ## Intel Core
53 | 
54 | Generic Intel branding since 2007.
55 | 
56 | ## i3
57 | 
58 | ## i5
59 | 
60 | ## i7
61 | 
62 | Generic branding by Intel since 2010.
63 | 
64 | They just mean low, mid and high end, nothing else is implied, not even the number of cores.
65 | 
66 | A much more meaningful way to group the processors is by the architecture, e.g. Ivy Bridge, Haswell, etc.
67 | 
68 | Each architecture is also referred to as a generation, which started counting with the Nehalem, so for example Ivy Bridge is the 3rd generation.
69 | 
70 | To be really precise, you have to talk about the model ID for each processor. Those are of the form:
71 | 
72 |     iA-BXXXL
73 | 
74 | for example:
75 | 
76 |     i5-3210M
77 | 
78 | where:
79 | 
80 | - `A`: the digit in `i3`, `i5`, `i7`: the low, mid and high end marker
81 | - `B`: is usually the generation, e.g. 3 for Ivy Bridge.
82 | - `XXX`: further specifies the model
83 | - `L`: zero or more letters that further specify the type of the CPU, e.g. `M` is often used for mobile targeted CPUs
84 | 


--------------------------------------------------------------------------------
/intel-vs-atet.md:
--------------------------------------------------------------------------------
 1 | # Intel vs AT&T syntax
 2 | 
 3 | There is no standard for x86 assembly syntax notation.
 4 | 
 5 | Two notations dominate: Intel and AT&T.
 6 | 
 7 | Neither is a proper standard, but rather "implementation defined".
 8 | 
 9 | ## Intel
10 | 
11 | Used in Intel manuals, and likely Intel-made tools.
12 | 
13 | More popular on Windows.
14 | 
15 | Used by NASM and MASM.
16 | 
17 | This tutorial uses Intel syntax by default because that is what most tutorials I've used to learn also use. But I ended up regretting that, because I'm a Linux groupie.
18 | 
19 | ## AT&T
20 | 
21 | More popular for Linux, since UNIX was created at Bell Labs.
22 | 
23 | Implemented by GAS, the GNU assembler.
24 | 
25 | Used by GCC by default, the Linux kernel, glibc...
26 | 
27 | Advantages:
28 | 
29 | -   registers are clearly marked with `%`. More verbose, but more precise: allows to differentiate label `ax` from register `ax`.
30 | 
31 | Downsides:
32 | 
33 | -   addressing is less self-documenting. Compare:
34 | 
35 |         [eax + 2*ebx + 4]
36 | 
37 |     with:
38 | 
39 |         4(%eax,%ebx,2)
40 | 
41 | -   Labels are not consistently marked with `$`: it depends on the instruction. E.g.:
42 | 
43 |         mov $label, %eax
44 | 
45 |     but:
46 | 
47 |         jmp label
48 | 
49 |     instead of the uniform:
50 | 
51 |         jmp $label
52 | 
53 | 


--------------------------------------------------------------------------------
/intel2gas.md:
--------------------------------------------------------------------------------
1 | # intel2gas
2 | 
3 | Convert Intel syntax to GAS (AT&T) syntax.
4 | 
5 |     intel2gas a.asm
6 | 
7 | Output the converted output to stdout.
8 | 


--------------------------------------------------------------------------------
/interactive/Makefile:
--------------------------------------------------------------------------------
1 | ../Makefile


--------------------------------------------------------------------------------
/interactive/README.md:
--------------------------------------------------------------------------------
1 | # Interactive
2 | 
3 | Examples that are meant to exit non zero, or take too long, or print control characters to your terminal, or do other evil things.
4 | 
5 | 1. [int3](int3.asm)
6 | 1. [int 4](int4.asm)
7 | 


--------------------------------------------------------------------------------
/interactive/int3.asm:
--------------------------------------------------------------------------------
 1 | ; # int3
 2 | ; Executes the interrupt 3 instruction in both of its encodings.
 3 |     ; http://unix.stackexchange.com/questions/131044/dialog-trap-and-sigtrap
 4 | 
 5 | %include "lib/common_nasm.inc"
 6 | ; ENTRY / EXIT are macros from the include above; they wrap the code in `asm_main`.
 7 | ENTRY
 8 |     int3    ; Dedicated single byte encoding.
 9 |     ; 2 byte encoding.
10 |     int 3
11 | EXIT
12 | 


--------------------------------------------------------------------------------
/interactive/int4.asm:
--------------------------------------------------------------------------------
 1 | ; # int 4
 2 | 
 3 |     ; Mostly to compare with int 3.
 4 | 
 5 |     ; Segfaults. TODO why?
 6 | 
 7 | %include "lib/common_nasm.inc"
 8 | ; ENTRY / EXIT are macros from the include above; they wrap the code in `asm_main`.
 9 | ENTRY
10 |     int 4   ; Software interrupt with vector 4.
11 | EXIT
12 | 


--------------------------------------------------------------------------------
/interactive/lib:
--------------------------------------------------------------------------------
1 | ../lib


--------------------------------------------------------------------------------
/interactive/test:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | 


--------------------------------------------------------------------------------
/ld-linux-so.md:
--------------------------------------------------------------------------------
 1 | # ld-linux.so
 2 | 
 3 | TODO what does it do exactly. How is it called? By the kernel?
 4 | 
 5 |     man ld.so
 6 | 
 7 | `/lib64/ld-linux-x86-64.so.2` is the usual 64-bit version.
 8 | 
 9 | `ld-linux.so*` is the executable that does the dynamic loading for every executable.
10 | 
11 | As such, it cannot have any dependencies.
12 | 
13 | Its path is specified in the `.interp` section of ELF files, which Linux reads and uses to call `ld-linux`.
14 | 
15 | The default on Ubuntu 14.04 is `/lib64/ld-linux-x86-64.so.2`.
16 | 
17 | The program to be run is passed as an argument to `ld-linux`:
18 | 
19 |     /lib64/ld-linux-x86-64.so.2 a.out
20 | 
21 | Then:
22 | 
23 |     man execve
24 | 
25 | says that the path of the loader is stored in the elf file, and `readelf -a` shows a section devoted to it:
26 | 
27 |     INTERP         0x0000000000000238 0x0000000000400238 0x0000000000400238
28 |                     0x000000000000001c 0x000000000000001c  R      1
29 |         [Requesting program interpreter: /lib64/ld-linux-x86-64.so.2]
30 | 
31 | On Ubuntu 16.04, provided by the `libc6` package.
32 | 
33 | ## LD_DEBUG
34 | 
35 | TODO
36 | 
37 | ## LD_LIBRARY_PATH
38 | 
39 | You can also add to path with environment variables.
40 | 
41 | Don't rely on this method for production.
42 | 
43 |     export LD_LIBRARY_PATH='/path/to/link'
44 | 
45 | ## ld.so
46 | 
47 | TODO what is it?
48 | 
49 | ## ld.so.conf
50 | 
51 | TODO. E.g. Ubuntu 16.04 mesa:
52 | 
53 |     /usr/lib/x86_64-linux-gnu/mesa/ld.so.conf
54 | 
55 | ## /etc/ld.so.conf
56 | 
57 | TODO.
58 | 
59 | ## ldconfig
60 | 
61 | TODO
62 | 


--------------------------------------------------------------------------------
/ld.md:
--------------------------------------------------------------------------------
 1 | # ld
 2 | 
 3 | <https://sourceware.org/binutils/docs-2.25/ld/index.html>
 4 | 
 5 | Does the linking process, which converts object files into executables.
 6 | 
 7 | Basic usage:
 8 | 
 9 |     nasm -o hello_world.o hello_world.asm
10 |     ld -o hello_world.out hello_world.o
11 |     ./hello_world.out
12 | 
13 | ## Order of libraries passed
14 | 
15 | The order matters for static libraries:
16 | 
17 | - <http://stackoverflow.com/questions/45135/why-does-the-order-in-which-libraries-are-linked-sometimes-cause-errors-in-gcc>
18 | 
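19 | `ld` processes its inputs from left to right: undefined references can only be resolved by libraries that appear later on the command line, so a library must come after the objects and libraries that depend on it.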
20 | 
21 | ### start-group
22 | 
23 | ### end-group
24 | 
25 | Those weird options act on the `-l` libraries that appear in between them, e.g.:
26 | 
27 |     ld --start-group lib1 lib2 lib3 --end-group
28 | 
29 | With them, `ld` loops between the given libraries until all references have been resolved.
30 | 
31 | This has a performance impact.
32 | 
33 | ## Standard library
34 | 
35 | If your program is compiled with `gcc`, it expects a few things from the linker which `ld` does not do by default: <http://stackoverflow.com/questions/3577922/how-to-link-a-gas-assembly-file-as-a-c-program-with-ld-without-using-gcc>
36 | 
37 |     ld -dynamic-linker /lib64/ld-linux-x86-64.so.2 \
38 |       /usr/lib/x86_64-linux-gnu/crt1.o \
39 |       /usr/lib/x86_64-linux-gnu/crti.o \
40 |       -lc hello_world.o \
41 |       /usr/lib/x86_64-linux-gnu/crtn.o
42 | 
43 | ## m
44 | 
45 | ## Cross compile
46 | 
47 | <http://stackoverflow.com/questions/16004206/force-gnu-linker-to-generate-32-bit-elf-executables>
48 | 
49 | `-m <arch>`, where `<arch>` can be found with `ld -V`, sample output:
50 | 
51 |     elf_x86_64
52 |     elf32_x86_64
53 |     elf_i386
54 |     i386linux
55 |     elf_l1om
56 |     elf_k1om
57 |     i386pep
58 |     i386pe
59 | 
60 | TODO meaning of each type?
61 | 
62 | ## verbose
63 | 
64 | Print `ld` information, including the linker script used:
65 | 
66 |     ld --verbose
67 | 
68 | ## e
69 | 
70 | Sets the ELF entry point to a given symbol or address.
71 | 
72 | This determines `e_entry` of the ELF header.
73 | 
74 | You can set it to any address, but keep in mind that sections have alignment constraints.
75 | 
76 | So for example if you have something like:
77 | 
78 |     . = 0x4FFFFF;
79 |     .text :
80 |     {
81 |         *(*)
82 |     }
83 | 
84 | and the page size is 2M, then your executable will be about 2M in size:
85 | 
86 | - `.text` segment will go to `0x4FFFFF`
87 | - all bytes up to `0x4FFFFE` will be filled with zeroes
88 | - `e_entry` is set to `0x4FFFFF`
89 | 
90 | ## Library and weak TODOs
91 | 
92 | - <http://stackoverflow.com/questions/24390267/why-redefinition-of-a-function-which-is-already-present-in-dynamic-or-static-lib>
93 | - <http://stackoverflow.com/questions/13089166/how-to-make-gcc-link-strong-symbol-in-static-library-to-overwittren-weak-symbol>
94 | 


--------------------------------------------------------------------------------
/ldd.md:
--------------------------------------------------------------------------------
 1 | # ldd
 2 | 
 3 | Print information about the configuration of the dynamic loader `ld-linux.so`.
 4 | 
 5 | List required shared libraries of an executable and if they can be found.
 6 | 
 7 | Part of glibc (`libc-bin` package on Ubuntu), not `binutils`.
 8 | 
 9 |     man ldd
10 |     man ld.so
11 | 
12 | Usage:
13 | 
14 |     ldd /bin/ls
15 | 
16 | Sample output:
17 | 
18 |     linux-vdso.so.1 =>  (0x00007ffd3cbdc000)
19 |     libselinux.so.1 => /lib/x86_64-linux-gnu/libselinux.so.1 (0x00007effc013d000)
20 |     libacl.so.1 => /lib/x86_64-linux-gnu/libacl.so.1 (0x00007effbff35000)
21 |     libc.so.6 => /lib/x86_64-linux-gnu/libc.so.6 (0x00007effbfb70000)
22 |     libpcre.so.3 => /lib/x86_64-linux-gnu/libpcre.so.3 (0x00007effbf932000)
23 |     libdl.so.2 => /lib/x86_64-linux-gnu/libdl.so.2 (0x00007effbf72e000)
24 |     /lib64/ld-linux-x86-64.so.2 (0x00007effc0360000)
25 |     libattr.so.1 => /lib/x86_64-linux-gnu/libattr.so.1 (0x00007effbf529000)
26 | 
27 | Quick reminder:
28 | 
29 | - `linux-vdso`: magically loaded by the kernel to make system calls faster
30 | - `libselinux`, `libacl`: access control stuff
31 | - `libc`: C standard library
32 | - `libpcre`: Perl Compatible Regular expressions.
33 | 
34 | Possible outputs:
35 | 
36 | - `Not a dynamic executable`
37 | - `liba.1.so => /lib/liba.1.so`
38 | - `liba.1.so => not found`
39 | 
40 | ## vs readelf -d (NEEDED)
41 | 
42 | I think `readelf` does not take into account dependencies of dependencies:
43 | 
44 | - <http://stackoverflow.com/questions/6242761/how-do-i-find-the-direct-shared-object-dependencies-of-a-linux-elf-binary>
45 | - <http://unix.stackexchange.com/questions/120015/how-to-find-out-the-dynamic-libraries-executables-loads-when-run>
46 | 
47 | For example:
48 | 
49 |     readelf -d /bin/ls | grep NEEDED
50 | 
51 | Gives:
52 | 
53 |     0x0000000000000001 (NEEDED)             Shared library: [libselinux.so.1]
54 |     0x0000000000000001 (NEEDED)             Shared library: [libacl.so.1]
55 |     0x0000000000000001 (NEEDED)             Shared library: [libc.so.6]
56 | 
57 | Then:
58 | 
59 |     readelf -d /lib/x86_64-linux-gnu/libselinux.so.1 | grep NEEDED
60 | 
61 | Gives:
62 | 
63 |     0x0000000000000001 (NEEDED)             Shared library: [libpcre.so.3]
64 |     0x0000000000000001 (NEEDED)             Shared library: [libdl.so.2]
65 |     0x0000000000000001 (NEEDED)             Shared library: [libc.so.6]
66 |     0x0000000000000001 (NEEDED)             Shared library: [ld-linux-x86-64.so.2]
67 | 
68 | so this is where `libpcre` came from.
69 | 
70 | ## Verbose
71 | 
72 | ## Show linked symbols
73 | 
74 |     LD_DEBUG=bindings ldd -r /bin/date
75 | 
76 | Shows a huge amount of debug information! <http://stackoverflow.com/a/5567907/895245>
77 | 
78 | But also consider `ld -y` and `info symbol &printf` inside GDB: <http://stackoverflow.com/a/5533801/895245>
79 | 


--------------------------------------------------------------------------------
/lib/Makefile:
--------------------------------------------------------------------------------
1 | .PHONY: all clean
2 | # NASM, CC, CFLAGS, OBJ_EXT and OUT_EXT are not defined here: presumably supplied by the invoking make or the environment - confirm.
3 | all:
4 | 	$(NASM) -o common$(OBJ_EXT) common.asm
5 | 	$(CC) $(CFLAGS) -o driver$(OBJ_EXT) -c driver.c
6 | # Remove every generated object and output file in this directory.
7 | clean:
8 | 	rm -f *$(OBJ_EXT) *$(OUT_EXT)
9 | 


--------------------------------------------------------------------------------
/lib/README.md:
--------------------------------------------------------------------------------
 1 | # Lib
 2 | 
 3 | Written by Paul A. Carter, with minor modifications.
 4 | 
 5 | This is a simple library for learning purposes.
 6 | 
 7 | Encapsulates system calls, making code OS portable.
 8 | 
 9 | Interfaces with the C stdlib. So it is also an example of how to do so.
10 | 


--------------------------------------------------------------------------------
/lib/common.asm:
--------------------------------------------------------------------------------
 1 | ; Helper functions. Gets linked with every executable.
 2 | ;
 3 | ; Vs macros on included headers:
 4 | ;
 5 | ; - less NASM / GAS specific, and helps with portability across assemblers
 6 | ; - more debug friendly, as you can properly step into the functions and see source
 7 | 
 8 | %define STRING_TERMINATOR 0
 9 | %define NEWLINE 10
10 | 
11 | extern printf, exit
12 | 
13 | ; Print error message and exit program with status 1.
14 | ;
15 | ; Usually called with the `ASSERT_FAIL` macro,
16 | ; which also prepares the line number.
17 | ;
18 | ; The line number must be set in a macro
19 | ; otherwise it would always point to this function.
20 | ;
21 | ; eax: line of the failure
22 | ; Does not return: ends by calling libc `exit`.
23 | global assert_fail
24 | assert_fail:
25 |     mov ebx, eax                ; Save the line number: eax is needed for the message pointer.
26 |     mov eax, .message
27 |     call print_string           ; Prints the message followed by a newline.
28 |     mov eax, ebx
29 |     call print_int              ; Prints the line number followed by a newline.
30 |     ; Call libc exit with exit status 1:
31 |     push dword 1
32 |     call exit
33 | .message db 10, 'ASSERT FAILED AT LINE: ', 0 ; Data placed after the call: relies on `exit` not returning.
34 | 
35 | ; Print the 32 bit integer in eax as signed decimal (`%i`), followed by a newline.
36 | global print_int
37 | print_int:
38 |     ; Prologue: save all general purpose registers and flags so the caller sees them unchanged.
39 |     enter 0,0
40 |     pusha
41 |     pushf
42 |     ; Body: printf(.format, eax), arguments pushed right to left.
43 |     push eax
44 |     push dword .format
45 |     call printf
46 |     pop ecx                     ; Discard the two printf arguments;
47 |     pop ecx                     ; ecx itself is restored by popa below.
48 |     ; Epilogue: restore flags and registers, then drop the frame.
49 |     popf
50 |     popa
51 |     leave
52 |     ret
53 | .format db "%i", NEWLINE, STRING_TERMINATOR
54 | 
55 | ; Print the NUL terminated string whose address is in eax, followed by a newline.
56 | global print_string
57 | print_string:
58 |     ; Prologue: save all general purpose registers and flags so the caller sees them unchanged.
59 |     enter 0,0
60 |     pusha
61 |     pushf
62 |     ; Body: printf(.format, eax), arguments pushed right to left.
63 |     push eax
64 |     push dword .format
65 |     call printf
66 |     pop ecx                     ; Discard the two printf arguments;
67 |     pop ecx                     ; ecx itself is restored by popa below.
68 |     ; Epilogue: restore flags and registers, then drop the frame.
69 |     popf
70 |     popa
71 |     leave
72 |     ret
73 | .format db "%s", NEWLINE, STRING_TERMINATOR
74 | 


--------------------------------------------------------------------------------
/lib/common_gas.h:
--------------------------------------------------------------------------------
 1 | /* GAS macros.
 2 |  *
 3 |  * Use cpp because GAS macros are too limited.
 4 |  */
 5 | /* Open the program: define the global `asm_main` in .text and set up its stack frame. */
 6 | #define ENTRY \
 7 |     .text ;\
 8 |         .global asm_main ;\
 9 |         asm_main: \
10 |         enter $0, $0
11 | /* Close the program opened by ENTRY: tear down the frame and return 0 in %eax. */
12 | #define EXIT \
13 |     leave ;\
14 |     mov $0, %eax ;\
15 |     ret
16 | /* Load the operand `x` into %eax and call `print_int`. */
17 | #define PRINT_INT(x) \
18 |     mov x, %eax ;\
19 |     call print_int
20 | /* Load the operand `x` (a string address) into %eax and call `print_string`. */
21 | #define PRINT_STRING(x) \
22 |     mov x, %eax ;\
23 |     call print_string
24 | 
25 | /* Usage:
26 |  *
27 |  *     PRINT_STRING_LITERAL("My string literal!")
28 |  *
29 |  * The zero terminator is added by `.asciz`, the trailing newline by `print_string`.
30 |  */
31 | #define PRINT_STRING_LITERAL(x) \
32 |     .section .rodata ;\
33 |     1: ;\
34 |          .asciz x ;\
35 |     .text ;\
36 |     PRINT_STRING($1b)
37 | 
38 | /* Must be defined with `#define` because of __LINE__:
39 |  * http://stackoverflow.com/questions/30958595/is-there-a-line-macro-for-gas-assembly-that-expands-to-the-current-source-li
40 |  */
41 | #define ASSERT_FAIL \
42 |     mov $__LINE__, %eax ;\
43 |     call assert_fail
44 | /* `flag` is a jump mnemonic: if that jump is taken ASSERT_FAIL is skipped, otherwise the assertion fails. */
45 | #define ASSERT_FLAG(flag) \
46 |         flag 1f ;\
47 |         ASSERT_FAIL ;\
48 |     1:
49 | /* Fail the assertion unless `cmp x, y` compares equal. */
50 | /* TODO define in terms of ASSERT_EQ_SIZE, need empty macro argument. */
51 | #define ASSERT_EQ(x, y) \
52 |     cmp x, y ;\
53 |     je  1f ;\
54 |         ASSERT_FAIL ;\
55 |     1:
56 | /* Like ASSERT_EQ, but pastes `size` onto `cmp` as a suffix, e.g. size `l` gives `cmpl`. */
57 | #define ASSERT_EQ_SIZE(x, y, size) \
58 |     cmp ## size x, y ;\
59 |     je  1f ;\
60 |         ASSERT_FAIL ;\
61 |     1:
62 | 


--------------------------------------------------------------------------------
/lib/common_nasm.inc:
--------------------------------------------------------------------------------
  1 | ; Extern definitions and macros to be included in NASM source files.
  2 | ;
  3 | ; Many macros are just helpers to call library functions more easily.
  4 | ; Many of them set-up arguments, e.g. prepare EAX for a call.
  5 | ;
  6 | ; Macros here do not preserve registers as their contract.
  7 | 
  8 | extern \
  9 |     assert_fail, \
 10 |     print_int, \
 11 |     print_string
 12 | 
 13 | ; Structure macros
 14 |     ; TEXT / DATA / RODATA: switch to the section of the same name.
 15 |     %macro TEXT 0
 16 |         section .text
 17 |     %endmacro
 18 | 
 19 |     %macro DATA 0
 20 |         section .data
 21 |     %endmacro
 22 | 
 23 |     %macro RODATA 0
 24 |         section .rodata
 25 |     %endmacro
 26 |     ; GLOBAL name: export `name` and define it as a label at this point.
 27 |     %macro GLOBAL 1
 28 |         global %1
 29 |         %1:
 30 |     %endmacro
 31 | 
 32 |     ; Enter program that uses the C driver.
 33 |     ; The entry function is `asm_main`; a stack frame is set up with `enter`.
 34 |     %macro ENTRY 0
 35 |         TEXT
 36 |         GLOBAL asm_main
 37 |             enter 0, 0
 38 |     %endmacro
 39 | 
 40 |     ; Exit program entered by `ENTRY`: drop the frame and return 0 in eax.
 41 |     %macro EXIT 0
 42 |         leave
 43 |         mov eax, 0
 44 |         ret
 45 |     %endmacro
 46 | 
 47 | ; Print macros.
 48 |     ; PRINT_INT x: load operand x into eax and call `print_int`.
 49 |     %macro PRINT_INT 1
 50 |         mov eax, %1
 51 |         call print_int
 52 |     %endmacro
 53 |     ; PRINT_STRING addr: load the string address into eax and call `print_string`.
 54 |     %macro PRINT_STRING 1
 55 |         mov eax, %1
 56 |         call print_string
 57 |     %endmacro
 58 |     ; Store the literal NUL terminated in .rodata, switch back to .text and print it.
 59 |     ; Usage: PRINT_STRING_LITERAL 'My string literal!'
 60 |     %macro PRINT_STRING_LITERAL 1
 61 |         RODATA
 62 |         %%str db %1, 0
 63 |         TEXT
 64 |         PRINT_STRING %%str
 65 |     %endmacro
 66 |     ; NOTE(review): the `sub_dump_*` routines called below are not in this file's `extern` list - confirm they exist before using these macros.
 67 |     %macro dump_regs 1
 68 |         push dword %1
 69 |         call sub_dump_regs
 70 |     %endmacro
 71 | 
 72 |     ; Usage: dump_mem label, start-address, <third argument> (NOTE(review): the original usage text was cut off here - confirm its meaning)
 73 |     %macro dump_mem 3
 74 |         push dword %1
 75 |         push dword %2
 76 |         push dword %3
 77 |         call sub_dump_mem
 78 |     %endmacro
 79 | 
 80 |     %macro dump_math 1
 81 |         push dword %1
 82 |         call sub_dump_math
 83 |     %endmacro
 84 |     ; Unlike dump_mem, the three arguments are pushed in reverse order (%3 first).
 85 |     %macro  dump_stack 3
 86 |         push dword %3
 87 |         push dword %2
 88 |         push dword %1
 89 |         call sub_dump_stack
 90 |     %endmacro
 91 | 
 92 | ; Assert macros
 93 |     ; ASSERT_EQ a, b, size: fail unless `cmp size a, b` compares equal.
 94 |     ; `size` is an operand size keyword such as `dword`. Flags are saved and restored around the compare.
 95 |     %macro ASSERT_EQ 3
 96 |         pushf
 97 |         cmp %3 %1, %2
 98 |         je %%ok
 99 |             ASSERT_FAIL
100 |         %%ok:
101 |         popf
102 |     %endmacro
103 |     ; Two argument form. NOTE(review): forwards `%3`, which this overload never receives; presumably it expands to nothing - confirm.
104 |     %macro ASSERT_EQ 2
105 |         ASSERT_EQ %1, %2, %3
106 |     %endmacro
107 | 
108 |     ; ASSERT_NEQ a, b: fail if `cmp a, b` compares equal. Registers and flags are saved and restored.
109 |     %macro ASSERT_NEQ 2
110 |         pushfd
111 |         pusha
112 |         cmp %1, %2
113 |         jne %%ok
114 |             ASSERT_FAIL
115 |         %%ok:
116 |         popa
117 |         popfd
118 |     %endmacro
119 | 
120 |     ; ASSERT_FLAG jcc: fail unless the jump instruction %1 (jX or jnX) is taken.
121 |     %macro ASSERT_FLAG 1
122 |         %1 %%ok
123 |             ASSERT_FAIL
124 |         %%ok:
125 |     %endmacro
126 |     ; Load the current source line into eax and call `assert_fail`.
127 |     %macro ASSERT_FAIL 0
128 |         mov eax, __LINE__
129 |         call assert_fail
130 |     %endmacro
131 | 
132 | ; SIMD macros
133 |     ; ASSERT_EQ4 addr, d0, d1, d2, d3: assert that the 4 consecutive dwords at [addr] equal d0..d3.
134 |     %macro ASSERT_EQ4 5
135 |         ASSERT_EQ [%1], %2, dword
136 |         ASSERT_EQ [%1 + 4], %3, dword
137 |         ASSERT_EQ [%1 + 8], %4, dword
138 |         ASSERT_EQ [%1 + 12], %5, dword
139 |     %endmacro
140 | 
141 |     ; Move 4 given dwords into the given memory location (16 bytes are written).
142 |     ; NOTE(review): this comment used to say the location should be 8 dwords long - confirm.
143 |     %macro MOV4 5
144 |         mov dword [%1], %2
145 |         mov dword [%1 + 4], %3
146 |         mov dword [%1 + 8], %4
147 |         mov dword [%1 + 12], %5
148 |     %endmacro
149 | 


--------------------------------------------------------------------------------
/lib/driver.c:
--------------------------------------------------------------------------------
 1 | #ifndef __GNUC__
 2 | #  define PRE_CDECL __cdecl
 3 | #  define POST_CDECL
 4 | #else
 5 | #  define PRE_CDECL
 6 | #  define POST_CDECL __attribute__((cdecl))
 7 | #endif
 8 | /** Entry point written in assembly; pinned to cdecl so the assembly side can rely on the convention. */
 9 | int PRE_CDECL asm_main(void) POST_CDECL;
10 | 
11 | int main(void)
12 | {
13 |     return asm_main();
14 | }
15 | 


--------------------------------------------------------------------------------
/linker-scripts/Makefile:
--------------------------------------------------------------------------------
 1 | .POSIX:
 2 | 
 3 | IN_EXT ?= .S
 4 | LD_EXT ?= .ld
 5 | # Linker flags: by default select the linker script named after the output file.
 6 | LD_FLAGS ?= -T $(@:$(OUT_EXT)=$(LD_EXT))
 7 | OBJ_EXT ?= .o
 8 | OUT_EXT ?= .out
 9 | RUN ?= bios_hello_world
10 | 
11 | OUTS := $(patsubst %,%$(OUT_EXT),$(basename $(wildcard *$(IN_EXT))))
12 | 
13 | .PRECIOUS: %$(OBJ_EXT)
14 | .PHONY: all clean run test
15 | 
16 | all: $(OUTS)
17 | 
18 | %$(OUT_EXT): %$(OBJ_EXT) %$(LD_EXT)
19 | 	ld -m elf_i386 -o '$@' $(LD_FLAGS) '$<'
20 | # Use gcc so that the preprocessor will run first.
21 | %$(OBJ_EXT): %$(IN_EXT) common.h
22 | 	gcc -c -m32 -o '$@' '$<'
23 | 
24 | clean:
25 | 	rm -f *$(OBJ_EXT) *$(OUT_EXT)
26 | 
27 | test: all
28 | 	# Lazy.
29 | 
30 | run: all
31 | 	qemu-system-i386 '$(RUN)$(OUT_EXT)'
32 | 


--------------------------------------------------------------------------------
/linker-scripts/README.md:
--------------------------------------------------------------------------------
  1 | # Linker scripts
  2 | 
  3 | 1.  [Hello world](hello_world.ld)
  4 | 1.  [min](min.ld)
  5 | 1.  [Symbol](symbol.ld)
  6 |     1. [edata](edata.ld)
  7 | 1.  [FLAGS](flags.ld)
  8 | 1.  [KEEP](keep.ld)
  9 | 
 10 | ## Introduction
 11 | 
 12 | This directory must be run in Linux IA-32.
 13 | 
 14 | Linker scripts determine how exactly `ld` is going to transform object files into the executable.
 15 | 
 16 | In particular, how the sections are going to be mashed up into segments.
 17 | 
 18 | The default linker script operation is determined by the default linker scripts.
 19 | 
 20 | Those are hardcoded into the `ld` executable.
 21 | 
 22 | `make install` also generates an informational copy of the scripts at:
 23 | 
 24 |     x86_64-unknown-linux-gnu/lib/ldscripts
 25 | 
 26 | which some distributions provide. But those are only informational. The usual location is:
 27 | 
 28 |     /usr/lib/ldscripts
 29 | 
 30 | The actual script being used is determined by the `ld` options, and can be viewed with:
 31 | 
 32 |     ld --verbose
 33 | 
 34 | which spits out the entire script, but not its name. You can then match this output to the scripts under `/usr/lib/ldscripts`.
 35 | 
 36 | Minimal example: <http://stackoverflow.com/questions/7182409/how-to-correctly-use-a-simple-linker-script> TODO make into a repo.
 37 | 
 38 | The main case where linker scripts are needed is when building more "exotic" executables, typically boot sectors or a multiboot file. E.g.: <http://wiki.osdev.org/Bare_Bones>
 39 | 
 40 | ## T
 41 | 
 42 | Replace the linker script with a custom one:
 43 | 
 44 |     ld -T script a.o
 45 | 
 46 | You will often use an existing script as the basis for this.
 47 | 
 48 | ## Ttext-segment
 49 | 
 50 | Change the address of some predefined segments:
 51 | 
 52 |     -Ttext-segment=0x80000
 53 | 
 54 | The default linker script reads:
 55 | 
 56 |     . = SEGMENT_START("text-segment", 0x400000)
 57 | 
 58 | which takes the value passed, or 4M by default.
 59 | 
 60 | ## Directives
 61 | 
 62 | ### SECTIONS
 63 | 
 64 | Most important directive: contains the section to segment mappings.
 65 | 
 66 | #### Select by object file
 67 | 
 68 | You can select sections by object file.
 69 | 
 70 | For example, for a multiboot elf file, we must put the multiboot header before everything else.
 71 | 
 72 | So if we had our header in the `.text` section of the `boot.o` file (bad practice, it would be better to have a specially named section), we could write:
 73 | 
 74 |     .text 0x100000 :
 75 |     {
 76 |         boot.o(.text)
 77 |         *(.text)
 78 |     }
 79 | 
 80 | This way, `boot.o(.text)` is guaranteed to put `boot.o` first, and `*(.text)` then matches the others.
 81 | 
 82 | ### BYTE
 83 | 
 84 | ### SHORT
 85 | 
 86 | ### LONG
 87 | 
 88 | ### QUAD
 89 | 
 90 | Used to insert raw bytes directly into the output.
 91 | 
 92 | Application: insert boot sector magic bytes: https://github.com/cirosantilli/x86-bare-metal-examples/blob/2b79ac21df801fbf4619d009411be6b9cd10e6e0/a.ld#L14
 93 | 
 94 | ### SQUAD
 95 | 
 96 | Like `QUAD` but sign extends.
 97 | 
 98 | ### Assignment
 99 | 
100 | #### PROVIDE
101 | 
102 | <https://sourceware.org/binutils/docs/ld/PROVIDE.html#index-PROVIDE-408>
103 | 
104 | Make provided symbols weak: if they are defined by the object files, that definition wins.
105 | 
106 | Without `PROVIDE`, a multiple definition error occurs.
107 | 
108 | E.g.:
109 | 
110 |     PROVIDE(__symbol_weak = .);
111 |     __symbol = .;
112 | 
113 | Note that:
114 | 
115 |     __symbol = .;
116 | 
117 | sets the **address** of `__symbol`, relative to the current segment. `.` in this context is also relative to the current segment.
118 | 
119 | To set the value of the symbol, you'd need:
120 | 
121 |     __symbol = .;
122 |     DWORD
123 | 
124 | Used on the default userland script for section position symbols like `etext` and `edata`: <http://stackoverflow.com/questions/1765969/where-are-the-symbols-etext-edata-and-end-defined>
125 | 
126 | Those symbols are placed on the output right where they are defined, relative to other sections and symbols.
127 | 
128 | #### ABSOLUTE
129 | 
130 | TODO
131 | 
132 | ### ALIGN
133 | 
134 | Returns the address of the next multiple.
135 | 
136 | Applications:
137 | 
138 | Extend the output to be a multiple of 512 bytes:
139 | 
140 |     . = ALIGN(512)
141 | 
142 | This can be useful to generate boot sectors with multiple stages.
143 | 
144 | ## Segment flags
145 | 
146 | ## Segment permissions
147 | 
148 | Some segment names are magic: `.text` and `.data`, and will have fixed permissions no matter what.
149 | 
150 | If you use a non-magic name like `.blurb`, the permission is the sum of the permissions of all sections that make up the segment.
151 | 
152 | It is also possible to set the segment permissions with the `FLAGS` option of the `PHDRS` linker script command.
153 | 


--------------------------------------------------------------------------------
/linker-scripts/TODO.md:
--------------------------------------------------------------------------------
 1 | # TODO
 2 | 
 3 | -   Factor out examples in this repository.
 4 | 
 5 |     The major pain point is:
 6 | 
 7 |     -   using the C standard library (through the ia-32 directory) requires GCC (TODO true?), which I cannot get to work without the default script
 8 |     -   not using the C library loses all portability so I'd have to reimplement a bunch of things
 9 | 
10 |     One possible alternative is to limit ourselves to two system calls only: write and exit, and to unit testing from an external script.
11 | 


--------------------------------------------------------------------------------
/linker-scripts/common.h:
--------------------------------------------------------------------------------
 1 | #define GLOBAL(symbol) \
 2 |     .global symbol;\
 3 |     symbol:
 4 | 
 5 | /* Create a string without a zero terminator, and a `symbol_len` symbol holding its length. */
 6 | #define STRING(symbol, value) \
 7 |     symbol: \
 8 |         .ascii value;\
 9 |         symbol ## _len = . - symbol
10 | 
11 | /* Exit syscall: eax = 1 selects exit, ebx carries `status` (pass an operand such as $0). */
12 | #define EXIT(status) \
13 |     mov $1, %eax;\
14 |     mov status, %ebx;\
15 |     int $0x80
16 | 
17 | /*
18 | Write syscall: writes the string to file descriptor 1 (stdout).
19 | 
20 | The length of `string_address` should be stored in a symbol named
21 | `string_address ## _len`, which is what the STRING macro defines.
22 | 
23 | - string_address: a symbol that points to the start of the string (without $)
24 | */
25 | #define WRITE(string_address) \
26 |     mov $4, %eax;\
27 |     mov $1, %ebx;\
28 |     mov $string_address, %ecx;\
29 |     mov $string_address ## _len, %edx;\
30 |     int $0x80
31 | 


--------------------------------------------------------------------------------
/linker-scripts/data.S:
--------------------------------------------------------------------------------
1 | .section .text
2 |     movb $0, x          /* Store to a byte in .data: needs the data to be writable. */
3 |     mov $1, %eax        /* Syscall number 1: exit. */
4 |     mov $0, %ebx
5 |     mov x, %bl          /* Exit status = current value of x (0 if the store above took effect). */
6 |     int $0x80
7 | .section .data
8 |     x: .byte 1
9 | 


--------------------------------------------------------------------------------
/linker-scripts/data.ld:
--------------------------------------------------------------------------------
 1 | /*
 2 | Make a writable data segment: `.data` gets its own output section, placed after `.text`.
 3 | */
 4 | SECTIONS
 5 | {
 6 |     . = 0x400000; /* Address at which the first output section starts. */
 7 |     .text :
 8 |     {
 9 |         *(.text)
10 |     }
11 |     .data :
12 |     {
13 |         *(.data)
14 |     }
15 | }
16 | 


--------------------------------------------------------------------------------
/linker-scripts/edata.S:
--------------------------------------------------------------------------------
1 | .section .text
2 |     mov $1, %eax        /* Syscall number 1: exit. */
3 |     mov sdata, %ebx     /* No `$`: loads the 4 bytes stored at address `sdata`, not the address itself. */
4 |     int $0x80
5 | .section .data
6 |     .byte 1             /* edata.ld defines `sdata` right before the input .data sections. */
7 | 


--------------------------------------------------------------------------------
/linker-scripts/edata.ld:
--------------------------------------------------------------------------------
 1 | /*
 2 | To answer:
 3 | http://stackoverflow.com/questions/1765969/where-are-the-symbols-etext-edata-and-end-defined
 4 | */
 5 | SECTIONS
 6 | {
 7 |     . = 0x400000;
 8 |     .text :
 9 |     {
10 |         *(.text)
11 |         sdata = .;
12 |         *(.data)
13 |     }
14 | }
15 | 


--------------------------------------------------------------------------------
/linker-scripts/flags.S:
--------------------------------------------------------------------------------
 1 | /*
 2 | Minimal program placed in a custom `.mytext` section, so that
 3 | `flags.ld` can set the flags of the segment that holds it.
 4 | */
 5 | 
 6 | #include "common.h"
 7 | .section .mytext, "a"
 8 | GLOBAL(_start)
 9 | EXIT($0)
10 | 


--------------------------------------------------------------------------------
/linker-scripts/flags.ld:
--------------------------------------------------------------------------------
 1 | /*
 2 | How to alter segment flags (permissions).
 3 | 
 4 | Expected outcome: segfault, because the segment is not executable.
 5 | */
 6 | ENTRY(_start)
 7 | PHDRS
 8 | {
 9 |     /*
10 |     The FLAGS number is the same as the 777 filesystem
11 |     permission bits: RWX.
12 | 
13 |     Try changing this to 2 to get a segfault.
14 | 
15 |     TODO why does it not segfault with just 4 (R)?
16 |     */
17 |     .mytext PT_LOAD FLAGS(5);
18 | }
19 | SECTIONS
20 | {
21 |     . = 0x400000;
22 |     .mytext : {
23 |         *(*)
24 |     } :.mytext
25 | }
26 | 


--------------------------------------------------------------------------------
/linker-scripts/hello_world.S:
--------------------------------------------------------------------------------
1 | #include "common.h"
2 | .section .mydata, "a"
3 |     STRING(s, "hello world\n")
4 | .section .mytext, "ax"
5 |     .global _start
6 |     _start:
7 |         WRITE(s)
8 |     EXIT($0)
9 | 


--------------------------------------------------------------------------------
/linker-scripts/hello_world.ld:
--------------------------------------------------------------------------------
 1 | /*
 2 | Small very explicit example.
 3 | 
 4 | Expected output: `hello world\n`
 5 | 
 6 | We could make this much much smaller by using default names,
 7 | but this contains the minimum amount of detail
 8 | that you should know about to get started.
 9 | */
10 | 
11 | /*
12 | If `ENTRY` is not present, a series of defaults are used:
13 | 
14 | - the value of a target specific symbol, if it is defined. For many targets this is `start`.
15 | - the address of the first byte of the `.text` section, if present
16 | - address 0
17 |  */
18 | ENTRY(_start);
19 | 
20 | /* Program headers, each of which specifies a segment. */
21 | PHDRS
22 | {
23 |     /*
24 |     If we don't set the flags here,
25 |     the permissions are the minimum possible for the sections inside this segment.
26 |      */
27 |     mytext_segment PT_LOAD;
28 |     mydata_segment PT_LOAD;
29 | }
30 | 
31 | SECTIONS
32 | {
33 |     /*
34 |     Set where the text segment will be loaded into memory by Linux.
35 | 
36 |     Linux segfaults us if the .text is too low.
37 | 
38 |     If this were not present, it would be 0, and that is too low and segfaults. See also:
39 | 
40 |     - http://stackoverflow.com/questions/2187484/why-is-the-elf-execution-entry-point-virtual-address-of-the-form-0x80xxxxx-and-n
41 |     - http://stackoverflow.com/questions/2966426/why-do-virtual-memory-addresses-for-linux-binaries-start-at-0x8048000?lq=1
42 |     */
43 |     . = 0x400000;
44 | 
45 |     /* Create an output section. */
46 |     .mytext_output_section :
47 |     {
48 |         /* Gather every `.mytext` section from every object file (`*`) into here. */
49 |         *(.mytext)
50 |     /* If we don't set a segment here, `.text` segment is the default. */
51 |     } :mytext_segment
52 | 
53 |     . = 0x600000;
54 |     .mydata_output_section :
55 |     {
56 |         *(.mydata)
57 |     } :mydata_segment
58 | }
59 | 


--------------------------------------------------------------------------------
/linker-scripts/keep.S:
--------------------------------------------------------------------------------
 1 | .section .text
 2 | .global _start
 3 | _start:
 4 |     /* Dummy access so that after will be referenced and kept. */
 5 |     mov after, %eax
 6 |     /*mov keep, %eax*/
 7 |     /* Exit system call. */
 8 |     mov $1, %eax
 9 |     /* Take the exit status 4 bytes after before. */
10 |     mov $4, %ebx
11 |     mov before(%ebx), %ebx
12 |     int $0x80
13 | .section .before
14 |     before: .long 0
15 | .section .keep, "a"
16 |     keep: .long 1
17 | .section .after
18 |     after: .long 2
19 | 


--------------------------------------------------------------------------------
/linker-scripts/keep.ld:
--------------------------------------------------------------------------------
 1 | /*
 2 | http://stackoverflow.com/questions/9827157/what-does-keep-mean-in-a-linker-script
 3 | */
 4 | ENTRY(_start)
 5 | SECTIONS
 6 | {
 7 |     . = 0x400000;
 8 |     .text :
 9 |     {
10 |         *(.text)
11 |         *(.before)
12 |         /*KEEP(*(.keep));*/
13 |         *(.keep)
14 |         *(.after)
15 |     }
16 | }
17 | 


--------------------------------------------------------------------------------
/linker-scripts/min.S:
--------------------------------------------------------------------------------
1 | /*
2 | This is also minimal as it uses the fact that
3 | the default section is `.text`.
4 | */
5 | 
6 | #include "common.h"
7 | EXIT($0)
8 | 


--------------------------------------------------------------------------------
/linker-scripts/min.ld:
--------------------------------------------------------------------------------
 1 | /*
 2 | Minimal linker script possible.
 3 | 
 4 | Expected outcome: program exit status is 0.
 5 | 
 6 | Several defaults are used:
 7 | 
 8 | - entry on `.text` segment
 9 | - put every section that does not match any rule into the `.text` segment
10 | */
11 | SECTIONS
12 | {
13 |     /* If we don't use this, it gets set to 0 and segfaults. */
14 |     . = 0x400000;
15 | }
16 | 


--------------------------------------------------------------------------------
/linker-scripts/symbol.S:
--------------------------------------------------------------------------------
 1 | /* Write system call. `s` and `s_len` are defined by symbol.ld. */
 2 | mov $4, %eax
 3 | mov $1, %ebx
 4 | /* `$s`: the address of the string itself. */
 5 | mov $s, %ecx
 6 | /* No `$`: load the length from the LONG that symbol.ld stores at `s_len`. */
 7 | mov s_len, %edx
 8 | int $0x80
 9 | 
10 | /* Exit system call with status 0. */
11 | mov $1, %eax
12 | mov $0, %ebx
13 | int $0x80
14 | 


--------------------------------------------------------------------------------
/linker-scripts/symbol.ld:
--------------------------------------------------------------------------------
 1 | /*
 2 | Define symbols and give them values on the linker script.
 3 | 
 4 | Expected output: `hello world\n`
 5 | 
 6 | Used on many default linker scripts to say where sections start and end:
 7 | http://stackoverflow.com/questions/1765969/where-are-the-symbols-etext-edata-and-end-defined
 8 | */
 9 | SECTIONS
10 | {
11 |     . = 0x400000;
12 |     .text :
13 |     {
14 |         *(*)
15 | 
16 |         /*
17 |         This defines the POSITION of s RELATIVE TO THE CURRENT SECTION,
18 |         *not* its value.
19 | 
20 |         This works because the value of the `.` (location counter)
21 |         is also relative to the current section:
22 |         https://sourceware.org/binutils/docs/ld/Location-Counter.html
23 |         */
24 |         s = .;
25 | 
26 |         /*
27 |         Immediately after comes the value.
28 | 
29 |         Obtained from: printf 'hello world' | rev | hd
30 |          */
31 |         QUAD(0x6f77206f6c6c6568)
32 |         LONG(0x0a646c72)
33 | 
34 |         /*
35 |         The name `len` cannot be used, because it is a keyword!
36 |         https://sourceware.org/binutils/docs/ld/MEMORY.html
37 |         TODO possible to escape it?
38 |          */
39 |         s_len = .;
40 |         LONG(. - s)
41 |     }
42 | }
43 | 


--------------------------------------------------------------------------------
/linux/Makefile:
--------------------------------------------------------------------------------
 1 | # Assemble every *$(IN_EXT) in this directory with NASM and link each
 2 | # one into its own 32-bit executable with ld.
 3 | IN_EXT ?= .asm
 4 | OUT_EXT ?= .out
 5 | TMP_EXT ?= .o
 6 | # Basename of the program executed by `make run`.
 7 | RUN ?= hello_world
 8 | 
 9 | INS := $(wildcard *$(IN_EXT))
10 | OUTS := $(patsubst %$(IN_EXT),%$(OUT_EXT),$(INS))
11 | 
12 | # Keep the intermediate object files after linking.
13 | .PRECIOUS: %$(TMP_EXT)
14 | .PHONY: all clean run test
15 | 
16 | all: $(OUTS)
17 | 
18 | # OUT_DIR is not set in this file: it expands to nothing unless supplied
19 | # from the command line or the environment.
20 | %$(OUT_EXT): $(OUT_DIR)%$(TMP_EXT)
21 | 	ld -m elf_i386 -o '$@' '$<'
22 | 
23 | # Use the extension variables so that overriding IN_EXT or TMP_EXT
24 | # keeps this rule in sync with OUTS, .PRECIOUS and clean.
25 | %$(TMP_EXT): %$(IN_EXT)
26 | 	nasm -w+all -f elf32 -g -o '$@' '$<'
27 | 
28 | clean:
29 | 	rm -f *$(TMP_EXT) *$(OUT_EXT)
30 | 
31 | run: all
32 | 	./$(RUN)$(OUT_EXT)
33 | 
34 | # Run every executable in turn, printing its name first.
35 | test: all
36 | 	for f in *$(OUT_EXT); do echo $$f; ./$$f; done
37 | 


--------------------------------------------------------------------------------
/linux/README.md:
--------------------------------------------------------------------------------
 1 | # Linux
 2 | 
 3 | ## Introduction
 4 | 
 5 | Linux-specific IA-32 examples without external libraries like libc.
 6 | 
 7 | Use a libc for OS-portable code instead.
 8 | 
 9 | You can also compile and run this in Linux x86-64, which is able to run 32-bit. TODO: check that. Seems to work.
10 | 
11 | This probably works because Linux x86-64 uses compatibility mode to run the instructions, and the old IA-32 `int 0x80` system calls work directly on `x86-64`.
12 | 
13 | When using external libraries like libc, you also have to take care to link to the right version of the library.
14 | 
15 | ## System calls
16 | 
17 | We mostly use `int 0x80` system calls here for their simplicity.
18 | 
19 | However, the best way to do system calls in IA-32 Linux is through `sysenter`, as it is faster. User applications should not use `sysenter` directly, though: they should go through the `VDSO` instead.
20 | 
21 | ## VDSO
22 | 
23 | ## vsyscall
24 | 
25 | The kernel just maps and maintains a magic memory area to every process which contains fast versions of certain functions.
26 | 
27 | <http://stackoverflow.com/questions/19938324/what-are-vdso-and-vsyscall>
28 | 


--------------------------------------------------------------------------------
/linux/c-from-assembly/README.md:
--------------------------------------------------------------------------------
 1 | # C from assembly
 2 | 
 3 | Call the C standard library from assembly.
 4 | 
 5 | This is not OS portable because ANSI C does not specify the ABI.
 6 | 
 7 | Things which may vary include:
 8 | 
 9 | - calling conventions
10 | - method names. E.g. Watcom uses `puts_` instead of `puts`
11 | 
12 | The only way to deal with that is by treating each implementation differently with macros.
13 | 
14 | The following methods are covered:
15 | 
16 | - `main/`: `main` function in assembly
17 | - `start/`: `_start` function in assembly, and link with `ld`
18 | - `start-gcc/`: `_start` function in assembly, and link with `gcc`
19 | 


--------------------------------------------------------------------------------
/linux/c-from-assembly/main/Makefile:
--------------------------------------------------------------------------------
 1 | .PHONY: clean test
 2 | 
 3 | main.out: main.asm
 4 | 	nasm -w+all -f elf32 -o main.o main.asm
 5 | 	gcc -m32 -o main.out main.o
 6 | 
 7 | clean:
 8 | 	rm -f *.o *.out
 9 | 
10 | test: main.out
11 | 	./main.out
12 | 


--------------------------------------------------------------------------------
/linux/c-from-assembly/main/README.md:
--------------------------------------------------------------------------------
 1 | # C from assembly
 2 | 
 3 | Call C from assembly.
 4 | 
 5 | This is not portable because ANSI C does not specify the ABI.
 6 | 
 7 | Things which may vary include:
 8 | 
 9 | - calling conventions
10 | - method names. E.g. Watcom uses `puts_` instead of `puts`
11 | 
12 | The only way to deal with that is by treating each implementation differently with macros.
13 | 


--------------------------------------------------------------------------------
/linux/c-from-assembly/main/main.asm:
--------------------------------------------------------------------------------
 1 | extern puts, exit
 2 | section .data
 3 |     hello_world db "hello world", 0
 4 | section .text
 5 |     global main
 6 |     main:
 7 |         push hello_world
 8 |         call puts
 9 |         pop eax
10 | 
11 |         push 0
12 |         call exit
13 | 


--------------------------------------------------------------------------------
/linux/c-from-assembly/start-gcc/Makefile:
--------------------------------------------------------------------------------
 1 | .PHONY: clean test
 2 | 
 3 | main.out: main.asm
 4 | 	nasm -w+all -f elf32 -o main.o main.asm
 5 | 	# -nostartfiles is the key here, or else we will have two _start symbols:
 6 | 	# one on main.asm, and another on crt1.o.
 7 | 	gcc -nostartfiles -m32 -o main.out main.o
 8 | 
 9 | clean:
10 | 	rm -f *.o *.out
11 | 
12 | test: main.out
13 | 	./main.out
14 | 


--------------------------------------------------------------------------------
/linux/c-from-assembly/start-gcc/main.asm:
--------------------------------------------------------------------------------
 1 | extern puts, exit
 2 | section .data
 3 |     hello_world db "hello world", 0
 4 | section .text
 5 |     global _start
 6 |     _start:
 7 |         push hello_world
 8 |         call puts
 9 |         pop eax
10 |         push 0
11 |         call exit
12 | 


--------------------------------------------------------------------------------
/linux/c-from-assembly/start/Makefile:
--------------------------------------------------------------------------------
 1 | .PHONY: clean test
 2 | 
 3 | main.out: main.asm
 4 | 	nasm -w+all -f elf32 -o main.o main.asm
 5 | 	# -dynamic-linker is put into the .interp section
 6 | 	# so that the stdlib can be found.
 7 | 	#
 8 | 	# gcc would fail here because it requires a `main` method
 9 | 	# which is called from crt1.o
10 | 	ld -m elf_i386 -dynamic-linker /lib/ld-linux.so.2 -o main.out -lc main.o
11 | 
12 | clean:
13 | 	rm -f *.o *.out
14 | 
15 | test: main.out
16 | 	./main.out
17 | 


--------------------------------------------------------------------------------
/linux/c-from-assembly/start/main.asm:
--------------------------------------------------------------------------------
 1 | extern puts, exit
 2 | section .data
 3 |     hello_world db "hello world", 0
 4 | section .text
 5 |     global _start
 6 |     _start:
 7 |         push hello_world
 8 |         call puts
 9 |         pop eax
10 |         push 0
11 |         call exit
12 | 


--------------------------------------------------------------------------------
/linux/custom-entry-gcc/Makefile:
--------------------------------------------------------------------------------
 1 | .POSIX:
 2 | # `test` is the target defined below; there is no `run` target in this file.
 3 | .PHONY: clean test
 4 | 
 5 | # -nostartfiles leaves out the default startup files, and -Wl,-etest
 6 | # asks the linker to use `test` (see main.c) as the entry point.
 7 | main.out: main.c
 8 | 	gcc -nostartfiles -Wl,-etest -o '$@'  '$<'
 9 | 
10 | clean:
11 | 	rm -f main.out
12 | 
13 | test: main.out
14 | 	./main.out
15 | 


--------------------------------------------------------------------------------
/linux/custom-entry-gcc/README.md:
--------------------------------------------------------------------------------
1 | # Custom entry GCC
2 | 
3 | Custom entry point in C file different than `main`.
4 | 
5 | <http://stackoverflow.com/questions/3097825/is-there-a-gcc-compiler-linker-option-to-change-the-name-of-main>
6 | 


--------------------------------------------------------------------------------
/linux/custom-entry-gcc/main.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | 
 4 | extern char **environ;
 5 | 
 6 | int test(int argc, char **argv) {
 7 |     /* Trash. */
 8 |     printf("argc = %d\n", argc);
 9 |     /* Setup. */
10 |     printf("environ[0] = %s\n", environ[0]);
11 |     exit(0);
12 | }
13 | 


--------------------------------------------------------------------------------
/linux/custom-entry/Makefile:
--------------------------------------------------------------------------------
 1 | # Assemble every *$(IN_EXT) in this directory with NASM and link each
 2 | # one with ld, using `_custom` instead of the default entry symbol.
 3 | IN_EXT ?= .asm
 4 | OUT_EXT ?= .out
 5 | TMP_EXT ?= .o
 6 | # Basename of the program executed by `make run`.
 7 | RUN ?= hello_world
 8 | 
 9 | INS := $(wildcard *$(IN_EXT))
10 | OUTS := $(patsubst %$(IN_EXT),%$(OUT_EXT),$(INS))
11 | 
12 | # Keep the intermediate object files after linking.
13 | .PRECIOUS: %$(TMP_EXT)
14 | .PHONY: all clean run
15 | 
16 | all: $(OUTS)
17 | 
18 | # `-e _custom` selects the entry point symbol.
19 | # OUT_DIR is not set in this file: it expands to nothing unless supplied
20 | # from the command line or the environment.
21 | %$(OUT_EXT): $(OUT_DIR)%$(TMP_EXT)
22 | 	ld --verbose -e _custom -m elf_i386 -o '$@' '$<'
23 | 
24 | # Use the extension variables so that overriding IN_EXT or TMP_EXT
25 | # keeps this rule in sync with OUTS, .PRECIOUS and clean.
26 | %$(TMP_EXT): %$(IN_EXT)
27 | 	nasm -w+all -f elf32 -o '$@' '$<'
28 | 
29 | clean:
30 | 	rm -f *$(TMP_EXT) *$(OUT_EXT)
31 | 
32 | run: all
33 | 	./$(RUN)$(OUT_EXT)
34 | 

--------------------------------------------------------------------------------
/linux/custom-entry/hello_world.asm:
--------------------------------------------------------------------------------
 1 | section .data
 2 |     hello_world db "hello world", 10
 3 |     hello_world_len  equ $ - hello_world
 4 | section .text
 5 |     global _custom
 6 |     _custom:
 7 |         mov eax, 4
 8 |         mov ebx, 1
 9 |         mov ecx, hello_world
10 |         mov edx, hello_world_len
11 |         int 0x80
12 |         mov eax, 1
13 |         mov ebx, 0
14 |         int 0x80
15 | 


--------------------------------------------------------------------------------
/linux/hello_world.asm:
--------------------------------------------------------------------------------
 1 | section .rodata
 2 |     hello_world db "hello world", 10
 3 |     hello_world_len equ $ - hello_world
 4 | section .text
 5 |     global _start
 6 |     _start:
 7 |         mov eax, 4
 8 |         mov ebx, 1
 9 |         mov ecx, hello_world
10 |         mov edx, hello_world_len
11 |         int 0x80
12 |         mov eax, 1
13 |         mov ebx, 0
14 |         int 0x80
15 | 


--------------------------------------------------------------------------------
/linux/hello_world_min.asm:
--------------------------------------------------------------------------------
 1 | ; Minimized hello world.
 2 | ;
 3 | ; - put the string directly at the end of the `.text` section. Generates ugly `objdump`, but works fine.
 4 | ; - remove explicit `section .text` as NASM defaults to it when no section is specified
 5 | ;
 6 | ; We could also:
 7 | ;
 8 | ; -  remove `_start` as `ld` defaults to `.text` when the `ENTRY` symbol is not specified
 9 | ;
10 | ;    But that would generate warnings.
11 | 
12 | global _start
13 | _start:
14 |     mov eax, 4
15 |     mov ebx, 1
16 |     mov ecx, hello_world
17 |     mov edx, hello_world_len
18 |     int 0x80
19 |     mov eax, 1
20 |     mov ebx, 0
21 |     int 0x80
22 | hello_world db "hello world", 10
23 | hello_world_len  equ $ - hello_world
24 | 


--------------------------------------------------------------------------------
/linux/interactive/Makefile:
--------------------------------------------------------------------------------
1 | ../Makefile


--------------------------------------------------------------------------------
/linux/interactive/memory_layout.asm:
--------------------------------------------------------------------------------
 1 | ; See what `esp` is worth at initialization.
 2 | ;
 3 | ; Pass the output of this through `hd` to see it.
 4 | ;
 5 | ; Expected outcome: two ints, 4 bytes apart, close to 4Gb.
 6 | ;
 7 | ; A little less than 4Gb and random because of ASLR.
 8 | ;
 9 | ; The kernel sets esp to it when the program is started:
10 | ; this is how it communicates it to the application.
11 | ;
12 | ; This is specified by the System V ABI (the i386 supplement for this 32-bit build).
13 | 
14 | %macro print_address 1
15 |     mov eax, 4
16 |     mov ebx, 1
17 |     mov ecx, %1
18 |     ; Store the most significant byte first: big endian is easier for humans to read.
19 |     bswap ecx
20 |     mov [resd0], ecx
21 |     mov ecx, resd0
22 |     mov edx, 4
23 |     int 80h
24 | %endmacro
25 | 
26 | section .bss
27 | 
28 |     resd0 resd 1
29 | 
30 | section .text
31 | 
32 |     global _start
33 |     _start:
34 | 
35 |         print_address _start
36 | 
37 |         print_address esp
38 |         push 1
39 |         print_address esp
40 | 
41 |         mov eax, 1
42 |         mov ebx, 0
43 |         int 0x80
44 | 


--------------------------------------------------------------------------------
/linux/interactive/no_exit.asm:
--------------------------------------------------------------------------------
 1 | ; Without the exit system call, the instruction pointer
 2 | ; just keeps increasing until segfault.
 3 | section .data
 4 |     hello_world db "Hello world!", 10
 5 |     hello_world_len  equ $ - hello_world
 6 | section .text
 7 |     global _start
 8 |     _start:
 9 |         mov eax, 4
10 |         mov ebx, 1
11 |         mov ecx, hello_world
12 |         mov edx, hello_world_len
13 |         int 80h
14 | 


--------------------------------------------------------------------------------
/linux/min.asm:
--------------------------------------------------------------------------------
1 | section .text
2 | global _start
3 | _start:
4 |     ; Exit system call with status 0.
5 |     mov eax, 1
6 |     mov ebx, 0
7 |     int 0x80
8 | 


--------------------------------------------------------------------------------
/linux/stack_top.asm:
--------------------------------------------------------------------------------
 1 | ; See what `esp` is worth at initialization.
 2 | ;
 3 | ; Pass the output of this through hd to see it.
 4 | ;
 5 | ; Expected outcome: two ints, 4 bytes apart, close to 4Gb.
 6 | ;
 7 | ; A little less than 4Gb and random because of ASLR.
 8 | ;
 9 | ; The kernel sets esp to it when the program is started:
10 | ; this is how it communicates it to the application.
11 | ;
12 | ; This is specified by the System V ABI (the i386 supplement for this 32-bit build).
13 | 
14 | %macro print_address 1
15 |     mov eax, 4
16 |     mov ebx, 1
17 |     mov ecx, %1
18 |     bswap ecx
19 |     mov [resd0], ecx
20 |     mov ecx, resd0
21 |     mov edx, 4
22 |     int 80h
23 | %endmacro
24 | 
25 | section .bss
26 | 
27 |     resd0 resd 1
28 | 
29 | section .text
30 | 
31 |     global _start
32 |     _start:
33 | 
34 |         print_address esp
35 |         push 1
36 |         print_address esp
37 | 
38 |         print_address _start
39 | 
40 |         mov eax, 1
41 |         mov ebx, 0
42 |         int 0x80
43 | 


--------------------------------------------------------------------------------
/linux/system_call.md:
--------------------------------------------------------------------------------
 1 | # System call
 2 | 
 3 | <http://stackoverflow.com/questions/12806584/what-is-better-int-0x80-or-syscall>
 4 | 
 5 | The following methods are possible, in order of decreasing preference:
 6 | 
 7 | - VDSO. Used by GCC by default.
 8 | - `sysenter`
 9 | - `int 0x80`
10 | 


--------------------------------------------------------------------------------
/microcode.md:
--------------------------------------------------------------------------------
 1 | # Microcode
 2 | 
 3 | Intel processors have a large front-end that decodes every instruction to a simpler internal undocumented and inaccessible instruction set.
 4 | 
 5 | The undocumented internal instructions are also known as microcode.
 6 | 
 7 | The internal microcode interpreter is known as the RISC core.
 8 | 
 9 | It also seems that microcode can be changed with firmware updates.
10 | 
11 | - <http://stackoverflow.com/questions/4366837/what-is-intel-microcode>
12 | - <http://stackoverflow.com/questions/5806589/why-does-intel-hide-internal-risc-core-in-their-processors>
13 | 


--------------------------------------------------------------------------------
/microcontrollers.md:
--------------------------------------------------------------------------------
 1 | # Microcontrollers
 2 | 
 3 | ## Vs microprocessor
 4 | 
 5 | There is a spectrum, so difficult to say.
 6 | 
 7 | Microcontrollers are small SoCs... memory is inside chip.
 8 | 
 9 | They have IO dedicated pins.
10 | 
11 | TODO: response time is smaller.
12 | 
13 | TODO: most have predictable instruction times, and are better for hard real time.
14 | 
15 | TODO: the latency between signal and result is smaller
16 | 
17 | TODO: often programmed to run on bare metal, while microprocessors are more often programmed to run on top of an OS (e.g. Linux). But there are also OSes for microcontrollers, mostly real time ones: <http://www.embedded.com/design/operating-systems/4425751/Comparing-microcontroller-real-time-operating-systems>, e.g. QNX. Those OSes can even implement the POSIX C API.
18 | 
19 | Microcontrollers can have either internal or external clock sources: <http://electronics.stackexchange.com/questions/15455/internal-or-external-oscillator> TODO CPUs?
20 | 
21 | <http://electronics.stackexchange.com/questions/1092/whats-the-difference-between-a-microcontroller-and-a-microprocessor>
22 | 


--------------------------------------------------------------------------------
/nasm/Makefile:
--------------------------------------------------------------------------------
1 | ../Makefile


--------------------------------------------------------------------------------
/nasm/comment.asm:
--------------------------------------------------------------------------------
 1 | ; # Comments
 2 | 
 3 |     ; Single line with semicolon `;`
 4 | 
 5 |     ; Multi-line: TODO `%comment` was added but then removed?
 6 | 
 7 |         ;%comment
 8 |             ;Any thing.
 9 |         ;%endcomment
10 | 
11 | %include "lib/common_nasm.inc"
12 | 
13 | ENTRY
14 |     mov eax, 1
15 |     ; mov eax, 2
16 |     ASSERT_EQ eax, 1
17 | EXIT
18 | 


--------------------------------------------------------------------------------
/nasm/current_address.asm:
--------------------------------------------------------------------------------
 1 | ; # Current address
 2 | 
 3 | ; # $
 4 | 
 5 | %include "lib/common_nasm.inc"
 6 | 
 7 | ENTRY
 8 | 
 9 |     PRINT_INT $
10 |     PRINT_INT $
11 |     mov eax, $ + 1
12 | 
13 |     ; Infinite loop.
14 |     ;jmp $
15 | 
16 | EXIT
17 | 


--------------------------------------------------------------------------------
/nasm/define.asm:
--------------------------------------------------------------------------------
 1 | ; # define
 2 | 
 3 |     ; Single line preprocessor macro: every later use of `SIZE`
 4 |     ; is replaced by `10` before assembly.
 5 | 
 6 | %include "lib/common_nasm.inc"
 7 | 
 8 | ENTRY
 9 |     %define SIZE 10
10 |     mov eax, SIZE
11 |     ASSERT_EQ eax, 10
12 | EXIT
13 | 


--------------------------------------------------------------------------------
/nasm/equ.asm:
--------------------------------------------------------------------------------
 1 | ; # equ
 2 | 
 3 |     ; http://stackoverflow.com/a/33148242/895245
 4 | 
 5 | %include "lib/common_nasm.inc"
 6 | 
 7 | DATA
 8 |     global x
 9 |     x: equ 1 + 1
10 |     y: dd 1 + 1
11 | ENTRY
12 |     mov eax, x
13 |     ASSERT_EQ eax, 2
14 |     mov eax, [y]
15 |     ASSERT_EQ eax, 2
16 | EXIT
17 | 


--------------------------------------------------------------------------------
/nasm/if.asm:
--------------------------------------------------------------------------------
1 | %include "lib/common_nasm.inc"
2 | 
3 | ENTRY
4 |     %if 0
5 |         ASSERT_FAIL
6 |     %endif
7 | EXIT
8 | 


--------------------------------------------------------------------------------
/nasm/include.asm:
--------------------------------------------------------------------------------
 1 | ; # include
 2 | 
 3 |     ; Same as C #include: copy pastes file data.
 4 | 
 5 |     ; The common standard extension is `.inc`.
 6 | 
 7 | %include "lib/common_nasm.inc"
 8 | 
 9 | ENTRY
10 |     mov eax, 1
11 |     %include "included.inc"
12 |     ASSERT_EQ eax, 2
13 | EXIT
14 | 


--------------------------------------------------------------------------------
/nasm/included.inc:
--------------------------------------------------------------------------------
1 | mov eax, 2
2 | 


--------------------------------------------------------------------------------
/nasm/introduction.md:
--------------------------------------------------------------------------------
 1 | # Introduction
 2 | 
 3 | Netwide assembler. TODO what does Netwide mean?
 4 | 
 5 | One of the most popular for Linux.
 6 | 
 7 | Open source.
 8 | 
 9 | Intel like syntax.
10 | 
11 | Input formats:
12 | 
13 | - `.asm`, Intel like syntax
14 | 
15 | Most useful command line options:
16 | 
17 | - `-w+all`: enable all warnings
18 | 
19 | ## Downsides
20 | 
21 | NASM is way too gentle on input validation, even on `-w+all`.
22 | 
23 | In particular, anything unknown is happily treated as label, even if it does not have colon `:`.
24 | 
25 | ## f
26 | 
27 | Output format:
28 | 
29 |     nasm -f elf32 main.asm
30 | 
31 | List all formats:
32 | 
33 |     nasm -hf
34 | 
35 | You should always use this flag.
36 | 
37 | The default format of a default compilation (and that of Ubuntu 14.04) is `bin`, which generates raw instructions without any ELF metadata.
38 | 
39 | `bin` is not very useful for programs that must be loaded by an OS: <http://stackoverflow.com/questions/2427011/what-is-the-difference-between-elf-files-and-bin-files>
40 | 
41 | There seems to be no way to automatically decide a better choice: the default is a compilation option.
42 | 
43 | The following formats work:
44 | 
45 | - `elf32`: Linux 32-bit
46 | - `elf64`: Linux 64-bit
47 | - `bin`: raw 16-bit. Useful to create boot sectors. 32-bit memory addresses are encodable with the `66h` prefix.
48 | 
49 | ## Source code
50 | 
51 |     git clone git://repo.or.cz/nasm.git
52 |     sh autogen.sh
53 |     ./configure
54 |     make
55 |     make install
56 | 
57 | 


--------------------------------------------------------------------------------
/nasm/lib:
--------------------------------------------------------------------------------
1 | ../lib


--------------------------------------------------------------------------------
/nasm/local_labels.asm:
--------------------------------------------------------------------------------
 1 | ; # Local labels
 2 | 
 3 |     ; http://www.nasm.us/doc/nasmdoc3.html#section-3.9
 4 | 
 5 |     ; Labels that start with a period `.` get the previous non-local label
 6 |     ; prepended to them, which may give them uniqueness.
 7 | 
 8 |     ; Those are still present on the output `.o` however.
 9 | 
10 |     ; GAS has a related convention which hides `.L` prefixed labels
11 |     ; completely from the output `.o`.
12 | 
13 | %include "lib/common_nasm.inc"
14 | 
15 | ENTRY
16 | outside_label:
17 | 
18 |     ; This should not be done in practice,
19 |     ; but shows how it works under the hood.
20 |     jmp outside_label.inside_label
21 |     ASSERT_FAIL
22 | .inside_label:
23 | 
24 |     ; This is what you should do in practice.
25 |     ; Labels also get appended when used as arguments.
26 |     jmp .inside_label2
27 |     ASSERT_FAIL
28 | .inside_label2:
29 | 
30 | EXIT
31 | 


--------------------------------------------------------------------------------
/nasm/ptr.asm:
--------------------------------------------------------------------------------
 1 | ; http://stackoverflow.com/questions/13790146/x86-difference-between-byte-and-byte-ptr
 2 | ;
 3 | ; It's just a syntax error for NASM.
 4 | 
 5 | %include "lib/common_nasm.inc"
 6 | 
 7 | ENTRY
 8 |     ; mov byte [ecx], 0
 9 | EXIT
10 | 


--------------------------------------------------------------------------------
/nasm/ram.asm:
--------------------------------------------------------------------------------
 1 | ; Examples of how to access RAM in NASM.
 2 | 
 3 | %include "lib/common_nasm.inc"
 4 | 
 5 | section .bss
 6 | 
 7 |     resd0 resd 1
 8 |     resd1 resd 1
 9 | 
10 | ENTRY
11 | 
12 |     ; Square brackets `[]` are needed to access the *data*,
13 |     ; otherwise the address of the label is understood:
14 | 
15 |     ; Data:
16 | 
17 |         mov dword [resd0], 1
18 |         ASSERT_EQ [resd0], 1, dword
19 | 
20 |     ; Address:
21 | 
22 |         mov dword eax, resd1
23 |         sub eax, resd0
24 |         ASSERT_EQ eax, 4
25 | 
26 |     ; # Memory size
27 | 
28 |         ; TODO examples.
29 | 
30 |         ; When operations involve registers it is not necessary to specify
31 |         ; the number of bytes to move around since that can be deduced from the
32 |         ; register form: `eax` = 4 bytes, `ax` = 2 bytes, etc.
33 | 
34 |         ; But when moving or comparing literals to RAM
35 |         ; it is mandatory to specify the number of bytes to move.
36 | 
37 |             ; Error: size not specified.
38 |             ;mov [resd0], 1
39 | 
40 |         ; NASM knows it is 4 bytes because we are using eax:
41 | 
42 |             mov eax, 1
43 |             mov [resd0], eax
44 | 
45 |         ; List of all size keywords:
46 |         ; http://stackoverflow.com/questions/12063840/what-are-the-sizes-of-tword-oword-and-yword-operands
47 | 
48 |         ; # byte
49 | 
50 |             ; 1
51 | 
52 |         ; # word
53 | 
54 |             ; 2
55 | 
56 |         ; # dword
57 | 
58 |             ; 4
59 | 
60 |             ; Double word.
61 | 
62 |         ; # qword
63 | 
64 |             ; 8
65 | 
66 |             ; Quadruple word.
67 | 
68 |             ; As a general purpose register operand, only possible in 64-bit mode.
69 | 
70 |         ; # tword
71 | 
72 |             ; 10
73 | 
74 |             ; Ten.
75 | 
76 |             ; Used for x87 floating point formats. TODO example
77 | 
78 |         ; # oword
79 | 
80 |             ; 16
81 | 
82 |             ; TODO example
83 | 
84 |         ; # yword
85 | 
86 |             ; 32
87 | 
88 |         ; # zword
89 | 
90 |             ; 64
91 | 
92 | EXIT
93 | 


--------------------------------------------------------------------------------
/nasm/symbol_colon.asm:
--------------------------------------------------------------------------------
 1 | ; The colon after a symbol name is optional in NASM.
 2 | ;
 3 | ; http://stackoverflow.com/questions/8006365/is-there-a-difference-between-using-or-not-a-colon-after-symbol-name-definitions
 4 | 
 5 | %include "lib/common_nasm.inc"
 6 | 
 7 | DATA
 8 |     ; With colon.
 9 |     x: db 1
10 |     ; Without colon.
11 |     y db 2
12 | ENTRY
13 |     mov eax, 0
14 |     mov al, [x]
15 |     ASSERT_EQ eax, 1
16 |     mov al, [y]
17 |     ASSERT_EQ eax, 2
18 | EXIT
19 | 


--------------------------------------------------------------------------------
/objcopy.md:
--------------------------------------------------------------------------------
 1 | # objcopy
 2 | 
 3 | ## Extract content from a section
 4 | 
 5 | <http://stackoverflow.com/questions/3925075/how-do-you-extract-only-the-contents-of-an-elf-section>
 6 | 
 7 | ## Separate debug information from executable
 8 | 
 9 | <http://stackoverflow.com/questions/866721/how-to-generate-gcc-debug-symbol-outside-the-build-target>
10 | 
11 |     objcopy --only-keep-debug "${from}" "${to}"
12 |     strip --strip-debug --strip-unneeded "${from}"
13 |     objcopy --add-gnu-debuglink="${to}" "${from}"
14 | 


--------------------------------------------------------------------------------
/objdump.md:
--------------------------------------------------------------------------------
 1 | # objdump
 2 | 
 3 | `objdump` shows a weird view of ELF files: use `readelf` whenever possible:
 4 | 
 5 | - <http://stackoverflow.com/questions/22160621/objdump-not-showing-all-sections>
 6 | - <http://stackoverflow.com/questions/8979664/readelf-vs-objdump-why-are-both-needed>
 7 | 
 8 | Things which it can do that `readelf` cannot:
 9 | 
10 | - disassemble
11 | 
12 | Most useful `objdump` command:
13 | 
14 |     objdump -Sr a.o
15 | 
16 | ## d
17 | 
18 | See assembler instructions of object file or executable:
19 | 
20 |     objdump -d a.o
21 | 
22 | Only disassembles sections which should contain assembly code like `.text`.
23 | 
24 | Sample output on a x86_64 Linux hello world:
25 | 
26 |     hello_world.o:     file format elf64-x86-64
27 | 
28 | 
29 |     Disassembly of section .text:
30 | 
31 |     0000000000000000 <_start>:
32 |        0:       b8 01 00 00 00          mov    $0x1,%eax
33 |        5:       bf 01 00 00 00          mov    $0x1,%edi
34 |        a:       48 be 00 00 00 00 00    movabs $0x0,%rsi
35 |       11:       00 00 00
36 |       14:       ba 0d 00 00 00          mov    $0xd,%edx
37 |       19:       0f 05                   syscall
38 |       1b:       b8 3c 00 00 00          mov    $0x3c,%eax
39 |       20:       bf 00 00 00 00          mov    $0x0,%edi
40 |       25:       0f 05                   syscall
41 | 
42 | For executable files, it still writes `Disassembly of section .text:`, but it means *segment* `.text`, it is purely wrong.
43 | 
44 | ## S
45 | 
46 | Similar to `-d`, but also intermingle original C code with disassembly if there is debug information available for that: 
47 | 
48 |     gcc -ggdb3 -c -o a.o a.c
49 |     objdump -S a.o
50 | 
51 | ## j
52 | 
53 | Select which section to act on.
54 | 
55 | ## Disassemble a single function
56 | 
57 | Seems impossible, use GDB instead.
58 | 
59 | <http://stackoverflow.com/questions/22769246/disassemble-one-function-using-objdump>
60 | 
61 | ## M
62 | 
63 | Disassemble with Intel syntax:
64 | 
65 |     objdump -M intel -d main.o
66 | 


--------------------------------------------------------------------------------
/other-architectures.md:
--------------------------------------------------------------------------------
 1 | # Other architectures
 2 | 
 3 | This tutorial focuses only on the x86 architecture families.
 4 | 
 5 | Here are some other popular architectures which you should have in mind as well:
 6 | 
 7 | ## PowerPC
 8 | 
 9 | <https://en.wikipedia.org/wiki/PowerPC>
10 | 
11 | Created by IBM, Motorola and Apple.
12 | 
13 | But IBM and Motorola are dying, Apple moved to Intel in 2005 for power efficiency, Xbox One and PlayStation 4 moved to x86-64 in 2013.
14 | 
15 | Used on Nintendo GameCube (2001) up to Wii U (2012), PlayStation 3 and Xbox 360.
16 | 
17 | ## MIPS
18 | 
19 | <http://en.wikipedia.org/wiki/MIPS_instruction_set>
20 | 
21 | RISC. Was really hot in 80s, bought by SGI. More recently bought by Imagination Technologies <https://en.wikipedia.org/wiki/Imagination_Technologies>.
22 | 
23 | Used mostly on embedded systems, and notably video game consoles: Nintendo 64, Sony PlayStation, PlayStation 2.
24 | 
25 | Officially supported on Android.
26 | 
27 | <http://www.androidauthority.com/nintendo-64-android-l-microcontrollers-story-mips-538596/>
28 | 
29 | Lexra story: <http://probell.com/lexra/>. It is not possible to patent an entire ISA as there is too much prior art, but companies instead patent specific novel instructions, and the compatibility break kills off reimplementors.
30 | 
31 | ## SPARC
32 | 
33 | <https://en.wikipedia.org/wiki/SPARC>
34 | 
35 | Created by Sun for its operating system Solaris. Now owned and used by Oracle. Not used much by anyone else.
36 | 
37 | Was meant to be the ultimate RISC that would beat the Intel and Windows coalition, but it failed.
38 | 


--------------------------------------------------------------------------------
/params.makefile:
--------------------------------------------------------------------------------
1 | PHONY_MAKES = gas linux interactive linker-scripts nasm x86-64
2 | 


--------------------------------------------------------------------------------
/pipeline.md:
--------------------------------------------------------------------------------
 1 | # Pipeline
 2 | 
 3 | Throughput latency tradeoff. Hugely advantageous on throughput side, so always used today.
 4 | 
 5 | - <http://en.wikipedia.org/wiki/Instruction_pipeline>
 6 | - <http://en.wikipedia.org/wiki/Operand_forwarding> <http://web.cs.iastate.edu/~prabhu/Tutorial/PIPELINE/forward.html>
 7 | - <http://en.wikipedia.org/wiki/Bubble_%28computing%29>
 8 | - <https://en.wikipedia.org/wiki/Very_long_instruction_word>
 9 | - <https://en.wikipedia.org/wiki/Orthogonal_instruction_set>
10 | - <https://en.wikipedia.org/wiki/Bubble_(computing)> (AKA pipeline stall)
11 | 
12 | Tutorials:
13 | 
14 | - <https://scalibq.wordpress.com/2012/02/19/cpus-and-pipelines-how-do-they-work/>
15 | - <https://www.cs.uaf.edu/2010/fall/cs441/lecture/09_16_pipelining.html>
16 | - <https://www.youtube.com/watch?v=euhQ_hdDGA8> talks about the classic 5 stage pipeline
17 | - <https://compas.cs.stonybrook.edu/course/cse502-s13/lectures/cse502-L4-pipelining.pdf> good structural diagram
18 | 
19 | ## Hazards
20 | 
21 | Types:
22 | 
23 | - data
24 | - structural: same pipeline stage used by two instructions at the same time
25 | - control: branches
26 | 
27 | - <https://en.wikipedia.org/wiki/Hazard_%28computer_architecture%29>
28 | 
29 | ## Classic RISC pipeline
30 | 
31 | Most tutorials cover this, so it is likely a good idea to learn this one real well.
32 | 
33 | Seems to come from MIPS, so basic MIPS assembly will help you.
34 | 
35 | <https://en.wikipedia.org/wiki/Classic_RISC_pipeline>
36 | 
37 | ## Implementations
38 | 
39 | Vendors document pipeline length.
40 | 
41 | Intel: <https://en.wikipedia.org/wiki/List_of_Intel_CPU_microarchitectures>
42 | 


--------------------------------------------------------------------------------
/pros-and-cons-of-assembly.md:
--------------------------------------------------------------------------------
 1 | # Pros and cons of assembly
 2 | 
 3 | Assembly vs. higher level languages like C.
 4 | 
 5 | ## Pros
 6 | 
 7 | ### Speed
 8 | 
 9 | -   Use instructions that are so CPU specific, yet so useful for your needs, that compilers don't generate them.
10 | 
11 | -   Use instructions in a way that is smarter than any compiler is likely to do.
12 | 
13 | Note however that it gets harder and harder to make code more efficient using assembler because compilers are smarter and smarter.
14 | 
15 | ### Do the impossible
16 | 
17 | Access low level hardware which is so hardware specific it is not implemented in standard C.
18 | 
19 | ## Cons
20 | 
21 | ### OS/processor dependent
22 | 
23 | You write for a single CPU architecture only.
24 | 
25 | ### Hard to read/write
26 | 
27 | Other things are much more likely to speed up your code if that's what you want, namely:
28 | 
29 | - better algorithms
30 | - better cache usage
31 | 
32 | and only then may using assembly stand a chance of speeding things up.
33 | 


--------------------------------------------------------------------------------
/provision:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | # Unattended provisioning: -y answers apt-get's confirmation prompt, which would otherwise abort the install.
3 | sudo apt-get install -y make
4 | sudo apt-get install -y nasm
5 | 


--------------------------------------------------------------------------------
/pusha.asm:
--------------------------------------------------------------------------------
 1 | ; # pusha
 2 | 
 3 | ; # popa
 4 | 
 5 |     ; Push and restore the following registers using the stack:
 6 |     ; EAX, ECX, EDX, EBX, ESP, EBP, ESI, EDI
 7 | 
 8 |     ; This is important for example in the C calling convention,
 9 |     ; where certain registers must not be changed by functions.
10 | 
11 | %include "lib/common_nasm.inc"
12 | 
13 | ENTRY
14 | 
15 |     mov ebx, 0
16 |     mov ecx, 0
17 | 
18 |     mov eax, esp
19 |     pusha
20 |     sub eax, esp
21 |     ; 8x 4 bytes
22 |     ASSERT_EQ eax, 32
23 | 
24 |     mov ebx, 1
25 |     mov ecx, 1
26 |     popa
27 |     ASSERT_EQ ebx, 0
28 |     ASSERT_EQ ecx, 0
29 | 
30 | EXIT
31 | 


--------------------------------------------------------------------------------
/readelf.md:
--------------------------------------------------------------------------------
 1 | # readelf
 2 | 
 3 | Get information stored inside executable files in a human readable way.
 4 | 
 5 | See all information:
 6 | 
 7 |     readelf -a a.out
 8 | 
 9 | TODO understand all the information, and therefore the entire elf format.
10 | 
11 | To show only specific information:
12 | 
13 | ## s
14 | 
15 | Symbol table of an ELF executable, `.o` or `.so`.
16 | 
17 | ## d
18 | 
19 | Show only dependencies of an executable (symbols and shared libraries).
20 | 
21 | - `NEEDED`: direct `.so` dependencies <http://stackoverflow.com/questions/6242761/how-do-i-find-the-direct-shared-object-dependencies-of-a-linux-elf-binary>. `.so` can also depend on other `.so`. To recursively list all dependencies, use `ldd`.
22 | 
23 | ## x
24 | 
25 | Hexdump section data for given section:
26 | 
27 |     readelf -x .data hello_world.o
28 | 
29 | Sample output:
30 | 
31 |     Hex dump of section '.data':
32 |       0x00000000 48656c6c 6f20776f 726c6421 0a       Hello world!.
33 | 
34 | ## W
35 | 
36 | Don't limit the output width to 80 columns which is the default, and reduce the amount of `0` padding.
37 | 
38 | Makes things much more readable if your terminal is wide enough.
39 | 
40 | Without `-W`:
41 | 
42 |     Section Headers:
43 |       [Nr] Name              Type             Address           Offset
44 |            Size              EntSize          Flags  Link  Info  Align
45 |       [ 0]                   NULL             0000000000000000  00000000
46 |            0000000000000000  0000000000000000           0     0     0
47 |       [ 1] .text             PROGBITS         0000000000000000  00000040
48 |            000000000000005c  0000000000000000  AX       0     0     1
49 |       [ 2] .rela.text        RELA             0000000000000000  000002f0
50 |            00000000000000d8  0000000000000018   I      11     1     8
51 |       [ 3] .data             PROGBITS         0000000000000000  0000009c
52 |            0000000000000004  0000000000000000  WA       0     0     4
53 |       [ 4] .bss              NOBITS           0000000000000000  000000a0
54 |            0000000000000000  0000000000000000  WA       0     0     1
55 |       [ 5] .rodata           PROGBITS         0000000000000000  000000a0
56 |            0000000000000004  0000000000000000   A       0     0     1
57 |       [ 6] .comment          PROGBITS         0000000000000000  000000a4
58 |            0000000000000025  0000000000000001  MS       0     0     1
59 |       [ 7] .note.GNU-stack   PROGBITS         0000000000000000  000000c9
60 |            0000000000000000  0000000000000000           0     0     1
61 |       [ 8] .eh_frame         PROGBITS         0000000000000000  000000d0
62 |            0000000000000058  0000000000000000   A       0     0     8
63 |       [ 9] .rela.eh_frame    RELA             0000000000000000  000003c8
64 |            0000000000000030  0000000000000018   I      11     8     8
65 |       [10] .shstrtab         STRTAB           0000000000000000  00000128
66 |            0000000000000061  0000000000000000           0     0     1
67 |       [11] .symtab           SYMTAB           0000000000000000  00000190
68 |            0000000000000138  0000000000000018          12    10     8
69 |       [12] .strtab           STRTAB           0000000000000000  000002c8
70 |            0000000000000025  0000000000000000           0     0     1
71 | 
72 | With `-W`:
73 | 
74 |     Section Headers:
75 |       [Nr] Name              Type            Address          Off    Size   ES Flg Lk Inf Al
76 |       [ 0]                   NULL            0000000000000000 000000 000000 00      0   0  0
77 |       [ 1] .text             PROGBITS        0000000000000000 000040 00005c 00  AX  0   0  1
78 |       [ 2] .rela.text        RELA            0000000000000000 0002f0 0000d8 18   I 11   1  8
79 |       [ 3] .data             PROGBITS        0000000000000000 00009c 000004 00  WA  0   0  4
80 |       [ 4] .bss              NOBITS          0000000000000000 0000a0 000000 00  WA  0   0  1
81 |       [ 5] .rodata           PROGBITS        0000000000000000 0000a0 000004 00   A  0   0  1
82 |       [ 6] .comment          PROGBITS        0000000000000000 0000a4 000025 01  MS  0   0  1
83 |       [ 7] .note.GNU-stack   PROGBITS        0000000000000000 0000c9 000000 00      0   0  1
84 |       [ 8] .eh_frame         PROGBITS        0000000000000000 0000d0 000058 00   A  0   0  8
85 |       [ 9] .rela.eh_frame    RELA            0000000000000000 0003c8 000030 18   I 11   8  8
86 |       [10] .shstrtab         STRTAB          0000000000000000 000128 000061 00      0   0  1
87 |       [11] .symtab           SYMTAB          0000000000000000 000190 000138 18     12  10  8
88 |       [12] .strtab           STRTAB          0000000000000000 0002c8 000025 00      0   0  1
89 | 


--------------------------------------------------------------------------------
/registers.asm:
--------------------------------------------------------------------------------
  1 | ; Intel puts the registers in the following groups:
  2 | 
  3 | ; - General-purpose registers
  4 | ; - Segment registers
  5 | ; - EFLAGS (program status and control) register
  6 | 
  7 | %include "lib/common_nasm.inc"
  8 | 
  9 | ENTRY
 10 | 
 11 |     ; # General purpose registers
 12 | 
 13 |         ; # ebp
 14 | 
 15 |             ; Stack base.
 16 | 
 17 |             ; Why it exists:
 18 |             ; http://stackoverflow.com/questions/579262/what-is-the-purpose-of-the-ebp-frame-pointer-register
 19 | 
 20 |             ; - tell the debugger where the current frame starts
 21 |             ; - facilitate VLA and alloca
 22 | 
 23 |             ; Modified by `enter` and `leave`.
 24 | 
 25 |             ; You can almost never use it as a general purpose register because of that.
 26 | 
 27 |             ; Its usage in high level languages can be optimized away
 28 |             ; e.g. with `-fomit-frame-pointer` in GCC 4.8.
 29 | 
 30 |         ; # esp
 31 | 
 32 |             ; Stack Pointer.
 33 | 
 34 |             ; Notably modified by `push`, `pop`, `enter`, `leave`, `call` and `ret`.
 35 | 
 36 |             ; You can almost never use it as a general purpose register because of that.
 37 | 
 38 |         ; # eip
 39 | 
 40 |             ; Instruction Pointer.
 41 | 
 42 |             ; Address of the current instruction to be executed.
 43 | 
 44 |             ; Cannot be retrieved directly: `mov` cannot be encoded to output to EIP:
 45 |             ; http://stackoverflow.com/questions/599968/reading-program-counter-directly
 46 | 
 47 |             ; There are however indirect techniques, and NASM offers `$`.
 48 | 
 49 |         ; # pc
 50 | 
 51 |             ; Another name for the IP.
 52 | 
 53 |             ; https://en.wikipedia.org/wiki/Program_counter
 54 | 
 55 |     ; # Initial register state
 56 | 
 57 |         ; Finally, no more programming languages getting in our way with definite assignment:
 58 | 
 59 |         ; - http://stackoverflow.com/questions/1802783/initial-state-of-program-registers-and-stack-on-linux-arm
 60 |         ; - http://stackoverflow.com/questions/9147455/what-is-default-register-state-when-program-launches-asm-linux
 61 | 
 62 |         ; Mentioned on major ABI specs, e.g. AMD64: http://www.x86-64.org/documentation/abi-0.99.pdf
 63 | 
 64 |     ; # Flag registers and instructions
 65 | 
 66 |         ; Most flags are identified by one letter, and commonly denoted `XF` for "flag X".
 67 | 
 68 |         ; Flags may be set or reset explicitly by dedicated instructions,
 69 |         ; and also as side effects of other instructions, e.g. `cmp` for ZF and CF.
 70 | 
 71 |         ; The flags are:
 72 | 
 73 |         ; - Type: Name (Identifier)
 74 |         ; - X: ID Flag (ID)
 75 |         ; - X: Virtual Interrupt Pending (VIP)
 76 |         ; - X: Virtual Interrupt Flag (VIF)
 77 |         ; - X: Alignment Check / Access Control (AC)
 78 |         ; - X: Virtual-8086 Mode (VM)
 79 |         ; - X: Resume Flag (RF)
 80 |         ; - X: Nested Task (NT)
 81 |         ; - X: I/O Privilege Level (IOPL)
 82 |         ; - S: Overflow Flag (OF)
 83 |         ; - C: Direction Flag (DF)
 84 |         ; - X: Interrupt Enable Flag (IF)
 85 |         ; - X: Trap Flag (TF)
 86 |         ; - S: Sign Flag (SF)
 87 |         ; - S: Zero Flag (ZF)
 88 |         ; - S: Auxiliary Carry Flag (AF)
 89 |         ; - S: Parity Flag (PF)
 90 |         ; - S: Carry Flag (CF)
 91 | 
 92 |         ; The types are:
 93 | 
 94 |         ; - `S`: Status Flag. Has no side effects.
 95 |         ; - `C`: Control Flag. Has side effects.
 96 |         ; - `X`: System Flag. Should not be modified from applications, only OS.
 97 | 
 98 |         ; The bit number of each flag is fixed.
 99 | 
100 |         ; All the non-used are reserved for future use.
101 | 
102 |         ; # FLAGS
103 | 
104 |             ; 16-bit register that contains bit flags.
105 | 
106 |             ; Same as the lower bits of EFLAGS, which was added later.
107 | 
108 |         ; # EFLAGS
109 | 
110 |             ; 32-bit register that contains bit flags. TODO: added in x86-64?
111 | 
112 |             ; Lower 32 bits of RFLAGS.
113 | 
114 |         ; # RFLAGS
115 | 
116 |             ; 64-bit register that contains bit flags. Added in x86-64.
117 | 
118 |             ; TODO check: none of the new flags are used so far.
119 | 
120 |         ; # How to read and write flags
121 | 
122 |             ; http://stackoverflow.com/questions/1406783/flags-registers-can-we-read-or-write-them-directly
123 | 
124 |             ; - for a few flags: `jxx`
125 |             ; - for the 8 lower bits: `lahf` and `sahf`
126 |             ; - for all others: `pushf` and `popf`
127 | 
128 |         ; # Individual flag instructions
129 | 
130 |             ; Some, but not all, individual flags have dedicated instructions to operate on them.
131 | 
132 |             ; # stc
133 | 
134 |             ; # sti
135 | 
136 |             ; # std
137 | 
138 |                 ; Set flag X (set it to 1).
139 | 
140 |             ; # clc
141 | 
142 |             ; # cli
143 | 
144 |             ; # cld
145 | 
146 |                 ; Clear flag X (set it to 0).
147 | 
148 |             ; # cmc
149 | 
150 |                 ; Complement flag X (boolean negation).
151 | 
152 |                 ; This is the only flag that has the complement instruction.
153 | 
154 |             stc
155 |             ASSERT_FLAG jc
156 |             clc
157 |             ASSERT_FLAG jnc
158 |             cmc
159 |             ASSERT_FLAG jc
160 |             cmc
161 |             ASSERT_FLAG jnc
162 | EXIT
163 | 


--------------------------------------------------------------------------------
/risc-vs-cisc.md:
--------------------------------------------------------------------------------
 1 | # RISC vs CISC
 2 | 
 3 | The distinction between RISC and CISC is not a precise one, only a subjective quantitative description.
 4 | 
 5 | ## RISC
 6 | 
 7 | Reduced Instruction set.
 8 | 
 9 | Has few operations, but performs each one very fast.
10 | 
11 | ### One instruction set computer
12 | 
13 | Takes it to the limit:
14 | 
15 | <https://en.wikipedia.org/wiki/One_instruction_set_computer>
16 | 
17 | Funny example implementation for C in x86 `mov`: <https://github.com/xoreaxeaxeax/movfuscator>
18 | 
19 | ## CISC
20 | 
21 | Complex instruction set.
22 | 
23 | Lots of operations.
24 | 
25 | x86 is considered CISC, although it is known that the processor has a RISC core.
26 | 
27 | ARM is considered RISC.
28 | 
29 | ## RISC core
30 | 
31 | The distinction between RISC and CISC is further blurred by the fact that Intel x86, widely considered CISC, is actually composed of:
32 | 
33 | - a complex instruction decoder, which transforms complex input instructions into a simpler subset called microcode
34 | - a RISC core which understands the microcode and actually runs it
35 | 


--------------------------------------------------------------------------------
/segment:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cirosantilli/x86-assembly-cheat/0c64fc5961bbdcbf442cec9bb98d70e889b1984f/segment


--------------------------------------------------------------------------------
/segment_registers.asm:
--------------------------------------------------------------------------------
 1 | ; # Segment registers
 2 | 
 3 | ; # CS register
 4 | 
 5 | ; # DS register
 6 | 
 7 | ; # ES register
 8 | 
 9 | ; # FS register
10 | 
11 | ; # GS register
12 | 
13 | ; # SS register
14 | 
15 |     ; They cannot be tested easily in userland.
16 | 
17 |     ; Simple test using them in real mode:
18 |     ; https://github.com/cirosantilli/x86-bare-metal-examples/blob/01a5e64efaac653429ee24e502f611fea7850abb/segment_registers_real_mode.S
19 | 
20 |     ; TODO what are their initial values in Linux?
21 | 
22 | %include "lib/common_nasm.inc"
23 | 
24 | ENTRY
25 |     PRINT_STRING_LITERAL 'CS'
26 |     PRINT_INT cs
27 | 
28 |     PRINT_STRING_LITERAL 'DS'
29 |     PRINT_INT ds
30 | 
31 |     PRINT_STRING_LITERAL 'ES'
32 |     PRINT_INT es
33 | 
34 |     PRINT_STRING_LITERAL 'FS'
35 |     PRINT_INT fs
36 | 
37 |     PRINT_STRING_LITERAL 'GS'
38 |     PRINT_INT gs
39 | 
40 |     PRINT_STRING_LITERAL 'SS'
41 |     PRINT_INT ss
42 | 
43 |     ; SEGFAULT: can't write to them in user mode:
44 |     ;mov eax, 0
45 |     ;mov ss, eax
46 | 
47 | EXIT
48 | 


--------------------------------------------------------------------------------
/size.md:
--------------------------------------------------------------------------------
 1 | # size
 2 | 
 3 | Shows the size of each memory part of an executable:
 4 | 
 5 |     gcc -c a.c
 6 |     gcc a.o
 7 |     size a.out a.o
 8 | 
 9 | - text: instructions
10 | - data: init and `uinit` data
11 | - `dec` and `hex`: size of executable in decimal and hexadecimal
12 | 


--------------------------------------------------------------------------------
/stdcall.md:
--------------------------------------------------------------------------------
 1 | # stdcall
 2 | 
 3 | A C calling convention, less used than cdecl.
 4 | 
 5 | Used on the Win32 API.
 6 | 
 7 | Advantages:
 8 | 
 9 | - generates slightly smaller code
10 | - potentially faster.
11 | 
12 | Disadvantages:
13 | 
14 | - it is not possible to pass variable number of arguments.
15 | 


--------------------------------------------------------------------------------
/string-instructions.md:
--------------------------------------------------------------------------------
1 | # String instruction
2 | 
3 | Instructions to efficiently loop over multiple bytes.
4 | 


--------------------------------------------------------------------------------
/superscalar.md:
--------------------------------------------------------------------------------
1 | # Superscalar architecture
2 | 
3 | TODO vs pipeline and instruction level parallelism?
4 | 
5 | <http://en.wikipedia.org/wiki/Superscalar>
6 | 


--------------------------------------------------------------------------------
/synchronization.md:
--------------------------------------------------------------------------------
 1 | # Synchronization
 2 | 
 3 | TODO understand better and make multithreaded examples that fail.
 4 | 
 5 | TODO Other sync instructions:
 6 | 
 7 | - MFENCE
 8 | - LFENCE
 9 | - SFENCE
10 | 
11 | Likely should be done with inline assembly. TODO: is it possible to test those things here (see failures) e.g. under the Linux kernel, or do we need to go bare metal?
12 | 
13 | - http://stackoverflow.com/questions/4725676/how-does-x86-pause-instruction-work-in-spinlock-and-can-it-be-used-in-other-sc
14 | - http://stackoverflow.com/questions/11959374/fastest-inline-assembly-spinlock
15 | - http://wiki.osdev.org/Spinlock
16 | 
17 | ## Lock prefix
18 | 
19 | - http://stackoverflow.com/questions/11065675/lock-prefix-of-intel-instruction-what-is-the-point-its-function-is-so-limited
20 | - http://stackoverflow.com/questions/3339141/x86-lock-question-on-multi-core-cpus
21 | - http://stackoverflow.com/questions/8891067/what-does-the-lock-instruction-mean-in-x86-assembly
22 | - http://stackoverflow.com/questions/3343589/how-do-i-use-the-lock-asm-prefix-to-read-a-value
23 | 
24 | Can be used with: ADD, ADC, AND, BTC, BTR, BTS, CMPXCHG, CMPXCHG8B, DEC, INC, NEG, NOT, OR, SBB, SUB, XOR, XADD, and XCHG
25 | 


--------------------------------------------------------------------------------
/system-vs-application-programming.md:
--------------------------------------------------------------------------------
 1 | # System programming vs application programming
 2 | 
 3 | Most architectures provide two general sets of instructions:
 4 | 
 5 | -   system programming: instructions that only operating system writers will need.
 6 | 
 7 |     Application programs are generally forbidden from even using such instructions by security measures.
 8 | 
 9 |     This allows for example for process separation: preventing one process from seeing the other's memory, reducing the impact of bugs and malicious attacks.
10 | 
11 | -   application programming. Instructions that any program can use.
12 | 
13 |     Those will be usually packaged inside executable wrappers of some sort, e.g. ELF, which add required metadata to the instructions, e.g. where to find shared libraries.
14 | 
15 | This distinction is made so that application programs will run inside some sort of "protected mode", where the damage it can do to other applications and the OS is reduced.
16 | 
17 | It is common for manuals to separate those into separate chapters. E.g., Intel puts all system programming specifics on Volume 3.
18 | 
19 | This tutorial only covers application programming. For system programming see: <https://github.com/cirosantilli/x86-bare-metal-examples>
20 | 
21 | System programming concepts not covered here include:
22 | 
23 | -   instructions:
24 |     - `cli` and `sti`
25 |     - `hlt`
26 |     - `inb`
27 |     - `outb`
28 | -   special registers:
29 |     - `ds`
30 |     - `cs`
31 |     - `ss`
32 | -   paging
33 | -   segmentation
34 | -   rings
35 | 
36 | Those concepts cannot be tried out while an OS is running without disrupting the OS, so testing them requires substantially different techniques.
37 | 


--------------------------------------------------------------------------------
/vliw.md:
--------------------------------------------------------------------------------
 1 | # VLIW
 2 | 
 3 | Wiki has the perfect example:
 4 | 
 5 | > For example, the following is an instruction for the Super Harvard Architecture Single-Chip Computer (SHARC). In one cycle, it does a floating-point multiply, a floating-point add, and two autoincrement loads. All of this fits in one 48-bit instruction:
 6 | 
 7 | > f12 = f0 * f4, f8 = f8 + f12, f0 = dm(i0, m3), f4 = pm(i8, m9);
 8 | 
 9 | TODO implementations?
10 | 


--------------------------------------------------------------------------------
/x86-64/Makefile:
--------------------------------------------------------------------------------
1 | ../Makefile


--------------------------------------------------------------------------------
/x86-64/README.md:
--------------------------------------------------------------------------------
 1 | # x86-64
 2 | 
 3 | This section highlights differences from x86: learn that first.
 4 | 
 5 | x86-64 is a 64-bit architecture, originally by AMD, and later implemented with great (but not complete) compatibility by Intel.
 6 | 
 7 | Intel calls their implementation "Intel 64" but it used "IA-32e" and "EM64T" before, AMD says AMD64, Apple x86-64 and x86_64, and Microsoft and Oracle x64. Notice the great naming uniformity.
 8 | 
 9 | Do not confuse with IA-64, which is the old name for the Itanium architecture, a completely different architecture developed by Intel, which largely lost to the x86-64 alternative that AMD first proposed.
10 | 
11 | ## x86-64 vs IA-32
12 | 
13 | x86-64 has a compatibility mode which allows running IA32 code directly on it.
14 | 
15 | In 64-bit mode, outside of compatibility mode, most instructions are analogous, but a few have been removed.
16 | 
17 | The major difference added in x86-64 is the possibility of making 8 byte operations instead of 4.
18 | 
19 | Most operations still exist and are analogous but some have changed or been removed. But if you turn on compatibility mode, it works with IA32 code.
20 | 
21 | Lots of new registers were added, in particular `r8` - `r15` and extra XMM registers.
22 | 
23 | SSE and SSE2 are required in x86-64. This is why, for example, the AMD64 calling convention can use XMM registers to pass floats.
24 | 
25 | RIP addressing mode.
26 | 
27 | ## Incompatibilities between Intel and AMD
28 | 
29 | <http://stackoverflow.com/questions/29833938/what-is-the-compatible-subset-of-intels-and-amds-x86-64-implementations>
30 | 
31 | <https://en.wikipedia.org/wiki/X86-64#Differences_between_AMD64_and_Intel_64>
32 | 
33 | Mostly on the systems programming instructions, not application.
34 | 


--------------------------------------------------------------------------------
/x86-64/Vagrantfile:
--------------------------------------------------------------------------------
1 | Vagrant.configure(2) do |config|  # 2: version of the Vagrant configuration format
2 |   config.vm.box = 'hashicorp/precise64'  # base box the VM is created from
3 |   config.vm.provision :shell, privileged: false, path: 'provision'  # run the `provision` script with the shell provisioner, unprivileged
4 | end
5 | 


--------------------------------------------------------------------------------
/x86-64/calling-convention.md:
--------------------------------------------------------------------------------
 1 | # Calling convention
 2 | 
 3 | In x86-64, the number of calling conventions was greatly reduced.
 4 | 
 5 | ## System V AMD64 ABI
 6 | 
 7 | The [System V AMD64 ABI][] is the dominating calling convention on Linux and other UNIX systems, including Mac OS X.
 8 | 
 9 | A simplified version of the most important points to keep in mind about the [System V AMD64 ABI](http://www.x86-64.org/documentation/abi.pdf), which both Mac and Linux use, is:
10 | 
11 | -   before call
12 | 
13 |     -   the stack pointer *must* be aligned to a multiple of 16 bytes immediately before the `call` instruction, i.e. before `call` pushes the return address to the stack.
14 | 
15 |         This often fails by default without explicitly changing `%rsp` because `call` will store the 8 byte return address on the stack, so you usually need to do:
16 | 
17 |             sub $8, %rsp
18 |             call f
19 | 
20 |         If this fails, a segfault is a common outcome.
21 | 
22 |         The most common action then is to just push `rbp` on the stack (the usual function prologue) to even things out. That also allows for recursion.
23 | 
24 |         <http://stackoverflow.com/questions/8691792/how-to-write-assembly-language-hello-world-program-for-64-bit-mac-os-x-using-pri>
25 | 
26 |     -   parameter order is: %rdi, %rsi, %rdx, %rcx, %r8, %r9, then push the rest on the stack in reverse order
27 | 
28 |         Floating point arguments are passed through the %xmm0 to %xmm7 registers present in Intel's SSE2, which must be part of every x86-64 implementation.
29 | 
30 |         Linux system calls [use R10 instead of RCX](http://stackoverflow.com/questions/2535989/what-are-the-calling-conventions-for-unix-linux-system-calls-on-x86-64), Mac's [are unchanged](https://developer.apple.com/library/mac/documentation/DeveloperTools/Conceptual/LowLevelABI/140-x86-64_Function_Calling_Conventions/x86_64.html).
31 | 
32 |         Parameters only need to be pushed to the stack to allow for recursion or calling other functions, and this is done on the callee. So compilers can optimize and only use the registers if possible.
33 | 
34 |     -   for variable argument functions like `printf`, `al` (the low byte of `rax`) must contain an upper bound on the number of vector (SSE / AVX) registers used to pass arguments: <http://stackoverflow.com/questions/6212665/why-is-eax-zeroed-before-a-call-to-printf>
35 | 
36 |         This includes for example `float` and `double` arguments, but not `int`.
37 | 
38 |         TODO why is this needed?
39 | 
40 | -   after return
41 | 
42 |     -   unchanged registers: RBX, RBP and R12-R15. Others may have changed.
43 | 
44 |     -   return value (may need multiple quadwords for structs):
45 | 
46 |         - integers go in: %rax and %rdx
47 | 
48 |         - %xmm0 and %xmm1 are used for floats up to 8-byte wide (`double`)
49 | 
50 |         - %st0 holds x87 80-bit floats (`long double`)
51 | 
52 |         - %rax holds the address of return values that went to memory (the caller passes that address in %rdi as a hidden first argument)
53 | 
54 | Other surprising ABI points:
55 | 
56 | - You can use up to 128 bytes below `RSP` without fear that it will be corrupted by signal or interrupt handlers: <http://stackoverflow.com/questions/13201644/why-does-the-x86-64-gcc-function-prologue-allocate-less-stack-than-the-local-var>. This is useful for leaf functions, which don't need to store arguments for further calls.
57 | 
58 | Good article: <http://nickdesaulniers.github.io/blog/2014/04/18/lets-write-some-x86-64/>
59 | 
60 | ## Microsoft x64
61 | 
62 | Obviously they could not be compatible with the rest.
63 | 
64 | The Microsoft x64 calling convention, and its 2013 extension `__vectorcall`, are used on Windows.
65 | 


--------------------------------------------------------------------------------
/x86-64/cmp-sign-extend.asm:
--------------------------------------------------------------------------------
 1 | ; cmp (and most other instructions) cannot encode 64-bit immediates.
 2 | ;
 3 | ; mov however can.
 4 | ;
 5 | ; This can be seen in the Intel manual: cmp has no imm64 encoding.
 6 | ; So in general, we have to move values to registers before we can operate on them.
 7 | 
 8 | %include "lib/common_nasm.inc"
 9 | 
10 | ENTRY
11 |     ; Sanity check: mov can encode imm64, and cmp r64, r64 works as expected.
12 |     mov rax, 0x12345678_80000000
13 |     mov rbx, 0x00000001_00000000
14 |     add rax, rbx
15 |     mov rbx, 0x12345679_80000000
16 |     cmp rax, rbx
17 |     je reg_reg
18 |     ASSERT_FAIL
19 | reg_reg:
20 | 
21 |     ; The r64 imm32 encoding sign extends the immediate.
22 |     ;
23 |     ; This works, but it is kind of a lie:
24 |     ; the actual encoded immediate is just 0x80000000.
25 |     ;
26 |     ; NASM does not give warnings however, so we prefer this syntax.
27 |     mov rax, 0xFFFFFFFF_80000000
28 |     cmp rax, 0xFFFFFFFF_80000000
29 |     je reg_imm_sign_1
30 |     ASSERT_FAIL
31 | reg_imm_sign_1:
32 | 
33 |     ; Positive sign extend.
34 |     mov rax, 0x00000000_40000000
35 |     cmp rax,         0x40000000
36 |     je reg_imm_sign_0
37 |     ASSERT_FAIL
38 | reg_imm_sign_0:
39 | 
40 |     ; Negative sign extend without leading 1's.
41 |     ; Works but NASM warns, so we don't use this form,
42 |     ; even though I think it is saner.
43 |     ;mov rax, 0xFFFFFFFF_80000000
44 |     ;cmp rax,          0x80000000
45 |     ;je reg_imm_sign_1
46 |     ;ASSERT_FAIL
47 | ;reg_imm_sign_1:
48 | 
49 |     ; Impossible negative sign extend.
50 |     ; NASM warns and that is good, but it assembles to the same as above
51 |     ; and I'd rather have an error!
52 |     ;mov rax, 0xFFFFFFFF_80000000
53 |     ;cmp rax,        0x1_80000000
54 |     ;je reg_imm_sign_nasm
55 |     ;ASSERT_FAIL
56 | ;reg_imm_sign_nasm:
57 | 
58 |     ; Don't forget that our macro ASSERT_EQ uses cmp, and won't work as expected
59 |     ; if the immediate cannot be encoded by sign extension.
60 |     ; So just move to a register before comparing.
61 |     mov rax, 0x12345678_80000000
62 |     mov rbx, 0x00000001_00000000
63 |     add rax, rbx
64 |     mov rbx, 0x12345679_80000000
65 |     ASSERT_EQ rax, rbx
66 | 
67 | EXIT
68 | 


--------------------------------------------------------------------------------
/x86-64/gas/Makefile:
--------------------------------------------------------------------------------
1 | ../Makefile


--------------------------------------------------------------------------------
/x86-64/gas/lib:
--------------------------------------------------------------------------------
1 | ../lib


--------------------------------------------------------------------------------
/x86-64/gas/movabs.S:
--------------------------------------------------------------------------------
 1 | # TODO get working
 2 | # http://stackoverflow.com/questions/19415184/load-from-a-64-bit-address-into-other-register-than-rax/19416331
 3 | # http://reverseengineering.stackexchange.com/questions/2627/what-is-the-meaning-of-movabs-in-gas-x86-att-syntax
 4 | 
 5 | .text
 6 | 
 7 |     .global asm_main
 8 |     asm_main:
 9 | 
10 |         #movabs 0x8000000000000000, %rax
11 | 
12 |         # Error: operand size mismatch
13 |         # Only possible for `rax`.
14 |         #movabs 0x8000000000000000, %rbx
15 | 
16 |         mov $0, %rax
17 |         ret
18 | 


--------------------------------------------------------------------------------
/x86-64/gas/params.makefile:
--------------------------------------------------------------------------------
1 | BITS := 64
2 | DISAS_FLAVOR = att
3 | 


--------------------------------------------------------------------------------
/x86-64/lib/Makefile:
--------------------------------------------------------------------------------
1 | .PHONY: all clean
2 | 
3 | all:
4 | 	$(NASM) -o common$(OBJ_EXT) common.asm
5 | 	$(CC) $(CFLAGS) -o driver$(OBJ_EXT) -c driver.c
6 | 
7 | clean:
8 | 	rm -f *$(OBJ_EXT) *$(OUT_EXT)
9 | 


--------------------------------------------------------------------------------
/x86-64/lib/common.asm:
--------------------------------------------------------------------------------
 1 | ; Helper functions shared by the NASM examples.
 2 | ; System V calling convention used for all functions.
 3 | 
 4 | extern printf, exit
 5 | 
 6 | segment .text
 7 | 
 8 | ; print_long: print the signed 64-bit integer passed in rdi, followed by a newline.
 9 | global print_long
10 | print_long:
11 |     sub rsp, 8          ; re-align the stack to 16 bytes for the call
12 |     mov rsi, rdi        ; the value becomes the second printf argument
13 |     mov rdi, .format
14 |     xor eax, eax        ; variadic call: al = 0, no arguments in vector registers
15 |     call printf
16 |     add rsp, 8
17 |     ret
18 | .format db "%ld", 10, 0
19 | 
20 | ; print_string: print the NUL-terminated string whose address is in rdi,
21 | ; followed by a newline.
22 | global print_string
23 | print_string:
24 |     sub rsp, 8          ; re-align the stack to 16 bytes for the call
25 |     mov rsi, rdi        ; the string becomes the second printf argument
26 |     mov rdi, .format
27 |     xor eax, eax        ; variadic call: al = 0, no arguments in vector registers
28 |     call printf
29 |     add rsp, 8
30 |     ret
31 | .format db "%s", 10, 0
32 | 
33 | ; assert_fail: print the failure message and the line number passed in rdi,
34 | ; then terminate the process with exit status 1. Does not return.
35 | global assert_fail
36 | assert_fail:
37 |     sub rsp, 8
38 |     ; Keep the line number in rbx across the call to print_string.
39 |     ; rbx is not restored: this function never returns to its caller.
40 |     mov rbx, rdi
41 |     mov rdi, .message
42 |     call print_string
43 |     mov rdi, rbx
44 |     call print_long
45 | 
46 |     ; Call libc exit with exit status 1.
47 |     mov rdi, 1
48 |     call exit
49 | .message db 10, 'ASSERT FAILED AT LINE: ', 0
50 | 


--------------------------------------------------------------------------------
/x86-64/lib/common_gas.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Helper macros for GAS sources that go through the C preprocessor
 3 |  * and are entered through asm_main.
 4 |  */
 5 | 
 6 | .extern \
 7 |     assert_fail, \
 8 |     print_long, \
 9 |     print_long_hex, \
10 |     print_string, \
11 | ;
12 | 
13 | /*
14 |  * Define asm_main and set up its stack frame.
15 |  *
16 |  * The frame has to be created after the asm_main label: placed before it,
17 |  * the instruction is never executed and the leave in EXIT would restore a
18 |  * stack pointer that was never saved.
19 |  *
20 |  * Pushing rbp also brings rsp back to a multiple of 16 (assuming the caller
21 |  * followed the System V ABI), so calls can be made without further adjustment.
22 |  */
23 | #define ENTRY \
24 |     .text ;\
25 |         .global asm_main ;\
26 |         asm_main: \
27 |         enter $0, $0
28 | 
29 | /* Tear down the frame created by ENTRY and return 0 from asm_main. */
30 | #define EXIT \
31 |     leave ;\
32 |     mov $0, %eax ;\
33 |     ret
34 | 
35 | /* Call assert_fail with the current source line number. */
36 | #define ASSERT_FAIL \
37 |     mov $__LINE__, %rdi ;\
38 |     call assert_fail
39 | 
40 | /* Call assert_fail unless x equals y. */
41 | #define ASSERT_EQ(x, y) \
42 |     cmp x, y ;\
43 |     je  1f ;\
44 |         ASSERT_FAIL ;\
45 |     1:
46 | 


--------------------------------------------------------------------------------
/x86-64/lib/common_nasm.inc:
--------------------------------------------------------------------------------
 1 | ; TODO: factor out with IA-32.
 2 | ; Basically replace eax with rax.
 3 | 
 4 | extern \
 5 |     assert_fail, \
 6 |     print_long, \
 7 |     print_long_hex, \
 8 |     print_string
 9 | 
10 | ; Structure macros
11 | 
12 |     %macro TEXT 0
13 |         section .text
14 |     %endmacro
15 | 
16 |     %macro DATA 0
17 |         section .data
18 |     %endmacro
19 | 
20 |     %macro RODATA 0
21 |         section .rodata
22 |     %endmacro
23 | 
24 |     %macro GLOBAL 1
25 |         global %1
26 |         %1:
27 |     %endmacro
28 | 
29 |     ; Enter program that uses the C driver.
30 |     ; The entry function is `asm_main`.
31 |     %macro ENTRY 0
32 |         TEXT
33 |         GLOBAL asm_main
34 |         ; To align stack so that calls can be made directly.
35 |         sub rsp, 8
36 |     %endmacro
37 | 
38 |     ; Exit program entered by `ENTRY`.
39 |     %macro EXIT 0
40 |         add rsp, 8
41 |         mov rax, 0
42 |         ret
43 |     %endmacro
44 | 
45 | ; Assert macros
46 | 
47 |     ; Assert that %1 equals %2; %3 is an operand size keyword placed
48 |     ; in front of %1 (e.g. `ASSERT_EQ [x], 1, qword`).
49 |     %macro ASSERT_EQ 3
50 |         cmp %3 %1, %2
51 |         je %%ok
52 |             ASSERT_FAIL
53 |         %%ok:
54 |     %endmacro
55 | 
56 |     ; Assert that %1 equals %2, without a size keyword.
57 |     ; Does the comparison itself: this overload has no %3 to forward.
58 |     %macro ASSERT_EQ 2
59 |         cmp %1, %2
60 |         je %%ok
61 |             ASSERT_FAIL
62 |         %%ok:
63 |     %endmacro
64 | 
65 |     ; Assert that rax equals %1.
66 |     %macro ASSERT_EQ 1
67 |         ASSERT_EQ rax, %1
68 |     %endmacro
69 | 
70 |     ; Assert that rax equals rbx.
71 |     %macro ASSERT_EQ 0
72 |         ASSERT_EQ rax, rbx
73 |     %endmacro
74 | 
75 |     ; Assert that %1 does not equal %2.
76 |     %macro ASSERT_NEQ 2
77 |         cmp %1, %2
78 |         jne %%ok
79 |             ASSERT_FAIL
80 |         %%ok:
81 |     %endmacro
82 | 
83 |     ; Assert that the conditional jump %1 (jX or jnX) is taken.
84 |     %macro ASSERT_FLAG 1
85 |         %1 %%ok
86 |             ASSERT_FAIL
87 |         %%ok:
88 |     %endmacro
89 | 
90 |     ; Call assert_fail with the source line number as its argument.
91 |     %macro ASSERT_FAIL 0
92 |         mov rdi, __LINE__
93 |         call assert_fail
94 |     %endmacro
95 | 


--------------------------------------------------------------------------------
/x86-64/lib/driver.c:
--------------------------------------------------------------------------------
1 | /* Entry point of the example under test; defined in assembly. */
2 | int asm_main(void);
3 | 
4 | /* Run the assembly entry point and use its return value as the exit status. */
5 | int main() {
6 |   const int status = asm_main();
7 |   return status;
8 | }
9 | 


--------------------------------------------------------------------------------
/x86-64/linux/Makefile:
--------------------------------------------------------------------------------
 1 | # TODO Dry this out with 32-bit code.
 2 | 
 3 | .POSIX:
 4 | 
 5 | IN_EXT ?= .asm
 6 | INC_EXT ?= .inc
 7 | OUT_EXT ?= .out
 8 | TMP_EXT ?= .o
 9 | RUN ?= hello_world
10 | 
11 | OUTS := $(patsubst %$(IN_EXT),%$(OUT_EXT),$(wildcard *$(IN_EXT)))
12 | INCS := $(wildcard *$(INC_EXT))
13 | 
14 | .PRECIOUS: %$(TMP_EXT)
15 | .PHONY: all clean run test
16 | 
17 | all: $(OUTS)
18 | 
19 | %$(OUT_EXT): %$(TMP_EXT)
20 | 	ld -o '$@' '$<'
21 | 
22 | %$(TMP_EXT): %$(IN_EXT) $(INCS)
23 | 	nasm -w+all -f elf64 -o '$@' '$<'
24 | 
25 | clean:
26 | 	rm -f *$(TMP_EXT) *$(OUT_EXT)
27 | 
28 | run: all
29 | 	./$(RUN)$(OUT_EXT)
30 | 
31 | test: all
32 | 	for f in *$(OUT_EXT); do echo $$f; ./$$f; done
33 | 


--------------------------------------------------------------------------------
/x86-64/linux/README.md:
--------------------------------------------------------------------------------
 1 | # Linux
 2 | 
 3 | 1.  [C from assembly](c-from-assembly/)
 4 | 
 5 | ## System calls
 6 | 
 7 | Linux 64-bit accepts both the old IA-32 `int 0x80` system calls and also the new `syscall` instruction, which should be used instead as it is faster. Support for the old calls likely exists so that you can run IA-32 elf files directly.
 8 | 
 9 | `syscall` system calls have different numbers than the old IA-32, so watch out.
10 | 
11 | Syscall parameters are passed as follows:
12 | 
13 | - `rax`: system call number
14 | - `rdi`: parameter 1
15 | - `rsi`: parameter 2
16 | - `rdx`
17 | - `r10`
18 | - `r8`
19 | - `r9`
20 | 
21 | The return value is stored in `rax`.
22 | 


--------------------------------------------------------------------------------
/x86-64/linux/c-from-assembly/Makefile:
--------------------------------------------------------------------------------
 1 | .POSIX:
 2 | 
 3 | IN_EXT ?= .asm
 4 | INC_EXT ?= .inc
 5 | OUT_EXT ?= .out
 6 | TMP_EXT ?= .o
 7 | RUN ?= main
 8 | 
 9 | OUTS := $(patsubst %$(IN_EXT),%$(OUT_EXT),$(wildcard *$(IN_EXT)))
10 | INCS := $(wildcard *$(INC_EXT))
11 | 
12 | .PRECIOUS: %$(TMP_EXT)
13 | .PHONY: all clean run test
14 | 
15 | all: $(OUTS)
16 | 
17 | %$(OUT_EXT): %$(TMP_EXT)
18 | 	gcc -fno-pie -m64 -no-pie -pedantic-errors -o '$@' '$<'
19 | 
20 | %$(TMP_EXT): %$(IN_EXT) $(INCS)
21 | 	nasm -w+all -f elf64 -o '$@' '$<'
22 | 
23 | clean:
24 | 	rm -f *$(TMP_EXT) *$(OUT_EXT)
25 | 
26 | run: all
27 | 	./$(RUN)$(OUT_EXT)
28 | 
29 | test: all
30 | 	for f in *$(OUT_EXT); do echo $$f; ./$$f; done
31 | 


--------------------------------------------------------------------------------
/x86-64/linux/c-from-assembly/README.md:
--------------------------------------------------------------------------------
 1 | # C from assembly
 2 | 
 3 | Call C from assembly.
 4 | 
 5 | This is not portable because ANSI C does not specify the ABI.
 6 | 
 7 | Things which may vary include:
 8 | 
 9 | - calling conventions
10 | - method names. E.g. Watcom uses `puts_` instead of `puts`
11 | 
12 | The only way to deal with that is by treating each implementation differently with macros.
13 | 


--------------------------------------------------------------------------------
/x86-64/linux/c-from-assembly/hello.asm:
--------------------------------------------------------------------------------
 1 | default rel                  ; make RIP-relative addressing the default for memory operands
 2 | extern puts                  ; resolved at link time
 3 | section .rodata
 4 |     message db "hello", 0    ; NUL-terminated string
 5 | section .text
 6 | global main
 7 | main:
 8 |     sub rsp, 8               ; re-align the stack to 16 bytes before the call
 9 |     lea rdi, [rel message]   ; first argument: address of the string
10 |     call puts
11 |     xor eax, eax             ; return value of main: 0
12 |     add rsp, 8               ; undo the alignment adjustment
13 |     ret
14 | 


--------------------------------------------------------------------------------
/x86-64/linux/c-from-assembly/printf.asm:
--------------------------------------------------------------------------------
 1 | ; printf is a bit harder as it requires varargs setup.
 2 | ; https://stackoverflow.com/questions/8194141/how-to-print-a-number-in-assembly-nasm/32853546#32853546
 3 | default rel            ; make [rel format] the default, you always want this.
 4 | extern printf          ; NASM requires declarations of external symbols, unlike GAS
 5 | section .rodata
 6 |     format db "%#x", 10, 0   ; C 0-terminated string: "%#x\n"
 7 | section .text
 8 | global main
 9 | main:
10 |     sub   rsp, 8             ; re-align the stack to 16, ready for another call
11 | 
12 |     ; Call printf.
13 |     mov   esi, 0x12345678    ; "%x" takes a 32-bit unsigned int
14 |     lea   rdi, [rel format]
15 |     xor   eax, eax           ; AL=0  no FP args in XMM regs
16 |     call  printf
17 | 
18 |     ; Return from main.
19 |     xor   eax, eax
20 |     add   rsp, 8
21 |     ret
22 | 


--------------------------------------------------------------------------------
/x86-64/linux/common.inc:
--------------------------------------------------------------------------------
 1 | ; Save every register that the `syscall` in write64 may change:
 2 | ; rax (call number / return value), the argument registers,
 3 | ; and rcx / r11, which the `syscall` instruction itself overwrites.
 4 | %macro save_syscall 0
 5 |     push rax
 6 |     push rcx
 7 |     push rdx
 8 |     push rsi
 9 |     push rdi
10 |     push r8
11 |     push r9
12 |     push r10
13 |     push r11
14 | %endmacro
15 | 
16 | ; Restore the registers saved by save_syscall.
17 | ; The pops must be in the exact reverse order of the pushes.
18 | %macro load_syscall 0
19 |     pop r11
20 |     pop r10
21 |     pop r9
22 |     pop r8
23 |     pop rdi
24 |     pop rsi
25 |     pop rdx
26 |     pop rcx
27 |     pop rax
28 | %endmacro
29 | 
30 | ; Write 64-bit register or address to stdout, most significant byte first
31 | ; (big endian), so that a hex dump reads like the number itself.
32 | ;
33 | ; Registers are preserved.
34 | ;
35 | ; Sample calls:
36 | ;
37 | ;     write64 rax
38 | ;     write64 _start
39 | ;
40 | ; @param 1 the value to write: anything that `mov rbx, %1` accepts.
41 | ;          Memory operands addressed through rsp are not adjusted for the push below.
42 | ;
43 | %macro write64 1
44 |     ; Grab the value before anything else: the argument may be one of
45 |     ; the registers that get overwritten to set up the syscall.
46 |     push rbx
47 |     mov rbx, %1
48 |     %ifidni %1, rsp
49 |         ; Compensate for the `push rbx` above so that the caller's rsp is written.
50 |         ; lea instead of add: does not touch the flags.
51 |         lea rbx, [rbx + 8]
52 |     %endif
53 |     save_syscall
54 |     bswap rbx
55 |     ; The pushed value doubles as the 8 byte buffer handed to the write syscall.
56 |     push rbx
57 |     mov rax, 1
58 |     mov rdi, 1
59 |     mov rsi, rsp
60 |     mov rdx, 8
61 |     syscall
62 |     ; Drop the buffer: rbx gets its real value back at the end.
63 |     pop rbx
64 |     load_syscall
65 |     pop rbx
66 | %endmacro
67 | 
68 | ; Exit the process with status %1 (0 when no argument is given).
69 | %macro sys_exit 1
70 |     mov rax, 60
71 |     mov rdi, %1
72 |     syscall
73 | %endmacro
74 | 
75 | %macro sys_exit 0
76 |     sys_exit 0
77 | %endmacro
78 | 


--------------------------------------------------------------------------------
/x86-64/linux/interactive/Makefile:
--------------------------------------------------------------------------------
1 | ../Makefile


--------------------------------------------------------------------------------
/x86-64/linux/interactive/common.inc:
--------------------------------------------------------------------------------
1 | ../common.inc


--------------------------------------------------------------------------------
/x86-64/linux/interactive/initial_state.asm:
--------------------------------------------------------------------------------
 1 | ; Initial register state.
 2 | 
 3 | ; Pass output through hd.
 4 | 
 5 | %include "common.inc"
 6 | 
 7 | section .text
 8 | 
 9 |     global _start
10 |     _start:
11 | 
12 |         write64 rax
13 |         write64 rbp
14 |         write64 rbx
15 |         write64 rcx
16 |         write64 rdi
17 |         write64 rdx
18 |         write64 rsi
19 |         write64 rsp
20 |         write64 r8
21 |         write64 r9
22 |         write64 r10
23 |         write64 r11
24 |         write64 r12
25 |         write64 r13
26 |         write64 r14
27 |         write64 r15
28 | 
29 |         sys_exit
30 | 


--------------------------------------------------------------------------------
/x86-64/linux/interactive/min_dummy.asm:
--------------------------------------------------------------------------------
1 | ; Contains a dummy instruction to differentiate from `min_empty`.
2 | section .text
3 |     global _start
4 |     _start:
5 |         mov rax, 0
6 | 


--------------------------------------------------------------------------------
/x86-64/linux/interactive/min_empty.asm:
--------------------------------------------------------------------------------
1 | ; bash: ./min_empty.out: cannot execute binary file: Exec format error
2 | ; TODO why? Looks like text cannot be empty.
3 | section .text
4 |     global _start
5 |     _start:
6 | 


--------------------------------------------------------------------------------
/x86-64/linux/interactive/stack_top.asm:
--------------------------------------------------------------------------------
 1 | ; See what `rsp` is worth at initialization.
 2 | ;
 3 | ; Similar to IA-32, except that this time we are at around 2^47 = 128 TiB (the top of the user half of the 48-bit address space).
 4 | ;
 5 | ; Should be piped into hd.
 6 | 
 7 | %include "common.inc"
 8 | 
 9 | section .text
10 | 
11 |     global _start
12 |     _start:
13 | 
14 |         write64 rsp
15 |         push 1
16 |         write64 rsp
17 | 
18 |         sys_exit
19 | 


--------------------------------------------------------------------------------
/x86-64/linux/zero-extend-32bit-memory/Makefile:
--------------------------------------------------------------------------------
 1 | .POSIX:
 2 | 
 3 | main.out: main.asm link.ld
 4 | 	nasm -felf64 -o main.o main.asm
 5 | 	ld -Tlink.ld -o main.out main.o
 6 | 
 7 | clean:
 8 | 	rm -f *.o *.out
 9 | 
10 | test: main.out
11 | 	./main.out
12 | 


--------------------------------------------------------------------------------
/x86-64/linux/zero-extend-32bit-memory/README.md:
--------------------------------------------------------------------------------
 1 | # Zero extend 32-bit memory
 2 | 
 3 | TODO get working.
 4 | 
 5 | Attempt to answer:
 6 | http://stackoverflow.com/questions/33318342/when-is-it-better-for-an-assembler-to-use-sign-extended-relocation-like-r-x86-64
 7 | 
 8 | The goal is to make 2 memory accesses:
 9 | 
10 | - one truncated sign extended to end in 1's
11 | - one with the full address (`movabs`)
12 | 
13 | Current outcome: as soon as I put a single byte into `.data2`, segfault. Why? Linux does not want to play ball with the high address?
14 | 


--------------------------------------------------------------------------------
/x86-64/linux/zero-extend-32bit-memory/link.ld:
--------------------------------------------------------------------------------
 1 | SECTIONS
 2 | {
 3 |     . = 0x0000000000400000;
 4 |     .text :
 5 |     {
 6 |         *(.text)
 7 |     }
 8 |     .data :
 9 |     {
10 |         *(.data)
11 |     }
12 |     . = 0xFFFFFFFF80000000;
13 |     .data2 :
14 |     {
15 |         *(.data2)
16 |     }
17 | }
18 | 


--------------------------------------------------------------------------------
/x86-64/linux/zero-extend-32bit-memory/main.asm:
--------------------------------------------------------------------------------
 1 | section .text
 2 | global _start
 3 | _start:
 4 | 
 5 |     ; This mov gets the 64-bit address truncated to 32-bits.
 6 |     ; The CPU sign extends it.
 7 |     mov byte al, 0xFFFFFFFF80000000
 8 |     mov [output], al
 9 | 
10 |     mov rax, 1
11 |     mov rdi, 1
12 |     mov rsi, output
13 |     mov rdx, 1
14 |     syscall
15 | 
16 |     ; TODO do another move with the full explicit 64-bit address.
17 |     ; Should give the same output.
18 |     mov byte al, 0xFFFFFFFF80000000
19 |     mov [output], al
20 | 
21 |     mov rax, 1
22 |     mov rdi, 1
23 |     mov rsi, output
24 |     mov rdx, 1
25 |     syscall
26 | 
27 |     mov rax, 60
28 |     mov rdi, 0
29 |     syscall
30 | section .data
31 |     output:
32 |         db 'a'
33 | section .data2
34 |     input:
35 |         db 'b'
36 | 
37 | 


--------------------------------------------------------------------------------
/x86-64/main.asm:
--------------------------------------------------------------------------------
 1 | %include "lib/common_nasm.inc"
 2 | 
 3 | ENTRY
 4 | 
 5 |     ; # RIP relative addressing
 6 | 
 7 |         ; New addressing mode in which addresses are encoded relative to the RIP.
 8 | 
 9 |         ; Advantages:
10 | 
11 |         ; - easier to write position independent code for shared libraries: with RIP you can just dump
12 |             ; the `.text` and `.data` anywhere without rewriting all data accesses beforehand
13 | 
14 |         ; - generates shorter instructions, using 4 bytes to encode the address instead of 8.
15 | 
16 |             ; This seems to be the main reason why GCC 4.8 uses it by default everywhere.
17 | 
18 |             ; Downside of this: `.data` and `.text` must be within 2 GiB of each other (signed 32-bit displacement).
19 |             ; But that should be fine.
20 | 
21 |         ; It does not seem to be possible to use it with Intel syntax directly:
22 | 
23 |             ;lea rax, [rip]
24 |             ;lea rax, [rip + 4]
25 | 
26 |         ; Instead, it seems that you must use the `rel` or `abs` keyword,
27 |         ; and possibly set their defaults http://www.nasm.us/doc/nasmdoc6.html#section-6.2.1 :
28 | 
29 |             ;lea rax, [rel _start]
30 |             ;lea rax, [abs _start]
31 |             ;lea rax, $
32 | 
33 |     ; # pop only works with quadwords
34 | 
35 |         ; Some instructions do not work with operand sizes for which they work in 32 bit mode.
36 |         ; E.g., you cannot do `pop eax`, only `pop rax`.
37 | 
38 |             ; ERROR
39 |             ;pop eax
40 | 
41 |     ;mov rax, 1
42 |     ;mov rdi, 1
43 |     ;mov rsi, asserts_passed_str
44 |     ;mov rdx, asserts_passed_str_len
45 |     ;syscall
46 | 
47 | EXIT
48 | 


--------------------------------------------------------------------------------
/x86-64/movabs.asm:
--------------------------------------------------------------------------------
 1 | ; TODO review this, it is a mess.
 2 | ;
 3 | ; Most instructions cannot deal with 64-bit addresses:
 4 | ; they simply use 32 bits and *sign* extend them to 64 bits.
 5 | ;
 6 | ; http://stackoverflow.com/questions/33318342/when-is-it-better-for-an-assembler-to-use-sign-extended-relocation-like-r-x86-64
 7 | ; http://stackoverflow.com/questions/3623899/nasm-64-bit-immediate-address-for-movlps-gives-dword-data-exceeds-bounds
 8 | ; https://reverseengineering.stackexchange.com/questions/2627/what-is-the-meaning-of-movabs-in-gas-x86-att-syntax
 9 | ;
10 | ; For mov, rax is the only register that accepts it.
11 | ; TODO see other instructions.
12 | ;
13 | ; I think this is encoded as `MOV moffs64*,RAX`. TODO better understand the `moffs` thing.
14 | ;
15 | ; movabs is the AT&T name of the special form that accepts 64-bit immediates.
16 | ;
17 | ; Intel just puts it under MOV, and NASM just uses `mov`.
18 | ;
19 | ; NASM only gives warnings in case an impossible mov is coded,
20 | ; and silently truncates the address.
21 | ;
22 | ; http://stackoverflow.com/questions/19415184/load-from-a-64-bit-address-into-other-register-than-rax
23 | 
24 | %include "lib/common_nasm.inc"
25 | 
26 | ENTRY
27 |     ; TODO what is the case where only rax works?
28 |     ; All of the below give the same error.
29 |     ; Error: dword exceeds bounds.
30 |     ;mov [0x8000_0000_0000_0000], rax
31 |     ;mov [0x8000_0000_0000_0000], rbx
32 |     ;mov rax, [0x8000_0000_0000_0000]
33 |     ;mov rbx, [0x8000_0000_0000_0000]
34 | 
35 |     ; These assemble. TODO create a minimal, well defined example that does
36 |     ; not access random memory addresses.
37 |     ; mov rax, 0x8000_0000_0000_0000
38 |     ; mov qword [rax], rbx
39 |     ; mov rbx, 0x8000_0000_0000_0000
40 |     ; mov qword [rbx], rcx
41 | 
42 | EXIT
43 | 


--------------------------------------------------------------------------------
/x86-64/params.makefile:
--------------------------------------------------------------------------------
1 | BITS := 64
2 | PHONY_MAKES = gas linux linux/c-from-assembly
3 | # TODO get working.
4 | # linux/zero-extend-32bit-memory
5 | 


--------------------------------------------------------------------------------
/x86-64/provision:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | 
3 | # -y: provisioning is not interactive, so apt-get must not wait for a confirmation.
4 | sudo apt-get install -y make
5 | sudo apt-get install -y nasm
6 | 


--------------------------------------------------------------------------------