├── risc-v ├── .gitignore ├── Makefile ├── src │ └── hello.s └── README.md ├── gas ├── .gitignore ├── linux │ ├── something.h │ ├── instructions.s │ └── hello.S ├── func.cc ├── func.s ├── jump.s ├── cli.s ├── i386 │ ├── Makefile │ └── learning_i386.s ├── avx_mul.c ├── avx.c ├── loop.s ├── jz.s ├── jne.s ├── cli_lea.s ├── Makefile ├── 64bit.s └── README.md ├── inline-assembly ├── .gitignore └── basic.c ├── c ├── inline.h ├── empty.c ├── rel.c ├── sleep.c ├── pic.c ├── stack.c ├── args.c ├── sp.c ├── ptr.c ├── simple.c ├── func.c ├── null.cc ├── inline.c ├── overflow.c ├── apple.c ├── loop.c ├── while.cc └── vmmap-example.c ├── .gitignore ├── nasm ├── linux │ ├── .gitignore │ ├── libsample.c │ ├── align.asm │ ├── jnz.asm │ ├── printf.asm │ ├── hello.asm │ ├── func-args.asm │ ├── function.asm │ └── Makefile └── macos │ ├── Makefile │ └── hello.asm ├── arm ├── Dockerfile ├── src │ ├── bic.s │ ├── uxtb.s │ ├── xzr.s │ ├── ldr.s │ ├── func.s │ ├── space.s │ ├── branch.s │ ├── rename.s │ ├── psr.s │ ├── add.s │ ├── ldmia.s │ └── first.s ├── .gitignore ├── fedora_aarch64.repo ├── Makefile └── README.md ├── cdecl.c ├── stdcall.c ├── fastcall.c ├── linux ├── zero-flag.s ├── cmov.s ├── first.s ├── direction-flag.s ├── arr.s ├── .gitignore ├── check_zero.s ├── bcd.s ├── div.s ├── setne.s ├── mul.s ├── mmx.s ├── function.s ├── exec-stack.s ├── test.s ├── carry.s ├── float.s ├── write.s ├── jle.s ├── cfi.s ├── Makefile ├── sse.s ├── multi.s ├── notes │ ├── mmx.md │ ├── bcd.md │ ├── float.md │ └── sse.md └── README.md ├── Makefile ├── cache.c └── aix └── README.md /risc-v/.gitignore: -------------------------------------------------------------------------------- 1 | out 2 | -------------------------------------------------------------------------------- /gas/.gitignore: -------------------------------------------------------------------------------- 1 | out 2 | *.dSYM 3 | -------------------------------------------------------------------------------- 
/inline-assembly/.gitignore: -------------------------------------------------------------------------------- 1 | basic 2 | -------------------------------------------------------------------------------- /c/inline.h: -------------------------------------------------------------------------------- 1 | static inline void doit(int b); 2 | -------------------------------------------------------------------------------- /c/empty.c: -------------------------------------------------------------------------------- 1 | int main(void) { 2 | return 0; 3 | } 4 | -------------------------------------------------------------------------------- /gas/linux/something.h: -------------------------------------------------------------------------------- 1 | #define SOMETHING "Hellr, world@\n" 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | *.swp 3 | cdecl 4 | stdcall 5 | fastcall 6 | cache 7 | -------------------------------------------------------------------------------- /c/rel.c: -------------------------------------------------------------------------------- 1 | extern int something; 2 | 3 | int function(void) { 4 | return something; 5 | } 6 | -------------------------------------------------------------------------------- /c/sleep.c: -------------------------------------------------------------------------------- 1 | #include 2 | int main () { 3 | usleep(1000); // will sleep for 1 ms 4 | } 5 | -------------------------------------------------------------------------------- /nasm/linux/.gitignore: -------------------------------------------------------------------------------- 1 | hello 2 | printf 3 | function 4 | align 5 | func-args 6 | libsample 7 | jnz 8 | -------------------------------------------------------------------------------- /c/pic.c: -------------------------------------------------------------------------------- 1 | int 
myglob = 42; 2 | 3 | int ml_func(int a, int b) 4 | { 5 | return myglob + a + b; 6 | } 7 | -------------------------------------------------------------------------------- /c/stack.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int main (int argc, char **argv) 4 | { 5 | return 0; 6 | } 7 | -------------------------------------------------------------------------------- /arm/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM arm64v8/alpine 2 | 3 | RUN apk add util-linux pciutils usbutils coreutils binutils findutils grep gdb 4 | -------------------------------------------------------------------------------- /c/args.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int main (int argc, char **argv) 4 | { 5 | printf ("%d\n", argc); 6 | return 0; 7 | } 8 | -------------------------------------------------------------------------------- /c/sp.c: -------------------------------------------------------------------------------- 1 | #gcc -fomit-frame-pointer -S sp.c 2 | void function(void) 3 | { 4 | int i = 100; 5 | int j = 200; 6 | int k = 300; 7 | } 8 | -------------------------------------------------------------------------------- /arm/src/bic.s: -------------------------------------------------------------------------------- 1 | .text 2 | 3 | .global _start 4 | 5 | _start: 6 | mov r0, #15 7 | bic r1, r0, #4 8 | bic r1, r0, #12 9 | b . 
10 | -------------------------------------------------------------------------------- /c/ptr.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int main (int argc, char **argv) { 4 | int* p = new int(10); 5 | printf ("%p\n", p); 6 | return 0; 7 | } 8 | -------------------------------------------------------------------------------- /c/simple.c: -------------------------------------------------------------------------------- 1 | // CFLAGS="-g -o0 2 | // gcc -g -O0 simple.c -o simple 3 | int main() { 4 | int a = 5; 5 | int b = a + 6; 6 | return 0; 7 | } 8 | -------------------------------------------------------------------------------- /arm/src/uxtb.s: -------------------------------------------------------------------------------- 1 | .text 2 | 3 | .global _start 4 | 5 | _start: 6 | ldr r0, =#0xFFFFFF65 7 | uxtb r1, r0 8 | uxth r1, r0 9 | b . 10 | 11 | -------------------------------------------------------------------------------- /c/func.c: -------------------------------------------------------------------------------- 1 | // clang -g -o func func.c 2 | int doit(int i) { 3 | return i; 4 | } 5 | 6 | int main(int argc, char** argv) { 7 | int i = doit(6); 8 | } 9 | -------------------------------------------------------------------------------- /arm/.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | src/first 3 | first 4 | add 5 | xzr 6 | branch 7 | ldr 8 | func 9 | space 10 | rename 11 | ldmia 12 | bic 13 | uxtb 14 | psr 15 | -------------------------------------------------------------------------------- /nasm/linux/libsample.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main(int argc, char** argv) { 5 | puts("hello\n"); 6 | exit(0); 7 | } 8 | -------------------------------------------------------------------------------- /arm/src/xzr.s: 
-------------------------------------------------------------------------------- 1 | .text 2 | 3 | .global _start 4 | 5 | _start: 6 | mov x0, #0 7 | mov x0, xzr 8 | 9 | mov x0, xzr 10 | mov x8, #93 11 | svc #0 12 | -------------------------------------------------------------------------------- /gas/linux/instructions.s: -------------------------------------------------------------------------------- 1 | .global _start 2 | .data 3 | 4 | .text 5 | _start: 6 | movl $1, %eax 7 | REX.W movl $1, %eax 8 | movl $0, %ebx 9 | int $0x80 10 | -------------------------------------------------------------------------------- /arm/src/ldr.s: -------------------------------------------------------------------------------- 1 | .text 2 | 3 | .global _start 4 | 5 | _start: 6 | mov x0, #3 7 | str x0, [SP, #-16]! 8 | ldr x0, [SP], #16 9 | 10 | mov x8, #93 11 | svc #0 12 | -------------------------------------------------------------------------------- /cdecl.c: -------------------------------------------------------------------------------- 1 | 2 | void __attribute__ ((__cdecl__)) something(int a, int b, int c) { 3 | } 4 | 5 | int main(int argc, char** argv) { 6 | something(1, 2, 3); 7 | return 0; 8 | } 9 | -------------------------------------------------------------------------------- /stdcall.c: -------------------------------------------------------------------------------- 1 | 2 | void __attribute__ ((__stdcall__)) something(int a, int b, int c) { 3 | } 4 | 5 | int main(int argc, char** argv) { 6 | something(1, 2, 3); 7 | return 0; 8 | } 9 | -------------------------------------------------------------------------------- /fastcall.c: -------------------------------------------------------------------------------- 1 | 2 | void __attribute__ ((__fastcall__)) something(int a, int b, int c) { 3 | } 4 | 5 | int main(int argc, char** argv) { 6 | something(1, 2, 3); 7 | return 0; 8 | } 9 | -------------------------------------------------------------------------------- /gas/func.cc: 
-------------------------------------------------------------------------------- 1 | #include 2 | extern "C" int dot(int x); 3 | 4 | int main(int argc, char** argv) { 5 | int ret = dot(10); 6 | std::cout << ret << '\n'; 7 | return 0; 8 | } 9 | -------------------------------------------------------------------------------- /linux/zero-flag.s: -------------------------------------------------------------------------------- 1 | .global _start 2 | 3 | .text 4 | 5 | _start: 6 | mov $2, %rax 7 | mov $2, %rcx 8 | sub %rax, %rcx 9 | 10 | mov $60, %rax 11 | mov $0, %rdi 12 | syscall 13 | -------------------------------------------------------------------------------- /nasm/macos/Makefile: -------------------------------------------------------------------------------- 1 | hello: hello.o 2 | ld $@.o -o $@ 3 | 4 | hello.o: hello.asm 5 | nasm -f macho64 -o $@.o $< 6 | 7 | clean: 8 | $(info "cleaning...") 9 | rm -rf hello 10 | 11 | -------------------------------------------------------------------------------- /arm/src/func.s: -------------------------------------------------------------------------------- 1 | .text 2 | 3 | .global _start 4 | 5 | _start: 6 | mov x0, #1 7 | mov x1, #2 8 | bl func 9 | 10 | mov x8, #93 11 | svc #1 12 | 13 | func: 14 | mov x0, #3 15 | ret 16 | -------------------------------------------------------------------------------- /c/null.cc: -------------------------------------------------------------------------------- 1 | // $ clang++ -c null.cc -std=c++11 2 | extern "C" { 3 | int* something = nullptr; 4 | } 5 | 6 | int main(int argc, char** argv) { 7 | int* one = nullptr; 8 | return 0; 9 | } 10 | -------------------------------------------------------------------------------- /arm/src/space.s: -------------------------------------------------------------------------------- 1 | 2 | .text 3 | .global _start 4 | 5 | _start: 6 | ldr r0, =A 7 | mov r1, #2 8 | str r1, [r0] 9 | //mov r0, #1 10 | 11 | stop: 12 | b stop 13 | 14 | .data 15 | A: .space 4 16 | 
-------------------------------------------------------------------------------- /linux/cmov.s: -------------------------------------------------------------------------------- 1 | 2 | .global _start 3 | 4 | .text 5 | _start: 6 | mov $10, %rax 7 | mov $10, %rcx 8 | mov $3, %rsi 9 | cmovae %rsi, %rdi 10 | 11 | mov $60, %rax 12 | mov $1, %rdi 13 | syscall 14 | -------------------------------------------------------------------------------- /c/inline.c: -------------------------------------------------------------------------------- 1 | // clang -g -o inline inline.c 2 | #include "inline.h" 3 | 4 | static inline void doit(int k) { 5 | int i = k; 6 | } 7 | 8 | int main(int argc, char** argv) { 9 | int i = 8; 10 | doit(i); 11 | } 12 | -------------------------------------------------------------------------------- /arm/src/branch.s: -------------------------------------------------------------------------------- 1 | .text 2 | .global _start 3 | 4 | _start: 5 | mov x0, #10 6 | mov x1, #0 7 | loop: 8 | add x1, x1, #1 9 | cmp x0, x1 10 | b.NE loop 11 | 12 | mov x0, x1 13 | mov x8, #93 14 | svc #0 15 | -------------------------------------------------------------------------------- /c/overflow.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | void return_input (void) { 4 | char array[30]; 5 | gets (array); 6 | printf("%s\n", array); 7 | } 8 | 9 | int main() { 10 | return_input(); 11 | return 0; 12 | } 13 | -------------------------------------------------------------------------------- /linux/first.s: -------------------------------------------------------------------------------- 1 | .data 2 | something: 3 | .byte 2 4 | 5 | .text 6 | .globl _start 7 | _start: 8 | movl $something, %eax 9 | call _exit 10 | 11 | _exit: 12 | mov $60, %rax 13 | xor %rdi, %rdi 14 | syscall 15 | -------------------------------------------------------------------------------- /arm/src/rename.s: 
-------------------------------------------------------------------------------- 1 | ZEROR .req r0 2 | /* .equ does not work for registers which is the reason for .req */ 3 | 4 | .global _start 5 | 6 | .text 7 | 8 | _start: 9 | mov ZEROR, #4 10 | 11 | stop: 12 | b stop 13 | 14 | 15 | -------------------------------------------------------------------------------- /c/apple.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int main (int argc, char **argv, char **envp, char **apple) 4 | { 5 | int i = 0; 6 | for (i=0; i < 4; i++) 7 | printf ("%s\n", apple[i]); 8 | 9 | return 0; 10 | } 11 | -------------------------------------------------------------------------------- /gas/func.s: -------------------------------------------------------------------------------- 1 | .section __DATA, __data 2 | .section __TEXT, __text 3 | .globl _dot 4 | 5 | _dot: 6 | pushq %rbp 7 | movq %rsp,%rbp 8 | 9 | addq $22, %rdi 10 | movq %rdi, %rax # return code 11 | popq %rbp 12 | retq 13 | -------------------------------------------------------------------------------- /gas/jump.s: -------------------------------------------------------------------------------- 1 | .section __TEXT, __text 2 | .globl _main 3 | 4 | _main: 5 | jmp overhere 6 | mov $0x2000001, %eax 7 | mov $0, %rdi 8 | syscall 9 | overhere: 10 | mov $0x2000001, %eax 11 | mov $2, %rdi 12 | syscall 13 | -------------------------------------------------------------------------------- /nasm/linux/align.asm: -------------------------------------------------------------------------------- 1 | section .data 2 | dummy db 3 3 | nr dw 10 4 | 5 | section .bss 6 | section .text 7 | global main 8 | 9 | main: 10 | push rbp 11 | mov rbp, rsp 12 | mov ax, [nr] 13 | leave 14 | ret 15 | -------------------------------------------------------------------------------- /linux/direction-flag.s: -------------------------------------------------------------------------------- 1 | .global _start 2 | 3 
| .text 4 | _start: 5 | // set the direction flag 6 | std 7 | // clear the direction flag 8 | cld 9 | 10 | nop 11 | 12 | mov $60, %rax 13 | mov $0, %rdi 14 | syscall 15 | -------------------------------------------------------------------------------- /c/loop.c: -------------------------------------------------------------------------------- 1 | #include 2 | // $ clang -O3 loop.c -S 3 | int testarr [] = {1, 2, 3}; 4 | 5 | int main (int argc, char **argv) 6 | { 7 | for (int i = 0; i <= 2; i++) { 8 | printf("i=%d\n", testarr[i]); 9 | } 10 | return 0; 11 | } 12 | -------------------------------------------------------------------------------- /linux/arr.s: -------------------------------------------------------------------------------- 1 | .text 2 | .global _start 3 | .type _start, @function 4 | 5 | _start: 6 | nop 7 | sub $1, %rsp 8 | movb $1, 0(%rsp) 9 | movb $2, -1(%rsp) 10 | movb $3, -2(%rsp) 11 | 12 | mov $60, %rax 13 | xor %rdi, %rdi 14 | syscall 15 | -------------------------------------------------------------------------------- /nasm/linux/jnz.asm: -------------------------------------------------------------------------------- 1 | section .data 2 | section .bss 3 | section .text 4 | global main: 5 | 6 | main: 7 | push rbp 8 | mov rbp, rsp 9 | mov eax, 4 10 | doit: 11 | dec eax 12 | jnz doit 13 | 14 | mov rax, 60 15 | mov rdi, 0 16 | syscall 17 | -------------------------------------------------------------------------------- /arm/src/psr.s: -------------------------------------------------------------------------------- 1 | .text 2 | .global _start 3 | 4 | _start: 5 | mov r2, #4 6 | mov r3, #2 7 | subs r1, r3, r2 8 | bne negative 9 | positive: 10 | mov r0, #1 11 | b end 12 | 13 | negative: 14 | mov r0, #-1 15 | b end 16 | 17 | end: 18 | 19 | b . 
20 | -------------------------------------------------------------------------------- /linux/.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | first 3 | check_zero 4 | write 5 | jle 6 | cfi 7 | arr 8 | multi 9 | exec-stack 10 | carry 11 | direction-flag 12 | zero-flag 13 | div 14 | cmov 15 | function 16 | float 17 | sse 18 | test 19 | setne 20 | bcd 21 | setne 22 | mmx 23 | mul 24 | -------------------------------------------------------------------------------- /linux/check_zero.s: -------------------------------------------------------------------------------- 1 | .text 2 | .globl _start 3 | 4 | _start: 5 | mov $0, %rcx 6 | test %rcx, %rcx 7 | je _zero_func 8 | movq $60, %rax 9 | movq $1, %rdi 10 | syscall 11 | 12 | _zero_func: 13 | movq $60, %rax 14 | movq $2, %rdi 15 | syscall 16 | 17 | -------------------------------------------------------------------------------- /linux/bcd.s: -------------------------------------------------------------------------------- 1 | .data 2 | 3 | unpacked1: .byte 8 4 | unpacked2: .byte 4 5 | 6 | .global _start 7 | 8 | .text 9 | _start: 10 | mov unpacked1, %al 11 | mov unpacked2, %bl 12 | add %bl, %al 13 | aaa 14 | 15 | mov $1, %eax 16 | mov $0, %ebx 17 | int $0x80 18 | -------------------------------------------------------------------------------- /c/while.cc: -------------------------------------------------------------------------------- 1 | // CFLAGS="-g -o0 2 | // clang -g -O0 while.c -o while 3 | int main() { 4 | bool flag = true; 5 | int a = 5; 6 | do { 7 | if (a == 5) { 8 | flag = false; 9 | } 10 | a++; 11 | } while(flag == true); 12 | return 0; 13 | } 14 | -------------------------------------------------------------------------------- /risc-v/Makefile: -------------------------------------------------------------------------------- 1 | AS=riscv64-linux-gnu-as 2 | LD=riscv64-linux-gnu-ld 3 | ASFLAGS=-g 4 | OUT_DIR=out 5 | 6 | out/hello: src/hello.s | $(OUT_DIR) 7 | 
$(AS) $(ASFLAGS) $< -o $@.o 8 | $(LD) $@.o -o $@ 9 | 10 | out: 11 | @mkdir $(OUT_DIR) 12 | 13 | .PHONY: clean 14 | clean: 15 | @${RM} -rf $(OUT_DIR) 16 | -------------------------------------------------------------------------------- /arm/src/add.s: -------------------------------------------------------------------------------- 1 | AREA |.text|, CODE,READONLY 2 | 3 | .text 4 | 5 | .globl _start 6 | _start: 7 | mov x1, #2 8 | mov x3, #3 9 | add x4, x1, x3 10 | 11 | /* syscall exit(int status) */ 12 | mov x0, x4 /* status */ 13 | mov w8, #93 /* exit syscall #93 */ 14 | svc #0 15 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | cdecl: cdecl.c 2 | ${CC} -m32 -o $@ $< 3 | 4 | stdcall: stdcall.c 5 | ${CC} -m32 -o $@ $< 6 | 7 | fastcall: fastcall.c 8 | ${CC} -m32 -o $@ $< 9 | #${CC} -m32 -s $< 10 | 11 | cache: cache.c 12 | ${CC} -g -O0 -o $@ $< 13 | 14 | .PHONY: clean 15 | clean: 16 | ${RM} -f cdecl stdcall fastcall.s 17 | -------------------------------------------------------------------------------- /linux/div.s: -------------------------------------------------------------------------------- 1 | .text 2 | .global _start 3 | 4 | _start: 5 | mov $23, %rax 6 | mov $2, %rbx 7 | div %rbx 8 | 9 | add $48, %rax 10 | push %rax 11 | 12 | mov $1, %rax 13 | mov $1, %rdi 14 | lea (%rsp), %rsi 15 | mov $1, %dx 16 | syscall 17 | 18 | mov $60, %rax 19 | mov $1, %rdi 20 | syscall 21 | -------------------------------------------------------------------------------- /linux/setne.s: -------------------------------------------------------------------------------- 1 | .global _start 2 | 3 | .text 4 | _start: 5 | mov $1, %rax 6 | mov $2, %rbx 7 | cmp %rbx, %rax 8 | setne %cl 9 | 10 | xor %rax, %rax 11 | xor %rbx, %rbx 12 | xor %cl, %cl 13 | mov $1, %rbx 14 | cmp %rbx, %rax 15 | setnz %cl 16 | 17 | mov $60, %rax 18 | mov $0, %rdi 19 | syscall 20 | 
-------------------------------------------------------------------------------- /gas/linux/hello.S: -------------------------------------------------------------------------------- 1 | #include "something.h" 2 | 3 | .global _start 4 | 5 | .text 6 | _start: 7 | movl $4, %eax 8 | movl $1, %ebx 9 | movl $msg, %ecx 10 | movl $len, %edx 11 | int $0x80 12 | 13 | movl $1, %eax 14 | movl $0, %ebx 15 | int $0x80 16 | .data 17 | msg: 18 | .ascii SOMETHING 19 | len = . - msg 20 | -------------------------------------------------------------------------------- /arm/src/ldmia.s: -------------------------------------------------------------------------------- 1 | .data 2 | 3 | .text 4 | array: 5 | .word 0x000000000 /* 4 bytes */ 6 | .word 0x000000001 /* 4 bytes */ 7 | .word 0x000000010 /* 4 bytes */ 8 | .word 0x000000011 /* 4 bytes */ 9 | .word 0x000000100 /* 4 bytes */ 10 | 11 | .global _start 12 | 13 | _start: 14 | adr r0, array 15 | ldmia r0, {r1, r2, r3, r4, r5} 16 | b . 17 | -------------------------------------------------------------------------------- /linux/mul.s: -------------------------------------------------------------------------------- 1 | .text 2 | .global _start 3 | 4 | _start: 5 | mov $33, %al 6 | mov $2, %dl 7 | imul %dl 8 | 9 | push %rax 10 | 11 | mov $1, %rax 12 | mov $1, %rdi 13 | lea (%rsp), %rsi 14 | mov $1, %dx 15 | syscall # Will print B which is the ascii character for 66 16 | 17 | mov $60, %rax 18 | mov $1, %rdi 19 | syscall 20 | -------------------------------------------------------------------------------- /gas/cli.s: -------------------------------------------------------------------------------- 1 | .section __DATA, __data 2 | argc: 3 | .asciz "There are %d parameters\n" 4 | 5 | .section __TEXT, __text 6 | .globl _main 7 | 8 | _main: 9 | pushq %rbp 10 | movq %rsp,%rbp 11 | movq %rdi, %rsi 12 | movq argc@GOTPCREL(%rip), %rdi 13 | callq _printf 14 | movl $0x2000001, %eax # exit code 15 | movq $0, %rdi # return code 16 | syscall 17 | 
-------------------------------------------------------------------------------- /linux/mmx.s: -------------------------------------------------------------------------------- 1 | .data 2 | 3 | v1: .quad 1 # 64-bit 4 | v2: .double 1, 2 # 2 32-bit values 5 | v3: .word 1, 2, 3, 4 # 4 16-bit values 6 | 7 | .global _start 8 | 9 | .text 10 | 11 | _start: 12 | movq v1, %mm0 13 | movd v2, %mm1 14 | movq v3, %mm2 15 | paddw %mm2, %mm2 16 | 17 | mov $60, %rax 18 | mov $1, %rdi 19 | syscall 20 | -------------------------------------------------------------------------------- /inline-assembly/basic.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int main(int argc, char** argv) { 4 | printf("main...\n"); 5 | 6 | int x = 10; 7 | int y = 20; 8 | int z; 9 | asm("mov %1, %0\n\t" 10 | "add %2, %0" 11 | : "=r" (z) 12 | : "r" (x), "r" (y)); 13 | 14 | printf("x=%d, y=%d, z=%d\n", x, y, z); 15 | return 0; 16 | } 17 | -------------------------------------------------------------------------------- /linux/function.s: -------------------------------------------------------------------------------- 1 | .global _start 2 | 3 | .type print_msg, @function 4 | 5 | .data 6 | 7 | msg: .ascii "something\n" 8 | len: .int . 
- msg 9 | 10 | .text 11 | _start: 12 | call print_msg 13 | mov $60, %rax 14 | mov $1, %rdi 15 | syscall 16 | 17 | print_msg: 18 | mov $1, %rax 19 | mov $1, %rdi 20 | lea msg, %rsi 21 | mov len, %edx 22 | syscall 23 | ret 24 | -------------------------------------------------------------------------------- /nasm/macos/hello.asm: -------------------------------------------------------------------------------- 1 | %define SYSCALL_WRITE 0x2000004 2 | %define SYSCALL_EXIT 0x2000001 3 | 4 | global start 5 | start: 6 | mov rdi, 1 7 | mov rsi, str 8 | mov rdx, strlen 9 | mov rax, SYSCALL_WRITE 10 | syscall 11 | 12 | mov rax, SYSCALL_EXIT 13 | mov rdi, 0 14 | syscall 15 | 16 | section .data 17 | str: 18 | db `Hey, assembly!\n` 19 | strlen equ $ - str 20 | -------------------------------------------------------------------------------- /gas/i386/Makefile: -------------------------------------------------------------------------------- 1 | MACOSX := 10.11 2 | OUT := out 3 | 4 | PROGS := $(addprefix out/,learning_i386.o) 5 | 6 | $(OUT)/%.o : %.s 7 | as -W -g -arch i386 -o $@ $< 8 | ld -no_pie -arch i386 -macosx_version_min $(MACOSX) -o $(OUT)/$* $@ -lSystem 9 | 10 | all: $(PROGS) 11 | 12 | $(PROGS) : | out 13 | 14 | out: 15 | mkdir $(OUT) 16 | 17 | .PHONY: clean 18 | 19 | clean: 20 | rm -fr $(OUT) 21 | -------------------------------------------------------------------------------- /nasm/linux/printf.asm: -------------------------------------------------------------------------------- 1 | extern printf 2 | 3 | section .data 4 | msg db "hello world",0 5 | fmt db "Using printf to write: %s", 10, 0 6 | 7 | section .bss 8 | section .text 9 | global main 10 | 11 | main: 12 | push rbp 13 | mov rbp, rsp 14 | mov rdi, fmt 15 | mov rsi, msg 16 | mov rax, 0 17 | call printf 18 | 19 | mov rsp, rbp 20 | mov rax, 60 21 | mov rdi, 0 22 | syscall 23 | -------------------------------------------------------------------------------- /linux/exec-stack.s: 
-------------------------------------------------------------------------------- 1 | .data 2 | code: .long 0xCC 3 | 4 | .text 5 | .global _start 6 | 7 | _start: 8 | jmp code 9 | 10 | mov $60, %rax 11 | mov $0, %rdi 12 | syscall 13 | 14 | /* Notice that with the following directive it is not possible to execute 15 | instructions in the data section */ 16 | #if defined(__linux__) && defined(__ELF__) 17 | #.section .note.GNU-stack,"",%progbits 18 | #endif 19 | -------------------------------------------------------------------------------- /gas/avx_mul.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main() { 5 | 6 | // setr = set in reverse order (r = reverse of the default _mm256_set_pd argument order) 7 | __m256d a = _mm256_setr_pd(1.0, 2.0, 3.0, 4.0); 8 | __m256d b = _mm256_setr_pd(1.0, 2.0, 3.0, 4.0); 9 | 10 | __m256d result = _mm256_mul_pd(a, b); 11 | 12 | double* d = (double*)&result; 13 | printf("%f %f %f %f\n", d[0], d[1], d[2], d[3]); 14 | 15 | return 0; 16 | } 17 | -------------------------------------------------------------------------------- /gas/avx.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main() { 5 | 6 | // setr = set in reverse order (r = reverse of the default _mm256_set_pd argument order) 7 | __m256d evens = _mm256_setr_pd(1.0, 2.0, 3.0, 4.0); 8 | __m256d odds = _mm256_setr_pd(1.0, 2.0, 3.0, 4.0); 9 | 10 | __m256d result = _mm256_mul_pd(evens, odds); 11 | 12 | double* d = (double*)&result; 13 | printf("%f %f %f %f\n", d[0], d[1], d[2], d[3]); 14 | 15 | return 0; 16 | } 17 | -------------------------------------------------------------------------------- /linux/test.s: -------------------------------------------------------------------------------- 1 | .data 2 | zero: .ascii "zero\n" 3 | zero_len: .int . 
- zero 4 | 5 | .global _start 6 | .text 7 | 8 | _start: 9 | nop 10 | mov $1, %rax 11 | test $2, %rax # test will perform 0010 & 0001 = 0000 12 | jz _zero 13 | jmp _exit 14 | 15 | _zero: 16 | mov $1, %rax 17 | mov $1, %rdi 18 | lea zero, %rsi 19 | mov zero_len, %dx 20 | syscall 21 | 22 | _exit: 23 | mov $60, %rax 24 | mov $1, %rdi 25 | syscall 26 | 27 | 28 | -------------------------------------------------------------------------------- /gas/loop.s: -------------------------------------------------------------------------------- 1 | .section __DATA, __data 2 | val: 3 | .asciz "Value = %d\n" 4 | values: 5 | .int 10, 20, 30, 40, 50 6 | .section __TEXT, __text 7 | .globl _main 8 | 9 | _main: 10 | subq $8, %rsp 11 | movabsq $0, %r12 12 | leaq values(%rip), %r13 13 | loop: 14 | movq (%r13, %r12, 4), %rsi 15 | movq val@GOTPCREL(%rip), %rdi 16 | callq _printf 17 | incq %r12 18 | cmpq $5, %r12 19 | jne loop 20 | movl $0x2000001, %eax # exit code 21 | movq $0, %rdi # return code 22 | syscall 23 | -------------------------------------------------------------------------------- /gas/jz.s: -------------------------------------------------------------------------------- 1 | .section __DATA, __data 2 | res: 3 | .asciz "Zero \n" 4 | .section __TEXT, __text 5 | .globl _main 6 | 7 | _main: 8 | pushq %rbp 9 | movq %rsp,%rbp 10 | 11 | movq $10, %rax 12 | movq $10, %rbx 13 | subq %rax, %rbx 14 | jz zero 15 | jmp end 16 | 17 | zero: 18 | movq res@GOTPCREL(%rip), %rdi 19 | callq _printf 20 | 21 | end: 22 | movl $0x2000001, %eax # exit code 23 | movq $0, %rdi # return code 24 | syscall 25 | movq %rsp, %rbp 26 | popq %rbp 27 | ret 28 | -------------------------------------------------------------------------------- /nasm/linux/hello.asm: -------------------------------------------------------------------------------- 1 | section .data 2 | msg db "hello world",10,0 3 | ; note that 10 is the new line character in decimal 4 | len equ $-msg-1 5 | ; note that $ is the current address, we 
subtract the address 6 | ; of msg and then take that value -1 to ignore the 0/null byte 7 | 8 | section .bss 9 | section .text 10 | global main 11 | 12 | main: 13 | mov rax, 1 14 | mov rdi, 1 15 | mov rsi, msg 16 | mov rdx, len 17 | syscall 18 | 19 | mov rax, 60 20 | mov rdi, 0 21 | syscall 22 | -------------------------------------------------------------------------------- /gas/jne.s: -------------------------------------------------------------------------------- 1 | .section __DATA, __data 2 | res: 3 | .asciz "Equal\n" 4 | .section __TEXT, __text 5 | .globl _main 6 | 7 | _main: 8 | pushq %rbp 9 | movq %rsp,%rbp 10 | 11 | # move the value of rdi (argc) into rsi which is used to pass the 2nd argument to functions 12 | movq $10, %rax 13 | movq $10, %rbx 14 | cmpq %rax, %rbx 15 | jne end 16 | movq res@GOTPCREL(%rip), %rdi 17 | callq _printf 18 | 19 | end: 20 | movl $0x2000001, %eax # exit code 21 | movq $0, %rdi # return code 22 | syscall 23 | -------------------------------------------------------------------------------- /linux/carry.s: -------------------------------------------------------------------------------- 1 | .global _start 2 | 3 | .text 4 | 5 | _start: 6 | mov $10, %rax 7 | mov $9, %rsi 8 | // source, destination 9 | // sub computes destination - source, here 9 - 10 = -1 (the borrow sets the carry flag) 10 | // (lldb) register read rflags -f b 11 | //rflags = 0b0000000000000000000000000000000000000000000000000000001010010111 12 | cmp %rax, %rsi 13 | sub %rax, %rsi 14 | 15 | // set the carry flag 16 | stc 17 | // clear the carry flag 18 | clc 19 | nop 20 | 21 | mov $60, %rax 22 | mov $0, %rdi 23 | syscall 24 | 25 | -------------------------------------------------------------------------------- /arm/fedora_aarch64.repo: -------------------------------------------------------------------------------- 1 | [copr:copr.fedorainfracloud.org:lantw44:aarch64-linux-gnu-toolchain] 2 | name=Copr repo for aarch64-linux-gnu-toolchain owned by lantw44 3 | 
baseurl=https://download.copr.fedorainfracloud.org/results/lantw44/aarch64-linux-gnu-toolchain/fedora-$releasever-$basearch/ 4 | type=rpm-md 5 | skip_if_unavailable=True 6 | gpgcheck=1 7 | gpgkey=https://download.copr.fedorainfracloud.org/results/lantw44/aarch64-linux-gnu-toolchain/pubkey.gpg 8 | repo_gpgcheck=0 9 | enabled=1 10 | enabled_metadata=1 11 | -------------------------------------------------------------------------------- /nasm/linux/func-args.asm: -------------------------------------------------------------------------------- 1 | section .data 2 | section .bss 3 | section .text 4 | global main 5 | 6 | doit: 7 | push rbp ; and this push decreases the stack 8 | mov rbp, rsp 9 | ; call pushed the return address onto the stack, so that is 8 bytes (64 bits) 10 | ; push rbp pushed another 8 bytes. So to access the first arg a have to 11 | ; specify 8+8=16 12 | mov rax, [rbp+16] 13 | leave 14 | ret 15 | 16 | main: 17 | push rbp 18 | mov rbp, rsp 19 | push $18 20 | call doit 21 | leave 22 | ret 23 | -------------------------------------------------------------------------------- /gas/cli_lea.s: -------------------------------------------------------------------------------- 1 | .section __DATA, __data 2 | msg: 3 | .asciz "There are %d parameters\n" 4 | 5 | .section __TEXT, __text 6 | .globl _main 7 | 8 | _main: 9 | pushq %rbp 10 | movq %rsp,%rbp 11 | 12 | # move the value of rdi (argc) into rsi which is used pass 2nd argument to functions 13 | movq %rdi, %rsi 14 | # lea loads a pointer to a msg, mov would loads the actual value 15 | leaq msg(%rip), %rdi 16 | callq _printf 17 | 18 | movl $0x2000001, %eax # exit code 19 | movq $0, %rdi # return code 20 | syscall 21 | -------------------------------------------------------------------------------- /nasm/linux/function.asm: -------------------------------------------------------------------------------- 1 | section .data 2 | section .bss 3 | section .text 4 | global main 5 | 6 | main: 7 | section .text 8 | push 
rbp 9 | mov rbp, rsp 10 | mov rdi, $8 11 | call doit 12 | leave 13 | ret 14 | 15 | doit: 16 | section .data 17 | .msg db "doit...",10,0 ; not visible outside of doit. 18 | .len equ $-.msg-1 ; not visible outside of doit. 19 | section .text 20 | push rbp 21 | mov rbp, rsp 22 | mov rdx, rdi 23 | mov rax, 1 24 | mov rdi, 1 25 | mov rsi, .msg 26 | syscall 27 | leave 28 | ret 29 | -------------------------------------------------------------------------------- /arm/src/first.s: -------------------------------------------------------------------------------- 1 | .data 2 | 3 | msg: 4 | .ascii "Bajja\n" 5 | len = . - msg 6 | 7 | .text 8 | 9 | .globl _start 10 | _start: 11 | /* syscall write(int fd, const void *buf, size_t count) */ 12 | mov x0, #1 /* fd */ 13 | ldr x1, =msg /* buf */ 14 | ldr x2, =len /* count */ 15 | mov w8, #64 /* write syscall #64 */ 16 | svc #0 17 | 18 | /* syscall exit(int status) */ 19 | mov x0, #0 /* status */ 20 | mov w8, #93 /* exit syscall #93 */ 21 | svc #0 22 | -------------------------------------------------------------------------------- /linux/float.s: -------------------------------------------------------------------------------- 1 | .global _start 2 | 3 | .data 4 | radius: .float 3.14 # .float is 32 bits (single precision) 5 | m: .float 2.1 6 | result: .float 0 7 | s: .short # .short is 16 bits 8 | age: .word 46 9 | 10 | .text 11 | _start: 12 | nop 13 | mov %rsp, %rbp 14 | 15 | fld radius # load onto the FPU stack 16 | fld m # load onto the FPU stack 17 | fmulp # 18 | fstp result # store floating point value in result and pop the stack 19 | 20 | fild age # load integer and convert to floating point, then push onto the stack 21 | 22 | mov $60, %rax 23 | mov $1, %rdi 24 | syscall 25 | -------------------------------------------------------------------------------- /risc-v/src/hello.s: -------------------------------------------------------------------------------- 1 | .global _start 2 | 3 | _start: 4 | addi a0, x0, 1 # 1 = standard out 5 | la a1, msg # load 
address of message 6 | addi a2, x0, 8 # length of our string 7 | addi a7, x0, 64 # linux write system call 8 | ecall # Call linux to output the string 9 | 10 | # Setup the parameters to exit the program 11 | # and then call Linux to do it. 12 | 13 | addi a0, x0, 0 # Use 0 return code 14 | addi a7, x0, 93 # Service command code 93 terminates 15 | ecall # Call linux to terminate the program 16 | 17 | .data 18 | msg: .ascii "Bajja!\n" 19 | -------------------------------------------------------------------------------- /linux/write.s: -------------------------------------------------------------------------------- 1 | .data 2 | msg: .ascii "something something\n" 3 | len: .int . - msg 4 | 5 | .text 6 | .globl _start 7 | .type _start, @function 8 | 9 | _start: 10 | /* 11 | long syscall(long number, ...); 12 | ssize_t write(int fd, const void* buf, size_t nbytes) 13 | */ 14 | mov $1, %rax /* syscall number */ 15 | mov $1, %rdi /* file descriptor (stdout) */ 16 | lea msg, %rsi /* load the address of msg into rsi */ 17 | #mov $msg, %rsi /* also works to place the address of msg into rsi */ 18 | mov len, %edx /* length to write */ 19 | syscall 20 | 21 | mov $60, %rax 22 | xor %rdi, %rdi 23 | syscall 24 | -------------------------------------------------------------------------------- /linux/jle.s: -------------------------------------------------------------------------------- 1 | 2 | .data 3 | msg: .ascii "bajja\n" 4 | msg_len = . - msg 5 | 6 | done: .ascii "done...\n" 7 | .set done_len, . 
- done 8 | 9 | .text 10 | .global _start 11 | 12 | _start: 13 | push %rbp 14 | mov %rsp, %rbp 15 | 16 | xor %rcx, %rcx 17 | mov $1, %rdx 18 | lp: 19 | lea msg(,%rcx), %rsi 20 | inc %rcx 21 | push %rcx 22 | call print 23 | pop %rcx 24 | cmp $msg_len, %rcx 25 | jl lp 26 | 27 | lea done, %rsi 28 | mov $done_len, %rdx 29 | call print 30 | 31 | mov $60, %rax 32 | xor %rdi, %rdi 33 | syscall 34 | 35 | print: 36 | push %rbp 37 | mov %rsp, %rbp 38 | mov $1, %rax 39 | mov $1, %rdi 40 | syscall 41 | leave 42 | ret 43 | -------------------------------------------------------------------------------- /gas/Makefile: -------------------------------------------------------------------------------- 1 | MACOSX := 10.11 2 | OUT := out 3 | 4 | SRC_FILES := $(filter-out func.s, $(wildcard *.s)) 5 | OBJ_FILES := $(filter-out out/func.o, $(addprefix out/,cli.o cli_lea.o loop.o, 64bit.o, jump.o, jne.o, jz.o, jnz.o, test.o,)) 6 | 7 | $(OUT)/%.o : %.s | out 8 | @echo "compile " $@ 9 | as -g -W -arch x86_64 -o $@ $(*F).s 10 | ld -e _main -macosx_version_min $(MACOSX) -lc -lSystem -arch x86_64 $@ -o $* 11 | 12 | $(OUT)/func.o : func.s | out 13 | as -g -W -arch x86_64 -o $@ $< 14 | 15 | $(OUT)/func: func.cc $(OUT)/func.o | out 16 | clang++ -std=c++11 -O0 -g $(OUT)/func.o func.cc -o $(OUT)/func 17 | 18 | out: 19 | mkdir $(OUT) 20 | 21 | $(OUT)/avx_mul : avx_mul.c | out 22 | clang -mavx -o $@ $< 23 | 24 | .PHONY: clean 25 | 26 | clean: 27 | @rm -fr $(OUT) 28 | -------------------------------------------------------------------------------- /gas/i386/learning_i386.s: -------------------------------------------------------------------------------- 1 | # Mach-O has segments that contain sections 2 | # __TEXT is a segment and __text a section 3 | .section __TEXT, __text 4 | .globl _main 5 | 6 | _main: 7 | pushl %ebp # push the value of ebp onto the stack, to save 8 | movl %esp, %ebp # store the current stack pointer in ebp to we can use ebp 9 | # with indirect addressing 10 | 11 | subl $0x4, %esp # 
make room for a 32 bit value on the stack to avoid overwriting 12 | # the return address on the stack 13 | pushl $msg # push msg onto the stack (will increment esp but ebp will remain 14 | call _puts 15 | addl $0x8, %esp # clean up the stack 16 | 17 | movl $1, %eax 18 | popl %ebp 19 | ret 20 | 21 | .section __DATA, __data 22 | msg: 23 | .ascii "Learning Assembler\0" 24 | -------------------------------------------------------------------------------- /gas/64bit.s: -------------------------------------------------------------------------------- 1 | #as -g -arch x86_64 64bit.s -o 64bit.o 2 | #ld -e _start -macosx_version_min 10.8 -lSystem -arch x86_64 64bit.o -o 64bit 3 | 4 | .data 5 | msg: 6 | .ascii "Assembly x86_64!\n" 7 | len: 8 | .long . - msg 9 | 10 | .text 11 | .globl _main 12 | 13 | _main: 14 | # write call (SYSCALL_CONSTRUCT_UNIX in /usr/include/sys/syscall.h). 15 | movq $0x2000004, %rax 16 | # file descriptior (stdout). rdi is used for the first argument to functions 17 | movq $1, %rdi 18 | # string to print. rsi is used for the second argument to functions in x86_64 19 | movq msg@GOTPCREL(%rip), %rsi 20 | # length of string. rdx is used for the third argument to functions in x86_64 21 | movq len(%rip), %rdx 22 | # call write 23 | syscall 24 | 25 | # exit call 26 | movq $0x2000001, %rax 27 | # return code 28 | movq $0, %rdi 29 | # call exit 30 | syscall 31 | -------------------------------------------------------------------------------- /linux/cfi.s: -------------------------------------------------------------------------------- 1 | .data 2 | msg: .ascii "cfi example\n" 3 | .set len, . 
- msg 4 | 5 | .text 6 | .globl _start 7 | .type _start, @function 8 | 9 | _start: 10 | .cfi_startproc 11 | push %rbp 12 | /* Document that Common Frame Address will be stored on the stack */ 13 | .cfi_def_cfa_offset 16 14 | /* Document that the value of register 6 (rbp) is saved on the stack */ 15 | .cfi_offset 6, -16 16 | mov %rbp, %rsp 17 | /* Document that rbp will be used as the CFA from this point onwards */ 18 | .cfi_def_cfa_register 6 19 | mov $1, %rax /* syscall number */ 20 | mov $1, %rdi /* file descriptor (stdout) */ 21 | lea msg, %rsi /* load the address of msg into rsi */ 22 | mov $len, %rdx /* length to write */ 23 | syscall 24 | 25 | mov $60, %rax 26 | xor %rdi, %rdi 27 | syscall 28 | pop %rbp 29 | /* Document that CFA is now rsp and at offset 8 */ 30 | .cfi_def_cfa 7, 8 31 | .cfi_endproc 32 | -------------------------------------------------------------------------------- /cache.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | int main(int argc, char** argv) { 7 | int cache_line_size = 64; 8 | size_t cache_size = 256 * cache_line_size;; 9 | printf("256 array/64 bytes: %d\n", cache_size); 10 | 11 | register uint64_t start, end; 12 | int core_id = 0; 13 | 14 | int array[cache_size]; 15 | for (int i = 0; i < cache_size; i++) { 16 | start = __rdtscp(&core_id); 17 | array[i] = 1; 18 | end = __rdtscp(&core_id); 19 | unsigned long duration = end - start; 20 | printf("Duration of set operation: %u core_id: %x\n", duration, core_id); 21 | } 22 | for (int i = 0; i < cache_size; i+=cache_line_size) { 23 | //printf("clflush array[%d]\n", i); 24 | _mm_clflush(&array[i]); 25 | } 26 | 27 | for (int i = 0; i < cache_size; i++) { 28 | start = __rdtscp(&core_id); 29 | array[i]++; 30 | end = __rdtscp(&core_id); 31 | unsigned long duration = end - start; 32 | //printf("Duration of increment operation: %u core_id: %x\n", duration, core_id); 33 | } 34 | return 0; 35 | } 36 | 
-------------------------------------------------------------------------------- /nasm/linux/Makefile: -------------------------------------------------------------------------------- 1 | hello: hello.o 2 | ld -no-pie -e main -o $@ $< 3 | 4 | hello.o: hello.asm 5 | nasm -f elf64 -g -F dwarf -o $@ $< 6 | 7 | printf: printf.o 8 | gcc -o $@ $< -no-pie 9 | 10 | printf.o: printf.asm 11 | nasm -f elf64 -g -F dwarf -o $@ $< 12 | 13 | function: function.o 14 | gcc -o $@ $< -no-pie 15 | 16 | function.o: function.asm 17 | nasm -f elf64 -g -F dwarf -o $@ $< 18 | 19 | func-args: func-args.o 20 | gcc -o $@ $< -no-pie 21 | 22 | func-args.o: func-args.asm 23 | nasm -f elf64 -g -F dwarf -o $@ $< 24 | 25 | simd-intro: simd-intro.o 26 | gcc -o $@ $< -no-pie 27 | 28 | simd-intro.o: simd-intro.asm 29 | nasm -f elf64 -g -F dwarf -o $@ $< 30 | 31 | align: align.o 32 | gcc -o $@ $< -no-pie 33 | 34 | align.o: align.asm 35 | nasm -f elf64 -g -F dwarf -o $@ $< 36 | 37 | half-adder: half-adder.o 38 | gcc -o $@ $< -no-pie 39 | 40 | half-adder.o: half-adder.asm 41 | nasm -f elf64 -g -F dwarf -o $@ $< 42 | 43 | jnz: jnz.o 44 | gcc -o $@ $< -no-pie 45 | 46 | jnz.o: jnz.asm 47 | nasm -f elf64 -g -F dwarf -o $@ $< 48 | 49 | clean: 50 | $(info "cleaning...") 51 | rm -rf hello printf function 52 | 53 | -------------------------------------------------------------------------------- /linux/Makefile: -------------------------------------------------------------------------------- 1 | first: first.o 2 | ${LD} -o $@ $< 3 | 4 | check_zero: check_zero.o 5 | ${LD} -o $@ $< 6 | 7 | write: write.o 8 | ${LD} -o $@ $< 9 | 10 | jle: jle.o 11 | ${LD} -o $@ $< 12 | 13 | cfi: cfi.o 14 | ${LD} -o $@ $< 15 | 16 | arr: arr.o 17 | ${LD} -o $@ $< 18 | 19 | multi: multi.o 20 | ${LD} -o $@ $< 21 | 22 | div: div.o 23 | ${LD} -o $@ $< 24 | 25 | mul: mul.o 26 | ${LD} -o $@ $< 27 | 28 | exec-stack: exec-stack.o 29 | ${LD} -o $@ $< 30 | 31 | carry: carry.o 32 | ${LD} -o $@ $< 33 | 34 | zero-flag: zero-flag.o 35 | ${LD} -o $@ 
$< 36 | 37 | direction-flag: direction-flag.o 38 | ${LD} -o $@ $< 39 | 40 | cmov: cmov.o 41 | ${LD} -o $@ $< 42 | 43 | function: function.o 44 | ${LD} -o $@ $< 45 | 46 | 47 | float: float.o 48 | ${LD} -o $@ $< 49 | 50 | sse: sse.o 51 | ${LD} -o $@ $< 52 | 53 | mmx: mmx.o 54 | ${LD} -o $@ $< 55 | 56 | test: test.o 57 | ${LD} -o $@ $< 58 | 59 | setne: setne.o 60 | ${LD} -o $@ $< 61 | 62 | bcd: bcd.o 63 | ${LD} -melf_i386 -o $@ $< 64 | 65 | bcd.o: bcd.s 66 | ${AS} -g --32 -o bcd.o bcd.s 67 | 68 | %.o: %.s 69 | ${AS} -g -o $@ $< 70 | 71 | .PHONY: clean 72 | clean: 73 | ${RM} -f first check_zero jle write *.o 74 | -------------------------------------------------------------------------------- /c/vmmap-example.c: -------------------------------------------------------------------------------- 1 | // gcc -o vmmap-example -c vmmap-example.c 2 | // ./vmmap-example & 3 | // vmmap 4 | #include 5 | #include 6 | #include 7 | 8 | //int global_j; 9 | //const int ci = 24; 10 | 11 | int main (int argc, char **argv) 12 | { 13 | //int local_stack = 0; 14 | //char *const_data = "This data is constant"; 15 | //char *tiny = malloc (32); /* allocate 32 bytes */ 16 | //char *small = malloc (2*1024); /* Allocate 2K */ 17 | //char *large = malloc (1*1024*1024); /* Allocate 1MB */ 18 | 19 | //printf("Text is %p\n", main); 20 | //printf("Global Data is %p\n", &global_j); 21 | //printf("Local (Stack) is %p\n", &local_stack); 22 | //printf("Constant data is %p\n", &ci); 23 | //printf("Hardcoded string (also constant) are at %p\n", const_data); 24 | //printf("Tiny allocations from %p\n", tiny); 25 | //printf("Small allocations from %p\n", small); 26 | //printf("Large allocations from %p\n", large); 27 | //printf("Malloc (i.e. 
libSystem) is at %p\n", malloc); 28 | //sleep(100); /* so we can use vmmap on this process before it exits */ 29 | usleep(1000); 30 | return 1; 31 | } 32 | -------------------------------------------------------------------------------- /linux/sse.s: -------------------------------------------------------------------------------- 1 | .data 2 | v1: .float 1.0, 2.0, 3.0, 4.0 # .float is 4 bytes. 32 bytes in total 3 | v2: .float 5.0, 6.0, 7.0, 8.0 4 | 5 | v3: .double 1.0, 2.0 6 | v4: .double 3.0, 4.0 7 | 8 | i1: .int 1, 2, 3, 4 9 | i2: .int 5, 6, 7, 8 10 | 11 | i3: .byte 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 12 | 13 | .global _start 14 | 15 | .text 16 | _start: 17 | nop 18 | movaps v1, %xmm0 # move aligned packed single precision 19 | movaps v2, %xmm1 # move aligned packed single precision 20 | addps %xmm1, %xmm0 # add packed single precision src, dest (result in dest) 21 | 22 | xorps %xmm0, %xmm0 # xor packed single precision 23 | xorps %xmm1, %xmm1 # xor packed single precision 24 | 25 | movapd v3, %xmm0 # move aligned packed double precision 26 | movapd v4, %xmm1 # move aligned packed double precision 27 | addpd %xmm1, %xmm0 # add packed double precision src, dest (result in dest) 28 | 29 | xorps %xmm0, %xmm0 # xor packed single precision 30 | movapd i1, %xmm0 31 | addps i2, %xmm0 # add packed double precision src, dest (result in dest) 32 | 33 | xorps %xmm0, %xmm0 # xor packed single precision 34 | movapd i3, %xmm0 35 | paddb i3, %xmm0 36 | 37 | mov $60, %rax 38 | mov $0, %rdi 39 | syscall 40 | -------------------------------------------------------------------------------- /linux/multi.s: -------------------------------------------------------------------------------- 1 | 2 | .text 3 | .global _start 4 | 5 | _start: 6 | mov $28, %rax 7 | mov $2, %rbx 8 | mul %rbx 9 | 10 | xor %rcx, %rcx 11 | 12 | loop: 13 | xor %rdx, %rdx 14 | mov $10, %rbx 15 | div %rbx /* divides what is in %rax */ 16 | add $48, %rdx /* add ascii 48 to the remainder which is in 
rdx */ 17 | push %rdx /* push the rightmost digit onto the stack */ 18 | inc %rcx /* number of digits counter */ 19 | cmp $0, %rax /* continue as long as the value in %rax is not 0 */ 20 | jz next /* jz will jump if the zero flag is set, so if %rax is 0 */ 21 | jmp loop /* else continue loop */ 22 | 23 | next: 24 | cmp $0, %rcx /* check that there are digits to print, %rcx is the counter */ 25 | jz exit /* if there are none, jump to exit */ 26 | dec %rcx 27 | 28 | mov $1, %rax /* syscall 1 write */ 29 | mov $1, %rdi /* fd */ 30 | lea (%rsp), %rsi /* current number on stack to print */ 31 | mov $1, %dx /* number of characters to print */ 32 | push %rcx /* save the value of rcx on the stack as it may be clobbered by the syscall */ 33 | syscall 34 | pop %rcx /* restore rcx */ 35 | add $8, %rsp /* move rsp backward */ 36 | jmp next 37 | 38 | exit: 39 | mov $1, %rax 40 | mov $1, %rdi 41 | push $10 42 | lea (%rsp), %rsi 43 | syscall 44 | 45 | mov $60, %rax 46 | mov $0, %rdi 47 | syscall 48 | -------------------------------------------------------------------------------- /linux/notes/mmx.md: -------------------------------------------------------------------------------- 1 | ### Multimedia Extension (MMX) 2 | Introduced new instructions and data types. 3 | 4 | There are 8 new registers, 57 new instructions, and four new data types. 5 | 6 | #### Registers 7 | Unlike the FPU the mmx registers are freely addressable (they are not stack based 8 | like in x87 FPU). Note that the mmx registers cannot be used to perform 9 | floating point arithmetic. 10 | 11 | ``` 12 | MM0...MM7 13 | 14 | 64 0 15 | +--------------------+ 16 | | MM0 | 17 | +--------------------+ 18 | ... 19 | ``` 20 | This is done while still maintaining compatibility with existing operating systems by 21 | mapping these new registers to the ones in the FPU (see notes 22 | [float.md](./float.md)) which if we recall were 8 80-bit registers. 23 | 24 | So these new registers map to those, but only to the lower 64 bits. 
If one wants to mix 25 | FPU and MMX one needs to be careful and call the `emms` instruction before 26 | switching. 27 | 28 | #### Data types 29 | So we saw above that the registers are 64 bits in size. So we can place one 30 | 64-bit value (Quadword) in a register, or 2 32-bit (Packed doubleword), or 31 | 4 16-bit values (Packed word), or 8 8-bit (Packed byte). 32 | 33 | Each value is a separate fixed point integer. 34 | 35 | ### mov 36 | So lets start simple by moving a value into a mmx register: 37 | ```assembly 38 | v1: .double 1, 2 39 | 40 | movd v1, %mm0 41 | 42 | ``` 43 | 44 | ### paddb, paddw 45 | Packed add byte/word/doubleword/quadword is used for adding packed integers. 46 | 47 | ```assembly 48 | v3: .word 1, 2, 3, 4 49 | 50 | movq v3, %mm2 51 | paddw %mm2, %mm2 52 | ``` 53 | ```console 54 | (lldb) register read --format int16 mm2 55 | mm2 = {2 4 6 8} 56 | ``` 57 | -------------------------------------------------------------------------------- /arm/Makefile: -------------------------------------------------------------------------------- 1 | first: first.o 2 | aarch64-linux-gnu-ld -g -o $@ $< 3 | 4 | first.o: src/first.s 5 | aarch64-linux-gnu-as -g -o $@ $< 6 | 7 | add: add.o 8 | aarch64-linux-gnu-ld -g -o $@ $< 9 | 10 | add.o: src/add.s 11 | #aarch64-linux-gnu-as -g -o $@ $< 12 | armasm -g -o $@ $< 13 | 14 | branch: branch.o 15 | aarch64-linux-gnu-ld -g -o $@ $< 16 | 17 | branch.o: src/branch.s 18 | aarch64-linux-gnu-as -g -o $@ $< 19 | 20 | xzr: xzr.o 21 | aarch64-linux-gnu-ld -g -o $@ $< 22 | 23 | xzr.o: src/xzr.s 24 | aarch64-linux-gnu-as -g -o $@ $< 25 | 26 | stack: stack.o 27 | aarch64-linux-gnu-ld -g -o $@ $< 28 | 29 | stack.o: src/stack.s 30 | aarch64-linux-gnu-as -g -o $@ $< 31 | 32 | ldr: ldr.o 33 | aarch64-linux-gnu-ld -g -o $@ $< 34 | 35 | ldr.o: src/ldr.s 36 | aarch64-linux-gnu-as -g -o $@ $< 37 | 38 | func: func.o 39 | aarch64-linux-gnu-ld -g -o $@ $< 40 | 41 | func.o: src/func.s 42 | aarch64-linux-gnu-as -g -o $@ $< 43 | 44 | space: 
space.o 45 | arm-none-eabi-ld -g -o $@ $< 46 | 47 | space.o: src/space.s 48 | arm-none-eabi-as -g -o $@ $< 49 | 50 | rename: rename.o 51 | arm-none-eabi-ld -g -o $@ $< 52 | 53 | rename.o: src/rename.s 54 | arm-none-eabi-as -g -o $@ $< 55 | 56 | mov: mov.o 57 | arm-none-eabi-ld -g -o $@ $< 58 | 59 | mov.o: src/mov.s 60 | arm-none-eabi-as -g -o $@ $< 61 | 62 | ldmia: ldmia.o 63 | arm-none-eabi-ld -g -o $@ $< 64 | 65 | ldmia.o: src/ldmia.s 66 | arm-none-eabi-as -g -o $@ $< 67 | 68 | bic: bic.o 69 | arm-none-eabi-ld -g -o $@ $< 70 | 71 | bic.o: src/bic.s 72 | arm-none-eabi-as -g -o $@ $< 73 | 74 | uxtb: uxtb.o 75 | arm-none-eabi-ld -g -o $@ $< 76 | 77 | uxtb.o: src/uxtb.s 78 | arm-none-eabi-as -g -o $@ $< 79 | 80 | psr: psr.o 81 | arm-none-eabi-ld -g -o $@ $< 82 | 83 | psr.o: src/psr.s 84 | arm-none-eabi-as -g -o $@ $< 85 | 86 | .PHONY: run-first 87 | run-first: first 88 | qemu-aarch64 $< 89 | 90 | .PHONY: run-add 91 | run-add: add 92 | qemu-aarch64 $< 93 | 94 | 95 | .PHONY: clean 96 | clean: 97 | ${RM} first *.o 98 | 99 | 100 | -------------------------------------------------------------------------------- /risc-v/README.md: -------------------------------------------------------------------------------- 1 | ## RISC-V 2 | RISC-V is open source and began its development in 2010 (ARM started in 1990) by 3 | Berkeley Parallel Computing Laboratory and later 36 tech companies came together 4 | to form the RISC-V Foundation which was later renamed to RISC-V International in 5 | 2020. RISC-V seems to be pronounced as RISC-Five where the 5 comes from this 6 | being Berkeley's fifth RISC ISA design. 7 | 8 | As the name hints, RISC-V is a reduced instruction set computer instruction 9 | set architecture (ISA). ISA is perhaps simplified as the design of a computer 10 | in terms of basic operations that it must support. 
It does not address implementation 11 | specific details of the computer, so even if you have 2 processors that 12 | support the same ISA they can be implemented very differently. 13 | 14 | ### Background 15 | Both ARM and RISC-V are instruction set architectures (ISA) and they are both 16 | reduced instruction set computers (RISC). 17 | 18 | 19 | ### ISA Specification 20 | https://github.com/riscv/riscv-isa-manual/releases/download/Ratified-IMAFDQC/riscv-spec-20191213.pdf 21 | 22 | ### Hardware Thread 23 | In the ISA they talk about Harts which is the same thing as a core in other 24 | ISAs. 25 | 26 | ### Base Module 27 | If I've understood this correctly the base modules are RV32I, RV64I, and RV128I 28 | where RV stands for RISC-V, 32/64/128 the bus/register size, and `I` stands for 29 | integer which refers to the operations available. 30 | 31 | ### Extension Modules 32 | Extension modules add to the base and add more operations. For example, the `M` 33 | extension stands for multiplication and adds multiply/divide instructions. I 34 | thought this was a bit odd and that mul/div should be included in the base, but 35 | there are many places where only the simplest of operations are required, like 36 | small "stupid" gadgets/devices. So it allows the manufacturers to only 37 | include what they actually will use. 38 | 39 | ### add instruction 40 | The following adds the contents of two registers and stores the result in the 41 | destination register: 42 | ```assembly 43 | add rd, rs1, rs2 44 | 45 | rd = destination register 46 | rs1 = source register 1 47 | rs2 = source register 2 48 | ``` 49 | 50 | 51 | ### Compiling 52 | ``` 53 | $ make out/hello 54 | $ ./out/hello 55 | Bajja! 56 | ``` 57 | 58 | ### Debugging 59 | ```console 60 | $ qemu-riscv64 -g 7777 out/hello 61 | ``` 62 | ```console 63 | $ riscv64-unknown-linux-gnu-gdb ./out/hello 64 | Reading symbols from ./out/hello... 
65 | (gdb) target remote localhost:7777 66 | ``` 67 | -------------------------------------------------------------------------------- /linux/notes/bcd.md: -------------------------------------------------------------------------------- 1 | ### Binary Coded Decimal 2 | This is where a decimal (base 10) number is divided into individual digits, each stored in its own byte (so 8 bits). For example 3 | 123 would become: 4 | ``` 5 | 1 2 3 6 | 00000001 00000010 00000011 7 | ``` 8 | So we are representing one decimal digit, that is 0-9, as a byte. But we might 9 | notice that we are only using 4 bits to do this and 4 bits (1 nibble) can represent 10 | 0-15. 11 | 12 | So what happens if we add two BCD numbers say 5 and 6: 13 | ``` 14 | 00000101 15 | +00000110 16 | ----- 17 | 00001011 18 | ``` 19 | That is 11 in decimal but notice that 11 in BCD is: 20 | ``` 21 | 00000001 00000001 22 | ``` 23 | So something needs to be done after the addition to bring the result back 24 | into BCD. 25 | 26 | ### Unpacked BCD 27 | Let's take our example 123: 28 | ``` 29 | 1 2 3 30 | 00000001 00000010 00000011 31 | ``` 32 | Notice that we are only using the lower nibble, the upper is left unused and 33 | wasted. 34 | 35 | ### Packed BCD 36 | Saves space by packing two digits into a byte. 37 | ``` 38 | 1 2 39 | 00000001 00000010 Unpacked BCD 40 | 00010010 Packed BCD 41 | ``` 42 | So instead of two bytes we can fit the same information in a single byte. 43 | Instructions need to understand if the data they are operating on is in unpacked 44 | or packed format to be able to perform the correct operations. 45 | 46 | So we would use a .byte type to store either a single unpacked BCD digit or 47 | the same .byte could be used to store two packed BCD digits. 48 | 49 | ### ASCII Adjust After Addition (aaa) 50 | This instruction adjusts the al register. It is used after an add instruction 51 | which adds two unpacked bcd values together and places the sum in `al`. 52 | 53 | Let's take a simple example where we add two unpacked values. 
54 | The examples can be found in [bcd.s](../bcd.s). 55 | ```console 56 | 3 unpacked1: .byte 8 57 | 4 unpacked2: .byte 4 58 | 5 59 | 6 .global _start 60 | 7 61 | 8 .text 62 | 9 _start: 63 | 10 mov unpacked1, %al 64 | 11 mov unpacked2, %bl 65 | -> 12 add %bl, %al 66 | 13 aaa 67 | ``` 68 | Now if we inspect the values before the addition: 69 | ```console 70 | (lldb) register read -f b al 71 | al = 0b00001000 72 | (lldb) register read -f b bl 73 | bl = 0b00000100 74 | ``` 75 | And after the addition we have the value in `al`: 76 | ```console 77 | (lldb) register read -f b al 78 | al = 0b00001100 79 | ``` 80 | Notice that this addition generated 1100 which is 12. But since we are working 81 | with unpacked bcd values we want the result in that format. We can get that by 82 | using the aaa instruction: 83 | 84 | And after the ASCII adjust after addition (aaa) instruction: 85 | ```console 86 | (lldb) register read -f b ax 87 | ax = 0b0000 0001 0000 0010 88 | 1 2 89 | ``` 90 | 91 | 92 | -------------------------------------------------------------------------------- /aix/README.md: -------------------------------------------------------------------------------- 1 | ### AIX Assembler/Assembly notes 2 | Advanced Interactive eXecutive (AIX) is a proprietary UNIX operating system sold 3 | by IBM and introduced in 1986. 4 | 5 | Servers that run this OS are AS/400, later known as iSeries, System i, OS/400 6 | later known as i5/OS and now IBMi. And also IBM POWER and POWERPC in the 7 | RS/6000 later known as pSeries, then System p. 8 | 9 | 10 | #### Assembly syntax 11 | Being a RISC processor all instructions take a register as their first operand. 12 | And unlike CISC processors the registers use numbers instead of names. For 13 | general purpose data there are 32 registers. 14 | 15 | Because there are so many registers all arguments to functions can be passed 16 | in registers, starting from register 3 which would be the first argument for 17 | a function call. 
18 | For system calls the syscall number goes into gpr0 and the args begin in 19 | gpr3. 20 | 21 | #### load immediate (li) 22 | Loads a constant into a register. Similar to `mov $1, %rax` on x86, one would do: 23 | ```assembly 24 | .data 25 | message: 26 | .string 'Bajja\n' 27 | len = . - message 28 | 29 | .global _start 30 | 31 | .text 32 | _start: 33 | li 0, 1 # load constant 1 into register 0 (syscall number) 34 | ``` 35 | This is actually not an instruction but a mnemonic which can be thought of as a 36 | preprocessor macro. The assembler will interpret it and generate the correct 37 | instructions for the mnemonic. For example the `li` instruction above will 38 | become: 39 | ```assembler 40 | addi 0, 0, 1 41 | ``` 42 | This might look like it is adding 1 to register 0 and then storing that in 43 | register 0 but gpr0 is sometimes read as 0 depending on the context and in the 44 | case of addi the spec says that it is read as 0. 45 | 46 | #### add 47 | ```assembly 48 | add 4, 3, 5 49 | ``` 50 | The above will add the value of register 5 to the contents of register 3 and 51 | then store the result in register 4. 52 | 53 | #### addi 54 | Add immediate 55 | ```assembly 56 | addi 4, 3, 5 57 | ``` 58 | The above will add 5 to the contents of register 3 and then store the result 59 | in register 4. Notice that we are adding the constant 5 and not the contents of 60 | register 5. 61 | 62 | #### or 63 | ```assembly 64 | or rA, rS, rB 65 | ``` 66 | This will OR register S with register B and then store the result in register A. 67 | There is also a simplified mnemonic named `mr` (move register): 68 | ```assembly 69 | mr rA, rS 70 | ``` 71 | Which is the same as: 72 | ```assembly 73 | or rA, rS, rS 74 | ``` 75 | 76 | Example: 77 | ```assembly 78 | mr 9,3 79 | ``` 80 | So here the contents of gpr3 is or:ed with itself and the result 81 | is stored in gpr9. Is this some sort of way to move the contents between 82 | registers? 
I mean or with itself will not alter the content (not like xor which 83 | would be the same thing as setting it to zero). 84 | 85 | 86 | #### mtvsrd 87 | ```assembly 88 | mtvsrd 32,14 89 | ``` 90 | 91 | -------------------------------------------------------------------------------- /linux/notes/float.md: -------------------------------------------------------------------------------- 1 | ## Floating points in assembly 2 | The Floating Point Unit (FPU) is a dedicated hardware component that implements 3 | operations like addition, subtraction, multiplication and division on floating 4 | point numbers. There are also instructions for more advanced operations like 5 | square root, trig functions, logarithm functions. 6 | It supports multiple data types including single/double precision 7 | floating-point, signed integers, and BCD. 8 | 9 | This unit has its own registers: 10 | ``` 11 | 79 0 12 | +-----------------------------------+ 13 | R7 | | 14 | +-----------------------------------+ 15 | R6 | | 16 | +-----------------------------------+ 17 | R5 | | 18 | +-----------------------------------+ 19 | R4 | | 20 | +-----------------------------------+ 21 | R3 | | 22 | +-----------------------------------+ 23 | R2 | | 24 | +-----------------------------------+ 25 | R1 | | 26 | +-----------------------------------+ 27 | R0 | | 28 | +-----------------------------------+ 29 | ``` 30 | These 8 registers make up a stack. Data that can be pushed onto this stack 31 | are signed integers of sizes 16, 32, or 64 bits, floating point values of sizes 32 | 32, 64, or 80 bits, and packed BCD quantities. 33 | There is no way to directly transfer data from one of these registers to the general 34 | purpose x86 registers. 35 | 36 | ST(0) denotes the top of the stack. ST(i) denotes the i-th register from the 37 | current top. Most of the instructions use ST(0) as an implicit operand. 38 | 39 | There are also registers for general purpose in this unit. These are 16-bit 40 | registers. 
41 | 42 | ### Control register 43 | ``` 44 | Control register 45 | 15 0 46 | +-----------------------------------------+ 47 | | | 48 | +-----------------------------------------+ 49 | ``` 50 | 51 | ### Status/Word register 52 | The status register is sometimes called the status word. 53 | ``` 54 | Status register 55 | 15 0 56 | +-----------------------------------------+ 57 | |B|C3|TOP|C2|C1|C0|ES|SF|PE|UE|OE|ZE|DE|IE| 58 | +-----------------------------------------+ 59 | ``` 60 | These values are cleared using `fclex` or `fnclex` (Clear Exceptions) 61 | instructions. 62 | The values cannot be used directly but instead they have to be copied to memory 63 | or to register `ax` using `fstsw` or `fnstsw` (Store FPU status word). 64 | 65 | ### Tag register 66 | This register describes the content of the data on the stack registers. 67 | ``` 68 | Tag register 69 | 15 0 70 | +-----------------------------------------+ 71 | | | 72 | +-----------------------------------------+ 73 | ``` 74 | 75 | 76 | ### Defining a float 77 | ```assembly 78 | .data 79 | radius: .float 3.14 80 | m: .float 2.2 81 | ``` 82 | A `.float` is a 32 bit (single precision) value. A `.double` is a 64 bit (double precision) value. 83 | 84 | ### Data transfer 85 | These instructions deal with pushing and popping values to/from the stack. 86 | These instructions are named differently depending on the data being 87 | pushed/popped (floating-point, signed integer, or packed BCD). 88 | 89 | `fld` (floating point unit load) pushes a floating point value onto the register 90 | stack. The operand can be a memory location but it can also be st(0) (the value 91 | on the top of the stack). 92 | 93 | `fild` (floating point unit Integer Load) reads a signed integer from memory and 94 | converts it to a double extended precision value and then pushes that value onto 95 | the register stack. 96 | 97 | `fst` (floating point unit store) copies st(0) to st(i) or a memory location. 
98 | `fstp` (floating point unit store and pop) copies st(0) to st(i) or a memory 99 | location and also pops the stack (removes the entry or perhaps just adjusts 100 | the stack pointer to the slot before it). 101 | 102 | `fist` floating point unit convert to Integer and store the result in a memory 103 | location. 104 | `fistp` floating point unit convert to Integer and store the result in a memory 105 | location and pops the stack. 106 | `fisttp` converts the value in st(0) to an integer using truncation, and saves 107 | the result in the specified memory location and then pops the stack. This 108 | instruction is only available on processors that support SSE3. 109 | 110 | `fxch` exchanges the content of register st(0) and st(i). 111 | 112 | `fcmovcc` conditionally copies the content of st(i) to st(0) if the condition 113 | is true. 114 | 115 | ### Push a float onto the FPU stack 116 | ```assembly 117 | fld radius 118 | ``` 119 | We can inspect this by using: 120 | ```console 121 | (lldb) expr -f b -- radius 122 | (void *) $2 = 0b0000000000000000000000000000000001000000010010001111010111000011 123 | (lldb) register read st0 124 | ``` 125 | 126 | ### Copy value st(0) 127 | Floating point store and then pop (the FPU stack that is): 128 | ```console 129 | fstp result 130 | ``` 131 | 132 | ### Multiplication 133 | ```assembly 134 | fld radius 135 | fld m 136 | fmulp 137 | 138 | (lldb) expr -f f -- result 139 | (void *) $2 = 5.3733119381271126E-315 140 | ``` 141 | -------------------------------------------------------------------------------- /linux/notes/sse.md: -------------------------------------------------------------------------------- 1 | ### Streaming SIMD Extension (SSE) 2 | This makes it possible to perform arithmetic operations on four pairs of 32-bit 3 | floating point numbers at a time. This is done with 16 128 bit registers: 4 | ``` 5 | XMM0...XMM15 6 | 7 | 127 0 8 | +--------------------+ 9 | | XMM0 | 10 | +--------------------+ 11 | ... 
12 | ``` 13 | So each register can hold 128 bits which means we can store one 128 bit value, 14 | or 2 64-bit values, or 4 32-bit values, or 8 16-bit values, or 16 8 bit values. 15 | 16 | 17 | SSE was introduced in 1999, SSE2 in 2000, and SSE3 in 2004. 18 | 19 | Advanced Vector Extension (AVX) was introduced in 2011 and expanded the 20 | 16 registers to 256 bits and the names for them are 21 | ``` 22 | XMM0...XMM15 128-bits 23 | YMM0...YMM15 256-bits 24 | 25 | 255 127 0 26 | +------------------------------------------+ 27 | | YMM0 | XMM0 | 28 | +------------------------------------------+ 29 | ... 30 | ``` 31 | Now, these registers can store 8 32-bit floating point values, or 4 64-bit 32 | values. 33 | In 2013 Intel released AVX-2 which also allowed the values to be integers in 34 | addition to floating point values. 35 | 36 | AVX-512 increased the number of registers to 32 and also increased the size of 37 | the registers to 512 bits. 38 | ``` 39 | XMM0...XMM15 128-bits 40 | YMM0...YMM15 256-bits 41 | ZMM0...ZMM31 512-bits 42 | 43 | 511 255 127 0 44 | +------------------------------------------+----------------------+ 45 | | ZMM0 | YMM0 | XMM0 | 46 | +------------------------------------------+----------------------+ 47 | ... 48 | ``` 49 | 50 | ### Scalar operations 51 | Only operate on the least significant data element. So a single precision 52 | (32-bit) scalar operation only operates on bits 0-31 of the register, and 53 | a double precision (64-bit) scalar operation only on bits 0-63. 54 | 55 | ### Packed operations 56 | These operate on all of the data elements in the register in parallel. 57 | 58 | 59 | ### Single Instruction Multiple Data (SIMD) 60 | Is what it says on the box, a single instruction (add, sub, mul, div, shift, 61 | compares, datals 62 | ) operating on 63 | multiple data elements during the same instruction. So one instruction can 64 | replace multiple. 65 | 66 | ### movaps 67 | This instruction will move an aligned packed single precision value into an 68 | xmm register. 
69 | 70 | So let's start very simple and see how we can place data in a register. 71 | ```assembly 72 | .data 73 | v1: .float 1.0, 2.0, 3.0, 4.0 74 | ... 75 | movaps v1, %xmm0 76 | ``` 77 | 78 | ```console 79 | (lldb) expr -f float32 -- $xmm0 80 | (unsigned char __attribute__((ext_vector_type(16)))) $2 = (1, 2, 3, 4) 81 | ``` 82 | So that looks like what we expect. We have declared v1 to be the address where 83 | there will be four floats which are each 4 bytes. 84 | Just printing the register `xmm0` will give us: 85 | ```console 86 | (lldb) register read xmm0 87 | xmm0 = {0x00 0x00 0x80 0x3f 0x00 0x00 0x00 0x40 0x00 0x00 0x40 0x40 0x00 0x00 0x80 0x40} 88 | ``` 89 | Notice that this is showing the data in this register as 16 bytes (8x16=128), but 90 | we specified that our data is floats which are 4 bytes each (128/4=32 bits). 91 | Next we can add two vectors together: 92 | ```assembly 93 | addps %xmm1, %xmm0 94 | ``` 95 | This is using add packed (the p) single precision (the s which is 32 bits). This 96 | is a destructive operation which adds the values in source xmm1 to the values 97 | in destination xmm0 and then overwrites the values in the destination register. 98 | We can inspect the values in xmm0 after this operation: 99 | ```console 100 | (lldb) expr -f float32 -- $xmm0 101 | (unsigned char __attribute__((ext_vector_type(16)))) $15 = (2, 4, 6, 8) 102 | ``` 103 | 104 | ### movapd 105 | This will move an aligned packed double precision value into an xmm register. 
106 | ```assembly 107 | movapd v3, %xmm0 # move aligned packed double precision 108 | ``` 109 | ```console 110 | (lldb) expr -f float64 -- $xmm0 111 | (unsigned char __attribute__((ext_vector_type(16)))) $0 = (1, 2) 112 | ``` 113 | 114 | 115 | ### xor xmm register 116 | To clear/set to zero a register we can use `xorps` for packed single precision: 117 | ```assembly 118 | xorps %xmm0, %xmm0 # xor packed single precision 119 | ``` 120 | 121 | ### Integers 122 | We can also work with integers instead of floating point values. Remember that 123 | we have xmm registers that are 128 bits long. How we divide/interpret/pack 124 | that data is up to us. We could have two .quad (8 bytes, 64 bits), 4 .int/.long 125 | (4 bytes, 32 bits), or 8 .word/.short (2 bytes, 16 bits), or 16 .byte (1 byte, 126 | 8 bits). 127 | 128 | For example we can have a packed 128 bit value with 4 32 bit ints: 129 | ```assembly 130 | i1: .int 1, 2, 3, 4 131 | 132 | movaps i1, %xmm0 # mov aligned packed single precision (32 bit values). 133 | ``` 134 | Remember that we have to specify the correct instruction for the type of the 135 | data that we are moving. 
136 | ```console 137 | (lldb) expr -f uint32_t -- $xmm0 138 | (unsigned char __attribute__((ext_vector_type(16)))) $2 = (1, 2, 3, 4) 139 | ``` 140 | And to add two of these vectors we use `addps`: 141 | ```assembly 142 | addps i2, %xmm0 # add packed single precision src, dest (result in dest) 143 | ``` 144 | 145 | We can also use the space to add 16 bytes together: 146 | ```assembly 147 | i3: .byte 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 148 | 149 | xorps %xmm0, %xmm0 # xor packed single precision 150 | movapd i3, %xmm0 151 | paddb i3, %xmm0 152 | ``` 153 | ```console 154 | (lldb) expr -f uint8_t -- $xmm0 155 | (unsigned char __attribute__((ext_vector_type(16)))) $12 = (0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30) 156 | ``` 157 | -------------------------------------------------------------------------------- /gas/README.md: -------------------------------------------------------------------------------- 1 | ## GNU Assembler examples 2 | 3 | ### Building 4 | 5 | make 6 | 7 | All binaries will be placed in the `out` directory. 8 | 9 | 10 | ### arch type 11 | When using `as` you can specify the architecture type using `-arch` 12 | 13 | man 3 arch 14 | 15 | If no target architecture is specified, it defaults to the architecture of the host it is running on. 16 | 17 | ### Segments 18 | In Mach-O there are segments that contain sections, for example: 19 | 20 | .section __TEXT, __text 21 | 22 | `__TEXT` is the segment and `__text` the section. 23 | 24 | 25 | ### System calls 26 | This is done using the `syscall` instruction. 27 | 28 | /usr/include/sys/syscall.h 29 | 30 | 31 | ### Pointers 32 | Using parentheses around a register, for example (%eax), means to 33 | dereference the pointer in eax and use it. 34 | To get the address of a label you can use `$` before the label. If you are on a 35 | 64 bit machine you may have to load the effective address instead 36 | `(leaq label(%rip), %rdi)` if direct addressing is not supported. 
37 | 38 | ### learning_i386 39 | Just a hello world example that prints something to standard out. 40 | 41 | ### 64Bit.s 42 | This is an example of using system calls on the x86_64 arch. There are a few 43 | interesting/different things that I ran into when trying to get this working. 44 | The first was the way a system call is specified. 45 | 46 | You can find the system calls in [syscalls.master](http://www.opensource.apple.com/source/xnu/xnu-1504.3.12/bsd/kern/syscalls.master): 47 | ``` 48 | ... 49 | 4 AUE_NULL ALL { user_ssize_t write(int fd, user_addr_t cbuf, user_size_t nbyte); } 50 | ``` 51 | We can see that `write` has the value `4`, but when we specify this in 52 | [64bit.s](./64bit.s) we use: 53 | ``` 54 | movq $0x2000004, %rax 55 | ``` 56 | Why `0x2000004` instead of simply `4`? 57 | The reason for this can be found in [syscall_sw.h](http://www.opensource.apple.com/source/xnu/xnu-792.13.8/osfmk/mach/i386/syscall_sw.h). 58 | This is not a public header so it will probably not be available on your system. 59 | 60 | In XNU, the POSIX system calls make up only one of four system call classes (SYSCALL_CLASS): 61 | 62 | 1. MACH (1) 63 | 2. UNIX (2) 64 | 3. MDEP (3) 65 | 4. DIAG (4) 66 | 67 | In 64-bit, all call types are positive, but the most significant byte contains 68 | the value of SYSCALL_CLASS from the preceding table. 69 | The value is checked by shifting the system call number 70 | 71 | SYSCALL_CLASS_SHIFT (=24) bits. 72 | 2 << 24 = 0x2000000 (the UNIX class) 73 | 74 | 75 | The next thing that I did not understand was this line: 76 | 77 | movq msg@GOTPCREL(%rip), %rsi # string to print. rsi is used for the second argument to functions in x86_64 78 | 79 | ### Relocations 80 | If you have multiple object files and want to link them together one option is to add the code sections together, then the 81 | data sections etc. But if you have a function at address 0 in both object files which one will get invoked? 
It would depend on 82 | which was linked first as the other would have its address shifted. 83 | 84 | * When you load/store data you need to know the location. 85 | * When you branch/jump you need to be able to specify the location to branch/jump to. 86 | 87 | Let's take a look at the following c program: 88 | 89 | extern int something; 90 | 91 | int function(void) { 92 | return something; 93 | } 94 | 95 | Next, we can compile and then display the relocation section: 96 | 97 | $ clang -c rel.c 98 | $ otool -r rel.o 99 | RELOCATION RECORDS FOR [__text]: 100 | 0000000000000007 X86_64_RELOC_GOT_LOAD _something@GOTPCREL 101 | 102 | RELOCATION RECORDS FOR [__compact_unwind]: 103 | 0000000000000000 X86_64_RELOC_UNSIGNED __text 104 | 105 | You can find information about [X86_64_RELOC_GOT_LOAD](https://opensource.apple.com/source/xnu/xnu-1699.22.73/EXTERNAL_HEADERS/mach-o/x86_64/reloc.h.auto.html). 106 | During compilation _something is not known to the compiler so a relocation entry is left for the linker to resolve. The entry is specified as address `0000000000000007` 107 | 108 | $ objdump -disassemble rel.o 109 | 110 | rel.o:file format Mach-O 64-bit x86-64 111 | 112 | Disassembly of section __TEXT,__text: 113 | _function: 114 | 0: 55 pushq%rbp 115 | 1: 48 89 e5 movq%rsp, %rbp 116 | 4: 48 8b 05 00 00 00 00 movq(%rip), %rax 117 | b: 8b 00 movl(%rax), %eax 118 | d: 5d popq%rbp 119 | e: c3 retq 120 | 121 | If we look at address 7: 122 | 123 | 4: 48 8b 05 00 00 00 00 movq(%rip), %rax 124 | /\ 125 | 126 | We can see that at address 7 there are four bytes that will be filled by the linker with the correct address. 127 | 128 | 129 | GOTPCREL is short for Global Offset Table PC-relative, that is the location of the GOT entry relative to the program counter (RIP). From what I understand this has to do with relocations. 
So let's look at what relocation information can be found in the mach object file: 130 | 131 | $ otool -r out/64bit.o 132 | $ otool -r out/cli.o 133 | RELOCATION RECORDS FOR [__text]: 134 | 000000000000000f X86_64_RELOC_BRANCH _printf 135 | 000000000000000a X86_64_RELOC_GOT_LOAD argc@GOTPCREL 136 | 137 | RELOCATION RECORDS FOR [__debug_info]: 138 | 000000000000008b X86_64_RELOC_UNSIGNED __text 139 | 0000000000000018 X86_64_RELOC_UNSIGNED __text 140 | 0000000000000010 X86_64_RELOC_UNSIGNED __text 141 | 142 | RELOCATION RECORDS FOR [__debug_aranges]: 143 | 0000000000000010 X86_64_RELOC_UNSIGNED __text 144 | 145 | RELOCATION RECORDS FOR [__debug_line]: 146 | 0000000000000029 X86_64_RELOC_UNSIGNED __text 147 | 148 | Relocation is the process of connecting symbolic references to symbolic definitions. For the __text section we can find the following: 149 | 150 | 000000000000000a X86_64_RELOC_GOT_LOAD argc@GOTPCREL 151 | 152 | This maps to [cli.s](./cli.s): 153 | 154 | movq argc@GOTPCREL(%rip), %rdi 155 | 156 | Recall that RIP is the instruction pointer. 157 | 158 | ### Instruction Pointer Relative addressing (RIP) 159 | RIP addressing is a mode where address references are provided as 32-bit displacements from the current instruction pointer (RIP register value). 160 | One of the advantages of RIP is that it makes it easier to generate Position Independent Code, which is code that is not dependent upon where the 161 | code is loaded. This is important for shared objects as they don't know where they will be loaded. 162 | In x64, references to code and data are done using instruction pointer relative (RIP) addressing modes. 163 | 164 | ### Position Independent Code (PIC) 165 | When the linker creates a shared library it does not know where in the process's address space it might be loaded. This causes a problem for code and data references which need to point to the correct memory locations. 
166 | 167 | My view of this is that when the linker takes multiple object files and merges the sections, like .text, .data etc, merge might not be a good 168 | description but rather adds them sequentially to the resulting object file. If the source files refer to absolute 169 | locations in their .data sections these might not be in the same place after linking into the resulting object file. 170 | Solving this problem can be done using position independent code (PIC) or load-time relocation. 171 | 172 | There is an offset between the text and data sections. The linker combines all the text and data sections from all the object files and therefore knows the sizes of these sections. So the linker can rewrite the instructions using offsets and the sizes of the sections. 173 | 174 | But x86 requires absolute addressing does it not? 175 | You might come across the following compilation error using as on Mac: 176 | 177 | 32-bit absolute addressing is not supported in 64-bit mode 178 | 179 | If we need a relative address (relative to the current instruction pointer which there is no operation for) a way to get this is to use the `CALL some_label` like this: 180 | 181 | call some_label 182 | some_label: 183 | pop eax 184 | 185 | `call` causes the address of the next instruction to be saved on the stack and then it will jump to some_label. `pop eax` pops the address into eax which is now the value of the instruction pointer. 186 | 187 | PIC is implemented using a Global Offset Table (GOT) which is a table of addresses in the .data section. When an instruction refers to a variable it does not use an absolute address (would require relocation) but instead refers to an entry in the GOT which is located at a well known place in the data section. The entry in the GOT refers to an absolute address. 188 | So this is a sort of relocation but in the data section instead of in the code section which is what was done for load-time relocation. 
But doing this in the data section, which is not shared and is writable, does not cause any issues. 189 | Also relocations in the code section have to be done per variable reference and not per variable as is the case when using a GOT. 190 | 191 | So that covers variables but for function calls a Procedure Linkage Table (PLT) is used. This is part of the text section. Instead of calling a function directly a call is made to an entry in the PLT which performs the actual call. This is sometimes called a `trampoline` which I've seen on occasions when inspecting/dumping in lldb but did not know what it meant. This allows for lazy resolution of function calls. Also every PLT entry has an entry in the GOT. 192 | 193 | 194 | Only position independent code is supposed to be included into shared objects (SO) as they should have an ability to dynamically change their 195 | location in RAM. 196 | 197 | ### Load-time relocation 198 | This process might take some time during loading which might be a performance hit depending on the type of program being written. 199 | Since the text section needs to be modified during loading (needs to do the actual relocations) it is not possible to have it shared by multiple processes. 200 | 201 | ### Instruction Pointer Relative addressing (RIP) 202 | References to code and data in x64 are done with instruction pointer relative addressing. So instructions can use references that are relative to the current instruction (or the next one) and don't require them to be absolute addresses. This works for offsets of up to 32 bits but for programs that are larger than that this offset will not be enough. One could use absolute 64 bit addresses for everything but more instructions are required to perform simple operations and most programs will not require this. 203 | The solution is to introduce code models to cater for all needs. 
The compiler should be able to take an option where the programmer can say that this object file will not be linked into a large program. And also that this compilation unit will be included in a huge library and that 64-bit addressing should be used. 204 | 205 | In 64-bit mode, the encoding for the old 32-bit immediate offset addressing mode is now a 32-bit offset 206 | from the current RIP, not from 0x00000000 like before. 207 | You only need to know how far away it is from the currently executing instruction (technically the next instruction) 208 | 209 | 210 | #### Assemble 64Bit.s 211 | 212 | as -g -arch x86_64 64bit.s -o 64bit.o 213 | 214 | #### Link 64Bit.s 215 | 216 | ld -e _start -macosx_version_min 10.8 -lSystem -arch x86_64 64bit.o -o 64bit 217 | 218 | ### Mach Object file (mach-o) 219 | 220 | otool -h 64bit.o 221 | 64bit.o: 222 | Mach header 223 | magic cputype cpusubtype caps filetype ncmds sizeofcmds flags 224 | 0xfeedfacf 16777223 3 0x00 1 3 656 0x00000000 225 | 226 | The magic number can be found in ```/usr/include/mach-o/loader.h```: 227 | 228 | #define MH_MAGIC_64 0xfeedfacf /* the 64-bit mach magic number */ 229 | 230 | The ```cputype``` can be located in ```/usr/include/mach/machine.h```: 231 | 232 | 233 | ### otool 234 | Dump sections: 235 | 236 | $ otool -s __TEXT __text jump.o 237 | $ otool -s __DATA __data jump.o 238 | 239 | 240 | ### Redzone 241 | Put simply, the red zone is an optimization. Code can assume that the 128 bytes below rsp will not be asynchronously clobbered 242 | by signals or interrupt handlers, and thus can use it for scratch data, without explicitly moving the stack pointer. The last 243 | sentence is where the optimization lies - decrementing rsp and restoring it are two instructions that can be saved when using 244 | the red zone for data. 
245 | 246 | However, keep in mind that the red zone will be clobbered by function calls, so it's usually most useful in leaf functions 247 | (functions that call no other functions) 248 | 249 | Preserving the base pointer 250 | The base pointer rbp (and its predecessor ebp on x86), being a stable "anchor" to the beginning of the stack frame throughout 251 | the execution of a function, is very convenient for manual assembly coding and for debugging [5]. However, some time ago it 252 | was noticed that compiler-generated code doesn't really need it (the compiler can easily keep track of offsets from rsp), 253 | and the DWARF debugging format provides means (CFI) to access stack frames without the base pointer. 254 | 255 | This is why some compilers started omitting the base pointer for aggressive optimizations, thus shortening the function prologue 256 | and epilogue, and providing an additional register for general-purpose use (which, recall, is quite useful on x86 with its 257 | limited set of GPRs). 258 | 259 | gcc keeps the base pointer by default on x86, but allows the optimization with the -fomit-frame-pointer compilation flag. 260 | How recommended it is to use this flag is a debated issue - you may do some googling if this interests you. 261 | 262 | Anyhow, one other "novelty" the AMD64 ABI introduced is making the base pointer explicitly optional, stating: 263 | 264 | The conventional use of %rbp as a frame pointer for the stack frame may be avoided by using %rsp (the stack pointer) to index into 265 | the stack frame. This technique saves two instructions in the prologue and epilogue and makes one additional general-purpose 266 | register (%rbp) available. 267 | gcc adheres to this recommendation and by default omits the frame pointer on x64, when compiling with optimizations. It gives an 268 | option to preserve it by providing the -fno-omit-frame-pointer flag. 
For clarity's sake, the stack frames shown above were 269 | produced without omitting the frame pointer. 270 | 271 | 272 | ### Setting register to zero 273 | My first thought would be using a mov instruction, like `movq $0, %rax` for example. 274 | 275 | You might come across something like the following: 276 | 277 | xorl %eax, %eax 278 | 279 | Which is simply a way of setting the register to zero. The xorl instruction uses fewer bytes than the mov. I found suggestions that it 280 | might be more performant than using `mov $0, %eax`. 281 | 282 | 100000f72:48 c7 c0 00 00 00 00 movq $0, %rax 283 | 284 | 100000f79:48 31 c0 xorq %rax, %rax 285 | 286 | Notice that the machine code for xorq is smaller than for movq. 287 | Reducing instruction sizes will reduce instruction-cache misses, and therefore improve performance. 288 | 289 | ### System calls 290 | You can use dtruss to see what system calls are being made: 291 | 292 | $ sudo dtruss `pwd`/malloc 293 | 294 | 295 | ### cmp 296 | Compares two values and sets the EFLAGS register. It performs subtraction on the operands: 297 | 298 | cmp op1, op2 299 | 300 | op2 - op1. None of the operands are modified but the EFLAGS reg is set as if subtraction took 301 | place. 302 | 303 | 304 | ### Preprocessing assembler files 305 | Assembler files with a capital S extension (`.S`) indicate that the file needs to be 306 | preprocessed as it contains #include/#define which have to be processed. A lower 307 | case s assembly file is just normal assembly. 308 | 309 | So we have to preprocess such files with `cpp`: 310 | ```console 311 | $ cpp hello.S | as -o hello.o - 312 | $ ld -o hello hello.o 313 | $ ./hello 314 | Hello, world! 
315 | ``` 316 | -------------------------------------------------------------------------------- /arm/README.md: -------------------------------------------------------------------------------- 1 | ### ARM Assembly 2 | ARM is a Reduced Instruction Set Computing (RISC) processor which is different 3 | from Intel processors which are Complex Instruction Set Computing (CISC) processors. 4 | Simpler instructions tend to consume less power which is a reason for ARM being 5 | used in smaller embedded devices. 6 | 7 | It has more general purpose registers than CISC processors and has around 100 8 | instructions. 9 | 10 | ARM uses a LOAD/STORE memory model for memory access so an operation will first 11 | have to load a value into a register, operate on that value, and then store it 12 | back to memory. 13 | 14 | ARM has two modes, ARM mode and Thumb mode. 15 | 16 | Before version 3 ARM processors were little-endian but after that the ARM 17 | processors have become BI-endian which allows the endianness to be toggled. 18 | 19 | ### ARM mode 20 | This is the traditional instruction set where instructions are 32 bits long. 21 | 22 | ### Thumb mode 23 | This mode supports higher code density where most instructions are 16 bits long 24 | and some are still 32 bits long. 25 | 26 | ### Thumb2 mode 27 | My understanding of this is that you have to choose if you use ARM mode or 28 | Thumb mode when writing code. Thumb2 adds more instructions to Thumb mode so 29 | that it is almost on par with ARM mode, but also adds a new assembly syntax 30 | to allow for writing code in a unified way and then deciding on the mode at 31 | assemble time. This is called Unified Assembly Language (UAL). 
32 | 33 | ### ARM versions 34 | ``` 35 | ARM Family ARM architecture 36 | -------------------------------------------------------- 37 | ARM7 ARM v4 38 | ARM9 ARM v5 39 | ARM11 ARM v6 40 | Cortex-A ARM v7-A (A=Application) 41 | Cortex-R ARM v7-R (R=Realtime) 42 | Cortex-M ARM v7-M (M=Microcontroller) 43 | ``` 44 | 45 | ### ARMv8 46 | Introduced AArch64, which is a new instruction set (64 bit support). 47 | 48 | ### Container for assembly development 49 | ```console 50 | $ docker build -t arm-assembly . 51 | ``` 52 | 53 | ### Run the container 54 | ```console 55 | $ docker run --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -ti -v${PWD}/src:/src:Z -w="/src" arm-assembly sh 56 | ``` 57 | 58 | ### Compiling and linking 59 | ```console 60 | /src # as first.s -o first.o 61 | /src # ld -o first first.o 62 | /src # ./first 63 | Hello, ARM64! 64 | ``` 65 | 66 | ### objdump (arch64-linux-gnu) 67 | ```console 68 | $ aarch64-linux-gnu-objdump -s -d first 69 | 70 | first: file format elf64-littleaarch64 71 | 72 | Contents of section .text: 73 | 4000b0 200080d2 e1000058 02010058 080880d2 ......X...X.... 74 | 4000c0 010000d4 000080d2 a80b8052 010000d4 ...........R.... 75 | 4000d0 e0004100 00000000 06000000 00000000 ..A............. 76 | Contents of section .data: 77 | 4100e0 42616a6a 610a Bajja. 
78 | 79 | Disassembly of section .text: 80 | 81 | 00000000004000b0 <_start>: 82 | 4000b0: d2800020 mov x0, #0x1 // #1 83 | 4000b4: 580000e1 ldr x1, 4000d0 <_start+0x20> 84 | 4000b8: 58000102 ldr x2, 4000d8 <_start+0x28> 85 | 4000bc: d2800808 mov x8, #0x40 // #64 86 | 4000c0: d4000001 svc #0x0 87 | 4000c4: d2800000 mov x0, #0x0 // #0 88 | 4000c8: 52800ba8 mov w8, #0x5d // #93 89 | 4000cc: d4000001 svc #0x0 90 | 4000d0: 004100e0 .word 0x004100e0 91 | 4000d4: 00000000 .word 0x00000000 92 | 4000d8: 00000006 .word 0x00000006 93 | 4000dc: 00000000 .word 0x00000000 94 | ``` 95 | Notice the `=msg` here: 96 | ```assembly 97 | ldr x1, =msg /* buf */ 98 | ``` 99 | The `=` sign in this case indicates an LDR pseudo instruction. msg is defined 100 | in first.s as: 101 | ```assembler 102 | msg: 103 | .ascii "Bajja\n" 104 | ``` 105 | And `msg` is a label which is an address so it would be 64 bits when using a 106 | 64-bit processor. So without the `=` sign the assembler would see a value that 107 | is not a 16-bit immediate value trying to be loaded into x1. But with the `=` 108 | sign the assembler will change this instruction to: 109 | ``` 110 | 4000b4: 580000e1 ldr x1, 4000d0 <_start+0x20> 111 | ``` 112 | The max size of an immediate value is 16 bits, and that becomes an issue when 113 | we need to use 64 bit addresses and move them into registers. The register can 114 | handle 64 bits but not the opcode parameter. But what it can do is use a value 115 | relative to the current instruction pointer and this is what is happening here. 116 | We are telling the processor to use the value at 4000d0 which contains a pointer 117 | to the data, in this case the string 'Bajja'. 118 | Notice that this is in the .text segment following the code of the function. 119 | 120 | ### Instructions 121 | Are 32 bits in size (for both 32 and 64 bit processors). 
122 | 123 | ### Registers 124 | A64 provides 31 general purpose registers and each can be used as a 64-bit 125 | register in which case the name of the register starts with an `x`. So we have 126 | x0-x30 (can be upper or lower case) to use. 127 | These registers can also be used as 32-bit registers and then one uses `w` as the 128 | prefix of their names (w0-w30). 129 | 130 | Note that the type of register used will impact the instruction in which the 131 | register is used. 132 | 133 | ``` 134 | x0-x7 Arguments to functions and return values. 135 | x8 For syscalls the number goes into this register. 136 | x9-x15 For local variables. 137 | x16-x18 Used for intra-procedure-call (IP0/IP1) and platform values. 138 | x19-x28 Callee saved 139 | x29 Frame register (like rbp I think) 140 | x30 Link Register (return address for function calls) 141 | SP/XZR The stack pointer for instruction dealing with the stack 142 | and zero register otherwise. 143 | PC The program counter. 144 | ``` 145 | 146 | 147 | ### Calling conventions 148 | ``` 149 | syscall nr return arg0 arg1 arg2 arg3 arg4 arg5 150 | arm r7 r0 r0 r1 r2 r3 r4 r5 151 | arm64 x8 x0 x0 x1 x2 x3 x4 x5 152 | ``` 153 | 154 | ### System calls 155 | See [64-bit table](https://chromium.googlesource.com/chromiumos/docs/+/master/constants/syscalls.md#tables) 156 | for system call numbers. 157 | 158 | The instruction for system calls, system interrupt is `svc` 159 | (supervisor call) which takes a system call number from the table above (passed in register x8). 160 | The arguments the system call takes can also be seen in the table above in the 161 | additional columns for each call. 162 | 163 | 164 | ### xzr register 165 | Is a register and its value is always zero. 
166 | ```console 167 | Disassembly of section .text: 168 | 169 | 0000000000400078 <_start>: 170 | 400078: d2800000 mov x0, #0x0 // #0 171 | 40007c: aa1f03e0 mov x0, xzr 172 | 400080: aa1f03e0 mov x0, xzr 173 | 400084: d2800ba8 mov x8, #0x5d // #93 174 | 400088: d4000001 svc #0x0 175 | ``` 176 | Note that `movz` will move the immediate and then set the rest of the bits 177 | to zero in the destination register. 178 | And without aliases we get: 179 | ```console 180 | $ aarch64-linux-gnu-objdump -d -M no-aliases xzr 181 | 182 | xzr: file format elf64-littleaarch64 183 | 184 | 185 | Disassembly of section .text: 186 | 187 | 0000000000400078 <_start>: 188 | 400078: d2800000 movz x0, #0x0 189 | 40007c: aa1f03e0 orr x0, xzr, xzr 190 | 400080: aa1f03e0 orr x0, xzr, xzr 191 | 400084: d2800ba8 movz x8, #0x5d 192 | 400088: d4000001 svc #0x0 193 | ``` 194 | 195 | ### svc (supervisor call) 196 | This is used for system interrupt, for example calling exit: 197 | ```assembly 198 | mov x0, #0 /* status */ 199 | mov x8, #93 /* exit syscall #93 */ 200 | svc #0 201 | ``` 202 | To me it makes sense that the exit status code is passed in register x0, but 203 | it is not clear to me why the system call number, #93 above, is passed in 204 | register x8. 
This is just a calling convention and we can see the conventions 205 | in the syscall man page: 206 | ```console 207 | Arch/ABI Instruction System Ret Ret Error Notes 208 | call # val val2 209 | ─────────────────────────────────────────────────────────────────── 210 | alpha callsys v0 v0 a4 a3 1, 6 211 | arc trap0 r8 r0 - - 212 | arm/OABI swi NR - r0 - - 2 213 | arm/EABI swi 0x0 r7 r0 r1 - 214 | arm64 svc #0 w8 x0 x1 - 215 | blackfin excpt 0x0 P0 R0 - - 216 | i386 int $0x80 eax eax edx - 217 | ia64 break 0x100000 r15 r8 r9 r10 1, 6 218 | m68k trap #0 d0 d0 - - 219 | microblaze brki r14,8 r12 r3 - - 220 | mips syscall v0 v0 v1 a3 1, 6 221 | nios2 trap r2 r2 - r7 222 | parisc ble 0x100(%sr2, %r0) r20 r28 - - 223 | powerpc sc r0 r3 - r0 1 224 | powerpc64 sc r0 r3 - cr0.SO 1 225 | riscv ecall a7 a0 a1 - 226 | s390 svc 0 r1 r2 r3 - 3 227 | s390x svc 0 r1 r2 r3 - 3 228 | superh trap #0x17 r3 r0 r1 - 4, 6 229 | sparc/32 t 0x10 g1 o0 o1 psr/csr 1, 6 230 | sparc/64 t 0x6d g1 o0 o1 psr/csr 1, 6 231 | tile swint1 R10 R00 - R01 1 232 | x86-64 syscall rax rax rdx - 5 233 | x32 syscall rax rax rdx - 5 234 | xtensa syscall a2 a2 - - 235 | ``` 236 | 237 | 238 | The argument to `svc` is mandatory but is up to the handler how to use it (I 239 | think). 240 | 241 | 242 | ### mov 243 | ``` 244 | mov{S}{cond} Rd, Operand2 245 | mov{cond} Rd, #imm16 246 | ``` 247 | If `S` is specified then the conditional flags are updated as part of the 248 | operation. 249 | 250 | 251 | Apperently mov is not an arm instruction but an alias. So when we write 252 | ```assembly 253 | mov x0, #4 254 | mov x1, x0 255 | ``` 256 | The assembler will expand that to: 257 | ```asssembly 258 | 4000c4: d2800080 movz x0, #0x4 259 | 4000c8: aa0003e1 orr x1, xzr, x0 260 | ``` 261 | 262 | ### add 263 | A very basic [add](./src/add.s): 264 | ``` 265 | $ make add 266 | $ qemu-aarch64 add 267 | $ echo $? 268 | 5 269 | ``` 270 | 271 | ### load address (ldr) 272 | This is used to load the address, like leaq in x86_64. 
The `=` sign is used 273 | in this case: 274 | ```assembly 275 | ldr x1, =msg 276 | ``` 277 | The `=` sign in this case means to use the LDR pseudo instruction. 278 | 279 | The following example loads the value found in the memory location in r0 into 280 | ra: 281 | ```assembly 282 | ldr ra, [r0] 283 | ``` 284 | 285 | ### branch 286 | An example of conditional branching can be found in [branch](./src/branch.s). 287 | 288 | ### Store Register (STR) 289 | This command stores the contents of a register into memory: 290 | ```assembly 291 | str x0, [SP, #-16]! 292 | ``` 293 | Notice the `!` which requests register write-back. So SP is used as the base 294 | register and 16 is subtracted from that, and SP is also updated with that value. 295 | 296 | So what this is doing is subtracting 16 from SP and updating SP, then 297 | copying x0 into that location. 298 | 299 | 300 | ### QEMU 301 | Machine emulator. 302 | 303 | #### Setup 304 | ```console 305 | $ sudo cp fedora_aarch64.repo /etc/yum.repos.d/ 306 | $ dnf install aarch64-linux-gnu-{binutils,gcc,glibc} 307 | ``` 308 | 309 | #### Compiling and linking 310 | ```console 311 | $ aarch64-linux-gnu-as -o first.o src/first.s 312 | $ aarch64-linux-gnu-ld -o first first.o 313 | $ file first 314 | first: ELF 64-bit LSB executable, ARM aarch64, version 1 (SYSV), statically linked, not stripped 315 | ``` 316 | 317 | #### Run using QEMU 318 | ```console 319 | $ qemu-aarch64 first 320 | Hello, ARM64! 321 | ``` 322 | 323 | ### Microprocessor without Interlocked Pipelined Stages (MIPS) 324 | Is a RISC instruction set architecture (ISA). 325 | 326 | 327 | ### armasm 328 | Needs to be [downloaded](https://developer.arm.com/tools-and-software/embedded/arm-compiler/downloads/version-6) 329 | and installed. 
330 | 331 | To start using Arm Compiler for Embedded 6.17: 332 | ```console 333 | - Create a suite sub-shell using /home/danielbevenius/ArmCompilerforEmbedded6.17/bin/suite_exec bash 334 | ``` 335 | 336 | 337 | #### Directives 338 | Just keep in mind that these are directives that the specific assembler uses 339 | and are not part of the instruction set. So we can choose to use either armasm or 340 | as (GNU assembler) to write our programs and they only understand their own 341 | directives. 342 | 343 | armasm and GNU as directives: 344 | ``` 345 | ASM GNU 346 | AREA .sect 347 | EQU .equ 348 | DCB .byte 349 | DCW .half 350 | DCD .word 351 | SPACE .space 352 | END .end 353 | RN .asg 354 | ``` 355 | EQU comes from equate directive. 356 | 357 | 358 | ### arm-none-eabi-as 359 | This can be used for cross compilation of arm assembly programs and allows for 360 | exploring 32 bit arm code. 361 | 362 | For example, there is a [space.s](./src/space.s) program that we can compile 363 | using: 364 | ```console 365 | $ make space 366 | arm-none-eabi-as -g -o space.o src/space.s 367 | arm-none-eabi-ld -g -o space space.o 368 | ``` 369 | This can then be run using: 370 | ```console 371 | $ qemu-arm ./space 372 | ``` 373 | That is not very helpful though as nothing will happen. What we can do instead 374 | is specify that the emulator should halt: 375 | ```console 376 | $ qemu-arm -singlestep -g 7777 space 377 | ``` 378 | `7777` is the port that we can then use to connect using gdb: 379 | ```console 380 | $ arm-none-eabi-gdb 381 | (gdb) file space 382 | (gdb) target remote localhost:7777 383 | Remote debugging using localhost:7777 384 | _start () at src/space.s:6 385 | 6 ldr r0, =A 386 | 387 | // stepping/inspecting... 
388 | 389 | (gdb) disassemble 390 | Dump of assembler code for function _start: 391 | 0x00008000 <+0>: ldr r0, [pc, #8] ; 0x8010 <_start+16> 392 | 0x00008004 <+4>: mov r1, #2 393 | 0x00008008 <+8>: str r1, [r0] 394 | => 0x0000800c <+12>: b 0x8000 <_start> 395 | 0x00008010 <+16>: andeq r8, r1, r4, lsl r0 396 | End of assembler dump. 397 | 398 | (gdb) 399 | (gdb) x $r0 400 | 0x18014: 0x00000002 401 | 402 | (gdb) kill 403 | ``` 404 | 405 | ### ldr (arm) 406 | Takes a value in memory and writes it to a register: 407 | ``` 408 | LDR{size}{cond} <Rd>, <addressing_mode> 409 | ``` 410 | Without a size specified this will be a 32-bit load. 411 | Size can also be `LDRB` for 8 bits, `LDRH` for 16-bits (Halfword), `LDRSB` 412 | for signed byte, `LDRSH` signed halfword, and `LDM` for multiple words. 413 | The addressing modes can have a base register, an offset, and a shift 414 | operation: 415 | ```assembly 416 | dest base shift operation 417 | ↓ ↓ ↓ 418 | ldr r9, [r12, r8, LSR #2] 419 | ↑ ↑ 420 | offset immediate value 421 | ``` 422 | The shifted offset is added to the base, so here r12 + (r8 >> 2) since `LSR #2` shifts right by two bits (`LSL #2` would give r12 + r8 * 4), and this is called the 423 | effective address. So lets say the base address contains the address to a 424 | struct, r8 is a member of the struct which is an array, then we could index 425 | values in the array using the shift I think. 426 | 427 | ### str (arm) 428 | Takes a value from a register and stores it in memory. 429 | ```assembly 430 | 431 | str r0, [r1] 432 | 433 | r0: 0xaabbccdd r1: 0x00008000 ------> 0x0000800: 0xdd 434 | 0x0000801: 0xcc 435 | 0x0000802: 0xbb 436 | 0x0000803: 0xaa 437 | ``` 438 | We can also add an increment operand to the str instruction: 439 | ``` 440 | str r0, [r1], #4 441 | 442 | r0: 0xaabbccdd r1: 0x00008004 --+ 0x0000800: 0xdd 443 | | 0x0000801: 0xcc 444 | | 0x0000802: 0xbb 445 | | 0x0000803: 0xaa 446 | +---→ 0x0000804: 0x00 447 | ``` 448 | Notice that after the instruction completes r1 has now been incremented. 449 | 450 | ```assembly 451 | ldr r1, [r0, #4]! 
452 | ``` 453 | r1 will contain the value at address r0+4, and r0 will be updated to contain r0+4. 454 | 455 | ```assembly 456 | ldr r1, [r0], #4 457 | ``` 458 | r1 will contain the value at address r0, and r0 will be updated to contain r0+4. 459 | 460 | ### pre-indexed addressing 461 | ``` 462 | ldr{size}{cond} <Rd>, [<Rn>, <offset>] {!} 463 | {effective addr} 464 | 465 | ! = should the effective address be written back into Rn, without this Rn will 466 | be unchanged. 467 | ``` 468 | 469 | ### post-indexed addressing 470 | This is the same as we saw previously where after the str (or ldr) completes 471 | Rn is incremented. Notice that this only says incremented, and this differs 472 | from `!` where the effective address is written back into `Rn`. 473 | ``` 474 | str{size}{cond} <Rd>, [<Rn>], <offset> 475 | 476 | str r0, [r1], #4 477 | 478 | r0: 0xaabbccdd r1: 0x00008000 ------> 0x0000800: 0xdd 479 | 0x0000801: 0xcc 480 | 0x0000802: 0xbb 481 | 0x0000803: 0xaa 482 | ``` 483 | 484 | ### instruction encoding 485 | 486 | The instruction encoding for a 32-bit instruction looks like this: 487 | ``` 488 | 31 29 27 25 23 21 19 17 15 13 11 9 7 5 3 1 489 | +---------------------------------------------------------+ 490 | | Cond |0|0|I| opcode|S| Op1 | Dest | Operand 2 | 491 | +---------------------------------------------------------+ 492 | 30 28 26 24 22 20 18 16 14 12 10 8 6 4 2 0 493 | 494 | Data processing instruction: bit 26 and 27: 00 495 | opcode: the instruction, add, sub, mov, cmp etc. 496 | I: is the immediate bit. If this is 0 then Operand 2 is a register, and if this 497 | bit is 1 Operand 2 is an immediate value. 498 | Operand 2: 12-bits. 2¹²=4096, so we only have values in the range 0-4095 but 499 | ARM does not use this value as a 12-bit number! 500 | Instead what it does is that it uses an 8-bit value with a 4-bit rotate value 501 | 2⁴ = 16. 
502 | 503 | 11 9 7 5 3 1 504 | +-----------------------+ 505 | |Rotate| Immediate | 506 | +-----------------------+ 507 | 10 8 6 4 2 0 508 | 509 | 11-8 Rotate bits 510 | 7-0 Immediate bits 511 | ``` 512 | For example: 513 | ``` 514 | mov r0, #3, 2 515 | 516 | 3 = 0000 0000 0000 0000 0000 0000 0000 0011 517 | Rotating that right by 2 bits gives us: 518 | 1100 0000 0000 0000 0000 0000 0000 0000 519 | which is -1073741824 decimal 520 | ``` 521 | And we can inspect the generated instruction using objdump: 522 | ```console 523 | 800c: e3a00103 mov r0, #-1073741824 ; 0xc0000000 524 | ``` 525 | 526 | ### dot 527 | The dot “.” represents the current location in program memory. So: 528 | ```assembly 529 | b . 530 | ``` 531 | Tells the processor to jump to this very instruction, that is, execute it 532 | over and over in an endless loop. This is used often at the end of 533 | microcontroller programs, as it prevents the processor from executing random 534 | data that is located in flash memory after the program. 535 | 536 | The following instruction is also an unconditional branch: 537 | ```assembly 538 | b.n . 539 | ``` 540 | The `.n` suffix is telling the assembler to encode this instruction as 16 bits. 541 | 542 | 543 | ### STMIA/LDMIA 544 | Store and load multiple registers. 545 | Syntax: 546 | ```assembly 547 | ldmia Rn!, {reg_list} 548 | stmia Rn!, {reg_list} 549 | ``` 550 | Example: 551 | ```assembly 552 | 8000358: c302 stmia r3!, {r1} 553 | ``` 554 | 555 | ```console 556 | $ make ldmia 557 | $ qemu-arm ldmia 558 | $ qemu-arm -g 7777 ./ldmia 559 | ``` 560 | ```console 561 | $ arm-none-eabi-gdb ldmia 562 | GNU gdb (GNU Arm Embedded 563 | (gdb) br ldmia.s:15 564 | Breakpoint 1 at 0x8000: file src/ldmia.s, line 15. 
565 | (gdb) target remote localhost:7777 566 | Remote debugging using localhost:7777 567 | _start () at src/ldmia.s:15 568 | (gdb) si 569 | (gdb) i r $r0 $r1 $r2 $r3 $r4 $r5 570 | r0 0x8000 32768 571 | r1 0x0 0 572 | r2 0x1 1 573 | r3 0x10 16 574 | r4 0x11 17 575 | r5 0x100 256 576 | ``` 577 | So notice that we have loaded all of the registers, r1, r2, r3, r4, and r5 with 578 | the values of the `array`. So with a single instruction we have loaded 579 | `multiple` registers. 580 | 581 | ### bic (bit clear) 582 | ```console 583 | $ make bic 584 | $ qemu-arm -g 7777 ./bic 585 | ``` 586 | ```console 587 | $ arm-none-eabi-gdb bic 588 | (gdb) target remote localhost:7777 589 | Remote debugging using localhost:7777 590 | (gdb) si 591 | (gdb) p/t $r0 592 | $1 = 1111 593 | (gdb) f 594 | #0 _start () at src/bic.s:7 595 | 7 bic r1, r0, #4 596 | (gdb) p/t $r1 597 | $3 = 1011 598 | ``` 599 | Notice how this instruction can be used to mask out single bits. If we wanted 600 | to mask out the two topmost bits we could use 1100 (12 decimal). 601 | 602 | 603 | ### Unsigned Extend type (uxtx) 604 | Where type can be `b` for byte, `h` for halfword, `w` for word. 605 | So lets say we have a 4 bit binary value 1010 and want to extend this to 606 | 8 bits this would just be 0000 1010. 607 | 608 | Example: [uxtb.s](./src/uxtb.s) 609 | 610 | ```console 611 | (gdb) p/x $r0 612 | $2 = 0xffffff65 613 | 614 | (gdb) si 615 | (gdb) p/t $r1 616 | $4 = 1100101 617 | 618 | (gdb) si 619 | (gdb) p/x $r1 620 | $6 = 0x65 621 | (gdb) p/x $r1 622 | $2 = 0xff65 623 | ``` 624 | Notice that this allows us to copy/cut/mask a portion of a register. This would 625 | be useful when reading from a register. 626 | 627 | ### Current Program Status Register 628 | Example: [psr.s](./src/psr.s) 629 | 630 | ```console 631 | (gdb) x/tw $cpsr 632 | 0x10: 00000000001010000000000000000010 633 | ``` 634 | Bit 31, is the negative condition flag and we can see that it is currently not 635 | set. 
636 | Bit 30, is the zero condition flag and we can see that it is currently not set. 637 | ```console 638 | (gdb) p/t $cpsr 639 | $3 = 10000000000000000000000000010000 640 | (gdb) p/t ($cpsr & (1 << 31)) != 0 641 | $13 = 1 642 | ``` 643 | 644 | ### Wait For Interrupt (wfi) 645 | Allows the core to enter low power mode and stop executing code. 646 | ```assembly 647 | wfi 648 | ``` 649 | 650 | ### Wait For Event (wfe) 651 | Allows the core to enter low power mode and stop executing code. 652 | ```assembly 653 | wfe 654 | ``` 655 | 656 | ### Signal Event (SEV) 657 | Causes an event to be signaled to all cores. So I think that if there there are 658 | multiple cores one core could issue a wfe and then calling SEV would wake it 659 | up. 660 | -------------------------------------------------------------------------------- /linux/README.md: -------------------------------------------------------------------------------- 1 | # Linux assembly language exploration 2 | 3 | The examples here are sometimes not really useful except for inspecting 4 | object code and understanding how things get linked. 5 | 6 | ## Variables in data section 7 | 8 | Take `first.s` as an example and look at the variable that is defined in the 9 | .data section: 10 | 11 | ``` 12 | .data 13 | something: 14 | .byte 2 15 | ``` 16 | 17 | If we use objdump to inspect the data section we find: 18 | 19 | ```console 20 | $ objdump -d -j .data first 21 | 22 | first: file format elf64-x86-64 23 | 24 | 25 | Disassembly of section .data: 26 | 27 | 0000000000402000 : 28 | 402000: 02 00 .byte 0x2 29 | ... 30 | ``` 31 | 32 | ## Check if zero/null 33 | [check_zero.s](./check_zero.s) contains an example of checking a register if 34 | it is zero by using the `test` opcode: 35 | ``` 36 | test %rcx, %rcx 37 | je zero_func 38 | ``` 39 | So I'm thinking that this would be similar to a checking that there is a value 40 | or not. 
41 | 42 | 43 | ## Load Effective Address 44 | 45 | Take the following instructions: 46 | ``` 47 | .data 48 | msg: 49 | .ascii "bajja\n" 50 | ... 51 | 52 | mov msg, %rsi 53 | lea msg, %rsi 54 | ``` 55 | 56 | Now, if we take a look at `msg` it contains: 57 | ```console 58 | (lldb) expr msg 59 | (void *) $5 = 0x00000a616a6a6162 60 | ``` 61 | 62 | This looked a little strange to me at first, but this is actually the value 63 | contained in the memory location: 64 | 65 | ```console 66 | (lldb) memory read -f x -s 8 -c 1 0x0000000000402000 67 | 0x00402000: 0x00000a616a6a6162 68 | 69 | (lldb) memory read -c 5 0x0000000000402000 70 | 0x00402000: 62 61 6a 6a 61 bajja 71 | ``` 72 | And remember memory is read using little endian `00000a616a6a6162` which 73 | would then become `00000a62616a6a61`. 74 | 75 | Using mov with `msg` will only copy the value `00000a616a6a6162` into a register 76 | for example. But to pass the msg to a function like write we would need to 77 | pass a pointer. For this we use the command lea which is like `&` in c/c++ 78 | to get the address: 79 | ``` 80 | (lldb) expr (char*)&msg 81 | (char *) $26 = 0x0000000000402000 "bajja\n" 82 | ``` 83 | 84 | ### .set directive 85 | Can be used to set a memory location to a value. 86 | ``` 87 | done: .ascii "done...\n" 88 | .set done_len, . - done 89 | ``` 90 | If we take a look at the binary we will find: 91 | ``` 92 | 40102e: 48 c7 c2 08 00 00 00 mov $0x8,%rdx 93 | ``` 94 | So this works sort of like a `#define` in C/C++ which would be replaced by the 95 | preprocessor (not that there is one when using as). There won't be any symbol 96 | for msg_len. 97 | 98 | This can also be written as: 99 | ``` 100 | msg_len = . 
- msg 101 | ``` 102 | 103 | ### GAS section directive 104 | This directive has the following format: 105 | ``` 106 | .section name [, "flags"[, @type[, flag_specific_args]]] 107 | ``` 108 | 109 | ### syscall 110 | syscall is used to make an indirect system call and has the following signature: 111 | ``` 112 | long syscall(long number, ...); 113 | ``` 114 | An example is when calling exit which has sys number 60: 115 | ``` 116 | mov $60, %rax 117 | xor %rdi, %rdi 118 | syscall 119 | ``` 120 | The system calls can be found using `man syscalls` and the actual numbers can 121 | be found in `/usr/include/asm/unistd_64.h` 122 | 123 | So the system call number is passed in rax, and the following arguments to the 124 | actual system call are passed in rdi, rsi, rdx, r10, r8, r9. And the result is 125 | stored in rax. 126 | 127 | ### execve 128 | This section will look closer at the execve system call and calling it from 129 | assembly code. 130 | 131 | ```c 132 | #include <unistd.h> 133 | 134 | int execve(const char *pathname, char *const argv[], 135 | char *const envp[]); 136 | ``` 137 | 138 | System call nr is `59` which is the value that goes into `%rax`. 139 | ```assembly 140 | mov $59, %rax 141 | ``` 142 | 143 | The first argument which is the file name is passed in `%rdi`. 144 | ```assembly 145 | lea msg, %rdi 146 | ``` 147 | 148 | The second argument which is argv is passed in `%rsi`. Now this is an array 149 | of char pointers which we need to create. 150 | ```assembly 151 | ``` 152 | And the last argument which is envp is passed in `%rdx`. 153 | 154 | ### array on the stack 155 | So it was not obvious to me how to create an array on the stack in assembly and 156 | add elements to it. 
157 | ```c 158 | int main() { 159 | int array[2] = {1, 2}; 160 | int* ptr = array; 161 | } 162 | ``` 163 | 164 | ```console 165 | $ gcc -o arr arr.c -fomit-frame-pointer 166 | ``` 167 | 168 | ```console 169 | $ objdump --disassemble=main arr 170 | 171 | arr: file format elf64-x86-64 172 | 173 | 174 | Disassembly of section .init: 175 | 176 | Disassembly of section .text: 177 | 178 | 0000000000401106
: 179 | 401106: c7 44 24 f0 01 00 00 movl $0x1,-0x10(%rsp) 180 | 40110d: 00 181 | 40110e: c7 44 24 f4 02 00 00 movl $0x2,-0xc(%rsp) 182 | 401115: 00 183 | 401116: 48 8d 44 24 f0 lea -0x10(%rsp),%rax 184 | 40111b: 48 89 44 24 f8 mov %rax,-0x8(%rsp) 185 | 401120: b8 00 00 00 00 mov $0x0,%eax 186 | 401125: c3 retq 187 | 188 | Disassembly of section .fini: 189 | ``` 190 | So this is interesting, we are just placing the 1 on the stack relative to the 191 | stack pointer (now remember that the position is given in hex! I keep forgetting 192 | this when debugging): 193 | ```console 194 | (lldb) memory read -f x -c 1 -s 4 '$rsp - 16' 195 | 0x7fffffffd0e8: 0x00000001 196 | ``` 197 | So the whole array would be at: 198 | ```console 199 | (lldb) memory read -f x -c 2 -s 4 '$rsp - 16' 200 | 0x7fffffffd0e8: 0x00000001 0x00000000 201 | ``` 202 | And we can verify this by stepping over the next instruction using si 203 | ```console 204 | (lldb) si 205 | (lldb) memory read -f x -c 2 -s 4 '$rsp - 16' 206 | 0x7fffffffd0e8: 0x00000001 0x00000002 207 | ``` 208 | Now, we have the `ptr` local variable which is loading the effective address 209 | of `%rsp - 16` into rax, which is the address of the first entry of the array. 210 | Next, this value is stored in location `%rsp - 8`. 211 | ```console 212 | (lldb) register read rax 213 | rax = 0x0000000000401106 arr`main at arr.c:2:7 214 | (lldb) register read rax 215 | rax = 0x00007fffffffd0e8 216 | (lldb) memory read -f x -c 1 -s 4 '$rsp - 8' 217 | 0x7fffffffd0f0: 0x00000000 218 | (lldb) si 219 | (lldb) memory read -f x -c 1 -s 4 '$rsp - 8' 220 | 0x7fffffffd0f0: 0xffffd0e8 221 | ``` 222 | Now this might seem really trivial but it can be good to know how to actually 223 | create an array on the stack in assembly without having to first disassemble 224 | c code. 225 | 226 | ### Stack addressing 227 | I need to remind myself that the stack is just a part of memory, but handled 228 | in a different way. 
The stack is there in the allocated memory for the process 229 | and we can use memory locations below the stack pointer value (rsp). Remember 230 | that rsp just points to a memory location that happens to be what some 231 | instructions update when the are executed, for example, push/pop will subtract 232 | and add to value in rsp. But if we don't use those instructions we are free 233 | to just store values by using mov and placing values in specific locations 234 | relative to rsp (if rsp moves we would be in trouble which in those cases we 235 | would use a base pointer/frame pointer in rbp). 236 | 237 | We have to know what size of the data we are going to move so that move 238 | instruction will know. 239 | ``` 240 | movb 1 bytes (8 bits) 241 | movs single (32-bit floating point) 242 | movw word (16 bits) 243 | movl long (32-bit integer or 64-bit floating point) 244 | movq quad (64-bit) 245 | movt ten bytes (80-bit floating point) 246 | ``` 247 | Lets explore this a little using [arr.s](./arr.s): 248 | ```console 249 | (lldb) br s -n _start 250 | ``` 251 | Now, lets say we want to see the stack from the current rsp and 64 bytes 252 | down which is the stack where we can place values. 
253 | To do this we have to remember that the stack grows downward, so we want to 254 | look at from the current rsp down 64 bytes which means subtracting 64 from rsp: 255 | ```console 256 | (lldb) memory read -f x -c 10 -s 8 '$rsp - 64' 257 | 0x7fffffffd190: 0x0000000000000000 0x0000000000000000 258 | 0x7fffffffd1a0: 0x0000000000000000 0x0000000000000000 259 | 0x7fffffffd1b0: 0x0000000000000000 0x0000000000000000 260 | 0x7fffffffd1c0: 0x0000000000000000 0x0000000000000000 261 | 262 | (lldb) memory read -f x -c 10 -s 8 '$rsp - 64' 263 | 0x7fffffffd190: 0x0000000000000000 0x0000000000000000 264 | 0x7fffffffd1a0: 0x0000000000000000 0x0000000000000000 265 | 0x7fffffffd1b0: 0x0000000000000000 0x0000000000000000 266 | 0x7fffffffd1c0: 0x0000000000000000 0x0000000000000000 267 | 0x7fffffffd1d0: 0x0000000000000001 0x00007fffffffd5a1 268 | ``` 269 | 270 | Now, depending on the data stored we might be interested in looking at bytes, 271 | words, etc. So we need to adjust the `size` `-s` and also the `count` `-c`. 272 | The size is the size of the memory granuality that we be displayed which makes 273 | it easier to see what belongs to which memory locations. 
274 | ```console 275 | (lldb) memory read -f x -c 20 -s 4 '$rsp - 64' 276 | 0x7fffffffd190: 0x00000000 0x00000000 0x00000000 0x00000000 277 | 0x7fffffffd1a0: 0x00000000 0x00000000 0x00000000 0x00000000 278 | 0x7fffffffd1b0: 0x00000000 0x00000000 0x00000000 0x00000000 279 | 0x7fffffffd1c0: 0x00000000 0x00000000 0x00000002 0x00000000 280 | 0x7fffffffd1d0: 0x00000004 0x00000000 0xffffd5a1 0x00007fff 281 | ``` 282 | ``` 283 | Bytes: size: 1 count: 64/1 = 64 (add one for rsp) 284 | Word: size: 2 count: 64/2 = 32 (add one for rsp) 285 | Quad: size: 4 count: 64/4 = 16 (add one for rsp) 286 | 287 | ``` 288 | Byte example 289 | ```console 290 | (lldb) memory read -f x -c 65 -s 1 '$rsp - 64' 291 | 0x7fffffffd190: 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 292 | 0x7fffffffd198: 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 293 | 0x7fffffffd1a0: 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 294 | 0x7fffffffd1a8: 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 295 | 0x7fffffffd1b0: 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 296 | 0x7fffffffd1b8: 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 297 | 0x7fffffffd1c0: 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 298 | 0x7fffffffd1c8: 0x02 0x00 0x00 0x00 0x00 0x00 0x00 0x00 299 | 0x7fffffffd1d0: 0x04 300 | ``` 301 | 302 | 303 | Word example: 304 | ```console 305 | (lldb) memory read -f x -c 33 -s 2 '$rsp - 64' 306 | 0x7fffffffd190: 0x0000 0x0000 0x0000 0x0000 0x0000 0x0000 0x0000 0x0000 307 | 0x7fffffffd1a0: 0x0000 0x0000 0x0000 0x0000 0x0000 0x0000 0x0000 0x0000 308 | 0x7fffffffd1b0: 0x0000 0x0000 0x0000 0x0000 0x0000 0x0000 0x0000 0x0000 309 | 0x7fffffffd1c0: 0x0000 0x0000 0x0000 0x0000 0x0002 0x0000 0x0000 0x0000 310 | 0x7fffffffd1d0: 0x0004 311 | ``` 312 | 313 | Quad example: 314 | ```console 315 | (lldb) memory read -f x -c 17 -s 4 '$rsp - 64' 316 | 0x7fffffffd190: 0x00000000 0x00000000 0x00000000 0x00000000 317 | 0x7fffffffd1a0: 0x00000000 0x00000000 0x00000000 0x00000000 318 | 0x7fffffffd1b0: 0x00000000 0x00000000 0x00000000 0x00000000 319 | 0x7fffffffd1c0: 
0x00000000 0x00000000 0x00000002 0x00000000 320 | 0x7fffffffd1d0: 0x00000004 321 | ``` 322 | 323 | Now, if we specify addressing relative to a register we use an number before 324 | the register and use parentheses: 325 | ``` 326 | movb $4, 0(%rsp) 327 | movb $2, -1(%rsp) 328 | movb $3, -2(%rsp) 329 | ``` 330 | Doing this this 331 | ```console 332 | (lldb) memory read -f x -c 4 -s 1 '$rsp - 3' 333 | 0x7fffffffd1cd: 0x00 0x03 0x02 0x01 334 | ``` 335 | 336 | And we can use the address from rsp to address the different values, just 337 | like an array of ints: 338 | ```console 339 | (lldb) register read rsp 340 | rsp = 0x00007fffffffd1d0 341 | (lldb) memory read -f x -c 1 -s 1 '0x00007fffffffd1d0 - 2' 342 | 0x7fffffffd1ce: 0x03 343 | (lldb) memory read -f x -c 1 -s 1 '0x00007fffffffd1d0 - 1' 344 | 0x7fffffffd1cf: 0x02 345 | (lldb) memory read -f x -c 1 -s 1 '0x00007fffffffd1d0 - 0' 346 | 0x7fffffffd1d0: 0x01 347 | ``` 348 | 349 | One thing to note is that when we run the example program this it will be invoked 350 | by `execve`: 351 | ```console 352 | $ strace ./arr 353 | execve("./arr", ["./arr"], 0x7fffde884fb0 /* 74 vars */) = 0 354 | exit(0) = ? 355 | +++ exited with 0 +++ 356 | ``` 357 | And when we break in the debugger we can inspect the existing stack: 358 | ```console 359 | (lldb) memory read -f x -c 8 -s 8 '$rsp' 360 | 0x7fffffffd1d0: 0x0000000000000001 0x00007fffffffd5a1 361 | 0x7fffffffd1e0: 0x0000000000000000 0x00007fffffffd5e0 362 | 0x7fffffffd1f0: 0x00007fffffffd5f4 0x00007fffffffd62a 363 | 0x7fffffffd200: 0x00007fffffffd641 0x00007fffffffd660 364 | (lldb) memory read -f s 0x00007fffffffd5a1 365 | 0x7fffffffd5a1: "/home/danielbevenius/work/assembly/learning-assembly/linux/arr" 366 | ``` 367 | The value in top most position on the stack is argc which above is 1. So really 368 | our program should not overwrite that value but instead substract the size of 369 | an int (32-bits/4-bytes) and then add our values. 
370 | ```assembly 371 | sub $1, %rsp 372 | ``` 373 | Notice that the register we have specified is a 64 bit register 374 | 375 | ``` 376 | (lldb) register read rsp 377 | rsp = 0x00007fffffffd1b0 378 | (lldb) si 379 | (lldb) register read rsp 380 | rsp = 0x00007fffffffd1af 381 | ``` 382 | 140737488343472 140737488343471 383 | 384 | Try to add an alias for this which will take the size of the stack to show: 385 | ``` 386 | (lldb) command alias showstack memory read -f x -c 10 -s 8 `$rsp - 64` 387 | ``` 388 | 389 | ### mov label 390 | When you move something you need to think about how much data you are 391 | moving. For example, take the following: 392 | ```assembly 393 | .data 394 | msg: .ascii "something\n" 395 | len: .int . - msg 396 | ... 397 | 398 | mov len, %rdx 399 | ``` 400 | This is moving 64 bits into rdx starting from the memory location len. In our 401 | case the first 32 bits of len contain our message length which is 10 (a in hex) 402 | and the rest is whatever follows in the data section. 403 | If we only want to move our int we can use: 404 | ```assembly 405 | mov len, %edx 406 | ``` 407 | or, to move only the lowest byte (enough here since the length fits in one byte): 408 | ```assembly 409 | mov len, %dl 410 | ``` 411 | 412 | ### mul 413 | Take the [example](./multi.s) and if we run this in the debugger we find: 414 | ```console 415 | $ lldb -- ./multi 416 | (lldb) target create "./multi" 417 | Current executable set to './multi' (x86_64). 418 | (lldb) br s -n _start 419 | (lldb) r 420 | (lldb) register read rax rbx 421 | rax = 0x0000000000000008 422 | rbx = 0x0000000000000004 423 | (lldb) si 424 | (lldb) register read rax rbx 425 | rax = 0x0000000000000020 426 | rbx = 0x0000000000000004 427 | (lldb) register read -f d rax rbx 428 | rax = 32 429 | rbx = 4 430 | ``` 431 | Notice that the result of the multiplication is placed in `rax`. 432 | 433 | ### div 434 | This operation generates two output values, the quotient and a remainder. 
435 | 436 | Take the [example](./div.s) and if we run this in the debugger we find: 437 | ```console 438 | 4 _start: 439 | 5 mov $21, %ax 440 | 6 mov $2, %bx 441 | -->7 div %bx 442 | (lldb) register read -f d al ah bx 443 | al = 21 444 | ah = 0 445 | bx = 2 446 | (lldb) si 447 | (lldb) register read -f d al ah dx 448 | al = 10 449 | ah = 0 450 | dx = 1 451 | ``` 452 | Notice the quotient is in `ax` (shown above as its two halves `al` and `ah`) and the remainder is in `dx`. 453 | 454 | ### integer to string 455 | To do this in assembly we need to take a look at how this is done. 456 | We have an integer which is a number of digits. We take the remainder of 457 | dividing the number by 10 to get the right most digit, and then continue 458 | dividing until there are no more digits. 459 | 460 | ``` 461 | 234 % 10 = 4-------------+ 462 | 234 / 10 = 23 | 463 | | 464 | +------------↓↓ 465 | 23 % 10 = 3 234 466 | 23 / 10 = 2 ↑ 467 | | 468 | 2 % 10 = 2-----------+ 469 | 2 / 10 = 0 470 | ``` 471 | So when we get 0 as the quotient (the result of dividing the number by 10) we 472 | are finished, and if we take the remainders (shown as separate operations above 473 | but the `div` operation in assembly produces both), we have the number in the 474 | reverse order (432 instead of 234). Now we still want to print these and that 475 | will be done with ascii. In ascii the zero digit has the value 48: 476 | ```console 477 | $ man ascii 478 | Oct Dec Hex Char Oct Dec Hex Char 479 | ──────────────────────────────────────────────────────────────────────── 480 | 000 0 00 NUL '\0' (null character) 100 64 40 @ 481 | ... 482 | 012 10 0A LF '\n' (new line) 112 74 4A J 483 | ... 
484 | 060 48 30 0 160 112 70 p 485 | 061 49 31 1 161 113 71 q 486 | 062 50 32 2 162 114 72 r 487 | 063 51 33 3 163 115 73 s 488 | 064 52 34 4 164 116 74 t 489 | 065 53 35 5 165 117 75 u 490 | 066 54 36 6 166 118 76 v 491 | 067 55 37 7 167 119 77 w 492 | 070 56 38 8 170 120 78 x 493 | 071 57 39 9 171 121 79 y 494 | ``` 495 | So if we take our digit `4` and add `48` we get `52`. And if we do that will 496 | all the digits and arrage them after each other in memory we can then pass 497 | a pointer to that memory to the write system call. 498 | 499 | 500 | ### executable stack 501 | The background here is that linux has allowed the stack to be executable in the 502 | past, for example for nested functions and trampoline code. I think this was 503 | before exploits were a real issue but nowadays this is something that is 504 | prevented as allowing the programs data or stack to be executable allows for 505 | code to be placed in these regions of memory and then jumped to which can be 506 | used in exploits. 507 | 508 | The solution is that compilers can add a section to the object file which the 509 | linker can then detect to make the data/stack non-executable. But if this 510 | section is missing the data/stack area will be executable. Remember that the 511 | assembler will create an object file which contains information for the linker, 512 | and the linker will create an object file that is used by the operating system 513 | to load it into memory. If I recall the permission for memory is set in the page 514 | table so this would have to some information in the object file created by the 515 | linker that causes the OS to make the stack be executable. 516 | 517 | When the ld does its linking linking, it will not add a specific program header 518 | if a single object file is not marked as non-executable. 
So an object file, 519 | for example an assembly source that is not marked as non-executable, 520 | would make the entire library/executable become marked as having an executable 521 | stack. 522 | 523 | For an executable file (or a shared object file) this information is in the 524 | program header, which is an array of structures that each describe a segment that 525 | the system needs to handle to make a process for the program. The program 526 | header is only meaningful for executable and shared object files. 527 | This struct looks like this: 528 | ```c 529 | typedef struct { 530 | Elf64_Word p_type; 531 | Elf64_Word p_flags; 532 | Elf64_Off p_offset; 533 | Elf64_Addr p_vaddr; 534 | Elf64_Addr p_paddr; 535 | Elf64_Xword p_filesz; 536 | Elf64_Xword p_memsz; 537 | Elf64_Xword p_align; 538 | } Elf64_Phdr; 539 | ``` 540 | Notice the `p_flags` which can be: 541 | ``` 542 | PF_X 0x1 Execute 543 | PF_W 0x2 Write 544 | PF_R 0x4 Read 545 | PF_MASKPROC 0xf0000000 Unspecified 546 | ``` 547 | `PT_GNU_STACK` is a `p_type` value whose `p_flags` member specifies the permissions on the segment 548 | containing the stack and is used to indicate whether the stack should be 549 | executable. `The absence of this header indicates that the stack will be 550 | executable`. This is not exactly true and the value of the p_flags entry might 551 | not be `PF_X` in which case the program header will still be there. 552 | For example: 553 | Using the example without the section in the source we compile and link like this: 554 | ```console 555 | $ as -o exec-stack.o exec-stack.s 556 | $ ld -z execstack -o exec-stack exec-stack.o 557 | $ readelf -l exec-stack | grep -A1 GNU_STACK 558 | GNU_STACK 0x0000000000000000 0x0000000000000000 0x0000000000000000 559 | 0x0000000000000000 0x0000000000000000 RWE 0x10 560 | ``` 561 | Notice that we have the program header but the flags are `RWE`.
562 | ```console 563 | $ ld -z noexecstack -o exec-stack exec-stack.o 564 | $ readelf -l exec-stack | grep -A1 GNU_STACK 565 | GNU_STACK 0x0000000000000000 0x0000000000000000 0x0000000000000000 566 | 0x0000000000000000 0x0000000000000000 RW 0x10 567 | ``` 568 | 569 | When assembling with as (gnu assembler) this can be specified using: 570 | ``` 571 | --execstack or --noexecstack assembler options 572 | ``` 573 | Or you can add the section to the assembly source file: 574 | 575 | ```assembly 576 | .section .note.GNU-stack,"",@progbits 577 | ``` 578 | Specifying `--noexecstack` would be the same as adding the section to the 579 | assembly source code. 580 | 581 | It is also possible to specify this flag to the linker: 582 | ```console 583 | $ as -o exec-stack.o exec-stack.s 584 | $ ld -z noexecstack -o exec-stack exec-stack.o 585 | ``` 586 | So if we have a source file without the .note.GNU-stack section this would add 587 | the `GNU_STACK` program header to the generated executable. 588 | 589 | The example [exec-stack.s](./exec-stack.s) will be used below and if we take 590 | a look at the program headers for it without the section `.note.GNU-stack` added 591 | we see: 592 | ```console 593 | $ readelf -w --program-headers exec-stack 594 | Elf file type is EXEC (Executable file) 595 | Entry point 0x401000 596 | There are 3 program headers, starting at offset 64 597 | 598 | Program Headers: 599 | Type Offset VirtAddr PhysAddr 600 | FileSiz MemSiz Flags Align 601 | LOAD 0x0000000000000000 0x0000000000400000 0x0000000000400000 602 | 0x00000000000000e8 0x00000000000000e8 R 0x1000 603 | LOAD 0x0000000000001000 0x0000000000401000 0x0000000000401000 604 | 0x0000000000000015 0x0000000000000015 R E 0x1000 605 | LOAD 0x0000000000002000 0x0000000000402000 0x0000000000402000 606 | 0x0000000000000004 0x0000000000000004 RW 0x1000 607 | ``` 608 | Notice that there are only three Program Headers!
609 | 610 | And if we add the section we can find: 611 | ```console 612 | $ readelf -w --program-headers exec-stack 613 | Elf file type is EXEC (Executable file) 614 | Entry point 0x401000 615 | There are 4 program headers, starting at offset 64 616 | 617 | Program Headers: 618 | Type Offset VirtAddr PhysAddr 619 | FileSiz MemSiz Flags Align 620 | LOAD 0x0000000000000000 0x0000000000400000 0x0000000000400000 621 | 0x0000000000000120 0x0000000000000120 R 0x1000 622 | LOAD 0x0000000000001000 0x0000000000401000 0x0000000000401000 623 | 0x0000000000000015 0x0000000000000015 R E 0x1000 624 | LOAD 0x0000000000002000 0x0000000000402000 0x0000000000402000 625 | 0x0000000000000004 0x0000000000000004 RW 0x1000 626 | GNU_STACK 0x0000000000000000 0x0000000000000000 0x0000000000000000 627 | 0x0000000000000000 0x0000000000000000 RW 0x10 628 | ``` 629 | So when we have the section there will be a program header named `GNU_STACK` and 630 | this executable will not be allowed to execute code in the data or stack 631 | section. 632 | 633 | This program header will be checked when the binary is loaded in 634 | [load_elf_binary](https://elixir.bootlin.com/linux/latest/source/fs/binfmt_elf.c#L929): 635 | ```c 636 | for (i = 0; i < elf_ex->e_phnum; i++, elf_ppnt++) 637 | switch (elf_ppnt->p_type) { 638 | case PT_GNU_STACK: 639 | if (elf_ppnt->p_flags & PF_X) 640 | executable_stack = EXSTACK_ENABLE_X; 641 | else 642 | executable_stack = EXSTACK_DISABLE_X; 643 | break; 644 | 645 | ... 646 | 647 | if (elf_read_implies_exec(*elf_ex, executable_stack)) 648 | current->personality |= READ_IMPLIES_EXEC; 649 | ... 650 | ``` 651 | Notice that we only enter this if `PT_GNU_STACK` is present. So checking whether 652 | a `PT_GNU_STACK` header is present, and whether its flags include `PF_X`, should be 653 | enough to determine if an executable has a non-executable stack.
654 | 655 | For example, without the section added: 656 | ```console 657 | $ readelf -w -l exec-stack | grep GNU_STACK 658 | ``` 659 | And with it added: 660 | ```console 661 | $ readelf -w -l exec-stack | grep GNU_STACK 662 | GNU_STACK 0x0000000000000000 0x0000000000000000 0x0000000000000000 663 | ``` 664 | 665 | We can also inspect the object file using: 666 | ```console 667 | $ readelf -WS exec-stack.o | grep .note.GNU-stack 668 | [ 5] .note.GNU-stack PROGBITS 0000000000000000 000059 000000 00 0 0 1 669 | ``` 670 | And without the section in the source this would not match, unless we assemble with `--noexecstack`: 671 | ```console 672 | $ as --noexecstack -o exec-stack.o exec-stack.s 673 | $ readelf -WS exec-stack.o | grep .note.GNU-stack 674 | [ 5] .note.GNU-stack PROGBITS 0000000000000000 000059 000000 00 0 0 1 675 | ``` 676 | 677 | ## No Execute (NX) 678 | The no execute bit is used in CPUs to separate memory areas for either data 679 | storage or instruction storage. An operating system with NX support will mark 680 | certain areas of memory as non-executable.
681 | 682 | 683 | ### sections 684 | We can add sections as we wish: 685 | ```assembly 686 | .section bajja_section 687 | ``` 688 | ```console 689 | $ objdump -h tmp.o 690 | 691 | tmp.o: file format elf64-x86-64 692 | 693 | Sections: 694 | Idx Name Size VMA LMA File off Algn 695 | 0 .text 00000030 0000000000000000 0000000000000000 00000040 2**0 696 | CONTENTS, ALLOC, LOAD, RELOC, READONLY, CODE 697 | 1 .data 0000000a 0000000000000000 0000000000000000 00000070 2**0 698 | CONTENTS, ALLOC, LOAD, DATA 699 | 2 .bss 00000000 0000000000000000 0000000000000000 0000007a 2**0 700 | ALLOC 701 | 3 bajja_section 00000000 0000000000000000 0000000000000000 0000007a 2**0 702 | CONTENTS, READONLY 703 | ``` 704 | 705 | ### cmp and sub 706 | The cmp instruction will take a source and destination and subtract the 707 | source from the destination: 708 | ```assembly 709 | mov $11, %rax 710 | mov $10, %rsi 711 | cmp %rax, %rsi 712 | ``` 713 | This is similar to `sub %rax, %rsi` only the destination %rsi will not be modified. 714 | So the source is subtracted from the destination, so this will perform 715 | `%rsi - %rax = 10 - 11 = -1`. 716 | 717 | Remember to display rflags using `--binary or -b`: 718 | ```console 719 | (lldb) expr -f b -- $rflags 720 | (unsigned long) $11 = 0b0000000000000000000000000000000000000000000000000000001010010111 721 | ``` 722 | Using `expr` allows us to use a mask to find the values that we might be 723 | interested in. For example, to see only the value of the carry flag: 724 | ```console 725 | (lldb) expr -f b -- $rflags & 0x0001 726 | (unsigned long) $11 = 0b0000000000000000000000000000000000000000000000000000000000000001 727 | ``` 728 | When subtracting, and hence this is also true for cmp, the carry flag is set 729 | when the result becomes too large of a negative value. For example 730 | ``` 731 | 0000 - 0001 = 1111 = -1 732 | ``` 733 | 734 | The flags affected by cmp/sub are CF, OF, SF, ZF, AF, and PF.
735 | 736 | The carry flag is used to determine when subtracting unsigned integers produces 737 | a negative value. 738 | How about signed integers and subtracting them, these could have valid negative 739 | numbers. In this case the carry flag is not useful. Instead one has to use 740 | the overflow flag. 741 | 742 | ### test 743 | This instruction is very similar to `cmp` and affects the same flags in almost 744 | the same way with the exception of `AF`. 745 | 746 | ### rflags 747 | 748 | ``` 749 | 11 10 9 8 7 6 5 4 3 2 1 0 750 | +-----------------------------------------------------------------------------+ 751 | | |OF|DF|IF|TF|SF|ZF| |AF| |PF| |CF| 752 | +-----------------------------------------------------------------------------+ 753 | 754 | CF = Carry 0x0001 00000000000000000000000000000001 755 | PF = Parity 0x0004 00000000000000000000000000000100 756 | AF = Adjust 0x0010 00000000000000000000000000010000 757 | ZF = Zero 0x0040 00000000000000000000000001000000 758 | SF = Sign 0x0080 00000000000000000000000010000000 759 | TF = Trap 0x0100 00000000000000000000000100000000 760 | IF = Interrupt 0x0200 00000000000000000000001000000000 761 | DF = Direction 0x0400 00000000000000000000010000000000 762 | OF = Overflow 0x0800 00000000000000000000100000000000 763 | ``` 764 | The above masks can be used to AND the rflags register to check values: 765 | ```console 766 | (lldb) expr -f b -- $rflags & 0x0001 767 | ``` 768 | 769 | ### Parity Flag 770 | The parity flag on x86 only looks at one byte, the least significant, and if 771 | the number of bits set is even the parity flag is set, and if it is odd then it is 0. 772 | 773 | ### Adjust/Auxiliary/Auxiliary Carry Flag 774 | The adjust flag is also called the Auxiliary flag or Auxiliary Carry flag. This 775 | is set if an arithmetic operation causes a borrow to occur in the four least 776 | significant bits. This was used for BCD (binary-coded decimal) arithmetic and is not really used anymore, but 777 | the cmp/sub instructions can affect these flags.
778 | 779 | ### Zero flag 780 | ``` 781 | $ lldb -- zero-flag 782 | (lldb) target create "zero-flag" 783 | Current executable set to 'zero-flag' (x86_64). 784 | (lldb) br s -n _start 785 | (lldb) disassemble 786 | zero-flag`_start: 787 | 0x401000 <+0>: mov rax, 0x2 788 | -> 0x401007 <+7>: mov rcx, 0x2 789 | 0x40100e <+14>: sub rcx, rax 790 | (lldb) expr -f b -- $rflags & 0x0040 791 | (unsigned long) $0 = 0b0000000000000000000000000000000000000000000000000000000000000000 792 | (lldb) si 793 | (lldb) expr -f b -- $rflags & 0x0040 794 | (unsigned long) $5 = 0b0000000000000000000000000000000000000000000000000000000001000000 795 | ``` 796 | 797 | ### Direction flag 798 | 799 | ## conditional set instruction (setcc) 800 | This instruction will conditionally set the destination operand to 0 or 1 801 | depending on the status flags (CF, SF, OF, ZF, and PF). 802 | 803 | An example can be found in [setne.s](./setne.s). 804 | 805 | For a real example this usage can be found in Node.js: 806 | ```console 807 | a4c1f3: e8 58 ab fd ff callq a26d50 808 | a4c204: 48 85 c0 test %rax,%rax 809 | a4c207: 0f 95 05 73 c8 ba 03 setne 0x3bac873(%rip) # 45f8a81 <_ZN4node11per_process15linux_at_secureE> 810 | ``` 811 | Notice that we are calling `getauxval` and it returns a value which on x64 will 812 | be in register rax. The test is anding that register with itself, if the result 813 | is not zero then we set the value at the address relative to the value in the 814 | instruction pointer register. 815 | 816 | 817 | ### Conditional mov 818 | TODO: add an example of this. 819 | 820 | ### dereferencing 821 | ```assembly 822 | mov msg, %rcx 823 | ``` 824 | This will move the contents located at the address msg, the size of the data 825 | will be 64-bits as we are moving into rcx.
826 | ```console 827 | (lldb) disassemble -F att 828 | 0x401000 <+0>: movq 0x402000, %rcx 829 | ``` 830 | 831 | We can put parentheses around it which is the same thing as saying that we 832 | want to copy the data located at the address msg. 833 | ```assembly 834 | mov (msg), %rax 835 | ``` 836 | A reason for doing this is perhaps we want to add an offset to the address: 837 | ```assembly 838 | mov (msg+0), %rcx 839 | ``` 840 | But that is actually possible without the parentheses. 841 | ```console 842 | 0x401011 <+17>: movq 0x402000, %rcx 843 | ``` 844 | But this can be useful when we want to specify an offset for a register perhaps 845 | like: 846 | ```assembly 847 | 0x4016cd <+11>: movq %rsi, -0x20(%rbp) 848 | ``` 849 | 850 | Now if we use `$`, which is also used for immediate values (like `$4` would be the 851 | constant 4), remember that msg is just an address and we are specifying that 852 | as an immediate value. 853 | ```assembly 854 | mov $msg, %rcx 855 | ``` 856 | We also use $ with constants, like $4 would be the constant 4 and remember 857 | that msg is just an address 858 | ```console 859 | 0x401008 <+8>: movq $0x402000, %rax ; imm = 0x40200 860 | ``` 861 | 862 | And this is the same as using `lea` to load the effective address. 863 | ```assembly 864 | lea msg, %rbx 865 | ``` 866 | ```console 867 | 0x401020 <+32>: leaq 0x402000, %rbx 868 | ``` 869 | 870 | ### .p2align directive 871 | This directive is used to pad the location counter 872 | ```assembly 873 | .p2align 5,,31 874 | ``` 875 | This pads to align on a 32-byte boundary. 876 | 877 | --------------------------------------------------------------------------------