├── .gitignore
├── Dockerfile
├── HARNESSING.md
├── Paper.pdf
├── README.md
├── blackbox-fuzzing
    ├── afl++.png
    ├── chatgpt.png
    ├── firefox.png
    ├── ghidra_search.png
    ├── ghidra_tmpd.png
    └── tmpd_function_list.png
├── corpus
    ├── add_carriage_return.py
    ├── httpd
    │   ├── abc.txt
    │   ├── cgi.txt
    │   ├── gdpr.txt
    │   ├── get_param.txt
    │   └── login_base64.txt
    ├── notify.txt
    └── simple.txt.bck
├── gdb_hook_init.gdb
├── harnesses
    ├── http_recv_hook.c
    ├── httpd_parser_main.c
    └── parser_parse_hook.c
├── safl_fuzz_httpd.sh
├── sdebug_httpd.sh
└── srun_dir_httpd.sh


/.gitignore:
--------------------------------------------------------------------------------
1 | afl-out/


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM debian:latest
 2 | 
 3 | RUN apt update && apt install -y \
 4 |       curl \
 5 |       vim \
 6 |       gcc-mipsel-linux-gnu \
 7 |       openssh-server \
 8 |       qemu-user-static \
 9 |       gdb-multiarch
10 | # Qemu statics are installed at /usr/bin/qemu-mipsel-static
11 | 
12 | # Compiling AFL++
13 | RUN apt install -y git make build-essential clang ninja-build pkg-config libglib2.0-dev libpixman-1-dev
14 | RUN git clone https://github.com/AFLplusplus/AFLplusplus /AFLplusplus
15 | WORKDIR /AFLplusplus
16 | RUN make all
17 | WORKDIR /AFLplusplus/qemu_mode
18 | RUN CPU_TARGET=mipsel ./build_qemu_support.sh
19 | 
20 | RUN echo "#!/bin/bash\n\nsleep infinity" >> /entry.sh
21 | RUN chmod +x /entry.sh
22 | 
23 | WORKDIR /share
24 | ENTRYPOINT [ "/entry.sh" ]
25 | 
26 | # docker build -t fuzz .
27 | # docker run -d --rm -v $PWD/:/share --name fuzz fuzz


--------------------------------------------------------------------------------
/HARNESSING.md:
--------------------------------------------------------------------------------
 1 | ```sh
 2 | # Start in docker container
 3 | $ docker run -ti -v $PWD/:/share forallsecure/fuzzing-firmware
 4 | 
 5 | # Install cross compiler for mipsel
 6 | $ apt update
 7 | $ apt install -y gcc-mipsel-linux-gnu
 8 | 
 9 | # Cross compile as shared library
10 | $ mipsel-linux-gnu-gcc http_recv_hook.c -o http_recv_hook.o -shared -fPIC
11 | 
12 | # Create sample corpus
13 | $ echo "AAABBBCCC" > corpus.txt
14 | 
15 | $ chroot root /qemu-mipsel-static -strace /usr/bin/wscd -d 3 -i eth0 -m 1
16 | 
17 | $ cp fuzzing/http_recv_hook.o root/http_recv_hook.o
18 | $ chroot root /qemu-mipsel-static -E LD_PRELOAD=/http_recv_hook.o /usr/bin/wscd
19 | /usr/bin/wscd: can\'t load library 'libc.so.6'
20 | 
21 | $ cp /usr/mipsel-linux-gnu/lib/ld.so.1 root/lib/
22 | 
23 |     $ cp /usr/mipsel-linux-gnu/lib/libc.so.6 root/lib/
24 |     /usr/bin/wscd: '/lib/libc.so.6' library contains unsupported TLS
25 |     /usr/bin/wscd: '/lib/libc.so.6' library contains unsupported TLS
26 |     /usr/bin/wscd: can\'t load library 'libc.so.6'
27 | 
28 | $ cp root/lib/libc.so.0 root/lib/libc.so.6
29 | 
30 | 
31 | $ chroot root /qemu-mipsel-static -E LD_PRELOAD=/http_recv_hook.o /usr/bin/wscd
32 | qemu: uncaught target signal 11 (Segmentation fault) - core dumped
33 | ```
34 | 
35 | 
36 | # Debugging
37 | 
38 | find / -name libreadline.so.7
39 | cp /lib/x86_64-linux-gnu/libreadline.so.7 root/lib/x86_64-linux-gnu/libreadline.so.7
40 | 
41 | cp -r /lib/x86_64-linux-gnu/ root/lib/
42 | cp -r /usr/lib/x86_64-linux-gnu root/usr/lib
43 | 
44 | ```sh
45 | 
46 | # NOT WORKING
47 | $ chroot root /qemu-mipsel-static -g 1234 -E LD_PRELOAD=/http_recv_hook.o /usr/bin/wscd
48 | 
49 | # On another screen
50 | $ docker exec -it 84 /bin/bash
51 | $ cp /usr/bin/gdb-multiarch root
52 | $ gdb-multiarch
53 | (gdb) source fuzzing/gdb_hook_init.gdb
54 | (gdb) c
55 | Continuing.
56 | 
57 | Breakpoint 1, 0x00433fd0 in __uClibc_main (
58 | 
59 | # Finding the function in which the segmentation fault occurs
60 | # Stepping over the function
61 | (gdb) ni
62 | 0x00413224 in http_RecvMessage ()
63 | (gdb) disas
64 | ...
65 |    0x00413218 <+160>:   move    a0,s6
66 |    0x0041321c <+164>:   addiu   a1,sp,24
67 |    0x00413220 <+168>:   li      a2,2048
68 | => 0x00413224 <+172>:   bal     0x421364 <sock_read>
69 |    0x00413228 <+176>:   move    a3,s7
70 |    0x0041322c <+180>:   lw      gp,16(sp)
71 |    0x00413230 <+184>:   blez    v0,0x4132ac <http_RecvMessage+308>
72 | ...
73 | (gdb) ni
74 | Program received signal SIGSEGV, Segmentation fault.
75 | 0x0042106c in ?? ()
76 | ```
77 | 


--------------------------------------------------------------------------------
/Paper.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/otsmr/blackbox-fuzzing/ea8a7ca767f41c078de61dd96667dba755f73445/Paper.pdf


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # Blackbox-Fuzzing of IoT Devices Using the Router TL-WR902AC as Example
  2 | 
  3 | This is the HTML version of my term paper which can be downloaded as PDF
  4 | [here](Paper.pdf).
  5 | 
  6 | ## Introduction
  7 | 
  8 | Fuzzing has become "one of the most effective ways" of finding bugs in software. With this or
  9 | similar claims, many current fuzzing-related papers start
 10 | [\[google-scholar\]](https://scholar.google.com/scholar?start=40&q=Fuzzing+%2B%22most+effective%22&hl=de&as_sdt=0,5).
 11 | The main goal of our last term paper about the topic "Internet of Vulnerable Things" was to find a
 12 | memory-related bug and then write an exploit for this vulnerability. We were able to find a
 13 | vulnerability by reversing the firmware, but no memory related bugs were found. Finding a buffer
 14 | overflow by reversing a binary by hand is not only time-consuming, but also requires a lot of
 15 | experience. Fuzzing at the same time aims to be the "most effective way" to find such memory related
 16 | vulnerabilities.  Google, for example, introduced OSS-Fuzz, which continuously fuzzes open source
 17 | software and has already found over 10,000 vulnerabilities across 1,000 projects
 18 | [\[oss-fuzz\]](https://github.com/google/oss-fuzz).
 19 | 
 20 | The goal of this term paper is again to find a memory-related vulnerability, but this time by using
 21 | fuzzing. The goal vulnerability should be exploitable over the network without knowledge of the
 22 | admin credentials. This paper describes the way to achieve this goal. For this, the paper is
 23 | separated into two parts. The first part focuses on how to find a potent target, which tools can be
 24 | used, and what a good fuzzing target should consist of. The second part then describes how to
 25 | develop and debug a harness that is able to fuzz a specific function in a binary. Then the developed
 26 | harness is used by AFL++ to fuzz the target function. In the following, the current state of the
 27 | art in IoT device fuzzing is presented and a short background is given.
 28 | 
 29 | All files created in context of this term paper are also published in full on GitHub and can be
 30 | accessed using the following URL:
 31 | [otsmr/blackbox-fuzzing](https://github.com/otsmr/blackbox-fuzzing).
 32 | 
 33 | ### State of the Art
 34 | 
 35 | Fuzzing IoT devices is not as easy as fuzzing an open source project.  Often the source code is
 36 | proprietary, which makes gray-box fuzzing, which instruments the source code for the best fuzzing
 37 | performance, impossible
 38 | [\[afl-persistent\]](https://github.com/AFLplusplus/AFLplusplus/blob/stable/instrumentation/README.persistent_mode.md).
 39 | Also, the CPU architecture is often not natively supported by fuzzers which requires an emulator
 40 | like QEMU [\[qemu\]](https://www.qemu.org/) which also slows down the fuzzing speed
 41 | [\[afl-persistent\]](https://github.com/AFLplusplus/AFLplusplus/blob/stable/instrumentation/README.persistent_mode.md).
 42 | Another issue is hardware peripherals, which complicate the development of a general approach.
 43 | The paper "Embedded Fuzzing: A Review of Challenges, Tools, and Solutions"
 44 | [\[embedded-fuzzing\]](https://cybersecurity.springeropen.com/articles/10.1186/s42400-022-00123-y#Sec12)
 45 | gives an overview of different fuzzing strategies, like hardware-based embedded fuzzing. Most of
 46 | these strategies need the source code of the target program, for example when porting the source
 47 | code of a fuzzer like AFL to ARM-based IoT devices to run the fuzzer on the IoT hardware. Running the
 48 | fuzzer on the device's hardware also has performance problems because such devices often have low-end
 49 | CPUs, which are slower than normal desktop CPUs. Another approach presented in this paper is
 50 | emulation-based embedded fuzzing, where either a single targeted program or the full system is
 51 | executed in an emulator to perform coverage-guided fuzzing.
 52 | 
 53 | The above-mentioned approaches all target a binary directly by using an emulator or by instrumenting
 54 | the source code. These approaches require a fuzzing setup that must often be specifically crafted
 55 | for a single IoT device and are hard to generalize. For that, researchers created a program
 56 | `IoTFuzzer`, an automated fuzzing framework that aims at "finding memory corruption
 57 | vulnerabilities without access to their firmware images
 58 | [\[iotfuzzer\]](http://staff.ie.cuhk.edu.hk/~khzhang/my-papers/2018-ndss-iot.pdf)." `IoTFuzzer` is
 59 | based on the observation that most IoT devices have a mobile app to control them, and such apps
 60 | contain information about the protocol used to communicate with the device. The program then
 61 | identifies and reuses program-specific logic to mutate the test cases to effectively test IoT
 62 | targets [\[iotfuzzer\]](http://staff.ie.cuhk.edu.hk/~khzhang/my-papers/2018-ndss-iot.pdf).
 63 | 
 64 | ### Background
 65 | 
 66 | #### Harness
 67 | 
 68 | A harness describes a sequence of API calls processing the fuzzer-provided inputs. In contrast
 69 | to a normal application, which often does not need a harness, a library that implements reusable
 70 | functions must be called with the correct parameters and also in the right sequence, so that the
 71 | state shared between multiple function calls is built up correctly. Randomly fuzzing the library without building
 72 | the state machine is unlikely to be successful and will, in contrast, create a lot of false-positive
 73 | crashes when the library dependencies are not enforced.  This can happen when, for example, a buffer
 74 | size check is skipped by the fuzzer resulting in a spurious buffer overflow.
 75 | 
 76 | In this paper, normal applications will be fuzzed, but because of the hardware dependencies of the
 77 | use of sockets and multi-threading, we need to create a harness for them as well. The harness is
 78 | loaded in the context of the binary and can call internal functions of the targeted program, as
 79 | shown in [Code 10](#c10).
 80 | 
 81 | #### Corpus
 82 | 
 83 | The term "corpus" describes valid input samples or test cases and serves as a foundational reference
 84 | for generating new input data during the fuzzing process. In [Code 10](#c10) this would be, for
 85 | example, an HTTP request. Fuzzers then leverage this corpus to create mutated or diversified test
 86 | cases, aiding in the detection of software vulnerabilities through the exploration of various input
 87 | scenarios.
 88 | 
 89 | ## Finding a potent target
 90 | 
 91 | The most time-consuming part of black box fuzzing is finding a potentially
 92 | vulnerable function in the firmware. The first step is to find
 93 | interesting binaries that, for example, are accessible over the network,
 94 | use insecure functions or do not have security features like stack
 95 | canaries, a buffer overflow protection, enabled. Our last paper
 96 | ([\[iovt\]](https://raw.githubusercontent.com/otsmr/internet-of-vulnerable-things/main/Internet_of_Vulnerable_Things.pdf)) already described how to extract the firmware
 97 | from the targeted router and how to find a potentially dangerous binary.
 98 | For this, the tool EMBA [\[emba\]](https://github.com/e-m-b-a/emba) was used. EMBA ranks all
 99 | binaries found in the firmware by the count of insecure functions like
100 | `strcpy`, network access, and security protections like stack canaries or
101 | the NX bit, which become relevant when exploiting a buffer overflow. The
102 | result can be found in [Code 1](#c1).
103 | 
104 | <div id="c1"></div>
105 | 
106 | ```txt
107 | [+] STRCPY - top 10 results:
108 |  235   : libcmm.so       : common linux file: no  |  No RELRO  |  No Canary  |  NX disabled  |  No Symbols  |  No Networking |
109 |  77    : wscd            : common linux file: no  |  No RELRO  |  No Canary  |  NX disabled  |  No Symbols  |  Networking    |
110 |  [snip]
111 |  28    : httpd           : common linux file: yes |  RELRO     |  No Canary  |  NX enabled   |  No Symbols  |  Networking    |
112 |  27    : cli             : common linux file: no  |  No RELRO  |  No Canary  |  NX disabled  |  No Symbols  |  No Networking |
113 | ```
114 | <p style="text-align: center">Code 1: EMBA's results for insecure uses of the function strcpy.</p>
115 | 
116 | Because the goal of this paper is to find a memory vulnerability that can be exploited over the
117 | network without the knowledge of the admin credentials, the vulnerable function must be callable
118 | over the network and should interact directly with the provided user input. But having network
119 | interaction does not mean the binary is also directly accessible over the network. To find out which
120 | binaries are listening, we can use the UART root shell, which was already established in
121 | [\[iovt\]](https://raw.githubusercontent.com/otsmr/internet-of-vulnerable-things/main/Internet_of_Vulnerable_Things.pdf).
122 | 
123 | <div id="c2"></div>  
124 | 
125 | ```txt
126 |  ~ # netstat -tulpn
127 | Active Internet connections (only servers)
128 | Proto Recv-Q Send-Q Local Address           Foreign Address         State       PID/Program name
129 | tcp        0      0 127.0.0.1:20002         0.0.0.0:*               LISTEN      1045/tmpd
130 | tcp        0      0 0.0.0.0:1900            0.0.0.0:*               LISTEN      1034/upnpd
131 | tcp        0      0 0.0.0.0:80              0.0.0.0:*               LISTEN      1027/httpd
132 | tcp        0      0 0.0.0.0:22              0.0.0.0:*               LISTEN      1224/dropbear
133 | udp        0      0 0.0.0.0:20002           0.0.0.0:*                           1048/tdpd
134 | [...]
135 | ```
136 | <p style="text-align: center">Code 2: Using the UART root shell to execute netstat</p>
137 | 
138 | ### Reversing the binary
139 | 
140 | The first binary that looks promising is `wscd`. The binary has the most unsafe `strcpy` calls
141 | (except for the libcmm.so library) and network interaction, which in the case of `wscd` means it
142 | connects to a `UPnP` device and does not listen on a specific port. It has, as shown later, an easy
143 | function to fuzz, which is why this binary was selected as an example in this paper to explain the
144 | general procedure. Before reversing, we can use the UART root shell to find out if the binary is
145 | running and how it was started.
146 | 
147 | <div id="c3"></div>
148 | 
149 | ```txt
150 | $ ps
151 |  PID USER       VSZ STAT COMMAND
152 |  962 admin     1096 S    wscd -i ra0 -m 1 -w /var/tmp/wsc_upnp/
153 | 1018 admin     1080 S    wscd_5G -i rai0 -m 1 -w /var/tmp/wsc_upnp_5G/
154 | ```
155 | <p style="text-align: center">Code 3: Using the command ps to display all running programs.</p>
156 | 
157 | With `ps` we not only see that the binary is running but also what the arguments are, which are
158 | important to verify if a potential function is called at all. The meaning of these arguments can be
159 | gained from the CLI help, which is displayed when calling the binary without any arguments.
160 | 
161 | <div id="c4"></div>
162 | 
163 | ```txt
164 | $ chroot root /qemu-mipsel-static /usr/bin/wscd
165 | Usage: wscd [-i infName] [-a ipaddress] [-p port] [-f descDoc] [-w webRootDir] -m UPnPOpMode -D [-d debugLevel] -h
166 |  -i:  Interface name this daemon will run wsc protocol(if not set, will use the default interface name - ra0)
167 |        e.g.: ra0
168 |  -w: Filesystem path where descDoc and web files related to the device are stored
169 |        e.g.: /etc/xml/
170 |  -m: UPnP system operation mode
171 |        1: Enable UPnP Device service(Support Enrolle or Proxy functions)
172 |        2: Enable UPnP Control Point service(Support Registratr function)
173 |        3: Enable both UPnP device service and Control Point services.
174 |  [...]
175 | ```
176 | <p style="text-align: center">Code 4: Options of the binary wscd.</p>
177 | 
178 | As shown in [Code 4](#c4), `wscd` is started with "Enable UPnP Device service", which looks
179 | promising. After verifying that the binary is actually running on the router, the binary can then be
180 | analyzed using [Ghidra](https://ghidra-sre.org/) to search for suspect functions. For fuzzing,
181 | parsing functions are especially interesting because they are usually complex, and often the input
182 | that is parsed has length fields for the containing data, like the TCP packet contains the length of
183 | the payload.
184 | 
185 | <div id="f1"></div>
186 | <figure>
187 |   <p> <img src="blackbox-fuzzing/ghidra_search.png" style="width:90.0%" /></p>
188 |   <figcaption>
189 |     <p>Figure 1: Using Ghidra to search for parsing functions.</p>
190 |   </figcaption>
191 | </figure>
192 | 
193 | Another benefit of parsing functions is that they often do not interact with other parts of the
194 | code or have user interaction over the network. So the parsing function can be called directly with
195 | the input without modifying the binary or overwriting other functions, so the function can be
196 | fuzzed.
197 | 
198 | Before starting to fuzz the function, it should be checked if the function is triggered at all,
199 | because the function is only interesting when it is called with a user-controlled input. For this,
200 | Ghidra can be used to search for references to the target function. In the case of the
201 | `parser_parse` function there are multiple call paths. Because we know how the program is started, the
202 | calls can be reduced to a single function call tree, shown in [Code 5](#c5).
203 | 
204 | <div id="c5"></div>
205 | 
206 | ```c
207 | main()
208 |  if ((WscUPnPOpMode & 1) != 0) // Argument -m 1
209 |   WscUPnPDevStart()
210 |    UpnpDownloadXmlDoc() -> my_http_Download() -> http_Download()
211 |      if (http_MakeMessage())
212 |       http_RequestAndResponse()
213 |        http_RecvMessage()
214 | ```
215 | <p style="text-align: center">Code 5: Call tree of the function parser_parse</p>
216 | 
217 | After a target function is found, we can now create a fuzzing setup to fuzz the function, which is
218 | described in the next part. But first other potent functions are presented.
219 | 
220 | ### Other potential vulnerable functions
221 | 
222 | For this paper, multiple potential binaries were manually analyzed for suspect functions. The
223 | following is a short summary of other possible targets which were found.
224 | 
225 | The binary **httpd** is the backend for the admin web interface. The binary is accessible over the
226 | network on port 80. One interesting function in `httpd` is the `httpd_parser_main` function. While
227 | skimming the parser implementation using Ghidra several different suspect code parts could be
228 | identified. One of the suspect parts is the parsing of the `Content-Type`. In the following, a basic
229 | HTTP request can be found.
230 | 
231 | ```txt
232 | POST / HTTP/1.1\r\n
233 | Content-Type: multipart/form-data; boundary=X;\r\n
234 | Host: example.com\r\n
235 | \r\n
236 | \r\n
237 | DATA\r\n
238 | ```
239 | 
240 | Below is a snippet from the `httpd_parser_main` function which parses the `Content-Type` from the
241 | user provided http request.
242 | 
243 | 
244 | <div id="c6"></div>
245 | 
246 | ```c
247 | // user_input_ptr points to
248 | //  "Content-Type: multipart/form-data; boundary=X;\r\nHost: example.com\r\n..."
249 | cursor = strstr(user_input_ptr,"multipart/form-data");
250 | 
251 | if (user_input_ptr == cursor) {
252 |  cursor = strstr(user_input_ptr,"boundary=");
253 |  user_input_ptr = cursor + 9; // skip over "boundary=" (9 characters)
254 | 
255 |  // user_input_ptr points now to "X;\r\nHost: example.com\r\n..."
256 | 
257 |  if (cursor != (char *)0x0) {
258 | 
259 |   do {
260 |     while (cursor = user_input_ptr, *cursor == ' ') {
261 |       user_input_ptr = cursor + 1;
262 |     }
263 |     user_input_ptr = cursor + 1;
264 |   } while (*cursor == '\t');
265 | 
266 |   // cursor points now to "X;\r\nHost: example.com\r\n..."
267 | 
268 |   // strchr returns a pointer to the first occurrence of ';' in the user request.
269 |   // If ';' is not found, the function returns a null pointer.
270 |   user_input_ptr = strchr(cursor, ';');
271 |   if (user_input_ptr != (char *)0x0) {
272 |     // The character ';' is replaced by a null byte to terminate the string
273 |     *user_input_ptr = '\0';
274 |     // cursor points now to "X\0\r\nHost: example.com\r\n..."
275 |   }
276 | 
277 |   // DAT_00444050 global array from 0x00444050 to 0x0044414f (256 Bytes)
278 |   strcpy(&DAT_00444050, cursor); // no length check: copies until the next null byte
279 |   // DAT_00444050 contains now "X"
280 |  }
281 | }
282 | ```
283 | <p style="text-align: center">Code 6: Snippet of the function httpd_parser_main which parses the Content-Type</p>
284 | 
285 | The vulnerability in this code is the function call `strcpy` and the assumption that the
286 | `Content-Type` ends with a semicolon. `strcpy` copies the buffer until the next null byte,
287 | and as shown in [Code 6](#c6) the null byte is only added when a semicolon is found. By removing the
288 | semicolon, the next null byte is at the end of the input buffer, e.g., the end of the HTTP request.
289 | So the global variable DAT_00444050 can be overflowed, which then overwrites data beyond the address
290 | 0x0044414f. The challenging part is not only to find an interesting global variable beyond this
291 | address that could be overwritten, but also that no null bytes can be used because of `strcpy`. But
292 | when there is one such mistake, there are probably more to find.
293 | 
294 | The binary **tdpd** is used by the mobile app and is accessible over UDP on the local network.
295 | `tdpd` contains almost the same functions as `tmpd`, but most of them are never called. The main
296 | function only listens for messages over the UDP port and always responds with basic information
297 | about the router, like the name or model. There is barely any interaction with the user-provided
298 | input, which is therefore not interesting to fuzz.
299 | 
300 | Another interesting pair of binaries are **upnpd** and **ushare**. Both binaries are handling `UPnP`
301 | messages which therefore need to parse XML.  Because a copyright string can be found in the binary,
302 | it can be assumed that these programs were not developed by TP-Link.
303 | 
304 | ```sh
305 | $ strings usr/bin/ushare | grep "(C)"
306 | Benjamin Zores (C) 2005-2007, for GeeXboX Team.
307 | ```
308 | 
309 | Both binaries are loading the shared libraries `libupnp.so` and `libixml.so` which have the same
310 | functions as the open source project `pupnp` [\[pupnp\]](https://github.com/pupnp/pupnp/). Because
311 | the focus of this paper is black box fuzzing, these binaries are ignored. But gray box fuzzing this
312 | library could have potential because in 2021 a memory leak was found in `libixml.so`
313 | [\[pupnp-mem-leak\]](https://github.com/pupnp/pupnp/issues/249).
314 | 
315 | The binary **tmpd** is the backend of the mobile app. The interesting part is that the router and
316 | the mobile app are communicating over a custom binary protocol. In the following, a message from the
317 | client to the server is shown.
318 | 
319 | <div id="c7"></div>
320 | 
321 | ```txt
322 | 00000000  01 00 05 00 00 08 00 00  00 00 00 17 50 7b 6e fe  |............P{n.|
323 | 00000010  01 01 02 00 00 00 00 00                           |........        |
324 | ```
325 | <p style="text-align: center">Code 7: Message from the mobile app to the router.</p>
326 | 
327 | To understand the binary protocol, the binary `tmpd` was reversed using Ghidra. With this
328 | information, the message in [Code 7](#c7) can be broken down into the following:
329 | 
330 | 
331 | ```txt
332 | 01 00 05 00 : Version
333 | 00 08 00 00 : Size (8 Bytes)
334 | 00 00 00 17 : Datatype
335 | 50 7b 6e fe : Checksum (CRC32)
336 | 01 01       : Options
337 | 02 00       : Function id
338 | 00 00 00 00 : Function parameters
339 | ```
340 | <p style="text-align: center">Code 8: Custom binary protocol broken down.</p>
341 | 
342 | This looks promising because such binary protocols must be parsed. But the most suspect part of the
343 | binary protocol is not the length field, but the use of the function ID and function parameters.
344 | 
345 | 
346 | <figure id="f2">
347 |   <p><img src="blackbox-fuzzing/ghidra_tmpd.png" style="width:90.0%" /></p>
348 |   <figcaption>
349 |     <p style="text-align: center">Figure 2: Reversed function from tmpd which parses the function id and their parameters.</p>
350 |   </figcaption>
351 | </figure>
352 | 
353 | [Figure 2](#f2) shows a part of the decompiled parser function of the custom protocol. In line 16,
354 | the function ID is extracted, and the corresponding function is then called in line 29. The suspect
355 | behavior is that the function is called with parameters extracted without any check from the
356 | user-controlled input buffer. We could now try to find a function in the jump table shown in
357 | [Figure 3](#f3) where this could be dangerous, like when the parameter is used to index a buffer
358 | or interpreted as a string. Instead of manually reversing and searching the over 100 functions,
359 | which would be time-consuming, we can use a fuzzer which would do this automatically.
360 | 
361 | <figure id="f3">
362 | <p><img src="blackbox-fuzzing/tmpd_function_list.png"
363 | style="width:90.0%" /></p>
364 | <figcaption><p style="text-align: center">Figure 3: Jump table from tmpd which maps the function ID
365 | to the function that is called.</p></figcaption>
366 | </figure>
367 | 
368 | Unfortunately, the `tmpd` binary is only locally reachable over the network, as shown in [Code
369 | 2](#c2). To connect to this binary, the app first connects to the router via SSH in the mode
370 | `direct-tcpip` which just forwards the packets to the local process. And the SSH connection is
371 | protected by the admin credentials. But as described in
372 | [\[iovt\]](https://raw.githubusercontent.com/otsmr/internet-of-vulnerable-things/main/Internet_of_Vulnerable_Things.pdf)
373 | the SSH connection can easily be compromised because the server host key is never checked by the
374 | app. By dropping every packet routed to the internet, the admin can be tricked into logging in to
375 | the router while a man-in-the-middle attack is performed to steal the credentials.
376 | 
377 | ## Fuzzing with AFL++ and QEMU
378 | 
379 | In this section, a harness is developed targeting one of the previously found functions. After the
380 | harness is developed, the state-of-the-art fuzzer AFL++
381 | [\[aflpp\]](https://github.com/AFLplusplus/AFLplusplus) is used to fuzz the target function. Because
382 | the binaries are compiled for the `mipsel` architecture, the emulator QEMU is used to execute the
383 | binary. The basic fuzzing setup used in this paper is mostly inspired by the blog entry "Firmware
384 | Fuzzing 101" by Adam Van Prooyen [\[b101\]](https://www.mayhem.security/blog/firmware-fuzzing-101).
385 | 
386 | ### Fuzzing environment 
387 | 
388 | To easily create a reproducible fuzzing environment, Docker is the best choice. We created a
389 | Dockerfile that installs every necessary tool, like a cross-compiler for `mipsel` CPU architecture
390 | or `gdb-multiarch` which can be used to debug the harness.
391 | 
392 | Furthermore, AFLplusplus is downloaded and compiled together with QEMU which is built in a version
393 | with minor tweaks to allow non-instrumented binaries to be run under afl-fuzz.
394 | 
395 | ```docker
396 | FROM debian:latest
397 | 
398 | RUN apt update && apt install -y \
399 |       curl \
400 |       vim \
401 |       gcc-mipsel-linux-gnu \
402 |       openssh-server \
403 |       qemu-user-static \
404 |       gdb-multiarch
405 | # Qemu statics are installed at /usr/bin/qemu-mipsel-static
406 | 
407 | # Compiling AFL++
408 | RUN apt install -y git make build-essential clang ninja-build pkg-config libglib2.0-dev libpixman-1-dev
409 | RUN git clone https://github.com/AFLplusplus/AFLplusplus /AFLplusplus
410 | WORKDIR /AFLplusplus
411 | RUN make all
412 | WORKDIR /AFLplusplus/qemu_mode
413 | RUN CPU_TARGET=mipsel ./build_qemu_support.sh
414 | 
415 | RUN echo "#!/bin/bash\n\nsleep infinity" >> /entry.sh
416 | RUN chmod +x /entry.sh
417 | 
418 | WORKDIR /share
419 | ENTRYPOINT [ "/entry.sh" ]
420 | ```
421 | 
422 | Dockerfile which installs necessary tools.
423 | 
424 | The image can then be built using `docker build`.
425 | 
426 | ```sh
427 | docker build -t fuzz .
428 | ```
429 | 
430 | When the image is built, it can be easily used with `docker run` which
431 | then starts the container.
432 | 
433 | ```sh
434 | docker run -d --rm -v $PWD/:/share --name fuzz fuzz
435 | ```
436 | 
437 | Using the option `-d` will start the container in the background. With `docker exec` multiple shells
438 | can be started inside the container, which is helpful to start the executable in one session using
439 | QEMU and in the other session `gdb-multiarch`.
440 | 
441 | ```sh
442 | docker exec -it fuzz /bin/bash
443 | ```
444 | 
445 | ### Overwrite the main function
446 | 
447 | In the previous section, a potent fuzz target was identified. The problem is that when executing the
448 | binary, we will never reach the function call because the `parser_parse` function is only called if
449 | a TCP packet is received over a socket. This would be not only bad for performance, but also hard to
450 | set up. This is why the entry of the fuzzer should be at a different location than the normal main
451 | function.  For this, the environment variable `LD_PRELOAD`, which enables injecting a harness that
452 | has access to internal functions, can be used. As the man page of `ld.so`, which is responsible for
453 | linking the shared libraries needed by an executable at runtime, describes, `LD_PRELOAD` can be used
454 | "to selectively override functions in other shared objects
455 | [\[man-pages\]](https://www.man7.org/linux/man-pages/man8/ld.so.8.html)."
456 | 
457 | The function `__uClibc_main` is best suited for this purpose. To overwrite this function, a C file
458 | must be created that contains a function with the same name.
459 | 
460 | ```c
461 | void __uClibc_main(void *main, int argc, char** argv) {
462 |     // Harness code, e.g. call the function parser_append
463 |     printf("My custom __uClibc_main was called!");
464 | }
465 | ```
466 | 
467 | The C file can then be cross-compiled to a shared object in the mipsel architecture using
468 | `mipsel-linux-gnu-gcc`. The option `-fPIC` enables "Position Independent Code" which means that the
469 | machine code does not depend on being located at a specific address by using relative addressing
470 | instead of absolute.
471 | 
472 | ```txt
473 | $ mipsel-linux-gnu-gcc parser_parse_hook.c -o parser_parse_hook.o -shared -fPIC
474 | ```
475 | 
476 | The newly created shared library can then be loaded by adding the environment variable `LD_PRELOAD`
477 | to the QEMU command.
478 | 
479 | ```txt
480 | $ chroot root /qemu-mipsel-static -E LD_PRELOAD=/parser_parse_hook.o /usr/bin/wscd
481 | My custom __uClibc_main was called!
482 | ```
483 | 
484 | With the command `chroot` the current and root directories can be changed for the command provided.
485 | This is helpful because the executable `wscd` opens other files, like shared libraries from the
486 | firmware. We can see this behavior by adding the argument `-strace` to QEMU.
487 | 
488 | ```txt
489 | chroot root /qemu-mipsel-static -E LD_PRELOAD=/parser_parse_hook.o -strace /usr/bin/wscd /corpus/notify.txt
490 | 38180 mmap(NULL,4096,PROT_READ|PROT_WRITE,MAP_PRIVATE|MAP_ANONYMOUS|0x4000000,-1,0) = 0x7f7e7000
491 | 38180 stat("/etc/ld.so.cache",0x7ffffa48) = -1 errno=2 (No such file or directory)
492 | 38180 open("/parser_parse_hook.o",O_RDONLY) = 3
493 | 38180 fstat(3,0x7ffff920) = 0
494 | 38180 close(3) = 0
495 | 38180 munmap(0x7f7e6000,4096) = 0
496 | 38180 open("/lib/libpthread.so.0",O_RDONLY) = 3
497 | 38180 open("/lib/libc.so.0",O_RDONLY) = 3
498 | [...]
499 | ```
500 | 
501 | As we can see, the executable opens multiple libraries in the `/lib/` folder on the firmware and not
502 | on the host.
503 | 
504 | ### Developing and debug the harness
505 | 
506 | After the setup is created, we can now start developing a harness. As described in the background
507 | section, the harness is the driver between the fuzzer and the target function. The harness loads the
508 | fuzz input, which is stored by AFL++ in a file. With the file path as parameters, the harness then
509 | calls the fuzzing target; in this case, this would be `parser_append`. The functions can be called
510 | by using the address.
511 | 
512 | <div id="c10"></div>
513 | 
514 | ```c
515 | void __uClibc_main(void *main, int argc, char** argv)
516 | {
517 |   // Verify that a filename is provided
518 |   if (argc != 2) exit(1);
519 | 
520 |   // Create function pointer to the fuzz target
521 |   int (*parser_request_init)(void *, int) = (void *) 0x00412564;
522 |   int (*parser_append)(void *, void *, int) = (void *) 0x00412e98;
523 | 
524 |   // Open the fuzz input file
525 |   int fd = open(argv[1], O_RDONLY);
526 |   char fuzz_buf[2048 + 1];
527 |   int fuzz_buf_len = read(fd, fuzz_buf, sizeof(fuzz_buf) - 1);
528 |   if (fuzz_buf_len < 0) exit(1);
529 |   fuzz_buf[fuzz_buf_len] = 0;
530 | 
531 |   // Call the target functions
532 |   uint8_t parsed_data[220]; 
533 |   parser_request_init(parsed_data, 8);
534 |   int status = parser_append(parsed_data, fuzz_buf, fuzz_buf_len);
535 |   printf("Response is %d\n", status);
536 |   exit(0);
537 | }
538 | ```
539 | <p style="text-align: center">Code 10: Harness code with the fuzz target `parser_append` in the binary wscd.</p>
540 | 
541 | As shown in [Code 10](#c10) the function `parser_parse` is not called directly but by using the
542 | function `parser_append`. Before this function is called, the initialization function
543 | `parser_request_init` must be called, which initializes the output struct of the `parser_parse`
544 | function.
545 | 
546 | While in the case of the `parser_parse` the harness is pretty easy to set up, other targets require
547 | more sophisticated harnesses like the `httpd_parser_main` function. For example, before calling the
548 | target, the function `http_init_main` must be called, which ends in a SIGSEGV.  To find out where
549 | this segmentation fault is caused, it is useful to debug the code with a debugger like `gdb`. To do
550 | this, QEMU can be started with the option `-g` which spawns a `gdb-server` at the provided port.
551 | 
552 | ```sh
553 | chroot root /qemu-mipsel-static -strace -g 1234 -E LD_PRELOAD="/httpd_parser_main.o" /usr/bin/httpd \
554 |   /corpus/httpd/simple.txt
555 | ```
556 | 
557 | Because the binary is in the `mipsel` architecture `gdb-multiarch` must be used. After gdb is
558 | started, the following init script can be loaded with gdb using `source <path to script>`.
559 | 
560 | ```sh
561 | set solib-absolute-prefix /share/root/
562 | file /share/root/usr/bin/httpd
563 | target remote :1234
564 | # break before fuzz target is called
565 | # break __uClibc_main
566 | break http_parser_main
567 | display/4i $pc
568 | ```
569 | 
570 | Because of the chroot, the script first changes the absolute prefix path so that when the binary
571 | loads a shared object, gdb will find the file.  Then the targeted file is set, because QEMU's
572 | gdb-server does not support file transfer, so gdb tries to load the files from the disk instead.
573 | After gdb is configured, the script then connects to the gdb-server with `target remote` and creates
574 | a breakpoint at the start of the target function. With display, the output is just improved, so when
575 | stepping through, the next four lines of assembly will be shown. Using `si` we can step one
576 | instruction, which is useful when the harness has a segmentation fault using the default corpus,
577 | which should always work.  As shown in [Code 11](#c11) the binary has a segmentation fault in the
578 | function `fprintf`.
579 | 
580 | <div id="c11"></div>
581 | 
582 | ```sh
583 | (gdb) si
584 | 0x004059b0 in http_parser_makeHeader ()
585 | 1: x/4i $pc
586 | => 0x4059b0 <http_parser_makeHeader+120>:       jalr    t9
587 |    0x4059b4 <http_parser_makeHeader+124>:       addiu   a1,a1,16248
588 |    0x4059b8 <http_parser_makeHeader+128>:       li      v0,200
589 |    0x4059bc <http_parser_makeHeader+132>:       lw      gp,16(sp)
590 | (gdb) ni
591 | 0x7f56a8ac in fprintf () from /share/root/lib/libc.so.0
592 | 1: x/4i $pc
593 | => 0x7f56a8ac <fprintf+44>:     bal     0x7f56db80 <vfprintf>
594 |    0x7f56a8b0 <fprintf+48>:     nop
595 |    0x7f56a8b4 <fprintf+52>:     lw      ra,36(sp)
596 |    0x7f56a8b8 <fprintf+56>:     jr      ra
597 |    0x7f56a8bc <fprintf+60>:     addiu   sp,sp,40
598 | (gdb) n
599 | Single stepping until exit from function fprintf,
600 | which has no line number information.
601 | 
602 | Program received signal SIGSEGV, Segmentation fault.
603 | ```
604 | <p style="text-align: center">Code 11: Segmentation fault in fprintf.</p>
605 | 
606 | To investigate the error, Ghidra can be used to find out with which parameters the function is
607 | called.
608 | 
609 | ```c
610 | fprintf(
611 |  *(FILE **)(iVar1 + 0x101c),
612 |  "HTTP/1.1 %d %s\r\n",
613 |  *(undefined4 *)(&DAT_0042ee68 + (uint)(byte)(&DAT_00414570)[statuscode & 0x3f] * 8),
614 |  (&PTR_DAT_0042ee6c)[(uint)(byte)(&DAT_00414570)[statuscode & 0x3f] * 2]
615 | );
616 | ```
617 | 
618 | The SIGSEGV is probably caused by the fact that the first parameter is not a valid `FILE` pointer
619 | but a null pointer. Here, `iVar1` is just a reference to the input of the `httpd_parser_main`
620 | function. This means that the input must hold a valid `FILE` pointer at offset 0x101c.  So the input
621 | must be adjusted to the following struct.
622 | 
623 | ```c
624 | typedef struct  {
625 |   int _a;     // 4 Bytes
626 |   int _b;     // 4 Bytes
627 |   int socket; // 4 Bytes
628 |   int ip;     // 4 Bytes
629 |   int mac;    // 4 Bytes
630 |   unsigned char body[0x1008]; // 0x101c - 4*5 = 0x1008 Bytes
631 |   FILE * fd_out; // expected to be a valid file descriptor
632 | } HttpMainT;
633 | ```
634 | 
635 | Because `fd_out` must just be a valid file descriptor pointer, it can easily be set to `stdout`.
636 | Executing the `httpd_parser_main` again will now produce a valid HTTP output.
637 | 
638 | ```txt
639 | $ chroot root /qemu-mipsel-static -E LD_PRELOAD=/httpd_parser_main.o \
640 |     /usr/bin/httpd /httpd_corpus.txt
641 | 
642 | bind: No such file or directory
643 | [ dm_shmInit ] 086:  shmget to exitst shared memory failed. Could not create shared memory.
644 | rdp_getObj is called with: 4274932gdpr_getSystemGDPREntry Error
645 | gdpr_getNewSystemGDPREntry OK
646 | #Msg: getsockname error
647 | HTTP/1.1 200 OK
648 | Content-Type: text/html; charset=utf-8
649 | Content-Length: 24257
650 | Set-Cookie: JSESSIONID=deleted; Expires=Thu, 01 Jan 1970 00:00:01 GMT; Path=/; HttpOnly
651 | Connection: close
652 | 
653 | <!DOCTYPE html>
654 | [...]
655 | ```
656 | 
657 | The harness works now and can be used to fuzz the function using AFL++ which will be explained in
658 | the next section.
659 | 
660 | ### Generate corpus data
661 | 
662 | As mentioned in the background, a seed corpus describes valid input samples, which serves as a
663 | foundational reference for generating new input data during the fuzzing process.
664 | 
665 | These inputs are typically chosen to represent different aspects of the target programs. The seed
666 | corpus is used by a fuzzer to generate mutated or evolved test cases that are then run against the
667 | target software to uncover bugs, crashes, or other problems. This corpus plays an important role in
668 | directing the fuzzer to relevant areas of the program and increasing the probability of detecting
669 | vulnerabilities or unexpected behaviors. By providing a diverse and representative set of initial
670 | inputs, the seed corpus helps the fuzzer explore different paths in the target faster and thereby
671 | increases coverage.
672 | 
673 | When it comes to functions parsing network data, these inputs can be created by using Wireshark to
674 | record different packets.
675 | 
676 | For the function `httpd_parser_main`, four different corpora were created, each targeting different
677 | paths in the binary. One example is the login request, which contains the username and password. For
678 | this corpus, the harness had to be modified because TP-Link uses (weak) cryptography to "protect"
679 | the password. For this, the password is encrypted in the browser using AES and then decrypted on the
680 | backend.  The AES key is generated in the browser and then encrypted using RSA. Then the
681 | encrypted data is signed. Because a fuzzer cannot create a signature or encrypt data, some
682 | functions were overwritten and now just decode the data from base64. For this, the data were first
683 | extracted in plaintext from the browser using the debugger shown in [Figure 4](#f4).
684 | 
685 | <div id="f4"></div>
686 | 
687 | <figure>
688 |   <p><img src="blackbox-fuzzing/firefox.png" style="width:95.0%" /></p>
689 | <figcaption><p style="text-align: center">Figure 4: Extracting the data before encryption.</p></figcaption>
690 | </figure>
691 | 
692 | In the target, the function `rsa_tmp_decrypt_bypart` was then overwritten to replace the logic from
693 | decrypting the data to just decoding from base64.
694 | 
695 | ```c
696 | // Replacing the logic with b64_decode
697 | int rsa_tmp_decrypt_bypart(uint8_t *input, int input_len, uint8_t *output) { // other params just key data
698 |   int (*b64_decode)(uint8_t *, int, uint8_t *, int) = (void *) 0x0040bf00;
699 |   b64_decode(output, 0x1000, input, input_len);
700 |   int * seqnumber = (int *) 0x00444db0;
701 |   *seqnumber = 0x3ac28e29-input_len+12;
702 |   return 0; // says it was okay
703 | }
704 | ```
705 | <p style="text-align: center">Code 12: Function rsa_tmp_decrypt_bypart now just decodes base64
706 | instead of decrypting the data.</p>
707 | 
708 | While executing the corpus, the target function always returns an HTML document with the error "408
709 | Request Timeout". Using Ghidra and GDB the problem could be identified. The error always happens
710 | after the function call to `http_stream_fgets`. The problematic line was the check for the line
711 | break character `\n`.
712 | 
713 | ```c
714 | if (((cVar1 == '\n') && (param_3 < pcVar4)) && (pcVar4[-1] == '\r')) {
715 | ```
716 | 
717 | This condition enforces that every line break must be preceded by a carriage return. After adding
718 | the carriage returns, all the created corpora worked.
719 | 
720 | ### Fuzz the target
721 | 
722 | In the last section, we developed multiple harnesses and executed them using QEMU. In this section,
723 | QEMU is replaced by AFL++ which gets the generated corpora as seed input to fuzz the target
724 | function. In the section "Fuzzing environment" a docker image was created that already pulls AFL++
725 | from GitHub and then uses an AFL++-provided script to build a patched version of QEMU. So AFL++ can
726 | now be started with the following command that gets different parameters, like `-Q` which tells
727 | AFL++ to use the patched version of QEMU.
728 | 
729 | ```sh
730 | QEMU_LD_PREFIX=/share/root AFL_PRELOAD=/share/root/httpd_parser_main.o \
731 |   /AFLplusplus/afl-fuzz -Q \
732 |   -i /share/root/corpus/httpd/ -o /share/afl-out/httpd/ \
733 |   -- /share/root/usr/bin/httpd @@
734 | ```
735 | <p style="text-align: center">Code 13: Fuzzing the binary <code>httpd</code> using the harness
736 | and <code>afl-fuzz</code>.</p>
737 | 
738 | Unlike before, the command `chroot` is no longer necessary and is replaced by the variable
739 | `QEMU_LD_PREFIX`, which tells QEMU where to search for shared objects. Also, the `LD_PRELOAD`
740 | variable is replaced by the AFL-specific version `AFL_PRELOAD`. The last argument in the command is
741 | the two `@` characters. They will be replaced by AFL++ with a file path that holds the fuzzing
742 | input. When started, AFL++ shows the progress using the terminal UI shown in [Figure 5](#f5).
743 | 
744 | <div id="f5"></div>
745 | <figure>
746 | <p><img src="blackbox-fuzzing/afl++.png" style="width:95.0%" /></p>
747 | <figcaption><p style="text-align: center">Figure 5: The status screen from AFL++.</p></figcaption>
748 | </figure>
749 | 
750 | The `AFL++` status screen provides essential insights into the current fuzzing process. The docs of
751 | `AFL++` have a nice overview of the terms used in the status screen
752 | [\[afl-screen\]](https://aflplus.plus/docs/status_screen/).  When debugging the corpus, the UI can
753 | be disabled with `AFL_NO_UI` and detailed logging enabled with `AFL_DEBUG`, which shows the current
754 | fuzzer input and the `stdout` from the target program. The second line resets both variables.
755 | 
756 | ```sh
757 | export AFL_DEBUG=1 && export AFL_NO_UI=1
758 | unset AFL_DEBUG && unset AFL_NO_UI
759 | ```
760 | 
761 | As shown in [Figure 5](#f5) fuzzing a binary can take quite some time. According to the docs it
762 | "should be expected to run for days or weeks" and "some jobs will be allowed to run for months." To
763 | improve the time needed, the exec speed should be above 100 execs/sec. When, for example, the target
764 | `httpd_parser_main` was fuzzed, the exec speed was initially around 30/sec. To improve the
765 | speed, the target binary was searched for suspect functions, which are probably the cause of the
766 | slowdown. One of the suspect functions was `rsa_gdpr_generate_key` because generating an RSA key is
767 | known to be slow. After overwriting the function, the speed improved to 600 executions per second.
768 | 
769 | One indicator that helps indicate when to stop fuzzing is the cycle counter. AFL++ will highlight
770 | the number in green when "the fuzzer has not been seeing any action for a longer while," which helps
771 | to make the call to stop the fuzzer.
772 | 
773 | But the most interesting number is probably "total crashes". This shows when the program crashes
774 | because of the current fuzzing input and is probably a memory-related bug. To verify that this is a
775 | real bug `gdb` can be used again to find the position of the bug.
776 | 
777 | ## Conclusion
778 | 
779 | Fuzzing may be the most effective way to find security vulnerabilities.  In this term paper, three
780 | different functions were fuzzed, but no vulnerabilities were found. While the black box fuzzing setup
781 | itself is not that complex and time-consuming, finding a potent target and developing a working
782 | harness are. Most of the time, the harness has to be debugged, and then the underlying logic in the
783 | binary must be reversed, which again consumes a long time.
784 | 


--------------------------------------------------------------------------------
/blackbox-fuzzing/afl++.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/otsmr/blackbox-fuzzing/ea8a7ca767f41c078de61dd96667dba755f73445/blackbox-fuzzing/afl++.png


--------------------------------------------------------------------------------
/blackbox-fuzzing/chatgpt.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/otsmr/blackbox-fuzzing/ea8a7ca767f41c078de61dd96667dba755f73445/blackbox-fuzzing/chatgpt.png


--------------------------------------------------------------------------------
/blackbox-fuzzing/firefox.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/otsmr/blackbox-fuzzing/ea8a7ca767f41c078de61dd96667dba755f73445/blackbox-fuzzing/firefox.png


--------------------------------------------------------------------------------
/blackbox-fuzzing/ghidra_search.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/otsmr/blackbox-fuzzing/ea8a7ca767f41c078de61dd96667dba755f73445/blackbox-fuzzing/ghidra_search.png


--------------------------------------------------------------------------------
/blackbox-fuzzing/ghidra_tmpd.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/otsmr/blackbox-fuzzing/ea8a7ca767f41c078de61dd96667dba755f73445/blackbox-fuzzing/ghidra_tmpd.png


--------------------------------------------------------------------------------
/blackbox-fuzzing/tmpd_function_list.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/otsmr/blackbox-fuzzing/ea8a7ca767f41c078de61dd96667dba755f73445/blackbox-fuzzing/tmpd_function_list.png


--------------------------------------------------------------------------------
/corpus/add_carriage_return.py:
--------------------------------------------------------------------------------
 1 | def add_carriage_return(file):
 2 |     # Rewrite `file` in place so that every line is terminated by '\r\n'.
 3 |     lines = []
 4 |     with open(file, 'r') as f:
 5 |         lines = f.readlines()
 6 | 
 7 |     replace = True  # NOTE(review): never set to False, so the else branch below is unreachable
 8 |     with open(file, 'w') as modified:
 9 |         for line in lines:
10 |             if line == "\n":
11 |                 replace = True
12 |             if replace:
13 |                 line = line.rstrip('\n')  # Strip trailing line feed(s)
14 |                 line = line.rstrip('\r')  # Strip trailing carriage return(s)
15 |                 modified.write(line + '\r\n')  # Terminate the line with '\r\n'
16 |             else:
17 |                 modified.write(line)  # Would write the line unchanged (currently dead code)
18 | 
19 |     print("OK")
20 | 
21 | 
22 | import sys
23 | 
24 | if len(sys.argv) < 2:  # the path of the file to convert is required as first argument
25 |     print("Usage: .py file.txt")
26 |     exit(1)
27 | 
28 | add_carriage_return(sys.argv[1])  # convert the given file in place
29 | 
30 | 


--------------------------------------------------------------------------------
/corpus/httpd/abc.txt:
--------------------------------------------------------------------------------
 1 | GET / HTTP/1.1
 2 | Host: tplinklogin.net
 3 | Content-Type: text/html; charset=utf-8
 4 | Connection: close
 5 | Content-Type: multipart/form-data; boundary=XXXXXXXXXXXXX;
 6 | Cookie: JSESSIONID=deleted
 7 | Referer: http:://exmaple.org
 8 | User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:109.0) Gecko/20100101 Firefox/118.0
 9 | Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8
10 | Accept-Language: de,en-US;q=0.7,en;q=0.3
11 | Accept-Encoding: gzip, deflate
12 | 
13 | 
14 | 


--------------------------------------------------------------------------------
/corpus/httpd/cgi.txt:
--------------------------------------------------------------------------------
 1 | POST /cgi?1&1&1&8 HTTP/1.1
 2 | Host: localhost:8000
 3 | User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:109.0) Gecko/20100101 Firefox/118.0
 4 | Accept: */*
 5 | Accept-Language: de,en-US;q=0.7,en;q=0.3
 6 | Accept-Encoding: gzip, deflate, br
 7 | Content-Type: text/plain
 8 | Content-Length: 248
 9 | Origin: http://localhost:8000
10 | DNT: 1
11 | Connection: keep-alive
12 | Referer: http://localhost:8000/mainFrame.htm
13 | Sec-Fetch-Dest: empty
14 | Sec-Fetch-Mode: cors
15 | Sec-Fetch-Site: same-origin
16 | Sec-GPC: 1
17 | Pragma: no-cache
18 | Cache-Control: no-cache
19 | 
20 | [IGD_DEV_INFO#0,0,0,0,0,0#0,0,0,0,0,0]0,4
21 | modelName
22 | description
23 | X_TP_isFD
24 | X_TP_ProductVersion
25 | [ETH_SWITCH#0,0,0,0,0,0#0,0,0,0,0,0]1,1
26 | numberOfVirtualPorts
27 | [MULTIMODE#0,0,0,0,0,0#0,0,0,0,0,0]2,1
28 | mode
29 | [/cgi/info#0,0,0,0,0,0#0,0,0,0,0,0]3,0
30 | 


--------------------------------------------------------------------------------
/corpus/httpd/gdpr.txt:
--------------------------------------------------------------------------------
 1 | POST /cgi_gdpr HTTP/1.1
 2 | Host: localhost:8000
 3 | User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:109.0) Gecko/20100101 Firefox/118.0
 4 | Accept: */*
 5 | Accept-Language: de,en-US;q=0.7,en;q=0.3
 6 | Accept-Encoding: gzip, deflate, br
 7 | Content-Type: text/plain
 8 | Content-Length: 43
 9 | Origin: http://localhost:8000
10 | DNT: 1
11 | Connection: keep-alive
12 | Referer: http://localhost:8000/mainFrame.htm
13 | Sec-Fetch-Dest: empty
14 | Sec-Fetch-Mode: cors
15 | Sec-Fetch-Site: same-origin
16 | Sec-GPC: 1
17 | Pragma: no-cache
18 | Cache-Control: no-cache
19 | 
20 | sign=a2V5PTQ3OTg3MzE5NTY5NjU2ODkmaXY9NzUxOTE1NDI5NjkwMTYyMCZoPWY2ZmRmZmU0OGM5MDhkZWIwZjRjM2JkMzZjMDMyZTcyJnM9OTg1ODI4OTA1
21 | data=W01VTFRJTU9ERSMwLDAsMCwwLDAsMCMwLDAsMCwwLDAsMF0wLDA=
22 | 
23 | 


--------------------------------------------------------------------------------
/corpus/httpd/get_param.txt:
--------------------------------------------------------------------------------
 1 | POST /cgi?8 HTTP/1.1
 2 | Host: localhost:8000
 3 | User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:109.0) Gecko/20100101 Firefox/118.0
 4 | Accept: */*
 5 | Accept-Language: de,en-US;q=0.7,en;q=0.3
 6 | Accept-Encoding: gzip, deflate, br
 7 | Content-Type: text/plain
 8 | Content-Length: 43
 9 | Origin: http://localhost:8000
10 | DNT: 1
11 | Connection: keep-alive
12 | Referer: http://localhost:8000/frame/login.htm
13 | Sec-Fetch-Dest: empty
14 | Sec-Fetch-Mode: cors
15 | Sec-Fetch-Site: same-origin
16 | Sec-GPC: 1
17 | Pragma: no-cache
18 | Cache-Control: no-cache
19 | 
20 | [/cgi/getParm#0,0,0,0,0,0#0,0,0,0,0,0]0,0
21 | 


--------------------------------------------------------------------------------
/corpus/httpd/login_base64.txt:
--------------------------------------------------------------------------------
 1 | POST /cgi_gdpr HTTP/1.1
 2 | Host: localhost:8000
 3 | User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:109.0) Gecko/20100101 Firefox/118.0
 4 | Accept: */*
 5 | Accept-Language: de,en-US;q=0.7,en;q=0.3
 6 | Accept-Encoding: gzip, deflate, br
 7 | Content-Type: text/plain
 8 | Content-Length: 378
 9 | Origin: http://localhost:8000
10 | DNT: 1
11 | Connection: keep-alive
12 | Referer: http://localhost:8000/frame/login.htm
13 | Sec-Fetch-Dest: empty
14 | Sec-Fetch-Mode: cors
15 | Sec-Fetch-Site: same-origin
16 | Sec-GPC: 1
17 | Pragma: no-cache
18 | Cache-Control: no-cache
19 | 
20 | sign=a2V5PTQ3OTg3MzE5NTY5NjU2ODkmaXY9NzUxOTE1NDI5NjkwMTYyMCZoPWY2ZmRmZmU0OGM5MDhkZWIwZjRjM2JkMzZjMDMyZTcyJnM9OTg1ODI4OTA1
21 | data=OA0KWy9jZ2kvbG9naW4jMCwwLDAsMCwwLDAjMCwwLDAsMCwwLDBdMCwyDQp1c2VybmFtZT1hZG1pbg0KcGFzc3dvcmQ9YWRtaW4NCg==
22 | 


--------------------------------------------------------------------------------
/corpus/notify.txt:
--------------------------------------------------------------------------------
 1 | NOTIFY * HTTP/1.1
 2 | HOST: 239.255.255.250:1900
 3 | CACHE-CONTROL: max-age=100
 4 | LOCATION: http://192.168.1.100:8080/description.xml
 5 | NT: urn:schemas-upnp-org:device:MediaServer:1
 6 | NTS: ssdp:alive
 7 | SERVER: MyUPnP/1.0 UPnP/1.1 MiniUPnPd/1.4
 8 | USN: uuid:3d9f1766-31fd-49e5-9618-c0de74d9e461::urn:schemas-upnp-org:device:MediaServer:1
 9 | BOOTID.UPNP.ORG: 1
10 | CONFIGID.UPNP.ORG: 1337
11 | 


--------------------------------------------------------------------------------
/corpus/simple.txt.bck:
--------------------------------------------------------------------------------
1 | GET /main HTTP/1.1
2 | Host: tplink.net
3 | Set-Cookie: JSESSIONID=deleted; Expires=Thu, 01 Jan 1970 00:00:01 GMT; Path=/; HttpOnly
4 | Content-Type: text/html; charset=utf-8
5 | Connection: close
6 | 


--------------------------------------------------------------------------------
/gdb_hook_init.gdb:
--------------------------------------------------------------------------------
 1 | set solib-absolute-prefix /share/root/
 2 | 
 3 | # Load the target binary from disk and connect to the gdb server on port 1234
 4 | 
 5 | file /share/root/usr/bin/httpd
 6 | target remote :1234
 7 | # break before fuzz target is called
 8 | # break __uClibc_main
 9 | # break http_parser_main
10 | # break *(__uClibc_main+148)
11 | # Show disassembly when stepping: `display/4i $pc` below prints four instructions at every stop
12 | # break at parser_parse
13 | # c
14 | display/4i $pc
15 | 
16 | # b *0x405f4c
17 | b *0x40f3d0
18 | # Further breakpoint candidates (disabled):
19 | # http_init_main
20 | # b *0x004034f8
21 | # b *0x00403684
22 | # debug http_stream_fgets
23 | # b *0x0040787c
24 | # To load this script automatically whenever gdb starts:
25 | # echo "source /share/fuzzing/gdb_hook_init.gdb" > ~/.gdbinit
26 | 


--------------------------------------------------------------------------------
/harnesses/http_recv_hook.c:
--------------------------------------------------------------------------------
 1 | #include <fcntl.h>
 2 | #include <stdio.h>
 3 | #include <stdint.h>
 4 | #include <unistd.h>
 5 | #include <stdlib.h>
 6 | 
 7 | /* int http_RecvMessage(int socket,request *buffer,int is_first,int timeout?,int *out_statuscode); */
 8 | 
 9 | void __uClibc_main(void *main, int argc, char** argv)
10 | {
11 |   /* char req[4096 + 1]; */
12 |   /* int32_t in_addr; */
13 |   // Harness entry point: calls http_RecvMessage in the target directly by address.
14 |   // Arguments passed to http_RecvMessage
15 |   int socket = 0;
16 |   uint8_t * buffer[240]; // NOTE(review): array of 240 pointers, not 240 bytes - confirm intended size
17 | 
18 |   // make sure parser_request_init is called
19 |   int is_first = 8;
20 | 
21 |   int timeout = 0;
22 |   int statuscode = 400;
23 | 
24 |   // Create function pointer to the fuzz target
25 |   int (*http_RecvMessage)(int, void *, int, int, int*) = (void *) 0x00413178;
26 | 
27 |   // Call the function
28 |   printf("Calling fuzz target");
29 |   int status = http_RecvMessage(socket, buffer, is_first, timeout, &statuscode);
30 |   printf("Response is %d", status);
31 | 
32 |   exit(0); // main, argc and argv are never used by this harness
33 | }
34 | 


--------------------------------------------------------------------------------
/harnesses/httpd_parser_main.c:
--------------------------------------------------------------------------------
  1 | 
  2 | #include <fcntl.h>
  3 | #include <stdio.h>
  4 | #include <stdint.h>
  5 | #include <unistd.h>
  6 | /* #include <stdlib.h> */
  7 | 
  8 | typedef struct // Argument struct handed to http_parser_main by the harness
  9 | {
 10 |   int _a;
 11 |   int _b;
 12 |   int socket; // set by the harness to the fd of the opened fuzz input file
 13 |   int ip;
 14 |   int mac;
 15 |   unsigned char body[0x1004]; // 5 ints + 0x1004 + ptr place socket_out at 0x101c, assuming 4-byte int
 16 |   int ptr;
 17 |   FILE *socket_out; // set by the harness to stdout
 18 | } HttpMainT;
 19 | 
 20 | // Store this global
 21 | HttpMainT fuzzInput;
 22 | 
 23 | int rdp_getObj(int, char *id, char *output, char *input) // Replacement for rdp_getObj; always returns 0
 24 | {
 25 |   if (strstr(input, "adminPwd") != 0) // request mentions adminPwd: answer with fixed credentials
 26 |   {
 27 |     char adminData[] = "adminName=admin\nadminPwd=admin\n\x00";
 28 |     memcpy(input, adminData, sizeof(adminData)); // reply is written back into `input`; `output` is unused
 29 |   }
 30 |   else
 31 |   {
 32 |     printf("rdp_getObj is called with: %s to get %s\n", id, input); // only log any other request
 33 |   }
 34 |   return 0;
 35 | }
 36 | 
 37 | int rdp_getObjStruct(char *id, char *output, char *input) // Stub: ignores all arguments
 38 | {
 39 |   /* printf("rdp_getObjStruct is called with: %s to get %s\n", id); */
 40 |   return 0;
 41 | }
 42 | 
 43 | // Empty stub: the original function slows afl-fuzz down significantly;
 44 | // stubbing it raised the exec speed from about 30/s to about 500/s
 45 | void rsa_gdpr_generate_key()
 46 | {
 47 | }
 48 | 
 49 | int aes_tmp_decrypt_buf_nopadding_new(void *input, void *output, int *len, int _key, int _iv)
 50 | { // Stub: copies the input to the output instead of decrypting; _key and _iv are ignored
 51 |   printf("AES:DEC: %p %p %p\n", input, output, len);
 52 |   if (*len > 0 && *len < 0x1000) // only copy for lengths in 1..0xfff
 53 |   {
 54 |     strncpy(output, input, *len); // NOTE(review): strncpy stops copying at the first NUL byte
 55 |   }
 56 |   return 0; // OK
 57 | }
 58 | 
 59 | // Replacement for rsa_tmp_decrypt_bypart: base64-decodes the input instead of decrypting it
 60 | int rsa_tmp_decrypt_bypart(uint8_t *input, int input_len, uint8_t *output)
 61 | { // the remaining parameters of the original only carry key data
 62 | 
 63 |   /* int b64_decode(byte *out,int max_out_len,byte *input,size_t input_len) */
 64 |   int (*b64_decode)(uint8_t *, int, uint8_t *, int) = (void *)0x0040bf00; // called by address
 65 |   b64_decode(output, 0x1000, input, input_len);
 66 |   // Overwrite the int at 0x00444db0 in the target (named seqnumber here)
 67 |   int *seqnumber = (int *)0x00444db0;
 68 |   *seqnumber = 0x3ac28e29 - input_len + 12; // NOTE(review): constants unexplained - presumably satisfy a sequence check; confirm
 69 | 
 70 |   return 0; // report success to the caller
 71 | }
 72 | 
 73 | int rdp_netChkIpInAllLanSubnet()
 74 | {
 75 |   // Must be overridden; per the original note the target crashes otherwise
 76 |   return 1; // must be 1
 77 | }
 78 | 
 79 | int select(int, void *)
 80 | {
 81 |   // Replaces select(), which is called at the end of http_init_main and would block.
 82 |   // Used here as the point from which the fuzz target is started.
 83 | 
 84 |   int status = http_parser_main(&fuzzInput); // NOTE(review): http_parser_main is not declared at file scope
 85 | 
 86 |   printf("\nResponse is %d\n", status);
 87 | 
 88 |   exit(0); // terminate the process after this single call
 89 | }
 90 | 
 91 | void __uClibc_main(void *main, int argc, char **argv)
 92 | {
 93 |   // Harness entry point: prepares fuzzInput from the file argv[1], then runs http_init_main.
 94 |   // Verify that a filename is provided
 95 |   if (argc != 2)
 96 |   {
 97 |     printf("No input file provided\n");
 98 |     exit(1);
 99 |   }
100 | 
101 |   // Open the fuzz input file
102 |   int fuzzer_fd = open(argv[1], O_RDONLY); // NOTE(review): return value is not checked
103 | 
104 |   // Harness
105 | 
106 |   // Function pointers into the target binary, by address
107 |   int (*http_parser_main)(void *) = (void *)0x00405ee8; // NOTE(review): unused local; select() calls http_parser_main by name
108 |   void (*http_init_main)() = (void *)0x004034f8;
109 |   // The opened input file stands in for the socket; output goes to stdout
110 |   fuzzInput.socket = fuzzer_fd;
111 |   fuzzInput.socket_out = stdout;
112 | 
113 |   /* *((unsigned int *)0x4060d0) = 0x00; */
114 |   /* set *(unsigned char*)0x80FFDDEE = 0x90 */
115 | 
116 |   /* uint8_t parsed_data[220]; // memset in parser_request_init and size of struct in ghidra */
117 |   http_init_main();
118 | 
119 |   // The fuzz target itself is called from the select() override in this file
120 | 
121 |   exit(0);
122 | }
123 | 


--------------------------------------------------------------------------------
/harnesses/parser_parse_hook.c:
--------------------------------------------------------------------------------
 1 | #include <fcntl.h>
 2 | #include <stdio.h>
 3 | #include <stdint.h>
 4 | #include <unistd.h>
 5 | #include <stdlib.h>
 6 | 
 7 | /* int http_RecvMessage(int socket,request *buffer,int is_first,int timeout?,int *out_statuscode); */
 8 | 
 9 | void __uClibc_main(void *main, int argc, char** argv)
10 | {
11 |   // Harness entry point: feeds the content of the file argv[1] to parser_append.
12 |   // Verify that a filename is provided
13 |   if (argc != 2) {
14 |     printf("No input file provided\n");
15 |     exit(1);
16 |   }
17 | 
18 | 
19 |   // Function pointers to the initializer and the fuzz target, by address
20 |   int (*parser_request_init)(void *, int) = (void *) 0x00412564;
21 |   int (*parser_append)(void *, void *, int) = (void *) 0x00412e98;
22 | 
23 | 
24 |   // Open the fuzz input file
25 |   int fd = open(argv[1], O_RDONLY); // NOTE(review): return value of open is not checked
26 |   char fuzz_buf[2048 + 1]; // one extra byte for the terminating NUL
27 | 
28 |   int fuzz_buf_len = read(fd, fuzz_buf, sizeof(fuzz_buf) - 1);
29 |   if (fuzz_buf_len < 0) {
30 |     printf("Error reading\n");
31 |     exit(1);
32 |   }
33 |   fuzz_buf[fuzz_buf_len] = 0; // NUL-terminate the input
34 | 
35 | 
36 |   uint8_t parsed_data[220]; // memset in parser_request_init and size of struct in ghidra
37 | 
38 |   // Initialize the parser
39 |   parser_request_init(parsed_data, 8);
40 | 
41 |   // socket_read max: 2048
42 | 
43 |   int status = parser_append(parsed_data, fuzz_buf, fuzz_buf_len);
44 | 
45 |   printf("Response is %d\n", status);
46 | 
47 |   exit(0);
48 | }
49 | 


--------------------------------------------------------------------------------
/safl_fuzz_httpd.sh:
--------------------------------------------------------------------------------
 1 | 
 2 | # Fuzz httpd under AFL++ QEMU mode (-Q) with the parser harness preloaded.
 3 | export AFL_AUTORESUME=1
 4 | # For debugging instead: export AFL_DEBUG=1 && export AFL_NO_UI=1
 5 | unset AFL_DEBUG AFL_NO_UI
 6 | 
 7 | env QEMU_LD_PREFIX=/share/root AFL_PRELOAD=/share/root/httpd_parser_main.o \
 8 |   /AFLplusplus/afl-fuzz -Q \
 9 |   -i /share/root/corpus/httpd/ \
10 |   -o /share/afl-out/httpd/ \
11 |   -- /share/root/usr/bin/httpd @@
12 | 
13 | # chroot root /qemu-mipsel-static -E LD_PRELOAD=/httpd_parser_main.o /usr/bin/httpd /corpus/httpd/simple.txt
14 | 


--------------------------------------------------------------------------------
/sdebug_httpd.sh:
--------------------------------------------------------------------------------
 1 | # Build the selected preload hook and run the target once, inside the
 2 | # chroot under qemu-mipsel-static, on a single corpus file.
 3 | # chroot root /qemu-mipsel-static -E LD_PRELOAD=/parser_parse_hook.o /usr/bin/wscd
 4 | # mipsel-linux-gnu-gcc fuzzing/parser_parse_hook.c -o root/parser_parse_hook.o -shared -fPIC
 5 | 
 6 | hook=httpd_parser_main
 7 | corpus_file="/corpus/httpd/login_base64.txt"   # path as seen inside the chroot
 8 | # corpus_file="/corpus/crashes/01"
 9 | target=/usr/bin/httpd
10 | 
11 | # Build the hook as a position-independent shared object for LD_PRELOAD
12 | mipsel-linux-gnu-gcc "fuzzing/$hook.c" -o "root/$hook.o" -shared -fPIC
13 | 
14 | # Kill running qemu-x86_64 processes; skipped when none exist, since a bare
15 | # `kill -9` is a usage error. $pids stays unquoted: it may hold several PIDs.
16 | pids=$(pidof qemu-x86_64) && kill -9 $pids
17 | 
18 | # Check that the corpus file exists (host-side path, under root/)
19 | if [ ! -f "root/$corpus_file" ]; then
20 |   echo "corpus_file not found!"
21 |   exit 1
22 | fi
23 | chroot root /qemu-mipsel-static -E LD_PRELOAD="/$hook.o" "$target" "$corpus_file"
24 | # chroot root /qemu-mipsel-static -strace -g 1234 -E LD_PRELOAD="/$hook.o" $target "$corpus_file"


--------------------------------------------------------------------------------
/srun_dir_httpd.sh:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | # Build the preload hook, then run the target under qemu-mipsel-static (in
 4 | # the chroot) once for every regular file in the corpus directory.
 5 | # chroot root /qemu-mipsel-static -E LD_PRELOAD=/parser_parse_hook.o /usr/bin/wscd
 6 | # mipsel-linux-gnu-gcc fuzzing/parser_parse_hook.c -o root/parser_parse_hook.o -shared -fPIC
 7 | 
 8 | hook=httpd_parser_main
 9 | corpus_dir="root/corpus/httpd"   # host-side path; must live under root/
10 | target=/usr/bin/httpd
11 | 
12 | # Build the hook as a position-independent shared object for LD_PRELOAD
13 | mipsel-linux-gnu-gcc "fuzzing/$hook.c" -o "root/$hook.o" -shared -fPIC
14 | 
15 | # execute using debugger
16 | # kill -9 $(pidof qemu-x86_64)
17 | # chroot root /qemu-mipsel-static -strace -g 1234 -E LD_PRELOAD=/httpd_parser_main.o /usr/bin/httpd /httpd_corpus.txt
18 | 
19 | # Check if the specified folder exists
20 | if [ ! -d "$corpus_dir" ]; then
21 |   echo "Folder not found!"
22 |   exit 1
23 | fi
24 | 
25 | # Loop through each file in the folder
26 | for file in "$corpus_dir"/*; do
27 |   if [ -f "$file" ]; then
28 |     file=${file#root/}   # strip the leading root/ so the path is chroot-relative
29 |     # Pass the name as an argument: a '%' or '\' in it must not act as format
30 |     printf '\n---- %s ----\n\n' "$file"
31 |     chroot root /qemu-mipsel-static -E LD_PRELOAD="/$hook.o" "$target" "$file"
32 |   fi
33 | done


--------------------------------------------------------------------------------