├── .gitignore ├── README.md ├── experiments ├── crazy_alloc.c ├── multi_page_alloc.c └── simple_alloc.c └── pagetables ├── Makefile ├── pagetables-usr.c └── pagetables.c /.gitignore: -------------------------------------------------------------------------------- 1 | # 'Hidden' files, modulo this one. 2 | .* 3 | !.gitignore 4 | 5 | # Generated files 6 | *.order 7 | *.symvers 8 | *.mod.* 9 | *.o 10 | 11 | # Binaries 12 | *.ko 13 | pagetables/pagetables 14 | experiments/multi_page_alloc 15 | experiments/crazy_alloc 16 | experiments/simple_alloc 17 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Linux VM Hacks 2 | 3 | This repo is a place for experimental code and patches targeting Linux 4.6, 4 | designed to help me better understand the Linux VM subsystem. 5 | 6 | This project's sister repo, [linux-vm-notes][vm-notes], contains notes on the 7 | subsystem. 8 | 9 | ## experiments 10 | 11 | Contains userland experiments for exploring the Linux VM. 12 | 13 | ## pagetables 14 | 15 | Pagetables is a kernel module which outputs the precise PGD/PUD/PMD/PTE page 16 | table contents for a given process. 17 | 18 | It differs from `/proc/[pid]/pagemap` ([doc page][page-map]) in that pagemap is 19 | vastly more useful :) It allows a process to map between virtual and physical 20 | pages without reference to individual page tables, whereas `pagetables` is 21 | designed to expose these details. 22 | 23 | Additionally, `pagetables` exposes kernel mappings. 24 | 25 | ### Building 26 | 27 | ``` 28 | $ cd pagetables 29 | $ make 30 | ``` 31 | 32 | ### Usage 33 | 34 | __WARNING:__ Don't use this with a kernel you care about. It's experimental and 35 | I've probably made horrific mistakes which will result in data/hair/firstborn 36 | loss. 
37 | 38 | __EVEN MORE SERIOUS WARNING:__ This module is a security nightmare and exposes 39 | sensitive data, including kernel mappings and the mappings of any specified 40 | process. You've been warned! 41 | 42 | ``` 43 | $ cd pagetables 44 | $ sudo insmod pagetables.ko 45 | $ sudo ./pagetables 46 | ``` 47 | 48 | ## License 49 | 50 | All code here is licensed under [GPL v2][gpl-v2] to remain compatible with the 51 | kernel itself. 52 | 53 | [vm-notes]:https://github.com/lorenzo-stoakes/linux-vm-notes 54 | [page-map]:https://github.com/torvalds/linux/blob/v4.6/Documentation/vm/pagemap.txt 55 | [gpl-v2]:http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html 56 | -------------------------------------------------------------------------------- /experiments/crazy_alloc.c: -------------------------------------------------------------------------------- 1 | #define _GNU_SOURCE 2 | #include 3 | #include 4 | 5 | #define SIZE (1UL * 1024 * 1024 * 1024 * 8) 6 | 7 | int main(void) 8 | { 9 | if (!malloc(SIZE)) { 10 | perror("malloc"); 11 | return EXIT_FAILURE; 12 | } 13 | 14 | return EXIT_SUCCESS; 15 | } 16 | -------------------------------------------------------------------------------- /experiments/multi_page_alloc.c: -------------------------------------------------------------------------------- 1 | /* 2 | * NOTE: Assumes x86-64, 4KiB pages. 3 | * 4 | * Allocates some memory, then touches a page, then touches other pages in order 5 | * to allow observation of the changes of page table allocation + demand 6 | * paging. 7 | * 8 | * Blocks waiting for input at each case. 9 | */ 10 | 11 | #define _GNU_SOURCE 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | #define PAGE_SIZE 4096 18 | 19 | #define SIZE (PAGE_SIZE * 11) 20 | 21 | #if SIZE < PAGE_SIZE*7 22 | #error SIZE needs to be at least 7 pages long. 
#define PAGES 1025
#define ADDITIONAL 69
#define STACK_COUNT 7000

/*
 * Write the LEN bytes at MSG to stderr and terminate with EXIT_FAILURE.
 */
static void alloc_fatal(const char *msg, size_t len)
{
	if (write(STDERR_FILENO, msg, len) < 0) {
		/* Nothing more can be reported; the exit status still signals failure. */
	}

	exit(EXIT_FAILURE);
}

/*
 * Recurse COUNT levels deep, with a 1 KiB local buffer in every frame.
 *
 * The buffer is volatile so that the store before the recursive call and the
 * store after it must really be performed; without that the compiler is free
 * to drop the write-only buffer and the stack growth this function exists to
 * produce.
 */
static void stack_alloc_recur(int count)
{
	volatile char dummy[1024];

	if (count <= 0)
		return;

	dummy[0] = 'x';

	stack_alloc_recur(count-1);

	/* Touch the far end of the buffer once the deeper frames have returned. */
	dummy[1023] = dummy[0];
}

/*
 * Grow the stack by STACK_COUNT nested frames of stack_alloc_recur().
 */
static void stack_alloc(void)
{
	stack_alloc_recur(STACK_COUNT);
}

/*
 * Allocate PAGES pages plus ADDITIONAL bytes from the heap, write to every
 * byte, then free the buffer.
 *
 * Terminates the process with EXIT_FAILURE (after a message on stderr) if the
 * page size cannot be determined or the allocation fails, instead of writing
 * through a NULL pointer.
 */
static void heap_alloc(void)
{
	static const char no_page_size[] = "simple_alloc: cannot determine page size\n";
	static const char no_memory[] = "simple_alloc: malloc failed\n";
	long i, bytes;
	long page_size = sysconf(_SC_PAGESIZE);
	/* volatile: every store below must actually reach the buffer. */
	volatile unsigned char *buf;

	/* sysconf() returns -1 on failure. */
	if (page_size <= 0)
		alloc_fatal(no_page_size, sizeof(no_page_size) - 1);

	bytes = PAGES*page_size + ADDITIONAL;

	buf = malloc(bytes);
	if (!buf)
		alloc_fatal(no_memory, sizeof(no_memory) - 1);

	for (i = 0; i < bytes; i++)
		buf[i] = 'x';

	free((void *)buf);
}
-------------------------------------------------------------------------------- /pagetables/pagetables-usr.c: -------------------------------------------------------------------------------- 1 | #define _GNU_SOURCE 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #if __SIZEOF_POINTER__ != 8 9 | #error 64-bit only, sorry. 10 | #endif 11 | 12 | /* Kinda assuming x86-64 here. */ 13 | #define _PAGE_PRESENT (1UL<<0) 14 | #define _PAGE_RW (1UL<<1) 15 | #define _PAGE_USER (1UL<<2) 16 | #define _PAGE_ACCESSED (1UL<<5) 17 | #define _PAGE_DIRTY (1UL<<6) 18 | #define _PAGE_PSE (1UL<<7) 19 | #define _PAGE_GLOBAL (1UL<<8) 20 | #define _PAGE_NX (1UL<<63) 21 | #define PTRS_PER_PGD 512 22 | #define PTRS_PER_PUD 512 23 | #define PTRS_PER_PMD 512 24 | #define PTRS_PER_PTE 512 25 | #define PGDIR_SHIFT 39 26 | #define PUD_SHIFT 30 27 | #define PMD_SHIFT 21 28 | #define PAGE_SHIFT 12 29 | #define PAGE_SIZE (1UL< 1024 && suffix_ind < 5) { 169 | suffix_ind++; 170 | bytesf /= 1024; 171 | } 172 | 173 | printf("%6.1f %s", bytesf, human_suffix[suffix_ind]); 174 | } 175 | 176 | static void print_bin(unsigned long val, int len) 177 | { 178 | int i; 179 | char buf[65]; 180 | 181 | buf[len] = '\0'; 182 | for (i = 0; i < len; i++) 183 | buf[len-i-1] = val&(1UL<= sizeof(buf)) { 217 | fprintf(stderr, 218 | "pagetables: sync_vaddr: attempted write %d, >= %lu.\n", 219 | len, sizeof(buf)); 220 | exit(1); 221 | } 222 | 223 | if (fwrite(buf, 1, len, file) != len) { 224 | fprintf(stderr, "pagetables: write error at %s\n", VADDR_PATH); 225 | exit(1); 226 | } 227 | 228 | fclose(file); 229 | } 230 | 231 | static void update_sync_vaddr(enum pgtable_level level, int index) 232 | { 233 | update_vaddr(level, index); 234 | sync_vaddr(); 235 | } 236 | 237 | static void print_entry(int index, enum pgtable_level level, unsigned long entry) 238 | { 239 | unsigned long phys_addr, flags, present; 240 | int count = level_size[level]; 241 | unsigned long phys_addr_mask = (~(count - 1)) & 
MAX_PHYS_MASK; 242 | unsigned long flags_mask = ~phys_addr_mask; 243 | 244 | phys_addr = entry&phys_addr_mask; 245 | flags = entry&flags_mask; 246 | present = flags&_PAGE_PRESENT; 247 | 248 | print_indent(level); 249 | printf("%03d ", index < 0 ? -index : index); 250 | 251 | if (index < 0) { 252 | printf(" \n"); 253 | return; 254 | } 255 | 256 | if (!present) 257 | printf(" "); 258 | else 259 | printf("%016lx ", phys_addr); 260 | 261 | print_bin(flags, FLAGS_MIN_BITS); 262 | 263 | if (flags&_PAGE_NX) 264 | printf(" NX"); 265 | if (flags&_PAGE_PSE) 266 | printf(" H"); 267 | 268 | printf("\n"); 269 | } 270 | 271 | static void update_pte_counts(unsigned long entry) 272 | { 273 | int i; 274 | 275 | for (i = 0; i < FLAG_COUNT; i++) 276 | if (entry&flag_mapping[i]) 277 | pte_count[i]++; 278 | } 279 | 280 | static void update_stats(enum pgtable_level level, unsigned long entry, int valid) 281 | { 282 | int huge = entry&_PAGE_PSE; 283 | 284 | if (!(entry&_PAGE_PRESENT)) 285 | return; 286 | 287 | if (!valid) { 288 | invalid_entry_count++; 289 | return; 290 | } 291 | 292 | if (huge) { 293 | switch (level) { 294 | case PUD_LEVEL: 295 | gigantic_page_count++; 296 | 297 | break; 298 | case PMD_LEVEL: 299 | huge_page_count++; 300 | 301 | break; 302 | default: 303 | fprintf(stderr, "WARNING: Unrecognised huge page at %s level\n", 304 | level_name[level]); 305 | 306 | break; 307 | } 308 | } else { 309 | /* Each entry is a page of the next level. 
*/ 310 | page_count[level+1]++; 311 | } 312 | 313 | if (level == PTE_LEVEL || huge) 314 | update_pte_counts(entry); 315 | } 316 | 317 | static void print_pagetable(enum pgtable_level level) 318 | { 319 | int i, start = 0; 320 | unsigned long entry; 321 | int count = level_size[level]; 322 | 323 | FILE *file = fopen(level_path[level], "r"); 324 | 325 | if (!file) { 326 | fprintf(stderr, "pagetables: error opening %s: %s\n", 327 | level_path[level], strerror(errno)); 328 | exit(1); 329 | } 330 | 331 | if (level == PGD_LEVEL) { 332 | if (HIDE_KERNEL) 333 | count = KERNEL_PGD_BOUNDARY-1; 334 | else if (HIDE_USER && level == PGD_LEVEL) { 335 | /* fseek() seems not to work for sysfs files. */ 336 | for (i = 0; i < KERNEL_PGD_BOUNDARY; i++) 337 | if (fread(&entry, 1, WORD_SIZE, file) != WORD_SIZE) { 338 | fprintf(stderr, "pagetables: error: seek error: %s\n", 339 | strerror(errno)); 340 | exit(1); 341 | } 342 | 343 | start = KERNEL_PGD_BOUNDARY; 344 | } 345 | } 346 | 347 | for (i = start; i < count; i++) { 348 | int valid = 1; 349 | int present, huge; 350 | 351 | if (fread(&entry, 1, WORD_SIZE, file) != WORD_SIZE) { 352 | if (errno == EINVAL) { 353 | entry = 0; 354 | valid = 0; 355 | } else { 356 | fprintf(stderr, 357 | "pagetables: error: read error: %s\n", 358 | strerror(errno)); 359 | exit(1); 360 | } 361 | } 362 | 363 | /* Skip empty entries. */ 364 | if (valid && !entry) 365 | continue; 366 | 367 | present = entry&_PAGE_PRESENT; 368 | huge = entry&_PAGE_PSE; 369 | 370 | if (!STATS_ONLY) 371 | print_entry(valid ? 
i : -i, level, entry); 372 | 373 | update_stats(level, entry, valid); 374 | 375 | if (present && !huge && level < MAX_LEVEL) { 376 | update_sync_vaddr(level, i); 377 | 378 | print_pagetable(level + 1); 379 | } 380 | } 381 | 382 | fclose(file); 383 | } 384 | 385 | static void print_counts(void) 386 | { 387 | int i; 388 | unsigned long count, ptes, total = 0, total_bytes = 0; 389 | 390 | puts("\n== Page Counts ==\n"); 391 | 392 | /* Pedantry to the absolute maximum. */ 393 | printf("PGD pages:\t 1 ( 4.0 KiB)\n"); 394 | total++; 395 | total_bytes += PAGE_SIZE; 396 | 397 | for (i = 1; i < LEVEL_COUNT; i++) { 398 | count = page_count[i]; 399 | 400 | printf("%s pages:\t%8lu (", level_name[i], count); 401 | print_human_bytes(count * PAGE_SIZE); 402 | printf(")\n"); 403 | 404 | total += count; 405 | total_bytes += count*PAGE_SIZE; 406 | } 407 | 408 | if (gigantic_page_count > 0) { 409 | printf("Giga Phys pages:%8lu (", gigantic_page_count); 410 | print_human_bytes(gigantic_page_count * GIGA_PAGE_SIZE); 411 | printf(")\n"); 412 | 413 | total += gigantic_page_count; 414 | total_bytes += gigantic_page_count * GIGA_PAGE_SIZE; 415 | } 416 | 417 | if (huge_page_count > 0) { 418 | printf("Huge Phys pages:%8lu (", huge_page_count); 419 | print_human_bytes(huge_page_count * HUGE_PAGE_SIZE); 420 | printf(")\n"); 421 | 422 | total += huge_page_count; 423 | total_bytes += huge_page_count * HUGE_PAGE_SIZE; 424 | } 425 | 426 | printf("TOTAL:\t\t%8lu ", total); 427 | 428 | printf("("); 429 | print_human_bytes(total_bytes); 430 | printf(")\n\n"); 431 | 432 | if (invalid_entry_count > 0) 433 | printf("!!! 
Invalid (unreadable) entries: %lu\n\n", 434 | invalid_entry_count); 435 | 436 | ptes = page_count[PHYS_4K_LEVEL] + huge_page_count + gigantic_page_count; 437 | for (i = 0; i < FLAG_COUNT; i++) { 438 | count = pte_count[i]; 439 | 440 | if (count == 0) 441 | continue; 442 | 443 | printf("%s PTEs:\t%8lu/%lu (", flag_name[i], pte_count[i], ptes); 444 | print_human_bytes((unsigned long)pte_count[i] * PAGE_SIZE); 445 | printf(")\n"); 446 | } 447 | } 448 | 449 | int main(int argc, char *argv[]) 450 | { 451 | if (argc > 1) 452 | set_target_pid(argv[1]); 453 | else 454 | set_target_pid("0"); 455 | 456 | print_pagetable(PGD_LEVEL); 457 | print_counts(); 458 | 459 | return EXIT_SUCCESS; 460 | } 461 | -------------------------------------------------------------------------------- /pagetables/pagetables.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | /* 8 | * TODO: Is possible page reads could be non-atomic, check. 9 | * TODO: Probably needs some locking, check. 
10 | */ 11 | 12 | MODULE_LICENSE("GPL"); 13 | MODULE_AUTHOR("Lorenzo Stoakes "); 14 | MODULE_DESCRIPTION("Simple experimental tool for extracting page tables."); 15 | 16 | static u64 vaddr; 17 | static pid_t target_pid_nr; 18 | static struct dentry *pagetables_dir; 19 | 20 | static struct mm_struct *get_mm(void) 21 | { 22 | struct task_struct *task; 23 | struct pid *pid; 24 | struct mm_struct *ret = NULL; 25 | 26 | if (target_pid_nr == 0) 27 | return current->mm; 28 | 29 | rcu_read_lock(); 30 | 31 | pid = find_vpid(target_pid_nr); 32 | if (pid == NULL) 33 | goto done; 34 | 35 | task = pid_task(pid, PIDTYPE_PID); 36 | if (!task) 37 | goto done; 38 | 39 | ret = task->mm; 40 | done: 41 | rcu_read_unlock(); 42 | return ret; 43 | } 44 | 45 | static ssize_t pgd_read(struct file *file, char __user *out, size_t size, 46 | loff_t *off) 47 | { 48 | struct mm_struct *mm = get_mm(); 49 | 50 | if (!mm) 51 | return -ESRCH; 52 | 53 | return simple_read_from_buffer(out, size, off, mm->pgd, 54 | sizeof(unsigned long) * PTRS_PER_PGD); 55 | } 56 | 57 | static const struct file_operations pgd_fops = { 58 | .owner = THIS_MODULE, 59 | .read = pgd_read 60 | }; 61 | 62 | static ssize_t pud_read(struct file *file, char __user *out, size_t size, 63 | loff_t *off) 64 | { 65 | pgd_t *pgdp, pgd; 66 | pud_t *pudp; 67 | struct mm_struct *mm = get_mm(); 68 | 69 | if (!mm) 70 | return -ESRCH; 71 | 72 | pgdp = pgd_offset(mm, vaddr); 73 | pgd = *pgdp; 74 | if (pgd_none(pgd) || pgd_bad(pgd)) 75 | return -EINVAL; 76 | 77 | pudp = (pud_t *)pgd_page_vaddr(pgd); 78 | 79 | return simple_read_from_buffer(out, size, off, pudp, 80 | sizeof(unsigned long) * PTRS_PER_PUD); 81 | } 82 | 83 | static const struct file_operations pud_fops = { 84 | .owner = THIS_MODULE, 85 | .read = pud_read 86 | }; 87 | 88 | static ssize_t pmd_read(struct file *file, char __user *out, size_t size, 89 | loff_t *off) 90 | { 91 | pgd_t *pgdp, pgd; 92 | pud_t *pudp, pud; 93 | pmd_t *pmdp; 94 | struct mm_struct *mm = get_mm(); 95 | 96 | 
if (!mm) 97 | return -ESRCH; 98 | 99 | pgdp = pgd_offset(mm, vaddr); 100 | pgd = *pgdp; 101 | if (pgd_none(pgd) || pgd_bad(pgd)) 102 | return -EINVAL; 103 | 104 | pudp = pud_offset(pgdp, vaddr); 105 | pud = *pudp; 106 | if (pud_none(pud) || pud_bad(pud)) 107 | return -EINVAL; 108 | 109 | pmdp = (pmd_t *)pud_page_vaddr(pud); 110 | 111 | return simple_read_from_buffer(out, size, off, pmdp, 112 | sizeof(unsigned long) * PTRS_PER_PMD); 113 | } 114 | 115 | static const struct file_operations pmd_fops = { 116 | .owner = THIS_MODULE, 117 | .read = pmd_read 118 | }; 119 | 120 | static ssize_t pte_read(struct file *file, char __user *out, size_t size, 121 | loff_t *off) 122 | { 123 | pgd_t *pgdp, pgd; 124 | pud_t *pudp, pud; 125 | pmd_t *pmdp, pmd; 126 | pte_t *ptep; 127 | struct mm_struct *mm = get_mm(); 128 | 129 | if (!mm) 130 | return -ESRCH; 131 | 132 | pgdp = pgd_offset(mm, vaddr); 133 | pgd = *pgdp; 134 | if (pgd_none(pgd) || pgd_bad(pgd)) 135 | return -EINVAL; 136 | 137 | pudp = pud_offset(pgdp, vaddr); 138 | pud = *pudp; 139 | if (pud_none(pud) || pud_bad(pud)) 140 | return -EINVAL; 141 | 142 | pmdp = pmd_offset(pudp, vaddr); 143 | pmd = *pmdp; 144 | if (pmd_none(pmd) || pmd_bad(pmd)) 145 | return -EINVAL; 146 | 147 | /* TODO: Perhaps we should enforce a lock here. 
*/ 148 | ptep = (pte_t *)pmd_page_vaddr(pmd); 149 | 150 | return simple_read_from_buffer(out, size, off, ptep, 151 | sizeof(unsigned long) * PTRS_PER_PTE); 152 | } 153 | 154 | static const struct file_operations pte_fops = { 155 | .owner = THIS_MODULE, 156 | .read = pte_read 157 | }; 158 | 159 | static int __init pagetables_init(void) 160 | { 161 | struct dentry *filep; 162 | 163 | filep = debugfs_create_dir("pagetables", NULL); 164 | if (IS_ERR_OR_NULL(filep)) 165 | goto error; 166 | else 167 | pagetables_dir = filep; 168 | 169 | 170 | filep = debugfs_create_x64("vaddr", 0600, pagetables_dir, 171 | &vaddr); 172 | if (IS_ERR_OR_NULL(filep)) 173 | goto error; 174 | 175 | filep = debugfs_create_u32("pid", 0600, pagetables_dir, 176 | &target_pid_nr); 177 | if (IS_ERR_OR_NULL(filep)) 178 | goto error; 179 | 180 | filep = debugfs_create_file("pgd", 0400, pagetables_dir, NULL, 181 | &pgd_fops); 182 | if (IS_ERR_OR_NULL(filep)) 183 | goto error; 184 | 185 | filep = debugfs_create_file("pud", 0400, pagetables_dir, NULL, 186 | &pud_fops); 187 | if (IS_ERR_OR_NULL(filep)) 188 | goto error; 189 | 190 | filep = debugfs_create_file("pmd", 0400, pagetables_dir, NULL, 191 | &pmd_fops); 192 | if (IS_ERR_OR_NULL(filep)) 193 | goto error; 194 | 195 | filep = debugfs_create_file("pte", 0400, pagetables_dir, NULL, 196 | &pte_fops); 197 | if (IS_ERR_OR_NULL(filep)) 198 | goto error; 199 | 200 | return 0; 201 | 202 | error: 203 | debugfs_remove_recursive(pagetables_dir); 204 | return filep ? PTR_ERR(filep) : -ENOMEM; 205 | } 206 | 207 | static void __exit pagetables_exit(void) 208 | { 209 | /* Does the right thing if pagetables_dir NULL. */ 210 | debugfs_remove_recursive(pagetables_dir); 211 | } 212 | 213 | module_init(pagetables_init); 214 | module_exit(pagetables_exit); 215 | --------------------------------------------------------------------------------