├── .gitignore ├── LICENSE ├── Makefile ├── README.md ├── malloc-demo.c ├── page-info-test.c ├── page-info.c └── page-info.h /.gitignore: -------------------------------------------------------------------------------- 1 | # dot files 2 | /.* 3 | 4 | # binary 5 | page-info-test 6 | malloc-demo 7 | 8 | # Prerequisites 9 | *.d 10 | 11 | # Object files 12 | *.o 13 | *.ko 14 | *.obj 15 | *.elf 16 | 17 | # Linker output 18 | *.ilk 19 | *.map 20 | *.exp 21 | 22 | # Precompiled Headers 23 | *.gch 24 | *.pch 25 | 26 | # Libraries 27 | *.lib 28 | *.a 29 | *.la 30 | *.lo 31 | 32 | # Shared objects (inc. Windows DLLs) 33 | *.dll 34 | *.so 35 | *.so.* 36 | *.dylib 37 | 38 | # Executables 39 | *.exe 40 | *.out 41 | *.app 42 | *.i*86 43 | *.x86_64 44 | *.hex 45 | 46 | # Debug files 47 | *.dSYM/ 48 | *.su 49 | *.idb 50 | *.pdb 51 | 52 | # Kernel Module Compile Results 53 | *.mod* 54 | *.cmd 55 | .tmp_versions/ 56 | modules.order 57 | Module.symvers 58 | Mkfile.old 59 | dkms.conf 60 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 travisdowns 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY := clean test valgrind 2 | CPPFLAGS := -g -Os -Wall -Wextra -D_GNU_SOURCE=1 3 | CFLAGS := -std=c11 4 | CXXFLAGS := -std=c++11 5 | BINARIES := page-info-test malloc-demo 6 | 7 | all: $(BINARIES) 8 | 9 | page-info.o : page-info.h 10 | 11 | $(BINARIES) : % : %.o page-info.o page-info.h 12 | $(CC) $(CFLAGS) -o $@ $< page-info.o 13 | 14 | clean: 15 | rm -f *.o $(BINARIES) 16 | 17 | test: valgrind 18 | 19 | valgrind: $(BINARIES) 20 | valgrind --error-exitcode=1 --leak-check=full ./malloc-demo 1000 21 | sudo valgrind --error-exitcode=1 --leak-check=full ./malloc-demo 1000 22 | MAX_KIB=4096 valgrind --error-exitcode=1 --leak-check=full ./page-info-test 23 | sudo MAX_KIB=4096 valgrind --error-exitcode=1 --leak-check=full ./page-info-test 24 | 25 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # page-info 2 | 3 | A small utility allowing programmatic access to information about the memory pages backing a given region of memory on Linux. For example, with page-info you can answer questions like: 4 | 5 | - How many of the pages associated with memory I just got from `malloc` are physically present in RAM? 
6 | - How much more information can I put on the stack before suffering a page fault? 7 | - What fraction of this allocation is backed by huge pages? 8 | - Have any pages in this range been swapped out to the swapfile? 9 | 10 | Basically this parses the `/proc/$PID/pagemap` file for the current process, which returns basic information about the allocated memory regions (VMAs), and then if possible it looks up more interesting flags on a per-page basis in `/proc/kpageflags`. The available flags are documented [here](https://www.kernel.org/doc/Documentation/vm/pagemap.txt) and more briefly on the [proc manpage](http://man7.org/linux/man-pages/man5/proc.5.html). 11 | 12 | ## Example 13 | 14 | As a simple example, here's a snippet which prints to stdout the percentage of pages that have been allocated with huge pages. 15 | 16 | ``` 17 | char *array = malloc(size); 18 | memset(array, 1, size); // commit the pages 19 | 20 | page_info_array pinfo = get_info_for_range(array, array + size); 21 | flag_count thp_count = get_flag_count(pinfo, KPF_THP); 22 | if (thp_count.pages_available) { 23 | printf("Source pages allocated with transparent hugepages: %4.1f%% (%lu total pages, %4.1f%% flagged)\n", 24 | 100.0 * thp_count.pages_set / thp_count.pages_total, 25 | thp_count.pages_total, 26 | 100.0 * thp_count.pages_available / thp_count.pages_total); 27 | } else { 28 | printf("Couldn't determine hugepage info (you are probably not running as root)\n"); 29 | } 30 | ``` 31 | 32 | A slightly more complete version of this example is available as a standalone program as [malloc-demo](malloc-demo.c). 
On my system it reports (this depends heavily on the value in `/sys/kernel/mm/transparent_hugepage/enabled`): 33 | 34 | ``` 35 | Allocating an array of size 7168 KiB using malloc 36 | Source pages allocated with transparent hugepages: 85.7% (1793 total pages, 100.0% flagged) 37 | ``` 38 | 39 | ## Permissions 40 | 41 | Unfortunately (from the perspective of those wanting to use this library to its maximum capability), most of the juicy information about backing pages lives in the `/proc/kpageflags` file and this file is only accessible as root. You can still use this utility as a regular user, but only a handful of flags that are encoded directly in `/proc/$PID/pagemap` are available. They are those directly named in the `page_info` structure in `page-info.h`: 42 | 43 | ``` 44 | /* soft-dirty set */ 45 | bool softdirty; 46 | /* exclusively mapped, see e.g., https://patchwork.kernel.org/patch/6787921/ */ 47 | bool exclusive; 48 | /* is a file mapping */ 49 | bool file; 50 | /* page is swapped out */ 51 | bool swapped; 52 | /* page is present, i.e, a physical page is allocated */ 53 | bool present; 54 | ``` 55 | 56 | So you can determine if a page is present, swapped out, its soft-dirty status, whether it is exclusive and whether it is a file mapping, but not much more. On older kernels, you can also get the _physical frame number_ (the `pfn` field), which is essentially the physical address of the page (shifted right by 12). 57 | 58 | So if you want the full info about a mapped region, you have to run this as root. This could probably be fixed as described in [issue #3](https://github.com/travisdowns/page-info/issues/2), but that work hasn't been done yet. 59 | 60 | ## Building 61 | 62 | Just run `make`, which builds the `page-info-test` and `malloc-demo` binaries. 63 | 64 | ## Running the test 65 | 66 | You can run the `page-info-test` binary to see the information obtained by getting page info on a series of allocations via `malloc`, starting at 256 KiB and doubling up to 1 GiB by default (set the `MAX_KIB` environment variable to change the upper limit). 
Information is presented both before and after touching each page in the allocation via `memset`. The difference is that for larger allocation sizes, most pages in the allocation are not present until you touch them, so limited information is available (indeed, there are no pages backing them, so questions about the nature of the backing pages have no answer). 67 | 68 | Here's a portion of the output on my system: 69 | 70 | ``` 71 | PFN sdirty excl file swappd presnt LOCK ACTI SLAB BUDD MMAP ANON SWAP SWAP COMP COMP HUGE UNEV HWPO NOPA KSM THP BALL ZERO IDLE 72 | 73 | MADV_HUGEPAGE 2.00 MiB BEFORE memset: ---------- 1.0000 0.0019 0.0000 0.0000 0.0019 0.00 1.00 0.00 0.00 1.00 1.00 0.00 1.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 74 | MADV_HUGEPAGE 2.00 MiB AFTER memset: ---------- 1.0000 1.0000 0.0000 0.0000 1.0000 0.00 1.00 0.00 0.00 1.00 1.00 0.00 1.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 75 | MADV_NORMAL 2.00 MiB BEFORE memset: ---------- 1.0000 0.5029 0.0000 0.0000 0.5029 0.00 1.00 0.00 0.00 1.00 1.00 0.00 1.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 76 | MADV_NORMAL 2.00 MiB AFTER memset: ---------- 1.0000 1.0000 0.0000 0.0000 1.0000 0.00 1.00 0.00 0.00 1.00 1.00 0.00 1.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 77 | MADV_NOHUGEPAGE 2.00 MiB BEFORE memset: ---------- 1.0000 1.0000 0.0000 0.0000 1.0000 0.00 1.00 0.00 0.00 1.00 1.00 0.00 1.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 78 | MADV_NOHUGEPAGE 2.00 MiB AFTER memset: ---------- 1.0000 1.0000 0.0000 0.0000 1.0000 0.00 1.00 0.00 0.00 1.00 1.00 0.00 1.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 79 | 80 | MADV_HUGEPAGE 4.00 MiB BEFORE memset: ---------- 1.0000 0.0010 0.0000 0.0000 0.0010 0.00 1.00 0.00 0.00 1.00 1.00 0.00 1.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 81 | MADV_HUGEPAGE 4.00 MiB AFTER memset: ---------- 1.0000 1.0000 0.0000 0.0000 1.0000 0.00 0.50 0.00 0.00 1.00 1.00 0.00 0.50 0.00 0.50 
0.00 0.00 0.00 0.00 0.00 0.50 0.00 0.00 0.00 82 | MADV_NORMAL 4.00 MiB BEFORE memset: ---------- 1.0000 0.5015 0.0000 0.0000 0.5015 0.00 1.00 0.00 0.00 1.00 1.00 0.00 1.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 83 | MADV_NORMAL 4.00 MiB AFTER memset: ---------- 1.0000 1.0000 0.0000 0.0000 1.0000 0.00 1.00 0.00 0.00 1.00 1.00 0.00 1.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 84 | MADV_NOHUGEPAGE 4.00 MiB BEFORE memset: ---------- 1.0000 1.0000 0.0000 0.0000 1.0000 0.00 1.00 0.00 0.00 1.00 1.00 0.00 1.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 85 | MADV_NOHUGEPAGE 4.00 MiB AFTER memset: ---------- 1.0000 1.0000 0.0000 0.0000 1.0000 0.00 1.00 0.00 0.00 1.00 1.00 0.00 1.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 86 | 87 | MADV_HUGEPAGE 8.00 MiB BEFORE memset: ---------- 1.0000 0.0005 0.0000 0.0000 0.0005 0.00 1.00 0.00 0.00 1.00 1.00 0.00 1.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 88 | MADV_HUGEPAGE 8.00 MiB AFTER memset: ---------- 1.0000 1.0000 0.0000 0.0000 1.0000 0.00 0.25 0.00 0.00 1.00 1.00 0.00 0.25 0.00 0.75 0.00 0.00 0.00 0.00 0.00 0.75 0.00 0.00 0.00 89 | MADV_NORMAL 8.00 MiB BEFORE memset: ---------- 1.0000 0.0010 0.0000 0.0000 0.0010 0.00 1.00 0.00 0.00 1.00 1.00 0.00 1.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 90 | MADV_NORMAL 8.00 MiB AFTER memset: ---------- 1.0000 1.0000 0.0000 0.0000 1.0000 0.00 0.25 0.00 0.00 1.00 1.00 0.00 0.25 0.00 0.75 0.00 0.00 0.00 0.00 0.00 0.75 0.00 0.00 0.00 91 | MADV_NOHUGEPAGE 8.00 MiB BEFORE memset: ---------- 1.0000 1.0000 0.0000 0.0000 1.0000 0.00 0.25 0.00 0.00 1.00 1.00 0.00 0.25 0.00 0.75 0.00 0.00 0.00 0.00 0.00 0.75 0.00 0.00 0.00 92 | MADV_NOHUGEPAGE 8.00 MiB AFTER memset: ---------- 1.0000 1.0000 0.0000 0.0000 1.0000 0.00 0.25 0.00 0.00 1.00 1.00 0.00 0.25 0.00 0.75 0.00 0.00 0.00 0.00 0.00 0.75 0.00 0.00 0.00 93 | 94 | MADV_HUGEPAGE 16.00 MiB BEFORE memset: ---------- 1.0000 0.0002 0.0000 0.0000 0.0002 0.00 1.00 0.00 0.00 
1.00 1.00 0.00 1.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 95 | MADV_HUGEPAGE 16.00 MiB AFTER memset: ---------- 1.0000 1.0000 0.0000 0.0000 1.0000 0.00 0.13 0.00 0.00 1.00 1.00 0.00 0.13 0.00 0.87 0.00 0.00 0.00 0.00 0.00 0.87 0.00 0.00 0.00 96 | MADV_NORMAL 16.00 MiB BEFORE memset: ---------- 1.0000 0.5004 0.0000 0.0000 0.5004 0.00 0.25 0.00 0.00 1.00 1.00 0.00 0.25 0.00 0.75 0.00 0.00 0.00 0.00 0.00 0.75 0.00 0.00 0.00 97 | MADV_NORMAL 16.00 MiB AFTER memset: ---------- 1.0000 1.0000 0.0000 0.0000 1.0000 0.00 0.25 0.00 0.00 1.00 1.00 0.00 0.25 0.00 0.75 0.00 0.00 0.00 0.00 0.00 0.75 0.00 0.00 0.00 98 | MADV_NOHUGEPAGE 16.00 MiB BEFORE memset: ---------- 1.0000 1.0000 0.0000 0.0000 1.0000 0.00 0.25 0.00 0.00 1.00 1.00 0.00 0.25 0.00 0.75 0.00 0.00 0.00 0.00 0.00 0.75 0.00 0.00 0.00 99 | MADV_NOHUGEPAGE 16.00 MiB AFTER memset: ---------- 1.0000 1.0000 0.0000 0.0000 1.0000 0.00 0.25 0.00 0.00 1.00 1.00 0.00 0.25 0.00 0.75 0.00 0.00 0.00 0.00 0.00 0.75 0.00 0.00 0.00 100 | 101 | MADV_HUGEPAGE 32.00 MiB BEFORE memset: ---------- 1.0000 0.0001 0.0000 0.0000 0.0001 0.00 1.00 0.00 0.00 1.00 1.00 0.00 1.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 102 | MADV_HUGEPAGE 32.00 MiB AFTER memset: ---------- 1.0000 1.0000 0.0000 0.0000 1.0000 0.00 0.06 0.00 0.00 1.00 1.00 0.00 0.06 0.00 0.94 0.00 0.00 0.00 0.00 0.00 0.94 0.00 0.00 0.00 103 | MADV_NORMAL 32.00 MiB BEFORE memset: ---------- 1.0000 0.0001 0.0000 0.0000 0.0001 0.00 1.00 0.00 0.00 1.00 1.00 0.00 1.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 104 | MADV_NORMAL 32.00 MiB AFTER memset: ---------- 1.0000 1.0000 0.0000 0.0000 1.0000 0.00 0.06 0.00 0.00 1.00 1.00 0.00 0.06 0.00 0.94 0.00 0.00 0.00 0.00 0.00 0.94 0.00 0.00 0.00 105 | MADV_NOHUGEPAGE 32.00 MiB BEFORE memset: ---------- 1.0000 0.0001 0.0000 0.0000 0.0001 0.00 1.00 0.00 0.00 1.00 1.00 0.00 1.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 106 | MADV_NOHUGEPAGE 32.00 MiB AFTER memset: ---------- 1.0000 1.0000 
0.0000 0.0000 1.0000 0.00 1.00 0.00 0.00 1.00 1.00 0.00 1.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 107 | ``` 108 | You can see, for example, by looking at the `presnt` column, what fraction of pages are "present in RAM" - as the allocations become larger, only a small fraction (usually the first page) of an allocation is present after allocation, but all are present following the `memset`. You can also look at the `THP` column to see that some fraction of the larger allocations are usually backed by huge pages, depending on the advice passed to the `madvise()` call. 109 | 110 | There are many other columns which have more or less interesting information depending on your scenario. The first few columns in lowercase (`sdirty excl file swappd presnt`) are available without special permissions since they come from `/proc/$PID/pagemap`, but the following uppercase columns require `/proc/kpageflags` access and so are generally only available to processes running as root (more precisely, those with the `CAP_SYS_ADMIN` capability). 111 | 112 | ## Using it in your project. 113 | 114 | Just copy `page-info.c` and `page-info.h` into your project and include `page-info.h` in any file where you want to access the exposed methods. 
115 | -------------------------------------------------------------------------------- /malloc-demo.c: -------------------------------------------------------------------------------- 1 | #include "page-info.h" 2 | #include 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #define DEFAULT_KIB (7 * 1024) // 7 MiB 10 | int main(int argc, char **argv) { 11 | size_t size = 0; 12 | if (argc >= 2) { 13 | size = atoi(argv[1]) * 1024; 14 | } 15 | if (!size) { 16 | size = DEFAULT_KIB * 1024; 17 | } 18 | 19 | printf("Allocating an array of size %zu KiB using malloc\n", size / 1024); 20 | char *array = malloc(size); 21 | if (!array) { 22 | err(EXIT_FAILURE, "malloc failed"); 23 | } 24 | memset(array, 1, size); // commit the pages 25 | 26 | page_info_array pinfo = get_info_for_range(array, array + size); 27 | flag_count thp_count = get_flag_count(pinfo, KPF_THP); 28 | 29 | free(array); 30 | 31 | if (thp_count.pages_available) { 32 | printf("Source pages allocated with transparent hugepages: %4.1f%% (%lu total pages, %4.1f%% flagged)\n", 33 | 100.0 * thp_count.pages_set / thp_count.pages_total, 34 | thp_count.pages_total, 35 | 100.0 * thp_count.pages_available / thp_count.pages_total); 36 | } else { 37 | printf("Couldn't determine hugepage info (you are probably not running as root)\n"); 38 | } 39 | free_info_array(pinfo); 40 | } 41 | -------------------------------------------------------------------------------- /page-info-test.c: -------------------------------------------------------------------------------- 1 | /* 2 | * page-info-test.c 3 | * 4 | * A basic test for page-info that allocates increasing amounts of memory with malloc() and then tries 5 | * three types of madvise calls: no call, MADV_HUGEPAGE and MADV_NOHUGEPAGE and gets page info on the result. 6 | * 7 | * Not really a "test" at all since we don't validate the results but you can eyeball the results and make 8 | * sure they look sane. 
Note that you dont't really expect the MADV flags to always work, since we are often 9 | * calling this on memory that has already been paged in (returned by malloc), and we are often calling it 10 | * on regions smaller than 2MB where we don't expect it to work anyways. 11 | */ 12 | #include "page-info.h" 13 | 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | 21 | static inline void *pagedown(void *p, unsigned psize) { 22 | return (void *)(((uintptr_t)p) & -(uintptr_t)psize); 23 | } 24 | 25 | int getenv_int(const char *var, int def) { 26 | const char *val = getenv(var); 27 | return val ? atoi(val) : def; 28 | } 29 | 30 | typedef struct { 31 | char const *name; 32 | int flag; 33 | } advice_s; 34 | 35 | #define ADVICE(flag) { #flag, flag } 36 | 37 | const advice_s advices[] = { ADVICE(MADV_HUGEPAGE), ADVICE(MADV_NORMAL), ADVICE(MADV_NOHUGEPAGE), {} }; 38 | 39 | void print_for_range(void *start, void *end) { 40 | page_info_array infos = get_info_for_range(start, end); 41 | fprint_ratios_noheader(stdout, infos); 42 | free_info_array(infos); 43 | } 44 | 45 | void do_full_table() { 46 | int psize = getpagesize(); 47 | 48 | printf("%44s", ""); 49 | fprint_info_header(stdout); 50 | 51 | size_t max_kib = getenv_int("MAX_KIB", 1024 * 1024); 52 | 53 | for (size_t kib = 256; kib <= max_kib; kib *=2) { 54 | for (const advice_s *advice = advices; advice->name; advice++) { 55 | size_t size = kib * 1024; 56 | char *b = malloc(size); 57 | if (advice->flag != MADV_NORMAL) { 58 | char *ba = pagedown(b, psize); 59 | if (madvise(ba, size, advice->flag)) { 60 | err(EXIT_FAILURE, "madvise(%s) failed", advice->name); 61 | } 62 | } 63 | if (!b) { 64 | // this probably wont' happen because with overcommit the malloc itself generally succeeds, 65 | // but you may get killed later when mapping in pages 66 | err(EXIT_FAILURE, "Allocating %zu bytes failed, exiting...", size); 67 | } 68 | printf("%16s %7.2f MiB BEFORE memset: ", advice->name, (double)kib / 
1024); 69 | print_for_range(b, b + size); 70 | memset(b, 0x42, size); 71 | printf("%16s %7.2f MiB AFTER memset: ", advice->name, (double)kib / 1024); 72 | print_for_range(b, b + size); 73 | free(b); 74 | } 75 | printf("\n"); 76 | } 77 | } 78 | 79 | #define W "7" 80 | 81 | // printing the 82 | void do_one_flag_ratio(char const * name) { 83 | 84 | int flag = flag_from_name(name); 85 | if (flag < 0) { 86 | errx(EXIT_FAILURE, "Couldn't find flag with name '%s'", name); 87 | } 88 | 89 | printf(" size memset %10s %" W "s %" W "s %" W "s\n", "FLAG", "SET", "UNSET", "UNAVAIL"); 90 | 91 | for (size_t kib = 256; kib <= 1024 * 1024; kib *=2) { 92 | size_t size = kib * 1024; 93 | char *b = malloc(size); 94 | if (!b) { 95 | // this probably wont' happen because with overcommit the malloc itself generally succeeds, 96 | // but you may get killed later when mapping in pages 97 | err(EXIT_FAILURE, "Allocating %zu bytes failed, exiting...", size); 98 | } 99 | 100 | flag_count count; 101 | count = get_flag_count(get_info_for_range(b, b + size), flag); 102 | printf("%7.2f MiB BEFORE %10s %"W"zu %"W"zu %"W"zu\n", (double)kib / 1024, name, 103 | count.pages_set, count.pages_available - count.pages_set, count.pages_total - count.pages_available); 104 | memset(b, 0x42, size); 105 | count = get_flag_count(get_info_for_range(b, b + size), flag); 106 | printf("%7.2f MiB AFTER %10s %"W"zu %"W"zu %"W"zu\n", (double)kib / 1024, name, 107 | count.pages_set, count.pages_available - count.pages_set, count.pages_total - count.pages_available); 108 | } 109 | } 110 | 111 | int main(int argc, char** argv) { 112 | 113 | int psize = getpagesize(); 114 | 115 | printf("PAGE_SIZE = %d, PID = %ld\n", psize, (long)getpid()); 116 | 117 | if (argc == 1) { 118 | do_full_table(); 119 | } else if (argc == 2) { 120 | do_one_flag_ratio(argv[1]); 121 | } else { 122 | fprintf(stderr, "Usage: page-info-test [flag]\n"); 123 | return EXIT_FAILURE; 124 | } 125 | 126 | printf("DONE\n"); 127 | } 128 | 129 | 130 | 
-------------------------------------------------------------------------------- /page-info.c: -------------------------------------------------------------------------------- 1 | /* 2 | * smaps.c 3 | * 4 | * Created on: Jan 31, 2017 5 | * Author: tdowns 6 | */ 7 | 8 | #include "page-info.h" 9 | 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | 23 | 24 | #define PM_PFRAME_MASK ((1ULL << 55) - 1) 25 | #define PM_SOFT_DIRTY (1ULL << 55) 26 | #define PM_MMAP_EXCLUSIVE (1ULL << 56) 27 | #define PM_FILE (1ULL << 61) 28 | #define PM_SWAP (1ULL << 62) 29 | #define PM_PRESENT (1ULL << 63) 30 | 31 | 32 | /** bundles a flag with its description */ 33 | typedef struct { 34 | int flag_num; 35 | char const *name; 36 | bool show_default; 37 | } flag; 38 | 39 | #define FLAG_SHOW(name) { KPF_ ## name, # name, true }, 40 | #define FLAG_HIDE(name) { KPF_ ## name, # name, false }, 41 | 42 | const flag kpageflag_defs[] = { 43 | FLAG_SHOW(LOCKED ) 44 | FLAG_HIDE(ERROR ) 45 | FLAG_HIDE(REFERENCED ) 46 | FLAG_HIDE(UPTODATE ) 47 | FLAG_HIDE(DIRTY ) 48 | FLAG_HIDE(LRU ) 49 | FLAG_SHOW(ACTIVE ) 50 | FLAG_SHOW(SLAB ) 51 | FLAG_HIDE(WRITEBACK ) 52 | FLAG_HIDE(RECLAIM ) 53 | FLAG_SHOW(BUDDY ) 54 | FLAG_SHOW(MMAP ) 55 | FLAG_SHOW(ANON ) 56 | FLAG_SHOW(SWAPCACHE ) 57 | FLAG_SHOW(SWAPBACKED ) 58 | FLAG_SHOW(COMPOUND_HEAD) 59 | FLAG_SHOW(COMPOUND_TAIL) 60 | FLAG_SHOW(HUGE ) 61 | FLAG_SHOW(UNEVICTABLE ) 62 | FLAG_SHOW(HWPOISON ) 63 | FLAG_SHOW(NOPAGE ) 64 | FLAG_SHOW(KSM ) 65 | FLAG_SHOW(THP ) 66 | /* older kernels won't have these new flags, so conditionally compile in support for them */ 67 | #ifdef KPF_BALLOON 68 | FLAG_SHOW(BALLOON ) 69 | #endif 70 | #ifdef KPF_ZERO_PAGE 71 | FLAG_SHOW(ZERO_PAGE ) 72 | #endif 73 | #ifdef KPF_IDLE 74 | FLAG_SHOW(IDLE ) 75 | #endif 76 | 77 | { -1, 0, false } // sentinel 78 | }; 79 | 80 | #define kpageflag_count 
(sizeof(kpageflag_defs)/sizeof(kpageflag_defs[0]) - 1) 81 | 82 | #define ITERATE_FLAGS for (flag const *f = kpageflag_defs; f->flag_num != -1; f++) 83 | 84 | 85 | // x-macro for doing some operation on all the pagemap flags 86 | #define PAGEMAP_X(fn) \ 87 | fn(softdirty ) \ 88 | fn(exclusive ) \ 89 | fn(file ) \ 90 | fn(swapped ) \ 91 | fn(present ) 92 | 93 | static unsigned get_page_size() { 94 | long psize = sysconf(_SC_PAGESIZE); 95 | assert(psize >= 1 && psize <= UINT_MAX); 96 | return (unsigned)psize; 97 | } 98 | 99 | /* round the given pointer down to the page boundary (i.e,. return a pointer to the page it lives in) */ 100 | static inline void *pagedown(void *p, unsigned psize) { 101 | return (void *)(((uintptr_t)p) & -(uintptr_t)psize); 102 | } 103 | 104 | /** 105 | * Extract the interesting info from a 64-bit pagemap value, and return it as a page_info. 106 | */ 107 | page_info extract_info(uint64_t bits) { 108 | page_info ret = {}; 109 | ret.pfn = bits & PM_PFRAME_MASK; 110 | ret.softdirty = bits & PM_SOFT_DIRTY; 111 | ret.exclusive = bits & PM_MMAP_EXCLUSIVE; 112 | ret.file = bits & PM_FILE; 113 | ret.swapped = bits & PM_SWAP; 114 | ret.present = bits & PM_PRESENT; 115 | return ret; 116 | } 117 | 118 | /* print page_info to the given file */ 119 | void fprint_info(FILE* f, page_info info) { 120 | fprintf(f, 121 | "PFN: %p\n" 122 | "softdirty = %d\n" 123 | "exclusive = %d\n" 124 | "file = %d\n" 125 | "swapped = %d\n" 126 | "present = %d\n", 127 | (void*)info.pfn, 128 | info.softdirty, 129 | info.exclusive, 130 | info.file, 131 | info.swapped, 132 | info.present); 133 | } 134 | 135 | void print_info(page_info info) { 136 | fprint_info(stdout, info); 137 | } 138 | 139 | flag_count get_flag_count(page_info_array infos, int flag_num) { 140 | flag_count ret = {}; 141 | 142 | if (flag_num < 0 || flag_num > 63) { 143 | return ret; 144 | } 145 | 146 | uint64_t flag = (1ULL << flag_num); 147 | 148 | ret.flag = flag_num; 149 | ret.pages_total = infos.num_pages; 150 
| 151 | for (size_t i = 0; i < infos.num_pages; i++) { 152 | page_info info = infos.info[i]; 153 | if (info.kpageflags_ok) { 154 | ret.pages_set += (info.kpageflags & flag) == flag; 155 | ret.pages_available++; 156 | } 157 | } 158 | return ret; 159 | } 160 | 161 | /** 162 | * Print the table header that lines up with the tabluar format used by the "table" printing 163 | * functions. Called by fprint_ratios, or you can call it yourself if you want to prefix the 164 | * output with your own columns. 165 | */ 166 | void fprint_info_header(FILE *file) { 167 | fprintf(file, " PFN sdirty excl file swappd presnt "); 168 | ITERATE_FLAGS { if (f->show_default) fprintf(file, "%4.4s ", f->name); } 169 | fprintf(file, "\n"); 170 | } 171 | 172 | /* print one info in a tabular format (as a single row) */ 173 | void fprint_info_row(FILE *file, page_info info) { 174 | fprintf(file, "%12p %7d%7d%7d%7d%7d ", 175 | (void*)info.pfn, 176 | info.softdirty, 177 | info.exclusive, 178 | info.file, 179 | info.swapped, 180 | info.present); 181 | 182 | if (info.kpageflags_ok) { 183 | ITERATE_FLAGS { if (f->show_default) fprintf(file, "%4d ", !!(info.kpageflags & (1ULL << f->flag_num))); } 184 | } 185 | fprintf(file, "\n"); 186 | } 187 | 188 | #define DECLARE_ACCUM(name) size_t name ## _accum = 0; 189 | #define INCR_ACCUM(name) name ## _accum += info->name; 190 | #define PRINT_ACCUM(name) fprintf(file, "%7.4f", (double)name ## _accum / infos.num_pages); 191 | 192 | 193 | void fprint_ratios_noheader(FILE *file, page_info_array infos) { 194 | PAGEMAP_X(DECLARE_ACCUM); 195 | size_t total_kpage_ok = 0; 196 | size_t flag_totals[kpageflag_count] = {}; 197 | for (size_t p = 0; p < infos.num_pages; p++) { 198 | page_info *info = &infos.info[p]; 199 | PAGEMAP_X(INCR_ACCUM); 200 | if (info->kpageflags_ok) { 201 | total_kpage_ok++; 202 | int i = 0; 203 | ITERATE_FLAGS { 204 | flag_totals[i++] += !!(info->kpageflags & (1ULL << f->flag_num)); 205 | } 206 | } 207 | } 208 | 209 | printf("%12s ", 
"----------"); 210 | PAGEMAP_X(PRINT_ACCUM) 211 | 212 | int i = 0; 213 | if (total_kpage_ok > 0) { 214 | ITERATE_FLAGS { 215 | if (f->show_default) fprintf(file, " %4.2f", (double)flag_totals[i] / total_kpage_ok); 216 | i++; 217 | } 218 | } 219 | fprintf(file, "\n"); 220 | } 221 | 222 | /* 223 | * Print a table with one row per page from the given infos. 224 | */ 225 | void fprint_ratios(FILE *file, page_info_array infos) { 226 | fprint_info_header(file); 227 | fprint_ratios_noheader(file, infos); 228 | } 229 | 230 | /* 231 | * Prints a summary of all the pages in the given array as ratios: the fraction of the time the given 232 | * flag was set. 233 | */ 234 | void fprint_table(FILE *f, page_info_array infos) { 235 | fprintf(f, "%zu total pages\n", infos.num_pages); 236 | fprint_info_header(f); 237 | for (size_t p = 0; p < infos.num_pages; p++) { 238 | fprint_info_row(f, infos.info[p]); 239 | } 240 | } 241 | 242 | 243 | 244 | /** 245 | * Get info for a single page indicated by the given pointer (which may point anywhere in the page) 246 | */ 247 | page_info get_page_info(void *p) { 248 | // just get the info array for a single page 249 | page_info_array onepage = get_info_for_range(p, (char *)p + 1); 250 | assert(onepage.num_pages == 1); 251 | page_info ret = onepage.info[0]; 252 | free_info_array(onepage); 253 | return ret; 254 | } 255 | 256 | /** 257 | * Get information for each page in the range from start (inclusive) to end (exclusive). 258 | */ 259 | page_info_array get_info_for_range(void *start, void *end) { 260 | unsigned psize = get_page_size(); 261 | void *start_page = pagedown(start, psize); 262 | void *end_page = pagedown(end - 1, psize) + psize; 263 | size_t page_count = start < end ? 
(end_page - start_page) / psize : 0; 264 | assert(page_count == 0 || start_page < end_page); 265 | 266 | if (page_count == 0) { 267 | return (page_info_array){ 0, NULL }; 268 | } 269 | 270 | page_info *infos = malloc(page_count * sizeof(page_info)); 271 | 272 | // open the pagemap file 273 | FILE *pagemap_file = fopen("/proc/self/pagemap", "rb"); 274 | if (!pagemap_file) err(EXIT_FAILURE, "failed to open pagemap"); 275 | 276 | // seek to the first page 277 | if (fseek(pagemap_file, (uintptr_t)start_page / psize * sizeof(uint64_t), SEEK_SET)) err(EXIT_FAILURE, "pagemap seek failed"); 278 | 279 | size_t bitmap_bytes = page_count * sizeof(uint64_t); 280 | uint64_t* bitmap = malloc(bitmap_bytes); 281 | assert(bitmap); 282 | size_t readc = fread(bitmap, bitmap_bytes, 1, pagemap_file); 283 | if (readc != 1) err(EXIT_FAILURE, "unexpected fread(pagemap) return: %zu", readc); 284 | 285 | fclose(pagemap_file); 286 | 287 | FILE *kpageflags_file = NULL; 288 | enum { INIT, OPEN, FAILED } file_state = INIT; 289 | 290 | for (size_t page_idx = 0; page_idx < page_count; page_idx++) { 291 | page_info info = extract_info(bitmap[page_idx]); 292 | 293 | if (info.pfn) { 294 | // we got a pfn, try to read /proc/kpageflags 295 | 296 | // open file if not open 297 | if (file_state == INIT) { 298 | kpageflags_file = fopen("/proc/kpageflags", "rb"); 299 | if (!kpageflags_file) { 300 | warn("failed to open kpageflags"); 301 | file_state = FAILED; 302 | } else { 303 | file_state = OPEN; 304 | } 305 | } 306 | 307 | if (file_state == OPEN) { 308 | uint64_t bits; 309 | if (fseek(kpageflags_file, info.pfn * sizeof(bits), SEEK_SET)) err(EXIT_FAILURE, "kpageflags seek failed"); 310 | if ((readc = fread(&bits, sizeof(bits), 1, kpageflags_file)) != 1) err(EXIT_FAILURE, "unexpected fread(kpageflags) return: %zu", readc); 311 | info.kpageflags_ok = true; 312 | info.kpageflags = bits; 313 | } 314 | } 315 | 316 | infos[page_idx] = info; 317 | } 318 | 319 | if (kpageflags_file) 320 | 
fclose(kpageflags_file); 321 | 322 | free(bitmap); 323 | 324 | return (page_info_array){ page_count, infos }; 325 | } 326 | 327 | void free_info_array(page_info_array infos) { 328 | free(infos.info); 329 | } 330 | 331 | int flag_from_name(char const *name) { 332 | ITERATE_FLAGS { 333 | if (strcasecmp(f->name, name) == 0) { 334 | return f->flag_num; 335 | } 336 | } 337 | return -1; 338 | } 339 | 340 | 341 | -------------------------------------------------------------------------------- /page-info.h: -------------------------------------------------------------------------------- 1 | /* 2 | * page-info.h 3 | */ 4 | 5 | #ifndef PAGE_INFO_H_ 6 | #define PAGE_INFO_H_ 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | #ifdef __cplusplus 14 | extern "C" { 15 | #endif 16 | 17 | typedef struct { 18 | /* page frame number: if present, the physical frame for the page */ 19 | uint64_t pfn; 20 | /* soft-dirty set */ 21 | bool softdirty; 22 | /* exclusively mapped, see e.g., https://patchwork.kernel.org/patch/6787921/ */ 23 | bool exclusive; 24 | /* is a file mapping */ 25 | bool file; 26 | /* page is swapped out */ 27 | bool swapped; 28 | /* page is present, i.e, a physical page is allocated */ 29 | bool present; 30 | /* if true, the kpageflags were successfully loaded, if false they were not (and are all zero) */ 31 | bool kpageflags_ok; 32 | /* the 64-bit flag value extracted from /proc/kpageflags only if pfn is non-null */ 33 | uint64_t kpageflags; 34 | 35 | } page_info; 36 | /* 37 | * Information for a number of virtually consecutive pages. 
38 | */ 39 | typedef struct { 40 | /* how many page_info structures are in the array pointed to by info */ 41 | size_t num_pages; 42 | 43 | /* pointer to the array of page_info structures */ 44 | page_info *info; 45 | } page_info_array; 46 | 47 | 48 | typedef struct { 49 | /* the number of pages on which this flag was set, always <= pages_available */ 50 | size_t pages_set; 51 | 52 | /* the number of pages on which information could be obtained */ 53 | size_t pages_available; 54 | 55 | /* the total number of pages examined, which may be greater than pages_available if 56 | * the flag value could not be obtained for some pages (usually because the pfn is not available 57 | * since the page is not yet present or because running as non-root. 58 | */ 59 | size_t pages_total; 60 | 61 | /* the flag the values were queried for */ 62 | int flag; 63 | 64 | } flag_count; 65 | 66 | /** 67 | * Examine the page info in infos to count the number of times a specified /proc/kpageflags flag was set, 68 | * effectively giving you a ratio, so you can say "80% of the pages for this allocation are backed by 69 | * huge pages" or whatever. 70 | * 71 | * The flags *must* come from kpageflags (these are not the same as those in /proc/pid/pagemap) and 72 | * are declared in linux/kernel-page-flags.h. 73 | * 74 | * Ideally, the flag information is available for all the pages in the range, so you can 75 | * say something about the entire range, but this is often not the case because (a) flags 76 | * are not available for pages that aren't present and (b) flags are generally never available 77 | * for non-root users. So the ratio structure indicates both the total number of pages as 78 | * well as the number of pages for which the flag information was available. 
79 | */ 80 | flag_count get_flag_count(page_info_array infos, int flag); 81 | 82 | /** 83 | * Given the case-insensitive name of a flag, return the flag number (the index of the bit 84 | * representing this flag), or -1 if the flag is not found. The "names" of the flags are 85 | * the same as the macro names in <linux/kernel-page-flags.h> without the KPF_ prefix. 86 | * 87 | * For example, the name of the transparent hugepages flag is "THP" and the corresponding 88 | * macro is KPF_THP, and the value of this macro (and the value returned by this function) is 22. 89 | * 90 | * You can generate the corresponding mask value to check the flag using (1ULL << value). 91 | */ 92 | int flag_from_name(char const *name); 93 | 94 | /** 95 | * Print the info in the page_info structure to stdout. 96 | */ 97 | void print_info(page_info info); 98 | 99 | /** 100 | * Print the info in the page_info structure to the given file. 101 | */ 102 | void fprint_info(FILE* file, page_info info); 103 | 104 | 105 | /** 106 | * Print the table header that lines up with the tabular format used by the "table" printing 107 | * functions. Called by fprint_ratios, or you can call it yourself if you want to prefix the 108 | * output with your own columns. 109 | */ 110 | void fprint_info_header(FILE *file); 111 | 112 | /* print one info in a tabular format (as a single row) */ 113 | void fprint_info_row(FILE *file, page_info info); 114 | 115 | 116 | /** 117 | * Print the ratio for each flag in infos. The ratio is the number of times the flag was set over 118 | * the total number of pages (or the total number of pages for which the information could be obtained). 119 | */ 120 | void fprint_ratios_noheader(FILE *file, page_info_array infos); 121 | /* 122 | * Print a summary of all the pages in the given array as ratios: the fraction of pages on which each flag was set. 123 | */ 124 | void fprint_ratios(FILE *file, page_info_array infos); 125 | 126 | /* 127 | * Print a table with one row per page from the given infos, so each page's information appears 128 | * on its own row.
129 | */ 130 | void fprint_table(FILE *f, page_info_array infos); 131 | 132 | 133 | /** 134 | * Get info for a single page indicated by the given pointer (which may point anywhere in the page). 135 | */ 136 | page_info get_page_info(void *p); 137 | 138 | /** 139 | * Get information for each page in the range from start (inclusive) to end (exclusive). Release the returned array with free_info_array when done. 140 | */ 141 | page_info_array get_info_for_range(void *start, void *end); 142 | 143 | /** 144 | * Free the memory associated with the given page_info_array. You shouldn't use it after this call. 145 | */ 146 | void free_info_array(page_info_array infos); 147 | 148 | #ifdef __cplusplus 149 | } 150 | #endif 151 | 152 | #endif /* PAGE_INFO_H_ */ 153 | --------------------------------------------------------------------------------