├── .assets └── demo.cast ├── .gitignore ├── .gitmodules ├── LICENSE ├── Makefile ├── README.md ├── examples └── free_uninit.c ├── src ├── args.c ├── args.h ├── backtrace.c ├── backtrace.h ├── breakpoints.c ├── breakpoints.h ├── c-syntax.scm ├── debugger.c ├── debugger.h ├── history.c ├── history.h ├── info.c ├── info.h ├── magic.c ├── magic.h ├── print-source.scm ├── print_source.c ├── print_source.h ├── ptrace.c ├── ptrace.h ├── registers.c ├── registers.h ├── spray.c ├── spray_dwarf.c ├── spray_dwarf.h ├── spray_elf.c ├── spray_elf.h └── tokenize.scm └── tests ├── assets ├── Makefile ├── commented.c ├── custom_types.c ├── deref_pointers.c ├── extern-variables │ ├── first_file.c │ ├── main.c │ ├── second_file.c │ └── third_file.c ├── include-variable │ ├── header.h │ └── main.c ├── many-files │ ├── bar1.h │ ├── bar2.h │ ├── baz.h │ ├── foo1.c │ ├── foo1.h │ ├── foo2.c │ ├── foo2.h │ └── main.c ├── multi-file │ ├── file1.c │ ├── file2.c │ └── file2.h ├── nested_functions.c ├── pointers.c ├── print_args.c ├── recurring_variables.c ├── simple.c └── type_examples.c ├── c-types.scm ├── debugger.c ├── dwarf.c ├── elf.c ├── main.c ├── test_integration.py ├── test_utils.h └── tokenize.scm /.assets/demo.cast: -------------------------------------------------------------------------------- 1 | {"version": 2, "width": 80, "height": 25, "timestamp": 1699696489, "env": {"SHELL": "zsh", "TERM": "xterm-kitty"}} 2 | [0.368112, "o", "\u001b]2;spray a.out\u0007\u001b]1;spray\u0007"] 3 | [0.396934, "o", "🐛🐛🐛 34156 🐛🐛🐛\r\n"] 4 | [0.639952, "o", " 20 }\u001b[0m\r\n 21\r\n 22 \u001b[32mint\u001b[0m \u001b[0mmain\u001b[0m(\u001b[0m\u001b[32mvoid\u001b[0m)\u001b[0m \u001b[0m{\u001b[0m\r\n 23 -> \u001b[0mprint_concat\u001b[0m(\u001b[0m\u001b[31m\"foo\"\u001b[0m,\u001b[0m \u001b[0m\u001b[31m\"bar\"\u001b[0m)\u001b[0m;\u001b[0m\r\n 24 \u001b[0mprint_concat\u001b[0m(\u001b[0m\u001b[31m\"foo\"\u001b[0m,\u001b[0m \u001b[0m\u001b[31m\"\"\u001b[0m)\u001b[0m;\u001b[0m\r\n 25 
\u001b[0m\u001b[35mreturn\u001b[0m \u001b[0m\u001b[34m0\u001b[0m;\u001b[0m\r\n 26 }\u001b[0m\r\nspray> "] 5 | [1.516171, "o", "b"] 6 | [1.603219, "o", "r"] 7 | [1.648151, "o", "e"] 8 | [1.745696, "o", "a"] 9 | [1.816271, "o", "k"] 10 | [1.992976, "o", " "] 11 | [2.133773, "o", "p"] 12 | [2.235755, "o", "r"] 13 | [2.353192, "o", "i"] 14 | [2.425048, "o", "n"] 15 | [2.488264, "o", "t"] 16 | [2.692241, "o", "_"] 17 | [2.917091, "o", "c"] 18 | [2.944816, "o", "o"] 19 | [3.054356, "o", "n"] 20 | [3.160349, "o", "c"] 21 | [3.271191, "o", "a"] 22 | [3.403404, "o", "t"] 23 | [3.786994, "o", "\r\n"] 24 | [3.788509, "o", "spray> "] 25 | [4.386978, "o", "c"] 26 | [4.431837, "o", "\r\n"] 27 | [4.432644, "o", "Hit breakpoint at address 0x0000000000401180 in free_uninit.c\r\n"] 28 | [4.433025, "o", " 4\r\n 5 \u001b[32mvoid\u001b[0m \u001b[0mprint_concat\u001b[0m(\u001b[0m\u001b[32mchar\u001b[0m \u001b[0m\u001b[33m*\u001b[0ma\u001b[0m,\u001b[0m \u001b[0m\u001b[32mchar\u001b[0m \u001b[0m\u001b[33m*\u001b[0mb\u001b[0m)\u001b[0m \u001b[0m{\u001b[0m\r\n 6 \u001b[0m\u001b[32mchar\u001b[0m \u001b[0m\u001b[33m*\u001b[0mbuf\u001b[0m;\u001b[0m\r\n 7 -> \u001b[0m\u001b[32mint\u001b[0m \u001b[0ma_len\u001b[0m \u001b[0m\u001b[33m=\u001b[0m \u001b[0mstrlen\u001b[0m(\u001b[0ma\u001b[0m)\u001b[0m;\u001b[0m\r\n 8 \u001b[0m\u001b[32mint\u001b[0m \u001b[0mb_len\u001b[0m \u001b[0m\u001b[33m=\u001b[0m \u001b[0mstrlen\u001b[0m(\u001b[0mb\u001b[0m)\u001b[0m;\u001b[0m\r\n 9\r\n 10 \u001b[0m\u001b[35mif\u001b[0m \u001b[0m(\u001b[0mb_len\u001b[0m \u001b[0m\u001b[33m>\u001b[0m \u001b[0m\u001b[34m0\u001b[0m)\u001b[0m \u001b[0m{\u001b[0m\r\n"] 29 | [4.433286, "o", "spray> "] 30 | [4.993165, "o", "n"] 31 | [5.202661, "o", "\r\n"] 32 | [5.205535, "o", " 5 \u001b[32mvoid\u001b[0m \u001b[0mprint_concat\u001b[0m(\u001b[0m\u001b[32mchar\u001b[0m \u001b[0m\u001b[33m*\u001b[0ma\u001b[0m,\u001b[0m \u001b[0m\u001b[32mchar\u001b[0m \u001b[0m\u001b[33m*\u001b[0mb\u001b[0m)\u001b[0m \u001b[0m{\u001b[0m\r\n 6 
\u001b[0m\u001b[32mchar\u001b[0m \u001b[0m\u001b[33m*\u001b[0mbuf\u001b[0m;\u001b[0m\r\n 7 \u001b[0m\u001b[32mint\u001b[0m \u001b[0ma_len\u001b[0m \u001b[0m\u001b[33m=\u001b[0m \u001b[0mstrlen\u001b[0m(\u001b[0ma\u001b[0m)\u001b[0m;\u001b[0m\r\n 8 -> \u001b[0m\u001b[32mint\u001b[0m \u001b[0mb_len\u001b[0m \u001b[0m\u001b[33m=\u001b[0m \u001b[0mstrlen\u001b[0m(\u001b[0mb\u001b[0m)\u001b[0m;\u001b[0m\r\n 9\r\n 10 \u001b[0m\u001b[35mif\u001b[0m \u001b[0m(\u001b[0mb_len\u001b[0m \u001b[0m\u001b[33m>\u001b[0m \u001b[0m\u001b[34m0\u001b[0m)\u001b[0m \u001b[0m{\u001b[0m\r\n 11 \u001b[0mbuf\u001b[0m \u001b[0m\u001b[33m=\u001b[0m \u001b[0mmalloc\u001b[0m(\u001b[0ma_len\u001b[0m \u001b[0m\u001b[33m+\u001b[0m \u001b[0mb_len\u001b[0m \u001b[0m\u001b[33m+\u001b[0m \u001b[0m\u001b[34m1\u001b[0m)\u001b[0m;\u001b[0m\r\nspray> "] 33 | [5.656346, "o", "n"] 34 | [5.854361, "o", "\r\n"] 35 | [5.856267, "o", " 7 \u001b[0m\u001b[32mint\u001b[0m \u001b[0ma_len\u001b[0m \u001b[0m\u001b[33m=\u001b[0m \u001b[0mstrlen\u001b[0m(\u001b[0ma\u001b[0m)\u001b[0m;\u001b[0m\r\n 8 \u001b[0m\u001b[32mint\u001b[0m \u001b[0mb_len\u001b[0m \u001b[0m\u001b[33m=\u001b[0m \u001b[0mstrlen\u001b[0m(\u001b[0mb\u001b[0m)\u001b[0m;\u001b[0m\r\n 9\r\n 10 -> \u001b[0m\u001b[35mif\u001b[0m \u001b[0m(\u001b[0mb_len\u001b[0m \u001b[0m\u001b[33m>\u001b[0m \u001b[0m\u001b[34m0\u001b[0m)\u001b[0m \u001b[0m{\u001b[0m\r\n 11 \u001b[0mbuf\u001b[0m \u001b[0m\u001b[33m=\u001b[0m \u001b[0mmalloc\u001b[0m(\u001b[0ma_len\u001b[0m \u001b[0m\u001b[33m+\u001b[0m \u001b[0mb_len\u001b[0m \u001b[0m\u001b[33m+\u001b[0m \u001b[0m\u001b[34m1\u001b[0m)\u001b[0m;\u001b[0m\r\n 12 \u001b[0mstrcpy\u001b[0m(\u001b[0mbuf\u001b[0m,\u001b[0m \u001b[0ma\u001b[0m)\u001b[0m;\u001b[0m\r\n 13 \u001b[0mstrcpy\u001b[0m(\u001b[0mbuf\u001b[0m \u001b[0m\u001b[33m+\u001b[0m \u001b[0ma_len\u001b[0m,\u001b[0m \u001b[0mb\u001b[0m)\u001b[0m;\u001b[0m\r\n"] 36 | [5.856363, "o", "spray> "] 37 | [6.202183, "o", "p"] 38 | [6.246698, "o", " "] 39 | [6.535284, "o", 
"b"] 40 | [6.946225, "o", "_"] 41 | [7.203841, "o", "l"] 42 | [7.335918, "o", "e"] 43 | [7.383841, "o", "n"] 44 | [7.591174, "o", "\r\n"] 45 | [7.592125, "o", " 3 (free_uninit.c:8)\r\n"] 46 | [7.592178, "o", "spray> "] 47 | [8.071171, "o", "n"] 48 | [8.277766, "o", "\r\n"] 49 | [8.279863, "o", " 8 \u001b[0m\u001b[32mint\u001b[0m \u001b[0mb_len\u001b[0m \u001b[0m\u001b[33m=\u001b[0m \u001b[0mstrlen\u001b[0m(\u001b[0mb\u001b[0m)\u001b[0m;\u001b[0m\r\n 9\r\n 10 \u001b[0m\u001b[35mif\u001b[0m \u001b[0m(\u001b[0mb_len\u001b[0m \u001b[0m\u001b[33m>\u001b[0m \u001b[0m\u001b[34m0\u001b[0m)\u001b[0m \u001b[0m{\u001b[0m\r\n 11 -> \u001b[0mbuf\u001b[0m \u001b[0m\u001b[33m=\u001b[0m \u001b[0mmalloc\u001b[0m(\u001b[0ma_len\u001b[0m \u001b[0m\u001b[33m+\u001b[0m \u001b[0mb_len\u001b[0m \u001b[0m\u001b[33m+\u001b[0m \u001b[0m\u001b[34m1\u001b[0m)\u001b[0m;\u001b[0m\r\n 12 \u001b[0mstrcpy\u001b[0m(\u001b[0mbuf\u001b[0m,\u001b[0m \u001b[0ma\u001b[0m)\u001b[0m;\u001b[0m\r\n 13 \u001b[0mstrcpy\u001b[0m(\u001b[0mbuf\u001b[0m \u001b[0m\u001b[33m+\u001b[0m \u001b[0ma_len\u001b[0m,\u001b[0m \u001b[0mb\u001b[0m)\u001b[0m;\u001b[0m\r\n 14 \u001b[0mputs\u001b[0m(\u001b[0mbuf\u001b[0m)\u001b[0m;\u001b[0m\r\nspray> "] 50 | [8.703905, "o", "n"] 51 | [8.926853, "o", "\r\n"] 52 | [8.92877, "o", " 9\r\n 10 \u001b[0m\u001b[35mif\u001b[0m \u001b[0m(\u001b[0mb_len\u001b[0m \u001b[0m\u001b[33m>\u001b[0m \u001b[0m\u001b[34m0\u001b[0m)\u001b[0m \u001b[0m{\u001b[0m\r\n 11 \u001b[0mbuf\u001b[0m \u001b[0m\u001b[33m=\u001b[0m \u001b[0mmalloc\u001b[0m(\u001b[0ma_len\u001b[0m \u001b[0m\u001b[33m+\u001b[0m \u001b[0mb_len\u001b[0m \u001b[0m\u001b[33m+\u001b[0m \u001b[0m\u001b[34m1\u001b[0m)\u001b[0m;\u001b[0m\r\n 12 -> \u001b[0mstrcpy\u001b[0m(\u001b[0mbuf\u001b[0m,\u001b[0m \u001b[0ma\u001b[0m)\u001b[0m;\u001b[0m\r\n 13 \u001b[0mstrcpy\u001b[0m(\u001b[0mbuf\u001b[0m \u001b[0m\u001b[33m+\u001b[0m \u001b[0ma_len\u001b[0m,\u001b[0m \u001b[0mb\u001b[0m)\u001b[0m;\u001b[0m\r\n 14 
\u001b[0mputs\u001b[0m(\u001b[0mbuf\u001b[0m)\u001b[0m;\u001b[0m\r\n 15 \u001b[0m}\u001b[0m \u001b[0m\u001b[35melse\u001b[0m \u001b[0m{\u001b[0m\r\n"] 53 | [8.928837, "o", "spray> "] 54 | [9.403846, "o", "p"] 55 | [9.457709, "o", " "] 56 | [9.712869, "o", "b"] 57 | [9.769724, "o", "u"] 58 | [9.886716, "o", "f"] 59 | [10.286159, "o", "\r\n"] 60 | [10.286619, "o", " 0x00000000004052a0 (free_uninit.c:6)\r\nspray> "] 61 | [11.224844, "o", "l"] 62 | [11.290902, "o", "e"] 63 | [11.479218, "o", "a"] 64 | [11.722201, "o", "v"] 65 | [11.794879, "o", "e"] 66 | [11.959731, "o", "\r\n"] 67 | [11.960012, "o", "foobar\r\n"] 68 | [11.961021, "o", " 21\r\n 22 \u001b[32mint\u001b[0m \u001b[0mmain\u001b[0m(\u001b[0m\u001b[32mvoid\u001b[0m)\u001b[0m \u001b[0m{\u001b[0m\r\n 23 \u001b[0mprint_concat\u001b[0m(\u001b[0m\u001b[31m\"foo\"\u001b[0m,\u001b[0m \u001b[0m\u001b[31m\"bar\"\u001b[0m)\u001b[0m;\u001b[0m\r\n 24 -> \u001b[0mprint_concat\u001b[0m(\u001b[0m\u001b[31m\"foo\"\u001b[0m,\u001b[0m \u001b[0m\u001b[31m\"\"\u001b[0m)\u001b[0m;\u001b[0m\r\n 25 \u001b[0m\u001b[35mreturn\u001b[0m \u001b[0m\u001b[34m0\u001b[0m;\u001b[0m\r\n 26 }\u001b[0m\r\n"] 69 | [11.961281, "o", "spray> "] 70 | [13.528316, "o", "d"] 71 | [13.786704, "o", "e"] 72 | [14.240303, "o", "e"] 73 | [14.807235, "o", "\rspray> de\u001b[0K\r\u001b[9C"] 74 | [15.023292, "o", "l"] 75 | [15.089166, "o", "e"] 76 | [15.176018, "o", "t"] 77 | [15.296072, "o", "e"] 78 | [15.395173, "o", " "] 79 | [15.527185, "o", "p"] 80 | [15.617162, "o", "r"] 81 | [15.724553, "o", "i"] 82 | [15.787081, "o", "n"] 83 | [15.835567, "o", "t"] 84 | [16.018626, "o", "_"] 85 | [16.213391, "o", "c"] 86 | [16.26728, "o", "o"] 87 | [16.35738, "o", "n"] 88 | [16.441781, "o", "c"] 89 | [16.550101, "o", "a"] 90 | [16.660493, "o", "t"] 91 | [16.86185, "o", "\r\n"] 92 | [16.862965, "o", "spray> "] 93 | [18.06518, "o", "s"] 94 | [18.160724, "o", "t"] 95 | [18.245, "o", "e"] 96 | [18.38572, "o", "p"] 97 | [18.649892, "o", "\r\n"] 98 | [18.652583, "o", " 2 
\u001b[35m#include\u001b[0m \u001b[0m\u001b[0m\r\n 3 \u001b[35m#include\u001b[0m \u001b[0m\u001b[0m\r\n 4\r\n 5 -> \u001b[32mvoid\u001b[0m \u001b[0mprint_concat\u001b[0m(\u001b[0m\u001b[32mchar\u001b[0m \u001b[0m\u001b[33m*\u001b[0ma\u001b[0m,\u001b[0m \u001b[0m\u001b[32mchar\u001b[0m \u001b[0m\u001b[33m*\u001b[0mb\u001b[0m)\u001b[0m \u001b[0m{\u001b[0m\r\n 6 \u001b[0m\u001b[32mchar\u001b[0m \u001b[0m\u001b[33m*\u001b[0mbuf\u001b[0m;\u001b[0m\r\n 7 \u001b[0m\u001b[32mint\u001b[0m \u001b[0ma_len\u001b[0m \u001b[0m\u001b[33m=\u001b[0m \u001b[0mstrlen\u001b[0m(\u001b[0ma\u001b[0m)\u001b[0m;\u001b[0m\r\n 8 \u001b[0m\u001b[32mint\u001b[0m \u001b[0mb_len\u001b[0m \u001b[0m\u001b[33m=\u001b[0m \u001b[0mstrlen\u001b[0m(\u001b[0mb\u001b[0m)\u001b[0m;\u001b[0m\r\n"] 99 | [18.652682, "o", "spray> "] 100 | [19.647382, "o", "n"] 101 | [19.836063, "o", "\r\n"] 102 | [19.838181, "o", " 4\r\n 5 \u001b[19mvoid\u001b[0m \u001b[0mprint_concat\u001b[0m(\u001b[0m\u001b[19mchar\u001b[0m \u001b[0m\u001b[33m*\u001b[0ma\u001b[0m,\u001b[0m \u001b[0m\u001b[19mchar\u001b[0m \u001b[0m\u001b[33m*\u001b[0mb\u001b[0m)\u001b[0m \u001b[0m{\u001b[0m\r\n 6 \u001b[0m\u001b[19mchar\u001b[0m \u001b[0m\u001b[33m*\u001b[0mbuf\u001b[0m;\u001b[0m\r\n 7 -> \u001b[0m\u001b[19mint\u001b[0m \u001b[0ma_len\u001b[0m \u001b[0m\u001b[33m=\u001b[0m \u001b[0mstrlen\u001b[0m(\u001b[0ma\u001b[0m)\u001b[0m;\u001b[0m\r\n 8 \u001b[0m\u001b[19mint\u001b[0m \u001b[0mb_len\u001b[0m \u001b[0m\u001b[33m=\u001b[0m \u001b[0mstrlen\u001b[0m(\u001b[0mb\u001b[0m)\u001b[0m;\u001b[0m\r\n 9\r\n 10 \u001b[0m\u001b[35mif\u001b[0m \u001b[0m(\u001b[0mb_len\u001b[0m \u001b[0m\u001b[33m>\u001b[0m \u001b[0m\u001b[34m0\u001b[0m)\u001b[0m \u001b[0m{\u001b[0m\r\n"] 103 | [19.838321, "o", "spray> "] 104 | [20.139396, "o", "\r\n"] 105 | [20.141079, "o", " 5 \u001b[32mvoid\u001b[0m \u001b[0mprint_concat\u001b[0m(\u001b[0m\u001b[32mchar\u001b[0m \u001b[0m\u001b[20m*\u001b[0ma\u001b[0m,\u001b[0m \u001b[0m\u001b[32mchar\u001b[0m 
\u001b[0m\u001b[20m*\u001b[0mb\u001b[0m)\u001b[0m \u001b[0m{\u001b[0m\r\n 6 \u001b[0m\u001b[32mchar\u001b[0m \u001b[0m\u001b[20m*\u001b[0mbuf\u001b[0m;\u001b[0m\r\n 7 \u001b[0m\u001b[32mint\u001b[0m \u001b[0ma_len\u001b[0m \u001b[0m\u001b[20m=\u001b[0m \u001b[0mstrlen\u001b[0m(\u001b[0ma\u001b[0m)\u001b[0m;\u001b[0m\r\n 8 -> \u001b[0m\u001b[32mint\u001b[0m \u001b[0mb_len\u001b[0m \u001b[0m\u001b[20m=\u001b[0m \u001b[0mstrlen\u001b[0m(\u001b[0mb\u001b[0m)\u001b[0m;\u001b[0m\r\n 9\r\n 10 \u001b[0m\u001b[35mif\u001b[0m \u001b[0m(\u001b[0mb_len\u001b[0m \u001b[0m\u001b[20m>\u001b[0m \u001b[0m\u001b[34m0\u001b[0m)\u001b[0m \u001b[0m{\u001b[0m\r\n 11 \u001b[0mbuf\u001b[0m \u001b[0m\u001b[20m=\u001b[0m \u001b[0mmalloc\u001b[0m(\u001b[0ma_len\u001b[0m \u001b[0m\u001b[20m+\u001b[0m \u001b[0mb_len\u001b[0m \u001b[0m\u001b[20m+\u001b[0m \u001b[0m\u001b[34m1\u001b[0m)\u001b[0m;\u001b[0m\r\n"] 106 | [20.141214, "o", "spray> "] 107 | [20.316017, "o", "\r\n"] 108 | [20.317851, "o", " 7 \u001b[0m\u001b[32mint\u001b[0m \u001b[0ma_len\u001b[0m \u001b[0m\u001b[20m=\u001b[0m \u001b[0mstrlen\u001b[0m(\u001b[0ma\u001b[0m)\u001b[0m;\u001b[0m\r\n 8 \u001b[0m\u001b[32mint\u001b[0m \u001b[0mb_len\u001b[0m \u001b[0m\u001b[20m=\u001b[0m \u001b[0mstrlen\u001b[0m(\u001b[0mb\u001b[0m)\u001b[0m;\u001b[0m\r\n 9\r\n 10 -> \u001b[0m\u001b[35mif\u001b[0m \u001b[0m(\u001b[0mb_len\u001b[0m \u001b[0m\u001b[20m>\u001b[0m \u001b[0m\u001b[34m0\u001b[0m)\u001b[0m \u001b[0m{\u001b[0m\r\n 11 \u001b[0mbuf\u001b[0m \u001b[0m\u001b[20m=\u001b[0m \u001b[0mmalloc\u001b[0m(\u001b[0ma_len\u001b[0m \u001b[0m\u001b[20m+\u001b[0m \u001b[0mb_len\u001b[0m \u001b[0m\u001b[20m+\u001b[0m \u001b[0m\u001b[34m1\u001b[0m)\u001b[0m;\u001b[0m\r\n 12 \u001b[0mstrcpy\u001b[0m(\u001b[0mbuf\u001b[0m,\u001b[0m \u001b[0ma\u001b[0m)\u001b[0m;\u001b[0m\r\n 13 \u001b[0mstrcpy\u001b[0m(\u001b[0mbuf\u001b[0m \u001b[0m\u001b[20m+\u001b[0m \u001b[0ma_len\u001b[0m,\u001b[0m \u001b[0mb\u001b[0m)\u001b[0m;\u001b[0m\r\n"] 109 | [20.317964, "o", 
"spray> "] 110 | [20.679326, "o", "\r\n"] 111 | [20.681062, "o", " 13 \u001b[0mstrcpy\u001b[0m(\u001b[0mbuf\u001b[0m \u001b[0m\u001b[20m+\u001b[0m \u001b[0ma_len\u001b[0m,\u001b[0m \u001b[0mb\u001b[0m)\u001b[0m;\u001b[0m\r\n 14 \u001b[0mputs\u001b[0m(\u001b[0mbuf\u001b[0m)\u001b[0m;\u001b[0m\r\n 15 \u001b[0m}\u001b[0m \u001b[0m\u001b[35melse\u001b[0m \u001b[0m{\u001b[0m\r\n 16 -> \u001b[0mputs\u001b[0m(\u001b[0ma\u001b[0m)\u001b[0m;\u001b[0m\r\n 17 \u001b[0m}\u001b[0m\r\n 18\r\n 19 \u001b[0mfree\u001b[0m(\u001b[0mbuf\u001b[0m)\u001b[0m;\u001b[0m\r\n"] 112 | [20.68123, "o", "spray> "] 113 | [22.178929, "o", "p"] 114 | [22.227345, "o", " "] 115 | [22.730766, "o", "b"] 116 | [22.772284, "o", "u"] 117 | [22.901487, "o", "f"] 118 | [22.98863, "o", "\r\n"] 119 | [22.989354, "o", " 0x00000000004052a0 (free_uninit.c:6)\r\nspray> "] 120 | [23.937341, "o", "n"] 121 | [24.162253, "o", "\r\n"] 122 | [24.163273, "o", "foo\r\n"] 123 | [24.164034, "o", " 16 \u001b[0mputs\u001b[0m(\u001b[0ma\u001b[0m)\u001b[0m;\u001b[0m\r\n 17 \u001b[0m}\u001b[0m\r\n 18\r\n 19 -> \u001b[0mfree\u001b[0m(\u001b[0mbuf\u001b[0m)\u001b[0m;\u001b[0m\r\n 20 }\u001b[0m\r\n 21\r\n 22 \u001b[32mint\u001b[0m \u001b[0mmain\u001b[0m(\u001b[0m\u001b[32mvoid\u001b[0m)\u001b[0m \u001b[0m{\u001b[0m\r\n"] 124 | [24.164144, "o", "spray> "] 125 | [25.509229, "o", "n"] 126 | [25.716161, "o", "\r\n"] 127 | [25.717049, "o", "free(): double free detected in tcache 2\r\nChild was stopped by SIGABRT\r\n"] 128 | [25.717161, "o", "ERR: No source info for PC 0x00007ffff7e56884\r\nspray> "] 129 | [26.02117, "o", "\r\n"] 130 | [26.035574, "o", "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r"] 131 | [26.035654, "o", "\u001b]2;thasso@tatkx:~/Fun/spray/examples\u0007\u001b]1;..pray/examples\u0007"] 132 | [26.037403, "o", "\u001b]7;file://tatkx/home/thasso/Fun/spray/examples\u001b\\"] 133 | [26.047915, "o", "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J\u001b[31mtatkx\u001b[00m:\u001b[32mexamples\u001b[00m 
\u001b[34m(main*)\u001b[00m $ \u001b[K"] 134 | [26.047953, "o", "\u001b[?1h"] 135 | [26.04797, "o", "\u001b[?2004h"] 136 | [27.395422, "o", "\u001b[?2004l\r\r\n"] 137 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | **/a.out 2 | build/ 3 | .cache 4 | compile_commands.json 5 | **/__pycache__/ 6 | **/*.bin 7 | *~ 8 | *.import.scm 9 | /libdwarf-code/ 10 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "dependencies/hashmap.c"] 2 | path = dependencies/hashmap.c 3 | url = https://github.com/tidwall/hashmap.c.git 4 | [submodule "dependencies/linenoise"] 5 | path = dependencies/linenoise 6 | url = https://github.com/antirez/linenoise.git 7 | [submodule "dependencies/munit"] 8 | path = dependencies/munit 9 | url = https://github.com/nemequ/munit.git 10 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2023 Thassilo Schulze 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
6 | 7 | THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 8 | -------------------------------------------------------------------------------- /Makefile: --------------------------------------------------------------------------------
# Build configuration for Spray: the debugger binary, its embedded
# CHICKEN Scheme units, and the C/Scheme/Python test suites.

CC = clang
CFLAGS = -fsanitize=address -g -Werror -Wall -Wextra -pedantic-errors -Wno-gnu-designator -std=gnu11
CPPFLAGS = -MMD -I$(SOURCE_DIR) -I$(DEP)/linenoise -I$(DEP)/hashmap.c
# Libraries to link against. These must come AFTER the object files
# on the link line so the linker can resolve the objects' references.
LDFLAGS = -ldwarf -lchicken -lzstd -lz

BUILD_DIR = build
SOURCE_DIR = src
DEP = dependencies
SOURCES = $(wildcard $(SOURCE_DIR)/*.c)
OBJECTS = $(patsubst $(SOURCE_DIR)/%.c, $(BUILD_DIR)/%.o, $(SOURCES))
OBJECTS += $(BUILD_DIR)/hashmap.o $(BUILD_DIR)/linenoise.o $(BUILD_DIR)/print-source.o $(BUILD_DIR)/tokenize.o $(BUILD_DIR)/c-syntax.o
BINARY = $(BUILD_DIR)/spray
DEPS = $(OBJECTS:%.o=%.d)

# `.PHONY` is a special target, so it takes a colon. Writing
# `.PHONY = ...` only assigns a variable and declares nothing phony.
.PHONY: all bin clean run test unit cunit schemeunit integration assets install docker

# === SPRAY ===

all: $(BINARY) assets
	@echo Build successful 👍️

run: all
	./$(BINARY) $(args)

install: $(BINARY)
	cp $(BINARY) $$HOME/.local/bin/

docker: $(BINARY)
	docker create -i ubuntu
	docker cp $(BINARY) `docker ps -q -l`:/opt/spray
	docker start `docker ps -q -l`
	docker exec -i `docker ps -q -l` bash

$(BINARY): $(OBJECTS)
	$(CC) $(CFLAGS) $(OBJECTS) -o $(BINARY) $(LDFLAGS)

-include $(DEPS)

# Wow, seems like CHICKEN is quite strict ...
$(BUILD_DIR)/print_source.o: CFLAGS += -Wno-unused-parameter -Wno-strict-prototypes -Wno-pedantic -Wno-unused-but-set-variable -Wno-unused-variable
$(BUILD_DIR)/print_source.o: CPPFLAGS += -I/usr/include/chicken
$(BUILD_DIR)/print_source.o: $(SOURCE_DIR)/print_source.c | $(BUILD_DIR)
	$(CC) $(CFLAGS) $(CPPFLAGS) -c $< -o $@

# Scheme units are compiled by CHICKEN's `csc`. The prerequisites
# order the builds: c-syntax, then tokenize, then print-source.
$(BUILD_DIR)/print-source.o: $(SOURCE_DIR)/print-source.scm $(BUILD_DIR)/tokenize.o | $(BUILD_DIR)
	csc -uses tokenizer -c -embedded $(SOURCE_DIR)/print-source.scm -o $@

$(BUILD_DIR)/tokenize.o: $(SOURCE_DIR)/tokenize.scm $(BUILD_DIR)/c-syntax.o | $(BUILD_DIR)
	csc -uses c-syntax -unit tokenizer -c -J $(SOURCE_DIR)/tokenize.scm -o $@

$(BUILD_DIR)/c-syntax.o: $(SOURCE_DIR)/c-syntax.scm | $(BUILD_DIR)
	csc -unit c-syntax -c -J $(SOURCE_DIR)/c-syntax.scm -o $@

$(BUILD_DIR)/%.o: $(SOURCE_DIR)/%.c | $(BUILD_DIR)
	$(CC) $(CFLAGS) $(CPPFLAGS) -c $< -o $@

# The dependency objects are written into $(BUILD_DIR) as well, so they
# need the same order-only prerequisite as every other object rule.
# Without it a parallel build can compile them before the directory exists.
$(BUILD_DIR)/hashmap.o: $(DEP)/hashmap.c/hashmap.c | $(BUILD_DIR)
	$(CC) $(CFLAGS) $(CPPFLAGS) -c $< -o $@

$(BUILD_DIR)/linenoise.o: CFLAGS += -Wno-gnu-zero-variadic-macro-arguments
$(BUILD_DIR)/linenoise.o: $(DEP)/linenoise/linenoise.c | $(BUILD_DIR)
	$(CC) $(CFLAGS) $(CPPFLAGS) -c $< -o $@


$(BUILD_DIR):
	mkdir $(BUILD_DIR)

# Clang's JSON compilation database.
# The $(error ...) line is part of the recipe, so a missing `bear` only
# aborts when this target is actually requested. $(MAKE) is used for the
# recursive invocations so command-line flags are passed on to them.
compile_commands.json:
ifeq (, $(shell which bear))
	$(error "Bear is required to generate `compile_commands.json`. You can get it here: https://github.com/rizsotto/Bear.git.")
else
	$(MAKE) clean
	bear -- $(MAKE) all
endif


# === TESTS ===

TEST_SOURCE_DIR = tests
TEST_BUILD_DIR = tests/build
TEST_SOURCES = $(wildcard $(TEST_SOURCE_DIR)/*.c)
# The test binary links every debugger object except spray.o.
TEST_OBJECTS = $(filter-out $(BUILD_DIR)/spray.o, $(OBJECTS))
TEST_OBJECTS += $(patsubst $(TEST_SOURCE_DIR)/%.c, $(TEST_BUILD_DIR)/%.o, $(TEST_SOURCES))
TEST_OBJECTS += $(TEST_BUILD_DIR)/munit.o
TEST_DEPS = $(TEST_OBJECTS:%.o=%.d)
TEST_BINARY = $(TEST_BUILD_DIR)/test

# Run all tests.
test: unit integration

# Run C and Scheme unit tests.
unit: cunit schemeunit

cunit: CPPFLAGS += -I$(TEST_SOURCE_DIR) -I$(DEP)/munit
cunit: $(TEST_BINARY) assets
	./$(TEST_BINARY) $(args)

schemeunit: assets
	csi -s tests/tokenize.scm
	csi -s tests/c-types.scm

# Run integration tests.
integration: $(BINARY) assets
	python -m pytest

# Libraries go last, matching the link rule for $(BINARY).
$(TEST_BINARY): $(TEST_OBJECTS)
	$(CC) $(CFLAGS) $(TEST_OBJECTS) -o $(TEST_BINARY) $(LDFLAGS)

-include $(TEST_DEPS)

$(TEST_BUILD_DIR)/%.o: $(TEST_SOURCE_DIR)/%.c | $(TEST_BUILD_DIR)
	$(CC) $(CFLAGS) $(CPPFLAGS) -c $< -o $@

$(TEST_BUILD_DIR)/munit.o: $(DEP)/munit/munit.c | $(TEST_BUILD_DIR)
	$(CC) $(CFLAGS) $(CPPFLAGS) -c $< -o $@

$(TEST_BUILD_DIR):
	mkdir $(TEST_BUILD_DIR)

assets:
	$(MAKE) -C tests/assets all

clean:
	$(RM) *.import.scm
	$(RM) -r $(BUILD_DIR) $(TEST_BUILD_DIR) compile_commands.json
	$(MAKE) -C tests/assets clean

-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

2 |

🐛🐛🐛 Spray 🐛🐛🐛

3 |

4 | Get started - 5 | Issues - 6 | Bug report 7 |

8 |

9 | 10 | [![Spray Demo](https://asciinema.org/a/620413.svg)](https://asciinema.org/a/620413) 11 | 12 | > You can watch a tiny demo of using Spray to interact with a running program here: https://youtu.be/mjwIrfQkURc 13 | 14 | Spray is a small debugger for C code that comes with minimal mental overhead. All functionality aims to be simple and easy to grasp. 15 | 16 | In Spray you can easily control the execution of running programs, and inspect and modify their state. 17 | 18 | I started work on Spray out of curiosity about the mysterious inner workings of debuggers. In addition, I want to explore ways in which debugging can be made more approachable. 19 | 20 | ## 🦾 Features 21 | 22 | - [x] Breakpoints on functions, on lines in files and on addresses 23 | - [x] Printing and setting variables, memory at addresses and registers 24 | - [x] C syntax highlighting 25 | - [x] Backtraces 26 | - [x] Instruction, function and line level stepping 27 | - [x] Filters to format command output 28 | 29 | ## 🚀 Roadmap 30 | 31 | - [ ] Printing and modifying complex structures 32 | - [ ] Syntax highlighting for complex structures 33 | - [ ] Backtraces based on DWARF instead of frame pointers 34 | - [ ] Inlined functions 35 | - [ ] Loading external libraries 36 | - [ ] Catching signals sent to the debugged program 37 | 38 | ## 💿️ Installation 39 | 40 | Parts of the Spray frontend are written in Scheme and embedded into the application using [CHICKEN Scheme](https://www.call-cc.org/) which compiles Scheme to C. Currently, you need to have [CHICKEN installed](https://code.call-cc.org/#download) to build Spray. In the future it's possible that the generated C files are provided instead so that you only need a C compiler. 41 | 42 | Spray depends on [libdwarf](https://github.com/davea42/libdwarf-code/releases) 43 | so if you want to build Spray, you need to install libdwarf first. Then, to install Spray you clone this repository and run `make`. 
Note that you 44 | have to [clone all the submodules](https://stackoverflow.com/a/4438292) too. 45 | 46 | ```sh 47 | git clone --recurse-submodules https://github.com/thass0/spray.git 48 | cd spray 49 | make 50 | ``` 51 | 52 | The compiled binary is named `spray` and can be found in the `build` directory. 53 | 54 | To use `spray` as a regular command you need to [add it to your `$PATH`](https://askubuntu.com/a/322773). 55 | 56 | ## 🏃‍♀️ Running Spray 57 | 58 | > Ensure that the binary you want to debug has debug information enabled, i.e. it was compiled with the `-g` flag. Also, you should disable all compile-time optimizations to ensure the best output. 59 | 60 | > Spray is only tested using Clang. The debug information generated by different compilers for the same piece of code varies. Thus, `clang` should be used to compile the programs you want to debug using Spray. 61 | 62 | The first argument you pass to `spray` is the name of the binary that should be debugged (the debuggee). All subsequent arguments are the arguments passed to the debuggee. 63 | 64 | For example 65 | 66 | ```sh 67 | clang -g examples/free_uninit.c 68 | spray a.out 69 | ``` 70 | 71 | starts a debugging session with the executable `a.out`. 72 | 73 | ## ⌨️ Commands 74 | 75 | Spray's REPL offers the following commands to interact with a running program. 76 | 77 | ### Reading and writing values 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 |
CommandArgument(s)Description
print, p<variable>Print the value of the runtime variable.
<register>Print the value of the register.
<address>Print the value of the program's memory at the address.
set, t<variable> <value>Set the value of the runtime variable.
<register> <value>Set the value of the register.
<address> <value>Set the value of the program's memory at the address.
112 | 113 | Register names are prefixed with a `%`, akin to the AT&T assembly syntax. This avoids name conflicts between register names and variable names. For example, to read the value of `rax`, use `print %rax`. You can find a table of all available register names in `src/registers.h`. 114 | 115 | A `<value>` can be a hexadecimal or a decimal number. The default is base 10 and hexadecimal will only be chosen if the literal contains a character that's exclusive to base 16 (i.e. one of a - f). You can prefix the literal with `0x` to explicitly use hexadecimal in cases where decimal would work as well. 116 | 117 | An `<address>` is always a hexadecimal number. The prefix `0x` is again optional. 118 | 119 | ### Breakpoints 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 |
CommandArgument(s)Description
break, b<function>Set a breakpoint on the function.
<file>:<line>Set a breakpoint on the line in the file.
<address>Set a breakpoint on the address.
delete, d<function>Delete a breakpoint on the function.
<file>:<line>Delete a breakpoint on the line in the file.
<address>Delete a breakpoint on the address.
continue, cContinue execution until the next breakpoint.
159 | 160 | It's possible that the location passed to `break`, `delete`, `print`, or `set` is both a valid function name and a valid hexadecimal address. For example, `add` could refer to a function called `add` and the number `0xadd`. In such a case, the default is to interpret the location as a function name. Use the prefix `0x` to explicitly specify an address. 161 | 162 | ### Stepping 163 | 164 | | Command | Description | 165 | |------------------|-----------------------------------------------------| 166 | | `next`, `n` | Go to the next line. Don't step into functions. | 167 | | `step`, `s` | Go to the next line. Step into functions. | 168 | | `leave`, `l` | Step out of the current function. | 169 | | `inst`, `i` | Step to the next instruction. | 170 | | `backtrace`, `a` | Print a backtrace starting at the current position. | 171 | 172 | ### Filters 173 | 174 | The `print` and `set` commands can be followed by a filter, to change how output is displayed. For example, if you want to inspect the binary data in the rdx register, you can enter `print %rdx | bits`. 175 | 176 | Filters are separated from the command by a pipe symbol: ` '|' `. Currently, only one filter can be used at a time. 177 | 178 | The following table shows how different filters format the same 64-bit word with the value 103. 179 | 180 | 181 | | Filter | Output | 182 | |-----------------------|---------------------------------------------------------------------------| 183 | | `dec` (*decimal*) | `103` | 184 | | `hex` (*hexadecimal*) | `0x67` | 185 | | `addr` (*address*) | `0x0000000000000067` | 186 | | `bits` | `00000000 00000000 00000000 00000000 00000000 00000000 00000000 01100111` | 187 | | `bytes` | `00 00 00 00 00 00 00 67` | 188 | | `deref` `*` | *Prints the value found at memory address `0x67`* | 189 | 190 | Except for `deref`, all the above simply change the way the output is formatted.
`deref`, abbreviated as `*`, interprets the value that would be printed as a memory address, and prints whatever is found in memory at that address. Using `deref`, you can inspect the value that a pointer points to. 191 | 192 | ## 🛠️ Contributing 193 | 194 | All contributions are welcome. Before opening a pull request, please run 195 | the test suite locally to verify that your changes don't break any other 196 | features. 197 | 198 | It's possible that some of the tests fail due to off-by-one errors when 199 | making assertions about specific values found in the example binaries that 200 | are used in the tests. Refer to [this issue](https://github.com/thass0/spray/issues/2) 201 | for more details. You can ignore tests that fail for this reason only. 202 | 203 | ## 📖 References 204 | 205 | - Sy Brand's blog series [Writing a Linux Debugger](https://blog.tartanllama.xyz/writing-a-linux-debugger-setup/) on writing a debugger in C++ 206 | 207 | - [The DWARF 5 standard](https://dwarfstd.org/dwarf5std.html) 208 | 209 | - [libdwarf's documentation](https://www.prevanders.net/libdwarfdoc/index.html) 210 | 211 | - Eli Bendersky's posts [How debuggers work](https://eli.thegreenplace.net/2011/01/23/how-debuggers-work-part-1) 212 | -------------------------------------------------------------------------------- /examples/free_uninit.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | void print_concat(char *a, char *b) { 6 | char *buf; 7 | int a_len = strlen(a); 8 | int b_len = strlen(b); 9 | 10 | if (b_len > 0) { 11 | buf = malloc(a_len + b_len + 1); 12 | strcpy(buf, a); 13 | strcpy(buf + a_len, b); 14 | puts(buf); 15 | } else { 16 | puts(a); 17 | } 18 | 19 | free(buf); 20 | } 21 | 22 | int main(void) { 23 | print_concat("foo", "bar"); 24 | print_concat("foo", ""); 25 | return 0; 26 | } 27 | -------------------------------------------------------------------------------- /src/args.c: 
-------------------------------------------------------------------------------- 1 | #include "args.h" 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | void 8 | print_help_message (const char *me) 9 | { 10 | if (me == NULL) 11 | { 12 | me = "spray"; 13 | } 14 | 15 | fprintf (stderr, 16 | "usage: %s [-c | --no-color] file [arg1 ...]\n" 17 | "\n" 18 | " file The name of the executable file to debug\n" 19 | " arg1 ... Arguments passed to the executable to debug\n" 20 | " -c, --no-color Disable colored output\n" 21 | "\n" 22 | "Spray is a simple debugger for programs written in C.\n" 23 | "For the best output, programs should be compiled using\n" 24 | "Clang and with debug information enabled: clang -g foo.c.\n" 25 | "\n" 26 | "A description of the commands available in Spray's REPL and\n" 27 | "of how to use Spray can be found in the README.md file.\n" 28 | "\n" 29 | "spray \n", 30 | me); 31 | } 32 | 33 | const char * 34 | prog_name_arg (int argc, char **argv) 35 | { 36 | if (argc > 0 && argv != NULL) 37 | { 38 | return argv[0]; 39 | } 40 | else 41 | { 42 | return NULL; 43 | } 44 | } 45 | 46 | /* Parse a flag starting with a single dash. Returns -1 on error. */ 47 | int 48 | parse_short_flag (const char *flag, Flags *flags) 49 | { 50 | if (flag == NULL || flags == NULL) 51 | { 52 | return -1; 53 | } 54 | 55 | if (strcmp ("-c", flag) == 0) 56 | { 57 | flags->no_color = true; 58 | } 59 | else 60 | { 61 | return -1; 62 | } 63 | 64 | return 0; 65 | } 66 | 67 | /* Parse a flag starting with a double dash. Returns -1 on error. */ 68 | int 69 | parse_long_flag (const char *flag, Flags *flags) 70 | { 71 | if (flag == NULL || flags == NULL) 72 | { 73 | return -1; 74 | } 75 | 76 | if (strcmp ("--no-color", flag) == 0) 77 | { 78 | flags->no_color = true; 79 | } 80 | else 81 | { 82 | return -1; 83 | } 84 | 85 | return 0; 86 | } 87 | 88 | /* Parse all flags in the command line arguments. 
Flags start with 89 | * either (1) a single dash followed by a single character or (2) a 90 | * double dash followed by a string. Parsing stops once one of the 91 | * given arguments doesn't fulfill either (1) or (2). 92 | * -1 is returned if the arguments contain invalid flags or the arguments 93 | * to this function are invalid. On success the number of arguments that 94 | * were processed thus far is returned. It is an error if there are no 95 | * arguments left after parsing all flags. */ 96 | int 97 | parse_flags (int argc, char **argv, Flags *flags) 98 | { 99 | if (flags == NULL || argv == NULL) 100 | { 101 | return -1; 102 | } 103 | 104 | Flags flags_buf = { 0 }; 105 | int res = 0; 106 | int i = 1; /* argv[0] is us. */ 107 | 108 | for (; i < argc; i++) 109 | { 110 | if (strncmp (argv[i], "--", 2) == 0) 111 | { 112 | res = parse_long_flag (argv[i], &flags_buf); 113 | } 114 | else if (strncmp (argv[i], "-", 1) == 0) 115 | { 116 | res = parse_short_flag (argv[i], &flags_buf); 117 | } 118 | else 119 | { 120 | /* There are no more flags. */ 121 | break; 122 | } 123 | 124 | /* Abort on error. */ 125 | if (res == -1) 126 | { 127 | return -1; 128 | } 129 | } 130 | 131 | if (i == argc) 132 | { 133 | /* There must be more arguments than just flags. */ 134 | return -1; 135 | } 136 | 137 | *flags = flags_buf; 138 | 139 | return i; 140 | } 141 | 142 | int 143 | parse_args (int argc, char **argv, Args *args) 144 | { 145 | if (argv == NULL || args == NULL) 146 | { 147 | return -1; 148 | } 149 | 150 | int flags_res = parse_flags (argc, argv, &args->flags); 151 | if (flags_res == -1) 152 | { 153 | return -1; 154 | } 155 | else 156 | { 157 | int file_idx = flags_res; 158 | args->file = argv[file_idx]; 159 | 160 | /* Are there any arguments that should be 161 | * passed to the debugged executable? */ 162 | if (file_idx + 1 < argc) 163 | { 164 | /* The arguments passed to the debugged 165 | * executable include its name. 
*/ 166 | args->args = argv + file_idx; 167 | args->n_args = argc - file_idx; 168 | } 169 | } 170 | 171 | return 0; 172 | } 173 | 174 | static Args GLOBAL_ARGS = { 0 }; 175 | 176 | void 177 | set_args (const Args *args) 178 | { 179 | assert (args != NULL); 180 | 181 | GLOBAL_ARGS.flags = args->flags; 182 | 183 | /* Replace the filepath to the executable. */ 184 | free (GLOBAL_ARGS.file); 185 | GLOBAL_ARGS.file = strdup (args->file); 186 | 187 | /* Replace the `args` array. */ 188 | for (size_t i = 0; i < GLOBAL_ARGS.n_args; i++) 189 | { 190 | free (GLOBAL_ARGS.args[i]); 191 | } 192 | free (GLOBAL_ARGS.args); 193 | 194 | /* Allocate one pointer more than needed so that the 195 | * array is terminated by a NULL pointer. */ 196 | GLOBAL_ARGS.args = calloc (args->n_args + 1, sizeof (char *)); 197 | for (size_t i = 0; i < args->n_args; i++) 198 | { 199 | GLOBAL_ARGS.args[i] = strdup (args->args[i]); 200 | } 201 | } 202 | 203 | const Args * 204 | get_args (void) 205 | { 206 | return &GLOBAL_ARGS; 207 | } 208 | -------------------------------------------------------------------------------- /src/args.h: -------------------------------------------------------------------------------- 1 | /* Command line arguments for spray. */ 2 | 3 | #pragma once 4 | 5 | #ifndef _SPRAY_ARGS_H_ 6 | #define _SPRAY_ARGS_H_ 7 | 8 | #include 9 | #include 10 | 11 | typedef struct 12 | { 13 | bool no_color; /* -c, --no-color */ 14 | } Flags; 15 | 16 | typedef struct 17 | { 18 | Flags flags; 19 | char *file; /* file */ 20 | char **args; /* arg1 ... */ 21 | size_t n_args; 22 | } Args; 23 | 24 | /* Get a pointer to the arguments set using `set_args`. The return 25 | * values are meaningful only after `set_args` was called once. */ 26 | const Args *get_args (void); 27 | 28 | /* If `SET_ARGS_ONCE` is defined, extra utilities are declared that 29 | * allow retrieving and the storing the command line arguments. 
*/ 30 | #ifdef SET_ARGS_ONCE 31 | 32 | /* Parse all command line arguments in `argc` and `argv`. Returns -1 on error. 33 | * Pointers to data in `argv` are stored in `args`.*/ 34 | int parse_args (int argc, char **argv, Args * args); 35 | 36 | /* Print the --help message. Defaults to the program name `spray`. */ 37 | void print_help_message (const char *me); 38 | 39 | /* Get the name of the *this* program from the given command line arguments. 40 | * Can be used to get the program name for `print_help_message`. */ 41 | const char *prog_name_arg (int argc, char **argv); 42 | 43 | /* Copy the given arguments so that they can be accessed from 44 | * anywhere in the program. Don't call this function if any 45 | * pointers returned by `get_args` are still alive. In general 46 | * it's best to call this function only once right after parsing 47 | * the arguments and then never again. */ 48 | void set_args (const Args * args); 49 | 50 | #endif /* SET_ARGS_ONCE */ 51 | 52 | #endif /* _SPRAY_ARGS_H_ */ 53 | -------------------------------------------------------------------------------- /src/backtrace.c: -------------------------------------------------------------------------------- 1 | #include "backtrace.h" 2 | 3 | #include "magic.h" 4 | #include "ptrace.h" 5 | #include "registers.h" 6 | 7 | #include 8 | #include 9 | 10 | typedef struct 11 | { 12 | dbg_addr pc; 13 | real_addr frame_pointer; 14 | struct 15 | { 16 | /* If `has_lineno` is false, `lineno` is meaningless. 17 | * Always check `has_lineno` before using `lineno`. 
*/ 18 | bool has_lineno; 19 | uint32_t lineno; 20 | }; 21 | const char *function; 22 | } CallLocation; 23 | 24 | struct CallFrame 25 | { 26 | CallFrame *caller; 27 | CallLocation location; 28 | }; 29 | 30 | CallFrame * 31 | init_call_frame (CallFrame *caller, dbg_addr pc, 32 | real_addr frame_pointer, DebugInfo *info) 33 | { 34 | const DebugSymbol *func_sym = sym_by_addr (pc, info); 35 | 36 | const char *func_name = NULL; 37 | const Position *pos = NULL; 38 | 39 | if (func_sym != NULL) 40 | { 41 | func_name = sym_name (func_sym, info); 42 | pos = sym_position (func_sym, info); 43 | } 44 | 45 | CallFrame *frame = malloc (sizeof (*frame)); 46 | assert (frame != NULL); 47 | 48 | if (pos != NULL) 49 | { 50 | frame->location.has_lineno = true; 51 | frame->location.lineno = pos->line; 52 | } 53 | else 54 | { 55 | frame->location.has_lineno = false; 56 | } 57 | 58 | frame->caller = caller; 59 | frame->location.pc = pc; 60 | frame->location.frame_pointer = frame_pointer; 61 | frame->location.function = func_name; 62 | 63 | return frame; 64 | } 65 | 66 | 67 | /* Check if the first two instructions of the function that contains 68 | * the given PC store frame pointer. They should look like follows: 69 | * 70 | * 55 push %rbp 71 | * 48 89 e5 mov %rsp,%rbp 72 | * 73 | * This is the standard procedure to store the previous functions's 74 | * frame pointer and then set the current function's frame pointer 75 | * to the start of the frame (i.e. the stack pointer right at the 76 | * start of the function). If this isn't found, it's likely that 77 | * the compiler omitted the frame pointer so we should emit a warning. 
*/ 78 | bool 79 | stores_frame_pointer (dbg_addr pc, real_addr load_address, 80 | pid_t pid, DebugInfo *info) 81 | { 82 | const DebugSymbol *func = sym_by_addr (pc, info); 83 | if (func == NULL) 84 | { 85 | return false; 86 | } 87 | 88 | uint64_t inst_bytes = { 0 }; 89 | real_addr func_start_addr = dbg_to_real (load_address, 90 | sym_start_addr (func)); 91 | SprayResult mem_res = pt_read_memory (pid, func_start_addr, &inst_bytes); 92 | if (mem_res == SP_ERR) 93 | { 94 | return false; 95 | } 96 | 97 | inst_bytes &= 0xffffffff; /* Mask the least significant four bytes. 98 | * Those are executed first. */ 99 | return inst_bytes == 0xe5894855; 100 | } 101 | 102 | /* NOTE: The following is a naive approach to getting a backtrace 103 | * which relies on the compiler emitting a frame pointer. 104 | * Try compiling again with `-fno-omit-frame-pointer` if 105 | * this doesn't work. */ 106 | 107 | CallFrame * 108 | init_backtrace (dbg_addr pc, 109 | real_addr load_address, pid_t pid, DebugInfo *info) 110 | { 111 | assert (info != NULL); 112 | 113 | /* Get the saved base pointer of the caller. */ 114 | real_addr frame_pointer = { 0 }; 115 | SprayResult reg_res = get_register_value (pid, rbp, &frame_pointer.value); 116 | if (reg_res == SP_ERR) 117 | { 118 | return NULL; 119 | } 120 | 121 | if (!stores_frame_pointer (pc, load_address, pid, info)) 122 | { 123 | printf ("WARN: it seems like this executable doesn't maintain a frame " 124 | "pointer.\n" 125 | " This results in incorrect or incomplete backtraces.\n" 126 | "HINT: Try to compile again with `-fno-omit-frame-pointer`.\n\n"); 127 | } 128 | 129 | CallFrame *call_frame = init_call_frame (NULL, pc, frame_pointer, info); 130 | 131 | while (frame_pointer.value != 0) 132 | { 133 | /* Read the return address of the current function 134 | * and use it as the PC of the next function. 135 | * NOTE: This operation must be performed *before* the 136 | * frame pointer is updated. 
*/ 137 | SprayResult ret_res = pt_read_memory (pid, 138 | (real_addr) { frame_pointer.value 139 | + 8 140 | }, 141 | &pc.value); 142 | if (ret_res == SP_ERR) 143 | { 144 | free_backtrace (call_frame); 145 | return NULL; 146 | } 147 | 148 | /* Read the frame pointer of the next function. */ 149 | SprayResult fp_res = 150 | pt_read_memory (pid, frame_pointer, &frame_pointer.value); 151 | if (fp_res == SP_ERR) 152 | { 153 | free_backtrace (call_frame); 154 | return NULL; 155 | } 156 | 157 | call_frame = init_call_frame (call_frame, pc, frame_pointer, info); 158 | } 159 | 160 | return call_frame; 161 | } 162 | 163 | void 164 | free_backtrace (CallFrame *call_frame) 165 | { 166 | /* Recursively free all call frames. */ 167 | while (call_frame != NULL) 168 | { 169 | CallFrame *caller = call_frame->caller; 170 | free (call_frame); 171 | call_frame = caller; 172 | } 173 | } 174 | 175 | void 176 | print_backtrace (CallFrame *call_frame) 177 | { 178 | printf ("How did we even get here? (backtrace)\n"); 179 | if (call_frame == NULL) 180 | { 181 | printf ("\n"); 182 | } 183 | else 184 | { 185 | CallLocation *location; 186 | while (call_frame != NULL) 187 | { 188 | location = &call_frame->location; 189 | 190 | printf (" " ADDR_FORMAT " ", location->pc.value); 191 | 192 | if (location->function) 193 | { 194 | printf ("%s", location->function); 195 | } 196 | else 197 | { 198 | printf (""); 199 | } 200 | 201 | if (location->has_lineno) 202 | { 203 | printf (":%u\n", location->lineno); 204 | } 205 | else 206 | { 207 | printf ("\n"); 208 | } 209 | 210 | call_frame = call_frame->caller; 211 | } 212 | } 213 | } 214 | -------------------------------------------------------------------------------- /src/backtrace.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #ifndef _SPRAY_BACKTRACE_H_ 4 | #define _SPRAY_BACKTRACE_H_ 5 | 6 | #include "info.h" 7 | #include "magic.h" 8 | 9 | typedef struct CallFrame CallFrame; 10 | 11 | /* 
Create a call frame starting at the code 12 | address `pc` and the current stack frame. */ 13 | CallFrame *init_backtrace (dbg_addr pc, 14 | real_addr load_address, 15 | pid_t pid, DebugInfo * info); 16 | 17 | /* Print a backtrace starting at the given call frame. */ 18 | void print_backtrace (CallFrame * start_frame); 19 | 20 | /* Delete the call frame. */ 21 | void free_backtrace (CallFrame * call_frame); 22 | 23 | #endif /* _SPRAY_BACKTRACE_H_ */ 24 | -------------------------------------------------------------------------------- /src/breakpoints.c: -------------------------------------------------------------------------------- 1 | #include "breakpoints.h" 2 | 3 | #include "magic.h" 4 | #include "hashmap.h" 5 | 6 | #include 7 | #include 8 | 9 | typedef struct 10 | { 11 | real_addr addr; /* The address is the only member that's 12 | * used to compare and look up breakpoints. */ 13 | bool is_enabled; 14 | uint8_t orig_data; 15 | } Breakpoint; 16 | 17 | struct Breakpoints 18 | { 19 | struct hashmap *map; 20 | pid_t pid; 21 | }; 22 | 23 | int 24 | breakpoint_compare (const void *a, const void *b, void *udata) 25 | { 26 | unused (udata); 27 | const Breakpoint *breakpoint_a = (Breakpoint *) a; 28 | const Breakpoint *breakpoint_b = (Breakpoint *) b; 29 | /* `compare` assumes that strings are used in its implementation. 30 | * Mimicking `strcmp`, 0 is returned when the keys are equal. 
*/ 31 | return !(breakpoint_a->addr.value == breakpoint_b->addr.value); 32 | } 33 | 34 | uint64_t 35 | breakpoint_hash (const void *entry, uint64_t seed0, uint64_t seed1) 36 | { 37 | const Breakpoint *breakpoint = (Breakpoint *) entry; 38 | uint64_t addr = breakpoint->addr.value; 39 | return hashmap_sip (&addr, sizeof (addr), seed0, seed1); 40 | } 41 | 42 | Breakpoints * 43 | init_breakpoints (pid_t pid) 44 | { 45 | struct hashmap *map = hashmap_new (sizeof (Breakpoint), 0, 0, 0, 46 | breakpoint_hash, breakpoint_compare, 47 | NULL, NULL); 48 | Breakpoints *breakpoints = (Breakpoints *) calloc (1, sizeof (Breakpoints)); 49 | breakpoints->map = map; 50 | breakpoints->pid = pid; 51 | return breakpoints; 52 | } 53 | 54 | void 55 | free_breakpoints (Breakpoints *breakpoints) 56 | { 57 | assert (breakpoints != NULL); 58 | hashmap_free (breakpoints->map); 59 | free (breakpoints); 60 | } 61 | 62 | bool 63 | lookup_breakpoint (Breakpoints *breakpoints, real_addr address) 64 | { 65 | assert (breakpoints != NULL); 66 | 67 | Breakpoint lookup = {.addr = address }; 68 | const Breakpoint *check = hashmap_get (breakpoints->map, &lookup); 69 | 70 | /* Did we find an enabled breakpoint? */ 71 | if (check != NULL && check->is_enabled) 72 | { 73 | return true; 74 | } 75 | else 76 | { 77 | return false; 78 | } 79 | } 80 | 81 | /* Wrapper to make internal breakpoint look-ups more comfortable. */ 82 | const Breakpoint * 83 | get_breakpoint (Breakpoints *breakpoints, real_addr addr) 84 | { 85 | assert (breakpoints != NULL); 86 | return hashmap_get (breakpoints->map, &(Breakpoint) {.addr = addr}); 87 | } 88 | 89 | SprayResult 90 | enable_breakpoint (Breakpoints *breakpoints, real_addr addr) 91 | { 92 | assert (breakpoints != NULL); 93 | 94 | const Breakpoint *to_enable = get_breakpoint (breakpoints, addr); 95 | 96 | /* Do we need to create the breakpoint first? 
*/ 97 | if (to_enable == NULL) 98 | { 99 | hashmap_set (breakpoints->map, &(Breakpoint) {.addr = addr}); 100 | to_enable = get_breakpoint (breakpoints, addr); 101 | assert (to_enable != NULL); 102 | } 103 | 104 | /* Only enable the breakpoint if it's currently disabled. 105 | * Re-activating an already active breakpoint would delete the 106 | * original instructions that were overwritten to insert the trap. */ 107 | if (!to_enable->is_enabled) 108 | { 109 | /* Read the word at `bp->addr` in the tracee's memory. */ 110 | uint64_t word = { 0 }; 111 | SprayResult res = 112 | pt_read_memory (breakpoints->pid, to_enable->addr, &word); 113 | if (res == SP_ERR) 114 | { 115 | return SP_ERR; 116 | } 117 | 118 | /* Save the original least significant byte. */ 119 | uint64_t orig_data = (uint8_t) (word & BTM_BYTE_MASK); 120 | 121 | /* Set the least significant bytes to the instruction `int 3`. 122 | * When this instruction is executed, the tracee raises an 123 | * interrupt and it is sent a trap signal. Receiving this 124 | * signal stops it. */ 125 | uint64_t int3_data = ((word & ~BTM_BYTE_MASK) | INT3); 126 | 127 | /* Write the trap to the tracee's memory. */ 128 | res = pt_write_memory (breakpoints->pid, to_enable->addr, int3_data); 129 | if (res == SP_ERR) 130 | { 131 | return SP_ERR; 132 | } 133 | 134 | /* Update the entry in the hash map. All data belonging to 135 | * the breakpoint is updated here at once, after the memory write 136 | * to the tracee's memory has completed successfully. 
*/ 137 | Breakpoint updated = { 138 | .addr = to_enable->addr, 139 | .is_enabled = true, 140 | .orig_data = orig_data, 141 | }; 142 | hashmap_set (breakpoints->map, &updated); 143 | } 144 | 145 | return SP_OK; 146 | } 147 | 148 | SprayResult 149 | disable_breakpoint (Breakpoints *breakpoints, real_addr addr) 150 | { 151 | assert (breakpoints != NULL); 152 | 153 | const Breakpoint *to_disable = get_breakpoint (breakpoints, addr); 154 | 155 | if (to_disable != NULL && to_disable->is_enabled) 156 | { 157 | /* `ptrace` only operates on whole words, so we need 158 | * to read what's currently there first, then replace the 159 | * modified low byte and write it to the address. */ 160 | 161 | uint64_t modified_word = 0; 162 | SprayResult res = 163 | pt_read_memory (breakpoints->pid, to_disable->addr, &modified_word); 164 | if (res == SP_ERR) 165 | { 166 | return SP_ERR; 167 | } 168 | 169 | uint64_t restored_word = 170 | ((modified_word & ~BTM_BYTE_MASK) | to_disable->orig_data); 171 | res = 172 | pt_write_memory (breakpoints->pid, to_disable->addr, restored_word); 173 | if (res == SP_ERR) 174 | { 175 | return SP_ERR; 176 | } 177 | 178 | /* Update after the write succeeded. 
*/ 179 | Breakpoint disabled = { 180 | .addr = to_disable->addr, 181 | .is_enabled = false, 182 | .orig_data = to_disable->orig_data, 183 | }; 184 | hashmap_set (breakpoints->map, &disabled); 185 | } 186 | 187 | return SP_OK; 188 | } 189 | -------------------------------------------------------------------------------- /src/breakpoints.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #ifndef _SPRAY_BREAKPOINTS_H_ 4 | #define _SPRAY_BREAKPOINTS_H_ 5 | 6 | #include 7 | #include 8 | 9 | #include "ptrace.h" 10 | 11 | typedef struct Breakpoints Breakpoints; 12 | 13 | Breakpoints *init_breakpoints (pid_t pid); 14 | 15 | void free_breakpoints (Breakpoints * breakpoints); 16 | 17 | /* Enable the given breakpoint by replacing the 18 | * instruction at `addr` with `int 3` (0xcc). 19 | * 20 | * This will make the child receive a `SIGTRAP` once the 21 | * instruction at address `addr` is reached. 22 | * 23 | * The tracee's memory stays untouched if an error is returned. */ 24 | SprayResult enable_breakpoint (Breakpoints * breakpoints, real_addr addr); 25 | 26 | /* Disable a breakpoint, restoring the original instruction. 27 | * Does nothing if there is no breakpoint at `addr`. 28 | * 29 | * On error, the tracee's memory stays untouched 30 | * and thus the breakpoints remains active. */ 31 | SprayResult disable_breakpoint (Breakpoints * breakpoints, real_addr addr); 32 | 33 | /* Return `true` if there is a breakpoint at `addr` and 34 | * this breakpoint is enabled. Otherwise, if the breakpoint 35 | * doesn't exist or is disabled, return `false`. 
*/
bool lookup_breakpoint (Breakpoints * breakpoints, real_addr addr);

#endif /* _SPRAY_BREAKPOINTS_H_ */

--------------------------------------------------------------------------------
/src/c-syntax.scm:
--------------------------------------------------------------------------------
;;; Token representation and lists of fixed C lexemes.
;;; A token is a pair `(tag . text)`.
(module c-tokens *
  (import scheme
          (chicken base))

  ;;; Tokens in C source code.
  (define token-tag-other 'tt-other)
  (define token-tag-keyword 'tt-keyword)
  (define token-tag-operator 'tt-operator)
  (define token-tag-special-symbol 'tt-special-symbol)
  (define token-tag-constant 'tt-constant)
  (define token-tag-literal 'tt-literal)
  (define token-tag-identifier 'tt-identifier)
  (define token-tag-prim-type 'tt-type)
  (define token-tag-preproc-directive 'tt-preproc)
  (define token-tag-include-filepath 'tt-include-filepath)
  (define token-tag-comment 'tt-comment)
  (define token-tag-uncomment 'tt-uncomment)
  (define token-tag-trailing-uncomment 'tt-trailing-uncomment)
  (define token-tag-comment-text 'tt-comment-text)
  (define token-tag-whitespace 'tt-whitespace)

  ;;; Create a token. Note the argument order: the text comes
  ;;; first, but the tag is stored in the car of the pair.
  (define (make-token text token-tag)
    (cons token-tag text))

  ;;; Create a list that contains a single token.
  (define (make-token-list text token-tag)
    (list (make-token text token-tag)))

  ;;; Create the token with tag `tt-end` and empty text.
  (define (make-end-token)
    (make-token "" 'tt-end))

  ;;; Is `token` an end token, as created by `make-end-token`?
  (define (end-token? token)
    (and (equal? (token-text token) "")
         (eq? (token-tag token) 'tt-end)))

  ;;; Return the tag of `token`. Signals an error if `token` isn't a pair.
  (define (token-tag token)
    (if (pair? token)
        (car token)
        (error "token-tag, token must be a pair" token)))

  ;;; Return the text of `token`. Signals an error if `token` isn't a pair.
  (define (token-text token)
    (if (pair? token)
        (cdr token)
        (error "token-text, token must be a pair" token)))


  ;;; Lists of meaningful string literals in C sources.
  (define C-keywords '("case" "default" "if" "else" "switch" "while"
                       "do" "for" "goto" "continue" "break" "return"
                       "struct" "union" "enum" "typedef" "extern"
                       "static" "register" "auto" "const" "volatile"
                       "restrict"))
  ;; Longer operators are listed before the shorter operators
  ;; that they start with (e.g. ">>=" before ">>" before ">").
  (define C-operators '(">>=" "<<=" "+=" "-=" "*=" "/=" "%=" "&=" "^=" "|="
                        ">>" "<<" "++" "--" "->" "&&" "||" "<=" ">=" "==" "!="
                        "=" "." "&" "!" "~" "-" "+" "*" "/" "%" "<" ">" "^"
                        "|" "?" ":" "sizeof"))
  (define C-builtin-types '("char" "short" "int" "long" "signed"
                            "unsigned" "float" "double" "void"))
  (define C-special-symbols '("(" ")" "[" "]" "{" "}" "," ";" "..."))
  (define C-comment '("/*"))
  (define C-uncomment '("*/"))
  (define C++-comment '("//"))

  ;; End of module c-tokens.
  )


(module c-regex *
  (import scheme)
  (import regex) ; `regexp` and `string-search`.

  ;;; Regular expressions for scanning C code. They mostly
  ;;; resemble what's used in [this](https://www.lysator.liu.se/c/ANSI-C-grammar-l.html)
  ;;; scanner although some modifications were made.
  ;;; All of them are anchored with `^`, so they only
  ;;; match at the start of the string being searched.

  (define literal-regex (regexp "^\"([^\"\\\\]|\\\\[\\s\\S])*\""))
  (define whitespace-regex (regexp "^[\t\r\n ]*"))
  (define identifier-regex (regexp "^[a-zA-Z_][a-zA-Z_0-9]*"))
  (define hex-constant-regex (regexp "^0[xX][a-fA-F0-9]+(u|U|l|L)*"))
  (define octal-constant-regex (regexp "^0[0-7]+(u|U|l|L)*"))
  (define decimal-constant-regex (regexp "^[0-9][0-9]*(u|U|l|L)*"))
  (define char-constant-regex (regexp "^(u|U|l|L)*'(\\\\.|[^\\\\'])+'"))
  (define sci-constant-regex (regexp "^[1-9][0-9]*[Ee][+-]?[0-9]+"))
  ;; Floating point constants requiring fractional part.
  (define float-constant-regex-frac
    (regexp "^[0-9]*\\.[0-9]+([Ee][+-]?[0-9]+)?(f|F|l|L)?"))
  ;; Floating point constants requiring whole number part.
  (define float-constant-regex-whole
    (regexp "^[0-9]+\\.[0-9]*([Ee][+-]?[0-9]+)?(f|F|l|L)?"))
  ;; A preprocessor directive. Optionally also matches the
  ;; `<filename>`/`"filename"` part of `#include`s.
  ;; Groups: (1) the directive, (2) the whitespace after
  ;; it, (3) the optional filename including its delimiters.
  (define preproc-directive-regex
    (regexp "^(#[a-z_]+)([ \t]*)([<\"]([^>\"\\\\]|\\\\[\\s\\S])*[>\"])?"))
  ;; Text inside a block comment, up to but excluding the closing `*/`.
  (define comment-text-regex (regexp "^(\\*(?!\\/)|[^*])*"))
  ;; Text of a line comment, up to but excluding the newline.
  (define line-comment-text-regex (regexp "^[^\n]*"))
  ;; Match anything that's not whitespace.
  ;; Used to recover from invalid pieces of syntax.
  (define any-regex (regexp "^[^ \n\t\r]*"))
  ;; Check that the given string starts with a keyword and
  ;; that the keyword is terminated by whitespace (i.e.
  ;; the given string doesn't continue with more characters).
  ;; The terminating whitespace character is part of the full match.
  (define keyword-regex (regexp "^(case|default|if|else|switch|while|do|for|goto|continue|break|return|struct|union|enum|typedef|extern|static|register|auto|const|volatile|restrict)[\n\t ]"))

  ;;; Does `str` match `regex`?
  ;;; An empty match doesn't count as a match.
  (define (regex-match? regex str)
    (let ((search-result (string-search regex str)))
      (and (pair? search-result)
           (not (equal? (car search-result) "")))))

  ;;; Return the full match of `str` and `regex`.
  ;;; Takes the car of the search result without checking it, so only
  ;;; call this once `regex-match?` has confirmed that there is a match.
  (define (full-match regex str)
    (car (string-search regex str)))

  ;;; Reexport string-search -- `regex-matches`
  ;;; returns a list of all the matches.
  (define regex-matches string-search)

  ;; End of module c-regex.
  )


(module c-types (make-types-env
                 is-type-in-env?)
  (import scheme
          (chicken base)
          (srfi-69)
          (only (srfi-1) find)
          c-tokens)

  ;; From the C reference grammar (https://www.lysator.liu.se/c/ANSI-C-grammar-y.html):
  ;;
  ;; type_specifier
  ;;  : VOID
  ;;  ...
  ;;  | UNSIGNED
  ;;  | struct_or_union_specifier
  ;;  | enum_specifier
  ;;  | TYPE_NAME
  ;;  ;
  ;;
  ;; struct_or_union_specifier
  ;;  : struct_or_union IDENTIFIER '{' struct_declaration_list '}'
  ;;  | struct_or_union '{' struct_declaration_list '}'
  ;;  | struct_or_union IDENTIFIER
  ;;  ;
  ;;
  ;; struct_or_union
  ;;  : STRUCT
  ;;  | UNION
  ;;  ;
  ;; ...
  ;; enum_specifier
  ;;  : ENUM '{' enumerator_list '}'
  ;;  | ENUM IDENTIFIER '{' enumerator_list '}'
  ;;  | ENUM IDENTIFIER
  ;;  ;
  ;;
  ;; This tells us that every time we see any of the keywords
  ;; struct, union or enum, we just need to check if the next token
  ;; is an identifier or an opening bracket. If it's an identifier, we
  ;; can store that as the name of a type. Otherwise the type
  ;; being declared is anonymous, so we can ignore it.

  ;;; Is `token` a keyword token that's a type specifier?
  (define (type-spec-keyword? token)
    (and (eq? (token-tag token)
              token-tag-keyword)
         (let ((keyword (token-text token)))
           (or (equal? keyword "struct")
               (equal? keyword "enum")
               (equal? keyword "union")))))

  ;;; Store the identifier of the first type specification
  ;;; found in `tokens` in `env`.
  ;;; Whitespace and struct/union/enum keywords are skipped. The
  ;;; search ends without storing anything at any other kind of token.
  (define (store-type-spec! env tokens)
    (let loop ((tokens tokens))
      (if (pair? tokens)
          (cond ((eq? (token-tag (car tokens))
                      token-tag-identifier)
                 ;; Save the type's identifier.
                 (set-cdr! env (cons (token-text (car tokens))
                                     (cdr env))))
                ((eq? (token-tag (car tokens))
                      token-tag-whitespace)
                 ;; Skip whitespace.
                 (loop (cdr tokens)))
                ((type-spec-keyword? (car tokens))
                 ;; Skip type specifiers.
                 (loop (cdr tokens)))))))

  ;;; Not implemented yet: types introduced by `typedef`
  ;;; are not stored. Always returns the empty list.
  (define (store-typedef! env tokens)
    ;; TODO
    '())

  ;;; Return a list of sublists extracted from `tokens`
  ;;; that begin with a keyword token.
  ;;; Each sublist is a tail of `tokens`. Tails that start
  ;;; later in `tokens` come first in the result.
  (define (filter-keywords tokens)
    (if (null? tokens)
        '()
        (let loop ((tokens tokens)
                   (keyword-tokens '()))
          (if (null? tokens)
              keyword-tokens
              (loop
               (cdr tokens)
               (if (eq? (token-tag (car tokens))
                        token-tag-keyword)
                   (cons tokens keyword-tokens)
                   keyword-tokens))))))

  ;;; Create a types environment from one or more lists of token
  ;;; lines. The environment is a list whose car is the symbol
  ;;; `*env*` and whose cdr holds the type identifiers found.
  (define (make-types-env token-lines . token-lines-lst)
    ;; Add the types declared in `token-lines` to `env`.
    (define (extend-types-env env token-lines)
      (for-each
       (lambda (tokens)
         (let ((token (car tokens)))
           (cond ((type-spec-keyword? token)
                  (store-type-spec! env tokens))
                 ((equal? (token-text token)
                          "typedef")
                  (store-typedef! env tokens)))))
       (filter-keywords (flatten token-lines))))
    (let ((env (list '*env*)))
      (extend-types-env env token-lines)
      (for-each
       (lambda (token-lines)
         (extend-types-env env token-lines))
       token-lines-lst)
      env))

  ;;; Is `env` a types environment created by `make-types-env`?
  (define (types-env? env)
    (and (pair? env)
         (eq? '*env* (car env))))

  ;;; Is `type` the identifier of a type stored in `env`? Returns the
  ;;; stored identifier if it is. Returns #f if the type isn't found
  ;;; or if `env` isn't a types environment.
  (define (is-type-in-env? env type)
    (if (types-env? env)
        (find (lambda (type-identifier)
                (equal? type-identifier type))
              (cdr env))
        #f))
  ;; End of module c-types.
  )

--------------------------------------------------------------------------------
/src/debugger.h:
--------------------------------------------------------------------------------
#pragma once

#ifndef _SPRAY_DEBUGGER_H_
#define _SPRAY_DEBUGGER_H_

/* Required to use `sigabbrev_np` */
#define _GNU_SOURCE

#include /* NOTE(review): no header name here -- restore it from upstream. */

#include "breakpoints.h"
#include "history.h"
#include "info.h"

/* State of one debugging session. */
typedef struct
{
  const char *prog_name;        /* Tracee program name. */
  pid_t pid;                    /* Tracee pid. */
  Breakpoints *breakpoints;     /* Breakpoints.
*/
  DebugInfo *info;              /* Debug information about the tracee. */
  real_addr load_address;       /* Load address. Set for PIEs, 0 otherwise. */
  History history;              /* Command history of recent commands. */
} Debugger;

/* Setup a debugger. This forks the child process, launches
 * and immediately stops it.
 *
 * On success, `dbg` is modified to accommodate the changes.
 *
 * On error, `dbg` stays untouched, and `-1` is returned. */
int setup_debugger (const char *prog_name, char *prog_argv[], Debugger * dbg);

/* Run the debugger. Starts debugging at the beginning
 * of the `main` function of the child process.
 *
 * Call `setup_debugger` on `dbg` before calling this function.
 * After `run_debugger` returns, `dbg` is still allocated and
 * must be deleted using `del_debugger`. */
void run_debugger (Debugger dbg);

/* Free memory allocated by the debugger. Returns
 * `SP_ERR` if some resource couldn't be deleted. */
SprayResult del_debugger (Debugger dbg);

#ifdef UNIT_TESTS

/* Detail codes for an `ExecResult` whose `type` is `SP_OK`. */
typedef enum
{
  EXEC_SIG_EXITED,
  EXEC_SIG_KILLED,
  EXEC_SIG_CONT,
  EXEC_SIG_STOPPED,
  EXEC_NONE,                    /* No additional information. */
} ExecOkCode;

/* Detail codes for an `ExecResult` whose `type` is `SP_ERR`. */
typedef enum
{
  EXEC_CONT_DEAD,
  EXEC_INVALID_WAIT_STATUS,
  EXEC_FUNCTION_NOT_FOUND,
  EXEC_SET_BREAKPOINTS_FAILED,
  EXEC_PC_LINE_NOT_FOUND,
  EXEC_STEP,
} ExecErrCode;

/* Result of an operation that executes the tracee.
 * NOTE(review): `type` presumably selects which member of `code`
 * is valid -- confirm against the implementation. */
typedef struct
{
  SprayResult type;
  union
  {
    ExecOkCode ok;
    ExecErrCode err;
  } code;
  union
  {
    struct
    {
      int signo;
      int code;                 /* `si_code` field of `siginfo_t` struct. */
    } signal;                   /* Set for `EXEC_SIG_KILLED` and `EXEC_SIG_STOPPED`. */
    int exit_code;              /* Set for `EXEC_SIG_EXITED`. */
    int wait_status;            /* Set for `EXEC_INVALID_WAIT_STATUS`.
*/ 83 | } data; 84 | } ExecResult; 85 | 86 | ExecResult continue_execution (Debugger * dbg); 87 | ExecResult wait_for_signal (Debugger * dbg); 88 | 89 | #endif /* UNIT_TESTS */ 90 | 91 | #endif /* _SPRAY_DEBUGGER_H_ */ 92 | -------------------------------------------------------------------------------- /src/history.c: -------------------------------------------------------------------------------- 1 | #include "history.h" 2 | 3 | #include 4 | #include 5 | 6 | struct History 7 | { 8 | char *command; 9 | }; 10 | 11 | History 12 | init_history (void) 13 | { 14 | History history = (History) calloc (1, sizeof (struct History)); 15 | assert (history != NULL); 16 | return history; 17 | } 18 | 19 | void 20 | free_history (History history) 21 | { 22 | if (history != NULL) 23 | { 24 | free (history->command); 25 | free (history); 26 | } 27 | } 28 | 29 | void 30 | save_command (History history, const char *line) 31 | { 32 | assert (history != NULL); 33 | assert (line != NULL); 34 | 35 | history->command = (char *) realloc (history->command, strlen (line) + 1); 36 | strcpy (history->command, line); 37 | } 38 | 39 | SprayResult 40 | read_command (History history, char **line) 41 | { 42 | assert (history != NULL); 43 | assert (line != NULL); 44 | if (history->command == NULL) 45 | { 46 | return SP_ERR; 47 | } 48 | else 49 | { 50 | *line = strdup (history->command); 51 | return SP_OK; 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /src/history.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #ifndef _SPRAY_HISTORY_H_ 4 | #define _SPRAY_HISTORY_H_ 5 | 6 | #include "magic.h" 7 | 8 | #include 9 | 10 | typedef struct History *History; 11 | 12 | History init_history (void); 13 | 14 | void free_history (History history); 15 | 16 | void save_command (History history, const char *line); 17 | 18 | /* Copys the command to `line` (if there is a command) 19 | * using `malloc`. 
The caller must free the copy. */ 20 | SprayResult read_command (History history, char **line); 21 | 22 | #endif /* _SPRAY_HISTORY_H_ */ 23 | -------------------------------------------------------------------------------- /src/info.h: -------------------------------------------------------------------------------- 1 | /* Information about the executable that's being debugged. */ 2 | 3 | #pragma once 4 | 5 | #ifndef _SPRAY_INFO_H_ 6 | #define _SPRAY_INFO_H_ 7 | 8 | #include "breakpoints.h" 9 | #include "registers.h" 10 | #include 11 | 12 | typedef struct DebugInfo DebugInfo; 13 | 14 | /* Initialize debugging information. Returns NULL on error. */ 15 | DebugInfo *init_debug_info (const char *filepath); 16 | 17 | /* Free the given `DebugInfo` instance. Any pointer 18 | * to an object returned from a function in this file 19 | * becomes invalid if the `DebugInfo` instance given 20 | * to that function is deleted. 21 | * Returns `SP_ERR` if some resources couldn't be deleted. */ 22 | SprayResult free_debug_info (DebugInfo ** infop); 23 | 24 | /* A symbol in the executable that's being debugged. */ 25 | typedef struct DebugSymbol DebugSymbol; 26 | 27 | /* Get a debug symbol by its name. Returns NULL on error. */ 28 | const DebugSymbol *sym_by_name (const char *name, DebugInfo * info); 29 | 30 | /* Get a debug symbol by an address that belongs to it. 31 | * Returns NULL on error. */ 32 | const DebugSymbol *sym_by_addr (dbg_addr addr, DebugInfo * info); 33 | 34 | /* Get the name of the given symbol. Returns NULL if there is no name. */ 35 | const char *sym_name (const DebugSymbol * sym, const DebugInfo * info); 36 | 37 | /* Get the address at which the code of the first line 38 | * of the given function starts. Returns `SP_ERR` and 39 | * leaves `addr` untouched if the symbol doesn't refer 40 | * to a function. 
*/ 41 | SprayResult function_start_addr (const DebugSymbol * func, 42 | const DebugInfo * info, dbg_addr * addr); 43 | 44 | /* Get the start address (low PC) of the given symbol. */ 45 | dbg_addr sym_start_addr (const DebugSymbol * sym); 46 | 47 | /* Get the end address (high PC) of the given symbol. */ 48 | dbg_addr sym_end_addr (const DebugSymbol * sym); 49 | 50 | /* Get the address of the given symbol. Returns the same address 51 | * as `sym_start_addr` if the symbol was created from a name. */ 52 | dbg_addr sym_addr (const DebugSymbol * sym); 53 | 54 | /* Get the filepath of the source file that belongs to the symbol. 55 | * The string that's returned is owned and later deleted by `info`. */ 56 | const char *sym_filepath (const DebugSymbol * sym, const DebugInfo * info); 57 | 58 | /* A position in a source file. */ 59 | typedef struct Position 60 | { 61 | uint32_t line; 62 | uint32_t column; 63 | /* `true` if this position perfectly matches the symbol used to 64 | * retrieve it. Otherwise this position only represents the closest 65 | * location to describe the symbol with. */ 66 | bool is_exact; 67 | } Position; 68 | 69 | /* Returns the position of the symbol in the source file 70 | * that belongs to the symbol. NULL is returned if no 71 | * such position could be retrieved. */ 72 | const Position *sym_position (const DebugSymbol * sym, 73 | const DebugInfo * info); 74 | 75 | /* Return the position that belongs to the given address. 76 | * Returns NULL on error. */ 77 | const Position *addr_position (dbg_addr addr, DebugInfo * info); 78 | 79 | /* Returns the function name that belongs to the given address. 80 | * Returns NULL on error. */ 81 | const char *addr_name (dbg_addr addr, DebugInfo * info); 82 | 83 | /* Returns the filepath that belongs to the given address. 84 | * Returns NULL on error. */ 85 | const char *addr_filepath (dbg_addr addr, DebugInfo * info); 86 | 87 | /* The following function don't fit the regular scheme of 88 | * this interface. 
They are currently required but might 89 | be incorporated in a generic interface later. */ 90 | 91 | /* Returns the address that belongs to the given filepath and line number. 92 | * `SP_ERR` is returned if no such address could be found and `addr` 93 | * stays untouched. */ 94 | SprayResult addr_at (const char *filepath, 95 | uint32_t lineno, 96 | const DebugInfo * info, dbg_addr * addr); 97 | 98 | /* Is this a dynamic executable which is relocated? */ 99 | bool is_dyn_exec (const DebugInfo * info); 100 | 101 | /* Set breakpoints required to step over the line referred to by `func`. 102 | * On error `SP_ERR` is returned and nothing has to be deleted. */ 103 | SprayResult set_step_over_breakpoints (const DebugSymbol * func, 104 | const DebugInfo * info, 105 | real_addr load_address, 106 | Breakpoints * breakpoints, 107 | real_addr ** to_del, size_t *n_to_del); 108 | 109 | /* Information about runtime variables. 110 | * 111 | * This includes a description of where to find this variable 112 | * in the memory of the running debuggee process, the path to 113 | * the file where the variable is declared and line number in 114 | * this file. 115 | * 116 | * It also includes information on the type of the variable. 117 | * This way, the value of the variable can be printed 118 | * according to the type. 119 | * 120 | * It does not include the name the variable is declared as. 121 | * The name should be easily accessible since it's required 122 | * to create an instance of `RuntimeVariable`. */ 123 | typedef struct RuntimeVariable RuntimeVariable; 124 | 125 | /* Return the location of the variable as a runtime address. 126 | * The return value is meaningless if `is_addr_loc == false`. 127 | * Check that that's not the case first! */ 128 | real_addr var_loc_addr (const RuntimeVariable *var); 129 | 130 | /* Return the location of the variable as a register. 131 | * The return value is meaningless if `is_reg_loc == false`. 132 | * Check that that's not the case first! 
*/ 133 | x86_reg var_loc_reg (const RuntimeVariable *var); 134 | 135 | /* Check the type of a location description. */ 136 | bool is_addr_loc (const RuntimeVariable *var); 137 | bool is_reg_loc (const RuntimeVariable *var); 138 | 139 | /* Return the path of the file and the line number in the file 140 | * where the variable described by `var` was declared. 141 | * 142 | * Both of them are optional. `0` indicates that there is no 143 | * line number (since line numbers start at 1!), and `NULL` is 144 | * returned if there is no path. */ 145 | const char *var_loc_path (const RuntimeVariable *var); 146 | unsigned var_loc_line (const RuntimeVariable *var); 147 | 148 | /* Print the path and the line of the given variable into 149 | * a string that's returned. The caller should free the string. 150 | * 151 | * This function uses the values that `var_loc_path` and 152 | * `var_loc_line` return. 153 | * 154 | * `var` must not be `NULL`. */ 155 | char *print_var_loc (const RuntimeVariable *var); 156 | 157 | /* Use the type of the variable and return its value in a string. 158 | * The caller should free the string. */ 159 | char *print_var_value (const RuntimeVariable *var, uint64_t value, 160 | FormatFilter filter); 161 | 162 | /* Same as `print_var_value`, but `deref` is the result of dereferencing 163 | * the given variable, and not the value of the variable by itself. The 164 | * caller should free the returned string. */ 165 | char *print_var_deref_value (const RuntimeVariable *var, uint64_t deref, 166 | FormatFilter filter); 167 | 168 | /* Mask off all bits of value that are not part of the type's value. 169 | * E.g. only the LSB is returned for `char`s. */ 170 | uint64_t mask_var_value (const RuntimeVariable *var, uint64_t value); 171 | 172 | /* Get the location of the variable with the 173 | * given name in the scope around `pc`. 174 | * 175 | * On success, a new heap-allocated location is returned. 
176 | * This location must be manually `free`'d (TODO: make 177 | * happen automatically when `info` is destroyed). 178 | * 179 | * `NULL` is returned on error. */ 180 | RuntimeVariable *init_var (dbg_addr pc, 181 | real_addr load_address, 182 | const char *var_name, 183 | pid_t pid, const DebugInfo * info); 184 | 185 | /* Delete a `RuntimeVariable` pointer as returned by `init_var`. */ 186 | void del_var (RuntimeVariable *var); 187 | 188 | #endif /* _SPRAY_INFO_H_ */ 189 | -------------------------------------------------------------------------------- /src/magic.c: -------------------------------------------------------------------------------- 1 | #include "magic.h" 2 | 3 | #include 4 | #include 5 | #include 6 | #include /* `getcwd` */ 7 | #include 8 | #include 9 | #include /* `PATH_MAX` */ 10 | #include 11 | 12 | unsigned 13 | n_digits (double num) 14 | { 15 | if (num == 0) 16 | { 17 | return 1; /* Zero has one digit when written out. */ 18 | } 19 | else 20 | { 21 | return ((unsigned) floor (log10 (fabs (num)))) + 1; 22 | } 23 | } 24 | 25 | void 26 | indent_by (unsigned n_spaces) 27 | { 28 | for (unsigned i = 0; i < n_spaces; i++) 29 | { 30 | printf (" "); 31 | } 32 | } 33 | 34 | bool 35 | str_eq (const char *restrict a, const char *restrict b) 36 | { 37 | return strcmp (a, b) == 0; 38 | } 39 | 40 | dbg_addr 41 | real_to_dbg (real_addr offset, real_addr real) 42 | { 43 | return (dbg_addr) {real.value - offset.value}; 44 | } 45 | 46 | real_addr 47 | dbg_to_real (real_addr offset, dbg_addr dwarf) 48 | { 49 | return (real_addr) {dwarf.value + offset.value}; 50 | } 51 | 52 | FormatFilter 53 | parse_format (const char *str) 54 | { 55 | if (str != NULL) 56 | { 57 | if (str_eq (str, "hex")) 58 | { 59 | return FMT_HEX; 60 | } 61 | else if (str_eq (str, "bits")) 62 | { 63 | return FMT_BITS; 64 | } 65 | else if (str_eq (str, "addr")) 66 | { 67 | return FMT_ADDR; 68 | } 69 | else if (str_eq (str, "dec")) 70 | { 71 | return FMT_DEC; 72 | } 73 | else if (str_eq (str, 
"bytes")) 74 | { 75 | return FMT_BYTES; 76 | } 77 | else 78 | { 79 | return FMT_NONE; 80 | } 81 | } 82 | else 83 | { 84 | return FMT_NONE; 85 | } 86 | } 87 | 88 | FormatFilter 89 | default_format (FormatFilter current, FormatFilter _default) 90 | { 91 | if (current == FMT_NONE) 92 | { 93 | return _default; 94 | } 95 | else 96 | { 97 | return current; 98 | } 99 | } 100 | 101 | /* Macros for printing binary numbers. https://stackoverflow.com/a/25108449 */ 102 | #define PRINTF_BITS_PATTERN_INT8 "%c%c%c%c%c%c%c%c" 103 | #define PRINTF_BITS_INT8(i) \ 104 | (((i) & 0x80ll) ? '1' : '0'), \ 105 | (((i) & 0x40ll) ? '1' : '0'), \ 106 | (((i) & 0x20ll) ? '1' : '0'), \ 107 | (((i) & 0x10ll) ? '1' : '0'), \ 108 | (((i) & 0x08ll) ? '1' : '0'), \ 109 | (((i) & 0x04ll) ? '1' : '0'), \ 110 | (((i) & 0x02ll) ? '1' : '0'), \ 111 | (((i) & 0x01ll) ? '1' : '0') 112 | 113 | #define PRINTF_BITS_PATTERN_INT16 \ 114 | PRINTF_BITS_PATTERN_INT8 " " PRINTF_BITS_PATTERN_INT8 115 | #define PRINTF_BITS_PATTERN_INT32 \ 116 | PRINTF_BITS_PATTERN_INT16 " " PRINTF_BITS_PATTERN_INT16 117 | #define PRINTF_BITS_PATTERN_INT64 \ 118 | PRINTF_BITS_PATTERN_INT32 " " PRINTF_BITS_PATTERN_INT32 119 | 120 | #define PRINTF_BITS_INT16(i) \ 121 | PRINTF_BITS_INT8((i) >> 8), PRINTF_BITS_INT8(i) 122 | #define PRINTF_BITS_INT32(i) \ 123 | PRINTF_BITS_INT16((i) >> 16), PRINTF_BITS_INT16(i) 124 | #define PRINTF_BITS_INT64(i) \ 125 | PRINTF_BITS_INT32((i) >> 32), PRINTF_BITS_INT32(i) 126 | 127 | /* Macros for printing bytes made up of two hexadecimal digits each. 
*/ 128 | #define PRINTF_BYTES_PATTERN_INT8 "%02hx" 129 | #define PRINTF_BYTES_PATTERN_INT16 \ 130 | PRINTF_BYTES_PATTERN_INT8 " " PRINTF_BYTES_PATTERN_INT8 131 | #define PRINTF_BYTES_PATTERN_INT32 \ 132 | PRINTF_BYTES_PATTERN_INT16 " " PRINTF_BYTES_PATTERN_INT16 133 | #define PRINTF_BYTES_PATTERN_INT64 \ 134 | PRINTF_BYTES_PATTERN_INT32 " " PRINTF_BYTES_PATTERN_INT32 135 | #define PRINTF_BYTES_INT16(i) ((uint8_t)((i) >> 8) & 0xff), ((uint8_t)(i) & 0xff) 136 | #define PRINTF_BYTES_INT32(i) PRINTF_BYTES_INT16((i) >> 16), PRINTF_BYTES_INT16(i) 137 | #define PRINTF_BYTES_INT64(i) PRINTF_BYTES_INT32((i) >> 32), PRINTF_BYTES_INT32(i) 138 | 139 | #define HEX_FORMAT "0x%lx" 140 | #define DEC_FORMAT "%ld" 141 | 142 | char * 143 | print_format (uint64_t value, FormatFilter filter) 144 | { 145 | /* A 512 byte maximum means at most 8 characters per bit. 146 | * That should be sufficient (+1 for '\0') */ 147 | int n = 513; 148 | char *buf = malloc (n); 149 | assert (buf != NULL); 150 | 151 | switch (filter) 152 | { 153 | case FMT_NONE: 154 | case FMT_DEC: 155 | /* Signed decimal numbers are the default. 
*/ 156 | snprintf (buf, n, DEC_FORMAT, (int64_t) value); 157 | break; 158 | case FMT_HEX: 159 | snprintf (buf, n, HEX_FORMAT, value); 160 | break; 161 | case FMT_BITS: 162 | snprintf (buf, n, PRINTF_BITS_PATTERN_INT64, 163 | PRINTF_BITS_INT64 (value)); 164 | break; 165 | case FMT_ADDR: 166 | snprintf (buf, n, ADDR_FORMAT, value); 167 | break; 168 | case FMT_BYTES: 169 | snprintf (buf, n, PRINTF_BYTES_PATTERN_INT64, 170 | PRINTF_BYTES_INT64 (value)); 171 | break; 172 | } 173 | 174 | return buf; 175 | } 176 | 177 | const char * 178 | relative_filepath (const char *abs_filepath) 179 | { 180 | if (abs_filepath == NULL) 181 | { 182 | return NULL; 183 | } 184 | 185 | char *cwd_buf = malloc (sizeof (*cwd_buf) * PATH_MAX); 186 | char *cwd = getcwd (cwd_buf, PATH_MAX); 187 | if (cwd == NULL) 188 | { 189 | return NULL; 190 | } 191 | 192 | /* Set `i` to the first index in `filepath` that's not part of the cwd. */ 193 | size_t i = 0; 194 | while (cwd[i] == abs_filepath[i]) 195 | { 196 | i++; 197 | } 198 | 199 | free (cwd_buf); 200 | 201 | if (i == 0) 202 | { 203 | /* `abs_filepath` is a relative filepath and 204 | * should be returned entirely. */ 205 | return abs_filepath; 206 | } 207 | else 208 | { 209 | /* Return the part of `filepath` that's not part of the cwd. 210 | * `+ 1` removes the slash character at `abs_filepath[i]`. 211 | * This slash is left because `cwd` doesn't have a trailing 212 | * slash character. Hence, this character is the first one 213 | * where `abs_filepath` and `cmd` differ. 
*/ 214 | return abs_filepath + i + 1; 215 | } 216 | } 217 | 218 | void 219 | print_as_relative_filepath (const char *filepath) 220 | { 221 | assert (filepath != NULL); 222 | 223 | char *relative_buf = strdup (filepath); 224 | const char *relative = relative_filepath (relative_buf); 225 | if (relative != NULL) 226 | { 227 | printf ("%s", relative); 228 | free (relative_buf); 229 | } 230 | else 231 | { 232 | printf ("%s", filepath); 233 | } 234 | } 235 | 236 | void 237 | print_msg (FILE *stream, const char *kind, const char *fmt, va_list argp) 238 | { 239 | assert (kind != NULL); 240 | assert (fmt != NULL); 241 | assert (argp != NULL); 242 | 243 | size_t len = strlen (fmt) + strlen (kind) + 4; 244 | char *fmt_buf = calloc (len, sizeof (*fmt_buf)); 245 | assert (fmt_buf != NULL); 246 | 247 | size_t n_printed = snprintf (fmt_buf, len, "%s: %s\n", kind, fmt); 248 | /* `snprintf` writes a maximum of `len` bytes, including the 249 | * `\0` byte, and returns the number of bytes written, 250 | * excluding the `\0` byte. Thus, `len` was too small and the 251 | * output was truncated if `n_printed >= len`. */ 252 | assert (n_printed == (len - 1)); 253 | 254 | vfprintf (stream, fmt_buf, argp); 255 | 256 | free (fmt_buf); 257 | } 258 | 259 | void 260 | spray_err (const char *fmt, ...) 261 | { 262 | va_list argp; 263 | va_start (argp, fmt); 264 | print_msg (stderr, "ERR", fmt, argp); 265 | va_end (argp); 266 | } 267 | 268 | void 269 | spray_warn (const char *fmt, ...) 270 | { 271 | va_list argp; 272 | va_start (argp, fmt); 273 | print_msg (stderr, "WARN", fmt, argp); 274 | va_end (argp); 275 | } 276 | 277 | void 278 | spray_hint (const char *fmt, ...) 279 | { 280 | va_list argp; 281 | va_start (argp, fmt); 282 | print_msg (stderr, "HINT", fmt, argp); 283 | va_end (argp); 284 | } 285 | 286 | void 287 | repl_err (const char *fmt, ...) 
288 | { 289 | va_list argp; 290 | va_start (argp, fmt); 291 | print_msg (stdout, "ERR", fmt, argp); 292 | va_end (argp); 293 | } 294 | 295 | void 296 | repl_warn (const char *fmt, ...) 297 | { 298 | va_list argp; 299 | va_start (argp, fmt); 300 | print_msg (stdout, "WARN", fmt, argp); 301 | va_end (argp); 302 | } 303 | 304 | void 305 | repl_hint (const char *fmt, ...) 306 | { 307 | va_list argp; 308 | va_start (argp, fmt); 309 | print_msg (stdout, "HINT", fmt, argp); 310 | va_end (argp); 311 | } 312 | -------------------------------------------------------------------------------- /src/magic.h: -------------------------------------------------------------------------------- 1 | /* Utilities, miscellaneous functions and magic numbers. ✨. */ 2 | 3 | #pragma once 4 | 5 | #ifndef _SPRAY_MAGIC_H_ 6 | #define _SPRAY_MAGIC_H_ 7 | 8 | #include /* `uint64_t` for address values. */ 9 | #include /* `printf` for `print_addr`. */ 10 | #include 11 | 12 | #define unused(x) (void) (x); 13 | 14 | enum magic_numbers 15 | { 16 | /* `int 3` instruction code. */ 17 | INT3 = 0xcc, 18 | /* Mask of lowest byte in number. */ 19 | BTM_BYTE_MASK = 0xff, 20 | /* Number of registers in the `x86_regs` enum. */ 21 | N_REGISTERS = 27, 22 | /* Number of characters required to store any possible 23 | * path `/proc//maps`. According to the man-page for 24 | * proc(5) the maximum pid is up to 2^22. In decimal this 25 | * number has 7 digits. This plus characters for the rest 26 | * of the path plus a NULL terminator make up this number. */ 27 | PROC_MAPS_FILEPATH_LEN = 19, 28 | /* Size of the buffer to print all the tracee's registers. 29 | * All values are zero-padded so the size is always the same. */ 30 | REGISTER_PRINT_BUF_SIZE = 716, 31 | /* Width of the format string "\t%8s 0x%016lx" given that the string 32 | * substituted is no longer that 8 characters. This doesn't 33 | * include the string's NULL-byte. 
*/ 34 | REGISTER_PRINT_LEN = 26, 35 | /* Maximum number of instruction-level steps performed by 36 | * `single_step_line` until giving up trying to find another 37 | * line. Can be fairly large since the program will likely 38 | * end after this limit was reached. */ 39 | SINGLE_STEP_SEARCH_LIMIT = 128, 40 | }; 41 | 42 | typedef enum 43 | { 44 | SP_OK, 45 | SP_ERR, 46 | } SprayResult; 47 | 48 | /* Calculate the number of digits in the given number. */ 49 | unsigned n_digits (double num); 50 | 51 | /* Print n space characters to standard out. */ 52 | void indent_by (unsigned n_spaces); 53 | 54 | /* Helper to test if two strings are equal (`strcmp(...) == 0`) */ 55 | bool str_eq (const char *restrict a, const char *restrict b); 56 | 57 | typedef struct 58 | { 59 | uint64_t value; 60 | } real_addr; 61 | 62 | typedef struct 63 | { 64 | uint64_t value; 65 | } dbg_addr; 66 | 67 | /* The runtime addresses in *position independent executables* 68 | * may all be offset by a particular value from the addresses 69 | * which are stored in the binary file itself. 70 | * The addresses found in the DWARF debug information are such 71 | * permanently stored addresses. `dbg_addr` represents them. 72 | * Addresses retrieved from the running process, or debug 73 | * addresses that have been offset by the load address, 74 | * are represented by `real_addr`. They are also referred to as *real* 75 | * addresses. */ 76 | 77 | /* Convert a real address to a debug address. */ 78 | dbg_addr real_to_dbg (real_addr offset, real_addr real); 79 | 80 | /* Convert a debug address to a real address. */ 81 | real_addr dbg_to_real (real_addr offset, dbg_addr dwarf); 82 | 83 | /* `printf` format string for addresses. */ 84 | #define ADDR_FORMAT "0x%016lx" 85 | 86 | /* Filters to format the output. */ 87 | 88 | typedef enum 89 | { 90 | FMT_NONE, /* No filter. */ 91 | FMT_HEX, /* Hexadecimal number. */ 92 | FMT_BITS, /* Binary data. */ 93 | FMT_ADDR, /* Address. 
*/ 94 | FMT_DEC, /* Signed decimal number. */ 95 | FMT_BYTES, /* Hexadecimal bytes. */ 96 | } FormatFilter; 97 | 98 | FormatFilter parse_format (const char *str); 99 | 100 | /* Turn `current` into `_default` if `current` is `FMT_NONE`. */ 101 | FormatFilter default_format (FormatFilter current, FormatFilter _default); 102 | 103 | /* Format the given value based on `filter` and return the formatted string. 104 | * The caller should free the string. */ 105 | char * 106 | print_format (uint64_t value, FormatFilter filter); 107 | 108 | /* Return the part of `abs_filepath` that's relative to 109 | * the present working directory. 110 | * 111 | * On success, the pointer that's returned points into 112 | * `abs_filepath`. 113 | * 114 | * Otherwise, `NULL` is returned to signal an error. */ 115 | const char *relative_filepath (const char *abs_filepath); 116 | 117 | /* Print `filepath` as relative to the current working directory. 118 | * 119 | * `filepath` must not be `NULL`. */ 120 | void print_as_relative_filepath (const char *filepath); 121 | 122 | 123 | /* FORMAT OF MESSAGES 124 | * 125 | * 1. They start with a capital letter. 126 | * 2. They do not include tags like 'ERR' or 'WARN'. Those tags 127 | * are added automatically. 128 | * 3. They do not end with a newline character. Line breaks are 129 | * automatically added at the end of each message. 130 | * 4. They do not end with a period. Periods are only used to delimit 131 | * sentences inside the message. If appropriate, question or 132 | * exclamation marks may be used at the end of a message. 133 | * 5. They may use standard `printf` formatting. 134 | * 135 | * EXAMPLES OF VALID MESSAGES 136 | * 137 | * 'Failed to retrieve data' 138 | * 'Did you forget to initialize this variable?' 139 | * 'Variable %s has the value %d' - Expects a string and an integer. 140 | * 141 | * EXAMPLES OF INVALID MESSAGES 142 | * 143 | * 'ERR: Cannot open file' - The 'ERR: ' is added automatically. 
;;; Return a list of all lines in the file behind
;;; filepath or '() if the file can't be read.
(define (file->list filepath)
  ;; By keeping empty strings when splitting the
  ;; content at the newline characters, we accidentally
  ;; add another empty string to the end of the lines
  ;; list that's generated by the trailing newline at
  ;; the end of the file. The lines list is printed by
  ;; appending a newline to the end of each entry. Thereby,
  ;; we basically add another line to the end of the file
  ;; that doesn't actually exist. Here that line is removed.
  (define (omit-trailing-line! lines)
    (cond ((null? lines)
           '())
          ((null? (cdr lines))
           ;; A single entry has no predecessor it could be cut
           ;; off from. This happens for a one-line file without
           ;; a trailing newline, which must be kept as is.
           (if (equal? (car lines) "")
               '()
               lines))
          (else
           ;; Walk to the pair in front of the last entry and
           ;; detach the last entry if it's empty.
           (let loop ((pen lines))
             (cond ((pair? (cddr pen))
                    (loop (cdr pen)))
                   ((equal? (cadr pen) "")
                    (set-cdr! pen '())
                    lines)
                   (else
                    lines))))))

  (if (file-exists? filepath)
      (call-with-input-file filepath
        (lambda (port)
          (let ((content (read-string #f port)))
            ;; Only split actual text. Anything else that
            ;; `read-string` may hand back (#f, or possibly an
            ;; end-of-file object for an empty file) means that
            ;; there are no lines.
            (if (string? content)
                (omit-trailing-line!
                 (string-split content "\n" #t))
                '()))))
      '()))
;;; Return the types environment of the source file `sf`.
;;; The environment is created on first use from the token
;;; lines of all files in the dependency tree of `sf` and is
;;; then stored in the `source-files` table.
;;; Signals an error if `sf` is not a source file, just like
;;; `sf-deps` and `sf-token-lines` do.
(define (sf-types-env sf)
  (define (_sf-types-env sf) (cadddr sf))
  (if (source-file? sf)
      ;; Source files are created by calling `load-source-file!`. This procedure
      ;; doesn't initialize the types environment to save compute. The types
      ;; environment doesn't know about the tree-structure of the dependencies
      ;; at this point in time. Instead, it expects a flat list of inputs and
      ;; outputs another flat data structure. If this changes, that is if the
      ;; type environment becomes recursive, then it can be initialized in the
      ;; `load-source-file!` procedure instantly. This will likely save compute
      ;; because linearizing the dependency tree is not needed anymore.
      (let ((types-env (_sf-types-env sf)))
        (if (not (null? types-env))
            types-env
            ;; NOTE(review): this relies on `hash-table-update!`
            ;; returning the updated entry -- confirm against the
            ;; srfi-69 implementation in use.
            (_sf-types-env
             (hash-table-update! source-files
                                 (sf-name sf)
                                 (lambda (sf)
                                   (make-sf
                                    (sf-token-lines sf)
                                    (sf-deps sf)
                                    ;; Parse the types in the source file's dependency tree.
                                    (apply make-types-env
                                           (sf-deps->token-lines-lst sf))))))))
      (error "sf-types-env, not a source file" sf)))
;;; Print the source code at the `filepath`:`lineno` with
;;; `n-context-lines` above and below `lineno`. Returns #f
;;; on error and #t on success.
(define (print-source filepath lineno n-context-lines use-color)
  (let ((source-file (load-source-file! filepath))
        (start-lineno (start-lineno lineno n-context-lines)))
    ;; `load-source-file!` signals failure by returning '(). The
    ;; empty list counts as true, so it must be tested for
    ;; explicitly. Otherwise the accessors below are handed
    ;; something that isn't a source file.
    (if (or (not source-file)
            (null? source-file))
        #f
        (begin
          (display
           (color-tokens
            (line-window (sf-token-lines source-file)
                         lineno
                         n-context-lines)
            (sf-types-env source-file)
            start-lineno
            lineno
            use-color))
          #t))))
*/ 14 | extern int print_source_extern (const char *filepath, 15 | unsigned lineno, 16 | unsigned n_context_lines, bool use_color); 17 | 18 | SprayResult 19 | print_source (const char *filepath, unsigned lineno, unsigned n_context) 20 | { 21 | bool use_color = !get_args ()->flags.no_color; 22 | int res = print_source_extern (filepath, 23 | lineno, 24 | n_context, 25 | use_color); 26 | if (res == 0) 27 | { 28 | return SP_OK; 29 | } 30 | else 31 | { 32 | return SP_ERR; 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /src/print_source.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #ifndef _SPRAY_PRINT_SOURCE_H_ 4 | #define _SPRAY_PRINT_SOURCE_H_ 5 | 6 | #include "magic.h" 7 | 8 | /* Call this to initialize `print_source`. The program 9 | * will crash if `print_source` is called without being 10 | * initialized. */ 11 | void init_print_source (void); 12 | 13 | SprayResult print_source (const char *source_filepath, 14 | unsigned lineno, unsigned n_context); 15 | 16 | #endif /* _SPRAY_PRINT_SOURCE_H_ */ 17 | -------------------------------------------------------------------------------- /src/ptrace.c: -------------------------------------------------------------------------------- 1 | #include "ptrace.h" 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | enum 8 | { PTRACE_ERROR = -1 }; 9 | 10 | /* NOTE: All `PTRACE_PEEK*` requests return the 11 | * requested data. Because the return value is 12 | * always used to indicate an error (by returning 13 | * -1), `errno` must be used to determine if the 14 | * result of the read is -1 or there is an error. */ 15 | 16 | SprayResult 17 | pt_read_memory (pid_t pid, real_addr addr, uint64_t *read) 18 | { 19 | assert (read != NULL); 20 | 21 | /* The `ptrace(2)` API requires that we manually set `errno` here.
*/ 22 | errno = 0; 23 | uint64_t value = ptrace (PTRACE_PEEKDATA, pid, addr, NULL); 24 | if (errno == 0) 25 | { 26 | /* No error was raised. Return the result. */ 27 | *read = value; 28 | return SP_OK; 29 | } 30 | else 31 | { 32 | /* `errno` now indicates the error. */ 33 | return SP_ERR; 34 | } 35 | } 36 | 37 | SprayResult 38 | pt_write_memory (pid_t pid, real_addr addr, uint64_t write) 39 | { 40 | if (ptrace (PTRACE_POKEDATA, pid, addr, write) == PTRACE_ERROR) 41 | { 42 | return SP_ERR; 43 | } 44 | else 45 | { 46 | return SP_OK; 47 | } 48 | } 49 | 50 | SprayResult 51 | pt_read_registers (pid_t pid, struct user_regs_struct *regs) 52 | { 53 | assert (regs != NULL); 54 | /* `addr` is ignored here. `PTRACE_GETREGS` stores all 55 | * of the tracee's general purpose registers in `regs`. */ 56 | if (ptrace (PTRACE_GETREGS, pid, NULL, regs) == PTRACE_ERROR) 57 | { 58 | return SP_ERR; 59 | } 60 | else 61 | { 62 | return SP_OK; 63 | } 64 | } 65 | 66 | SprayResult 67 | pt_write_registers (pid_t pid, struct user_regs_struct *regs) 68 | { 69 | assert (regs != NULL); 70 | if (ptrace (PTRACE_SETREGS, pid, NULL, regs) == PTRACE_ERROR) 71 | { 72 | return SP_ERR; 73 | } 74 | else 75 | { 76 | return SP_OK; 77 | } 78 | } 79 | 80 | SprayResult 81 | pt_continue_execution (pid_t pid) 82 | { 83 | if (ptrace (PTRACE_CONT, pid, NULL, NULL) == PTRACE_ERROR) 84 | { 85 | return SP_ERR; 86 | } 87 | else 88 | { 89 | return SP_OK; 90 | } 91 | } 92 | 93 | SprayResult 94 | pt_trace_me (void) 95 | { 96 | if (ptrace (PTRACE_TRACEME, 0, NULL, NULL) == PTRACE_ERROR) 97 | { 98 | return SP_ERR; 99 | } 100 | else 101 | { 102 | return SP_OK; 103 | } 104 | } 105 | 106 | SprayResult 107 | pt_single_step (pid_t pid) 108 | { 109 | if (ptrace (PTRACE_SINGLESTEP, pid, NULL, NULL) == PTRACE_ERROR) 110 | { 111 | return SP_ERR; 112 | } 113 | else 114 | { 115 | return SP_OK; 116 | } 117 | } 118 | 119 | SprayResult 120 | pt_get_signal_info (pid_t pid, siginfo_t *siginfo) 121 | { 122 | assert (siginfo != NULL); 123 
| if (ptrace (PTRACE_GETSIGINFO, pid, NULL, siginfo) == PTRACE_ERROR) 124 | { 125 | return SP_ERR; 126 | } 127 | else 128 | { 129 | return SP_OK; 130 | } 131 | } 132 | -------------------------------------------------------------------------------- /src/ptrace.h: -------------------------------------------------------------------------------- 1 | /* The `ptrace` API is ... special. This header 2 | * wraps it up for use in the rest of this program. 3 | * If one of the functions here fails, `errno` will 4 | * hold the value set by `ptrace`. */ 5 | 6 | #pragma once 7 | 8 | #ifndef _SPRAY_PTRACE_H_ 9 | #define _SPRAY_PTRACE_H_ 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | #include "magic.h" 18 | 19 | SprayResult pt_read_memory (pid_t pid, real_addr addr, uint64_t * read); 20 | SprayResult pt_write_memory (pid_t pid, real_addr addr, uint64_t write); 21 | 22 | SprayResult pt_read_registers (pid_t pid, struct user_regs_struct *regs); 23 | SprayResult pt_write_registers (pid_t pid, struct user_regs_struct *regs); 24 | 25 | SprayResult pt_continue_execution (pid_t pid); 26 | SprayResult pt_trace_me (void); 27 | SprayResult pt_single_step (pid_t pid); 28 | 29 | SprayResult pt_get_signal_info (pid_t pid, siginfo_t * siginfo); 30 | 31 | #endif /* _SPRAY_PTRACE_H_ */ 32 | -------------------------------------------------------------------------------- /src/registers.c: -------------------------------------------------------------------------------- 1 | #include "registers.h" 2 | #include "magic.h" 3 | #include "ptrace.h" 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | /* Both `x86_reg` and `reg_descriptors` are laid 12 | * out the same way as `user_regs_struct` in 13 | * `/usr/include/sys/user.h`. Hence, `x86_reg` can 14 | * index both of them. 
*/ 15 | 16 | SprayResult 17 | get_register_value (pid_t pid, x86_reg reg, uint64_t *read) 18 | { 19 | assert (read != NULL); 20 | 21 | struct user_regs_struct regs; /* Register buffer */ 22 | SprayResult res = pt_read_registers (pid, &regs); 23 | if (res == SP_ERR) 24 | { 25 | return SP_ERR; 26 | } 27 | else 28 | { 29 | uint64_t *regs_as_array = (uint64_t *) & regs; 30 | *read = regs_as_array[reg]; 31 | return SP_OK; 32 | } 33 | } 34 | 35 | SprayResult 36 | set_register_value (pid_t pid, x86_reg reg, uint64_t write) 37 | { 38 | struct user_regs_struct regs; 39 | SprayResult res = pt_read_registers (pid, &regs); 40 | if (res == SP_ERR) 41 | { 42 | return SP_ERR; 43 | } 44 | else 45 | { 46 | uint64_t *regs_as_array = (uint64_t *) & regs; 47 | regs_as_array[reg] = write; 48 | return pt_write_registers (pid, &regs); 49 | } 50 | } 51 | 52 | bool 53 | dwarf_regnum_to_x86_reg (uint8_t dwarf_regnum, x86_reg *store) 54 | { 55 | assert (store != NULL); 56 | 57 | size_t i = 0; 58 | for (; i < N_REGISTERS; i++) 59 | { 60 | if (reg_descriptors[i].dwarf_r == (int) dwarf_regnum) 61 | { 62 | break; 63 | } 64 | } 65 | 66 | if (i == N_REGISTERS) 67 | { 68 | /* We searched the entire array 69 | * without finding a match. : ( */ 70 | return false; 71 | } 72 | else 73 | { 74 | *store = reg_descriptors[i].r; 75 | return true; 76 | } 77 | } 78 | 79 | /* NOTE: All DWARF register numbers are small unsigned integers. 80 | * Negative values for `dwarf_r` in `reg_descriptors` are used 81 | * to make those registers inaccessible via a DWARF register number.
*/ 82 | 83 | bool 84 | get_dwarf_register_value (pid_t pid, int8_t dwarf_regnum, uint64_t *read) 85 | { 86 | assert (read != NULL); 87 | 88 | x86_reg associated_reg; 89 | 90 | bool regnum_was_translated = 91 | dwarf_regnum_to_x86_reg (dwarf_regnum, &associated_reg); 92 | 93 | if (regnum_was_translated) 94 | { 95 | SprayResult res = get_register_value (pid, associated_reg, read); 96 | if (res == SP_OK) 97 | { 98 | return true; 99 | } 100 | else 101 | { 102 | return false; 103 | } 104 | } 105 | else 106 | { 107 | return false; 108 | } 109 | } 110 | 111 | const char * 112 | get_name_from_register (x86_reg reg) 113 | { 114 | size_t i = 0; 115 | for (; i < N_REGISTERS; i++) 116 | { 117 | if (reg_descriptors[i].r == reg) 118 | { 119 | break; 120 | } 121 | } 122 | 123 | /* `reg_descriptors` maps all possible values 124 | * of `reg`. Therefore the name *must* be found. */ 125 | assert (i != N_REGISTERS); 126 | 127 | return reg_descriptors[i].name; 128 | } 129 | 130 | bool 131 | get_register_from_name (const char *name, x86_reg *store) 132 | { 133 | size_t i = 0; 134 | for (; i < N_REGISTERS; i++) 135 | { 136 | if (str_eq (reg_descriptors[i].name, name)) 137 | { 138 | break; 139 | } 140 | } 141 | 142 | if (i == N_REGISTERS) 143 | { 144 | /* Couldn't find a register named `name`. 
*/ 145 | return false; 146 | } 147 | else 148 | { 149 | *store = reg_descriptors[i].r; 150 | return true; 151 | } 152 | } 153 | -------------------------------------------------------------------------------- /src/registers.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #ifndef _SPARY_REGISTERS_H_ 4 | #define _SPARY_REGISTERS_H_ 5 | 6 | #include "magic.h" 7 | #include "ptrace.h" 8 | 9 | #include 10 | #include 11 | #include 12 | 13 | typedef enum 14 | { 15 | r15 = 0, r14, r13, r12, 16 | rbp, rbx, r11, r10, r9, r8, 17 | rax, rcx, rdx, rsi, rdi, 18 | orig_rax, rip, cs, eflags, 19 | rsp, ss, fs_base, gs_base, 20 | ds, es, fs, gs, 21 | } x86_reg; 22 | 23 | typedef struct 24 | { 25 | x86_reg r; 26 | int dwarf_r; /* DWARF register number. */ 27 | const char *name; 28 | } reg_descriptor; 29 | 30 | static const reg_descriptor reg_descriptors[N_REGISTERS] = { 31 | {r15, 15, "r15"}, 32 | {r14, 14, "r14"}, 33 | {r13, 13, "r13"}, 34 | {r12, 12, "r12"}, 35 | {rbp, 6, "rbp"}, 36 | {rbx, 3, "rbx"}, 37 | {r11, 11, "r11"}, 38 | {r10, 10, "r10"}, 39 | {r9, 9, "r9"}, 40 | {r8, 8, "r8"}, 41 | {rax, 0, "rax"}, 42 | {rcx, 2, "rcx"}, 43 | {rdx, 1, "rdx"}, 44 | {rsi, 4, "rsi"}, 45 | {rdi, 5, "rdi"}, 46 | {orig_rax, -1, "orig_rax"}, 47 | {rip, -1, "rip"}, 48 | {cs, 51, "cs"}, 49 | {eflags, 49, "eflags"}, 50 | {rsp, 7, "rsp"}, 51 | {ss, 52, "ss"}, 52 | {fs_base, 58, "fs_base"}, 53 | {gs_base, 59, "gs_base"}, 54 | {ds, 53, "ds"}, 55 | {es, 50, "es"}, 56 | {fs, 54, "fs"}, 57 | {gs, 55, "gs"}, 58 | }; 59 | 60 | /* Store the value of the register `reg` in `read`. */ 61 | SprayResult get_register_value (pid_t pid, x86_reg reg, uint64_t * read); 62 | 63 | /* Write the value in `write` to register `reg`. */ 64 | SprayResult set_register_value (pid_t pid, x86_reg reg, uint64_t write); 65 | 66 | /* Translate the given DWARF register number into the associated x86 register. 67 | * Returns `true` if the register number could be translated. 
68 | * Otherwise, `false` is returned. */ 69 | bool dwarf_regnum_to_x86_reg (uint8_t dwarf_regnum, x86_reg * store); 70 | 71 | /* Store the value of the register `dwarf_regnum` in `read`. 72 | * `true` is returned on success. 73 | * 74 | * If the value of `dwarf_regnum` doesn't represent a valid 75 | * register, false is returned and `read` stays untouched. */ 76 | bool get_dwarf_register_value (pid_t pid, int8_t dwarf_regnum, 77 | uint64_t * read); 78 | 79 | /* Get the name of the register `reg` as a string. */ 80 | const char *get_name_from_register (x86_reg reg); 81 | 82 | /* Store the register referred to by `name` in `store`. 83 | * 84 | * `true` is returned on success. 85 | * 86 | * If `name` is not a known register, then `false` is returned 87 | * and `store` remains untouched. */ 88 | bool get_register_from_name (const char *name, x86_reg * store); 89 | 90 | #endif /* _SPARY_REGISTERS_H_ */ 91 | -------------------------------------------------------------------------------- /src/spray.c: -------------------------------------------------------------------------------- 1 | /* 🐛🐛🐛 Spray: an ergonomic debugger for x86_64 Linux.
🐛🐛🐛 */ 2 | 3 | #include "debugger.h" 4 | 5 | #define SET_ARGS_ONCE 6 | #include "args.h" 7 | 8 | int 9 | setup_args (int argc, char **argv) 10 | { 11 | Args args = { 0 }; 12 | 13 | if (parse_args (argc, argv, &args)) 14 | { 15 | print_help_message (prog_name_arg (argc, argv)); 16 | return -1; 17 | } 18 | else 19 | { 20 | set_args (&args); 21 | return 0; 22 | } 23 | } 24 | 25 | int 26 | main (int argc, char **argv) 27 | { 28 | if (setup_args (argc, argv) == -1) 29 | { 30 | return -1; 31 | } 32 | 33 | Debugger debugger; 34 | 35 | if (setup_debugger (get_args ()->file, get_args ()->args, &debugger) == -1) 36 | { 37 | return -1; 38 | } 39 | 40 | run_debugger (debugger); 41 | 42 | if (del_debugger (debugger) == SP_ERR) 43 | return -1; 44 | 45 | return 0; 46 | } 47 | -------------------------------------------------------------------------------- /src/spray_dwarf.h: -------------------------------------------------------------------------------- 1 | /* Spray's wrapper around libdwarf. Exposes interfaces 2 | * to gather information about the current position in 3 | * the source files, about the types of runtime variables, 4 | * and about how to retrieve the values of those variables 5 | * at runtime. */ 6 | 7 | #pragma once 8 | 9 | #ifndef _SPRAY_SPRAY_DWARF_H_ 10 | #define _SPRAY_SPRAY_DWARF_H_ 11 | 12 | #include "ptrace.h" 13 | #include "spray_elf.h" /* `ElfFile` in `SdLocEvalCtx` */ 14 | #include "registers.h" /* `x86_reg` in `SdLocation` */ 15 | 16 | #include 17 | #include 18 | #include 19 | 20 | /* Initialized libdwarf's debug info. Returns NULL on error. */ 21 | Dwarf_Debug sd_dwarf_init (const char *filepath, Dwarf_Error * error); 22 | 23 | 24 | /**************************************************/ 25 | /* Information about the current position of the */ 26 | /* executing program in the program source files. 
*/ 27 | /**************************************************/ 28 | 29 | /* Get the file path of the source file that contains the 30 | * code that the given PC points to. The string that's returned 31 | * must be `free`'d by the caller. */ 32 | char *sd_filepath_from_pc (Dwarf_Debug dbg, dbg_addr pc); 33 | 34 | typedef struct 35 | { 36 | bool is_ok; 37 | bool new_statement; 38 | bool prologue_end; 39 | /* Set to true if the PC used to retrieve the 40 | * line entry was exactly equal to `addr`. */ 41 | bool is_exact; 42 | unsigned ln; 43 | unsigned cl; 44 | dbg_addr addr; 45 | /* Don't free this string. 46 | * It's owned by the `Dwarf_Debug` instance. */ 47 | char *filepath; 48 | } LineEntry; 49 | 50 | /* Returns the line entry for the PC if this line entry contains 51 | * the address of PC. On error `is_ok` is set to false. */ 52 | LineEntry sd_line_entry_from_pc (Dwarf_Debug dbg, dbg_addr pc); 53 | 54 | /* Get the line entry for the given position in the program source. */ 55 | LineEntry sd_line_entry_at (Dwarf_Debug dbg, const char *filepath, 56 | unsigned lineno); 57 | 58 | typedef SprayResult (*LineCallback) (LineEntry * line, void *const data); 59 | 60 | /* Call `callback` for each new statement line entry 61 | * in the subprogram with the given name. */ 62 | SprayResult sd_for_each_line (Dwarf_Debug dbg, 63 | const char *fn_name, 64 | const char *filepath, 65 | LineCallback callback, void *const init_data); 66 | 67 | /* Figure out where the function prologue of the function starting 68 | * at `low_pc` ends and return this address. Used for breakpoints on 69 | * functions to break only after the prologue. 70 | * `prologue_start` is the same address as a subprogram's low PC 71 | * and `function_end` is the same address as the high PC. 
*/ 72 | SprayResult sd_effective_start_addr (Dwarf_Debug dbg, 73 | dbg_addr prologue_start, 74 | dbg_addr function_end, 75 | dbg_addr * function_start); 76 | 77 | 78 | /*************************************************************/ 79 | /* Information about location and type of runtime variables. */ 80 | /*************************************************************/ 81 | 82 | /* Type information. */ 83 | 84 | typedef struct 85 | { 86 | enum 87 | { 88 | BASE_TYPE_CHAR, 89 | BASE_TYPE_SIGNED_CHAR, 90 | BASE_TYPE_UNSIGNED_CHAR, 91 | BASE_TYPE_SHORT, 92 | BASE_TYPE_UNSIGNED_SHORT, 93 | BASE_TYPE_INT, 94 | BASE_TYPE_UNSIGNED_INT, 95 | BASE_TYPE_LONG, 96 | BASE_TYPE_UNSIGNED_LONG, 97 | BASE_TYPE_LONG_LONG, 98 | BASE_TYPE_UNSIGNED_LONG_LONG, 99 | BASE_TYPE_FLOAT, 100 | BASE_TYPE_DOUBLE, 101 | BASE_TYPE_LONG_DOUBLE, 102 | } tag; 103 | /* Number of bytes used to represent this base type. */ 104 | unsigned char size; 105 | } SdBasetype; 106 | 107 | /* See the DWARF 5 standard 5.3. */ 108 | typedef enum 109 | { 110 | TYPE_MOD_ATOMIC = DW_TAG_atomic_type, 111 | TYPE_MOD_CONST = DW_TAG_const_type, 112 | TYPE_MOD_POINTER = DW_TAG_pointer_type, 113 | TYPE_MOD_RESTRICT = DW_TAG_restrict_type, 114 | TYPE_MOD_VOLATILE = DW_TAG_volatile_type, 115 | } SdTypemod; 116 | 117 | /* Single node in the representation variable types. */ 118 | typedef struct 119 | { 120 | enum 121 | { 122 | NODE_BASE_TYPE, 123 | NODE_MODIFIER, 124 | NODE_UNSPECIFIED, /* See the DWARF 5 standard 5.2. */ 125 | NODE_TYPEDEF, /* See the DWARF 5 standard 5.4. */ 126 | } tag; /* Kind of this node. */ 127 | union 128 | { 129 | SdBasetype base_type; 130 | SdTypemod modifier; 131 | }; 132 | } SdTypenode; 133 | 134 | /* Host structure for variable types. */ 135 | typedef struct 136 | { 137 | SdTypenode *nodes; /* Buffer of nodes. */ 138 | size_t n_nodes; /* First `n` nodes in use. */ 139 | size_t n_alloc; /* Maximum number of nodes. 
*/ 140 | } SdType; 141 | 142 | void del_type (SdType * type); 143 | 144 | 145 | /* `DW_AT_location` of DIEs that represent runtime variables. 146 | * It can be used in combination with `sd_init_loclist` to 147 | * initialize a new location list. */ 148 | typedef struct 149 | { 150 | Dwarf_Attribute loc; /* `DW_AT_location` attribute. */ 151 | } SdLocattr; 152 | 153 | /* Representation of runtime variables. They are used to find the 154 | * location of the variable's value in the running program, and 155 | * to find out what type the variable has. 156 | * 157 | * `SdLocattr`'s memory is handled by `libdwarf`. Only `SdType` 158 | * must be deleted after it's been used by the user. */ 159 | typedef struct 160 | { 161 | SdLocattr loc; /* Runtime location. */ 162 | SdType type; /* Type. */ 163 | } SdVarattr; 164 | 165 | /* Get the attributes describing the variable with the given 166 | * name, and the file and line where this variable was declared. 167 | * `pc` is used to choose the closest variable if the variable 168 | * name occurs more than once. 169 | * 170 | * On success `SP_OK` is returned, and `attr`, `decl_file`, and 171 | * `decl_line` are set. `decl_file` must be `free`'d manually by 172 | * this function's caller. 173 | * 174 | * On error `SP_ERR` is returned, and `attr`, `decl_file`, and 175 | * `decl_line` remain unchanged. 176 | * 177 | * `dbg`, `var_name`, `attr`, `decl_file`, and `decl_line` must 178 | * not be `NULL`. */ 179 | SprayResult sd_runtime_variable (Dwarf_Debug dbg, 180 | dbg_addr pc, 181 | const char *var_name, 182 | SdVarattr * attr, 183 | char **decl_file, unsigned *decl_line); 184 | 185 | 186 | /* Location information. */ 187 | 188 | typedef struct SdExpression SdLocdesc; 189 | typedef struct SdLocRange SdLocRange; 190 | 191 | /* A DWARF location list (list of DWARF expressions) used 192 | * to describe the different locations of a specific 193 | * variable during the runtime of a program.
*/ 194 | typedef struct SdLoclist 195 | { 196 | size_t n_exprs; 197 | SdLocdesc *exprs; 198 | SdLocRange *ranges; 199 | } SdLoclist; 200 | 201 | /* Initialize a location list based on the location 202 | * description attribute in `loc_attr`. */ 203 | SprayResult sd_init_loclist (Dwarf_Debug dbg, 204 | SdLocattr loc_attr, SdLoclist * loclist); 205 | 206 | /* Delete the given location list. */ 207 | void del_loclist (SdLoclist * loclist); 208 | 209 | /* Print the given location list. */ 210 | void print_loclist (SdLoclist loclist); 211 | 212 | /* Contextual information used to evaluate 213 | * certain operations in location lists. */ 214 | typedef struct SdLocEvalCtx 215 | { 216 | pid_t pid; 217 | dbg_addr pc; 218 | const ElfFile *elf; 219 | real_addr load_address; 220 | } SdLocEvalCtx; 221 | 222 | /* The location of a runtime variable at a specific point 223 | * in time. Created by evaluating the location list of the 224 | * variable in question. */ 225 | typedef struct SdLocation 226 | { 227 | enum 228 | { 229 | LOC_ADDR, 230 | LOC_REG, 231 | } tag; 232 | union 233 | { 234 | real_addr addr; 235 | x86_reg reg; 236 | }; 237 | } SdLocation; 238 | 239 | /* Evaluate the given location list and return the 240 | * current location of the variable the location list 241 | * describes. */ 242 | SprayResult sd_eval_loclist (Dwarf_Debug dbg, 243 | SdLocEvalCtx ctx, 244 | SdLoclist loclist, SdLocation * location); 245 | 246 | 247 | #ifdef UNIT_TESTS 248 | 249 | /* Search callback types for searching DIEs. */ 250 | 251 | typedef struct SearchFor 252 | { 253 | unsigned level; /* Level in the DIE tree. */ 254 | const void *data; /* Custom data used as context while searching. 
*/ 255 | } SearchFor; 256 | 257 | typedef struct SearchFindings 258 | { 259 | void *data; /* Custom data collected while searching */ 260 | } SearchFindings; 261 | 262 | typedef bool (*SearchCallback) (Dwarf_Debug, 263 | Dwarf_Die, SearchFor, SearchFindings); 264 | 265 | /* Search function that searches DIEs for different content. */ 266 | int sd_search_dwarf_dbg (Dwarf_Debug dbg, 267 | Dwarf_Error * const error, 268 | SearchCallback search_callback, 269 | const void *search_for_data, 270 | void *search_findings_data); 271 | 272 | /* Find a `DW_TAG_subprogram` DIE by its name. */ 273 | bool sd_is_subprog_with_name (Dwarf_Debug dbg, 274 | Dwarf_Die die, const char *name); 275 | 276 | /* Describe a result returned by libdwarf. */ 277 | const char *what_dwarf_result (int dwarf_res); 278 | 279 | /* Full definition of types internal to `SdLoclist`. */ 280 | typedef struct SdLocRange 281 | { 282 | bool meaningful; 283 | real_addr lowpc; /* Inclusive lower bound. */ 284 | real_addr highpc; /* Exclusive upper bound. */ 285 | } SdLocRange; 286 | 287 | typedef Dwarf_Small SdOperator; 288 | typedef Dwarf_Unsigned SdOperand; 289 | 290 | /* A single operation in a DWARF expression. */ 291 | typedef struct SdOperation 292 | { 293 | SdOperator opcode; 294 | /* The operands 1-3 can be addressed either as single 295 | struct members or as elements in an array. */ 296 | union 297 | { 298 | struct 299 | { 300 | SdOperand operand1; 301 | SdOperand operand2; 302 | SdOperand operand3; 303 | }; 304 | SdOperand operands[3]; 305 | }; 306 | } SdOperation; 307 | 308 | /* A DWARF expression used for locexprs. 
*/ 309 | typedef struct SdExpression 310 | { 311 | size_t n_operations; 312 | SdOperation *operations; 313 | } SdExpression; 314 | 315 | char ** sd_get_filepaths (Dwarf_Debug dbg); 316 | 317 | #endif /* UNIT_TESTS */ 318 | 319 | #endif /* _SPRAY_DWARF_H_ */ 320 | -------------------------------------------------------------------------------- /src/spray_elf.c: -------------------------------------------------------------------------------- 1 | #include "spray_elf.h" 2 | 3 | #include "magic.h" 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | enum 13 | { 14 | CHECK_SECTION_HEADER = 0xffff, 15 | }; 16 | 17 | /* Validates the content of the given `Elf64_Ehdr` and 18 | * parses all values of interest. Some values (`n_prog_hdrs`, 19 | * `n_sect_hdrs` and `shstrtab_idx`) might be too 20 | * large to be stored in the `Elf64_Ehdr`. Then they are set to 21 | * `CHECK_SECTION_HEADER` to signal that they should be read from 22 | * the inital section header. */ 23 | ElfParseResult 24 | parse_elf_header (const Elf64_Ehdr *elf_src, 25 | ElfFile *elf_dest, 26 | uint64_t *prog_table_off, 27 | uint64_t *sect_table_off, 28 | uint32_t *n_prog_hdrs, 29 | uint32_t *n_sect_hdrs, uint32_t *shstrtab_idx) 30 | { 31 | assert (elf_dest != NULL); 32 | assert (prog_table_off != NULL); 33 | assert (sect_table_off != NULL); 34 | assert (n_prog_hdrs != NULL); 35 | assert (n_sect_hdrs != NULL); 36 | assert (shstrtab_idx != NULL); 37 | 38 | /* Is the magic number invalid? */ 39 | if ((elf_src->e_ident[EI_MAG0] != ELFMAG0) /* 0x7f */ 40 | || (elf_src->e_ident[EI_MAG1] != ELFMAG1) /* 'E' */ 41 | || (elf_src->e_ident[EI_MAG2] != ELFMAG2) /* 'L' */ 42 | || (elf_src->e_ident[EI_MAG3] != ELFMAG3) /* 'F' */ 43 | ) 44 | { 45 | return ELF_PARSE_INVALID; 46 | } 47 | 48 | /* Is this ELF file meant for something different than 64 bit? 
*/ 49 | if (elf_src->e_ident[EI_CLASS] != ELFCLASS64) 50 | { 51 | return ELF_PARSE_DISLIKE; 52 | } 53 | 54 | /* Is the file's data encoding two's complement and little-endian? */ 55 | if (elf_src->e_ident[EI_DATA] == ELFDATA2LSB) 56 | { 57 | elf_dest->endianness = ELF_ENDIAN_LITTLE; 58 | } 59 | /* Is the file's data encoding two's complement and big-endian? */ 60 | else if (elf_src->e_ident[EI_DATA] == ELFDATA2MSB) 61 | { 62 | elf_dest->endianness = ELF_ENDIAN_BIG; 63 | } 64 | /* Is the file's data encoding missing? */ 65 | else 66 | { 67 | return ELF_PARSE_DISLIKE; 68 | } 69 | 70 | /* Is the ABI supported? `ELFOSABI_NONE` is the same as `SYSV`. */ 71 | if ((elf_src->e_ident[EI_OSABI] != ELFOSABI_LINUX) 72 | && (elf_src->e_ident[EI_OSABI] != ELFOSABI_NONE) 73 | && (elf_src->e_ident[EI_OSABI] != ELFOSABI_SYSV)) 74 | { 75 | return ELF_PARSE_DISLIKE; 76 | } 77 | 78 | /* `EI_VERSION` and `EI_ABIVERSION` are basically unused 79 | * and must conform to the values below to be valid. 80 | * `e_version` is the same. */ 81 | if (elf_src->e_ident[EI_VERSION] != EV_CURRENT 82 | || elf_src->e_ident[EI_ABIVERSION] != 0 83 | || elf_src->e_version != EV_CURRENT) 84 | { 85 | return ELF_PARSE_INVALID; 86 | } 87 | 88 | /* Is the object file type in the accepted range? */ 89 | if (elf_src->e_type <= ELF_TYPE_CORE) 90 | { 91 | /* `e_type` maps to `elf_type` in this range. */ 92 | elf_dest->type = (ElfType) elf_src->e_type; 93 | } 94 | else 95 | { 96 | /* Object file type is in the reserved range. */ 97 | return ELF_PARSE_INVALID; 98 | } 99 | 100 | /* Is the target instruction set architecture something 101 | * different than x86? We're x86 only here! */ 102 | if (elf_src->e_machine != EM_X86_64) 103 | { 104 | return ELF_PARSE_DISLIKE; 105 | } 106 | 107 | 108 | /*********************************************/ 109 | /* Program and section header table parsing. */ 110 | /*********************************************/ 111 | 112 | /* Is this file missing a program header table?
*/ 113 | if (elf_src->e_phoff == 0) 114 | { 115 | return ELF_PARSE_DISLIKE; 116 | } 117 | else 118 | { 119 | *prog_table_off = elf_src->e_phoff; 120 | } 121 | 122 | /* Is this file missing a section header table? */ 123 | if (elf_src->e_shoff == 0) 124 | { 125 | return ELF_PARSE_DISLIKE; 126 | } 127 | else 128 | { 129 | *sect_table_off = elf_src->e_shoff; 130 | } 131 | 132 | /* Are the entry sizes in the header tables meant for 64-bit? */ 133 | if (elf_src->e_phentsize != sizeof (Elf64_Phdr) || 134 | elf_src->e_shentsize != sizeof (Elf64_Shdr)) 135 | { 136 | return ELF_PARSE_DISLIKE; 137 | } 138 | 139 | /* Some of the values in the ELF header don't fit 140 | * its data types anymore. E.g. if there are more 141 | * than 0xffff program headers, the `e_phnum` field 142 | * cannot store how many of them there are. In this 143 | * case, the first entry in the section header table 144 | * stores the actual real value. */ 145 | 146 | /* Does the number of program headers exceed the representable range? */ 147 | if (elf_src->e_phnum == PN_XNUM) 148 | { 149 | *n_prog_hdrs = CHECK_SECTION_HEADER; 150 | } 151 | else 152 | { 153 | *n_prog_hdrs = elf_src->e_phnum; 154 | } 155 | 156 | if (elf_src->e_shnum == 0) 157 | { 158 | /* `e_shnum` being 0 signals one of two options: 159 | * (1) The number of section table headers lies outside 160 | * the range that can be represented in 16 bits and 161 | * the actual value is found in `sh_size`. 162 | * (2) The number of entries is really just 0. Then `sh_size` 163 | * will be 0, too. */ 164 | *n_sect_hdrs = CHECK_SECTION_HEADER; 165 | } 166 | else 167 | { 168 | *n_sect_hdrs = elf_src->e_shnum; 169 | } 170 | 171 | /* Is the index of the section name string table outside 172 | * the range that can be represented?
*/ 173 | if (elf_src->e_shstrndx == SHN_XINDEX) 174 | { 175 | *shstrtab_idx = CHECK_SECTION_HEADER; 176 | } 177 | else 178 | { 179 | *shstrtab_idx = elf_src->e_shstrndx; 180 | } 181 | 182 | return ELF_PARSE_OK; 183 | } 184 | 185 | /* The initial section header is reserved to store values that 186 | * didn't fit into the ELF header. If any of the argument's values 187 | * is set to `CHECK_SECTION_HEADER`, then it will be set to the 188 | * value in this header. */ 189 | void 190 | parse_init_section (const Elf64_Shdr *init_section_header, 191 | uint32_t *n_prog_hdrs, uint32_t *n_sect_hdrs, 192 | uint32_t *shstrtab_idx) 193 | { 194 | assert (init_section_header != NULL); 195 | assert (n_prog_hdrs != NULL); 196 | assert (n_sect_hdrs != NULL); 197 | assert (shstrtab_idx != NULL); 198 | 199 | if (*n_prog_hdrs == CHECK_SECTION_HEADER) 200 | { 201 | *n_prog_hdrs = init_section_header->sh_info; 202 | } 203 | 204 | if (*n_sect_hdrs == CHECK_SECTION_HEADER) 205 | { 206 | *n_sect_hdrs = init_section_header->sh_size; 207 | } 208 | 209 | if (*shstrtab_idx == CHECK_SECTION_HEADER) 210 | { 211 | *shstrtab_idx = init_section_header->sh_link; 212 | } 213 | } 214 | 215 | /* Helpers to check bit masks. */ 216 | bool 217 | is_set (int value, int mask) 218 | { 219 | return (value & mask) != 0; 220 | } 221 | 222 | bool 223 | is_unset (int value, int mask) 224 | { 225 | return (value & mask) == 0; 226 | } 227 | 228 | bool 229 | is_valid_symtab (Elf64_Shdr *shdr, const char *name) 230 | { 231 | return str_eq (name, ".symtab") && shdr->sh_type == SHT_SYMTAB && 232 | /* `SHF_ALLOC` is always set for .dynsym. 
*/ 233 | is_unset (shdr->sh_flags, SHF_ALLOC) && 234 | shdr->sh_entsize == sizeof (Elf64_Sym); 235 | } 236 | 237 | bool 238 | is_valid_strtab (Elf64_Shdr *shdr, const char *name) 239 | { 240 | return str_eq (name, ".strtab") 241 | && shdr->sh_type == SHT_STRTAB 242 | && is_unset (shdr->sh_flags, SHF_ALLOC); 243 | } 244 | 245 | SprayResult 246 | find_table_sections (Elf64_Shdr *sect_headers, uint32_t n_sect_hdrs, 247 | const char *shstrtab, uint32_t *symtab_idx, 248 | uint32_t *strtab_idx) 249 | { 250 | assert (sect_headers != NULL); 251 | assert (shstrtab != NULL); 252 | assert (symtab_idx != NULL); 253 | assert (strtab_idx != NULL); 254 | 255 | /* NOTE: To check if a given index has been set already, 256 | * we can check if it is zero. This relies on the fact 257 | * that the section header at index zero is reserved and 258 | * cannot be used for any of the entries we are looking for. */ 259 | 260 | Elf64_Shdr *cur_hdr = NULL; 261 | const char *name = NULL; 262 | for (uint32_t i = 0; i < n_sect_hdrs; i++) 263 | { 264 | cur_hdr = &sect_headers[i]; 265 | name = &shstrtab[cur_hdr->sh_name]; 266 | if (is_valid_symtab (cur_hdr, name) 267 | && *symtab_idx == 0) 268 | { 269 | *symtab_idx = i; 270 | } 271 | else if (is_valid_strtab (cur_hdr, name) 272 | && *strtab_idx == 0) 273 | { 274 | *strtab_idx = i; 275 | } 276 | } 277 | 278 | if (*symtab_idx != 0 && *strtab_idx != 0) 279 | { 280 | return SP_OK; 281 | } 282 | else 283 | { 284 | return SP_ERR; 285 | } 286 | } 287 | 288 | SprayResult 289 | file_size (int fd, size_t *dest) 290 | { 291 | assert (dest != NULL); 292 | 293 | off_t n_bytes = lseek (fd, 0, SEEK_END); 294 | if (n_bytes < 0) 295 | { 296 | return SP_ERR; 297 | } 298 | else 299 | { 300 | *dest = (size_t) n_bytes; 301 | return SP_OK; 302 | } 303 | } 304 | 305 | /* Cast pointers pointing into the memory mapped ELF 306 | * file to specific structures. Using these functions 307 | * is much more readable than plain casts.
*/ 308 | 309 | static inline Elf64_Ehdr * 310 | ehdr_at (byte *bytes, size_t off) 311 | { 312 | return (Elf64_Ehdr *) (bytes + off); 313 | } 314 | 315 | static inline Elf64_Phdr * 316 | phdr_at (byte *bytes, size_t off) 317 | { 318 | return (Elf64_Phdr *) (bytes + off); 319 | } 320 | 321 | static inline Elf64_Shdr * 322 | shdr_at (byte *bytes, size_t off) 323 | { 324 | return (Elf64_Shdr *) (bytes + off); 325 | } 326 | 327 | static inline Elf64_Sym * 328 | symtab_at (byte *bytes, size_t off) 329 | { 330 | return (Elf64_Sym *) (bytes + off); 331 | } 332 | 333 | static inline char * 334 | strtab_at (byte *bytes, size_t off) 335 | { 336 | return (char *) (bytes + off); 337 | } 338 | 339 | ElfParseResult 340 | se_parse_elf (const char *filepath, ElfFile *elf_store) 341 | { 342 | assert (filepath != NULL); 343 | assert (elf_store != NULL); 344 | 345 | /* Acquire file descriptor for `mmap`. */ 346 | int fd = open (filepath, O_RDONLY); 347 | if (fd == -1) 348 | { 349 | return ELF_PARSE_IO_ERR; 350 | } 351 | 352 | /* Get the number of bytes in the file. */ 353 | size_t n_bytes = 0; 354 | if (file_size (fd, &n_bytes) == SP_ERR) 355 | { 356 | close (fd); 357 | return ELF_PARSE_IO_ERR; 358 | } 359 | 360 | byte *bytes = mmap (0, /* Kernel chooses address. */ 361 | n_bytes, /* Init the entire file. */ 362 | PROT_READ, 363 | MAP_PRIVATE, 364 | fd, 365 | 0); 366 | 367 | close (fd); /* Close no matter the outcome of `mmap`. */ 368 | 369 | if (bytes == MAP_FAILED) 370 | { 371 | return ELF_PARSE_IO_ERR; 372 | } 373 | 374 | /* Parse relevant information from the ELF header. 
*/ 375 | 376 | Elf64_Ehdr *elf_header = ehdr_at (bytes, 0); 377 | 378 | uint64_t prog_table_off = 0; 379 | uint32_t n_prog_hdrs = 0; 380 | 381 | uint64_t sect_table_off = 0; 382 | uint32_t n_sect_hdrs = 0; 383 | 384 | uint32_t shstrtab_idx = 0; 385 | 386 | ElfParseResult elf_header_res = 387 | parse_elf_header (elf_header, elf_store, &prog_table_off, &sect_table_off, 388 | &n_prog_hdrs, &n_sect_hdrs, &shstrtab_idx); 389 | 390 | if (elf_header_res != ELF_PARSE_OK) 391 | { 392 | if (munmap (bytes, n_bytes) == -1) 393 | { 394 | return ELF_PARSE_IO_ERR; 395 | } 396 | else 397 | { 398 | return elf_header_res; 399 | } 400 | } 401 | 402 | Elf64_Shdr *sect_headers = shdr_at (bytes, sect_table_off); 403 | 404 | /* Fill-in missing values if they weren't found in the ELF header. */ 405 | parse_init_section (sect_headers, &n_prog_hdrs, &n_sect_hdrs, 406 | &shstrtab_idx); 407 | 408 | 409 | /* Find the section headers for the symbol table and the string table. */ 410 | uint32_t symtab_idx = 0; 411 | uint32_t strtab_idx = 0; 412 | /* Get the section header string table that contains the names of 413 | * the sections in the section header table. `sh_name` is an index into 414 | * that table, and thus the table can be used to read the names of the 415 | * different sections.
*/ 416 | Elf64_Shdr *shstrtab_hdr = &sect_headers[shstrtab_idx]; 417 | const char *shstrtab = strtab_at (bytes, shstrtab_hdr->sh_offset); 418 | 419 | SprayResult tables_res = find_table_sections (sect_headers, n_sect_hdrs, 420 | shstrtab, &symtab_idx, 421 | &strtab_idx); 422 | 423 | if (tables_res == SP_ERR) 424 | { 425 | if (munmap (bytes, n_bytes) == -1) 426 | { 427 | return ELF_PARSE_IO_ERR; 428 | } 429 | else 430 | { 431 | return ELF_PARSE_INVALID; 432 | } 433 | } 434 | 435 | elf_store->sect_table = (ElfSectTable) 436 | { 437 | .n_headers = n_sect_hdrs,.symtab_idx = symtab_idx,.shstrtab_idx = 438 | shstrtab_idx,.strtab_idx = strtab_idx,.headers = sect_headers,}; 439 | 440 | Elf64_Phdr *prog_headers = phdr_at (bytes, prog_table_off); 441 | elf_store->prog_table = (ElfProgTable) 442 | { 443 | .n_headers = n_prog_hdrs,.headers = prog_headers 444 | }; 445 | 446 | elf_store->data = (ElfData) 447 | { 448 | .bytes = bytes,.n_bytes = n_bytes 449 | }; 450 | 451 | return ELF_PARSE_OK; 452 | } 453 | 454 | const char * 455 | elf_parse_result_name (ElfParseResult res) 456 | { 457 | static const char *elf_parse_result_names[] = { 458 | [ELF_PARSE_OK] = "parsed file successfully", 459 | [ELF_PARSE_IO_ERR] = "file I/O error", 460 | [ELF_PARSE_INVALID] = "invalid file contents", 461 | [ELF_PARSE_DISLIKE] = "unsupported file contents", 462 | }; 463 | 464 | return elf_parse_result_names[res]; 465 | } 466 | 467 | SprayResult 468 | se_free_elf (ElfFile elf) 469 | { 470 | if (munmap (elf.data.bytes, elf.data.n_bytes) == -1) 471 | { 472 | return SP_ERR; 473 | } 474 | else 475 | { 476 | return SP_OK; 477 | } 478 | } 479 | 480 | const Elf64_Sym * 481 | se_symbol_from_name (const char *name, const ElfFile *elf) 482 | { 483 | assert (name != NULL); 484 | assert (elf != NULL); 485 | 486 | Elf64_Shdr *symtab_hdr = 487 | &elf->sect_table.headers[elf->sect_table.symtab_idx]; 488 | const Elf64_Sym *symtab = 489 | symtab_at (elf->data.bytes, symtab_hdr->sh_offset); 490 | 491 | uint64_t n_symbols =
symtab_hdr->sh_size / symtab_hdr->sh_entsize; 492 | 493 | for (uint64_t i = 0; i < n_symbols; i++) 494 | { 495 | if (str_eq (se_symbol_name (&symtab[i], elf), name)) 496 | { 497 | return &symtab[i]; 498 | } 499 | } 500 | 501 | return NULL; 502 | } 503 | 504 | const Elf64_Sym * 505 | se_symbol_from_addr (dbg_addr addr, const ElfFile *elf) 506 | { 507 | assert (elf != NULL); 508 | 509 | Elf64_Shdr *symtab_hdr = 510 | &elf->sect_table.headers[elf->sect_table.symtab_idx]; 511 | const Elf64_Sym *symtab = 512 | symtab_at (elf->data.bytes, symtab_hdr->sh_offset); 513 | 514 | uint64_t n_symbols = symtab_hdr->sh_size / symtab_hdr->sh_entsize; 515 | 516 | for (uint64_t i = 0; i < n_symbols; i++) 517 | { 518 | if (se_symbol_start_addr (&symtab[i]).value <= addr.value && 519 | se_symbol_end_addr (&symtab[i]).value >= addr.value) 520 | { 521 | return &symtab[i]; 522 | } 523 | } 524 | 525 | return NULL; 526 | } 527 | 528 | int 529 | se_symbol_binding (const Elf64_Sym *sym) 530 | { 531 | assert (sym != NULL); 532 | return ELF64_ST_BIND (sym->st_info); 533 | } 534 | 535 | int 536 | se_symbol_type (const Elf64_Sym *sym) 537 | { 538 | assert (sym != NULL); 539 | return ELF64_ST_TYPE (sym->st_info); 540 | } 541 | 542 | int 543 | se_symbol_visibility (const Elf64_Sym *sym) 544 | { 545 | assert (sym != NULL); 546 | return sym->st_other; 547 | } 548 | 549 | uint64_t 550 | symbol_value (const Elf64_Sym *sym) 551 | { 552 | assert (sym != NULL); 553 | return sym->st_value; 554 | } 555 | 556 | dbg_addr 557 | se_symbol_start_addr (const Elf64_Sym *sym) 558 | { 559 | assert (sym != NULL); 560 | return (dbg_addr) {sym->st_value}; 561 | } 562 | 563 | dbg_addr 564 | se_symbol_end_addr (const Elf64_Sym *sym) 565 | { 566 | assert (sym != NULL); 567 | /* The symbol's size is the offset from the 568 | * start address if the symbol is a function. 
*/ 569 | return (dbg_addr) { sym->st_value + sym->st_size }; 570 | } 571 | 572 | const char * 573 | se_symbol_name (const Elf64_Sym *sym, const ElfFile *elf) 574 | { 575 | assert (sym != NULL); 576 | assert (elf != NULL); 577 | 578 | Elf64_Shdr *strtab_hdr = 579 | &elf->sect_table.headers[elf->sect_table.strtab_idx]; 580 | const char *strtab = strtab_at (elf->data.bytes, strtab_hdr->sh_offset); 581 | return &strtab[sym->st_name]; 582 | } 583 | -------------------------------------------------------------------------------- /src/spray_elf.h: -------------------------------------------------------------------------------- 1 | /* Parse ELF files and provide relevant info. */ 2 | 3 | #pragma once 4 | 5 | #ifndef _SPARY_SPRAY_ELF_H_ 6 | #define _SPRAY_SPRAY_ELF_H_ 7 | 8 | #define _GNU_SOURCE 9 | 10 | #include "magic.h" 11 | #include "ptrace.h" 12 | 13 | #include 14 | #include 15 | #include 16 | 17 | typedef unsigned char byte; 18 | 19 | typedef enum 20 | { 21 | ELF_TYPE_NONE = ET_NONE, 22 | ELF_TYPE_REL = ET_REL, 23 | ELF_TYPE_EXEC = ET_EXEC, 24 | ELF_TYPE_DYN = ET_DYN, 25 | ELF_TYPE_CORE = ET_CORE, 26 | } ElfType; 27 | 28 | typedef enum 29 | { 30 | ELF_ENDIAN_BIG, 31 | ELF_ENDIAN_LITTLE, 32 | } Endianness; 33 | 34 | /* ELF program header table. */ 35 | typedef struct 36 | { 37 | uint32_t n_headers; 38 | Elf64_Phdr *headers; 39 | } ElfProgTable; 40 | 41 | /* ELF section header table. */ 42 | typedef struct 43 | { 44 | uint32_t n_headers; 45 | /* Symbol table index in `headers`. */ 46 | uint32_t symtab_idx; 47 | /* String table indices in `headers`. */ 48 | uint32_t shstrtab_idx; 49 | uint32_t strtab_idx; 50 | Elf64_Shdr *headers; 51 | } ElfSectTable; 52 | 53 | typedef struct 54 | { 55 | /* Memory-mapped content of file. 
*/ 56 | byte *bytes; 57 | size_t n_bytes; 58 | } ElfData; 59 | 60 | typedef struct 61 | { 62 | ElfType type; 63 | Endianness endianness; 64 | ElfProgTable prog_table; 65 | ElfSectTable sect_table; 66 | ElfData data; 67 | } ElfFile; 68 | 69 | typedef enum 70 | { 71 | ELF_PARSE_OK, 72 | ELF_PARSE_IO_ERR, /* Error during I/O. */ 73 | ELF_PARSE_INVALID, /* Invalid file. */ 74 | ELF_PARSE_DISLIKE, /* Theoretically a valid ELF file but 75 | * some feature used is not supported. */ 76 | } ElfParseResult; 77 | 78 | const char *elf_parse_result_name (ElfParseResult res); 79 | 80 | /* Parse an ELF file and store the info in `elf`. 81 | * Returns `ELF_PARSE_OK` on success. `*elf` might 82 | * be changed even if the result is ultimately an error. */ 83 | ElfParseResult se_parse_elf (const char *filepath, ElfFile * elf); 84 | 85 | /* Returns `SP_ERR` if un-mapping the ELF file didn't work. */ 86 | SprayResult se_free_elf (ElfFile elf); 87 | 88 | 89 | /***************************/ 90 | /* Symbol table interface. */ 91 | /***************************/ 92 | 93 | /* Get the symbol table entry for the symbol name. 94 | * Returns `NULL` if no such symbol was found. */ 95 | const Elf64_Sym *se_symbol_from_name (const char *name, const ElfFile * elf); 96 | 97 | /* Get the symbol table entry for the symbol that 98 | * belongs to the given instruction address. */ 99 | const Elf64_Sym *se_symbol_from_addr (dbg_addr addr, const ElfFile * elf); 100 | 101 | /* Access different fields in a symbol. The way information 102 | * is stored in the different members of a symbol is a bit 103 | * weird so these wrappers make the code more readable. */ 104 | 105 | int se_symbol_binding (const Elf64_Sym * sym); 106 | int se_symbol_type (const Elf64_Sym * sym); 107 | int se_symbol_visibility (const Elf64_Sym * sym); 108 | 109 | /* Get start (low PC) and end (high PC) address of function symbol. 110 | * Return values are meaningless in this context if the symbol is 111 | * not a function.
*/ 112 | dbg_addr se_symbol_start_addr (const Elf64_Sym * sym); 113 | dbg_addr se_symbol_end_addr (const Elf64_Sym * sym); 114 | 115 | /* Looks up the symbol name in the string table. */ 116 | const char *se_symbol_name (const Elf64_Sym * sym, const ElfFile * elf); 117 | 118 | #endif /* _SPRAY_PARSE_ELF_H_ */ 119 | -------------------------------------------------------------------------------- /src/tokenize.scm: -------------------------------------------------------------------------------- 1 | (cond-expand 2 | ;; Tests are run using `csi` and require this load. 3 | (csi 4 | (load "src/c-syntax.scm")) 5 | (else )) 6 | 7 | (module tokenizer 8 | ;; Take a string (C source code) and create a list of tokens 9 | ;; that represent its syntax. 10 | (tokenize 11 | ;; Turn the given list of syntax tokens into a string. 12 | ;; The string contains ANSI escape codes to represent the 13 | ;; colors if `use-color` is true. 14 | color-tokens) 15 | 16 | (import scheme) 17 | (import (chicken base)) 18 | (import (chicken string)) 19 | (import traversal) 20 | (import (except (srfi-1) assoc member)) 21 | (import (except (srfi-13) string->list string-fill! string-copy)) 22 | (import format) ; `format` 23 | 24 | (import c-tokens) 25 | (import c-regex) 26 | (import c-types) 27 | 28 | ;;; Transform `code-lines` into a list of token 29 | ;;; streams representing the color of each piece 30 | ;;; of code in each line. 31 | (define (tokenize code-lines) 32 | ;; Does `given-str` start with any of the prefixes in `possible-prefixes`? 33 | (define (find-prefix given-str possible-prefixes) 34 | (find 35 | (lambda (possible-prefix) 36 | (string-prefix? possible-prefix given-str)) 37 | possible-prefixes)) 38 | 39 | ;; Predicate for `find-prefix`. 40 | (define (prefix? given-str possible-prefixes) 41 | (if (find-prefix given-str possible-prefixes) 42 | #t #f)) 43 | 44 | (define (starts-with-comment? str) 45 | (prefix? str C-comment)) 46 | 47 | (define (starts-with-line-comment? str) 48 | (prefix? 
str C++-comment)) 49 | 50 | (define (starts-with-uncomment? str) 51 | (prefix? str C-uncomment)) 52 | 53 | (define (starts-with-keyword? str) 54 | (regex-match? keyword-regex str)) 55 | 56 | (define (starts-with-operator? str) 57 | (prefix? str C-operators)) 58 | 59 | (define (starts-with-special-symbol? str) 60 | (prefix? str C-special-symbols)) 61 | 62 | (define (starts-with-literal? str) 63 | (regex-match? literal-regex str)) 64 | 65 | (define (starts-with-whitespace? str) 66 | (regex-match? whitespace-regex str)) 67 | 68 | (define (starts-with-identifier? str) 69 | (regex-match? identifier-regex str)) 70 | 71 | (define (starts-with-constant? str) 72 | (or (regex-match? hex-constant-regex str) 73 | (regex-match? octal-constant-regex str) 74 | (regex-match? decimal-constant-regex str) 75 | (regex-match? char-constant-regex str) 76 | (regex-match? sci-constant-regex str) 77 | (regex-match? float-constant-regex-frac str) 78 | (regex-match? float-constant-regex-whole str))) 79 | 80 | (define (starts-with-preproc? str) 81 | (regex-match? preproc-directive-regex str)) 82 | 83 | (define (starts-with-any? str) 84 | (regex-match? any-regex str)) 85 | 86 | 87 | ;; NOTE: All scan procedures assume that the corresponding 88 | ;; `starts-with-*?` procedure is called first so as to verify 89 | ;; that the string actually matches the regex. 
90 | (define (scan-comment code) 91 | (make-token-list (find-prefix code C-comment) 92 | token-tag-comment)) 93 | 94 | (define (scan-line-comment code) 95 | (make-token-list (find-prefix code C++-comment) 96 | token-tag-comment)) 97 | 98 | (define (scan-line-comment-text code) 99 | (make-token-list (full-match line-comment-text-regex code) 100 | token-tag-comment-text)) 101 | 102 | (define (scan-comment-text code) 103 | (make-token-list (full-match comment-text-regex code) 104 | token-tag-comment-text)) 105 | 106 | (define (scan-uncomment code) 107 | (make-token-list (find-prefix code C-uncomment) 108 | token-tag-uncomment)) 109 | 110 | (define (scan-trailing-uncomment code) 111 | (make-token-list (find-prefix code C-uncomment) 112 | token-tag-trailing-uncomment)) 113 | 114 | (define (scan-keyword code) 115 | (make-token-list (find-prefix code C-keywords) 116 | token-tag-keyword)) 117 | 118 | (define (scan-operator code) 119 | (make-token-list (find-prefix code C-operators) 120 | token-tag-operator)) 121 | 122 | (define (scan-special-symbol code) 123 | (make-token-list (find-prefix code C-special-symbols) 124 | token-tag-special-symbol)) 125 | 126 | (define (scan-literal code) 127 | (make-token-list (full-match literal-regex code) 128 | token-tag-literal)) 129 | 130 | (define (scan-whitespace code) 131 | (make-token-list (full-match whitespace-regex code) 132 | token-tag-whitespace)) 133 | 134 | (define (scan-identifier code) 135 | ;; Check if `identifier` is the identifier of a type. 136 | (define (type-identifier? identifier) 137 | (find 138 | (lambda (type) 139 | (string=? type identifier)) 140 | C-builtin-types)) 141 | (let ((match (full-match identifier-regex code))) 142 | (make-token-list 143 | match 144 | (if (type-identifier? match) 145 | token-tag-prim-type 146 | token-tag-identifier)))) 147 | 148 | (define (scan-constant code) 149 | (make-token-list 150 | (cond ((regex-match? 
hex-constant-regex code) 151 | (full-match hex-constant-regex code)) 152 | ((regex-match? octal-constant-regex code) 153 | (full-match octal-constant-regex code)) 154 | ((regex-match? decimal-constant-regex code) 155 | (full-match decimal-constant-regex code)) 156 | ((regex-match? char-constant-regex code) 157 | (full-match char-constant-regex code)) 158 | ((regex-match? sci-constant-regex code) 159 | (full-match sci-constant-regex code)) 160 | ((regex-match? float-constant-regex-frac code) 161 | (full-match float-constant-regex-frac code)) 162 | ((regex-match? float-constant-regex-whole code) 163 | (full-match float-constant-regex-whole code)) 164 | (else 165 | (error "scan-constant, expected to find match" code))) 166 | token-tag-constant)) 167 | 168 | (define (scan-preproc code) 169 | (define (include-match? matches) 170 | (and (equal? "#include" 171 | (cadr matches)) 172 | (cadddr matches))) 173 | 174 | (let ((matches (regex-matches preproc-directive-regex code))) 175 | (if (include-match? matches) 176 | (let ((directive (cadr matches)) 177 | (whitespace (caddr matches)) 178 | (filepath (cadddr matches))) 179 | (list (make-token directive 180 | token-tag-preproc-directive) 181 | (make-token whitespace 182 | token-tag-whitespace) 183 | (make-token filepath 184 | token-tag-include-filepath))) 185 | (let ((directive (cadr matches))) 186 | (make-token-list directive 187 | token-tag-preproc-directive))))) 188 | 189 | (define (scan-any code) 190 | (make-token-list (full-match any-regex code) 191 | token-tag-other)) 192 | 193 | (define (scan-normal-mode code) 194 | (cond 195 | ((starts-with-uncomment? code) 196 | (scan-trailing-uncomment code)) 197 | ((starts-with-keyword? code) 198 | (scan-keyword code)) 199 | ((starts-with-operator? code) 200 | (scan-operator code)) 201 | ((starts-with-special-symbol? code) 202 | (scan-special-symbol code)) 203 | ((starts-with-literal? code) 204 | (scan-literal code)) 205 | ((starts-with-whitespace? 
code) 206 | (scan-whitespace code)) 207 | ((starts-with-identifier? code) 208 | (scan-identifier code)) 209 | ((starts-with-constant? code) 210 | (scan-constant code)) 211 | ((starts-with-preproc? code) 212 | (scan-preproc code)) 213 | ((starts-with-any? code) 214 | (scan-any code)) 215 | (else 216 | (error "scan, invalid input" code)))) 217 | 218 | ;;; Scan the next token in the code. 219 | (define scan 220 | (let ((mode 'normal-mode)) 221 | (lambda (code new-line?) 222 | ;; Implicitly end single-line comment. 223 | (if (and (eq? mode 'line-comment-mode) 224 | new-line?) 225 | (set! mode 'normal-mode)) 226 | 227 | (cond 228 | ((string-null? code) 229 | '()) 230 | ((eq? mode 'normal-mode) 231 | (cond 232 | ((starts-with-comment? code) 233 | (begin 234 | ;; Begin block comment. 235 | (set! mode 'comment-mode) 236 | (scan-comment code))) 237 | ((starts-with-line-comment? code) 238 | (begin 239 | ;; Begin single-line comment. 240 | (set! mode 'line-comment-mode) 241 | (scan-line-comment code))) 242 | (else 243 | ;; Scan normal code. 244 | (scan-normal-mode code)))) 245 | ((eq? mode 'comment-mode) 246 | (if (starts-with-uncomment? code) 247 | (begin 248 | ;; Explicitly end multi-line comment. 249 | (set! mode 'normal-mode) 250 | (scan-uncomment code)) 251 | ;; Eat-up the block comment. 252 | (scan-comment-text code))) 253 | ((eq? mode 'line-comment-mode) 254 | (scan-line-comment-text code)))))) 255 | 256 | ;;; Return the next token in the code. 257 | (define next-token 258 | ;; Queue of tokens to be returned before scanning the next token. 259 | (let ((token-queue '())) 260 | (lambda (code new-line?) 261 | (if (null? token-queue) 262 | (let ((new-tokens (scan code new-line?))) 263 | (if (null? new-tokens) 264 | (make-end-token) ; Signal that input is over. 265 | (begin 266 | (set! token-queue (cdr new-tokens)) 267 | (car new-tokens)))) 268 | (let ((this-token (car token-queue))) 269 | (set! 
token-queue (cdr token-queue)) 270 | this-token))))) 271 | 272 | ;;; Return the rest of `str` after removing 273 | ;;; `(string-length cutoff)` characters from its start. 274 | (define (string-cutoff str cutoff) 275 | (substring str (string-length cutoff) (string-length str))) 276 | 277 | ;;; Colorize the given piece of code by splitting it into tokens. 278 | (define (tokenize-code code tokens new-line?) 279 | (let ((token (next-token code new-line?))) 280 | (if (end-token? token) 281 | (reverse tokens) 282 | (tokenize-code 283 | (string-cutoff code (token-text token)) 284 | (cons token tokens) 285 | ;; `new-line?` may only be set to true 286 | ;; by an external caller. 287 | #f)))) 288 | 289 | ;;; Sometimes comments begin outside of the given piece of 290 | ;;; source code. Then there is a trailing `*/` somewhere at 291 | ;;; the start. This procedure includes anything up to that `*/` 292 | ;;; in the comment. 293 | (define (wrap-leading-comment token-lines) 294 | (define (lead-comment? token-lines) 295 | (find 296 | (lambda (tag) 297 | (eq? tag token-tag-trailing-uncomment)) 298 | (map token-tag (flatten token-lines)))) 299 | 300 | (define (make-lead-end tokens) 301 | (cons 'lead-comment-end tokens)) 302 | 303 | (define (make-lead-line token) 304 | (cons 'lead-comment-line token)) 305 | 306 | (define (lead-end? lead-line) 307 | (and (pair? lead-line) 308 | (eq? (car lead-line) 'lead-comment-end))) 309 | 310 | (define (lead-tokens lead-line) 311 | (if (and (pair? lead-line) 312 | (or (eq? (car lead-line) 313 | 'lead-comment-end) 314 | (eq? (car lead-line) 315 | 'lead-comment-line))) 316 | (cdr lead-line) 317 | (error "lead-line-tokens, not a lead comment line" 318 | lead-line))) 319 | 320 | (define (wrap-leading-comment-line line) 321 | (let tokens-loop ((ext-str "") 322 | (rest-tokens line)) 323 | (cond ((null? rest-tokens) 324 | (make-lead-line 325 | (make-token-list ext-str token-tag-comment-text))) 326 | ((eq? 
(token-tag (car rest-tokens)) 327 | token-tag-trailing-uncomment) 328 | (make-lead-end 329 | (cons (make-token ext-str token-tag-comment-text) 330 | rest-tokens))) 331 | (else 332 | (tokens-loop 333 | (conc ext-str (token-text (car rest-tokens))) 334 | (cdr rest-tokens)))))) 335 | 336 | (if (lead-comment? token-lines) 337 | (let lines-loop ((ext-lines '()) 338 | (rest-lines token-lines)) 339 | ;; Don't have to check if `rest-lines` is null 340 | ;; because `wrap-leading-comment-line` will return a 341 | ;; pair before `rest-lines` ends if `lead-comment?` 342 | ;; was true. 343 | (let ((lead-line 344 | (wrap-leading-comment-line (car rest-lines)))) 345 | (if (lead-end? lead-line) 346 | (append 347 | (reverse 348 | (cons (lead-tokens lead-line) 349 | ext-lines)) 350 | (cdr rest-lines)) 351 | (lines-loop 352 | (cons (lead-tokens lead-line) 353 | ext-lines) 354 | (cdr rest-lines))))) 355 | token-lines)) 356 | 357 | (wrap-leading-comment 358 | (map 359 | (lambda (code-line) 360 | (tokenize-code code-line '() #t)) 361 | code-lines)) 362 | ) ; End procedure tokenize. 363 | 364 | (define (color-tokens token-lines types-env start-lineno active-lineno use-color) 365 | (define (def-color color) 366 | (string-append "\033[" color "m")) 367 | 368 | (define literal-color (def-color "31")) 369 | (define type-color (def-color "32")) 370 | (define operator-color (def-color "33")) 371 | (define constant-color (def-color "34")) 372 | (define keyword-color (def-color "35")) 373 | (define comment-color (def-color "96")) 374 | (define no-color (def-color "0")) 375 | (define nothing "") 376 | 377 | (define (comment-tag? tag) 378 | (or (eq? tag token-tag-comment) 379 | (eq? tag token-tag-comment-text) 380 | (eq? tag token-tag-uncomment) 381 | (eq? tag token-tag-trailing-uncomment))) 382 | 383 | (define (pick-before-color token) 384 | (let ((tag (token-tag token)) 385 | (text (token-text token))) 386 | (cond ((eq? tag token-tag-keyword) keyword-color) 387 | ((eq? 
tag token-tag-preproc-directive) keyword-color) 388 | ((eq? tag token-tag-operator) operator-color) 389 | ((eq? tag token-tag-prim-type) type-color) 390 | ((eq? tag token-tag-literal) literal-color) 391 | ((eq? tag token-tag-constant) constant-color) 392 | ((eq? tag token-tag-identifier) 393 | (if (is-type-in-env? types-env text) 394 | type-color 395 | nothing)) 396 | ((comment-tag? tag) comment-color) 397 | (else nothing)))) 398 | 399 | (define (before-color token) 400 | (if use-color 401 | (pick-before-color token) 402 | nothing)) 403 | 404 | (define (after-color) 405 | (if use-color 406 | no-color 407 | nothing)) 408 | 409 | (define (format-token token) 410 | (conc (before-color token) 411 | (token-text token) 412 | (after-color))) 413 | 414 | (define (accumulate-strings strs) 415 | (foldr conc "" strs)) 416 | 417 | (define (format-tokens tokens) 418 | (accumulate-strings 419 | (map format-token tokens))) 420 | 421 | ;;; Check if `tokens` contains any non-whitespace text. 422 | (define (visible-content? tokens) 423 | (find 424 | (lambda (token-text-chars) 425 | (not (null? 426 | (filter 427 | (lambda (char) 428 | (not (char-whitespace? char))) 429 | token-text-chars)))) 430 | (map (lambda (token) 431 | (string->list (token-text token))) 432 | tokens))) 433 | 434 | (define (format-lineno tokens offset) 435 | (let ((current-lineno (+ offset start-lineno))) 436 | (define (highlight-active-lineno) 437 | (cond ((= current-lineno active-lineno) 438 | " -> ") 439 | ((visible-content? tokens) 440 | " ") 441 | (else 442 | ""))) 443 | 444 | (conc (format #f " ~4d" current-lineno) 445 | (highlight-active-lineno)))) 446 | 447 | (accumulate-strings 448 | (map-indexed 449 | (lambda (token-line idx) 450 | (conc 451 | (format-lineno token-line idx) 452 | (format-tokens token-line) 453 | "\n")) 454 | token-lines))) ; End procedure color-tokens. 455 | ) ; End module tokenizer. 
456 | -------------------------------------------------------------------------------- /tests/assets/Makefile: -------------------------------------------------------------------------------- 1 | CC = clang 2 | CFLAGS = -g 3 | 4 | .PHONY = all clean 5 | 6 | SIMPLE = simple.c 7 | NESTED = nested_functions.c 8 | MULTI_FILE = multi-file/file1.c multi-file/file2.c 9 | EXTERN_VARIABLES = extern-variables/main.c extern-variables/second_file.c extern-variables/first_file.c extern-variables/third_file.c 10 | INCLUDE_VARIABLE = include-variable/main.c 11 | PRINT_ARGS = print_args.c 12 | COMMENTED = commented.c 13 | CUSTOM_TYPES = custom_types.c 14 | VARIABLES = recurring_variables.c 15 | POINTERS = pointers.c 16 | TYPE_EXAMPLES = type_examples.c 17 | MANY_FILES = many-files/foo1.c many-files/foo2.c many-files/main.c 18 | DEREF_POINTERS = deref_pointers.c 19 | TARGETS = 64bit-linux-simple.bin 32bit-linux-simple.bin nested-functions.bin multi-file.bin print-args.bin frame-pointer-nested-functions.bin no-frame-pointer-nested-functions.bin commented.bin custom-types.bin recurring-variables.bin pointers.bin extern-variables.bin include-variable.bin wrong-compiler.bin type-examples.bin many-files.bin deref_pointers.bin 20 | 21 | all: $(TARGETS) 22 | 23 | 64bit-linux-simple.bin: $(SIMPLE) 24 | $(CC) $(CFLAGS) $< -o $@ 25 | 32bit-linux-simple.bin: $(SIMPLE) 26 | $(CC) $(CFLAGS) -m32 $< -o $@ 27 | nested-functions.bin: $(NESTED) 28 | $(CC) $(CFLAGS) $< -o $@ 29 | recurring-variables.bin: $(VARIABLES) 30 | $(CC) $(CFLAGS) $< -o $@ 31 | frame-pointer-nested-functions.bin: CFLAGS += -fno-omit-frame-pointer 32 | frame-pointer-nested-functions.bin: $(NESTED) 33 | $(CC) $(CFLAGS) $< -o $@ 34 | no-frame-pointer-nested-functions.bin: CFLAGS += -fomit-frame-pointer 35 | no-frame-pointer-nested-functions.bin: $(NESTED) 36 | $(CC) $(CFLAGS) $< -o $@ 37 | multi-file.bin: $(MULTI_FILE) 38 | $(CC) $(CFLAGS) $(MULTI_FILE) -o $@ 39 | print-args.bin: $(PRINT_ARGS) 40 | $(CC) $(CFLAGS) $< -o $@ 41 | 
commented.bin: $(COMMENTED) 42 | $(CC) $(CFLAGS) $< -o $@ 43 | custom-types.bin: $(CUSTOM_TYPES) 44 | $(CC) $(CFLAGS) $< -o $@ 45 | pointers.bin: $(POINTERS) 46 | $(CC) $(CFLAGS) $< -o $@ 47 | extern-variables.bin: $(EXTERN_VARIABLES) 48 | $(CC) $(CFLAGS) $(EXTERN_VARIABLES) -o $@ 49 | include-variable.bin: $(INCLUDE_VARIABLE) 50 | $(CC) $(CFLAGS) $< -o $@ 51 | wrong-compiler.bin: $(SIMPLE) 52 | gcc $(CFLAGS) $< -o $@ 53 | type-examples.bin: $(TYPE_EXAMPLES) 54 | $(CC) $(CFLAGS) $< -o $@ 55 | many-files.bin: $(MANY_FILES) 56 | $(CC) $(CFLAGS) $(MANY_FILES) -o $@ 57 | deref_pointers.bin: $(DEREF_POINTERS) 58 | $(CC) $(CFLAGS) $< -o $@ 59 | 60 | clean: 61 | $(RM) $(TARGETS) 62 | 63 | -------------------------------------------------------------------------------- /tests/assets/commented.c: -------------------------------------------------------------------------------- 1 | #include 2 | /* 3 | 4 | I start outside the text that's printed. 5 | and I span more than one line. */ 6 | int main(void) { /* blah! */ 7 | printf("blah\n"); // This C++ style comment can contain this */ or that /*. 8 | int a = 7; 9 | /* This comment ends outside the printed text 10 | and spans multiple lines, too. 11 | 12 | */ 13 | return 0; 14 | } 15 | 16 | -------------------------------------------------------------------------------- /tests/assets/custom_types.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | struct Rational { 4 | int numer; 5 | int denom; 6 | }; 7 | 8 | void print_rat(struct Rational rat) { 9 | printf("%d / %d\n", rat.numer, rat.denom); 10 | } 11 | 12 | /* `breakpoints` starts with the keyword `break`. 13 | The syntax-highlighter must get confused by it. */ 14 | struct breakpoints { 15 | char *blah; 16 | }; 17 | 18 | int main(void) { 19 | struct Rational rat = (struct Rational) { 5, 3 }; 20 | rat.numer = 9; 21 | printf("The numerator is: %d\n", rat.numer); 22 | print_rat(rat); 23 | struct breakpoints bp = { "hey!" 
}; 24 | return 0; 25 | } 26 | 27 | -------------------------------------------------------------------------------- /tests/assets/deref_pointers.c: -------------------------------------------------------------------------------- 1 | int main(void) { 2 | int i = 42; 3 | int *ip = &i; 4 | /* Some value likely to dereference to something: */ 5 | long ptr = (long) ip; 6 | char *x = "This is a test"; 7 | return x[0]; 8 | } 9 | -------------------------------------------------------------------------------- /tests/assets/extern-variables/first_file.c: -------------------------------------------------------------------------------- 1 | int blah_int1 = 42; 2 | -------------------------------------------------------------------------------- /tests/assets/extern-variables/main.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | /* 4 | The files in this directory were created to test 5 | that the file path to the file where some variable 6 | was declared is found correctly. 
7 | */ 8 | 9 | extern int blah_int1; /* Declared in first_file.c */ 10 | extern int blah_int_another; /* Declared in third_file.c */ 11 | extern int blah_int2; /* Declared in second_file.c */ 12 | int my_own_int = 8; 13 | 14 | int main(void) { 15 | int sum = blah_int1 + my_own_int + blah_int2 + blah_int_another; 16 | printf("sum: %d\n", sum); 17 | return 0; 18 | } 19 | -------------------------------------------------------------------------------- /tests/assets/extern-variables/second_file.c: -------------------------------------------------------------------------------- 1 | int blah_int2 = 16; 2 | -------------------------------------------------------------------------------- /tests/assets/extern-variables/third_file.c: -------------------------------------------------------------------------------- 1 | int blah_int_another = 100; 2 | -------------------------------------------------------------------------------- /tests/assets/include-variable/header.h: -------------------------------------------------------------------------------- 1 | int blah = 4; 2 | -------------------------------------------------------------------------------- /tests/assets/include-variable/main.c: -------------------------------------------------------------------------------- 1 | #include "header.h" 2 | 3 | /* The executable compiled from this file and `header.h` 4 | * contains a single CU with multiple files (`main.c` and 5 | * `header.h`) in the line table header. It's used to test 6 | * retrieving the place where a variable was declared.
*/ 7 | 8 | int here = 9; 9 | 10 | int main(void) { 11 | int sum = here + blah; 12 | return sum; 13 | } 14 | -------------------------------------------------------------------------------- /tests/assets/many-files/bar1.h: -------------------------------------------------------------------------------- 1 | int bar1 = 9; 2 | -------------------------------------------------------------------------------- /tests/assets/many-files/bar2.h: -------------------------------------------------------------------------------- 1 | int bar2 = 10; 2 | -------------------------------------------------------------------------------- /tests/assets/many-files/baz.h: -------------------------------------------------------------------------------- 1 | int baz = 12345; 2 | -------------------------------------------------------------------------------- /tests/assets/many-files/foo1.c: -------------------------------------------------------------------------------- 1 | #include "foo1.h" 2 | 3 | int 4 | foo1 (void) 5 | { 6 | return 7; 7 | } 8 | -------------------------------------------------------------------------------- /tests/assets/many-files/foo1.h: -------------------------------------------------------------------------------- 1 | #ifndef _FOO1 2 | #define _FOO1 3 | 4 | int foo1 (void); 5 | 6 | #endif 7 | -------------------------------------------------------------------------------- /tests/assets/many-files/foo2.c: -------------------------------------------------------------------------------- 1 | #include "foo2.h" 2 | 3 | #include "baz.h" 4 | 5 | int 6 | foo2 (void) 7 | { 8 | return baz; 9 | } 10 | -------------------------------------------------------------------------------- /tests/assets/many-files/foo2.h: -------------------------------------------------------------------------------- 1 | #ifndef _FOO2 2 | #define _FOO2 3 | 4 | int foo2 (void); 5 | 6 | #endif 7 | -------------------------------------------------------------------------------- 
/tests/assets/many-files/main.c: -------------------------------------------------------------------------------- 1 | #include "foo1.h" 2 | #include "foo2.h" 3 | #include "bar1.h" 4 | #include "bar2.h" 5 | 6 | /* A binary comprised of a number of files 7 | * used to test the names of all files used 8 | * in a binary. 9 | * The main.c CU's line header table should 10 | * contain the file names bar1.h, bar2.h and 11 | * main.c. The line header tables for foo1.c 12 | * should contain only foo 13 | */ 14 | 15 | int 16 | main (void) 17 | { 18 | return bar1 + bar2 + foo1 () + foo2 (); 19 | } 20 | 21 | -------------------------------------------------------------------------------- /tests/assets/multi-file/file1.c: -------------------------------------------------------------------------------- 1 | #include "file2.h" 2 | 3 | int file1_compute_something(int n) { 4 | int i = 0; 5 | int acc = 0; 6 | while (i < n) { 7 | acc += i * i; 8 | i ++; 9 | } 10 | return acc; 11 | } 12 | 13 | int main(void) { 14 | int num1 = file1_compute_something(3); 15 | int num2 = file2_compute_something(num1); 16 | (void) (num1 + num2); 17 | struct Blah blah = file2_init_blah(4); 18 | (void) blah; 19 | return 0; 20 | } 21 | -------------------------------------------------------------------------------- /tests/assets/multi-file/file2.c: -------------------------------------------------------------------------------- 1 | #include "file2.h" 2 | 3 | int file2_compute_something(int n) { 4 | if (n < 2) { 5 | return n; 6 | } else { 7 | return file2_compute_something(n - 1) 8 | + file2_compute_something(n - 2); 9 | } 10 | } 11 | 12 | struct Blah file2_init_blah(int x) { 13 | return (struct Blah) { x }; 14 | } 15 | -------------------------------------------------------------------------------- /tests/assets/multi-file/file2.h: -------------------------------------------------------------------------------- 1 | #ifndef _FILE2_H_ 2 | #define _FILE2_H_ 3 | 4 | int file2_compute_something(int n); 5 | 6 | 
struct Blah { 7 | int x; 8 | }; 9 | 10 | struct Blah file2_init_blah(int x); 11 | 12 | #endif // _FILE2_H_ 13 | -------------------------------------------------------------------------------- /tests/assets/nested_functions.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int add(int a, int b) { 4 | int c = a + b; 5 | return c; 6 | } 7 | 8 | int mul(int a, int b) { 9 | int acc = 0; 10 | for (int i = 0; i < b; i++) { 11 | acc = add(acc, a); 12 | } 13 | return acc; 14 | } 15 | 16 | int main(void) { 17 | int product = mul(9, 3); 18 | int sum = add(product, 6); 19 | printf("Product: %d; Sum: %d\n", product, sum); 20 | return 0; 21 | } 22 | 23 | -------------------------------------------------------------------------------- /tests/assets/pointers.c: -------------------------------------------------------------------------------- 1 | // This file is used to test the debug information 2 | // generated to describe pointer variables. 3 | 4 | int deref_add(int *a, int *b) { 5 | int _a = *a; 6 | int _b = *b; 7 | return _a + _b; 8 | } 9 | 10 | void ptr_inc(int *inc) { 11 | *inc += 1; 12 | } 13 | 14 | int main(void) { 15 | int main_a = 9; 16 | int main_b = 18; 17 | int main_sum = deref_add(&main_a, &main_b); 18 | 19 | // Here the value of `main_sum` is increased by 1. 
20 | ptr_inc(&main_sum); 21 | return 0; 22 | } 23 | 24 | -------------------------------------------------------------------------------- /tests/assets/print_args.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int main(int argc, char *argv[]) { 4 | printf("Command line arguments: "); 5 | 6 | for (int i = 0; i < argc; i++) { 7 | printf("%s", argv[i]); 8 | if (i + 1 < argc) { 9 | printf(" "); 10 | } else { 11 | printf("\n"); 12 | } 13 | } 14 | 15 | return 0; 16 | } 17 | -------------------------------------------------------------------------------- /tests/assets/recurring_variables.c: -------------------------------------------------------------------------------- 1 | const int a = 3; /* Global variable. */ 2 | 3 | long blah(long b, long c) { 4 | if (b > c) { 5 | return b - c; 6 | } else { 7 | return c - b; 8 | } 9 | } 10 | 11 | int main(void) { 12 | int a = 19; 13 | long x = 0; 14 | long b = 5; 15 | long c = 9; 16 | c = blah(b, c); 17 | return 0; 18 | } 19 | -------------------------------------------------------------------------------- /tests/assets/simple.c: -------------------------------------------------------------------------------- 1 | int weird_sum(int a, 2 | int b) { 3 | int c = a + 1; 4 | int d = b + 2; 5 | int e = c + d; 6 | return e; 7 | } 8 | 9 | int main(void) { 10 | int a = 7; 11 | int b = 11; 12 | int c = weird_sum(a, b); 13 | return 0; 14 | } 15 | -------------------------------------------------------------------------------- /tests/assets/type_examples.c: -------------------------------------------------------------------------------- 1 | int a = 1; 2 | const long b = 2; 3 | void *c = (void *) 3; 4 | long long *d = (long long *) 4; 5 | const unsigned *e = (const unsigned *) 5; 6 | int *const f = (int *const) 6; 7 | volatile const char *restrict const g = (const char *const)7; 8 | 9 | 10 | int main(void) { 11 | char h = 'a'; 12 | unsigned char i = 'b'; 13 | signed char j = 'c'; 14 | const 
char k = 'd'; 15 | const unsigned char l = 'e'; 16 | const signed char m = 'f'; 17 | unsigned long long n = (unsigned long long) 1 << 63; 18 | 19 | typedef long i64; 20 | i64 o = -123456789; 21 | typedef unsigned char byte; 22 | byte p = 0xff; 23 | 24 | return 0; 25 | } 26 | -------------------------------------------------------------------------------- /tests/c-types.scm: -------------------------------------------------------------------------------- 1 | (load "src/tokenize.scm") 2 | (load "src/c-syntax.scm") 3 | 4 | (import test 5 | c-types 6 | tokenizer) 7 | 8 | (test-group "(c-types)" 9 | (test-assert "check finds single struct type" 10 | (is-type-in-env? (make-types-env (tokenize (list "struct a {};"))) "a")) 11 | (test-assert "check finds single union type" 12 | (is-type-in-env? (make-types-env (tokenize (list "union a {};"))) "a")) 13 | (test-assert "check finds single enum type" 14 | (is-type-in-env? (make-types-env (tokenize (list "enum a {};"))) "a")) 15 | 16 | (test "single struct type" 17 | (list '*env* "a") 18 | (make-types-env (tokenize (list "struct a {};")))) 19 | (test "single union type" 20 | (list '*env* "a") 21 | (make-types-env (tokenize (list "union a {};")))) 22 | (test "single enum type" 23 | (list '*env* "a") 24 | (make-types-env (tokenize (list "enum a {};")))) 25 | 26 | (test-assert "finds in multiple types" 27 | (is-type-in-env? 
28 | (make-types-env (tokenize (list "enum a {}; union b {}; struct c {}'"))) 29 | "c")) 30 | (test "multiple types" 31 | (list '*env* "a" "b" "c") 32 | (make-types-env (tokenize (list "enum a {}; union b {}; struct c {}'")))) 33 | ;; End of test-group c-types 34 | ) 35 | 36 | (test-exit) 37 | -------------------------------------------------------------------------------- /tests/debugger.c: -------------------------------------------------------------------------------- 1 | #include "test_utils.h" 2 | 3 | #include "../src/breakpoints.h" 4 | #define UNIT_TESTS 5 | #include "../src/debugger.h" 6 | 7 | TEST (breakpoints_work) 8 | { 9 | Debugger dbg; 10 | char *prog_argv[] = { SIMPLE_64BIT_BIN, NULL }; 11 | assert_int (setup_debugger (prog_argv[0], prog_argv, &dbg), ==, 0); 12 | 13 | real_addr bp_addr1 = { 0x00401122 }; 14 | 15 | enable_breakpoint (dbg.breakpoints, bp_addr1); 16 | assert_true (lookup_breakpoint (dbg.breakpoints, bp_addr1)); 17 | 18 | disable_breakpoint (dbg.breakpoints, bp_addr1); 19 | assert_false (lookup_breakpoint (dbg.breakpoints, bp_addr1)); 20 | 21 | del_debugger (dbg); 22 | 23 | return MUNIT_OK; 24 | } 25 | 26 | #define TEST_VARLOC(test_name, bin_name, var_name, pc_value, expect) \ 27 | TEST ((test_name)) { \ 28 | Debugger dbg; \ 29 | char *prog_argv[] = {(bin_name), NULL}; \ 30 | assert_int(setup_debugger(prog_argv[0], prog_argv, &dbg), ==, 0); \ 31 | \ 32 | dbg_addr pc = {(pc_value)}; \ 33 | \ 34 | enable_breakpoint(dbg.breakpoints, dbg_to_real(dbg.load_address, pc)); \ 35 | ExecResult exec_res = continue_execution(&dbg); \ 36 | assert_int(exec_res.type, ==, SP_OK); \ 37 | ExecResult wait_res = wait_for_signal(&dbg); \ 38 | assert_int(wait_res.type, ==, SP_OK); \ 39 | \ 40 | RuntimeVariable *var = init_var(pc, \ 41 | dbg.load_address, \ 42 | (var_name), \ 43 | dbg.pid, \ 44 | dbg.info); \ 45 | assert_ptr_not_null(var); \ 46 | assert_true(is_addr_loc(var)); \ 47 | real_addr loc_addr = var_loc_addr(var); \ 48 | del_var(var); \ 49 | \ 50 | 
uint64_t value = 0; \ 51 | SprayResult mem_res = pt_read_memory(dbg.pid, loc_addr, &value); \ 52 | assert_int(mem_res, ==, SP_OK); \ 53 | \ 54 | assert_int(value, ==, (expect)); \ 55 | \ 56 | del_debugger(dbg); \ 57 | \ 58 | return MUNIT_OK; \ 59 | } 60 | 61 | /* Stack variable declared in the function body. */ 62 | TEST_VARLOC (varloc_fbreg_works0, SIMPLE_64BIT_BIN, "a", 0x401163, 7) 63 | /* Stack variable passed as a function parameter. */ 64 | TEST_VARLOC (varloc_fbreg_works1, RECURRING_VARIABLES_BIN, "c", 0x401124, 9) 65 | /* Global variable. */ 66 | TEST_VARLOC (varloc_addr_works, RECURRING_VARIABLES_BIN, "a", 0x401124, 3) 67 | extern SprayResult is_file_with_line (const char *file_line); 68 | 69 | TEST (file_line_check_works) 70 | { 71 | SprayResult res = is_file_with_line ("this/is/a/file:2578"); 72 | assert_int (res, ==, SP_OK); 73 | 74 | res = is_file_with_line ("this/is/a/filename/without/a/line"); 75 | assert_int (res, ==, SP_ERR); 76 | 77 | res = is_file_with_line ("710985"); 78 | assert_int (res, ==, SP_ERR); 79 | 80 | res = is_file_with_line ("src/blah/test.c74"); 81 | assert_int (res, ==, SP_ERR); 82 | 83 | return MUNIT_OK; 84 | } 85 | 86 | extern SprayResult is_valid_identifier (const char *func_name); 87 | 88 | TEST (function_name_check_works) 89 | { 90 | bool is_valid = is_valid_identifier ("function_name_check_works1203"); 91 | assert_true (is_valid); 92 | 93 | is_valid = is_valid_identifier ("785019blah_function"); // Starts with numbers. 94 | assert_false (is_valid); 95 | 96 | is_valid = is_valid_identifier ("check-function-name"); // Kebab case. 97 | assert_false (is_valid); 98 | 99 | is_valid = is_valid_identifier ("check>function!>name"); // Other symbols. 
100 | assert_false (is_valid); 101 | 102 | return MUNIT_OK; 103 | } 104 | 105 | MunitTest debugger_tests[] = { 106 | REG_TEST (breakpoints_work), 107 | REG_TEST (file_line_check_works), 108 | REG_TEST (function_name_check_works), 109 | REG_TEST (varloc_fbreg_works0), 110 | REG_TEST (varloc_fbreg_works1), 111 | REG_TEST (varloc_addr_works), 112 | {NULL, NULL, NULL, NULL, MUNIT_TEST_OPTION_NONE, NULL} 113 | }; 114 | -------------------------------------------------------------------------------- /tests/dwarf.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "test_utils.h" 4 | 5 | #define UNIT_TESTS 6 | #include "../src/info.h" 7 | #include "../src/spray_dwarf.h" 8 | 9 | #include 10 | #include 11 | #include 12 | 13 | enum 14 | { 15 | RAND_DATA_BUF_SIZE = 32, 16 | }; 17 | 18 | TEST (get_line_entry_from_pc_works) 19 | { 20 | Dwarf_Error error = NULL; 21 | Dwarf_Debug dbg = sd_dwarf_init (SIMPLE_64BIT_BIN, &error); 22 | assert_ptr_not_null (dbg); 23 | 24 | { /* Happy path. */ 25 | dbg_addr pc = { 0x00401156 }; 26 | LineEntry line_entry = sd_line_entry_from_pc (dbg, pc); 27 | assert_true (line_entry.is_ok); 28 | assert_int (line_entry.ln, ==, 11); 29 | assert_int (line_entry.cl, ==, 7); 30 | assert_ptr_not_null (line_entry.filepath); 31 | /* Ignore the part of the filepath that is host specific. */ 32 | assert_ptr_not_null (strstr (line_entry.filepath, SIMPLE_SRC)); 33 | } 34 | { /* Sad path 😢. 
*/ 35 | dbg_addr pc = { 0xdeabbeef }; 36 | LineEntry line_entry = sd_line_entry_from_pc (dbg, pc); 37 | assert_false (line_entry.is_ok); 38 | assert_ptr_equal (line_entry.filepath, NULL); 39 | } 40 | 41 | dwarf_finish (dbg); 42 | return MUNIT_OK; 43 | } 44 | 45 | SprayResult 46 | callback__store_line (LineEntry *line, void *const void_data) 47 | { 48 | assert (line != NULL); 49 | assert (void_data != NULL); 50 | 51 | static int i = 0; 52 | unsigned *lines = (unsigned *) void_data; 53 | assert (i < 5); 54 | lines[i++] = line->ln; 55 | 56 | return SP_OK; 57 | } 58 | 59 | TEST (iterating_lines_works) 60 | { 61 | Dwarf_Error error = NULL; 62 | Dwarf_Debug dbg = sd_dwarf_init (SIMPLE_64BIT_BIN, &error); 63 | assert_ptr_not_null (dbg); 64 | 65 | unsigned lines[5]; 66 | 67 | char *filepath = realpath (SIMPLE_SRC, NULL); 68 | sd_for_each_line (dbg, "main", filepath, callback__store_line, &lines); 69 | dwarf_finish (dbg); 70 | free (filepath); 71 | 72 | unsigned expect[5] = { 9, 10, 11, 12, 13 }; 73 | assert_memory_equal (sizeof (unsigned[5]), lines, expect); 74 | 75 | return MUNIT_OK; 76 | } 77 | 78 | bool 79 | callback__test_search (Dwarf_Debug dbg, 80 | Dwarf_Die die, 81 | SearchFor search_for, SearchFindings search_findings) 82 | { 83 | assert (dbg != NULL); 84 | assert (die != NULL); 85 | 86 | const char *const fn_name = (char *) search_for.data; 87 | if (sd_is_subprog_with_name (dbg, die, fn_name)) 88 | { 89 | unsigned *level = (unsigned *) search_findings.data; 90 | *level = search_for.level; 91 | return true; 92 | } 93 | else 94 | { 95 | return false; 96 | } 97 | } 98 | 99 | TEST (search_returns_the_correct_result) 100 | { 101 | Dwarf_Error error = NULL; 102 | Dwarf_Debug dbg = sd_dwarf_init (SIMPLE_64BIT_BIN, &error); 103 | assert_ptr_not_null (dbg); 104 | int res = DW_DLV_OK; 105 | 106 | res = sd_search_dwarf_dbg (dbg, 107 | &error, 108 | callback__test_search, 109 | "this_function_name_does_not_exist", NULL); 110 | assert_int (res, ==, DW_DLV_NO_ENTRY); 111 | 112 
| unsigned found_at_level = -1; /* Not a valid level. */ 113 | res = sd_search_dwarf_dbg (dbg, &error, callback__test_search, "main", // <- This does exist. 114 | &found_at_level); 115 | assert_int (res, ==, DW_DLV_OK); 116 | assert_int (found_at_level, ==, 1); 117 | 118 | dwarf_finish (dbg); 119 | 120 | return MUNIT_OK; 121 | } 122 | 123 | SprayResult 124 | test_get_effective_start_addr (Dwarf_Debug dbg, 125 | const DebugSymbol *sym, dbg_addr *dest) 126 | { 127 | return sd_effective_start_addr (dbg, sym_start_addr (sym), 128 | sym_end_addr (sym), dest); 129 | } 130 | 131 | TEST (get_effective_function_start_works) 132 | { 133 | Dwarf_Error error = NULL; 134 | Dwarf_Debug dbg = sd_dwarf_init (SIMPLE_64BIT_BIN, &error); 135 | assert_ptr_not_null (dbg); 136 | DebugInfo *info = init_debug_info (SIMPLE_64BIT_BIN); 137 | assert_ptr_not_null (info); 138 | const DebugSymbol *sym = sym_by_name ("main", info); 139 | assert_ptr_not_null (sym); 140 | 141 | dbg_addr main_start = { 0 }; 142 | SprayResult res = test_get_effective_start_addr (dbg, sym, &main_start); 143 | assert_int (res, ==, SP_OK); 144 | LineEntry line_entry = sd_line_entry_from_pc (dbg, main_start); 145 | assert_true (line_entry.is_ok); 146 | /* 10 is the line number of the first line after the function declaration. */ 147 | assert_int (line_entry.ln, ==, 10); 148 | 149 | /* `weird_sum` has a multi-line function declaration. */ 150 | sym = sym_by_name ("weird_sum", info); 151 | dbg_addr func_start = { 0 }; 152 | res = test_get_effective_start_addr (dbg, sym, &func_start); 153 | assert_int (res, ==, SP_OK); 154 | line_entry = sd_line_entry_from_pc (dbg, func_start); 155 | assert_true (line_entry.is_ok); 156 | /* 3 is the line number of the first line after the function declaration. 
*/ 157 | assert_int (line_entry.ln, ==, 3); 158 | 159 | dwarf_finish (dbg); 160 | free_debug_info (&info); 161 | 162 | return MUNIT_OK; 163 | } 164 | 165 | TEST (get_filepath_from_pc_works) 166 | { 167 | Dwarf_Error error = NULL; 168 | Dwarf_Debug dbg = sd_dwarf_init (SIMPLE_64BIT_BIN, &error); 169 | assert_ptr_not_null (dbg); 170 | 171 | { 172 | dbg_addr pc = { 0x00401156 }; 173 | char *filepath = sd_filepath_from_pc (dbg, pc); 174 | assert_ptr_not_null (filepath); 175 | char *expect_filepath = realpath (SIMPLE_SRC, NULL); 176 | assert_string_equal (filepath, expect_filepath); 177 | free (filepath); 178 | free (expect_filepath); 179 | } 180 | { /* Sad path. */ 181 | dbg_addr pc = { 0xdeadbeef }; 182 | char *no_filepath = sd_filepath_from_pc (dbg, pc); 183 | assert_ptr_equal (no_filepath, NULL); 184 | } 185 | 186 | dwarf_finish (dbg); 187 | return MUNIT_OK; 188 | } 189 | 190 | TEST (sd_line_entry_at_works) 191 | { 192 | Dwarf_Error error = NULL; 193 | Dwarf_Debug dbg = sd_dwarf_init (SIMPLE_64BIT_BIN, &error); 194 | assert_ptr_not_null (dbg); 195 | 196 | LineEntry line = sd_line_entry_at (dbg, SIMPLE_SRC, 4); 197 | assert_true (line.is_ok); 198 | assert_int (line.ln, ==, 4); 199 | 200 | dwarf_finish (dbg); 201 | 202 | return MUNIT_OK; 203 | } 204 | 205 | #define ASSERT_TYPE(name, pc, _type) \ 206 | { \ 207 | SdVarattr var_attr = {0}; \ 208 | char *unused_decl_file = NULL; \ 209 | unsigned unused_decl_line = 0; \ 210 | SprayResult res = sd_runtime_variable( \ 211 | dbg, (pc), (name), &var_attr, &unused_decl_file, &unused_decl_line); \ 212 | assert_int(res, ==, SP_OK); \ 213 | assert_int(var_attr.type.n_nodes, ==, (_type).n_nodes); \ 214 | for (size_t i = 0; i < (_type).n_nodes; i++) { \ 215 | assert_memory_equal(sizeof(*(_type).nodes), &(_type).nodes[i], \ 216 | &var_attr.type.nodes[i]); \ 217 | } \ 218 | \ 219 | free(unused_decl_file); \ 220 | del_type(&var_attr.type); \ 221 | } 222 | 223 | TEST (finding_basic_variable_types_works) 224 | { 225 | Dwarf_Error error = 
NULL; 226 | Dwarf_Debug dbg = sd_dwarf_init (TYPE_EXAMPLES_BIN, &error); 227 | assert_ptr_not_null (dbg); 228 | 229 | /* There is no executable code in this CU. */ 230 | dbg_addr addr = { 0x0 }; 231 | 232 | SdTypenode a_nodes[1] = { 233 | {.tag = NODE_BASE_TYPE,.base_type = {.tag = BASE_TYPE_INT,.size = 4}}, 234 | }; 235 | SdType a = {.n_nodes = 1,.nodes = (SdTypenode *) & a_nodes }; 236 | ASSERT_TYPE ("a", addr, a); 237 | 238 | SdTypenode b_nodes[2] = { 239 | {.tag = NODE_MODIFIER,.modifier = TYPE_MOD_CONST}, 240 | {.tag = NODE_BASE_TYPE,.base_type = {.tag = BASE_TYPE_LONG,.size = 8}}, 241 | }; 242 | SdType b = {.n_nodes = 2,.nodes = (SdTypenode *) & b_nodes }; 243 | ASSERT_TYPE ("b", addr, b); 244 | 245 | SdTypenode c_nodes[1] = { 246 | {.tag = NODE_MODIFIER,.modifier = TYPE_MOD_POINTER}, 247 | }; 248 | SdType c = {.n_nodes = 1,.nodes = (SdTypenode *) & c_nodes }; 249 | ASSERT_TYPE ("c", addr, c); 250 | 251 | SdTypenode d_nodes[2] = { 252 | {.tag = NODE_MODIFIER,.modifier = TYPE_MOD_POINTER}, 253 | {.tag = NODE_BASE_TYPE, 254 | .base_type = {.tag = BASE_TYPE_LONG_LONG,.size = 8}}, 255 | }; 256 | SdType d = {.n_nodes = 2,.nodes = (SdTypenode *) & d_nodes }; 257 | ASSERT_TYPE ("d", addr, d); 258 | 259 | SdTypenode e_nodes[3] = { 260 | {.tag = NODE_MODIFIER,.modifier = TYPE_MOD_POINTER}, 261 | {.tag = NODE_MODIFIER,.modifier = TYPE_MOD_CONST}, 262 | {.tag = NODE_BASE_TYPE,.base_type = 263 | {.tag = BASE_TYPE_UNSIGNED_INT,.size = 4}}, 264 | }; 265 | SdType e = {.n_nodes = 3,.nodes = (SdTypenode *) & e_nodes }; 266 | ASSERT_TYPE ("e", addr, e); 267 | 268 | SdTypenode f_nodes[3] = { 269 | {.tag = NODE_MODIFIER,.modifier = TYPE_MOD_CONST}, 270 | {.tag = NODE_MODIFIER,.modifier = TYPE_MOD_POINTER}, 271 | {.tag = NODE_BASE_TYPE,.base_type = {.tag = BASE_TYPE_INT,.size = 4}}, 272 | }; 273 | SdType f = {.n_nodes = 3,.nodes = (SdTypenode *) & f_nodes }; 274 | ASSERT_TYPE ("f", addr, f); 275 | 276 | SdTypenode g_nodes[6] = { 277 | {.tag = NODE_MODIFIER,.modifier = 
TYPE_MOD_CONST}, 278 | {.tag = NODE_MODIFIER,.modifier = TYPE_MOD_RESTRICT}, 279 | {.tag = NODE_MODIFIER,.modifier = TYPE_MOD_POINTER}, 280 | {.tag = NODE_MODIFIER,.modifier = TYPE_MOD_CONST}, 281 | {.tag = NODE_MODIFIER,.modifier = TYPE_MOD_VOLATILE}, 282 | {.tag = NODE_BASE_TYPE,.base_type = {.tag = BASE_TYPE_CHAR,.size = 1} 283 | }, 284 | }; 285 | SdType g = {.n_nodes = 6,.nodes = (SdTypenode *) & g_nodes }; 286 | ASSERT_TYPE ("g", addr, g); 287 | 288 | dwarf_finish (dbg); 289 | return MUNIT_OK; 290 | } 291 | 292 | /* 293 | Assert that the first location description in the location list 294 | for the variable `name` in `func` has the given values. 295 | */ 296 | #define ASSERT_LOCDESC(name, pc, opcode_, op1, op2, op3, lowpc_, highpc_, \ 297 | file) \ 298 | { \ 299 | SdLoclist loclist = {0}; \ 300 | SdVarattr var_attr = {0}; \ 301 | char *decl_file = NULL; \ 302 | unsigned decl_line = 0; \ 303 | SprayResult res = sd_runtime_variable(dbg, (pc), (name), &var_attr, \ 304 | &decl_file, &decl_line); \ 305 | assert_int(res, ==, SP_OK); \ 306 | res = sd_init_loclist(dbg, var_attr.loc, &loclist); \ 307 | assert_int(res, ==, SP_OK); \ 308 | assert_int(loclist.ranges[0].lowpc.value, ==, (lowpc_)); \ 309 | assert_int(loclist.ranges[0].highpc.value, ==, (highpc_)); \ 310 | assert_int(loclist.exprs[0].operations[0].opcode, ==, (opcode_)); \ 311 | assert_int(loclist.exprs[0].operations[0].operand1, ==, (op1)); \ 312 | assert_int(loclist.exprs[0].operations[0].operand2, ==, (op2)); \ 313 | assert_int(loclist.exprs[0].operations[0].operand3, ==, (op3)); \ 314 | assert_string_equal(decl_file, (file)); \ 315 | free(decl_file); \ 316 | del_type(&var_attr.type); \ 317 | del_loclist(&loclist); \ 318 | } 319 | 320 | TEST (finding_variable_locations_works) 321 | { 322 | Dwarf_Error error = NULL; 323 | Dwarf_Debug dbg = sd_dwarf_init (SIMPLE_64BIT_BIN, &error); 324 | assert_ptr_not_null (dbg); 325 | 326 | dbg_addr main_addr = { 0x401163 }; /* Address from the binary's `main`. 
*/ 327 | char *file_path = realpath (SIMPLE_SRC, NULL); 328 | assert_ptr_not_null (file_path); 329 | 330 | ASSERT_LOCDESC ("a", main_addr, DW_OP_fbreg, -8, 0, 0, 0, 0, file_path); 331 | 332 | free (file_path); 333 | dwarf_finish (dbg); 334 | return MUNIT_OK; 335 | } 336 | 337 | TEST (finding_locations_by_scope_works) 338 | { 339 | Dwarf_Error error = NULL; 340 | Dwarf_Debug dbg = sd_dwarf_init (RECURRING_VARIABLES_BIN, &error); 341 | assert_ptr_not_null (dbg); 342 | 343 | dbg_addr main_addr = { 0x401182 }; /* Some address in the binary's `main`. */ 344 | dbg_addr blah_addr = { 0x401132 }; /* Some address in the `blah` function. */ 345 | char *file_path = realpath (RECURRING_VARIABLES_SRC, NULL); 346 | assert_ptr_not_null (file_path); 347 | 348 | ASSERT_LOCDESC ("a", main_addr, DW_OP_fbreg, -8, 0, 0, 0, 0, file_path); 349 | ASSERT_LOCDESC ("b", main_addr, DW_OP_fbreg, -24, 0, 0, 0, 0, file_path); 350 | ASSERT_LOCDESC ("c", main_addr, DW_OP_fbreg, -32, 0, 0, 0, 0, file_path); 351 | 352 | ASSERT_LOCDESC ("a", blah_addr, DW_OP_addr, 4202512, 0, 0, 0, 0, file_path); 353 | ASSERT_LOCDESC ("b", blah_addr, DW_OP_fbreg, -16, 0, 0, 0, 0, file_path); 354 | ASSERT_LOCDESC ("c", blah_addr, DW_OP_fbreg, -24, 0, 0, 0, 0, file_path); 355 | 356 | free (file_path); 357 | dwarf_finish (dbg); 358 | 359 | return MUNIT_OK; 360 | } 361 | 362 | TEST (finding_variable_declration_files_works) 363 | { 364 | Dwarf_Error error = NULL; 365 | Dwarf_Debug dbg = sd_dwarf_init (EXTERN_VARIABLES_BIN, &error); 366 | assert_ptr_not_null (dbg); 367 | 368 | dbg_addr addr = { 0x40115e }; 369 | char *blah_int1_file = 370 | realpath ("tests/assets/extern-variables/first_file.c", NULL); 371 | char *blah_int2_file = 372 | realpath ("tests/assets/extern-variables/second_file.c", NULL); 373 | char *blah_int_another_file = 374 | realpath ("tests/assets/extern-variables/third_file.c", NULL); 375 | char *my_own_int_file = 376 | realpath ("tests/assets/extern-variables/main.c", NULL); 377 | assert_ptr_not_null 
(blah_int1_file); 378 | assert_ptr_not_null (blah_int2_file); 379 | assert_ptr_not_null (blah_int_another_file); 380 | assert_ptr_not_null (my_own_int_file); 381 | 382 | ASSERT_LOCDESC ("blah_int1", addr, DW_OP_addr, 0x404014, 0, 0, 0, 0, 383 | blah_int1_file); 384 | ASSERT_LOCDESC ("blah_int2", addr, DW_OP_addr, 0x404010, 0, 0, 0, 0, 385 | blah_int2_file); 386 | ASSERT_LOCDESC ("blah_int_another", addr, DW_OP_addr, 0x404018, 0, 0, 0, 0, 387 | blah_int_another_file); 388 | ASSERT_LOCDESC ("my_own_int", addr, DW_OP_addr, 0x40400c, 0, 0, 0, 0, 389 | my_own_int_file); 390 | 391 | free (blah_int1_file); 392 | free (blah_int2_file); 393 | free (blah_int_another_file); 394 | free (my_own_int_file); 395 | dwarf_finish (dbg); 396 | 397 | dbg = sd_dwarf_init (INCLUDE_VARIABLE_BIN, &error); 398 | assert_ptr_not_null (dbg); 399 | 400 | addr = (dbg_addr) 401 | { 402 | 0x401129}; 403 | char *blah_file = realpath ("tests/assets/include-variable/header.h", NULL); 404 | char *here_file = realpath ("tests/assets/include-variable/main.c", NULL); 405 | assert_ptr_not_null (blah_file); 406 | assert_ptr_not_null (here_file); 407 | 408 | ASSERT_LOCDESC ("blah", addr, DW_OP_addr, 0x404004, 0, 0, 0, 0, blah_file); 409 | ASSERT_LOCDESC ("here", addr, DW_OP_addr, 0x404008, 0, 0, 0, 0, here_file); 410 | 411 | free (blah_file); 412 | free (here_file); 413 | dwarf_finish (dbg); 414 | 415 | return MUNIT_OK; 416 | } 417 | 418 | TEST (manual_check_locexpr_output) 419 | { 420 | SdExpression first = { 0 }; 421 | first.n_operations = 2; 422 | first.operations = calloc (first.n_operations, sizeof (SdOperation)); 423 | first.operations[0] = (SdOperation) 424 | { 425 | .opcode = DW_OP_fbreg, /* Has one operand. */ 426 | .operands = { 13, 0, 0 },}; 427 | first.operations[1] = (SdOperation) 428 | { 429 | .opcode = DW_OP_const_type, /* Has three operands. 
*/ 430 | .operands = { 14, 15, 16 },}; 431 | 432 | SdExpression second = { 0 }; 433 | second.n_operations = 1; 434 | second.operations = calloc (second.n_operations, sizeof (SdOperation)); 435 | second.operations[0] = (SdOperation) 436 | { 437 | .opcode = DW_OP_deref_type, /* Has two operands. */ 438 | .operands = { 123, 456 },}; 439 | 440 | SdLoclist loclist = { 0 }; 441 | loclist.n_exprs = 2; 442 | loclist.exprs = calloc (loclist.n_exprs, sizeof (SdExpression)); 443 | loclist.exprs[0] = first; 444 | loclist.exprs[1] = second; 445 | 446 | loclist.ranges = calloc (loclist.n_exprs, sizeof (SdLocRange)); 447 | loclist.ranges[0] = (SdLocRange) 448 | { 449 | .meaningful = true,.lowpc = { 78 },.highpc = { 910 },}; 450 | loclist.ranges[1] = (SdLocRange) 451 | { 452 | .meaningful = true,.lowpc = { 11 },.highpc = { 12 },}; 453 | 454 | /* TODO: Replace this test with an integration test, 455 | that captures the output that's emitted here and 456 | checks that the output is correct. */ 457 | 458 | printf ("\n"); /* Initial newline for easier inspection. */ 459 | print_loclist (loclist); 460 | del_loclist (&loclist); 461 | 462 | return MUNIT_OK; 463 | } 464 | 465 | TEST (validating_compilers_works) 466 | { 467 | Dwarf_Error error = NULL; 468 | Dwarf_Debug dbg = sd_dwarf_init (WRONG_COMPILER_BIN, &error); 469 | assert_ptr_equal (dbg, NULL); 470 | return MUNIT_OK; 471 | } 472 | 473 | TEST (type_attribute_form) 474 | { 475 | Dwarf_Error error = NULL; 476 | Dwarf_Debug dbg = sd_dwarf_init (SIMPLE_64BIT_BIN, &error); 477 | assert_ptr_not_null (dbg); 478 | 479 | SdVarattr var_attr = { 0 }; 480 | dbg_addr main_addr = { 0x401163 }; /* Address from the binary's `main`. 
*/ 481 | char *decl_file = NULL; 482 | unsigned decl_line = 0; 483 | SprayResult res = sd_runtime_variable (dbg, 484 | main_addr, 485 | "a", 486 | &var_attr, 487 | &decl_file, 488 | &decl_line); 489 | assert_int (res, ==, SP_OK); 490 | 491 | dwarf_finish (dbg); 492 | del_type (&var_attr.type); 493 | free (decl_file); 494 | 495 | return MUNIT_OK; 496 | } 497 | 498 | 499 | 500 | bool 501 | ends_with (const char *str, const char *end) 502 | { 503 | if (str == NULL || end == NULL) 504 | return 0; 505 | 506 | size_t lenstr = strlen (str); 507 | size_t lenend = strlen (end); 508 | 509 | if (lenend > lenstr) 510 | return 0; 511 | 512 | return strncmp(str + lenstr - lenend, end, lenend) == 0; 513 | } 514 | 515 | TEST(get_filepaths_works) 516 | { 517 | Dwarf_Error error = NULL; 518 | Dwarf_Debug dbg = sd_dwarf_init (MANY_FILES_BIN, &error); 519 | assert_ptr_not_null (dbg); 520 | 521 | char **filepaths = sd_get_filepaths (dbg); 522 | char *expected_ends[6] = { 523 | "tests/assets/many-files/foo1.c", 524 | "tests/assets/many-files/baz.h", 525 | "tests/assets/many-files/foo2.c", 526 | "tests/assets/many-files/bar1.h", 527 | "tests/assets/many-files/bar2.h", 528 | "tests/assets/many-files/main.c", 529 | }; 530 | 531 | for (int i = 0; filepaths[i] != NULL; i++) 532 | { 533 | assert_int (i, <, 6); /* Don't exceed the number of expected ends. 
*/ 534 | assert_true (ends_with (filepaths[i], expected_ends[i])); 535 | free (filepaths[i]); 536 | } 537 | 538 | free (filepaths); 539 | 540 | dwarf_finish (dbg); 541 | return MUNIT_OK; 542 | } 543 | 544 | 545 | MunitTest dwarf_tests[] = { 546 | REG_TEST (get_line_entry_from_pc_works), 547 | REG_TEST (iterating_lines_works), 548 | REG_TEST (search_returns_the_correct_result), 549 | REG_TEST (get_effective_function_start_works), 550 | REG_TEST (get_filepath_from_pc_works), 551 | REG_TEST (sd_line_entry_at_works), 552 | REG_TEST (finding_basic_variable_types_works), 553 | REG_TEST (finding_variable_locations_works), 554 | REG_TEST (finding_locations_by_scope_works), 555 | REG_TEST (manual_check_locexpr_output), 556 | REG_TEST (finding_variable_declration_files_works), 557 | REG_TEST (validating_compilers_works), 558 | REG_TEST (type_attribute_form), 559 | REG_TEST (get_filepaths_works), 560 | {NULL, NULL, NULL, NULL, MUNIT_TEST_OPTION_NONE, NULL} 561 | }; 562 | -------------------------------------------------------------------------------- /tests/elf.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "test_utils.h" 4 | 5 | #include "../src/spray_elf.h" 6 | 7 | TEST (accept_valid_executable) 8 | { 9 | ElfFile elf_file = { 0 }; 10 | ElfParseResult res = se_parse_elf (SIMPLE_64BIT_BIN, &elf_file); 11 | assert_int (res, ==, ELF_PARSE_OK); 12 | 13 | assert_int (elf_file.prog_table.n_headers, ==, 13); 14 | assert_int (elf_file.sect_table.n_headers, ==, 34); 15 | 16 | // Compare some randomly chosen values to those 17 | // returned by `readelf(1)`. 
18 | 19 | assert_int (elf_file.prog_table.headers[0].p_type, ==, PT_PHDR); 20 | 21 | Elf64_Phdr load_ph = elf_file.prog_table.headers[3]; 22 | assert_int (load_ph.p_type, ==, PT_LOAD); 23 | assert_int (load_ph.p_offset, ==, 0x1000); 24 | assert_int (load_ph.p_vaddr, ==, 0x401000); 25 | assert_int (load_ph.p_paddr, ==, 0x401000); 26 | assert_int (load_ph.p_filesz, ==, 0x181); 27 | assert_int (load_ph.p_memsz, ==, 0x181); 28 | assert_int (load_ph.p_flags, ==, PF_R | PF_X); 29 | assert_int (load_ph.p_align, ==, 0x1000); 30 | 31 | Elf64_Phdr eh_frame_ph = elf_file.prog_table.headers[10]; 32 | assert_int (eh_frame_ph.p_type, ==, PT_GNU_EH_FRAME); 33 | assert_int (eh_frame_ph.p_offset, ==, 0x2010); 34 | assert_int (eh_frame_ph.p_vaddr, ==, 0x402010); 35 | assert_int (eh_frame_ph.p_paddr, ==, 0x402010); 36 | assert_int (eh_frame_ph.p_filesz, ==, 0x2c); 37 | assert_int (eh_frame_ph.p_memsz, ==, 0x2c); 38 | assert_int (eh_frame_ph.p_flags, ==, PF_R); 39 | assert_int (eh_frame_ph.p_align, ==, 0x4); 40 | 41 | Elf64_Shdr symtab_sh = elf_file.sect_table.headers[31]; 42 | assert_int (symtab_sh.sh_type, ==, SHT_SYMTAB); 43 | assert_int (symtab_sh.sh_addr, ==, 0x0); 44 | assert_int (symtab_sh.sh_offset, ==, 13656); 45 | assert_int (symtab_sh.sh_size, ==, 0x330); 46 | assert_int (symtab_sh.sh_entsize, ==, 0x18); 47 | assert_int (symtab_sh.sh_flags, ==, 0); 48 | assert_int (symtab_sh.sh_link, ==, 32); 49 | assert_int (symtab_sh.sh_info, ==, 18); 50 | assert_int (symtab_sh.sh_addralign, ==, 8); 51 | 52 | se_free_elf (elf_file); 53 | return MUNIT_OK; 54 | } 55 | 56 | TEST (read_elf_symbol_table_entries) 57 | { 58 | ElfFile elf_file = { 0 }; 59 | ElfParseResult res = se_parse_elf (MULTI_FILE_BIN, &elf_file); 60 | assert_int (res, ==, ELF_PARSE_OK); 61 | 62 | const Elf64_Sym *main = se_symbol_from_name ("main", &elf_file); 63 | assert_ptr_not_null (main); 64 | assert_string_equal (se_symbol_name (main, &elf_file), "main"); 65 | assert_int (se_symbol_binding (main), ==, STB_GLOBAL); 66 | 
assert_int (se_symbol_type (main), ==, STT_FUNC); 67 | assert_int (se_symbol_visibility (main), ==, STV_DEFAULT); 68 | 69 | const Elf64_Sym *func2 = 70 | se_symbol_from_name ("file2_compute_something", &elf_file); 71 | assert_ptr_not_null (func2); 72 | assert_string_equal (se_symbol_name (func2, &elf_file), 73 | "file2_compute_something"); 74 | assert_int (se_symbol_binding (func2), ==, STB_GLOBAL); 75 | assert_int (se_symbol_type (func2), ==, STT_FUNC); 76 | assert_int (se_symbol_visibility (func2), ==, STV_DEFAULT); 77 | 78 | const Elf64_Sym *func1 = 79 | se_symbol_from_addr ((dbg_addr) { 0x00401128 }, &elf_file); 80 | assert_ptr_not_null (func1); 81 | assert_string_equal (se_symbol_name (func1, &elf_file), 82 | "file1_compute_something"); 83 | assert_int (se_symbol_binding (func1), ==, STB_GLOBAL); 84 | assert_int (se_symbol_type (func1), ==, STT_FUNC); 85 | assert_int (se_symbol_visibility (func1), ==, STV_DEFAULT); 86 | 87 | se_free_elf (elf_file); 88 | return MUNIT_OK; 89 | } 90 | 91 | TEST (reject_invalid_executables) 92 | { 93 | // The following are a bunch of executables which 94 | // were compiled for unsupported targets (32-bit, ARM etc.) 95 | // All of them should be rejected. 
96 | 97 | ElfFile elf_file = { 0 }; 98 | ElfParseResult res = se_parse_elf (SIMPLE_32BIT_BIN, &elf_file); 99 | assert_int (res, ==, ELF_PARSE_DISLIKE); 100 | return MUNIT_OK; 101 | } 102 | 103 | MunitTest parse_elf_tests[] = { 104 | REG_TEST (accept_valid_executable), 105 | REG_TEST (reject_invalid_executables), 106 | REG_TEST (read_elf_symbol_table_entries), 107 | {NULL, NULL, NULL, NULL, MUNIT_TEST_OPTION_NONE, NULL} 108 | }; 109 | -------------------------------------------------------------------------------- /tests/main.c: -------------------------------------------------------------------------------- 1 | #include "test_utils.h" 2 | 3 | extern MunitTest parse_elf_tests[]; 4 | extern MunitTest dwarf_tests[]; 5 | extern MunitTest debugger_tests[]; 6 | 7 | static MunitSuite suites[] = { 8 | { 9 | "/parse_elf", 10 | parse_elf_tests, 11 | NULL, 12 | 1, 13 | MUNIT_SUITE_OPTION_NONE}, 14 | { 15 | "/dwarf_tests", 16 | dwarf_tests, 17 | NULL, 18 | 1, 19 | MUNIT_SUITE_OPTION_NONE}, 20 | { 21 | "/debugger_tests", 22 | debugger_tests, 23 | NULL, 24 | 1, 25 | MUNIT_SUITE_OPTION_NONE}, 26 | {NULL, NULL, NULL, 0, MUNIT_SUITE_OPTION_NONE} 27 | }; 28 | 29 | static const MunitSuite suite = { 30 | "/spray", 31 | NULL, 32 | suites, 33 | 1, 34 | MUNIT_SUITE_OPTION_NONE, 35 | }; 36 | 37 | int 38 | main (int argc, char *const *argv) 39 | { 40 | return munit_suite_main (&suite, NULL, argc, argv); 41 | } 42 | -------------------------------------------------------------------------------- /tests/test_utils.h: -------------------------------------------------------------------------------- 1 | /* Test utilities. 
*/ 2 | 3 | #ifndef _SPRAY_TEST_UTILS_H_ 4 | #define _SPRAY_TEST_UTILS_H_ 5 | 6 | #define MUNIT_ENABLE_ASSERT_ALIASES 7 | #include "../dependencies/munit/munit.h" 8 | 9 | // Names of tests assets 10 | #define SIMPLE_SRC "tests/assets/simple.c" 11 | #define SIMPLE_64BIT_BIN "tests/assets/64bit-linux-simple.bin" 12 | #define SIMPLE_32BIT_BIN "tests/assets/32bit-linux-simple.bin" 13 | #define NESTED_FUNCTIONS_SRC "tests/assets/nested_functions.c" 14 | #define NESTED_FUNCTIONS_BIN "tests/assets/nested-functions.bin" 15 | #define MULTI_FILE_BIN "tests/assets/multi-file.bin" 16 | #define EXTERN_VARIABLES_BIN "tests/assets/extern-variables.bin" 17 | #define PRINT_ARGS_SRC "tests/assets/print_args.c" 18 | #define PRINT_ARGS_BIN "tests/assets/print-args.bin" 19 | #define RECURRING_VARIABLES_SRC "tests/assets/recurring_variables.c" 20 | #define RECURRING_VARIABLES_BIN "tests/assets/recurring-variables.bin" 21 | #define POINTERS_SRC "tests/assets/pointers.c" 22 | #define POINTERS_BIN "tests/assets/pointers.bin" 23 | #define INCLUDE_VARIABLE_BIN "tests/assets/include-variable.bin" 24 | #define WRONG_COMPILER_BIN "tests/assets/wrong-compiler.bin" 25 | #define TYPE_EXAMPLES_BIN "tests/assets/type-examples.bin" 26 | #define MANY_FILES_BIN "tests/assets/many-files.bin" 27 | 28 | // Create a test 29 | #define TEST(name) \ 30 | static MunitResult name(MUNIT_UNUSED const MunitParameter p[], MUNIT_UNUSED void* fixture) 31 | 32 | // Register a test. 
33 | #define REG_TEST(name) \ 34 | { "/"#name, name, NULL, NULL, MUNIT_TEST_OPTION_NONE, NULL } 35 | 36 | #endif // _SPRAY_TEST_UTILS_H_ 37 | -------------------------------------------------------------------------------- /tests/tokenize.scm: -------------------------------------------------------------------------------- 1 | (load "src/tokenize.scm") 2 | 3 | (import test 4 | c-regex 5 | tokenizer) 6 | 7 | (test-group "(test-regex)" 8 | (test-group "(test-keyword-regex)" 9 | (test-assert "keyword with space at end is accepted" 10 | (regex-match? keyword-regex "break ")) 11 | (test-assert "keyword with tab at end is accepted" 12 | (regex-match? keyword-regex "extern\t")) 13 | (test-assert "identifier starting with keyword is rejected" 14 | (not (regex-match? keyword-regex "breakpoints "))) 15 | (test-assert "other identifier starting with keyword is rejected" 16 | (not (regex-match? keyword-regex "return_value")))) 17 | 18 | (test-group "(test-literal-regex)" 19 | (test "match a string made up of only a string literal" 20 | "\"blah\"" 21 | (full-match literal-regex "\"blah\"")) 22 | (test "match a string starting with a string literal" 23 | "\"blah\"" 24 | (full-match literal-regex "\"blah\" ... some more lame text")) 25 | (test-assert "don't match a string not starting with a string literal" 26 | (not (regex-match? literal-regex "blah ... invalid text \"string literal\""))) 27 | ;; After removing the backslashes needed to embed this string in this source, 28 | ;; the string below looks like this: "\\ \" ... blah " 29 | (test "match escaped backslashes and quotation marks" 30 | "\"\\\\ \\\" ... blah \"" 31 | (full-match literal-regex "\"\\\\ \\\" ... blah \""))) 32 | 33 | (test-group "(test-whitespace-regex)" 34 | (test "only whitespace" 35 | " \t\t\r " 36 | (full-match whitespace-regex " \t\t\r ")) 37 | (test-assert "beginning whitespace" 38 | (regex-match? whitespace-regex "\n \t blah")) 39 | (test-assert "no beginning whitespace" 40 | (not (regex-match?
whitespace-regex "blah \n ")))) 41 | 42 | (test-group "(test-identifier-regex)" 43 | (test-assert "identifier starting with numbers" 44 | (not (regex-match? identifier-regex "98blah"))) 45 | (test "identifier including numbers and under scores" 46 | "blah_984baz" 47 | (full-match identifier-regex "blah_984baz")) 48 | (test-assert "identifier starting with whitespace" 49 | (not (regex-match? identifier-regex " \n blah")))) 50 | 51 | (test-group "(test-hex-constant-regex)" 52 | (test-assert "hex constant" 53 | (regex-match? hex-constant-regex "0x81babe")) 54 | (test-assert "no hex prefix" 55 | (not (regex-match? hex-constant-regex "92873"))) 56 | (test-assert "type postfixes" 57 | (regex-match? hex-constant-regex "0xbad1dealueLU"))) 58 | 59 | (test-group "(test-octal-constant-regex)" 60 | (test-assert "octal constant" 61 | (regex-match? octal-constant-regex "01543672")) 62 | (test-assert "out of octal range" 63 | (not (regex-match? octal-constant-regex "09082")))) 64 | 65 | (test-group "(test-decimal-constant-regex)" 66 | (test-assert "decimal constant" 67 | (regex-match? decimal-constant-regex "780934lu")) 68 | (test-assert "single zero as decimal" 69 | (regex-match? decimal-constant-regex "0"))) 70 | 71 | (test-group "(test-char-constant-regex)" 72 | (test-assert "character constant" 73 | (regex-match? char-constant-regex "'a'")) 74 | (test-assert "multi-character constant" 75 | (regex-match? char-constant-regex "'abc\\n'")) 76 | (test-assert (not (regex-match? char-constant-regex "blha 'a'")))) 77 | 78 | (test-group "(test-sci-constant-regex)" 79 | (test-assert "scientific notation constant" 80 | (regex-match? sci-constant-regex "81e-2"))) 81 | 82 | (test-group "(test-float-constant-regex-frac)" 83 | (test-assert "fraction floating point constant without whole part" 84 | (regex-match? float-constant-regex-frac ".024E-3F")) 85 | (test-assert "fraction floating point constant with whole part" 86 | (regex-match? 
float-constant-regex-frac "0184.708e+9fl")) 87 | (test-assert "fraction floating point constant without fraction part" 88 | (not (regex-match? float-constant-regex-frac "98.e+085L")))) 89 | 90 | (test-group "(test-float-constant-regex-whole)" 91 | (test-assert "whole floating point constant without fraction part" 92 | (regex-match? float-constant-regex-whole "983.E-3F")) 93 | (test-assert "whole floating point constant with fraction part" 94 | (regex-match? float-constant-regex-whole "0184.708e+9fl")) 95 | (test-assert "whole floating point constant without whole part" 96 | (not (regex-match? float-constant-regex-whole ".41e+085L")))) 97 | 98 | (test-group "(test-preproc-directive-regex)" 99 | (test-assert "include directive" 100 | (regex-match? preproc-directive-regex "#include")) 101 | (test-assert "undef directive" 102 | (regex-match? preproc-directive-regex "#undef")) 103 | (test-assert "include_next directive" 104 | (regex-match? preproc-directive-regex "#include_next")) 105 | (test "include directive with angle-brackets path" 106 | "#include " 107 | (full-match preproc-directive-regex "#include int main ...")) 108 | (test-assert "include directive with quotes path" 109 | (regex-match? preproc-directive-regex "#import \"blah.h\""))) 110 | 111 | (test-group "(test-comment-text-regex)" 112 | (test "block comment regex" 113 | "blah " 114 | (full-match comment-text-regex "blah */ asdf")) 115 | (test "block comment regex with star and newline" 116 | "blah \n * asdf " 117 | (full-match comment-text-regex "blah \n * asdf */")) 118 | (test "line comment containing block comment" 119 | "blah /* hey */ wow" 120 | (full-match line-comment-text-regex "blah /* hey */ wow\n asdf"))) 121 | ;; End of test-group (test-regex). 122 | ) 123 | 124 | (test-group "(test-tokenize)" 125 | (test "basic code example" 126 | (tokenize (list "int main(void) {" " int i = 0;" " for (; i < 91; i++) {" " printf(\"Blah: %d\" i);" " }")) 127 | '(((tt-type . "int") (tt-whitespace . 
" ") (tt-identifier . "main") (tt-special-symbol . "(") (tt-type . "void") (tt-special-symbol . ")") (tt-whitespace . " ") (tt-special-symbol . "{")) ((tt-whitespace . " ") (tt-type . "int") (tt-whitespace . " ") (tt-identifier . "i") (tt-whitespace . " ") (tt-operator . "=") (tt-whitespace . " ") (tt-constant . "0") (tt-special-symbol . ";")) ((tt-whitespace . " ") (tt-keyword . "for") (tt-whitespace . " ") (tt-special-symbol . "(") (tt-special-symbol . ";") (tt-whitespace . " ") (tt-identifier . "i") (tt-whitespace . " ") (tt-operator . "<") (tt-whitespace . " ") (tt-constant . "91") (tt-special-symbol . ";") (tt-whitespace . " ") (tt-identifier . "i") (tt-operator . "++") (tt-special-symbol . ")") (tt-whitespace . " ") (tt-special-symbol . "{")) ((tt-whitespace . " ") (tt-identifier . "printf") (tt-special-symbol . "(") (tt-literal . "\"Blah: %d\"") (tt-whitespace . " ") (tt-identifier . "i") (tt-special-symbol . ")") (tt-special-symbol . ";")) ((tt-whitespace . " ") (tt-special-symbol . "}")))) 128 | 129 | (test "error recovery at whitespace" 130 | (tokenize (list "int Äpfel = (6 + 4) * 9;")) 131 | '(((tt-type . "int") (tt-whitespace . " ") (tt-other . "Äpfel") (tt-whitespace . " ") (tt-operator . "=") (tt-whitespace . " ") (tt-special-symbol . "(") (tt-constant . "6") (tt-whitespace . " ") (tt-operator . "+") (tt-whitespace . " ") (tt-constant . "4") (tt-special-symbol . ")") (tt-whitespace . " ") (tt-operator . "*") (tt-whitespace . " ") (tt-constant . "9") (tt-special-symbol . ";")))) 132 | 133 | (test "single C-style comments" 134 | (tokenize (list "int main(void) {" " /* blah */" " printf(\"blah\");" "}")) 135 | '(((tt-type . "int") (tt-whitespace . " ") (tt-identifier . "main") (tt-special-symbol . "(") (tt-type . "void") (tt-special-symbol . ")") (tt-whitespace . " ") (tt-special-symbol . "{")) ((tt-whitespace . " ") (tt-comment . "/*") (tt-comment-text . " blah ") (tt-uncomment . "*/")) ((tt-whitespace . " ") (tt-identifier . 
"printf") (tt-special-symbol . "(") (tt-literal . "\"blah\"") (tt-special-symbol . ")") (tt-special-symbol . ";")) ((tt-special-symbol . "}")))) 136 | 137 | (test "multi-line C-style comments" 138 | (tokenize (list "int a = 2;" "/*blah" "asdf */" "int b = 4;")) 139 | '(((tt-type . "int") (tt-whitespace . " ") (tt-identifier . "a") (tt-whitespace . " ") (tt-operator . "=") (tt-whitespace . " ") (tt-constant . "2") (tt-special-symbol . ";")) ((tt-comment . "/*") (tt-comment-text . "blah")) ((tt-comment-text . "asdf ") (tt-uncomment . "*/")) ((tt-type . "int") (tt-whitespace . " ") (tt-identifier . "b") (tt-whitespace . " ") (tt-operator . "=") (tt-whitespace . " ") (tt-constant . "4") (tt-special-symbol . ";")))) 140 | 141 | (test "multi-line C-style comment without end" 142 | (tokenize (list "int blah = 5;" "/* I don't end," "But this is still me")) 143 | '(((tt-type . "int") (tt-whitespace . " ") (tt-identifier . "blah") (tt-whitespace . " ") (tt-operator . "=") (tt-whitespace . " ") (tt-constant . "5") (tt-special-symbol . ";")) ((tt-comment . "/*") (tt-comment-text . " I don't end,")) ((tt-comment-text . "But this is still me")))) 144 | 145 | (test "C-style comment without beginning wraps to start" 146 | (tokenize (list "int main(void) */ {int a = 0;")) 147 | '(((tt-comment-text . "int main(void) ") (tt-trailing-uncomment . "*/") (tt-whitespace . " ") (tt-special-symbol . "{") (tt-type . "int") (tt-whitespace . " ") (tt-identifier . "a") (tt-whitespace . " ") (tt-operator . "=") (tt-whitespace . " ") (tt-constant . "0") (tt-special-symbol . ";")))) 148 | 149 | (test "code can be commented-out" 150 | (tokenize (list "int" "a" "=" "2;" " */ /* another comment */")) 151 | '(((tt-comment-text . "int")) ((tt-comment-text . "a")) ((tt-comment-text . "=")) ((tt-comment-text . "2;")) ((tt-comment-text . " ") (tt-trailing-uncomment . "*/") (tt-whitespace . " ") (tt-comment . "/*") (tt-comment-text . " another comment ") (tt-uncomment . 
"*/")))) 152 | 153 | (test "C++ style comments can contain block comments" 154 | (tokenize (list "int a = 7; // This C++ style comment can contain this */ or that /*.""// It even continues on the next line!")) 155 | '(((tt-type . "int") (tt-whitespace . " ") (tt-identifier . "a") (tt-whitespace . " ") (tt-operator . "=") (tt-whitespace . " ") (tt-constant . "7") (tt-special-symbol . ";") (tt-whitespace . " ") (tt-comment . "//") (tt-comment-text . " This C++ style comment can contain this */ or that /*.")) ((tt-comment . "//") (tt-comment-text . " It even continues on the next line!")))) 156 | 157 | (test "retrieve include paths" 158 | (tokenize (list "#include" "#include\"debugger.h\"" "#include " "#include \"debugger.h\"")) 159 | '(((tt-preproc . "#include") (tt-whitespace . "") (tt-include-filepath . "")) ((tt-preproc . "#include") (tt-whitespace . "") (tt-include-filepath . "\"debugger.h\"")) ((tt-preproc . "#include") (tt-whitespace . " ") (tt-include-filepath . "")) ((tt-preproc . "#include") (tt-whitespace . " ") (tt-include-filepath . "\"debugger.h\""))) 160 | ) 161 | (test "don't split identifier starting with keyword" 162 | (tokenize (list "struct breakpoints { int a; };" "struct breakpoints bp = { 5 };")) 163 | '(((tt-keyword . "struct") (tt-whitespace . " ") (tt-identifier . "breakpoints") (tt-whitespace . " ") (tt-special-symbol . "{") (tt-whitespace . " ") (tt-type . "int") (tt-whitespace . " ") (tt-identifier . "a") (tt-special-symbol . ";") (tt-whitespace . " ") (tt-special-symbol . "}") (tt-special-symbol . ";")) ((tt-keyword . "struct") (tt-whitespace . " ") (tt-identifier . "breakpoints") (tt-whitespace . " ") (tt-identifier . "bp") (tt-whitespace . " ") (tt-operator . "=") (tt-whitespace . " ") (tt-special-symbol . "{") (tt-whitespace . " ") (tt-constant . "5") (tt-whitespace . " ") (tt-special-symbol . "}") (tt-special-symbol . ";")))) 164 | 165 | ;; End test-group (test-tokenize). 
166 | ) 167 | 168 | (test-exit) 169 | --------------------------------------------------------------------------------