├── .assets
└── demo.cast
├── .gitignore
├── .gitmodules
├── LICENSE
├── Makefile
├── README.md
├── examples
└── free_uninit.c
├── src
├── args.c
├── args.h
├── backtrace.c
├── backtrace.h
├── breakpoints.c
├── breakpoints.h
├── c-syntax.scm
├── debugger.c
├── debugger.h
├── history.c
├── history.h
├── info.c
├── info.h
├── magic.c
├── magic.h
├── print-source.scm
├── print_source.c
├── print_source.h
├── ptrace.c
├── ptrace.h
├── registers.c
├── registers.h
├── spray.c
├── spray_dwarf.c
├── spray_dwarf.h
├── spray_elf.c
├── spray_elf.h
└── tokenize.scm
└── tests
├── assets
├── Makefile
├── commented.c
├── custom_types.c
├── deref_pointers.c
├── extern-variables
│ ├── first_file.c
│ ├── main.c
│ ├── second_file.c
│ └── third_file.c
├── include-variable
│ ├── header.h
│ └── main.c
├── many-files
│ ├── bar1.h
│ ├── bar2.h
│ ├── baz.h
│ ├── foo1.c
│ ├── foo1.h
│ ├── foo2.c
│ ├── foo2.h
│ └── main.c
├── multi-file
│ ├── file1.c
│ ├── file2.c
│ └── file2.h
├── nested_functions.c
├── pointers.c
├── print_args.c
├── recurring_variables.c
├── simple.c
└── type_examples.c
├── c-types.scm
├── debugger.c
├── dwarf.c
├── elf.c
├── main.c
├── test_integration.py
├── test_utils.h
└── tokenize.scm
/.assets/demo.cast:
--------------------------------------------------------------------------------
1 | {"version": 2, "width": 80, "height": 25, "timestamp": 1699696489, "env": {"SHELL": "zsh", "TERM": "xterm-kitty"}}
2 | [0.368112, "o", "\u001b]2;spray a.out\u0007\u001b]1;spray\u0007"]
3 | [0.396934, "o", "🐛🐛🐛 34156 🐛🐛🐛\r\n"]
4 | [0.639952, "o", " 20 }\u001b[0m\r\n 21\r\n 22 \u001b[32mint\u001b[0m \u001b[0mmain\u001b[0m(\u001b[0m\u001b[32mvoid\u001b[0m)\u001b[0m \u001b[0m{\u001b[0m\r\n 23 -> \u001b[0mprint_concat\u001b[0m(\u001b[0m\u001b[31m\"foo\"\u001b[0m,\u001b[0m \u001b[0m\u001b[31m\"bar\"\u001b[0m)\u001b[0m;\u001b[0m\r\n 24 \u001b[0mprint_concat\u001b[0m(\u001b[0m\u001b[31m\"foo\"\u001b[0m,\u001b[0m \u001b[0m\u001b[31m\"\"\u001b[0m)\u001b[0m;\u001b[0m\r\n 25 \u001b[0m\u001b[35mreturn\u001b[0m \u001b[0m\u001b[34m0\u001b[0m;\u001b[0m\r\n 26 }\u001b[0m\r\nspray> "]
5 | [1.516171, "o", "b"]
6 | [1.603219, "o", "r"]
7 | [1.648151, "o", "e"]
8 | [1.745696, "o", "a"]
9 | [1.816271, "o", "k"]
10 | [1.992976, "o", " "]
11 | [2.133773, "o", "p"]
12 | [2.235755, "o", "r"]
13 | [2.353192, "o", "i"]
14 | [2.425048, "o", "n"]
15 | [2.488264, "o", "t"]
16 | [2.692241, "o", "_"]
17 | [2.917091, "o", "c"]
18 | [2.944816, "o", "o"]
19 | [3.054356, "o", "n"]
20 | [3.160349, "o", "c"]
21 | [3.271191, "o", "a"]
22 | [3.403404, "o", "t"]
23 | [3.786994, "o", "\r\n"]
24 | [3.788509, "o", "spray> "]
25 | [4.386978, "o", "c"]
26 | [4.431837, "o", "\r\n"]
27 | [4.432644, "o", "Hit breakpoint at address 0x0000000000401180 in free_uninit.c\r\n"]
28 | [4.433025, "o", " 4\r\n 5 \u001b[32mvoid\u001b[0m \u001b[0mprint_concat\u001b[0m(\u001b[0m\u001b[32mchar\u001b[0m \u001b[0m\u001b[33m*\u001b[0ma\u001b[0m,\u001b[0m \u001b[0m\u001b[32mchar\u001b[0m \u001b[0m\u001b[33m*\u001b[0mb\u001b[0m)\u001b[0m \u001b[0m{\u001b[0m\r\n 6 \u001b[0m\u001b[32mchar\u001b[0m \u001b[0m\u001b[33m*\u001b[0mbuf\u001b[0m;\u001b[0m\r\n 7 -> \u001b[0m\u001b[32mint\u001b[0m \u001b[0ma_len\u001b[0m \u001b[0m\u001b[33m=\u001b[0m \u001b[0mstrlen\u001b[0m(\u001b[0ma\u001b[0m)\u001b[0m;\u001b[0m\r\n 8 \u001b[0m\u001b[32mint\u001b[0m \u001b[0mb_len\u001b[0m \u001b[0m\u001b[33m=\u001b[0m \u001b[0mstrlen\u001b[0m(\u001b[0mb\u001b[0m)\u001b[0m;\u001b[0m\r\n 9\r\n 10 \u001b[0m\u001b[35mif\u001b[0m \u001b[0m(\u001b[0mb_len\u001b[0m \u001b[0m\u001b[33m>\u001b[0m \u001b[0m\u001b[34m0\u001b[0m)\u001b[0m \u001b[0m{\u001b[0m\r\n"]
29 | [4.433286, "o", "spray> "]
30 | [4.993165, "o", "n"]
31 | [5.202661, "o", "\r\n"]
32 | [5.205535, "o", " 5 \u001b[32mvoid\u001b[0m \u001b[0mprint_concat\u001b[0m(\u001b[0m\u001b[32mchar\u001b[0m \u001b[0m\u001b[33m*\u001b[0ma\u001b[0m,\u001b[0m \u001b[0m\u001b[32mchar\u001b[0m \u001b[0m\u001b[33m*\u001b[0mb\u001b[0m)\u001b[0m \u001b[0m{\u001b[0m\r\n 6 \u001b[0m\u001b[32mchar\u001b[0m \u001b[0m\u001b[33m*\u001b[0mbuf\u001b[0m;\u001b[0m\r\n 7 \u001b[0m\u001b[32mint\u001b[0m \u001b[0ma_len\u001b[0m \u001b[0m\u001b[33m=\u001b[0m \u001b[0mstrlen\u001b[0m(\u001b[0ma\u001b[0m)\u001b[0m;\u001b[0m\r\n 8 -> \u001b[0m\u001b[32mint\u001b[0m \u001b[0mb_len\u001b[0m \u001b[0m\u001b[33m=\u001b[0m \u001b[0mstrlen\u001b[0m(\u001b[0mb\u001b[0m)\u001b[0m;\u001b[0m\r\n 9\r\n 10 \u001b[0m\u001b[35mif\u001b[0m \u001b[0m(\u001b[0mb_len\u001b[0m \u001b[0m\u001b[33m>\u001b[0m \u001b[0m\u001b[34m0\u001b[0m)\u001b[0m \u001b[0m{\u001b[0m\r\n 11 \u001b[0mbuf\u001b[0m \u001b[0m\u001b[33m=\u001b[0m \u001b[0mmalloc\u001b[0m(\u001b[0ma_len\u001b[0m \u001b[0m\u001b[33m+\u001b[0m \u001b[0mb_len\u001b[0m \u001b[0m\u001b[33m+\u001b[0m \u001b[0m\u001b[34m1\u001b[0m)\u001b[0m;\u001b[0m\r\nspray> "]
33 | [5.656346, "o", "n"]
34 | [5.854361, "o", "\r\n"]
35 | [5.856267, "o", " 7 \u001b[0m\u001b[32mint\u001b[0m \u001b[0ma_len\u001b[0m \u001b[0m\u001b[33m=\u001b[0m \u001b[0mstrlen\u001b[0m(\u001b[0ma\u001b[0m)\u001b[0m;\u001b[0m\r\n 8 \u001b[0m\u001b[32mint\u001b[0m \u001b[0mb_len\u001b[0m \u001b[0m\u001b[33m=\u001b[0m \u001b[0mstrlen\u001b[0m(\u001b[0mb\u001b[0m)\u001b[0m;\u001b[0m\r\n 9\r\n 10 -> \u001b[0m\u001b[35mif\u001b[0m \u001b[0m(\u001b[0mb_len\u001b[0m \u001b[0m\u001b[33m>\u001b[0m \u001b[0m\u001b[34m0\u001b[0m)\u001b[0m \u001b[0m{\u001b[0m\r\n 11 \u001b[0mbuf\u001b[0m \u001b[0m\u001b[33m=\u001b[0m \u001b[0mmalloc\u001b[0m(\u001b[0ma_len\u001b[0m \u001b[0m\u001b[33m+\u001b[0m \u001b[0mb_len\u001b[0m \u001b[0m\u001b[33m+\u001b[0m \u001b[0m\u001b[34m1\u001b[0m)\u001b[0m;\u001b[0m\r\n 12 \u001b[0mstrcpy\u001b[0m(\u001b[0mbuf\u001b[0m,\u001b[0m \u001b[0ma\u001b[0m)\u001b[0m;\u001b[0m\r\n 13 \u001b[0mstrcpy\u001b[0m(\u001b[0mbuf\u001b[0m \u001b[0m\u001b[33m+\u001b[0m \u001b[0ma_len\u001b[0m,\u001b[0m \u001b[0mb\u001b[0m)\u001b[0m;\u001b[0m\r\n"]
36 | [5.856363, "o", "spray> "]
37 | [6.202183, "o", "p"]
38 | [6.246698, "o", " "]
39 | [6.535284, "o", "b"]
40 | [6.946225, "o", "_"]
41 | [7.203841, "o", "l"]
42 | [7.335918, "o", "e"]
43 | [7.383841, "o", "n"]
44 | [7.591174, "o", "\r\n"]
45 | [7.592125, "o", " 3 (free_uninit.c:8)\r\n"]
46 | [7.592178, "o", "spray> "]
47 | [8.071171, "o", "n"]
48 | [8.277766, "o", "\r\n"]
49 | [8.279863, "o", " 8 \u001b[0m\u001b[32mint\u001b[0m \u001b[0mb_len\u001b[0m \u001b[0m\u001b[33m=\u001b[0m \u001b[0mstrlen\u001b[0m(\u001b[0mb\u001b[0m)\u001b[0m;\u001b[0m\r\n 9\r\n 10 \u001b[0m\u001b[35mif\u001b[0m \u001b[0m(\u001b[0mb_len\u001b[0m \u001b[0m\u001b[33m>\u001b[0m \u001b[0m\u001b[34m0\u001b[0m)\u001b[0m \u001b[0m{\u001b[0m\r\n 11 -> \u001b[0mbuf\u001b[0m \u001b[0m\u001b[33m=\u001b[0m \u001b[0mmalloc\u001b[0m(\u001b[0ma_len\u001b[0m \u001b[0m\u001b[33m+\u001b[0m \u001b[0mb_len\u001b[0m \u001b[0m\u001b[33m+\u001b[0m \u001b[0m\u001b[34m1\u001b[0m)\u001b[0m;\u001b[0m\r\n 12 \u001b[0mstrcpy\u001b[0m(\u001b[0mbuf\u001b[0m,\u001b[0m \u001b[0ma\u001b[0m)\u001b[0m;\u001b[0m\r\n 13 \u001b[0mstrcpy\u001b[0m(\u001b[0mbuf\u001b[0m \u001b[0m\u001b[33m+\u001b[0m \u001b[0ma_len\u001b[0m,\u001b[0m \u001b[0mb\u001b[0m)\u001b[0m;\u001b[0m\r\n 14 \u001b[0mputs\u001b[0m(\u001b[0mbuf\u001b[0m)\u001b[0m;\u001b[0m\r\nspray> "]
50 | [8.703905, "o", "n"]
51 | [8.926853, "o", "\r\n"]
52 | [8.92877, "o", " 9\r\n 10 \u001b[0m\u001b[35mif\u001b[0m \u001b[0m(\u001b[0mb_len\u001b[0m \u001b[0m\u001b[33m>\u001b[0m \u001b[0m\u001b[34m0\u001b[0m)\u001b[0m \u001b[0m{\u001b[0m\r\n 11 \u001b[0mbuf\u001b[0m \u001b[0m\u001b[33m=\u001b[0m \u001b[0mmalloc\u001b[0m(\u001b[0ma_len\u001b[0m \u001b[0m\u001b[33m+\u001b[0m \u001b[0mb_len\u001b[0m \u001b[0m\u001b[33m+\u001b[0m \u001b[0m\u001b[34m1\u001b[0m)\u001b[0m;\u001b[0m\r\n 12 -> \u001b[0mstrcpy\u001b[0m(\u001b[0mbuf\u001b[0m,\u001b[0m \u001b[0ma\u001b[0m)\u001b[0m;\u001b[0m\r\n 13 \u001b[0mstrcpy\u001b[0m(\u001b[0mbuf\u001b[0m \u001b[0m\u001b[33m+\u001b[0m \u001b[0ma_len\u001b[0m,\u001b[0m \u001b[0mb\u001b[0m)\u001b[0m;\u001b[0m\r\n 14 \u001b[0mputs\u001b[0m(\u001b[0mbuf\u001b[0m)\u001b[0m;\u001b[0m\r\n 15 \u001b[0m}\u001b[0m \u001b[0m\u001b[35melse\u001b[0m \u001b[0m{\u001b[0m\r\n"]
53 | [8.928837, "o", "spray> "]
54 | [9.403846, "o", "p"]
55 | [9.457709, "o", " "]
56 | [9.712869, "o", "b"]
57 | [9.769724, "o", "u"]
58 | [9.886716, "o", "f"]
59 | [10.286159, "o", "\r\n"]
60 | [10.286619, "o", " 0x00000000004052a0 (free_uninit.c:6)\r\nspray> "]
61 | [11.224844, "o", "l"]
62 | [11.290902, "o", "e"]
63 | [11.479218, "o", "a"]
64 | [11.722201, "o", "v"]
65 | [11.794879, "o", "e"]
66 | [11.959731, "o", "\r\n"]
67 | [11.960012, "o", "foobar\r\n"]
68 | [11.961021, "o", " 21\r\n 22 \u001b[32mint\u001b[0m \u001b[0mmain\u001b[0m(\u001b[0m\u001b[32mvoid\u001b[0m)\u001b[0m \u001b[0m{\u001b[0m\r\n 23 \u001b[0mprint_concat\u001b[0m(\u001b[0m\u001b[31m\"foo\"\u001b[0m,\u001b[0m \u001b[0m\u001b[31m\"bar\"\u001b[0m)\u001b[0m;\u001b[0m\r\n 24 -> \u001b[0mprint_concat\u001b[0m(\u001b[0m\u001b[31m\"foo\"\u001b[0m,\u001b[0m \u001b[0m\u001b[31m\"\"\u001b[0m)\u001b[0m;\u001b[0m\r\n 25 \u001b[0m\u001b[35mreturn\u001b[0m \u001b[0m\u001b[34m0\u001b[0m;\u001b[0m\r\n 26 }\u001b[0m\r\n"]
69 | [11.961281, "o", "spray> "]
70 | [13.528316, "o", "d"]
71 | [13.786704, "o", "e"]
72 | [14.240303, "o", "e"]
73 | [14.807235, "o", "\rspray> de\u001b[0K\r\u001b[9C"]
74 | [15.023292, "o", "l"]
75 | [15.089166, "o", "e"]
76 | [15.176018, "o", "t"]
77 | [15.296072, "o", "e"]
78 | [15.395173, "o", " "]
79 | [15.527185, "o", "p"]
80 | [15.617162, "o", "r"]
81 | [15.724553, "o", "i"]
82 | [15.787081, "o", "n"]
83 | [15.835567, "o", "t"]
84 | [16.018626, "o", "_"]
85 | [16.213391, "o", "c"]
86 | [16.26728, "o", "o"]
87 | [16.35738, "o", "n"]
88 | [16.441781, "o", "c"]
89 | [16.550101, "o", "a"]
90 | [16.660493, "o", "t"]
91 | [16.86185, "o", "\r\n"]
92 | [16.862965, "o", "spray> "]
93 | [18.06518, "o", "s"]
94 | [18.160724, "o", "t"]
95 | [18.245, "o", "e"]
96 | [18.38572, "o", "p"]
97 | [18.649892, "o", "\r\n"]
98 | [18.652583, "o", " 2 \u001b[35m#include\u001b[0m \u001b[0m\u001b[0m\r\n 3 \u001b[35m#include\u001b[0m \u001b[0m\u001b[0m\r\n 4\r\n 5 -> \u001b[32mvoid\u001b[0m \u001b[0mprint_concat\u001b[0m(\u001b[0m\u001b[32mchar\u001b[0m \u001b[0m\u001b[33m*\u001b[0ma\u001b[0m,\u001b[0m \u001b[0m\u001b[32mchar\u001b[0m \u001b[0m\u001b[33m*\u001b[0mb\u001b[0m)\u001b[0m \u001b[0m{\u001b[0m\r\n 6 \u001b[0m\u001b[32mchar\u001b[0m \u001b[0m\u001b[33m*\u001b[0mbuf\u001b[0m;\u001b[0m\r\n 7 \u001b[0m\u001b[32mint\u001b[0m \u001b[0ma_len\u001b[0m \u001b[0m\u001b[33m=\u001b[0m \u001b[0mstrlen\u001b[0m(\u001b[0ma\u001b[0m)\u001b[0m;\u001b[0m\r\n 8 \u001b[0m\u001b[32mint\u001b[0m \u001b[0mb_len\u001b[0m \u001b[0m\u001b[33m=\u001b[0m \u001b[0mstrlen\u001b[0m(\u001b[0mb\u001b[0m)\u001b[0m;\u001b[0m\r\n"]
99 | [18.652682, "o", "spray> "]
100 | [19.647382, "o", "n"]
101 | [19.836063, "o", "\r\n"]
102 | [19.838181, "o", " 4\r\n 5 \u001b[19mvoid\u001b[0m \u001b[0mprint_concat\u001b[0m(\u001b[0m\u001b[19mchar\u001b[0m \u001b[0m\u001b[33m*\u001b[0ma\u001b[0m,\u001b[0m \u001b[0m\u001b[19mchar\u001b[0m \u001b[0m\u001b[33m*\u001b[0mb\u001b[0m)\u001b[0m \u001b[0m{\u001b[0m\r\n 6 \u001b[0m\u001b[19mchar\u001b[0m \u001b[0m\u001b[33m*\u001b[0mbuf\u001b[0m;\u001b[0m\r\n 7 -> \u001b[0m\u001b[19mint\u001b[0m \u001b[0ma_len\u001b[0m \u001b[0m\u001b[33m=\u001b[0m \u001b[0mstrlen\u001b[0m(\u001b[0ma\u001b[0m)\u001b[0m;\u001b[0m\r\n 8 \u001b[0m\u001b[19mint\u001b[0m \u001b[0mb_len\u001b[0m \u001b[0m\u001b[33m=\u001b[0m \u001b[0mstrlen\u001b[0m(\u001b[0mb\u001b[0m)\u001b[0m;\u001b[0m\r\n 9\r\n 10 \u001b[0m\u001b[35mif\u001b[0m \u001b[0m(\u001b[0mb_len\u001b[0m \u001b[0m\u001b[33m>\u001b[0m \u001b[0m\u001b[34m0\u001b[0m)\u001b[0m \u001b[0m{\u001b[0m\r\n"]
103 | [19.838321, "o", "spray> "]
104 | [20.139396, "o", "\r\n"]
105 | [20.141079, "o", " 5 \u001b[32mvoid\u001b[0m \u001b[0mprint_concat\u001b[0m(\u001b[0m\u001b[32mchar\u001b[0m \u001b[0m\u001b[20m*\u001b[0ma\u001b[0m,\u001b[0m \u001b[0m\u001b[32mchar\u001b[0m \u001b[0m\u001b[20m*\u001b[0mb\u001b[0m)\u001b[0m \u001b[0m{\u001b[0m\r\n 6 \u001b[0m\u001b[32mchar\u001b[0m \u001b[0m\u001b[20m*\u001b[0mbuf\u001b[0m;\u001b[0m\r\n 7 \u001b[0m\u001b[32mint\u001b[0m \u001b[0ma_len\u001b[0m \u001b[0m\u001b[20m=\u001b[0m \u001b[0mstrlen\u001b[0m(\u001b[0ma\u001b[0m)\u001b[0m;\u001b[0m\r\n 8 -> \u001b[0m\u001b[32mint\u001b[0m \u001b[0mb_len\u001b[0m \u001b[0m\u001b[20m=\u001b[0m \u001b[0mstrlen\u001b[0m(\u001b[0mb\u001b[0m)\u001b[0m;\u001b[0m\r\n 9\r\n 10 \u001b[0m\u001b[35mif\u001b[0m \u001b[0m(\u001b[0mb_len\u001b[0m \u001b[0m\u001b[20m>\u001b[0m \u001b[0m\u001b[34m0\u001b[0m)\u001b[0m \u001b[0m{\u001b[0m\r\n 11 \u001b[0mbuf\u001b[0m \u001b[0m\u001b[20m=\u001b[0m \u001b[0mmalloc\u001b[0m(\u001b[0ma_len\u001b[0m \u001b[0m\u001b[20m+\u001b[0m \u001b[0mb_len\u001b[0m \u001b[0m\u001b[20m+\u001b[0m \u001b[0m\u001b[34m1\u001b[0m)\u001b[0m;\u001b[0m\r\n"]
106 | [20.141214, "o", "spray> "]
107 | [20.316017, "o", "\r\n"]
108 | [20.317851, "o", " 7 \u001b[0m\u001b[32mint\u001b[0m \u001b[0ma_len\u001b[0m \u001b[0m\u001b[20m=\u001b[0m \u001b[0mstrlen\u001b[0m(\u001b[0ma\u001b[0m)\u001b[0m;\u001b[0m\r\n 8 \u001b[0m\u001b[32mint\u001b[0m \u001b[0mb_len\u001b[0m \u001b[0m\u001b[20m=\u001b[0m \u001b[0mstrlen\u001b[0m(\u001b[0mb\u001b[0m)\u001b[0m;\u001b[0m\r\n 9\r\n 10 -> \u001b[0m\u001b[35mif\u001b[0m \u001b[0m(\u001b[0mb_len\u001b[0m \u001b[0m\u001b[20m>\u001b[0m \u001b[0m\u001b[34m0\u001b[0m)\u001b[0m \u001b[0m{\u001b[0m\r\n 11 \u001b[0mbuf\u001b[0m \u001b[0m\u001b[20m=\u001b[0m \u001b[0mmalloc\u001b[0m(\u001b[0ma_len\u001b[0m \u001b[0m\u001b[20m+\u001b[0m \u001b[0mb_len\u001b[0m \u001b[0m\u001b[20m+\u001b[0m \u001b[0m\u001b[34m1\u001b[0m)\u001b[0m;\u001b[0m\r\n 12 \u001b[0mstrcpy\u001b[0m(\u001b[0mbuf\u001b[0m,\u001b[0m \u001b[0ma\u001b[0m)\u001b[0m;\u001b[0m\r\n 13 \u001b[0mstrcpy\u001b[0m(\u001b[0mbuf\u001b[0m \u001b[0m\u001b[20m+\u001b[0m \u001b[0ma_len\u001b[0m,\u001b[0m \u001b[0mb\u001b[0m)\u001b[0m;\u001b[0m\r\n"]
109 | [20.317964, "o", "spray> "]
110 | [20.679326, "o", "\r\n"]
111 | [20.681062, "o", " 13 \u001b[0mstrcpy\u001b[0m(\u001b[0mbuf\u001b[0m \u001b[0m\u001b[20m+\u001b[0m \u001b[0ma_len\u001b[0m,\u001b[0m \u001b[0mb\u001b[0m)\u001b[0m;\u001b[0m\r\n 14 \u001b[0mputs\u001b[0m(\u001b[0mbuf\u001b[0m)\u001b[0m;\u001b[0m\r\n 15 \u001b[0m}\u001b[0m \u001b[0m\u001b[35melse\u001b[0m \u001b[0m{\u001b[0m\r\n 16 -> \u001b[0mputs\u001b[0m(\u001b[0ma\u001b[0m)\u001b[0m;\u001b[0m\r\n 17 \u001b[0m}\u001b[0m\r\n 18\r\n 19 \u001b[0mfree\u001b[0m(\u001b[0mbuf\u001b[0m)\u001b[0m;\u001b[0m\r\n"]
112 | [20.68123, "o", "spray> "]
113 | [22.178929, "o", "p"]
114 | [22.227345, "o", " "]
115 | [22.730766, "o", "b"]
116 | [22.772284, "o", "u"]
117 | [22.901487, "o", "f"]
118 | [22.98863, "o", "\r\n"]
119 | [22.989354, "o", " 0x00000000004052a0 (free_uninit.c:6)\r\nspray> "]
120 | [23.937341, "o", "n"]
121 | [24.162253, "o", "\r\n"]
122 | [24.163273, "o", "foo\r\n"]
123 | [24.164034, "o", " 16 \u001b[0mputs\u001b[0m(\u001b[0ma\u001b[0m)\u001b[0m;\u001b[0m\r\n 17 \u001b[0m}\u001b[0m\r\n 18\r\n 19 -> \u001b[0mfree\u001b[0m(\u001b[0mbuf\u001b[0m)\u001b[0m;\u001b[0m\r\n 20 }\u001b[0m\r\n 21\r\n 22 \u001b[32mint\u001b[0m \u001b[0mmain\u001b[0m(\u001b[0m\u001b[32mvoid\u001b[0m)\u001b[0m \u001b[0m{\u001b[0m\r\n"]
124 | [24.164144, "o", "spray> "]
125 | [25.509229, "o", "n"]
126 | [25.716161, "o", "\r\n"]
127 | [25.717049, "o", "free(): double free detected in tcache 2\r\nChild was stopped by SIGABRT\r\n"]
128 | [25.717161, "o", "ERR: No source info for PC 0x00007ffff7e56884\r\nspray> "]
129 | [26.02117, "o", "\r\n"]
130 | [26.035574, "o", "\u001b[1m\u001b[7m%\u001b[27m\u001b[1m\u001b[0m \r \r"]
131 | [26.035654, "o", "\u001b]2;thasso@tatkx:~/Fun/spray/examples\u0007\u001b]1;..pray/examples\u0007"]
132 | [26.037403, "o", "\u001b]7;file://tatkx/home/thasso/Fun/spray/examples\u001b\\"]
133 | [26.047915, "o", "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J\u001b[31mtatkx\u001b[00m:\u001b[32mexamples\u001b[00m \u001b[34m(main*)\u001b[00m $ \u001b[K"]
134 | [26.047953, "o", "\u001b[?1h"]
135 | [26.04797, "o", "\u001b[?2004h"]
136 | [27.395422, "o", "\u001b[?2004l\r\r\n"]
137 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | **/a.out
2 | build/
3 | .cache
4 | compile_commands.json
5 | **/__pycache__/
6 | **/*.bin
7 | *~
8 | *.import.scm
9 | /libdwarf-code/
10 |
--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "dependencies/hashmap.c"]
2 | path = dependencies/hashmap.c
3 | url = https://github.com/tidwall/hashmap.c.git
4 | [submodule "dependencies/linenoise"]
5 | path = dependencies/linenoise
6 | url = https://github.com/antirez/linenoise.git
7 | [submodule "dependencies/munit"]
8 | path = dependencies/munit
9 | url = https://github.com/nemequ/munit.git
10 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright 2023 Thassilo Schulze
2 |
3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
4 |
5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
6 |
7 | THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
8 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | CC = clang
2 | CFLAGS = -fsanitize=address -g -Werror -Wall -Wextra -pedantic-errors -Wno-gnu-designator -std=gnu11
3 | CPPFLAGS = -MMD -I$(SOURCE_DIR) -I$(DEP)/linenoise -I$(DEP)/hashmap.c
4 | LDFLAGS = -ldwarf -lchicken -lzstd -lz
5 |
6 | BUILD_DIR = build
7 | SOURCE_DIR = src
8 | DEP = dependencies
9 | SOURCES = $(wildcard $(SOURCE_DIR)/*.c)
10 | OBJECTS = $(patsubst $(SOURCE_DIR)/%.c, $(BUILD_DIR)/%.o, $(SOURCES))
11 | OBJECTS += $(BUILD_DIR)/hashmap.o $(BUILD_DIR)/linenoise.o $(BUILD_DIR)/print-source.o $(BUILD_DIR)/tokenize.o $(BUILD_DIR)/c-syntax.o
12 | BINARY = $(BUILD_DIR)/spray
13 | DEPS = $(OBJECTS:%.o=%.d)
14 | # Targets that name actions, not files: always run them, even if a file of the same name exists.
15 | .PHONY: all bin clean run test unit cunit schemeunit integration assets install docker
16 |
17 | # === SPRAY ===
18 |
19 | all: $(BINARY) assets
20 | @echo Build successful 👍️
21 |
22 | run: all
23 | ./$(BINARY) $(args)
24 |
25 | install: $(BINARY)
26 | cp $(BINARY) $$HOME/.local/bin/
27 |
28 | docker: $(BINARY)
29 | docker create -i ubuntu
30 | docker cp $(BINARY) `docker ps -q -l`:/opt/spray
31 | docker start `docker ps -q -l`
32 | docker exec -i `docker ps -q -l` bash
33 |
34 | $(BINARY): $(OBJECTS)
35 | $(CC) $(CFLAGS) $(OBJECTS) -o $(BINARY) $(LDFLAGS)
36 |
37 | -include $(DEPS)
38 |
39 | # Wow, seems like CHICKEN is quite strict ...
40 | $(BUILD_DIR)/print_source.o: CFLAGS += -Wno-unused-parameter -Wno-strict-prototypes -Wno-pedantic -Wno-unused-but-set-variable -Wno-unused-variable
41 | $(BUILD_DIR)/print_source.o: CPPFLAGS += -I/usr/include/chicken
42 | $(BUILD_DIR)/print_source.o: $(SOURCE_DIR)/print_source.c | $(BUILD_DIR)
43 | $(CC) $(CFLAGS) $(CPPFLAGS) -c $< -o $@
44 |
45 | $(BUILD_DIR)/print-source.o: $(SOURCE_DIR)/print-source.scm $(BUILD_DIR)/tokenize.o | $(BUILD_DIR)
46 | csc -uses tokenizer -c -embedded $(SOURCE_DIR)/print-source.scm -o $@
47 |
48 | $(BUILD_DIR)/tokenize.o: $(SOURCE_DIR)/tokenize.scm $(BUILD_DIR)/c-syntax.o | $(BUILD_DIR)
49 | csc -uses c-syntax -unit tokenizer -c -J $(SOURCE_DIR)/tokenize.scm -o $@
50 |
51 | $(BUILD_DIR)/c-syntax.o: $(SOURCE_DIR)/c-syntax.scm | $(BUILD_DIR)
52 | csc -unit c-syntax -c -J $(SOURCE_DIR)/c-syntax.scm -o $@
53 |
54 | $(BUILD_DIR)/%.o: $(SOURCE_DIR)/%.c | $(BUILD_DIR)
55 | $(CC) $(CFLAGS) $(CPPFLAGS) -c $< -o $@
56 |
57 | $(BUILD_DIR)/hashmap.o: $(DEP)/hashmap.c/hashmap.c | $(BUILD_DIR)
58 | $(CC) $(CFLAGS) $(CPPFLAGS) -c $< -o $@
59 |
60 | $(BUILD_DIR)/linenoise.o: CFLAGS += -Wno-gnu-zero-variadic-macro-arguments
61 | $(BUILD_DIR)/linenoise.o: $(DEP)/linenoise/linenoise.c | $(BUILD_DIR)
62 | $(CC) $(CFLAGS) $(CPPFLAGS) -c $< -o $@
63 |
64 | # Order-only prerequisite of every object rule; -p makes re-creation harmless.
65 | $(BUILD_DIR):
66 | mkdir -p $(BUILD_DIR)
67 |
68 | # Clang's JSON compilation database, regenerated from a clean build via Bear.
69 | compile_commands.json:
70 | ifeq (, $(shell which bear))
71 | $(error "Bear is required to generate `compile_commands.json`. You can get it here: https://github.com/rizsotto/Bear.git.")
72 | else
73 | $(MAKE) clean
74 | bear -- $(MAKE) all
75 | endif
76 |
77 |
78 | # === TESTS ===
79 |
80 | TEST_SOURCE_DIR = tests
81 | TEST_BUILD_DIR = tests/build
82 | TEST_SOURCES = $(wildcard $(TEST_SOURCE_DIR)/*.c)
83 | TEST_OBJECTS = $(filter-out $(BUILD_DIR)/spray.o, $(OBJECTS))
84 | TEST_OBJECTS += $(patsubst $(TEST_SOURCE_DIR)/%.c, $(TEST_BUILD_DIR)/%.o, $(TEST_SOURCES))
85 | TEST_OBJECTS += $(TEST_BUILD_DIR)/munit.o
86 | TEST_DEPS = $(TEST_OBJECTS:%.o=%.d)
87 | TEST_BINARY = $(TEST_BUILD_DIR)/test
88 |
89 | # Run all tests.
90 | test: unit integration
91 |
92 | # Run C and Scheme unit tests.
93 | unit: cunit schemeunit
94 |
95 | cunit: CPPFLAGS += -I$(TEST_SOURCE_DIR) -I$(DEP)/munit
96 | cunit: $(TEST_BINARY) assets
97 | ./$(TEST_BINARY) $(args)
98 |
99 | schemeunit: assets
100 | csi -s tests/tokenize.scm
101 | csi -s tests/c-types.scm
102 |
103 | # Run integration tests.
104 | integration: $(BINARY) assets
105 | python -m pytest
106 | # Libraries follow the objects so the linker can resolve the symbols they need.
107 | $(TEST_BINARY): $(TEST_OBJECTS)
108 | $(CC) $(CFLAGS) $(TEST_OBJECTS) -o $(TEST_BINARY) $(LDFLAGS)
109 |
110 | -include $(TEST_DEPS)
111 |
112 | $(TEST_BUILD_DIR)/%.o: $(TEST_SOURCE_DIR)/%.c | $(TEST_BUILD_DIR)
113 | $(CC) $(CFLAGS) $(CPPFLAGS) -c $< -o $@
114 | # The test build directory is an order-only prerequisite; -p makes re-creation harmless.
115 | $(TEST_BUILD_DIR)/munit.o: $(DEP)/munit/munit.c | $(TEST_BUILD_DIR)
116 | $(CC) $(CFLAGS) $(CPPFLAGS) -c $< -o $@
117 |
118 | $(TEST_BUILD_DIR):
119 | mkdir -p $(TEST_BUILD_DIR)
120 |
121 | assets:
122 | $(MAKE) -C tests/assets all
123 |
124 | clean:
125 | $(RM) *.import.scm
126 | $(RM) -r $(BUILD_DIR) $(TEST_BUILD_DIR) compile_commands.json
127 | $(MAKE) -C tests/assets clean
128 |
129 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
🐛🐛🐛 Spray 🐛🐛🐛
3 |
4 | Get started -
5 | Issues -
6 | Bug report
7 |
8 |
9 |
10 | [![asciicast](https://asciinema.org/a/620413.svg)](https://asciinema.org/a/620413)
11 |
12 | > You can watch a tiny demo of using Spray to interact with a running program here: https://youtu.be/mjwIrfQkURc
13 |
14 | Spray is a small debugger for C code that comes with minimal mental overhead. All functionality aims to be simple and easy to grasp.
15 |
16 | In Spray you can easily control the execution of running programs, and inspect and modify their state.
17 |
18 | I started work on Spray out of curiosity about the mysterious inner workings of debuggers. In addition, I want to explore ways in which debugging can be made more approachable.
19 |
20 | ## 🦾 Features
21 |
22 | - [x] Breakpoints on functions, on lines in files and on addresses
23 | - [x] Printing and setting variables, memory at addresses and registers
24 | - [x] C syntax highlighting
25 | - [x] Backtraces
26 | - [x] Instruction, function and line level stepping
27 | - [x] Filters to format command output
28 |
29 | ## 🚀 Roadmap
30 |
31 | - [ ] Printing and modifying complex structures
32 | - [ ] Syntax highlighting for complex structures
33 | - [ ] Backtraces based on DWARF instead of frame pointers
34 | - [ ] Inlined functions
35 | - [ ] Loading external libraries
36 | - [ ] Catching signals sent to the debugged program
37 |
38 | ## 💿️ Installation
39 |
40 | Parts of the Spray frontend are written in Scheme and embedded into the application using [CHICKEN Scheme](https://www.call-cc.org/) which compiles Scheme to C. Currently, you need to have [CHICKEN installed](https://code.call-cc.org/#download) to build Spray. In the future it's possible that the generated C files are provided instead so that you only need a C compiler.
41 |
42 | Spray depends on [libdwarf](https://github.com/davea42/libdwarf-code/releases)
43 | so if you want to build Spray, you need to install libdwarf first. Then, to install Spray you clone this repository and run `make`. Note that you
44 | have to [clone all the submodules](https://stackoverflow.com/a/4438292) too.
45 |
46 | ```sh
47 | git clone --recurse-submodules https://github.com/thass0/spray.git
48 | cd spray
49 | make
50 | ```
51 |
52 | The compiled binary is named `spray` and can be found in the `build` directory.
53 |
54 | To use `spray` as a regular command you need to [add it to your `$PATH`](https://askubuntu.com/a/322773).
55 |
56 | ## 🏃♀️ Running Spray
57 |
58 | > Ensure that the binary you want to debug has debug information enabled, i.e. it was compiled with the `-g` flag. Also, you should disable all compile-time optimizations to ensure the best output.
59 |
60 | > Spray is only tested using Clang. The debug information generated by different compilers for the same piece of code varies. Thus, `clang` should be used to compile the programs you want to debug using Spray.
61 |
62 | The first argument you pass to `spray` is the name of the binary that should be debugged (the debuggee). All subsequent arguments are the arguments passed to the debuggee.
63 |
64 | For example
65 |
66 | ```sh
67 | clang -g examples/free_uninit.c
68 | spray a.out
69 | ```
70 |
71 | starts a debugging session with the executable `a.out`.
72 |
73 | ## ⌨️ Commands
74 |
75 | Spray's REPL offers the following commands to interact with a running program.
76 |
77 | ### Reading and writing values
78 |
79 |
80 |
81 | Command |
82 | Argument(s) |
83 | Description |
84 |
85 |
86 | print , p |
87 | <variable> |
88 | Print the value of the runtime variable. |
89 |
90 |
91 | <register> |
92 | Print the value of the register. |
93 |
94 |
95 | <address> |
96 | Print the value of the program's memory at the address. |
97 |
98 |
99 | set , t |
100 | <variable> <value> |
101 | Set the value of the runtime variable. |
102 |
103 |
104 | <register> <value> |
105 | Set the value of the register. |
106 |
107 |
108 | <address> <value> |
109 | Set the value of the program's memory at the address. |
110 |
111 |
112 |
113 | Register names are prefixed with a `%`, akin to the AT&T assembly syntax. This avoids name conflicts between register names and variable names. For example, to read the value of `rax`, use `print %rax`. You can find a table of all available register names in `src/registers.h`.
114 |
115 | A `<value>` can be a hexadecimal or a decimal number. The default is base 10 and hexadecimal will only be chosen if the literal contains a character that's exclusive to base 16 (i.e. one of a - f). You can prefix the literal with `0x` to explicitly use hexadecimal in cases where decimal would work as well.
116 |
117 | An `<address>` is always a hexadecimal number. The prefix `0x` is again optional.
118 |
119 | ### Breakpoints
120 |
121 |
122 |
123 | Command |
124 | Argument(s) |
125 | Description |
126 |
127 |
128 | break , b |
129 | <function> |
130 | Set a breakpoint on the function. |
131 |
132 |
133 | <file>:<line> |
134 | Set a breakpoint on the line in the file. |
135 |
136 |
137 | <address> |
138 | Set a breakpoint on the address. |
139 |
140 |
141 | delete , d |
142 | <function> |
143 | Delete a breakpoint on the function. |
144 |
145 |
146 | <file>:<line> |
147 | Delete a breakpoint on the line in the file. |
148 |
149 |
150 | <address> |
151 | Delete a breakpoint on the address. |
152 |
153 |
154 | continue , c |
155 | |
156 | Continue execution until the next breakpoint. |
157 |
158 |
159 |
160 | It's possible that the location passed to `break`, `delete`, `print`, or `set` is both a valid function name and a valid hexadecimal address. For example, `add` could refer to a function called `add` and the number `0xadd`. In such a case, the default is to interpret the location as a function name. Use the prefix `0x` to explicitly specify an address.
161 |
162 | ### Stepping
163 |
164 | | Command | Description |
165 | |------------------|-----------------------------------------------------|
166 | | `next`, `n` | Go to the next line. Don't step into functions. |
167 | | `step`, `s` | Go to the next line. Step into functions. |
168 | | `leave`, `l` | Step out of the current function. |
169 | | `inst`, `i` | Step to the next instruction. |
170 | | `backtrace`, `a` | Print a backtrace starting at the current position. |
171 |
172 | ### Filters
173 |
174 | The `print` and `set` commands can be followed by a filter, to change how output is displayed. For example, if you want to inspect the binary data in the rdx register, you can enter `print %rdx | bits`.
175 |
176 | Filters are separated from the command by a pipe symbol: ` '|' `. Currently, only one filter can be used at a time.
177 |
178 | The following table shows how different filters format the same 64-bit word with the value 103.
179 |
180 |
181 | | Filter | Output |
182 | |-----------------------|---------------------------------------------------------------------------|
183 | | `dec` (*decimal*) | `103` |
184 | | `hex` (*hexadecimal*) | `0x67` |
185 | | `addr` (*address*) | `0x0000000000000067` |
186 | | `bits` | `00000000 00000000 00000000 00000000 00000000 00000000 00000000 01100111` |
187 | | `bytes` | `00 00 00 00 00 00 00 67` |
188 | | `deref` `*` | *Prints the value found at memory address `0x67`* |
189 |
190 | Except for `deref`, all the above simply change the way the output is formatted. `deref`, abbreviated as `*`, interprets the value that would be printed as a memory address, and prints whatever is found in memory at that address. Using `deref`, you can inspect the value that a pointer points to.
191 |
192 | ## 🛠️ Contributing
193 |
194 | All contributions are welcome. Before opening a pull request, please run
195 | the test suite locally to verify that your changes don't break any other
196 | features.
197 |
198 | It's possible that some of the tests fail due to off-by-one errors when
199 | making assertions about specific values found in the example binaries that
200 | are used in the tests. Refer to [this issue](https://github.com/thass0/spray/issues/2)
201 | for more details. You can ignore tests that fail for this reason only.
202 |
203 | ## 📖 References
204 |
205 | - Sy Brand's blog series [Writing a Linux Debugger](https://blog.tartanllama.xyz/writing-a-linux-debugger-setup/) on writing a debugger in C++
206 |
207 | - [The DWARF 5 standard](https://dwarfstd.org/dwarf5std.html)
208 |
209 | - [libdwarf's documentation](https://www.prevanders.net/libdwarfdoc/index.html)
210 |
211 | - Eli Bendersky's posts [How debuggers work](https://eli.thegreenplace.net/2011/01/23/how-debuggers-work-part-1)
212 |
--------------------------------------------------------------------------------
/examples/free_uninit.c:
--------------------------------------------------------------------------------
1 | #include <stdio.h>
2 | #include <stdlib.h>
3 | #include <string.h>
4 |
5 | void print_concat(char *a, char *b) { /* Print a followed by b on one line via puts(). */
6 | char *buf; /* NOTE: not initialized here; only assigned in the b_len > 0 branch. */
7 | int a_len = strlen(a);
8 | int b_len = strlen(b);
9 |
10 | if (b_len > 0) {
11 | buf = malloc(a_len + b_len + 1); /* +1 for the terminating NUL. */
12 | strcpy(buf, a);
13 | strcpy(buf + a_len, b); /* Append b right after the copy of a. */
14 | puts(buf);
15 | } else {
16 | puts(a); /* Nothing to append, so buf is left untouched. */
17 | }
18 |
19 | free(buf); /* On the else path this frees an indeterminate pointer -- presumably the intentional bug this example (free_uninit.c) demonstrates; confirm before "fixing". */
20 | }
21 |
22 | int main(void) {
23 | print_concat("foo", "bar"); /* Non-empty b: buf is allocated, printed and freed. */
24 | print_concat("foo", ""); /* Empty b: takes the else path, where buf is never assigned. */
25 | return 0;
26 | }
27 |
--------------------------------------------------------------------------------
/src/args.c:
--------------------------------------------------------------------------------
1 | #include "args.h"
2 |
3 | #include
4 | #include
5 | #include
6 |
/* Write the usage message to stderr. `me` is the name shown in the
 * usage line; "spray" is used when `me` is NULL. */
void
print_help_message (const char *me)
{
  const char *shown_name = (me != NULL) ? me : "spray";

  fprintf (stderr,
           "usage: %s [-c | --no-color] file [arg1 ...]\n"
           "\n"
           " file The name of the executable file to debug\n"
           " arg1 ... Arguments passed to the executable to debug\n"
           " -c, --no-color Disable colored output\n"
           "\n"
           "Spray is a simple debugger for programs written in C.\n"
           "For the best output, programs should be compiled using\n"
           "Clang and with debug information enabled: clang -g foo.c.\n"
           "\n"
           "A description of the commands available in Spray's REPL and\n"
           "of how to use Spray can be found in the README.md file.\n"
           "\n"
           "spray \n",
           shown_name);
}
32 |
/* Return `argv[0]`, or NULL if `argv` is NULL or `argc` is not positive. */
const char *
prog_name_arg (int argc, char **argv)
{
  if (argc <= 0 || argv == NULL)
    {
      return NULL;
    }

  return argv[0];
}
45 |
46 | /* Parse a flag starting with a single dash. Returns -1 on error. */
47 | int
48 | parse_short_flag (const char *flag, Flags *flags)
49 | {
50 | if (flag == NULL || flags == NULL)
51 | {
52 | return -1;
53 | }
54 |
55 | if (strcmp ("-c", flag) == 0)
56 | {
57 | flags->no_color = true;
58 | }
59 | else
60 | {
61 | return -1;
62 | }
63 |
64 | return 0;
65 | }
66 |
67 | /* Parse a flag starting with a double dash. Returns -1 on error. */
68 | int
69 | parse_long_flag (const char *flag, Flags *flags)
70 | {
71 | if (flag == NULL || flags == NULL)
72 | {
73 | return -1;
74 | }
75 |
76 | if (strcmp ("--no-color", flag) == 0)
77 | {
78 | flags->no_color = true;
79 | }
80 | else
81 | {
82 | return -1;
83 | }
84 |
85 | return 0;
86 | }
87 |
88 | /* Parse all flags in the command line arguments. Flags start with
89 | * either (1) a single dash followed by a single character or (2) a
90 | * double dash followed by a string. Parsing stops once one of the
91 | * given arguments doesn't fulfill either (1) or (2).
92 | * -1 is returned if the arguments contain invalid flags or the arguments
93 | * to this function are invalid. On success the number of arguments that
94 | * were processed thus far is returned. It is an error if there are no
95 | * arguments left after parsing all flags. */
96 | int
97 | parse_flags (int argc, char **argv, Flags *flags)
98 | {
99 | if (flags == NULL || argv == NULL)
100 | {
101 | return -1;
102 | }
103 |
104 | Flags flags_buf = { 0 };
105 | int res = 0;
106 | int i = 1; /* argv[0] is us. */
107 |
108 | for (; i < argc; i++)
109 | {
110 | if (strncmp (argv[i], "--", 2) == 0)
111 | {
112 | res = parse_long_flag (argv[i], &flags_buf);
113 | }
114 | else if (strncmp (argv[i], "-", 1) == 0)
115 | {
116 | res = parse_short_flag (argv[i], &flags_buf);
117 | }
118 | else
119 | {
120 | /* There are no more flags. */
121 | break;
122 | }
123 |
124 | /* Abort on error. */
125 | if (res == -1)
126 | {
127 | return -1;
128 | }
129 | }
130 |
131 | if (i == argc)
132 | {
133 | /* There must be more arguments than just flags. */
134 | return -1;
135 | }
136 |
137 | *flags = flags_buf;
138 |
139 | return i;
140 | }
141 |
142 | int
143 | parse_args (int argc, char **argv, Args *args)
144 | {
145 | if (argv == NULL || args == NULL)
146 | {
147 | return -1;
148 | }
149 |
150 | int flags_res = parse_flags (argc, argv, &args->flags);
151 | if (flags_res == -1)
152 | {
153 | return -1;
154 | }
155 | else
156 | {
157 | int file_idx = flags_res;
158 | args->file = argv[file_idx];
159 |
160 | /* Are there any arguments that should be
161 | * passed to the debugged executable? */
162 | if (file_idx + 1 < argc)
163 | {
164 | /* The arguments passed to the debugged
165 | * executable include its name. */
166 | args->args = argv + file_idx;
167 | args->n_args = argc - file_idx;
168 | }
169 | }
170 |
171 | return 0;
172 | }
173 |
174 | static Args GLOBAL_ARGS = { 0 };
175 |
176 | void
177 | set_args (const Args *args)
178 | {
179 | assert (args != NULL);
180 |
181 | GLOBAL_ARGS.flags = args->flags;
182 |
183 | /* Replace the filepath to the executable. */
184 | free (GLOBAL_ARGS.file);
185 | GLOBAL_ARGS.file = strdup (args->file);
186 |
187 | /* Replace the `args` array. */
188 | for (size_t i = 0; i < GLOBAL_ARGS.n_args; i++)
189 | {
190 | free (GLOBAL_ARGS.args[i]);
191 | }
192 | free (GLOBAL_ARGS.args);
193 |
194 | /* Allocate one pointer more than needed so that the
195 | * array is terminated by a NULL pointer. */
196 | GLOBAL_ARGS.args = calloc (args->n_args + 1, sizeof (char *));
197 | for (size_t i = 0; i < args->n_args; i++)
198 | {
199 | GLOBAL_ARGS.args[i] = strdup (args->args[i]);
200 | }
201 | }
202 |
203 | const Args *
204 | get_args (void)
205 | {
206 | return &GLOBAL_ARGS;
207 | }
208 |
--------------------------------------------------------------------------------
/src/args.h:
--------------------------------------------------------------------------------
1 | /* Command line arguments for spray. */
2 |
3 | #pragma once
4 |
5 | #ifndef _SPRAY_ARGS_H_
6 | #define _SPRAY_ARGS_H_
7 |
8 | #include
9 | #include
10 |
11 | typedef struct
12 | {
13 | bool no_color; /* -c, --no-color */
14 | } Flags;
15 |
16 | typedef struct
17 | {
18 | Flags flags;
19 | char *file; /* file */
20 | char **args; /* arg1 ... */
21 | size_t n_args;
22 | } Args;
23 |
24 | /* Get a pointer to the arguments set using `set_args`. The return
25 | * values are meaningful only after `set_args` was called once. */
26 | const Args *get_args (void);
27 |
28 | /* If `SET_ARGS_ONCE` is defined, extra utilities are declared that
29 | * allow parsing and storing the command line arguments. */
30 | #ifdef SET_ARGS_ONCE
31 |
32 | /* Parse all command line arguments in `argc` and `argv`. Returns -1 on error.
33 | * Pointers to data in `argv` are stored in `args`.*/
34 | int parse_args (int argc, char **argv, Args * args);
35 |
36 | /* Print the --help message. Defaults to the program name `spray`. */
37 | void print_help_message (const char *me);
38 |
39 | /* Get the name of *this* program from the given command line arguments.
40 | * Can be used to get the program name for `print_help_message`. */
41 | const char *prog_name_arg (int argc, char **argv);
42 |
43 | /* Copy the given arguments so that they can be accessed from
44 | * anywhere in the program. Don't call this function if any
45 | * pointers returned by `get_args` are still alive. In general
46 | * it's best to call this function only once right after parsing
47 | * the arguments and then never again. */
48 | void set_args (const Args * args);
49 |
50 | #endif /* SET_ARGS_ONCE */
51 |
52 | #endif /* _SPRAY_ARGS_H_ */
53 |
--------------------------------------------------------------------------------
/src/backtrace.c:
--------------------------------------------------------------------------------
1 | #include "backtrace.h"
2 |
3 | #include "magic.h"
4 | #include "ptrace.h"
5 | #include "registers.h"
6 |
7 | #include
8 | #include
9 |
10 | typedef struct
11 | {
12 | dbg_addr pc;
13 | real_addr frame_pointer;
14 | struct
15 | {
16 | /* If `has_lineno` is false, `lineno` is meaningless.
17 | * Always check `has_lineno` before using `lineno`. */
18 | bool has_lineno;
19 | uint32_t lineno;
20 | };
21 | const char *function;
22 | } CallLocation;
23 |
24 | struct CallFrame
25 | {
26 | CallFrame *caller;
27 | CallLocation location;
28 | };
29 |
30 | CallFrame *
31 | init_call_frame (CallFrame *caller, dbg_addr pc,
32 | real_addr frame_pointer, DebugInfo *info)
33 | {
34 | const DebugSymbol *func_sym = sym_by_addr (pc, info);
35 |
36 | const char *func_name = NULL;
37 | const Position *pos = NULL;
38 |
39 | if (func_sym != NULL)
40 | {
41 | func_name = sym_name (func_sym, info);
42 | pos = sym_position (func_sym, info);
43 | }
44 |
45 | CallFrame *frame = malloc (sizeof (*frame));
46 | assert (frame != NULL);
47 |
48 | if (pos != NULL)
49 | {
50 | frame->location.has_lineno = true;
51 | frame->location.lineno = pos->line;
52 | }
53 | else
54 | {
55 | frame->location.has_lineno = false;
56 | }
57 |
58 | frame->caller = caller;
59 | frame->location.pc = pc;
60 | frame->location.frame_pointer = frame_pointer;
61 | frame->location.function = func_name;
62 |
63 | return frame;
64 | }
65 |
66 |
67 | /* Check if the first two instructions of the function that contains
68 | * the given PC store frame pointer. They should look like follows:
69 | *
70 | * 55 push %rbp
71 | * 48 89 e5 mov %rsp,%rbp
72 | *
73 | * This is the standard procedure to store the previous functions's
74 | * frame pointer and then set the current function's frame pointer
75 | * to the start of the frame (i.e. the stack pointer right at the
76 | * start of the function). If this isn't found, it's likely that
77 | * the compiler omitted the frame pointer so we should emit a warning. */
78 | bool
79 | stores_frame_pointer (dbg_addr pc, real_addr load_address,
80 | pid_t pid, DebugInfo *info)
81 | {
82 | const DebugSymbol *func = sym_by_addr (pc, info);
83 | if (func == NULL)
84 | {
85 | return false;
86 | }
87 |
88 | uint64_t inst_bytes = { 0 };
89 | real_addr func_start_addr = dbg_to_real (load_address,
90 | sym_start_addr (func));
91 | SprayResult mem_res = pt_read_memory (pid, func_start_addr, &inst_bytes);
92 | if (mem_res == SP_ERR)
93 | {
94 | return false;
95 | }
96 |
97 | inst_bytes &= 0xffffffff; /* Mask the least significant four bytes.
98 | * Those are executed first. */
99 | return inst_bytes == 0xe5894855;
100 | }
101 |
102 | /* NOTE: The following is a naive approach to getting a backtrace
103 | * which relies on the compiler emitting a frame pointer.
104 | * Try compiling again with `-fno-omit-frame-pointer` if
105 | * this doesn't work. */
106 |
107 | CallFrame *
108 | init_backtrace (dbg_addr pc,
109 | real_addr load_address, pid_t pid, DebugInfo *info)
110 | {
111 | assert (info != NULL);
112 |
113 | /* Get the saved base pointer of the caller. */
114 | real_addr frame_pointer = { 0 };
115 | SprayResult reg_res = get_register_value (pid, rbp, &frame_pointer.value);
116 | if (reg_res == SP_ERR)
117 | {
118 | return NULL;
119 | }
120 |
121 | if (!stores_frame_pointer (pc, load_address, pid, info))
122 | {
123 | printf ("WARN: it seems like this executable doesn't maintain a frame "
124 | "pointer.\n"
125 | " This results in incorrect or incomplete backtraces.\n"
126 | "HINT: Try to compile again with `-fno-omit-frame-pointer`.\n\n");
127 | }
128 |
129 | CallFrame *call_frame = init_call_frame (NULL, pc, frame_pointer, info);
130 |
131 | while (frame_pointer.value != 0)
132 | {
133 | /* Read the return address of the current function
134 | * and use it as the PC of the next function.
135 | * NOTE: This operation must be performed *before* the
136 | * frame pointer is updated. */
137 | SprayResult ret_res = pt_read_memory (pid,
138 | (real_addr) { frame_pointer.value
139 | + 8
140 | },
141 | &pc.value);
142 | if (ret_res == SP_ERR)
143 | {
144 | free_backtrace (call_frame);
145 | return NULL;
146 | }
147 |
148 | /* Read the frame pointer of the next function. */
149 | SprayResult fp_res =
150 | pt_read_memory (pid, frame_pointer, &frame_pointer.value);
151 | if (fp_res == SP_ERR)
152 | {
153 | free_backtrace (call_frame);
154 | return NULL;
155 | }
156 |
157 | call_frame = init_call_frame (call_frame, pc, frame_pointer, info);
158 | }
159 |
160 | return call_frame;
161 | }
162 |
163 | void
164 | free_backtrace (CallFrame *call_frame)
165 | {
166 | /* Recursively free all call frames. */
167 | while (call_frame != NULL)
168 | {
169 | CallFrame *caller = call_frame->caller;
170 | free (call_frame);
171 | call_frame = caller;
172 | }
173 | }
174 |
175 | void
176 | print_backtrace (CallFrame *call_frame)
177 | {
178 | printf ("How did we even get here? (backtrace)\n");
179 | if (call_frame == NULL)
180 | {
181 | printf ("\n");
182 | }
183 | else
184 | {
185 | CallLocation *location;
186 | while (call_frame != NULL)
187 | {
188 | location = &call_frame->location;
189 |
190 | printf (" " ADDR_FORMAT " ", location->pc.value);
191 |
192 | if (location->function)
193 | {
194 | printf ("%s", location->function);
195 | }
196 | else
197 | {
198 | printf (">");
199 | }
200 |
201 | if (location->has_lineno)
202 | {
203 | printf (":%u\n", location->lineno);
204 | }
205 | else
206 | {
207 | printf ("\n");
208 | }
209 |
210 | call_frame = call_frame->caller;
211 | }
212 | }
213 | }
214 |
--------------------------------------------------------------------------------
/src/backtrace.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #ifndef _SPRAY_BACKTRACE_H_
4 | #define _SPRAY_BACKTRACE_H_
5 |
6 | #include "info.h"
7 | #include "magic.h"
8 |
9 | typedef struct CallFrame CallFrame;
10 |
11 | /* Create a call frame starting at the code
12 | address `pc` and the current stack frame. */
13 | CallFrame *init_backtrace (dbg_addr pc,
14 | real_addr load_address,
15 | pid_t pid, DebugInfo * info);
16 |
17 | /* Print a backtrace starting at the given call frame. */
18 | void print_backtrace (CallFrame * start_frame);
19 |
20 | /* Delete the call frame. */
21 | void free_backtrace (CallFrame * call_frame);
22 |
23 | #endif /* _SPRAY_BACKTRACE_H_ */
24 |
--------------------------------------------------------------------------------
/src/breakpoints.c:
--------------------------------------------------------------------------------
1 | #include "breakpoints.h"
2 |
3 | #include "magic.h"
4 | #include "hashmap.h"
5 |
6 | #include
7 | #include
8 |
9 | typedef struct
10 | {
11 | real_addr addr; /* The address is the only member that's
12 | * used to compare and look up breakpoints. */
13 | bool is_enabled;
14 | uint8_t orig_data;
15 | } Breakpoint;
16 |
17 | struct Breakpoints
18 | {
19 | struct hashmap *map;
20 | pid_t pid;
21 | };
22 |
23 | int
24 | breakpoint_compare (const void *a, const void *b, void *udata)
25 | {
26 | unused (udata);
27 | const Breakpoint *breakpoint_a = (Breakpoint *) a;
28 | const Breakpoint *breakpoint_b = (Breakpoint *) b;
29 | /* `compare` assumes that strings are used in its implementation.
30 | * Mimicking `strcmp`, 0 is returned when the keys are equal. */
31 | return !(breakpoint_a->addr.value == breakpoint_b->addr.value);
32 | }
33 |
34 | uint64_t
35 | breakpoint_hash (const void *entry, uint64_t seed0, uint64_t seed1)
36 | {
37 | const Breakpoint *breakpoint = (Breakpoint *) entry;
38 | uint64_t addr = breakpoint->addr.value;
39 | return hashmap_sip (&addr, sizeof (addr), seed0, seed1);
40 | }
41 |
42 | Breakpoints *
43 | init_breakpoints (pid_t pid)
44 | {
45 | struct hashmap *map = hashmap_new (sizeof (Breakpoint), 0, 0, 0,
46 | breakpoint_hash, breakpoint_compare,
47 | NULL, NULL);
48 | Breakpoints *breakpoints = (Breakpoints *) calloc (1, sizeof (Breakpoints));
49 | breakpoints->map = map;
50 | breakpoints->pid = pid;
51 | return breakpoints;
52 | }
53 |
54 | void
55 | free_breakpoints (Breakpoints *breakpoints)
56 | {
57 | assert (breakpoints != NULL);
58 | hashmap_free (breakpoints->map);
59 | free (breakpoints);
60 | }
61 |
62 | bool
63 | lookup_breakpoint (Breakpoints *breakpoints, real_addr address)
64 | {
65 | assert (breakpoints != NULL);
66 |
67 | Breakpoint lookup = {.addr = address };
68 | const Breakpoint *check = hashmap_get (breakpoints->map, &lookup);
69 |
70 | /* Did we find an enabled breakpoint? */
71 | if (check != NULL && check->is_enabled)
72 | {
73 | return true;
74 | }
75 | else
76 | {
77 | return false;
78 | }
79 | }
80 |
81 | /* Wrapper to make internal breakpoint look-ups more comfortable. */
82 | const Breakpoint *
83 | get_breakpoint (Breakpoints *breakpoints, real_addr addr)
84 | {
85 | assert (breakpoints != NULL);
86 | return hashmap_get (breakpoints->map, &(Breakpoint) {.addr = addr});
87 | }
88 |
89 | SprayResult
90 | enable_breakpoint (Breakpoints *breakpoints, real_addr addr)
91 | {
92 | assert (breakpoints != NULL);
93 |
94 | const Breakpoint *to_enable = get_breakpoint (breakpoints, addr);
95 |
96 | /* Do we need to create the breakpoint first? */
97 | if (to_enable == NULL)
98 | {
99 | hashmap_set (breakpoints->map, &(Breakpoint) {.addr = addr});
100 | to_enable = get_breakpoint (breakpoints, addr);
101 | assert (to_enable != NULL);
102 | }
103 |
104 | /* Only enable the breakpoint if it's currently disabled.
105 | * Re-activating an already active breakpoint would delete the
106 | * original instructions that were overwritten to insert the trap. */
107 | if (!to_enable->is_enabled)
108 | {
109 | /* Read the word at `bp->addr` in the tracee's memory. */
110 | uint64_t word = { 0 };
111 | SprayResult res =
112 | pt_read_memory (breakpoints->pid, to_enable->addr, &word);
113 | if (res == SP_ERR)
114 | {
115 | return SP_ERR;
116 | }
117 |
118 | /* Save the original least significant byte. */
119 | uint64_t orig_data = (uint8_t) (word & BTM_BYTE_MASK);
120 |
121 | /* Set the least significant bytes to the instruction `int 3`.
122 | * When this instruction is executed, the tracee raises an
123 | * interrupt and it is sent a trap signal. Receiving this
124 | * signal stops it. */
125 | uint64_t int3_data = ((word & ~BTM_BYTE_MASK) | INT3);
126 |
127 | /* Write the trap to the tracee's memory. */
128 | res = pt_write_memory (breakpoints->pid, to_enable->addr, int3_data);
129 | if (res == SP_ERR)
130 | {
131 | return SP_ERR;
132 | }
133 |
134 | /* Update the entry in the hash map. All data belonging to
135 | * the breakpoint is updated here at once, after the memory write
136 | * to the tracee's memory has completed successfully. */
137 | Breakpoint updated = {
138 | .addr = to_enable->addr,
139 | .is_enabled = true,
140 | .orig_data = orig_data,
141 | };
142 | hashmap_set (breakpoints->map, &updated);
143 | }
144 |
145 | return SP_OK;
146 | }
147 |
148 | SprayResult
149 | disable_breakpoint (Breakpoints *breakpoints, real_addr addr)
150 | {
151 | assert (breakpoints != NULL);
152 |
153 | const Breakpoint *to_disable = get_breakpoint (breakpoints, addr);
154 |
155 | if (to_disable != NULL && to_disable->is_enabled)
156 | {
157 | /* `ptrace` only operates on whole words, so we need
158 | * to read what's currently there first, then replace the
159 | * modified low byte and write it to the address. */
160 |
161 | uint64_t modified_word = 0;
162 | SprayResult res =
163 | pt_read_memory (breakpoints->pid, to_disable->addr, &modified_word);
164 | if (res == SP_ERR)
165 | {
166 | return SP_ERR;
167 | }
168 |
169 | uint64_t restored_word =
170 | ((modified_word & ~BTM_BYTE_MASK) | to_disable->orig_data);
171 | res =
172 | pt_write_memory (breakpoints->pid, to_disable->addr, restored_word);
173 | if (res == SP_ERR)
174 | {
175 | return SP_ERR;
176 | }
177 |
178 | /* Update after the write succeeded. */
179 | Breakpoint disabled = {
180 | .addr = to_disable->addr,
181 | .is_enabled = false,
182 | .orig_data = to_disable->orig_data,
183 | };
184 | hashmap_set (breakpoints->map, &disabled);
185 | }
186 |
187 | return SP_OK;
188 | }
189 |
--------------------------------------------------------------------------------
/src/breakpoints.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #ifndef _SPRAY_BREAKPOINTS_H_
4 | #define _SPRAY_BREAKPOINTS_H_
5 |
6 | #include
7 | #include
8 |
9 | #include "ptrace.h"
10 |
11 | typedef struct Breakpoints Breakpoints;
12 |
13 | Breakpoints *init_breakpoints (pid_t pid);
14 |
15 | void free_breakpoints (Breakpoints * breakpoints);
16 |
17 | /* Enable the given breakpoint by replacing the
18 | * instruction at `addr` with `int 3` (0xcc).
19 | *
20 | * This will make the child receive a `SIGTRAP` once the
21 | * instruction at address `addr` is reached.
22 | *
23 | * The tracee's memory stays untouched if an error is returned. */
24 | SprayResult enable_breakpoint (Breakpoints * breakpoints, real_addr addr);
25 |
26 | /* Disable a breakpoint, restoring the original instruction.
27 | * Does nothing if there is no breakpoint at `addr`.
28 | *
29 | * On error, the tracee's memory stays untouched
30 | * and thus the breakpoint remains active. */
31 | SprayResult disable_breakpoint (Breakpoints * breakpoints, real_addr addr);
32 |
33 | /* Return `true` if there is a breakpoint at `addr` and
34 | * this breakpoint is enabled. Otherwise, if the breakpoint
35 | * doesn't exist or is disabled, return `false`. */
36 | bool lookup_breakpoint (Breakpoints * breakpoints, real_addr addr);
37 |
38 | #endif /* _SPRAY_BREAKPOINTS_H_ */
39 |
--------------------------------------------------------------------------------
/src/c-syntax.scm:
--------------------------------------------------------------------------------
(module c-tokens *
  (import scheme
          (chicken base))

  ;;; Tags that classify a piece of C source text.
  (define token-tag-other 'tt-other)
  (define token-tag-keyword 'tt-keyword)
  (define token-tag-operator 'tt-operator)
  (define token-tag-special-symbol 'tt-special-symbol)
  (define token-tag-constant 'tt-constant)
  (define token-tag-literal 'tt-literal)
  (define token-tag-identifier 'tt-identifier)
  (define token-tag-prim-type 'tt-type)
  (define token-tag-preproc-directive 'tt-preproc)
  (define token-tag-include-filepath 'tt-include-filepath)
  (define token-tag-comment 'tt-comment)
  (define token-tag-uncomment 'tt-uncomment)
  (define token-tag-trailing-uncomment 'tt-trailing-uncomment)
  (define token-tag-comment-text 'tt-comment-text)
  (define token-tag-whitespace 'tt-whitespace)

  ;;; A token is the pair `(tag . text)`.

  ;;; Tag of `token`. Signals an error if `token` is not a pair.
  (define (token-tag token)
    (cond ((pair? token) (car token))
          (else (error "token-tag, token must be a pair" token))))

  ;;; Text of `token`. Signals an error if `token` is not a pair.
  (define (token-text token)
    (cond ((pair? token) (cdr token))
          (else (error "token-text, token must be a pair" token))))

  ;;; Build a token. Note the argument order: text first, tag second.
  (define (make-token text token-tag)
    (cons token-tag text))

  ;;; Build a one-element list holding the token for `text`.
  (define (make-token-list text token-tag)
    (cons (make-token text token-tag) '()))

  ;;; The end token has empty text and the tag `tt-end`.
  (define (make-end-token)
    (make-token "" 'tt-end))

  ;;; Is `token` the end token?
  (define (end-token? token)
    (and (equal? (token-text token) "")
         (eq? (token-tag token) 'tt-end)))


  ;;; Lists of meaningful string literals in C sources.
  (define C-keywords
    '("case" "default" "if" "else" "switch" "while"
      "do" "for" "goto" "continue" "break" "return"
      "struct" "union" "enum" "typedef" "extern"
      "static" "register" "auto" "const" "volatile"
      "restrict"))

  ;; Longer operators come before their prefixes.
  (define C-operators
    '(">>=" "<<=" "+=" "-=" "*=" "/=" "%=" "&=" "^=" "|="
      ">>" "<<" "++" "--" "->" "&&" "||" "<=" ">=" "==" "!="
      "=" "." "&" "!" "~" "-" "+" "*" "/" "%" "<" ">" "^"
      "|" "?" ":" "sizeof"))

  (define C-builtin-types
    '("char" "short" "int" "long" "signed"
      "unsigned" "float" "double" "void"))

  (define C-special-symbols '("(" ")" "[" "]" "{" "}" "," ";" "..."))

  ;; Comment delimiters.
  (define C-comment '("/*"))
  (define C-uncomment '("*/"))
  (define C++-comment '("//"))

  ;; End of module c-tokens.
  )
65 |
66 |
(module c-regex *
  (import scheme)
  (import regex) ; `regexp` and `string-search`.

  ;;; Regular expressions for scanning C code. They are mostly taken
  ;;; from [this](https://www.lysator.liu.se/c/ANSI-C-grammar-l.html)
  ;;; scanner, with some modifications. Every pattern is anchored
  ;;; at the start of the string.

  ;; String literals and blanks.
  (define literal-regex (regexp "^\"([^\"\\\\]|\\\\[\\s\\S])*\""))
  (define whitespace-regex (regexp "^[\t\r\n ]*"))

  ;; Identifiers.
  (define identifier-regex (regexp "^[a-zA-Z_][a-zA-Z_0-9]*"))

  ;; Integer and character constants.
  (define hex-constant-regex (regexp "^0[xX][a-fA-F0-9]+(u|U|l|L)*"))
  (define octal-constant-regex (regexp "^0[0-7]+(u|U|l|L)*"))
  (define decimal-constant-regex (regexp "^[0-9][0-9]*(u|U|l|L)*"))
  (define char-constant-regex (regexp "^(u|U|l|L)*'(\\\\.|[^\\\\'])+'"))

  ;; Constants in scientific notation without a decimal point.
  (define sci-constant-regex (regexp "^[1-9][0-9]*[Ee][+-]?[0-9]+"))
  ;; Floating point constants requiring fractional part.
  (define float-constant-regex-frac
    (regexp "^[0-9]*\\.[0-9]+([Ee][+-]?[0-9]+)?(f|F|l|L)?"))
  ;; Floating point constants requiring whole number part.
  (define float-constant-regex-whole
    (regexp "^[0-9]+\\.[0-9]*([Ee][+-]?[0-9]+)?(f|F|l|L)?"))

  ;; A preprocessor directive. Optionally also matches the
  ;; `<filename>`/`"filename"` part of `#include`s.
  (define preproc-directive-regex
    (regexp "^(#[a-z_]+)([ \t]*)([<\"]([^>\"\\\\]|\\\\[\\s\\S])*[>\"])?"))

  ;; Text inside a block comment, up to but excluding `*/`.
  (define comment-text-regex (regexp "^(\\*(?!\\/)|[^*])*"))
  ;; Text of a line comment, up to the end of the line.
  (define line-comment-text-regex (regexp "^[^\n]*"))

  ;; Match anything that's not whitespace.
  ;; Used to recover from invalid pieces of syntax.
  (define any-regex (regexp "^[^ \n\t\r]*"))

  ;; A keyword at the start of the string that is directly followed
  ;; by a newline, tab or space (i.e. the word does not continue
  ;; with more characters).
  (define keyword-regex (regexp "^(case|default|if|else|switch|while|do|for|goto|continue|break|return|struct|union|enum|typedef|extern|static|register|auto|const|volatile|restrict)[\n\t ]"))

  ;;; Does `str` match `regex`? An empty match counts as no match.
  (define (regex-match? regex str)
    (let ((hit (string-search regex str)))
      (cond ((not (pair? hit)) #f)
            ((equal? (car hit) "") #f)
            (else #t))))

  ;;; Return the full match of `str` and `regex`.
  ;;; Only call this if `str` is known to match.
  (define (full-match regex str)
    (let ((hit (string-search regex str)))
      (car hit)))

  ;;; Reexport string-search -- `regex-matches`
  ;;; returns a list of all the matches.
  (define regex-matches string-search)

  ;; End of module c-regex.
  )
119 |
120 |
(module c-types (make-types-env
                 is-type-in-env?)
  (import scheme
          (chicken base)
          (srfi-69)
          (only (srfi-1) find)
          c-tokens)

  ;; From the C reference grammar (https://www.lysator.liu.se/c/ANSI-C-grammar-y.html):
  ;;
  ;; type_specifier
  ;;         : VOID
  ;;         ...
  ;;         | UNSIGNED
  ;;         | struct_or_union_specifier
  ;;         | enum_specifier
  ;;         | TYPE_NAME
  ;;         ;
  ;;
  ;; struct_or_union_specifier
  ;;         : struct_or_union IDENTIFIER '{' struct_declaration_list '}'
  ;;         | struct_or_union '{' struct_declaration_list '}'
  ;;         | struct_or_union IDENTIFIER
  ;;         ;
  ;;
  ;; struct_or_union
  ;;         : STRUCT
  ;;         | UNION
  ;;         ;
  ;; ...
  ;; enum_specifier
  ;;         : ENUM '{' enumerator_list '}'
  ;;         | ENUM IDENTIFIER '{' enumerator_list '}'
  ;;         | ENUM IDENTIFIER
  ;;         ;
  ;;
  ;; So whenever one of the keywords struct, union or enum is seen,
  ;; it is enough to check whether the next token is an identifier
  ;; or an opening bracket. An identifier is the name of a type and
  ;; gets stored. Otherwise the type being declared is anonymous
  ;; and can be ignored.
  ;;
  ;; A types environment is a list whose head is the symbol `*env*`
  ;; and whose tail holds the type identifiers found so far.

  ;;; Is `token` a keyword token that's a type specifier?
  (define (type-spec-keyword? token)
    (and (eq? (token-tag token) token-tag-keyword)
         (member (token-text token) '("struct" "enum" "union"))
         #t))

  ;;; Store the identifier of the first type specification
  ;;; found in `tokens` in `env`. Whitespace and type specifier
  ;;; keywords in front of the identifier are skipped; any other
  ;;; token ends the search without storing anything.
  (define (store-type-spec! env tokens)
    (let skip ((rest tokens))
      (if (pair? rest)
          (let* ((token (car rest))
                 (tag (token-tag token)))
            (cond ((eq? tag token-tag-identifier)
                   ;; Save the type's identifier.
                   (set-cdr! env (cons (token-text token)
                                       (cdr env))))
                  ((or (eq? tag token-tag-whitespace)
                       (type-spec-keyword? token))
                   (skip (cdr rest))))))))

  (define (store-typedef! env tokens)
    ;; TODO
    '())

  ;;; Return a list of sublists extracted from `tokens`
  ;;; that begin with a keyword token. The sublist of the
  ;;; last keyword comes first.
  (define (filter-keywords tokens)
    (let collect ((rest tokens)
                  (found '()))
      (cond ((null? rest) found)
            ((eq? (token-tag (car rest)) token-tag-keyword)
             (collect (cdr rest) (cons rest found)))
            (else (collect (cdr rest) found)))))

  ;;; Create a types environment from one or more lists of
  ;;; token lines.
  (define (make-types-env token-lines . token-lines-lst)
    ;; Record the type introduced by the keyword that `tokens` starts with.
    (define (record-type! env tokens)
      (let ((keyword (car tokens)))
        (cond ((type-spec-keyword? keyword)
               (store-type-spec! env tokens))
              ((equal? (token-text keyword) "typedef")
               (store-typedef! env tokens)))))
    (let ((env (list '*env*)))
      (for-each
       (lambda (lines)
         (for-each
          (lambda (tokens) (record-type! env tokens))
          (filter-keywords (flatten lines))))
       (cons token-lines token-lines-lst))
      env))

  ;;; Is `env` a types environment?
  (define (types-env? env)
    (and (pair? env)
         (eq? '*env* (car env))))

  ;;; Return the stored identifier equal to `type`, or #f if there
  ;;; is none or `env` is not a types environment.
  (define (is-type-in-env? env type)
    (and (types-env? env)
         (find (lambda (known-type)
                 (equal? known-type type))
               (cdr env))))
  ;; End of module c-types.
  )
241 |
--------------------------------------------------------------------------------
/src/debugger.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #ifndef _SPRAY_DEBUGGER_H_
4 | #define _SPRAY_DEBUGGER_H_
5 |
6 | /* Required to use `sigabbrev_np` */
7 | #define _GNU_SOURCE
8 |
9 | #include
10 |
11 | #include "breakpoints.h"
12 | #include "history.h"
13 | #include "info.h"
14 |
15 | typedef struct
16 | {
17 | const char *prog_name; /* Tracee program name. */
18 | pid_t pid; /* Tracee pid. */
19 | Breakpoints *breakpoints; /* Breakpoints. */
20 | DebugInfo *info; /* Debug information about the tracee. */
21 | real_addr load_address; /* Load address. Set for PIEs, 0 otherwise. */
22 | History history; /* Command history of recent commands. */
23 | } Debugger;
24 |
25 | /* Setup a debugger. This forks the child process, launches
26 | * and immediately stops it.
27 | *
28 | * On success, `dbg` is modified to accommodate the changes.
29 | *
30 | * On error, `dbg` stays untouched, and `-1` is returned. */
31 | int setup_debugger (const char *prog_name, char *prog_argv[], Debugger * dbg);
32 |
33 | /* Run the debugger. Starts debugging at the beginning
34 | * of the `main` function of the child process.
35 | *
36 | * Call `setup_debugger` on `dbg` before calling this function.
37 | * After `run_debugger` returns, `dbg` is still allocated and
38 | * must be deleted using `del_debugger`. */
39 | void run_debugger (Debugger dbg);
40 |
41 | /* Free memory allocated by the debugger. Returns
42 | * `SP_ERR` if some resource couldn't be deleted. */
43 | SprayResult del_debugger (Debugger dbg);
44 |
45 | #ifdef UNIT_TESTS
46 |
47 | typedef enum
48 | {
49 | EXEC_SIG_EXITED,
50 | EXEC_SIG_KILLED,
51 | EXEC_SIG_CONT,
52 | EXEC_SIG_STOPPED,
53 | EXEC_NONE, /* No additionally information. */
54 | } ExecOkCode;
55 |
56 | typedef enum
57 | {
58 | EXEC_CONT_DEAD,
59 | EXEC_INVALID_WAIT_STATUS,
60 | EXEC_FUNCTION_NOT_FOUND,
61 | EXEC_SET_BREAKPOINTS_FAILED,
62 | EXEC_PC_LINE_NOT_FOUND,
63 | EXEC_STEP,
64 | } ExecErrCode;
65 |
66 | typedef struct
67 | {
68 | SprayResult type;
69 | union
70 | {
71 | ExecOkCode ok;
72 | ExecErrCode err;
73 | } code;
74 | union
75 | {
76 | struct
77 | {
78 | int signo;
79 | int code; /* `si_code` field of `siginfo_t` struct. */
80 | } signal; /* Set for `EXEC_KILLED` and `EXEC_STOPPED`. */
81 | int exit_code; /* Set for `EXEC_EXITED`. */
82 | int wait_status; /* Set for `EXEC_INVALID_WAIT_STATUS`. */
83 | } data;
84 | } ExecResult;
85 |
86 | ExecResult continue_execution (Debugger * dbg);
87 | ExecResult wait_for_signal (Debugger * dbg);
88 |
89 | #endif /* UNIT_TESTS */
90 |
91 | #endif /* _SPRAY_DEBUGGER_H_ */
92 |
--------------------------------------------------------------------------------
/src/history.c:
--------------------------------------------------------------------------------
1 | #include "history.h"
2 |
3 | #include
4 | #include
5 |
6 | struct History /* State behind the opaque `History` handle. */
7 | {
8 | char *command; /* Heap copy of the line stored by `save_command`; NULL until a command is saved. */
9 | };
10 |
11 | /* Allocate a zero-initialized history. A failed allocation trips the assert. */
12 | History init_history (void)
13 | {
14 |   struct History *fresh = calloc (1, sizeof (struct History));
15 |   assert (fresh != NULL);
16 |   return (History) fresh;
17 | }
18 |
19 | /* Release `history` together with the command it owns. NULL is ignored. */
20 | void free_history (History history)
21 | {
22 |   if (history == NULL)
23 |     return;
24 |
25 |   free (history->command);
26 |   free (history);
27 | }
28 |
29 | /* Remember a private copy of `line` as the most recent command. The copy is made before the old one is freed, so a failed allocation trips the assert instead of `strcpy` writing through NULL, and `line` may alias the stored command. */
30 | void save_command (History history, const char *line)
31 | {
32 |   assert (history != NULL && line != NULL);
33 |   char *copy = strdup (line);
34 |   assert (copy != NULL);
35 |   free (history->command);
36 |   history->command = copy;
37 | }
38 |
39 | /* Hand the caller a `malloc`-allocated copy of the saved command.
40 |  *
41 |  * Returns `SP_OK` and stores the copy in `*line` (the caller frees it).
42 |  * Returns `SP_ERR` and leaves `*line` untouched if no command has been
43 |  * saved yet or if the copy could not be allocated. */
44 | SprayResult read_command (History history, char **line)
45 | {
46 |   assert (history != NULL);
47 |   assert (line != NULL);
48 |   char *copy = (history->command != NULL) ? strdup (history->command) : NULL;
49 |   if (copy == NULL)
50 |     return SP_ERR;
51 |   *line = copy;
52 |   return SP_OK;
53 | }
54 |
--------------------------------------------------------------------------------
/src/history.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #ifndef _SPRAY_HISTORY_H_
4 | #define _SPRAY_HISTORY_H_
5 |
6 | #include "magic.h"
7 |
8 | #include
9 |
10 | typedef struct History *History;
11 |
12 | History init_history (void);
13 |
14 | void free_history (History history);
15 |
16 | void save_command (History history, const char *line);
17 |
18 | /* Copies the command to `line` (if there is a command)
19 | * using `malloc`. The caller must free the copy. */
20 | SprayResult read_command (History history, char **line);
21 |
22 | #endif /* _SPRAY_HISTORY_H_ */
23 |
--------------------------------------------------------------------------------
/src/info.h:
--------------------------------------------------------------------------------
1 | /* Information about the executable that's being debugged. */
2 |
3 | #pragma once
4 |
5 | #ifndef _SPRAY_INFO_H_
6 | #define _SPRAY_INFO_H_
7 |
8 | #include "breakpoints.h"
9 | #include "registers.h"
10 | #include
11 |
12 | typedef struct DebugInfo DebugInfo;
13 |
14 | /* Initialize debugging information. Returns NULL on error. */
15 | DebugInfo *init_debug_info (const char *filepath);
16 |
17 | /* Free the given `DebugInfo` instance. Any pointer
18 | * to an object returned from a function in this file
19 | * becomes invalid if the `DebugInfo` instance given
20 | * to that function is deleted.
21 | * Returns `SP_ERR` if some resources couldn't be deleted. */
22 | SprayResult free_debug_info (DebugInfo ** infop);
23 |
24 | /* A symbol in the executable that's being debugged. */
25 | typedef struct DebugSymbol DebugSymbol;
26 |
27 | /* Get a debug symbol by its name. Returns NULL on error. */
28 | const DebugSymbol *sym_by_name (const char *name, DebugInfo * info);
29 |
30 | /* Get a debug symbol by an address that belongs to it.
31 | * Returns NULL on error. */
32 | const DebugSymbol *sym_by_addr (dbg_addr addr, DebugInfo * info);
33 |
34 | /* Get the name of the given symbol. Returns NULL if there is no name. */
35 | const char *sym_name (const DebugSymbol * sym, const DebugInfo * info);
36 |
37 | /* Get the address at which the code of the first line
38 | * of the given function starts. Returns `SP_ERR` and
39 | * leaves `addr` untouched if the symbol doesn't refer
40 | * to a function. */
41 | SprayResult function_start_addr (const DebugSymbol * func,
42 | const DebugInfo * info, dbg_addr * addr);
43 |
44 | /* Get the start address (low PC) of the given symbol. */
45 | dbg_addr sym_start_addr (const DebugSymbol * sym);
46 |
47 | /* Get the end address (high PC) of the given symbol. */
48 | dbg_addr sym_end_addr (const DebugSymbol * sym);
49 |
50 | /* Get the address of the given symbol. Returns the same address
51 | * as `sym_start_addr` if the symbol was created from a name. */
52 | dbg_addr sym_addr (const DebugSymbol * sym);
53 |
54 | /* Get the filepath of the source file that belongs to the symbol.
55 | * The string that's returned is owned and later deleted by `info`. */
56 | const char *sym_filepath (const DebugSymbol * sym, const DebugInfo * info);
57 |
58 | /* A position in a source file. */
59 | typedef struct Position
60 | {
61 | uint32_t line; /* Line number in the source file. */
62 | uint32_t column; /* Column within that line. */
63 | /* `true` if this position perfectly matches the symbol used to
64 | * retrieve it. Otherwise this position only represents the closest
65 | * location to describe the symbol with. */
66 | bool is_exact;
67 | } Position;
68 |
69 | /* Returns the position of the symbol in the source file
70 | * that belongs to the symbol. NULL is returned if no
71 | * such position could be retrieved. */
72 | const Position *sym_position (const DebugSymbol * sym,
73 | const DebugInfo * info);
74 |
75 | /* Return the position that belongs to the given address.
76 | * Returns NULL on error. */
77 | const Position *addr_position (dbg_addr addr, DebugInfo * info);
78 |
79 | /* Returns the function name that belongs to the given address.
80 | * Returns NULL on error. */
81 | const char *addr_name (dbg_addr addr, DebugInfo * info);
82 |
83 | /* Returns the filepath that belongs to the given address.
84 | * Returns NULL on error. */
85 | const char *addr_filepath (dbg_addr addr, DebugInfo * info);
86 |
87 | /* The following functions don't fit the regular scheme of
88 | * this interface. They are currently required but might
89 | * be incorporated in a generic interface later. */
90 |
91 | /* Returns the address that belongs to the given filepath and line number.
92 | * `SP_ERR` is returned if no such address could be found and `addr`
93 | * stays untouched. */
94 | SprayResult addr_at (const char *filepath,
95 | uint32_t lineno,
96 | const DebugInfo * info, dbg_addr * addr);
97 |
98 | /* Is this a dynamic executable which is relocated? */
99 | bool is_dyn_exec (const DebugInfo * info);
100 |
101 | /* Set breakpoints required to step over the line referred to by `func`.
102 | * On error `SP_ERR` is returned and nothing has to be deleted. */
103 | SprayResult set_step_over_breakpoints (const DebugSymbol * func,
104 | const DebugInfo * info,
105 | real_addr load_address,
106 | Breakpoints * breakpoints,
107 | real_addr ** to_del, size_t *n_to_del);
108 |
109 | /* Information about runtime variables.
110 | *
111 | * This includes a description of where to find this variable
112 | * in the memory of the running debugee process, the path to
113 | * the file where the variable is declared and line number in
114 | * this file.
115 | *
116 | * It also includes information on the type of the variable.
117 | * This way, the value of the variable can be printed
118 | * according to the type.
119 | *
120 | * It does not include the name the variable is declared as.
121 | * The name should be easily accessible since it's required
122 | * to create an instance of `RuntimeVariable`. */
123 | typedef struct RuntimeVariable RuntimeVariable;
124 |
125 | /* Return the location of the variable as a runtime address.
126 | * The return value is meaningless if `is_addr_loc == false`.
127 | * Check that that's not the case first! */
128 | real_addr var_loc_addr (const RuntimeVariable *var);
129 |
130 | /* Return the location of the variable as a register.
131 | * The return value is meaningless if `is_reg_loc == false`.
132 | * Check that that's not the case first! */
133 | x86_reg var_loc_reg (const RuntimeVariable *var);
134 |
135 | /* Check the type of a location description. */
136 | bool is_addr_loc (const RuntimeVariable *var);
137 | bool is_reg_loc (const RuntimeVariable *var);
138 |
139 | /* Return the path of the file and the line number in the file
140 | * where the variable described by `var` was declared.
141 | *
142 | * Both of them are optional. `0` indicates that there is no
143 | * line number (since line numbers start at 1!), and `NULL` is
144 | * returned if there is no path. */
145 | const char *var_loc_path (const RuntimeVariable *var);
146 | unsigned var_loc_line (const RuntimeVariable *var);
147 |
148 | /* Print the path and the line of the given variable into
149 | * a string that's returned. The caller should free the string.
150 | *
151 | * This function uses the values that `var_loc_path` and
152 | * `var_loc_line` return.
153 | *
154 | * `var` must not be `NULL`. */
155 | char *print_var_loc (const RuntimeVariable *var);
156 |
157 | /* Use the type of the variable and return its value in a string.
158 | * The caller should free the string. */
159 | char *print_var_value (const RuntimeVariable *var, uint64_t value,
160 | FormatFilter filter);
161 |
162 | /* Same as `print_var_value`, but `value` is the result of dereferencing
163 | * the given variable, and not the value of the variable by itself. The
164 | * caller should free the given string. */
165 | char *print_var_deref_value (const RuntimeVariable *var, uint64_t deref,
166 | FormatFilter filter);
167 |
168 | /* Mask off all bits of value that are not part of the type's value.
169 | * E.g. only the LSB is returned for `char`s. */
170 | uint64_t mask_var_value (const RuntimeVariable *var, uint64_t value);
171 |
172 | /* Get the location of the variable with the
173 | * given name in the scope around `pc`.
174 | *
175 | * On success, a new heap-allocated location is returned.
176 | * This location must be manually `free`'d (TODO: make
177 | * happen automatically when `info` is destroyed).
178 | *
179 | * `NULL` is returned on error. */
180 | RuntimeVariable *init_var (dbg_addr pc,
181 | real_addr load_address,
182 | const char *var_name,
183 | pid_t pid, const DebugInfo * info);
184 |
185 | /* Delete a `RuntimeVariable` pointer as returned by `init_var`. */
186 | void del_var (RuntimeVariable *var);
187 |
188 | #endif /* _SPRAY_INFO_H_ */
189 |
--------------------------------------------------------------------------------
/src/magic.c:
--------------------------------------------------------------------------------
1 | #include "magic.h"
2 |
3 | #include
4 | #include
5 | #include
6 | #include <unistd.h> /* `getcwd` */
7 | #include
8 | #include
9 | #include <limits.h> /* `PATH_MAX` */
10 | #include
11 |
12 | /* Count the decimal digits in the integer part of `num`, ignoring the sign.
13 |  *
14 |  * Magnitudes below 1 (zero included) count as one digit: they are written
15 |  * with a single leading `0`, and `log10` is negative for them, which must
16 |  * not be converted to `unsigned`. NaN and infinities also yield 1, because
17 |  * converting them to `unsigned` is undefined as well. */
18 | unsigned n_digits (double num)
19 | {
20 |   double magnitude = fabs (num);
21 |   if (!isfinite (magnitude) || magnitude < 1.0)
22 |     return 1;
23 |   return ((unsigned) floor (log10 (magnitude))) + 1; }
24 |
25 | /* Write `n_spaces` blanks to standard output, one at a time. */
26 | void indent_by (unsigned n_spaces)
27 | {
28 |   unsigned remaining = n_spaces;
29 |
30 |   while (remaining-- > 0)
31 |     printf (" ");
32 | }
33 |
34 | /* True when `a` and `b` hold the same characters, i.e. `strcmp` reports no difference. */
35 | bool str_eq (const char *restrict a, const char *restrict b)
36 | {
37 |   return !strcmp (a, b);
38 | }
39 |
40 | dbg_addr real_to_dbg (real_addr offset, real_addr real)
41 | {
42 |   dbg_addr stored = { .value = real.value - offset.value }; /* Take the offset back out of the runtime address. */
43 |   return stored;
44 | }
45 |
46 | real_addr dbg_to_real (real_addr offset, dbg_addr dwarf)
47 | {
48 |   real_addr runtime = { .value = dwarf.value + offset.value }; /* Apply the offset to the stored address. */
49 |   return runtime;
50 | }
51 |
52 | /* Translate the name of a format into its `FormatFilter`.
53 |  *
54 |  * Recognized names are "hex", "bits", "addr", "dec" and "bytes". `NULL` and
55 |  * every other string yield `FMT_NONE`. */
56 | FormatFilter parse_format (const char *str)
57 | {
58 |   static const struct
59 |   {
60 |     const char *name;
61 |     FormatFilter filter;
62 |   } known_formats[] = {
63 |     { "hex", FMT_HEX },
64 |     { "bits", FMT_BITS },
65 |     { "addr", FMT_ADDR },
66 |     { "dec", FMT_DEC },
67 |     { "bytes", FMT_BYTES },
68 |   };
69 |   const size_t n_known = sizeof (known_formats) / sizeof (known_formats[0]);
70 |
71 |   if (str == NULL)
72 |     return FMT_NONE;
73 |
74 |   /* The names are tried in the order listed in the table above. */
75 |   for (size_t i = 0; i < n_known; i++)
76 |     {
77 |       if (str_eq (str, known_formats[i].name))
78 |         return known_formats[i].filter;
79 |     }
80 |
81 |   return FMT_NONE;
82 | }
87 |
88 | /* Choose the filter to apply: `current` if it names a filter, and
89 |  * `_default` if `current` is `FMT_NONE`. */
90 | FormatFilter default_format (FormatFilter current, FormatFilter _default)
91 | {
92 |   return (current != FMT_NONE) ? current : _default;
93 | }
100 |
101 | /* Macros for printing binary numbers (https://stackoverflow.com/a/25108449). Each `PRINTF_BITS_PATTERN_INT<N>` is a `printf` pattern and the matching `PRINTF_BITS_INT<N>(i)` expands to the arguments for it. */
102 | #define PRINTF_BITS_PATTERN_INT8 "%c%c%c%c%c%c%c%c" /* One `%c` per bit. */
103 | #define PRINTF_BITS_INT8(i) \
104 | (((i) & 0x80ll) ? '1' : '0'), \
105 | (((i) & 0x40ll) ? '1' : '0'), \
106 | (((i) & 0x20ll) ? '1' : '0'), \
107 | (((i) & 0x10ll) ? '1' : '0'), \
108 | (((i) & 0x08ll) ? '1' : '0'), \
109 | (((i) & 0x04ll) ? '1' : '0'), \
110 | (((i) & 0x02ll) ? '1' : '0'), \
111 | (((i) & 0x01ll) ? '1' : '0') /* Eight '0'/'1' characters for the lowest byte of `i`, highest bit first. */
112 |
113 | #define PRINTF_BITS_PATTERN_INT16 \
114 | PRINTF_BITS_PATTERN_INT8 " " PRINTF_BITS_PATTERN_INT8
115 | #define PRINTF_BITS_PATTERN_INT32 \
116 | PRINTF_BITS_PATTERN_INT16 " " PRINTF_BITS_PATTERN_INT16
117 | #define PRINTF_BITS_PATTERN_INT64 \
118 | PRINTF_BITS_PATTERN_INT32 " " PRINTF_BITS_PATTERN_INT32 /* Wider patterns separate the bytes with a space. */
119 |
120 | #define PRINTF_BITS_INT16(i) \
121 | PRINTF_BITS_INT8((i) >> 8), PRINTF_BITS_INT8(i)
122 | #define PRINTF_BITS_INT32(i) \
123 | PRINTF_BITS_INT16((i) >> 16), PRINTF_BITS_INT16(i)
124 | #define PRINTF_BITS_INT64(i) \
125 | PRINTF_BITS_INT32((i) >> 32), PRINTF_BITS_INT32(i) /* The upper half of `i` is always emitted first. */
126 |
127 | /* Macros for printing bytes made up of two hexadecimal digits each. They follow the same pattern/argument pairing as the bit macros. */
128 | #define PRINTF_BYTES_PATTERN_INT8 "%02hx" /* One zero-padded, two-digit conversion per byte. */
129 | #define PRINTF_BYTES_PATTERN_INT16 \
130 | PRINTF_BYTES_PATTERN_INT8 " " PRINTF_BYTES_PATTERN_INT8
131 | #define PRINTF_BYTES_PATTERN_INT32 \
132 | PRINTF_BYTES_PATTERN_INT16 " " PRINTF_BYTES_PATTERN_INT16
133 | #define PRINTF_BYTES_PATTERN_INT64 \
134 | PRINTF_BYTES_PATTERN_INT32 " " PRINTF_BYTES_PATTERN_INT32
135 | #define PRINTF_BYTES_INT16(i) ((uint8_t)((i) >> 8) & 0xff), ((uint8_t)(i) & 0xff) /* Two byte values: bits 15..8 of `i`, then bits 7..0. */
136 | #define PRINTF_BYTES_INT32(i) PRINTF_BYTES_INT16((i) >> 16), PRINTF_BYTES_INT16(i)
137 | #define PRINTF_BYTES_INT64(i) PRINTF_BYTES_INT32((i) >> 32), PRINTF_BYTES_INT32(i)
138 |
139 | #define HEX_FORMAT "0x%lx" /* Used by `print_format` for `FMT_HEX`. */
140 | #define DEC_FORMAT "%ld" /* Used by `print_format` for `FMT_NONE` and `FMT_DEC`, with the value cast to `int64_t`. */
141 |
142 | /* Render `value` as text according to `filter`: signed decimal for `FMT_NONE`
143 |  * and `FMT_DEC`, `0x`-prefixed hexadecimal for `FMT_HEX`, `ADDR_FORMAT` for
144 |  * `FMT_ADDR`, all 64 bits / 8 bytes for `FMT_BITS` / `FMT_BYTES`. Returns a heap-allocated string the caller must free. */
145 | char *print_format (uint64_t value, FormatFilter filter)
146 | {
147 |   /* A 512 byte maximum means at most 8 characters per bit.
148 |    * That should be sufficient (+1 for '\0') */
149 |   int n = 513;
150 |   char *buf = malloc (n);
151 |   assert (buf != NULL);
152 |   /* Start out as the empty string: memory from `malloc` is indeterminate, so
153 |    * a `filter` value outside the cases below must not leave `buf` unterminated. */
154 |   buf[0] = '\0';
155 |   switch (filter)
156 |     {
157 |     case FMT_NONE:
158 |     case FMT_DEC: /* Signed decimal numbers are the default. */
159 |       snprintf (buf, n, DEC_FORMAT, (int64_t) value);
160 |       break;
161 |     case FMT_HEX:
162 |       snprintf (buf, n, HEX_FORMAT, value);
163 |       break;
164 |     case FMT_BITS:
165 |       snprintf (buf, n, PRINTF_BITS_PATTERN_INT64, PRINTF_BITS_INT64 (value));
166 |       break;
167 |     case FMT_ADDR:
168 |       snprintf (buf, n, ADDR_FORMAT, value);
169 |       break;
170 |     case FMT_BYTES:
171 |       snprintf (buf, n, PRINTF_BYTES_PATTERN_INT64, PRINTF_BYTES_INT64 (value));
172 |       break;
173 |     }
174 |   return buf;
175 | }
176 |
177 | /* Return the part of `abs_filepath` below the current working directory.
178 |  *
179 |  * The result points into `abs_filepath`; nothing is allocated for the caller.
180 |  * If the path does not lie inside the working directory (for instance because
181 |  * it is already relative), it is returned unchanged. `NULL` is returned if
182 |  * `abs_filepath` is `NULL` or the working directory cannot be determined. */
183 | const char *relative_filepath (const char *abs_filepath)
184 | {
185 |   if (abs_filepath == NULL)
186 |     return NULL;
187 |
188 |   char *cwd = malloc (PATH_MAX);
189 |   if (cwd == NULL || getcwd (cwd, PATH_MAX) == NULL)
190 |     {
191 |       free (cwd); /* Don't leak the buffer when `getcwd` fails. */
192 |       return NULL;
193 |     }
194 |
195 |   /* Ignore a trailing slash, which only the root directory "/" has. */
196 |   size_t cwd_len = strlen (cwd);
197 |   if (cwd_len > 0 && cwd[cwd_len - 1] == '/')
198 |     cwd_len--;
199 |
200 |   /* Only strip the working directory if all of it is a prefix of the path
201 |    * and the prefix ends at a component boundary. `strncmp` stops at the
202 |    * first terminator, so neither string is read past its end. */
203 |   bool inside_cwd = strncmp (cwd, abs_filepath, cwd_len) == 0
204 |                     && abs_filepath[cwd_len] == '/';
205 |   free (cwd);
206 |
207 |   /* `+ 1` skips the slash that separates the directory from the rest. */
208 |   return inside_cwd ? abs_filepath + cwd_len + 1 : abs_filepath;
209 | }
217 |
218 | /* Print `filepath` relative to the current working directory, or exactly as
219 |  * given if no relative form can be determined. `filepath` must not be `NULL`.
220 |  *
221 |  * `relative_filepath` only returns a pointer into its argument, so it is
222 |  * called on `filepath` directly. No temporary copy is needed, which also
223 |  * removes the copy that used to leak when the lookup returned `NULL`. */
224 | void print_as_relative_filepath (const char *filepath)
225 | {
226 |   assert (filepath != NULL);
227 |
228 |   const char *relative = relative_filepath (filepath);
229 |   printf ("%s", relative != NULL ? relative : filepath);
230 | }
235 |
236 | /* Print "<kind>: <message>\n" to `stream`, where the message is `fmt`
237 |  * expanded with the arguments in `argp`. `kind` becomes part of the format
238 |  * string handed to `vfprintf`, so it must not contain `%`. */
239 | void print_msg (FILE *stream, const char *kind, const char *fmt, va_list argp)
240 | {
241 |   assert (stream != NULL);
242 |   assert (kind != NULL);
243 |   assert (fmt != NULL);
244 |   /* `va_list` is an opaque type that need not be comparable to `NULL`,
245 |    * so `argp` itself is deliberately not checked. */
246 |
247 |   /* Room for `kind`, ": ", `fmt`, "\n" and the terminating '\0'. */
248 |   size_t len = strlen (fmt) + strlen (kind) + 4;
249 |   char *fmt_buf = calloc (len, sizeof (*fmt_buf));
250 |   assert (fmt_buf != NULL);
251 |   /* `snprintf` returns a negative value on error; rule that out before
252 |    * comparing with the expected length as an unsigned number. */
253 |   int n_printed = snprintf (fmt_buf, len, "%s: %s\n", kind, fmt);
254 |   assert (n_printed >= 0 && (size_t) n_printed == len - 1);
255 |   vfprintf (stream, fmt_buf, argp);
256 |   free (fmt_buf);
257 | }
258 |
259 | /* Print an "ERR" message to stderr; `fmt` and the variadic arguments are handed to `print_msg`. */
260 | void spray_err (const char *fmt, ...)
261 | {
262 |   va_list args;
263 |   va_start (args, fmt);
264 |   print_msg (stderr, "ERR", fmt, args);
265 |   va_end (args);
266 | }
267 |
268 | /* Print a "WARN" message to stderr; `fmt` and the variadic arguments are handed to `print_msg`. */
269 | void spray_warn (const char *fmt, ...)
270 | {
271 |   va_list args;
272 |   va_start (args, fmt);
273 |   print_msg (stderr, "WARN", fmt, args);
274 |   va_end (args);
275 | }
276 |
277 | /* Print a "HINT" message to stderr; `fmt` and the variadic arguments are handed to `print_msg`. */
278 | void spray_hint (const char *fmt, ...)
279 | {
280 |   va_list args;
281 |   va_start (args, fmt);
282 |   print_msg (stderr, "HINT", fmt, args);
283 |   va_end (args);
284 | }
285 |
286 | /* Print an "ERR" message to stdout; `fmt` and the variadic arguments are handed to `print_msg`. */
287 | void repl_err (const char *fmt, ...)
288 | {
289 |   va_list args;
290 |   va_start (args, fmt);
291 |   print_msg (stdout, "ERR", fmt, args);
292 |   va_end (args);
293 | }
294 |
295 | /* Print a "WARN" message to stdout; `fmt` and the variadic arguments are handed to `print_msg`. */
296 | void repl_warn (const char *fmt, ...)
297 | {
298 |   va_list args;
299 |   va_start (args, fmt);
300 |   print_msg (stdout, "WARN", fmt, args);
301 |   va_end (args);
302 | }
303 |
304 | /* Print a "HINT" message to stdout; `fmt` and the variadic arguments are handed to `print_msg`. */
305 | void repl_hint (const char *fmt, ...)
306 | {
307 |   va_list args;
308 |   va_start (args, fmt);
309 |   print_msg (stdout, "HINT", fmt, args);
310 |   va_end (args);
311 | }
312 |
--------------------------------------------------------------------------------
/src/magic.h:
--------------------------------------------------------------------------------
1 | /* Utilities, miscellaneous functions and magic numbers. ✨. */
2 |
3 | #pragma once
4 |
5 | #ifndef _SPRAY_MAGIC_H_
6 | #define _SPRAY_MAGIC_H_
7 |
8 | #include <stdint.h> /* `uint64_t` for address values. */
9 | #include <stdio.h> /* `printf` for `print_addr`. */
10 | #include <stdbool.h>
11 |
12 | #define unused(x) (void) (x); /* Evaluates `x` and discards the result. The expansion already ends in `;`. */
13 |
14 | enum magic_numbers
15 | {
16 | /* `int 3` instruction code. */
17 | INT3 = 0xcc,
18 | /* Mask of lowest byte in number. */
19 | BTM_BYTE_MASK = 0xff,
20 | /* Number of registers in the `x86_regs` enum. */
21 | N_REGISTERS = 27,
22 | /* Number of characters required to store any possible
23 | * path `/proc/<pid>/maps`. According to the man-page for
24 | * proc(5) the maximum pid is up to 2^22. In decimal this
25 | * number has 7 digits. This plus characters for the rest
26 | * of the path plus a NUL terminator make up this number. */
27 | PROC_MAPS_FILEPATH_LEN = 19,
28 | /* Size of the buffer to print all the tracee's registers.
29 | * All values are zero-padded so the size is always the same. */
30 | REGISTER_PRINT_BUF_SIZE = 716,
31 | /* Width of the format string "\t%8s 0x%016lx" given that the string
32 | * substituted is no longer than 8 characters. This doesn't
33 | * include the string's NUL byte. */
34 | REGISTER_PRINT_LEN = 26,
35 | /* Maximum number of instruction-level steps performed by
36 | * `single_step_line` until giving up trying to find another
37 | * line. Can be fairly large since the program will likely
38 | * end after this limit was reached. */
39 | SINGLE_STEP_SEARCH_LIMIT = 128,
40 | };
41 |
42 | typedef enum /* Status code returned by functions that can fail. */
43 | {
44 | SP_OK, /* The operation succeeded. */
45 | SP_ERR, /* The operation failed. */
46 | } SprayResult;
47 |
48 | /* Calculate the number of digits in the given number. */
49 | unsigned n_digits (double num);
50 |
51 | /* Print n space characters to standard out. */
52 | void indent_by (unsigned n_spaces);
53 |
54 | /* Helper to test if two strings are equal (`strcmp(...) == 0`) */
55 | bool str_eq (const char *restrict a, const char *restrict b);
56 |
57 | typedef struct
58 | {
59 | uint64_t value;
60 | } real_addr; /* Address in the running process, i.e. with the load address applied. */
61 |
62 | typedef struct
63 | {
64 | uint64_t value;
65 | } dbg_addr; /* Address as stored in the binary file and its DWARF debug information. */
66 |
67 | /* The runtime addresses in *position independent executables*
68 | * may all be offset by a particular value from the addresses
69 | * which are stored in the binary file itself.
70 | * The addresses found in the DWARF debug information are such
71 | * permanently stored addresses. `dbg_addr` represents them.
72 | * Addresses retrieved from the running process, or debug
73 | * addresses which have been offset by the load address,
74 | * are represented by `real_addr`. They are also referred to as *real*
75 | * addresses. */
76 |
77 | /* Convert a real address to a debug address. */
78 | dbg_addr real_to_dbg (real_addr offset, real_addr real);
79 |
80 | /* Convert a debug address to a real address. */
81 | real_addr dbg_to_real (real_addr offset, dbg_addr dwarf);
82 |
83 | /* `printf` format string for addresses. */
84 | #define ADDR_FORMAT "0x%016lx"
85 |
86 | /* Filters to format the output. */
87 |
88 | typedef enum
89 | {
90 | FMT_NONE, /* No filter. Also returned by `parse_format` for `NULL` and unknown names. */
91 | FMT_HEX, /* Hexadecimal number. Parsed from "hex". */
92 | FMT_BITS, /* Binary data. Parsed from "bits". */
93 | FMT_ADDR, /* Address. Parsed from "addr". */
94 | FMT_DEC, /* Signed decimal number. Parsed from "dec". */
95 | FMT_BYTES, /* Hexadecimal bytes. Parsed from "bytes". */
96 | } FormatFilter;
97 |
98 | FormatFilter parse_format (const char *str);
99 |
100 | /* Turn `current` into `_default` if `current` is `FMT_NONE`. */
101 | FormatFilter default_format (FormatFilter current, FormatFilter _default);
102 |
103 | /* Format the given value based on `filter` and return the formatted string.
104 | * The caller should free the string. */
105 | char *
106 | print_format (uint64_t value, FormatFilter filter);
107 |
108 | /* Return the part of `abs_filepath` that's relative to
109 | * the present working directory.
110 | *
111 | * On success, the pointer that's returned points into
112 | * `abs_filepath`.
113 | *
114 | * Otherwise, `NULL` is returned to signal an error. */
115 | const char *relative_filepath (const char *abs_filepath);
116 |
117 | /* Print `filepath` as relative to the current working directory.
118 | *
119 | * `filepath` must not be `NULL`. */
120 | void print_as_relative_filepath (const char *filepath);
121 |
122 |
123 | /* FORMAT OF MESSAGES
124 | *
125 | * 1. They start with a capital letter.
126 | * 2. They do not include tags like 'ERR' or 'WARN'. Those tags
127 | * are added automatically.
128 | * 3. They do not end with a newline character. Line breaks are
129 | * automatically added at the end of each message.
130 | * 4. They do not end with a period. Periods are only used to delimit
131 | * sentences inside the message. If appropriate, question or
132 | * exclamation marks may be used at the end of a message.
133 | * 5. They may use standard `printf` formatting.
134 | *
135 | * EXAMPLES OF VALID MESSAGES
136 | *
137 | * 'Failed to retrieve data'
138 | * 'Did you forget to initialize this variable?'
139 | * 'Variable %s has the value %d' - Expects a string and an integer.
140 | *
141 | * EXAMPLES OF INVALID MESSAGES
142 | *
143 | * 'ERR: Cannot open file' - The 'ERR: ' is added automatically.
144 | * 'Please provide more information.' - The period at the end is not needed.
145 | * It only adds visual clutter. */
146 |
147 | /* Print messages not tied to the UI to stderr */
148 | void spray_err (const char *msg, ...);
149 | void spray_warn (const char *msg, ...);
150 | void spray_hint (const char *msg, ...);
151 |
152 | /* Print messages tied to the debugger REPL to stdout */
153 | void repl_err (const char *msg, ...);
154 | void repl_warn (const char *msg, ...);
155 | void repl_hint (const char *msg, ...);
156 |
157 |
158 | #endif /* _SPRAY_MAGIC_H_ */
159 |
--------------------------------------------------------------------------------
/src/print-source.scm:
--------------------------------------------------------------------------------
1 | (module source-files (load-source-file!
2 | sf-name
3 | sf-token-lines
4 | sf-types-env)
5 | (import scheme
6 | (chicken base)
7 | (chicken pathname)
8 | (only (chicken file) file-exists?)
9 | (only (chicken string) string-split conc)
10 | (only (srfi-13) string-prefix?)
11 | (only (chicken io) read-string)
12 | (only (srfi-1) fold filter-map)
13 | (srfi-69))
14 | (import tokenizer c-types c-tokens)
15 |
16 | (define source-files ; Cache of loaded source files, keyed by filepath (filled by `load-source-file!`).
17 | (make-hash-table equal?))
18 |
19 | ;;; The value stored under `key` in `table`, or #f if there is none.
20 | (define hash-table-lookup (lambda (table key) (hash-table-ref/default table key #f)))
21 |
22 | ;;; Store `value` under `key` in `table` and return `value`.
23 | (define (hash-table-insert table key value)
24 |   (begin (hash-table-set! table key value) value))
25 |
26 | ;;; Return a list of all lines in the file behind
27 | ;;; filepath or '() if the file can't be read.
28 | (define (file->list filepath)
29 |   ;; Splitting the content at the newline characters while keeping
30 |   ;; empty strings turns the trailing newline at the end of the file
31 |   ;; into one more, empty entry at the end of the list. Lines are
32 |   ;; printed by appending a newline to each entry, so this entry
33 |   ;; would show up as a line that doesn't exist. Here it is removed.
34 |   ;;
35 |   ;; Working on the reversed list copes with lists of any length,
36 |   ;; including the one-entry lists produced by an empty file or by
37 |   ;; a single line without a trailing newline.
38 |   (define (omit-trailing-line lines)
39 |     (let ((reversed (reverse lines)))
40 |       (if (and (pair? reversed)
41 |                (equal? (car reversed) ""))
42 |           (reverse (cdr reversed))
43 |           lines)))
44 |
45 |   (if (file-exists? filepath)
46 |       (call-with-input-file filepath
47 |         (lambda (port)
48 |           (let ((content (read-string #f port)))
49 |             ;; Anything but a string (#f or an end-of-file
50 |             ;; object) means that there are no lines.
51 |             (if (string? content)
52 |                 (omit-trailing-line
53 |                  (string-split content "\n" #t))
54 |                 '()))))
55 |       '()))
65 |
66 |
67 | ;;; A dependency node is the tagged list (*deps* FILE INCLUDES), where
68 | ;;; INCLUDES holds the dependency nodes of the files that FILE includes.
69 | (define (make-deps filepath includes) `(*deps* ,filepath ,includes))
70 |
71 | (define (deps? deps)
72 |   (if (pair? deps) (eq? (car deps) '*deps*) #f))
73 |
74 | (define (deps-file deps) (list-ref deps 1))
75 |
76 | (define (deps-includes deps) (list-ref deps 2))
79 |
80 | (define (filter-filepaths filepath token-lines) ; Paths of the files named by the include tokens in `token-lines`.
81 | (define (strip-path-quotes str) ; Drop the first and the last character of `str` (the delimiters around the path).
82 | (substring str
83 | 1
84 | (- (string-length str) 1)))
85 |
86 | (define (find-local-header include-filepath) ; Resolved against the directory of `filepath`; existence is not checked.
87 | (make-pathname
88 | (pathname-directory filepath)
89 | include-filepath))
90 |
91 | (define search-paths ; Directories tried, in this order, for system headers.
92 | (list "/usr/include" "/usr/local/include"))
93 |
94 | (define (find-system-header include-filepath) ; First search path containing the file, or #f.
95 | (let loop ((search-paths search-paths))
96 | (if (null? search-paths)
97 | #f
98 | (let ((check-filepath (make-pathname (car search-paths)
99 | include-filepath)))
100 | (if (file-exists? check-filepath)
101 | check-filepath
102 | (loop (cdr search-paths)))))))
103 |
104 | (define (find-location include-filepath search-type)
105 | (if (eq? search-type 'system-header)
106 | (find-system-header include-filepath)
107 | (find-local-header include-filepath)))
108 |
109 | (define (search-type filepath) ; Text starting with "<" is a system header, anything else a local one.
110 | (if (string-prefix? "<" filepath)
111 | 'system-header
112 | 'local-header))
113 |
114 | (filter-map ; Keeps only non-#f results, so system headers that weren't found are dropped.
115 | (lambda (token)
116 | (if (eq? (token-tag token)
117 | token-tag-include-filepath)
118 | (let ((filepath (token-text token))) ; Shadows the outer `filepath` with the still delimited include text.
119 | (find-location
120 | (strip-path-quotes filepath)
121 | (search-type filepath)))
122 | #f))
123 | (flatten token-lines)))
124 |
125 | ;;; A source file is the tagged list (*source-file* TOKEN-LINES DEPS TYPES-ENV).
126 | (define (make-sf token-lines deps types-env)
127 |   `(*source-file* ,token-lines ,deps ,types-env))
128 |
129 | (define (source-file? sf)
130 |   (if (pair? sf) (eq? (car sf) '*source-file*) #f))
131 |
132 | ;;; The dependency node of `sf`. Signals an error if `sf` isn't a source file.
133 | (define (sf-deps sf)
134 |   (cond ((source-file? sf) (list-ref sf 2))
135 |         (else (error "sf-deps, not a source file" sf))))
137 |
138 | ;;; Flatten the given source file's dependency tree into
139 | ;;; a list of all unique files in it.
140 | (define (sf-flat-deps sf)
141 |   (define seen (make-hash-table equal?))
142 |   ;; Record the file of `deps` and descend into its includes,
143 |   ;; unless the file was recorded before.
144 |   (define (visit! deps)
145 |     (let ((file (deps-file deps)))
146 |       (unless (hash-table-exists? seen file)
147 |         (hash-table-set! seen file '())
148 |         (for-each visit! (deps-includes deps)))))
149 |   (visit! (sf-deps sf))
150 |   (map car (hash-table->alist seen)))
155 |
156 | ;;; The token lines of every file in the dependency tree of `sf`, one entry per file.
157 | (define (sf-deps->token-lines-lst sf)
158 |   (let ((lines-of (lambda (dep)
159 |                     (sf-token-lines (hash-table-ref source-files dep)))))
160 |     (map lines-of (sf-flat-deps sf))))
162 |
163 | ;;; The filepath stored in the dependency node of `sf`.
164 | (define (sf-name sf) (let ((deps (sf-deps sf))) (deps-file deps)))
165 |
166 | ;;; The tokenized lines of `sf`. Signals an error if `sf` isn't a source file.
167 | (define (sf-token-lines sf)
168 |   (cond ((source-file? sf) (cadr sf))
169 |         (else (error "sf-token-lines, not a source file" sf))))
170 |
171 | (define (sf-types-env sf) ; Types environment of `sf`, computed on first use. No value is specified if `sf` isn't a source file (the `if` has no alternative).
172 | (define (_sf-types-env sf) (cadddr sf)) ; Raw fourth field; '() means "not computed yet".
173 | (if (source-file? sf)
174 | ;; Source files are created by calling `load-source-file!`. This procedure
175 | ;; doesn't initialize the types environment to save compute. The types
176 | ;; environment doesn't know about the tree-structure of the dependencies
177 | ;; at this point in time. Instead, it expects a flat list of inputs and
178 | ;; outputs another flat data structure. If this changes, that is if the
179 | ;; type environment becomes recursive, then it can be initialized in the
180 | ;; `load-source-file!` procedure instantly. This will likely save compute
181 | ;; because linearizing the dependency tree is not needed anymore.
182 | (let ((types-env (_sf-types-env sf)))
183 | (if (not (null? types-env))
184 | types-env
185 | (_sf-types-env
186 | (hash-table-update! source-files ; NOTE(review): relies on `hash-table-update!` returning the new value -- confirm for the srfi-69 implementation in use.
187 | (sf-name sf)
188 | (lambda (sf)
189 | (make-sf
190 | (sf-token-lines sf)
191 | (sf-deps sf)
192 | ;; Parse the types in the source file's dependency tree.
193 | (apply make-types-env
194 | (sf-deps->token-lines-lst sf))))))))))
195 |
196 | ;;; Load and tokenize the file at `filepath` together with all files it
197 | ;;; includes. Results are cached in `source-files`, so every file is read
198 | ;;; once. Returns the source file, or #f if the file can't be tokenized.
199 | (define (load-source-file! filepath)
200 |   (or (hash-table-lookup source-files filepath) ; Read already.
201 |       (let ((token-lines (tokenize (file->list filepath))))
202 |         ;; Failure is signalled with #f and not with '(), which counts
203 |         ;; as true, so that callers can detect it with a plain `not`.
204 |         (and token-lines
205 |              (begin
206 |                ;; Register the file before descending into its includes.
207 |                ;; A file that (indirectly) includes itself then finds
208 |                ;; this entry instead of recursing forever.
209 |                (hash-table-set! source-files filepath
210 |                                 (make-sf token-lines
211 |                                          (make-deps filepath '())
212 |                                          '()))
213 |                (let loop ((filepaths (filter-filepaths filepath token-lines))
214 |                           (includes '()))
215 |                  (if (pair? filepaths)
216 |                      ;; Includes that fail to load are left out.
217 |                      (let ((include (load-source-file! (car filepaths))))
218 |                        (loop (cdr filepaths)
219 |                              (if include
220 |                                  (cons (sf-deps include) includes)
221 |                                  includes)))
222 |                      (hash-table-insert source-files filepath
223 |                        (make-sf token-lines (make-deps filepath includes) '()))))))))))
224 |
225 | ;; End of module source-files.
226 | )
227 |
228 |
229 | (module print-source (print-source)
230 | (import scheme
231 | (chicken base)
232 | (only traversal sublist))
233 | (import tokenizer source-files)
234 |
235 |
236 | ;;; Create a sublist without exceeding the range of `lst`.
237 | ;;; Saturates on both ends if either index is out of range.
238 | ;;; An empty `lst` yields '() (its only valid range is 0 to 0).
239 | (define (sat-sublist lst start end)
240 |   (let* ((len (length lst))
241 |          ;; The start index is inclusive, so the largest start of a
242 |          ;; non-empty list is the index of its last element. `max`
243 |          ;; keeps that bound from becoming -1 for the empty list.
244 |          (sat-start (cond ((< start 0) 0)
245 |                           ((< start len) start)
246 |                           (else (max 0 (- len 1)))))
247 |          ;; The end index is exclusive.
248 |          (sat-end (cond ((< end 0) 0)
249 |                         ((< end len) end)
250 |                         (else len))))
251 |     (sublist lst sat-start sat-end)))
256 |
257 | (define (start-lineno lineno n-context-lines)
258 | (if (> lineno n-context-lines)
259 | (- lineno n-context-lines)
260 | 1))
261 |
262 | (define (end-lineno lineno n-context-lines)
263 | (+ lineno n-context-lines 1
264 | ;; Extend window downward if there
265 | ;; aren't enough lines above.
266 | (if (< lineno n-context-lines)
267 | (- n-context-lines lineno)
268 | 0)))
269 |
270 | (define (line-window lines lineno n-context-lines)
271 | (let ((start-lineno (start-lineno lineno n-context-lines))
272 | (end-lineno (end-lineno lineno n-context-lines)))
273 | (sat-sublist lines
274 | (- start-lineno 1)
275 | (- end-lineno 1))))
276 |
277 | ;;; Print the source code at the `filepath`:`lineno` with
278 | ;;; `n-context-lines` above and below `lineno`. Returns #f
279 | ;;; on error and #t on success.
280 | (define (print-source filepath lineno n-context-lines use-color)
281 | (let ((source-file (load-source-file! filepath))
282 | (start-lineno (start-lineno lineno n-context-lines)))
283 | (if (not source-file)
284 | #f
285 | (begin
286 | (display
287 | (color-tokens
288 | (line-window (sf-token-lines source-file)
289 | lineno
290 | n-context-lines)
291 | (sf-types-env source-file)
292 | start-lineno
293 | lineno
294 | use-color))
295 | #t))))
296 | ;; End module print-source.
297 | )
298 |
299 | (import print-source)
300 |
;;; Export `print-source` to C as `print_source_extern`. The export is
;;; only set up in compiled code; the interpreted branch is empty.
(cond-expand
  ;; FFI is not allowed in interpreted mode.
  (compiling
   (begin
     (import (chicken foreign)
             (chicken platform))
     ;; C-callable wrapper around `print-source`. It translates the
     ;; boolean result into an `int` status: 0 on success, -1 on error.
     (define-external (print_source_extern (c-string filepath)
                                           (unsigned-int lineno)
                                           (unsigned-int n_context_lines)
                                           (bool use-color))
       int
       (if (print-source filepath lineno n_context_lines use-color)
           0
           -1))
     ;; Presumably hands control back to the C code that started the
     ;; Scheme toplevel once all definitions above are in place.
     (return-to-host)))
  (else ))
317 |
--------------------------------------------------------------------------------
/src/print_source.c:
--------------------------------------------------------------------------------
1 | #include "print_source.h"
2 | #include "args.h"
3 |
4 | #include
5 | #include
6 |
/* Run the toplevel of the embedded Scheme program (`C_toplevel`)
 * through `CHICKEN_run`. The return value of `CHICKEN_run` is
 * not inspected. */
void
init_print_source (void)
{
  CHICKEN_run (C_toplevel);
}
12 |
/* Defined in `src/print-source.scm`. */
14 | extern int print_source_extern (const char *filepath,
15 | unsigned lineno,
16 | unsigned n_context_lines, bool use_color);
17 |
18 | SprayResult
19 | print_source (const char *filepath, unsigned lineno, unsigned n_context)
20 | {
21 | bool use_color = !get_args ()->flags.no_color;
22 | int res = print_source_extern (filepath,
23 | lineno,
24 | n_context,
25 | use_color);
26 | if (res == 0)
27 | {
28 | return SP_OK;
29 | }
30 | else
31 | {
32 | return SP_ERR;
33 | }
34 | }
35 |
--------------------------------------------------------------------------------
/src/print_source.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #ifndef _SPRAY_PRINT_SOURCE_H_
4 | #define _SPRAY_PRINT_SOURCE_H_
5 |
6 | #include "magic.h"
7 |
8 | /* Call this to initialize `print_source`. The program
9 | * will crash if `print_source` is called without being
10 | * initialized. */
11 | void init_print_source (void);
12 |
13 | SprayResult print_source (const char *source_filepath,
14 | unsigned lineno, unsigned n_context);
15 |
16 | #endif /* _SPRAY_PRINT_SOURCE_H_ */
17 |
--------------------------------------------------------------------------------
/src/ptrace.c:
--------------------------------------------------------------------------------
1 | #include "ptrace.h"
2 |
3 | #include
4 | #include
5 | #include
6 |
/* Return value that the `ptrace` calls in this file
 * are compared against to detect a failed request. */
enum
{ PTRACE_ERROR = -1 };
9 |
/* NOTE: All `PTRACE_PEEK*` requests return the
 * requested data. Because the return value is
 * also used to indicate an error (by returning
 * -1), `errno` must be checked to tell whether
 * the data read is really -1 or there was an error. */
15 |
16 | SprayResult
17 | pt_read_memory (pid_t pid, real_addr addr, uint64_t *read)
18 | {
19 | assert (read != NULL);
20 |
21 | /* The `ptrace(2)` API requires that we manually set `errno` here. */
22 | errno = 0;
23 | uint64_t value = ptrace (PTRACE_PEEKDATA, pid, addr, NULL);
24 | if (errno == 0)
25 | {
26 | /* No error was raised. Return the result. */
27 | *read = value;
28 | return SP_OK;
29 | }
30 | else
31 | {
32 | /* `errno` now indicates the error. */
33 | return SP_ERR;
34 | }
35 | }
36 |
37 | SprayResult
38 | pt_write_memory (pid_t pid, real_addr addr, uint64_t write)
39 | {
40 | if (ptrace (PTRACE_POKEDATA, pid, addr, write) == PTRACE_ERROR)
41 | {
42 | return SP_ERR;
43 | }
44 | else
45 | {
46 | return SP_OK;
47 | }
48 | }
49 |
50 | SprayResult
51 | pt_read_registers (pid_t pid, struct user_regs_struct *regs)
52 | {
53 | assert (regs != NULL);
54 | /* `addr` is ignored here. `PTRACE_GETREGS` stores all
55 | * of the tracee's general purpose registers in `regs`. */
56 | if (ptrace (PTRACE_GETREGS, pid, NULL, regs) == PTRACE_ERROR)
57 | {
58 | return SP_ERR;
59 | }
60 | else
61 | {
62 | return SP_OK;
63 | }
64 | }
65 |
66 | SprayResult
67 | pt_write_registers (pid_t pid, struct user_regs_struct *regs)
68 | {
69 | assert (regs != NULL);
70 | if (ptrace (PTRACE_SETREGS, pid, NULL, regs) == PTRACE_ERROR)
71 | {
72 | return SP_ERR;
73 | }
74 | else
75 | {
76 | return SP_OK;
77 | }
78 | }
79 |
80 | SprayResult
81 | pt_continue_execution (pid_t pid)
82 | {
83 | if (ptrace (PTRACE_CONT, pid, NULL, NULL) == PTRACE_ERROR)
84 | {
85 | return SP_ERR;
86 | }
87 | else
88 | {
89 | return SP_OK;
90 | }
91 | }
92 |
93 | SprayResult
94 | pt_trace_me (void)
95 | {
96 | if (ptrace (PTRACE_TRACEME, 0, NULL, NULL) == PTRACE_ERROR)
97 | {
98 | return SP_ERR;
99 | }
100 | else
101 | {
102 | return SP_OK;
103 | }
104 | }
105 |
106 | SprayResult
107 | pt_single_step (pid_t pid)
108 | {
109 | if (ptrace (PTRACE_SINGLESTEP, pid, NULL, NULL) == PTRACE_ERROR)
110 | {
111 | return SP_ERR;
112 | }
113 | else
114 | {
115 | return SP_OK;
116 | }
117 | }
118 |
119 | SprayResult
120 | pt_get_signal_info (pid_t pid, siginfo_t *siginfo)
121 | {
122 | assert (siginfo != NULL);
123 | if (ptrace (PTRACE_GETSIGINFO, pid, NULL, siginfo) == PTRACE_ERROR)
124 | {
125 | return SP_ERR;
126 | }
127 | else
128 | {
129 | return SP_OK;
130 | }
131 | }
132 |
--------------------------------------------------------------------------------
/src/ptrace.h:
--------------------------------------------------------------------------------
1 | /* The `ptrace` API is ... special. This header
2 | * wraps it up for use in the rest of this program.
3 | * If one of the functions here fails, `errno` will
4 | * hold the value set by `ptrace`. */
5 |
6 | #pragma once
7 |
8 | #ifndef _SPRAY_PTRACE_H_
9 | #define _SPRAY_PTRACE_H_
10 |
11 | #include
12 | #include
13 | #include
14 | #include
15 | #include
16 |
17 | #include "magic.h"
18 |
19 | SprayResult pt_read_memory (pid_t pid, real_addr addr, uint64_t * read);
20 | SprayResult pt_write_memory (pid_t pid, real_addr addr, uint64_t write);
21 |
22 | SprayResult pt_read_registers (pid_t pid, struct user_regs_struct *regs);
23 | SprayResult pt_write_registers (pid_t pid, struct user_regs_struct *regs);
24 |
25 | SprayResult pt_continue_execution (pid_t pid);
26 | SprayResult pt_trace_me (void);
27 | SprayResult pt_single_step (pid_t pid);
28 |
29 | SprayResult pt_get_signal_info (pid_t pid, siginfo_t * siginfo);
30 |
31 | #endif /* _SPRAY_PTRACE_H_ */
32 |
--------------------------------------------------------------------------------
/src/registers.c:
--------------------------------------------------------------------------------
1 | #include "registers.h"
2 | #include "magic.h"
3 | #include "ptrace.h"
4 |
5 | #include
6 | #include
7 | #include
8 | #include
9 | #include
10 |
11 | /* Both `x86_reg` and `reg_descriptors` are laid
12 | * out the same way as `user_regs_struct` in
13 | * `/usr/include/sys/user.h`. Hence, `x86_reg` can
14 | * index both of them. */
15 |
16 | SprayResult
17 | get_register_value (pid_t pid, x86_reg reg, uint64_t *read)
18 | {
19 | assert (read != NULL);
20 |
21 | struct user_regs_struct regs; /* Register buffer */
22 | SprayResult res = pt_read_registers (pid, ®s);
23 | if (res == SP_ERR)
24 | {
25 | return SP_ERR;
26 | }
27 | else
28 | {
29 | uint64_t *regs_as_array = (uint64_t *) & regs;
30 | *read = regs_as_array[reg];
31 | return SP_OK;
32 | }
33 | }
34 |
35 | SprayResult
36 | set_register_value (pid_t pid, x86_reg reg, uint64_t write)
37 | {
38 | struct user_regs_struct regs;
39 | SprayResult res = pt_read_registers (pid, ®s);
40 | if (res == SP_ERR)
41 | {
42 | return SP_ERR;
43 | }
44 | else
45 | {
46 | uint64_t *regs_as_array = (uint64_t *) & regs;
47 | regs_as_array[reg] = write;
48 | return pt_write_registers (pid, ®s);
49 | }
50 | }
51 |
52 | bool
53 | dwarf_regnum_to_x86_reg (uint8_t dwarf_regnum, x86_reg *store)
54 | {
55 | assert (store != NULL);
56 |
57 | size_t i = 0;
58 | for (; i < N_REGISTERS; i++)
59 | {
60 | if (reg_descriptors[i].dwarf_r == (int) dwarf_regnum)
61 | {
62 | break;
63 | }
64 | }
65 |
66 | if (i == N_REGISTERS)
67 | {
68 | /* We searched the entire array
69 | * without finding a match. : ( */
70 | return false;
71 | }
72 | else
73 | {
74 | *store = reg_descriptors[i].r;
75 | return true;
76 | }
77 | }
78 |
79 | /* NOTE: All DWARF register numbers are small unsigned integers.
80 | * Negative values for `dwarf_r` in `reg_descriptors` are used
81 | * to make those registers inaccessible via a DWARF register number. */
82 |
83 | bool
84 | get_dwarf_register_value (pid_t pid, int8_t dwarf_regnum, uint64_t *read)
85 | {
86 | assert (read != NULL);
87 |
88 | x86_reg associated_reg;
89 |
90 | bool regnum_was_translated =
91 | dwarf_regnum_to_x86_reg (dwarf_regnum, &associated_reg);
92 |
93 | if (regnum_was_translated)
94 | {
95 | SprayResult res = get_register_value (pid, associated_reg, read);
96 | if (res == SP_OK)
97 | {
98 | return true;
99 | }
100 | else
101 | {
102 | return false;
103 | }
104 | }
105 | else
106 | {
107 | return false;
108 | }
109 | }
110 |
111 | const char *
112 | get_name_from_register (x86_reg reg)
113 | {
114 | size_t i = 0;
115 | for (; i < N_REGISTERS; i++)
116 | {
117 | if (reg_descriptors[i].r == reg)
118 | {
119 | break;
120 | }
121 | }
122 |
123 | /* `reg_descriptors` maps all possible values
124 | * of `reg`. Therefore the name *must* be found. */
125 | assert (i != N_REGISTERS);
126 |
127 | return reg_descriptors[i].name;
128 | }
129 |
130 | bool
131 | get_register_from_name (const char *name, x86_reg *store)
132 | {
133 | size_t i = 0;
134 | for (; i < N_REGISTERS; i++)
135 | {
136 | if (str_eq (reg_descriptors[i].name, name))
137 | {
138 | break;
139 | }
140 | }
141 |
142 | if (i == N_REGISTERS)
143 | {
144 | /* Couldn't find a register named `name`. */
145 | return false;
146 | }
147 | else
148 | {
149 | *store = reg_descriptors[i].r;
150 | return true;
151 | }
152 | }
153 |
--------------------------------------------------------------------------------
/src/registers.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #ifndef _SPARY_REGISTERS_H_
4 | #define _SPARY_REGISTERS_H_
5 |
6 | #include "magic.h"
7 | #include "ptrace.h"
8 |
9 | #include
10 | #include
11 | #include
12 |
13 | typedef enum
14 | {
15 | r15 = 0, r14, r13, r12,
16 | rbp, rbx, r11, r10, r9, r8,
17 | rax, rcx, rdx, rsi, rdi,
18 | orig_rax, rip, cs, eflags,
19 | rsp, ss, fs_base, gs_base,
20 | ds, es, fs, gs,
21 | } x86_reg;
22 |
23 | typedef struct
24 | {
25 | x86_reg r;
26 | int dwarf_r; /* DWARF register number. */
27 | const char *name;
28 | } reg_descriptor;
29 |
30 | static const reg_descriptor reg_descriptors[N_REGISTERS] = {
31 | {r15, 15, "r15"},
32 | {r14, 14, "r14"},
33 | {r13, 13, "r13"},
34 | {r12, 12, "r12"},
35 | {rbp, 6, "rbp"},
36 | {rbx, 3, "rbx"},
37 | {r11, 11, "r11"},
38 | {r10, 10, "r10"},
39 | {r9, 9, "r9"},
40 | {r8, 8, "r8"},
41 | {rax, 0, "rax"},
42 | {rcx, 2, "rcx"},
43 | {rdx, 1, "rdx"},
44 | {rsi, 4, "rsi"},
45 | {rdi, 5, "rdi"},
46 | {orig_rax, -1, "orig_rax"},
47 | {rip, -1, "rip"},
48 | {cs, 51, "cs"},
49 | {eflags, 49, "eflags"},
50 | {rsp, 7, "rsp"},
51 | {ss, 52, "ss"},
52 | {fs_base, 58, "fs_base"},
53 | {gs_base, 59, "gs_base"},
54 | {ds, 53, "ds"},
55 | {es, 50, "es"},
56 | {fs, 54, "fs"},
57 | {gs, 55, "gs"},
58 | };
59 |
60 | /* Store the value of the register `reg` in `read`. */
61 | SprayResult get_register_value (pid_t pid, x86_reg reg, uint64_t * read);
62 |
63 | /* Write the value in `write` to register `reg`. */
64 | SprayResult set_register_value (pid_t pid, x86_reg reg, uint64_t write);
65 |
66 | /* Translate the given DWARF register number into the associated x86 register.
67 | * Returns `true` if the register number could be translated.
68 | * Otherwise, `false` is returned. */
69 | bool dwarf_regnum_to_x86_reg (uint8_t dwarf_regnum, x86_reg * store);
70 |
71 | /* Store the value of the register `dwarf_regnum` in `read`.
72 | * `true` is returned on success.
73 | *
74 | * If the value of `dwarf_regnum` doesn't represent a valid
75 | * register, false is returned and `read` stays untouched. */
76 | bool get_dwarf_register_value (pid_t pid, int8_t dwarf_regnum,
77 | uint64_t * read);
78 |
79 | /* Get the name of the register `reg` as a string. */
80 | const char *get_name_from_register (x86_reg reg);
81 |
82 | /* Store the register referred to by `name` in `store`.
83 | *
84 | * `true` is returned on success.
85 | *
 * If `name` is not a known register, then `false` is returned
87 | * and `store` remains untouched. */
88 | bool get_register_from_name (const char *name, x86_reg * store);
89 |
90 | #endif /* _SPARY_REGISTERS_H_ */
91 |
--------------------------------------------------------------------------------
/src/spray.c:
--------------------------------------------------------------------------------
1 | /* 🐛🐛🐛 Spray: an ergonomic debugger for x86_64 Linux. 🐛🐛🐛 */
2 |
3 | #include "debugger.h"
4 |
5 | #define SET_ARGS_ONCE
6 | #include "args.h"
7 |
8 | int
9 | setup_args (int argc, char **argv)
10 | {
11 | Args args = { 0 };
12 |
13 | if (parse_args (argc, argv, &args))
14 | {
15 | print_help_message (prog_name_arg (argc, argv));
16 | return -1;
17 | }
18 | else
19 | {
20 | set_args (&args);
21 | return 0;
22 | }
23 | }
24 |
25 | int
26 | main (int argc, char **argv)
27 | {
28 | if (setup_args (argc, argv) == -1)
29 | {
30 | return -1;
31 | }
32 |
33 | Debugger debugger;
34 |
35 | if (setup_debugger (get_args ()->file, get_args ()->args, &debugger) == -1)
36 | {
37 | return -1;
38 | }
39 |
40 | run_debugger (debugger);
41 |
42 | if (del_debugger (debugger) == SP_ERR)
43 | return -1;
44 |
45 | return 0;
46 | }
47 |
--------------------------------------------------------------------------------
/src/spray_dwarf.h:
--------------------------------------------------------------------------------
1 | /* Spray's wrapper around libdwarf. Exposes interfaces
2 | * to gather information about the current position in
3 | * the source files, about the types of runtime variables,
4 | * and about how to retrieve the values of those variables
5 | * at runtime. */
6 |
7 | #pragma once
8 |
9 | #ifndef _SPRAY_SPRAY_DWARF_H_
10 | #define _SPRAY_SPRAY_DWARF_H_
11 |
12 | #include "ptrace.h"
13 | #include "spray_elf.h" /* `ElfFile` in `SdLocEvalCtx` */
14 | #include "registers.h" /* `x86_reg` in `SdLocation` */
15 |
16 | #include
17 | #include
18 | #include
19 |
/* Initialize libdwarf's debug info. Returns NULL on error. */
21 | Dwarf_Debug sd_dwarf_init (const char *filepath, Dwarf_Error * error);
22 |
23 |
24 | /**************************************************/
25 | /* Information about the current position of the */
26 | /* executing program in the program source files. */
27 | /**************************************************/
28 |
29 | /* Get the file path of the source file that contains the
30 | * code that the given PC points to. The string that's returned
31 | * must be `free`'d by the caller. */
32 | char *sd_filepath_from_pc (Dwarf_Debug dbg, dbg_addr pc);
33 |
34 | typedef struct
35 | {
36 | bool is_ok;
37 | bool new_statement;
38 | bool prologue_end;
39 | /* Set to true if the PC used to retrieve the
40 | * line entry was exactly equal to `addr`. */
41 | bool is_exact;
42 | unsigned ln;
43 | unsigned cl;
44 | dbg_addr addr;
45 | /* Don't free this string.
46 | * It's owned by the `Dwarf_Debug` instance. */
47 | char *filepath;
48 | } LineEntry;
49 |
50 | /* Returns the line entry for the PC if this line entry contains
51 | * the address of PC. On error `is_ok` is set to false. */
52 | LineEntry sd_line_entry_from_pc (Dwarf_Debug dbg, dbg_addr pc);
53 |
54 | /* Get the line entry for the given position in the program source. */
55 | LineEntry sd_line_entry_at (Dwarf_Debug dbg, const char *filepath,
56 | unsigned lineno);
57 |
58 | typedef SprayResult (*LineCallback) (LineEntry * line, void *const data);
59 |
60 | /* Call `callback` for each new statement line entry
61 | * in the subprogram with the given name. */
62 | SprayResult sd_for_each_line (Dwarf_Debug dbg,
63 | const char *fn_name,
64 | const char *filepath,
65 | LineCallback callback, void *const init_data);
66 |
67 | /* Figure out where the function prologue of the function starting
68 | * at `low_pc` ends and return this address. Used for breakpoints on
69 | * functions to break only after the prologue.
70 | * `prologue_start` is the same address as a subprogram's low PC
71 | * and `function_end` is the same address as the high PC. */
72 | SprayResult sd_effective_start_addr (Dwarf_Debug dbg,
73 | dbg_addr prologue_start,
74 | dbg_addr function_end,
75 | dbg_addr * function_start);
76 |
77 |
78 | /*************************************************************/
79 | /* Information about location and type of runtime variables. */
80 | /*************************************************************/
81 |
82 | /* Type information. */
83 |
84 | typedef struct
85 | {
86 | enum
87 | {
88 | BASE_TYPE_CHAR,
89 | BASE_TYPE_SIGNED_CHAR,
90 | BASE_TYPE_UNSIGNED_CHAR,
91 | BASE_TYPE_SHORT,
92 | BASE_TYPE_UNSIGNED_SHORT,
93 | BASE_TYPE_INT,
94 | BASE_TYPE_UNSIGNED_INT,
95 | BASE_TYPE_LONG,
96 | BASE_TYPE_UNSIGNED_LONG,
97 | BASE_TYPE_LONG_LONG,
98 | BASE_TYPE_UNSIGNED_LONG_LONG,
99 | BASE_TYPE_FLOAT,
100 | BASE_TYPE_DOUBLE,
101 | BASE_TYPE_LONG_DOUBLE,
102 | } tag;
103 | /* Number of bytes used to represent this base type. */
104 | unsigned char size;
105 | } SdBasetype;
106 |
107 | /* See the DWARF 5 standard 5.3. */
108 | typedef enum
109 | {
110 | TYPE_MOD_ATOMIC = DW_TAG_atomic_type,
111 | TYPE_MOD_CONST = DW_TAG_const_type,
112 | TYPE_MOD_POINTER = DW_TAG_pointer_type,
113 | TYPE_MOD_RESTRICT = DW_TAG_restrict_type,
114 | TYPE_MOD_VOLATILE = DW_TAG_volatile_type,
115 | } SdTypemod;
116 |
/* Single node in the representation of variable types. */
118 | typedef struct
119 | {
120 | enum
121 | {
122 | NODE_BASE_TYPE,
123 | NODE_MODIFIER,
124 | NODE_UNSPECIFIED, /* See the DWARF 5 standard 5.2. */
125 | NODE_TYPEDEF, /* See the DWARF 5 standard 5.4. */
126 | } tag; /* Kind of this node. */
127 | union
128 | {
129 | SdBasetype base_type;
130 | SdTypemod modifier;
131 | };
132 | } SdTypenode;
133 |
134 | /* Host structure for variable types. */
135 | typedef struct
136 | {
137 | SdTypenode *nodes; /* Buffer of nodes. */
138 | size_t n_nodes; /* First `n` nodes in use. */
139 | size_t n_alloc; /* Maximum number of nodes. */
140 | } SdType;
141 |
142 | void del_type (SdType * type);
143 |
144 |
145 | /* `DW_AT_location` of DIEs that represent runtime variables.
146 | * It can be used in combination with `sd_init_loclist` to
147 | * initialize a new location list. */
148 | typedef struct
149 | {
150 | Dwarf_Attribute loc; /* `DW_AT_location` attribute. */
151 | } SdLocattr;
152 |
153 | /* Representation of runtime variables. They are used to find the
154 | * location of the variable's value in the running program, and
155 | * to find out what type the variable has.
156 | *
157 | * `SdLocattr`'s memory is handled by `libdwarf`. Only `SdType`
158 | * must be deleted after it's been used by the user. */
159 | typedef struct
160 | {
161 | SdLocattr loc; /* Runtime location. */
162 | SdType type; /* Type. */
163 | } SdVarattr;
164 |
165 | /* Get the attributes describing the variable with the given
166 | * name, and the file and line where this variable was declared.
167 | * `pc` is used to choose the closest variable if the variable
168 | * name occurs more than once.
169 | *
170 | * On success `SP_OK` is returned, and `attr`, `decl_file`, and
171 | * `decl_line` are set. `decl_file` must be `free`'d manually by
172 | * this function's caller.
173 | *
 * On error `SP_ERR` is returned, and `attr`, `decl_file`, and
 * `decl_line` remain unchanged.
176 | *
177 | * `dbg`, `var_name`, `attr`, `decl_file`, and `decl_line` must
178 | * not be `NULL`. */
179 | SprayResult sd_runtime_variable (Dwarf_Debug dbg,
180 | dbg_addr pc,
181 | const char *var_name,
182 | SdVarattr * attr,
183 | char **decl_file, unsigned *decl_line);
184 |
185 |
186 | /* Location information. */
187 |
188 | typedef struct SdExpression SdLocdesc;
189 | typedef struct SdLocRange SdLocRange;
190 |
191 | /* A DWARF location list (list of DWARF expressions) used
192 | * to describe the different locations of a specific
193 | * variable during the runtime of a program. */
194 | typedef struct SdLoclist
195 | {
196 | size_t n_exprs;
197 | SdLocdesc *exprs;
198 | SdLocRange *ranges;
199 | } SdLoclist;
200 |
201 | /* Initialize a location list based on the location
202 | * description attribute in `loc_attr`. */
203 | SprayResult sd_init_loclist (Dwarf_Debug dbg,
204 | SdLocattr loc_attr, SdLoclist * loclist);
205 |
206 | /* Delete the given location list. */
207 | void del_loclist (SdLoclist * loclist);
208 |
209 | /* Print the given location list. */
210 | void print_loclist (SdLoclist loclist);
211 |
212 | /* Contextual information used to evaluate
213 | * certain operations in location lists. */
214 | typedef struct SdLocEvalCtx
215 | {
216 | pid_t pid;
217 | dbg_addr pc;
218 | const ElfFile *elf;
219 | real_addr load_address;
220 | } SdLocEvalCtx;
221 |
222 | /* The location of a runtime variable at a specific point
223 | * in time. Created by evaluating the location list of the
224 | * variable in question. */
225 | typedef struct SdLocation
226 | {
227 | enum
228 | {
229 | LOC_ADDR,
230 | LOC_REG,
231 | } tag;
232 | union
233 | {
234 | real_addr addr;
235 | x86_reg reg;
236 | };
237 | } SdLocation;
238 |
239 | /* Evaluate the given location list and return the
240 | * current location of the variable the location list
241 | * describes. */
242 | SprayResult sd_eval_loclist (Dwarf_Debug dbg,
243 | SdLocEvalCtx ctx,
244 | SdLoclist loclist, SdLocation * location);
245 |
246 |
247 | #ifdef UNIT_TESTS
248 |
249 | /* Search callback types for searching DIEs. */
250 |
251 | typedef struct SearchFor
252 | {
253 | unsigned level; /* Level in the DIE tree. */
254 | const void *data; /* Custom data used as context while searching. */
255 | } SearchFor;
256 |
257 | typedef struct SearchFindings
258 | {
259 | void *data; /* Custom data collected while searching */
260 | } SearchFindings;
261 |
262 | typedef bool (*SearchCallback) (Dwarf_Debug,
263 | Dwarf_Die, SearchFor, SearchFindings);
264 |
265 | /* Search function that searches DIEs for different content. */
266 | int sd_search_dwarf_dbg (Dwarf_Debug dbg,
267 | Dwarf_Error * const error,
268 | SearchCallback search_callback,
269 | const void *search_for_data,
270 | void *search_findings_data);
271 |
272 | /* Find a `DW_TAG_subprogram` DIE by its name. */
273 | bool sd_is_subprog_with_name (Dwarf_Debug dbg,
274 | Dwarf_Die die, const char *name);
275 |
276 | /* Describe a result returned by libdwarf. */
277 | const char *what_dwarf_result (int dwarf_res);
278 |
279 | /* Full definition of types internal to `SdLoclist`. */
280 | typedef struct SdLocRange
281 | {
282 | bool meaningful;
283 | real_addr lowpc; /* Inclusive lower bound. */
284 | real_addr highpc; /* Exclusive upper bound. */
285 | } SdLocRange;
286 |
287 | typedef Dwarf_Small SdOperator;
288 | typedef Dwarf_Unsigned SdOperand;
289 |
290 | /* A single operation in a DWARF expression. */
291 | typedef struct SdOperation
292 | {
293 | SdOperator opcode;
294 | /* The operands 1-3 can be addressed either as single
295 | struct members or as elements in an array. */
296 | union
297 | {
298 | struct
299 | {
300 | SdOperand operand1;
301 | SdOperand operand2;
302 | SdOperand operand3;
303 | };
304 | SdOperand operands[3];
305 | };
306 | } SdOperation;
307 |
308 | /* A DWARF expression used for locexprs. */
309 | typedef struct SdExpression
310 | {
311 | size_t n_operations;
312 | SdOperation *operations;
313 | } SdExpression;
314 |
315 | char ** sd_get_filepaths (Dwarf_Debug dbg);
316 |
317 | #endif /* UNIT_TESTS */
318 |
319 | #endif /* _SPRAY_DWARF_H_ */
320 |
--------------------------------------------------------------------------------
/src/spray_elf.c:
--------------------------------------------------------------------------------
1 | #include "spray_elf.h"
2 |
3 | #include "magic.h"
4 |
5 | #include
6 | #include
7 | #include
8 | #include
9 | #include
10 | #include
11 |
/* Placeholder stored in a header count or index to signal that
 * the real value must be read from the initial section header. */
enum
{
  CHECK_SECTION_HEADER = 0xffff,
};
16 |
17 | /* Validates the content of the given `Elf64_Ehdr` and
18 | * parses all values of interest. Some values (`n_prog_hdrs`,
19 | * `n_sect_hdrs` and `shstrtab_idx`) might be too
20 | * large to be stored in the `Elf64_Ehdr`. Then they are set to
21 | * `CHECK_SECTION_HEADER` to signal that they should be read from
22 | * the inital section header. */
23 | ElfParseResult
24 | parse_elf_header (const Elf64_Ehdr *elf_src,
25 | ElfFile *elf_dest,
26 | uint64_t *prog_table_off,
27 | uint64_t *sect_table_off,
28 | uint32_t *n_prog_hdrs,
29 | uint32_t *n_sect_hdrs, uint32_t *shstrtab_idx)
30 | {
31 | assert (elf_dest != NULL);
32 | assert (prog_table_off != NULL);
33 | assert (sect_table_off != NULL);
34 | assert (n_prog_hdrs != NULL);
35 | assert (n_sect_hdrs != NULL);
36 | assert (shstrtab_idx != NULL);
37 |
38 | /* Is the magic number invalid? */
39 | if ((elf_src->e_ident[EI_MAG0] != ELFMAG0) /* 0x7f */
40 | || (elf_src->e_ident[EI_MAG1] != ELFMAG1) /* 'E' */
41 | || (elf_src->e_ident[EI_MAG2] != ELFMAG2) /* 'L' */
42 | || (elf_src->e_ident[EI_MAG3] != ELFMAG3) /* 'F' */
43 | )
44 | {
45 | return ELF_PARSE_INVALID;
46 | }
47 |
48 | /* Is this ELF file meant for something different than 64 bit? */
49 | if (elf_src->e_ident[EI_CLASS] != ELFCLASS64)
50 | {
51 | return ELF_PARSE_DISLIKE;
52 | }
53 |
54 | /* Is the file's data encoding two's complement and little-endian? */
55 | if (elf_src->e_ident[EI_DATA] == ELFDATA2LSB)
56 | {
57 | elf_dest->endianness = ELF_ENDIAN_LITTLE;
58 | }
59 | /* Is the file's data encoding two's complement and big-endian? */
60 | else if (elf_src->e_ident[EI_DATA] == ELFDATA2MSB)
61 | {
62 | elf_dest->endianness = ELF_ENDIAN_BIG;
63 | }
64 | /* Is the file's data encoding missing? */
65 | else
66 | {
67 | return ELF_PARSE_DISLIKE;
68 | }
69 |
70 | /* Is the ABI suppored? `ELFOSABI_NONE` is the same as `SYSV`. */
71 | if ((elf_src->e_ident[EI_OSABI] != ELFOSABI_LINUX)
72 | && (elf_src->e_ident[EI_OSABI] != ELFOSABI_NONE)
73 | && (elf_src->e_ident[EI_OSABI] != ELFOSABI_SYSV))
74 | {
75 | return ELF_PARSE_DISLIKE;
76 | }
77 |
78 | /* `EI_VERSION` and `EI_ABIVERSION` are basically unused
79 | * and must conform to the values below to be valid.
80 | * `e_version` is the same. */
81 | if (elf_src->e_ident[EI_VERSION] != EV_CURRENT
82 | || elf_src->e_ident[EI_ABIVERSION] != 0
83 | || elf_src->e_version != EV_CURRENT)
84 | {
85 | return ELF_PARSE_INVALID;
86 | }
87 |
88 | /* Is the object file type in the accepted range? */
89 | if (elf_src->e_type <= ELF_TYPE_CORE)
90 | {
91 | /* `e_type` maps to `elf_type` in this range. */
92 | elf_dest->type = (ElfType) elf_src->e_type;
93 | }
94 | else
95 | {
96 | /* Object file type is in the reserved range. */
97 | return ELF_PARSE_INVALID;
98 | }
99 |
100 | /* Is the target instruction set architecture something
101 | * different than x86? We're x86 only here! */
102 | if (elf_src->e_machine != EM_X86_64)
103 | {
104 | return ELF_PARSE_DISLIKE;
105 | }
106 |
107 |
108 | /*********************************************/
109 | /* Program and section header table parsing. */
110 | /*********************************************/
111 |
112 | /* Is this file missing a program header table? */
113 | if (elf_src->e_phoff == 0)
114 | {
115 | return ELF_PARSE_DISLIKE;
116 | }
117 | else
118 | {
119 | *prog_table_off = elf_src->e_phoff;
120 | }
121 |
122 | /* Is this file missing a section header table? */
123 | if (elf_src->e_shoff == 0)
124 | {
125 | return ELF_PARSE_DISLIKE;
126 | }
127 | else
128 | {
129 | *sect_table_off = elf_src->e_shoff;
130 | }
131 |
132 | /* Are the entry sizes in the header tables meant for 64-bit? */
133 | if (elf_src->e_phentsize != sizeof (Elf64_Phdr) ||
134 | elf_src->e_shentsize != sizeof (Elf64_Shdr))
135 | {
136 | return ELF_PARSE_DISLIKE;
137 | }
138 |
139 | /* Some of the values in the ELF header don't fit
140 | * its data types anymore. E.g. if there are more
141 | * than 0xffff program headers, the `e_phnum` field
142 | * cannot store how many of them there are. In this
143 | * case, the first entry in the section header table
144 | * stores the actual real value. */
145 |
146 | /* Does the number of program headers exceed the representable range? */
147 | if (elf_src->e_phnum == PN_XNUM)
148 | {
149 | *n_prog_hdrs = CHECK_SECTION_HEADER;
150 | }
151 | else
152 | {
153 | *n_prog_hdrs = elf_src->e_phnum;
154 | }
155 |
156 | if (elf_src->e_shnum == 0)
157 | {
158 | /* `e_shnum` being 0 signals one of two options:
159 | * (1) The number of section table headers lies outside
160 | * the range that can be represented in 16 bits and
161 | * the actual value is found in `sh_size`.
162 | * (2) The number of entries in really just 0. Then `sh_size`
163 | * will be 0, too. */
164 | *n_sect_hdrs = CHECK_SECTION_HEADER;
165 | }
166 | else
167 | {
168 | *n_sect_hdrs = elf_src->e_shnum;
169 | }
170 |
171 | /* Is the index of the section name string table outside
172 | * the range that can be represented? */
173 | if (elf_src->e_shstrndx == SHN_XINDEX)
174 | {
175 | *shstrtab_idx = CHECK_SECTION_HEADER;
176 | }
177 | else
178 | {
179 | *shstrtab_idx = elf_src->e_shstrndx;
180 | }
181 |
182 | return ELF_PARSE_OK;
183 | }
184 |
185 | /* The initial section header is reserved to store values that
186 | * didn't fit into the ELF header. If any of the argument's values
187 | * is set to `CHECK_SECTION_HEADER`, then it will be set to the
188 | * value in this header. */
189 | void
190 | parse_init_section (const Elf64_Shdr *init_section_header,
191 | uint32_t *n_prog_hdrs, uint32_t *n_sect_hdrs,
192 | uint32_t *shstrtab_idx)
193 | {
194 | assert (init_section_header != NULL);
195 | assert (n_prog_hdrs != NULL);
196 | assert (n_sect_hdrs != NULL);
197 | assert (shstrtab_idx != NULL);
198 |
199 | if (*n_prog_hdrs == CHECK_SECTION_HEADER)
200 | {
201 | *n_prog_hdrs = init_section_header->sh_info;
202 | }
203 |
204 | if (*n_sect_hdrs == CHECK_SECTION_HEADER)
205 | {
206 | *n_sect_hdrs = init_section_header->sh_size;
207 | }
208 |
209 | if (*shstrtab_idx == CHECK_SECTION_HEADER)
210 | {
211 | *shstrtab_idx = init_section_header->sh_link;
212 | }
213 | }
214 |
215 | /* Helpers to check bit masks. */
/* Returns `true` if at least one of the bits
 * in `mask` is set in `value`. */
bool
is_set (int value, int mask)
{
  const int overlap = value & mask;
  return overlap != 0;
}
221 |
/* Returns `true` if none of the bits
 * in `mask` are set in `value`. */
bool
is_unset (int value, int mask)
{
  const int overlap = value & mask;
  return overlap == 0;
}
227 |
/* Returns `true` if the section header `shdr` with the
 * given `name` describes the `.symtab` symbol table. */
bool
is_valid_symtab (Elf64_Shdr *shdr, const char *name)
{
  if (!str_eq (name, ".symtab"))
    return false;

  if (shdr->sh_type != SHT_SYMTAB)
    return false;

  /* `SHF_ALLOC` is always set for .dynsym. */
  if (!is_unset (shdr->sh_flags, SHF_ALLOC))
    return false;

  return shdr->sh_entsize == sizeof (Elf64_Sym);
}
236 |
/* Does the section header `shdr`, whose section is called `name`,
 * describe the string table? The name must be ".strtab", the type
 * `SHT_STRTAB` and `SHF_ALLOC` must not be set. */
bool
is_valid_strtab (Elf64_Shdr *shdr, const char *name)
{
  if (!str_eq (name, ".strtab"))
    {
      return false;
    }

  if (shdr->sh_type != SHT_STRTAB)
    {
      return false;
    }

  return is_unset (shdr->sh_flags, SHF_ALLOC);
}
244 |
245 | SprayResult
246 | find_table_sections (Elf64_Shdr *sect_headers, uint32_t n_sect_hdrs,
247 | const char *shstrtab, uint32_t *symtab_idx,
248 | uint32_t *strtab_idx)
249 | {
250 | assert (sect_headers != NULL);
251 | assert (shstrtab != NULL);
252 | assert (symtab_idx != NULL);
253 | assert (strtab_idx != NULL);
254 |
255 | /* NOTE: To check if a given index has been set already,
256 | * we can check if it is zero. This relies on the fact
257 | * that the section header at index zero is reserved and
258 | * cannot be used for any of the entries we are looking for. */
259 |
260 | Elf64_Shdr *cur_hdr = NULL;
261 | const char *name = NULL;
262 | for (uint32_t i = 0; i < n_sect_hdrs; i++)
263 | {
264 | cur_hdr = §_headers[i];
265 | name = &shstrtab[cur_hdr->sh_name];
266 | if (is_valid_symtab (cur_hdr, name)
267 | && *symtab_idx == 0)
268 | {
269 | *symtab_idx = i;
270 | }
271 | else if (is_valid_strtab (cur_hdr, name)
272 | && *strtab_idx == 0)
273 | {
274 | *strtab_idx = i;
275 | }
276 | }
277 |
278 | if (*symtab_idx != 0 && *strtab_idx != 0)
279 | {
280 | return SP_OK;
281 | }
282 | else
283 | {
284 | return SP_ERR;
285 | }
286 | }
287 |
288 | SprayResult
289 | file_size (int fd, size_t *dest)
290 | {
291 | assert (dest != NULL);
292 |
293 | off_t n_bytes = lseek (fd, 0, SEEK_END);
294 | if (n_bytes < 0)
295 | {
296 | return SP_ERR;
297 | }
298 | else
299 | {
300 | *dest = (size_t) n_bytes;
301 | return SP_OK;
302 | }
303 | }
304 |
305 | /* Cast pointers pointing into the memory mapped ELF
306 | * file to specific structures. Using these functions
307 | * is much more readable than plain casts. */
308 |
309 | static inline Elf64_Ehdr *
310 | ehdr_at (byte *bytes, size_t off)
311 | {
312 | return (Elf64_Ehdr *) (bytes + off);
313 | }
314 |
315 | static inline Elf64_Phdr *
316 | phdr_at (byte *bytes, size_t off)
317 | {
318 | return (Elf64_Phdr *) (bytes + off);
319 | }
320 |
321 | static inline Elf64_Shdr *
322 | shdr_at (byte *bytes, size_t off)
323 | {
324 | return (Elf64_Shdr *) (bytes + off);
325 | }
326 |
327 | static inline Elf64_Sym *
328 | symtab_at (byte *bytes, size_t off)
329 | {
330 | return (Elf64_Sym *) (bytes + off);
331 | }
332 |
333 | static inline char *
334 | strtab_at (byte *bytes, size_t off)
335 | {
336 | return (char *) (bytes + off);
337 | }
338 |
339 | ElfParseResult
340 | se_parse_elf (const char *filepath, ElfFile *elf_store)
341 | {
342 | assert (filepath != NULL);
343 | assert (elf_store != NULL);
344 |
345 | /* Acquire file descriptor for `mmap`. */
346 | int fd = open (filepath, O_RDONLY);
347 | if (fd == -1)
348 | {
349 | return ELF_PARSE_IO_ERR;
350 | }
351 |
352 | /* Get the number of bytes in the file. */
353 | size_t n_bytes = 0;
354 | if (file_size (fd, &n_bytes) == SP_ERR)
355 | {
356 | close (fd);
357 | return ELF_PARSE_IO_ERR;
358 | }
359 |
360 | byte *bytes = mmap (0, /* Kernel chooses address. */
361 | n_bytes, /* Init the entire file. */
362 | PROT_READ,
363 | MAP_PRIVATE,
364 | fd,
365 | 0);
366 |
367 | close (fd); /* Close no matter the outcome of `mmap`. */
368 |
369 | if (bytes == MAP_FAILED)
370 | {
371 | return ELF_PARSE_IO_ERR;
372 | }
373 |
374 | /* Parse relevant information from the ELF header. */
375 |
376 | Elf64_Ehdr *elf_header = ehdr_at (bytes, 0);
377 |
378 | uint64_t prog_table_off = 0;
379 | uint32_t n_prog_hdrs = 0;
380 |
381 | uint64_t sect_table_off = 0;
382 | uint32_t n_sect_hdrs = 0;
383 |
384 | uint32_t shstrtab_idx = 0;
385 |
386 | ElfParseResult elf_header_res =
387 | parse_elf_header (elf_header, elf_store, &prog_table_off, §_table_off,
388 | &n_prog_hdrs, &n_sect_hdrs, &shstrtab_idx);
389 |
390 | if (elf_header_res != ELF_PARSE_OK)
391 | {
392 | if (munmap (bytes, n_bytes) == -1)
393 | {
394 | return ELF_PARSE_IO_ERR;
395 | }
396 | else
397 | {
398 | return elf_header_res;
399 | }
400 | }
401 |
402 | Elf64_Shdr *sect_headers = shdr_at (bytes, sect_table_off);
403 |
404 | /* Fill-in missing values if they weren't found in the ELF header. */
405 | parse_init_section (sect_headers, &n_prog_hdrs, &n_sect_hdrs,
406 | &shstrtab_idx);
407 |
408 |
409 | /* Find the section headers for the symbol table and the string table. */
410 | uint32_t symtab_idx = 0;
411 | uint32_t strtab_idx = 0;
412 | /* Get the section header string table that contains the names of
413 | * the sections in the section header table. `sh_name` is an index into
414 | * that table, and thus the table can be used to read the names of the
415 | * different sections. */
416 | Elf64_Shdr *shstrtab_hdr = §_headers[shstrtab_idx];
417 | const char *shstrtab = strtab_at (bytes, shstrtab_hdr->sh_offset);
418 |
419 | SprayResult tables_res = find_table_sections (sect_headers, n_sect_hdrs,
420 | shstrtab, &symtab_idx,
421 | &strtab_idx);
422 |
423 | if (tables_res == SP_ERR)
424 | {
425 | if (munmap (bytes, n_bytes) == -1)
426 | {
427 | return ELF_PARSE_IO_ERR;
428 | }
429 | else
430 | {
431 | return ELF_PARSE_INVALID;
432 | }
433 | }
434 |
435 | elf_store->sect_table = (ElfSectTable)
436 | {
437 | .n_headers = n_sect_hdrs,.symtab_idx = symtab_idx,.shstrtab_idx =
438 | shstrtab_idx,.strtab_idx = strtab_idx,.headers = sect_headers,};
439 |
440 | Elf64_Phdr *prog_headers = phdr_at (bytes, prog_table_off);
441 | elf_store->prog_table = (ElfProgTable)
442 | {
443 | .n_headers = n_prog_hdrs,.headers = prog_headers
444 | };
445 |
446 | elf_store->data = (ElfData)
447 | {
448 | .bytes = bytes,.n_bytes = n_bytes
449 | };
450 |
451 | return ELF_PARSE_OK;
452 | }
453 |
454 | const char *
455 | elf_parse_result_name (ElfParseResult res)
456 | {
457 | static const char *elf_parse_result_names[] = {
458 | [ELF_PARSE_OK] = "parsed file successfully",
459 | [ELF_PARSE_IO_ERR] = "file I/O error",
460 | [ELF_PARSE_INVALID] = "invalid file contents",
461 | [ELF_PARSE_DISLIKE] = "unsupported file contents",
462 | };
463 |
464 | return elf_parse_result_names[res];
465 | }
466 |
467 | SprayResult
468 | se_free_elf (ElfFile elf)
469 | {
470 | if (munmap (elf.data.bytes, elf.data.n_bytes) == -1)
471 | {
472 | return SP_ERR;
473 | }
474 | else
475 | {
476 | return SP_OK;
477 | }
478 | }
479 |
480 | const Elf64_Sym *
481 | se_symbol_from_name (const char *name, const ElfFile *elf)
482 | {
483 | assert (name != NULL);
484 | assert (elf != NULL);
485 |
486 | Elf64_Shdr *symtab_hdr =
487 | &elf->sect_table.headers[elf->sect_table.symtab_idx];
488 | const Elf64_Sym *symtab =
489 | symtab_at (elf->data.bytes, symtab_hdr->sh_offset);
490 |
491 | uint64_t n_symbols = symtab_hdr->sh_size / symtab_hdr->sh_entsize;
492 |
493 | for (uint64_t i = 0; i < n_symbols; i++)
494 | {
495 | if (str_eq (se_symbol_name (&symtab[i], elf), name))
496 | {
497 | return &symtab[i];
498 | }
499 | }
500 |
501 | return NULL;
502 | }
503 |
504 | const Elf64_Sym *
505 | se_symbol_from_addr (dbg_addr addr, const ElfFile *elf)
506 | {
507 | assert (elf != NULL);
508 |
509 | Elf64_Shdr *symtab_hdr =
510 | &elf->sect_table.headers[elf->sect_table.symtab_idx];
511 | const Elf64_Sym *symtab =
512 | symtab_at (elf->data.bytes, symtab_hdr->sh_offset);
513 |
514 | uint64_t n_symbols = symtab_hdr->sh_size / symtab_hdr->sh_entsize;
515 |
516 | for (uint64_t i = 0; i < n_symbols; i++)
517 | {
518 | if (se_symbol_start_addr (&symtab[i]).value <= addr.value &&
519 | se_symbol_end_addr (&symtab[i]).value >= addr.value)
520 | {
521 | return &symtab[i];
522 | }
523 | }
524 |
525 | return NULL;
526 | }
527 |
/* Binding of `sym` (one of the `STB_*` values), which is
 * extracted from the symbol's `st_info` member. */
int
se_symbol_binding (const Elf64_Sym *sym)
{
  assert (sym != NULL);

  const unsigned char info = sym->st_info;
  return ELF64_ST_BIND (info);
}
534 |
/* Type of `sym` (one of the `STT_*` values), which is
 * extracted from the symbol's `st_info` member. */
int
se_symbol_type (const Elf64_Sym *sym)
{
  assert (sym != NULL);

  const unsigned char info = sym->st_info;
  return ELF64_ST_TYPE (info);
}
541 |
/* Visibility of `sym` (one of the `STV_*` values). Only the low
 * bits of `st_other` encode the visibility, so the value is
 * extracted with `ELF64_ST_VISIBILITY` instead of returning the
 * member unmasked; otherwise any other bit set in `st_other`
 * would make the result compare unequal to every `STV_*` value. */
int
se_symbol_visibility (const Elf64_Sym *sym)
{
  assert (sym != NULL);
  return ELF64_ST_VISIBILITY (sym->st_other);
}
548 |
/* Raw content of the `st_value` member of `sym`. */
uint64_t
symbol_value (const Elf64_Sym *sym)
{
  assert (sym != NULL);

  const uint64_t value = sym->st_value;
  return value;
}
555 |
556 | dbg_addr
557 | se_symbol_start_addr (const Elf64_Sym *sym)
558 | {
559 | assert (sym != NULL);
560 | return (dbg_addr) {sym->st_value};
561 | }
562 |
563 | dbg_addr
564 | se_symbol_end_addr (const Elf64_Sym *sym)
565 | {
566 | assert (sym != NULL);
567 | /* The symbol's size is the offset from the
568 | * start address if the symbol is a function. */
569 | return (dbg_addr) { sym->st_value + sym->st_size };
570 | }
571 |
572 | const char *
573 | se_symbol_name (const Elf64_Sym *sym, const ElfFile *elf)
574 | {
575 | assert (sym != NULL);
576 | assert (elf != NULL);
577 |
578 | Elf64_Shdr *strtab_hdr =
579 | &elf->sect_table.headers[elf->sect_table.strtab_idx];
580 | const char *strtab = strtab_at (elf->data.bytes, strtab_hdr->sh_offset);
581 | return &strtab[sym->st_name];
582 | }
583 |
--------------------------------------------------------------------------------
/src/spray_elf.h:
--------------------------------------------------------------------------------
1 | /* Parse ELF files and provide relevant info. */
2 |
3 | #pragma once
4 |
5 | #ifndef _SPARY_SPRAY_ELF_H_
6 | #define _SPRAY_SPRAY_ELF_H_
7 |
8 | #define _GNU_SOURCE
9 |
10 | #include "magic.h"
11 | #include "ptrace.h"
12 |
13 | #include
14 | #include
15 | #include
16 |
17 | typedef unsigned char byte;
18 |
19 | typedef enum
20 | {
21 | ELF_TYPE_NONE = ET_NONE,
22 | ELF_TYPE_REL = ET_REL,
23 | ELF_TYPE_EXEC = ET_EXEC,
24 | ELF_TYPE_DYN = ET_DYN,
25 | ELF_TYPE_CORE = ET_CORE,
26 | } ElfType;
27 |
28 | typedef enum
29 | {
30 | ELF_ENDIAN_BIG,
31 | ELF_ENDIAN_LITTLE,
32 | } Endianness;
33 |
34 | /* ELF program header table. */
35 | typedef struct
36 | {
37 | uint32_t n_headers;
38 | Elf64_Phdr *headers;
39 | } ElfProgTable;
40 |
41 | /* ELF section header table. */
42 | typedef struct
43 | {
44 | uint32_t n_headers;
45 | /* Symbol table index in `headers`. */
46 | uint32_t symtab_idx;
47 | /* String table indices in `headers`. */
48 | uint32_t shstrtab_idx;
49 | uint32_t strtab_idx;
50 | Elf64_Shdr *headers;
51 | } ElfSectTable;
52 |
53 | typedef struct
54 | {
55 | /* Memory-mapped content of file. */
56 | byte *bytes;
57 | size_t n_bytes;
58 | } ElfData;
59 |
60 | typedef struct
61 | {
62 | ElfType type;
63 | Endianness endianness;
64 | ElfProgTable prog_table;
65 | ElfSectTable sect_table;
66 | ElfData data;
67 | } ElfFile;
68 |
69 | typedef enum
70 | {
71 | ELF_PARSE_OK,
72 | ELF_PARSE_IO_ERR, /* Error during I/O. */
73 | ELF_PARSE_INVALID, /* Invalid file. */
74 | ELF_PARSE_DISLIKE, /* Theoretically a valid ELF file but
75 | * some feature used is not supported. */
76 | } ElfParseResult;
77 |
78 | const char *elf_parse_result_name (ElfParseResult res);
79 |
80 | /* Parse an ELF file and store the info in `elf`.
81 | * Returns `ELF_PARSE_OK` on success. `*elf` might
82 | * be changed even if the result is ultimately an error. */
83 | ElfParseResult se_parse_elf (const char *filepath, ElfFile * elf);
84 |
85 | /* Returns `SP_ERR` if un-mapping the ELF file didn't work. */
86 | SprayResult se_free_elf (ElfFile elf);
87 |
88 |
89 | /***************************/
90 | /* Symbol table interface. */
91 | /***************************/
92 |
93 | /* Get the symbol table entry for the symbol name.
94 | * Returns `NULL` in no such symbol was found. */
95 | const Elf64_Sym *se_symbol_from_name (const char *name, const ElfFile * elf);
96 |
97 | /* Get the symbol table entry for the symbol that
98 | * belongs to the given instruction address. */
99 | const Elf64_Sym *se_symbol_from_addr (dbg_addr addr, const ElfFile * elf);
100 |
101 | /* Access different fields in a symbol. The way information
102 | * is stored in the different members of a symbol is a bit
103 | * weird so these wrappers make the code more readable. */
104 |
105 | int se_symbol_binding (const Elf64_Sym * sym);
106 | int se_symbol_type (const Elf64_Sym * sym);
107 | int se_symbol_visibility (const Elf64_Sym * sym);
108 |
109 | /* Get start (low PC) and end (high PC) address of function symbol.
110 | * Return values are meaningless in this context if the symbol is
111 | * not a function. */
112 | dbg_addr se_symbol_start_addr (const Elf64_Sym * sym);
113 | dbg_addr se_symbol_end_addr (const Elf64_Sym * sym);
114 |
115 | /* Looks up the symbol name in the string table. */
116 | const char *se_symbol_name (const Elf64_Sym * sym, const ElfFile * elf);
117 |
118 | #endif /* _SPRAY_PARSE_ELF_H_ */
119 |
--------------------------------------------------------------------------------
/src/tokenize.scm:
--------------------------------------------------------------------------------
1 | (cond-expand
2 | ;; Tests are run using `csi` and require this load.
3 | (csi
4 | (load "src/c-syntax.scm"))
5 | (else ))
6 |
7 | (module tokenizer
8 | ;; Take a string (C source code) and create a list of tokens
9 | ;; that represent its syntax.
10 | (tokenize
11 | ;; Turn the given list of syntax tokens into a string.
12 | ;; The string contains ANSI escape codes to represent the
13 | ;; colors if `use-color` is true.
14 | color-tokens)
15 |
16 | (import scheme)
17 | (import (chicken base))
18 | (import (chicken string))
19 | (import traversal)
20 | (import (except (srfi-1) assoc member))
21 | (import (except (srfi-13) string->list string-fill! string-copy))
22 | (import format) ; `format`
23 |
24 | (import c-tokens)
25 | (import c-regex)
26 | (import c-types)
27 |
;;; Split C source code into syntax tokens. `code-lines` is a list
;;; of strings, one per line of code. The result has one list of
;;; tokens per line; the tag of each token says which kind of
;;; syntax its text is, so that the tokens can be colored later.
(define (tokenize code-lines)
  ;; Return the first element of `candidates` that `str`
  ;; starts with, or #f if it starts with none of them.
  (define (find-prefix str candidates)
    (find (lambda (candidate)
            (string-prefix? candidate str))
          candidates))

  ;; Boolean flavor of `find-prefix`.
  (define (prefix? str candidates)
    (and (find-prefix str candidates) #t))

  ;; Predicates that compare the start of `str` to fixed strings.
  (define (starts-with-comment? str) (prefix? str C-comment))
  (define (starts-with-line-comment? str) (prefix? str C++-comment))
  (define (starts-with-uncomment? str) (prefix? str C-uncomment))
  (define (starts-with-operator? str) (prefix? str C-operators))
  (define (starts-with-special-symbol? str) (prefix? str C-special-symbols))

  ;; Predicates that match the start of `str` against a regex.
  (define (starts-with-keyword? str) (regex-match? keyword-regex str))
  (define (starts-with-literal? str) (regex-match? literal-regex str))
  (define (starts-with-whitespace? str) (regex-match? whitespace-regex str))
  (define (starts-with-identifier? str) (regex-match? identifier-regex str))
  (define (starts-with-preproc? str) (regex-match? preproc-directive-regex str))
  (define (starts-with-any? str) (regex-match? any-regex str))

  ;; All regexes that describe some kind of constant,
  ;; in the order in which they are tried.
  (define constant-regexes
    (list hex-constant-regex
          octal-constant-regex
          decimal-constant-regex
          char-constant-regex
          sci-constant-regex
          float-constant-regex-frac
          float-constant-regex-whole))

  ;; The first regex in `constant-regexes` that
  ;; matches `str`, or #f if none of them does.
  (define (matching-constant-regex str)
    (find (lambda (rx) (regex-match? rx str))
          constant-regexes))

  (define (starts-with-constant? str)
    (and (matching-constant-regex str) #t))


  ;; NOTE: All scan procedures assume that the corresponding
  ;; `starts-with-*?` procedure is called first so as to verify
  ;; that the string actually matches the regex.

  ;; Opening delimiter of a block comment.
  (define (scan-comment code)
    (make-token-list (find-prefix code C-comment)
                     token-tag-comment))

  ;; Opening delimiter of a single-line comment.
  (define (scan-line-comment code)
    (make-token-list (find-prefix code C++-comment)
                     token-tag-comment))

  ;; Text inside of a single-line comment.
  (define (scan-line-comment-text code)
    (make-token-list (full-match line-comment-text-regex code)
                     token-tag-comment-text))

  ;; Text inside of a block comment.
  (define (scan-comment-text code)
    (make-token-list (full-match comment-text-regex code)
                     token-tag-comment-text))

  ;; Closing delimiter of a block comment that was opened before.
  (define (scan-uncomment code)
    (make-token-list (find-prefix code C-uncomment)
                     token-tag-uncomment))

  ;; Closing delimiter that is found while not inside a comment.
  (define (scan-trailing-uncomment code)
    (make-token-list (find-prefix code C-uncomment)
                     token-tag-trailing-uncomment))

  (define (scan-keyword code)
    (make-token-list (find-prefix code C-keywords)
                     token-tag-keyword))

  (define (scan-operator code)
    (make-token-list (find-prefix code C-operators)
                     token-tag-operator))

  (define (scan-special-symbol code)
    (make-token-list (find-prefix code C-special-symbols)
                     token-tag-special-symbol))

  (define (scan-literal code)
    (make-token-list (full-match literal-regex code)
                     token-tag-literal))

  (define (scan-whitespace code)
    (make-token-list (full-match whitespace-regex code)
                     token-tag-whitespace))

  ;; Identifiers that name a builtin type get their own tag.
  (define (scan-identifier code)
    (define (builtin-type? identifier)
      (any (lambda (type) (string=? type identifier))
           C-builtin-types))
    (let ((identifier (full-match identifier-regex code)))
      (make-token-list identifier
                       (if (builtin-type? identifier)
                           token-tag-prim-type
                           token-tag-identifier))))

  (define (scan-constant code)
    (let ((rx (matching-constant-regex code)))
      (if rx
          (make-token-list (full-match rx code)
                           token-tag-constant)
          (error "scan-constant, expected to find match" code))))

  ;; An `#include` that comes with a file path is split into
  ;; three tokens (directive, whitespace, file path). Any other
  ;; directive becomes a single token.
  (define (scan-preproc code)
    (let* ((matches (regex-matches preproc-directive-regex code))
           (directive (cadr matches)))
      (if (and (equal? "#include" directive)
               (cadddr matches))
          (list (make-token directive
                            token-tag-preproc-directive)
                (make-token (caddr matches)
                            token-tag-whitespace)
                (make-token (cadddr matches)
                            token-tag-include-filepath))
          (make-token-list directive
                           token-tag-preproc-directive))))

  (define (scan-any code)
    (make-token-list (full-match any-regex code)
                     token-tag-other))

  ;; Scan code that is not part of a comment. The order of the
  ;; clauses decides which kind of token wins if several match.
  (define (scan-normal-mode code)
    (cond ((starts-with-uncomment? code) (scan-trailing-uncomment code))
          ((starts-with-keyword? code) (scan-keyword code))
          ((starts-with-operator? code) (scan-operator code))
          ((starts-with-special-symbol? code) (scan-special-symbol code))
          ((starts-with-literal? code) (scan-literal code))
          ((starts-with-whitespace? code) (scan-whitespace code))
          ((starts-with-identifier? code) (scan-identifier code))
          ((starts-with-constant? code) (scan-constant code))
          ((starts-with-preproc? code) (scan-preproc code))
          ((starts-with-any? code) (scan-any code))
          (else (error "scan, invalid input" code))))

  ;;; Scan the next token(s) at the start of `code`. Returns the
  ;;; empty list once `code` is empty. The procedure remembers
  ;;; between calls whether it is inside of a comment.
  (define scan
    (let ((mode 'normal-mode))
      (lambda (code new-line?)
        ;; A single-line comment ends implicitly with its line.
        (when (and new-line?
                   (eq? mode 'line-comment-mode))
          (set! mode 'normal-mode))

        (if (string-null? code)
            '()
            (case mode
              ((normal-mode)
               (cond ((starts-with-comment? code)
                      ;; Begin block comment.
                      (set! mode 'comment-mode)
                      (scan-comment code))
                     ((starts-with-line-comment? code)
                      ;; Begin single-line comment.
                      (set! mode 'line-comment-mode)
                      (scan-line-comment code))
                     (else
                      ;; Scan normal code.
                      (scan-normal-mode code))))
              ((comment-mode)
               (cond ((starts-with-uncomment? code)
                      ;; Explicitly end multi-line comment.
                      (set! mode 'normal-mode)
                      (scan-uncomment code))
                     (else
                      ;; Eat-up the block comment.
                      (scan-comment-text code))))
              ((line-comment-mode)
               (scan-line-comment-text code)))))))

  ;;; Return the next token in the code. Tokens that `scan`
  ;;; returned in addition to the first one are handed out
  ;;; by the following calls before `scan` is called again.
  (define next-token
    (let ((pending '()))
      (lambda (code new-line?)
        (cond ((pair? pending)
               (let ((token (car pending)))
                 (set! pending (cdr pending))
                 token))
              (else
               (let ((scanned (scan code new-line?)))
                 (cond ((null? scanned)
                        ;; Signal that input is over.
                        (make-end-token))
                       (else
                        (set! pending (cdr scanned))
                        (car scanned)))))))))

  ;;; Return the rest of `str` after removing
  ;;; `(string-length cutoff)` characters from its start.
  (define (string-cutoff str cutoff)
    (string-drop str (string-length cutoff)))

  ;;; Split `code` into tokens and return them in source order,
  ;;; preceded by the reverse of `tokens`. `new-line?` is only
  ;;; passed on for the very first token; it may only be set to
  ;;; true by an external caller.
  (define (tokenize-code code tokens new-line?)
    (let loop ((rest-code code)
               (collected tokens)
               (at-line-start? new-line?))
      (let ((token (next-token rest-code at-line-start?)))
        (if (end-token? token)
            (reverse collected)
            (loop (string-cutoff rest-code (token-text token))
                  (cons token collected)
                  #f)))))

  ;;; Sometimes comments begin outside of the given piece of
  ;;; source code. Then there is a trailing `*/` somewhere at
  ;;; the start. This procedure includes anything up to that `*/`
  ;;; in the comment.
  (define (wrap-leading-comment token-lines)
    ;; Is there a `*/` anywhere that doesn't close a comment
    ;; which was opened inside of `token-lines`?
    (define (lead-comment? token-lines)
      (any (lambda (token)
             (eq? (token-tag token) token-tag-trailing-uncomment))
           (flatten token-lines)))

    ;; Tagged results of `wrap-leading-comment-line`: the line
    ;; on which the leading comment ends vs. any line before it.
    (define (make-lead-end tokens)
      (cons 'lead-comment-end tokens))

    (define (make-lead-line token)
      (cons 'lead-comment-line token))

    (define (lead-end? lead-line)
      (and (pair? lead-line)
           (eq? (car lead-line) 'lead-comment-end)))

    (define (lead-tokens lead-line)
      (if (and (pair? lead-line)
               (memq (car lead-line)
                     '(lead-comment-end lead-comment-line)))
          (cdr lead-line)
          (error "lead-line-tokens, not a lead comment line"
                 lead-line)))

    ;; Merge the text of all tokens in front of the first trailing
    ;; `*/` into a single comment text token. If the line has no
    ;; trailing `*/`, the whole line becomes comment text.
    (define (wrap-leading-comment-line line)
      (let tokens-loop ((comment-text "")
                        (remaining line))
        (cond ((null? remaining)
               (make-lead-line
                (make-token-list comment-text token-tag-comment-text)))
              ((eq? (token-tag (car remaining))
                    token-tag-trailing-uncomment)
               (make-lead-end
                (cons (make-token comment-text token-tag-comment-text)
                      remaining)))
              (else
               (tokens-loop
                (conc comment-text (token-text (car remaining)))
                (cdr remaining))))))

    (if (lead-comment? token-lines)
        (let lines-loop ((wrapped-lines '())
                         (remaining-lines token-lines))
          ;; Don't have to check if `remaining-lines` is null
          ;; because `wrap-leading-comment-line` will return a
          ;; lead end before the lines run out if `lead-comment?`
          ;; was true.
          (let* ((lead-line
                  (wrap-leading-comment-line (car remaining-lines)))
                 (wrapped-lines
                  (cons (lead-tokens lead-line) wrapped-lines)))
            (if (lead-end? lead-line)
                (append (reverse wrapped-lines)
                        (cdr remaining-lines))
                (lines-loop wrapped-lines
                            (cdr remaining-lines)))))
        token-lines))

  ;; NOTE(review): `scan` carries its comment mode from one line
  ;; to the next, so this presumably depends on `map` visiting
  ;; `code-lines` from first to last. Confirm before replacing
  ;; `map` with something else.
  (wrap-leading-comment
   (map
    (lambda (code-line)
      (tokenize-code code-line '() #t))
    code-lines))
  ) ; End procedure tokenize.
363 |
;;; Turn `token-lines` (one list of tokens per line of code) into a
;;; single string. Every line starts with its line number, counted
;;; up from `start-lineno`, and ends in a newline. The line with the
;;; number `active-lineno` is marked with an arrow. If `use-color`
;;; is true, each token is wrapped in ANSI escape codes picked by
;;; its tag; identifiers are colored like types if `types-env`
;;; knows them as a type.
(define (color-tokens token-lines types-env start-lineno active-lineno use-color)
  ;; ANSI escape sequence that selects `color`.
  (define (def-color color)
    (string-append "\033[" color "m"))

  (define literal-color (def-color "31"))
  (define type-color (def-color "32"))
  (define operator-color (def-color "33"))
  (define constant-color (def-color "34"))
  (define keyword-color (def-color "35"))
  (define comment-color (def-color "96"))
  (define no-color (def-color "0"))
  (define nothing "")

  ;; Is `tag` the tag of any part of a comment?
  (define (comment-tag? tag)
    (and (memq tag
               (list token-tag-comment
                     token-tag-comment-text
                     token-tag-uncomment
                     token-tag-trailing-uncomment))
         #t))

  ;; Escape sequence to emit in front of `token`.
  (define (pick-before-color token)
    (let ((tag (token-tag token)))
      (cond ((or (eq? tag token-tag-keyword)
                 (eq? tag token-tag-preproc-directive))
             keyword-color)
            ((eq? tag token-tag-operator) operator-color)
            ((eq? tag token-tag-prim-type) type-color)
            ((eq? tag token-tag-literal) literal-color)
            ((eq? tag token-tag-constant) constant-color)
            ((eq? tag token-tag-identifier)
             (if (is-type-in-env? types-env (token-text token))
                 type-color
                 nothing))
            ((comment-tag? tag) comment-color)
            (else nothing))))

  ;; Text of `token`, wrapped in color codes if colors are on.
  (define (format-token token)
    (if use-color
        (conc (pick-before-color token)
              (token-text token)
              no-color)
        (conc nothing
              (token-text token)
              nothing)))

  ;; Concatenate all strings in `strs`.
  (define (accumulate-strings strs)
    (fold-right conc "" strs))

  (define (format-tokens tokens)
    (accumulate-strings (map format-token tokens)))

  ;;; Check if `tokens` contains any non-whitespace text.
  (define (visible-content? tokens)
    (any (lambda (token)
           (any (lambda (char)
                  (not (char-whitespace? char)))
                (string->list (token-text token))))
         tokens))

  ;; Line number column for the line `offset` lines after the
  ;; first one, followed by the marker for the active line or
  ;; by padding. Lines without visible content get no padding.
  ;; NOTE(review): the padding strings are copied exactly as
  ;; they appear in the listing this was taken from. Confirm
  ;; their widths against the repository.
  (define (format-lineno tokens offset)
    (let* ((current-lineno (+ offset start-lineno))
           (marker (cond ((= current-lineno active-lineno)
                          " -> ")
                         ((visible-content? tokens)
                          " ")
                         (else
                          ""))))
      (conc (format #f " ~4d" current-lineno)
            marker)))

  (accumulate-strings
   (map-indexed
    (lambda (token-line idx)
      (conc (format-lineno token-line idx)
            (format-tokens token-line)
            "\n"))
    token-lines))) ; End procedure color-tokens.
455 | ) ; End module tokenizer.
456 |
--------------------------------------------------------------------------------
/tests/assets/Makefile:
--------------------------------------------------------------------------------
# Builds the binaries that the tests debug. Everything is built
# with debug information (`-g`) using `$(CC)`, except for
# `wrong-compiler.bin`, which is always built with `gcc`.

CC = clang
CFLAGS = -g

# `.PHONY` is a special *target*, not a variable: its prerequisites
# are the targets that don't name files. Writing `.PHONY = ...`
# only assigns a variable that make never looks at.
.PHONY: all clean

SIMPLE = simple.c
NESTED = nested_functions.c
MULTI_FILE = multi-file/file1.c multi-file/file2.c
EXTERN_VARIABLES = extern-variables/main.c extern-variables/second_file.c extern-variables/first_file.c extern-variables/third_file.c
INCLUDE_VARIABLE = include-variable/main.c
PRINT_ARGS = print_args.c
COMMENTED = commented.c
CUSTOM_TYPES = custom_types.c
VARIABLES = recurring_variables.c
POINTERS = pointers.c
TYPE_EXAMPLES = type_examples.c
MANY_FILES = many-files/foo1.c many-files/foo2.c many-files/main.c
DEREF_POINTERS = deref_pointers.c
TARGETS = 64bit-linux-simple.bin 32bit-linux-simple.bin nested-functions.bin multi-file.bin print-args.bin frame-pointer-nested-functions.bin no-frame-pointer-nested-functions.bin commented.bin custom-types.bin recurring-variables.bin pointers.bin extern-variables.bin include-variable.bin wrong-compiler.bin type-examples.bin many-files.bin deref_pointers.bin

all: $(TARGETS)

64bit-linux-simple.bin: $(SIMPLE)
	$(CC) $(CFLAGS) $< -o $@
32bit-linux-simple.bin: $(SIMPLE)
	$(CC) $(CFLAGS) -m32 $< -o $@
nested-functions.bin: $(NESTED)
	$(CC) $(CFLAGS) $< -o $@
recurring-variables.bin: $(VARIABLES)
	$(CC) $(CFLAGS) $< -o $@
# Same source as `nested-functions.bin`, with the
# frame pointer explicitly kept or omitted.
frame-pointer-nested-functions.bin: CFLAGS += -fno-omit-frame-pointer
frame-pointer-nested-functions.bin: $(NESTED)
	$(CC) $(CFLAGS) $< -o $@
no-frame-pointer-nested-functions.bin: CFLAGS += -fomit-frame-pointer
no-frame-pointer-nested-functions.bin: $(NESTED)
	$(CC) $(CFLAGS) $< -o $@
multi-file.bin: $(MULTI_FILE)
	$(CC) $(CFLAGS) $(MULTI_FILE) -o $@
print-args.bin: $(PRINT_ARGS)
	$(CC) $(CFLAGS) $< -o $@
commented.bin: $(COMMENTED)
	$(CC) $(CFLAGS) $< -o $@
custom-types.bin: $(CUSTOM_TYPES)
	$(CC) $(CFLAGS) $< -o $@
pointers.bin: $(POINTERS)
	$(CC) $(CFLAGS) $< -o $@
extern-variables.bin: $(EXTERN_VARIABLES)
	$(CC) $(CFLAGS) $(EXTERN_VARIABLES) -o $@
include-variable.bin: $(INCLUDE_VARIABLE)
	$(CC) $(CFLAGS) $< -o $@
# Deliberately not built with `$(CC)`.
wrong-compiler.bin: $(SIMPLE)
	gcc $(CFLAGS) $< -o $@
type-examples.bin: $(TYPE_EXAMPLES)
	$(CC) $(CFLAGS) $< -o $@
many-files.bin: $(MANY_FILES)
	$(CC) $(CFLAGS) $(MANY_FILES) -o $@
deref_pointers.bin: $(DEREF_POINTERS)
	$(CC) $(CFLAGS) $< -o $@

clean:
	$(RM) $(TARGETS)
62 |
63 |
--------------------------------------------------------------------------------
/tests/assets/commented.c:
--------------------------------------------------------------------------------
1 | #include <stdio.h>
2 | /*
3 |
4 | I start outside the text that's printed.
5 | and I span more than one line. */
6 | int main(void) { /* blah! */
7 | printf("blah\n"); // This C++ style comment can contain this */ or that /*.
8 | int a = 7;
9 | /* This comment ends outside the printed text
10 | and spans multiple lines, too.
11 |
12 | */
13 | return 0;
14 | }
15 |
16 |
--------------------------------------------------------------------------------
/tests/assets/custom_types.c:
--------------------------------------------------------------------------------
1 | #include <stdio.h>
2 |
3 | struct Rational {
4 | int numer;
5 | int denom;
6 | };
7 |
8 | void print_rat(struct Rational rat) {
9 | printf("%d / %d\n", rat.numer, rat.denom);
10 | }
11 |
12 | /* `breakpoints` starts with the keyword `break`.
13 | The syntax-highlighter must get confused by it. */
14 | struct breakpoints {
15 | char *blah;
16 | };
17 |
18 | int main(void) {
19 | struct Rational rat = (struct Rational) { 5, 3 };
20 | rat.numer = 9;
21 | printf("The numerator is: %d\n", rat.numer);
22 | print_rat(rat);
23 | struct breakpoints bp = { "hey!" };
24 | return 0;
25 | }
26 |
27 |
--------------------------------------------------------------------------------
/tests/assets/deref_pointers.c:
--------------------------------------------------------------------------------
1 | int main(void) {
2 | int i = 42;
3 | int *ip = &i;
4 | /* Some value likely to dereference to something: */
5 | long ptr = (long) ip;
6 | char *x = "This is a test";
7 | return x[0];
8 | }
9 |
--------------------------------------------------------------------------------
/tests/assets/extern-variables/first_file.c:
--------------------------------------------------------------------------------
1 | int blah_int1 = 42;
2 |
--------------------------------------------------------------------------------
/tests/assets/extern-variables/main.c:
--------------------------------------------------------------------------------
1 | #include
2 |
3 | /*
4 | The files in this directory were created to test
5 | that the file path to the file where some variable
6 | was declared is found correctly.
7 | */
8 |
9 | extern int blah_int1; /* Declared in first_file.c */
10 | extern int blah_int_another; /* Declared in third_file.c */
11 | extern int blah_int2; /* Declared in second_file.c */
12 | int my_own_int = 8;
13 |
14 | int main(void) {
15 | int sum = blah_int1 + my_own_int + blah_int2 + blah_int_another;
16 | printf("sum: %d\n", sum);
17 | return 0;
18 | }
19 |
--------------------------------------------------------------------------------
/tests/assets/extern-variables/second_file.c:
--------------------------------------------------------------------------------
1 | int blah_int2 = 16;
2 |
--------------------------------------------------------------------------------
/tests/assets/extern-variables/third_file.c:
--------------------------------------------------------------------------------
1 | int blah_int_another = 100;
2 |
--------------------------------------------------------------------------------
/tests/assets/include-variable/header.h:
--------------------------------------------------------------------------------
1 | int blah = 4;
2 |
--------------------------------------------------------------------------------
/tests/assets/include-variable/main.c:
--------------------------------------------------------------------------------
1 | #include "header.h"
2 |
3 | /* The executable compiled from this file and `header.h`
4 | * contains a single CU with multiple files (`main.c` and
5 | * `header.h`) in the line table header. It's used to test
6 | * retrieving the place where a variable was declared. */
7 |
8 | int here = 9;
9 |
10 | int main(void) {
11 | int sum = here + blah;
12 | return sum;
13 | }
14 |
--------------------------------------------------------------------------------
/tests/assets/many-files/bar1.h:
--------------------------------------------------------------------------------
1 | int bar1 = 9;
2 |
--------------------------------------------------------------------------------
/tests/assets/many-files/bar2.h:
--------------------------------------------------------------------------------
1 | int bar2 = 10;
2 |
--------------------------------------------------------------------------------
/tests/assets/many-files/baz.h:
--------------------------------------------------------------------------------
1 | int baz = 12345;
2 |
--------------------------------------------------------------------------------
/tests/assets/many-files/foo1.c:
--------------------------------------------------------------------------------
1 | #include "foo1.h"
2 |
3 | int
4 | foo1 (void)
5 | {
6 | return 7;
7 | }
8 |
--------------------------------------------------------------------------------
/tests/assets/many-files/foo1.h:
--------------------------------------------------------------------------------
1 | #ifndef _FOO1
2 | #define _FOO1
3 |
4 | int foo1 (void);
5 |
6 | #endif
7 |
--------------------------------------------------------------------------------
/tests/assets/many-files/foo2.c:
--------------------------------------------------------------------------------
1 | #include "foo2.h"
2 |
3 | #include "baz.h"
4 |
5 | int
6 | foo2 (void)
7 | {
8 | return baz;
9 | }
10 |
--------------------------------------------------------------------------------
/tests/assets/many-files/foo2.h:
--------------------------------------------------------------------------------
1 | #ifndef _FOO2
2 | #define _FOO2
3 |
4 | int foo2 (void);
5 |
6 | #endif
7 |
--------------------------------------------------------------------------------
/tests/assets/many-files/main.c:
--------------------------------------------------------------------------------
1 | #include "foo1.h"
2 | #include "foo2.h"
3 | #include "bar1.h"
4 | #include "bar2.h"
5 |
6 | /* A binary comprised of a number of files
7 | * used to test the names of all files used
8 | * in a binary.
9 | * The main.c CU's line header table should
10 | * contain the file names bar1.h, bar2.h and
11 | * main.c. The line header table for foo1.c
12 | * should contain only foo1.c.
13 | */
14 |
15 | int
16 | main (void)
17 | {
18 | return bar1 + bar2 + foo1 () + foo2 ();
19 | }
20 |
21 |
--------------------------------------------------------------------------------
/tests/assets/multi-file/file1.c:
--------------------------------------------------------------------------------
1 | #include "file2.h"
2 |
3 | int file1_compute_something(int n) {
4 | int i = 0;
5 | int acc = 0;
6 | while (i < n) {
7 | acc += i * i;
8 | i ++;
9 | }
10 | return acc;
11 | }
12 |
13 | int main(void) {
14 | int num1 = file1_compute_something(3);
15 | int num2 = file2_compute_something(num1);
16 | (void) (num1 + num2);
17 | struct Blah blah = file2_init_blah(4);
18 | (void) blah;
19 | return 0;
20 | }
21 |
--------------------------------------------------------------------------------
/tests/assets/multi-file/file2.c:
--------------------------------------------------------------------------------
1 | #include "file2.h"
2 |
3 | int file2_compute_something(int n) {
4 | if (n < 2) {
5 | return n;
6 | } else {
7 | return file2_compute_something(n - 1)
8 | + file2_compute_something(n - 2);
9 | }
10 | }
11 |
12 | struct Blah file2_init_blah(int x) {
13 | return (struct Blah) { x };
14 | }
15 |
--------------------------------------------------------------------------------
/tests/assets/multi-file/file2.h:
--------------------------------------------------------------------------------
1 | #ifndef _FILE2_H_
2 | #define _FILE2_H_
3 |
4 | int file2_compute_something(int n);
5 |
6 | struct Blah {
7 | int x;
8 | };
9 |
10 | struct Blah file2_init_blah(int x);
11 |
12 | #endif // _FILE2_H_
13 |
--------------------------------------------------------------------------------
/tests/assets/nested_functions.c:
--------------------------------------------------------------------------------
1 | #include
2 |
3 | int add(int a, int b) {
4 | int c = a + b;
5 | return c;
6 | }
7 |
8 | int mul(int a, int b) {
9 | int acc = 0;
10 | for (int i = 0; i < b; i++) {
11 | acc = add(acc, a);
12 | }
13 | return acc;
14 | }
15 |
16 | int main(void) {
17 | int product = mul(9, 3);
18 | int sum = add(product, 6);
19 | printf("Product: %d; Sum: %d\n", product, sum);
20 | return 0;
21 | }
22 |
23 |
--------------------------------------------------------------------------------
/tests/assets/pointers.c:
--------------------------------------------------------------------------------
1 | // This file is used to test the debug information
2 | // generated to describe pointer variables.
3 |
4 | int deref_add(int *a, int *b) {
5 | int _a = *a;
6 | int _b = *b;
7 | return _a + _b;
8 | }
9 |
10 | void ptr_inc(int *inc) {
11 | *inc += 1;
12 | }
13 |
14 | int main(void) {
15 | int main_a = 9;
16 | int main_b = 18;
17 | int main_sum = deref_add(&main_a, &main_b);
18 |
19 | // Here the value of `main_sum` is increased by 1.
20 | ptr_inc(&main_sum);
21 | return 0;
22 | }
23 |
24 |
--------------------------------------------------------------------------------
/tests/assets/print_args.c:
--------------------------------------------------------------------------------
1 | #include
2 |
3 | int main(int argc, char *argv[]) {
4 | printf("Command line arguments: ");
5 |
6 | for (int i = 0; i < argc; i++) {
7 | printf("%s", argv[i]);
8 | if (i + 1 < argc) {
9 | printf(" ");
10 | } else {
11 | printf("\n");
12 | }
13 | }
14 |
15 | return 0;
16 | }
17 |
--------------------------------------------------------------------------------
/tests/assets/recurring_variables.c:
--------------------------------------------------------------------------------
1 | const int a = 3; /* Global variable. */
2 |
3 | long blah(long b, long c) {
4 | if (b > c) {
5 | return b - c;
6 | } else {
7 | return c - b;
8 | }
9 | }
10 |
11 | int main(void) {
12 | int a = 19;
13 | long x = 0;
14 | long b = 5;
15 | long c = 9;
16 | c = blah(b, c);
17 | return 0;
18 | }
19 |
--------------------------------------------------------------------------------
/tests/assets/simple.c:
--------------------------------------------------------------------------------
1 | int weird_sum(int a,
2 | int b) {
3 | int c = a + 1;
4 | int d = b + 2;
5 | int e = c + d;
6 | return e;
7 | }
8 |
9 | int main(void) {
10 | int a = 7;
11 | int b = 11;
12 | int c = weird_sum(a, b);
13 | return 0;
14 | }
15 |
--------------------------------------------------------------------------------
/tests/assets/type_examples.c:
--------------------------------------------------------------------------------
1 | int a = 1;
2 | const long b = 2;
3 | void *c = (void *) 3;
4 | long long *d = (long long *) 4;
5 | const unsigned *e = (const unsigned *) 5;
6 | int *const f = (int *const) 6;
7 | volatile const char *restrict const g = (const char *const)7;
8 |
9 |
10 | int main(void) {
11 | char h = 'a';
12 | unsigned char i = 'b';
13 | signed char j = 'c';
14 | const char k = 'd';
15 | const unsigned char l = 'e';
16 | const signed char m = 'f';
17 | unsigned long long n = (unsigned long long) 1 << 63;
18 |
19 | typedef long i64;
20 | i64 o = -123456789;
21 | typedef unsigned char byte;
22 | byte p = 0xff;
23 |
24 | return 0;
25 | }
26 |
--------------------------------------------------------------------------------
/tests/c-types.scm:
--------------------------------------------------------------------------------
1 | (load "src/tokenize.scm")
2 | (load "src/c-syntax.scm")
3 |
4 | (import test
5 | c-types
6 | tokenizer)
7 | ;; Tests for collecting struct/union/enum tag names into a types environment.
8 | (test-group "(c-types)"
9 | (test-assert "check finds single struct type"
10 | (is-type-in-env? (make-types-env (tokenize (list "struct a {};"))) "a"))
11 | (test-assert "check finds single union type"
12 | (is-type-in-env? (make-types-env (tokenize (list "union a {};"))) "a"))
13 | (test-assert "check finds single enum type"
14 | (is-type-in-env? (make-types-env (tokenize (list "enum a {};"))) "a"))
15 | ;; The environment is expected to be a list headed by '*env*, followed by the tag names.
16 | (test "single struct type"
17 | (list '*env* "a")
18 | (make-types-env (tokenize (list "struct a {};"))))
19 | (test "single union type"
20 | (list '*env* "a")
21 | (make-types-env (tokenize (list "union a {};"))))
22 | (test "single enum type"
23 | (list '*env* "a")
24 | (make-types-env (tokenize (list "enum a {};"))))
25 | ;; Several declarations in one input line: every tag is expected, in source order.
26 | (test-assert "finds in multiple types"
27 | (is-type-in-env?
28 | (make-types-env (tokenize (list "enum a {}; union b {}; struct c {};")))
29 | "c"))
30 | (test "multiple types"
31 | (list '*env* "a" "b" "c")
32 | (make-types-env (tokenize (list "enum a {}; union b {}; struct c {};"))))
33 | ;; End of test-group c-types
34 | )
35 |
36 | (test-exit)
37 |
--------------------------------------------------------------------------------
/tests/debugger.c:
--------------------------------------------------------------------------------
1 | #include "test_utils.h"
2 |
3 | #include "../src/breakpoints.h"
4 | #define UNIT_TESTS
5 | #include "../src/debugger.h"
6 |
7 | TEST (breakpoints_work)
8 | {
9 | Debugger dbg;
10 | char *prog_argv[] = { SIMPLE_64BIT_BIN, NULL };
11 | assert_int (setup_debugger (prog_argv[0], prog_argv, &dbg), ==, 0);
12 |
13 | real_addr bp_addr1 = { 0x00401122 };
14 |
15 | enable_breakpoint (dbg.breakpoints, bp_addr1);
16 | assert_true (lookup_breakpoint (dbg.breakpoints, bp_addr1));
17 |
18 | disable_breakpoint (dbg.breakpoints, bp_addr1);
19 | assert_false (lookup_breakpoint (dbg.breakpoints, bp_addr1));
20 |
21 | del_debugger (dbg);
22 |
23 | return MUNIT_OK;
24 | }
25 |
26 | #define TEST_VARLOC(test_name, bin_name, var_name, pc_value, expect) \
27 | TEST ((test_name)) { \
28 | Debugger dbg; \
29 | char *prog_argv[] = {(bin_name), NULL}; \
30 | assert_int(setup_debugger(prog_argv[0], prog_argv, &dbg), ==, 0); \
31 | \
32 | dbg_addr pc = {(pc_value)}; \
33 | \
34 | enable_breakpoint(dbg.breakpoints, dbg_to_real(dbg.load_address, pc)); \
35 | ExecResult exec_res = continue_execution(&dbg); \
36 | assert_int(exec_res.type, ==, SP_OK); \
37 | ExecResult wait_res = wait_for_signal(&dbg); \
38 | assert_int(wait_res.type, ==, SP_OK); \
39 | \
40 | RuntimeVariable *var = init_var(pc, \
41 | dbg.load_address, \
42 | (var_name), \
43 | dbg.pid, \
44 | dbg.info); \
45 | assert_ptr_not_null(var); \
46 | assert_true(is_addr_loc(var)); \
47 | real_addr loc_addr = var_loc_addr(var); \
48 | del_var(var); \
49 | \
50 | uint64_t value = 0; \
51 | SprayResult mem_res = pt_read_memory(dbg.pid, loc_addr, &value); \
52 | assert_int(mem_res, ==, SP_OK); \
53 | \
54 | assert_int(value, ==, (expect)); \
55 | \
56 | del_debugger(dbg); \
57 | \
58 | return MUNIT_OK; \
59 | }
60 |
61 | /* Stack variable declared in the function body. */
62 | TEST_VARLOC (varloc_fbreg_works0, SIMPLE_64BIT_BIN, "a", 0x401163, 7)
63 | /* Stack variable passed as a function parameter. */
64 | TEST_VARLOC (varloc_fbreg_works1, RECURRING_VARIABLES_BIN, "c", 0x401124, 9)
65 | /* Global variable. */
66 | TEST_VARLOC (varloc_addr_works, RECURRING_VARIABLES_BIN, "a", 0x401124, 3)
67 | extern SprayResult is_file_with_line (const char *file_line);
68 |
69 | TEST (file_line_check_works)
70 | {
71 | SprayResult res = is_file_with_line ("this/is/a/file:2578");
72 | assert_int (res, ==, SP_OK);
73 |
74 | res = is_file_with_line ("this/is/a/filename/without/a/line");
75 | assert_int (res, ==, SP_ERR);
76 |
77 | res = is_file_with_line ("710985");
78 | assert_int (res, ==, SP_ERR);
79 |
80 | res = is_file_with_line ("src/blah/test.c74");
81 | assert_int (res, ==, SP_ERR);
82 |
83 | return MUNIT_OK;
84 | }
85 |
86 | extern SprayResult is_valid_identifier (const char *func_name);
87 | /* Checks which strings are accepted as identifiers. NOTE(review): the declaration above returns SprayResult, but the results below are stored in a bool and checked with assert_true/assert_false -- confirm the return type against the definition. */
88 | TEST (function_name_check_works)
89 | {
90 | bool is_valid = is_valid_identifier ("function_name_check_works1203");
91 | assert_true (is_valid);
92 |
93 | is_valid = is_valid_identifier ("785019blah_function"); // Starts with numbers.
94 | assert_false (is_valid);
95 |
96 | is_valid = is_valid_identifier ("check-function-name"); // Kebab case.
97 | assert_false (is_valid);
98 |
99 | is_valid = is_valid_identifier ("check>function!>name"); // Other symbols.
100 | assert_false (is_valid);
101 |
102 | return MUNIT_OK;
103 | }
104 |
105 | MunitTest debugger_tests[] = {
106 | REG_TEST (breakpoints_work),
107 | REG_TEST (file_line_check_works),
108 | REG_TEST (function_name_check_works),
109 | REG_TEST (varloc_fbreg_works0),
110 | REG_TEST (varloc_fbreg_works1),
111 | REG_TEST (varloc_addr_works),
112 | {NULL, NULL, NULL, NULL, MUNIT_TEST_OPTION_NONE, NULL}
113 | };
114 |
--------------------------------------------------------------------------------
/tests/dwarf.c:
--------------------------------------------------------------------------------
1 | #include
2 |
3 | #include "test_utils.h"
4 |
5 | #define UNIT_TESTS
6 | #include "../src/info.h"
7 | #include "../src/spray_dwarf.h"
8 |
9 | #include
10 | #include
11 | #include
12 |
13 | enum
14 | {
15 | RAND_DATA_BUF_SIZE = 32,
16 | };
17 |
18 | TEST (get_line_entry_from_pc_works)
19 | {
20 | Dwarf_Error error = NULL;
21 | Dwarf_Debug dbg = sd_dwarf_init (SIMPLE_64BIT_BIN, &error);
22 | assert_ptr_not_null (dbg);
23 |
24 | { /* Happy path. */
25 | dbg_addr pc = { 0x00401156 };
26 | LineEntry line_entry = sd_line_entry_from_pc (dbg, pc);
27 | assert_true (line_entry.is_ok);
28 | assert_int (line_entry.ln, ==, 11);
29 | assert_int (line_entry.cl, ==, 7);
30 | assert_ptr_not_null (line_entry.filepath);
31 | /* Ignore the part of the filepath that is host specific. */
32 | assert_ptr_not_null (strstr (line_entry.filepath, SIMPLE_SRC));
33 | }
34 | { /* Sad path 😢. */
35 | dbg_addr pc = { 0xdeabbeef };
36 | LineEntry line_entry = sd_line_entry_from_pc (dbg, pc);
37 | assert_false (line_entry.is_ok);
38 | assert_ptr_equal (line_entry.filepath, NULL);
39 | }
40 |
41 | dwarf_finish (dbg);
42 | return MUNIT_OK;
43 | }
44 | /* Line callback passed to sd_for_each_line by `iterating_lines_works`: appends `line->ln` to the `unsigned` array behind `void_data`. */
45 | SprayResult
46 | callback__store_line (LineEntry *line, void *const void_data)
47 | {
48 | assert (line != NULL);
49 | assert (void_data != NULL);
50 | /* The write index is `static`: it persists across calls and is never reset, so at most five entries can be stored in total. */
51 | static int i = 0;
52 | unsigned *lines = (unsigned *) void_data;
53 | assert (i < 5);
54 | lines[i++] = line->ln;
55 |
56 | return SP_OK;
57 | }
58 | /* Checks that sd_for_each_line hands the line numbers of `main` in SIMPLE_SRC to the callback, in order. */
59 | TEST (iterating_lines_works)
60 | {
61 | Dwarf_Error error = NULL;
62 | Dwarf_Debug dbg = sd_dwarf_init (SIMPLE_64BIT_BIN, &error);
63 | assert_ptr_not_null (dbg);
64 | /* Zero-initialized so that a short iteration is compared against defined values, not stack garbage. */
65 | unsigned lines[5] = { 0 };
66 | char *filepath = realpath (SIMPLE_SRC, NULL);
67 | assert_ptr_not_null (filepath); /* realpath returns NULL if the source file cannot be resolved. */
68 | sd_for_each_line (dbg, "main", filepath, callback__store_line, &lines);
69 | dwarf_finish (dbg);
70 | free (filepath);
71 | /* The callback must have stored exactly these five line numbers. */
72 | unsigned expect[5] = { 9, 10, 11, 12, 13 };
73 | assert_memory_equal (sizeof (unsigned[5]), lines, expect);
74 |
75 | return MUNIT_OK;
76 | }
77 |
78 | bool
79 | callback__test_search (Dwarf_Debug dbg,
80 | Dwarf_Die die,
81 | SearchFor search_for, SearchFindings search_findings)
82 | {
83 | assert (dbg != NULL);
84 | assert (die != NULL);
85 |
86 | const char *const fn_name = (char *) search_for.data;
87 | if (sd_is_subprog_with_name (dbg, die, fn_name))
88 | {
89 | unsigned *level = (unsigned *) search_findings.data;
90 | *level = search_for.level;
91 | return true;
92 | }
93 | else
94 | {
95 | return false;
96 | }
97 | }
98 |
99 | TEST (search_returns_the_correct_result)
100 | {
101 | Dwarf_Error error = NULL;
102 | Dwarf_Debug dbg = sd_dwarf_init (SIMPLE_64BIT_BIN, &error);
103 | assert_ptr_not_null (dbg);
104 | int res = DW_DLV_OK;
105 |
106 | res = sd_search_dwarf_dbg (dbg,
107 | &error,
108 | callback__test_search,
109 | "this_function_name_does_not_exist", NULL);
110 | assert_int (res, ==, DW_DLV_NO_ENTRY);
111 |
112 | unsigned found_at_level = -1; /* Not a valid level. */
113 | res = sd_search_dwarf_dbg (dbg, &error, callback__test_search, "main", // <- This does exist.
114 | &found_at_level);
115 | assert_int (res, ==, DW_DLV_OK);
116 | assert_int (found_at_level, ==, 1);
117 |
118 | dwarf_finish (dbg);
119 |
120 | return MUNIT_OK;
121 | }
122 |
123 | SprayResult
124 | test_get_effective_start_addr (Dwarf_Debug dbg,
125 | const DebugSymbol *sym, dbg_addr *dest)
126 | {
127 | return sd_effective_start_addr (dbg, sym_start_addr (sym),
128 | sym_end_addr (sym), dest);
129 | }
130 | /* Checks that the effective start address of a function maps to the first line after its declaration. */
131 | TEST (get_effective_function_start_works)
132 | {
133 | Dwarf_Error error = NULL;
134 | Dwarf_Debug dbg = sd_dwarf_init (SIMPLE_64BIT_BIN, &error);
135 | assert_ptr_not_null (dbg);
136 | DebugInfo *info = init_debug_info (SIMPLE_64BIT_BIN);
137 | assert_ptr_not_null (info);
138 | const DebugSymbol *sym = sym_by_name ("main", info);
139 | assert_ptr_not_null (sym);
140 |
141 | dbg_addr main_start = { 0 };
142 | SprayResult res = test_get_effective_start_addr (dbg, sym, &main_start);
143 | assert_int (res, ==, SP_OK);
144 | LineEntry line_entry = sd_line_entry_from_pc (dbg, main_start);
145 | assert_true (line_entry.is_ok);
146 | /* 10 is the line number of the first line after the function declaration. */
147 | assert_int (line_entry.ln, ==, 10);
148 | /* `weird_sum` has a multi-line function declaration. */
149 | sym = sym_by_name ("weird_sum", info);
150 | assert_ptr_not_null (sym); /* Fail cleanly instead of passing NULL on to the symbol accessors. */
151 | dbg_addr func_start = { 0 };
152 | res = test_get_effective_start_addr (dbg, sym, &func_start);
153 | assert_int (res, ==, SP_OK);
154 | line_entry = sd_line_entry_from_pc (dbg, func_start);
155 | assert_true (line_entry.is_ok);
156 | /* 3 is the line number of the first line after the two-line function declaration. */
157 | assert_int (line_entry.ln, ==, 3);
158 |
159 | dwarf_finish (dbg);
160 | free_debug_info (&info);
161 |
162 | return MUNIT_OK;
163 | }
164 |
165 | TEST (get_filepath_from_pc_works)
166 | {
167 | Dwarf_Error error = NULL;
168 | Dwarf_Debug dbg = sd_dwarf_init (SIMPLE_64BIT_BIN, &error);
169 | assert_ptr_not_null (dbg);
170 |
171 | {
172 | dbg_addr pc = { 0x00401156 };
173 | char *filepath = sd_filepath_from_pc (dbg, pc);
174 | assert_ptr_not_null (filepath);
175 | char *expect_filepath = realpath (SIMPLE_SRC, NULL);
176 | assert_string_equal (filepath, expect_filepath);
177 | free (filepath);
178 | free (expect_filepath);
179 | }
180 | { /* Sad path. */
181 | dbg_addr pc = { 0xdeadbeef };
182 | char *no_filepath = sd_filepath_from_pc (dbg, pc);
183 | assert_ptr_equal (no_filepath, NULL);
184 | }
185 |
186 | dwarf_finish (dbg);
187 | return MUNIT_OK;
188 | }
189 |
190 | TEST (sd_line_entry_at_works)
191 | {
192 | Dwarf_Error error = NULL;
193 | Dwarf_Debug dbg = sd_dwarf_init (SIMPLE_64BIT_BIN, &error);
194 | assert_ptr_not_null (dbg);
195 |
196 | LineEntry line = sd_line_entry_at (dbg, SIMPLE_SRC, 4);
197 | assert_true (line.is_ok);
198 | assert_int (line.ln, ==, 4);
199 |
200 | dwarf_finish (dbg);
201 |
202 | return MUNIT_OK;
203 | }
204 |
205 | #define ASSERT_TYPE(name, pc, _type) \
206 | { \
207 | SdVarattr var_attr = {0}; \
208 | char *unused_decl_file = NULL; \
209 | unsigned unused_decl_line = 0; \
210 | SprayResult res = sd_runtime_variable( \
211 | dbg, (pc), (name), &var_attr, &unused_decl_file, &unused_decl_line); \
212 | assert_int(res, ==, SP_OK); \
213 | assert_int(var_attr.type.n_nodes, ==, (_type).n_nodes); \
214 | for (size_t i = 0; i < (_type).n_nodes; i++) { \
215 | assert_memory_equal(sizeof(*(_type).nodes), &(_type).nodes[i], \
216 | &var_attr.type.nodes[i]); \
217 | } \
218 | \
219 | free(unused_decl_file); \
220 | del_type(&var_attr.type); \
221 | }
222 |
223 | TEST (finding_basic_variable_types_works)
224 | {
225 | Dwarf_Error error = NULL;
226 | Dwarf_Debug dbg = sd_dwarf_init (TYPE_EXAMPLES_BIN, &error);
227 | assert_ptr_not_null (dbg);
228 |
229 | /* There is no executable code in this CU. */
230 | dbg_addr addr = { 0x0 };
231 |
232 | SdTypenode a_nodes[1] = {
233 | {.tag = NODE_BASE_TYPE,.base_type = {.tag = BASE_TYPE_INT,.size = 4}},
234 | };
235 | SdType a = {.n_nodes = 1,.nodes = (SdTypenode *) & a_nodes };
236 | ASSERT_TYPE ("a", addr, a);
237 |
238 | SdTypenode b_nodes[2] = {
239 | {.tag = NODE_MODIFIER,.modifier = TYPE_MOD_CONST},
240 | {.tag = NODE_BASE_TYPE,.base_type = {.tag = BASE_TYPE_LONG,.size = 8}},
241 | };
242 | SdType b = {.n_nodes = 2,.nodes = (SdTypenode *) & b_nodes };
243 | ASSERT_TYPE ("b", addr, b);
244 |
245 | SdTypenode c_nodes[1] = {
246 | {.tag = NODE_MODIFIER,.modifier = TYPE_MOD_POINTER},
247 | };
248 | SdType c = {.n_nodes = 1,.nodes = (SdTypenode *) & c_nodes };
249 | ASSERT_TYPE ("c", addr, c);
250 |
251 | SdTypenode d_nodes[2] = {
252 | {.tag = NODE_MODIFIER,.modifier = TYPE_MOD_POINTER},
253 | {.tag = NODE_BASE_TYPE,
254 | .base_type = {.tag = BASE_TYPE_LONG_LONG,.size = 8}},
255 | };
256 | SdType d = {.n_nodes = 2,.nodes = (SdTypenode *) & d_nodes };
257 | ASSERT_TYPE ("d", addr, d);
258 |
259 | SdTypenode e_nodes[3] = {
260 | {.tag = NODE_MODIFIER,.modifier = TYPE_MOD_POINTER},
261 | {.tag = NODE_MODIFIER,.modifier = TYPE_MOD_CONST},
262 | {.tag = NODE_BASE_TYPE,.base_type =
263 | {.tag = BASE_TYPE_UNSIGNED_INT,.size = 4}},
264 | };
265 | SdType e = {.n_nodes = 3,.nodes = (SdTypenode *) & e_nodes };
266 | ASSERT_TYPE ("e", addr, e);
267 |
268 | SdTypenode f_nodes[3] = {
269 | {.tag = NODE_MODIFIER,.modifier = TYPE_MOD_CONST},
270 | {.tag = NODE_MODIFIER,.modifier = TYPE_MOD_POINTER},
271 | {.tag = NODE_BASE_TYPE,.base_type = {.tag = BASE_TYPE_INT,.size = 4}},
272 | };
273 | SdType f = {.n_nodes = 3,.nodes = (SdTypenode *) & f_nodes };
274 | ASSERT_TYPE ("f", addr, f);
275 |
276 | SdTypenode g_nodes[6] = {
277 | {.tag = NODE_MODIFIER,.modifier = TYPE_MOD_CONST},
278 | {.tag = NODE_MODIFIER,.modifier = TYPE_MOD_RESTRICT},
279 | {.tag = NODE_MODIFIER,.modifier = TYPE_MOD_POINTER},
280 | {.tag = NODE_MODIFIER,.modifier = TYPE_MOD_CONST},
281 | {.tag = NODE_MODIFIER,.modifier = TYPE_MOD_VOLATILE},
282 | {.tag = NODE_BASE_TYPE,.base_type = {.tag = BASE_TYPE_CHAR,.size = 1}
283 | },
284 | };
285 | SdType g = {.n_nodes = 6,.nodes = (SdTypenode *) & g_nodes };
286 | ASSERT_TYPE ("g", addr, g);
287 |
288 | dwarf_finish (dbg);
289 | return MUNIT_OK;
290 | }
291 |
292 | /*
293 | Assert that the first location description in the location list
294 | for the variable `name` in `func` has the given values.
295 | */
296 | #define ASSERT_LOCDESC(name, pc, opcode_, op1, op2, op3, lowpc_, highpc_, \
297 | file) \
298 | { \
299 | SdLoclist loclist = {0}; \
300 | SdVarattr var_attr = {0}; \
301 | char *decl_file = NULL; \
302 | unsigned decl_line = 0; \
303 | SprayResult res = sd_runtime_variable(dbg, (pc), (name), &var_attr, \
304 | &decl_file, &decl_line); \
305 | assert_int(res, ==, SP_OK); \
306 | res = sd_init_loclist(dbg, var_attr.loc, &loclist); \
307 | assert_int(res, ==, SP_OK); \
308 | assert_int(loclist.ranges[0].lowpc.value, ==, (lowpc_)); \
309 | assert_int(loclist.ranges[0].highpc.value, ==, (highpc_)); \
310 | assert_int(loclist.exprs[0].operations[0].opcode, ==, (opcode_)); \
311 | assert_int(loclist.exprs[0].operations[0].operand1, ==, (op1)); \
312 | assert_int(loclist.exprs[0].operations[0].operand2, ==, (op2)); \
313 | assert_int(loclist.exprs[0].operations[0].operand3, ==, (op3)); \
314 | assert_string_equal(decl_file, (file)); \
315 | free(decl_file); \
316 | del_type(&var_attr.type); \
317 | del_loclist(&loclist); \
318 | }
319 |
320 | TEST (finding_variable_locations_works)
321 | {
322 | Dwarf_Error error = NULL;
323 | Dwarf_Debug dbg = sd_dwarf_init (SIMPLE_64BIT_BIN, &error);
324 | assert_ptr_not_null (dbg);
325 |
326 | dbg_addr main_addr = { 0x401163 }; /* Address from the binary's `main`. */
327 | char *file_path = realpath (SIMPLE_SRC, NULL);
328 | assert_ptr_not_null (file_path);
329 |
330 | ASSERT_LOCDESC ("a", main_addr, DW_OP_fbreg, -8, 0, 0, 0, 0, file_path);
331 |
332 | free (file_path);
333 | dwarf_finish (dbg);
334 | return MUNIT_OK;
335 | }
336 |
337 | TEST (finding_locations_by_scope_works)
338 | {
339 | Dwarf_Error error = NULL;
340 | Dwarf_Debug dbg = sd_dwarf_init (RECURRING_VARIABLES_BIN, &error);
341 | assert_ptr_not_null (dbg);
342 |
343 | dbg_addr main_addr = { 0x401182 }; /* Some address in the binary's `main`. */
344 | dbg_addr blah_addr = { 0x401132 }; /* Some address in the `blah` function. */
345 | char *file_path = realpath (RECURRING_VARIABLES_SRC, NULL);
346 | assert_ptr_not_null (file_path);
347 |
348 | ASSERT_LOCDESC ("a", main_addr, DW_OP_fbreg, -8, 0, 0, 0, 0, file_path);
349 | ASSERT_LOCDESC ("b", main_addr, DW_OP_fbreg, -24, 0, 0, 0, 0, file_path);
350 | ASSERT_LOCDESC ("c", main_addr, DW_OP_fbreg, -32, 0, 0, 0, 0, file_path);
351 |
352 | ASSERT_LOCDESC ("a", blah_addr, DW_OP_addr, 4202512, 0, 0, 0, 0, file_path);
353 | ASSERT_LOCDESC ("b", blah_addr, DW_OP_fbreg, -16, 0, 0, 0, 0, file_path);
354 | ASSERT_LOCDESC ("c", blah_addr, DW_OP_fbreg, -24, 0, 0, 0, 0, file_path);
355 |
356 | free (file_path);
357 | dwarf_finish (dbg);
358 |
359 | return MUNIT_OK;
360 | }
361 |
362 | TEST (finding_variable_declration_files_works)
363 | {
364 | Dwarf_Error error = NULL;
365 | Dwarf_Debug dbg = sd_dwarf_init (EXTERN_VARIABLES_BIN, &error);
366 | assert_ptr_not_null (dbg);
367 |
368 | dbg_addr addr = { 0x40115e };
369 | char *blah_int1_file =
370 | realpath ("tests/assets/extern-variables/first_file.c", NULL);
371 | char *blah_int2_file =
372 | realpath ("tests/assets/extern-variables/second_file.c", NULL);
373 | char *blah_int_another_file =
374 | realpath ("tests/assets/extern-variables/third_file.c", NULL);
375 | char *my_own_int_file =
376 | realpath ("tests/assets/extern-variables/main.c", NULL);
377 | assert_ptr_not_null (blah_int1_file);
378 | assert_ptr_not_null (blah_int2_file);
379 | assert_ptr_not_null (blah_int_another_file);
380 | assert_ptr_not_null (my_own_int_file);
381 |
382 | ASSERT_LOCDESC ("blah_int1", addr, DW_OP_addr, 0x404014, 0, 0, 0, 0,
383 | blah_int1_file);
384 | ASSERT_LOCDESC ("blah_int2", addr, DW_OP_addr, 0x404010, 0, 0, 0, 0,
385 | blah_int2_file);
386 | ASSERT_LOCDESC ("blah_int_another", addr, DW_OP_addr, 0x404018, 0, 0, 0, 0,
387 | blah_int_another_file);
388 | ASSERT_LOCDESC ("my_own_int", addr, DW_OP_addr, 0x40400c, 0, 0, 0, 0,
389 | my_own_int_file);
390 |
391 | free (blah_int1_file);
392 | free (blah_int2_file);
393 | free (blah_int_another_file);
394 | free (my_own_int_file);
395 | dwarf_finish (dbg);
396 |
397 | dbg = sd_dwarf_init (INCLUDE_VARIABLE_BIN, &error);
398 | assert_ptr_not_null (dbg);
399 |
400 | addr = (dbg_addr)
401 | {
402 | 0x401129};
403 | char *blah_file = realpath ("tests/assets/include-variable/header.h", NULL);
404 | char *here_file = realpath ("tests/assets/include-variable/main.c", NULL);
405 | assert_ptr_not_null (blah_file);
406 | assert_ptr_not_null (here_file);
407 |
408 | ASSERT_LOCDESC ("blah", addr, DW_OP_addr, 0x404004, 0, 0, 0, 0, blah_file);
409 | ASSERT_LOCDESC ("here", addr, DW_OP_addr, 0x404008, 0, 0, 0, 0, here_file);
410 |
411 | free (blah_file);
412 | free (here_file);
413 | dwarf_finish (dbg);
414 |
415 | return MUNIT_OK;
416 | }
417 |
418 | TEST (manual_check_locexpr_output)
419 | {
420 | SdExpression first = { 0 };
421 | first.n_operations = 2;
422 | first.operations = calloc (first.n_operations, sizeof (SdOperation));
423 | first.operations[0] = (SdOperation)
424 | {
425 | .opcode = DW_OP_fbreg, /* Has one operand. */
426 | .operands = { 13, 0, 0 },};
427 | first.operations[1] = (SdOperation)
428 | {
429 | .opcode = DW_OP_const_type, /* Has three operands. */
430 | .operands = { 14, 15, 16 },};
431 |
432 | SdExpression second = { 0 };
433 | second.n_operations = 1;
434 | second.operations = calloc (second.n_operations, sizeof (SdOperation));
435 | second.operations[0] = (SdOperation)
436 | {
437 | .opcode = DW_OP_deref_type, /* Has two operands. */
438 | .operands = { 123, 456 },};
439 |
440 | SdLoclist loclist = { 0 };
441 | loclist.n_exprs = 2;
442 | loclist.exprs = calloc (loclist.n_exprs, sizeof (SdExpression));
443 | loclist.exprs[0] = first;
444 | loclist.exprs[1] = second;
445 |
446 | loclist.ranges = calloc (loclist.n_exprs, sizeof (SdLocRange));
447 | loclist.ranges[0] = (SdLocRange)
448 | {
449 | .meaningful = true,.lowpc = { 78 },.highpc = { 910 },};
450 | loclist.ranges[1] = (SdLocRange)
451 | {
452 | .meaningful = true,.lowpc = { 11 },.highpc = { 12 },};
453 |
454 | /* TODO: Replace this test with an integration test,
455 | that captures the output that's emitted here and
456 | checks that the output is correct. */
457 |
458 | printf ("\n"); /* Initial newline for easier inspection. */
459 | print_loclist (loclist);
460 | del_loclist (&loclist);
461 |
462 | return MUNIT_OK;
463 | }
464 |
465 | TEST (validating_compilers_works)
466 | {
467 | Dwarf_Error error = NULL;
468 | Dwarf_Debug dbg = sd_dwarf_init (WRONG_COMPILER_BIN, &error);
469 | assert_ptr_equal (dbg, NULL);
470 | return MUNIT_OK;
471 | }
472 |
473 | TEST (type_attribute_form)
474 | {
475 | Dwarf_Error error = NULL;
476 | Dwarf_Debug dbg = sd_dwarf_init (SIMPLE_64BIT_BIN, &error);
477 | assert_ptr_not_null (dbg);
478 |
479 | SdVarattr var_attr = { 0 };
480 | dbg_addr main_addr = { 0x401163 }; /* Address from the binary's `main`. */
481 | char *decl_file = NULL;
482 | unsigned decl_line = 0;
483 | SprayResult res = sd_runtime_variable (dbg,
484 | main_addr,
485 | "a",
486 | &var_attr,
487 | &decl_file,
488 | &decl_line);
489 | assert_int (res, ==, SP_OK);
490 |
491 | dwarf_finish (dbg);
492 | del_type (&var_attr.type);
493 | free (decl_file);
494 |
495 | return MUNIT_OK;
496 | }
497 |
498 |
499 | /* Returns true iff `str` ends with the suffix `end`. A NULL argument never matches. */
500 | bool
501 | ends_with (const char *str, const char *end)
502 | {
503 | if (!str || !end)
504 | return false;
505 |
506 | const size_t str_len = strlen (str);
507 | const size_t suffix_len = strlen (end);
508 | /* A suffix longer than the whole string cannot match. */
509 | if (str_len < suffix_len)
510 | return false;
511 | const char *tail = str + (str_len - suffix_len);
512 | return memcmp (tail, end, suffix_len) == 0;
513 | }
514 | /* Checks that sd_get_filepaths returns exactly the six expected source files of MANY_FILES_BIN, in order. */
515 | TEST(get_filepaths_works)
516 | {
517 | Dwarf_Error error = NULL;
518 | Dwarf_Debug dbg = sd_dwarf_init (MANY_FILES_BIN, &error);
519 | assert_ptr_not_null (dbg);
520 |
521 | char **filepaths = sd_get_filepaths (dbg);
522 | assert_ptr_not_null (filepaths); /* The loop below dereferences the array. */
523 | char *expected_ends[6] = {
524 | "tests/assets/many-files/foo1.c",
525 | "tests/assets/many-files/baz.h",
526 | "tests/assets/many-files/foo2.c",
527 | "tests/assets/many-files/bar1.h",
528 | "tests/assets/many-files/bar2.h",
529 | "tests/assets/many-files/main.c",
530 | };
531 | int n_paths = 0;
532 | for (; filepaths[n_paths] != NULL; n_paths++)
533 | {
534 | assert_int (n_paths, <, 6); /* Don't exceed the number of expected ends. */
535 | assert_true (ends_with (filepaths[n_paths], expected_ends[n_paths]));
536 | free (filepaths[n_paths]);
537 | }
538 | assert_int (n_paths, ==, 6); /* An empty or truncated list would otherwise pass the loop unnoticed. */
539 | free (filepaths);
540 | dwarf_finish (dbg);
541 | return MUNIT_OK;
542 | }
543 |
544 |
545 | MunitTest dwarf_tests[] = {
546 | REG_TEST (get_line_entry_from_pc_works),
547 | REG_TEST (iterating_lines_works),
548 | REG_TEST (search_returns_the_correct_result),
549 | REG_TEST (get_effective_function_start_works),
550 | REG_TEST (get_filepath_from_pc_works),
551 | REG_TEST (sd_line_entry_at_works),
552 | REG_TEST (finding_basic_variable_types_works),
553 | REG_TEST (finding_variable_locations_works),
554 | REG_TEST (finding_locations_by_scope_works),
555 | REG_TEST (manual_check_locexpr_output),
556 | REG_TEST (finding_variable_declration_files_works),
557 | REG_TEST (validating_compilers_works),
558 | REG_TEST (type_attribute_form),
559 | REG_TEST (get_filepaths_works),
560 | {NULL, NULL, NULL, NULL, MUNIT_TEST_OPTION_NONE, NULL}
561 | };
562 |
--------------------------------------------------------------------------------
/tests/elf.c:
--------------------------------------------------------------------------------
1 | #include
2 |
3 | #include "test_utils.h"
4 |
5 | #include "../src/spray_elf.h"
6 |
7 | TEST (accept_valid_executable)
8 | {
9 | ElfFile elf_file = { 0 };
10 | ElfParseResult res = se_parse_elf (SIMPLE_64BIT_BIN, &elf_file);
11 | assert_int (res, ==, ELF_PARSE_OK);
12 |
13 | assert_int (elf_file.prog_table.n_headers, ==, 13);
14 | assert_int (elf_file.sect_table.n_headers, ==, 34);
15 |
16 | // Compare some randomly chosen values to those
17 | // returned by `readelf(1)`.
18 |
19 | assert_int (elf_file.prog_table.headers[0].p_type, ==, PT_PHDR);
20 |
21 | Elf64_Phdr load_ph = elf_file.prog_table.headers[3];
22 | assert_int (load_ph.p_type, ==, PT_LOAD);
23 | assert_int (load_ph.p_offset, ==, 0x1000);
24 | assert_int (load_ph.p_vaddr, ==, 0x401000);
25 | assert_int (load_ph.p_paddr, ==, 0x401000);
26 | assert_int (load_ph.p_filesz, ==, 0x181);
27 | assert_int (load_ph.p_memsz, ==, 0x181);
28 | assert_int (load_ph.p_flags, ==, PF_R | PF_X);
29 | assert_int (load_ph.p_align, ==, 0x1000);
30 |
31 | Elf64_Phdr eh_frame_ph = elf_file.prog_table.headers[10];
32 | assert_int (eh_frame_ph.p_type, ==, PT_GNU_EH_FRAME);
33 | assert_int (eh_frame_ph.p_offset, ==, 0x2010);
34 | assert_int (eh_frame_ph.p_vaddr, ==, 0x402010);
35 | assert_int (eh_frame_ph.p_paddr, ==, 0x402010);
36 | assert_int (eh_frame_ph.p_filesz, ==, 0x2c);
37 | assert_int (eh_frame_ph.p_memsz, ==, 0x2c);
38 | assert_int (eh_frame_ph.p_flags, ==, PF_R);
39 | assert_int (eh_frame_ph.p_align, ==, 0x4);
40 |
41 | Elf64_Shdr symtab_sh = elf_file.sect_table.headers[31];
42 | assert_int (symtab_sh.sh_type, ==, SHT_SYMTAB);
43 | assert_int (symtab_sh.sh_addr, ==, 0x0);
44 | assert_int (symtab_sh.sh_offset, ==, 13656);
45 | assert_int (symtab_sh.sh_size, ==, 0x330);
46 | assert_int (symtab_sh.sh_entsize, ==, 0x18);
47 | assert_int (symtab_sh.sh_flags, ==, 0);
48 | assert_int (symtab_sh.sh_link, ==, 32);
49 | assert_int (symtab_sh.sh_info, ==, 18);
50 | assert_int (symtab_sh.sh_addralign, ==, 8);
51 |
52 | se_free_elf (elf_file);
53 | return MUNIT_OK;
54 | }
55 |
56 | TEST (read_elf_symbol_table_entries)
57 | {
58 | ElfFile elf_file = { 0 };
59 | ElfParseResult res = se_parse_elf (MULTI_FILE_BIN, &elf_file);
60 | assert_int (res, ==, ELF_PARSE_OK);
61 |
62 | const Elf64_Sym *main = se_symbol_from_name ("main", &elf_file);
63 | assert_ptr_not_null (main);
64 | assert_string_equal (se_symbol_name (main, &elf_file), "main");
65 | assert_int (se_symbol_binding (main), ==, STB_GLOBAL);
66 | assert_int (se_symbol_type (main), ==, STT_FUNC);
67 | assert_int (se_symbol_visibility (main), ==, STV_DEFAULT);
68 |
69 | const Elf64_Sym *func2 =
70 | se_symbol_from_name ("file2_compute_something", &elf_file);
71 | assert_ptr_not_null (func2);
72 | assert_string_equal (se_symbol_name (func2, &elf_file),
73 | "file2_compute_something");
74 | assert_int (se_symbol_binding (func2), ==, STB_GLOBAL);
75 | assert_int (se_symbol_type (func2), ==, STT_FUNC);
76 | assert_int (se_symbol_visibility (func2), ==, STV_DEFAULT);
77 |
78 | const Elf64_Sym *func1 =
79 | se_symbol_from_addr ((dbg_addr) { 0x00401128 }, &elf_file);
80 | assert_ptr_not_null (func1);
81 | assert_string_equal (se_symbol_name (func1, &elf_file),
82 | "file1_compute_something");
83 | assert_int (se_symbol_binding (func1), ==, STB_GLOBAL);
84 | assert_int (se_symbol_type (func1), ==, STT_FUNC);
85 | assert_int (se_symbol_visibility (func1), ==, STV_DEFAULT);
86 |
87 | se_free_elf (elf_file);
88 | return MUNIT_OK;
89 | }
90 |
91 | TEST (reject_invalid_executables)
92 | {
93 | // The following are a bunch of executables which
94 | // were compiled for unsupported targets (32-bit, ARM etc.)
95 | // All of them should be rejects.
96 |
97 | ElfFile elf_file = { 0 };
98 | ElfParseResult res = se_parse_elf (SIMPLE_32BIT_BIN, &elf_file);
99 | assert_int (res, ==, ELF_PARSE_DISLIKE);
100 | return MUNIT_OK;
101 | }
102 |
103 | MunitTest parse_elf_tests[] = {
104 | REG_TEST (accept_valid_executable),
105 | REG_TEST (reject_invalid_executables),
106 | REG_TEST (read_elf_symbol_table_entries),
107 | {NULL, NULL, NULL, NULL, MUNIT_TEST_OPTION_NONE, NULL}
108 | };
109 |
--------------------------------------------------------------------------------
/tests/main.c:
--------------------------------------------------------------------------------
1 | #include "test_utils.h"
2 |
3 | extern MunitTest parse_elf_tests[];
4 | extern MunitTest dwarf_tests[];
5 | extern MunitTest debugger_tests[];
6 |
7 | static MunitSuite suites[] = {
8 | {
9 | "/parse_elf",
10 | parse_elf_tests,
11 | NULL,
12 | 1,
13 | MUNIT_SUITE_OPTION_NONE},
14 | {
15 | "/dwarf_tests",
16 | dwarf_tests,
17 | NULL,
18 | 1,
19 | MUNIT_SUITE_OPTION_NONE},
20 | {
21 | "/debugger_tests",
22 | debugger_tests,
23 | NULL,
24 | 1,
25 | MUNIT_SUITE_OPTION_NONE},
26 | {NULL, NULL, NULL, 0, MUNIT_SUITE_OPTION_NONE}
27 | };
28 |
29 | static const MunitSuite suite = {
30 | "/spray",
31 | NULL,
32 | suites,
33 | 1,
34 | MUNIT_SUITE_OPTION_NONE,
35 | };
36 |
37 | int
38 | main (int argc, char *const *argv)
39 | {
40 | return munit_suite_main (&suite, NULL, argc, argv);
41 | }
42 |
--------------------------------------------------------------------------------
/tests/test_utils.h:
--------------------------------------------------------------------------------
1 | /* Test utilities. */
2 |
3 | #ifndef _SPRAY_TEST_UTILS_H_
4 | #define _SPRAY_TEST_UTILS_H_
5 |
6 | #define MUNIT_ENABLE_ASSERT_ALIASES
7 | #include "../dependencies/munit/munit.h"
8 |
// Paths of the test assets, grouped by the program they belong to.

// simple
#define SIMPLE_SRC              "tests/assets/simple.c"
#define SIMPLE_64BIT_BIN        "tests/assets/64bit-linux-simple.bin"
#define SIMPLE_32BIT_BIN        "tests/assets/32bit-linux-simple.bin"

// nested functions
#define NESTED_FUNCTIONS_SRC    "tests/assets/nested_functions.c"
#define NESTED_FUNCTIONS_BIN    "tests/assets/nested-functions.bin"

// print args
#define PRINT_ARGS_SRC          "tests/assets/print_args.c"
#define PRINT_ARGS_BIN          "tests/assets/print-args.bin"

// recurring variables
#define RECURRING_VARIABLES_SRC "tests/assets/recurring_variables.c"
#define RECURRING_VARIABLES_BIN "tests/assets/recurring-variables.bin"

// pointers
#define POINTERS_SRC            "tests/assets/pointers.c"
#define POINTERS_BIN            "tests/assets/pointers.bin"

// binaries without a matching *_SRC macro
#define MULTI_FILE_BIN          "tests/assets/multi-file.bin"
#define EXTERN_VARIABLES_BIN    "tests/assets/extern-variables.bin"
#define INCLUDE_VARIABLE_BIN    "tests/assets/include-variable.bin"
#define WRONG_COMPILER_BIN      "tests/assets/wrong-compiler.bin"
#define TYPE_EXAMPLES_BIN       "tests/assets/type-examples.bin"
#define MANY_FILES_BIN          "tests/assets/many-files.bin"
27 |
// Create a test: expands to the header of a static function called
// `name` that returns a MunitResult. Both parameters (`p` and
// `fixture`) are marked MUNIT_UNUSED so test bodies may ignore them.
#define TEST(name)                                 \
  static MunitResult                               \
  name (MUNIT_UNUSED const MunitParameter p[],     \
        MUNIT_UNUSED void *fixture)
31 |
// Register a test: expands to a MunitTest initializer for the test
// function `name`, using the path "/<name>", no extra callbacks or
// parameters, and the default test options.
#define REG_TEST(name)                                           \
  { "/" #name, name, NULL, NULL, MUNIT_TEST_OPTION_NONE, NULL }
35 |
36 | #endif // _SPRAY_TEST_UTILS_H_
37 |
--------------------------------------------------------------------------------
/tests/tokenize.scm:
--------------------------------------------------------------------------------
1 | (load "src/tokenize.scm")
2 |
3 | (import test
4 | c-regex
5 | tokenizer)
6 |
;; Tests for the individual regular expressions.  Each nested group
;; exercises one regex, either with `regex-match?` (does the input
;; match?) or with `full-match` (which text was matched?).
(test-group "(test-regex)"
  (test-group "(test-keyword-regex)"
    (test-assert "keyword with space at end is accepted"
      (regex-match? keyword-regex "break "))
    (test-assert "keyword with tab at end is accepted"
      (regex-match? keyword-regex "extern\t"))
    (test-assert "identifier starting with keyword is rejected"
      (not (regex-match? keyword-regex "breakpoints ")))
    (test-assert "other identifier starting with keyword is rejected"
      (not (regex-match? keyword-regex "return_value"))))

  (test-group "(test-literal-regex)"
    (test "match a string made up of only a string literal"
      "\"blah\""
      (full-match literal-regex "\"blah\""))
    (test "match a string starting with a string literal"
      "\"blah\""
      (full-match literal-regex "\"blah\" ... some more lame text"))
    (test-assert "don't match a string not starting with a string literal"
      (not (regex-match? literal-regex "blah ... invalid text \"string literal\"")))
    ;; After removing the backslashes to embed this string in this source,
    ;; the string below looks like this: "\\ \" ... blah "
    (test "match escaped backslashes and quotation marks"
      "\"\\\\ \\\" ... blah \""
      (full-match literal-regex "\"\\\\ \\\" ... blah \"")))

  (test-group "(test-whitespace-regex)"
    (test "only whitespace"
      " \t\t\r "
      (full-match whitespace-regex " \t\t\r "))
    (test-assert "beginning whitespace"
      (regex-match? whitespace-regex "\n \t blah"))
    (test-assert "no beginning whitespace"
      (not (regex-match? whitespace-regex "blah \n "))))

  (test-group "(test-identifier-regex)"
    (test-assert "identifier starting with numbers"
      (not (regex-match? identifier-regex "98blah")))
    (test "identifier including numbers and under scores"
      "blah_984baz"
      (full-match identifier-regex "blah_984baz"))
    (test-assert "identifier starting with whitespace"
      (not (regex-match? identifier-regex " \n blah"))))

  (test-group "(test-hex-constant-regex)"
    (test-assert "hex constant"
      (regex-match? hex-constant-regex "0x81babe"))
    (test-assert "no hex prefix"
      (not (regex-match? hex-constant-regex "92873")))
    (test-assert "type postfixes"
      (regex-match? hex-constant-regex "0xbad1dealueLU")))

  (test-group "(test-octal-constant-regex)"
    (test-assert "octal constant"
      (regex-match? octal-constant-regex "01543672"))
    (test-assert "out of octal range"
      (not (regex-match? octal-constant-regex "09082"))))

  (test-group "(test-decimal-constant-regex)"
    (test-assert "decimal constant"
      (regex-match? decimal-constant-regex "780934lu"))
    (test-assert "single zero as decimal"
      (regex-match? decimal-constant-regex "0")))

  (test-group "(test-char-constant-regex)"
    (test-assert "character constant"
      (regex-match? char-constant-regex "'a'"))
    (test-assert "multi-character constant"
      (regex-match? char-constant-regex "'abc\\n'"))
    ;; This assertion used to be the only one without a description.
    (test-assert "text before a character constant is rejected"
      (not (regex-match? char-constant-regex "blha 'a'"))))

  (test-group "(test-sci-constant-regex)"
    (test-assert "scientific notation constant"
      (regex-match? sci-constant-regex "81e-2")))

  (test-group "(test-float-constant-regex-frac)"
    (test-assert "fraction floating point constant without whole part"
      (regex-match? float-constant-regex-frac ".024E-3F"))
    (test-assert "fraction floating point constant with whole part"
      (regex-match? float-constant-regex-frac "0184.708e+9fl"))
    (test-assert "fraction floating point constant without fraction part"
      (not (regex-match? float-constant-regex-frac "98.e+085L"))))

  (test-group "(test-float-constant-regex-whole)"
    (test-assert "whole floating point constant without fraction part"
      (regex-match? float-constant-regex-whole "983.E-3F"))
    (test-assert "whole floating point constant with fraction part"
      (regex-match? float-constant-regex-whole "0184.708e+9fl"))
    (test-assert "whole floating point constant without whole part"
      (not (regex-match? float-constant-regex-whole ".41e+085L"))))

  (test-group "(test-preproc-directive-regex)"
    (test-assert "include directive"
      (regex-match? preproc-directive-regex "#include"))
    (test-assert "undef directive"
      (regex-match? preproc-directive-regex "#undef"))
    (test-assert "include_next directive"
      (regex-match? preproc-directive-regex "#include_next"))
    ;; NOTE(review): the test name mentions an angle-brackets path, but
    ;; neither literal below contains one -- confirm that the path was
    ;; not lost from these strings.
    (test "include directive with angle-brackets path"
      "#include "
      (full-match preproc-directive-regex "#include int main ..."))
    (test-assert "include directive with quotes path"
      (regex-match? preproc-directive-regex "#import \"blah.h\"")))

  (test-group "(test-comment-text-regex)"
    (test "block comment regex"
      "blah "
      (full-match comment-text-regex "blah */ asdf"))
    (test "block comment regex with star and newline"
      "blah \n * asdf "
      (full-match comment-text-regex "blah \n * asdf */"))
    (test "line comment containing block comment"
      "blah /* hey */ wow"
      (full-match line-comment-text-regex "blah /* hey */ wow\n asdf")))
  ;; End of test-group (test-regex).
  )
123 |
;; End-to-end tests for `tokenize`.  It is called with a list of
;; source lines, and the result is compared against a list holding one
;; list of (token-type . text) pairs per input line.
;;
;; Arguments are passed as (test name expected expr): the literal
;; token list is the expected value and the `tokenize` call is the
;; expression under test, so that failure reports are not inverted.
(test-group "(test-tokenize)"
  (test "basic code example"
    '(((tt-type . "int") (tt-whitespace . " ") (tt-identifier . "main") (tt-special-symbol . "(") (tt-type . "void") (tt-special-symbol . ")") (tt-whitespace . " ") (tt-special-symbol . "{"))
      ((tt-whitespace . " ") (tt-type . "int") (tt-whitespace . " ") (tt-identifier . "i") (tt-whitespace . " ") (tt-operator . "=") (tt-whitespace . " ") (tt-constant . "0") (tt-special-symbol . ";"))
      ((tt-whitespace . " ") (tt-keyword . "for") (tt-whitespace . " ") (tt-special-symbol . "(") (tt-special-symbol . ";") (tt-whitespace . " ") (tt-identifier . "i") (tt-whitespace . " ") (tt-operator . "<") (tt-whitespace . " ") (tt-constant . "91") (tt-special-symbol . ";") (tt-whitespace . " ") (tt-identifier . "i") (tt-operator . "++") (tt-special-symbol . ")") (tt-whitespace . " ") (tt-special-symbol . "{"))
      ((tt-whitespace . " ") (tt-identifier . "printf") (tt-special-symbol . "(") (tt-literal . "\"Blah: %d\"") (tt-whitespace . " ") (tt-identifier . "i") (tt-special-symbol . ")") (tt-special-symbol . ";"))
      ((tt-whitespace . " ") (tt-special-symbol . "}")))
    (tokenize (list "int main(void) {"
                    " int i = 0;"
                    " for (; i < 91; i++) {"
                    " printf(\"Blah: %d\" i);"
                    " }")))

  (test "error recovery at whitespace"
    '(((tt-type . "int") (tt-whitespace . " ") (tt-other . "Äpfel") (tt-whitespace . " ") (tt-operator . "=") (tt-whitespace . " ") (tt-special-symbol . "(") (tt-constant . "6") (tt-whitespace . " ") (tt-operator . "+") (tt-whitespace . " ") (tt-constant . "4") (tt-special-symbol . ")") (tt-whitespace . " ") (tt-operator . "*") (tt-whitespace . " ") (tt-constant . "9") (tt-special-symbol . ";")))
    (tokenize (list "int Äpfel = (6 + 4) * 9;")))

  (test "single C-style comments"
    '(((tt-type . "int") (tt-whitespace . " ") (tt-identifier . "main") (tt-special-symbol . "(") (tt-type . "void") (tt-special-symbol . ")") (tt-whitespace . " ") (tt-special-symbol . "{"))
      ((tt-whitespace . " ") (tt-comment . "/*") (tt-comment-text . " blah ") (tt-uncomment . "*/"))
      ((tt-whitespace . " ") (tt-identifier . "printf") (tt-special-symbol . "(") (tt-literal . "\"blah\"") (tt-special-symbol . ")") (tt-special-symbol . ";"))
      ((tt-special-symbol . "}")))
    (tokenize (list "int main(void) {"
                    " /* blah */"
                    " printf(\"blah\");"
                    "}")))

  (test "multi-line C-style comments"
    '(((tt-type . "int") (tt-whitespace . " ") (tt-identifier . "a") (tt-whitespace . " ") (tt-operator . "=") (tt-whitespace . " ") (tt-constant . "2") (tt-special-symbol . ";"))
      ((tt-comment . "/*") (tt-comment-text . "blah"))
      ((tt-comment-text . "asdf ") (tt-uncomment . "*/"))
      ((tt-type . "int") (tt-whitespace . " ") (tt-identifier . "b") (tt-whitespace . " ") (tt-operator . "=") (tt-whitespace . " ") (tt-constant . "4") (tt-special-symbol . ";")))
    (tokenize (list "int a = 2;"
                    "/*blah"
                    "asdf */"
                    "int b = 4;")))

  (test "multi-line C-style comment without end"
    '(((tt-type . "int") (tt-whitespace . " ") (tt-identifier . "blah") (tt-whitespace . " ") (tt-operator . "=") (tt-whitespace . " ") (tt-constant . "5") (tt-special-symbol . ";"))
      ((tt-comment . "/*") (tt-comment-text . " I don't end,"))
      ((tt-comment-text . "But this is still me")))
    (tokenize (list "int blah = 5;"
                    "/* I don't end,"
                    "But this is still me")))

  (test "C-style comment without beginning wraps to start"
    '(((tt-comment-text . "int main(void) ") (tt-trailing-uncomment . "*/") (tt-whitespace . " ") (tt-special-symbol . "{") (tt-type . "int") (tt-whitespace . " ") (tt-identifier . "a") (tt-whitespace . " ") (tt-operator . "=") (tt-whitespace . " ") (tt-constant . "0") (tt-special-symbol . ";")))
    (tokenize (list "int main(void) */ {int a = 0;")))

  (test "code can be commented-out"
    '(((tt-comment-text . "int"))
      ((tt-comment-text . "a"))
      ((tt-comment-text . "="))
      ((tt-comment-text . "2;"))
      ((tt-comment-text . " ") (tt-trailing-uncomment . "*/") (tt-whitespace . " ") (tt-comment . "/*") (tt-comment-text . " another comment ") (tt-uncomment . "*/")))
    (tokenize (list "int"
                    "a"
                    "="
                    "2;"
                    " */ /* another comment */")))

  (test "C++ style comments can contain block comments"
    '(((tt-type . "int") (tt-whitespace . " ") (tt-identifier . "a") (tt-whitespace . " ") (tt-operator . "=") (tt-whitespace . " ") (tt-constant . "7") (tt-special-symbol . ";") (tt-whitespace . " ") (tt-comment . "//") (tt-comment-text . " This C++ style comment can contain this */ or that /*."))
      ((tt-comment . "//") (tt-comment-text . " It even continues on the next line!")))
    (tokenize (list "int a = 7; // This C++ style comment can contain this */ or that /*."
                    "// It even continues on the next line!")))

  ;; NOTE(review): two of the four inputs below carry no include path
  ;; at all, and their expected file path is the empty string -- confirm
  ;; that angle-bracket paths were not lost from these literals.
  (test "retrieve include paths"
    '(((tt-preproc . "#include") (tt-whitespace . "") (tt-include-filepath . ""))
      ((tt-preproc . "#include") (tt-whitespace . "") (tt-include-filepath . "\"debugger.h\""))
      ((tt-preproc . "#include") (tt-whitespace . " ") (tt-include-filepath . ""))
      ((tt-preproc . "#include") (tt-whitespace . " ") (tt-include-filepath . "\"debugger.h\"")))
    (tokenize (list "#include"
                    "#include\"debugger.h\""
                    "#include "
                    "#include \"debugger.h\"")))

  (test "don't split identifier starting with keyword"
    '(((tt-keyword . "struct") (tt-whitespace . " ") (tt-identifier . "breakpoints") (tt-whitespace . " ") (tt-special-symbol . "{") (tt-whitespace . " ") (tt-type . "int") (tt-whitespace . " ") (tt-identifier . "a") (tt-special-symbol . ";") (tt-whitespace . " ") (tt-special-symbol . "}") (tt-special-symbol . ";"))
      ((tt-keyword . "struct") (tt-whitespace . " ") (tt-identifier . "breakpoints") (tt-whitespace . " ") (tt-identifier . "bp") (tt-whitespace . " ") (tt-operator . "=") (tt-whitespace . " ") (tt-special-symbol . "{") (tt-whitespace . " ") (tt-constant . "5") (tt-whitespace . " ") (tt-special-symbol . "}") (tt-special-symbol . ";")))
    (tokenize (list "struct breakpoints { int a; };"
                    "struct breakpoints bp = { 5 };")))

  ;; End test-group (test-tokenize).
  )
167 |
168 | (test-exit)
169 |
--------------------------------------------------------------------------------