├── .gitignore ├── Makefile.am ├── README.md ├── bb.c ├── callstack.c ├── cfg.c ├── cfggrind.patch ├── cfggrind_asmmap ├── cfggrind_info ├── clo.c ├── data ├── rq1-efficiency │ ├── SPEC2017 │ │ ├── data.csv │ │ ├── plot.R │ │ ├── plot2.R │ │ ├── plots │ │ │ ├── slowdown.pdf │ │ │ └── times.pdf │ │ └── speedup.R │ └── cBench │ │ ├── data.csv │ │ ├── plot.R │ │ ├── plot2.R │ │ ├── plots │ │ ├── slowdown.pdf │ │ └── times.pdf │ │ └── speedup.R ├── rq2-caching │ ├── data.csv │ ├── plot.R │ └── plots │ │ └── cache.pdf ├── rq3-completeness │ ├── ratio │ │ ├── analysis.R │ │ ├── data-cbench.csv │ │ ├── data-spec.csv │ │ ├── plot.R │ │ └── plots │ │ │ ├── cbench-norm.pdf │ │ │ ├── cbench-std.pdf │ │ │ ├── spec-norm.pdf │ │ │ └── spec-std.pdf │ └── relation │ │ ├── data-cbench.csv │ │ ├── data-spec.csv │ │ ├── filter.sh │ │ ├── plot.R │ │ └── plots │ │ ├── cbench-blocks.pdf │ │ ├── cbench-instrs.pdf │ │ ├── spec-blocks.pdf │ │ └── spec-instrs.pdf ├── rq4-incremental │ ├── data-cfgs.csv │ ├── data-instrs.csv │ ├── plot.R │ └── plots │ │ ├── cfgs-part1.pdf │ │ ├── cfgs-part2.pdf │ │ ├── instrs-part1.pdf │ │ └── instrs-part2.pdf ├── rq5-static-vs-dynamic │ ├── analysis.R │ ├── cbench-stripped.csv │ ├── cbench-symbols.csv │ ├── spec-stripped.csv │ └── spec-symbols.csv └── rq6-complexity │ ├── data-cbench.csv │ ├── data-spec.csv │ ├── plot.R │ └── plots │ ├── executed.pdf │ └── runtime.pdf ├── debug.c ├── fdesc.c ├── fn.c ├── global.h ├── instrs.c ├── main.c ├── paper └── SPE20-cfggrind.pdf ├── prototype ├── .gitignore ├── cfg.py ├── config.py ├── group.py ├── instr.py ├── machine.py ├── main.py ├── state.py └── test │ ├── simple.c │ ├── simple.desc │ ├── test.c │ ├── test.desc │ ├── total.c │ └── total.desc ├── smarthash.c ├── smartlist.c ├── tests ├── Makefile.am ├── cfg-ordered.png ├── cfg-signal.png ├── cfg-unordered.png ├── signal.c └── test.c └── threads.c /.gitignore: -------------------------------------------------------------------------------- 1 | Makefile 2 | 
Makefile.in 3 | *.o 4 | cfggrind-* 5 | .deps 6 | 7 | -------------------------------------------------------------------------------- /Makefile.am: -------------------------------------------------------------------------------- 1 | include $(top_srcdir)/Makefile.tool.am 2 | 3 | #---------------------------------------------------------------------------- 4 | # Headers, etc 5 | #---------------------------------------------------------------------------- 6 | 7 | pkginclude_HEADERS = 8 | 9 | bin_SCRIPTS = \ 10 | cfggrind_asmmap \ 11 | cfggrind_info 12 | 13 | noinst_HEADERS = \ 14 | global.h 15 | 16 | #---------------------------------------------------------------------------- 17 | # cfggrind- 18 | #---------------------------------------------------------------------------- 19 | 20 | noinst_PROGRAMS = cfggrind-@VGCONF_ARCH_PRI@-@VGCONF_OS@ 21 | if VGCONF_HAVE_PLATFORM_SEC 22 | noinst_PROGRAMS += cfggrind-@VGCONF_ARCH_SEC@-@VGCONF_OS@ 23 | endif 24 | 25 | CFGGRIND_SOURCES_COMMON = \ 26 | bb.c \ 27 | callstack.c \ 28 | cfg.c \ 29 | clo.c \ 30 | debug.c \ 31 | fdesc.c \ 32 | fn.c \ 33 | instrs.c \ 34 | main.c \ 35 | smarthash.c \ 36 | smartlist.c \ 37 | threads.c 38 | 39 | cfggrind_@VGCONF_ARCH_PRI@_@VGCONF_OS@_SOURCES = \ 40 | $(CFGGRIND_SOURCES_COMMON) 41 | cfggrind_@VGCONF_ARCH_PRI@_@VGCONF_OS@_CPPFLAGS = \ 42 | $(AM_CPPFLAGS_@VGCONF_PLATFORM_PRI_CAPS@) 43 | cfggrind_@VGCONF_ARCH_PRI@_@VGCONF_OS@_CFLAGS = $(LTO_CFLAGS) \ 44 | $(AM_CFLAGS_@VGCONF_PLATFORM_PRI_CAPS@) 45 | cfggrind_@VGCONF_ARCH_PRI@_@VGCONF_OS@_DEPENDENCIES = \ 46 | $(TOOL_DEPENDENCIES_@VGCONF_PLATFORM_PRI_CAPS@) 47 | cfggrind_@VGCONF_ARCH_PRI@_@VGCONF_OS@_LDADD = \ 48 | $(TOOL_LDADD_@VGCONF_PLATFORM_PRI_CAPS@) 49 | cfggrind_@VGCONF_ARCH_PRI@_@VGCONF_OS@_LDFLAGS = \ 50 | $(TOOL_LDFLAGS_@VGCONF_PLATFORM_PRI_CAPS@) 51 | cfggrind_@VGCONF_ARCH_PRI@_@VGCONF_OS@_LINK = \ 52 | $(top_builddir)/coregrind/link_tool_exe_@VGCONF_OS@ \ 53 | @VALT_LOAD_ADDRESS_PRI@ \ 54 | $(LINK) \ 55 | 
$(cfggrind_@VGCONF_ARCH_PRI@_@VGCONF_OS@_CFLAGS) \ 56 | $(cfggrind_@VGCONF_ARCH_PRI@_@VGCONF_OS@_LDFLAGS) 57 | 58 | if VGCONF_HAVE_PLATFORM_SEC 59 | cfggrind_@VGCONF_ARCH_SEC@_@VGCONF_OS@_SOURCES = \ 60 | $(CFGGRIND_SOURCES_COMMON) 61 | cfggrind_@VGCONF_ARCH_SEC@_@VGCONF_OS@_CPPFLAGS = \ 62 | $(AM_CPPFLAGS_@VGCONF_PLATFORM_SEC_CAPS@) 63 | cfggrind_@VGCONF_ARCH_SEC@_@VGCONF_OS@_CFLAGS = $(LTO_CFLAGS) \ 64 | $(AM_CFLAGS_@VGCONF_PLATFORM_SEC_CAPS@) 65 | cfggrind_@VGCONF_ARCH_SEC@_@VGCONF_OS@_DEPENDENCIES = \ 66 | $(TOOL_DEPENDENCIES_@VGCONF_PLATFORM_SEC_CAPS@) 67 | cfggrind_@VGCONF_ARCH_SEC@_@VGCONF_OS@_LDADD = \ 68 | $(TOOL_LDADD_@VGCONF_PLATFORM_SEC_CAPS@) 69 | cfggrind_@VGCONF_ARCH_SEC@_@VGCONF_OS@_LDFLAGS = \ 70 | $(TOOL_LDFLAGS_@VGCONF_PLATFORM_SEC_CAPS@) 71 | cfggrind_@VGCONF_ARCH_SEC@_@VGCONF_OS@_LINK = \ 72 | $(top_builddir)/coregrind/link_tool_exe_@VGCONF_OS@ \ 73 | @VALT_LOAD_ADDRESS_SEC@ \ 74 | $(LINK) \ 75 | $(cfggrind_@VGCONF_ARCH_SEC@_@VGCONF_OS@_CFLAGS) \ 76 | $(cfggrind_@VGCONF_ARCH_SEC@_@VGCONF_OS@_LDFLAGS) 77 | endif 78 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # CFGgrind 2 | 3 | CFGgrind is a valgrind plugin to reconstruct control flow graphs (CFGs) dynamically by following the execution of binary programs. 4 | This tool allows successive CFG refinements by supporting multiple executions with different inputs. 5 | We support multi-threaded programs with profiling information in the edges, calls and signal handlers. 6 | More details can be found in our [paper](paper/SPE20-cfggrind.pdf) published in Software, Practice & Experience. 7 | ## Building 8 | 9 | To build CFGgrind, first download and unpack valgrind (3.24.0). 10 | 11 | $ wget -qO - https://sourceware.org/pub/valgrind/valgrind-3.24.0.tar.bz2 | tar jxv 12 | 13 | Then, enter the directory and clone the CFGgrind GitHub repository. 
14 | Apply the patch to add the tool in the compilation chain. 15 | 16 | $ cd valgrind-3.24.0 17 | $ git clone https://github.com/rimsa/CFGgrind.git cfggrind 18 | $ patch -p1 < cfggrind/cfggrind.patch 19 | 20 | Build valgrind with CFGgrind. 21 | 22 | $ ./autogen.sh 23 | $ ./configure 24 | $ make -j4 25 | $ sudo make install 26 | 27 | ## Testing 28 | 29 | Compile and use a test program that orders numbers given in the arguments list. 30 | We compile it here with debugging symbols, but it is not required. 31 | 32 | $ cd cfggrind/tests 33 | $ gcc -g -ggdb -O0 -Wall -fno-stack-protector -no-pie -o test test.c 34 | $ ./test 15 4 8 16 42 23 35 | 4 8 15 16 23 42 36 | 37 | First, get the assembly instructions mapping for better CFG visualization. 38 | 39 | $ cfggrind_asmmap ./test > test.map 40 | $ head -n 5 test.map 41 | 0x4004a8:4:sub $0x8,%rsp 42 | 0x4004ac:7:mov 0x200b45(%rip),%rax 43 | 0x4004b3:3:test %rax,%rax 44 | 0x4004b6:2:je 00000000004004bd <_init+0x15> 45 | 0x4004b8:5:callq 0000000000400540 <.plt.got> 46 | 47 | Then, use the tool to generate an output file (test.cfg) that can be used later for CFG refinements. 48 | Also, generate a DOT file for the bubble function (cfg-0x{addr}.dot) with the instructions loaded from the map (test.map). 49 | For more information on the supported options use the --help switch. 50 | 51 | $ valgrind -q --tool=cfggrind --cfg-outfile=test.cfg --instrs-map=test.map --cfg-dump=bubble ./test 4 8 15 16 23 42 52 | 4 8 15 16 23 42 53 | 54 | Generate an image from the DOT file for the bubble function. 55 | 56 | $ ls *.dot 57 | cfg-0x400627.dot 58 | $ dot -Tpng -o cfg-ordered.png cfg-0x400627.dot 59 | 60 | Since the list used in the arguments was ordered, there is a phantom node for the conditional not taken inside the double loop. 61 | 62 |

63 | 64 |

65 | 66 | Use the same reference input (test.cfg) in a new execution with an unordered list as argument. 67 | Ignore the profiling information of the previous run to account only profiling for the next execution. 68 | 69 | $ valgrind -q --tool=cfggrind --cfg-infile=test.cfg --cfg-outfile=test.cfg --instrs-map=test.map --ignore-profiling=yes --cfg-dump=bubble ./test 15 4 8 42 16 23 70 | 4 8 15 16 23 42 71 | 72 | Update the image with the complete CFG now. 73 | 74 | $ dot -Tpng -o cfg-unordered.png cfg-0x400627.dot 75 | 76 |

77 | 78 |

79 | 80 | ## Statistics 81 | 82 | The output produced by CFGgrind can be used to extract statistics information 83 | from a program execution. 84 | The statistics is provided by **cfggrind_info**. This script that can be 85 | used to display information for the whole program or for functions. 86 | It can output the statistics in two formats: json and csv. The csv version 87 | does not output instructions opcode statistics. 88 | 89 | To obtain the whole program statistics, run: 90 | 91 | $ cfggrind_info -s program -i test.map -m json test.cfg 92 | { 93 | "functions": 252, 94 | "complete": 28, 95 | "incomplete": 224, 96 | "blocks": 2872, 97 | "phantoms": 990, 98 | "edges": 4977, 99 | "static": { 100 | "instructions": { 101 | "count": 13177, 102 | "opcodes": { 103 | "unknown": 13012, 104 | "sub": 5, 105 | "mov": 51, 106 | "test": 1, 107 | "je": 3, 108 | "add": 8, 109 | "ret": 7, 110 | "jmp": 5, 111 | "shr": 1, 112 | "sar": 2, 113 | "push": 16, 114 | "bnd": 10, 115 | "xor": 3, 116 | "pop": 3, 117 | "and": 1, 118 | "call": 8, 119 | "cmp": 6, 120 | "cmpb": 1, 121 | "jne": 1, 122 | "movb": 1, 123 | "movl": 3, 124 | "jl": 4, 125 | "nop": 2, 126 | "cltq": 5, 127 | "lea": 8, 128 | "jge": 1, 129 | "addl": 4, 130 | "shl": 3, 131 | "movslq": 1, 132 | "leave": 1 133 | } 134 | }, 135 | "calls": 417, 136 | "signals": 0 137 | }, 138 | "dynamic": { 139 | "instructions": { 140 | "count": 148050, 141 | "opcodes": { 142 | "unknown": 147391, 143 | "sub": 5, 144 | "mov": 248, 145 | "test": 1, 146 | "je": 3, 147 | "add": 51, 148 | "ret": 7, 149 | "jmp": 10, 150 | "shr": 1, 151 | "sar": 2, 152 | "push": 16, 153 | "bnd": 10, 154 | "xor": 3, 155 | "pop": 3, 156 | "and": 1, 157 | "call": 18, 158 | "cmp": 58, 159 | "cmpb": 1, 160 | "jne": 1, 161 | "movb": 1, 162 | "movl": 3, 163 | "jl": 42, 164 | "nop": 2, 165 | "cltq": 43, 166 | "lea": 61, 167 | "jge": 15, 168 | "addl": 33, 169 | "shl": 13, 170 | "movslq": 6, 171 | "leave": 1 172 | } 173 | }, 174 | "calls": 1113, 175 | "signals": 0 176 | } 
177 | } 178 | 179 | To obtain statistics for functions, enable it with _-s functions_. When used alone, this option 180 | has the effect of displaying statistics for all functions. To view statistics for specific functions, 181 | combine it with the _-f_ option. This option can be used with multiple function names expressed as 182 | regular expressions. 183 | 184 | $ cfggrind_info -s functions -f "test::main" "test::bubble" -i test.map -m json test.cfg 185 | [ 186 | { 187 | "cfg": "0x4006de", 188 | "invoked": 1, 189 | "complete": true, 190 | "blocks": 13, 191 | "phantoms": 0, 192 | "exit": true, 193 | "halt": false, 194 | "edges": 16, 195 | "static": { 196 | "instructions": { 197 | "count": 67, 198 | "opcodes": { 199 | "push": 2, 200 | "mov": 30, 201 | "sub": 2, 202 | "cltq": 3, 203 | "shl": 3, 204 | "call": 6, 205 | "movl": 2, 206 | "jmp": 2, 207 | "cmp": 2, 208 | "jl": 2, 209 | "lea": 6, 210 | "add": 2, 211 | "movslq": 1, 212 | "addl": 2, 213 | "leave": 1, 214 | "ret": 1 215 | } 216 | }, 217 | "calls": 6, 218 | "signals": 0 219 | }, 220 | "dynamic": { 221 | "instructions": { 222 | "count": 248, 223 | "opcodes": { 224 | "push": 2, 225 | "mov": 107, 226 | "sub": 2, 227 | "cltq": 13, 228 | "shl": 13, 229 | "call": 16, 230 | "movl": 2, 231 | "jmp": 2, 232 | "cmp": 14, 233 | "jl": 14, 234 | "lea": 31, 235 | "add": 12, 236 | "movslq": 6, 237 | "addl": 12, 238 | "leave": 1, 239 | "ret": 1 240 | } 241 | }, 242 | "calls": 16, 243 | "signals": 0 244 | }, 245 | "name": "test::main(18)" 246 | }, 247 | { 248 | "cfg": "0x400607", 249 | "invoked": 1, 250 | "complete": false, 251 | "blocks": 8, 252 | "phantoms": 1, 253 | "exit": true, 254 | "halt": false, 255 | "edges": 12, 256 | "static": { 257 | "instructions": { 258 | "count": 35, 259 | "opcodes": { 260 | "push": 1, 261 | "mov": 13, 262 | "movl": 1, 263 | "jmp": 2, 264 | "cmp": 3, 265 | "jl": 2, 266 | "add": 3, 267 | "nop": 1, 268 | "pop": 1, 269 | "ret": 1, 270 | "cltq": 2, 271 | "lea": 2, 272 | "jge": 1, 273 | "addl": 2 274 
| } 275 | }, 276 | "calls": 0, 277 | "signals": 0 278 | }, 279 | "dynamic": { 280 | "instructions": { 281 | "count": 348, 282 | "opcodes": { 283 | "push": 1, 284 | "mov": 133, 285 | "movl": 1, 286 | "jmp": 7, 287 | "cmp": 43, 288 | "jl": 28, 289 | "add": 36, 290 | "nop": 1, 291 | "pop": 1, 292 | "ret": 1, 293 | "cltq": 30, 294 | "lea": 30, 295 | "jge": 15, 296 | "addl": 21 297 | } 298 | }, 299 | "calls": 0, 300 | "signals": 0 301 | }, 302 | "name": "test::bubble(4)" 303 | } 304 | ] 305 | 306 | ## Output Format 307 | 308 | The output format, enabled by the --cfg-outfile argument, has two main formats: **cfg** and **node**. 309 | Optional sections in this format are marked with *curly brackets*. 310 | 311 | A **cfg** must have an address (*cfg-addr*). 312 | If profiling is enabled at compile-time, this address can be followed by an 313 | optional number of invocations separated by a colon (*:invocations*). 314 | The cfg must have a function name (*cfg-name*) in double quotes. 315 | The name can be obtained from the debugging symbols if present, or marked as 316 | *unknown* otherwise. 317 | Finally, the cfg has a flag (*is-complete*) indicating if this CFG is complete -- it has no indirect jumps or calls, 318 | and it has no phantom nodes (nodes never executed during runtime). 319 | 320 | [cfg cfg-addr{:invocations} cfg-name is-complete] 321 | 322 | A **node** models a basic block with instructions, a list of function calls 323 | addresses, a list of signal ids that contains a list of calls handlers, a flag 324 | indicating if the last instruction of the node is an indirect jump or call, and 325 | a list of successors nodes (node address, *exit* or *halt*). 326 | A successor node with an unmapped address is a *phantom* node. 327 | Note that there is no special entry node in this representation. 328 | There is only a single/unique node that has the same address of its CFG that 329 | should be executed first in case of a invocation of this CFG. 
330 | 331 | [node cfg-addr node-addr node-size [list of instr-size] [list of cfg-addr{:count}] 332 | [list of signal-id->cfg-addr{:count}] is-indirect [list of succ-node{:count}]] 333 | 334 | A node must belong to a CFG (*cfg-addr*) and it is identified by its starting 335 | node address (*node-addr*) and node size (*node-size*). 336 | Then, a node has a non-empty list of sizes for each of its instructions 337 | (*list of instr-size*) between brackets. 338 | The sizes of these instructions must add up to the *node-size*. 339 | The node has a list of called functions, where each function is indexed by the address of its CFG 340 | followed by an optional invocation count (*list of cfg-addr{:count}*). 341 | This list is empty if there are no function calls for this node. 342 | Also, the node has a mapping of called signal handlers indexed by the signal id (*signal-id*). 343 | Each signal is thus mapped to a CFG (*->cfg-addr*) followed by an optional invocation count (*{:count}*). 344 | Similarly, this list is empty if no signal handlers are activated by this node. 345 | The node contains a marker if the tail instruction does an indirect jump or call (*is-indirect*). 346 | Finally, the node has a list of successor nodes (*list of succ-node*), where each node (*succ-node*) is 347 | an address, *exit* or *halt*. 348 | These successor nodes can be followed, optionally, by their execution counts. 
349 | 350 | For example, the main function of file tests/signal.c can be represented as: 351 | 352 | [cfg 0x4005c0:1 "signal::main(11)" true] 353 | [node 0x4005c0 0x4005c0 38 [1 3 4 3 4 8 5 5 5] [0x4004a0:1] [] false [0x4005e6:1]] 354 | [node 0x4005c0 0x4005e6 10 [5 5] [0x400480:1] [] false [0x4005f0:1]] 355 | [node 0x4005c0 0x4005f0 2 [2] [] [] false [0x4005f7:1]] 356 | [node 0x4005c0 0x4005f7 10 [6 2 2] [] [14->0x4005ad:1] false [0x4005f2:56689172 0x400601:1]] 357 | [node 0x4005c0 0x4005f2 5 [5] [] [] false [0x4005f7:56689172]] 358 | [node 0x4005c0 0x400601 22 [4 3 5 5 5] [0x400470:1] [] false [0x400617:1]] 359 | [node 0x4005c0 0x400617 7 [5 1 1] [] [] false [exit:1]] 360 | 361 |

362 | 363 |

364 | -------------------------------------------------------------------------------- /callstack.c: -------------------------------------------------------------------------------- 1 | /*--------------------------------------------------------------------*/ 2 | /*--- CFGgrind ---*/ 3 | /*--- callstack.c ---*/ 4 | /*--------------------------------------------------------------------*/ 5 | 6 | /* 7 | This file is part of CFGgrind, a dynamic control flow graph (CFG) 8 | reconstruction tool. 9 | 10 | Copyright (C) 2023, Andrei Rimsa (andrei@cefetmg.br) 11 | 12 | This tool is derived and contains lot of code from Callgrind 13 | Copyright (C) 2002-2017, Josef Weidendorfer (Josef.Weidendorfer@gmx.de) 14 | 15 | This program is free software; you can redistribute it and/or 16 | modify it under the terms of the GNU General Public License as 17 | published by the Free Software Foundation; either version 2 of the 18 | License, or (at your option) any later version. 19 | 20 | This program is distributed in the hope that it will be useful, but 21 | WITHOUT ANY WARRANTY; without even the implied warranty of 22 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 23 | General Public License for more details. 24 | 25 | You should have received a copy of the GNU General Public License 26 | along with this program; if not, see . 27 | 28 | The GNU General Public License is contained in the file COPYING. 29 | */ 30 | 31 | #include "global.h" 32 | 33 | /*------------------------------------------------------------*/ 34 | /*--- Call stack, operations ---*/ 35 | /*------------------------------------------------------------*/ 36 | 37 | /* Stack of current thread. Gets initialized when switching to 1st thread. 38 | * 39 | * The artificial call stack is an array of call_entry's, representing 40 | * stack frames of the executing program. 41 | * Array call_stack and call_stack_esp have same size and grow on demand. 
42 | * Array call_stack_esp holds SPs of corresponding stack frames. 43 | * 44 | */ 45 | 46 | #define N_CALL_STACK_INITIAL_ENTRIES 500 47 | 48 | call_stack CGD_(current_call_stack); 49 | 50 | void CGD_(init_call_stack)(call_stack* s) 51 | { 52 | Int i; 53 | 54 | CGD_ASSERT(s != 0); 55 | 56 | s->size = N_CALL_STACK_INITIAL_ENTRIES; 57 | s->entry = (call_entry*) CGD_MALLOC("cgd.callstack.ics.1", 58 | s->size * sizeof(call_entry)); 59 | s->sp = 0; 60 | 61 | for(i=0; i < s->size; i++) { 62 | s->entry[i].cfg = 0; 63 | s->entry[i].working = 0; 64 | } 65 | } 66 | 67 | void CGD_(destroy_call_stack)(call_stack* s) { 68 | CGD_ASSERT(s != 0); 69 | 70 | CGD_FREE(s->entry); 71 | } 72 | 73 | call_entry* CGD_(get_call_entry)(Int sp) 74 | { 75 | CGD_ASSERT(sp <= CGD_(current_call_stack).sp); 76 | return &(CGD_(current_call_stack).entry[sp]); 77 | } 78 | 79 | void CGD_(copy_current_call_stack)(call_stack* dst) 80 | { 81 | CGD_ASSERT(dst != 0); 82 | 83 | dst->size = CGD_(current_call_stack).size; 84 | dst->entry = CGD_(current_call_stack).entry; 85 | dst->sp = CGD_(current_call_stack).sp; 86 | } 87 | 88 | void CGD_(set_current_call_stack)(call_stack* s) 89 | { 90 | CGD_ASSERT(s != 0); 91 | 92 | CGD_(current_call_stack).size = s->size; 93 | CGD_(current_call_stack).entry = s->entry; 94 | CGD_(current_call_stack).sp = s->sp; 95 | } 96 | 97 | 98 | static __inline__ 99 | void ensure_stack_size(Int i) 100 | { 101 | Int oldsize; 102 | call_stack *cs = &CGD_(current_call_stack); 103 | 104 | if (i < cs->size) return; 105 | 106 | oldsize = cs->size; 107 | cs->size *= 2; 108 | while (i > cs->size) cs->size *= 2; 109 | 110 | cs->entry = (call_entry*) CGD_REALLOC("cgd.callstack.ess.1", 111 | cs->entry, cs->size * sizeof(call_entry)); 112 | 113 | for(i=oldsize; i < cs->size; i++) { 114 | cs->entry[i].cfg = 0; 115 | cs->entry[i].working = 0; 116 | } 117 | 118 | CGD_(stat).call_stack_resizes++; 119 | 120 | CGD_DEBUGIF(2) 121 | VG_(printf)(" call stack enlarged to %u entries\n", 122 | 
CGD_(current_call_stack).size); 123 | } 124 | 125 | /* Push call on call stack. 126 | * 127 | * Increment the usage count for the function called. 128 | * A jump from to , with . 129 | */ 130 | void CGD_(push_call_stack)(BB* from, UInt jmp, BB* to, Addr sp) 131 | { 132 | call_entry* current_entry; 133 | Addr ret_addr; 134 | CFG* called; 135 | #if CFG_NODE_CACHE_SIZE > 0 136 | CfgNodeCallCache* callCache; 137 | #endif 138 | 139 | /* Ensure a call stack of size +1. 140 | */ 141 | ensure_stack_size(CGD_(current_call_stack).sp +1); 142 | current_entry = &(CGD_(current_call_stack).entry[CGD_(current_call_stack).sp]); 143 | 144 | /* return address is only is useful with a real call; 145 | * used to detect RET w/o CALL */ 146 | if (from->jmp[jmp].jmpkind == bjk_Call) { 147 | UInt instr = from->jmp[jmp].instr; 148 | ret_addr = bb_addr(from) + 149 | from->instr[instr].instr_offset + 150 | from->instr[instr].instr_size; 151 | } else { 152 | ret_addr = 0; 153 | } 154 | 155 | called = CGD_(get_cfg)(to->groups[0].group_addr); 156 | 157 | // Let's update the fdesc if it is our first real call to it. 158 | if (!CGD_(cfg_fdesc)(called)) 159 | CGD_(cfg_build_fdesc)(called); 160 | 161 | #if CFG_NODE_CACHE_SIZE > 0 162 | callCache = CGD_(current_state).working->cache.call ? 
163 | &(CGD_(current_state).working->cache.call[CFG_NODE_CACHE_INDEX(called->addr)]) : 0; 164 | if (callCache && 165 | callCache->called == called && 166 | callCache->indirect == from->jmp[jmp].indirect) { 167 | #if ENABLE_PROFILING 168 | callCache->count++; 169 | #endif // ENABLE_PROFILING 170 | } else { 171 | #if ENABLE_PROFILING 172 | if (callCache && callCache->count > 0) 173 | CGD_(cfgnode_flush_call_count)(CGD_(current_state).cfg, 174 | CGD_(current_state).working, callCache); 175 | #endif // ENABLE_PROFILING 176 | #endif // CFG_NODE_CACHE_SIZE 177 | CGD_(cfgnode_set_call)(CGD_(current_state).cfg, CGD_(current_state).working, 178 | called, from->jmp[jmp].indirect); 179 | #if CFG_NODE_CACHE_SIZE > 0 180 | } 181 | #endif 182 | 183 | /* put jcc on call stack */ 184 | current_entry->sp = sp; 185 | current_entry->ret_addr = ret_addr; 186 | current_entry->cfg = CGD_(current_state).cfg; 187 | current_entry->working = CGD_(current_state).working; 188 | 189 | CGD_(current_call_stack).sp++; 190 | 191 | /* To allow for above assertion we set context of next frame to 0 */ 192 | CGD_ASSERT(CGD_(current_call_stack).sp < CGD_(current_call_stack).size); 193 | current_entry++; 194 | 195 | current_entry->cfg = 0; 196 | current_entry->working = 0; 197 | 198 | CGD_(current_state).cfg = called; 199 | CGD_(current_state).working = CGD_(cfg_entry_node)(called); 200 | #if ENABLE_PROFILING 201 | called->stats.execs++; 202 | #endif 203 | } 204 | 205 | 206 | /* Pop call stack and update inclusive sums. 207 | * Returns modified fcc. 
208 | * 209 | * If the JCC becomes inactive, call entries are freed if possible 210 | */ 211 | void CGD_(pop_call_stack)(Bool halt) { 212 | call_entry* lower_entry; 213 | 214 | if (CGD_(current_state).sig > 0) { 215 | /* Check if we leave a signal handler; this can happen when 216 | * calling longjmp() in the handler */ 217 | CGD_(run_post_signal_on_call_stack_bottom)(); 218 | } 219 | 220 | lower_entry = 221 | &(CGD_(current_call_stack).entry[CGD_(current_call_stack).sp-1]); 222 | 223 | CGD_DEBUG(4,"+ pop_call_stack: frame %d\n", 224 | CGD_(current_call_stack).sp); 225 | 226 | if (halt) { 227 | CGD_(cfgnode_set_halt)(CGD_(current_state).cfg, CGD_(current_state).working); 228 | } else { 229 | #if CFG_NODE_CACHE_SIZE > 0 230 | if (CGD_(current_state).working->cache.exit.enabled) { 231 | #if ENABLE_PROFILING 232 | CGD_(current_state).working->cache.exit.count++; 233 | #endif 234 | } else 235 | #endif 236 | CGD_(cfgnode_set_exit)(CGD_(current_state).cfg, CGD_(current_state).working); 237 | } 238 | 239 | CGD_(current_state).cfg = lower_entry->cfg; 240 | CGD_(current_state).working = lower_entry->working; 241 | 242 | lower_entry->cfg = 0; 243 | lower_entry->working = 0; 244 | 245 | CGD_(current_call_stack).sp--; 246 | } 247 | 248 | 249 | /* Unwind enough CallStack items to sync with current stack pointer. 250 | * Returns the number of stack frames unwinded. 251 | */ 252 | Int CGD_(unwind_call_stack)(Addr sp, Int minpops) 253 | { 254 | Int csp; 255 | Int unwind_count = 0; 256 | CGD_DEBUG(4,"+ unwind_call_stack(sp %#lx, minpops %d): frame %d\n", 257 | sp, minpops, CGD_(current_call_stack).sp); 258 | 259 | /* We pop old stack frames. 260 | * For a call, be p the stack address with return address. 
261 | * - call_stack_esp[] has SP after the CALL: p-4 262 | * - current sp is after a RET: >= p 263 | */ 264 | 265 | while( (csp=CGD_(current_call_stack).sp) >0) { 266 | call_entry* top_ce = &(CGD_(current_call_stack).entry[csp-1]); 267 | 268 | if ((top_ce->sp < sp) || 269 | ((top_ce->sp == sp) && minpops>0)) { 270 | 271 | minpops--; 272 | unwind_count++; 273 | CGD_(pop_call_stack)(False); 274 | csp=CGD_(current_call_stack).sp; 275 | continue; 276 | } 277 | break; 278 | } 279 | 280 | CGD_DEBUG(4,"- unwind_call_stack\n"); 281 | return unwind_count; 282 | } 283 | -------------------------------------------------------------------------------- /cfggrind.patch: -------------------------------------------------------------------------------- 1 | diff -urN a/Makefile.am b/Makefile.am 2 | --- a/Makefile.am 2024-11-01 02:07:00.000000000 -0300 3 | +++ b/Makefile.am 2024-12-23 16:00:00.222571366 -0300 4 | @@ -7,6 +7,7 @@ 5 | memcheck \ 6 | cachegrind \ 7 | callgrind \ 8 | + cfggrind \ 9 | helgrind \ 10 | drd \ 11 | massif \ 12 | diff -urN a/configure.ac b/configure.ac 13 | --- a/configure.ac 2024-11-01 02:07:00.000000000 -0300 14 | +++ b/configure.ac 2024-12-23 16:00:00.222571366 -0300 15 | @@ -5671,6 +5671,8 @@ 16 | callgrind/callgrind_annotate 17 | callgrind/callgrind_control 18 | callgrind/tests/Makefile 19 | + cfggrind/Makefile 20 | + cfggrind/tests/Makefile 21 | helgrind/Makefile 22 | helgrind/tests/Makefile 23 | drd/Makefile 24 | -------------------------------------------------------------------------------- /cfggrind_asmmap: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | function fatal() { 4 | echo "error: $@" 1>&2; 5 | exit 1; 6 | } 7 | 8 | function range() { 9 | local program=$1; 10 | local section=$2; 11 | 12 | if [ "$os" == 'Darwin' ]; then 13 | local tmp=$(echo $(otool -l "${program}" 2>/dev/null | \ 14 | gawk -v sect="${section}" \ 15 | '/sectname/ {if ($2 == "__"sect) { enabled=1 } else { enabled=0 }} \ 
16 | /addr|size/ {if (enabled == 1) { print $2 }}' | \ 17 | sed 's/^0x0*\(.*\)$/\1/')); 18 | elif [ "$os" == 'Linux' ]; then 19 | local tmp=$(objdump -x "${program}" 2>/dev/null | \ 20 | grep -i "^[[:space:]]*[[:digit:]]*[[:space:]]*\.${section}[[:space:]]" | \ 21 | gawk '{print $3 " " $4}' | \ 22 | sed 's/^0*\(.*\) 0*\(.*\)$/\2 \1/g'); 23 | fi 24 | 25 | if [ -n "${tmp}" ]; then 26 | start=$(echo -n "${tmp}" | gawk '{print $1}' | tr '[[:lower:]]' '[[:upper:]]'); 27 | size=$(echo -n "${tmp}" | gawk '{print $2}' | tr '[[:lower:]]' '[[:upper:]]'); 28 | 29 | echo "0x${start}:0x$(echo "obase=16;ibase=16;${start}+${size}" | bc -l | \ 30 | tr '[[:upper:]]' '[[:lower:]]')"; 31 | fi 32 | } 33 | 34 | function asm() { 35 | local program=$1; 36 | local section=$2; 37 | local r=$(range "${program}" "${section}"); 38 | 39 | if [ -n "${r}" ]; then 40 | local start="${r/:*/}"; 41 | local end="${r/*:/}"; 42 | 43 | if [ "$os" == "Darwin" ]; then 44 | otool -V -s __TEXT "__${section}" "${program}"; 45 | elif [ "$os" == "Linux" ]; then 46 | objdump -d -j ".${section}" --prefix-addresses "${program}" | \ 47 | sed 's/[[:space:]]<[^>]*>[[:space:]]/ /'; 48 | fi | \ 49 | grep -E '^[[:digit:]abcdef]+[[:space:]]' | \ 50 | sed 's/^[[:space:]]*0*/0x/' | \ 51 | sed -e 's://[[:space:]].*$::' -e 's:;[[:space:]].*$::' -e 's:#[^[:digit:]-].*$::' | \ 52 | sed 's/[[:space:]]*$//' | \ 53 | gawk -v "last=${end}" '\ 54 | { \ 55 | naddr=tolower($1); \ 56 | if (NR != 1) { \ 57 | diff=strtonum(naddr)-strtonum(addr); \ 58 | printf "%s:%d:%s\n", addr, diff, asm; \ 59 | } \ 60 | \ 61 | addr=naddr; \ 62 | asm=$2; \ 63 | for (i=3;i<=NF;++i) \ 64 | asm=asm " " $i; \ 65 | } \ 66 | END { \ 67 | diff=strtonum(last)-strtonum(addr); \ 68 | printf "%s:%d:%s\n", addr, diff, asm; \ 69 | } \ 70 | '; 71 | fi 72 | } 73 | 74 | if [ $# -ne 1 ]; then 75 | echo "Usage: $0 [Binary program]"; 76 | exit 1; 77 | fi 78 | 79 | [ -r "$1" ] || fatal "invalid binary program: $1"; 80 | 81 | os="$(uname -s)"; 82 | 83 | if [ "$os" == 
"Darwin" ]; then 84 | sectnames="text stubs stub_helper"; 85 | elif [ "$os" == "Linux" ]; then 86 | sectnames="init plt plt.got text fini"; 87 | else 88 | fatal "Unsupported system: $os"; 89 | fi 90 | 91 | for sectname in ${sectnames}; do 92 | asm "$1" "$sectname"; 93 | done 94 | -------------------------------------------------------------------------------- /clo.c: -------------------------------------------------------------------------------- 1 | /*--------------------------------------------------------------------*/ 2 | /*--- CFGgrind ---*/ 3 | /*--- clo.c ---*/ 4 | /*--------------------------------------------------------------------*/ 5 | 6 | /* 7 | This file is part of CFGgrind, a dynamic control flow graph (CFG) 8 | reconstruction tool. 9 | 10 | Copyright (C) 2023, Andrei Rimsa (andrei@cefetmg.br) 11 | 12 | This tool is derived and contains lot of code from Callgrind 13 | Copyright (C) 2002-2017, Josef Weidendorfer (Josef.Weidendorfer@gmx.de) 14 | 15 | This program is free software; you can redistribute it and/or 16 | modify it under the terms of the GNU General Public License as 17 | published by the Free Software Foundation; either version 2 of the 18 | License, or (at your option) any later version. 19 | 20 | This program is distributed in the hope that it will be useful, but 21 | WITHOUT ANY WARRANTY; without even the implied warranty of 22 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 23 | General Public License for more details. 24 | 25 | You should have received a copy of the GNU General Public License 26 | along with this program; if not, see . 27 | 28 | The GNU General Public License is contained in the file COPYING. 
29 | */ 30 | 31 | #include "config.h" // for VG_PREFIX 32 | 33 | #include "global.h" 34 | 35 | 36 | 37 | /*------------------------------------------------------------*/ 38 | /*--- Function specific configuration options ---*/ 39 | /*------------------------------------------------------------*/ 40 | 41 | #define CONFIG_DEFAULT -1 42 | #define CONFIG_FALSE 0 43 | #define CONFIG_TRUE 1 44 | 45 | /*--------------------------------------------------------------------*/ 46 | /*--- Command line processing ---*/ 47 | /*--------------------------------------------------------------------*/ 48 | /* Handle one tool option: store its value in CGD_(clo) and return True, or return False when arg is not an option of this tool. For --cfg-dump, addresses and function names accumulate in lists, and "all" must not be combined with either. */ 49 | Bool CGD_(process_cmd_line_option)(const HChar* arg) 50 | { 51 | const HChar* tmp_str; 52 | /* Empty first branch so that every option below, including the conditionally compiled ones, can be written as "else if". */ 53 | if (False) {} 54 | #if CGD_ENABLE_DEBUG 55 | else if VG_INT_CLO(arg, "--ct-verbose", CGD_(clo).verbose) {} 56 | else if VG_INT_CLO(arg, "--ct-vstart", CGD_(clo).verbose_start) {} 57 | #endif 58 | 59 | else if VG_STR_CLO(arg, "--cfg-outfile", CGD_(clo).cfg_outfile) {} 60 | else if VG_STR_CLO(arg, "--cfg-infile", CGD_(clo).cfg_infile) {} 61 | else if VG_BOOL_CLO(arg, "--ignore-failed-cfg", CGD_(clo).ignore_failed) {} 62 | #if ENABLE_PROFILING 63 | else if VG_BOOL_CLO(arg, "--ignore-profiling", CGD_(clo).ignore_profiling) {} 64 | #endif 65 | else if VG_BOOL_CLO(arg, "--emulate-calls", CGD_(clo).emulate_calls) {} 66 | else if VG_STR_CLO(arg, "--cfg-dump", tmp_str) { 67 | if (VG_(strcasecmp)(tmp_str, "all") == 0) { 68 | CGD_ASSERT(CGD_(clo).dump_cfgs.all == False); 69 | CGD_ASSERT(CGD_(clo).dump_cfgs.addrs == 0); 70 | CGD_ASSERT(CGD_(clo).dump_cfgs.fnames == 0); 71 | 72 | CGD_(clo).dump_cfgs.all = True; 73 | } else if (VG_(strcasecmp)(tmp_str, "none") == 0) { 74 | CGD_ASSERT(CGD_(clo).dump_cfgs.all == False); 75 | CGD_ASSERT(CGD_(clo).dump_cfgs.addrs == 0); 76 | CGD_ASSERT(CGD_(clo).dump_cfgs.fnames == 0); 77 | } else if (VG_(strncasecmp)(tmp_str, "0x", 2) == 0) { 78 | Addr addr; 79 | 80 | CGD_ASSERT(CGD_(clo).dump_cfgs.all == False); 81 | 82 | addr = VG_(strtoull16)(tmp_str, 0); 83 | 
CGD_ASSERT(addr != 0); 84 | 85 | if (CGD_(clo).dump_cfgs.addrs == 0) 86 | CGD_(clo).dump_cfgs.addrs = CGD_(new_smart_list)(1); 87 | 88 | CGD_(smart_list_add)(CGD_(clo).dump_cfgs.addrs, (void*) addr); 89 | } else { 90 | CGD_ASSERT(CGD_(clo).dump_cfgs.all == False); 91 | 92 | if (CGD_(clo).dump_cfgs.fnames == 0) 93 | CGD_(clo).dump_cfgs.fnames = CGD_(new_smart_list)(1); 94 | 95 | CGD_(smart_list_add)(CGD_(clo).dump_cfgs.fnames, 96 | (void*) CGD_STRDUP("cgd.clo.pclo.1", tmp_str)); 97 | } 98 | } 99 | else if VG_STR_CLO(arg, "--cfg-dump-dir", CGD_(clo).dump_cfgs.dir) {} 100 | else if VG_STR_CLO(arg, "--instrs-map", CGD_(clo).instrs_map) {} 101 | else if VG_STR_CLO(arg, "--mem-mappings", CGD_(clo).mem_mappings) {} 102 | 103 | else 104 | return False; 105 | 106 | return True; 107 | } 108 | /* Print the help text for the tool options accepted by CGD_(process_cmd_line_option). */ 109 | void CGD_(print_usage)(void) 110 | { 111 | VG_(printf)( 112 | "\n control flow graph options:\n" 113 | " --cfg-outfile=<f> CFG output file name\n" 114 | " use %%p to bind the pid to a cfg file (e.g. cfggrind.%%p.cfg)\n" 115 | " --cfg-infile=<f> CFG input file name\n" 116 | " --ignore-failed-cfg=no|yes Ignore failed cfg input file read [no]\n" 117 | #if ENABLE_PROFILING 118 | " --ignore-profiling=no|yes Ignore profiling information from input file [no]\n" 119 | #endif 120 | " --emulate-calls=no|yes Emulate call for jumps in function entries [yes]\n" 121 | " --cfg-dump=<name> Dump DOT cfg file as cfg-<name>.dot [none]\n" 122 | " where <name> is a function name, an address (e.g. 
0xNNNNNNNN), all or none\n" 123 | " (can be used multiple times)\n" 124 | " --cfg-dump-dir=<directory> Directory where to dump the DOT cfgs [.]\n" 125 | " --instrs-map=<f> Instructions map (address:size:assembly per entry) file\n" 126 | " --mem-mappings=<f> Output file with memory mappings (bin, libs, ...)\n" 127 | ); 128 | } 129 | /* Print the help text for the debug options; prints "(none)" unless CGD_ENABLE_DEBUG is set. */ 130 | void CGD_(print_debug_usage)(void) 131 | { 132 | VG_(printf)( 133 | 134 | #if CGD_ENABLE_DEBUG 135 | " --ct-verbose=<level> Verbosity of standard debug output [0]\n" 136 | " --ct-vstart=<BB number> Only be verbose after basic block [0]\n" 137 | /* The per-function "--ct-verbose<level>=<func>" form is not listed: the option parser in this file does not accept it. */ 138 | #else 139 | " (none)\n" 140 | #endif 141 | 142 | ); 143 | } 144 | /* Set the command line options stored in CGD_(clo) to their default values. */ 145 | void CGD_(set_clo_defaults)(void) 146 | { 147 | /* Default values for command line arguments */ 148 | 149 | /* cfg options */ 150 | CGD_(clo).cfg_outfile = 0; 151 | CGD_(clo).cfg_infile = 0; 152 | CGD_(clo).ignore_failed = False; 153 | #if ENABLE_PROFILING 154 | CGD_(clo).ignore_profiling = False; 155 | #endif 156 | CGD_(clo).emulate_calls = True; 157 | CGD_(clo).dump_cfgs.all = False; 158 | CGD_(clo).dump_cfgs.addrs = 0; 159 | CGD_(clo).dump_cfgs.fnames = 0; 160 | CGD_(clo).dump_cfgs.dir = "."; 161 | CGD_(clo).instrs_map = 0; 162 | CGD_(clo).mem_mappings = 0; 163 | 164 | #if CGD_ENABLE_DEBUG 165 | CGD_(clo).verbose = 0; 166 | CGD_(clo).verbose_start = 0; 167 | #endif 168 | } 169 | -------------------------------------------------------------------------------- /data/rq1-efficiency/SPEC2017/data.csv: -------------------------------------------------------------------------------- 1 | benchmark,original,nulgrind,callgrind,cfggrind,cfggrind-prof 2 | 500.perlbench,302,1697,15762,16781,17924 3 | 502.gcc,248,1053,8666,8525,8947 4 | 503.bwaves,802,5123,21643,21942,22470 5 | 505.mcf,306,993,6419,4872,5031 6 | 507.cactuBSSN,269,1609,2654,2422,2496 7 | 508.namd,256,1868,2950,2744,2778 8 | 510.parest,389,3774,13748,12687,12881 9 | 511.povray,405,3118,21165,17120,17842 10 | 519.lbm,236,1612,2145,1926,1951 11 | 
520.omnetpp,384,1462,9696,9383,9732 12 | 521.wrf,832,4683,15171,13834,14211 13 | 523.xalancbmk,301,1163,10663,11071,11033 14 | 525.x264,385,1651,10653,10101,10215 15 | 526.blender,265,1659,8348,7200,7443 16 | 527.cam4,447,2638,11221,10917,11196 17 | 531.deepsjeng,284,1095,10824,9845,10259 18 | 538.imagick,1030,5009,19562,16973,17435 19 | 541.leela,462,1532,14660,11476,12103 20 | 544.nab,393,1981,7402,6593,6791 21 | 548.exchange2,490,1853,17343,15848,16521 22 | 549.fotonik3d,388,1889,3079,2854,2942 23 | 554.roms,377,2897,7923,7318,7467 24 | 557.xz,319,1024,7573,6484,6760 25 | 600.perlbench,303,1711,15877,16916,18167 26 | 602.gcc,442,1895,16265,15342,16132 27 | 603.bwaves,3412,58203,422178,358122,373170 28 | 605.mcf,601,2000,12290,9376,10019 29 | 607.cactuBSSN,680,13674,27211,20168,20988 30 | 619.lbm,1246,5325,8303,6034,6258 31 | 620.omnetpp,380,1375,9442,9001,9551 32 | 621.wrf,1193,30276,109213,79556,83793 33 | 623.xalancbmk,308,1170,10703,11015,11142 34 | 625.x264,391,1661,10652,10113,10232 35 | 627.cam4,775,15358,80198,74916,79302 36 | 628.pop2,818,27854,111984,102904,108212 37 | 631.deepsjeng,337,1283,12694,11748,12382 38 | 638.imagick,3068,90852,255653,234030,240532 39 | 641.leela,465,1538,14636,11578,12036 40 | 644.nab,813,20771,77855,70423,72131 41 | 648.exchange2,491,1856,18205,16099,17409 42 | 649.fotonik3d,886,5707,10464,10170,10711 43 | 654.roms,1902,24531,71748,66534,69053 44 | 657.xz,821,5479,32582,27904,29017 45 | -------------------------------------------------------------------------------- /data/rq1-efficiency/SPEC2017/plot.R: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env Rscript 2 | 3 | usage <- function() { 4 | cat('Usage: Rscript plot.R [Input report] \n'); 5 | cat(' -s Graph only\n'); 6 | cat(' -a Use arithmetic mean\n'); 7 | cat(' -m Exclude mean\n'); 8 | quit(); 9 | } 10 | 11 | "geometric.mean" = function(x, na.rm=TRUE){ 12 | exp(sum(log(x[x > 0]), na.rm=na.rm) / length(x)) 13 | } 14 | 15 | 
calcColMeans = function(frame, geom=FALSE, na.rm=TRUE) { 16 | if (geom) { 17 | l <- list(); 18 | for (c in 1:ncol(frame)) 19 | l[c] <- geometric.mean(frame[,c], na.rm=na.rm); 20 | names(l) <- colnames(frame); 21 | return(l); 22 | } else { 23 | return(as.list(colMeans(frame))); 24 | } 25 | } 26 | 27 | simple <- FALSE; 28 | arith <- FALSE; 29 | avg <- TRUE; 30 | args <- commandArgs(trailingOnly=TRUE); 31 | for (a in args) { 32 | if (startsWith(a, '-')) { 33 | if (a == '-s') { 34 | simple <- TRUE; 35 | } else if (a == '-a') { 36 | if (!avg) 37 | stop("cannot combine -a with -m"); 38 | arith <- TRUE; 39 | } else if (a == '-m') { 40 | if (arith) 41 | stop("cannot combine -m with -a"); 42 | avg <- FALSE; 43 | } else { 44 | usage(); 45 | } 46 | } else { 47 | if (exists('input')) { 48 | if (exists('output')) { 49 | usage(); 50 | } else { 51 | output <- a; 52 | } 53 | } else { 54 | input <- a; 55 | } 56 | } 57 | } 58 | 59 | if (!exists('input')) 60 | usage(); 61 | 62 | if (!exists('output')) 63 | output <- "output.pdf"; 64 | 65 | data <- read.csv(input, header = TRUE, sep = ','); 66 | if (avg) 67 | data <- rbind(data, data.frame( 68 | benchmark="average", 69 | calcColMeans(data[,2:6], geom=!arith))); 70 | 71 | process <- t(as.matrix(data[,c(2,3,4,6)])); 72 | colnames(process) <- data$benchmark; 73 | 74 | pdf(file=output, width=12, height=5); 75 | if (simple) { 76 | par(oma=c(1.5,4,2,1),mar=c(0,0,0,0)); 77 | } else { 78 | par(oma=c(0,0,1,0),mar=c(6.5,4,0,6.5)); 79 | } 80 | 81 | colors <- c("#14a1ad", "#a9af03", "#f95c3a", "#3414ad"); 82 | ptypes <- c(19, 19, 19, 19); 83 | 84 | p = plot(1, 1, xlim=c(1,length(data$benchmark)), ylim=c(min(process),max(process)), type="n", xlab="", xaxt="n", las=1, ann=F, log="y"); 85 | 86 | for (n in 1:length(colors)) { 87 | points(process[c(n),], pch=ptypes[n], col=colors[n], cex=2); 88 | } 89 | 90 | if (!simple) { 91 | modes <- c("original", "nulgrind", "callgrind", "CFGgrind"); 92 | 93 | mtext(text="Time (s)", at=0.60, side=2, line=-1.5, 
outer=TRUE); 94 | text(1:length(data$benchmark), min(process)-90, srt=45, adj=1, labels=colnames(process), xpd=T, cex=1.2, offset=10); 95 | 96 | legend(length(data$benchmark)+2.15, max(process)/15, modes, border=T, pch=ptypes, col=colors, cex=1, pt.cex=1.5, xpd=NA); 97 | } 98 | -------------------------------------------------------------------------------- /data/rq1-efficiency/SPEC2017/plot2.R: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env Rscript 2 | 3 | usage <- function() { 4 | cat('Usage: Rscript plot.R [Input report] \n'); 5 | cat(' -s Graph only\n'); 6 | cat(' -a Use arithmetic mean\n'); 7 | cat(' -m Exclude mean\n'); 8 | quit(); 9 | } 10 | 11 | "geometric.mean" = function(x, na.rm=TRUE){ 12 | exp(sum(log(x[x > 0]), na.rm=na.rm) / length(x)) 13 | } 14 | 15 | calcColMeans = function(frame, geom=FALSE, na.rm=TRUE) { 16 | if (geom) { 17 | l <- list(); 18 | for (c in 1:ncol(frame)) 19 | l[c] <- geometric.mean(frame[,c], na.rm=na.rm); 20 | names(l) <- colnames(frame); 21 | return(l); 22 | } else { 23 | return(as.list(colMeans(frame))); 24 | } 25 | } 26 | 27 | #calcTimes = function(v1, v2) { 28 | # if (v1 <= v2) { 29 | # return(round(((v2 / v1) - 1) * 100, digit=2)); 30 | # } else { 31 | # return(round(((v1 / v2) - 1) * 100, digit=2)); 32 | # } 33 | #} 34 | 35 | simple <- FALSE; 36 | arith <- FALSE; 37 | avg <- TRUE; 38 | args <- commandArgs(trailingOnly=TRUE); 39 | for (a in args) { 40 | if (startsWith(a, '-')) { 41 | if (a == '-s') { 42 | simple <- TRUE; 43 | } else if (a == '-a') { 44 | if (!avg) 45 | stop("cannot combine -a with -m"); 46 | arith <- TRUE; 47 | } else if (a == '-m') { 48 | if (arith) 49 | stop("cannot combine -m with -a"); 50 | avg <- FALSE; 51 | } else { 52 | usage(); 53 | } 54 | } else { 55 | if (exists('input')) { 56 | if (exists('output')) { 57 | usage(); 58 | } else { 59 | output <- a; 60 | } 61 | } else { 62 | input <- a; 63 | } 64 | } 65 | } 66 | 67 | if (!exists('input')) 68 | 
usage(); 69 | 70 | if (!exists('output')) 71 | output <- "output.pdf"; 72 | 73 | data <- read.csv(input, header = TRUE, sep = ','); 74 | if (avg) 75 | data <- rbind(data, data.frame( 76 | benchmark="average", 77 | calcColMeans(data[,2:6], geom=!arith))); 78 | 79 | original <- data[,c("original")]; 80 | process <- t(as.matrix(data[,c(3,4,6)])); 81 | colnames(process) <- data$benchmark; 82 | for (r in 1:nrow(process)) { 83 | process[c(r),] <- process[c(r),] / original; 84 | } 85 | 86 | pdf(file=output, width=12, height=5); 87 | if (simple) { 88 | par(oma=c(1.5,4,2,1),mar=c(0,0,0,0)); 89 | } else { 90 | par(oma=c(0,0,1,0),mar=c(6.5,4,0,6.5)); 91 | } 92 | 93 | colors <- c("#a9af03", "#f95c3a", "#3414ad"); 94 | ptypes <- c(19, 19, 19); 95 | 96 | p = plot(1, 1, xlim=c(1,length(data$benchmark)), ylim=c(min(process),max(process)), type="n", xlab="", xaxt="n", las=1, ann=F, log="y"); 97 | 98 | for (n in 1:length(colors)) { 99 | points(process[c(n),], pch=ptypes[n], col=colors[n], cex=2); 100 | } 101 | 102 | if (!simple) { 103 | modes <- c("nulgrind", "callgrind", "CFGgrind"); 104 | 105 | mtext(text="Slowdown", at=0.60, side=2, line=-1.5, outer=TRUE); 106 | text(1:length(data$benchmark), min(process)-0.80, srt=45, adj=1, labels=colnames(process), xpd=T, cex=1.2, offset=10); 107 | 108 | legend(length(data$benchmark)+2.15, max(process)/5, modes, border=T, pch=ptypes, col=colors, cex=1, pt.cex=1.5, xpd=NA); 109 | } 110 | -------------------------------------------------------------------------------- /data/rq1-efficiency/SPEC2017/plots/slowdown.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rimsa/CFGgrind/353cb3ba1e4bbd373f14ce743af1c317d1b3a539/data/rq1-efficiency/SPEC2017/plots/slowdown.pdf -------------------------------------------------------------------------------- /data/rq1-efficiency/SPEC2017/plots/times.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/rimsa/CFGgrind/353cb3ba1e4bbd373f14ce743af1c317d1b3a539/data/rq1-efficiency/SPEC2017/plots/times.pdf -------------------------------------------------------------------------------- /data/rq1-efficiency/SPEC2017/speedup.R: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env Rscript 2 | 3 | usage <- function() { 4 | cat('Usage: Rscript plot.R [Input report]\n'); 5 | cat(' -a Use arithmetic mean\n'); 6 | quit(); 7 | } 8 | 9 | "geometric.mean" = function(x, na.rm=TRUE){ 10 | exp(sum(log(x[x > 0]), na.rm=na.rm) / length(x)) 11 | } 12 | 13 | calcColMeans = function(frame, geom=FALSE, na.rm=TRUE) { 14 | if (geom) { 15 | l <- list(); 16 | for (c in 1:ncol(frame)) 17 | l[c] <- geometric.mean(frame[,c], na.rm=na.rm); 18 | names(l) <- colnames(frame); 19 | return(l); 20 | } else { 21 | return(as.list(colMeans(frame))); 22 | } 23 | } 24 | 25 | calcTimes = function(v1, v2) { 26 | if (v1 <= v2) { 27 | return(paste("speedup of ", round(v2 / v1, digit=2), " times", sep="")); 28 | } else { 29 | return(paste("slowdown of ", round(v1 / v2, digit=2), " times", sep="")); 30 | } 31 | } 32 | 33 | arith <- FALSE; 34 | args <- commandArgs(trailingOnly=TRUE); 35 | for (a in args) { 36 | if (startsWith(a, '-')) { 37 | if (a == '-a') { 38 | arith <- TRUE; 39 | } else { 40 | usage(); 41 | } 42 | } else { 43 | if (exists('input')) { 44 | usage(); 45 | } else { 46 | input <- a; 47 | } 48 | } 49 | } 50 | 51 | if (!exists('input')) 52 | usage(); 53 | 54 | data <- read.csv(input, header = TRUE, sep = ','); 55 | data <- rbind(data, data.frame( 56 | benchmark=if (arith) "arithmetic mean" else "geometric mean", 57 | calcColMeans(data[,2:6], geom=!arith))); 58 | 59 | process <- t(as.matrix(data[,2:6])); 60 | colnames(process) <- data$benchmark; 61 | 62 | for (r1 in 1:nrow(process)) { 63 | for (r2 in 1:nrow(process)) { 64 | if (r1 != r2) { 65 | cat(paste(rownames(process)[r1], " (", round(process[r1,ncol(process)], 
digit=2), ") vs ", sep="")); 66 | cat(paste(rownames(process)[r2], " (", round(process[r2,ncol(process)], digit=2), "): ", sep="")); 67 | cat(paste(calcTimes(process[r1,ncol(process)], process[r2,ncol(process)]), "\n", sep="")); 68 | } 69 | } 70 | cat("\n"); 71 | } 72 | -------------------------------------------------------------------------------- /data/rq1-efficiency/cBench/data.csv: -------------------------------------------------------------------------------- 1 | benchmark,original,nulgrind,callgrind,bftrace,dcfg,cfggrind-noprof,cfggrind-prof 2 | automotive_bitcount,4.03,21.24,224.68,218.96,570.63,178.70,182.71 3 | automotive_qsort1,3.95,25.74,249.21,319.87,1641.04,228.71,230.45 4 | automotive_susan_c,4.15,12.22,48.84,58.19,633.20,39.68,40.04 5 | automotive_susan_e,3.95,15.35,64.65,73.03,648.53,54.54,54.66 6 | automotive_susan_s,3.92,18.24,92.48,98.05,621.55,138.50,140.48 7 | bzip2d,4.49,12.36,76.75,86.65,448.94,72.35,73.25 8 | bzip2e,3.48,13.41,79.29,85.82,480.15,67.05,67.85 9 | consumer_jpeg_c,6.79,18.73,133.05,151.10,868.79,112.41,113.97 10 | consumer_jpeg_d,15.41,28.89,92.72,109.36,1015.49,85.30,86.18 11 | consumer_lame,6.74,44.82,141.35,308.56,605.95,131.36,132.90 12 | consumer_mad,7.13,17.96,82.80,93.11,703.35,71.73,72.57 13 | consumer_tiff2bw,7.88,25.27,178.74,190.28,1426.02,176.41,178.68 14 | consumer_tiff2rgba,15.37,33.01,158.76,185.62,1423.96,168.10,170.54 15 | consumer_tiffdither,6.32,19.73,167.45,198.69,536.62,149.92,150.82 16 | consumer_tiffmedian,6.37,24.24,148.56,160.79,1488.10,128.12,130.07 17 | network_dijkstra,3.63,12.35,111.80,122.87,516.71,117.02,117.54 18 | network_patricia,1.95,9.53,90.80,173.64,467.51,111.01,114.47 19 | office_ghostscript,4.25,27.94,250.46,345.04,1071.88,206.47,211.98 20 | office_ispell,6.55,30.78,244.54,302.91,1025.25,207.38,205.79 21 | office_rsynth,3.28,11.95,26.95,56.91,155.39,24.43,24.51 22 | office_stringsearch1,4.20,47.41,197.87,193.53,540.31,144.94,147.75 23 | 
security_blowfish_d,7.36,17.94,75.41,74.82,701.11,58.72,59.03 24 | security_blowfish_e,7.42,18.17,75.27,74.89,697.17,77.94,77.95 25 | security_pgp_d,20.32,25.72,119.59,144.03,1702.60,112.86,115.32 26 | security_pgp_e,9.48,25.30,129.76,129.48,1288.52,102.12,102.93 27 | security_rijndael_d,14.09,32.36,152.19,192.34,1921.38,128.25,131.94 28 | security_rijndael_e,13.96,32.31,161.09,211.51,1615.07,140.73,142.80 29 | security_sha,6.70,19.00,111.06,115.12,1341.26,97.39,97.51 30 | telecom_CRC32,3.92,20.14,214.75,217.41,2202.40,187.65,189.45 31 | telecom_adpcm_c,2.61,14.54,122.92,126.00,291.20,111.21,111.01 32 | telecom_adpcm_d,4.49,25.06,188.53,189.09,470.90,176.61,174.27 33 | telecom_gsm,4.38,16.18,89.11,95.77,633.51,87.76,88.65 34 | -------------------------------------------------------------------------------- /data/rq1-efficiency/cBench/plot.R: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env Rscript 2 | # Print the command line help and quit. 3 | usage <- function() { 4 | cat('Usage: Rscript plot.R [Input report] \n'); 5 | cat(' -s Graph only\n'); 6 | cat(' -x Exclude callgrind\n'); 7 | cat(' -a Use arithmetic mean\n'); 8 | cat(' -m Exclude mean\n'); 9 | quit(); 10 | } 11 | 12 | "geometric.mean" = function(x, na.rm=TRUE){ 13 | exp(sum(log(x[x > 0]), na.rm=na.rm) / length(x)) 14 | } 15 | 16 | calcColMeans = function(frame, geom=FALSE, na.rm=TRUE) { 17 | if (geom) { 18 | l <- list(); 19 | for (c in 1:ncol(frame)) 20 | l[c] <- geometric.mean(frame[,c], na.rm=na.rm); 21 | names(l) <- colnames(frame); 22 | return(l); 23 | } else { 24 | return(as.list(colMeans(frame))); 25 | } 26 | } 27 | 28 | simple <- FALSE; 29 | callgrind <- TRUE; 30 | arith <- FALSE; 31 | avg <- TRUE; 32 | args <- commandArgs(trailingOnly=TRUE); 33 | for (a in args) { 34 | if (startsWith(a, '-')) { 35 | if (a == '-s') { 36 | simple <- TRUE; 37 | } else if (a == '-x') { 38 | callgrind <- FALSE; 39 | } else if (a == '-a') { 40 | if (!avg) 41 | stop("cannot combine -a with -m"); 42 | arith <-
TRUE; 43 | } else if (a == '-m') { 44 | if (arith) 45 | stop("cannot combine -m with -a"); 46 | avg <- FALSE; 47 | } else { 48 | usage(); 49 | } 50 | } else { 51 | if (exists('input')) { 52 | if (exists('output')) { 53 | usage(); 54 | } else { 55 | output <- a; 56 | } 57 | } else { 58 | input <- a; 59 | } 60 | } 61 | } 62 | 63 | if (!exists('input')) 64 | usage(); 65 | 66 | if (!exists('output')) 67 | output <- "output.pdf"; 68 | 69 | data <- read.csv(input, header = TRUE, sep = ','); 70 | if (avg) 71 | data <- rbind(data, data.frame( 72 | benchmark="average", 73 | calcColMeans(data[,2:8], geom=!arith))); 74 | 75 | process <- t(as.matrix(data[,if (callgrind) c(2,6,5,4,8) else c(2,6,5,8)])); 76 | colnames(process) <- data$benchmark; 77 | 78 | pdf(file=output, width=12, height=5); 79 | if (simple) { 80 | par(oma=c(1.5,4,2,0),mar=c(0,0,0,0)); 81 | } else { 82 | par(oma=c(0,0,1,0),mar=c(7.5,5,0,6.5)); 83 | } 84 | 85 | colors <- c("#14a1ad", "#e21877", "#458a26", if (callgrind) "#f95c3a", "#3414ad"); 86 | ptypes <- c(19, 19, 19, if (callgrind) 19, 19); 87 | 88 | p = plot(1, 1, xlim=c(1,length(data$benchmark)), ylim=c(2,max(process)), type="n", xlab="", xaxt="n", las=1, ann=F, log="y"); 89 | 90 | for (n in 1:length(colors)) { 91 | points(process[c(n),], pch=ptypes[n], col=colors[n], cex=2); 92 | } 93 | 94 | if (!simple) { 95 | modes <- c("original", "DCFG", "bfTrace", if (callgrind) "callgrind", "CFGgrind"); 96 | 97 | mtext(text="Time (s)", at=0.60, side=2, line=-2, outer=TRUE); 98 | text(1:length(data$benchmark), 1.25, srt=45, adj=1, labels=colnames(process), xpd=T, cex=1.2, offset=10); 99 | 100 | legend(length(data$benchmark)+1.65, max(process)/15, modes, border=T, pch=ptypes, col=colors, cex=1, pt.cex=1.5, xpd=NA); 101 | } 102 | -------------------------------------------------------------------------------- /data/rq1-efficiency/cBench/plot2.R: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env Rscript 2 | 3 | usage <- 
function() { 4 | cat('Usage: Rscript plot.R [Input report] \n'); 5 | cat(' -s Graph only\n'); 6 | cat(' -x Exclude callgrind\n'); 7 | cat(' -a Use arithmetic mean\n'); 8 | cat(' -m Exclude mean\n'); 9 | quit(); 10 | } 11 | 12 | "geometric.mean" = function(x, na.rm=TRUE){ 13 | exp(sum(log(x[x > 0]), na.rm=na.rm) / length(x)) 14 | } 15 | 16 | calcColMeans = function(frame, geom=FALSE, na.rm=TRUE) { 17 | if (geom) { 18 | l <- list(); 19 | for (c in 1:ncol(frame)) 20 | l[c] <- geometric.mean(frame[,c], na.rm=na.rm); 21 | names(l) <- colnames(frame); 22 | return(l); 23 | } else { 24 | return(as.list(colMeans(frame))); 25 | } 26 | } 27 | 28 | #calcTimes = function(v1, v2) { 29 | # if (v1 <= v2) { 30 | # return(round(((v2 / v1) - 1) * 100, digit=2)); 31 | # } else { 32 | # return(round(((v1 / v2) - 1) * 100, digit=2)); 33 | # } 34 | #} 35 | 36 | simple <- FALSE; 37 | callgrind <- TRUE; 38 | arith <- FALSE; 39 | avg <- TRUE; 40 | args <- commandArgs(trailingOnly=TRUE); 41 | for (a in args) { 42 | if (startsWith(a, '-')) { 43 | if (a == '-s') { 44 | simple <- TRUE; 45 | } else if (a == '-x') { 46 | callgrind <- FALSE; 47 | } else if (a == '-a') { 48 | if (!avg) 49 | stop("cannot combine -a with -m"); 50 | arith <- TRUE; 51 | } else if (a == '-m') { 52 | if (arith) 53 | stop("cannot combine -m with -a"); 54 | avg <- FALSE; 55 | } else { 56 | usage(); 57 | } 58 | } else { 59 | if (exists('input')) { 60 | if (exists('output')) { 61 | usage(); 62 | } else { 63 | output <- a; 64 | } 65 | } else { 66 | input <- a; 67 | } 68 | } 69 | } 70 | 71 | if (!exists('input')) 72 | usage(); 73 | 74 | if (!exists('output')) 75 | output <- "output.pdf"; 76 | 77 | data <- read.csv(input, header = TRUE, sep = ','); 78 | if (avg) 79 | data <- rbind(data, data.frame( 80 | benchmark="average", 81 | calcColMeans(data[,2:8], geom=!arith))); 82 | 83 | original <- data[,c("original")]; 84 | process <- t(as.matrix(data[,if (callgrind) c(6,5,4,8) else c(6,5,8)])); 85 | colnames(process) <- 
data$benchmark; 86 | for (r in 1:nrow(process)) { 87 | process[c(r),] <- process[c(r),] / original; 88 | } 89 | 90 | pdf(file=output, width=12, height=5); 91 | if (simple) { 92 | par(oma=c(1.5,4,2,0),mar=c(0,0,0,0)); 93 | } else { 94 | par(oma=c(0,0,1,0),mar=c(7.5,5,0,6.5)); 95 | } 96 | 97 | colors <- c("#e21877", "#458a26", if (callgrind) "#f95c3a", "#3414ad"); 98 | ptypes <- c(19, if (callgrind) 19, 19, 19); 99 | 100 | p = plot(1, 1, xlim=c(1,length(data$benchmark)), ylim=c(min(process),max(process)), type="n", xlab="", xaxt="n", las=1, ann=F, log="y"); 101 | 102 | for (n in 1:length(colors)) { 103 | points(process[c(n),], pch=ptypes[n], col=colors[n], cex=2); 104 | } 105 | 106 | if (!simple) { 107 | modes <- c("DCFG", "bfTrace", if (callgrind) "callgrind", "CFGgrind"); 108 | 109 | mtext(text="Slowdown", at=0.60, side=2, line=-2, outer=TRUE); 110 | text(1:length(data$benchmark), 4, srt=45, adj=1, labels=colnames(process), xpd=T, cex=1.2, offset=10); 111 | 112 | legend(length(data$benchmark)+1.65, max(process)/5, modes, border=T, pch=ptypes, col=colors, cex=1, pt.cex=1.5, xpd=NA); 113 | } 114 | -------------------------------------------------------------------------------- /data/rq1-efficiency/cBench/plots/slowdown.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rimsa/CFGgrind/353cb3ba1e4bbd373f14ce743af1c317d1b3a539/data/rq1-efficiency/cBench/plots/slowdown.pdf -------------------------------------------------------------------------------- /data/rq1-efficiency/cBench/plots/times.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rimsa/CFGgrind/353cb3ba1e4bbd373f14ce743af1c317d1b3a539/data/rq1-efficiency/cBench/plots/times.pdf -------------------------------------------------------------------------------- /data/rq1-efficiency/cBench/speedup.R: -------------------------------------------------------------------------------- 
1 | #!/usr/bin/env Rscript 2 | 3 | usage <- function() { 4 | cat('Usage: Rscript plot.R [Input report]\n'); 5 | cat(' -a Use arithmetic mean\n'); 6 | quit(); 7 | } 8 | 9 | "geometric.mean" = function(x, na.rm=TRUE){ 10 | exp(sum(log(x[x > 0]), na.rm=na.rm) / length(x)) 11 | } 12 | 13 | calcColMeans = function(frame, geom=FALSE, na.rm=TRUE) { 14 | if (geom) { 15 | l <- list(); 16 | for (c in 1:ncol(frame)) 17 | l[c] <- geometric.mean(frame[,c], na.rm=na.rm); 18 | names(l) <- colnames(frame); 19 | return(l); 20 | } else { 21 | return(as.list(colMeans(frame))); 22 | } 23 | } 24 | 25 | calcTimes = function(v1, v2) { 26 | if (v1 <= v2) { 27 | return(paste("speedup of ", round(v2 / v1, digit=2), " times", sep="")); 28 | } else { 29 | return(paste("slowdown of ", round(v1 / v2, digit=2), " times", sep="")); 30 | } 31 | } 32 | 33 | arith <- FALSE; 34 | args <- commandArgs(trailingOnly=TRUE); 35 | for (a in args) { 36 | if (startsWith(a, '-')) { 37 | if (a == '-a') { 38 | arith <- TRUE; 39 | } else { 40 | usage(); 41 | } 42 | } else { 43 | if (exists('input')) { 44 | usage(); 45 | } else { 46 | input <- a; 47 | } 48 | } 49 | } 50 | 51 | if (!exists('input')) 52 | usage(); 53 | 54 | data <- read.csv(input, header = TRUE, sep = ','); 55 | data <- rbind(data, data.frame( 56 | benchmark=if (arith) "arithmetic mean" else "geometric mean", 57 | calcColMeans(data[,2:8], geom=!arith))); 58 | 59 | process <- t(as.matrix(data[,c(-1)])); 60 | colnames(process) <- data$benchmark; 61 | 62 | for (r1 in 1:nrow(process)) { 63 | for (r2 in 1:nrow(process)) { 64 | if (r1 != r2) { 65 | cat(paste(rownames(process)[r1], " (", round(process[r1,ncol(process)], digit=2), ") vs ", sep="")); 66 | cat(paste(rownames(process)[r2], " (", round(process[r2,ncol(process)], digit=2), "): ", sep="")); 67 | cat(paste(calcTimes(process[r1,ncol(process)], process[r2,ncol(process)]), "\n", sep="")); 68 | } 69 | } 70 | cat("\n"); 71 | } 72 | 
-------------------------------------------------------------------------------- /data/rq2-caching/data.csv: -------------------------------------------------------------------------------- 1 | mode,perlbench,gcc,mcf,omnetpp,xalancbmk,x264,deepsjeng,leela,exchange2,xz 2 | size02,1049,1322,1360,1466,1890,2045,2621,2776,3420,604 3 | size02,1056,1353,1361,1467,1896,2053,2650,2853,3524,608 4 | size02,1059,1357,1365,1478,1896,2060,2652,2875,3532,609 5 | size04,1004,1116,1253,1588,1691,2215,2256,2992,494,934 6 | size04,1008,1123,1255,1597,1694,2219,2265,3005,503,938 7 | size04,1010,1127,1268,1598,1696,2226,2269,3011,504,945 8 | size06,1017,1077,1257,1539,1647,2198,2320,3023,522,849 9 | size06,1019,1090,1262,1566,1649,2231,2324,3026,529,850 10 | size06,1037,1101,1277,1566,1663,2234,2370,3085,531,859 11 | size08,1108,1219,1484,1973,1984,2851,435,781,883,988 12 | size08,1111,1225,1484,1974,1985,2854,436,787,887,990 13 | size08,1111,1232,1487,1982,1989,2854,436,789,892,991 14 | size10,1004,1027,1104,1417,1519,2013,2093,2746,478,873 15 | size10,1004,1028,1113,1419,1519,2014,2093,2746,478,874 16 | size10,1008,1028,1113,1421,1520,2015,2094,2747,478,874 17 | size12,1020,1185,1502,1528,2042,2087,2807,480,815,882 18 | size12,1020,1188,1509,1529,2043,2097,2816,482,817,889 19 | size12,1024,1189,1510,1530,2048,2102,2833,483,821,891 20 | size14,1092,1312,1454,1954,2015,2643,477,803,891,965 21 | size14,1095,1312,1455,1956,2015,2646,477,803,892,966 22 | size14,1095,1314,1456,1959,2023,2679,478,803,892,972 23 | size16,1070,1139,1379,1836,1873,2670,415,774,868,916 24 | size16,1078,1149,1384,1863,1893,2672,419,777,871,917 25 | size16,1080,1150,1385,1863,1898,2676,419,780,880,920 26 | block,1041,1771,1937,2211,2323,3161,3190,4251,4473,5219 27 | block,1065,1780,1966,2212,2325,3182,3210,4294,4483,5261 28 | block,1077,1788,1981,2235,2338,3186,3212,4305,4497,5264 29 | -------------------------------------------------------------------------------- /data/rq2-caching/plot.R: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env Rscript 2 | 3 | usage <- function() { 4 | cat('Usage: Rscript plot.R [Input report] \n'); 5 | cat(' -s Graph only\n'); 6 | cat(' -a Use arithmetic mean\n'); 7 | cat(' -m Exclude mean\n'); 8 | quit(); 9 | } 10 | 11 | "geometric.mean" = function(x, na.rm=TRUE){ 12 | exp(sum(log(x[x > 0]), na.rm=na.rm) / length(x)) 13 | } 14 | 15 | calcRowMeans = function(frame, geom=FALSE, na.rm=TRUE) { 16 | if (geom) { 17 | l <- list(); 18 | for (r in 1:nrow(frame)) 19 | l[r] <- geometric.mean(frame[r,], na.rm=na.rm); 20 | names(l) <- rownames(frame); 21 | return(l); 22 | } else { 23 | return(as.list(rowMeans(frame))); 24 | } 25 | } 26 | 27 | simple <- FALSE; 28 | arith <- FALSE; 29 | avg <- TRUE; 30 | args <- commandArgs(trailingOnly=TRUE); 31 | for (a in args) { 32 | if (startsWith(a, '-')) { 33 | if (a == '-s') { 34 | simple <- TRUE; 35 | } else if (a == '-a') { 36 | if (!avg) 37 | stop("cannot combine -a with -m"); 38 | arith <- TRUE; 39 | } else if (a == '-m') { 40 | if (arith) 41 | stop("cannot combine -m with -a"); 42 | avg <- FALSE; 43 | } else { 44 | usage(); 45 | } 46 | } else { 47 | if (exists('input')) { 48 | if (exists('output')) { 49 | usage(); 50 | } else { 51 | output <- a; 52 | } 53 | } else { 54 | input <- a; 55 | } 56 | } 57 | } 58 | 59 | if (!exists('input')) 60 | usage(); 61 | 62 | if (!exists('output')) 63 | output <- "output.pdf"; 64 | 65 | data <- read.csv(input, header = TRUE, sep = ','); 66 | 67 | if (avg) { 68 | data[,c("average")] <- unlist(calcRowMeans(data[,2:11], geom=!arith)); 69 | rel <- cbind(perlbench,gcc,mcf,omnetpp,xalancbmk,x264,deepsjeng,leela,exchange2,xz,average) ~ mode; 70 | } else { 71 | rel <- cbind(perlbench,gcc,mcf,omnetpp,xalancbmk,x264,deepsjeng,leela,exchange2,xz) ~ mode; 72 | } 73 | 74 | bilan <- aggregate(rel, data=data, if (arith) mean else geometric.mean); 75 | rownames(bilan) <- bilan[,1]; 76 | bilan <- as.matrix(bilan[,-1]); 77 | 
78 | for (r in 2:nrow(bilan)) { 79 | cat(paste(rownames(bilan)[r], " has a speedup of ", round(bilan[1,ncol(bilan)] / bilan[r,ncol(bilan)], digit=2), " times\n", sep="")); 80 | } 81 | 82 | pdf(file=output, width=12, height=5); 83 | if (simple) { 84 | par(oma=c(2.5,3,2,0),mar=c(0,0,0,0)) 85 | } else { 86 | par(oma=c(2.5,4,2,0),mar=c(2,0,0,0)) 87 | } 88 | 89 | colors <- c("#d20000", "#f95c3a", "#a9af03", "#458a26", "#33dcce", "#14a1ad", "#3414ad", "#b018e2", "#e21877"); 90 | barplot(bilan, beside=T, col=colors, ylim=range(pretty(c(0, max(bilan))))); 91 | 92 | if (!simple) { 93 | modes <- c("no cache", "size 2", "size 4", "size 6", "size 8", "size 10", "size 12", "size 14", "size 16"); 94 | legend(0, 6000, modes, cex=0.8, fill=colors, xpd=NA); 95 | mtext(text="Benchmarks",side=1,line=0.5,outer=TRUE); 96 | mtext(text="Time (s)",side=2,line=2.5,outer=TRUE); 97 | } 98 | -------------------------------------------------------------------------------- /data/rq2-caching/plots/cache.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rimsa/CFGgrind/353cb3ba1e4bbd373f14ce743af1c317d1b3a539/data/rq2-caching/plots/cache.pdf -------------------------------------------------------------------------------- /data/rq3-completeness/ratio/analysis.R: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env Rscript 2 | 3 | usage <- function() { 4 | cat('Usage: Rscript analysis.R [Input Report]\n'); 5 | cat(' -a Use arithmetic mean\n'); 6 | quit(); 7 | } 8 | 9 | proportion <- function(frame, base, top) { 10 | return(apply(frame, 1, function(x) { 11 | rate <- round((x[1] / x[2]) * 100, digits = 0); 12 | return(paste(rate, "%", sep = "")); 13 | })); 14 | } 15 | 16 | "geometric.mean" = function(x, na.rm=TRUE){ 17 | exp(sum(log(x[x > 0]), na.rm=na.rm) / length(x)) 18 | } 19 | 20 | calcColMeans = function(frame, geom=FALSE, na.rm=TRUE) { 21 | if (geom) { 22 | l <- list(); 23 | for 
(c in 1:ncol(frame)) 24 | l[c] <- geometric.mean(frame[,c], na.rm=na.rm); 25 | names(l) <- colnames(frame); 26 | return(l); 27 | } else { 28 | return(as.list(colMeans(frame))); 29 | } 30 | } 31 | 32 | arith <- FALSE; 33 | args <- commandArgs(trailingOnly=TRUE); 34 | for (a in args) { 35 | if (startsWith(a, '-')) { 36 | if (a == '-a') { 37 | arith <- TRUE; 38 | } else { 39 | usage(); 40 | } 41 | } else { 42 | if (exists('input')) { 43 | usage(); 44 | } else { 45 | input <- a; 46 | } 47 | } 48 | } 49 | 50 | if (!exists('input')) 51 | usage(); 52 | 53 | data <- read.csv(input, header = TRUE, sep = ','); 54 | data <- rbind(data, data.frame( 55 | benchmark=if (arith) "arithmetic mean" else "geometric mean", 56 | calcColMeans(data[,c(2:4)], geom=!arith))); 57 | 58 | data$unreached <- c(data[,c(2)] - rowSums(data[,c(3,4)])); 59 | 60 | data$completeRate <- proportion(data[,c(3,2)]); 61 | data$incompleteRate <- proportion(data[,c(4,2)]); 62 | data$unreachedRate <- proportion(data[,c(5,2)]); 63 | benchRows <- seq_len(nrow(data) - 1); # every row except the mean row appended above; "1:nrow(data)-1" parsed as (1:n)-1 and only worked because R drops index 0 64 | allCount <- sum(data[benchRows,c("all")]); 65 | completeCount <- sum(data[benchRows,c("complete")]); 66 | incompleteCount <- sum(data[benchRows,c("incomplete")]); 67 | coverageCount <- completeCount + incompleteCount; 68 | unreachedCount <- sum(data[benchRows,c("unreached")]); 69 | coverageRate <- paste(round((coverageCount / allCount) * 100, digits = 0), "%", sep = ""); 70 | 71 | cat(paste(data[nrow(data), c("benchmark")], "\n", sep="")); 72 | cat(paste("covered: ", coverageCount, " of ", allCount, " (", coverageRate, ") control flow graphs\n", sep="")); 73 | cat(paste(" complete: ", completeCount, " of ", allCount, " (", data[nrow(data),c("completeRate")], ") control flow graphs\n", sep="")); 74 | cat(paste(" incomplete: ", incompleteCount, " of ", allCount, " (", data[nrow(data),c("incompleteRate")], ") control flow graphs\n", sep="")); 75 | cat(paste("unreached: ", unreachedCount, " of ", allCount, " (", data[nrow(data),c("unreachedRate")], ") control
flow graphs\n", sep="")); -------------------------------------------------------------------------------- /data/rq3-completeness/ratio/data-cbench.csv: -------------------------------------------------------------------------------- 1 | benchmark,all,complete,incomplete 2 | automotive_bitcount,27,8,6 3 | automotive_qsort1,11,3,6 4 | automotive_susan_c,28,3,10 5 | automotive_susan_e,28,3,11 6 | automotive_susan_s,28,4,10 7 | bzip2d,81,9,35 8 | bzip2e,81,8,25 9 | consumer_jpeg_c,337,30,90 10 | consumer_jpeg_d,323,30,83 11 | consumer_lame,205,40,60 12 | consumer_mad,275,30,50 13 | consumer_tiff2bw,320,28,66 14 | consumer_tiff2rgba,320,32,77 15 | consumer_tiffdither,320,29,74 16 | consumer_tiffmedian,322,25,60 17 | network_dijkstra,13,4,6 18 | network_patricia,13,3,7 19 | office_ghostscript,3488,577,750 20 | office_ispell,119,10,48 21 | office_rsynth,56,16,28 22 | office_stringsearch1,18,4,4 23 | security_blowfish_d,15,3,6 24 | security_blowfish_e,15,3,6 25 | security_pgp_d,327,30,71 26 | security_pgp_e,327,45,79 27 | security_rijndael_d,15,2,8 28 | security_rijndael_e,15,3,8 29 | security_sha,15,5,7 30 | telecom_adpcm_c,10,3,4 31 | telecom_adpcm_d,10,2,5 32 | telecom_CRC32,12,3,5 33 | telecom_gsm,76,9,29 34 | -------------------------------------------------------------------------------- /data/rq3-completeness/ratio/data-spec.csv: -------------------------------------------------------------------------------- 1 | benchmark,all,complete,incomplete 2 | 500.perlbench,2595,236,874 3 | 502.gcc,12950,1899,3982 4 | 503.bwaves,18,8,8 5 | 505.mcf,47,13,25 6 | 507.cactuBSSN,2693,431,497 7 | 508.namd,133,27,38 8 | 510.parest,18160,884,1214 9 | 511.povray,1643,222,406 10 | 519.lbm,27,10,10 11 | 520.omnetpp,6425,844,1277 12 | 521.wrf,7220,1216,559 13 | 523.xalancbmk,14126,1325,1841 14 | 525.x264,567,178,170 15 | 526.blender,39237,2390,1392 16 | 527.cam4,4010,361,816 17 | 531.deepsjeng,119,52,36 18 | 538.imagick,2212,74,255 19 | 541.leela,349,121,82 20 | 544.nab,274,18,53 21 | 
548.exchange2,23,10,11 22 | 549.fotonik3d,99,14,31 23 | 554.roms,316,52,84 24 | 557.xz,402,61,100 25 | 600.perlbench,2595,236,874 26 | 602.gcc,12950,1945,3997 27 | 603.bwaves,47,10,34 28 | 605.mcf,47,13,25 29 | 607.cactuBSSN,2772,443,501 30 | 619.lbm,34,10,13 31 | 620.omnetpp,6426,844,1277 32 | 621.wrf,7363,1220,609 33 | 623.xalancbmk,14126,1325,1841 34 | 625.x264,567,178,170 35 | 627.cam4,4206,373,985 36 | 628.pop2,3461,265,897 37 | 631.deepsjeng,119,52,36 38 | 638.imagick,2381,84,320 39 | 641.leela,350,121,82 40 | 644.nab,278,18,58 41 | 648.exchange2,23,10,11 42 | 649.fotonik3d,130,20,51 43 | 654.roms,343,48,115 44 | 657.xz,405,62,105 45 | -------------------------------------------------------------------------------- /data/rq3-completeness/ratio/plot.R: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env Rscript 2 | 3 | usage <- function() { # Print the command-line synopsis, then terminate the script. 4 | cat('Usage: Rscript plot.R [Input Report] \n'); 5 | cat(' -s Graph only\n'); 6 | cat(' -n Normalize\n'); 7 | cat(' -m Exclude mean\n'); 8 | cat(' -a Use arithmetic mean\n'); 9 | cat(' -r Exclude rate\n'); 10 | quit(status=1); # usage() is only reached on bad arguments, so do not exit with success 11 | } 12 | 13 | compound <- function(x) { # Successive differences of x; the first element is differenced against 0. 14 | last <- 0; 15 | unlist( 16 | lapply(x, function (v) { 17 | tmp <- v - last; 18 | last <<- v; 19 | return(tmp); 20 | }) 21 | ); 22 | } 23 | 24 | proportion <- function(frame, base, top) { # Per-row percentage string of column `base` over column `top` of frame. 25 | return(apply(frame, 1, function(x) { 26 | rate <- round((x[c(base)] / x[c(top)]) * 100, digits = 0); 27 | return(paste(rate, "%", sep = "")); 28 | })); 29 | } 30 | 31 | "geometric.mean" = function(x, na.rm=TRUE){ # NOTE: entries <= 0 are left out of the log-sum but are still counted by length(x). 32 | exp(sum(log(x[x > 0]), na.rm=na.rm) / length(x)) 33 | } 34 | 35 | calcColMeans = function(frame, geom=FALSE, na.rm=TRUE) { # Named list with one mean per column of frame (geometric when geom is TRUE). 36 | if (geom) { 37 | l <- list(); 38 | for (c in 1:ncol(frame)) 39 | l[c] <- geometric.mean(frame[,c], na.rm=na.rm); 40 | names(l) <- colnames(frame); 41 | return(l); 42 | } else { 43 | return(as.list(colMeans(frame, na.rm=na.rm))); # honour na.rm in the arithmetic branch as well 44 | } 45 | } 46 | 47 | simple <- FALSE; 48 | avg <- TRUE; 49 | rate
<- TRUE; 50 | norm <- FALSE; 51 | arith <- FALSE; 52 | extra <- c(); 53 | args <- commandArgs(trailingOnly=TRUE); 54 | for (a in args) { 55 | if (startsWith(a, '-')) { 56 | if (a == '-s') { 57 | simple <- TRUE; 58 | } else if (a == '-n') { 59 | norm <- TRUE; 60 | } else if (a == '-a') { 61 | if (!avg) 62 | stop("cannot combine -a with -m"); 63 | arith <- TRUE; 64 | } else if (a == '-m') { 65 | if (arith) 66 | stop("cannot combine -m with -a"); 67 | avg <- FALSE; 68 | } else if (a == '-r') { 69 | rate <- FALSE; 70 | } else { 71 | usage(); 72 | } 73 | } else { 74 | extra <- append(extra, a); 75 | } 76 | } 77 | 78 | if (length(extra) < 1 || length(extra) > 2) 79 | usage(); 80 | 81 | file <- extra[1]; 82 | output <- extra[2]; # NA when only the input file was given 83 | if (is.na(output)) 84 | output <- "output.pdf"; 85 | 86 | data <- read.csv(file, header = TRUE, sep = ','); 87 | if (avg) 88 | data <- rbind(data, data.frame( 89 | benchmark="average", 90 | calcColMeans(data[,2:4], geom=!arith))); 91 | 92 | data$unreached <- c(data[,c(2)] - rowSums(data[,c(3,4)])); # column 2 minus the sum of columns 3 and 4 93 | 94 | data$completeRate <- proportion(data[,c(3,2)], "complete", "all"); 95 | data$incompleteRate <- proportion(data[,c(4,2)], "incomplete", "all"); 96 | data$unreachedRate <- proportion(data[,c(5,2)], "unreached", "all"); 97 | 98 | if (norm) { 99 | process <- matrix(0, nrow = 3, ncol = nrow(data)); 100 | colnames(process) <- data$benchmark; 101 | rownames(process) <- colnames(data)[c(5:3)]; 102 | for (b in data$benchmark) { 103 | process[,b] <- unlist(subset(data, benchmark == b)[,5:3] * 100) / subset(data, benchmark == b)[,2]; 104 | } 105 | } else { 106 | process <- t(as.matrix(data[,c(5:3)])); 107 | colnames(process) <- data$benchmark; 108 | } 109 | 110 | pdf(file=output, width=12, height=5); 111 | if (simple) { 112 | par(oma=c(1.5,4,2,0),mar=c(0,0,0,0)); 113 | } else { 114 | par(oma=c(0,1.5,1,0),mar=c(6.5,3.5,0,if (norm) 4.5 else 0)); 115 | } 116 | 117 | colors <- c("#a9af03", "#d20000", "#3414ad"); 118 | 119 | if (norm) { 120 | lim <-
range(pretty(c(0, 100))); 121 | p <- barplot(process, beside=!norm, col=colors, xlab="", ylim=lim, xaxt="n", las=1); 122 | } else { 123 | lim <- c(1, max(process) * 2); # log-scale y axis from 1 up to twice the largest value 124 | p <- barplot(process, beside=!norm, col=colors, xlab="", ylim=lim, log="y", xaxt="n", las=1); 125 | } 126 | 127 | if (rate) { 128 | if (norm) { 129 | text(x=p, y=process[1,] / 2, label=data$unreachedRate, cex=0.5, offset=0, col="white"); 130 | text(x=p, y=process[1,] + (process[2,] / 2), label=data$incompleteRate, cex=0.5, offset=0, col="black"); 131 | text(x=p, y=process[1,] + process[2,] + (process[3,] / 2), label=data$completeRate, cex=0.5, offset=0, col="white"); 132 | } else { 133 | labelvalues <- c(t(data[,8:6])); 134 | yvalues <- c(t(data[,5:3])); 135 | text(x=(p+0.45), y=yvalues, label=labelvalues, srt=90, pos=3, cex=0.70, offset=0.85, col="black"); 136 | } 137 | } 138 | 139 | if (!simple) { 140 | yname = paste("Control Flow Graphs", (if (norm) " (%)" else " (#)"), sep=""); 141 | mtext(text=yname, at=0.60, side=2, line=0, outer=TRUE); 142 | 143 | if (norm) { 144 | text(p, ((lim[2] / 1000) * -30), srt=45, adj=1, labels=colnames(process), xpd=T, cex=1, offset=10); 145 | legend(max(p) + ((max(p) / 100) * 2), lim[2] / 2, rev(rownames(process)), cex=0.8, fill=rev(colors), xpd=NA); 146 | } else { 147 | text(colMeans(p), 0.8, srt=45, adj=1, labels=colnames(process), xpd=T, cex=1, offset=10); 148 | legend(max(p) - (max(p) / 10), lim[2], rev(rownames(process)), cex=0.8, fill=rev(colors), xpd=NA); 149 | } 150 | } 151 | -------------------------------------------------------------------------------- /data/rq3-completeness/ratio/plots/cbench-norm.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rimsa/CFGgrind/353cb3ba1e4bbd373f14ce743af1c317d1b3a539/data/rq3-completeness/ratio/plots/cbench-norm.pdf -------------------------------------------------------------------------------- /data/rq3-completeness/ratio/plots/cbench-std.pdf:
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/rimsa/CFGgrind/353cb3ba1e4bbd373f14ce743af1c317d1b3a539/data/rq3-completeness/ratio/plots/cbench-std.pdf -------------------------------------------------------------------------------- /data/rq3-completeness/ratio/plots/spec-norm.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rimsa/CFGgrind/353cb3ba1e4bbd373f14ce743af1c317d1b3a539/data/rq3-completeness/ratio/plots/spec-norm.pdf -------------------------------------------------------------------------------- /data/rq3-completeness/ratio/plots/spec-std.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rimsa/CFGgrind/353cb3ba1e4bbd373f14ce743af1c317d1b3a539/data/rq3-completeness/ratio/plots/spec-std.pdf -------------------------------------------------------------------------------- /data/rq3-completeness/relation/filter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | function fatal() { # Print an error message on stderr and abort with status 1. 4 | echo "error: $*" 1>&2; 5 | exit 1; 6 | } 7 | 8 | if [ $# -lt 2 ] || [ $# -gt 3 ]; then 9 | echo "Usage: $0 [file] [blocks|instrs] " 1>&2; 10 | exit 1; 11 | fi 12 | 13 | file=$1; 14 | name=$2; 15 | max=$3; # optional third argument; a per-mode default is applied below when it is empty 16 | 17 | if [ "$name" == "blocks" ]; then 18 | col=4; 19 | [ -n "$max" ] || max=100; 20 | elif [ "$name" == "instrs" ]; then 21 | col=5; 22 | [ -n "$max" ] || max=1000; 23 | else 24 | fatal "invalid argument \"$name\"" 1>&2; 25 | fi 26 | 27 | [ -f "${file}" ] || fatal "Invalid file \"$file\""; 28 | 29 | for type in complete incomplete; do # one histogram per kind, written to complete.csv and incomplete.csv 30 | if [ "${type}" == "complete" ]; then 31 | value="true"; 32 | else 33 | value="false"; 34 | fi 35 | 36 | grep ",${value}," "${file}" | cut -f${col} -d',' | sort -n | uniq -c | \ 37 | awk -v "name=${name}" -v "lim=${max}" 'BEGIN {print name ",count"} $2 <= lim {print $2 "," $1}'
> ${type}.csv 38 | done 39 | -------------------------------------------------------------------------------- /data/rq3-completeness/relation/plot.R: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env Rscript 2 | 3 | usage <- function() { # Print the command-line synopsis, then terminate the script. 4 | cat('Usage: Rscript plot.R [Complete file] [Incomplete file] \n'); 5 | cat(' -i Instructions [default]\n'); 6 | cat(' -b Blocks\n'); 7 | cat(' -s Graph only\n'); 8 | quit(status=1); # usage() is only reached on bad arguments, so do not exit with success 9 | } 10 | 11 | instrs <- TRUE; 12 | simple <- FALSE; 13 | extra <- c(); 14 | args <- commandArgs(trailingOnly=TRUE); 15 | for (a in args) { 16 | if (startsWith(a, '-')) { 17 | if (a == '-s') { 18 | simple <- TRUE; 19 | } else if (a == '-i') { 20 | instrs <- TRUE; 21 | } else if (a == '-b') { 22 | instrs <- FALSE; 23 | } else { 24 | usage(); 25 | } 26 | } else { 27 | extra <- append(extra, a); 28 | } 29 | } 30 | 31 | if (length(extra) < 2 || length(extra) > 3) 32 | usage(); 33 | 34 | file1 <- extra[1]; 35 | file2 <- extra[2]; 36 | output <- extra[3]; # NA when no output file was given 37 | if (is.na(output)) 38 | output <- "output.pdf"; 39 | 40 | pdf(file=output, width=8, height=6); 41 | if (simple) { 42 | par(oma=c(2.5,2.5,1,1.5),mar=c(0,0,0,0)); 43 | } else { 44 | par(oma=c(2.5,2.5,1,1.5),mar=c(1.5,1.5,0,0.5)); 45 | } 46 | 47 | complete <- read.csv(file1, header = TRUE, sep = ','); 48 | incomplete <- read.csv(file2, header = TRUE, sep = ','); 49 | 50 | xmax = if (instrs) 1000 else 100; # fixed x-axis limit per mode 51 | ymax = max(complete$count, incomplete$count); 52 | 53 | colors <- c("#3414ad", "#d20000"); 54 | ptypes <- c(2, 1); 55 | 56 | plot(0, 1, xlab="", xlim=range(0, xmax), ylim=c(1, ymax), xaxs="i", yaxs="i", ylab="", log="y", cex.axis=1.3); 57 | points(as.matrix(complete), pch=ptypes[1], col=colors[1], cex=1.5, lwd=2); 58 | points(as.matrix(incomplete), pch=ptypes[2], col=colors[2], cex=1.5, lwd=2); 59 | 60 | if (!simple) { 61 | if (instrs) { 62 | legend(xmax-232, ymax, c("Complete", "Incomplete"), cex=1.3, pch=ptypes, col=colors, pt.cex=1.5, pt.lwd=2,
xpd=NA); 63 | mtext(text="Instructions", side=1, cex=1.5, line=1, outer=TRUE); 64 | } else { 65 | legend(xmax-23.2, ymax, c("Complete", "Incomplete"), cex=1.3, pch=ptypes, col=colors, pt.cex=1.5, pt.lwd=2, xpd=NA); 66 | mtext(text="Block Nodes", side=1, cex=1.5, line=1, outer=TRUE); 67 | } 68 | mtext(text="Control Flow Graphs (#)", side=2, cex=1.5, line=1, outer=TRUE); 69 | } 70 | -------------------------------------------------------------------------------- /data/rq3-completeness/relation/plots/cbench-blocks.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rimsa/CFGgrind/353cb3ba1e4bbd373f14ce743af1c317d1b3a539/data/rq3-completeness/relation/plots/cbench-blocks.pdf -------------------------------------------------------------------------------- /data/rq3-completeness/relation/plots/cbench-instrs.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rimsa/CFGgrind/353cb3ba1e4bbd373f14ce743af1c317d1b3a539/data/rq3-completeness/relation/plots/cbench-instrs.pdf -------------------------------------------------------------------------------- /data/rq3-completeness/relation/plots/spec-blocks.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rimsa/CFGgrind/353cb3ba1e4bbd373f14ce743af1c317d1b3a539/data/rq3-completeness/relation/plots/spec-blocks.pdf -------------------------------------------------------------------------------- /data/rq3-completeness/relation/plots/spec-instrs.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rimsa/CFGgrind/353cb3ba1e4bbd373f14ce743af1c317d1b3a539/data/rq3-completeness/relation/plots/spec-instrs.pdf -------------------------------------------------------------------------------- /data/rq4-incremental/data-cfgs.csv:
-------------------------------------------------------------------------------- 1 | benchmark,dataset,count 2 | automotive_bitcount,max,27 3 | automotive_bitcount,1,14 4 | automotive_bitcount,2,14 5 | automotive_bitcount,3,14 6 | automotive_bitcount,4,14 7 | automotive_bitcount,5,14 8 | automotive_bitcount,6,14 9 | automotive_bitcount,7,14 10 | automotive_bitcount,8,14 11 | automotive_bitcount,9,14 12 | automotive_bitcount,10,14 13 | automotive_bitcount,11,14 14 | automotive_bitcount,12,14 15 | automotive_bitcount,13,14 16 | automotive_bitcount,14,14 17 | automotive_bitcount,15,14 18 | automotive_bitcount,16,14 19 | automotive_bitcount,17,14 20 | automotive_bitcount,18,14 21 | automotive_bitcount,19,14 22 | automotive_bitcount,20,14 23 | automotive_qsort1,max,11 24 | automotive_qsort1,1,9 25 | automotive_qsort1,2,9 26 | automotive_qsort1,3,9 27 | automotive_qsort1,4,9 28 | automotive_qsort1,5,9 29 | automotive_qsort1,6,9 30 | automotive_qsort1,7,9 31 | automotive_qsort1,8,9 32 | automotive_qsort1,9,9 33 | automotive_qsort1,10,9 34 | automotive_qsort1,11,9 35 | automotive_qsort1,12,9 36 | automotive_qsort1,13,9 37 | automotive_qsort1,14,9 38 | automotive_qsort1,15,9 39 | automotive_qsort1,16,9 40 | automotive_qsort1,17,9 41 | automotive_qsort1,18,9 42 | automotive_qsort1,19,9 43 | automotive_qsort1,20,9 44 | automotive_susan_c,max,28 45 | automotive_susan_c,1,13 46 | automotive_susan_c,2,13 47 | automotive_susan_c,3,13 48 | automotive_susan_c,4,13 49 | automotive_susan_c,5,13 50 | automotive_susan_c,6,13 51 | automotive_susan_c,7,13 52 | automotive_susan_c,8,13 53 | automotive_susan_c,9,13 54 | automotive_susan_c,10,13 55 | automotive_susan_c,11,13 56 | automotive_susan_c,12,13 57 | automotive_susan_c,13,13 58 | automotive_susan_c,14,13 59 | automotive_susan_c,15,13 60 | automotive_susan_c,16,13 61 | automotive_susan_c,17,13 62 | automotive_susan_c,18,13 63 | automotive_susan_c,19,13 64 | automotive_susan_c,20,13 65 | automotive_susan_e,max,28 66 | 
automotive_susan_e,1,14 67 | automotive_susan_e,2,14 68 | automotive_susan_e,3,14 69 | automotive_susan_e,4,14 70 | automotive_susan_e,5,14 71 | automotive_susan_e,6,14 72 | automotive_susan_e,7,14 73 | automotive_susan_e,8,14 74 | automotive_susan_e,9,14 75 | automotive_susan_e,10,14 76 | automotive_susan_e,11,14 77 | automotive_susan_e,12,14 78 | automotive_susan_e,13,14 79 | automotive_susan_e,14,14 80 | automotive_susan_e,15,14 81 | automotive_susan_e,16,14 82 | automotive_susan_e,17,14 83 | automotive_susan_e,18,14 84 | automotive_susan_e,19,14 85 | automotive_susan_e,20,14 86 | automotive_susan_s,max,28 87 | automotive_susan_s,1,13 88 | automotive_susan_s,2,14 89 | automotive_susan_s,3,14 90 | automotive_susan_s,4,14 91 | automotive_susan_s,5,14 92 | automotive_susan_s,6,14 93 | automotive_susan_s,7,14 94 | automotive_susan_s,8,14 95 | automotive_susan_s,9,14 96 | automotive_susan_s,10,14 97 | automotive_susan_s,11,14 98 | automotive_susan_s,12,14 99 | automotive_susan_s,13,14 100 | automotive_susan_s,14,14 101 | automotive_susan_s,15,14 102 | automotive_susan_s,16,14 103 | automotive_susan_s,17,14 104 | automotive_susan_s,18,14 105 | automotive_susan_s,19,14 106 | automotive_susan_s,20,14 107 | bzip2d,max,81 108 | bzip2d,1,25 109 | bzip2d,2,25 110 | bzip2d,3,25 111 | bzip2d,4,43 112 | bzip2d,5,43 113 | bzip2d,6,43 114 | bzip2d,7,43 115 | bzip2d,8,43 116 | bzip2d,9,44 117 | bzip2d,10,44 118 | bzip2d,11,44 119 | bzip2d,12,44 120 | bzip2d,13,44 121 | bzip2d,14,44 122 | bzip2d,15,44 123 | bzip2d,16,44 124 | bzip2d,17,44 125 | bzip2d,18,44 126 | bzip2d,19,44 127 | bzip2d,20,44 128 | bzip2d,21,44 129 | bzip2d,22,44 130 | bzip2d,23,44 131 | bzip2d,24,44 132 | bzip2d,25,44 133 | bzip2d,26,44 134 | bzip2d,27,44 135 | bzip2d,28,44 136 | bzip2d,29,44 137 | bzip2d,30,44 138 | bzip2d,31,44 139 | bzip2d,32,44 140 | bzip2e,max,81 141 | bzip2e,1,31 142 | bzip2e,2,31 143 | bzip2e,3,32 144 | bzip2e,4,32 145 | bzip2e,5,32 146 | bzip2e,6,32 147 | bzip2e,7,32 148 | bzip2e,8,32 
149 | bzip2e,9,33 150 | bzip2e,10,33 151 | bzip2e,11,33 152 | bzip2e,12,33 153 | bzip2e,13,33 154 | bzip2e,14,33 155 | bzip2e,15,33 156 | bzip2e,16,33 157 | bzip2e,17,33 158 | bzip2e,18,33 159 | bzip2e,19,33 160 | bzip2e,20,33 161 | bzip2e,21,33 162 | bzip2e,22,33 163 | bzip2e,23,33 164 | bzip2e,24,33 165 | bzip2e,25,33 166 | bzip2e,26,33 167 | bzip2e,27,33 168 | bzip2e,28,33 169 | bzip2e,29,33 170 | bzip2e,30,33 171 | bzip2e,31,33 172 | bzip2e,32,33 173 | consumer_jpeg_c,max,337 174 | consumer_jpeg_c,1,114 175 | consumer_jpeg_c,2,115 176 | consumer_jpeg_c,3,120 177 | consumer_jpeg_c,4,120 178 | consumer_jpeg_c,5,120 179 | consumer_jpeg_c,6,120 180 | consumer_jpeg_c,7,120 181 | consumer_jpeg_c,8,120 182 | consumer_jpeg_c,9,120 183 | consumer_jpeg_c,10,120 184 | consumer_jpeg_c,11,120 185 | consumer_jpeg_c,12,120 186 | consumer_jpeg_c,13,120 187 | consumer_jpeg_c,14,120 188 | consumer_jpeg_c,15,120 189 | consumer_jpeg_c,16,120 190 | consumer_jpeg_c,17,120 191 | consumer_jpeg_c,18,120 192 | consumer_jpeg_c,19,120 193 | consumer_jpeg_c,20,120 194 | consumer_jpeg_d,max,323 195 | consumer_jpeg_d,1,94 196 | consumer_jpeg_d,2,95 197 | consumer_jpeg_d,3,95 198 | consumer_jpeg_d,4,100 199 | consumer_jpeg_d,5,100 200 | consumer_jpeg_d,6,100 201 | consumer_jpeg_d,7,100 202 | consumer_jpeg_d,8,100 203 | consumer_jpeg_d,9,110 204 | consumer_jpeg_d,10,110 205 | consumer_jpeg_d,11,112 206 | consumer_jpeg_d,12,112 207 | consumer_jpeg_d,13,112 208 | consumer_jpeg_d,14,113 209 | consumer_jpeg_d,15,113 210 | consumer_jpeg_d,16,113 211 | consumer_jpeg_d,17,113 212 | consumer_jpeg_d,18,113 213 | consumer_jpeg_d,19,113 214 | consumer_jpeg_d,20,113 215 | consumer_lame,max,205 216 | consumer_lame,1,99 217 | consumer_lame,2,100 218 | consumer_lame,3,100 219 | consumer_lame,4,100 220 | consumer_lame,5,100 221 | consumer_lame,6,100 222 | consumer_lame,7,100 223 | consumer_lame,8,100 224 | consumer_lame,9,100 225 | consumer_lame,10,100 226 | consumer_lame,11,100 227 | consumer_lame,12,100 228 
| consumer_lame,13,100 229 | consumer_lame,14,100 230 | consumer_lame,15,100 231 | consumer_lame,16,100 232 | consumer_lame,17,100 233 | consumer_lame,18,100 234 | consumer_lame,19,100 235 | consumer_lame,20,100 236 | consumer_mad,max,275 237 | consumer_mad,1,80 238 | consumer_mad,2,80 239 | consumer_mad,3,80 240 | consumer_mad,4,80 241 | consumer_mad,5,80 242 | consumer_mad,6,80 243 | consumer_mad,7,80 244 | consumer_mad,8,80 245 | consumer_mad,9,80 246 | consumer_mad,10,80 247 | consumer_mad,11,80 248 | consumer_mad,12,80 249 | consumer_mad,13,80 250 | consumer_mad,14,80 251 | consumer_mad,15,80 252 | consumer_mad,16,80 253 | consumer_mad,17,80 254 | consumer_mad,18,80 255 | consumer_mad,19,80 256 | consumer_mad,20,80 257 | consumer_tiff2bw,max,320 258 | consumer_tiff2bw,1,93 259 | consumer_tiff2bw,2,93 260 | consumer_tiff2bw,3,93 261 | consumer_tiff2bw,4,93 262 | consumer_tiff2bw,5,93 263 | consumer_tiff2bw,6,93 264 | consumer_tiff2bw,7,93 265 | consumer_tiff2bw,8,94 266 | consumer_tiff2bw,9,94 267 | consumer_tiff2bw,10,94 268 | consumer_tiff2bw,11,94 269 | consumer_tiff2bw,12,94 270 | consumer_tiff2bw,13,94 271 | consumer_tiff2bw,14,94 272 | consumer_tiff2bw,15,94 273 | consumer_tiff2bw,16,94 274 | consumer_tiff2bw,17,94 275 | consumer_tiff2bw,18,94 276 | consumer_tiff2bw,19,94 277 | consumer_tiff2bw,20,94 278 | consumer_tiff2rgba,max,320 279 | consumer_tiff2rgba,1,107 280 | consumer_tiff2rgba,2,107 281 | consumer_tiff2rgba,3,108 282 | consumer_tiff2rgba,4,108 283 | consumer_tiff2rgba,5,108 284 | consumer_tiff2rgba,6,108 285 | consumer_tiff2rgba,7,108 286 | consumer_tiff2rgba,8,109 287 | consumer_tiff2rgba,9,109 288 | consumer_tiff2rgba,10,109 289 | consumer_tiff2rgba,11,109 290 | consumer_tiff2rgba,12,109 291 | consumer_tiff2rgba,13,109 292 | consumer_tiff2rgba,14,109 293 | consumer_tiff2rgba,15,109 294 | consumer_tiff2rgba,16,109 295 | consumer_tiff2rgba,17,109 296 | consumer_tiff2rgba,18,109 297 | consumer_tiff2rgba,19,109 298 | consumer_tiff2rgba,20,109 299 
| consumer_tiffdither,max,320 300 | consumer_tiffdither,1,101 301 | consumer_tiffdither,2,101 302 | consumer_tiffdither,3,101 303 | consumer_tiffdither,4,101 304 | consumer_tiffdither,5,101 305 | consumer_tiffdither,6,101 306 | consumer_tiffdither,7,101 307 | consumer_tiffdither,8,101 308 | consumer_tiffdither,9,101 309 | consumer_tiffdither,10,101 310 | consumer_tiffdither,11,101 311 | consumer_tiffdither,12,101 312 | consumer_tiffdither,13,101 313 | consumer_tiffdither,14,101 314 | consumer_tiffdither,15,101 315 | consumer_tiffdither,16,101 316 | consumer_tiffdither,17,101 317 | consumer_tiffdither,18,101 318 | consumer_tiffdither,19,103 319 | consumer_tiffdither,20,103 320 | consumer_tiffmedian,max,322 321 | consumer_tiffmedian,1,83 322 | consumer_tiffmedian,2,83 323 | consumer_tiffmedian,3,83 324 | consumer_tiffmedian,4,83 325 | consumer_tiffmedian,5,83 326 | consumer_tiffmedian,6,83 327 | consumer_tiffmedian,7,83 328 | consumer_tiffmedian,8,84 329 | consumer_tiffmedian,9,84 330 | consumer_tiffmedian,10,84 331 | consumer_tiffmedian,11,85 332 | consumer_tiffmedian,12,85 333 | consumer_tiffmedian,13,85 334 | consumer_tiffmedian,14,85 335 | consumer_tiffmedian,15,85 336 | consumer_tiffmedian,16,85 337 | consumer_tiffmedian,17,85 338 | consumer_tiffmedian,18,85 339 | consumer_tiffmedian,19,85 340 | consumer_tiffmedian,20,85 341 | network_dijkstra,max,13 342 | network_dijkstra,1,10 343 | network_dijkstra,2,10 344 | network_dijkstra,3,10 345 | network_dijkstra,4,10 346 | network_dijkstra,5,10 347 | network_dijkstra,6,10 348 | network_dijkstra,7,10 349 | network_dijkstra,8,10 350 | network_dijkstra,9,10 351 | network_dijkstra,10,10 352 | network_dijkstra,11,10 353 | network_dijkstra,12,10 354 | network_dijkstra,13,10 355 | network_dijkstra,14,10 356 | network_dijkstra,15,10 357 | network_dijkstra,16,10 358 | network_dijkstra,17,10 359 | network_dijkstra,18,10 360 | network_dijkstra,19,10 361 | network_dijkstra,20,10 362 | network_patricia,max,13 363 | 
network_patricia,1,10 364 | network_patricia,2,10 365 | network_patricia,3,10 366 | network_patricia,4,10 367 | network_patricia,5,10 368 | network_patricia,6,10 369 | network_patricia,7,10 370 | network_patricia,8,10 371 | network_patricia,9,10 372 | network_patricia,10,10 373 | network_patricia,11,10 374 | network_patricia,12,10 375 | network_patricia,13,10 376 | network_patricia,14,10 377 | network_patricia,15,10 378 | network_patricia,16,10 379 | network_patricia,17,10 380 | network_patricia,18,10 381 | network_patricia,19,10 382 | network_patricia,20,10 383 | office_ghostscript,max,3488 384 | office_ghostscript,1,1015 385 | office_ghostscript,2,1090 386 | office_ghostscript,3,1172 387 | office_ghostscript,4,1172 388 | office_ghostscript,5,1175 389 | office_ghostscript,6,1175 390 | office_ghostscript,7,1193 391 | office_ghostscript,8,1234 392 | office_ghostscript,9,1234 393 | office_ghostscript,10,1295 394 | office_ghostscript,11,1297 395 | office_ghostscript,12,1300 396 | office_ghostscript,13,1300 397 | office_ghostscript,14,1300 398 | office_ghostscript,15,1300 399 | office_ghostscript,16,1300 400 | office_ghostscript,17,1327 401 | office_ghostscript,18,1327 402 | office_ghostscript,19,1327 403 | office_ghostscript,20,1327 404 | office_ispell,max,119 405 | office_ispell,1,49 406 | office_ispell,2,51 407 | office_ispell,3,52 408 | office_ispell,4,52 409 | office_ispell,5,54 410 | office_ispell,6,54 411 | office_ispell,7,54 412 | office_ispell,8,54 413 | office_ispell,9,54 414 | office_ispell,10,54 415 | office_ispell,11,58 416 | office_ispell,12,58 417 | office_ispell,13,58 418 | office_ispell,14,58 419 | office_ispell,15,58 420 | office_ispell,16,58 421 | office_ispell,17,58 422 | office_ispell,18,58 423 | office_ispell,19,58 424 | office_ispell,20,58 425 | office_rsynth,max,56 426 | office_rsynth,1,44 427 | office_rsynth,2,44 428 | office_rsynth,3,44 429 | office_rsynth,4,44 430 | office_rsynth,5,44 431 | office_rsynth,6,44 432 | office_rsynth,7,44 433 | 
office_rsynth,8,44 434 | office_rsynth,9,44 435 | office_rsynth,10,44 436 | office_rsynth,11,44 437 | office_rsynth,12,44 438 | office_rsynth,13,44 439 | office_rsynth,14,44 440 | office_rsynth,15,44 441 | office_rsynth,16,44 442 | office_rsynth,17,44 443 | office_rsynth,18,44 444 | office_rsynth,19,44 445 | office_rsynth,20,44 446 | office_stringsearch1,max,18 447 | office_stringsearch1,1,8 448 | office_stringsearch1,2,8 449 | office_stringsearch1,3,8 450 | office_stringsearch1,4,8 451 | office_stringsearch1,5,8 452 | office_stringsearch1,6,8 453 | office_stringsearch1,7,8 454 | office_stringsearch1,8,8 455 | office_stringsearch1,9,8 456 | office_stringsearch1,10,8 457 | office_stringsearch1,11,8 458 | office_stringsearch1,12,8 459 | office_stringsearch1,13,8 460 | office_stringsearch1,14,8 461 | office_stringsearch1,15,8 462 | office_stringsearch1,16,8 463 | office_stringsearch1,17,8 464 | office_stringsearch1,18,8 465 | office_stringsearch1,19,8 466 | office_stringsearch1,20,8 467 | security_blowfish_d,max,15 468 | security_blowfish_d,1,9 469 | security_blowfish_d,2,9 470 | security_blowfish_d,3,9 471 | security_blowfish_d,4,9 472 | security_blowfish_d,5,9 473 | security_blowfish_d,6,9 474 | security_blowfish_d,7,9 475 | security_blowfish_d,8,9 476 | security_blowfish_d,9,9 477 | security_blowfish_d,10,9 478 | security_blowfish_d,11,9 479 | security_blowfish_d,12,9 480 | security_blowfish_d,13,9 481 | security_blowfish_d,14,9 482 | security_blowfish_d,15,9 483 | security_blowfish_d,16,9 484 | security_blowfish_d,17,9 485 | security_blowfish_d,18,9 486 | security_blowfish_d,19,9 487 | security_blowfish_d,20,9 488 | security_blowfish_e,max,15 489 | security_blowfish_e,1,9 490 | security_blowfish_e,2,9 491 | security_blowfish_e,3,9 492 | security_blowfish_e,4,9 493 | security_blowfish_e,5,9 494 | security_blowfish_e,6,9 495 | security_blowfish_e,7,9 496 | security_blowfish_e,8,9 497 | security_blowfish_e,9,9 498 | security_blowfish_e,10,9 499 | 
security_blowfish_e,11,9 500 | security_blowfish_e,12,9 501 | security_blowfish_e,13,9 502 | security_blowfish_e,14,9 503 | security_blowfish_e,15,9 504 | security_blowfish_e,16,9 505 | security_blowfish_e,17,9 506 | security_blowfish_e,18,9 507 | security_blowfish_e,19,9 508 | security_blowfish_e,20,9 509 | security_pgp_d,max,327 510 | security_pgp_d,1,99 511 | security_pgp_d,2,100 512 | security_pgp_d,3,101 513 | security_pgp_d,4,101 514 | security_pgp_d,5,101 515 | security_pgp_d,6,101 516 | security_pgp_d,7,101 517 | security_pgp_d,8,101 518 | security_pgp_d,9,101 519 | security_pgp_d,10,101 520 | security_pgp_d,11,101 521 | security_pgp_d,12,101 522 | security_pgp_d,13,101 523 | security_pgp_d,14,101 524 | security_pgp_d,15,101 525 | security_pgp_d,16,101 526 | security_pgp_d,17,101 527 | security_pgp_d,18,101 528 | security_pgp_d,19,101 529 | security_pgp_d,20,101 530 | security_pgp_e,max,327 531 | security_pgp_e,1,124 532 | security_pgp_e,2,124 533 | security_pgp_e,3,124 534 | security_pgp_e,4,124 535 | security_pgp_e,5,124 536 | security_pgp_e,6,124 537 | security_pgp_e,7,124 538 | security_pgp_e,8,124 539 | security_pgp_e,9,124 540 | security_pgp_e,10,124 541 | security_pgp_e,11,124 542 | security_pgp_e,12,124 543 | security_pgp_e,13,124 544 | security_pgp_e,14,124 545 | security_pgp_e,15,124 546 | security_pgp_e,16,124 547 | security_pgp_e,17,124 548 | security_pgp_e,18,124 549 | security_pgp_e,19,124 550 | security_pgp_e,20,124 551 | security_rijndael_d,max,15 552 | security_rijndael_d,1,10 553 | security_rijndael_d,2,10 554 | security_rijndael_d,3,10 555 | security_rijndael_d,4,10 556 | security_rijndael_d,5,10 557 | security_rijndael_d,6,10 558 | security_rijndael_d,7,10 559 | security_rijndael_d,8,10 560 | security_rijndael_d,9,10 561 | security_rijndael_d,10,10 562 | security_rijndael_d,11,10 563 | security_rijndael_d,12,10 564 | security_rijndael_d,13,10 565 | security_rijndael_d,14,10 566 | security_rijndael_d,15,10 567 | security_rijndael_d,16,10 
568 | security_rijndael_d,17,10 569 | security_rijndael_d,18,10 570 | security_rijndael_d,19,10 571 | security_rijndael_d,20,10 572 | security_rijndael_e,max,15 573 | security_rijndael_e,1,11 574 | security_rijndael_e,2,11 575 | security_rijndael_e,3,11 576 | security_rijndael_e,4,11 577 | security_rijndael_e,5,11 578 | security_rijndael_e,6,11 579 | security_rijndael_e,7,11 580 | security_rijndael_e,8,11 581 | security_rijndael_e,9,11 582 | security_rijndael_e,10,11 583 | security_rijndael_e,11,11 584 | security_rijndael_e,12,11 585 | security_rijndael_e,13,11 586 | security_rijndael_e,14,11 587 | security_rijndael_e,15,11 588 | security_rijndael_e,16,11 589 | security_rijndael_e,17,11 590 | security_rijndael_e,18,11 591 | security_rijndael_e,19,11 592 | security_rijndael_e,20,11 593 | security_sha,max,15 594 | security_sha,1,12 595 | security_sha,2,12 596 | security_sha,3,12 597 | security_sha,4,12 598 | security_sha,5,12 599 | security_sha,6,12 600 | security_sha,7,12 601 | security_sha,8,12 602 | security_sha,9,12 603 | security_sha,10,12 604 | security_sha,11,12 605 | security_sha,12,12 606 | security_sha,13,12 607 | security_sha,14,12 608 | security_sha,15,12 609 | security_sha,16,12 610 | security_sha,17,12 611 | security_sha,18,12 612 | security_sha,19,12 613 | security_sha,20,12 614 | telecom_adpcm_c,max,10 615 | telecom_adpcm_c,1,7 616 | telecom_adpcm_c,2,7 617 | telecom_adpcm_c,3,7 618 | telecom_adpcm_c,4,7 619 | telecom_adpcm_c,5,7 620 | telecom_adpcm_c,6,7 621 | telecom_adpcm_c,7,7 622 | telecom_adpcm_c,8,7 623 | telecom_adpcm_c,9,7 624 | telecom_adpcm_c,10,7 625 | telecom_adpcm_c,11,7 626 | telecom_adpcm_c,12,7 627 | telecom_adpcm_c,13,7 628 | telecom_adpcm_c,14,7 629 | telecom_adpcm_c,15,7 630 | telecom_adpcm_c,16,7 631 | telecom_adpcm_c,17,7 632 | telecom_adpcm_c,18,7 633 | telecom_adpcm_c,19,7 634 | telecom_adpcm_c,20,7 635 | telecom_adpcm_d,max,10 636 | telecom_adpcm_d,1,7 637 | telecom_adpcm_d,2,7 638 | telecom_adpcm_d,3,7 639 | 
telecom_adpcm_d,4,7 640 | telecom_adpcm_d,5,7 641 | telecom_adpcm_d,6,7 642 | telecom_adpcm_d,7,7 643 | telecom_adpcm_d,8,7 644 | telecom_adpcm_d,9,7 645 | telecom_adpcm_d,10,7 646 | telecom_adpcm_d,11,7 647 | telecom_adpcm_d,12,7 648 | telecom_adpcm_d,13,7 649 | telecom_adpcm_d,14,7 650 | telecom_adpcm_d,15,7 651 | telecom_adpcm_d,16,7 652 | telecom_adpcm_d,17,7 653 | telecom_adpcm_d,18,7 654 | telecom_adpcm_d,19,7 655 | telecom_adpcm_d,20,7 656 | telecom_CRC32,max,12 657 | telecom_CRC32,1,8 658 | telecom_CRC32,2,8 659 | telecom_CRC32,3,8 660 | telecom_CRC32,4,8 661 | telecom_CRC32,5,8 662 | telecom_CRC32,6,8 663 | telecom_CRC32,7,8 664 | telecom_CRC32,8,8 665 | telecom_CRC32,9,8 666 | telecom_CRC32,10,8 667 | telecom_CRC32,11,8 668 | telecom_CRC32,12,8 669 | telecom_CRC32,13,8 670 | telecom_CRC32,14,8 671 | telecom_CRC32,15,8 672 | telecom_CRC32,16,8 673 | telecom_CRC32,17,8 674 | telecom_CRC32,18,8 675 | telecom_CRC32,19,8 676 | telecom_CRC32,20,8 677 | telecom_gsm,max,76 678 | telecom_gsm,1,38 679 | telecom_gsm,2,38 680 | telecom_gsm,3,38 681 | telecom_gsm,4,38 682 | telecom_gsm,5,38 683 | telecom_gsm,6,38 684 | telecom_gsm,7,38 685 | telecom_gsm,8,38 686 | telecom_gsm,9,38 687 | telecom_gsm,10,38 688 | telecom_gsm,11,38 689 | telecom_gsm,12,38 690 | telecom_gsm,13,38 691 | telecom_gsm,14,38 692 | telecom_gsm,15,38 693 | telecom_gsm,16,38 694 | telecom_gsm,17,38 695 | telecom_gsm,18,38 696 | telecom_gsm,19,38 697 | telecom_gsm,20,38 698 | -------------------------------------------------------------------------------- /data/rq4-incremental/plot.R: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env Rscript 2 | 3 | usage <- function() { # Print the command-line synopsis, then terminate the script. 4 | cat('Usage: Rscript plot.R [Input report] \n'); 5 | cat(' -s Graph only\n'); 6 | cat(' -c Plot CFGs [default]\n'); 7 | cat(' -i Plot instructions\n'); 8 | cat(' -f First half [default]\n'); 9 | cat(' -l Last half\n'); 10 | quit(status=1); # usage() is only reached on bad arguments, so do not exit with success 11 | } 12 | 13 | simple <- FALSE; 14 | type_cfg
<- TRUE; 15 | half_first <- TRUE; 16 | args <- commandArgs(trailingOnly=TRUE); 17 | for (a in args) { 18 | if (startsWith(a, '-')) { 19 | if (a == '-s') { 20 | simple <- TRUE; 21 | } else if (a == '-c') { 22 | type_cfg <- TRUE; 23 | } else if (a == '-i') { 24 | type_cfg <- FALSE; 25 | } else if (a == '-f') { 26 | half_first <- TRUE; 27 | } else if (a == '-l') { 28 | half_first <- FALSE; 29 | } else { 30 | usage(); 31 | } 32 | } else { 33 | if (exists('input')) { 34 | if (exists('output')) { 35 | usage(); 36 | } else { 37 | output <- a; 38 | } 39 | } else { 40 | input <- a; 41 | } 42 | } 43 | } 44 | 45 | if (!exists('input')) 46 | usage(); 47 | 48 | if (!exists('output')) 49 | output <- "output.pdf"; 50 | 51 | data <- read.csv(input, header = TRUE, sep = ','); 52 | 53 | benchmarks <- unique(data$benchmark); 54 | half <- ceiling(length(benchmarks) / 2); process <- vector("list", if (half_first) half else length(benchmarks) - half); # split point comes from the input rather than a fixed 16 55 | 56 | lowest = 100; 57 | highest = 0; 58 | 59 | n <- 1; 60 | for (bench in benchmarks) { 61 | load <- if (half_first) n <= half else n > half; 62 | if (load) { 63 | names(process)[if (half_first) n else n - half] <- bench; 64 | mvalue <- subset(data, benchmark == bench & dataset == "max")[,c("count")]; 65 | 66 | mds <- max(as.numeric(as.vector(subset(data, benchmark == bench & dataset != "max")[,c("dataset")]))); 67 | stopifnot(mds > 0); 68 | for (c in 1:20) { 69 | c2 <- max(1, trunc(c * mds / 20)); # 20 sample points spread over the mds data sets; never ask for data set 0 70 | v <- (subset(data, benchmark == bench & dataset == c2)[,c("count")] / mvalue) * 100; 71 | 72 | lowest <- min(lowest, v); 73 | highest <- max(highest, v); 74 | 75 | process[[bench]][c] <- v; 76 | } 77 | } 78 | 79 | n <- n + 1; 80 | } 81 | 82 | pdf(file=output, width=10, height=6, bg="white"); 83 | 84 | if (simple) { 85 | par(oma=c(1.5,1.5,0,0),mar=c(1,1,1,1)); 86 | } else { 87 | par(oma=c(1.75,1.75,1,11.5),mar=c(1.75,1.75,0,0)); 88 | } 89 | 90 | p = plot(1, 1, xlim=c(1, 20), ylim=c(lowest, highest), type="n", axes=F, ann=F); 91 | 92 | axis(side=1, at=c(1:20), cex.axis = 1.0); 93 | axis(side=2, at=pretty(c(lowest,
highest)), cex.axis = 1.0); 94 | box(); 95 | 96 | n_bms <- length(process); 97 | colors <- rainbow(n_bms); 98 | linetype <- c(1:n_bms); 99 | plotchar <- seq_len(n_bms); # one plotting symbol per benchmark 100 | 101 | n <- 1; 102 | for (bench in names(process)) { 103 | lines(c(1:20), process[[bench]], type="b", lwd=1.5, 104 | lty=linetype[n], col=colors[n], pch=plotchar[n]); 105 | n <- n + 1; 106 | } 107 | 108 | if (!simple) { 109 | mtext(text="Data set", side=1, line=0.5, cex=1.1, outer=TRUE); 110 | mtext(text=if (type_cfg) "CFGs (%)" else "Instructions (%)", side=2, line=0.5, cex=1.1, outer=TRUE); 111 | legend("topright", inset=c(if (half_first) -0.32 else -0.31, 0.15), legend=names(process), cex=1.0, col=colors, 112 | pch=plotchar, lty=linetype, title="Benchmark", xpd=NA); 113 | } 114 | -------------------------------------------------------------------------------- /data/rq4-incremental/plots/cfgs-part1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rimsa/CFGgrind/353cb3ba1e4bbd373f14ce743af1c317d1b3a539/data/rq4-incremental/plots/cfgs-part1.pdf -------------------------------------------------------------------------------- /data/rq4-incremental/plots/cfgs-part2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rimsa/CFGgrind/353cb3ba1e4bbd373f14ce743af1c317d1b3a539/data/rq4-incremental/plots/cfgs-part2.pdf -------------------------------------------------------------------------------- /data/rq4-incremental/plots/instrs-part1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rimsa/CFGgrind/353cb3ba1e4bbd373f14ce743af1c317d1b3a539/data/rq4-incremental/plots/instrs-part1.pdf -------------------------------------------------------------------------------- /data/rq4-incremental/plots/instrs-part2.pdf: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/rimsa/CFGgrind/353cb3ba1e4bbd373f14ce743af1c317d1b3a539/data/rq4-incremental/plots/instrs-part2.pdf -------------------------------------------------------------------------------- /data/rq5-static-vs-dynamic/analysis.R: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env Rscript 2 | 3 | usage <- function() { 4 | cat('Usage: Rscript analysis.R [Input file]\n'); 5 | quit(); 6 | } 7 | 8 | args <- commandArgs(trailingOnly=TRUE); 9 | if (length(args) != 1) 10 | usage(); 11 | 12 | data <- read.csv(args[1], header = TRUE, sep = ','); 13 | 14 | cfgs <- sum(data$cfgsM) + sum(data$cfgsS) + sum(data$cfgsD); 15 | total <- sum(data$cfgsT); 16 | 17 | cat(paste("CFGs coverage: ", cfgs, " (", round(cfgs * 100 / total, digits=1), "%) of ", total, " (100%)\n\n", sep = '')); 18 | 19 | info <- c("cfgs", "blocks", "edges", "instrs", "calls"); 20 | types <- c("cfggrind (A)", "dyninst (B)", "A∩B", "A\\B", "B\\A"); 21 | 22 | process <- matrix(0, nrow = length(types), ncol = length(info)); 23 | rownames(process) <- info; 24 | colnames(process) <- types; 25 | 26 | for (i in info) { 27 | matched <- sum(data[,paste(i, "M", sep = '')]); 28 | static <- sum(data[,paste(i, "S", sep = '')]); 29 | dynamic <- sum(data[,paste(i, "D", sep = '')]); 30 | process[i,] <- c(matched + dynamic, matched + static, matched, dynamic, static); 31 | } 32 | 33 | print(process); 34 | cat("\n"); 35 | 36 | process2 <- matrix("", nrow = length(types), ncol = length(info) - 2); 37 | rownames(process2) <- info; 38 | colnames(process2) <- tail(types, -2); 39 | for (i in info) { 40 | process2[i, types[3]] <- paste(round(process[i,types[3]] / process[i,types[1]] * 100, digits = 1), '%/', 41 | round(process[i,types[3]] / process[i,types[2]] * 100, digits = 1), '%', sep = ''); 42 | process2[i, types[4]] <- paste(round(process[i,types[4]] / process[i,types[1]] * 100, digits = 1), '%', sep = ''); 43 | process2[i, types[5]] <- 
paste(round(process[i,types[5]] / process[i,types[2]] * 100, digits = 1), '%', sep = ''); 44 | } 45 | print(process2); 46 | -------------------------------------------------------------------------------- /data/rq5-static-vs-dynamic/cbench-stripped.csv: -------------------------------------------------------------------------------- 1 | benchmark,cfgsT,cfgsM,instrsM,blocksM,phantomsM,edgesM,callsM,cfgsS,instrsS,blocksS,phantomsS,edgesS,callsS,cfgsD,instrsD,blocksD,phantomsD,edgesD,callsD 2 | automotive_bitcount,27,3,108,25,0,38,7,1,38,13,0,16,4,11,204,33,7,56,11 3 | automotive_qsort1,11,4,367,82,0,127,14,0,32,15,0,16,7,5,77,20,8,31,8 4 | automotive_susan_c,28,9,1261,182,0,303,36,12,4283,483,0,744,77,4,68,19,23,29,5 5 | automotive_susan_e,28,10,2128,209,0,324,36,11,3416,456,0,723,77,4,68,19,24,30,5 6 | automotive_susan_s,28,10,691,118,0,178,38,11,4853,547,0,869,75,4,68,17,23,27,4 7 | bzip2d,81,38,10284,1814,0,2939,124,21,3644,1175,0,1635,281,6,75,99,300,159,19 8 | bzip2e,81,27,7389,1219,0,1969,89,32,6539,1770,0,2605,316,6,75,85,169,142,15 9 | consumer_jpeg_c,337,64,2853,639,0,1028,91,40,4966,1193,0,1719,139,56,3506,715,259,1131,203 10 | consumer_jpeg_d,323,59,2772,567,0,904,84,50,6106,1516,0,2204,155,54,3973,809,320,1323,174 11 | consumer_lame,205,94,11093,2048,0,3183,305,49,12277,2306,0,3189,659,6,131,160,265,260,24 12 | consumer_mad,275,63,5747,800,0,1256,159,114,10233,2764,0,4081,650,17,1592,305,225,464,88 13 | consumer_tiff2bw,320,62,2865,723,0,1170,159,49,8837,2388,0,3428,500,32,1300,354,340,596,88 14 | consumer_tiff2rgba,320,75,3275,810,0,1300,185,40,8758,2365,0,3361,523,34,1701,412,389,693,103 15 | consumer_tiffdither,320,65,4162,1053,0,1691,172,46,7375,2030,0,2861,472,38,1371,372,351,621,106 16 | consumer_tiffmedian,322,64,4078,992,0,1597,187,49,8840,2371,0,3392,491,21,788,237,323,399,74 17 | network_dijkstra,13,6,280,70,0,102,26,0,34,10,0,11,8,4,68,15,7,24,4 18 | network_patricia,13,6,368,80,0,118,27,0,114,34,0,41,18,4,68,17,14,26,5 19 | 
office_ghostscript,3488,592,34258,6752,0,10815,964,426,48360,11453,0,16654,1972,735,22760,5406,1969,8693,2414 20 | office_ispell,119,54,5703,1145,0,1740,266,57,7786,2159,0,2906,743,4,71,58,170,74,7 21 | office_rsynth,56,38,3580,716,0,1084,199,5,1227,388,0,495,129,6,157,62,95,94,19 22 | office_stringsearch1,18,4,414,104,0,148,35,0,41,12,0,13,10,4,68,15,8,24,4 23 | security_blowfish_d,15,5,580,69,0,103,19,0,318,23,0,27,8,4,68,17,11,26,4 24 | security_blowfish_e,15,5,580,70,0,103,19,0,318,22,0,27,8,4,68,16,10,25,4 25 | security_pgp_d,327,95,6061,1154,0,1800,329,206,28250,8493,0,11450,3578,6,395,167,299,261,24 26 | security_pgp_e,327,120,7139,1403,0,2135,412,181,27172,8244,0,11115,3495,4,68,93,249,141,11 27 | security_rijndael_d,15,6,1216,75,0,116,17,3,1129,102,0,131,29,4,68,19,21,28,5 28 | security_rijndael_e,15,7,1195,93,0,139,24,2,1150,84,0,108,22,4,68,18,19,27,4 29 | security_sha,15,8,473,69,0,104,19,0,57,24,0,30,5,4,68,20,13,32,4 30 | telecom_CRC32,12,4,128,26,0,40,10,0,21,12,0,13,3,4,68,17,6,26,4 31 | telecom_adpcm_c,10,3,160,37,0,56,6,0,17,6,0,7,4,4,68,15,5,24,4 32 | telecom_adpcm_d,10,3,147,30,0,45,6,0,18,7,0,8,4,4,68,15,6,24,4 33 | telecom_gsm,76,30,2983,434,0,690,56,11,1642,383,0,508,146,8,218,82,104,125,28 34 | -------------------------------------------------------------------------------- /data/rq5-static-vs-dynamic/cbench-symbols.csv: -------------------------------------------------------------------------------- 1 | benchmark,cfgsT,cfgsM,instrsM,blocksM,phantomsM,edgesM,callsM,cfgsS,instrsS,blocksS,phantomsS,edgesS,callsS,cfgsD,instrsD,blocksD,phantomsD,edgesD,callsD 2 | automotive_bitcount,27,14,300,55,0,88,10,13,289,68,0,104,15,0,12,3,7,6,8 3 | automotive_qsort1,11,9,432,98,0,151,17,2,62,24,0,32,9,0,12,4,8,7,5 4 | automotive_susan_c,28,13,1317,195,0,322,39,15,4315,493,0,762,80,0,12,6,23,10,2 5 | automotive_susan_e,28,14,2184,222,0,343,39,14,3448,466,0,741,80,0,12,6,24,11,2 6 | 
automotive_susan_s,28,14,747,131,0,197,41,14,4885,557,0,887,78,0,12,4,23,8,1 7 | bzip2d,81,44,10347,1830,0,2964,129,37,4119,1308,0,1840,317,0,12,83,300,134,14 8 | bzip2e,81,33,7452,1235,0,1994,94,48,7014,1903,0,2810,352,0,12,69,169,117,10 9 | consumer_jpeg_c,337,120,6347,1324,0,2109,214,217,22183,4700,0,6943,404,0,12,30,259,50,80 10 | consumer_jpeg_d,323,113,6733,1322,0,2136,146,210,20640,4425,0,6497,484,0,12,54,320,91,112 11 | consumer_lame,205,100,11212,2077,0,3232,317,105,13652,2584,0,3606,733,0,12,131,265,211,12 12 | consumer_mad,275,80,7327,1074,0,1677,228,195,18499,4681,0,6891,855,0,12,31,225,43,19 13 | consumer_tiff2bw,320,94,4153,1001,0,1663,183,226,27307,6336,0,9201,1057,0,12,76,340,103,64 14 | consumer_tiff2rgba,320,109,4964,1137,0,1876,222,211,26482,6210,0,8982,1037,0,12,85,389,117,66 15 | consumer_tiffdither,320,103,5521,1348,0,2205,214,217,25774,5961,0,8613,1011,0,12,77,351,107,64 16 | consumer_tiffmedian,322,85,4854,1156,0,1896,205,237,27822,6433,0,9359,1054,0,12,73,323,100,56 17 | network_dijkstra,13,10,336,83,0,121,29,3,66,20,0,29,10,0,12,2,7,5,1 18 | network_patricia,13,10,424,93,0,137,30,3,150,46,0,62,20,0,12,4,14,7,2 19 | office_ghostscript,3488,1327,57006,11755,0,18929,1955,2161,132787,29458,0,43357,5185,0,12,403,1969,579,1423 20 | office_ispell,119,58,5762,1157,0,1758,269,61,7832,2169,0,2923,746,0,12,46,170,56,4 21 | office_rsynth,56,44,3725,755,0,1145,210,12,1687,509,0,669,178,0,12,23,95,33,8 22 | office_stringsearch1,18,8,470,117,0,167,38,10,457,108,0,164,24,0,12,2,8,5,1 23 | security_blowfish_d,15,9,636,82,0,122,22,6,983,94,0,136,16,0,12,4,11,7,1 24 | security_blowfish_e,15,9,636,83,0,122,22,6,983,93,0,136,16,0,12,3,10,6,1 25 | security_pgp_d,327,101,6444,1243,0,1944,345,226,27557,8348,0,11256,3516,0,12,78,299,117,8 26 | security_pgp_e,327,124,7195,1416,0,2156,417,203,26806,8175,0,11044,3444,0,12,80,249,120,6 27 | security_rijndael_d,15,10,1272,88,0,135,20,5,1159,111,0,147,31,0,12,6,21,9,2 28 | 
security_rijndael_e,15,11,1251,106,0,158,27,4,1180,93,0,124,24,0,12,5,19,8,1 29 | security_sha,15,12,529,82,0,123,22,3,98,34,0,48,7,0,12,7,13,13,1 30 | telecom_CRC32,12,8,184,39,0,59,13,4,74,27,0,39,5,0,12,4,6,7,1 31 | telecom_adpcm_c,10,7,216,50,0,75,9,3,128,29,0,44,6,0,12,2,5,5,1 32 | telecom_adpcm_d,10,7,203,43,0,64,9,3,140,36,0,55,6,0,12,2,6,5,1 33 | telecom_gsm,76,38,3189,493,0,778,79,38,3573,576,0,786,211,0,12,23,104,37,5 34 | -------------------------------------------------------------------------------- /data/rq5-static-vs-dynamic/spec-stripped.csv: -------------------------------------------------------------------------------- 1 | benchmark,cfgsT,cfgsM,instrsM,blocksM,phantomsM,edgesM,callsM,cfgsS,instrsS,blocksS,phantomsS,edgesS,callsS,cfgsD,instrsD,blocksD,phantomsD,edgesD,callsD 2 | 500.perlbench_r,2595,777,73574,19083,0,31098,3353,521,247584,73136,0,109086,58498,333,28158,7355,5243,11785,2059 3 | 502.gcc_r,12950,4396,392353,105178,0,167988,23748,3220,899620,293861,0,434768,889827,1485,136183,38587,23217,60641,12962 4 | 503.bwaves_r,18,12,6386,560,0,805,193,0,215,56,0,76,16,4,68,56,18,88,11 5 | 505.mcf_r,47,32,2947,584,0,897,100,1,660,242,0,321,25,6,86,41,62,63,65 6 | 507.cactuBSSN_r,2693,678,47816,9242,0,13481,3644,364,391486,89908,0,138040,13063,250,99004,22948,2537,28634,13572 7 | 508.namd_r,133,47,4962,773,0,1144,260,8,1692,430,0,657,209,18,22052,2378,356,3629,208 8 | 510.parest_r,18160,1310,109331,21148,0,31379,6184,2509,436143,106912,0,163895,54786,788,36172,8545,3826,12492,5016 9 | 511.povray_r,1643,523,31760,7160,0,11023,2103,436,105848,22895,0,33002,19999,105,5445,1486,1444,2196,733 10 | 519.lbm_r,27,16,1026,108,0,166,30,4,1100,100,0,132,41,4,68,18,17,30,7 11 | 520.omnetpp_r,6425,785,37218,8717,0,13178,2468,653,89308,25711,0,36986,10316,1336,34272,8373,2445,11982,6119 12 | 521.wrf_r,7220,1751,619544,69863,0,105333,23361,2230,2747852,372008,0,529364,288616,24,7876,7085,13215,10445,1923 13 | 
523.xalancbmk_r,14126,1908,91550,20007,0,30327,4294,2639,450166,123339,0,184063,31802,1258,29792,8265,4862,12463,6476 14 | 525.x264_r,567,220,41756,6597,0,10168,969,93,43562,8031,0,11232,1942,128,7478,1501,1159,2456,562 15 | 526.blender_r,39237,1990,99423,22829,0,33854,8198,7152,752683,176994,0,260762,76354,1792,46604,8043,4076,11601,7381 16 | 527.cam4_r,4010,1154,253923,35880,0,52888,12384,1349,597810,106309,0,147310,49430,23,2032,1705,5790,2782,310 17 | 531.deepsjeng_r,119,82,12046,2644,0,4046,457,10,2490,731,0,1032,269,6,114,65,139,101,17 18 | 538.imagick_r,2212,311,15376,3776,0,5837,1264,812,271673,61441,0,87467,20730,18,1441,522,1126,799,237 19 | 541.leela_r,349,187,17853,3277,0,4993,736,68,28763,7924,0,11657,2014,16,549,800,672,1291,99 20 | 544.nab_r,274,67,9241,1654,0,2517,464,78,15635,3641,0,5256,910,4,68,84,327,131,23 21 | 548.exchange2_r,23,17,13964,2493,0,3921,103,0,451,158,0,195,30,4,68,67,71,102,9 22 | 549.fotonik3d_r,99,41,32319,3225,0,5073,729,17,32981,5093,0,6940,1767,4,68,186,623,354,7 23 | 554.roms_r,316,131,84828,7510,0,11307,2196,50,40316,6550,0,8833,2385,5,182,429,888,774,39 24 | 557.xz_r,402,116,9062,1485,0,2366,216,77,6551,1798,0,2483,570,45,5732,1161,285,1783,156 25 | 600.perlbench_s,2595,777,73574,19083,0,31098,3353,518,247350,73077,0,109001,58361,333,28158,7355,5243,11785,2059 26 | 602.gcc_s,12950,4439,402842,108157,0,172524,24563,3177,889131,290882,0,430232,889012,1503,138586,39262,23331,61653,13191 27 | 603.bwaves_s,47,40,10719,789,0,1106,341,1,322,74,0,89,15,4,68,21,42,34,5 28 | 605.mcf_s,47,32,2947,589,0,902,101,1,660,237,0,316,24,6,86,39,61,60,64 29 | 607.cactuBSSN_s,2772,693,71071,9522,0,13923,3712,418,450199,90823,0,139317,13289,251,75048,22867,2553,28579,13596 30 | 619.lbm_s,34,19,1129,132,0,198,45,7,1226,130,0,171,56,4,68,18,20,30,7 31 | 620.omnetpp_s,6426,785,36753,8342,0,12694,2365,653,89773,26087,0,37471,10419,1336,33819,8854,2445,12997,6222 32 | 
621.wrf_s,7363,1805,630030,70225,0,105825,23602,2320,2766684,373846,0,531308,309605,24,7951,7096,13285,10448,1939 33 | 623.xalancbmk_s,14126,1908,91550,20007,0,30327,4294,2639,450166,123339,0,184063,31802,1258,29792,8265,4862,12463,6476 34 | 625.x264_s,567,220,41756,6597,0,10168,969,93,43562,8031,0,11232,1942,128,7478,1501,1159,2456,562 35 | 627.cam4_s,4206,1335,315904,44832,0,66724,14207,1390,593225,105436,0,145352,49826,23,2032,1996,6625,3289,333 36 | 628.pop2_s,3461,1130,263190,43747,0,63042,16464,1006,455492,84334,0,115524,43588,32,12017,4591,7163,6901,1218 37 | 631.deepsjeng_s,119,82,12168,2669,0,4079,467,10,2368,706,0,999,259,6,114,65,133,101,17 38 | 638.imagick_s,2381,386,26677,5532,0,8507,1837,909,270661,62347,0,88186,22126,18,1813,733,1591,1116,324 39 | 641.leela_s,350,187,17730,3190,0,4883,713,68,28886,8011,0,11767,2037,16,546,905,672,1479,122 40 | 644.nab_s,278,72,10718,1871,0,2829,541,78,14542,3559,0,5120,910,4,68,88,348,137,23 41 | 648.exchange2_s,23,17,13964,2493,0,3921,103,0,451,158,0,195,30,4,68,67,71,102,9 42 | 649.fotonik3d_s,130,67,35299,3495,0,5460,863,22,33916,5352,0,7276,1792,4,68,229,718,442,7 43 | 654.roms_s,343,158,85797,7690,0,11540,2322,50,40811,6604,0,8909,2404,5,182,432,919,775,47 44 | 657.xz_s,405,122,9394,1554,0,2467,245,76,6702,1844,0,2533,600,45,5774,1175,297,1802,156 45 | -------------------------------------------------------------------------------- /data/rq5-static-vs-dynamic/spec-symbols.csv: -------------------------------------------------------------------------------- 1 | benchmark,cfgsT,cfgsM,instrsM,blocksM,phantomsM,edgesM,callsM,cfgsS,instrsS,blocksS,phantomsS,edgesS,callsS,cfgsD,instrsD,blocksD,phantomsD,edgesD,callsD 2 | 500.perlbench_r,2595,1110,101720,25623,0,41591,4872,1485,298604,86280,0,126621,23153,0,12,815,5243,1292,540 3 | 502.gcc_r,12950,5881,528460,140607,0,223756,33877,7069,1048594,336190,0,491690,104976,0,76,3158,23217,4873,2833 4 | 503.bwaves_r,18,16,6442,573,0,824,196,2,245,65,0,92,18,0,12,43,18,69,8 5 | 
505.mcf_r,47,38,3021,603,0,926,103,9,982,317,0,438,37,0,12,22,62,34,62 6 | 507.cactuBSSN_r,2693,928,146808,31578,0,41280,16712,1765,583380,125404,0,186104,21636,0,12,612,2537,835,504 7 | 508.namd_r,133,65,27002,3030,0,4560,452,68,157642,16880,0,24989,1779,0,12,121,356,213,16 8 | 510.parest_r,18160,2098,145491,29225,0,43139,9321,16062,1335747,298193,0,457994,93964,0,12,468,3826,732,1879 9 | 511.povray_r,1643,628,37193,8378,0,12818,2644,1015,127486,26432,0,38382,8063,0,12,268,1444,401,192 10 | 519.lbm_r,27,20,1082,121,0,187,35,7,1379,101,0,137,37,0,12,5,17,9,2 11 | 520.omnetpp_r,6425,2121,71478,16710,0,24602,6067,4304,238558,65645,0,94947,24420,0,12,380,2445,558,2520 12 | 521.wrf_r,7220,1775,627408,71285,0,107283,24193,5445,2566886,333719,0,487712,88806,0,12,5663,13215,8495,1091 13 | 523.xalancbmk_r,14126,3166,121330,27591,0,41805,6847,10960,729021,201553,0,301928,49548,0,12,681,4862,985,3923 14 | 525.x264_r,567,348,49222,7624,0,11860,1023,219,47458,8687,0,12125,1528,0,12,474,1159,764,508 15 | 526.blender_r,39237,3782,146009,30188,0,44460,12669,35454,1431820,344520,0,511299,88342,0,18,684,4076,995,2910 16 | 527.cam4_r,4010,1177,255943,36269,0,53523,12521,2833,779959,137255,0,191658,49337,0,12,1316,5790,2147,173 17 | 531.deepsjeng_r,119,88,12148,2665,0,4083,468,31,3211,900,0,1277,226,0,12,44,139,64,6 18 | 538.imagick_r,2212,329,16805,4163,0,6445,1418,1883,356816,85686,0,120629,29835,0,12,135,1126,191,83 19 | 541.leela_r,349,203,18390,3406,0,5177,817,146,35961,9464,0,14054,2445,0,12,671,672,1107,18 20 | 544.nab_r,274,71,9297,1667,0,2538,469,203,23524,5906,0,8590,1562,0,12,71,327,110,18 21 | 548.exchange2_r,23,21,14020,2506,0,3940,106,2,481,167,0,211,32,0,12,54,71,83,6 22 | 549.fotonik3d_r,99,45,32375,3238,0,5092,732,54,34425,5347,0,7239,1975,0,12,173,623,335,4 23 | 554.roms_r,316,136,84998,7533,0,11361,2217,180,59865,9279,0,12993,2778,0,12,406,888,720,18 24 | 557.xz_r,402,161,14782,2574,0,4045,323,241,12520,3229,0,4592,977,0,12,72,285,104,49 25 | 
600.perlbench_s,2595,1110,101720,25623,0,41591,4872,1485,298604,86280,0,126621,23153,0,12,815,5243,1292,540 26 | 602.gcc_s,12950,5942,541350,144313,0,229375,34901,7008,1035704,332484,0,486071,103952,0,78,3106,23331,4802,2853 27 | 603.bwaves_s,47,44,10775,802,0,1125,344,3,352,83,0,105,17,0,12,8,42,15,2 28 | 605.mcf_s,47,38,3021,608,0,931,104,9,982,312,0,433,36,0,12,20,61,31,61 29 | 607.cactuBSSN_s,2772,944,146107,31537,0,41236,16755,1827,586792,126133,0,187010,22002,0,12,852,2553,1266,553 30 | 619.lbm_s,34,23,1185,145,0,219,50,11,1548,140,0,188,57,0,12,5,20,9,2 31 | 620.omnetpp_s,6426,2121,70560,15949,0,23615,5852,4304,239476,66406,0,95934,24635,0,12,1247,2445,2076,2735 32 | 621.wrf_s,7363,1829,637969,71646,0,107773,24433,5534,2584122,336005,0,490268,89485,0,12,5675,13285,8500,1108 33 | 623.xalancbmk_s,14126,3166,121330,27591,0,41805,6847,10960,729021,201553,0,301928,49548,0,12,681,4862,985,3923 34 | 625.x264_s,567,348,49222,7624,0,11860,1023,219,47458,8687,0,12125,1528,0,12,474,1159,764,508 35 | 627.cam4_s,4206,1358,317924,45221,0,67359,14344,2848,756924,133258,0,184891,49149,0,12,1607,6625,2654,196 36 | 628.pop2_s,3461,1162,275195,45754,0,65875,17441,2299,587099,107103,0,148462,37291,0,12,2584,7163,4068,241 37 | 631.deepsjeng_s,119,88,12270,2690,0,4116,478,31,3089,875,0,1244,216,0,12,44,133,64,6 38 | 638.imagick_s,2381,404,28478,6053,0,9318,2059,1977,363711,87781,0,123180,31255,0,12,212,1591,305,102 39 | 641.leela_s,350,203,18264,3316,0,5063,793,146,36087,9554,0,14168,2469,0,12,779,672,1299,42 40 | 644.nab_s,278,76,10774,1884,0,2850,546,202,23708,5935,0,8620,1568,0,12,75,348,116,18 41 | 648.exchange2_s,23,21,14020,2506,0,3940,106,2,481,167,0,211,32,0,12,54,71,83,6 42 | 649.fotonik3d_s,130,71,35355,3508,0,5479,866,59,35360,5606,0,7575,2000,0,12,216,718,423,4 43 | 654.roms_s,343,163,85967,7713,0,11594,2343,180,60170,9336,0,13072,2799,0,12,409,919,721,26 44 | 657.xz_s,405,167,15156,2658,0,4165,354,238,12580,3245,0,4600,999,0,12,71,297,104,47 45 | 
-------------------------------------------------------------------------------- /data/rq6-complexity/data-cbench.csv: -------------------------------------------------------------------------------- 1 | benchmark,executed,nulgrind,cfggrind 2 | automotive_bitcount,735,21,182 3 | automotive_qsort1,689,25,230 4 | automotive_susan_c,588,12,40 5 | automotive_susan_e,628,15,54 6 | automotive_susan_s,885,18,140 7 | bzip2d,554,12,73 8 | bzip2e,602,13,67 9 | consumer_jpeg_c,563,18,113 10 | consumer_jpeg_d,577,28,86 11 | consumer_lame,598,44,132 12 | consumer_mad,792,17,72 13 | consumer_tiff2bw,608,25,178 14 | consumer_tiff2rgba,537,33,170 15 | consumer_tiffdither,595,19,150 16 | consumer_tiffmedian,640,24,130 17 | network_dijkstra,462,12,117 18 | network_patricia,224,9,114 19 | office_ghostscript,710,27,211 20 | office_ispell,745,30,205 21 | office_rsynth,140,11,24 22 | office_stringsearch1,456,47,147 23 | security_blowfish_d,783,17,59 24 | security_blowfish_e,768,18,77 25 | security_pgp_d,546,25,115 26 | security_pgp_e,824,25,102 27 | security_rijndael_d,824,32,131 28 | security_rijndael_e,826,32,142 29 | security_sha,1169,19,97 30 | telecom_CRC32,500,20,189 31 | telecom_adpcm_c,497,14,111 32 | telecom_adpcm_d,956,25,174 33 | telecom_gsm,710,16,88 34 | -------------------------------------------------------------------------------- /data/rq6-complexity/data-spec.csv: -------------------------------------------------------------------------------- 1 | benchmark,executed,nulgrind,cfggrind 2 | 500.perlbench,2674,1697,17924 3 | 502.gcc,1178,1053,8947 4 | 503.bwaves,7067,5123,22470 5 | 505.mcf,945,993,5031 6 | 507.cactuBSSN,1558,1609,2496 7 | 508.namd,2601,1868,2778 8 | 510.parest,3889,3774,12881 9 | 511.povray,3695,3118,17842 10 | 519.lbm,1659,1612,1951 11 | 520.omnetpp,1105,1462,9732 12 | 521.wrf,4829,4683,14211 13 | 523.xalancbmk,1404,1163,11033 14 | 525.x264,5014,1651,10215 15 | 526.blender,1877,1659,7443 16 | 527.cam4,2980,2638,11196 17 | 531.deepsjeng,1924,1095,10259 18 
| 538.imagick,4693,5009,17435 19 | 541.leela,2639,1532,12103 20 | 544.nab,2246,1981,6791 21 | 548.exchange2,4796,1853,16521 22 | 549.fotonik3d,2453,1889,2942 23 | 554.roms,3058,2897,7467 24 | 557.xz,1978,1024,6760 25 | 600.perlbench,2674,1711,18167 26 | 602.gcc,2345,1895,16132 27 | 603.bwaves,97744,58203,373170 28 | 605.mcf,1679,2000,10019 29 | 607.cactuBSSN,12850,13674,20988 30 | 619.lbm,4940,5325,6258 31 | 620.omnetpp,1081,1375,9551 32 | 621.wrf,27795,30276,83793 33 | 623.xalancbmk,1404,1170,11142 34 | 625.x264,5014,1661,10232 35 | 627.cam4,19713,15358,79302 36 | 628.pop2,25530,27854,108212 37 | 631.deepsjeng,2245,1283,12382 38 | 638.imagick,74124,90852,240532 39 | 641.leela,2639,1538,12036 40 | 644.nab,22827,20771,72131 41 | 648.exchange2,4796,1856,17409 42 | 649.fotonik3d,7653,5707,10711 43 | 654.roms,25611,24531,69053 44 | 657.xz,8574,5479,29017 45 | -------------------------------------------------------------------------------- /data/rq6-complexity/plot.R: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env Rscript 2 | 3 | usage <- function() { 4 | cat('Usage: Rscript plot.R [cBench data] [spec data] \n'); 5 | cat(' -s Graph only\n'); 6 | cat(' -e Executed instructions\n'); 7 | cat(' -r Executed runtime\n'); 8 | quit(); 9 | } 10 | 11 | getrange <- function(from, to) { 12 | c(floor(log10(from)), ceiling(log10(to))); 13 | } 14 | 15 | plotaxis <- function(a, power) { 16 | at <- c(10 ^ (power[1]:power[2])); 17 | ly <- sapply(at, function(i) { 18 | as.expression(bquote(10^ .(log10(i)))) 19 | }); 20 | axis(a, at=at, labels=ly, las=1, cex.axis=1.3); 21 | } 22 | 23 | simple <- FALSE; 24 | extra <- c(); 25 | args <- commandArgs(trailingOnly=TRUE); 26 | for (a in args) { 27 | if (startsWith(a, '-')) { 28 | if (a == '-s') { 29 | simple <- TRUE; 30 | } else if (a == '-e') { 31 | type <- "executed"; 32 | } else if (a == '-r') { 33 | type <- "nulgrind"; 34 | } else { 35 | usage(); 36 | } 37 | } else { 38 | extra <- 
append(extra, a); 39 | } 40 | } 41 | 42 | if (length(extra) < 2 || length(extra) > 3 || !exists('type')) 43 | usage(); 44 | 45 | file1 <- extra[1]; 46 | file2 <- extra[2]; 47 | output <- extra[3]; 48 | if (is.na(output)) 49 | output <- "output.pdf"; 50 | 51 | pdf(file=output, width=8, height=6); 52 | if (simple) { 53 | par(oma=c(2.5,2.5,1,1.5),mar=c(0,0,0,0)); 54 | } else { 55 | par(oma=c(2.5,2.5,1,1.5),mar=c(1.5,2,0,0.5)); 56 | } 57 | 58 | data1 <- read.csv(file1, header = TRUE, sep = ','); 59 | data2 <- read.csv(file2, header = TRUE, sep = ','); 60 | 61 | matrix1 <- as.matrix(data1[,c(type, "cfggrind")]); 62 | matrix2 <- as.matrix(data2[,c(type, "cfggrind")]); 63 | 64 | xpower <- getrange(min(matrix1[,1], matrix2[,1]), max(matrix1[,1], matrix2[,1])); 65 | ypower <- getrange(min(matrix1[,2], matrix2[,2]), max(matrix1[,2], matrix2[,2])); 66 | 67 | plot(1, 1, xlab="", xlim=c(10 ** xpower[1], 10 ** xpower[2]), 68 | ylab="", ylim=c(10 ** ypower[1], 10 ** ypower[2]), 69 | log="xy", xaxt="n", yaxt="n"); 70 | plotaxis(1, xpower); 71 | plotaxis(2, ypower); 72 | 73 | colors <- c("#3414ad", "#d20000"); 74 | ptypes <- c(2, 1); 75 | 76 | points(matrix1, pch=ptypes[1], col=colors[1], cex=1.5, lwd=2); 77 | points(matrix2, pch=ptypes[2], col=colors[2], cex=1.5, lwd=2); 78 | 79 | if (!simple) { 80 | if (type == "executed") { 81 | legend(10**(xpower[1]-0.12), 10**(ypower[2]+0.2)+0, c("cBench", "SPEC CPU2017"), 82 | cex=1.3, pch=ptypes, 83 | col=colors, pt.cex=1.5, pt.lwd=2, xpd=NA); 84 | mtext(text="Executed instructions (in billions)", 85 | side=1, cex=1.5, line=1, outer=TRUE); 86 | } else { 87 | legend(10**(xpower[1]-0.20), 10**(ypower[2]+0.2)+0, c("cBench", "SPEC CPU2017"), 88 | cex=1.3, pch=ptypes, 89 | col=colors, pt.cex=1.5, pt.lwd=2, xpd=NA); 90 | mtext(text="nulgrind runtime (in seconds)", 91 | side=1, cex=1.5, line=1, outer=TRUE); 92 | } 93 | mtext(text="CFGgrind runtime (in seconds)", side=2, cex=1.5, line=1, outer=TRUE); 94 | } 95 | 
-------------------------------------------------------------------------------- /data/rq6-complexity/plots/executed.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rimsa/CFGgrind/353cb3ba1e4bbd373f14ce743af1c317d1b3a539/data/rq6-complexity/plots/executed.pdf -------------------------------------------------------------------------------- /data/rq6-complexity/plots/runtime.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rimsa/CFGgrind/353cb3ba1e4bbd373f14ce743af1c317d1b3a539/data/rq6-complexity/plots/runtime.pdf -------------------------------------------------------------------------------- /debug.c: -------------------------------------------------------------------------------- 1 | /*--------------------------------------------------------------------*/ 2 | /*--- CFGgrind ---*/ 3 | /*--- debug.c ---*/ 4 | /*--------------------------------------------------------------------*/ 5 | 6 | /* 7 | This file is part of CFGgrind, a dynamic control flow graph (CFG) 8 | reconstruction tool. 9 | 10 | Copyright (C) 2023, Andrei Rimsa (andrei@cefetmg.br) 11 | 12 | This tool is derived and contains a lot of code from Callgrind 13 | Copyright (C) 2002-2017, Josef Weidendorfer (Josef.Weidendorfer@gmx.de) 14 | 15 | This program is free software; you can redistribute it and/or 16 | modify it under the terms of the GNU General Public License as 17 | published by the Free Software Foundation; either version 2 of the 18 | License, or (at your option) any later version. 19 | 20 | This program is distributed in the hope that it will be useful, but 21 | WITHOUT ANY WARRANTY; without even the implied warranty of 22 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 23 | General Public License for more details. 24 | 25 | You should have received a copy of the GNU General Public License 26 | along with this program; if not, see <http://www.gnu.org/licenses/>.
27 | 28 | The GNU General Public License is contained in the file COPYING. 29 | */ 30 | 31 | #include "global.h" 32 | 33 | /* If debugging mode is off, dummy functions are provided (see below) 34 | */ 35 | #if CGD_ENABLE_DEBUG 36 | 37 | /*------------------------------------------------------------*/ 38 | /*--- Debug output helpers ---*/ 39 | /*------------------------------------------------------------*/ 40 | 41 | static void print_indent(int s) 42 | { 43 | /* max of 40 spaces */ 44 | const HChar sp[] = " "; 45 | if (s>40) s=40; 46 | VG_(printf)("%s", sp+40-s); 47 | } 48 | 49 | void CGD_(print_bb)(int s, BB* bb) 50 | { 51 | if (s<0) { 52 | s = -s; 53 | print_indent(s); 54 | } 55 | 56 | VG_(printf)("BB %#lx (Obj '%s')", bb_addr(bb), bb->obj->name); 57 | } 58 | 59 | void CGD_(print_execstate)(int s, exec_state* es) 60 | { 61 | if (s<0) { 62 | s = -s; 63 | print_indent(s); 64 | } 65 | 66 | if (!es) { 67 | VG_(printf)("ExecState 0x0\n"); 68 | return; 69 | } 70 | 71 | VG_(printf)("ExecState [Sig %d]: jmps_passed %d\n", 72 | es->sig, es->jmps_passed); 73 | } 74 | 75 | /* dump out the current call stack */ 76 | void CGD_(print_stackentry)(int s, int sp) 77 | { 78 | call_entry* ce; 79 | 80 | if (s<0) { 81 | s = -s; 82 | print_indent(s); 83 | } 84 | 85 | ce = CGD_(get_call_entry)(sp); 86 | VG_(printf)("[%-2d] SP %#lx, RA %#lx\n", sp, ce->sp, ce->ret_addr); 87 | } 88 | 89 | /* debug output */ 90 | #if 0 91 | static void print_call_stack() 92 | { 93 | int c; 94 | 95 | VG_(printf)("Call Stack:\n"); 96 | for(c=0;c0) { 134 | if (dir_buf[0]) 135 | VG_(printf)(" (%s/%s:%u)", dir_buf, fl_buf, ln); 136 | else 137 | VG_(printf)(" (%s:%u)", fl_buf, ln); 138 | } 139 | } 140 | 141 | void CGD_(print_addr_ln)(Addr addr) 142 | { 143 | CGD_(print_addr)(addr); 144 | VG_(printf)("\n"); 145 | } 146 | 147 | static ULong bb_written = 0; 148 | 149 | void CGD_(print_bbno)(void) 150 | { 151 | if (bb_written != CGD_(stat).bb_executions) { 152 | bb_written = CGD_(stat).bb_executions; 153 |
VG_(printf)("BB# %llu\n",CGD_(stat).bb_executions); 154 | } 155 | } 156 | 157 | #if CGD_DEBUG_MEM 158 | void* CGD_(malloc)(const HChar* cc, UWord s, const HChar* f) { 159 | void* p; 160 | 161 | CGD_UNUSED(cc); 162 | 163 | CGD_DEBUG(3, "Malloc(%lu) in %s: ", s, f); 164 | p = VG_(malloc)(cc, s); 165 | CGD_DEBUG(3, "%p\n", p); 166 | return p; 167 | } 168 | 169 | void* CGD_(realloc)(const HChar* cc, void* p, UWord s, const HChar* f) { 170 | CGD_UNUSED(cc); 171 | 172 | if (p != 0) 173 | CGD_DEBUG(3, "Free in %s: %p\n", f, p); 174 | 175 | CGD_DEBUG(3, "Malloc(%lu) in %s: ", s, f); 176 | p = VG_(realloc)(cc, p, s); 177 | CGD_DEBUG(3, "%p\n", p); 178 | return p; 179 | } 180 | 181 | void CGD_(free)(void* p, const HChar* f) { 182 | CGD_DEBUG(3, "Free in %s: %p\n", f, p); 183 | VG_(free)(p); 184 | } 185 | 186 | HChar* CGD_(strdup)(const HChar* cc, const HChar* s, const HChar* f) { 187 | HChar* p; 188 | 189 | CGD_UNUSED(cc); 190 | 191 | CGD_DEBUG(3, "Strdup(%s) in %s: ", s, f); 192 | p = VG_(strdup)(cc, s); 193 | CGD_DEBUG(3, "%p\n", p); 194 | return p; 195 | } 196 | #endif 197 | 198 | #else /* CGD_ENABLE_DEBUG */ 199 | 200 | void CGD_(print_bbno)(void) {} 201 | void CGD_(print_bb)(int s, BB* bb) {} 202 | void CGD_(print_stackentry)(int s, int sp) {} 203 | void CGD_(print_addr)(Addr addr) {} 204 | void CGD_(print_addr_ln)(Addr addr) {} 205 | 206 | #endif 207 | -------------------------------------------------------------------------------- /fdesc.c: -------------------------------------------------------------------------------- 1 | /*--------------------------------------------------------------------*/ 2 | /*--- CFGgrind ---*/ 3 | /*--- fdesc.c ---*/ 4 | /*--------------------------------------------------------------------*/ 5 | 6 | /* 7 | This file is part of CFGgrind, a dynamic control flow graph (CFG) 8 | reconstruction tool. 
9 | 10 | Copyright (C) 2023, Andrei Rimsa (andrei@cefetmg.br) 11 | 12 | This program is free software; you can redistribute it and/or 13 | modify it under the terms of the GNU General Public License as 14 | published by the Free Software Foundation; either version 2 of the 15 | License, or (at your option) any later version. 16 | 17 | This program is distributed in the hope that it will be useful, but 18 | WITHOUT ANY WARRANTY; without even the implied warranty of 19 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 | General Public License for more details. 21 | 22 | You should have received a copy of the GNU General Public License 23 | along with this program; if not, see . 24 | 25 | The GNU General Public License is contained in the file COPYING. 26 | */ 27 | 28 | #include "global.h" 29 | 30 | static 31 | const HChar* unknown_name = "???"; 32 | 33 | static 34 | const HChar* main_fname = "main"; 35 | 36 | struct _FunctionDesc { 37 | HChar* obj_name; 38 | HChar* fn_name; 39 | UInt fn_line; 40 | }; 41 | 42 | FunctionDesc* CGD_(new_fdesc)(Addr addr, Bool entry) { 43 | const HChar* tmp; 44 | FunctionDesc* fdesc; 45 | Bool found; 46 | 47 | DiEpoch ep = VG_(current_DiEpoch)(); 48 | found = entry ? 49 | VG_(get_fnname_if_entry)(ep, addr, &tmp) : 50 | VG_(get_fnname)(ep, addr, &tmp); 51 | 52 | if (found) { 53 | fdesc = (FunctionDesc*) CGD_MALLOC("cgd.fdesc.nf.1", sizeof(FunctionDesc)); 54 | 55 | fdesc->fn_name = CGD_STRDUP("cgd.fdesc.nf.2", tmp); 56 | if (!VG_(get_linenum)(ep, addr, &(fdesc->fn_line))) 57 | fdesc->fn_line = 0; 58 | 59 | fdesc->obj_name = VG_(get_objname)(ep, addr, &tmp) ? 
60 | CGD_STRDUP("cgd.fdesc.nf.3", tmp) : 0; 61 | } else { 62 | fdesc = 0; 63 | } 64 | 65 | return fdesc; 66 | } 67 | 68 | void CGD_(delete_fdesc)(FunctionDesc* fdesc) { 69 | CGD_ASSERT(fdesc); 70 | CGD_ASSERT(fdesc->fn_name); 71 | 72 | CGD_DATA_FREE(fdesc->fn_name, VG_(strlen)(fdesc->fn_name)); 73 | if (fdesc->obj_name) 74 | CGD_DATA_FREE(fdesc->obj_name, VG_(strlen)(fdesc->obj_name)); 75 | 76 | CGD_DATA_FREE(fdesc, sizeof(FunctionDesc)); 77 | } 78 | 79 | HChar* CGD_(fdesc_object_name)(FunctionDesc* fdesc) { 80 | CGD_ASSERT(fdesc != 0); 81 | return fdesc->obj_name; 82 | } 83 | 84 | HChar* CGD_(fdesc_function_name)(FunctionDesc* fdesc) { 85 | CGD_ASSERT(fdesc != 0); 86 | return fdesc->fn_name; 87 | } 88 | 89 | UInt CGD_(fdesc_function_line)(FunctionDesc* fdesc) { 90 | CGD_ASSERT(fdesc != 0); 91 | return fdesc->fn_line; 92 | } 93 | 94 | void CGD_(print_fdesc)(FunctionDesc* fdesc) { 95 | if (!fdesc) 96 | VG_(printf)("unknown"); 97 | else 98 | VG_(printf)("%s::%s(%u)", 99 | (fdesc->obj_name ? fdesc->obj_name : unknown_name), 100 | (fdesc->fn_name ? fdesc->fn_name : unknown_name), 101 | fdesc->fn_line); 102 | } 103 | 104 | void CGD_(fprint_fdesc)(VgFile* fp, FunctionDesc* fdesc) { 105 | CGD_ASSERT(fp != 0); 106 | 107 | if (!fdesc) 108 | VG_(fprintf)(fp, "unknown"); 109 | else 110 | VG_(fprintf)(fp, "%s::%s(%u)", 111 | (fdesc->obj_name ? fdesc->obj_name : unknown_name), 112 | (fdesc->fn_name ? fdesc->fn_name : unknown_name), 113 | fdesc->fn_line); 114 | } 115 | 116 | HChar* CGD_(fdesc2str)(FunctionDesc* fdesc) { 117 | const HChar* obj_name; 118 | const HChar* fn_name; 119 | HChar* fn_obj_name; 120 | 121 | if (fdesc == NULL) { 122 | return CGD_STRDUP("cgd.fdesc.fts.1", "unknown"); 123 | } else { 124 | obj_name = fdesc->obj_name ? fdesc->obj_name : unknown_name; 125 | fn_name = fdesc->fn_name ? 
fdesc->fn_name : unknown_name; 126 | 127 | SizeT size = VG_(strlen)(obj_name) + VG_(strlen)(fn_name) + + 3; 128 | fn_obj_name = CGD_MALLOC("cgd.fts.2", size); 129 | if (!fn_obj_name) 130 | VG_(tool_panic)("cfggrind: unable to allocate memory"); 131 | 132 | VG_(snprintf)(fn_obj_name, size, "%s::%s\n", obj_name, fn_name); 133 | 134 | return fn_obj_name; 135 | } 136 | } 137 | 138 | FunctionDesc* CGD_(str2fdesc)(const HChar* str) { 139 | HChar* ptr; 140 | HChar* tmp; 141 | SizeT size; 142 | FunctionDesc* fdesc; 143 | 144 | if (!str || VG_(strcasecmp)(str, "unknown") == 0) 145 | return 0; 146 | 147 | fdesc = (FunctionDesc*) CGD_MALLOC("cgd.fdesc.s2f.1", sizeof(FunctionDesc)); 148 | 149 | ptr = VG_(strrchr)(str, '('); 150 | if (ptr && (*(ptr + 1) >= '0' && *(ptr + 1) <= '9')) { 151 | size = (ptr - str) / sizeof(HChar); 152 | fdesc->obj_name = (HChar*) CGD_MALLOC("cgd.fdesc.s2f.2", ((size + 1) * sizeof(HChar))); 153 | VG_(strncpy)(fdesc->obj_name, str, size); 154 | *(fdesc->obj_name + size) = 0; 155 | 156 | ptr++; 157 | tmp = CGD_STRDUP("cgd.fdesc.s2f.3", ptr); 158 | if ((ptr = VG_(strchr)(tmp, ')'))) 159 | *ptr = 0; 160 | 161 | fdesc->fn_line = VG_(strtoll10)(tmp, 0); 162 | CGD_FREE(tmp); 163 | } else { 164 | fdesc->obj_name = CGD_STRDUP("cgd.fdesc.s2f.4", str); 165 | fdesc->fn_line = 0; 166 | } 167 | 168 | if ((ptr = VG_(strstr)(fdesc->obj_name, "::"))) { 169 | *ptr = 0; 170 | 171 | ptr += 2; 172 | fdesc->fn_name = CGD_STRDUP("cgd.fdesc.s2f.5", ptr); 173 | } else { 174 | fdesc->fn_name = fdesc->obj_name; 175 | fdesc->obj_name = 0; 176 | } 177 | 178 | return fdesc; 179 | } 180 | 181 | Bool CGD_(is_main_function)(FunctionDesc* fdesc) { 182 | return fdesc && fdesc->fn_name && VG_(strcmp)(fdesc->fn_name, main_fname) == 0; 183 | } 184 | 185 | Bool CGD_(compare_functions_desc)(FunctionDesc* fdesc1, FunctionDesc* fdesc2) { 186 | return (fdesc1 && fdesc2 && 187 | !VG_(strcmp)(fdesc1->obj_name, fdesc2->obj_name) && 188 | !VG_(strcmp)(fdesc1->fn_name, fdesc2->fn_name) && 189 | 
fdesc1->fn_line == fdesc2->fn_line); 190 | } 191 | -------------------------------------------------------------------------------- /instrs.c: -------------------------------------------------------------------------------- 1 | /*--------------------------------------------------------------------*/ 2 | /*--- CFGgrind ---*/ 3 | /*--- instrs.c ---*/ 4 | /*--------------------------------------------------------------------*/ 5 | 6 | /* 7 | This file is part of CFGgrind, a dynamic control flow graph (CFG) 8 | reconstruction tool. 9 | 10 | Copyright (C) 2023, Andrei Rimsa (andrei@cefetmg.br) 11 | 12 | This program is free software; you can redistribute it and/or 13 | modify it under the terms of the GNU General Public License as 14 | published by the Free Software Foundation; either version 2 of the 15 | License, or (at your option) any later version. 16 | 17 | This program is distributed in the hope that it will be useful, but 18 | WITHOUT ANY WARRANTY; without even the implied warranty of 19 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 | General Public License for more details. 21 | 22 | You should have received a copy of the GNU General Public License 23 | along with this program; if not, see . 24 | 25 | The GNU General Public License is contained in the file COPYING. 
26 | */ 27 | 28 | #include "global.h" 29 | 30 | #define DEFAULT_POOL_SIZE 262144 // 256k instructions 31 | 32 | typedef struct _instrs_hash instrs_hash; 33 | struct _instrs_hash { 34 | UInt size, entries; 35 | UniqueInstr** table; 36 | }; 37 | 38 | instrs_hash pool; 39 | 40 | static 41 | void delete_instr(UniqueInstr* instr) { 42 | CGD_ASSERT(instr != 0); 43 | 44 | if (instr->name) 45 | CGD_FREE(instr->name); 46 | 47 | if (instr->desc) { 48 | if (instr->desc->name != 0) 49 | CGD_FREE(instr->desc->name); 50 | 51 | CGD_DATA_FREE(instr->desc, sizeof(InstrDesc)); 52 | } 53 | 54 | CGD_DATA_FREE(instr, sizeof(UniqueInstr)); 55 | } 56 | 57 | static 58 | HChar* next_line(Int fd) { 59 | Int s, idx; 60 | HChar c; 61 | static HChar buffer[1024]; 62 | 63 | idx = 0; 64 | VG_(memset)(&buffer, 0, sizeof(buffer)); 65 | 66 | while (True) { 67 | CGD_ASSERT(idx >= 0 && idx < ((sizeof(buffer) / sizeof(HChar))-1)); 68 | s = VG_(read)(fd, &c, 1); 69 | if (s == 0 || c == '\n') 70 | break; 71 | 72 | // Ignore carriage returns. 73 | if (c == '\r') 74 | continue; 75 | 76 | buffer[idx++] = c; 77 | } 78 | 79 | return idx > 0 ? 
buffer : 0; 80 | } 81 | 82 | static 83 | void read_instr_names(void) { 84 | Int fd; 85 | Int size; 86 | Addr addr; 87 | HChar* line; 88 | HChar* tmp; 89 | HChar* name; 90 | UniqueInstr* instr; 91 | 92 | if (CGD_(clo).instrs_map) { 93 | fd = VG_(fd_open)(CGD_(clo).instrs_map, VKI_O_RDONLY, 0); 94 | if (fd < 0) 95 | tl_assert(0); 96 | 97 | while ((line = next_line(fd))) { 98 | tmp = VG_(strchr)(line, ':'); 99 | if (tmp == 0) 100 | continue; 101 | *tmp = 0; 102 | ++tmp; 103 | 104 | name = VG_(strchr)(tmp, ':'); 105 | if (name == 0) 106 | continue; 107 | *name = 0; 108 | ++name; 109 | 110 | addr = VG_(strtoull16)(line, 0); 111 | size = VG_(strtoll10)(tmp, 0); 112 | if (addr != 0 && size > 0 && *name != 0) { 113 | instr = CGD_(get_instr)(addr, size); 114 | instr->name = CGD_STRDUP("cgd.instrs.rin.1", name); 115 | } 116 | } 117 | 118 | VG_(close)(fd); 119 | } 120 | } 121 | 122 | static __inline__ 123 | UInt instrs_hash_idx(Addr addr, UInt size) { 124 | return addr % size; 125 | } 126 | 127 | static 128 | void resize_instrs_pool(void) { 129 | Int i, new_size, conflicts1 = 0; 130 | UniqueInstr **new_table, *curr, *next; 131 | UInt new_idx; 132 | 133 | // increase table by 50%. 
134 | new_size = (Int) (1.5f * pool.size); 135 | new_table = (UniqueInstr**) CGD_MALLOC("cgd.instrs.rit.1", 136 | (new_size * sizeof(UniqueInstr*))); 137 | VG_(memset)(new_table, 0, (new_size * sizeof(UniqueInstr*))); 138 | 139 | for (i = 0; i < pool.size; i++) { 140 | if (pool.table[i] == 0) 141 | continue; 142 | 143 | curr = pool.table[i]; 144 | while (curr != 0) { 145 | next = curr->chain; 146 | 147 | new_idx = instrs_hash_idx(curr->addr, new_size); 148 | 149 | curr->chain = new_table[new_idx]; 150 | new_table[new_idx] = curr; 151 | if (curr->chain) 152 | conflicts1++; 153 | 154 | curr = next; 155 | } 156 | } 157 | 158 | CGD_FREE(pool.table); 159 | 160 | CGD_DEBUG(0, "Resize instructions pool: %u => %d (entries %u, conflicts %d)\n", 161 | pool.size, new_size, 162 | pool.entries, conflicts1); 163 | 164 | pool.size = new_size; 165 | pool.table = new_table; 166 | CGD_(stat).instrs_pool_resizes++; 167 | } 168 | 169 | static 170 | UniqueInstr* lookup_instr(Addr addr) { 171 | UniqueInstr* instr; 172 | UInt idx; 173 | 174 | CGD_ASSERT(addr != 0); 175 | 176 | idx = instrs_hash_idx(addr, pool.size); 177 | instr = pool.table[idx]; 178 | 179 | while (instr) { 180 | if (instr->addr == addr) 181 | break; 182 | 183 | instr = instr->chain; 184 | } 185 | 186 | return instr; 187 | } 188 | 189 | void CGD_(init_instrs_pool)(void) { 190 | Int size; 191 | 192 | pool.size = DEFAULT_POOL_SIZE; 193 | pool.entries = 0; 194 | 195 | size = pool.size * sizeof(UniqueInstr*); 196 | pool.table = (UniqueInstr**) CGD_MALLOC("cgd.instrs.iip.1", size); 197 | VG_(memset)(pool.table, 0, size); 198 | 199 | // read instruction names. 
200 | read_instr_names(); 201 | } 202 | 203 | void CGD_(destroy_instrs_pool)(void) { 204 | Int i; 205 | 206 | for (i = 0; i < pool.size; i++) { 207 | UniqueInstr* instr = pool.table[i]; 208 | while (instr) { 209 | UniqueInstr* next = instr->chain; 210 | delete_instr(instr); 211 | instr = next; 212 | 213 | pool.entries--; 214 | } 215 | } 216 | 217 | CGD_ASSERT(pool.entries == 0); 218 | 219 | CGD_FREE(pool.table); 220 | pool.table = 0; 221 | } 222 | 223 | UniqueInstr* CGD_(get_instr)(Addr addr, Int size) { 224 | UniqueInstr* instr = lookup_instr(addr); 225 | if (instr) { 226 | CGD_ASSERT(instr->addr == addr); 227 | if (size != 0) { 228 | if (instr->size == 0) { 229 | instr->size = size; 230 | } else { 231 | CGD_ASSERT(instr->size == size); 232 | } 233 | } 234 | } else { 235 | UInt idx; 236 | 237 | /* check fill degree of instructions pool and resize if needed (>80%) */ 238 | pool.entries++; 239 | if (10 * pool.entries / pool.size > 8) 240 | resize_instrs_pool(); 241 | 242 | // Create the instruction. 
243 | instr = (UniqueInstr*) CGD_MALLOC("cgd.instrs.gi.1", sizeof(UniqueInstr)); 244 | VG_(memset)(instr, 0, sizeof(UniqueInstr)); 245 | instr->addr = addr; 246 | instr->size = size; 247 | 248 | /* insert into instructions pool */ 249 | idx = instrs_hash_idx(addr, pool.size); 250 | instr->chain = pool.table[idx]; 251 | pool.table[idx] = instr; 252 | } 253 | 254 | return instr; 255 | } 256 | 257 | UniqueInstr* CGD_(find_instr)(Addr addr) { 258 | return lookup_instr(addr); 259 | } 260 | 261 | Addr CGD_(instr_addr)(UniqueInstr* instr) { 262 | CGD_ASSERT(instr != 0); 263 | return instr->addr; 264 | } 265 | 266 | Int CGD_(instr_size)(UniqueInstr* instr) { 267 | CGD_ASSERT(instr != 0); 268 | return instr->size; 269 | } 270 | 271 | const HChar* CGD_(instr_name)(UniqueInstr* instr) { 272 | CGD_ASSERT(instr != 0); 273 | return instr->name; 274 | } 275 | 276 | InstrDesc* CGD_(instr_description)(UniqueInstr* instr) { 277 | CGD_ASSERT(instr != 0); 278 | 279 | if (!instr->desc) { 280 | Bool found; 281 | DiEpoch ep; 282 | UInt tmpline; 283 | const HChar *tmpfile, *tmpdir; 284 | 285 | ep = VG_(current_DiEpoch)(); 286 | found = VG_(get_filename_linenum)(ep, instr->addr, 287 | &(tmpfile), &(tmpdir), &(tmpline)); 288 | 289 | instr->desc = (InstrDesc*) CGD_MALLOC("cgd.instrs.id.1", sizeof(InstrDesc)); 290 | if (found) { 291 | /* Build up an absolute pathname, if there is a directory available */ 292 | instr->desc->name = (HChar*) CGD_MALLOC("cgd.adesc.na.1", 293 | (VG_(strlen)(tmpdir) + 1 + VG_(strlen)(tmpfile) + 1)); 294 | VG_(strcpy)(instr->desc->name, tmpdir); 295 | if (instr->desc->name[0] != '\0') 296 | VG_(strcat)(instr->desc->name, "/"); 297 | VG_(strcat)(instr->desc->name, tmpfile); 298 | 299 | instr->desc->lineno = tmpline; 300 | } else { 301 | instr->desc->name = 0; 302 | instr->desc->lineno = -1; 303 | } 304 | } 305 | 306 | return instr->desc; 307 | } 308 | 309 | Bool CGD_(instrs_cmp)(UniqueInstr* i1, UniqueInstr* i2) { 310 | return i1 && i2 && i1->addr == i2->addr && 
i1->size == i2->size; 311 | } 312 | 313 | void CGD_(print_instr)(UniqueInstr* instr, Bool complete) { 314 | CGD_ASSERT(instr != 0); 315 | 316 | VG_(printf)("0x%lx [%d]", instr->addr, instr->size); 317 | if (complete) { 318 | VG_(printf)(" ("); 319 | CGD_(print_instr_description)(CGD_(instr_description)(instr)); 320 | VG_(printf)(")"); 321 | } 322 | } 323 | 324 | void CGD_(fprint_instr)(VgFile* fp, UniqueInstr* instr, Bool complete) { 325 | CGD_ASSERT(fp != 0); 326 | CGD_ASSERT(instr != 0); 327 | 328 | VG_(fprintf)(fp, "0x%lx [%d]", instr->addr, instr->size); 329 | if (complete) { 330 | VG_(fprintf)(fp, " ("); 331 | CGD_(fprint_instr_description)(fp, CGD_(instr_description)(instr)); 332 | VG_(fprintf)(fp, ")"); 333 | } 334 | } 335 | 336 | void CGD_(print_instr_description)(InstrDesc* idesc) { 337 | CGD_ASSERT(idesc != 0); 338 | 339 | if (idesc->name) 340 | VG_(printf)("%s:%d", idesc->name, idesc->lineno); 341 | else 342 | VG_(printf)("unknown"); 343 | } 344 | 345 | void CGD_(fprint_instr_description)(VgFile* fp, InstrDesc* idesc) { 346 | CGD_ASSERT(fp != 0); 347 | CGD_ASSERT(idesc != 0); 348 | 349 | if (idesc->name) 350 | VG_(fprintf)(fp, "%s:%d", idesc->name, idesc->lineno); 351 | else 352 | VG_(fprintf)(fp, "unknown"); 353 | } 354 | -------------------------------------------------------------------------------- /paper/SPE20-cfggrind.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rimsa/CFGgrind/353cb3ba1e4bbd373f14ce743af1c317d1b3a539/paper/SPE20-cfggrind.pdf -------------------------------------------------------------------------------- /prototype/.gitignore: -------------------------------------------------------------------------------- 1 | *.dot 2 | __pycache__ 3 | -------------------------------------------------------------------------------- /prototype/cfg.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | from instr 
import * 4 | from group import * 5 | from config import * 6 | 7 | class Node(object): 8 | class Type(Enum): 9 | ENTRY = 1 10 | BASICBLOCK = 2 11 | PHANTOM = 3 12 | EXIT = 4 13 | HALT = 5 14 | 15 | def __init__(self, type): 16 | assert isinstance(type, Node.Type) 17 | self._type = type 18 | self._cache = [ (None, None, 0) for _ in range(CACHE_SIZE) ] 19 | 20 | @property 21 | def type(self): 22 | return self._type 23 | 24 | @property 25 | def cache(self): 26 | return self._cache 27 | 28 | class Edge(object): 29 | def __init__(self, src, dst, count): 30 | self._src = src 31 | self._dst = dst 32 | self._count = count 33 | 34 | @property 35 | def src(self): 36 | return self._src 37 | 38 | @property 39 | def dst(self): 40 | return self._dst 41 | 42 | @dst.setter 43 | def dst(self, dst): 44 | self._dst = dst 45 | 46 | @property 47 | def count(self): 48 | return self._count 49 | 50 | @count.setter 51 | def count(self, count): 52 | self._count = count 53 | 54 | def updateCount(self, count): 55 | assert count >= 0 56 | self._count += count 57 | 58 | class Entry(Node): 59 | def __init__(self): 60 | Node.__init__(self, Node.Type.ENTRY) 61 | 62 | def dot(self, simplified = True, padding = ""): 63 | str = padding + "Entry" 64 | if (not simplified): 65 | str += " [label=\"\",width=0.3,height=0.3,shape=circle,fillcolor=black,style=filled]" 66 | return str 67 | 68 | def __str__(self): 69 | return "entry" 70 | 71 | class BasicBlock(Node): 72 | def __init__(self, group, calls = None, signals = None): 73 | Node.__init__(self, Node.Type.BASICBLOCK) 74 | self._group = group 75 | self._calls = calls if calls else {} 76 | self._signals = signals if signals else {} 77 | 78 | @property 79 | def group(self): 80 | return self._group 81 | 82 | @property 83 | def calls(self): 84 | return self._calls 85 | 86 | @property 87 | def signals(self): 88 | return self._signals 89 | 90 | @property 91 | def addr(self): 92 | return self._group.addr() 93 | 94 | @property 95 | def size(self): 96 | return 
self._group.size() 97 | 98 | def add_call(self, cfg, count): 99 | if cfg.addr in self._calls: 100 | _, prev_count = self._calls[cfg.addr] 101 | self._calls[cfg.addr] = (cfg, prev_count + count) 102 | else: 103 | self._calls[cfg.addr] = (cfg, count) 104 | 105 | def add_signal(self, sigid, cfg, count): 106 | if sigid in self._signals: 107 | prev_cfg, prev_count = self._signals[sigid] 108 | assert cfg.addr == prev_cfg.addr 109 | self._signals[sigid] = (cfg, prev_count + count) 110 | else: 111 | self._signals[sigid] = (cfg, count) 112 | 113 | def is_direct(self): 114 | t = self._group.tail.type 115 | return t.direct if hasattr(t, 'direct') else True 116 | 117 | def is_indirect(self): 118 | return not self.is_direct() 119 | 120 | def dot(self, simplified = True, padding = ""): 121 | str = padding + "\"0x%x\"" % self.addr 122 | if not simplified: 123 | str += " [label=\"{\n" 124 | str += padding + " 0x%x [%d]\\l\n" % (self.addr, self.size) 125 | str += padding + " | [instrs]\\l\n" 126 | for instr in self._group.instrs(): 127 | str += padding + "   0x%x \<+%d\>: %s\\l\n" % \ 128 | (instr.addr, instr.size, self._escape(instr.text)) 129 | if (len(self._calls) > 0): 130 | str += padding + " | [calls]\\l\n" 131 | for addr in self._calls: 132 | cfg, count = self._calls[addr] 133 | str += padding + "   0x%x \{%d\} (%s)\\l\n" % (addr, count, cfg.name) 134 | if (len(self._signals) > 0): 135 | str += padding + " | [signals]\\l\n" 136 | for sigid in self._signals: 137 | cfg, count = self._signals[sigid] 138 | str += padding + "   %d: 0x%x \{%d\} (%s)\\l\n" % (sigid, cfg.addr, count, cfg.name) 139 | str += padding + "}\"]" 140 | return str 141 | 142 | def _escape(self, str): 143 | return str.replace("<", "\\<").replace(">", "\\>") 144 | 145 | def __cmp__(self, other): 146 | return self.addr == other.addr if isinstance(other, BasicBlock) else False 147 | 148 | def __str__(self, simplified = True): 149 | bb = "basicblock(group: %s, calls: [" % (self._group.__str__(simplified)) 150 | 
first = True 151 | for addr in self._calls: 152 | if (first): 153 | first = False 154 | else: 155 | bb += ", " 156 | bb += "@0x%x" % addr 157 | if not simplified: 158 | _, count = self._calls[addr] 159 | bb += "{%d}" % count 160 | bb += "], signals: [" 161 | for sigid in self._signals: 162 | cfg, count = self._signals[sigid] 163 | if (first): 164 | first = False 165 | else: 166 | bb += ", " 167 | bb += "@0x%x" % cfg.addr 168 | if not simplified: 169 | bb += "{%d}" % count 170 | bb += "])" 171 | return bb 172 | 173 | class Phantom(Node): 174 | def __init__(self, addr): 175 | Node.__init__(self, Node.Type.PHANTOM) 176 | self._addr = addr 177 | 178 | 179 | @property 180 | def addr(self): 181 | return self._addr 182 | 183 | def dot(self, simplified = True, padding = ""): 184 | str = padding + "\"0x%x\"" % self.addr 185 | if (not simplified): 186 | str += " [label=\"0x%lx\", style=dashed]" % self.addr 187 | return str 188 | 189 | def __cmp__(self, other): 190 | return self.addr == other.addr if isinstance(other, Phantom) else False 191 | 192 | def __str__(self): 193 | return "phantom(@0x%x)" % self._addr 194 | 195 | class Exit(Node): 196 | def __init__(self): 197 | Node.__init__(self, Node.Type.EXIT) 198 | 199 | def dot(self, simplified = True, padding = ""): 200 | str = padding + "Exit" 201 | if (not simplified): 202 | str += " [label=\"\",width=0.3,height=0.3,shape=circle,fillcolor=black,style=filled,peripheries=2]" 203 | return str 204 | 205 | def __str__(self): 206 | return "exit" 207 | 208 | 209 | class Halt(Node): 210 | def __init__(self): 211 | Node.__init__(self, Node.Type.HALT) 212 | 213 | def dot(self, simplified = True, padding = ""): 214 | str = padding + "Halt" 215 | if (not simplified): 216 | str += " [label=\"\",width=0.3,height=0.3,shape=square,fillcolor=black,style=filled,peripheries=2]" 217 | return str 218 | 219 | def __str__(self): 220 | return "halt" 221 | 222 | class CFG(object): 223 | def __init__(self, addr, name = "unknown"): 224 | assert addr 
!= 0 225 | self._addr = addr 226 | self._name = name 227 | self._entry = Entry() 228 | self._exit = Exit() 229 | self._halt = Halt() 230 | self._nodes = [self._entry, self._exit, self._halt] 231 | self._edges = [] 232 | self._valid = False 233 | 234 | @property 235 | def addr(self): 236 | return self._addr 237 | 238 | @property 239 | def name(self): 240 | return self._name if self._name else "unknown" 241 | 242 | @name.setter 243 | def name(self, name): 244 | self._name = name 245 | 246 | @property 247 | def entry(self): 248 | return self._entry 249 | 250 | @property 251 | def exit(self): 252 | return self._exit 253 | 254 | @property 255 | def halt(self): 256 | return self._halt 257 | 258 | @property 259 | def nodes(self): 260 | return self._nodes.copy() 261 | 262 | @property 263 | def edges(self): 264 | return self._edges.copy() 265 | 266 | def addr2node(self, addr): 267 | for node in self.nodes: 268 | if (isinstance(node, BasicBlock) or isinstance(node, Phantom)): 269 | if (node.addr == addr): 270 | return node 271 | 272 | def add_node(self, node): 273 | assert isinstance(node, BasicBlock) or isinstance(node, Phantom) 274 | assert not self.addr2node(node.addr) 275 | self._nodes.append(node) 276 | self._dirty = True 277 | return node 278 | 279 | def _find_edge(self, src, dst): 280 | for edge in self._edges: 281 | if edge.src == src and edge.dst == dst: 282 | return edge 283 | return None 284 | 285 | def add_edge(self, src, dst, count): 286 | assert count >= 0 287 | edge = self._find_edge(src, dst) 288 | if edge: 289 | edge.updateCount(count) 290 | else: 291 | assert src in self._nodes 292 | assert dst in self._nodes 293 | if isinstance(src, Entry): 294 | assert not self.succs(src) 295 | else: 296 | isinstance(src, BasicBlock) 297 | assert not isinstance(dst, Entry) 298 | edge = Edge(src, dst, count) 299 | self._edges.append(edge) 300 | self._dirty = True 301 | return edge.dst 302 | 303 | def succs(self, node): 304 | return [ edge.dst for edge in self._edges if 
edge.src == node ] 305 | 306 | def preds(self, node): 307 | return [ edge.src for edge in self._edges if edge.dst == node ] 308 | 309 | def remove_node(self, node): 310 | assert node in self._nodes 311 | for pred in self.preds(node): 312 | self.remove_edge(pred, node) 313 | for succ in self.succs(node): 314 | self.remove_edge(node, succ) 315 | self._nodes.remove(node) 316 | 317 | def remove_edge(self, src, dst): 318 | edge = self._find_edge(src, dst) 319 | assert edge 320 | self._edges.remove(edge) 321 | 322 | def is_valid(self): 323 | if self._dirty: 324 | self._valid = self._check() 325 | self._dirty = False 326 | 327 | return self._valid 328 | 329 | def find_node_with_addr(self, addr): 330 | for node in self.nodes: 331 | if isinstance(node, BasicBlock): 332 | for instr in node.group.instrs(): 333 | if instr.addr == addr: 334 | return node 335 | elif isinstance(node, Phantom): 336 | if node.addr == addr: 337 | return node 338 | return None 339 | 340 | def phantom2basicblock(self, old, new): 341 | assert old in self._nodes 342 | assert isinstance(old, Phantom) 343 | assert not new in self._nodes 344 | assert isinstance(new, BasicBlock) 345 | preds = self.preds(old) 346 | self.remove_node(old) 347 | self.add_node(new) 348 | for p in preds: 349 | self.add_edge(p, new, 0) 350 | return new 351 | 352 | def split(self, node, addr): 353 | assert node in self._nodes 354 | assert isinstance(node, BasicBlock) 355 | assert node.group.leader.addr != addr 356 | assert node.group.has_instr_with_addr(addr) 357 | 358 | instr = node.group.pop_leader() 359 | new = BasicBlock(Group(instr)) 360 | self.add_node(new) 361 | while node.group.leader.addr != addr: 362 | instr = node.group.pop_leader() 363 | new.group.add_instr(instr) 364 | 365 | count = 0 366 | for pred in self.preds(node): 367 | edge = self._find_edge(pred, node) 368 | assert edge 369 | count += edge.count 370 | edge.dst = new 371 | 372 | self.add_edge(new, node, count) 373 | return node 374 | 375 | def flush_counts(self, 
src, group, dst, count): 376 | assert count > 0 377 | size = 0 378 | while size < group.size(): 379 | tmp = self.addr2node(group.leader.addr + size) 380 | assert tmp 381 | 382 | edge = self._find_edge(src, tmp) 383 | assert edge 384 | edge.updateCount(count) 385 | 386 | size += tmp.size 387 | src = tmp 388 | assert size == group.size() 389 | assert src == dst 390 | self._dirty = True 391 | 392 | def _check(self): 393 | has_exit = False 394 | has_halt = False 395 | for node in self._nodes: 396 | preds_total = preds_count = 0 397 | for pred in self.preds(node): 398 | edge = self._find_edge(pred, node) 399 | preds_count += edge.count 400 | preds_total += 1 401 | 402 | succs_total = succs_count = 0 403 | for succ in self.succs(node): 404 | edge = self._find_edge(node, succ) 405 | succs_count += edge.count 406 | succs_total += 1 407 | 408 | if (node == self._entry): 409 | if preds_total != 0 or succs_total == 0 or succs_count == 0: 410 | return False 411 | elif (node == self._exit): 412 | if preds_total > 0: 413 | if succs_total != 0 or preds_count == 0: 414 | return False 415 | 416 | has_exit = True 417 | elif (node == self._halt): 418 | if preds_total > 0: 419 | if succs_total != 0 or preds_count == 0: 420 | return False 421 | 422 | has_halt = True 423 | else: 424 | if preds_total == 0 or succs_total == 0 or preds_count != succs_count: 425 | return False 426 | 427 | return has_exit or has_halt 428 | 429 | def dot(self, working = None): 430 | str = "digraph \"0x%x\" {\n" % self._addr 431 | str += " label = \"0x%x (%s)\"\n" % (self._addr, self._name) 432 | str += " labelloc = \"t\"\n" 433 | str += " viewport = \"1080,1440,1\"\n" 434 | str += " node[shape=record]\n\n" 435 | 436 | unknown = 1 437 | for node in self._nodes: 438 | if (node == self.entry or len(self.preds(node))): 439 | str += node.dot(False, " ") + "\n" 440 | 441 | if (isinstance(node, BasicBlock) and node.is_indirect()): 442 | str += " \"Unknown%d\" [label=\"?\", shape=none]\n" % unknown 443 | str += " 
\"0x%x\" -> \"Unknown%d\" [style=dashed]\n" % (node.addr, unknown) 444 | unknown += 1 445 | 446 | for edge in self._edges: 447 | str += " " + edge.src.dot() + " -> " + edge.dst.dot() 448 | if isinstance(edge.dst, Phantom): 449 | assert edge.count == 0 450 | str += " [style=dashed]" 451 | elif edge.count > 0: 452 | str += " [label=\"%d\"]" % edge.count 453 | str += "\n" 454 | 455 | if working: 456 | str += " \"working\" [label=\"working\", fillcolor=red, fontcolor=white, style=\"rounded,filled\", shape=diamond]\n" 457 | str += " \"working\" -> " + working.dot() + " [color=red]\n" 458 | 459 | str += "}\n" 460 | return str 461 | 462 | def __str__(self): 463 | cfg = "([" 464 | first = True 465 | for node in self.nodes: 466 | if (first): 467 | first = False 468 | else: 469 | cfg += ", " 470 | cfg += str(node) 471 | cfg += "], [" 472 | first = True 473 | for edge in self._edges: 474 | if (first): 475 | first = False 476 | else: 477 | cfg += ", " 478 | cfg += "(%s, %s, %d)" % (edge.src, edge.dst, edge.count) 479 | cfg += "])" 480 | return cfg 481 | -------------------------------------------------------------------------------- /prototype/config.py: -------------------------------------------------------------------------------- 1 | CACHE_SIZE = 8 2 | -------------------------------------------------------------------------------- /prototype/group.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | from instr import * 4 | 5 | class Group(object): 6 | def __init__(self, instr, others = None): 7 | self._sequence = [instr] 8 | self._transfer = not isinstance(instr.type, StandardType) 9 | if others: 10 | self.add_instrs(others) 11 | 12 | @property 13 | def leader(self): 14 | return self._sequence[0] 15 | 16 | @property 17 | def tail(self): 18 | return self._sequence[-1] 19 | 20 | def addr(self): 21 | return self.leader.addr 22 | 23 | def size(self): 24 | s = 0 25 | for instr in self._sequence: 26 | s += 
instr.size 27 | return s 28 | 29 | def at(self, idx): 30 | return self._sequence[idx] if idx < len(self._sequence) else None 31 | 32 | def next(self, instr): 33 | idx = 0 34 | for i in self._sequence: 35 | if instr == i: 36 | break 37 | 38 | idx = idx + 1 39 | return self._sequence[idx + 1] if (idx + 1) < len(self._sequence) else None 40 | 41 | def instrs(self): 42 | return self._sequence.copy() 43 | 44 | def has_instr(self, instr): 45 | return instr in self._sequence 46 | 47 | def has_instr_with_addr(self, addr): 48 | for instr in self._sequence: 49 | if instr.addr == addr: 50 | return True 51 | return False 52 | 53 | def has_transfer(self): 54 | return self._transfer 55 | 56 | def add_instr(self, instr): 57 | assert isinstance(instr, Instruction) 58 | assert not self._transfer 59 | assert instr.addr == self.tail.addr + self.tail.size 60 | 61 | self._sequence.append(instr) 62 | if (not isinstance(instr.type, StandardType)): 63 | self._transfer = True 64 | 65 | def add_instrs(self, instrs): 66 | for instr in instrs: 67 | self.add_instr(instr) 68 | 69 | def pop_leader(self): 70 | assert len(self._sequence) > 1 71 | return self._sequence.pop(0) 72 | 73 | def __str__(self, simplified = True): 74 | grp = "[" 75 | 76 | if (not self._sequence): 77 | grp = "]" 78 | else: 79 | if (not simplified): 80 | grp += "\n" 81 | 82 | first = True 83 | for instr in self._sequence: 84 | if (simplified): 85 | if first: 86 | first = False 87 | else: 88 | grp += ", " 89 | grp += "@0x%x" % instr.addr 90 | else: 91 | grp += " " + str(instr) + "\n" 92 | 93 | grp += "]" 94 | 95 | return grp 96 | 97 | def __eq__(self, other): 98 | if not isinstance(other, Group): 99 | return False 100 | 101 | return set(self.instrs()) == set(other.instrs()) 102 | 103 | def __hash__(self): 104 | return int(self.addr()) 105 | -------------------------------------------------------------------------------- /prototype/instr.py: -------------------------------------------------------------------------------- 1 | 
class Type(object):
    """Base class for the control-flow classification of an instruction."""

    class Id(Enum):
        """Discriminator stored in every Type instance."""
        STANDARD = 1
        JUMP = 2
        BRANCH = 3
        CALL = 4
        RETURN = 5

    def __init__(self, type):
        assert isinstance(type, Type.Id)
        self._type = type

    @property
    def type(self):
        """The Type.Id of this classification."""
        return self._type

class StandardType(Type):
    """Instruction that does not alter the control flow."""

    def __init__(self):
        Type.__init__(self, Type.Id.STANDARD)

    def __str__(self):
        return "standard"

class JumpType(Type):
    """Unconditional jump; the target is 0 exactly when the jump is indirect."""

    def __init__(self, target, direct = True):
        # A zero target is only accepted for indirect jumps and a
        # non-zero target only for direct ones.
        if (target == 0):
            assert not direct
        else:
            assert direct

        Type.__init__(self, Type.Id.JUMP)
        self._target = target
        self._direct = direct

    @property
    def target(self):
        return self._target

    @property
    def direct(self):
        return self._direct

    @property
    def indirect(self):
        return not self._direct

    def __str__(self):
        return "jump(@0x%x, %s)" % (self.target, ("direct" if self.direct else "indirect"))

class BranchType(Type):
    """Conditional branch with a target and a non-zero fallthrough address."""

    def __init__(self, target, fallthrough, direct = True):
        # Same target/direct pairing rule as JumpType.
        if (target == 0):
            assert not direct
        else:
            assert direct
        assert fallthrough != 0

        Type.__init__(self, Type.Id.BRANCH)
        self._target = target
        self._fallthrough = fallthrough
        self._direct = direct

    @property
    def target(self):
        return self._target

    @property
    def fallthrough(self):
        return self._fallthrough

    @property
    def direct(self):
        return self._direct

    @property
    def indirect(self):
        return not self._direct

    def __str__(self):
        return "branch(@0x%x, @0x%x, %s)" % (self.target, self.fallthrough, ("direct" if self.direct else "indirect"))

class CallType(Type):
    """Call with a target and a fallthrough address."""

    def __init__(self, target, fallthrough, direct = True):
        # Same target/direct pairing rule as JumpType.
        if (target == 0):
            assert not direct
        else:
            assert direct

        Type.__init__(self, Type.Id.CALL)
        self._target = target
        self._fallthrough = fallthrough
        self._direct = direct

    @property
    def target(self):
        return self._target

    @property
    def fallthrough(self):
        return self._fallthrough

    @property
    def direct(self):
        return self._direct

    @property
    def indirect(self):
        return not self._direct

    def __str__(self):
        return "call(@0x%x, @0x%x, %s)" % (self.target, self.fallthrough,
                    ("direct" if self.direct else "indirect"))

class ReturnType(Type):
    """Return instruction."""

    def __init__(self):
        Type.__init__(self, Type.Id.RETURN)

    def __str__(self):
        return "return"

class Instruction(object):
    """An instruction registered by address in a class-wide table.

    At most one instruction may exist per address; creating a second one
    for an address that is still registered fails an assertion.
    """

    # Registry of the fully constructed instructions, keyed by address.
    _all = {}

    def __new__(cls, addr, *args, **kwargs):
        # addr is a named parameter so that it can be given positionally
        # or by keyword, exactly as __init__ accepts it.
        assert addr != 0
        assert not (addr in cls._all)
        return object.__new__(cls)

    def __del__(self):
        # The object may be only partially built (when __init__ failed), and
        # the address may by now belong to another instance: only remove
        # the registry entry when it really refers to this object.
        addr = getattr(self, "_addr", None)
        registry = self.__class__._all
        if addr is not None and registry.get(addr) is self:
            del registry[addr]

    @classmethod
    def find(cls, addr):
        """Return the instruction registered at addr, or None."""
        return cls._all.get(addr)

    @classmethod
    def all(cls):
        """Return a view of every registered instruction."""
        return cls._all.values()

    def __init__(self, addr, size, text = '', type = None):
        """Create and register an instruction.

        addr -- non-zero address, not registered yet.
        size -- non-zero size.
        text -- free text stored with the instruction.
        type -- a Type instance; a StandardType is used when omitted.
        """
        assert addr != 0 and size != 0
        if not type:
            type = StandardType()
        assert isinstance(type, Type)
        self._addr = addr
        self._size = size
        self._text = text
        self._type = type

        # Register only after every check passed, so that a failed
        # construction never leaves a half-built entry behind.
        self.__class__._all[addr] = self

    @property
    def addr(self):
        return self._addr

    @property
    def size(self):
        return self._size

    @property
    def text(self):
        return self._text

    @property
    def type(self):
        return self._type

    def __str__(self):
        return "(@0x%x, %d, %s, \"%s\")" % (self.addr, self.size, self.type, self.text)
class Machine(object):
    """Reads a textual program description and replays it.

    The description holds ``load(...)`` commands, which create instructions,
    and ``exec(addr)`` commands, whose addresses are recorded in file order.
    Everything from a '#' to the end of a line is discarded.
    """

    def __init__(self, filename):
        # Addresses given by the exec commands, in file order.
        self._cmds = []

        with open(filename) as fd:
            # Drop comments and surrounding whitespace from every line.
            for line in [re.sub(r'#.*$', '', tmp).strip() for tmp in fd]:
                if not line:
                    continue

                if line.casefold().startswith("load"):
                    Machine._process_load(line)
                elif line.casefold().startswith("exec"):
                    self._cmds.append(Machine._process_exec(line))
                else:
                    assert False, "unknown command: %s" % line

    @staticmethod
    def _parse_direct(args, idx):
        """Return the direct flag held by the optional argument at idx.

        A missing argument means direct; anything other than "direct" or
        "indirect" (in any letter case) fails an assertion.
        """
        if len(args) <= idx:
            return True

        flag = args[idx].strip().casefold()
        assert flag in ("direct", "indirect"), "invalid flag: %s" % args[idx]
        return flag == "direct"

    @staticmethod
    def _process_load(line):
        """Parse a load command and return the Instruction it creates.

        Expected form: load(addr, size, "text"[, type[(args)]]); where type
        is standard, jump, branch, call or return (in any letter case).
        """
        instr = re.match(r'^\s*([^\(]*)\s*\(\s*([^,]*),\s*([^,]*),\s*\"([^\"]*)\"\s*(.*)\s*\)\s*;\s*$', line)
        # A load line that does not parse must not be dropped silently,
        # otherwise the instruction would simply be missing later on.
        assert instr, "malformed load command: %s" % line
        assert instr.group(1).strip().casefold() == "load"

        addr = int(instr.group(2), 0)
        assert addr != 0
        size = int(instr.group(3))
        assert size > 0
        text = instr.group(4).strip()

        spec = re.match(r'^,\s*([^\(]*)\s*\(?\s*([^\)]*)\s*\)?\s*$', instr.group(5))
        if spec:
            # The captures may carry trailing blanks; normalize them so
            # every type name is matched the same, case-insensitive, way.
            name = spec.group(1).strip().casefold()
            tmp = spec.group(2).strip()
            args = re.split(r'\s*,\s*', tmp) if tmp else []
        else:
            assert not instr.group(5)
            name = "standard"
            args = []

        if name == "standard":
            assert not args
            return Instruction(addr, size, text)
        elif name == "jump":
            assert len(args) >= 1 and len(args) <= 2
            target = int(args[0], 0)
            direct = Machine._parse_direct(args, 1)

            return Instruction(addr, size, text, JumpType(target, direct))
        elif name == "branch" or name == "call":
            assert len(args) >= 2 and len(args) <= 3
            target = int(args[0], 0)
            fallthrough = int(args[1], 0)
            assert fallthrough != 0
            direct = Machine._parse_direct(args, 2)

            if name == "branch":
                return Instruction(addr, size, text, BranchType(target, fallthrough, direct))
            return Instruction(addr, size, text, CallType(target, fallthrough, direct))
        elif name == "return":
            assert not args
            return Instruction(addr, size, text, ReturnType())
        else:
            assert False, "unknown instruction type: %s" % name

    @staticmethod
    def _process_exec(line):
        """Parse an exec command, exec(addr);, and return its address."""
        obj = re.match(r'^\s*([^\(]*)\s*\(\s*(.*)\s*\)\s*;\s*$', line)
        assert obj, "malformed exec command: %s" % line
        assert obj.group(1).strip().casefold() == "exec"
        return int(obj.group(2), 0)

    def _find_group(self, addr):
        """Build the group that starts at addr.

        Instructions are appended in address order while the last one added
        is of StandardType and a following instruction exists.
        """
        instr = Instruction.find(addr)
        assert instr

        group = Group(instr)
        while isinstance(instr.type, StandardType):
            # The next instruction starts right after the current one.
            instr = Instruction.find(instr.addr + instr.size)
            if not instr:
                break

            group.add_instr(instr)
        return group

    def start_addr(self):
        """Return the address of the first exec command."""
        return self._cmds[0]

    def run(self):
        """Return the list of groups, one per exec command, in order."""
        return [self._find_group(addr) for addr in self._cmds]
def process_group(cfg, working, group):
    """Merge the instructions of an executed group into cfg.

    cfg     -- graph being built; queried and updated through
               find_node_with_addr, phantom2basicblock, split, add_node,
               add_edge and succs.
    working -- node from which the group was reached.
    group   -- the executed group; its instructions are visited in the
               order given by group.instrs().

    Returns the working node reached after the last instruction.
    """
    # Current instruction in the working node can be any,
    # from leader to tail. We use it to match the
    # instructions in the group in sequence.
    #
    # None indicates that the current instruction in the
    # group must be a successor of the working node. The
    # first instruction of the group must always be a
    # successor, hence the initial None.
    curr_instr = None

    # For each instruction in the group.
    for instr in group.instrs():
        # If there is a current instruction, it must match
        # the instruction being processed.
        if curr_instr:
            assert curr_instr == instr
        # If there is no current instruction, find the successor
        # of the working node that matches the instruction in the group.
        # Create, split or transform a node if necessary.
        else:
            # Search for a node that contains an instruction
            # with the address of this instruction.
            node = cfg.find_node_with_addr(instr.addr)

            # If it exists, then this node is or will be a successor of the working node.
            if node:
                # If it is a phantom node, transform it into a basic block.
                if isinstance(node, Phantom):
                    node = cfg.phantom2basicblock(node, BasicBlock(Group(instr)))
                # Otherwise it must be a basic block; split it
                # when the instruction is not its leader.
                elif node.group.leader != instr:
                    node = cfg.split(node, instr.addr)

                # The leader of this node must match the instruction.
                assert node.group.leader == instr

                # Make it a successor of the working node or update the count.
                cfg.add_edge(working, node, 1)

                # Make this node the working node.
                working = node
            # If it does not exist, the instruction is new and we can:
            # (1) append it to the working node (if possible); or
            # (2) create a new block with it as the head instruction that
            #     will be connected to the working node. This block will
            #     be the new working node.
            else:
                # Append the instruction if possible.
                # It must not be the first instruction in the group, and the
                # working node must be a basic block without successors,
                # calls and signals.
                if instr != group.leader and isinstance(working, BasicBlock) and \
                        (not working.calls) and (not working.signals) and (not cfg.succs(working)):
                    # Since the instructions are in sequence, the instruction must come
                    # immediately after the tail of the working node.
                    assert (working.group.tail.addr + working.group.tail.size) == instr.addr

                    # Add the instruction making it the new tail.
                    working.group.add_instr(instr)
                # Create a new block, connect the working node to it and
                # make it the new working node.
                else:
                    node = cfg.add_node(BasicBlock(Group(instr)))
                    cfg.add_edge(working, node, 1)
                    working = node

        # Make the next instruction in the working block the current one for
        # the next iteration. It is None when no instructions are left.
        curr_instr = working.group.next(instr)

    return working
109 | for addr in addrs: 110 | if addr != target_addr: 111 | # Search for a node that contains an instruction 112 | # with this address. 113 | node = state.current.cfg.find_node_with_addr(addr) 114 | 115 | # If exists, node can be either phantom or basic block. 116 | if node: 117 | # If it is a basic block, split the node if the 118 | # address does not match the leader instruction. 119 | if isinstance(node, BasicBlock) and node.group.leader.addr != addr: 120 | node = state.current.cfg.split(node, addr) 121 | # Otherwise, create a phantom node with this address. 122 | else: 123 | node = state.current.cfg.add_node(Phantom(addr)) 124 | 125 | # Connect the working with this node or update count. 126 | state.current.cfg.add_edge(state.current.working, node, 0) 127 | 128 | if __DEBUGGING__: 129 | write_cfg(state.current.cfg, "step", state.current.working) 130 | 131 | # For call instruction, find the target cfg and added to the call list of 132 | # the working node. Then. save the current state with the return address 133 | # in the call stack and set the new current state with the called cfg 134 | # and its entry node. 135 | elif isinstance(type, CallType): 136 | # Find the called CFG. 137 | called = mapping.setdefault(target_addr, CFG(target_addr)) 138 | 139 | # Add the called cfg to the call list of the working node. 140 | state.current.working.add_call(called, 1) 141 | 142 | if __DEBUGGING__: 143 | write_cfg(state.current.cfg, "step", state.current.working) 144 | 145 | # Push the current state to the call stack with the 146 | # expected return address. 147 | state.callstack.push(state.current, type.fallthrough) 148 | 149 | # Update the current state with the called cfg and its entry node. 150 | state.current = (called, called.entry) 151 | 152 | if __DEBUGGING__: 153 | write_cfg(state.current.cfg, "step", state.current.working) 154 | 155 | # For return instructions, first count how many calls are stacked until it 156 | # reaches the correct return number. 
157 | elif isinstance(type, ReturnType): 158 | pops = state.callstack.pops_count(target_addr) 159 | while pops > 0: 160 | # Connect the exit node or update count. 161 | state.current.cfg.add_edge(state.current.working, 162 | state.current.cfg.exit, 1) 163 | 164 | if __DEBUGGING__: 165 | write_cfg(state.current.cfg, "step", state.current.cfg.exit) 166 | 167 | # Pop the current from the call stack. 168 | state.current = state.callstack.pop() 169 | pops -= 1 170 | 171 | else: 172 | assert False, "unreachable code" 173 | 174 | return mapping 175 | 176 | def process_program(mapping, machine): 177 | # The next group of instruction that will be executed. 178 | for group in machine.run(): 179 | addr = group.leader.addr 180 | if not state.current: 181 | initial = mapping.setdefault(addr, CFG(addr)) 182 | state.current = (initial, initial.entry) 183 | 184 | if __DEBUGGING__: 185 | write_cfg(state.current.cfg, "step", state.current.working) 186 | else: 187 | assert isinstance(state.current.working, BasicBlock) 188 | mapping = process_type(mapping, state.current.working.group.tail.type, addr) 189 | 190 | if __CACHING__: 191 | # Check if we processed this group from this working point. 192 | idx = addr % CACHE_SIZE 193 | cached_group, cached_working, cached_count = state.current.working.cache[idx] 194 | if cached_group == group: 195 | # And update the count. 196 | state.current.working.cache[idx] = (cached_group, cached_working, cached_count + 1) 197 | 198 | # In this case, just use the next working from the start. 199 | state.current.working = cached_working 200 | else: 201 | if cached_count > 0: 202 | state.current.cfg.flush_counts(state.current.working, cached_group, cached_working, cached_count) 203 | 204 | # Save the working pointer before processing the group. 205 | prev_working = state.current.working 206 | # Process the group and update working. 
class CurrentPair(object):
    """A (cfg, working node) pair; both members may be left unset."""

    def __init__(self, cfg = None, working = None):
        # Go through the property setters so both values are checked.
        self.cfg = cfg
        self.working = working

    @property
    def cfg(self):
        return self._cfg

    @cfg.setter
    def cfg(self, cfg):
        # Falsy values are stored as they are; anything else must be a CFG.
        if cfg:
            assert isinstance(cfg, CFG)
        self._cfg = cfg

    @property
    def working(self):
        return self._working

    @working.setter
    def working(self, working):
        # Falsy values are stored as they are; anything else must be a Node.
        if working:
            assert isinstance(working, Node)
        self._working = working

    def copy(self):
        """Return a new pair holding the same cfg and working node."""
        duplicate = CurrentPair(self.cfg, self.working)
        return duplicate


from collections import namedtuple
CSEntry = namedtuple('CSEntry', 'current ret_addr')

class CallStack(object):
    """Stack of CSEntry records: a saved pair plus its return address."""

    def __init__(self, cs = None):
        # A non-empty list is adopted as the storage; otherwise start empty.
        self._callstack = cs or []

    def push(self, current, ret_addr):
        """Save current together with the address it should return to."""
        assert isinstance(current, CurrentPair)
        entry = CSEntry(current, ret_addr)
        self._callstack.append(entry)

    def pops_count(self, ret_addr):
        """Return how many pops reach the topmost entry with ret_addr.

        The result is 0 when no entry carries that return address.
        """
        for depth, entry in enumerate(reversed(self._callstack), 1):
            if entry.ret_addr == ret_addr:
                return depth
        return 0

    def pop(self):
        """Remove the top entry and return the pair it saved."""
        assert self.size() > 0
        top = self._callstack.pop()
        return top.current

    def size(self):
        """Return the number of stacked entries."""
        return len(self._callstack)

    def __bool__(self):
        return self.size() != 0


class State(object):
    """The current pair plus the call stack of suspended pairs."""

    def __init__(self, current, cs = None):
        self.current = current
        self._callstack = CallStack(cs)

    @property
    def current(self):
        return self._current

    @current.setter
    def current(self, current):
        # Accepts nothing (falsy), a CurrentPair (stored as a copy), or any
        # indexable whose first two items are the cfg and the working node.
        if not current:
            self._current = None
            return

        if isinstance(current, CurrentPair):
            self._current = current.copy()
            return

        self._current = CurrentPair(current[0], current[1])

    @property
    def callstack(self):
        return self._callstack
int main(int argc, char* argv[]) { 15 | fmap(0, inc); 16 | fmap(-1, inc); 17 | 18 | return 0; 19 | } 20 | -------------------------------------------------------------------------------- /prototype/test/simple.desc: -------------------------------------------------------------------------------- 1 | load(0x100000f40, 1, "push %rbp"); 2 | load(0x100000f41, 3, "mov %rsp,%rbp"); 3 | load(0x100000f44, 4, "sub $0x20,%rsp"); 4 | load(0x100000f48, 2, "xor %eax,%eax"); 5 | load(0x100000f4a, 7, "lea -0x71(%rip),%rcx"); 6 | load(0x100000f51, 7, "movl $0x0,-0x4(%rbp)"); 7 | load(0x100000f58, 3, "mov %edi,-0x8(%rbp)"); 8 | load(0x100000f5b, 4, "mov %rsi,-0x10(%rbp)"); 9 | load(0x100000f5f, 2, "mov %eax,%edi"); 10 | load(0x100000f61, 3, "mov %rcx,%rsi"); 11 | load(0x100000f64, 5, "callq 0x100000f00 ", call(0x100000f00, 0x100000f69)); 12 | load(0x100000f69, 5, "mov $0xffffffff,%edi"); 13 | load(0x100000f6e, 7, "lea -0x95(%rip),%rsi"); 14 | load(0x100000f75, 3, "mov %eax,-0x14(%rbp)"); 15 | load(0x100000f78, 5, "callq 0x100000f00 ", call(0x100000f00, 0x100000f7d)); 16 | load(0x100000f7d, 2, "xor %edi,%edi"); 17 | load(0x100000f7f, 3, "mov %eax,-0x18(%rbp)"); 18 | load(0x100000f82, 2, "mov %edi,%eax"); 19 | load(0x100000f84, 4, "add $0x20,%rsp"); 20 | load(0x100000f88, 1, "pop %rbp"); 21 | load(0x100000f89, 1, "retq", return); 22 | load(0x100000f00, 1, "push %rbp"); 23 | load(0x100000f01, 3, "mov %rsp,%rbp"); 24 | load(0x100000f04, 4, "sub $0x10,%rsp"); 25 | load(0x100000f08, 3, "mov %edi,-0x4(%rbp)"); 26 | load(0x100000f0b, 4, "mov %rsi,-0x10(%rbp)"); 27 | load(0x100000f0f, 4, "cmpl $0x0,-0x4(%rbp)"); 28 | load(0x100000f13, 6, "jge 0x100000f28 ", branch(0x100000f28, 0x100000f19)); 29 | load(0x100000f19, 4, "mov -0x10(%rbp),%rax"); 30 | load(0x100000f1d, 3, "mov -0x4(%rbp),%edi"); 31 | load(0x100000f20, 2, "callq *%rax", call(0, 0x100000f22, indirect)); 32 | load(0x100000f22, 4, "add $0x10,%rsp"); 33 | load(0x100000f26, 1, "pop %rbp"); 34 | load(0x100000f27, 1, "retq", return); 35 
/* Exchange the two integers pointed to by x and y. */
void swap(int* x, int* y) {
	int tmp = *x;
	*x = *y;
	*y = tmp;
}

/*
 * Sort the s elements of a in ascending order, in place.
 * For every position i, each later element smaller than a[i]
 * is swapped into position i.
 */
void bubble(int* a, int s) {
	int i, j;

	for (i = 0; i < s; i++) {
		for (j = (i+1); j < s; j++) {
			if (a[j] < a[i])
				swap(&(a[i]), &(a[j]));
		}
	}
}

/* Reverse the s elements of a, in place. */
void invert(int* a, int s) {
	int i;

	// Swap each element of the first half with its mirror position.
	for (i = 0; i < s/2; i++)
		swap(&(a[i]), &(a[s-i-1]));
}

/* Sort a fixed six-element array and then reverse it; always returns 0. */
int main(int argc, char* argv[]) {
	int array[] = { 23, 16, 4, 15, 42, 8 };
	// Number of elements in the array.
	int size = sizeof(array) / sizeof(int);

	bubble(array, size);
	invert(array, size);

	return 0;
}
"movq -0x8(%rbp), %rsi"); 6 | load(0x100000de0, 2, "movl (%rsi), %eax"); 7 | load(0x100000de2, 3, "movl %eax, -0x14(%rbp)"); 8 | load(0x100000de5, 4, "movq -0x10(%rbp), %rsi"); 9 | load(0x100000de9, 2, "movl (%rsi), %eax"); 10 | load(0x100000deb, 4, "movq -0x8(%rbp), %rsi"); 11 | load(0x100000def, 2, "movl %eax, (%rsi)"); 12 | load(0x100000df1, 3, "movl -0x14(%rbp), %eax"); 13 | load(0x100000df4, 4, "movq -0x10(%rbp), %rsi"); 14 | load(0x100000df8, 2, "movl %eax, (%rsi)"); 15 | load(0x100000dfa, 1, "popq %rbp"); 16 | load(0x100000dfb, 1, "retq", return); 17 | load(0x100000e00, 1, "pushq %rbp"); 18 | load(0x100000e01, 3, "movq %rsp, %rbp"); 19 | load(0x100000e04, 4, "subq $0x20, %rsp"); 20 | load(0x100000e08, 4, "movq %rdi, -0x8(%rbp)"); 21 | load(0x100000e0c, 3, "movl %esi, -0xc(%rbp)"); 22 | load(0x100000e0f, 7, "movl $0x0, -0x10(%rbp)"); 23 | load(0x100000e16, 3, "movl -0x10(%rbp), %eax"); 24 | load(0x100000e19, 3, "cmpl -0xc(%rbp), %eax"); 25 | load(0x100000e1c, 6, "jge 0x100000e9c", branch(0x100000e9c, 0x100000e22)); 26 | load(0x100000e22, 3, "movl -0x10(%rbp), %eax"); 27 | load(0x100000e25, 3, "addl $0x1, %eax"); 28 | load(0x100000e28, 3, "movl %eax, -0x14(%rbp)"); 29 | load(0x100000e2b, 3, "movl -0x14(%rbp), %eax"); 30 | load(0x100000e2e, 3, "cmpl -0xc(%rbp), %eax"); 31 | load(0x100000e31, 6, "jge 0x100000e89", branch(0x100000e89, 0x100000e37)); 32 | load(0x100000e37, 4, "movslq -0x14(%rbp), %rax"); 33 | load(0x100000e3b, 4, "movq -0x8(%rbp), %rcx"); 34 | load(0x100000e3f, 3, "movl (%rcx,%rax,4), %edx"); 35 | load(0x100000e42, 4, "movslq -0x10(%rbp), %rax"); 36 | load(0x100000e46, 4, "movq -0x8(%rbp), %rcx"); 37 | load(0x100000e4a, 3, "cmpl (%rcx,%rax,4), %edx"); 38 | load(0x100000e4d, 6, "jge 0x100000e76", branch(0x100000e76, 0x100000e53)); 39 | load(0x100000e53, 4, "movslq -0x10(%rbp), %rax"); 40 | load(0x100000e57, 4, "shlq $0x2, %rax"); 41 | load(0x100000e5b, 4, "addq -0x8(%rbp), %rax"); 42 | load(0x100000e5f, 4, "movslq -0x14(%rbp), %rcx"); 43 | 
load(0x100000e63, 4, "shlq $0x2, %rcx"); 44 | load(0x100000e67, 4, "addq -0x8(%rbp), %rcx"); 45 | load(0x100000e6b, 3, "movq %rax, %rdi"); 46 | load(0x100000e6e, 3, "movq %rcx, %rsi"); 47 | load(0x100000e71, 5, "callq _swap", call(0x100000dd0, 0x100000e76)); 48 | load(0x100000e76, 5, "jmp 0x100000e7b", jump(0x100000e7b)); 49 | load(0x100000e7b, 3, "movl -0x14(%rbp), %eax"); 50 | load(0x100000e7e, 3, "addl $0x1, %eax"); 51 | load(0x100000e81, 3, "movl %eax, -0x14(%rbp)"); 52 | load(0x100000e84, 5, "jmp 0x100000e2b", jump(0x100000e2b)); 53 | load(0x100000e89, 5, "jmp 0x100000e8e", jump(0x100000e8e)); 54 | load(0x100000e8e, 3, "movl -0x10(%rbp), %eax"); 55 | load(0x100000e91, 3, "addl $0x1, %eax"); 56 | load(0x100000e94, 3, "movl %eax, -0x10(%rbp)"); 57 | load(0x100000e97, 5, "jmp 0x100000e16", jump(0x100000e16)); 58 | load(0x100000e9c, 4, "addq $0x20, %rsp"); 59 | load(0x100000ea0, 1, "popq %rbp"); 60 | load(0x100000ea1, 1, "retq", return); 61 | load(0x100000eb0, 1, "pushq %rbp"); 62 | load(0x100000eb1, 3, "movq %rsp, %rbp"); 63 | load(0x100000eb4, 4, "subq $0x20, %rsp"); 64 | load(0x100000eb8, 4, "movq %rdi, -0x8(%rbp)"); 65 | load(0x100000ebc, 3, "movl %esi, -0xc(%rbp)"); 66 | load(0x100000ebf, 7, "movl $0x0, -0x10(%rbp)"); 67 | load(0x100000ec6, 5, "movl $0x2, %eax"); 68 | load(0x100000ecb, 3, "movl -0x10(%rbp), %ecx"); 69 | load(0x100000ece, 3, "movl -0xc(%rbp), %edx"); 70 | load(0x100000ed1, 3, "movl %eax, -0x14(%rbp)"); 71 | load(0x100000ed4, 2, "movl %edx, %eax"); 72 | load(0x100000ed6, 1, "cltd"); 73 | load(0x100000ed7, 3, "movl -0x14(%rbp), %esi"); 74 | load(0x100000eda, 2, "idivl %esi"); 75 | load(0x100000edc, 2, "cmpl %eax, %ecx"); 76 | load(0x100000ede, 6, "jge 0x100000f1d", branch(0x100000f1d, 0x100000ee4)); 77 | load(0x100000ee4, 4, "movslq -0x10(%rbp), %rax"); 78 | load(0x100000ee8, 4, "shlq $0x2, %rax"); 79 | load(0x100000eec, 4, "addq -0x8(%rbp), %rax"); 80 | load(0x100000ef0, 3, "movl -0xc(%rbp), %ecx"); 81 | load(0x100000ef3, 3, "subl -0x10(%rbp), 
%ecx"); 82 | load(0x100000ef6, 3, "subl $0x1, %ecx"); 83 | load(0x100000ef9, 3, "movslq %ecx, %rdx"); 84 | load(0x100000efc, 4, "shlq $0x2, %rdx"); 85 | load(0x100000f00, 4, "addq -0x8(%rbp), %rdx"); 86 | load(0x100000f04, 3, "movq %rax, %rdi"); 87 | load(0x100000f07, 3, "movq %rdx, %rsi"); 88 | load(0x100000f0a, 5, "callq _swap", call(0x100000dd0, 0x100000f0f)); 89 | load(0x100000f0f, 3, "movl -0x10(%rbp), %eax"); 90 | load(0x100000f12, 3, "addl $0x1, %eax"); 91 | load(0x100000f15, 3, "movl %eax, -0x10(%rbp)"); 92 | load(0x100000f18, 5, "jmp 0x100000ec6", jump(0x100000ec6)); 93 | load(0x100000f1d, 4, "addq $0x20, %rsp"); 94 | load(0x100000f21, 1, "popq %rbp"); 95 | load(0x100000f22, 1, "retq", return); 96 | load(0x100000f30, 1, "pushq %rbp"); 97 | load(0x100000f31, 3, "movq %rsp, %rbp"); 98 | load(0x100000f34, 4, "subq $0x40, %rsp"); 99 | load(0x100000f38, 4, "leaq -0x30(%rbp), %rax"); 100 | load(0x100000f3c, 7, "movl $0x0, -0x4(%rbp)"); 101 | load(0x100000f43, 3, "movl %edi, -0x8(%rbp)"); 102 | load(0x100000f46, 4, "movq %rsi, -0x10(%rbp)"); 103 | load(0x100000f4a, 7, "movq 0x4f(%rip), %rsi"); 104 | load(0x100000f51, 4, "movq %rsi, -0x30(%rbp)"); 105 | load(0x100000f55, 7, "movq 0x4c(%rip), %rsi"); 106 | load(0x100000f5c, 4, "movq %rsi, -0x28(%rbp)"); 107 | load(0x100000f60, 7, "movq 0x49(%rip), %rsi"); 108 | load(0x100000f67, 4, "movq %rsi, -0x20(%rbp)"); 109 | load(0x100000f6b, 7, "movl $0x6, -0x34(%rbp)"); 110 | load(0x100000f72, 3, "movl -0x34(%rbp), %esi"); 111 | load(0x100000f75, 3, "movq %rax, %rdi"); 112 | load(0x100000f78, 5, "callq _bubble", call(0x100000e00, 0x100000f7d)); 113 | load(0x100000f7d, 4, "leaq -0x30(%rbp), %rdi"); 114 | load(0x100000f81, 3, "movl -0x34(%rbp), %esi"); 115 | load(0x100000f84, 5, "callq _invert", call(0x100000eb0, 0x100000f89)); 116 | load(0x100000f89, 2, "xorl %eax, %eax"); 117 | load(0x100000f8b, 4, "addq $0x40, %rsp"); 118 | load(0x100000f8f, 1, "popq %rbp"); 119 | load(0x100000f90, 1, "retq", return); 120 | 
/*
 * Sum the s integers of a using hand-written x86-64 assembly.
 *
 * The assembly reads the array pointer from %rdi and the count from
 * %esi (presumably the System V AMD64 argument registers -- confirm
 * for the target platform), accumulates the sum in %eax and uses %ecx
 * as the loop counter. %rbx is saved and restored with push/pop.
 *
 * NOTE(review): there is no return statement; the caller receives
 * whatever the assembly left in %eax. This is only expected to work
 * when the compiler does not touch %eax after the asm statement.
 */
int total(int a[], int s) {
	__asm__(" push %rbx\n"
			" mov %rdi, %rbx\n"
			" mov $0x0, %eax\n"
			" mov $0x0, %ecx\n"
			"loop:\n"
			" cmp %esi, %ecx\n"
			" jge out\n"
			" add (%rbx), %eax\n"
			" add $0x4, %rbx\n"
			" inc %ecx\n"
			" jmp loop\n"
			"out:\n"
			" pop %rbx\n");
}

/* Call total on a single-element array and return its result as exit status. */
int main(int argc, char* argv[]) {
	/*
	int i, n, size;
	int a[] = { 4, 8, 15, 16, 23, 42 };

	size = sizeof(a) / sizeof(int);
	n = total(a, size);
	*/
	int a[] = { 10 };
	return total(a, sizeof(a) / sizeof(int));
}
"jge 0x4004ae", branch(0x4004ae, 0x4004a4)); 7 | load(0x4004a4, 2, "add (%rbx),%eax"); 8 | load(0x4004a6, 4, "add $0x4,%rbx"); 9 | load(0x4004aa, 2, "inc %ecx"); 10 | load(0x4004ac, 2, "jmp 0x4004a0", jump(0x4004a0)); 11 | load(0x4004ae, 1, "pop %rbx"); 12 | load(0x4004af, 1, "retq", return); 13 | load(0x4004b0, 4, "sub $0x10,%rsp"); 14 | load(0x4004b4, 8, "movl $0xa,0xc(%rsp)"); 15 | load(0x4004bc, 5, "lea 0xc(%rsp),%rdi"); 16 | load(0x4004c1, 5, "mov $0x1,%esi"); 17 | load(0x4004c6, 5, "callq 0x400492", call(0x400492, 0x4004cb)); 18 | load(0x4004cb, 4, "add $0x10,%rsp"); 19 | load(0x4004cf, 1, "retq", return); 20 | exec(0x4004b0); 21 | exec(0x400492); 22 | exec(0x4004a4); 23 | exec(0x4004a0); 24 | exec(0x4004ae); 25 | exec(0x4004cb); 26 | -------------------------------------------------------------------------------- /smarthash.c: -------------------------------------------------------------------------------- 1 | /*--------------------------------------------------------------------*/ 2 | /*--- CFGgrind ---*/ 3 | /*--- smarthash.c ---*/ 4 | /*--------------------------------------------------------------------*/ 5 | 6 | /* 7 | This file is part of CFGgrind, a dynamic control flow graph (CFG) 8 | reconstruction tool. 9 | 10 | Copyright (C) 2023, Andrei Rimsa (andrei@cefetmg.br) 11 | 12 | This program is free software; you can redistribute it and/or 13 | modify it under the terms of the GNU General Public License as 14 | published by the Free Software Foundation; either version 2 of the 15 | License, or (at your option) any later version. 16 | 17 | This program is distributed in the hope that it will be useful, but 18 | WITHOUT ANY WARRANTY; without even the implied warranty of 19 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 | General Public License for more details. 21 | 22 | You should have received a copy of the GNU General Public License 23 | along with this program; if not, see . 
#ifdef OPTIMIZED_HASHTABLE
//static
//Bool cmp_values(void* v1, void* v2) {
//	return v1 != 0 && v2 != 0 && v1 == v2;
//}

/*
 * Record table slot `index` in the tracking list.
 * The slot must hold a list with exactly one element. Indexes are
 * stored shifted by one, so that slot 0 is not stored as a null value.
 */
static
void add_tracking_value(SmartHash* shash, Int index) {
	SmartList* slot = shash->table[index];
	void* encoded = (void*) (((HWord) index) + 1);

	CGD_ASSERT(slot != 0 && CGD_(smart_list_count)(slot) == 1);
//	CGD_ASSERT(!CGD_(smart_list_contains)(shash->track, v, cmp_values));
	CGD_(smart_list_add)(shash->track, encoded);
}

/*
 * Drop table slot `index` from the tracking list.
 * The slot must be unset or hold an empty list. The entry found is
 * overwritten with the last tracked entry, and the last position is
 * then set to 0. Not finding the index is a fatal error.
 */
static
void remove_tracking_value(SmartHash* shash, Int index) {
	Int pos, last;

	CGD_ASSERT(shash->table[index] == 0 || CGD_(smart_list_is_empty)(shash->table[index]));

	last = CGD_(smart_list_count)(shash->track) - 1;
	pos = 0;
	while (pos <= last) {
		// Undo the shift-by-one applied when the index was stored.
		Int tracked = ((HWord) CGD_(smart_list_at)(shash->track, pos)) - 1;

		if (tracked != index) {
			pos++;
			continue;
		}

		CGD_(smart_list_set)(shash->track, pos, CGD_(smart_list_at)(shash->track, last));
		CGD_(smart_list_set)(shash->track, last, 0);
		return;
	}

	tl_assert(0);
}
#endif
CGD_MALLOC("cgd.smarthash.gsh.1", (new_size * sizeof(SmartList*))); 98 | VG_(memset)(new_table, 0, (new_size * sizeof(SmartList*))); 99 | 100 | #ifdef OPTIMIZED_HASHTABLE 101 | new_track = CGD_(new_smart_list)(128); 102 | 103 | count = CGD_(smart_list_count)(shash->track); 104 | for (i = 0; i < count; i++) { 105 | idx = ((HWord) CGD_(smart_list_at)(shash->track, i)) - 1; 106 | list = shash->table[idx]; 107 | CGD_ASSERT(list != 0 && !CGD_(smart_list_is_empty)(list)); 108 | #else 109 | CGD_UNUSED(i); 110 | CGD_UNUSED(count); 111 | for (idx = 0; idx < shash->size; idx++) { 112 | list = shash->table[idx]; 113 | if (!list) 114 | continue; 115 | #endif 116 | count2 = CGD_(smart_list_count)(list); 117 | for (j = 0; j < count2; j++) { 118 | value = CGD_(smart_list_at)(list, j); 119 | CGD_ASSERT(value != 0); 120 | 121 | key = (*hash_key)(value); 122 | new_idx = key % new_size; 123 | 124 | new_list = &(new_table[new_idx]); 125 | if (!*new_list) 126 | *new_list = CGD_(new_smart_list)(1); 127 | 128 | CGD_(smart_list_add)(*new_list, value); 129 | #ifdef OPTIMIZED_HASHTABLE 130 | if (CGD_(smart_list_count)(*new_list) == 1) 131 | CGD_(smart_list_add)(new_track, (void*) (((HWord) new_idx) + 1)); 132 | #endif 133 | 134 | CGD_(smart_list_set)(list, j, 0); 135 | } 136 | 137 | CGD_(delete_smart_list)(list); 138 | } 139 | 140 | CGD_FREE(shash->table); 141 | 142 | shash->size = new_size; 143 | shash->table = new_table; 144 | 145 | #ifdef OPTIMIZED_HASHTABLE 146 | CGD_(smart_list_clear)(shash->track, 0); 147 | CGD_(delete_smart_list)(shash->track); 148 | 149 | shash->track = new_track; 150 | #endif 151 | } 152 | 153 | static 154 | SmartHash* create_smart_hash(Int size, Bool fixed) { 155 | SmartHash* shash; 156 | 157 | CGD_ASSERT(size > 0); 158 | 159 | // Make it 7 multiple. 
160 | size += size % 7; 161 | 162 | shash = (SmartHash*) CGD_MALLOC("cgd.smarthash.nsh.1", sizeof(SmartHash)); 163 | VG_(memset)(shash, 0, sizeof(SmartHash)); 164 | 165 | shash->size = size; 166 | shash->fixed = fixed; 167 | shash->growth_rate = 2.0f; // default: double the hash. 168 | 169 | #ifdef OPTIMIZED_HASHTABLE 170 | shash->track = CGD_(new_smart_list)(128); 171 | #endif 172 | 173 | shash->table = (SmartList**) CGD_MALLOC("cgd.smarthash.nsh.2", (size * sizeof(SmartList*))); 174 | VG_(memset)(shash->table, 0, (size * sizeof(SmartList*))); 175 | 176 | return shash; 177 | } 178 | 179 | SmartHash* CGD_(new_smart_hash)(Int size) { 180 | return create_smart_hash(size, False); 181 | } 182 | 183 | SmartHash* CGD_(new_fixed_smart_hash)(Int size) { 184 | return create_smart_hash(size, True); 185 | } 186 | 187 | void CGD_(delete_smart_hash)(SmartHash* shash) { 188 | Int idx, i, count; 189 | SmartList* list; 190 | 191 | CGD_ASSERT(shash != 0); 192 | CGD_ASSERT(shash->count == 0); 193 | 194 | #ifdef OPTIMIZED_HASHTABLE 195 | count = CGD_(smart_list_count)(shash->track); 196 | for (i = 0; i < count; i++) { 197 | idx = ((HWord) CGD_(smart_list_at)(shash->track, i)) - 1; 198 | list = shash->table[idx]; 199 | CGD_ASSERT(list != 0 && !CGD_(smart_list_is_empty)(list)); 200 | #else 201 | CGD_UNUSED(i); 202 | CGD_UNUSED(count); 203 | for (idx = 0; idx < shash->size; idx++) { 204 | list = shash->table[idx]; 205 | if (!list) 206 | continue; 207 | #endif 208 | CGD_ASSERT(CGD_(smart_list_is_empty)(list)); 209 | CGD_(delete_smart_list)(list); 210 | } 211 | 212 | #ifdef OPTIMIZED_HASHTABLE 213 | CGD_ASSERT(CGD_(smart_list_is_empty)(shash->track)); 214 | CGD_(delete_smart_list)(shash->track); 215 | #endif 216 | 217 | CGD_FREE(shash->table); 218 | CGD_FREE(shash); 219 | } 220 | 221 | void CGD_(smart_hash_clear)(SmartHash* shash, void (*remove_value)(void*)) { 222 | Int idx, i, count, j, count2; 223 | void* v; 224 | SmartList* list; 225 | 226 | CGD_ASSERT(shash != 0); 227 | 228 | #ifdef 
OPTIMIZED_HASHTABLE 229 | count = CGD_(smart_list_count)(shash->track); 230 | for (i = 0; i < count; i++) { 231 | idx = ((HWord) CGD_(smart_list_at)(shash->track, i)) - 1; 232 | list = shash->table[idx]; 233 | CGD_ASSERT(list != 0 && !CGD_(smart_list_is_empty)(list)); 234 | #else 235 | CGD_UNUSED(i); 236 | CGD_UNUSED(count); 237 | for (idx = 0; idx < shash->size; idx++) { 238 | list = shash->table[idx]; 239 | if (!list) 240 | continue; 241 | #endif 242 | count2 = CGD_(smart_list_count)(list); 243 | for (j = 0; j < count2; j++) { 244 | v = CGD_(smart_list_at)(list, j); 245 | CGD_ASSERT(v != 0); 246 | CGD_ASSERT(shash->count > 0); 247 | 248 | if (remove_value) 249 | (*remove_value)(v); 250 | 251 | --shash->count; 252 | CGD_(smart_list_set)(list, j, 0); 253 | } 254 | } 255 | 256 | #ifdef OPTIMIZED_HASHTABLE 257 | CGD_(smart_list_clear)(shash->track, 0); 258 | #endif 259 | } 260 | 261 | Int CGD_(smart_hash_count)(SmartHash* shash) { 262 | CGD_ASSERT(shash != 0); 263 | return shash->count; 264 | } 265 | 266 | Int CGD_(smart_hash_size)(SmartHash* shash) { 267 | CGD_ASSERT(shash != 0); 268 | return shash->size; 269 | } 270 | 271 | Bool CGD_(smart_hash_is_empty)(SmartHash* shash) { 272 | CGD_ASSERT(shash != 0); 273 | return shash->count == 0; 274 | } 275 | 276 | Float CGD_(smart_hash_growth_rate)(SmartHash* shash) { 277 | CGD_ASSERT(shash != 0); 278 | return shash->growth_rate; 279 | } 280 | 281 | void CGD_(smart_hash_set_growth_rate)(SmartHash* shash, Float rate) { 282 | CGD_ASSERT(shash != 0); 283 | CGD_ASSERT(rate > 1.0f); 284 | 285 | shash->growth_rate = rate; 286 | } 287 | 288 | void* CGD_(smart_hash_get)(SmartHash* shash, HWord key, HWord (*hash_key)(void*)) { 289 | Int idx; 290 | void* v; 291 | SmartList* list; 292 | 293 | CGD_ASSERT(shash != 0); 294 | CGD_ASSERT(hash_key != 0); 295 | 296 | idx = key % shash->size; 297 | if ((list = shash->table[idx])) { 298 | Int i, count; 299 | 300 | count = CGD_(smart_list_count)(list); 301 | for (i = 0; i < count; i++) { 302 | v 
= CGD_(smart_list_at)(list, i); 303 | CGD_ASSERT(v != 0); 304 | 305 | if ((*hash_key)(v) == key) 306 | return v; 307 | } 308 | } 309 | 310 | // Not found. 311 | return 0; 312 | } 313 | 314 | void* CGD_(smart_hash_put)(SmartHash* shash, void* value, HWord (*hash_key)(void*)) { 315 | Int idx; 316 | HWord key; 317 | void* v; 318 | SmartList** list; 319 | 320 | CGD_ASSERT(shash != 0); 321 | CGD_ASSERT(value != 0); 322 | CGD_ASSERT(hash_key != 0); 323 | 324 | if (!shash->fixed) { 325 | if ((10 * shash->count / shash->size) > 6) 326 | grow_smart_hash(shash, hash_key); 327 | } 328 | 329 | idx = (*hash_key)(value) % shash->size; 330 | list = &(shash->table[idx]); 331 | 332 | if (!*list) 333 | *list = CGD_(new_smart_list)(1); 334 | else { 335 | Int i, count; 336 | 337 | key = (*hash_key)(value); 338 | count = CGD_(smart_list_count)(*list); 339 | for (i = 0; i < count; i++) { 340 | v = CGD_(smart_list_at)(*list, i); 341 | CGD_ASSERT(v != 0); 342 | 343 | if ((*hash_key)(v) == key) { 344 | // Replace with the new value. 345 | CGD_(smart_list_set)(*list, i, value); 346 | 347 | // Return the old value. 
348 | return v; 349 | } 350 | } 351 | } 352 | 353 | CGD_(smart_list_add)(*list, value); 354 | #ifdef OPTIMIZED_HASHTABLE 355 | if (CGD_(smart_list_count)(*list) == 1) 356 | add_tracking_value(shash, idx); 357 | #endif 358 | 359 | ++shash->count; 360 | 361 | return 0; 362 | } 363 | 364 | void* CGD_(smart_hash_remove)(SmartHash* shash, HWord key, HWord (*hash_key)(void*)) { 365 | Int idx; 366 | void* v; 367 | SmartList* list; 368 | 369 | CGD_ASSERT(shash != 0); 370 | CGD_ASSERT(hash_key != 0); 371 | 372 | idx = key % shash->size; 373 | if ((list = shash->table[idx])) { 374 | Int i, count; 375 | 376 | count = CGD_(smart_list_count)(list); 377 | for (i = 0; i < count; i++) { 378 | v = CGD_(smart_list_at)(list, i); 379 | CGD_ASSERT(v != 0); 380 | 381 | if ((*hash_key)(v) == key) { 382 | CGD_(smart_list_set)(list, i, CGD_(smart_list_at)(list, (count - 1))); 383 | CGD_(smart_list_set)(list, (count - 1), 0); 384 | --shash->count; 385 | 386 | #ifdef OPTIMIZED_HASHTABLE 387 | if (CGD_(smart_list_is_empty)(list)) 388 | remove_tracking_value(shash, idx); 389 | #endif 390 | 391 | // Return the old value. 
392 | return v; 393 | } 394 | } 395 | } 396 | 397 | return 0; 398 | } 399 | 400 | Bool CGD_(smart_hash_contains)(SmartHash* shash, HWord key, HWord (*hash_key)(void*)) { 401 | return CGD_(smart_hash_get)(shash, key, hash_key) != 0; 402 | } 403 | 404 | void CGD_(smart_hash_forall)(SmartHash* shash, Bool (*func)(void*, void*), void* arg) { 405 | Int idx, i, count, j, count2; 406 | void* v; 407 | SmartList* list; 408 | 409 | CGD_ASSERT(shash != 0); 410 | CGD_ASSERT(func != 0); 411 | 412 | #ifdef OPTIMIZED_HASHTABLE 413 | count = CGD_(smart_list_count)(shash->track); 414 | 415 | i = 0; 416 | while (i < count) { 417 | idx = ((HWord) CGD_(smart_list_at)(shash->track, i)) - 1; 418 | list = shash->table[idx]; 419 | CGD_ASSERT(list != 0 && !CGD_(smart_list_is_empty)(list)); 420 | #else 421 | CGD_UNUSED(i); 422 | CGD_UNUSED(count); 423 | for (idx = 0; idx < shash->size; idx++) { 424 | list = shash->table[idx]; 425 | if (!list) 426 | continue; 427 | #endif 428 | 429 | count2 = CGD_(smart_list_count)(list); 430 | for (j = count2 - 1; j >= 0; j--) { 431 | v = CGD_(smart_list_at)(list, j); 432 | CGD_ASSERT(v != 0); 433 | 434 | if ((*func)(v, arg)) { 435 | Int last = CGD_(smart_list_count)(list) - 1; 436 | 437 | if (j != last) 438 | CGD_(smart_list_set)(list, j, CGD_(smart_list_at)(list, last)); 439 | 440 | CGD_(smart_list_set)(list, last, 0); 441 | --shash->count; 442 | } 443 | } 444 | 445 | #ifdef OPTIMIZED_HASHTABLE 446 | if (CGD_(smart_list_is_empty)(list)) { 447 | CGD_(smart_list_set)(shash->track, i, CGD_(smart_list_at)(shash->track, (count - 1))); 448 | CGD_(smart_list_set)(shash->track, (count - 1), 0); 449 | 450 | --count; 451 | } else { 452 | i++; 453 | } 454 | #endif 455 | } 456 | } 457 | 458 | // This method moves elements to dst from src (removing them). 
459 | void CGD_(smart_hash_merge)(SmartHash* dst, SmartHash* src, HWord (*hash_key)(void*)) { 460 | Int idx, i, count, j, count2; 461 | void* v; 462 | SmartList* list; 463 | 464 | CGD_ASSERT(src != 0); 465 | CGD_ASSERT(hash_key != 0); 466 | 467 | #ifdef OPTIMIZED_HASHTABLE 468 | count = CGD_(smart_list_count)(src->track); 469 | for (i = 0; i < count; i++) { 470 | idx = ((HWord) CGD_(smart_list_at)(src->track, i)) - 1; 471 | list = src->table[idx]; 472 | CGD_ASSERT(list != 0 && !CGD_(smart_list_is_empty)(list)); 473 | #else 474 | CGD_UNUSED(i); 475 | CGD_UNUSED(count); 476 | for (idx = 0; idx < src->size; idx++) { 477 | list = src->table[idx]; 478 | if (!list) 479 | continue; 480 | #endif 481 | 482 | count2 = CGD_(smart_list_count)(list); 483 | for (j = count2 - 1; j >= 0; j--) { 484 | v = CGD_(smart_list_at)(list, j); 485 | CGD_ASSERT(v != 0); 486 | 487 | CGD_(smart_list_set)(list, j, 0); 488 | src->count--; 489 | 490 | CGD_(smart_hash_put)(dst, v, hash_key); 491 | } 492 | } 493 | 494 | CGD_ASSERT(CGD_(smart_hash_is_empty)(src)); 495 | #ifdef OPTIMIZED_HASHTABLE 496 | CGD_(smart_list_clear)(src->track, 0); 497 | #endif 498 | } 499 | -------------------------------------------------------------------------------- /tests/Makefile.am: -------------------------------------------------------------------------------- 1 | dist_noinst_SCRIPTS = 2 | 3 | EXTRA_DIST = 4 | -------------------------------------------------------------------------------- /tests/cfg-ordered.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rimsa/CFGgrind/353cb3ba1e4bbd373f14ce743af1c317d1b3a539/tests/cfg-ordered.png -------------------------------------------------------------------------------- /tests/cfg-signal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rimsa/CFGgrind/353cb3ba1e4bbd373f14ce743af1c317d1b3a539/tests/cfg-signal.png 
-------------------------------------------------------------------------------- /tests/cfg-unordered.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rimsa/CFGgrind/353cb3ba1e4bbd373f14ce743af1c317d1b3a539/tests/cfg-unordered.png -------------------------------------------------------------------------------- /tests/signal.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | int g = 1; 6 | 7 | void handler(int x) { 8 | g = 0; 9 | } 10 | 11 | int main(int argc, char* argv[]) { 12 | unsigned long long c = 0; 13 | 14 | signal(SIGALRM, handler); 15 | alarm(1); 16 | 17 | while (g) { 18 | ++c; 19 | } 20 | 21 | printf("%llu\n", c); 22 | return 0; 23 | } 24 | -------------------------------------------------------------------------------- /tests/test.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | void bubble(int* a, int s) { 5 | int i, j; 6 | 7 | for (i = 0; i < s; i++) { 8 | for (j = (i+1); j < s; j++) { 9 | if (a[j] < a[i]) { 10 | int tmp = a[i]; 11 | a[i] = a[j]; 12 | a[j] = tmp; 13 | } 14 | } 15 | } 16 | } 17 | 18 | int main(int argc, char* argv[]) { 19 | int* array, i; 20 | 21 | array = (int*) malloc((argc-1) * sizeof(int)); 22 | for (i = 1; i < argc; i++) 23 | array[i-1] = atoi(argv[i]); 24 | 25 | bubble(array, argc-1); 26 | 27 | for (i = 1; i < argc; i++) 28 | printf("%d ", array[i-1]); 29 | 30 | printf("\n"); 31 | free(array); 32 | 33 | return 0; 34 | } 35 | 36 | -------------------------------------------------------------------------------- /threads.c: -------------------------------------------------------------------------------- 1 | /*--------------------------------------------------------------------*/ 2 | /*--- CFGgrind ---*/ 3 | /*--- threads.c ---*/ 4 | /*--------------------------------------------------------------------*/ 5 | 6 | /* 7 
| This file is part of CFGgrind, a dynamic control flow graph (CFG) 8 | reconstruction tool. 9 | 10 | Copyright (C) 2023, Andrei Rimsa (andrei@cefetmg.br) 11 | 12 | This tool is derived and contains lot of code from Callgrind 13 | Copyright (C) 2002-2017, Josef Weidendorfer (Josef.Weidendorfer@gmx.de) 14 | 15 | This program is free software; you can redistribute it and/or 16 | modify it under the terms of the GNU General Public License as 17 | published by the Free Software Foundation; either version 2 of the 18 | License, or (at your option) any later version. 19 | 20 | This program is distributed in the hope that it will be useful, but 21 | WITHOUT ANY WARRANTY; without even the implied warranty of 22 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 23 | General Public License for more details. 24 | 25 | You should have received a copy of the GNU General Public License 26 | along with this program; if not, see . 27 | 28 | The GNU General Public License is contained in the file COPYING. 29 | */ 30 | 31 | #include "global.h" 32 | 33 | #include "pub_tool_threadstate.h" 34 | 35 | /* forward decls */ 36 | static exec_state* exec_state_save(void); 37 | static exec_state* exec_state_restore(void); 38 | static exec_state* push_exec_state(int); 39 | static exec_state* top_exec_state(void); 40 | 41 | static exec_stack current_states; 42 | 43 | 44 | /*------------------------------------------------------------*/ 45 | /*--- Support for multi-threading ---*/ 46 | /*------------------------------------------------------------*/ 47 | 48 | 49 | /* 50 | * For Valgrind, MT is cooperative (no preemting in our code), 51 | * so we don't need locks... 52 | * 53 | * Per-thread data: 54 | * - call stack 55 | * - call hash 56 | * - event counters: last, current 57 | * 58 | * Even when ignoring MT, we need this functions to set up some 59 | * datastructures for the process (= Thread 1). 
60 | */ 61 | 62 | /* current running thread */ 63 | ThreadId CGD_(current_tid); 64 | 65 | static thread_info** threads; 66 | 67 | thread_info** CGD_(get_threads)(void) 68 | { 69 | return threads; 70 | } 71 | 72 | thread_info* CGD_(get_current_thread)(void) 73 | { 74 | return threads[CGD_(current_tid)]; 75 | } 76 | 77 | void CGD_(init_threads)(void) { 78 | UInt i; 79 | 80 | threads = CGD_MALLOC("cgd.threads.it.1", VG_N_THREADS * sizeof threads[0]); 81 | 82 | for(i=0;istates) ); 112 | CGD_(init_call_stack)( &(t->calls) ); 113 | 114 | return t; 115 | } 116 | 117 | static 118 | void delete_thread(thread_info* t) { 119 | CGD_ASSERT(t != 0); 120 | 121 | /* destroy state */ 122 | CGD_(destroy_call_stack)(&(t->calls)); 123 | CGD_(destroy_exec_stack)(&(t->states)); 124 | 125 | CGD_DATA_FREE(t, sizeof(thread_info)); 126 | } 127 | 128 | void CGD_(destroy_threads)(void) { 129 | UInt i; 130 | 131 | for (i = 0; i < VG_N_THREADS; i++) { 132 | if (threads[i]) { 133 | // Update the thread info for the current thread. 
134 | if (CGD_(current_tid) == i) { 135 | CGD_(copy_current_exec_stack)(&(threads[i]->states)); 136 | CGD_(copy_current_call_stack)(&(threads[i]->calls)); 137 | } 138 | 139 | delete_thread(threads[i]); 140 | threads[i] = 0; 141 | } 142 | } 143 | 144 | CGD_FREE(threads); 145 | threads = 0; 146 | 147 | CGD_(current_tid) = VG_INVALID_THREADID; 148 | } 149 | 150 | void CGD_(switch_thread)(ThreadId tid) 151 | { 152 | if (tid == CGD_(current_tid)) return; 153 | 154 | CGD_DEBUG(0, ">> thread %u (was %u)\n", tid, CGD_(current_tid)); 155 | 156 | if (CGD_(current_tid) != VG_INVALID_THREADID) { 157 | /* save thread state */ 158 | thread_info* t = threads[CGD_(current_tid)]; 159 | 160 | CGD_ASSERT(t != 0); 161 | 162 | /* current context (including signal handler contexts) */ 163 | exec_state_save(); 164 | CGD_(copy_current_exec_stack)( &(t->states) ); 165 | CGD_(copy_current_call_stack)( &(t->calls) ); 166 | } 167 | 168 | CGD_(current_tid) = tid; 169 | CGD_ASSERT(tid < VG_N_THREADS); 170 | 171 | if (tid != VG_INVALID_THREADID) { 172 | thread_info* t; 173 | 174 | /* load thread state */ 175 | if (threads[tid] == 0) 176 | threads[tid] = new_thread(); 177 | 178 | t = threads[tid]; 179 | 180 | /* current context (including signal handler contexts) */ 181 | CGD_(set_current_exec_stack)( &(t->states) ); 182 | exec_state_restore(); 183 | CGD_(set_current_call_stack)( &(t->calls) ); 184 | } 185 | } 186 | 187 | 188 | void CGD_(run_thread)(ThreadId tid) 189 | { 190 | /* now check for thread switch */ 191 | CGD_(switch_thread)(tid); 192 | } 193 | 194 | void CGD_(pre_signal)(ThreadId tid, Int sigNum, Bool alt_stack) 195 | { 196 | exec_state* es; 197 | exec_state* old_es; 198 | 199 | CGD_DEBUG(0, ">> pre_signal(TID %u, sig %d, alt_st %s)\n", 200 | tid, sigNum, alt_stack ? 
"yes":"no"); 201 | 202 | /* switch to the thread the handler runs in */ 203 | CGD_(switch_thread)(tid); 204 | 205 | /* save current execution state */ 206 | old_es = exec_state_save(); 207 | 208 | /* setup new cxtinfo struct for this signal handler */ 209 | es = push_exec_state(sigNum); 210 | es->call_stack_bottom = CGD_(current_call_stack).sp; 211 | 212 | /* setup current state for a spontaneous call */ 213 | CGD_(init_exec_state)( &CGD_(current_state) ); 214 | CGD_(current_state).sig = sigNum; 215 | 216 | // Restore CFG and working for signal mapping. 217 | CGD_(current_state).cfg = old_es->cfg; 218 | CGD_(current_state).working = old_es->working; 219 | } 220 | 221 | /* Run post-signal if the stackpointer for call stack is at 222 | * the bottom in current exec state (e.g. a signal handler) 223 | * 224 | * Called from CGD_(pop_call_stack) 225 | */ 226 | void CGD_(run_post_signal_on_call_stack_bottom)(void) 227 | { 228 | exec_state* es = top_exec_state(); 229 | CGD_ASSERT(es != 0); 230 | CGD_ASSERT(CGD_(current_state).sig >0); 231 | 232 | if (CGD_(current_call_stack).sp == es->call_stack_bottom) 233 | CGD_(post_signal)( CGD_(current_tid), CGD_(current_state).sig ); 234 | } 235 | 236 | void CGD_(post_signal)(ThreadId tid, Int sigNum) 237 | { 238 | exec_state* es; 239 | 240 | CGD_DEBUG(0, ">> post_signal(TID %u, sig %d)\n", 241 | tid, sigNum); 242 | 243 | /* thread switching potentially needed, eg. with instrumentation off */ 244 | CGD_(switch_thread)(tid); 245 | CGD_ASSERT(sigNum == CGD_(current_state).sig); 246 | 247 | /* Unwind call stack of this signal handler. 248 | * This should only be needed at finalisation time 249 | */ 250 | es = top_exec_state(); 251 | CGD_ASSERT(es != 0); 252 | while(CGD_(current_call_stack).sp > es->call_stack_bottom) 253 | CGD_(pop_call_stack)(False); 254 | 255 | // Connect the end of the signal handler to the exit node. 
256 | CGD_(cfgnode_set_exit)(CGD_(current_state).cfg, CGD_(current_state).working); 257 | 258 | /* restore previous context */ 259 | es->sig = -1; 260 | current_states.sp--; 261 | es = top_exec_state(); 262 | CGD_(current_state).sig = es->sig; 263 | exec_state_restore(); 264 | 265 | /* There is no way to reliable get the thread ID we are switching to 266 | * after this handler returns. So we sync with actual TID at start of 267 | * CGD_(setup_bb)(), which should be the next for cfggrind. 268 | */ 269 | } 270 | 271 | 272 | 273 | /*------------------------------------------------------------*/ 274 | /*--- Execution states in a thread & signal handlers ---*/ 275 | /*------------------------------------------------------------*/ 276 | 277 | /* Each thread can be interrupted by a signal handler, and they 278 | * themselves again. But as there's no scheduling among handlers 279 | * of the same thread, we don't need additional stacks. 280 | * So storing execution contexts and 281 | * adding separators in the callstack(needed to not intermix normal/handler 282 | * functions in contexts) should be enough. 
283 | */ 284 | 285 | /* not initialized: call_stack_bottom, sig */ 286 | void CGD_(init_exec_state)(exec_state* es) 287 | { 288 | es->jmps_passed = 0; 289 | es->bb = 0; 290 | es->cfg = 0; 291 | es->working = 0; 292 | } 293 | 294 | 295 | static exec_state* new_exec_state(Int sigNum) 296 | { 297 | exec_state* es; 298 | es = (exec_state*) CGD_MALLOC("cgd.threads.nes.1", 299 | sizeof(exec_state)); 300 | 301 | /* allocate real cost space: needed as incremented by 302 | * simulation functions */ 303 | CGD_(init_exec_state)(es); 304 | es->sig = sigNum; 305 | es->call_stack_bottom = 0; 306 | 307 | return es; 308 | } 309 | 310 | void CGD_(init_exec_stack)(exec_stack* es) 311 | { 312 | Int i; 313 | 314 | CGD_ASSERT(es != 0); 315 | 316 | /* The first element is for the main thread */ 317 | es->entry[0] = new_exec_state(0); 318 | for(i=1;ientry[i] = 0; 320 | es->sp = 0; 321 | } 322 | 323 | void CGD_(destroy_exec_stack)(exec_stack* es) { 324 | Int i; 325 | 326 | CGD_ASSERT(es != 0); 327 | 328 | for (i = 0; i < MAX_SIGHANDLERS; i++) { 329 | if (es->entry[i]) { 330 | CGD_FREE(es->entry[i]); 331 | es->entry[i] = 0; 332 | } 333 | } 334 | } 335 | 336 | void CGD_(copy_current_exec_stack)(exec_stack* dst) 337 | { 338 | Int i; 339 | 340 | dst->sp = current_states.sp; 341 | for(i=0;ientry[i] = current_states.entry[i]; 343 | } 344 | 345 | void CGD_(set_current_exec_stack)(exec_stack* dst) 346 | { 347 | Int i; 348 | 349 | current_states.sp = dst->sp; 350 | for(i=0;ientry[i]; 352 | } 353 | 354 | 355 | /* Get top context info struct of current thread */ 356 | static 357 | exec_state* top_exec_state(void) 358 | { 359 | Int sp = current_states.sp; 360 | exec_state* es; 361 | 362 | CGD_ASSERT((sp >= 0) && (sp < MAX_SIGHANDLERS)); 363 | es = current_states.entry[sp]; 364 | CGD_ASSERT(es != 0); 365 | return es; 366 | } 367 | 368 | /* Allocates a free context info structure for a new entered 369 | * signal handler, putting it on the context stack. 370 | * Returns a pointer to the structure. 
371 | */ 372 | static exec_state* push_exec_state(int sigNum) 373 | { 374 | Int sp; 375 | exec_state* es; 376 | 377 | current_states.sp++; 378 | sp = current_states.sp; 379 | 380 | CGD_ASSERT((sigNum > 0) && (sigNum <= _VKI_NSIG)); 381 | CGD_ASSERT((sp > 0) && (sp < MAX_SIGHANDLERS)); 382 | es = current_states.entry[sp]; 383 | if (!es) { 384 | es = new_exec_state(sigNum); 385 | current_states.entry[sp] = es; 386 | } 387 | else 388 | es->sig = sigNum; 389 | 390 | return es; 391 | } 392 | 393 | /* Save current context to top cxtinfo struct */ 394 | static 395 | exec_state* exec_state_save(void) 396 | { 397 | exec_state* es = top_exec_state(); 398 | 399 | es->jmps_passed = CGD_(current_state).jmps_passed; 400 | es->bb = CGD_(current_state).bb; 401 | es->cfg = CGD_(current_state).cfg; 402 | es->working = CGD_(current_state).working; 403 | 404 | CGD_DEBUGIF(1) { 405 | CGD_DEBUG(1, " cxtinfo_save(sig %d): jmps_passed %d\n", 406 | es->sig, es->jmps_passed); 407 | CGD_(print_bb)(-9, es->bb); 408 | } 409 | 410 | /* signal number does not need to be saved */ 411 | CGD_ASSERT(CGD_(current_state).sig == es->sig); 412 | 413 | return es; 414 | } 415 | 416 | static 417 | exec_state* exec_state_restore(void) 418 | { 419 | exec_state* es = top_exec_state(); 420 | 421 | CGD_(current_state).jmps_passed = es->jmps_passed; 422 | CGD_(current_state).bb = es->bb; 423 | CGD_(current_state).sig = es->sig; 424 | CGD_(current_state).cfg = es->cfg; 425 | CGD_(current_state).working = es->working; 426 | 427 | CGD_DEBUGIF(1) { 428 | CGD_DEBUG(1, " exec_state_restore(sig %d): jmps_passed %d\n", 429 | es->sig, es->jmps_passed); 430 | } 431 | 432 | return es; 433 | } 434 | --------------------------------------------------------------------------------