├── CVE-2021-34273.code
├── LICENSE
├── README.md
├── bin
└── achecker.py
├── requirements.txt
├── setup.py
└── src
├── __init__.py
├── cfg
├── __init__.py
├── bb.py
├── cfg.py
├── disassembly.py
├── instruction.py
├── opcodes.py
└── rattle
│ ├── LICENSE
│ ├── __init__.py
│ ├── analyze.py
│ ├── evmasm.py
│ ├── hashes.py
│ ├── recover.py
│ └── ssa.py
├── constraints.py
├── evm
├── __init__.py
├── evm.py
├── exceptions.py
├── results.py
└── state.py
├── exploit.py
├── explorer
├── __init__.py
├── backward.py
└── forward.py
├── flow
├── FSignatures.txt
├── __init__.py
├── analysis_results.py
├── code_info.py
├── symbolic.py
└── tainting.py
├── memory.py
├── project.py
├── slicing.py
├── storage.py
├── teEther_LICENSE
└── util
├── __init__.py
├── frontierset.py
├── intrange.py
├── utils.py
└── z3_extra_util.py
/CVE-2021-34273.code:
--------------------------------------------------------------------------------
1 | 606060405236156100c3576000357c0100000000000000000000000000000000000000000000000000000000900463ffffffff16806306fdde03146100d3578063095ea7b31461016157806318160ddd146101bb57806323b872dd146101e4578063313ce5671461025d57806370a082311461028c5780638da5cb5b146102d957806395d89b411461032e578063a9059cbb146103bc578063a9c7648f14610416578063dd62ed3e14610479578063df32754b146104e5578063f2fde38b146104fa575b34156100ce57600080fd5b600080fd5b34156100de57600080fd5b6100e6610533565b6040518080602001828103825283818151815260200191508051906020019080838360005b8381101561012657808201518184015260208101905061010b565b50505050905090810190601f1680156101535780820380516001836020036101000a031916815260200191505b509250505060405180910390f35b341561016c57600080fd5b6101a1600480803573ffffffffffffffffffffffffffffffffffffffff169060200190919080359060200190919050506105d1565b604051808215151515815260200191505060405180910390f35b34156101c657600080fd5b6101ce6106c3565b6040518082815260200191505060405180910390f35b34156101ef57600080fd5b610243600480803573ffffffffffffffffffffffffffffffffffffffff1690602001909190803573ffffffffffffffffffffffffffffffffffffffff169060200190919080359060200190919050506106c9565b604051808215151515815260200191505060405180910390f35b341561026857600080fd5b610270610945565b604051808260ff1660ff16815260200191505060405180910390f35b341561029757600080fd5b6102c3600480803573ffffffffffffffffffffffffffffffffffffffff16906020019091905050610958565b6040518082815260200191505060405180910390f35b34156102e457600080fd5b6102ec6109a1565b604051808273ffffffffffffffffffffffffffffffffffffffff1673ffffffffffffffffffffffffffffffffffffffff16815260200191505060405180910390f35b341561033957600080fd5b6103416109c6565b6040518080602001828103825283818151815260200191508051906020019080838360005b83811015610381578082015181840152602081019050610366565b50505050905090810190601f1680156103ae5780820380516001836020036101000a031916815260200191505b509250505060405180910390f35b34156103c757600080fd5b6103fc600480803573ffffffffffffffffffffffffffffffffffffffff16
906020019091908035906020019091905050610a64565b604051808215151515815260200191505060405180910390f35b341561042157600080fd5b610477600480803590602001908201803590602001908080602002602001604051908101604052809392919081815260200183836020028082843782019150505050505091908035906020019091905050610bcd565b005b341561048457600080fd5b6104cf600480803573ffffffffffffffffffffffffffffffffffffffff1690602001909190803573ffffffffffffffffffffffffffffffffffffffff16906020019091905050610d1b565b6040518082815260200191505060405180910390f35b34156104f057600080fd5b6104f8610da2565b005b341561050557600080fd5b610531600480803573ffffffffffffffffffffffffffffffffffffffff16906020019091905050610de4565b005b60048054600181600116156101000203166002900480601f0160208091040260200160405190810160405280929190818152602001828054600181600116156101000203166002900480156105c95780601f1061059e576101008083540402835291602001916105c9565b820191906000526020600020905b8154815290600101906020018083116105ac57829003601f168201915b505050505081565b600081600260003373ffffffffffffffffffffffffffffffffffffffff1673ffffffffffffffffffffffffffffffffffffffff16815260200190815260200160002060008573ffffffffffffffffffffffffffffffffffffffff1673ffffffffffffffffffffffffffffffffffffffff168152602001908152602001600020819055508273ffffffffffffffffffffffffffffffffffffffff163373ffffffffffffffffffffffffffffffffffffffff167f8c5be1e5ebec7d5bd14f71427d1e84f3dd0314c0f7b2291e5b200ac8c7c3b925846040518082815260200191505060405180910390a36001905092915050565b60035481565b600081600160008673ffffffffffffffffffffffffffffffffffffffff1673ffffffffffffffffffffffffffffffffffffffff1681526020019081526020016000205410158015610796575081600260008673ffffffffffffffffffffffffffffffffffffffff1673ffffffffffffffffffffffffffffffffffffffff16815260200190815260200160002060003373ffffffffffffffffffffffffffffffffffffffff1673ffffffffffffffffffffffffffffffffffffffff1681526020019081526020016000205410155b80156107a25750600082115b156109395781600160008573ffffffffffffffffffffffffffffffffffffffff1673ffffffffffffffffff
ffffffffffffffffffffff1681526020019081526020016000206000828254019250508190555081600160008673ffffffffffffffffffffffffffffffffffffffff1673ffffffffffffffffffffffffffffffffffffffff1681526020019081526020016000206000828254039250508190555081600260008673ffffffffffffffffffffffffffffffffffffffff1673ffffffffffffffffffffffffffffffffffffffff16815260200190815260200160002060003373ffffffffffffffffffffffffffffffffffffffff1673ffffffffffffffffffffffffffffffffffffffff168152602001908152602001600020600082825403925050819055508273ffffffffffffffffffffffffffffffffffffffff168473ffffffffffffffffffffffffffffffffffffffff167fddf252ad1be2c89b69c2b068fc378daa952ba7f163c4a11628f55a4df523b3ef846040518082815260200191505060405180910390a36001905061093e565b600090505b9392505050565b600560009054906101000a900460ff1681565b6000600160008373ffffffffffffffffffffffffffffffffffffffff1673ffffffffffffffffffffffffffffffffffffffff168152602001908152602001600020549050919050565b6000809054906101000a900473ffffffffffffffffffffffffffffffffffffffff1681565b60068054600181600116156101000203166002900480601f016020809104026020016040519081016040528092919081815260200182805460018160011615610100020316600290048015610a5c5780601f10610a3157610100808354040283529160200191610a5c565b820191906000526020600020905b815481529060010190602001808311610a3f57829003601f168201915b505050505081565b600081600160003373ffffffffffffffffffffffffffffffffffffffff1673ffffffffffffffffffffffffffffffffffffffff1681526020019081526020016000205410158015610ab55750600082115b15610bc25781600160003373ffffffffffffffffffffffffffffffffffffffff1673ffffffffffffffffffffffffffffffffffffffff1681526020019081526020016000206000828254039250508190555081600160008573ffffffffffffffffffffffffffffffffffffffff1673ffffffffffffffffffffffffffffffffffffffff168152602001908152602001600020600082825401925050819055508273ffffffffffffffffffffffffffffffffffffffff163373ffffffffffffffffffffffffffffffffffffffff167fddf252ad1be2c89b69c2b068fc378daa952ba7f163c4a11628f55a4df523b3ef846040518082815260200191505060405180
910390a360019050610bc7565b600090505b92915050565b60008090505b8251811015610d165781600160003373ffffffffffffffffffffffffffffffffffffffff1673ffffffffffffffffffffffffffffffffffffffff1681526020019081526020016000206000828254039250508190555081600160008584815181101515610c3c57fe5b9060200190602002015173ffffffffffffffffffffffffffffffffffffffff1673ffffffffffffffffffffffffffffffffffffffff168152602001908152602001600020600082825401925050819055508281815181101515610c9b57fe5b9060200190602002015173ffffffffffffffffffffffffffffffffffffffff163373ffffffffffffffffffffffffffffffffffffffff167fddf252ad1be2c89b69c2b068fc378daa952ba7f163c4a11628f55a4df523b3ef846040518082815260200191505060405180910390a38080600101915050610bd3565b505050565b6000600260008473ffffffffffffffffffffffffffffffffffffffff1673ffffffffffffffffffffffffffffffffffffffff16815260200190815260200160002060008373ffffffffffffffffffffffffffffffffffffffff1673ffffffffffffffffffffffffffffffffffffffff16815260200190815260200160002054905092915050565b336000806101000a81548173ffffffffffffffffffffffffffffffffffffffff021916908373ffffffffffffffffffffffffffffffffffffffff160217905550565b6000809054906101000a900473ffffffffffffffffffffffffffffffffffffffff1673ffffffffffffffffffffffffffffffffffffffff163373ffffffffffffffffffffffffffffffffffffffff16141515610e3f57600080fd5b806000806101000a81548173ffffffffffffffffffffffffffffffffffffffff021916908373ffffffffffffffffffffffffffffffffffffffff160217905550505600a165627a7a723058200a2bf4fa374a52ee391d2be9ef116c0929697a1a3ee37acebf0cc5d85c6597ff0029
2 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2022 Univ of British Columbia (UBC)
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # AChecker
2 | AChecker (Access Control Checker) is an automated static analysis tool for detecting access control vulnerabilities in Ethereum smart contracts.
3 |
4 | For more details about AChecker, please refer to our paper published at ICSE 2023: [AChecker: Statically Detecting Smart Contract
5 | Access Control Vulnerabilities](https://blogs.ubc.ca/dependablesystemslab/2022/12/08/achecker-statically-detecting-smart-contract-access-control-vulnerabilities)
6 |
7 |
8 | If you use AChecker, please cite this paper.
9 |
10 | ```
11 | @inproceedings{ghaleb2023achecker,
12 | title={AChecker: Statically Detecting Smart Contract Access Control Vulnerabilities},
13 | author={Ghaleb, Asem and Rubin, Julia and Pattabiraman, Karthik},
14 | booktitle={Proceedings of the 45th IEEE/ACM International Conference on Software Engineering},
15 | year={2023}
16 | }
17 | ```
18 |
19 | ## Getting Started
20 | **Note:** We tested all scripts provided in this package on an Ubuntu 20.04 LTS machine.
21 |
22 | ### Requirements
23 | * Python 3.8+
24 |
25 | ### Building AChecker
26 |
27 | To build the tool manually, we provide a `requirements.txt` file and the script `setup.py` to simply install the dependencies AChecker requires and build everything as follows.
28 |
29 | Run the following command. Please make sure you are using Python 3.8 or higher.
30 |
31 | ```
32 | cd AChecker
33 | python -m pip install -r requirements.txt
34 | ```
35 |
36 | ### Analyzing a smart contract
37 | Use the following command to run AChecker on a contract bytecode.
38 | ```
39 | python bin/achecker.py -f [path_of_the_contract_bytecode_file] -b
40 | ```
41 | As an example, the following command will run AChecker to analyze the contract with CVE ID 'CVE-2021-34273' in the file named '*CVE-2021-34273.code*'
42 | ```
43 | python bin/achecker.py -f CVE-2021-34273.code -b -m 8
44 | ```
45 |
46 | The option -m enables setting the allocated memory for the analysis (in gigabytes). In this example, the allocated memory limit is set to 8 GB. The default value is 6 GB when the option -m is not used.
47 |
48 | ## Contact
49 | For questions about our paper or this code, please get in touch with Asem Ghaleb (aghaleb@alumni.ubc.ca)
50 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | pysha3>=1.0.2
2 | z3-solver>=4.8.5.0
3 | ijson
4 | requests
5 | lxml
6 | bs4
7 | Cython
8 | configparser
9 | pyevmasm
10 | cbor2
11 | networkx
12 | solc-select
13 | pandas
14 | tabulate
15 | -e .
16 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
from setuptools import setup, find_packages

# Packaging metadata for AChecker.  Runtime dependencies are intentionally
# left out of install_requires: they are pinned in requirements.txt, which
# ends with `-e .` to install this package itself in editable mode.
setup(
    name='AChecker',
    version='0.1.0',
    packages=find_packages(),
    install_requires=[],  # see requirements.txt
    scripts=[
        'bin/achecker.py'  # command-line entry point
    ],
    python_requires='>=3.8',  # README requires Python 3.8+

)
14 |
--------------------------------------------------------------------------------
/src/__init__.py:
--------------------------------------------------------------------------------
1 | from . import cfg
2 | from . import constraints
3 | from . import evm
4 | from . import exploit
5 | from . import explorer
6 | from . import flow
7 | from . import memory
8 | from . import project
9 | from . import slicing
10 | from . import storage
11 | from . import util
12 |
--------------------------------------------------------------------------------
/src/cfg/__init__.py:
--------------------------------------------------------------------------------
1 | from . import bb
2 | from . import cfg
3 | from . import disassembly
4 | from . import instruction
5 | from . import opcodes
6 | #from . import evm_cfg
7 | #from . import tac_cfg
8 |
--------------------------------------------------------------------------------
/src/cfg/bb.py:
--------------------------------------------------------------------------------
1 | import logging
2 | from collections import defaultdict, deque
3 |
4 | from src.util.utils import unique
5 |
6 |
class BB(object):
    """A basic block of EVM instructions plus its CFG bookkeeping.

    On construction the instruction list is scanned once to summarize the
    block's effect on the stack it was entered with (streads/stwrites and
    the net depth change stdelta).  The remaining attributes support edge
    discovery (pred/succ, pred_paths, must_visit) and distance estimates
    used to prioritize path exploration.
    """

    def __init__(self, ins):
        self.ins = ins
        self.streads = set()  # indices of stack-items that will be read by this BB (0 is the topmost item on stack)
        self.stwrites = set()  # indices of stack-items that will be written by this BB (0 is the topmost item on stack)
        self.stdelta = 0  # running net change in stack depth across the block
        for i in ins:
            i.bb = self  # back-pointer from each instruction to its block
            if 0x80 <= i.op <= 0x8f:  # Special handling for DUP
                ridx = i.op - 0x80 - self.stdelta
                widx = -1 - self.stdelta
                if ridx not in self.stwrites:
                    self.streads.add(ridx)
                self.stwrites.add(widx)
            elif 0x90 <= i.op <= 0x9f:  # Special handling for SWAP
                idx1 = i.op - 0x8f - self.stdelta
                idx2 = - self.stdelta
                if idx1 not in self.stwrites:
                    self.streads.add(idx1)
                if idx2 not in self.stwrites:
                    self.streads.add(idx2)
                self.stwrites.add(idx1)
                self.stwrites.add(idx2)
            else:  # assume entire stack is affected otherwise
                for j in range(i.ins):
                    idx = j - self.stdelta
                    if idx not in self.stwrites:
                        self.streads.add(idx)
                for j in range(i.outs):
                    idx = i.ins - 1 - j - self.stdelta
                    self.stwrites.add(idx)
            self.stdelta += i.delta
        # keep only indices that refer to the stack as it was on block entry
        self.streads = {x for x in self.streads if x >= 0}
        self.stwrites = {x for x in self.stwrites if x >= 0}
        self.start = self.ins[0].addr  # address of the first instruction
        self.pred = set()
        self.succ = set()
        self.succ_addrs = set()
        self.pred_paths = defaultdict(set)  # predecessor BB -> set of paths (tuples of BB start addrs)
        self.branch = self.ins[-1].op == 0x57  # block ends with JUMPI
        self.indirect_jump = self.ins[-1].op in (0x56, 0x57)  # ends with JUMP or JUMPI
        self.ancestors = set()
        self.descendants = set()
        # maintain a set of 'must_visit' constraints to limit
        # backward-slices to only new slices after new edges are added
        # initially, no constraint is given (= empty set)
        self.must_visit = [set()]
        # also maintain an estimate of how fast we can get from here
        # to the root of the cfg
        # how fast meaning, how many JUMPI-branches we have to take
        self.estimate_constraints = (1 if self.branch else 0) if self.start == 0 else None
        # and another estimate of how many backwards branches
        # we will encounter to the root
        self.estimate_back_branches = 0 if self.start == 0 else None

    @property
    def jump_resolved(self):
        """True when this block's jump targets need no further resolution."""
        return not self.indirect_jump or len(self.must_visit) == 0

    def update_ancestors(self, new_ancestors):
        """Add *new_ancestors* and propagate them forward to all successors."""
        new_ancestors = new_ancestors - self.ancestors
        if new_ancestors:
            self.ancestors.update(new_ancestors)
            for s in self.succ:
                s.update_ancestors(new_ancestors)

    def update_descendants(self, new_descendants):
        """Add *new_descendants* and propagate them backward to all predecessors."""
        new_descendants = new_descendants - self.descendants
        if new_descendants:
            self.descendants.update(new_descendants)
            for p in self.pred:
                p.update_descendants(new_descendants)

    def update_estimate_constraints(self):
        """Refresh the min JUMPI-count-to-root estimate from predecessors.

        Propagates forward only when the estimate improved.
        """
        if all(p.estimate_constraints is None for p in self.pred):
            return
        best_estimate = min(p.estimate_constraints for p in self.pred if p.estimate_constraints is not None)
        if self.branch:
            best_estimate += 1
        if self.estimate_constraints is None or best_estimate < self.estimate_constraints:
            self.estimate_constraints = best_estimate
            for s in self.succ:
                s.update_estimate_constraints()

    def update_estimate_back_branches(self):
        """Refresh the estimate of merge points (multi-pred blocks) to the root."""
        if all(p.estimate_back_branches is None for p in self.pred):
            return
        best_estimate = min(p.estimate_back_branches for p in self.pred if p.estimate_back_branches is not None)
        if len(self.pred) > 1:
            best_estimate += 1
        # note: propagates on *any* change, not only improvements
        if self.estimate_back_branches is None or best_estimate != self.estimate_back_branches:
            self.estimate_back_branches = best_estimate
            for s in self.succ:
                s.update_estimate_back_branches()

    def add_succ(self, other, path):
        """Wire the CFG edge self -> other, discovered along *path*.

        Also re-arms jump resolution: every indirect-jump block reachable
        from *other* gets a new must_visit constraint so only slices through
        this block are re-examined.
        """
        self.succ.add(other)
        other.pred.add(self)
        self.update_descendants(other.descendants | {other.start})
        other.update_ancestors(self.ancestors | {self.start})
        other.update_estimate_constraints()
        other.update_estimate_back_branches()
        other.pred_paths[self].add(tuple(path))
        # BFS over everything reachable from the new successor
        seen = set()
        todo = deque()
        todo.append(other)
        while todo:
            bb = todo.popleft()
            if bb not in seen:
                seen.add(bb)
                if bb.indirect_jump:
                    bb.must_visit.append({self.start})
                # logging.debug('BB@%x, must_visit: %s', bb.start, bb.must_visit)
                todo.extend(s for s in bb.succ if s not in seen)

    def _find_jump_target(self):
        """Return the jump target if it is a constant PUSH directly before the jump, else None."""
        if len(self.ins) >= 2 and 0x60 <= self.ins[-2].op <= 0x71:
            self.must_visit = []
            return int.from_bytes(self.ins[-2].arg, byteorder='big')
        else:
            return None

    def get_succ_addrs_full(self, valid_jump_targets):
        """Resolve indirect jump targets by backward-slicing the jump operand.

        Returns (all known successor addrs, newly discovered (path, addr) pairs).
        """
        from src.slicing import slice_to_program, backward_slice
        from src.evm.exceptions import ExternalData
        from src.memory import UninitializedRead
        from src.evm.evm import run
        new_succ_addrs = set()
        if self.indirect_jump and not self.jump_resolved:
            # slice backwards from the jump's target operand (stack index 0)
            bs = backward_slice(self.ins[-1], [0], must_visits=self.must_visit)
            for b in bs:
                if 0x60 <= b[-1].op <= 0x7f:
                    # slice ends in a PUSHn: target is that constant
                    succ_addr = int.from_bytes(b[-1].arg, byteorder='big')
                else:
                    # otherwise concretely execute the slice to compute it
                    p = slice_to_program(b)
                    try:
                        succ_addr = run(p, check_initialized=True).stack.pop()
                    except (ExternalData, UninitializedRead):
                        logging.debug('Failed to compute jump target for BB@{}, slice: \n{}'.format(self.start, '\n'.join('\t{}'.format(ins) for ins in b)))
                        continue
                if succ_addr not in valid_jump_targets:
                    logging.debug('Jump to invalid address')
                    continue
                path = tuple(unique(ins.bb.start for ins in b if ins.bb))
                if succ_addr not in self.succ_addrs:
                    self.succ_addrs.add(succ_addr)
                if (path, succ_addr) not in new_succ_addrs:
                    new_succ_addrs.add((path, succ_addr))
        # We did our best,
        # if someone finds a new edge, jump_resolved will be set to False by the BFS in add_succ
        self.must_visit = []
        return self.succ_addrs, new_succ_addrs

    def get_succ_addrs(self, valid_jump_targets):
        """Compute statically-known successors (constant jumps plus fallthrough)."""
        if self.ins[-1].op in (0x56, 0x57):  # JUMP / JUMPI
            jump_target = self._find_jump_target()
            if jump_target is not None:
                self.indirect_jump = False
                if jump_target in valid_jump_targets:
                    self.succ_addrs.add(jump_target)
            else:
                self.indirect_jump = True
        else:
            self.must_visit = []
        # fallthrough unless the block ends in STOP/JUMP/RETURN/REVERT/INVALID/SELFDESTRUCT
        if self.ins[-1].op not in (0x00, 0x56, 0xf3, 0xfd, 0xfe, 0xff):
            fallthrough = self.ins[-1].next_addr
            if fallthrough:
                self.succ_addrs.add(fallthrough)
        return self.succ_addrs

    def __str__(self):
        """Multi-line debug dump: stack summary, preds, instructions, succs."""
        s = 'BB @ %x\tStack %d' % (self.start, self.stdelta)
        s += '\n'
        s += 'Stackreads: {%s}' % (', '.join(map(str, sorted(self.streads))))
        s += '\n'
        s += 'Stackwrites: {%s}' % (', '.join(map(str, sorted(self.stwrites))))
        if self.pred:
            s += '\n'
            s += '\n'.join('%x ->' % pred.start for pred in self.pred)
        s += '\n'
        s += '\n'.join(str(ins) for ins in self.ins)
        if self.succ:
            s += '\n'
            s += '\n'.join(' -> %x' % succ.start for succ in self.succ)
        return s

    def __repr__(self):
        return str(self)

    def __lt__(self, other):
        # blocks sort by start address
        return self.start < other.start
198 |
--------------------------------------------------------------------------------
/src/cfg/cfg.py:
--------------------------------------------------------------------------------
1 | import logging
2 | from collections import deque
3 | from src.cfg.bb import BB
4 | import src.cfg.rattle as rattle
5 | import tempfile
6 | import subprocess
7 | import os,sys
8 | import time
9 | from collections import defaultdict
10 | from src.evm.exceptions import TimeoutException
11 |
12 |
13 | class CFG(object):
def __init__(self, bbs, fix_xrefs=True, fix_only_easy_xrefs=False):
    """Build a CFG over the given basic blocks.

    bbs                 -- iterable of BB objects; the block at address 0 is the root.
    fix_xrefs           -- resolve both easy (static) and hard (sliced) jump edges.
    fix_only_easy_xrefs -- resolve only statically-known edges.
    """
    self.bbs = sorted(bbs)
    self._bb_at = {bb.start: bb for bb in self.bbs}  # start address -> BB
    self._ins_at = {i.addr: i for bb in self.bbs for i in bb.ins}  # instruction address -> instruction
    self.root = self._bb_at[0]  # entry block is always at address 0
    # only JUMPDEST-headed blocks are legal JUMP/JUMPI targets on the EVM
    self.valid_jump_targets = frozenset({bb.start for bb in self.bbs if bb.ins[0].name == 'JUMPDEST'})
    if fix_xrefs or fix_only_easy_xrefs:
        try:
            self._xrefs(fix_only_easy_xrefs)
        except TimeoutException:
            raise TimeoutException("Timed out!")
    self._dominators = None  # lazy cache for the dominators property
    self._dd = dict()  # per-instruction data-dependence cache
27 |
@property
def bb_addrs(self):
    """Frozen set of the start addresses of every basic block in this CFG."""
    addresses = self._bb_at.keys()
    return frozenset(addresses)
31 |
def filter_ins(self, names, reachable=False):
    """Collect all instructions whose mnemonic is in *names*.

    names     -- a single mnemonic string or an iterable of mnemonics.
    reachable -- if True, only consider blocks reachable from the root
                 (address 0 among the block's ancestors, or the root itself).
    """
    wanted = [names] if isinstance(names, str) else names
    result = []
    for bb in self.bbs:
        if reachable and 0 not in (bb.ancestors | {bb.start}):
            continue
        for ins in bb.ins:
            if ins.name in wanted:
                result.append(ins)
    return result
40 |
def _xrefs(self, fix_only_easy_xrefs=False):
    """Resolve control-flow edges: static targets first, then sliced indirect jumps."""
    # logging.debug('Fixing Xrefs')
    self._easy_xrefs()
    # logging.debug('Easy Xrefs fixed, turning to hard ones now')
    if not fix_only_easy_xrefs:
        self._hard_xrefs()
    # logging.debug('Hard Xrefs also fixed, good to go')
48 |
def _easy_xrefs(self):
    """Wire edges whose targets are statically known (constant jumps, fallthrough)."""
    for pred in self.bbs:
        for succ_addr in pred.get_succ_addrs(self.valid_jump_targets):
            if succ_addr and succ_addr in self._bb_at:
                succ = self._bb_at[succ_addr]
                pred.add_succ(succ, {pred.start})
55 |
56 | def _hard_xrefs(self):
57 | new_link = True
58 | links = set()
59 | stime=time.time()
60 | while new_link:
61 | new_link = False
62 | for pred in self.bbs:
63 | if not pred.jump_resolved:
64 | succ_addrs, new_succ_addrs = pred.get_succ_addrs_full(self.valid_jump_targets)
65 | for new_succ_path, succ_addr in new_succ_addrs:
66 | if succ_addr not in self._bb_at:
67 | logging.warning(
68 | 'WARNING, NO BB @ %x (possible successor of BB @ %x)' % (succ_addr, pred.start))
69 | continue
70 | succ = self._bb_at[succ_addr]
71 | pred.add_succ(succ, new_succ_path)
72 | if not (pred.start, succ.start) in links:
73 | # logging.debug('found new link from %x to %x', pred.start, succ.start)
74 | # with open('cfg-tmp%d.dot' % len(links), 'w') as outfile:
75 | # outfile.write(self.to_dot())
76 | new_link = True
77 | links.add((pred.start, succ.start))
def data_dependence(self, ins):
    """Set of instructions that *ins* data-depends on, memoized in self._dd.

    Computed lazily via a backward slice from *ins*; only instructions that
    still belong to a basic block are kept.
    """
    if ins not in self._dd:
        from src.slicing import backward_slice
        deps = {i for s in backward_slice(ins) for i in s if i.bb}
        self._dd[ins] = deps
    return self._dd[ins]
83 |
@property
def dominators(self):
    """Immediate-dominator map (BB -> dominating BB), computed lazily on first access."""
    if not self._dominators:
        self._compute_dominators()
    return self._dominators
89 |
def _compute_dominators(self):
    """Populate self._dominators using networkx's immediate-dominator algorithm."""
    import networkx
    g = networkx.DiGraph()
    for bb in self.bbs:
        for succ in bb.succ:
            g.add_edge(bb.start, succ.start)
    # root of the CFG is the block at address 0; map addresses back to BBs
    self._dominators = {self._bb_at[k]: self._bb_at[v] for k, v in networkx.immediate_dominators(g, 0).items()}
97 |
def __str__(self):
    """Human-readable dump: every basic block, separated by blank lines."""
    return '\n\n'.join(map(str, self.bbs))
100 |
def to_dot(self, minimal=False):
    """Render the CFG as a graphviz `dot` digraph string.

    minimal -- when True, emit bare instruction labels and unlabeled edges;
               otherwise include pred/succ summaries, constraint estimates,
               edge path labels, and (if computed) data-dependence edges.
    """
    # NOTE(review): several string literals below appear garbled — HTML label
    # markup (likely <br/> tags) was stripped by the dump this file came from,
    # leaving raw line breaks inside quoted strings that are not valid Python
    # as shown.  Code left byte-identical; restore from upstream before editing.
    s = 'digraph g {\n'
    s += '\tsplines=ortho;\n'
    s += '\tnode[fontname="courier"];\n'
    for bb in sorted(self.bbs):
        from_block = ''
        if self._dominators:
            from_block = 'Dominated by: %x
' % self.dominators[bb].start
        from_block += 'From: ' + ', '.join('%x' % pred.start for pred in sorted(bb.pred))
        if bb.estimate_constraints is not None:
            from_block += '
Min constraints from root: %d' % bb.estimate_constraints
        if bb.estimate_back_branches is not None:
            from_block += '
Min back branches to root: %d' % bb.estimate_back_branches
        to_block = 'To: ' + ', '.join('%x' % succ.start for succ in sorted(bb.succ))
        ins_block = '
'.join(
            '%4x: %02x %s %s' % (ins.addr, ins.op, ins.name, ins.arg.hex() if ins.arg else '') for ins in bb.ins)
        # ancestors = 'Ancestors: %s'%(', '.join('%x'%addr for addr in sorted(a for a in bb.ancestors)))
        # descendants = 'Descendants: %s' % (', '.join('%x' % addr for addr in sorted(a for a in bb.descendants)))
        # s += '\t%d [shape=box,label=<%x:
%s
%s
%s
>];\n' % (
        #     bb.start, bb.start, ins_block, ancestors, descendants)
        if not minimal:
            s += '\t%d [shape=box,label=<%s
%x:
%s
%s
>];\n' % (
                bb.start, from_block, bb.start, ins_block, to_block)
        else:
            s += '\t%d [shape=box,label=<%s
>];\n' % (
                bb.start, ins_block)
    s += '\n'
    for bb in sorted(self.bbs):
        for succ in sorted(bb.succ):
            pths = succ.pred_paths[bb]
            if not minimal:
                s += '\t%d -> %d [xlabel="%s"];\n' % (
                    bb.start, succ.start, '|'.join(' -> '.join('%x' % a for a in p) for p in pths))
            else:
                s += '\t%d -> %d;\n' % (bb.start, succ.start)
    if self._dd:
        # draw inter-block data-dependence edges, one hue per dependent block
        inter_bb = {}
        for k, v in self._dd.items():
            jbb = k.bb.start
            vbbs = {i.bb.start for i in v if i.bb.start != k.bb.start}
            if vbbs:
                inter_bb[jbb] = vbbs
        l = len(inter_bb)
        for i, (k, v) in enumerate(inter_bb.items()):
            for j in v:
                s += '\t%d -> %d[color="%.3f 1.0 1.0", weight=10];\n' % (j, k, (1.0 * i) / l)
        s += '\n'
    s += '}'
    return s
150 |
def trim(self):
    """Remove every basic block that is not a descendant of the root.

    Both self.bbs and the address index self._bb_at are pruned in place.
    """
    reachable = set(self.root.descendants)
    surviving = []
    for bb in self.bbs:
        if bb.start in reachable:
            surviving.append(bb)
    self.bbs = surviving
    for addr in set(self._bb_at) - reachable:
        del self._bb_at[addr]
157 |
def to_json(self):
    """Serialize the CFG topology to a JSON-compatible dict.

    Each block records its start address and, per successor, the start
    address and the set of discovery paths (as a list).
    """
    blocks = []
    for bb in sorted(self.bbs):
        succs = []
        for succ in sorted(bb.succ):
            succs.append({'start': succ.start, 'paths': list(succ.pred_paths[bb])})
        blocks.append({'start': bb.start, 'succs': succs})
    return {'bbs': blocks}
162 |
@staticmethod
def from_json(json_dict, code):
    """Rebuild a CFG from to_json() output plus the original bytecode.

    Blocks are re-disassembled from *code* at each recorded start address;
    edges (with their discovery paths) are then re-added explicitly.
    """
    from .disassembly import disass
    bbs = list()
    for bb_dict in json_dict['bbs']:
        bbs.append(BB(list(disass(code, bb_dict['start']))))
    # edges are restored below, so skip automatic xref fixing
    cfg = CFG(bbs, fix_xrefs=False)
    for bb_dict in json_dict['bbs']:
        bb = cfg._bb_at[bb_dict['start']]
        for succ_dict in bb_dict['succs']:
            succ = cfg._bb_at[succ_dict['start']]
            for path in succ_dict['paths']:
                bb.add_succ(succ, path)
    return cfg
177 |
178 | @staticmethod
179 | def distance_map(ins):
180 | dm = dict()
181 | todo = deque()
182 | todo.append((ins.bb, 0))
183 | while todo:
184 | bb, d = todo.pop()
185 | if not bb in dm or dm[bb] > d:
186 | dm[bb] = d
187 | for p in bb.pred:
188 | todo.append((p, d + 1 if len(p.succ) > 1 else d))
189 | return dm
190 |
191 | """ Added code start here"""
192 |
def to_ssa(self, code: bytes, minimal=False):
    """Recover an SSA form of *code* with rattle and render one SVG per function.

    Writes output/<function>.svg by piping each function's dot graph through
    the graphviz `dot` tool.

    Fixes vs. original: `os.makedirs` now uses exist_ok=True instead of a
    bare `except: pass` (which swallowed *all* errors, e.g. permission
    failures on other OS errors would also have been hidden); it is hoisted
    out of the loop; and the temporary .dot file is closed/deleted via a
    context manager instead of being leaked.
    """
    # rattle's recovery is deeply recursive; raise the limit for large contracts
    sys.setrecursionlimit(10000)

    edges = []
    ssa = rattle.Recover(code, edges=edges, split_functions=False)

    # create the output directory once, tolerating a pre-existing one
    os.makedirs('output', exist_ok=True)

    for function in ssa.functions:
        g = rattle.ControlFlowGraph(function)
        out_file = f'output/{function.desc()}.svg'
        # context manager guarantees the temp .dot file is cleaned up
        with tempfile.NamedTemporaryFile(suffix='.dot', mode='w') as t:
            t.write(g.dot())
            t.flush()
            subprocess.call(['dot', '-Tsvg', f'-o{out_file}', t.name])
        print(f'[+] Wrote {function.desc()} to {out_file}')
214 |
215 |
def edges(self):
    """Return every CFG edge as a [src_start, dst_start] pair, in sorted block order."""
    return [[bb.start, succ.start]
            for bb in sorted(self.bbs)
            for succ in sorted(bb.succ)]
222 |
def assert_sinks(self):
    """Map the last instruction of qualifying predecessors to INVALID sinks.

    A sink is a single-instruction block holding INVALID (0xfe).  Predecessor
    blocks containing SLOAD or CALLDATALOAD are skipped: those typically
    belong to validation logic rather than assertion failures.
    """
    sinks = {}
    for bb in self.bbs:
        # only single-instruction INVALID blocks qualify as sinks
        if len(bb.ins) != 1 or hex(bb.ins[0].op) != '0xfe':
            continue
        for pred in bb.pred:
            names = [ins.name for ins in pred.ins]
            if 'SLOAD' in names:
                continue
            # Avoid cases when validating
            if 'CALLDATALOAD' in names:
                continue
            sinks[pred.ins[-1]] = bb.ins[0]
    return sinks
236 |
def call_sinks(self):
    """Heuristically collect CALL instructions whose result appears unchecked.

    Scans all reachable CALLs and filters out the ones followed by known
    return-value-check patterns (ISZERO/JUMPI sequences, revert successors,
    returndata handling, etc.) by matching opcode-name subsequences within
    and around the CALL's block.  What survives is reported as a potential
    unchecked-call sink.

    NOTE(review): the filters below are byte-pattern heuristics presumably
    tuned to Solidity-compiled bytecode shapes — confirm against upstream
    before altering any of the matched sequences.
    """
    instructions = []
    # reachable CALLs only (root address 0 among the block's ancestors)
    call_insn= [ins for bb in self.bbs for ins in bb.ins if ins.name in set(['CALL']) and 0 in bb.ancestors | {bb.start}]
    for call_ins in call_insn:
        call_succ=[succ.start for succ in self._bb_at[call_ins.bb.start].succ]
        if len(call_succ)==0:
            # CALL block has no successors at all: nothing can check the result
            instructions.append(call_ins)
            continue
        call_bb_ins = self._bb_at[call_ins.bb.start].ins
        # block ends with an ISZERO/PUSH2/JUMPI check of the call result
        if len(call_succ)==1 and [ins.name for ins in call_bb_ins[-3:]]==['ISZERO','PUSH2','JUMPI']:
            continue
        if len(call_succ)==2 and [ins.name for ins in call_bb_ins[call_bb_ins.index(call_ins)+1:] if ins.name in set(['ADD','AND','ISZERO','JUMPI'])] ==['ADD','AND','ISZERO','JUMPI']: #propagate throw
            continue

        call_ins_index=call_bb_ins.index(call_ins)
        # memory/returndata handling patterns right after the CALL
        if [ins.name for ins in call_bb_ins[call_ins_index+1:call_ins_index+22] if ins.name in set(['ADD','MSTORE','MLOAD','SUB','SHA3'])]==['ADD','MSTORE','ADD','MLOAD','SUB','SHA3']:
            continue
        if [ins.name for ins in call_bb_ins[call_ins_index+1:call_ins_index+17] if ins.name in set(['ADD','MLOAD','SUB','SHA3'])]==['ADD','MLOAD','SUB','SHA3']:
            continue
        if [ins.name for ins in call_bb_ins[call_ins_index+1:call_ins_index+12] if ins.name in set(['ADD','MSTORE','SUB'])]==['ADD','SUB','MSTORE']:
            continue

        # a successor that reverts/aborts implies the result is checked
        if len([succ.start for succ in self._bb_at[call_ins.bb.start].succ for ins in succ.ins if ins.name in set(['REVERT','INVALID'])])!=0:
            continue
        if len([succ.start for succ in self._bb_at[call_ins.bb.start].succ if [ins.name for ins in succ.ins]==['PUSH2','JUMP']])!=0:
            continue

        min_succ_bb= self._bb_at[min(call_succ)]

        succ_with_call_bb=[succ for succ in self._bb_at[call_ins.bb.start].succ for ins in succ.ins if ins.name in set(['CALL'])]
        # lowest-address successor ends in REVERT (0xfd) / INVALID (0xfe): checked
        if (['%x' %ins.op for ins in min_succ_bb.ins][-1] in set(['fd','fe']) or [ins.name for ins in min_succ_bb.ins]==['PUSH2','JUMP']):
            continue
        if (['%x' %ins.op for ins in min_succ_bb.ins][-1] not in set(['fd','fe']) and [*['0x','0x'],*['%x' %ins.op for ins in min_succ_bb.ins]][-3] not in set(['3e']) and [ins.name for ins in min_succ_bb.ins]!=['PUSH2','JUMP'] and len(succ_with_call_bb)==0):
            instructions.append(call_ins)
        elif ([*['0x'],*['%x' %ins.op for ins in min_succ_bb.ins]][-3] in set(['3e'])):
            # 0x3e is RETURNDATACOPY: look one level deeper for the revert check
            call_ret_succ=[succ.start for succ in self._bb_at[min([succ.start for succ in min_succ_bb.succ])].succ]
            ret_min_succ_bb= self._bb_at[min(call_ret_succ)]
            if (['%x' %ins.op for ins in ret_min_succ_bb.ins][-1] not in set(['fd','fe'])):
                instructions.append(call_ins)
        elif (len(succ_with_call_bb)>0):
            # chained CALL in a successor: inspect that call's successors instead
            sec_call_succ=[succ.start for succ in succ_with_call_bb[0].succ]
            min_sec_call_succ_bb= self._bb_at[min(sec_call_succ)]
            if( [ins.name for ins in min_sec_call_succ_bb.ins]!=['PUSH2','JUMP']):
                instructions.append(call_ins)

    return instructions
283 |
    def find_loops(self, with_calls=False):
        """Detect loops in the CFG via cycle enumeration and pattern heuristics.

        Builds a directed graph of basic-block edges, enumerates simple
        cycles with networkx, then filters them with bytecode-pattern
        heuristics (memcopy-like loops, gas-bounded loops, etc.).

        :param with_calls: if True, return calls found inside loops
            (``calls_in_loops``: bb start -> [CALL Instruction]); if False,
            return ``loops``: loop head start -> [cycle length, exit bb starts...].
        """
        import networkx
        g = networkx.DiGraph()
        for bb in self.bbs:
            for succ in bb.succ:
                g.add_edge(bb.start,succ.start)
        l= list(networkx.simple_cycles(g))
        loops=defaultdict(list)
        calls_in_loops=defaultdict(list)
        loops_with_calls=[]
        loops_with_gas_sanitizers =[]
        for i in l:
            # Skip cycles containing a block with more than two predecessors
            # (likely shared code, not a genuine loop).
            if len([h for h in i if(len(self._bb_at[h].pred))>2])>0:
                continue

            loop_bbs=[bb for j in i for bb in self.bbs if bb.start==j]
            # Self-loops are ignored.
            if len(i) ==1:
                continue
            # Loop head: target of a back edge, entered from outside the cycle,
            # with at most two successors, whose back-edge source looks like an
            # induction-variable update (ADD) or an unconditional jump.
            head =[succ.start for bb in loop_bbs for succ in bb.succ if succ.start in i if bb.start>succ.start and len([p.start for p in succ.pred if p.start not in i])!=0 and len(succ.succ)<=2 and ('ADD' in [ins.name for ins in bb.ins] or [ins.name for ins in bb.ins[-2:]]==['PUSH2','JUMP'])]

            if len(head)==0:
                continue
            # Slot 0 of loops[head] holds the cycle length; replace any stale value.
            if len(loops[head[0]]):
                loops[head[0]].pop(0)
            loops[head[0]].insert(0,len(i))

            back_edge =[[succ.start,bb.start] for bb in loop_bbs for succ in bb.succ if succ.start in i if bb.start>succ.start]

            # Two-block cycles: keep only bodies matching a specific
            # SSTORE-increment pattern; everything else is filtered out.
            if len(i)==2:
                body_ins=[ins.name for bb in loop_bbs for ins in bb.ins if bb.start!=head[0] and ins.name in ['ADD','SUB','MLOAD','MSTORE','JUMP','SSTORE','EXP','NOT','MUL','PUSH1','POP','SWAP1','DUP2']]
                body_start=[bb.start for bb in loop_bbs if bb.start!=head[0]]
                if 'MLOAD' in body_ins:
                    continue
                elif body_ins == ['PUSH1','DUP2','PUSH1','SWAP1','SSTORE','POP','PUSH1','ADD','JUMP']:
                    head_pred=[pred.start for bb in loop_bbs for pred in bb.pred if bb.start==head[0] and pred.start !=body_start[0]]
                    head_pred1=[pred.start for pred in self._bb_at[head_pred[0]].pred]
                    head_pred2=[pred.start for pred in self._bb_at[head_pred1[0]].pred]
                    head_pred_ins=[ins.name for bb in self._bb_at[head[0]].pred for ins in bb.ins if bb.start !=body_start[0] and ins.name in ['ADD','MSTORE','JUMP','SSTORE','SHA3','SLOAD']]
                    head_pred1_ins=[ins.name for bb in self._bb_at[head_pred[0]].pred for ins in bb.ins if ins.name in ['ADD','MSTORE','JUMP','SSTORE','SHA3','SLOAD']]
                    if len(head_pred2)==3:
                        continue
                    if head_pred1_ins !=['SLOAD','SSTORE','MSTORE','SHA3','ADD','JUMP'] and head_pred_ins !=['SLOAD','SSTORE','MSTORE','SHA3','ADD','JUMP']:
                        continue
                else:
                    continue

            # Three-block cycles: drop memory-copy style loops (MLOAD/MSTORE bodies).
            if len(i)==3:
                body_ins=[ins.name for bb in loop_bbs for ins in bb.ins if bb.start not in back_edge[0] and ins.name in ['ADD','SUB','MLOAD','MSTORE','JUMP','SSTORE','EXP','NOT','MUL']]
                if body_ins == ['MLOAD','MSTORE'] or body_ins==['ADD','MLOAD','ADD','MSTORE']:
                    continue

            # Walk the cycle starting at the head and record the blocks that can
            # exit the loop (all-internal successors, or a 2-way branch whose
            # external edge is INVALID (0xfe) or a plain exit branch).
            head_cnt=0
            for k in range(i.index(head[0]),len(i)+i.index(head[0])):
                indx=k%len(i)
                if len([bb for bb in loop_bbs if bb.start==i[indx] and len(bb.succ)!=0 and len([succ for succ in bb.succ if succ in loop_bbs]) ==len(bb.succ)])!=0:
                    loops[head[0]].append(i[indx])
                    head_cnt+=1
                elif len([bb for bb in loop_bbs if bb.start==i[indx] and len(bb.succ)!=0 and len([succ for succ in bb.succ if succ in loop_bbs]) ==1 and len(bb.succ)==2 and len([succ for succ in bb.succ if self._ins_at[succ.start].op==254])==1])!=0:
                    loops[head[0]].append(i[indx])
                    head_cnt+=1
                elif len([bb for bb in loop_bbs if bb.start==i[indx] and len(bb.succ)!=0 and len([succ for succ in bb.succ if succ in loop_bbs]) ==1 and len(bb.succ)==2])!=0:
                    loops[head[0]].append(i[indx])
                    head_cnt+=1
                    break

            # Record loops guarded by a gas check (PUSH2/PUSH3, GAS, GT [..JUMPI]);
            # those are removed from the result below.
            if not with_calls:
                for bb in loop_bbs:
                    if bb.start in loops[head[0]] and 'GAS' in [ins.name for ins in bb.ins]:
                        block_ins =[ins.name for ins in bb.ins if ins.name in ['PUSH2','PUSH3','GAS','GT']]
                        if block_ins ==['PUSH3','GAS','GT'] or block_ins ==['PUSH2','GAS','GT']:
                            loops_with_gas_sanitizers.append(head[0])
                            break
                    elif bb.start not in loops[head[0]] and 'GAS' in [ins.name for ins in bb.ins]:
                        block_ins =[ins.name for ins in bb.ins if ins.name in ['PUSH2','PUSH3','GAS','GT','JUMPI']]
                        if block_ins[-5:] == ['PUSH3','GAS','GT','PUSH2','JUMPI'] or block_ins[-5:] == ['PUSH2','GAS','GT','PUSH2','JUMPI']:
                            loops_with_gas_sanitizers.append(head[0])
                            break

            # Collect CALLs whose failure branch reverts (min successor ends in
            # REVERT 0xfd / INVALID 0xfe); loops without such calls are discarded.
            if with_calls:
                calls_in_loop= False
                for bb in loop_bbs:
                    if bb.start not in loops[head[0]] and 'CALL' in [ins.name for ins in bb.ins]:
                        call_succ=[succ.start for succ in self._bb_at[bb.start].succ]
                        if len(call_succ)==2:
                            if (['%x' %ins.op for ins in self._bb_at[min(call_succ)].ins][-1] in set(['fd','fe'])):
                                calls_in_loop=True
                                loops_with_calls.append(head[0])
                                calls_in_loops[bb.start].append([ins for ins in bb.ins if 'CALL'==ins.name][0])
                                break
                if with_calls and not calls_in_loop and head[0] not in loops_with_calls:
                    del loops[head[0]]
                elif with_calls and not calls_in_loop:
                    # NOTE(review): this loop rebinds the outer cycle variable `i`;
                    # harmless today only because `i` is reassigned on the next
                    # iteration of the enclosing loop.
                    for i in range(0,head_cnt):
                        loops[head[0]].pop()

        # Drop loops that are protected by an explicit gas sanitizer.
        for san in set(loops_with_gas_sanitizers):
            if loops.get(san,'nokey')!='nokey':
                del loops[san]

        if not with_calls:
            return loops
        else:
            return calls_in_loops
388 |
389 |
390 |
391 |
392 |
--------------------------------------------------------------------------------
/src/cfg/disassembly.py:
--------------------------------------------------------------------------------
1 | import logging
2 | from collections import deque
3 |
4 | from src.cfg.bb import BB
5 | from src.cfg.instruction import Instruction
6 | from src.cfg.opcodes import opcodes
7 |
8 |
class ArgumentTooShort(Exception):
    """Raised when a PUSHn immediate operand runs past the end of the bytecode."""
    pass
11 |
12 |
def disass(code, i=0):
    """Disassemble `code` starting at byte offset `i`, yielding Instruction objects.

    Generation stops at the end of the current basic block: after a
    terminator opcode (STOP, JUMP, JUMPI, RETURN, REVERT, INVALID,
    SELFDESTRUCT), just before a JUMPDEST, or at the first unknown opcode.

    :param code: raw EVM bytecode
    :param i: offset at which disassembly starts
    :raises ArgumentTooShort: if a PUSHn immediate is truncated by the end
        of the bytecode
    """
    assert isinstance(code, bytes)
    while i < len(code):
        loc = i
        op = code[i]
        arg = None
        if op not in opcodes:
            # Unknown opcode: treat as end of disassemblable code.
            break
        if 0x60 <= op <= 0x7f:
            # PUSH1..PUSH32 carry a 1..32-byte immediate operand.
            arglen = op - 0x5f
            arg = code[i + 1:i + 1 + arglen]
            if len(arg) < arglen:
                raise ArgumentTooShort
            i += arglen
        i += 1
        yield Instruction(loc, op, arg)
        # End basic block on STOP, JUMP, JUMPI, RETURN, REVERT, INVALID,
        # SELFDESTRUCT, or if the following instruction is a JUMPDEST.
        if op in (0x00, 0x56, 0x57, 0xf3, 0xfd, 0xfe, 0xff) or (i < len(code) and code[i] == 0x5b):
            break
35 |
36 |
def generate_BBs(code):
    """Yield a candidate basic block (BB) for every potential leader in `code`.

    Leader candidates are offset 0, every byte following a JUMPI (the
    fall-through target), and every JUMPDEST. Candidates whose disassembly
    fails (e.g. a truncated PUSH immediate) are skipped.
    """
    fallthrough_locs = [i + 1 for i, c in enumerate(code) if c == 0x57]  # byte after each JUMPI
    jumpdest_locs = [i for i, c in enumerate(code) if c == 0x5b]  # every JUMPDEST
    leader_candidates = {0} | set(fallthrough_locs) | set(jumpdest_locs)
    for l in sorted(leader_candidates):
        try:
            instructions = list(disass(code, l))
            if instructions:
                yield BB(instructions)
        except Exception:
            # Was a bare `except:`; narrowed so KeyboardInterrupt/SystemExit
            # stay fatal while best-effort disassembly failures are skipped.
            continue
--------------------------------------------------------------------------------
/src/cfg/instruction.py:
--------------------------------------------------------------------------------
1 | from src.cfg.opcodes import opcodes
2 |
3 |
class Instruction(object):
    """A single disassembled EVM instruction.

    Wraps an opcode with its metadata from the `opcodes` table: mnemonic,
    stack effect (items consumed/produced), static gas cost, and the
    optional immediate argument of PUSHn instructions.
    """

    def __init__(self, addr, op, arg=None):
        """
        :param addr: byte offset of the instruction in the bytecode
        :param op: opcode byte (must be a key of `opcodes`)
        :param arg: immediate operand bytes for PUSHn, else None
        :raises ValueError: if arg is neither bytes nor None
        :raises KeyError: if op is not a known opcode
        """
        if not (arg is None or isinstance(arg, bytes)):
            raise ValueError('Instruction arg must be bytes or None')
        opinfo = opcodes[op]
        # PUSH1..PUSH32 (0x60..0x7f) occupy 1 + (1..32) bytes.
        inslen = (op - 0x5f) + 1 if 0x60 <= op <= 0x7f else 1
        self.addr = addr
        self.next_addr = self.addr + inslen
        self.op = op
        self.name = opinfo[0]
        self.arg = arg
        self.ins = opinfo[1]  # stack items consumed
        self.outs = opinfo[2]  # stack items produced
        self.gas = opinfo[3]
        self.delta = self.outs - self.ins  # net stack effect
        self.bb = None  # back-reference to the owning basic block, set by BB

    def __str__(self):
        return '(%5d) %4x:\t%02x\t-%d +%d = %d\t%s%s' % (
            self.addr, self.addr, self.op, self.ins, self.outs, self.delta, self.name,
            '(%d) %s' % (int.from_bytes(self.arg, byteorder='big'), '\t%s' % self.arg.hex()) if self.arg else '')

    def __repr__(self):
        return str(self)

    def __hash__(self):
        return 17 * self.addr + 19 * self.op + 23 * hash(self.arg)

    def __eq__(self, other):
        # Previously raised AttributeError when compared against foreign types;
        # returning NotImplemented lets Python fall back to identity/other's __eq__.
        if not isinstance(other, Instruction):
            return NotImplemented
        return (self.addr == other.addr and
                self.op == other.op and
                self.arg == other.arg)
37 |
--------------------------------------------------------------------------------
/src/cfg/opcodes.py:
--------------------------------------------------------------------------------
# schema: opcode byte -> [name, ins (stack items consumed), outs (produced), gas]
opcodes = {
    0x00: ['STOP', 0, 0, 0],
    0x01: ['ADD', 2, 1, 3],
    0x02: ['MUL', 2, 1, 5],
    0x03: ['SUB', 2, 1, 3],
    0x04: ['DIV', 2, 1, 5],
    0x05: ['SDIV', 2, 1, 5],
    0x06: ['MOD', 2, 1, 5],
    0x07: ['SMOD', 2, 1, 5],
    0x08: ['ADDMOD', 3, 1, 8],
    0x09: ['MULMOD', 3, 1, 8],
    0x0a: ['EXP', 2, 1, 10],
    0x0b: ['SIGNEXTEND', 2, 1, 5],
    0x10: ['LT', 2, 1, 3],
    0x11: ['GT', 2, 1, 3],
    0x12: ['SLT', 2, 1, 3],
    0x13: ['SGT', 2, 1, 3],
    0x14: ['EQ', 2, 1, 3],
    0x15: ['ISZERO', 1, 1, 3],
    0x16: ['AND', 2, 1, 3],
    0x17: ['OR', 2, 1, 3],
    0x18: ['XOR', 2, 1, 3],
    0x19: ['NOT', 1, 1, 3],
    0x1a: ['BYTE', 2, 1, 3],
    0x1b: ['SHL', 2, 1, 3],
    0x1c: ['SHR', 2, 1, 3],
    0x1d: ['SAR', 2, 1, 3],
    0x20: ['SHA3', 2, 1, 30],
    0x30: ['ADDRESS', 0, 1, 2],
    0x31: ['BALANCE', 1, 1, 20],  # now 400
    0x32: ['ORIGIN', 0, 1, 2],
    0x33: ['CALLER', 0, 1, 2],
    0x34: ['CALLVALUE', 0, 1, 2],
    0x35: ['CALLDATALOAD', 1, 1, 3],
    0x36: ['CALLDATASIZE', 0, 1, 2],
    0x37: ['CALLDATACOPY', 3, 0, 3],
    0x38: ['CODESIZE', 0, 1, 2],
    0x39: ['CODECOPY', 3, 0, 3],
    0x3a: ['GASPRICE', 0, 1, 2],
    0x3b: ['EXTCODESIZE', 1, 1, 20],  # now 700
    0x3c: ['EXTCODECOPY', 4, 0, 20],  # now 700
    0x3d: ['RETURNDATASIZE', 0, 1, 2],
    0x3e: ['RETURNDATACOPY', 3, 0, 3],
    0x40: ['BLOCKHASH', 1, 1, 20],
    0x41: ['COINBASE', 0, 1, 2],
    0x42: ['TIMESTAMP', 0, 1, 2],
    0x43: ['NUMBER', 0, 1, 2],
    0x44: ['DIFFICULTY', 0, 1, 2],
    0x45: ['GASLIMIT', 0, 1, 2],
    0x50: ['POP', 1, 0, 2],
    0x51: ['MLOAD', 1, 1, 3],
    0x52: ['MSTORE', 2, 0, 3],
    0x53: ['MSTORE8', 2, 0, 3],
    0x54: ['SLOAD', 1, 1, 50],  # 200 now
    0x55: ['SSTORE', 2, 0, 0],  # actual cost 5000-20000 depending on circumstance
    0x56: ['JUMP', 1, 0, 8],
    0x57: ['JUMPI', 2, 0, 10],
    0x58: ['PC', 0, 1, 2],
    0x59: ['MSIZE', 0, 1, 2],
    0x5a: ['GAS', 0, 1, 2],
    0x5b: ['JUMPDEST', 0, 0, 1],
    0xa0: ['LOG0', 2, 0, 375],
    0xa1: ['LOG1', 3, 0, 750],
    0xa2: ['LOG2', 4, 0, 1125],
    0xa3: ['LOG3', 5, 0, 1500],
    0xa4: ['LOG4', 6, 0, 1875],
    0xe1: ['SLOADBYTES', 3, 0, 50],  # to be discontinued
    0xe2: ['SSTOREBYTES', 3, 0, 0],  # to be discontinued
    0xe3: ['SSIZE', 1, 1, 50],  # to be discontinued
    0xf0: ['CREATE', 3, 1, 32000],
    0xf1: ['CALL', 7, 1, 40],  # 700 now
    0xf2: ['CALLCODE', 7, 1, 40],  # 700 now
    0xf3: ['RETURN', 2, 0, 0],
    0xf4: ['DELEGATECALL', 6, 1, 40],  # 700 now
    0xf5: ['CALLBLACKBOX', 7, 1, 40],
    0xfa: ['STATICCALL', 6, 1, 40],
    0xfd: ['REVERT', 2, 0, 0],
    0xfe: ['INVALID', 0, 0, 1],
    0xff: ['SELFDESTRUCT', 1, 0, 0],  # 5000 now
}
82 |
# PUSH1..PUSH32 (0x60..0x7f): no stack inputs, one output, 1..32-byte immediate.
for i in range(1, 33):
    opcodes[0x5f + i] = ['PUSH' + str(i), 0, 1, 3]

# DUPn (0x80..0x8f) copies the n-th item (n items read, n+1 written);
# SWAPn (0x90..0x9f) exchanges the top with the (n+1)-th item.
for i in range(1, 17):
    opcodes[0x7f + i] = ['DUP' + str(i), i, i + 1, 3]
    opcodes[0x8f + i] = ['SWAP' + str(i), i + 1, i + 1, 3]

# NOTE(review): vars() at module level injects every mnemonic (ADD, CALL, ...)
# as a module global bound to its opcode-info list; other modules may import
# these dynamically-created names, so do not remove this.
reverse_opcodes = {}
for o in opcodes:
    vars()[opcodes[o][0]] = opcodes[o]
    reverse_opcodes[opcodes[o][0]] = o
94 |
# Non-opcode gas prices
GDEFAULT = 1
GMEMORY = 3
GQUADRATICMEMDENOM = 512  # 1 gas per 512 quadwords
GEXPONENTBYTE = 10  # cost of EXP exponent per byte
GCOPY = 3  # cost to copy one 32 byte word
GCONTRACTBYTE = 200  # one byte of code in contract creation
GCALLVALUETRANSFER = 9000  # non-zero-valued call
GLOGBYTE = 8  # cost of a byte of logdata

GTXCOST = 21000  # TX BASE GAS COST
GTXDATAZERO = 4  # TX DATA ZERO BYTE GAS COST
GTXDATANONZERO = 68  # TX DATA NON ZERO BYTE GAS COST
GSHA3WORD = 6  # Cost of SHA3 per word
GSHA256BASE = 60  # Base cost of SHA256
GSHA256WORD = 12  # Cost of SHA256 per word
GRIPEMD160BASE = 600  # Base cost of RIPEMD160
GRIPEMD160WORD = 120  # Cost of RIPEMD160 per word
GIDENTITYBASE = 15  # Base cost of identity
GIDENTITYWORD = 3  # Cost of identity per word
GECRECOVER = 3000  # Cost of ecrecover op

GSTIPEND = 2300

GCALLNEWACCOUNT = 25000
GSELFDESTRUCTREFUND = 24000

GSTORAGEBASE = 2500
GSTORAGEBYTESTORAGE = 250
GSTORAGEBYTECHANGE = 40
GSTORAGEMIN = 2500
GSSIZE = 50
GSLOADBYTES = 50

GSTORAGEREFUND = 15000
GSTORAGEKILL = 5000
GSTORAGEMOD = 5000
GSTORAGEADD = 20000

GMODEXPQUADDIVISOR = 100
GECADD = 500
GECMUL = 2000

GPAIRINGBASE = 100000
GPAIRINGPERPOINT = 80000

EXP_SUPPLEMENTAL_GAS = 40

# Anti-DoS HF changes
SLOAD_SUPPLEMENTAL_GAS = 150
CALL_SUPPLEMENTAL_GAS = 660
EXTCODELOAD_SUPPLEMENTAL_GAS = 680
BALANCE_SUPPLEMENTAL_GAS = 380
CALL_CHILD_LIMIT_NUM = 63
CALL_CHILD_LIMIT_DENOM = 64
SELFDESTRUCT_SUPPLEMENTAL_GAS = 5000

# NOTE(review): the tuples appear to be (stack position of the memory offset,
# stack position of the size operand or a literal byte width) with negative
# values indexing from the top of the stack -- confirm against the consumers
# of these tables before relying on this.
memory_writes = {'CALLDATACOPY': (-1, -3), 'CODECOPY': (-1, -3), 'EXTCODECOPY': (-2, -4), 'MSTORE': (-1, 32),
                 'MSTORE8': (-1, 8), 'CALL': (-6, -7), 'CALLCODE': (-6, -7), 'DELEGATECALL': (-5, -6)}
memory_reads = {'SHA3': (-1, -2), 'MLOAD': (-1, 32), 'CREATE': (-2, -3), 'CALL': (-4, -5), 'CALLCODE': (-4, -5),
                'RETURN': (-1, -2), 'DELEGATECALL': (-3, -4)}
storage_writes = {'SSTORE': -1}
storage_reads = {'SLOAD': -1}

potentially_user_controlled = ['ORIGIN', 'CALLER', 'CALLVALUE', 'CALLDATALOAD', 'CALLDATASIZE', 'CALLDATACOPY',
                               'EXTCODESIZE', 'EXTCODECOPY', 'MLOAD', 'SLOAD']

potentially_direct_user_controlled = ['ORIGIN', 'CALLER', 'CALLVALUE', 'CALLDATALOAD', 'CALLDATASIZE', 'CALLDATACOPY',
                                      'EXTCODESIZE', 'EXTCODECOPY', 'MLOAD' ,'SLOAD']

# Instructions that typically appear in access-control checks.
ins_in_ac_check = ['CALLER','SLOAD']

external_data = ['RETURNDATACOPY', 'RETURNDATASIZE', 'EXTCODESIZE', 'EXTCODECOPY']

CRITICAL = ['CALL', 'DELEGATECALL', 'CALLCODE', 'SELFDESTRUCT']

# map denoting attacker controlled stack arguments
CRITICAL_ARGS = {
    'CALL': [1],
    'DELEGATECALL': [1],
    'CALLCODE': [1],
    'SELFDESTRUCT': [0],
    'JUMPI': [1],
    'ISZERO': [0],
    'GT':[0,1],
    'LT':[0,1]
}
182 |
--------------------------------------------------------------------------------
/src/cfg/rattle/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 |
4 | from .analyze import *
5 | from .evmasm import *
6 | from .recover import Recover
7 |
--------------------------------------------------------------------------------
/src/cfg/rattle/analyze.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 |
4 | from .recover import *
5 |
6 | logger = logging.getLogger(__name__)
7 |
8 |
class UseDefGraph(object):
    """Renders the def/use chains around a StackValue as a Graphviz digraph."""
    value: StackValue

    def __init__(self, value: StackValue) -> None:
        self.value = value

    def dot(self) -> str:
        """Return the graph in DOT syntax.

        BUG FIX: this previously assembled the digraph string but fell off
        the end without returning it (returned None); compare
        DefUseGraph.dot, which has the `return rv`.
        """
        rv = ''
        rv += 'digraph G {\n'

        es = self.edges(self.value)

        for reader in self.value.readers():
            reader_s = str(reader).replace('%', '\\%')
            value_s = str(self.value).replace('%', '\\%')
            es.append(f"\"{value_s}\" -> \"{reader_s}\"")

        rv += '\n'.join(list(set(es)))
        rv += '\n}'
        return rv

    def edges(self, value) -> List[str]:
        """Recursively collect writer->value, arg->writer and value->reader edges."""
        rv = []
        writer = value.writer
        if writer is None:
            return []

        value_s = str(value).replace('%', '\\%')
        writer_s = str(writer).replace('%', '\\%')
        rv.append(f"\"{writer_s}\" -> \"{value_s}\"")

        # Walk each argument of the writing instruction upwards.
        for arg in writer:
            arg_s = str(arg).replace('%', '\\%')
            writer_s = str(writer).replace('%', '\\%')
            rv.append(f"\"{arg_s}\" -> \"{writer_s}\"")
            rv.extend(self.edges(arg))

        for reader in writer.return_value.readers():
            reader_s = str(reader).replace('%', '\\%')
            value_s = str(value).replace('%', '\\%')
            rv.append(f"\"{value_s}\" -> \"{reader_s}\"")

        return rv
51 |
52 |
class DefUseGraph(object):
    """Renders the downstream uses of a StackValue as a Graphviz digraph."""
    value: StackValue

    def __init__(self, value: StackValue) -> None:
        self.value = value

    def dot(self, filt=None) -> str:
        """Return the graph in DOT syntax.

        :param filt: optional predicate on reader instructions; recursion
            only follows readers for which filt(reader) is true. Defaults
            to following every reader.
        """
        if filt is None:
            filt = lambda x: True

        rv = ''
        rv += 'digraph G {\n'

        es = self.edges(self.value, filt)

        for reader in self.value.readers():
            reader_s = str(reader).replace('%', '\\%')
            value_s = str(self.value).replace('%', '\\%')
            es.append(f"\"{value_s}\" -> \"{reader_s}\"")

        rv += '\n'.join(list(set(es)))
        rv += '\n}'

        return rv

    def edges(self, value, filt) -> List[str]:
        """Recursively collect writer->value and value->reader edges.

        NOTE(review): recursion has no visited-set; presumably the SSA
        def/use relation is acyclic here -- confirm before reusing on
        arbitrary graphs.
        """
        rv = []
        writer = value.writer
        if writer is None:
            return []

        value_s = str(value).replace('%', '\\%')
        writer_s = str(writer).replace('%', '\\%')
        rv.append(f"\"{writer_s}\" -> \"{value_s}\"")

        for reader in writer.return_value.readers():
            reader_s = str(reader).replace('%', '\\%')
            value_s = str(value).replace('%', '\\%')
            rv.append(f"\"{value_s}\" -> \"{reader_s}\"")

            if filt(reader):
                rv.extend(self.edges(reader.return_value, filt))

        return rv
97 |
98 |
class ControlFlowGraph(object):
    """Renders the basic-block control-flow graph of an SSAFunction in DOT syntax."""

    def __init__(self, function: SSAFunction) -> None:
        self.function = function

    def dot(self) -> str:
        """Return the CFG as a Graphviz digraph with record-shaped block nodes."""
        out = ''.join([
            'digraph G {\n',
            'graph [fontname = "consolas"];\n',
            'node [fontname = "consolas"];\n',
            'edge [fontname = "consolas"];\n',
        ])

        # Summary node describing the function itself.
        descriptor = [
            self.function.desc(),
            f'Hash: {self.function.hash:#x}',
            f'Start: {self.function.offset:#x}',
            f'Arguments: {self.function.arguments()}',
            f'Storage: {self.function.storage}',
        ]
        out += 'ff [label="{' + '\\l'.join(descriptor) + '\\l}}", shape="record" ];'

        edge_lines = []

        for block in self.function:
            node_id = f'block_{block.offset}'
            body = '\\l'.join(f'{insn.offset:#x}: {insn}' for insn in block)
            body = body.replace('<', '\\<').replace('>', '\\>')

            # Only label/color the two out-edges when the block really branches.
            fall_label = ''
            taken_label = ''
            if len(block.jump_edges) > 0 and block.fallthrough_edge:
                fall_label = ' [label=" f", color="red"]'
                taken_label = ' [label=" t", color="darkgreen"]'

            if block.fallthrough_edge:
                fall_id = f'block_{block.fallthrough_edge.offset}'
                edge_lines.append(f'{node_id} -> {fall_id}{fall_label};')

            for target in block.jump_edges:
                target_id = f'block_{target.offset}'
                edge_lines.append(f'{node_id} -> {target_id}{taken_label};')

            out += f'{node_id} [label="{body}\\l", shape="record"];' + '\n'

        # All node definitions first, then every edge.
        for line in edge_lines:
            out += line + '\n'

        out += '}\n'

        return out
--------------------------------------------------------------------------------
/src/cfg/rattle/evmasm.py:
--------------------------------------------------------------------------------
1 | from typing import Optional, Iterable
2 |
3 | import pyevmasm
4 |
5 |
class EVMAsm(object):
    '''
    EVM Instruction factory: a thin wrapper around pyevmasm that converts its
    Instruction objects into EVMInstruction (adding validation, hashing and
    classification helpers).

    Example use::

        >>> from manticore.platforms.evm import EVMAsm
        >>> EVMAsm.disassemble_one('\\x60\\x10')
        Instruction(0x60, 'PUSH', 1, 0, 1, 0, 'Place 1 byte item on stack.', 16, 0)
        >>> EVMAsm.assemble_one('PUSH1 0x10')
        Instruction(0x60, 'PUSH', 1, 0, 1, 0, 'Place 1 byte item on stack.', 16, 0)
        >>> tuple(EVMAsm.disassemble_all('\\x30\\x31'))
        (Instruction(0x30, 'ADDRESS', 0, 0, 1, 2, 'Get address of currently executing account.', None, 0),
        Instruction(0x31, 'BALANCE', 0, 1, 1, 20, 'Get balance of the given account.', None, 1))
        >>> tuple(EVMAsm.assemble_all('ADDRESS\\nBALANCE'))
        (Instruction(0x30, 'ADDRESS', 0, 0, 1, 2, 'Get address of currently executing account.', None, 0),
        Instruction(0x31, 'BALANCE', 0, 1, 1, 20, 'Get balance of the given account.', None, 1))
        >>> EVMAsm.assemble_hex(
        ...                         """PUSH1 0x60
        ...                            BLOCKHASH
        ...                            MSTORE
        ...                            PUSH1 0x2
        ...                            PUSH2 0x100
        ...                         """
        ...                      )
        '0x606040526002610100'
        >>> EVMAsm.disassemble_hex('0x606040526002610100')
        'PUSH1 0x60\\nBLOCKHASH\\nMSTORE\\nPUSH1 0x2\\nPUSH2 0x100'
    '''

    class EVMInstruction(pyevmasm.Instruction):
        def __init__(self, opcode: int, name: str, operand_size: int, pops: int, pushes: int, fee: int,
                     description: str, operand: Optional[int] = None, pc: Optional[int] = 0) -> None:
            '''
            This represents an EVM instruction.
            EVMAsm will create this for you.

            :param opcode: the opcode value
            :param name: instruction name
            :param operand_size: immediate operand size in bytes
            :param pops: number of items popped from the stack
            :param pushes: number of items pushed into the stack
            :param fee: gas fee for the instruction
            :param description: textual description of the instruction
            :param operand: optional immediate operand
            :param pc: optional program counter of this instruction in the program
            :raises ValueError: if the operand does not fit in operand_size bytes

            Example use::

                instruction = EVMAsm.assemble_one('PUSH1 0x10')
                print 'Instruction: %s'% instruction
                print '\tdescription:', instruction.description
                print '\tgroup:', instruction.group
                print '\tpc:', instruction.pc
                print '\tsize:', instruction.size
                print '\thas_operand:', instruction.has_operand
                print '\toperand_size:', instruction.operand_size
                print '\toperand:', instruction.operand
                print '\tsemantics:', instruction.semantics
                print '\tpops:', instruction.pops
                print '\tpushes:', instruction.pushes
                print '\tbytes:', '0x'+instruction.bytes.encode('hex')
                print '\twrites to stack:', instruction.writes_to_stack
                print '\treads from stack:', instruction.reads_from_stack
                print '\twrites to memory:', instruction.writes_to_memory
                print '\treads from memory:', instruction.reads_from_memory
                print '\twrites to storage:', instruction.writes_to_storage
                print '\treads from storage:', instruction.reads_from_storage
                print '\tis terminator', instruction.is_terminator


            '''
            super().__init__(opcode, name, operand_size, pops, pushes, fee, description, operand, pc)
            # Reject operands wider than the declared immediate size.
            if operand_size != 0 and operand is not None:
                mask = (1 << operand_size * 8) - 1
                if ~mask & operand:
                    raise ValueError("operand should be %d bits long" % (operand_size * 8))

        def __repr__(self) -> str:
            # NOTE: reads pyevmasm's private attributes (_opcode, _name, ...);
            # tied to that library's internal layout.
            output = 'EVMInstruction(0x{:x}, {}, {:d}, {:d}, {:d}, {:d}, {}, {}, {})'.format(
                self._opcode, self._name, self._operand_size,
                self._pops, self._pushes, self._fee, self._description, self._operand, self._pc)
            return output

        def __hash__(self) -> int:
            return hash((self._opcode, self._pops, self._pushes, self._pc))

        @property
        def is_push(self) -> bool:
            return self.semantics == 'PUSH'

        @property
        def is_pop(self) -> bool:
            return self.semantics == 'POP'

        @property
        def is_dup(self) -> bool:
            return self.semantics == 'DUP'

        @property
        def is_swap(self) -> bool:
            return self.semantics == 'SWAP'

        @property
        def is_comparison(self) -> bool:
            return self.semantics in ('LT', 'GT', 'SLT', 'SGT', 'EQ', 'ISZERO')

        @property
        def is_boolean_logic(self) -> bool:
            return self.semantics in ('AND', 'OR', 'XOR', 'NOT')

    @staticmethod
    def convert_instruction_to_evminstruction(instruction):
        """Re-wrap a pyevmasm Instruction as an EVMAsm.EVMInstruction."""
        return EVMAsm.EVMInstruction(instruction._opcode, instruction._name, instruction._operand_size,
                                     instruction._pops, instruction._pushes, instruction._fee,
                                     instruction._description, instruction._operand, instruction._pc)

    @staticmethod
    def assemble_one(assembler: str, pc: int = 0, fork=pyevmasm.DEFAULT_FORK) -> EVMInstruction:
        ''' Assemble one EVM instruction from its textual representation.

        :param assembler: assembler code for one instruction
        :param pc: program counter of the instruction in the bytecode (optional)
        :return: An Instruction object

        Example use::

            >>> print evm.EVMAsm.assemble_one('LT')


        '''
        instruction = pyevmasm.assemble_one(assembler, pc, fork)
        return EVMAsm.convert_instruction_to_evminstruction(instruction)

    @staticmethod
    def convert_multiple_instructions_to_evminstructions(instructions):
        """Lazily re-wrap an iterable of pyevmasm Instructions."""
        for i in instructions:
            yield EVMAsm.convert_instruction_to_evminstruction(i)

    @staticmethod
    def assemble_all(assembler: str, pc: int = 0, fork=pyevmasm.DEFAULT_FORK) -> Iterable[EVMInstruction]:
        ''' Assemble a sequence of textual representation of EVM instructions

        :param assembler: assembler code for any number of instructions
        :param pc: program counter of the first instruction in the bytecode(optional)
        :return: An generator of Instruction objects

        Example use::

            >>> evm.EVMAsm.assemble_one("""PUSH1 0x60\n \
                            PUSH1 0x40\n \
                            MSTORE\n \
                            PUSH1 0x2\n \
                            PUSH2 0x108\n \
                            PUSH1 0x0\n \
                            POP\n \
                            SSTORE\n \
                            PUSH1 0x40\n \
                            MLOAD\n \
                            """)

        '''
        instructions = pyevmasm.assemble_all(assembler, pc, fork)
        return EVMAsm.convert_multiple_instructions_to_evminstructions(instructions)

    @staticmethod
    def disassemble_one(bytecode: Iterable, pc: int = 0, fork=pyevmasm.DEFAULT_FORK) -> EVMInstruction:
        ''' Decode a single instruction from a bytecode

        :param bytecode: the bytecode stream
        :param pc: program counter of the instruction in the bytecode(optional)
        :type bytecode: iterator/sequence/str
        :return: an Instruction object

        Example use::

            >>> print EVMAsm.assemble_one('PUSH1 0x10')

        '''
        instruction = pyevmasm.disassemble_one(bytecode, pc, fork)
        return EVMAsm.convert_instruction_to_evminstruction(instruction)

    @staticmethod
    def disassemble_all(bytecode: Iterable, pc: int = 0, fork=pyevmasm.DEFAULT_FORK) -> Iterable[EVMInstruction]:
        ''' Decode all instructions in bytecode

        :param bytecode: an evm bytecode (binary)
        :param pc: program counter of the first instruction in the bytecode(optional)
        :type bytecode: iterator/sequence/str
        :return: An generator of Instruction objects

        Example use::

            >>> for inst in EVMAsm.decode_all(bytecode):
            ...    print inst

            ...
            PUSH1 0x60
            PUSH1 0x40
            MSTORE
            PUSH1 0x2
            PUSH2 0x108
            PUSH1 0x0
            POP
            SSTORE
            PUSH1 0x40
            MLOAD


        '''
        instructions = pyevmasm.disassemble_all(bytecode, pc, fork)
        return EVMAsm.convert_multiple_instructions_to_evminstructions(instructions)

    @staticmethod
    def disassemble(bytecode: Iterable, pc: int = 0, fork=pyevmasm.DEFAULT_FORK) -> str:
        ''' Disassemble an EVM bytecode

        :param bytecode: binary representation of an evm bytecode (hexadecimal)
        :param pc: program counter of the first instruction in the bytecode(optional)
        :type bytecode: str
        :return: the text representation of the assembler code

        Example use::

            >>> EVMAsm.disassemble("\x60\x60\x60\x40\x52\x60\x02\x61\x01\x00")
            ...
            PUSH1 0x60
            BLOCKHASH
            MSTORE
            PUSH1 0x2
            PUSH2 0x100

        '''
        return pyevmasm.disassemble(bytecode, pc, fork)

    @staticmethod
    def assemble(asmcode, pc=0, fork=pyevmasm.DEFAULT_FORK):
        """Assemble textual EVM assembly into raw bytecode (pyevmasm passthrough)."""
        return pyevmasm.assemble(asmcode, pc, fork)

    @staticmethod
    def disassemble_hex(bytecode: str, pc: int = 0, fork=pyevmasm.DEFAULT_FORK) -> str:
        ''' Disassemble an EVM bytecode

        :param bytecode: canonical representation of an evm bytecode (hexadecimal)
        :param int pc: program counter of the first instruction in the bytecode(optional)
        :type bytecode: str
        :return: the text representation of the assembler code

        Example use::

            >>> EVMAsm.disassemble_hex("0x6060604052600261010")
            ...
            PUSH1 0x60
            BLOCKHASH
            MSTORE
            PUSH1 0x2
            PUSH2 0x100

        '''
        return pyevmasm.disassemble_hex(bytecode, pc, fork)

    @staticmethod
    def assemble_hex(asmcode, pc=0, fork=pyevmasm.DEFAULT_FORK):
        """Assemble textual EVM assembly into a hex string (pyevmasm passthrough)."""
        return pyevmasm.assemble_hex(asmcode, pc, fork)
270 |
--------------------------------------------------------------------------------
/src/constraints.py:
--------------------------------------------------------------------------------
1 | import copy
2 | import itertools
3 | import logging
4 | from collections import defaultdict
5 |
6 | from z3 import z3, z3util
7 |
8 | from src.evm.exceptions import IntractablePath
9 | from src.evm.state import SymRead, concrete
10 | from src.util.utils import big_endian_to_int, sha3
11 | from src.util.z3_extra_util import get_vars_non_recursive, to_bytes, simplify_non_const_hashes
12 |
13 |
class UnresolvedConstraints(Exception):
    """Raised when constraints contain symbols that could not be resolved."""

    def __init__(self, unresolved):
        super().__init__(unresolved)
        # Keep the unresolved symbols accessible to callers.
        self.unresolved = unresolved
18 |
19 |
def array_to_array(model, array, length):
    """Concretize the first `length` bytes of a z3 array under `model`."""
    evaluated = (model.eval(array[idx]).as_long() for idx in range(length))
    return bytes(evaluated)
22 |
23 |
def get_level(name):
    """Return the integer suffix after the last '_' in `name`, or 0 if absent.

    E.g. 'CALLDATA_5' -> 5; 'CALLVALUE' -> 0. Used to map a z3 variable
    name to its call level.
    """
    try:
        return int(name[name.rfind('_') + 1:])
    except ValueError:
        # Was a bare `except:`; only a non-numeric suffix (including the
        # no-underscore case, where rfind yields -1 and the whole name is
        # parsed) should fall back to level 0.
        return 0
29 |
30 |
def model_to_calls(model, idx_dict):
    """Convert a z3 model into a list of concrete transaction-call dicts.

    Variables are grouped by their level suffix (see get_level); idx_dict
    maps a level to the call index. Each resulting dict may carry
    'payload' (bytes), 'value', 'caller' and 'origin'. Calls are returned
    ordered by call index.
    """
    calls = defaultdict(dict)
    for vref in model:
        name = vref.name()
        v = model[vref]
        # Only the environment variables we know how to concretize.
        if name.split('_')[0] not in ('CALLDATASIZE', 'CALLDATA', 'CALLVALUE', 'CALLER', 'ORIGIN'):
            continue
        call_index = idx_dict[get_level(name)]
        call = calls[call_index]
        if name.startswith('CALLDATASIZE'):
            payload_size = model.eval(v).as_long()
            call['payload_size'] = payload_size
        elif name.startswith('CALLDATA'):
            # Keep the symbolic array; concretized below once the size is known.
            call['payload_model'] = v
        elif name.startswith('CALLVALUE'):
            call['value'] = model.eval(v).as_long()
        elif name.startswith('CALLER'):
            call['caller'] = model.eval(v).as_long()
        elif name.startswith('ORIGIN'):
            call['origin'] = model.eval(v).as_long()
        else:
            logging.warning('CANNOT CONVERT %s', name)

    # Materialize each call's payload from the (array, size) pair.
    for call in calls.values():
        if 'payload_model' not in call:
            call['payload'] = bytes()
        else:
            assert 'payload_size' in call
            call['payload'] = array_to_array(model, call['payload_model'], call['payload_size'])
        call.pop('payload_size', None)
        call.pop('payload_model', None)

    return [v for k, v in sorted(calls.items())]
64 |
65 |
# Unrolling bound (in bytes) used when comparing two memory reads of
# symbolic length in symread_eq; larger values are more precise but slower.
MAX_SYM_READ_SIZE = 256
68 |
69 |
def symread_eq(a, b, size=MAX_SYM_READ_SIZE):
    """Build a z3 constraint stating that two memory reads are equal.

    a, b: fixed-size z3 bitvectors or SymRead instances (symbolic size).
    size: unrolling bound used when both operands have symbolic size.
    """
    if not isinstance(a, SymRead) and not isinstance(b, SymRead):
        # Both widths are concrete: different widths can never be equal.
        if a.size() != b.size():
            return z3.BoolVal(False)
        else:
            return a == b
    elif isinstance(a, SymRead) and isinstance(b, SymRead):
        # both have symbolic size
        # Sizes must match, and the first ``size`` bytes must agree wherever
        # they are within the (symbolic) read length.
        return z3.And(a.size == b.size,
                      *(z3.If(z3.ULT(i, a.size), a.memory[a.start + i] == b.memory[b.start + i], True) for i in
                        range(size)))
    else:
        if isinstance(b, SymRead):
            # ensure that a is the one with symbolic size
            a, b = b, a
        # a's symbolic byte-count must equal b's concrete byte-width, and the
        # concatenated memory bytes must equal b itself.
        return z3.And(a.size == (b.size() // 8), z3.Concat(*a.memory.read(a.start, b.size() // 8)) == b)
86 |
87 |
def symread_neq(a, b, size=MAX_SYM_READ_SIZE):
    """Negation of symread_eq for the same operands and unrolling bound."""
    equality = symread_eq(a, b, size)
    return z3.Not(equality)
90 |
91 |
def symread_substitute(x, subst):
    """Apply the z3 substitution ``subst`` to ``x`` (plain expression or SymRead).

    Returns a simplified expression, or a copied SymRead with substituted
    memory/start/size.
    """
    if not isinstance(x, SymRead):
        return z3.simplify(z3.substitute(x, subst))
    else:
        new_symread = copy.copy(x)
        # NOTE(review): copy.copy is shallow, so ``new_symread.memory`` is the
        # same object as ``x.memory`` and this assignment also mutates
        # ``x.memory.memory`` -- confirm this aliasing is intended.
        new_symread.memory.memory = z3.simplify(z3.substitute(new_symread.memory.memory, subst))
        if not concrete(new_symread.start):
            new_symread.start = z3.simplify(z3.substitute(new_symread.start, subst))
        if not concrete(new_symread.size):
            new_symread.size = z3.simplify(z3.substitute(new_symread.size, subst))
        return new_symread
103 |
104 |
def check_model_and_resolve(constraints, sha_constraints):
    """Check satisfiability and return a model, retrying once on unresolved hashes.

    On UnresolvedConstraints the constraints are rewritten so that hashes over
    non-constant arguments are simplified away, then checked a second (final)
    time.
    """
    try:
        return check_model_and_resolve_inner(constraints, sha_constraints)
    except UnresolvedConstraints:
        hash_ids = {key.get_id() for key in sha_constraints}
        rewritten = [simplify_non_const_hashes(c, hash_ids) for c in constraints]
        return check_model_and_resolve_inner(rewritten, sha_constraints, second_try=True)
112 |
def check_model_and_resolve_inner(constraints, sha_constraints, second_try=False):
    """Check ``constraints`` for satisfiability while unifying forced hash collisions.

    sha_constraints maps SHA3 placeholder variables to the hashed expression
    (or SymRead). For every pair of placeholders that MUST be equal under the
    constraints, the pair is merged via substitution and their preimages are
    constrained to be equal, until a fixpoint is reached. Raises
    IntractablePath when unsatisfiable.
    """
    # logging.debug('-' * 32)
    extra_constraints = []
    s = z3.SolverFor("QF_ABV")
    s.add(constraints)
    if s.check() != z3.sat:
        raise IntractablePath("CHECK", "MODEL")
    else:
        if not sha_constraints:
            # No hashes involved -- the plain model is already valid.
            return s.model()
        while True:
            ne_constraints = []
            for a, b in itertools.combinations(sha_constraints.keys(), 2):
                if (not isinstance(sha_constraints[a], SymRead) and not isinstance(sha_constraints[b], SymRead) and
                        sha_constraints[a].size() != sha_constraints[b].size()):
                    # Concrete preimages of different widths -> hashes differ.
                    ne_constraints.append(a != b)
                    continue
                # Can a and b be DIFFERENT? If not, they must collide.
                s = z3.SolverFor("QF_ABV")
                s.add(constraints + ne_constraints + extra_constraints + [a != b, symread_neq(sha_constraints[a],
                                                                                             sha_constraints[b])])
                check_result = s.check()
                # logging.debug("Checking hashes %s and %s: %s", a, b, check_result)
                if check_result == z3.unsat:
                    # logging.debug("Hashes MUST be equal: %s and %s", a, b)
                    # Replace a by b everywhere and force their preimages equal.
                    subst = [(a, b)]
                    extra_constraints = [z3.simplify(z3.substitute(c, subst)) for c in extra_constraints]
                    extra_constraints.append(symread_eq(symread_substitute(sha_constraints[a], subst),
                                                        symread_substitute(sha_constraints[b], subst)))
                    constraints = [z3.simplify(z3.substitute(c, subst)) for c in constraints]
                    b_val = symread_substitute(sha_constraints[b], subst)
                    # NOTE(review): ``not sha is a or sha is b`` reduces to
                    # ``sha is not a`` (a and b are distinct here); presumably
                    # the intent is just to drop a's entry -- confirm.
                    sha_constraints = {z3.substitute(sha, subst): symread_substitute(sha_value, subst) for
                                       sha, sha_value in
                                       sha_constraints.items() if not sha is a or sha is b}
                    sha_constraints[b] = b_val
                    # Restart pair-checking over the updated constraint set.
                    break
                else:
                    # logging.debug("Hashes COULD be equal: %s and %s", a, b)
                    pass
            else:
                # No pair was merged in this round -> fixpoint reached.
                break

    return check_and_model(constraints + extra_constraints, sha_constraints, ne_constraints, second_try=second_try)
155 |
156 |
def check_and_model(constraints, sha_constraints, ne_constraints, second_try=False):
    """Iteratively concretize SHA3 placeholders and return a satisfying model.

    Constraints that do not mention unresolved hash placeholders are added to
    the solver first; a tentative model then concretizes hash preimages
    (computing real keccak values), which unlocks further constraints.
    Raises IntractablePath when unsatisfiable. If constraints remain blocked
    on unresolved hashes, raises UnresolvedConstraints (or IntractablePath
    when ``second_try`` is set).
    """
    # logging.debug(' ' * 16 + '-' * 16)

    unresolved = set(sha_constraints.keys())
    sol = z3.SolverFor("QF_ABV")
    sol.add(ne_constraints)
    todo = constraints
    progress = True
    all_vars = dict()
    while progress:
        new_todo = []
        progress = False
        for c in todo:
            all_vars[c] = get_vars_non_recursive(c, include_select=True, include_indices=False)
            if any(x in unresolved for x in all_vars[c]):
                # Still depends on an unresolved hash -> postpone.
                new_todo.append(c)
            else:
                progress = True
                sol.add(c)
        unresolved_vars = set(v.get_id() for c in new_todo for v in all_vars[c]) | set(v.get_id() for v in unresolved)
        # logging.debug("Unresolved vars: %s", ','.join(map(str, unresolved_vars)))
        if sol.check() != z3.sat:
            raise IntractablePath()
        m = sol.model()
        unresolved_todo = list(set(unresolved))
        while unresolved_todo:
            u = unresolved_todo.pop()
            c = sha_constraints[u]
            if isinstance(c, SymRead):
                # Symbolic-size read: pin down start/size via the current
                # model, then requeue u with the now-concrete read below.
                vars = set()
                if not concrete(c.start):
                    vars |= get_vars_non_recursive(c.start, include_select=True)
                if not concrete(c.size):
                    vars |= get_vars_non_recursive(c.size, include_select=True)
                # logging.debug("Trying to resolve %s, start and size vars: %s", u, ','.join(map(str, vars)))
                if any(x.get_id() in unresolved_vars for x in vars):
                    continue
                start = c.start
                if not concrete(c.start):
                    tmp = m.eval(c.start)
                    if not z3util.is_expr_val(tmp):
                        continue
                    start = tmp.as_long()
                    sol.add(c.start == start)
                size = c.size
                if not concrete(c.size):
                    tmp = m.eval(c.size)
                    if not z3util.is_expr_val(tmp):
                        continue
                    size = tmp.as_long()
                    sol.add(c.size == size)

                data = c.memory.read(start, size)
                if isinstance(data, list):
                    if len(data) > 1:
                        data = z3.Concat(*data)
                    elif len(data) == 1:
                        data = data[0]
                    else:
                        raise IntractablePath()
                # Rebind u to the concrete-width read (copy to avoid mutating
                # the caller's dict) and retry it in this loop.
                sha_constraints = dict(sha_constraints)
                sha_constraints[u] = data
                unresolved_todo.append(u)
            else:
                vars = get_vars_non_recursive(c, include_select=True)
                # logging.debug("Trying to resolve %s, vars: %s", u, ','.join(map(str, vars)))
                if any(x.get_id() in unresolved_vars for x in vars):
                    continue
                v = m.eval(c)
                if z3util.is_expr_val(v):
                    # Preimage is fully concrete: compute the real keccak
                    # hash and bind the placeholder to it.
                    sha = big_endian_to_int(sha3(to_bytes(v)))
                    sol.add(c == v)
                    sol.add(u == sha)
                    unresolved.remove(u)
                    progress = True
        todo = new_todo
    if sol.check() != z3.sat:
        raise IntractablePath()
    if todo:
        if second_try:
            raise IntractablePath()
        raise UnresolvedConstraints(unresolved)
    return sol.model()
240 |
241 |
def dependency_summary(constraints, sha_constraints, detailed=False):
    """Compute the set of symbolic variables the constraints transitively depend on.

    Hash placeholders appearing in the dependency set are expanded into the
    variables of their preimage expressions until a fixpoint is reached.
    detailed: when True, Select terms are included as dependencies as well.
    """
    all_dependencies = set(x for c in constraints if z3.is_expr(c) for x in
                           get_vars_non_recursive(z3.simplify(c), include_select=detailed))
    changed = True
    while changed:
        changed = False
        for x in set(all_dependencies):
            if x in sha_constraints:
                changed = True
                all_dependencies.discard(x)
                # BUG FIX: ``include_select`` belongs to get_vars_non_recursive,
                # not to z3.simplify (where it is an unknown parameter); the
                # old code also silently dropped Select dependencies here.
                # NOTE(review): sha_constraints[x] may be a SymRead, which
                # z3.simplify cannot handle -- pre-existing limitation.
                all_dependencies.update(
                    get_vars_non_recursive(z3.simplify(sha_constraints[x]), include_select=detailed))
    return all_dependencies
255 |
--------------------------------------------------------------------------------
/src/evm/__init__.py:
--------------------------------------------------------------------------------
1 | from . import evm
2 | from . import exceptions
3 | from . import results
4 | from . import state
5 |
--------------------------------------------------------------------------------
/src/evm/exceptions.py:
--------------------------------------------------------------------------------
class ExternalData(Exception):
    """Raised when execution depends on data external to the analyzed code
    (semantics per name; raise sites live outside this module)."""
3 |
4 |
class SymbolicError(Exception):
    """Raised when an operation cannot be carried out on symbolic values
    (e.g. memory reads/writes with a symbolic range)."""
7 |
8 |
class IntractablePath(Exception):
    """Raised when a path's constraints are unsatisfiable or cannot be resolved.

    trace: the executed trace so far (stored as a tuple).
    remainingpath: the part of the path that could not be reached (tuple).
    """

    def __init__(self, trace=(), remainingpath=()):
        # BUG FIX (idiom): immutable tuple defaults replace the shared
        # mutable ``[]`` defaults; behavior is unchanged since both were
        # only ever converted to tuples.
        self.trace = tuple(trace)
        self.remainingpath = tuple(remainingpath)
13 |
14 |
class VMException(Exception):
    """Generic EVM execution failure (semantics per name; raised outside this module)."""
17 |
class TimeoutException(Exception):
    """Raised when an analysis step exceeds its time budget
    (per name; raise sites live outside this module)."""
20 |
--------------------------------------------------------------------------------
/src/evm/results.py:
--------------------------------------------------------------------------------
1 | import itertools
2 |
3 | from z3 import z3
4 |
5 | from src.evm.state import SymRead, LazySubstituteState, translate
6 | from src.util.z3_extra_util import get_vars_non_recursive, concrete, ast_eq
7 |
8 |
class SymbolicResult(object):
    """Outcome of symbolically executing one path of one call.

    Holds the final state, the path constraints, the SHA3 placeholder map,
    the critical opcode the path ends in (``target_op``) and a storage-access
    classification used for call-chaining compatibility checks.
    """

    def __init__(self, xid, state, constraints, sha_constraints, target_op, pib):
        self.xid = xid  # execution id; suffix of all symbolic variable names
        self.state = state
        self.constraints = constraints
        self.sha_constraints = sha_constraints
        self.target_op = target_op
        self.calls = 1
        self._simplified = False
        self.storage_info = StorageInfo(self)
        self.possible_intended_behavior = pib

    def simplify(self):
        """Simplify all constraints once (idempotent)."""
        if self._simplified:
            return
        self.constraints = [z3.simplify(c) for c in self.constraints]
        self.sha_constraints = {sha: z3.simplify(sha_value) if not isinstance(sha_value, SymRead) else sha_value for
                                sha, sha_value in self.sha_constraints.items()}
        self._simplified = True

    def copy(self):
        """Return a copy of this result under a fresh execution id."""
        new_xid = gen_exec_id()

        self.simplify()

        new_constraints = [translate(c, new_xid) for c in self.constraints]
        new_sha_constraints = {translate(sha, new_xid): translate(sha_value, new_xid) if not isinstance(sha_value,
                                                                                                       SymRead) else sha_value.translate(
            new_xid) for sha, sha_value in
                               self.sha_constraints.items()}
        new_state = self.state.copy(new_xid)

        # BUG FIX: the constructor requires the ``pib`` argument as well;
        # the previous call omitted it and raised TypeError at runtime.
        return SymbolicResult(new_xid, new_state, new_constraints, new_sha_constraints, self.target_op,
                              self.possible_intended_behavior)

    def may_read_from(self, other):
        """True if this path's storage reads could alias ``other``'s writes."""
        return self.storage_info.may_read_from(other.storage_info)
45 |
46 |
class CombinedSymbolicResult(object):
    """Chains several SymbolicResults into one multi-transaction scenario.

    Results are prepended (earliest call first); ``combine`` threads storage
    and balance through the call sequence via z3 substitution. Combined
    constraints and states are computed lazily and cached.
    """

    def __init__(self):
        self.results = []
        self._constraints = None      # cached combined constraints
        self._sha_constraints = None  # cached combined hash constraints
        self._states = None           # cached substituted per-call states
        self._idx_dict = None         # xid -> position of the call
        self.calls = 0

    def _reset(self):
        # Invalidate caches after the result list changed.
        self._constraints = None
        self._sha_constraints = None
        self._states = None

    def combine(self, storage=dict(), initial_balance=None):
        """Link the individual results so that each call starts from the
        storage/balance produced by the previous one, seeded from ``storage``
        and ``initial_balance``.

        NOTE(review): ``storage=dict()`` is a shared mutable default; it is
        only read here, but callers should not rely on mutating it.
        """
        extra_subst = []

        # Thread storage: substitute each call's storage base with the
        # previous call's final storage expression.
        storage_base = z3.K(z3.BitVecSort(256), z3.BitVecVal(0, 256))
        for k, v in storage.items():
            storage_base = z3.Store(storage_base, k, v)
        for result in self.results:
            extra_subst.append((result.state.storage.base, storage_base))
            storage_base = z3.substitute(result.state.storage.storage, extra_subst)

        # Thread balances the same way, but only when a concrete initial
        # balance was supplied.
        extra_constraints = []
        if initial_balance is not None:
            balance_base = z3.BitVecVal(initial_balance, 256)
        else:
            balance_base = None
        for result in self.results:
            if balance_base is not None:
                extra_subst.append((result.state.start_balance, balance_base))
                balance_base = z3.substitute(result.state.balance, extra_subst)
            else:
                balance_base = result.state.balance

        self._states = [LazySubstituteState(r.state, extra_subst) for r in self.results]
        self._constraints = [z3.substitute(c, extra_subst) for r in self.results for c in
                             r.constraints] + extra_constraints
        self._sha_constraints = {
            sha: z3.substitute(sha_value, extra_subst) if not isinstance(sha_value, SymRead) else sha_value for r in
            self.results for sha, sha_value in r.sha_constraints.items()}

        self._idx_dict = {r.xid: i for i, r in enumerate(self.results)}

    def prepend(self, result):
        """Insert ``result`` as the EARLIEST call of the sequence."""
        self.calls += 1
        self.results = [result] + self.results
        self._reset()

    @property
    def idx_dict(self):
        if self._idx_dict is None:
            self.combine()
        return self._idx_dict

    @property
    def constraints(self):
        if self._constraints is None:
            self.combine()
        return self._constraints

    @property
    def sha_constraints(self):
        if self._sha_constraints is None:
            self.combine()
        return self._sha_constraints

    @property
    def states(self):
        if not self._states:
            self.combine()
        return self._states

    @property
    def state(self):
        """Final state after the last call of the sequence."""
        return self.states[-1]

    def simplify(self):
        """Simplify the cached combined constraints in place."""
        self._constraints = [z3.simplify(c) for c in self.constraints]
        self._sha_constraints = {sha: (z3.simplify(sha_value) if not isinstance(sha_value, SymRead) else sha_value) for
                                 sha, sha_value in self.sha_constraints.items()}
129 |
130 |
class StorageInfo(object):
    """Classification of a SymbolicResult's storage accesses.

    Storage addresses are split into concrete, symbolic (no hash involved)
    and hash-based symbolic (the address mentions a SHA3 placeholder, i.e.
    typically a mapping/array slot) reads and writes.
    """

    def __init__(self, result):
        self.result = result
        self._vars = dict()  # symbolic address -> set of variables it mentions
        self.concrete_reads = set()
        self.concrete_writes = set()
        self.symbolic_reads = set()
        self.symbolic_writes = set()
        self.symbolic_hash_reads = set()
        self.symbolic_hash_writes = set()
        for addr in set(result.state.storage.reads):
            if concrete(addr):
                self.concrete_reads.add(addr)
            else:
                x_vars = get_vars_non_recursive(addr, True)
                self._vars[addr] = x_vars
                # Hash-based iff the address mentions a SHA3 placeholder.
                if set(x_vars) & set(result.sha_constraints.keys()):
                    self.symbolic_hash_reads.add(addr)
                else:
                    self.symbolic_reads.add(addr)
        for addr in set(result.state.storage.writes):
            if concrete(addr):
                self.concrete_writes.add(addr)
            else:
                x_vars = get_vars_non_recursive(addr, True)
                self._vars[addr] = x_vars
                if set(x_vars) & set(result.sha_constraints.keys()):
                    self.symbolic_hash_writes.add(addr)
                else:
                    self.symbolic_writes.add(addr)

    def may_read_from(self, other):
        """Conservatively decide whether a read of ``self`` could alias a write of ``other``."""
        if not self.symbolic_reads and not other.symbolic_writes:
            # no side has a non-hash-based symbolic access
            # => only concrete accesses can intersect
            # (or hash-based accesses, which we will check later)
            if self.concrete_reads & other.concrete_writes:
                return True
        else:
            # at least one side has a non-hash-based symbolic access
            # => if there is at least one concrete or symbolic access
            # on the other side, the two could be equal
            # (otherwise we have to look at hash-based accesses, see below)
            if ((self.symbolic_reads or self.concrete_reads or self.symbolic_hash_reads) and
                    (other.symbolic_writes or other.concrete_writes or other.symbolic_hash_writes)):
                return True

        if self.symbolic_hash_reads and other.symbolic_hash_writes:
            for a, b in itertools.product(self.symbolic_hash_reads, other.symbolic_hash_writes):
                if not ast_eq(a, b):
                    continue
                hash_a = list(self._vars[a] & set(self.result.sha_constraints.keys()))
                hash_b = list(other._vars[b] & set(other.result.sha_constraints.keys()))
                if len(hash_a) != 1 or len(hash_b) != 1:
                    # multiple hashes on either side
                    # => assume they could be equal
                    return True
                # only one hash on either side
                # => check whether these two can actually be equal
                d_a = self.result.sha_constraints[hash_a[0]]
                d_b = other.result.sha_constraints[hash_b[0]]
                if isinstance(d_a, SymRead) or isinstance(d_b, SymRead):
                    return True
                if d_a.size() == d_b.size():
                    return True

        # at this point, we have checked every possible combination
        # => no luck this time
        return False
200 |
201 |
def gen_exec_id():
    """Return a fresh execution id: 0 on the first call, then counting up."""
    current = getattr(gen_exec_id, "xid", None)
    gen_exec_id.xid = 0 if current is None else current + 1
    return gen_exec_id.xid
208 |
--------------------------------------------------------------------------------
/src/evm/state.py:
--------------------------------------------------------------------------------
1 | from z3 import z3
2 |
3 | from src.evm.exceptions import SymbolicError
4 | from src.memory import UninitializedRead
5 | from src.util.z3_extra_util import concrete, get_vars_non_recursive
6 |
7 |
class Stack(list):
    """EVM operand stack. Concrete values are reduced into the 256-bit range on append."""

    def __init__(self, *args):
        super().__init__(*args)

    def push(self, v):
        # EVM-flavored alias for append().
        self.append(v)

    def append(self, v):
        # Concrete words live modulo 2**256; symbolic values pass through untouched.
        word = v % (2 ** 256) if concrete(v) else v
        super().append(word)
19 |
20 |
class Memory(object):
    """Concrete EVM memory backed by a bytearray, tracking initialized offsets.

    When enforcing is enabled, reading an offset that was never written
    raises UninitializedRead.
    """

    def __init__(self, *args):
        self.memory = bytearray(*args)
        self._check_initialized = False
        self.initialized = set()

    def __getitem__(self, index):
        if isinstance(index, slice):
            covered = range(index.start or 0, index.stop, index.step or 1)
            ok = all(i in self.initialized for i in covered)
        else:
            ok = index in self.initialized
        if self._check_initialized and not ok:
            raise UninitializedRead(index)
        return self.memory[index]

    def __setitem__(self, index, v):
        if isinstance(index, slice):
            self.initialized.update(range(index.start or 0, index.stop, index.step or 1))
        else:
            self.initialized.add(index)
        self.memory[index] = v

    def set_enforcing(self, enforcing=True):
        # Toggle the uninitialized-read check.
        self._check_initialized = enforcing

    def extend(self, start, size):
        # Grow the backing buffer (zero-filled) to cover [start, start+size).
        shortfall = start + size - len(self.memory)
        if shortfall > 0:
            self.memory += bytearray(shortfall)

    def __len__(self):
        return len(self.memory)
54 |
55 |
class SymbolicMemory(object):
    """EVM memory modeled as a z3 array from 256-bit addresses to bytes.

    Concrete indices are handled eagerly; reads of symbolic length are
    deferred by returning a SymRead placeholder.
    """

    # Unrolling bound for copies of symbolic length.
    MAX_SYMBOLIC_WRITE_SIZE = 256

    def __init__(self):
        # All-zero initial memory (EVM memory starts zeroed).
        self.memory = z3.K(z3.BitVecSort(256), z3.BitVecVal(0, 8))
        self.write_count = 0
        self.read_count = 0

    def __getitem__(self, index):
        """Read one byte (int or z3 expression) or a list of bytes for a concrete slice."""
        if isinstance(index, slice):
            if index.stop is None:
                raise ValueError("Need upper memory address!")
            if (index.start is not None and not concrete(index.start)) or not concrete(index.stop):
                raise SymbolicError("Use mem.read for symbolic range reads")
            r = []
            for i in range(index.start or 0, index.stop, index.step or 1):
                r.append(self[i])
            return r
        else:
            self.read_count += 1
            v = z3.simplify(self.memory[index])
            if z3.is_bv_value(v):
                # Concrete byte: return a plain int.
                return v.as_long()
            else:
                return v

    def __setitem__(self, index, v):
        """Write one byte (or a sequence of bytes for a concrete slice)."""
        if isinstance(index, slice):
            if index.stop is None:
                raise ValueError("Need upper memory address!")
            if (index.start is not None and not concrete(index.start)) or not concrete(index.stop):
                raise SymbolicError("Use mem.write for symbolic range writes")
            for j, i in enumerate(range(index.start or 0, index.stop, index.step or 1)):
                self[i] = v[j]
        else:
            self.write_count += 1
            if isinstance(v, str):
                v = ord(v)

            if concrete(v):
                # Skip stores that would not change the (concrete) content.
                old_v = self[index]
                if not concrete(old_v) or old_v != v:
                    self.memory = z3.Store(self.memory, index, v)
            else:
                self.memory = z3.Store(self.memory, index, v)

    def read(self, start, size):
        """Read ``size`` bytes from ``start``; returns a SymRead when ``size`` is symbolic."""
        if concrete(start) and concrete(size):
            return self[start:start + size]
        elif concrete(size):
            return [self[start + i] for i in range(size)]
        else:
            # Defer symbolic-length reads: snapshot the current memory
            # expression into a SymRead placeholder.
            sym_mem = SymbolicMemory()
            sym_mem.memory = self.memory
            return SymRead(sym_mem, start, size)
            # raise SymbolicError("Read of symbolic length")

    def copy(self, istart, ilen, ostart, olen):
        """Copy ``ilen`` bytes from ``istart`` to ``ostart``, zero-padding up to ``olen``."""
        if concrete(ilen) and concrete(olen):
            self.write(ostart, olen, self.read(istart, min(ilen, olen)) + [0] * max(olen - ilen, 0))
        elif concrete(olen):
            # Symbolic input length: guard each byte with i < ilen.
            self.write(ostart, olen, [z3.If(i < ilen, self[istart + i], 0) for i in range(olen)])
        else:
            # Symbolic output length: bounded unrolling; bytes beyond olen
            # keep their previous value.
            self.write(ostart, SymbolicMemory.MAX_SYMBOLIC_WRITE_SIZE,
                       [z3.If(i < olen, z3.If(i < ilen, self[istart + i], 0), self[ostart + i]) for i in
                        range(SymbolicMemory.MAX_SYMBOLIC_WRITE_SIZE)])

    def write(self, start, size, val):
        """Write the ``size`` bytes of ``val`` starting at ``start`` (size must be concrete)."""
        if not concrete(size):
            raise SymbolicError("Write of symbolic length")
        if len(val) != size:
            raise ValueError("value does not match length")
        if concrete(start) and concrete(size):
            self[start:start + size] = val
        else:  # by now we know that size is concrete
            for i in range(size):
                self[start + i] = val[i]

    def set_enforcing(self, enforcing=True):
        # Initialization tracking is not needed for symbolic memory.
        pass

    def extend(self, start, size):
        # The z3 array is unbounded; nothing to grow.
        pass
139 |
140 |
class SymRead(object):
    """Placeholder for a memory read whose size (and possibly start) is symbolic."""

    def __init__(self, memory, start, size):
        self.memory = memory
        self.start = start
        if not concrete(start):
            self.start = z3.simplify(self.start)
        self.size = size
        if not concrete(size):
            self.size = z3.simplify(self.size)

    def translate(self, new_xid):
        """Return a copy of this read with all symbolic variables renamed to ``new_xid``."""
        sym_mem_mem = translate(self.memory.memory, new_xid)
        sym_mem = SymbolicMemory()
        sym_mem.memory = sym_mem_mem
        new_symread = SymRead(sym_mem, 0, 0)
        new_symread.start = self.start if concrete(self.start) else translate(self.start, new_xid)
        new_symread.size = self.size if concrete(self.size) else translate(self.size, new_xid)
        return new_symread
159 |
160 |
class SymbolicStorage(object):
    """Fully symbolic contract storage (256-bit -> 256-bit z3 array) with an access log."""

    def __init__(self, xid):
        self.base = z3.Array('STORAGE_%d' % xid, z3.BitVecSort(256), z3.BitVecSort(256))
        self.storage = self.base
        self.accesses = []

    def __getitem__(self, index):
        key = index if concrete(index) else z3.simplify(index)
        self.accesses.append(('read', key))
        return self.storage[index]

    def __setitem__(self, index, v):
        key = index if concrete(index) else z3.simplify(index)
        self.accesses.append(('write', key))
        self.storage = z3.Store(self.storage, index, v)

    @property
    def reads(self):
        """Addresses of all recorded reads, in order."""
        return [addr for kind, addr in self.accesses if kind == 'read']

    @property
    def writes(self):
        """Addresses of all recorded writes, in order."""
        return [addr for kind, addr in self.accesses if kind == 'write']

    @property
    def all(self):
        """All accessed addresses, reads and writes interleaved in order."""
        return [addr for _, addr in self.accesses]

    def copy(self, new_xid):
        """Duplicate this storage with every symbolic variable renamed to ``new_xid``."""
        duplicate = SymbolicStorage(new_xid)
        duplicate.base = translate(self.base, new_xid)
        duplicate.storage = translate(self.storage, new_xid)
        duplicate.accesses = [(kind, addr if concrete(addr) else translate(addr, new_xid))
                              for kind, addr in self.accesses]
        return duplicate
193 |
194 |
class AbstractEVMState(object):
    """State common to concrete and symbolic EVM execution."""

    def __init__(self, code=None):
        self.code = code or bytearray()  # contract bytecode
        self.pc = 0                      # program counter
        self.stack = Stack()             # operand stack
        self.memory = None               # set by subclasses
        self.trace = []                  # executed program counters
        self.gas = None                  # set by subclasses
203 |
class EVMState(AbstractEVMState):
    """Concrete EVM state: byte-addressed memory plus a numeric gas counter."""

    def __init__(self, code=None, gas=0):
        super().__init__(code)
        self.memory = Memory()
        self.gas = gas
209 |
210 |
class SymbolicEVMState(AbstractEVMState):
    """EVM state for symbolic execution: symbolic memory, storage, gas and balance.

    All symbolic variables are suffixed with the execution id ``xid`` so that
    states from different (chained) calls never clash.
    """

    def __init__(self, xid, code=None):
        super(SymbolicEVMState, self).__init__(code)
        self.memory = SymbolicMemory()
        self.storage = SymbolicStorage(xid)
        self.gas = z3.BitVec('GAS_%d' % xid, 256)
        self.start_balance = z3.BitVec('BALANCE_%d' % xid, 256)
        self.balance = self.start_balance

    def copy(self, new_xid):
        # Make a superficial copy of this state.
        # Effectively, only the storage is copied,
        # as this is sufficient to prepend a
        # result with this state to another call
        new_storage = self.storage.copy(new_xid)
        new_state = SymbolicEVMState(new_xid)
        new_state.storage = new_storage
        new_state.pc = self.pc
        new_state.trace = list(self.trace)
        new_state.start_balance = translate(self.start_balance, new_xid)
        new_state.balance = translate(self.balance, new_xid)
        return new_state
234 |
235 |
class LazySubstituteState(object):
    """Wrapper around an EVM state that applies variable substitutions lazily.

    Stack and memory accesses substitute on demand; the balance is a single
    expression and is substituted eagerly here.
    """

    def __init__(self, state, substitutions):
        self._state = state
        self._substitutions = list(substitutions)
        self.memory = LazySubstituteMemory(self._state.memory, substitutions)
        self.stack = LazySubstituteStack(self._state.stack, substitutions)
        self.code = self._state.code
        self.pc = self._state.pc
        self.trace = self._state.trace
        self.balance = z3.substitute(state.balance, substitutions)
246 |
247 |
class LazySubstituteMemory(object):
    """Placeholder view of memory under pending substitutions.

    Element access is not implemented; __getitem__ signals that explicitly.
    """

    def __init__(self, memory, substitutions):
        self._memory = memory
        self._substitutions = substitutions

    def __getitem__(self, index):
        # BUG FIX: ``raise NotImplemented()`` raised a TypeError because
        # NotImplemented is a constant, not an exception type. Raise the
        # proper NotImplementedError instead.
        raise NotImplementedError("LazySubstituteMemory does not support reads")
255 |
256 |
class LazySubstituteStack(object):
    """View of a stack whose symbolic entries get substitutions applied on access."""

    def __init__(self, stack, substitutions):
        self._stack = stack
        self._substitutions = substitutions

    def __getitem__(self, index):
        item = self._stack[index]
        if isinstance(index, slice):
            return [v if concrete(v) else z3.substitute(v, self._substitutions) for v in item]
        if concrete(item):
            return item
        return z3.substitute(item, self._substitutions)
268 |
269 |
def translate(expr, xid):
    """Rewrite every variable in ``expr`` to carry the execution id ``xid``.

    Variable names follow the ``NAME_<oldxid>`` convention; the old suffix is
    stripped and replaced by ``xid``. Raises for sorts other than
    int/bool/bitvector/array.
    """
    substitutions = dict()

    def raw(s):
        # Strip the trailing '_<xid>' component of a variable name.
        return '_'.join(s.split('_')[:-1])

    for v in get_vars_non_recursive(expr):
        if v not in substitutions:
            v_name = raw(v.decl().name())
            if v.sort_kind() == z3.Z3_INT_SORT:
                substitutions[v] = z3.Int('%s_%d' % (v_name, xid))
            elif v.sort_kind() == z3.Z3_BOOL_SORT:
                substitutions[v] = z3.Bool('%s_%d' % (v_name, xid))
            elif v.sort_kind() == z3.Z3_BV_SORT:
                substitutions[v] = z3.BitVec('%s_%d' % (v_name, xid), v.size())
            elif v.sort_kind() == z3.Z3_ARRAY_SORT:
                substitutions[v] = z3.Array('%s_%d' % (v_name, xid), v.domain(), v.range())
            else:
                raise Exception('CANNOT CONVERT %s (%d)' % (v, v.sort_kind()))
    subst = list(substitutions.items())
    return z3.substitute(expr, subst)
291 |
--------------------------------------------------------------------------------
/src/exploit.py:
--------------------------------------------------------------------------------
1 | import itertools
2 | import logging
3 | from collections import defaultdict
4 |
5 | from z3 import z3
6 |
7 | from src.cfg import opcodes
8 | from src.constraints import check_model_and_resolve, model_to_calls
9 | from src.evm.exceptions import IntractablePath
10 | from src.evm.results import CombinedSymbolicResult
11 | from src.util.z3_extra_util import concrete
12 |
13 |
class InfeasibleExploit(Exception):
    """Raised when no satisfying exploit exists for the attempted path combination."""
16 |
17 |
class ExploitContext(object):
    """Bundle of parameters describing the attacker's goal and environment.

    target_addr: address that should receive funds / benefit from the exploit.
    shellcode_addr: address hosting attacker-controlled code.
    target_amount: amount (in wei) to extract.
    amount_check: '+' (at least target_amount), '-' (positive, up to
        target_amount), anything else (exactly target_amount).
    initial_balance: concrete starting balance of the contract, or None.
    initial_storage: mapping of concrete initial storage slots.
    controlled_addrs: additional attacker-controlled addresses (optional).
    """

    def __init__(self, target_addr, shellcode_addr, target_amount, amount_check, initial_balance, initial_storage,
                 controlled_addrs=None):
        self.target_addr = target_addr
        self.shellcode_addr = shellcode_addr
        self.target_amount = target_amount
        self.amount_check = amount_check
        self.initial_balance = initial_balance
        self.initial_storage = initial_storage

        # BUG FIX (idiom): the default used to be a shared mutable ``set()``;
        # use a None sentinel instead. Behavior is unchanged for all callers.
        # assume we control the target address
        self.controlled_addrs = set(controlled_addrs or ()) | {target_addr}
30 |
31 |
def exploit_constraints_call(r, ctx):
    """Build the extra constraints that weaponize a reachable CALL.

    Forces the call target to ctx.target_addr, constrains the transferred
    amount according to ctx.amount_check ('+' at least / '-' up to /
    otherwise exactly ctx.target_amount) and ensures the exploit is
    profitable. Raises InfeasibleExploit when the (concrete) target address
    is wrong.
    """
    # CALL pops: gas, to, value, ... -> stack[-2] is the target address.
    addr = r.state.stack[-2]
    if not concrete(addr):
        addr = z3.simplify(addr)

    # stack[-3] is the transferred amount.
    amount = r.state.stack[-3]
    if not concrete(amount):
        amount = z3.simplify(amount)

    extra_constraints = []

    if not concrete(addr):
        # Only the low 160 bits of the word form the address.
        extra_constraints.append(z3.Extract(159, 0, addr) == ctx.target_addr)
    else:
        if addr != ctx.target_addr:
            raise InfeasibleExploit

    if not concrete(amount):
        if ctx.amount_check == '+':
            extra_constraints.append(z3.UGE(amount, ctx.target_amount))
        elif ctx.amount_check == '-':
            extra_constraints.append(z3.UGT(amount, 0))
            extra_constraints.append(z3.ULE(amount, ctx.target_amount))
        else:
            extra_constraints.append(amount == ctx.target_amount)
        # The contract must actually be able to pay out this amount.
        final_balance = r.state.balance
        extra_constraints.append(z3.ULE(amount, final_balance))

    # ensure we're not spending more for this exploit than we gain
    total_spent = None
    for res in r.results:
        callvalue = z3.BitVec('CALLVALUE_%d' % res.xid, 256)
        extra_constraints.append(z3.ULE(callvalue, 10 * (10 ** 18)))  # keep it semi-reasonable: at most 10 Eth per call
        if total_spent is None:
            total_spent = callvalue
        else:
            total_spent += callvalue

    extra_constraints.append(z3.ULT(total_spent, amount))

    # also, ensure the contract does not require a unreasonable start-balance (>100 Eth)
    if not ctx.initial_balance:
        start_balance = z3.BitVec('BALANCE_%d' % r.results[0].xid, 256)
        extra_constraints.append(z3.ULE(start_balance, 100 * (10 ** 18)))

    return extra_constraints
78 |
79 |
def exploit_constraints_callcode(r, ctx):
    """Constraints forcing a CALLCODE target to be the shellcode address.

    Raises InfeasibleExploit when the target is concrete and wrong.
    """
    addr = z3.simplify(r.state.stack[-2])

    if concrete(addr):
        # A fixed target other than our shellcode cannot be redirected.
        if addr != ctx.shellcode_addr:
            raise InfeasibleExploit
        return []
    # Force the low 160 bits (the address part) to the shellcode address.
    return [z3.Extract(159, 0, addr) == ctx.shellcode_addr]
92 |
93 |
def exploit_constraints_delegatecall(r, ctx):
    """Constraints forcing a DELEGATECALL target to be the shellcode address.

    Raises InfeasibleExploit when the target is concrete and wrong.
    """
    addr = z3.simplify(r.state.stack[-2])

    if concrete(addr):
        # A fixed target other than our shellcode cannot be redirected.
        if addr != ctx.shellcode_addr:
            raise InfeasibleExploit
        return []
    # Force the low 160 bits (the address part) to the shellcode address.
    return [z3.Extract(159, 0, addr) == ctx.shellcode_addr]
106 |
107 |
def exploit_constraints_selfdestruct(r, ctx):
    """Constraints forcing a SELFDESTRUCT beneficiary to be the target address.

    Raises InfeasibleExploit when the beneficiary is concrete and wrong.
    """
    # SELFDESTRUCT pops a single operand: the beneficiary address.
    addr = z3.simplify(r.state.stack[-1])

    if concrete(addr):
        if addr != ctx.target_addr:
            raise InfeasibleExploit
        return []
    # Force the low 160 bits (the address part) to the target address.
    return [z3.Extract(159, 0, addr) == ctx.target_addr]
120 |
121 |
# Maps a path's terminating critical opcode to the function that builds the
# extra constraints required to weaponize it.
EXPLOIT_CONSTRAINTS = {
    'CALL': exploit_constraints_call,
    'CALLCODE': exploit_constraints_callcode,
    # FIX (consistency): DELEGATECALL previously pointed at the CALLCODE
    # handler, leaving exploit_constraints_delegatecall dead code. The two
    # handlers behave identically today, so this is behavior-preserving.
    'DELEGATECALL': exploit_constraints_delegatecall,
    'SELFDESTRUCT': exploit_constraints_selfdestruct
}
128 |
129 |
def get_exploit_constraints(r, ctx):
    """Dispatch to the opcode-specific constraint builder for the final call of ``r``.

    Returns an empty list when the terminating opcode has no builder.
    """
    target_op = r.results[-1].target_op
    builder = EXPLOIT_CONSTRAINTS.get(target_op)
    if builder is None:
        return []
    return builder(r, ctx)
136 |
def control_address_constraints(sym_addr, controlled_addrs):
    """Constraint restricting ``sym_addr`` to one of ``controlled_addrs``.

    ``controlled_addrs`` must be non-empty (raises IndexError otherwise,
    matching the previous behavior).
    """
    options = [sym_addr == addr for addr in controlled_addrs]
    combined = options[0]
    for option in options[1:]:
        combined = z3.Or(combined, option)
    return combined
143 |
def attempt_exploit(results, ctx):
    """Try to turn a sequence of symbolic paths into a concrete exploit.

    results: SymbolicResults in execution order (earliest call first).
    Returns (calls, combined_result, model) on success; raises
    InfeasibleExploit when the combined constraints are unsatisfiable or an
    uncontrolled ORIGIN would be required.
    """
    c = CombinedSymbolicResult()
    # prepend() inserts at the front, so iterate in reverse to keep order.
    for r in results[::-1]:
        c.prepend(r)
    c.combine(ctx.initial_storage, ctx.initial_balance)
    c.simplify()
    extra_constraints = get_exploit_constraints(c, ctx)

    for res in c.results:
        origin = z3.BitVec('ORIGIN_%d' % res.xid, 256)
        caller = z3.BitVec('CALLER_%d' % res.xid, 256)
        # ensure we control the origin
        extra_constraints.append(control_address_constraints(origin, ctx.controlled_addrs))
        # and ensure the caller is either the origin or the shellcode address
        extra_constraints.append(control_address_constraints(caller, {origin, ctx.shellcode_addr}))

    try:
        model = check_model_and_resolve(c.constraints + extra_constraints, c.sha_constraints)

        # enforce we control all ORIGIN-addresses
        if any(model[v].as_long() not in ctx.controlled_addrs for v in model if v.name().startswith('ORIGIN')):
            raise InfeasibleExploit

        return model_to_calls(model, c.idx_dict), c, model
    except IntractablePath:
        raise InfeasibleExploit
170 |
171 |
def combined_exploit(p, target_addr, shellcode_addr, target_amount, amount_check='+', initial_storage=dict(),
                     initial_balance=None,
                     max_calls=3, controlled_addrs=set(), flags=None):
    """Search for a (possibly multi-transaction) exploit of contract ``p``.

    Strategy: first try every critical-instruction path on its own; paths that
    read storage (SLOAD) are remembered, and on failure the search prepends one
    or more state-changing transactions (RETURN/STOP paths containing SSTOREs)
    whose writes the critical path may read.

    :param p: analyzed contract (provides .cfg and .get_constraints)
    :param target_addr: address that should receive the funds
    :param shellcode_addr: attacker-controlled code address (CALLCODE/DELEGATECALL)
    :param target_amount: amount to extract
    :param amount_check: '+' (at least), '-' (at most) or exact otherwise
    :param initial_storage: assumed initial contract storage
    :param initial_balance: assumed initial balance (None = unconstrained)
    :param max_calls: maximum number of chained transactions to try
    :param controlled_addrs: attacker-controlled addresses (target_addr is added by ExploitContext)
    :param flags: subset of opcodes.CRITICAL to consider (None = all)
    :return: (calls, combined_result, model) on success, None otherwise
    """

    flags = flags or set(opcodes.CRITICAL)

    ctx = ExploitContext(target_addr, shellcode_addr, target_amount, amount_check, initial_balance, initial_storage,
                         controlled_addrs)

    # start-addresses of basic blocks that read storage
    sload_bbs = {ins.bb.start for ins in p.cfg.filter_ins('SLOAD')}
    critical_paths = []

    # Phase 1: try each critical instruction as a single-transaction exploit.
    for op in opcodes.CRITICAL:
        if op not in flags:
            continue
        ins = p.cfg.filter_ins(op)
        if not ins:
            logging.info('No %s instructions', op)
            continue
        logging.info('Found %d %s instructions', len(ins), op)
        for i, i_path, i_r in p.get_constraints(ins, opcodes.CRITICAL_ARGS[op]):
            logging.info("%s: %s", op, i)
            logging.info("Path: %s", '->'.join('%x' % p for p in i_path))
            if set(i_path) & sload_bbs:
                # if there is a SLOAD on this path,
                # it might benefit from prepending a state-changing path later
                critical_paths.append(i_r)
            try:
                return attempt_exploit([i_r], ctx)
            except InfeasibleExploit:
                continue
    if not critical_paths:
        logging.warning("No state-dependent critical path found, aborting")
        return

    # Phase 2: collect state-changing paths (end in RETURN/STOP, contain SSTORE)
    # and try each compatible (state-changing, critical) pair.
    end_ins = p.cfg.filter_ins('RETURN') + p.cfg.filter_ins('STOP')
    if not end_ins:
        logging.info('No RETURN or STOP instructions')
        return
    logging.info('Found %d RETURN and STOP instructions', len(end_ins))
    compatible = defaultdict(lambda: [[]]) # list of lists
    state_changing_paths = []
    for i, (end, end_path, state_changing_r) in enumerate(p.get_constraints(end_ins, find_sstore=True)):
        logging.info("End: %s", end)
        logging.info("Path: %s", '->'.join('%x' % p for p in end_path))
        state_changing_paths.append(state_changing_r)
        for j, critical_r in enumerate(critical_paths):
            if not critical_r.may_read_from(state_changing_r):
                continue
            # remember that state-changing path i is compatible with critical path j
            compatible[j][0].append(i)
            try:
                return attempt_exploit([state_changing_r, critical_r], ctx)
            except InfeasibleExploit:
                continue

    logging.info('All ends: %s', state_changing_paths)

    # which state-changing path may read from which other state-changing path
    storage_compatible = defaultdict(list)
    for (i, a_r), (j, b_r) in itertools.product(enumerate(state_changing_paths), enumerate(state_changing_paths)):
        if a_r.may_read_from(b_r):
            storage_compatible[i].append(j)

    # Phase 3: chain up to max_calls-1 state-changing transactions before the
    # critical one, extending the compatibility lists one hop per iteration.
    calls = [state_changing_paths]
    while len(calls) < max_calls - 1:
        new_ends = [r.copy() for r in state_changing_paths]
        calls.append(new_ends)
        for k, v in compatible.items():
            new_compat = set()
            for c in v[-1]:
                new_compat.update(storage_compatible[c])
            v.append(sorted(new_compat))
    for i, critical_r in enumerate(critical_paths):
        for combo_ids in itertools.product(*compatible[i]):
            combo = [critical_r] + [c[j] for c, j in zip(calls, combo_ids)]
            try:
                # combo is ordered critical-first, so reverse for execution order
                return attempt_exploit(combo[::-1], ctx)
            except InfeasibleExploit:
                continue

    logging.info('Could not exploit any RETURN+CALL')
252 |
--------------------------------------------------------------------------------
/src/explorer/__init__.py:
--------------------------------------------------------------------------------
1 | from . import backward
2 | from . import forward
--------------------------------------------------------------------------------
/src/explorer/backward.py:
--------------------------------------------------------------------------------
1 | import logging
2 | from collections import defaultdict
3 | from queue import PriorityQueue
4 | from src.util.frontierset import FrontierSet
5 |
6 |
class BackwardExplorerState(object):
    """One node in the best-first backward CFG exploration.

    Bundles the current basic block, remaining "gas" budget, the set of blocks
    that still must be visited, the accumulated branch cost, and the
    caller-defined payload (``data``).
    """

    def __init__(self, bb, gas, must_visit, cost, data):
        self.bb = bb
        self.gas = gas
        # private copy, so later mutations never leak into sibling states
        self.must_visit = must_visit.copy()
        self.cost = cost
        self.data = data

    def estimate(self):
        """
        Return an estimate of how quickly we can reach the root of the tree.

        The estimate is the branch cost accumulated so far plus the current
        block's own root-distance estimate (if it has one).
        :return: estimated distance to root
        """
        remaining = self.bb.estimate_constraints
        return self.cost if remaining is None else self.cost + remaining

    def rank(self):
        """
        Priority key: states closer to the root come first; ties are broken in
        favor of less restricted states (smaller must-visit set) to improve
        caching efficiency.
        :return: (estimate, restriction) tuple
        """
        return (self.estimate(), len(self.must_visit))

    def __lt__(self, other):
        return self.rank() < other.rank()

    def __hash__(self):
        # combine the three identity-relevant fields with small prime weights
        parts = (hash(self.bb), hash(self.must_visit), hash(self.data))
        return sum(prime * part for prime, part in zip((23, 29, 31), parts))

    def __eq__(self, other):
        # gas and cost are deliberately excluded: two states that agree on
        # block, must-visit set and data are interchangeable for caching
        return (self.bb == other.bb
                and self.must_visit == other.must_visit
                and self.data == other.data)

    def __str__(self):
        return 'At: %x, Gas: %s, Must-Visit: %s, Data: %s, Hash: %x' % (
            self.bb.start, self.gas, self.must_visit, self.data, hash(self))
47 |
48 |
def generate_sucessors(state, new_data, update_data, predicate=lambda st, pred: True):
    """Expand a BackwardExplorerState into its predecessor states.

    For every predecessor block accepted by ``predicate``, derives the updated
    must-visit sets from the recorded predecessor paths and emits one new state
    per minimal must-visit set.

    :param state: current BackwardExplorerState
    :param new_data: data already advanced for this step (passed to update_data)
    :param update_data: callable(new_data, pred_bb) -> data for the new state
    :param predicate: callable(state.data, pred_bb) -> bool edge filter
    :return: list of new BackwardExplorerState objects (empty if out of gas)
    """
    new_todo = []
    if state.gas is None or state.gas > 0:
        # logging.debug('[tr] [gs] passed first if')
        new_gas = state.gas
        # only charge gas when this block is a join point (multiple predecessors)
        if state.gas and len(state.bb.pred) > 1:
            new_gas = state.gas - 1
        # logging.debug('[tr] [gs] Preds: %s', state.bb.pred)

        for p in state.bb.pred:
            if not predicate(state.data, p):
                continue

            new_must_visits = []
            for path in state.bb.pred_paths[p]:
                new_must_visit = state.must_visit.copy()
                # record every edge of this predecessor path as still-to-visit
                for a, b in zip(path[:-1], path[1:]):
                    new_must_visit.add(b, a)
                if p.start in new_must_visit.frontier:
                    new_must_visit.remove(p.start)
                # prune states whose obligations cannot be met from p anymore
                if not new_must_visit.all.issubset(p.ancestors):
                    # logging.debug('[tr] [gs] Cannot reach any necessary states, aborting! Needed: %s, reachable: %s', new_must_visit, p.ancestors)
                    continue
                new_must_visits.append(new_must_visit)

            # taking a conditional branch costs one unit
            new_cost = state.cost + (1 if p.branch else 0)

            # only keep subset-minimal must-visit sets (see minimize below)
            for new_must_visit in minimize(new_must_visits):
                new_todo.append(BackwardExplorerState(p, new_gas, new_must_visit, new_cost, update_data(new_data, p)))
    return new_todo
79 |
80 |
def traverse_back(start_ins, initial_gas, initial_data, advance_data, update_data, finish_path, must_visits=None,
                  predicate=lambda st, p: True):
    """
    Explore the CFG backwards from ``start_ins`` towards the root, best-first
    (cheapest estimated root-distance first), yielding finished path data.

    :param start_ins: Starting instructions
    :param initial_gas: Starting "gas". Can be None, in which case it is unlimited
    :param initial_data: callable(ins) producing the starting data for one instruction
    :param advance_data: method to advance data
    :param update_data: method to update data
    :param finish_path: callable(data) -> bool, True once a path is complete
    :param must_visits: FrontierSets describing the next nodes that *must* be visited.
                        Default is None (fixed from a shared mutable ``[]`` default).
    :param predicate: A function (state, BB) -> Bool describing whether an edge should be taken or not
    :return: yields paths as they are explored one-by-one
    """
    todo = PriorityQueue()

    for ins in start_ins:
        data = initial_data(ins)
        bb = ins.bb
        gas = initial_gas
        # states are ordered via BackwardExplorerState.rank(): least-distance
        # first, least-restricted second, which should maximize caching efficiency
        if not must_visits:
            must_visits = [FrontierSet()]
        # NOTE(review): `mv is not FrontierSet` compares an instance against the
        # class object and is True for any instance, so every mv is re-wrapped;
        # `isinstance(mv, FrontierSet)` was probably intended — confirm before changing.
        for must_visit in minimize(FrontierSet(mv) if mv is not FrontierSet else mv for mv in must_visits):
            ts = BackwardExplorerState(bb, gas, must_visit, 0, data)
            todo.put(ts)
    cache = set()
    ended_prematurely = defaultdict(int)
    while not todo.empty():
        state = todo.get()
        # if this BB can be reached via multiple paths, check if we want to cache it
        # or whether another path already reached it with the same state
        if len(state.bb.succ) > 1:
            if state in cache:
                continue
            cache.add(state)
        new_data = advance_data(state.data)
        if finish_path(new_data):
            yield new_data
        else:
            # prune when the remaining gas cannot cover the estimated
            # number of back-branches to the root
            if state.gas is not None and state.bb.estimate_back_branches is not None and (state.gas == 0 or state.gas < state.bb.estimate_back_branches):
                ended_prematurely[state.bb.start] += 1
            else:
                logging.debug('[tr] continuing path (%s)', new_data)
                new_todo = generate_sucessors(state, new_data, update_data, predicate=predicate)
                for nt in new_todo:
                    todo.put(nt)
    total_ended = sum(ended_prematurely.values())
    if total_ended:
        logging.debug("%d paths that ended prematurely due to branches: %s", total_ended,
                      ', '.join('%x: %d' % (k, v) for k, v in ended_prematurely.items()))
    else:
        logging.debug("Finished all paths")
139 |
140 |
def minimize(must_visits):
    """Yield only the subset-minimal elements of ``must_visits``.

    Elements are processed smallest-first; once an element is yielded, every
    remaining element that is a superset of it is discarded.
    """
    remaining = sorted(must_visits, key=len)
    while remaining:
        smallest, *rest = remaining
        yield smallest
        remaining = [candidate for candidate in rest
                     if not smallest.issubset(candidate)]
147 |
--------------------------------------------------------------------------------
/src/explorer/forward.py:
--------------------------------------------------------------------------------
1 | import logging
2 | from queue import PriorityQueue
3 |
4 | from src.util.utils import is_subseq, is_substr
5 |
6 |
class ForwardExplorerState(object):
    """One node of the forward CFG exploration.

    Tracks the path walked so far, the number of branch decisions taken, and
    the remaining instruction slices still to be matched. Instructions of a
    slice that belong to the current block are consumed immediately in
    __init__; a slice reduced to its last instruction marks that address as
    finished. The priority attribute ``weight`` is assigned externally by
    ForwardExplorer before the state is queued.
    """

    def __init__(self, bb, path=None, branches=None, slices=None):
        # NOTE(review): despite the None defaults, all callers in this file pass
        # path/branches/slices explicitly; slices=None would crash the loop below.
        self.bb = bb
        # `or []` is dead: list(path) + [bb.start] is always non-empty — kept as-is
        self.path = list(path) + [bb.start] or []
        self.seen = set(self.path)
        self.branches = branches or 0
        self.slices = []
        self.finished = set()
        #logging.info('Path %s', ' -> '.join('%x' % p for p in self.path))
        for slice in slices:
            last_pc = None
            #print('%x' % self.bb.start)
            # consume the slice prefix that lives in this block, in increasing
            # address order; stop at the first out-of-order instruction
            while slice and slice[0].bb.start == self.bb.start:
                if last_pc is None or slice[0].addr > last_pc:
                    last_pc = slice[0].addr
                    if len(slice) == 1:
                        # whole slice matched: remember its final address
                        self.finished.add(last_pc)
                    slice = slice[1:]
                else:
                    break
            self.slices.append(slice)


    def next_states(self):
        """Return successor states reachable from this one.

        A successor qualifies if at least one of its recorded predecessor
        paths is fully covered by (and a subsequence of) the path walked so
        far, and it still can lead to at least one remaining slice.
        """
        possible_succs = []
        for succ in self.bb.succ:
            pths = succ.pred_paths[self.bb]
            for pth in pths:
                if not set(pth).issubset(self.seen):
                    continue
                if not is_subseq(pth, self.path):
                    continue
                break
            else:
                # no predecessor path of succ is satisfied — skip succ
                continue
            possible_succs.append(succ)
        next_states = []
        branches = self.branches
        # a real choice point costs one branch
        if len(possible_succs) > 1:
            branches += 1
        for succ in possible_succs:
            # keep only slices entirely reachable from succ
            next_slices = tuple(
                s for s in self.slices if set(i.bb.start for i in s).issubset(succ.descendants | {succ.start}))
            if next_slices:
                next_states.append(ForwardExplorerState(succ, self.path, branches, next_slices))
        return next_states

    def __lt__(self, other):
        # `weight` is set by ForwardExplorer.find() before queuing
        return self.weight < other.weight
56 |
class ForwardExplorer(object):
    """Best-first forward exploration of a CFG towards given instruction slices.

    ``find`` yields concrete paths (lists of BB start addresses plus a final
    instruction address) that realize one of the requested slices.
    """

    def __init__(self, cfg, avoid=frozenset()):
        # distance maps from BB-start to target instruction, filled lazily in find()
        self.dist_map = dict()
        self.cfg = cfg
        self.blacklist = set()
        # NOTE(review): the `avoid` constructor argument is never stored or used;
        # only find()'s own `avoid` parameter has an effect — confirm intent.

    def add_to_blacklist(self, path):
        """Ban any path containing ``path`` as a contiguous subsequence."""
        self.blacklist.add(tuple(path))

    def weight(self, state):
        """Priority of a state: branches taken, plus (if nothing finished yet)
        the minimal remaining distance to any slice's first instruction."""
        if state.finished:
            return state.branches
        else:
            return state.branches + min(self.dist_map[s[0].bb.start][state.bb] for s in state.slices)

    def find(self, slices, looplimit=2, avoid=frozenset(), prefix=None):
        """Yield paths realizing one of ``slices``.

        :param slices: iterables of instructions to be hit in order
        :param looplimit: maximum times the same BB may appear in one path
        :param avoid: opcode names that disqualify a block
        :param prefix: optional instruction address to start from instead of the root
        """
        avoid = frozenset(avoid)
        # drop instructions without a basic block (unreachable code)
        slices = tuple(tuple(i for i in s if i.bb) for s in slices)

        if not slices:
            #raise StopIteration
            return
        # distance from a BB to instruction
        for slice in slices:
            for i in slice:
                if i.bb.start not in self.dist_map:
                    self.dist_map[i.bb.start] = self.cfg.distance_map(i)
                #print('%x' %i.bb.start)
                #print(['%x' %d.start for d in self.dist_map[i.bb.start]])

        if prefix is None:
            state = ForwardExplorerState(self.cfg.root, [], 0, slices)
        else:
            state = ForwardExplorerState(self.cfg._ins_at[prefix].bb, prefix, 0, slices)
        state.weight = self.weight(state)

        todo = PriorityQueue()
        todo.put(state)

        while not todo.empty():
            state = todo.get()
            # skip paths that contain a blacklisted sub-path
            if any(is_substr(pth, state.path) for pth in self.blacklist):
                logging.info("BLACKLIST hit for %s" % (', '.join('%x' % i for i in state.path)))
                continue
            if set(i.name for i in state.bb.ins) & avoid:
                continue
            if state.finished:
                # emit one path per finished slice end-address
                for last_pc in state.finished:
                    yield state.path + [last_pc]
                state.finished = set()
                # drop fully-consumed slices; stop expanding if none remain
                state.slices = tuple(s for s in state.slices if s)
                if not state.slices:
                    continue
            # loop bound: do not revisit the same block too often
            if state.path.count(state.bb.start) > looplimit:
                continue
            for next_state in state.next_states():
                next_state.weight = self.weight(next_state)
                todo.put(next_state)
115 |
--------------------------------------------------------------------------------
/src/flow/__init__.py:
--------------------------------------------------------------------------------
1 | from . import tainting
2 | from . import symbolic
3 | from . import analysis_results
4 | from . import code_info
5 |
--------------------------------------------------------------------------------
/src/flow/analysis_results.py:
--------------------------------------------------------------------------------
1 |
class TainitAnalysisResult(object):
    """Plain record describing one taint-analysis finding.

    Every attribute simply mirrors the constructor argument of the same name;
    ``tainted`` is stored privately as ``_tainted``.
    """

    def __init__(self, state, defect_type, target_sink, tainted, sources, sload_sha3_bases, sstore_sha3_bases, sstore_slots, slot_live_access, slot_access_trace, storage_slot_type):
        # symbolic state at the sink, defect classification and the sink itself
        self.state = state
        self.defect_type = defect_type
        self.target_sink = target_sink
        # taint verdict and the taint sources that reached the sink
        self._tainted = tainted
        self.sources = sources
        # storage addressing details (SHA3 bases for mapping/array slots)
        self.sload_sha3_bases = sload_sha3_bases
        self.sstore_sha3_bases = sstore_sha3_bases
        self.sstore_slots = sstore_slots
        # slot liveness/usage bookkeeping
        self.slot_live_access = slot_live_access
        self.slot_access_trace = slot_access_trace
        self.storage_slot_type = storage_slot_type
15 |
class TainitAnalysisBugDetails(object):
    """Aggregate of gas/taint related findings collected over a contract.

    Each attribute mirrors the constructor argument of the same name.
    """

    def __init__(self, unbounded_loops, fun_call_restr, loops_with_calls, gas_griefing, hardcoded_gas, asserts, slot_live_access, temp_slots):
        self.unbounded_loops = unbounded_loops
        self.fun_call_restr = fun_call_restr
        self.loops_with_calls = loops_with_calls
        self.gas_griefing = gas_griefing
        self.hardcoded_gas = hardcoded_gas
        self.asserts = asserts
        self.slot_live_access = slot_live_access
        self.temp_slots = temp_slots
26 |
class AnalysisBugDetails(object):
    """Access-control findings: violated checks, missing checks, and violated
    checks on the initial basic block (ib)."""

    def __init__(self, violated_ac_checks, missing_ac_checks, violated_ac_checks_ib):
        self.violated_ac_checks = violated_ac_checks
        self.missing_ac_checks = missing_ac_checks
        self.violated_ac_checks_ib = violated_ac_checks_ib
32 |
33 |
--------------------------------------------------------------------------------
/src/flow/code_info.py:
--------------------------------------------------------------------------------
1 | import os, sys
2 | import logging
3 |
def get_function_sig(cfg,path, type='name'):
    """Recover the function selector matched along ``path`` in the dispatcher.

    Scans each block on the path for a PUSH4 selector that is compared (EQ)
    and whose jump target is the next block on the path; the *last* such match
    along the path wins.

    :param cfg: control-flow graph (uses _bb_at / _ins_at lookups)
    :param path: list of BB start addresses, ending in a final instruction address
    :param type: 'name' -> signature text from FSignatures.txt (or raw selector),
                 'id'   -> selector hex string ('0' if none),
                 'bb'   -> BB start address of the match (0 if none)
    :return: see ``type``; returns None for any other ``type`` value
    """
    #bbs= {bb.start:ins.arg.hex() for bb in p.cfg.bbs for ins in bb.ins if bb.start in path[:-1] and ins.name =='PUSH4' and p.cfg._ins_at[ins.addr+ins.op-0x5f+1].name=='EQ'}
    logging.debug("Path: %s", '->'.join('%x' % p for p in path))
    # selector compared right after the PUSH4 (PUSH4 ... EQ PUSHn <dest>)
    bbs= {bb:ins.arg.hex() for bb in path[:-1] for ins in cfg._bb_at[bb].ins if ins.name =='PUSH4' and ins.arg.hex() !='ffffffff' and cfg._ins_at[ins.addr+ins.op-0x5f+1].name=='EQ' and int.from_bytes(cfg._ins_at[ins.addr+ins.op-0x5f+2].arg,'big')==path[path.index(bb)+1]}
    # variant with one extra instruction between PUSH4 and EQ
    other_bbs= {bb:ins.arg.hex() for bb in path[:-1] for ins in cfg._bb_at[bb].ins if ins.name =='PUSH4' and ins.arg.hex() !='ffffffff' and ins.addr+ins.op-0x5f+2 in cfg._ins_at and cfg._ins_at[ins.addr+ins.op-0x5f+2].name=='EQ' and int.from_bytes(cfg._ins_at[ins.addr+ins.op-0x5f+3].arg,'big')==path[path.index(bb)+1]}
    bbs.update(other_bbs)
    #print(bbs)
    bbs_indices=[path.index(bb) for bb in bbs.keys()]
    if len(bbs_indices)!=0 and type=='name':
        # NOTE(review): FSignatures.txt is resolved relative to the current
        # working directory — running from outside the repo root will fail.
        with open(os.path.join(os.path.join(os.getcwd(),"src/flow"),"FSignatures.txt"), 'r') as f:
            fsig=dict(x.rstrip().split(None,1) for x in f)
        return fsig.get('0x'+str(bbs[path[max(bbs_indices)]]),bbs[path[max(bbs_indices)]])
    elif len(bbs_indices)==0 and type=='name':
        return '() payable'
    elif len(bbs_indices)!=0 and type=='id':
        return str(bbs[path[max(bbs_indices)]])
    elif len(bbs_indices)==0 and type=='id':
        return '0'
    elif len(bbs_indices)!=0 and type=='bb':
        return path[max(bbs_indices)]
    elif len(bbs_indices)==0 and type=='bb':
        return 0
26 |
def function_restricted_caller(p, path):
    """Heuristically decide whether the function reached via ``path`` checks
    its caller.

    Looks for a branching block on the path (ignoring the last two entries)
    that reads CALLER/ORIGIN and compares it with EQ shortly afterwards.

    :param p: analyzed contract (uses p.cfg.bbs)
    :param path: list of BB start addresses (last two entries excluded)
    :return: True if such a caller check is found, False otherwise
    """
    candidate_prefix = path[:-2]
    matches = [
        bb.start
        for bb in p.cfg.bbs
        for ins in bb.ins
        if bb.start in candidate_prefix
        and len(bb.succ_addrs) >= 2
        and ins.name in ['CALLER', 'ORIGIN']
        and (bb.ins[bb.ins.index(ins) + 3].name == 'EQ'
             or bb.ins[bb.ins.index(ins) + 1].name == 'EQ'
             or bb.ins[-3].name == 'EQ')
    ]
    return bool(matches)
32 |
--------------------------------------------------------------------------------
/src/flow/symbolic.py:
--------------------------------------------------------------------------------
1 | import itertools
2 | import logging
3 | from collections import defaultdict
4 | #from py import code
5 |
6 | from z3 import z3
7 | from src import cfg
8 |
9 | from src.cfg import opcodes
10 | from src.constraints import check_model_and_resolve, model_to_calls
11 | from src.evm.exceptions import IntractablePath, TimeoutException
12 | from src.evm.results import CombinedSymbolicResult
13 | from src.util.z3_extra_util import concrete
14 | from src.flow import code_info as cinfo
15 |
class InfeasibleExploit(Exception):
    """Raised when the current path/combination cannot yield a satisfiable exploit."""
    pass
18 |
19 |
class ExploitContext(object):
    """Bundle of target/attacker parameters shared across exploit generation.

    :param target_addr: address that should receive extracted funds
    :param shellcode_addr: attacker-controlled code address
    :param target_amount: amount to extract
    :param amount_check: '+' (at least), '-' (at most), or exact otherwise
    :param initial_balance: assumed initial contract balance (None = unconstrained)
    :param initial_storage: assumed initial contract storage
    :param controlled_addrs: additional attacker-controlled addresses
    """

    def __init__(self, target_addr, shellcode_addr, target_amount, amount_check, initial_balance, initial_storage,
                 controlled_addrs=None):
        self.target_addr = target_addr
        self.shellcode_addr = shellcode_addr
        self.target_amount = target_amount
        self.amount_check = amount_check
        self.initial_balance = initial_balance
        self.initial_storage = initial_storage

        # assume we control the target address; the None-sentinel replaces the
        # previous shared mutable default `controlled_addrs=set()`
        self.controlled_addrs = (controlled_addrs or set()) | {target_addr}
32 |
33 |
def exploit_constraints_call(r, ctx):
    """Build extra z3 constraints that weaponize a terminal CALL.

    Forces the CALL target to ctx.target_addr and the transferred amount to
    satisfy ctx.amount_check, while keeping the attacker's own spending below
    the gain and the assumed start balance reasonable.

    :param r: combined symbolic result ending in a CALL
    :param ctx: ExploitContext
    :return: list of z3 constraints
    :raises InfeasibleExploit: if the concrete target is not ctx.target_addr
    """
    # CALL stack layout: [-2] is the callee address, [-3] is the value
    addr = r.state.stack[-2]
    if not concrete(addr):
        addr = z3.simplify(addr)

    amount = r.state.stack[-3]
    if not concrete(amount):
        amount = z3.simplify(amount)

    extra_constraints = []

    if not concrete(addr):
        # constrain the low 160 bits (an EVM address) to the target
        extra_constraints.append(z3.Extract(159, 0, addr) == ctx.target_addr)
    else:
        if addr != ctx.target_addr:
            raise InfeasibleExploit

    if not concrete(amount):
        if ctx.amount_check == '+':
            extra_constraints.append(z3.UGE(amount, ctx.target_amount))
        elif ctx.amount_check == '-':
            extra_constraints.append(z3.UGT(amount, 0))
            extra_constraints.append(z3.ULE(amount, ctx.target_amount))
        else:
            extra_constraints.append(amount == ctx.target_amount)
        # the contract can only send what it holds at that point
        final_balance = r.state.balance
        extra_constraints.append(z3.ULE(amount, final_balance))

    # ensure we're not spending more for this exploit than we gain
    total_spent = None
    for res in r.results:
        callvalue = z3.BitVec('CALLVALUE_%d' % res.xid, 256)
        extra_constraints.append(z3.ULE(callvalue, 10 * (10 ** 18))) # keep it semi-reasonable: at most 10 Eth per call
        if total_spent is None:
            total_spent = callvalue
        else:
            total_spent += callvalue

    extra_constraints.append(z3.ULT(total_spent, amount))

    # also, ensure the contract does not require a unreasonable start-balance (>100 Eth)
    if not ctx.initial_balance:
        start_balance = z3.BitVec('BALANCE_%d' % r.results[0].xid, 256)
        extra_constraints.append(z3.ULE(start_balance, 100 * (10 ** 18)))

    return extra_constraints
80 |
81 |
def exploit_constraints_callcode(r, ctx):
    """Constrain a terminal CALLCODE to execute the attacker's shellcode.

    :param r: combined symbolic result ending in a CALLCODE
    :param ctx: ExploitContext
    :return: list of z3 constraints (empty if the target is already concrete)
    :raises InfeasibleExploit: if the concrete target differs from the shellcode address
    """
    target = z3.simplify(r.state.stack[-2])

    if concrete(target):
        # concrete callee: exploitable only if it already is the shellcode address
        if target != ctx.shellcode_addr:
            raise InfeasibleExploit
        return []

    # symbolic callee: force its low 160 bits to the shellcode address
    return [z3.Extract(159, 0, target) == ctx.shellcode_addr]
94 |
95 |
def exploit_constraints_delegatecall(r, ctx):
    """Constrain a terminal DELEGATECALL to execute the attacker's shellcode.

    :param r: combined symbolic result ending in a DELEGATECALL
    :param ctx: ExploitContext
    :return: list of z3 constraints (empty if the target is already concrete)
    :raises InfeasibleExploit: if the concrete target differs from the shellcode address
    """
    target = z3.simplify(r.state.stack[-2])

    if concrete(target):
        # concrete callee: exploitable only if it already is the shellcode address
        if target != ctx.shellcode_addr:
            raise InfeasibleExploit
        return []

    # symbolic callee: force its low 160 bits to the shellcode address
    return [z3.Extract(159, 0, target) == ctx.shellcode_addr]
108 |
109 |
def exploit_constraints_selfdestruct(r, ctx):
    """Constrain a terminal SELFDESTRUCT to send its balance to the target.

    :param r: combined symbolic result ending in a SELFDESTRUCT
    :param ctx: ExploitContext
    :return: list of z3 constraints (empty if the beneficiary is already concrete)
    :raises InfeasibleExploit: if the concrete beneficiary differs from the target address
    """
    beneficiary = z3.simplify(r.state.stack[-1])

    if concrete(beneficiary):
        # concrete beneficiary: exploitable only if it already is the target
        if beneficiary != ctx.target_addr:
            raise InfeasibleExploit
        return []

    # symbolic beneficiary: force its low 160 bits to the target address
    return [z3.Extract(159, 0, beneficiary) == ctx.target_addr]
122 |
123 |
# Maps the critical opcode that terminates a path to the builder producing the
# extra z3 constraints required to weaponize it.
# NOTE(review): 'DELEGATECALL' is mapped to the CALLCODE handler; the dedicated
# exploit_constraints_delegatecall above is identical, so behavior is
# unchanged — confirm the mapping is intentional.
EXPLOIT_CONSTRAINTS = {
    'CALL': exploit_constraints_call,
    'CALLCODE': exploit_constraints_callcode,
    'DELEGATECALL': exploit_constraints_callcode,
    'SELFDESTRUCT': exploit_constraints_selfdestruct
}
130 |
131 |
def get_exploit_constraints(r, ctx):
    """Dispatch to the per-opcode constraint builder for the path's final opcode.

    :param r: combined symbolic result; r.results[-1].target_op names the final opcode
    :param ctx: ExploitContext
    :return: list of extra z3 constraints ([] for non-critical opcodes)
    """
    handler = EXPLOIT_CONSTRAINTS.get(r.results[-1].target_op)
    if handler is None:
        return []
    return handler(r, ctx)
138 |
139 |
def control_address_constraints(sym_addr, controlled_addrs):
    """Build a z3 constraint forcing ``sym_addr`` to equal one of ``controlled_addrs``.

    :param sym_addr: symbolic 256-bit address (z3 expression)
    :param controlled_addrs: non-empty iterable of attacker-controlled addresses
    :return: z3 disjunction (sym_addr == a0) | ... | (sym_addr == an)
    :raises ValueError: if controlled_addrs is empty (previously surfaced as an
                        opaque IndexError on ``sub_exprs[0]``)
    """
    sub_exprs = [sym_addr == controlled_addr for controlled_addr in controlled_addrs]
    if not sub_exprs:
        raise ValueError('controlled_addrs must not be empty')
    # z3.Or accepts multiple operands directly; no need for a manual fold.
    return z3.Or(*sub_exprs)
146 |
def attempt_exploit(results, ctx):
    """Try to build a concrete exploit from a sequence of symbolic path results.

    Combines ``results`` (executed in the given order) into one
    CombinedSymbolicResult, adds opcode-specific constraints, and asks the
    solver for a model. Timeouts are propagated; intractable paths become
    InfeasibleExploit.

    :param results: symbolic path results; later entries are prepended so the
                    combined result reflects the given execution order
    :param ctx: ExploitContext with target/attacker parameters
    :return: (calls, combined_result, model) on success
    :raises InfeasibleExploit: if the constraints are unsatisfiable or the path is intractable
    :raises TimeoutException: if solving times out
    """
    c = CombinedSymbolicResult()
    for r in results[::-1]:
        c.prepend(r)
    c.combine(ctx.initial_storage, ctx.initial_balance)
    c.simplify()
    extra_constraints = get_exploit_constraints(c, ctx)

    for res in c.results:
        origin = z3.BitVec('ORIGIN_%d' % res.xid, 256)
        caller = z3.BitVec('CALLER_%d' % res.xid, 256)
        # ensure we control the origin
        # NOTE(review): the ORIGIN/CALLER constraints below are disabled here
        # (unlike the sibling implementation of this function elsewhere in the
        # project); the post-model ORIGIN check further down still applies.
        #extra_constraints.append(control_address_constraints(origin, ctx.controlled_addrs))
        # and ensure the caller is either the origin or the shellcode address
        #extra_constraints.append(control_address_constraints(caller, {origin, ctx.shellcode_addr}))
    try:
        model = check_model_and_resolve(c.constraints + extra_constraints, c.sha_constraints)

        # enforce we control all ORIGIN-addresses
        if any(model[v].as_long() not in ctx.controlled_addrs for v in model if v.name().startswith('ORIGIN')):
            raise InfeasibleExploit

        return model_to_calls(model, c.idx_dict), c, model
    except TimeoutException:
        raise TimeoutException("Timed out!")
    except IntractablePath:
        raise InfeasibleExploit
174 |
175 |
176 | def validate_path(p, path, mode=None, ac_jumpi=None):
177 | target_addr= int('0x1234', 16)
178 | shellcode_addr= int('0x1000', 16), +1000
179 | target_amount= +1000
180 | amount_check='+'
181 | initial_storage=dict()
182 | initial_balance=None
183 | max_calls=3
184 | controlled_addrs=set()
185 |
186 | fun_sig_bb = cinfo.get_function_sig(p.cfg, path,'bb')
187 | code_path = path[path.index(fun_sig_bb)+2:-1]
188 | if fun_sig_bb==0: #most probably payable()
189 | c=0
190 | while (fun_sig_bb==0 and c+1 '.join('%x' % p for p in path))
97 | try:
98 | ins = imap[path[-1]]
99 | yield ins, path, self.run_symbolic(path, inclusive)
100 | except IntractablePath as e:
101 | bad_path = [i for i in e.trace if i in self.cfg._bb_at] + [e.remainingpath[0]]
102 | dd = self.cfg.data_dependence(self.cfg._ins_at[e.trace[-1]])
103 | if not any(i.name in ('MLOAD', 'SLOAD') for i in dd):
104 | ddbbs = set(i.bb.start for i in dd)
105 | bad_path_start = next((j for j, i in enumerate(bad_path) if i in ddbbs), 0)
106 | bad_path = bad_path[bad_path_start:]
107 | logging.info("Bad path: %s" % (', '.join('%x' % i for i in bad_path)))
108 | exp.add_to_blacklist(bad_path)
109 | continue
110 | except ExternalData:
111 | continue
112 | except TimeoutException:
113 | raise TimeoutException("Timed out!")
114 | except Exception as e:
115 | logging.exception('Failed path due to %s', e)
116 | continue
    def _analyze_writes(self):
        """Populate ``self._writes``: storage address -> set of SSTORE instructions.

        Runs symbolic execution over each interesting backward slice of every
        SSTORE; stores with a concrete target address are keyed by that
        address, all others are collected under the key None.
        """
        sstore_ins = self.filter_ins('SSTORE')
        self._writes = defaultdict(set)
        for store in sstore_ins:
            for bs in interesting_slices(store):
                bs.append(store)
                prg = slice_to_program(bs)
                path = sorted(prg.keys())
                try:
                    r = run_symbolic(prg, path, self.code, inclusive=True)
                except IntractablePath:
                    logging.exception('Intractable Path while analyzing writes')
                    continue
                # SSTORE's target slot is the top-of-stack value
                addr = r.state.stack[-1]
                if concrete(addr):
                    self._writes[addr].add(store)
                else:
                    self._writes[None].add(store)
        self._writes = dict(self._writes)
136 |
137 | def get_writes_to (self, addr):
138 | concrete_writes = set()
139 | if concrete(addr) and addr in self.writes:
140 | concrete_writes = self.writes[addr]
141 | return concrete_writes, self.symbolic_writes
142 |
    def reolve_struct_offset(self, ssa, slice, sload=False, sload_ins=None, sstore=False, sstore_ins=None):
        """Recover the constant struct-member offset of an SLOAD/SSTORE slot.

        Looks up the SSA instruction for the given SLOAD (or SSTORE), and if
        its slot operand is ``SHA3(...) + const`` (in either operand order),
        returns that constant; otherwise returns None.

        :param ssa: SSA recovery result (uses its first function)
        :param slice: instruction slice (unused here; kept for symmetry with resolve_slot_offset)
        :param sload/sload_ins: set sload=True and pass the SLOAD instruction
        :param sstore/sstore_ins: set sstore=True and pass the SSTORE instruction
        :return: concrete struct offset, or None
        """
        function = [f for f in ssa.functions][0]
        if sload:
            ssa_block=[ins for block in function if block.offset == sload_ins.bb.start for ins in block.insns]
            ssa_ins= [s for s in ssa_block if s.offset == sload_ins.addr][0]
        elif sstore:
            ssa_block=[ins for block in function if block.offset == sstore_ins.bb.start for ins in block.insns]
            ssa_ins= [s for s in ssa_block if s.offset == sstore_ins.addr][0]
        # NOTE(review): if neither sload nor sstore is set, ssa_ins is unbound
        # and the code below raises NameError — callers must pass one flag.
        struct_offset = None
        if ssa_ins.arguments[0]._writer is not None:
            if ssa_ins.arguments[0]._writer.insn.name =='ADD':
                # SHA3(..) + const  -> the constant is the struct offset
                if ssa_ins.arguments[0]._writer.arguments[0]._writer is not None and \
                    ssa_ins.arguments[0]._writer.arguments[1]._writer is None:
                    if ssa_ins.arguments[0]._writer.arguments[0]._writer.insn.name=='SHA3':
                        struct_offset =ssa_ins.arguments[0]._writer.arguments[1].concrete_value
                # const + SHA3(..)  -> mirrored operand order
                elif ssa_ins.arguments[0]._writer.arguments[0]._writer is None and \
                    ssa_ins.arguments[0]._writer.arguments[1]._writer is not None:
                    if ssa_ins.arguments[0]._writer.arguments[1]._writer.insn.name=='SHA3':
                        struct_offset =ssa_ins.arguments[0]._writer.arguments[0].concrete_value

        return struct_offset
164 |
    def resolve_slot_offset(self, ssa, slice, sload=False, sload_ins=None, sstore=False, sstore_ins=None):
        """Determine which byte of a storage slot an SLOAD/SSTORE accesses.

        Pattern-matches the Solidity packing arithmetic (EXP/DIV shifts for
        reads, NOT/SUB/EXP or SHL masks for writes) in the SSA form and
        returns the 1-based start byte, or the string 'whole' when the whole
        slot must be (over)approximated.

        :param ssa: SSA recovery result (uses its first function)
        :param slice: backward instruction slice of the access
        :param sload/sload_ins: set for an SLOAD access
        :param sstore/sstore_ins: set for an SSTORE access
        :return: int start byte, or 'whole'; may return None for unmatched
                 SSTORE mask shapes (the caller sees the debug prints below)
        """
        function = [f for f in ssa.functions][0]
        if sload:
            # SLOAD followed by EXP/DIV: value is right-shifted out of the slot
            if [ins.name for ins in slice if ins.name in set(['SLOAD','EXP','DIV','SUB'])] ==['SLOAD','EXP','DIV']:
                exp_ins = [ins for ins in slice if ins.name in set(['EXP'])]
                ssa_block=[ins for block in function if block.offset == exp_ins[0].bb.start for ins in block.insns]
                ssa_ins =[s for s in ssa_block if s.offset == exp_ins[0].addr]
                if (ssa_ins[0].arguments[0].concrete_value==256):
                    start_byte=ssa_ins[0].arguments[1].concrete_value+1
                elif (ssa_ins[0].arguments[0].concrete_value==2): #in binary
                    start_byte=ssa_ins[0].arguments[1].concrete_value/8+1
            elif [ins.name for ins in slice if ins.name in set(['SLOAD','EXP','DIV','SUB'])] ==['SLOAD','EXP','SUB']:
                #load starting first bye
                start_byte=1
            elif [ins.name for ins in slice if ins.name in set(['SLOAD','EXP','SUB','DIV'])] ==['SLOAD','EXP','SUB','DIV']:
                div_ins = [ins for ins in slice if ins.name in set(['DIV'])]
                ssa_block=[ins for block in function if block.offset == div_ins[0].bb.start for ins in block.insns]
                ssa_ins =[s for s in ssa_block if s.offset == div_ins[0].addr]
                if ssa_ins[0].arguments[1]._writer is None:
                    # constant divisor: derive the byte position from its hex form
                    pos_str=str('%x' %ssa_ins[0].arguments[1].concrete_value)
                    start_byte=len(pos_str)//2+1-pos_str.find('1')
                else:
                    exp_ins=ssa_ins[0].arguments[1]._writer
                    if exp_ins.insn.name =='EXP':
                        if (exp_ins.arguments[0].concrete_value==256):
                            start_byte= exp_ins.arguments[1].concrete_value+1
                        elif (exp_ins.arguments[0].concrete_value==2): #in binary
                            start_byte=exp_ins.arguments[1].concrete_value/8+1
                    else:
                        start_byte='whole'
                        print('error, check resolve_slot_offset')
            else:
                start_byte='whole'
        elif sstore:
            start_byte=None
            masking_pattern = [ins.name for ins in slice if ins.name in set(['SLOAD','EXP','SUB','NOT'])]
            if len(masking_pattern)==0:
                start_byte='whole' # overapproximate the whole slot

            elif 'NOT' not in masking_pattern or 'SLOAD' not in masking_pattern: #Cannot decide what is overwritten without SLOAD
                start_byte='whole' #overapproximate the whole slot

            elif len([i for i in masking_pattern if i== 'NOT'])>1:# we may need to overapproximate as we do not know which not is for masking
                start_byte='whole' #overapproximate the whole slot

            elif len([i for i in masking_pattern if i == 'NOT'])==1 and 'SLOAD' in masking_pattern:
                not_ins =[ins for ins in slice if ins.name =='NOT']
                ssa_block=[ins for block in function if block.offset ==not_ins[0].bb.start for ins in block.insns]
                ssa_not_ins =[s for s in ssa_block if s.offset in [ins.addr for ins in slice if ins.name in set(['NOT'])]]
                if ssa_not_ins[0].arguments[0]._writer is None:
                    # constant mask operand: byte position from trailing 'f's
                    pos_str=str('%x' %ssa_not_ins[0].arguments[0].concrete_value)
                    start_byte=(len(pos_str)-pos_str.rfind('f'))//2+1
                else:
                    ssa_ins=ssa_not_ins[0].arguments[0]._writer.arguments[1]._writer
                    if ssa_ins is not None and ssa_ins.insn.name =='EXP' and ssa_ins.arguments[0]._writer is None and ssa_ins.arguments[1]._writer is None:
                        # mask built as (c | (EXP(..)-c)) * EXP(..): reconstruct it
                        exp = pow(ssa_ins.arguments[0].concrete_value, ssa_ins.arguments[1].concrete_value, src.util.utils.TT256)
                        mul = None
                        if ssa_not_ins[0].arguments[0]._writer.arguments[0]._writer is None:
                            mul = ssa_not_ins[0].arguments[0]._writer.arguments[0].concrete_value * exp
                        else:
                            sub_ins=ssa_not_ins[0].arguments[0]._writer.arguments[0]._writer
                            if sub_ins.insn.name =='SUB':
                                if sub_ins.arguments[0]._writer.insn.name=='EXP':
                                    exp1 = pow(sub_ins.arguments[0]._writer.arguments[0].concrete_value, sub_ins.arguments[0]._writer.arguments[1].concrete_value, src.util.utils.TT256)
                                    sub = exp1 - sub_ins.arguments[1].concrete_value
                                    mul = sub * exp
                        if mul is not None:
                            bit_mask = '%x' % (src.util.utils.TT256M1 - mul)
                            start_byte= (len(bit_mask)-bit_mask.rfind('0'))//2+1
                    elif ssa_not_ins[0].arguments[0]._writer is not None and ssa_not_ins[0].arguments[0]._writer.insn.name=='SUB':
                        sub_ins=ssa_not_ins[0].arguments[0]._writer
                        if sub_ins.arguments[0]._writer is not None and sub_ins.arguments[0]._writer.insn.name=='EXP':
                            # mask is EXP(..) - c
                            exp_ins = sub_ins.arguments[0]._writer
                            if exp_ins.arguments[0]._writer is None and exp_ins.arguments[1]._writer is None:
                                exp = pow(exp_ins.arguments[0].concrete_value, exp_ins.arguments[1].concrete_value, src.util.utils.TT256)
                                sub = exp - sub_ins.arguments[1].concrete_value
                                bit_mask = '%x' % (src.util.utils.TT256M1 - sub)
                                start_byte= (len(bit_mask)-bit_mask.rfind('0'))//2+1
                        elif sub_ins.arguments[0]._writer is not None and sub_ins.arguments[0]._writer.insn.name=='SHL':
                            # mask is SHL(..) - c (newer solc emits SHL instead of EXP)
                            shl_ins = sub_ins.arguments[0]._writer
                            if shl_ins.arguments[0]._writer is None and shl_ins.arguments[1]._writer is None:
                                shl= (shl_ins.arguments[1].concrete_value << shl_ins.arguments[0].concrete_value)
                                sub = shl- sub_ins.arguments[1].concrete_value
                                bit_mask = '%x' % (src.util.utils.TT256M1 - sub)
                                start_byte= (len(bit_mask)-bit_mask.rfind('0'))//2+1
                    else:
                        start_byte='whole' #overapproximate the whole slot

        # NOTE(review): on the sload path `masking_pattern` is never bound, so
        # reaching this debug branch with a falsy start_byte would raise
        # NameError; it is only meaningful for the sstore path — confirm.
        if not start_byte:
            print(sstore_ins)
            print(masking_pattern)
        return start_byte
257 |
258 | def resolve_access_control_slots(self, ssa, instructions, ac_check_ins, args=None, memory_info=None, restricted=True):
259 | slices = []
260 | other_ac_checks = []
261 | # only check instructions that have a chance to reach root
262 | instructions = [ins for ins in instructions if 0 in ins.bb.ancestors | {ins.bb.start}]
263 | if not instructions:
264 | return
265 | imap = {ins.addr: ins for ins in instructions}
266 | access_sloads = defaultdict(list)
267 | if args:
268 | for jump_ins in instructions:
269 | for bs in interesting_slices(jump_ins, args, reachable=True, restricted=False):
270 | if('%x' %jump_ins.addr) == '2f9':
271 | print(jump_ins)
272 | print(bs)
273 | cur_jump_sloads= [v['sload'] for k in access_sloads if k==jump_ins for v in access_sloads[k]]
274 | if len(cur_jump_sloads)!=0 and any(ins in cur_jump_sloads for ins in bs if ins.name in frozenset(['SLOAD'])):
275 | slices.append(bs+(jump_ins,))
276 | elif len(set(ac_check_ins)&set([ins.name for ins in bs]))==len(ac_check_ins) and not any(ins.name in frozenset(['CALL']) for ins in bs):
277 | slices.append(bs+(jump_ins,))
278 | sload= [i for i in bs if i.name in frozenset(['SLOAD'])]
279 | slot_byte= self.resolve_slot_offset(ssa, bs+(jump_ins,), sload=True, sload_ins=sload[0])
280 | struct_offset= self.reolve_struct_offset(ssa, bs+(jump_ins,), sload=True, sload_ins=sload[0] )
281 | access_sloads[jump_ins].append({'sload':sload[0],'sbyte':slot_byte,'structOffset':struct_offset})
282 | elif any(ins.name in frozenset(['SLOAD']) for ins in bs) and (any(ins.arg==b'\xff' for ins in bs if ins.name in frozenset(['PUSH1'])) or \
283 | any(ins.name in frozenset(['CALLDATALOAD','CALLDATACOPY']) for bb in jump_ins.bb.pred for ins in bb.ins)) and not any(ins.name in frozenset(['CALL']) for ins in bs):
284 | if any(ins.arg==b'\xff' for ins in bs if ins.name in frozenset(['PUSH1'])):
285 | sload_ins=[ins for ins in bs if ins.name in frozenset(['SLOAD']) if any(
286 | ss.arg==b'\xff' and ins.addr < ss.addr 0:
332 | soffset= self.reolve_struct_offset(ssa, s+(ins,),sload=True, sload_ins=sload_ins[0])
333 | struct_offset[ins]=soffset
334 |
335 | checked_ins=[]
336 | c=0
337 | start_time=time.time()
338 | for path in exp.find(slices, avoid=[]):
339 | logging.debug('Path %s', ' -> '.join('%x' % p for p in path))
340 | c+=1
341 | try:
342 | ins = imap[path[-1]]
343 | if sinks:
344 | result = run_static(self.prg, ssa, path, sinks, self.code, inclusive,defect_type=defect_type, storage_slots=storage_slots, storage_sha3_bases=storage_sha3_bases)
345 | if result._tainted and ins.name in set(['SSTORE']):
346 | sstore_slices = [s+(ins,) for s in interesting_slices(ins, [0], memory_info, reachable=True, taintedBy=None, restricted=False)]
347 | soffset= self.reolve_struct_offset(ssa, sstore_slices[0],sstore=True, sstore_ins=ins)
348 | struct_offset[ins]=soffset
349 | yield ins,slot_sbyte,struct_offset, path, result
350 | else:
351 | yield ins, slot_sbyte, struct_offset, path, None
352 | except IntractablePath as e:
353 | bad_path = [i for i in e.trace if i in self.cfg._bb_at] + [e.remainingpath[0]]
354 | dd = self.cfg.data_dependence(self.cfg._ins_at[e.trace[-1]])
355 | if not any(i.name in ('MLOAD', 'SLOAD') for i in dd):
356 | ddbbs = set(i.bb.start for i in dd)
357 | bad_path_start = next((j for j, i in enumerate(bad_path) if i in ddbbs), 0)
358 | bad_path = bad_path[bad_path_start:]
359 | logging.info("Bad path: %s" % (', '.join('%x' % i for i in bad_path)))
360 | exp.add_to_blacklist(bad_path)
361 | continue
362 | except ExternalData:
363 | continue
364 | except TimeoutException:
365 | raise TimeoutException("Timed out!")
366 | except Exception as e:
367 | logging.exception('Failed path due to %s', e)
368 | continue
369 |
370 |
--------------------------------------------------------------------------------
/src/slicing.py:
--------------------------------------------------------------------------------
1 | from src.cfg.instruction import Instruction
2 | from src.cfg.opcodes import potentially_user_controlled
3 | from src.explorer.backward import traverse_back
4 | from src.util.intrange import Range
5 |
6 |
def slice_to_program(s):
    """Lay out the instructions of a slice at consecutive program counters.

    Each instruction occupies (next_addr - addr) bytes; the first one is
    placed at pc 0.  Returns a dict mapping the synthetic pc to the
    instruction, suitable for concrete execution of the slice.
    """
    program = {}
    next_pc = 0
    for instruction in s:
        program[next_pc] = instruction
        next_pc += instruction.next_addr - instruction.addr
    return program
14 |
15 |
def adjust_stack(backward_slice, stack_delta):
    """Append padding instructions to *backward_slice* to balance the stack.

    A positive delta is compensated with PUSH4 0xdeadc0de placeholders,
    a negative delta with POPs; zero delta appends nothing.
    """
    if stack_delta > 0:
        # pad with PUSH4 0xdeadc0de placeholder values
        for _ in range(stack_delta):
            backward_slice.append(Instruction(0x0, 0x63, b'\xde\xad\xc0\xde'))
    elif stack_delta < 0:
        # pad with POPs to drop the surplus values
        for _ in range(-stack_delta):
            backward_slice.append(Instruction(0x0, 0x50))
21 |
22 |
class SlicingState(object):
    """Immutable snapshot of an in-progress backward-slicing traversal."""

    def __init__(self, stacksize, stack_underflow, stack_delta, taintmap, memory_taint, backward_slice, instructions):
        self.stacksize = stacksize
        self.stack_underflow = stack_underflow
        self.stack_delta = stack_delta
        self.taintmap = frozenset(taintmap)
        self.memory_taint = memory_taint
        # The actual slice doesn't matter that much. What matters more is the resulting EXPRESSION of the return-address
        self.backward_slice = tuple(backward_slice)
        self.instructions = tuple(instructions)

    def __hash__(self):
        weights = (23, 29, 31, 37, 41)
        components = (self.stacksize, self.stack_delta, hash(self.taintmap),
                      hash(self.instructions), hash(self.backward_slice))
        return sum(w * c for w, c in zip(weights, components))

    def __eq__(self, other):
        if self.stacksize != other.stacksize or self.stack_delta != other.stack_delta:
            return False
        if self.taintmap != other.taintmap or self.memory_taint != other.memory_taint:
            return False
        return (self.backward_slice == other.backward_slice
                and self.instructions == other.instructions)

    def __str__(self):
        slice_addrs = ','.join('%x' % i.addr for i in self.backward_slice)
        ins_addrs = ','.join('%x' % i.addr for i in self.instructions)
        return 'Stacksize: %d, Underflow: %d, Delta: %d, Map: %s, Slice: %s, Instructions: %s' % (
            self.stacksize, self.stack_underflow, self.stack_delta, self.taintmap,
            slice_addrs, ins_addrs)
54 |
55 |
def advance_slice(slicing_state, memory_info):
    """Advance a backward slice through one basic block's instructions.

    Walks slicing_state.instructions in reverse.  An instruction joins the
    slice when it can influence a tainted stack position or (when
    memory_info is given) writes to a tainted memory region; its own inputs
    are then tainted in turn.

    Returns a new SlicingState with `instructions` exhausted ([]).  If the
    returned state still carries taint, the caller continues into
    predecessor blocks; otherwise the slice is complete.
    """
    stacksize = slicing_state.stacksize
    stack_underflow = slicing_state.stack_underflow
    stack_delta = slicing_state.stack_delta
    taintmap = set(slicing_state.taintmap)
    memory_taint = slicing_state.memory_taint
    backward_slice = list(slicing_state.backward_slice)
    instructions = slicing_state.instructions

    for ins in instructions[::-1]:
        slice_candidate = False
        # candidate if its stack outputs can reach the highest tainted position
        # (assumes ins.outs counts the values produced -- consistent with use below)
        if taintmap and stacksize - ins.outs <= max(taintmap):
            slice_candidate = True
        # candidate if it writes into a tainted memory range
        if memory_info and ins in memory_info and memory_info[ins].writes & memory_taint:
            slice_candidate = True
        if slice_candidate:
            add_to_slice = False
            if 0x80 <= ins.op <= 0x8f:  # Special handling for DUPa
                if stacksize - 1 in taintmap:
                    add_to_slice = True
                    in_idx = ins.op - 0x7f
                    # taint moves from the duplicated copy on top to its source
                    taintmap.remove(stacksize - 1)
                    taintmap.add((stacksize - 1) - in_idx)
            elif 0x90 <= ins.op <= 0x9f:  # Special handling for SWAP
                in_idx = ins.op - 0x8f
                if stacksize - 1 in taintmap or (stacksize - 1) - in_idx in taintmap:
                    add_to_slice = True
                    if stacksize - 1 in taintmap and (stacksize - 1) - in_idx in taintmap:
                        # both tainted => taint does not change
                        pass
                    elif stacksize - 1 in taintmap:
                        taintmap.remove(stacksize - 1)
                        taintmap.add((stacksize - 1) - in_idx)
                    elif (stacksize - 1) - in_idx in taintmap:
                        taintmap.remove((stacksize - 1) - in_idx)
                        taintmap.add(stacksize - 1)
            else:  # assume entire stack is affected otherwise
                add_to_slice = True
                # untaint the instruction's outputs, taint its inputs instead
                taintmap -= set(range(stacksize - ins.outs, stacksize))
                taintmap |= set(range(stacksize - ins.outs, stacksize - ins.delta))

            if add_to_slice:
                # balance out the net stack change of the skipped instructions
                # since the previous sliced one, then reset the counter
                adjust_stack(backward_slice, stack_delta)
                stack_delta = -ins.delta
                backward_slice.append(ins)
                stack_underflow = min(stack_underflow, stacksize - ins.outs)
                if memory_info and ins in memory_info:
                    ins_info = memory_info[ins]
                    # its writes no longer matter; its reads become tainted
                    memory_taint = memory_taint - ins_info.writes + ins_info.reads

        stacksize -= ins.delta
        # no taint left? then our job here is done
        if not taintmap and not memory_taint:
            stack_adjust = stacksize - stack_underflow
            if stack_adjust > 0:
                adjust_stack(backward_slice, stack_adjust)
            return SlicingState(stacksize, stack_underflow, stack_delta, set(taintmap), memory_taint,
                                list(backward_slice),
                                [])

        stack_delta += ins.delta

    # still taint left? trace further if gas is still sufficient
    return SlicingState(stacksize, stack_underflow, stack_delta, set(taintmap), memory_taint, list(backward_slice),
                        [])
121 |
122 |
def backward_slice(ins, taint_args=None, memory_info=None, initial_gas=10, must_visits=None, reachable=False):
    """Compute backward slices for the (tainted) inputs of *ins*.

    :param ins: instruction whose stack arguments are traced back
    :param taint_args: indices of the arguments to taint; if None, all of
        the instruction's arguments are tainted
    :param memory_info: optional mapping instruction -> memory access info,
        used to additionally track taint through memory
    :param initial_gas: traversal budget handed to traverse_back
    :param must_visits: optional constraints forwarded to traverse_back
        (defaults to none)
    :param reachable: if True, keep only slices whose first basic block is
        reachable from the entry point (block 0)
    :return: list of instruction tuples, ordered from earliest towards *ins*
    """
    if must_visits is None:
        # fresh list per call -- a mutable default argument ([]) would be
        # shared across invocations
        must_visits = []
    if ins.ins == 0:
        # instruction takes no stack inputs -- nothing to slice
        return []
    if taint_args:
        # argument i sits at stack position (ins.ins - 1) - i
        taintmap = set((ins.ins - 1) - i for i in taint_args)
    else:
        taintmap = set(range(ins.ins))
    if memory_info and ins in memory_info:
        memory_taint = memory_info[ins].reads
    else:
        memory_taint = Range()

    def initial_data(ins):
        # start slicing just above *ins* within its own basic block
        stacksize = ins.ins
        idx = ins.bb.ins.index(ins)
        return SlicingState(stacksize, 0, 0, taintmap, memory_taint, [],
                            ins.bb.ins[:idx])

    def advance_data(slicing_state):
        return advance_slice(slicing_state, memory_info)

    def update_data(slicing_state, new_bb):
        # carry the slicing state over into a predecessor block
        return SlicingState(slicing_state.stacksize, slicing_state.stack_underflow, slicing_state.stack_delta,
                            set(slicing_state.taintmap), slicing_state.memory_taint,
                            list(slicing_state.backward_slice), new_bb.ins)

    def finish_path(slicing_state):
        # a path is complete once no stack or memory taint remains
        return not slicing_state.taintmap and not slicing_state.memory_taint

    results = traverse_back([ins], initial_gas, initial_data, advance_data, update_data, finish_path, must_visits)
    slices = [r.backward_slice[::-1] for r in results]
    if not reachable:
        return slices
    filtered_slices = []
    for sl in slices:  # 'sl' avoids shadowing the builtin slice()
        # guard against a slice with no block-attached instruction, which
        # previously raised StopIteration here
        first_bb = next((i.bb for i in sl if i.bb), None)
        if first_bb is not None and 0 in first_bb.ancestors | {first_bb.start}:
            filtered_slices.append(sl)
    return filtered_slices
168 |
169 |
def interesting_slices(instruction, args=None, memory_info=None, reachable=False, taintedBy=potentially_user_controlled, restricted=True):
    """Backward slices of *instruction*; when *restricted*, only slices that
    contain at least one instruction from *taintedBy* are returned."""
    slices = backward_slice(instruction, args, memory_info, reachable=reachable)
    if not restricted:
        return list(slices)
    return [bs for bs in slices if any(ins.name in taintedBy for ins in bs)]
176 |
def change_slices(instruction, args=None, reachable=False):
    """All backward slices of *instruction*, without any taint filtering."""
    return list(backward_slice(instruction, args, reachable=reachable))
179 |
--------------------------------------------------------------------------------
/src/storage.py:
--------------------------------------------------------------------------------
1 | from collections import deque
2 |
3 | from src.cfg.opcodes import storage_reads, storage_writes
4 | import src.util.utils
5 | from src.evm.exceptions import TimeoutException
6 |
7 |
class InconsistentSlot(Exception):
    """Raised when a storage slot cannot be resolved consistently."""
10 |
class UninitializedRead(Exception):
    """Signals a read of a location that was never written.

    Records the half-open byte range [start, end) that was read; accepts
    either a single index or a slice.
    """

    def __init__(self, index, *args):
        super(UninitializedRead, self).__init__(*args)
        if isinstance(index, slice):
            self.start = index.start or 0
            self.end = index.stop
        else:
            self.start = index
            self.end = index + 1

    def _describe(self):
        # shared by __repr__ and __str__, which are intentionally identical
        return '%s from: %d to %d' % (super(UninitializedRead, self).__repr__(), self.start, self.end)

    def __repr__(self):
        return self._describe()

    def __str__(self):
        return self._describe()
26 |
27 |
class StorageInfo(object):
    """Resolved storage accesses of one instruction."""

    def __init__(self, reads, writes, read_sha3_bases, write_sha3_bases):
        # slots read / written (concrete values where resolvable)
        self.reads = reads
        self.writes = writes
        # per-slot base value of the SHA3 computation, for hashed slots
        self.read_sha3_bases = read_sha3_bases
        self.write_sha3_bases = write_sha3_bases
34 |
def get_storage_info(ins, code, memory_info=None):
    """Resolve which storage slots *ins* reads and/or writes.

    Backward-slices the slot argument(s) of a storage instruction, concretely
    executes each slice, and collects the resulting slot values.  For
    SHA3-derived slots (mappings / dynamic arrays), the base slot of the hash
    computation is recorded as well.

    :return: a StorageInfo, or None if *ins* touches no storage
    :raises UninitializedRead, ExternalData: propagated from slice execution
    """
    from .slicing import backward_slice, slice_to_program
    from .evm.evm import run
    from .evm.state import EVMState

    def slot_sha3_base(b, state):
        # Recover the base of a SHA3-computed slot from the memory the slice
        # set up: with a single MSTORE the hash input starts at offset 0;
        # with two or more MSTOREs (mapping access hashing (key, base)) the
        # base is the second word.  Returns None when the pattern doesn't match.
        sha3_count = sum(1 for i in b if i.name == 'SHA3')
        mstore_count = sum(1 for i in b if i.name == 'MSTORE')
        if sha3_count == 1 and mstore_count == 1:
            return src.util.utils.bytearray_to_int(state.memory[0:32])
        if sha3_count >= 1 and mstore_count >= 2:
            return src.util.utils.bytearray_to_int(state.memory[32:64])
        return None

    targets = []

    read = ins.name in storage_reads
    if read:
        read_slot_info = storage_reads[ins.name]
        # negative value encodes the stack position holding the slot
        if read_slot_info < 0:
            targets.append(-1 - read_slot_info)

    write = ins.name in storage_writes
    if write:
        write_slot_info = storage_writes[ins.name]
        if write_slot_info < 0:
            targets.append(-1 - write_slot_info)

    if not read and not write:
        return None

    read_slot = set()
    read_slot_sha3_base = dict()
    write_slot = set()
    write_slot_sha3_base = dict()
    for b in backward_slice(ins, targets, memory_info):
        # UninitializedRead / ExternalData raised by run() propagate to the
        # caller unchanged (the previous re-raising try/except was redundant).
        state = run(slice_to_program(b), EVMState(code=code), check_initialized=False)
        if read:
            new_slot = state.stack[read_slot_info] if read_slot_info < 0 else read_slot_info
            if new_slot not in read_slot:
                read_slot.add(new_slot)
                base = slot_sha3_base(b, state)
                if base is not None:
                    read_slot_sha3_base[new_slot] = base
        if write:
            new_slot = state.stack[write_slot_info] if write_slot_info < 0 else write_slot_info
            if new_slot not in write_slot:
                write_slot.add(new_slot)
                base = slot_sha3_base(b, state)
                if base is not None:
                    write_slot_sha3_base[new_slot] = base
    return StorageInfo(read_slot, write_slot, read_slot_sha3_base, write_slot_sha3_base)
93 |
94 |
def resolve_all_storage(cfg, code, memory_info=None):
    """Resolve storage access info for every storage instruction in *cfg*.

    Iterates to a fixed point: instructions that fail to resolve are retried
    as long as some other instruction made progress in the meantime.
    Returns a dict mapping instruction -> StorageInfo.
    """
    storage_infos = dict()
    pending = deque(ins for bb in cfg.bbs for ins in bb.ins
                    if ins.name in storage_reads or ins.name in storage_writes)
    current = deque()
    made_progress = True
    while current or (made_progress and pending):
        if not current:
            # start a new round over everything left unresolved
            current, pending = pending, deque()
            made_progress = False
        ins = current.popleft()
        try:
            info = get_storage_info(ins, code, memory_info)
            if info:
                made_progress = True
                storage_infos[ins] = info
        except TimeoutException:
            raise TimeoutException("Timed out!")
        except Exception:
            # could not resolve yet -- retry after others made progress
            pending.append(ins)
    return storage_infos
117 |
--------------------------------------------------------------------------------
/src/teEther_LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/src/util/__init__.py:
--------------------------------------------------------------------------------
1 | from . import frontierset
2 | from . import intrange
3 | from . import utils
4 | from . import z3_extra_util
--------------------------------------------------------------------------------
/src/util/frontierset.py:
--------------------------------------------------------------------------------
1 | from collections import defaultdict
2 |
3 |
class FrontierSet(object):
    """
    A set that also maintains a partial topological ordering
    The current set of "non-blocked" items can be obtained as
    .frontier
    """

    def __init__(self, data=None):
        # _inhibiting_set[b]: items that must be removed before b unblocks
        self._inhibiting_set = defaultdict(set)
        # _blocking_set[a]: items currently blocked by a
        self._blocking_set = defaultdict(set)
        self._edges = set()
        self._frontier = set()
        # memoized frozen views, reset by _invalidate() on every mutation
        self._frozenedges = None
        self._frozenfrontier = None
        self._frozenall = None
        if data:
            for d in data:
                self.add(d)

    def _invalidate(self):
        self._frozenedges = None
        self._frozenfrontier = None
        self._frozenall = None

    @property
    def edges(self):
        if self._frozenedges is None:
            self._frozenedges = frozenset(self._edges)
        return self._frozenedges

    @property
    def frontier(self):
        if self._frozenfrontier is None:
            self._frozenfrontier = frozenset(self._frontier)
        return self._frozenfrontier

    @property
    def all(self):
        if self._frozenall is None:
            self._frozenall = frozenset(set(self._blocking_set.keys()) | set(self._inhibiting_set.keys()) | self._frontier)
        return self._frozenall

    def add(self, a, b=None):
        """
        Add a to the set.
        If b is given, require that a is a necessary prerequisite for b
        :param a:
        :param b:
        :return:
        """
        self._invalidate()
        # FIX: compare against None explicitly -- 0 is a legitimate item
        # (e.g. the root basic block's address) and must not be treated as
        # "no prerequisite relation given"
        if b is not None:
            self._edges.add((a, b))
            self._inhibiting_set[b].add(a)
            self._blocking_set[a].add(b)
            if not self._inhibiting_set[a]:
                self._frontier.add(a)
            self._frontier.discard(b)
        else:
            self._frontier.add(a)

    def remove(self, a):
        """Remove *a*, unblocking any item whose last prerequisite it was."""
        self._invalidate()
        for b in self._blocking_set[a]:
            # FIX: edges are stored as (prerequisite, dependent) = (a, b);
            # the previous code discarded the swapped tuple and left stale edges
            self._edges.discard((a, b))
            self._inhibiting_set[b].discard(a)
            if not self._inhibiting_set[b]:
                self._frontier.add(b)
        for c in self._inhibiting_set[a]:
            self._edges.discard((c, a))
            self._blocking_set[c].discard(a)
        del self._blocking_set[a]
        del self._inhibiting_set[a]
        self._frontier.discard(a)

    def copy(self):
        new = FrontierSet()
        # FIX: copy the inner sets too -- a plain dict.copy() shares the set
        # objects, so mutating the copy would corrupt this instance
        new._inhibiting_set = defaultdict(set, ((k, set(v)) for k, v in self._inhibiting_set.items()))
        new._blocking_set = defaultdict(set, ((k, set(v)) for k, v in self._blocking_set.items()))
        new._edges = self._edges.copy()
        new._frontier = self._frontier.copy()
        new._invalidate()
        return new

    def issubset(self, other):
        return self.all.issubset(other.all) and self.edges.issubset(other.edges)

    def __len__(self):
        return len(self.all)

    def __eq__(self, other):
        return self.edges == other.edges and self.all == other.all

    def __hash__(self):
        return 3 * hash(self.edges) + 7 * hash(self.all)

    def __iter__(self):
        return iter(self.all)

    def __repr__(self):
        return '{%s|%s}' % (
            ','.join('%x' % i for i in self.frontier), ','.join('%x' % i for i in self.all - self.frontier))
106 |
--------------------------------------------------------------------------------
/src/util/intrange.py:
--------------------------------------------------------------------------------
class Range(object):
    """A set of disjoint half-open integer intervals [start, end).

    Stored as a sorted tuple of (position, kind) boundary points, where kind
    is START or END.  Set operations are implemented as a sweep over the
    merged boundary points of both operands (see __munch__).
    """
    START = 0
    END = 1

    def __init__(self, start=None, end=None, points=None):
        # Construct from a single non-empty [start, end) interval, from an
        # explicit boundary-point sequence, or as the empty range.
        if not start is None and not end is None and start < end:
            self.points = ((start, Range.START), (end, Range.END))
        elif points:
            self.points = tuple(points)
        else:
            self.points = tuple()

    def __munch__(self, other, min_depth):
        # Sweep-line over the combined boundaries of both ranges.  `depth`
        # counts how many intervals cover the current position, so
        # min_depth=1 yields the union and min_depth=2 the intersection.
        depth = 0
        points = []
        for i, t in sorted(self.points + other.points):
            if depth == min_depth - 1 and t == Range.START:
                # crossing up into "covered": emit a start, unless it would
                # reopen exactly where the previous interval ended
                if points and i == points[-1][0]:
                    del points[-1]
                else:
                    points.append((i, Range.START))
            elif depth == min_depth and t == Range.END:
                # crossing down out of "covered": emit an end, cancelling a
                # degenerate zero-length interval at the same position
                if points and i == points[-1][0]:
                    del points[-1]
                else:
                    points.append((i, Range.END))
            depth += 1 if t == Range.START else -1
        return Range(points=points)

    def __add__(self, other):
        # union
        return self.__munch__(other, 1)

    def __and__(self, other):
        # intersection
        return self.__munch__(other, 2)

    def __sub__(self, other):
        # difference: a union-sweep against the inverted boundary points of
        # *other*, which subtracts its coverage from the depth count
        return self + Range(points=[(i, 1 - t) for i, t in other.points])

    def __contains__(self, other):
        # a point or Range is contained iff nothing of it remains after
        # subtracting self
        if not isinstance(other, Range):
            other = Range(other, other + 1)
        return not (other - self).points

    def __or__(self, other):
        return self + other

    def __xor__(self, other):
        # symmetric difference
        return (self - other) + (other - self)

    def __eq__(self, other):
        # equal iff the symmetric difference is empty
        return not self ^ other

    def __hash__(self):
        return hash(self.points)

    def __cmp__(self, other):
        # NOTE: Python-2-style comparison; Python 3 never calls __cmp__
        # itself, so this is effectively legacy/dead code.
        for (a, _), (b, _) in zip(self.points, other.points):
            if a != b:
                return a - b
        else:
            l1, l2 = len(self), len(other)
            return l1 - l2

    def __len__(self):
        # total number of integers covered (sum of interval widths)
        return sum(b - a for (a, _), (b, _) in zip(self.points[::2], self.points[1::2]))

    def __repr__(self):
        return 'Range(' + str(self) + ')'

    def __str__(self):
        return ','.join('[%d, %d)' % (a, b) for (a, _), (b, _) in zip(self.points[::2], self.points[1::2]))
72 |
--------------------------------------------------------------------------------
/src/util/utils.py:
--------------------------------------------------------------------------------
1 | from sha3 import keccak_256
2 |
3 |
def sha3(data):
    """Return the 32-byte Keccak-256 digest of the bytes-like *data*."""
    hasher = keccak_256(data)
    return hasher.digest()
6 |
7 |
# Common EVM word-size constants.
TT256 = 2 ** 256        # one past the maximum 256-bit word
TT256M1 = 2 ** 256 - 1  # maximum unsigned 256-bit value (all bits set)
TT255 = 2 ** 255        # sign bit of a 256-bit two's-complement word
SECP256K1P = 2 ** 256 - 4294968273  # field prime of the secp256k1 curve
12 |
13 |
def big_endian_to_int(x):
    """Interpret the bytes-like object *x* as an unsigned big-endian integer."""
    return int.from_bytes(x, 'big')
16 |
17 |
def int_to_big_endian(v):
    """Encode the non-negative int *v* as a minimal-length big-endian byte string (b'' for 0)."""
    nbytes = (v.bit_length() + 7) // 8
    return v.to_bytes(nbytes, byteorder='big')
20 |
21 |
def to_string(value):
    """Render *value* as its str() representation."""
    rendered = str(value)
    return rendered
24 |
25 |
def bytearray_to_bytestr(value):
    """Copy *value* (e.g. a bytearray or iterable of ints) into an immutable bytes object."""
    frozen = bytes(value)
    return frozen
28 |
29 |
def encode_int32(v):
    """Encode the non-negative int *v* as a 32-byte big-endian word, left-padded with NUL bytes."""
    word = int_to_big_endian(v)
    return word.rjust(32, b'\x00')
32 |
33 |
def bytes_to_int(value):
    """Interpret *value* (anything bytes() accepts) as an unsigned big-endian integer."""
    raw = bytes(value)
    return big_endian_to_int(raw)
36 |
37 |
def bytearray_to_int(value):
    """Convert a bytearray (or iterable of ints) to an unsigned big-endian integer."""
    as_bytes = bytearray_to_bytestr(value)
    return bytes_to_int(as_bytes)
40 |
41 |
def is_pow2(x):
    """Return True iff the integer *x* is a positive power of two.

    Always returns a bool; the `x and not x & (x - 1)` form returned the
    falsy int 0 for x == 0 (backward-compatible: 0 and False are both falsy).
    """
    return x > 0 and x & (x - 1) == 0
44 |
45 |
def log2(x):
    """Return the exact base-2 logarithm of *x*.

    :param x: an int that must be a positive power of two
    :return: the exponent e such that 2**e == x
    :raises ValueError: if x is not a positive power of two
    """
    # Same power-of-two test as is_pow2(), inlined; O(1) bit_length()
    # replaces the original shift loop.
    if x <= 0 or x & (x - 1):
        raise ValueError("%d is not a power of 2!" % x)
    return x.bit_length() - 1
54 |
55 |
def to_signed(i):
    """Reinterpret the unsigned 256-bit word *i* as a signed two's-complement integer."""
    if i < TT255:
        return i
    return i - TT256
58 |
59 |
60 |
class Denoms:
    """Named ether denominations, each expressed in wei.

    `turing` is not a denomination but the maximum 256-bit value.
    """

    def __init__(self):
        units = {
            'wei': 10 ** 0,
            'babbage': 10 ** 3,
            'ada': 10 ** 3,
            'kwei': 10 ** 6,
            'lovelace': 10 ** 6,
            'mwei': 10 ** 6,
            'shannon': 10 ** 9,
            'gwei': 10 ** 9,
            'szabo': 10 ** 12,
            'finney': 10 ** 15,
            'mether': 10 ** 15,
            'ether': 10 ** 18,
            'turing': 2 ** 256 - 1,
        }
        for name, value in units.items():
            setattr(self, name, value)


# Shared module-level instance.
denoms = Denoms()
79 |
80 |
def unique(l):
    """Yield the items of *l*, collapsing runs of consecutive equal items.

    Like uniq(1): only adjacent duplicates are dropped, so an item may
    reappear later.  Uses a unique sentinel as the "no previous item"
    marker rather than None, so a leading None element is preserved.
    """
    last = object()  # sentinel: compares unequal to every element
    for i in l:
        if i != last:
            yield i
            last = i
87 |
88 |
def is_subseq(a, b):
    """True iff *a* is a subsequence (not substring!) of *b*.

    Every element of a must appear in b in the same relative order,
    but not necessarily contiguously.
    """
    # `elem in it` advances the iterator past the match, so each element
    # of a must be found strictly after the previous one.
    it = iter(b)
    return all(elem in it for elem in a)
100 |
101 |
def is_substr(a, b):
    """True iff *a* occurs as a contiguous subsequence (substring) of *b*.

    Works on arbitrary sequences/iterables, not just strings.  Fixes an
    uncaught IndexError on empty *a* (only ValueError was handled around
    the a[0] access): the empty sequence is a substring of anything.
    """
    a = tuple(a)
    b = tuple(b)
    if not a:
        return True
    l = len(a)
    p = 0
    while True:
        try:
            # Jump to the next candidate start; raises ValueError when
            # a[0] no longer occurs in the remainder of b.
            p = b.index(a[0], p)
        except ValueError:
            return False
        if b[p:p + l] == a:
            return True
        p += 1
117 |
--------------------------------------------------------------------------------
/src/util/z3_extra_util.py:
--------------------------------------------------------------------------------
1 | import numbers
2 |
3 | import z3
4 |
5 |
def to_bytes(v):
    """Convert the concrete z3 bitvector value *v* to big-endian bytes (width rounded up from its bit size)."""
    width = (v.size() + 7) // 8
    return v.as_long().to_bytes(width, byteorder='big')
8 |
9 |
def get_vars(f, rs=None):
    """
    shameless copy of z3util.get_vars,
    but returning select-operations as well.
    E.g.
    >>> x = z3.Array('x', z3.IntSort(), z3.IntSort())
    >>> get_vars(x[5])
    [x[5]]
    whereas
    >>> x = z3.Array('x', z3.IntSort(), z3.IntSort())
    >>> z3util.get_vars(x[5])
    [x]

    :param f: z3 expression to scan
    :param rs: accumulator set threaded through the recursion; leave as
        None at the top-level call (was a mutable default argument)
    :return: set of variables (and select-applications) occurring in f
    """
    if rs is None:
        rs = set()
    # Simplify while the accumulator is still empty (same condition the
    # original `rs=set()` default produced).
    if not rs:
        f = z3.simplify(f)

    if f.decl().kind() == z3.Z3_OP_SELECT:
        arr, idx = f.children()
        if z3.is_const(arr):
            if z3.z3util.is_expr_val(idx):
                # Select with a constant index: report the whole select.
                return rs | {f}
            else:
                # Symbolic index: report the select and the index itself.
                return rs | {f, idx}
    if z3.is_const(f):
        if z3.z3util.is_expr_val(f):
            return rs
        else:  # variable
            return rs | {f}

    else:
        # Application: recurse into all children, threading the accumulator.
        for f_ in f.children():
            rs = get_vars(f_, rs)

    return set(rs)
44 |
45 |
def get_vars_non_recursive(f, include_select=False, include_indices=True):
    """Collect the variables of *f* iteratively (explicit worklist, no recursion).

    With include_select=True, Select-applications over a constant array are
    reported as variables themselves; include_indices additionally queues a
    non-value index for further scanning.
    """
    pending = [f]
    found = set()
    visited = set()
    while pending:
        node = pending.pop()
        node_id = node.get_id()
        if node_id in visited:
            continue
        visited.add(node_id)
        if include_select and node.decl().kind() == z3.Z3_OP_SELECT:
            arr, idx = node.children()
            if not z3.is_const(arr):
                pending.extend(node.children())
            elif include_indices and not z3.z3util.is_expr_val(idx):
                found.add(node)
                pending.append(idx)
            else:
                found.add(node)
        elif z3.is_const(node):
            if not z3.z3util.is_expr_val(node):
                found.add(node)
        else:
            pending.extend(node.children())

    return found
72 |
73 |
def concrete(v):
    """True iff *v* is a plain Python number (a concrete, non-symbolic value)."""
    is_number = isinstance(v, numbers.Number)
    return is_number
76 |
77 |
def is_false(cond):
    """True iff *cond* is unsatisfiable (provably false) in the QF_ABV logic."""
    solver = z3.SolverFor("QF_ABV")
    solver.add(cond)
    return solver.check() == z3.unsat
82 |
83 |
def is_true(cond):
    """True iff *cond* is valid, i.e. its negation is unsatisfiable.

    NOTE: This differs from `not is_false(cond)`, which corresponds to
    "may be true".
    """
    negation = z3.Not(cond)
    return is_false(negation)
87 |
88 |
def simplify_non_const_hashes(expr, sha_ids):
    """Simplify *expr* to a fixpoint, replacing sha-involving equalities with False.

    Each round simplifies, collects equality subexpressions that involve
    sha-expressions (via get_sha_subst_non_recursive), and substitutes
    BoolVal(False) for them; stops when no candidates remain.
    """
    while True:
        expr = z3.simplify(expr, expand_select_store=True)
        candidates = get_sha_subst_non_recursive(expr, sha_ids)
        if not candidates:
            return expr
        substitutions = [(candidate, z3.BoolVal(False)) for candidate in candidates]
        expr = z3.substitute(expr, substitutions)
97 |
98 |
def is_simple_expr(expr):
    """
    True if expr does not contain an If, Store, or Select statement
    :param expr: the expression to check
    :return: True, iff expr does not contain If, Store, or Select
    """
    banned_kinds = (z3.Z3_OP_ITE, z3.Z3_OP_SELECT, z3.Z3_OP_STORE)
    if expr.decl().kind() in banned_kinds:
        return False
    return all(is_simple_expr(child) for child in expr.children())
110 |
111 |
def ast_eq(e1, e2, simplified=False):
    """Structural equality of two z3 ASTs.

    Compares sorts and declaration kinds, then values (as_long) for
    expression values, otherwise recurses pairwise into the children.
    Pass simplified=True when both arguments are already simplified.
    """
    if not simplified:
        e1, e2 = z3.simplify(e1), z3.simplify(e2)
    if e1.sort() != e2.sort() or e1.decl().kind() != e2.decl().kind():
        return False
    if z3.z3util.is_expr_val(e1) and z3.z3util.is_expr_val(e2):
        return e1.as_long() == e2.as_long()
    return all(ast_eq(c1, c2, True) for c1, c2 in zip(e1.children(), e2.children()))
123 |
124 |
def get_sha_subst_non_recursive(f, sha_ids):
    """Collect equality subexpressions of *f* that involve sha-expressions
    and can be assumed false.

    Iterative worklist scan (no recursion).  An equality l == r whose sides
    are both "simple" (no ITE/Select/Store, see is_simple_expr) is collected
    when:
      * both sides reference sha-ids but their ASTs differ (two distinct
        hash applications are assumed unequal), or
      * exactly one side references a sha-id (a hash is assumed unequal to
        any non-hash value, e.g. SHA == 5 seems unlikely).

    Removed: dead timeit profiling scaffolding (unused timers/counter and a
    commented-out logging call).

    :param f: z3 expression to scan
    :param sha_ids: set of get_id()s identifying sha-expression variables
    :return: set of equality subexpressions that may be substituted by False
    """
    todo = [z3.simplify(f, expand_select_store=True)]
    rs = set()
    seen = set()
    while todo:
        expr = todo.pop()
        if expr.get_id() in seen:
            continue
        seen.add(expr.get_id())
        if expr.decl().kind() == z3.Z3_OP_EQ and all(is_simple_expr(c) for c in expr.children()):
            l, r = expr.children()
            lvars, rvars = [{v.get_id() for v in get_vars_non_recursive(e, True)} for e in (l, r)]

            sha_left = bool(lvars & sha_ids)
            sha_right = bool(rvars & sha_ids)

            if sha_left and sha_right:
                # both sides use a sha-expression
                # => can be equal only if ASTs are equal
                if not ast_eq(l, r):
                    rs.add(expr)

            elif sha_left ^ sha_right:
                # only one side uses a sha-expression
                # => assume not equal (e.g. SHA == 5 seems unlikely)
                rs.add(expr)

        else:
            # Not a candidate equality: scan its children.
            todo.extend(expr.children())

    return rs
162 |
--------------------------------------------------------------------------------