├── CVE-2021-34273.code ├── LICENSE ├── README.md ├── bin └── achecker.py ├── requirements.txt ├── setup.py └── src ├── __init__.py ├── cfg ├── __init__.py ├── bb.py ├── cfg.py ├── disassembly.py ├── instruction.py ├── opcodes.py └── rattle │ ├── LICENSE │ ├── __init__.py │ ├── analyze.py │ ├── evmasm.py │ ├── hashes.py │ ├── recover.py │ └── ssa.py ├── constraints.py ├── evm ├── __init__.py ├── evm.py ├── exceptions.py ├── results.py └── state.py ├── exploit.py ├── explorer ├── __init__.py ├── backward.py └── forward.py ├── flow ├── FSignatures.txt ├── __init__.py ├── analysis_results.py ├── code_info.py ├── symbolic.py └── tainting.py ├── memory.py ├── project.py ├── slicing.py ├── storage.py ├── teEther_LICENSE └── util ├── __init__.py ├── frontierset.py ├── intrange.py ├── utils.py └── z3_extra_util.py /CVE-2021-34273.code: -------------------------------------------------------------------------------- 1 | 606060405236156100c3576000357c0100000000000000000000000000000000000000000000000000000000900463ffffffff16806306fdde03146100d3578063095ea7b31461016157806318160ddd146101bb57806323b872dd146101e4578063313ce5671461025d57806370a082311461028c5780638da5cb5b146102d957806395d89b411461032e578063a9059cbb146103bc578063a9c7648f14610416578063dd62ed3e14610479578063df32754b146104e5578063f2fde38b146104fa575b34156100ce57600080fd5b600080fd5b34156100de57600080fd5b6100e6610533565b6040518080602001828103825283818151815260200191508051906020019080838360005b8381101561012657808201518184015260208101905061010b565b50505050905090810190601f1680156101535780820380516001836020036101000a031916815260200191505b509250505060405180910390f35b341561016c57600080fd5b6101a1600480803573ffffffffffffffffffffffffffffffffffffffff169060200190919080359060200190919050506105d1565b604051808215151515815260200191505060405180910390f35b34156101c657600080fd5b6101ce6106c3565b6040518082815260200191505060405180910390f35b34156101ef57600080fd5b610243600480803573ffffffffffffffffffffffffffffffffffffffff1690602001909190803573fff
fffffffffffffffffffffffffffffffffffff169060200190919080359060200190919050506106c9565b604051808215151515815260200191505060405180910390f35b341561026857600080fd5b610270610945565b604051808260ff1660ff16815260200191505060405180910390f35b341561029757600080fd5b6102c3600480803573ffffffffffffffffffffffffffffffffffffffff16906020019091905050610958565b6040518082815260200191505060405180910390f35b34156102e457600080fd5b6102ec6109a1565b604051808273ffffffffffffffffffffffffffffffffffffffff1673ffffffffffffffffffffffffffffffffffffffff16815260200191505060405180910390f35b341561033957600080fd5b6103416109c6565b6040518080602001828103825283818151815260200191508051906020019080838360005b83811015610381578082015181840152602081019050610366565b50505050905090810190601f1680156103ae5780820380516001836020036101000a031916815260200191505b509250505060405180910390f35b34156103c757600080fd5b6103fc600480803573ffffffffffffffffffffffffffffffffffffffff16906020019091908035906020019091905050610a64565b604051808215151515815260200191505060405180910390f35b341561042157600080fd5b610477600480803590602001908201803590602001908080602002602001604051908101604052809392919081815260200183836020028082843782019150505050505091908035906020019091905050610bcd565b005b341561048457600080fd5b6104cf600480803573ffffffffffffffffffffffffffffffffffffffff1690602001909190803573ffffffffffffffffffffffffffffffffffffffff16906020019091905050610d1b565b6040518082815260200191505060405180910390f35b34156104f057600080fd5b6104f8610da2565b005b341561050557600080fd5b610531600480803573ffffffffffffffffffffffffffffffffffffffff16906020019091905050610de4565b005b60048054600181600116156101000203166002900480601f0160208091040260200160405190810160405280929190818152602001828054600181600116156101000203166002900480156105c95780601f1061059e576101008083540402835291602001916105c9565b820191906000526020600020905b8154815290600101906020018083116105ac57829003601f168201915b505050505081565b600081600260003373ffffffffffffffffffffffffffffffffffffffff1673fffffffffffffffffffffffffffffffff
fffffff16815260200190815260200160002060008573ffffffffffffffffffffffffffffffffffffffff1673ffffffffffffffffffffffffffffffffffffffff168152602001908152602001600020819055508273ffffffffffffffffffffffffffffffffffffffff163373ffffffffffffffffffffffffffffffffffffffff167f8c5be1e5ebec7d5bd14f71427d1e84f3dd0314c0f7b2291e5b200ac8c7c3b925846040518082815260200191505060405180910390a36001905092915050565b60035481565b600081600160008673ffffffffffffffffffffffffffffffffffffffff1673ffffffffffffffffffffffffffffffffffffffff1681526020019081526020016000205410158015610796575081600260008673ffffffffffffffffffffffffffffffffffffffff1673ffffffffffffffffffffffffffffffffffffffff16815260200190815260200160002060003373ffffffffffffffffffffffffffffffffffffffff1673ffffffffffffffffffffffffffffffffffffffff1681526020019081526020016000205410155b80156107a25750600082115b156109395781600160008573ffffffffffffffffffffffffffffffffffffffff1673ffffffffffffffffffffffffffffffffffffffff1681526020019081526020016000206000828254019250508190555081600160008673ffffffffffffffffffffffffffffffffffffffff1673ffffffffffffffffffffffffffffffffffffffff1681526020019081526020016000206000828254039250508190555081600260008673ffffffffffffffffffffffffffffffffffffffff1673ffffffffffffffffffffffffffffffffffffffff16815260200190815260200160002060003373ffffffffffffffffffffffffffffffffffffffff1673ffffffffffffffffffffffffffffffffffffffff168152602001908152602001600020600082825403925050819055508273ffffffffffffffffffffffffffffffffffffffff168473ffffffffffffffffffffffffffffffffffffffff167fddf252ad1be2c89b69c2b068fc378daa952ba7f163c4a11628f55a4df523b3ef846040518082815260200191505060405180910390a36001905061093e565b600090505b9392505050565b600560009054906101000a900460ff1681565b6000600160008373ffffffffffffffffffffffffffffffffffffffff1673ffffffffffffffffffffffffffffffffffffffff168152602001908152602001600020549050919050565b6000809054906101000a900473ffffffffffffffffffffffffffffffffffffffff1681565b60068054600181600116156101000203166002900480601f016020809104026020016
040519081016040528092919081815260200182805460018160011615610100020316600290048015610a5c5780601f10610a3157610100808354040283529160200191610a5c565b820191906000526020600020905b815481529060010190602001808311610a3f57829003601f168201915b505050505081565b600081600160003373ffffffffffffffffffffffffffffffffffffffff1673ffffffffffffffffffffffffffffffffffffffff1681526020019081526020016000205410158015610ab55750600082115b15610bc25781600160003373ffffffffffffffffffffffffffffffffffffffff1673ffffffffffffffffffffffffffffffffffffffff1681526020019081526020016000206000828254039250508190555081600160008573ffffffffffffffffffffffffffffffffffffffff1673ffffffffffffffffffffffffffffffffffffffff168152602001908152602001600020600082825401925050819055508273ffffffffffffffffffffffffffffffffffffffff163373ffffffffffffffffffffffffffffffffffffffff167fddf252ad1be2c89b69c2b068fc378daa952ba7f163c4a11628f55a4df523b3ef846040518082815260200191505060405180910390a360019050610bc7565b600090505b92915050565b60008090505b8251811015610d165781600160003373ffffffffffffffffffffffffffffffffffffffff1673ffffffffffffffffffffffffffffffffffffffff1681526020019081526020016000206000828254039250508190555081600160008584815181101515610c3c57fe5b9060200190602002015173ffffffffffffffffffffffffffffffffffffffff1673ffffffffffffffffffffffffffffffffffffffff168152602001908152602001600020600082825401925050819055508281815181101515610c9b57fe5b9060200190602002015173ffffffffffffffffffffffffffffffffffffffff163373ffffffffffffffffffffffffffffffffffffffff167fddf252ad1be2c89b69c2b068fc378daa952ba7f163c4a11628f55a4df523b3ef846040518082815260200191505060405180910390a38080600101915050610bd3565b505050565b6000600260008473ffffffffffffffffffffffffffffffffffffffff1673ffffffffffffffffffffffffffffffffffffffff16815260200190815260200160002060008373ffffffffffffffffffffffffffffffffffffffff1673ffffffffffffffffffffffffffffffffffffffff16815260200190815260200160002054905092915050565b336000806101000a81548173ffffffffffffffffffffffffffffffffffffffff021916908373fffffffffffffffff
fffffffffffffffffffffff160217905550565b6000809054906101000a900473ffffffffffffffffffffffffffffffffffffffff1673ffffffffffffffffffffffffffffffffffffffff163373ffffffffffffffffffffffffffffffffffffffff16141515610e3f57600080fd5b806000806101000a81548173ffffffffffffffffffffffffffffffffffffffff021916908373ffffffffffffffffffffffffffffffffffffffff160217905550505600a165627a7a723058200a2bf4fa374a52ee391d2be9ef116c0929697a1a3ee37acebf0cc5d85c6597ff0029 2 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Univ of British Columbia (UBC) 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # AChecker 2 | AChecker (Access Control Checker) is an automated static analysis tool for detecting access control vulnerabilities in Ethereum smart contracts. 3 | 4 | For more details about AChecker, please refer to our paper published in ICSE 2023: [AChecker: Statically Detecting Smart Contract 5 | Access Control Vulnerabilities](https://blogs.ubc.ca/dependablesystemslab/2022/12/08/achecker-statically-detecting-smart-contract-access-control-vulnerabilities). 6 | 7 | 8 | If you use AChecker, please cite this paper. 9 | 10 | ``` 11 | @inproceedings{ghaleb2023achecker, 12 | title={AChecker: Statically Detecting Smart Contract Access Control Vulnerabilities}, 13 | author={Ghaleb, Asem and Rubin, Julia and Pattabiraman, Karthik}, 14 | booktitle={Proceedings of the 45th IEEE/ACM International Conference on Software Engineering}, 15 | year={2023} 16 | } 17 | ``` 18 | 19 | ## Getting Started 20 | **Note:** We tested all scripts provided in this package on an Ubuntu 20.04 LTS machine. 21 | 22 | ### Requirements 23 | * Python 3.8+ 24 | 25 | ### Building AChecker 26 | 27 | To build the tool manually, we provide a `requirements.txt` file and a `setup.py` script, which together install the dependencies AChecker requires and build everything. 28 | 29 | Run the following commands. Please make sure you are using Python 3.8 or higher. 30 | 31 | ``` 32 | cd AChecker 33 | python -m pip install -r requirements.txt 34 | ``` 35 | 36 | ### Analyzing a smart contract 37 | Use the following command to run AChecker on a contract's bytecode.
38 | ``` 39 | python bin/achecker.py -f [path_of_the_contract_bytecode_file] -b 40 | ``` 41 | As an example, the following command will run AChecker to analyze the contract with CVE ID 'CVE-2021-34273' in the file named '*CVE-2021-34273.code*' 42 | ``` 43 | python bin/achecker.py -f CVE-2021-34273.code -b -m 8 44 | ``` 45 | 46 | The option -m enables setting the allocated memory for the analysis (in gigabytes). In this example, the allocated memory limit is set to 8 GB. The default value is 6 GB when the option -m is not used. 47 | 48 | ## Contact 49 | For questions about our paper or this code, please get in touch with Asem Ghaleb (aghaleb@alumni.ubc.ca) 50 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | pysha3>=1.0.2 2 | z3-solver>=4.8.5.0 3 | ijson 4 | requests 5 | lxml 6 | bs4 7 | Cython 8 | configparser 9 | pyevmasm 10 | cbor2 11 | networkx 12 | solc-select 13 | pandas 14 | tabulate 15 | -e . 16 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | setup( 4 | name='AChecker', 5 | version='0.1.0', 6 | packages=find_packages(), 7 | install_requires=[], 8 | scripts=[ 9 | 'bin/achecker.py' 10 | ], 11 | python_requires='>=3.8', 12 | 13 | ) 14 | -------------------------------------------------------------------------------- /src/__init__.py: -------------------------------------------------------------------------------- 1 | from . import cfg 2 | from . import constraints 3 | from . import evm 4 | from . import exploit 5 | from . import explorer 6 | from . import flow 7 | from . import memory 8 | from . import project 9 | from . import slicing 10 | from . import storage 11 | from . 
class BB(object):
    """A basic block: a straight-line run of EVM instructions in a CFG.

    Besides the instructions themselves, a block records its effect on the
    stack (``streads``, ``stwrites``, ``stdelta``), its neighbourhood in the
    control-flow graph (``pred``, ``succ``, ``ancestors``, ``descendants``)
    and the bookkeeping used while resolving computed jumps
    (``indirect_jump``, ``must_visit``).
    """

    def __init__(self, ins):
        """Create a block from a non-empty list of instructions.

        Every instruction gets its ``bb`` attribute pointed at this block.
        The instructions must expose ``op``, ``ins``, ``outs``, ``delta``
        and ``addr``.
        """
        self.ins = ins
        self.streads = set()  # indices of stack-items that will be read by this BB (0 is the topmost item on stack)
        self.stwrites = set()  # indices of stack-items that will be written by this BB (0 is the topmost item on stack)
        self.stdelta = 0
        for i in ins:
            i.bb = self
            if 0x80 <= i.op <= 0x8f:  # Special handling for DUP
                ridx = i.op - 0x80 - self.stdelta
                widx = -1 - self.stdelta
                if ridx not in self.stwrites:
                    self.streads.add(ridx)
                self.stwrites.add(widx)
            elif 0x90 <= i.op <= 0x9f:  # Special handling for SWAP
                idx1 = i.op - 0x8f - self.stdelta
                idx2 = - self.stdelta
                if idx1 not in self.stwrites:
                    self.streads.add(idx1)
                if idx2 not in self.stwrites:
                    self.streads.add(idx2)
                self.stwrites.add(idx1)
                self.stwrites.add(idx2)
            else:  # assume entire stack is affected otherwise
                for j in range(i.ins):
                    idx = j - self.stdelta
                    if idx not in self.stwrites:
                        self.streads.add(idx)
                for j in range(i.outs):
                    idx = i.ins - 1 - j - self.stdelta
                    self.stwrites.add(idx)
            self.stdelta += i.delta
        # Negative indices refer to slots created inside this block; only
        # slots that already existed on entry are kept.
        self.streads = {x for x in self.streads if x >= 0}
        self.stwrites = {x for x in self.stwrites if x >= 0}
        self.start = self.ins[0].addr
        self.pred = set()
        self.succ = set()
        self.succ_addrs = set()
        self.pred_paths = defaultdict(set)
        self.branch = self.ins[-1].op == 0x57
        self.indirect_jump = self.ins[-1].op in (0x56, 0x57)
        self.ancestors = set()
        self.descendants = set()
        # maintain a set of 'must_visit' constraints to limit
        # backward-slices to only new slices after new edges are added
        # initially, no constraint is given (= empty set)
        self.must_visit = [set()]
        # also maintain an estimate of how fast we can get from here
        # to the root of the cfg
        # how fast meaning, how many JUMPI-branches we have to take
        self.estimate_constraints = (1 if self.branch else 0) if self.start == 0 else None
        # and another estimate of how many backwards branches
        # we will encounter to the root
        self.estimate_back_branches = 0 if self.start == 0 else None

    @property
    def jump_resolved(self):
        """True if the block is not an indirect jump or has no pending ``must_visit`` entries."""
        return not self.indirect_jump or len(self.must_visit) == 0

    def update_ancestors(self, new_ancestors):
        """Add ``new_ancestors`` (a set of start addresses) here and in all successors.

        Only addresses a block did not know yet are passed on, so the walk
        terminates on cyclic graphs.  An explicit worklist is used instead of
        recursion so that long chains of blocks cannot exhaust the
        interpreter's recursion limit.
        """
        todo = [(self, new_ancestors)]
        while todo:
            bb, incoming = todo.pop()
            fresh = incoming - bb.ancestors
            if fresh:
                bb.ancestors.update(fresh)
                todo.extend((s, fresh) for s in bb.succ)

    def update_descendants(self, new_descendants):
        """Add ``new_descendants`` (a set of start addresses) here and in all predecessors.

        Mirror image of :meth:`update_ancestors`; also iterative.
        """
        todo = [(self, new_descendants)]
        while todo:
            bb, incoming = todo.pop()
            fresh = incoming - bb.descendants
            if fresh:
                bb.descendants.update(fresh)
                todo.extend((p, fresh) for p in bb.pred)

    def update_estimate_constraints(self):
        """Re-derive ``estimate_constraints`` from the predecessors.

        The estimate is the smallest predecessor estimate, plus one if this
        block ends in JUMPI.  It is only ever lowered; when it changes, the
        successors are updated as well.
        """
        if all(p.estimate_constraints is None for p in self.pred):
            return
        best_estimate = min(p.estimate_constraints for p in self.pred if p.estimate_constraints is not None)
        if self.branch:
            best_estimate += 1
        if self.estimate_constraints is None or best_estimate < self.estimate_constraints:
            self.estimate_constraints = best_estimate
            for s in self.succ:
                s.update_estimate_constraints()

    def update_estimate_back_branches(self):
        """Re-derive ``estimate_back_branches`` from the predecessors.

        The estimate is the smallest predecessor estimate, plus one if this
        block has more than one predecessor.  Unlike
        :meth:`update_estimate_constraints`, any change (not only a decrease)
        is stored and pushed on to the successors.
        """
        if all(p.estimate_back_branches is None for p in self.pred):
            return
        best_estimate = min(p.estimate_back_branches for p in self.pred if p.estimate_back_branches is not None)
        if len(self.pred) > 1:
            best_estimate += 1
        if self.estimate_back_branches is None or best_estimate != self.estimate_back_branches:
            self.estimate_back_branches = best_estimate
            for s in self.succ:
                s.update_estimate_back_branches()

    def add_succ(self, other, path):
        """Insert the edge ``self -> other`` and refresh all derived data.

        ``path`` is an iterable of block start addresses; it is stored as a
        tuple in ``other.pred_paths[self]``.  Every indirect-jump block
        reachable from ``other`` receives a new ``must_visit`` constraint
        naming this block, which marks its jump as unresolved again.
        """
        self.succ.add(other)
        other.pred.add(self)
        self.update_descendants(other.descendants | {other.start})
        other.update_ancestors(self.ancestors | {self.start})
        other.update_estimate_constraints()
        other.update_estimate_back_branches()
        other.pred_paths[self].add(tuple(path))
        seen = set()
        todo = deque()
        todo.append(other)
        while todo:
            bb = todo.popleft()
            if bb not in seen:
                seen.add(bb)
                if bb.indirect_jump:
                    bb.must_visit.append({self.start})
                todo.extend(s for s in bb.succ if s not in seen)

    def _find_jump_target(self):
        """Return the constant pushed right before the final instruction, or None.

        Only recognises the pattern where the second-to-last instruction has
        an opcode in ``0x60..0x71``.  On success the ``must_visit`` list is
        cleared.
        """
        if len(self.ins) >= 2 and 0x60 <= self.ins[-2].op <= 0x71:
            self.must_visit = []
            return int.from_bytes(self.ins[-2].arg, byteorder='big')
        else:
            return None

    def get_succ_addrs_full(self, valid_jump_targets):
        """Resolve a computed jump by backward slicing from the last instruction.

        Returns ``(self.succ_addrs, new_succ_addrs)`` where ``new_succ_addrs``
        is a set of ``(path, succ_addr)`` pairs found by this call.  Slices
        whose target cannot be computed or is not in ``valid_jump_targets``
        are skipped.  ``must_visit`` is cleared afterwards in every case.
        """
        # Imported here rather than at module level, as in the original code.
        from src.slicing import slice_to_program, backward_slice
        from src.evm.exceptions import ExternalData
        from src.memory import UninitializedRead
        from src.evm.evm import run
        new_succ_addrs = set()
        if self.indirect_jump and not self.jump_resolved:
            bs = backward_slice(self.ins[-1], [0], must_visits=self.must_visit)
            for b in bs:
                if 0x60 <= b[-1].op <= 0x7f:
                    # The slice ends in a PUSH: the target is its immediate.
                    succ_addr = int.from_bytes(b[-1].arg, byteorder='big')
                else:
                    # Otherwise execute the slice and take the top of stack.
                    p = slice_to_program(b)
                    try:
                        succ_addr = run(p, check_initialized=True).stack.pop()
                    except (ExternalData, UninitializedRead):
                        logging.debug('Failed to compute jump target for BB@{}, slice: \n{}'.format(self.start, '\n'.join('\t{}'.format(ins) for ins in b)))
                        continue
                if succ_addr not in valid_jump_targets:
                    logging.debug('Jump to invalid address')
                    continue
                path = tuple(unique(ins.bb.start for ins in b if ins.bb))
                self.succ_addrs.add(succ_addr)
                new_succ_addrs.add((path, succ_addr))
        # We did our best,
        # if someone finds a new edge, jump_resolved will be set to False by the BFS in add_succ
        self.must_visit = []
        return self.succ_addrs, new_succ_addrs

    def get_succ_addrs(self, valid_jump_targets):
        """Collect the successor addresses that can be read off directly.

        Handles a constant jump target (see :meth:`_find_jump_target`) and the
        fall-through address.  Returns ``self.succ_addrs``, which is updated
        in place.
        """
        if self.ins[-1].op in (0x56, 0x57):
            jump_target = self._find_jump_target()
            if jump_target is not None:
                self.indirect_jump = False
                if jump_target in valid_jump_targets:
                    self.succ_addrs.add(jump_target)
            else:
                self.indirect_jump = True
        else:
            self.must_visit = []
        # Blocks ending in one of these opcodes have no fall-through edge.
        if self.ins[-1].op not in (0x00, 0x56, 0xf3, 0xfd, 0xfe, 0xff):
            fallthrough = self.ins[-1].next_addr
            if fallthrough:
                self.succ_addrs.add(fallthrough)
        return self.succ_addrs

    def __str__(self):
        """Return a multi-line dump: header, stack effect, predecessors, instructions, successors."""
        s = 'BB @ %x\tStack %d' % (self.start, self.stdelta)
        s += '\n'
        s += 'Stackreads: {%s}' % (', '.join(map(str, sorted(self.streads))))
        s += '\n'
        s += 'Stackwrites: {%s}' % (', '.join(map(str, sorted(self.stwrites))))
        if self.pred:
            s += '\n'
            s += '\n'.join('%x ->' % pred.start for pred in self.pred)
        s += '\n'
        s += '\n'.join(str(ins) for ins in self.ins)
        if self.succ:
            s += '\n'
            s += '\n'.join(' -> %x' % succ.start for succ in self.succ)
        return s

    def __repr__(self):
        return str(self)

    def __lt__(self, other):
        """Order blocks by start address (used by ``sorted``)."""
        return self.start < other.start
def _hard_xrefs(self):
    """Resolve computed jumps until a full pass finds no new edge.

    For every block whose jump is still unresolved, ask the block for
    additional successors (``get_succ_addrs_full``) and add an edge for each
    one that maps to a known block.  Adding an edge can mark other blocks as
    unresolved again, so the pass is repeated for as long as a previously
    unseen ``(pred.start, succ.start)`` pair turns up.
    """
    links = set()  # (pred start, succ start) pairs already recorded
    new_link = True
    while new_link:
        new_link = False
        for pred in self.bbs:
            if pred.jump_resolved:
                continue
            _, new_succ_addrs = pred.get_succ_addrs_full(self.valid_jump_targets)
            for new_succ_path, succ_addr in new_succ_addrs:
                if succ_addr not in self._bb_at:
                    logging.warning(
                        'WARNING, NO BB @ %x (possible successor of BB @ %x)', succ_addr, pred.start)
                    continue
                succ = self._bb_at[succ_addr]
                pred.add_succ(succ, new_succ_path)
                link = (pred.start, succ.start)
                if link not in links:
                    new_link = True
                    links.add(link)
'Dominated by: %x
' % self.dominators[bb].start 109 | from_block += 'From: ' + ', '.join('%x' % pred.start for pred in sorted(bb.pred)) 110 | if bb.estimate_constraints is not None: 111 | from_block += '
Min constraints from root: %d' % bb.estimate_constraints 112 | if bb.estimate_back_branches is not None: 113 | from_block += '
Min back branches to root: %d' % bb.estimate_back_branches 114 | to_block = 'To: ' + ', '.join('%x' % succ.start for succ in sorted(bb.succ)) 115 | ins_block = '
'.join( 116 | '%4x: %02x %s %s' % (ins.addr, ins.op, ins.name, ins.arg.hex() if ins.arg else '') for ins in bb.ins) 117 | # ancestors = 'Ancestors: %s'%(', '.join('%x'%addr for addr in sorted(a for a in bb.ancestors))) 118 | # descendants = 'Descendants: %s' % (', '.join('%x' % addr for addr in sorted(a for a in bb.descendants))) 119 | # s += '\t%d [shape=box,label=<%x:
%s
%s
%s
>];\n' % ( 120 | # bb.start, bb.start, ins_block, ancestors, descendants) 121 | if not minimal: 122 | s += '\t%d [shape=box,label=<%s
%x:
%s
%s
>];\n' % ( 123 | bb.start, from_block, bb.start, ins_block, to_block) 124 | else: 125 | s += '\t%d [shape=box,label=<%s
def trim(self):
    """Drop every basic block that is not reachable from the root.

    ``root.descendants`` holds the start addresses reachable from the root
    over at least one edge, so it does not contain the root itself unless a
    cycle leads back to it.  The root's own address is therefore added
    explicitly; without it the root block would be removed from ``bbs`` and
    ``_bb_at``.
    """
    keep = set(self.root.descendants)
    keep.add(self.root.start)
    self.bbs = [bb for bb in self.bbs if bb.start in keep]
    for addr in set(self._bb_at) - keep:
        del self._bb_at[addr]
def to_ssa(self, code: bytes, minimal=False):
    """Render each function recovered by rattle as ``output/<desc>.svg``.

    ``code`` is handed to ``rattle.Recover``.  For every recovered function
    the control-flow graph is written as DOT to a temporary file and
    converted with the external ``dot`` program.  ``minimal`` is accepted
    for interface compatibility but is not used.

    Side effects: raises the interpreter recursion limit to 10000, creates
    the ``output`` directory, runs ``dot`` and prints one line per function.
    The exit status of ``dot`` is not checked.
    """
    sys.setrecursionlimit(10000)

    edges = []
    ssa = rattle.Recover(code, edges=edges, split_functions=False)

    for function in ssa.functions:
        g = rattle.ControlFlowGraph(function)

        # Best effort, as before: an existing directory is fine, any other
        # failure is reported instead of being silently swallowed.
        try:
            os.makedirs('output', exist_ok=True)
        except OSError as exc:
            logging.warning('Could not create output directory: %s', exc)

        out_file = f'output/{function.desc()}.svg'

        # The context manager closes (and thereby deletes) the temporary file
        # once dot has read it; previously one handle leaked per function.
        with tempfile.NamedTemporaryFile(suffix='.dot', mode='w') as t:
            t.write(g.dot())
            t.flush()
            subprocess.call(['dot', '-Tsvg', f'-o{out_file}', t.name])

        print(f'[+] Wrote {function.desc()} to {out_file}')
call_bb_ins[call_bb_ins.index(call_ins)+1:] if ins.name in set(['ADD','AND','ISZERO','JUMPI'])] ==['ADD','AND','ISZERO','JUMPI']: #propagate throw 249 | continue 250 | 251 | call_ins_index=call_bb_ins.index(call_ins) 252 | if [ins.name for ins in call_bb_ins[call_ins_index+1:call_ins_index+22] if ins.name in set(['ADD','MSTORE','MLOAD','SUB','SHA3'])]==['ADD','MSTORE','ADD','MLOAD','SUB','SHA3']: 253 | continue 254 | if [ins.name for ins in call_bb_ins[call_ins_index+1:call_ins_index+17] if ins.name in set(['ADD','MLOAD','SUB','SHA3'])]==['ADD','MLOAD','SUB','SHA3']: 255 | continue 256 | if [ins.name for ins in call_bb_ins[call_ins_index+1:call_ins_index+12] if ins.name in set(['ADD','MSTORE','SUB'])]==['ADD','SUB','MSTORE']: 257 | continue 258 | 259 | if len([succ.start for succ in self._bb_at[call_ins.bb.start].succ for ins in succ.ins if ins.name in set(['REVERT','INVALID'])])!=0: 260 | continue 261 | if len([succ.start for succ in self._bb_at[call_ins.bb.start].succ if [ins.name for ins in succ.ins]==['PUSH2','JUMP']])!=0: 262 | continue 263 | 264 | min_succ_bb= self._bb_at[min(call_succ)] 265 | 266 | succ_with_call_bb=[succ for succ in self._bb_at[call_ins.bb.start].succ for ins in succ.ins if ins.name in set(['CALL'])] 267 | if (['%x' %ins.op for ins in min_succ_bb.ins][-1] in set(['fd','fe']) or [ins.name for ins in min_succ_bb.ins]==['PUSH2','JUMP']): 268 | continue 269 | if (['%x' %ins.op for ins in min_succ_bb.ins][-1] not in set(['fd','fe']) and [*['0x','0x'],*['%x' %ins.op for ins in min_succ_bb.ins]][-3] not in set(['3e']) and [ins.name for ins in min_succ_bb.ins]!=['PUSH2','JUMP'] and len(succ_with_call_bb)==0): 270 | instructions.append(call_ins) 271 | elif ([*['0x'],*['%x' %ins.op for ins in min_succ_bb.ins]][-3] in set(['3e'])): 272 | call_ret_succ=[succ.start for succ in self._bb_at[min([succ.start for succ in min_succ_bb.succ])].succ] 273 | ret_min_succ_bb= self._bb_at[min(call_ret_succ)] 274 | if (['%x' %ins.op for ins in 
ret_min_succ_bb.ins][-1] not in set(['fd','fe'])): 275 | instructions.append(call_ins) 276 | elif (len(succ_with_call_bb)>0): 277 | sec_call_succ=[succ.start for succ in succ_with_call_bb[0].succ] 278 | min_sec_call_succ_bb= self._bb_at[min(sec_call_succ)] 279 | if( [ins.name for ins in min_sec_call_succ_bb.ins]!=['PUSH2','JUMP']): 280 | instructions.append(call_ins) 281 | 282 | return instructions 283 | 284 | def find_loops(self, with_calls=False): 285 | import networkx 286 | g = networkx.DiGraph() 287 | for bb in self.bbs: 288 | for succ in bb.succ: 289 | g.add_edge(bb.start,succ.start) 290 | l= list(networkx.simple_cycles(g)) 291 | loops=defaultdict(list) 292 | calls_in_loops=defaultdict(list) 293 | loops_with_calls=[] 294 | loops_with_gas_sanitizers =[] 295 | for i in l: 296 | if len([h for h in i if(len(self._bb_at[h].pred))>2])>0: 297 | #print('11') 298 | continue 299 | 300 | loop_bbs=[bb for j in i for bb in self.bbs if bb.start==j] 301 | if len(i) ==1: 302 | continue 303 | head =[succ.start for bb in loop_bbs for succ in bb.succ if succ.start in i if bb.start>succ.start and len([p.start for p in succ.pred if p.start not in i])!=0 and len(succ.succ)<=2 and ('ADD' in [ins.name for ins in bb.ins] or [ins.name for ins in bb.ins[-2:]]==['PUSH2','JUMP'])] 304 | 305 | if len(head)==0: 306 | continue 307 | if len(loops[head[0]]): 308 | loops[head[0]].pop(0) 309 | loops[head[0]].insert(0,len(i)) 310 | 311 | back_edge =[[succ.start,bb.start] for bb in loop_bbs for succ in bb.succ if succ.start in i if bb.start>succ.start] 312 | 313 | if len(i)==2: 314 | body_ins=[ins.name for bb in loop_bbs for ins in bb.ins if bb.start!=head[0] and ins.name in ['ADD','SUB','MLOAD','MSTORE','JUMP','SSTORE','EXP','NOT','MUL','PUSH1','POP','SWAP1','DUP2']] 315 | body_start=[bb.start for bb in loop_bbs if bb.start!=head[0]] 316 | if 'MLOAD' in body_ins: 317 | continue 318 | elif body_ins == ['PUSH1','DUP2','PUSH1','SWAP1','SSTORE','POP','PUSH1','ADD','JUMP']: 319 | head_pred=[pred.start 
for bb in loop_bbs for pred in bb.pred if bb.start==head[0] and pred.start !=body_start[0]] 320 | head_pred1=[pred.start for pred in self._bb_at[head_pred[0]].pred] 321 | head_pred2=[pred.start for pred in self._bb_at[head_pred1[0]].pred] 322 | head_pred_ins=[ins.name for bb in self._bb_at[head[0]].pred for ins in bb.ins if bb.start !=body_start[0] and ins.name in ['ADD','MSTORE','JUMP','SSTORE','SHA3','SLOAD']] 323 | head_pred1_ins=[ins.name for bb in self._bb_at[head_pred[0]].pred for ins in bb.ins if ins.name in ['ADD','MSTORE','JUMP','SSTORE','SHA3','SLOAD']] 324 | if len(head_pred2)==3: 325 | continue 326 | if head_pred1_ins !=['SLOAD','SSTORE','MSTORE','SHA3','ADD','JUMP'] and head_pred_ins !=['SLOAD','SSTORE','MSTORE','SHA3','ADD','JUMP']: 327 | continue 328 | else: 329 | continue 330 | 331 | if len(i)==3: 332 | body_ins=[ins.name for bb in loop_bbs for ins in bb.ins if bb.start not in back_edge[0] and ins.name in ['ADD','SUB','MLOAD','MSTORE','JUMP','SSTORE','EXP','NOT','MUL']] 333 | if body_ins == ['MLOAD','MSTORE'] or body_ins==['ADD','MLOAD','ADD','MSTORE']: 334 | continue 335 | 336 | head_cnt=0 337 | for k in range(i.index(head[0]),len(i)+i.index(head[0])): 338 | indx=k%len(i) 339 | if len([bb for bb in loop_bbs if bb.start==i[indx] and len(bb.succ)!=0 and len([succ for succ in bb.succ if succ in loop_bbs]) ==len(bb.succ)])!=0: 340 | loops[head[0]].append(i[indx]) 341 | head_cnt+=1 342 | elif len([bb for bb in loop_bbs if bb.start==i[indx] and len(bb.succ)!=0 and len([succ for succ in bb.succ if succ in loop_bbs]) ==1 and len(bb.succ)==2 and len([succ for succ in bb.succ if self._ins_at[succ.start].op==254])==1])!=0: 343 | loops[head[0]].append(i[indx]) 344 | head_cnt+=1 345 | elif len([bb for bb in loop_bbs if bb.start==i[indx] and len(bb.succ)!=0 and len([succ for succ in bb.succ if succ in loop_bbs]) ==1 and len(bb.succ)==2])!=0: 346 | loops[head[0]].append(i[indx]) 347 | head_cnt+=1 348 | break 349 | 350 | if not with_calls: 351 | for bb in 
loop_bbs: 352 | if bb.start in loops[head[0]] and 'GAS' in [ins.name for ins in bb.ins]: 353 | block_ins =[ins.name for ins in bb.ins if ins.name in ['PUSH2','PUSH3','GAS','GT']] 354 | if block_ins ==['PUSH3','GAS','GT'] or block_ins ==['PUSH2','GAS','GT']: 355 | loops_with_gas_sanitizers.append(head[0]) 356 | break 357 | elif bb.start not in loops[head[0]] and 'GAS' in [ins.name for ins in bb.ins]: 358 | block_ins =[ins.name for ins in bb.ins if ins.name in ['PUSH2','PUSH3','GAS','GT','JUMPI']] 359 | if block_ins[-5:] == ['PUSH3','GAS','GT','PUSH2','JUMPI'] or block_ins[-5:] == ['PUSH2','GAS','GT','PUSH2','JUMPI']: 360 | loops_with_gas_sanitizers.append(head[0]) 361 | break 362 | 363 | if with_calls: 364 | calls_in_loop= False 365 | for bb in loop_bbs: 366 | if bb.start not in loops[head[0]] and 'CALL' in [ins.name for ins in bb.ins]: 367 | call_succ=[succ.start for succ in self._bb_at[bb.start].succ] 368 | if len(call_succ)==2: 369 | if (['%x' %ins.op for ins in self._bb_at[min(call_succ)].ins][-1] in set(['fd','fe'])): 370 | calls_in_loop=True 371 | loops_with_calls.append(head[0]) 372 | calls_in_loops[bb.start].append([ins for ins in bb.ins if 'CALL'==ins.name][0]) 373 | break 374 | if with_calls and not calls_in_loop and head[0] not in loops_with_calls: 375 | del loops[head[0]] 376 | elif with_calls and not calls_in_loop: 377 | for i in range(0,head_cnt): 378 | loops[head[0]].pop() 379 | 380 | for san in set(loops_with_gas_sanitizers): 381 | if loops.get(san,'nokey')!='nokey': 382 | del loops[san] 383 | 384 | if not with_calls: 385 | return loops 386 | else: 387 | return calls_in_loops 388 | 389 | 390 | 391 | 392 | -------------------------------------------------------------------------------- /src/cfg/disassembly.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from collections import deque 3 | 4 | from src.cfg.bb import BB 5 | from src.cfg.instruction import Instruction 6 | from src.cfg.opcodes 
class ArgumentTooShort(Exception):
    """Raised by ``disass`` when a PUSH immediate runs past the end of the code."""


def disass(code, i=0):
    """Yield the instructions of one basic block starting at offset ``i``.

    Decoding stops at the first byte that is not a known opcode, after a
    block-terminating instruction, or right before a JUMPDEST (0x5b).

    :param code: contract bytecode (``bytes``).
    :param i: offset of the first instruction.
    :raises ArgumentTooShort: if a PUSHn has fewer than n argument bytes left.
    """
    assert isinstance(code, bytes)
    while i < len(code):
        loc = i
        op = code[i]
        arg = None
        if op not in opcodes:
            break
        if 0x60 <= op <= 0x7f:
            # PUSH1..PUSH32 carry 1..32 immediate bytes.
            arglen = op - 0x5f
            arg = code[i + 1:i + 1 + arglen]
            if len(arg) < arglen:
                raise ArgumentTooShort
            i += arglen
        i += 1
        yield Instruction(loc, op, arg)
        # End basic block on STOP, JUMP, JUMPI, RETURN, REVERT, RAISE, or if
        # the following instruction is a JUMPDEST
        if op in (0x00, 0x56, 0x57, 0xf3, 0xfd, 0xfe, 0xff) or (i < len(code) and code[i] == 0x5b):
            break


def generate_BBs(code):
    """Yield a ``BB`` for every candidate leader offset that disassembles.

    Candidates are offset 0, every byte following a 0x57 (JUMPI) byte and
    every 0x5b (JUMPDEST) byte.  The scan is byte-wise, so candidates inside
    PUSH data are possible; leaders whose disassembly or ``BB`` construction
    fails are skipped.
    """
    fallthrough_locs = [i + 1 for i, c in enumerate(code) if c == 0x57]
    jumpdest_locs = [i for i, c in enumerate(code) if c == 0x5b]
    leader_candidates = {0} | set(fallthrough_locs) | set(jumpdest_locs)
    for leader in sorted(leader_candidates):
        try:
            instructions = list(disass(code, leader))
            bb = BB(instructions) if instructions else None
        except Exception:
            # Best effort: an undecodable candidate is simply not a block.
            continue
        # The yield sits outside the try block: a bare ``except`` around it
        # swallowed GeneratorExit and made early close() of this generator
        # fail with "generator ignored GeneratorExit".
        if bb is not None:
            yield bb


class Instruction(object):
    """One decoded EVM instruction together with its static opcode metadata."""

    def __init__(self, addr, op, arg=None):
        """
        :param addr: offset of the instruction in the bytecode.
        :param op: opcode byte; must be a key of ``opcodes``.
        :param arg: immediate bytes for PUSHn, otherwise ``None``.
        :raises ValueError: if ``arg`` is neither ``bytes`` nor ``None``.
        """
        if not (arg is None or isinstance(arg, bytes)):
            raise ValueError('Instruction arg must be bytes or None')
        opinfo = opcodes[op]
        inslen = (op - 0x5f) + 1 if 0x60 <= op <= 0x7f else 1
        self.addr = addr
        self.next_addr = self.addr + inslen
        self.op = op
        self.name = opinfo[0]
        self.arg = arg
        self.ins = opinfo[1]    # stack items consumed
        self.outs = opinfo[2]   # stack items produced
        self.gas = opinfo[3]
        self.delta = self.outs - self.ins
        self.bb = None          # left as None here; not assigned anywhere in this block

    def __str__(self):
        return '(%5d) %4x:\t%02x\t-%d +%d = %d\t%s%s' % (
            self.addr, self.addr, self.op, self.ins, self.outs, self.delta, self.name,
            '(%d) %s' % (int.from_bytes(self.arg, byteorder='big'), '\t%s' % self.arg.hex()) if self.arg else '')

    def __repr__(self):
        return str(self)

    def __hash__(self):
        return 17 * self.addr + 19 * self.op + 23 * hash(self.arg)

    def __eq__(self, other):
        # Objects without addr/op/arg are "not comparable" rather than an
        # AttributeError, so ``ins == None`` or ``ins in mixed_list`` works.
        try:
            return (self.addr == other.addr and
                    self.op == other.op and
                    self.arg == other.arg)
        except AttributeError:
            return NotImplemented
0, 1, 2], 41 | 0x3b: ['EXTCODESIZE', 1, 1, 20], # now 700 42 | 0x3c: ['EXTCODECOPY', 4, 0, 20], # now 700 43 | 0x3d: ['RETURNDATASIZE', 0, 1, 2], 44 | 0x3e: ['RETURNDATACOPY', 3, 0, 3], 45 | 0x40: ['BLOCKHASH', 1, 1, 20], 46 | 0x41: ['COINBASE', 0, 1, 2], 47 | 0x42: ['TIMESTAMP', 0, 1, 2], 48 | 0x43: ['NUMBER', 0, 1, 2], 49 | 0x44: ['DIFFICULTY', 0, 1, 2], 50 | 0x45: ['GASLIMIT', 0, 1, 2], 51 | 0x50: ['POP', 1, 0, 2], 52 | 0x51: ['MLOAD', 1, 1, 3], 53 | 0x52: ['MSTORE', 2, 0, 3], 54 | 0x53: ['MSTORE8', 2, 0, 3], 55 | 0x54: ['SLOAD', 1, 1, 50], # 200 now 56 | 0x55: ['SSTORE', 2, 0, 0], # actual cost 5000-20000 depending on circumstance 57 | 0x56: ['JUMP', 1, 0, 8], 58 | 0x57: ['JUMPI', 2, 0, 10], 59 | 0x58: ['PC', 0, 1, 2], 60 | 0x59: ['MSIZE', 0, 1, 2], 61 | 0x5a: ['GAS', 0, 1, 2], 62 | 0x5b: ['JUMPDEST', 0, 0, 1], 63 | 0xa0: ['LOG0', 2, 0, 375], 64 | 0xa1: ['LOG1', 3, 0, 750], 65 | 0xa2: ['LOG2', 4, 0, 1125], 66 | 0xa3: ['LOG3', 5, 0, 1500], 67 | 0xa4: ['LOG4', 6, 0, 1875], 68 | 0xe1: ['SLOADBYTES', 3, 0, 50], # to be discontinued 69 | 0xe2: ['SSTOREBYTES', 3, 0, 0], # to be discontinued 70 | 0xe3: ['SSIZE', 1, 1, 50], # to be discontinued 71 | 0xf0: ['CREATE', 3, 1, 32000], 72 | 0xf1: ['CALL', 7, 1, 40], # 700 now 73 | 0xf2: ['CALLCODE', 7, 1, 40], # 700 now 74 | 0xf3: ['RETURN', 2, 0, 0], 75 | 0xf4: ['DELEGATECALL', 6, 1, 40], # 700 now 76 | 0xf5: ['CALLBLACKBOX', 7, 1, 40], 77 | 0xfa: ['STATICCALL', 6, 1, 40], 78 | 0xfd: ['REVERT', 2, 0, 0], 79 | 0xfe: ['INVALID', 0, 0, 1], 80 | 0xff: ['SELFDESTRUCT', 1, 0, 0], # 5000 now 81 | } 82 | 83 | for i in range(1, 33): 84 | opcodes[0x5f + i] = ['PUSH' + str(i), 0, 1, 3] 85 | 86 | for i in range(1, 17): 87 | opcodes[0x7f + i] = ['DUP' + str(i), i, i + 1, 3] 88 | opcodes[0x8f + i] = ['SWAP' + str(i), i + 1, i + 1, 3] 89 | 90 | reverse_opcodes = {} 91 | for o in opcodes: 92 | vars()[opcodes[o][0]] = opcodes[o] 93 | reverse_opcodes[opcodes[o][0]] = o 94 | 95 | # Non-opcode gas prices 96 | GDEFAULT = 1 97 | GMEMORY = 3 98 | 
GQUADRATICMEMDENOM = 512 # 1 gas per 512 quadwords 99 | GEXPONENTBYTE = 10 # cost of EXP exponent per byte 100 | GCOPY = 3 # cost to copy one 32 byte word 101 | GCONTRACTBYTE = 200 # one byte of code in contract creation 102 | GCALLVALUETRANSFER = 9000 # non-zero-valued call 103 | GLOGBYTE = 8 # cost of a byte of logdata 104 | 105 | GTXCOST = 21000 # TX BASE GAS COST 106 | GTXDATAZERO = 4 # TX DATA ZERO BYTE GAS COST 107 | GTXDATANONZERO = 68 # TX DATA NON ZERO BYTE GAS COST 108 | GSHA3WORD = 6 # Cost of SHA3 per word 109 | GSHA256BASE = 60 # Base c of SHA256 110 | GSHA256WORD = 12 # Cost of SHA256 per word 111 | GRIPEMD160BASE = 600 # Base cost of RIPEMD160 112 | GRIPEMD160WORD = 120 # Cost of RIPEMD160 per word 113 | GIDENTITYBASE = 15 # Base cost of indentity 114 | GIDENTITYWORD = 3 # Cost of identity per word 115 | GECRECOVER = 3000 # Cost of ecrecover op 116 | 117 | GSTIPEND = 2300 118 | 119 | GCALLNEWACCOUNT = 25000 120 | GSELFDESTRUCTREFUND = 24000 121 | 122 | GSTORAGEBASE = 2500 123 | GSTORAGEBYTESTORAGE = 250 124 | GSTORAGEBYTECHANGE = 40 125 | GSTORAGEMIN = 2500 126 | GSSIZE = 50 127 | GSLOADBYTES = 50 128 | 129 | GSTORAGEREFUND = 15000 130 | GSTORAGEKILL = 5000 131 | GSTORAGEMOD = 5000 132 | GSTORAGEADD = 20000 133 | 134 | GMODEXPQUADDIVISOR = 100 135 | GECADD = 500 136 | GECMUL = 2000 137 | 138 | GPAIRINGBASE = 100000 139 | GPAIRINGPERPOINT = 80000 140 | 141 | EXP_SUPPLEMENTAL_GAS = 40 142 | 143 | # Anti-DoS HF changes 144 | SLOAD_SUPPLEMENTAL_GAS = 150 145 | CALL_SUPPLEMENTAL_GAS = 660 146 | EXTCODELOAD_SUPPLEMENTAL_GAS = 680 147 | BALANCE_SUPPLEMENTAL_GAS = 380 148 | CALL_CHILD_LIMIT_NUM = 63 149 | CALL_CHILD_LIMIT_DENOM = 64 150 | SELFDESTRUCT_SUPPLEMENTAL_GAS = 5000 151 | 152 | memory_writes = {'CALLDATACOPY': (-1, -3), 'CODECOPY': (-1, -3), 'EXTCODECOPY': (-2, -4), 'MSTORE': (-1, 32), 153 | 'MSTORE8': (-1, 8), 'CALL': (-6, -7), 'CALLCODE': (-6, -7), 'DELEGATECALL': (-5, -6)} 154 | memory_reads = {'SHA3': (-1, -2), 'MLOAD': (-1, 32), 'CREATE': (-2, 
class UseDefGraph(object):
    """DOT rendering of the writer/argument chain behind one SSA value."""

    value: StackValue

    def __init__(self, value: StackValue) -> None:
        self.value = value

    def dot(self) -> str:
        """Return the graph in Graphviz DOT syntax (edges are de-duplicated)."""
        rv = ''
        rv += 'digraph G {\n'

        es = self.edges(self.value)

        for reader in self.value.readers():
            reader_s = str(reader).replace('%', '\\%')
            value_s = str(self.value).replace('%', '\\%')
            es.append(f"\"{value_s}\" -> \"{reader_s}\"")

        rv += '\n'.join(list(set(es)))
        rv += '\n}'

        # The original fell off the end here and returned None.
        return rv

    def edges(self, value) -> List[str]:
        """Collect DOT edges for ``value``, recursing into its writer's arguments."""
        rv = []
        writer = value.writer
        if writer is None:
            return []

        value_s = str(value).replace('%', '\\%')
        writer_s = str(writer).replace('%', '\\%')
        rv.append(f"\"{writer_s}\" -> \"{value_s}\"")

        for arg in writer:
            arg_s = str(arg).replace('%', '\\%')
            rv.append(f"\"{arg_s}\" -> \"{writer_s}\"")
            rv.extend(self.edges(arg))

        for reader in writer.return_value.readers():
            reader_s = str(reader).replace('%', '\\%')
            rv.append(f"\"{value_s}\" -> \"{reader_s}\"")

        return rv


class DefUseGraph(object):
    """DOT rendering of the instructions that read one SSA value."""

    value: StackValue

    def __init__(self, value: StackValue) -> None:
        self.value = value

    def dot(self, filt=None) -> str:
        """Return the graph in Graphviz DOT syntax.

        :param filt: predicate on a reader; readers for which it is true are
            followed through their own return value.  Defaults to "all".
        """
        if filt is None:
            filt = lambda x: True

        rv = ''
        rv += 'digraph G {\n'

        es = self.edges(self.value, filt)

        for reader in self.value.readers():
            reader_s = str(reader).replace('%', '\\%')
            value_s = str(self.value).replace('%', '\\%')
            es.append(f"\"{value_s}\" -> \"{reader_s}\"")

        rv += '\n'.join(list(set(es)))
        rv += '\n}'

        return rv

    def edges(self, value, filt) -> List[str]:
        """Collect DOT edges from ``value`` to its readers, following ``filt``."""
        rv = []
        writer = value.writer
        if writer is None:
            return []

        value_s = str(value).replace('%', '\\%')
        writer_s = str(writer).replace('%', '\\%')
        rv.append(f"\"{writer_s}\" -> \"{value_s}\"")

        for reader in writer.return_value.readers():
            reader_s = str(reader).replace('%', '\\%')
            rv.append(f"\"{value_s}\" -> \"{reader_s}\"")

            if filt(reader):
                rv.extend(self.edges(reader.return_value, filt))

        return rv


class ControlFlowGraph(object):
    """DOT rendering of the basic blocks of one recovered SSA function."""

    def __init__(self, function: SSAFunction) -> None:
        self.function = function

    def dot(self) -> str:
        """Return the function's CFG in Graphviz DOT syntax.

        The graph contains one summary record node (``ff``) and one record
        node per block; when a block has both a fallthrough and jump edges
        they are labelled ``f`` (red) and ``t`` (dark green).
        """
        rv = ''
        rv += 'digraph G {\n'
        rv += 'graph [fontname = "consolas"];\n'
        rv += 'node [fontname = "consolas"];\n'
        rv += 'edge [fontname = "consolas"];\n'

        name = self.function.desc()
        hash_line = f'Hash: {self.function.hash:#x}'
        offset = f'Start: {self.function.offset:#x}'
        arguments = f'Arguments: {self.function.arguments()}'
        storage = f'Storage: {self.function.storage}'

        function_desc = [name, hash_line, offset, arguments, storage]

        # ``{{`` in the f-string is a single literal brace, so the closing
        # side (a plain string) must carry exactly one ``}`` as well.
        rv += f'ff [label="{{' + '\\l'.join(function_desc) + '\\l}", shape="record" ];\n'

        edges = []

        for block in self.function:
            block_id = f'block_{block.offset}'
            block_body = '\\l'.join([f'{insn.offset:#x}: {insn}' for insn in block])
            # < and > delimit ports in record labels and must be escaped.
            block_body = block_body.replace('<', '\\<').replace('>', '\\>')
            block_dot = f'{block_id} [label="{block_body}\\l", shape="record"];'

            fallthrough_label = ''
            jump_label = ''
            if len(block.jump_edges) > 0 and block.fallthrough_edge:
                fallthrough_label = ' [label=" f", color="red"]'
                jump_label = ' [label=" t", color="darkgreen"]'

            if block.fallthrough_edge:
                target_block_id = f'block_{block.fallthrough_edge.offset}'
                edges.append(f'{block_id} -> {target_block_id}{fallthrough_label};')

            for edge in block.jump_edges:
                target_block_id = f'block_{edge.offset}'
                edges.append(f'{block_id} -> {target_block_id}{jump_label};')

            rv += block_dot + '\n'

        for edge in edges:
            rv += edge + '\n'

        rv += '}\n'

        return rv
class EVMAsm(object):
    '''
    EVM instruction factory built on top of ``pyevmasm``.

    Every static method forwards to the ``pyevmasm`` function of the same
    name; the ones that return instruction objects re-wrap the results as
    :class:`EVMAsm.EVMInstruction`.
    '''

    class EVMInstruction(pyevmasm.Instruction):
        def __init__(self, opcode: int, name: str, operand_size: int, pops: int, pushes: int, fee: int,
                     description: str, operand: Optional[int] = None, pc: Optional[int] = 0) -> None:
            '''
            This represents an EVM instruction.
            EVMAsm will create this for you.

            :param opcode: the opcode value
            :param name: instruction name
            :param operand_size: immediate operand size in bytes
            :param pops: number of items popped from the stack
            :param pushes: number of items pushed into the stack
            :param fee: gas fee for the instruction
            :param description: textual description of the instruction
            :param operand: optional immediate operand
            :param pc: optional program counter of this instruction in the program
            :raises ValueError: if ``operand`` has bits set beyond
                ``operand_size * 8`` bits
            '''
            super().__init__(opcode, name, operand_size, pops, pushes, fee, description, operand, pc)
            if operand_size != 0 and operand is not None:
                # Any bit outside the low operand_size*8 bits means overflow.
                mask = (1 << operand_size * 8) - 1
                if ~mask & operand:
                    raise ValueError("operand should be %d bits long" % (operand_size * 8))

        def __repr__(self) -> str:
            output = 'EVMInstruction(0x{:x}, {}, {:d}, {:d}, {:d}, {:d}, {}, {}, {})'.format(
                self._opcode, self._name, self._operand_size,
                self._pops, self._pushes, self._fee, self._description, self._operand, self._pc)
            return output

        def __hash__(self) -> int:
            # Hash on opcode, stack effect and position only.
            return hash((self._opcode, self._pops, self._pushes, self._pc))

        @property
        def is_push(self) -> bool:
            return self.semantics == 'PUSH'

        @property
        def is_pop(self) -> bool:
            return self.semantics == 'POP'

        @property
        def is_dup(self) -> bool:
            return self.semantics == 'DUP'

        @property
        def is_swap(self) -> bool:
            return self.semantics == 'SWAP'

        @property
        def is_comparison(self) -> bool:
            return self.semantics in ('LT', 'GT', 'SLT', 'SGT', 'EQ', 'ISZERO')

        @property
        def is_boolean_logic(self) -> bool:
            return self.semantics in ('AND', 'OR', 'XOR', 'NOT')

    @staticmethod
    def convert_instruction_to_evminstruction(instruction):
        ''' Copy a ``pyevmasm`` instruction's fields into a new EVMInstruction. '''
        return EVMAsm.EVMInstruction(instruction._opcode, instruction._name, instruction._operand_size,
                                     instruction._pops, instruction._pushes, instruction._fee,
                                     instruction._description, instruction._operand, instruction._pc)

    @staticmethod
    def assemble_one(assembler: str, pc: int = 0, fork=pyevmasm.DEFAULT_FORK) -> EVMInstruction:
        ''' Assemble one EVM instruction from its textual representation.

            :param assembler: assembler code for one instruction
            :param pc: program counter of the instruction in the bytecode (optional)
            :param fork: fork identifier passed through to ``pyevmasm``
            :return: An Instruction object

            Example use::

                >>> print(EVMAsm.assemble_one('LT'))
        '''
        instruction = pyevmasm.assemble_one(assembler, pc, fork)
        return EVMAsm.convert_instruction_to_evminstruction(instruction)

    @staticmethod
    def convert_multiple_instructions_to_evminstructions(instructions):
        ''' Lazily convert each ``pyevmasm`` instruction to an EVMInstruction. '''
        for i in instructions:
            yield EVMAsm.convert_instruction_to_evminstruction(i)

    @staticmethod
    def assemble_all(assembler: str, pc: int = 0, fork=pyevmasm.DEFAULT_FORK) -> Iterable[EVMInstruction]:
        ''' Assemble a sequence of textual representation of EVM instructions

            :param assembler: assembler code for any number of instructions
            :param pc: program counter of the first instruction in the bytecode(optional)
            :param fork: fork identifier passed through to ``pyevmasm``
            :return: A generator of Instruction objects

            Example use::

                >>> list(EVMAsm.assemble_all('PUSH1 0x60\\nPUSH1 0x40\\nMSTORE'))
        '''
        instructions = pyevmasm.assemble_all(assembler, pc, fork)
        return EVMAsm.convert_multiple_instructions_to_evminstructions(instructions)

    @staticmethod
    def disassemble_one(bytecode: Iterable, pc: int = 0, fork=pyevmasm.DEFAULT_FORK) -> EVMInstruction:
        ''' Decode a single instruction from a bytecode

            :param bytecode: the bytecode stream
            :param pc: program counter of the instruction in the bytecode(optional)
            :param fork: fork identifier passed through to ``pyevmasm``
            :type bytecode: iterator/sequence/str
            :return: an Instruction object
        '''
        instruction = pyevmasm.disassemble_one(bytecode, pc, fork)
        return EVMAsm.convert_instruction_to_evminstruction(instruction)

    @staticmethod
    def disassemble_all(bytecode: Iterable, pc: int = 0, fork=pyevmasm.DEFAULT_FORK) -> Iterable[EVMInstruction]:
        ''' Decode all instructions in bytecode

            :param bytecode: an evm bytecode (binary)
            :param pc: program counter of the first instruction in the bytecode(optional)
            :param fork: fork identifier passed through to ``pyevmasm``
            :type bytecode: iterator/sequence/str
            :return: A generator of Instruction objects

            Example use::

                >>> for inst in EVMAsm.disassemble_all(bytecode):
                ...     print(inst)
        '''
        instructions = pyevmasm.disassemble_all(bytecode, pc, fork)
        return EVMAsm.convert_multiple_instructions_to_evminstructions(instructions)

    @staticmethod
    def disassemble(bytecode: Iterable, pc: int = 0, fork=pyevmasm.DEFAULT_FORK) -> str:
        ''' Disassemble an EVM bytecode

            :param bytecode: binary representation of an evm bytecode
            :param pc: program counter of the first instruction in the bytecode(optional)
            :param fork: fork identifier passed through to ``pyevmasm``
            :return: the text representation of the assembler code
        '''
        return pyevmasm.disassemble(bytecode, pc, fork)

    @staticmethod
    def assemble(asmcode, pc=0, fork=pyevmasm.DEFAULT_FORK):
        ''' Forward to ``pyevmasm.assemble`` and return its result unchanged. '''
        return pyevmasm.assemble(asmcode, pc, fork)

    @staticmethod
    def disassemble_hex(bytecode: str, pc: int = 0, fork=pyevmasm.DEFAULT_FORK) -> str:
        ''' Disassemble an EVM bytecode

            :param bytecode: canonical representation of an evm bytecode (hexadecimal)
            :param int pc: program counter of the first instruction in the bytecode(optional)
            :param fork: fork identifier passed through to ``pyevmasm``
            :type bytecode: str
            :return: the text representation of the assembler code
        '''
        return pyevmasm.disassemble_hex(bytecode, pc, fork)

    @staticmethod
    def assemble_hex(asmcode, pc=0, fork=pyevmasm.DEFAULT_FORK):
        ''' Forward to ``pyevmasm.assemble_hex`` and return its result unchanged. '''
        return pyevmasm.assemble_hex(asmcode, pc, fork)
range(length)]) 22 | 23 | 24 | def get_level(name): 25 | try: 26 | return int(name[name.rfind('_') + 1:]) 27 | except: 28 | return 0 29 | 30 | 31 | def model_to_calls(model, idx_dict): 32 | calls = defaultdict(dict) 33 | for vref in model: 34 | name = vref.name() 35 | v = model[vref] 36 | if name.split('_')[0] not in ('CALLDATASIZE', 'CALLDATA', 'CALLVALUE', 'CALLER', 'ORIGIN'): 37 | continue 38 | call_index = idx_dict[get_level(name)] 39 | call = calls[call_index] 40 | if name.startswith('CALLDATASIZE'): 41 | payload_size = model.eval(v).as_long() 42 | call['payload_size'] = payload_size 43 | elif name.startswith('CALLDATA'): 44 | call['payload_model'] = v 45 | elif name.startswith('CALLVALUE'): 46 | call['value'] = model.eval(v).as_long() 47 | elif name.startswith('CALLER'): 48 | call['caller'] = model.eval(v).as_long() 49 | elif name.startswith('ORIGIN'): 50 | call['origin'] = model.eval(v).as_long() 51 | else: 52 | logging.warning('CANNOT CONVERT %s', name) 53 | 54 | for call in calls.values(): 55 | if 'payload_model' not in call: 56 | call['payload'] = bytes() 57 | else: 58 | assert 'payload_size' in call 59 | call['payload'] = array_to_array(model, call['payload_model'], call['payload_size']) 60 | call.pop('payload_size', None) 61 | call.pop('payload_model', None) 62 | 63 | return [v for k, v in sorted(calls.items())] 64 | 65 | 66 | # MAX_SYM_READ_SIZE = 512 67 | MAX_SYM_READ_SIZE = 256 68 | 69 | 70 | def symread_eq(a, b, size=MAX_SYM_READ_SIZE): 71 | if not isinstance(a, SymRead) and not isinstance(b, SymRead): 72 | if a.size() != b.size(): 73 | return z3.BoolVal(False) 74 | else: 75 | return a == b 76 | elif isinstance(a, SymRead) and isinstance(b, SymRead): 77 | # both have symbolic size 78 | return z3.And(a.size == b.size, 79 | *(z3.If(z3.ULT(i, a.size), a.memory[a.start + i] == b.memory[b.start + i], True) for i in 80 | range(size))) 81 | else: 82 | if isinstance(b, SymRead): 83 | # ensure that a is the one with symbolic size 84 | a, b = b, a 85 | return 
def symread_substitute(x, subst):
    """Apply the substitutions ``subst`` to ``x`` and simplify the result.

    :param x: either a z3 expression or a ``SymRead``
    :param subst: substitution pairs, passed unchanged to ``z3.substitute``
    :return: for a plain expression, the substituted and simplified
             expression; for a ``SymRead``, a new ``SymRead`` whose memory
             array (and start / size, when they are not concrete) have been
             substituted.  The input ``SymRead`` is left untouched.
    """
    if not isinstance(x, SymRead):
        return z3.simplify(z3.substitute(x, subst))

    new_symread = copy.copy(x)
    # copy.copy() is shallow: new_symread.memory is still the very same
    # object as x.memory.  Copy the memory wrapper as well, otherwise the
    # assignment below would silently rewrite the caller's SymRead.
    new_symread.memory = copy.copy(x.memory)
    new_symread.memory.memory = z3.simplify(z3.substitute(x.memory.memory, subst))
    if not concrete(new_symread.start):
        new_symread.start = z3.simplify(z3.substitute(new_symread.start, subst))
    if not concrete(new_symread.size):
        new_symread.size = z3.simplify(z3.substitute(new_symread.size, subst))
    return new_symread
sha_constraints[b])]) 133 | check_result = s.check() 134 | # logging.debug("Checking hashes %s and %s: %s", a, b, check_result) 135 | if check_result == z3.unsat: 136 | # logging.debug("Hashes MUST be equal: %s and %s", a, b) 137 | subst = [(a, b)] 138 | extra_constraints = [z3.simplify(z3.substitute(c, subst)) for c in extra_constraints] 139 | extra_constraints.append(symread_eq(symread_substitute(sha_constraints[a], subst), 140 | symread_substitute(sha_constraints[b], subst))) 141 | constraints = [z3.simplify(z3.substitute(c, subst)) for c in constraints] 142 | b_val = symread_substitute(sha_constraints[b], subst) 143 | sha_constraints = {z3.substitute(sha, subst): symread_substitute(sha_value, subst) for 144 | sha, sha_value in 145 | sha_constraints.items() if not sha is a or sha is b} 146 | sha_constraints[b] = b_val 147 | break 148 | else: 149 | # logging.debug("Hashes COULD be equal: %s and %s", a, b) 150 | pass 151 | else: 152 | break 153 | 154 | return check_and_model(constraints + extra_constraints, sha_constraints, ne_constraints, second_try=second_try) 155 | 156 | 157 | def check_and_model(constraints, sha_constraints, ne_constraints, second_try=False): 158 | # logging.debug(' ' * 16 + '-' * 16) 159 | 160 | unresolved = set(sha_constraints.keys()) 161 | sol = z3.SolverFor("QF_ABV") 162 | sol.add(ne_constraints) 163 | todo = constraints 164 | progress = True 165 | all_vars = dict() 166 | while progress: 167 | new_todo = [] 168 | progress = False 169 | for c in todo: 170 | all_vars[c] = get_vars_non_recursive(c, include_select=True, include_indices=False) 171 | if any(x in unresolved for x in all_vars[c]): 172 | new_todo.append(c) 173 | else: 174 | progress = True 175 | sol.add(c) 176 | unresolved_vars = set(v.get_id() for c in new_todo for v in all_vars[c]) | set(v.get_id() for v in unresolved) 177 | # logging.debug("Unresolved vars: %s", ','.join(map(str, unresolved_vars))) 178 | if sol.check() != z3.sat: 179 | raise IntractablePath() 180 | m = 
sol.model() 181 | unresolved_todo = list(set(unresolved)) 182 | while unresolved_todo: 183 | u = unresolved_todo.pop() 184 | c = sha_constraints[u] 185 | if isinstance(c, SymRead): 186 | vars = set() 187 | if not concrete(c.start): 188 | vars |= get_vars_non_recursive(c.start, include_select=True) 189 | if not concrete(c.size): 190 | vars |= get_vars_non_recursive(c.size, include_select=True) 191 | # logging.debug("Trying to resolve %s, start and size vars: %s", u, ','.join(map(str, vars))) 192 | if any(x.get_id() in unresolved_vars for x in vars): 193 | continue 194 | start = c.start 195 | if not concrete(c.start): 196 | tmp = m.eval(c.start) 197 | if not z3util.is_expr_val(tmp): 198 | continue 199 | start = tmp.as_long() 200 | sol.add(c.start == start) 201 | size = c.size 202 | if not concrete(c.size): 203 | tmp = m.eval(c.size) 204 | if not z3util.is_expr_val(tmp): 205 | continue 206 | size = tmp.as_long() 207 | sol.add(c.size == size) 208 | 209 | data = c.memory.read(start, size) 210 | if isinstance(data, list): 211 | if len(data) > 1: 212 | data = z3.Concat(*data) 213 | elif len(data) == 1: 214 | data = data[0] 215 | else: 216 | raise IntractablePath() 217 | sha_constraints = dict(sha_constraints) 218 | sha_constraints[u] = data 219 | unresolved_todo.append(u) 220 | else: 221 | vars = get_vars_non_recursive(c, include_select=True) 222 | # logging.debug("Trying to resolve %s, vars: %s", u, ','.join(map(str, vars))) 223 | if any(x.get_id() in unresolved_vars for x in vars): 224 | continue 225 | v = m.eval(c) 226 | if z3util.is_expr_val(v): 227 | sha = big_endian_to_int(sha3(to_bytes(v))) 228 | sol.add(c == v) 229 | sol.add(u == sha) 230 | unresolved.remove(u) 231 | progress = True 232 | todo = new_todo 233 | if sol.check() != z3.sat: 234 | raise IntractablePath() 235 | if todo: 236 | if second_try: 237 | raise IntractablePath() 238 | raise UnresolvedConstraints(unresolved) 239 | return sol.model() 240 | 241 | 242 | def dependency_summary(constraints, 
class IntractablePath(Exception):
    """Raised when a path cannot be solved.

    :param trace: iterable stored as the tuple ``self.trace``
    :param remainingpath: iterable stored as the tuple ``self.remainingpath``

    Both arguments are passed through ``tuple()``, so a string argument is
    stored as a tuple of its characters.
    """

    # Immutable tuple defaults instead of shared mutable list defaults.
    def __init__(self, trace=(), remainingpath=()):
        self.trace = tuple(trace)
        self.remainingpath = tuple(remainingpath)
def copy(self):
    """Return a copy of this result that lives under a fresh execution id.

    The result is simplified first; then every constraint, every sha
    constraint (key and value) and the state are translated / copied to the
    id obtained from ``gen_exec_id()``.

    :return: a new ``SymbolicResult`` with the same target op and the same
             ``possible_intended_behavior`` as this one
    """
    new_xid = gen_exec_id()

    self.simplify()

    new_constraints = [translate(c, new_xid) for c in self.constraints]
    new_sha_constraints = {}
    for sha, sha_value in self.sha_constraints.items():
        new_sha = translate(sha, new_xid)
        # SymRead values carry their own translate(); plain z3 expressions
        # go through the module-level helper.
        if isinstance(sha_value, SymRead):
            new_sha_constraints[new_sha] = sha_value.translate(new_xid)
        else:
            new_sha_constraints[new_sha] = translate(sha_value, new_xid)
    new_state = self.state.copy(new_xid)

    # __init__ requires ``pib`` as its last positional argument; leaving it
    # out made every copy() call fail with a TypeError.
    return SymbolicResult(new_xid, new_state, new_constraints, new_sha_constraints, self.target_op,
                          self.possible_intended_behavior)
@property
def states(self):
    """Return the cached list of substituted states, combining on first use.

    ``_reset()`` sets the cache to ``None``; ``combine()`` fills it with a
    list.  The cache is tested with ``is None`` like the sibling properties,
    so an empty (but already computed) list does not trigger another
    ``combine()`` on every access.
    """
    if self._states is None:
        self.combine()
    return self._states
self._vars = dict() 135 | self.concrete_reads = set() 136 | self.concrete_writes = set() 137 | self.symbolic_reads = set() 138 | self.symbolic_writes = set() 139 | self.symbolic_hash_reads = set() 140 | self.symbolic_hash_writes = set() 141 | for addr in set(result.state.storage.reads): 142 | if concrete(addr): 143 | self.concrete_reads.add(addr) 144 | else: 145 | x_vars = get_vars_non_recursive(addr, True) 146 | self._vars[addr] = x_vars 147 | if set(x_vars) & set(result.sha_constraints.keys()): 148 | self.symbolic_hash_reads.add(addr) 149 | else: 150 | self.symbolic_reads.add(addr) 151 | for addr in set(result.state.storage.writes): 152 | if concrete(addr): 153 | self.concrete_writes.add(addr) 154 | else: 155 | x_vars = get_vars_non_recursive(addr, True) 156 | self._vars[addr] = x_vars 157 | if set(x_vars) & set(result.sha_constraints.keys()): 158 | self.symbolic_hash_writes.add(addr) 159 | else: 160 | self.symbolic_writes.add(addr) 161 | 162 | def may_read_from(self, other): 163 | if not self.symbolic_reads and not other.symbolic_writes: 164 | # no side has a non-hash-based symbolic access 165 | # => only concrete accesses can intersect 166 | # (or hash-based accesses, which we will check later) 167 | if self.concrete_reads & other.concrete_writes: 168 | return True 169 | else: 170 | # at least one side has a non-hash-based symbolic access 171 | # => if there is at least one concrete or symbolic access 172 | # on the other side, the two could be equal 173 | # (otherwise we have to look at hash-based accesses, see below) 174 | if ((self.symbolic_reads or self.concrete_reads or self.symbolic_hash_reads) and 175 | (other.symbolic_writes or other.concrete_writes or other.symbolic_hash_writes)): 176 | return True 177 | 178 | if self.symbolic_hash_reads and other.symbolic_hash_writes: 179 | for a, b in itertools.product(self.symbolic_hash_reads, other.symbolic_hash_writes): 180 | if not ast_eq(a, b): 181 | continue 182 | hash_a = list(self._vars[a] & 
def gen_exec_id():
    """Return the next execution id: 0 on the first call, then 1, 2, ...

    The counter is kept as the attribute ``xid`` on the function object
    itself.
    """
    # Before the first call the attribute does not exist; starting from -1
    # makes the first id 0.
    next_xid = getattr(gen_exec_id, "xid", -1) + 1
    gen_exec_id.xid = next_xid
    return next_xid
def extend(self, start, size):
    """Grow ``self.memory`` with zero bytes until it holds ``start + size`` bytes.

    Nothing happens when the buffer is already at least that long.
    """
    missing = start + size - len(self.memory)
    if missing > 0:
        # bytearray(n) is n zero bytes
        self.memory += bytearray(missing)
def write(self, start, size, val):
    """Store the ``size`` elements of ``val`` at addresses ``start`` onwards.

    :param start: first address; may be concrete or symbolic
    :param size: number of elements; must be concrete
    :param val: sequence of exactly ``size`` elements
    :raises SymbolicError: if ``size`` is not concrete
    :raises ValueError: if ``len(val)`` differs from ``size``
    """
    if not concrete(size):
        raise SymbolicError("Write of symbolic length")
    if len(val) != size:
        raise ValueError("value does not match length")

    if not concrete(start):
        # Symbolic base address: slice assignment is not available, so
        # store element by element (size is known to be concrete here).
        for offset in range(size):
            self[start + offset] = val[offset]
        return

    self[start:start + size] = val
class SymbolicStorage(object):
    """Symbolic key/value storage backed by a z3 array, with an access log.

    ``base`` is the initial array (``STORAGE_<xid>``, 256-bit keys and
    values), ``storage`` is ``base`` plus all stores made so far, and
    ``accesses`` records every read and write as ``(kind, address)``.
    """

    def __init__(self, xid):
        self.base = z3.Array('STORAGE_%d' % xid, z3.BitVecSort(256), z3.BitVecSort(256))
        self.storage = self.base
        self.accesses = list()

    def _log(self, kind, index):
        # Symbolic addresses are simplified before they are recorded.
        address = index if concrete(index) else z3.simplify(index)
        self.accesses.append((kind, address))

    def _addresses(self, kind):
        # Addresses of all logged accesses of the given kind, in order.
        return [address for tag, address in self.accesses if tag == kind]

    def __getitem__(self, index):
        self._log('read', index)
        return self.storage[index]

    def __setitem__(self, index, v):
        self._log('write', index)
        self.storage = z3.Store(self.storage, index, v)

    @property
    def reads(self):
        """Addresses of all logged reads, in access order."""
        return self._addresses('read')

    @property
    def writes(self):
        """Addresses of all logged writes, in access order."""
        return self._addresses('write')

    @property
    def all(self):
        """Addresses of all logged accesses, in access order."""
        return [address for _, address in self.accesses]

    def copy(self, new_xid):
        """Return a copy whose arrays and symbolic addresses are translated to ``new_xid``."""
        duplicate = SymbolicStorage(new_xid)
        duplicate.base = translate(self.base, new_xid)
        duplicate.storage = translate(self.storage, new_xid)
        duplicate.accesses = [
            (kind, address if concrete(address) else translate(address, new_xid))
            for kind, address in self.accesses
        ]
        return duplicate
class LazySubstituteMemory(object):
    """Placeholder wrapper pairing a memory object with pending substitutions.

    Only the constructor is functional; item access is not implemented.
    """

    def __init__(self, memory, substitutions):
        self._memory = memory
        self._substitutions = substitutions

    def __getitem__(self, index):
        # ``NotImplemented`` is a non-callable constant meant for binary
        # operator fallbacks; calling it raised a confusing TypeError.
        # The exception class for "not implemented" is NotImplementedError.
        raise NotImplementedError()
class ExploitContext(object):
    """Bundle of the parameters that describe one exploit attempt.

    All constructor arguments are stored unchanged under the same attribute
    name, except ``controlled_addrs``, which always additionally contains
    ``target_addr``.

    :param controlled_addrs: optional set of controlled addresses; omitted
                             or ``None`` means no extra addresses
    """

    def __init__(self, target_addr, shellcode_addr, target_amount, amount_check, initial_balance, initial_storage,
                 controlled_addrs=None):
        self.target_addr = target_addr
        self.shellcode_addr = shellcode_addr
        self.target_amount = target_amount
        self.amount_check = amount_check
        self.initial_balance = initial_balance
        self.initial_storage = initial_storage

        # A ``set()`` default would be one object shared by every call.
        if controlled_addrs is None:
            controlled_addrs = set()
        # assume we control the target address
        # (``|`` builds a new set, so the caller's set is never modified)
        self.controlled_addrs = controlled_addrs | {target_addr}
# Dispatch table: target opcode name -> function(r, ctx) that returns the
# extra constraints needed to exploit that opcode.
EXPLOIT_CONSTRAINTS = {
    'CALL': exploit_constraints_call,
    'CALLCODE': exploit_constraints_callcode,
    # Use the dedicated DELEGATECALL handler.  It was defined but never
    # referenced because this entry pointed at the CALLCODE handler.
    'DELEGATECALL': exploit_constraints_delegatecall,
    'SELFDESTRUCT': exploit_constraints_selfdestruct
}
res.xid, 256) 155 | # ensure we control the origin 156 | extra_constraints.append(control_address_constraints(origin, ctx.controlled_addrs)) 157 | # and ensure the caller is either the origin or the shellcode address 158 | extra_constraints.append(control_address_constraints(caller, {origin, ctx.shellcode_addr})) 159 | 160 | try: 161 | model = check_model_and_resolve(c.constraints + extra_constraints, c.sha_constraints) 162 | 163 | # enforce we control all ORIGIN-addresses 164 | if any(model[v].as_long() not in ctx.controlled_addrs for v in model if v.name().startswith('ORIGIN')): 165 | raise InfeasibleExploit 166 | 167 | return model_to_calls(model, c.idx_dict), c, model 168 | except IntractablePath: 169 | raise InfeasibleExploit 170 | 171 | 172 | def combined_exploit(p, target_addr, shellcode_addr, target_amount, amount_check='+', initial_storage=dict(), 173 | initial_balance=None, 174 | max_calls=3, controlled_addrs=set(), flags=None): 175 | 176 | flags = flags or set(opcodes.CRITICAL) 177 | 178 | ctx = ExploitContext(target_addr, shellcode_addr, target_amount, amount_check, initial_balance, initial_storage, 179 | controlled_addrs) 180 | 181 | sload_bbs = {ins.bb.start for ins in p.cfg.filter_ins('SLOAD')} 182 | critical_paths = [] 183 | 184 | for op in opcodes.CRITICAL: 185 | if op not in flags: 186 | continue 187 | ins = p.cfg.filter_ins(op) 188 | if not ins: 189 | logging.info('No %s instructions', op) 190 | continue 191 | logging.info('Found %d %s instructions', len(ins), op) 192 | for i, i_path, i_r in p.get_constraints(ins, opcodes.CRITICAL_ARGS[op]): 193 | logging.info("%s: %s", op, i) 194 | logging.info("Path: %s", '->'.join('%x' % p for p in i_path)) 195 | if set(i_path) & sload_bbs: 196 | # if there is a SLOAD on this path, 197 | # it might benefit from prepending a state-changing path later 198 | critical_paths.append(i_r) 199 | try: 200 | return attempt_exploit([i_r], ctx) 201 | except InfeasibleExploit: 202 | continue 203 | if not critical_paths: 
204 | logging.warning("No state-dependent critical path found, aborting") 205 | return 206 | 207 | end_ins = p.cfg.filter_ins('RETURN') + p.cfg.filter_ins('STOP') 208 | if not end_ins: 209 | logging.info('No RETURN or STOP instructions') 210 | return 211 | logging.info('Found %d RETURN and STOP instructions', len(end_ins)) 212 | compatible = defaultdict(lambda: [[]]) # list of lists 213 | state_changing_paths = [] 214 | for i, (end, end_path, state_changing_r) in enumerate(p.get_constraints(end_ins, find_sstore=True)): 215 | logging.info("End: %s", end) 216 | logging.info("Path: %s", '->'.join('%x' % p for p in end_path)) 217 | state_changing_paths.append(state_changing_r) 218 | for j, critical_r in enumerate(critical_paths): 219 | if not critical_r.may_read_from(state_changing_r): 220 | continue 221 | compatible[j][0].append(i) 222 | try: 223 | return attempt_exploit([state_changing_r, critical_r], ctx) 224 | except InfeasibleExploit: 225 | continue 226 | 227 | logging.info('All ends: %s', state_changing_paths) 228 | 229 | storage_compatible = defaultdict(list) 230 | for (i, a_r), (j, b_r) in itertools.product(enumerate(state_changing_paths), enumerate(state_changing_paths)): 231 | if a_r.may_read_from(b_r): 232 | storage_compatible[i].append(j) 233 | 234 | calls = [state_changing_paths] 235 | while len(calls) < max_calls - 1: 236 | new_ends = [r.copy() for r in state_changing_paths] 237 | calls.append(new_ends) 238 | for k, v in compatible.items(): 239 | new_compat = set() 240 | for c in v[-1]: 241 | new_compat.update(storage_compatible[c]) 242 | v.append(sorted(new_compat)) 243 | for i, critical_r in enumerate(critical_paths): 244 | for combo_ids in itertools.product(*compatible[i]): 245 | combo = [critical_r] + [c[j] for c, j in zip(calls, combo_ids)] 246 | try: 247 | return attempt_exploit(combo[::-1], ctx) 248 | except InfeasibleExploit: 249 | continue 250 | 251 | logging.info('Could not exploit any RETURN+CALL') 252 | 
class BackwardExplorerState(object):
    """One node of the backward search: a basic block plus the bookkeeping needed to rank and cache it."""

    def __init__(self, bb, gas, must_visit, cost, data):
        self.bb = bb
        self.gas = gas
        # private copy so later mutation by the caller does not affect this state
        self.must_visit = must_visit.copy()
        self.data = data
        self.cost = cost

    def estimate(self):
        """
        Return an estimate of how quickly we can reach the root of the tree
        This estimate is the sum of the number of branches taken so far (self.cost) and the
        estimate given by the next BB to visit (self.bb.estimate_constraints)
        :return: estimated distance to root
        """
        if self.bb.estimate_constraints is None:
            return self.cost
        return self.cost + self.bb.estimate_constraints

    def rank(self):
        """
        Compute a rank for this state. Order by estimated root-distance first, solve ties by favoring
        less restricted states for caching efficiency
        :return: tuple (estimate, number of must-visit entries)
        """
        return self.estimate(), len(self.must_visit)

    def __lt__(self, other):
        return self.rank() < other.rank()

    def __hash__(self):
        return sum(a * b for a, b in zip((23, 29, 31), (hash(self.bb), hash(self.must_visit), hash(self.data))))

    def __eq__(self, other):
        # gas and cost are deliberately not part of the identity (they are not hashed either)
        if not isinstance(other, BackwardExplorerState):
            return NotImplemented
        return self.bb == other.bb and self.must_visit == other.must_visit and self.data == other.data

    def __str__(self):
        return 'At: %x, Gas: %s, Must-Visit: %s, Data: %s, Hash: %x' % (
            self.bb.start, self.gas, self.must_visit, self.data, hash(self))


def generate_sucessors(state, new_data, update_data, predicate=lambda st, pred: True):
    """
    Build the follow-up states reached by stepping from ``state`` to each predecessor block.

    :param state: the BackwardExplorerState to expand
    :param new_data: data already advanced through the current block
    :param update_data: callable (data, predecessor BB) -> data for the new state
    :param predicate: callable (state data, predecessor BB) -> bool; edges returning False are skipped
    :return: list of new BackwardExplorerState objects (empty when gas is exhausted)
    """
    new_todo = []
    if state.gas is not None and state.gas <= 0:
        return new_todo

    new_gas = state.gas
    # only blocks with more than one predecessor consume gas
    if state.gas and len(state.bb.pred) > 1:
        new_gas = state.gas - 1

    for p in state.bb.pred:
        if not predicate(state.data, p):
            continue

        new_must_visits = []
        for path in state.bb.pred_paths[p]:
            new_must_visit = state.must_visit.copy()
            for a, b in zip(path[:-1], path[1:]):
                new_must_visit.add(b, a)
            if p.start in new_must_visit.frontier:
                new_must_visit.remove(p.start)
            if not new_must_visit.all.issubset(p.ancestors):
                # some required block cannot be reached from this predecessor
                continue
            new_must_visits.append(new_must_visit)

        new_cost = state.cost + (1 if p.branch else 0)

        for new_must_visit in minimize(new_must_visits):
            new_todo.append(BackwardExplorerState(p, new_gas, new_must_visit, new_cost, update_data(new_data, p)))
    return new_todo


def traverse_back(start_ins, initial_gas, initial_data, advance_data, update_data, finish_path, must_visits=None,
                  predicate=lambda st, p: True):
    """
    Explore the CFG backwards from the given instructions, best-ranked state first.

    :param start_ins: Starting instructions
    :param initial_gas: Starting "gas". Can be None, in which case it is unlimited
    :param initial_data: Callable producing the starting data for an instruction
    :param advance_data: method to advance data
    :param update_data: method to update data
    :param finish_path: Callable (data) -> bool deciding whether a path is complete
    :param must_visits: Iterable of FrontierSets (or FrontierSet constructor arguments) describing
                        the next nodes that *must* be visited; defaults to one empty FrontierSet
    :param predicate: A function (state, BB) -> Bool describing whether an edge should be taken or not
    :return: yields paths as they are explored one-by-one
    """
    # None instead of a shared mutable ``[]`` default
    if not must_visits:
        must_visits = [FrontierSet()]

    todo = PriorityQueue()
    for ins in start_ins:
        data = initial_data(ins)
        # The queue orders states by rank(), so the least restricted states are
        # preferred, which should maximize caching efficiency.
        # isinstance(): the previous ``mv is not FrontierSet`` compared an instance
        # with the class itself and therefore always re-wrapped the value.
        initial_sets = (mv if isinstance(mv, FrontierSet) else FrontierSet(mv) for mv in must_visits)
        for must_visit in minimize(initial_sets):
            todo.put(BackwardExplorerState(ins.bb, initial_gas, must_visit, 0, data))

    cache = set()
    ended_prematurely = defaultdict(int)
    while not todo.empty():
        state = todo.get()
        # if this BB can be reached via multiple paths, check if we want to cache it
        # or whether another path already reached it with the same state
        if len(state.bb.succ) > 1:
            if state in cache:
                continue
            cache.add(state)
        new_data = advance_data(state.data)
        if finish_path(new_data):
            yield new_data
            continue
        out_of_gas = (state.gas is not None and state.bb.estimate_back_branches is not None
                      and (state.gas == 0 or state.gas < state.bb.estimate_back_branches))
        if out_of_gas:
            ended_prematurely[state.bb.start] += 1
            continue
        logging.debug('[tr] continuing path (%s)', new_data)
        for nt in generate_sucessors(state, new_data, update_data, predicate=predicate):
            todo.put(nt)

    total_ended = sum(ended_prematurely.values())
    if total_ended:
        logging.debug("%d paths that ended prematurely due to branches: %s", total_ended,
                      ', '.join('%x: %d' % (k, v) for k, v in ended_prematurely.items()))
    else:
        logging.debug("Finished all paths")


def minimize(must_visits):
    """
    Yield the minimal elements of ``must_visits`` with respect to ``issubset``.

    Candidates are visited smallest first; every remaining candidate that is a
    superset of an already yielded one is dropped.
    """
    todo = sorted(must_visits, key=len)
    while todo:
        must_visit = todo[0]
        yield must_visit
        todo = [mv for mv in todo[1:] if not must_visit.issubset(mv)]
class ForwardExplorerState(object):
    """A position in the forward search: current block, path taken so far and the slices still to cover."""

    def __init__(self, bb, path=None, branches=None, slices=None):
        """
        :param bb: basic block this state is at
        :param path: block start addresses visited before ``bb`` (``None`` means none)
        :param branches: number of branching points passed so far (``None`` means 0)
        :param slices: instruction sequences still to be matched (``None`` means none)
        """
        self.bb = bb
        # the declared defaults are None, which the previous ``list(path)`` could not handle
        self.path = list(path or []) + [bb.start]
        self.seen = set(self.path)
        self.branches = branches or 0
        self.slices = []
        self.finished = set()
        # ordering key; ForwardExplorer.find() overwrites it before queueing the state
        self.weight = 0
        for pending in (slices or ()):
            last_pc = None
            # consume leading instructions of the slice that live in this block,
            # as long as their addresses keep increasing
            while pending and pending[0].bb.start == self.bb.start:
                if last_pc is None or pending[0].addr > last_pc:
                    last_pc = pending[0].addr
                    if len(pending) == 1:
                        # last instruction of the slice reached
                        self.finished.add(last_pc)
                    pending = pending[1:]
                else:
                    break
            self.slices.append(pending)

    def next_states(self):
        """Return the successor states that can still reach at least one remaining slice."""
        possible_succs = []
        for succ in self.bb.succ:
            # a successor is admissible if one of its recorded predecessor paths
            # is consistent with the path taken so far
            for pth in succ.pred_paths[self.bb]:
                if set(pth).issubset(self.seen) and is_subseq(pth, self.path):
                    possible_succs.append(succ)
                    break
        next_states = []
        branches = self.branches
        if len(possible_succs) > 1:
            branches += 1
        for succ in possible_succs:
            reachable = succ.descendants | {succ.start}
            next_slices = tuple(
                s for s in self.slices if set(i.bb.start for i in s).issubset(reachable))
            if next_slices:
                next_states.append(ForwardExplorerState(succ, self.path, branches, next_slices))
        return next_states

    def __lt__(self, other):
        return self.weight < other.weight


class ForwardExplorer(object):
    """Best-first forward search for CFG paths that cover given instruction slices."""

    def __init__(self, cfg, avoid=frozenset()):
        # NOTE(review): ``avoid`` is accepted but not stored; only the ``avoid``
        # argument of find() has an effect.
        self.dist_map = dict()
        self.cfg = cfg
        self.blacklist = set()

    def add_to_blacklist(self, path):
        """Exclude every future path that contains ``path`` (checked via is_substr in find())."""
        self.blacklist.add(tuple(path))

    def weight(self, state):
        """Return the queue priority of ``state``: branches taken plus distance to the nearest slice."""
        if state.finished:
            return state.branches
        return state.branches + min(self.dist_map[s[0].bb.start][state.bb] for s in state.slices)

    def find(self, slices, looplimit=2, avoid=frozenset(), prefix=None):
        """
        Yield paths (lists of block start addresses followed by the final instruction address).

        :param slices: iterable of instruction sequences to cover; instructions without a BB are dropped
        :param looplimit: a state whose current block already occurs more often than this on its
                          path is not expanded further
        :param avoid: instruction names; blocks containing any of them are not entered
        :param prefix: optional start other than the CFG root
                       NOTE(review): it is used both as key into ``cfg._ins_at`` and as initial
                       path -- confirm the intended type with the callers
        """
        avoid = frozenset(avoid)
        slices = tuple(tuple(i for i in s if i.bb) for s in slices)

        if not slices:
            return
        # distance from a BB to instruction
        for instructions in slices:
            for i in instructions:
                if i.bb.start not in self.dist_map:
                    self.dist_map[i.bb.start] = self.cfg.distance_map(i)

        if prefix is None:
            state = ForwardExplorerState(self.cfg.root, [], 0, slices)
        else:
            state = ForwardExplorerState(self.cfg._ins_at[prefix].bb, prefix, 0, slices)
        state.weight = self.weight(state)

        todo = PriorityQueue()
        todo.put(state)

        while not todo.empty():
            state = todo.get()
            if any(is_substr(pth, state.path) for pth in self.blacklist):
                logging.info("BLACKLIST hit for %s" % (', '.join('%x' % i for i in state.path)))
                continue
            if set(i.name for i in state.bb.ins) & avoid:
                continue
            if state.finished:
                for last_pc in state.finished:
                    yield state.path + [last_pc]
                state.finished = set()
                state.slices = tuple(s for s in state.slices if s)
                if not state.slices:
                    continue
            if state.path.count(state.bb.start) > looplimit:
                continue
            for next_state in state.next_states():
                next_state.weight = self.weight(next_state)
                todo.put(next_state)
class TainitAnalysisResult(object):
    """Plain record holding the outcome of one taint-analysis run."""

    def __init__(self, state, defect_type, target_sink, tainted, sources,
                 sload_sha3_bases, sstore_sha3_bases, sstore_slots,
                 slot_live_access, slot_access_trace, storage_slot_type):
        # what was analysed
        self.state, self.defect_type, self.target_sink = state, defect_type, target_sink
        # taint verdict (kept under a private name) and where it came from
        self._tainted, self.sources = tainted, sources
        # storage-related findings
        self.sload_sha3_bases, self.sstore_sha3_bases = sload_sha3_bases, sstore_sha3_bases
        self.sstore_slots = sstore_slots
        self.slot_live_access, self.slot_access_trace = slot_live_access, slot_access_trace
        self.storage_slot_type = storage_slot_type


class TainitAnalysisBugDetails(object):
    """Plain record grouping the per-category findings of the taint analysis."""

    def __init__(self, unbounded_loops, fun_call_restr, loops_with_calls, gas_griefing,
                 hardcoded_gas, asserts, slot_live_access, temp_slots):
        self.unbounded_loops, self.fun_call_restr = unbounded_loops, fun_call_restr
        self.loops_with_calls, self.gas_griefing = loops_with_calls, gas_griefing
        self.hardcoded_gas, self.asserts = hardcoded_gas, asserts
        self.slot_live_access, self.temp_slots = slot_live_access, temp_slots


class AnalysisBugDetails(object):
    """Plain record of access-control check findings."""

    def __init__(self, violated_ac_checks, missing_ac_checks, violated_ac_checks_ib):
        self.violated_ac_checks, self.missing_ac_checks = violated_ac_checks, missing_ac_checks
        self.violated_ac_checks_ib = violated_ac_checks_ib
bb.ins if bb.start in path[:-1] and ins.name =='PUSH4' and p.cfg._ins_at[ins.addr+ins.op-0x5f+1].name=='EQ'} 6 | logging.debug("Path: %s", '->'.join('%x' % p for p in path)) 7 | bbs= {bb:ins.arg.hex() for bb in path[:-1] for ins in cfg._bb_at[bb].ins if ins.name =='PUSH4' and ins.arg.hex() !='ffffffff' and cfg._ins_at[ins.addr+ins.op-0x5f+1].name=='EQ' and int.from_bytes(cfg._ins_at[ins.addr+ins.op-0x5f+2].arg,'big')==path[path.index(bb)+1]} 8 | other_bbs= {bb:ins.arg.hex() for bb in path[:-1] for ins in cfg._bb_at[bb].ins if ins.name =='PUSH4' and ins.arg.hex() !='ffffffff' and ins.addr+ins.op-0x5f+2 in cfg._ins_at and cfg._ins_at[ins.addr+ins.op-0x5f+2].name=='EQ' and int.from_bytes(cfg._ins_at[ins.addr+ins.op-0x5f+3].arg,'big')==path[path.index(bb)+1]} 9 | bbs.update(other_bbs) 10 | #print(bbs) 11 | bbs_indices=[path.index(bb) for bb in bbs.keys()] 12 | if len(bbs_indices)!=0 and type=='name': 13 | with open(os.path.join(os.path.join(os.getcwd(),"src/flow"),"FSignatures.txt"), 'r') as f: 14 | fsig=dict(x.rstrip().split(None,1) for x in f) 15 | return fsig.get('0x'+str(bbs[path[max(bbs_indices)]]),bbs[path[max(bbs_indices)]]) 16 | elif len(bbs_indices)==0 and type=='name': 17 | return '() payable' 18 | elif len(bbs_indices)!=0 and type=='id': 19 | return str(bbs[path[max(bbs_indices)]]) 20 | elif len(bbs_indices)==0 and type=='id': 21 | return '0' 22 | elif len(bbs_indices)!=0 and type=='bb': 23 | return path[max(bbs_indices)] 24 | elif len(bbs_indices)==0 and type=='bb': 25 | return 0 26 | 27 | def function_restricted_caller(p, path): 28 | bbs_check_caller= [bb.start for bb in p.cfg.bbs for ins in bb.ins if bb.start in path[:-2] and len(bb.succ_addrs)>=2 and ins.name in ['CALLER', 'ORIGIN'] and (bb.ins[bb.ins.index(ins)+3].name=='EQ' or bb.ins[bb.ins.index(ins)+1].name=='EQ' or bb.ins[-3].name=='EQ')] 29 | if len(bbs_check_caller)!=0: 30 | return True 31 | return False 32 | -------------------------------------------------------------------------------- 
class InfeasibleExploit(Exception):
    """Raised when a combination of paths cannot be turned into an exploit."""
    pass


class ExploitContext(object):
    """Bundle of the parameters that describe what the exploit has to achieve."""

    def __init__(self, target_addr, shellcode_addr, target_amount, amount_check, initial_balance, initial_storage,
                 controlled_addrs=None):
        self.target_addr = target_addr
        self.shellcode_addr = shellcode_addr
        self.target_amount = target_amount
        self.amount_check = amount_check
        self.initial_balance = initial_balance
        self.initial_storage = initial_storage

        # assume we control the target address
        # (None replaces the former shared ``set()`` default)
        self.controlled_addrs = (set() if controlled_addrs is None else controlled_addrs) | {target_addr}


def _address_constraints(addr, expected_addr):
    """
    Constrain ``addr`` to ``expected_addr``.

    :return: a one-element constraint list for a symbolic address, an empty list for a
             concrete address that already matches
    :raises InfeasibleExploit: if a concrete address differs from ``expected_addr``
    """
    if not concrete(addr):
        # only the low 160 bits are compared
        return [z3.Extract(159, 0, addr) == expected_addr]
    if addr != expected_addr:
        raise InfeasibleExploit
    return []


def exploit_constraints_call(r, ctx):
    """
    Build the extra constraints for a path ending in CALL.

    Reads the address from ``r.state.stack[-2]`` and the amount from ``r.state.stack[-3]``.

    :raises InfeasibleExploit: if the address is concrete and differs from ``ctx.target_addr``
    """
    addr = r.state.stack[-2]
    if not concrete(addr):
        addr = z3.simplify(addr)

    amount = r.state.stack[-3]
    if not concrete(amount):
        amount = z3.simplify(amount)

    extra_constraints = _address_constraints(addr, ctx.target_addr)

    if not concrete(amount):
        if ctx.amount_check == '+':
            extra_constraints.append(z3.UGE(amount, ctx.target_amount))
        elif ctx.amount_check == '-':
            extra_constraints.append(z3.UGT(amount, 0))
            extra_constraints.append(z3.ULE(amount, ctx.target_amount))
        else:
            extra_constraints.append(amount == ctx.target_amount)
    final_balance = r.state.balance
    extra_constraints.append(z3.ULE(amount, final_balance))

    # ensure we're not spending more for this exploit than we gain
    total_spent = None
    for res in r.results:
        callvalue = z3.BitVec('CALLVALUE_%d' % res.xid, 256)
        extra_constraints.append(z3.ULE(callvalue, 10 * (10 ** 18)))  # keep it semi-reasonable: at most 10 Eth per call
        if total_spent is None:
            total_spent = callvalue
        else:
            total_spent += callvalue

    extra_constraints.append(z3.ULT(total_spent, amount))

    # also, ensure the contract does not require a unreasonable start-balance (>100 Eth)
    if not ctx.initial_balance:
        start_balance = z3.BitVec('BALANCE_%d' % r.results[0].xid, 256)
        extra_constraints.append(z3.ULE(start_balance, 100 * (10 ** 18)))

    return extra_constraints


def exploit_constraints_callcode(r, ctx):
    """Constrain the CALLCODE address (``stack[-2]``) to ``ctx.shellcode_addr``."""
    return _address_constraints(z3.simplify(r.state.stack[-2]), ctx.shellcode_addr)


def exploit_constraints_delegatecall(r, ctx):
    """Constrain the DELEGATECALL address (``stack[-2]``) to ``ctx.shellcode_addr``."""
    return _address_constraints(z3.simplify(r.state.stack[-2]), ctx.shellcode_addr)


def exploit_constraints_selfdestruct(r, ctx):
    """Constrain the SELFDESTRUCT address (``stack[-1]``) to ``ctx.target_addr``."""
    return _address_constraints(z3.simplify(r.state.stack[-1]), ctx.target_addr)


# Constraint builder per critical opcode. DELEGATECALL now points at its own
# builder (it used to reuse the CALLCODE one, which applies the same check).
EXPLOIT_CONSTRAINTS = {
    'CALL': exploit_constraints_call,
    'CALLCODE': exploit_constraints_callcode,
    'DELEGATECALL': exploit_constraints_delegatecall,
    'SELFDESTRUCT': exploit_constraints_selfdestruct
}


def get_exploit_constraints(r, ctx):
    """Return the extra constraints for the target opcode of the last result, or [] if none apply."""
    target_op = r.results[-1].target_op
    if target_op in EXPLOIT_CONSTRAINTS:
        return EXPLOIT_CONSTRAINTS[target_op](r, ctx)
    return []


def control_address_constraints(sym_addr, controlled_addrs):
    """
    Return an expression stating that ``sym_addr`` equals one of ``controlled_addrs``.

    ``controlled_addrs`` must not be empty (an empty collection raises IndexError).
    """
    sub_exprs = [sym_addr == controlled_addr for controlled_addr in controlled_addrs]
    expr = sub_exprs[0]
    for sub_expr in sub_exprs[1:]:
        expr = z3.Or(expr, sub_expr)
    return expr


def attempt_exploit(results, ctx):
    """
    Combine the given symbolic results (in call order) and try to solve them as an exploit.

    :param results: symbolic results, first call first
    :param ctx: ExploitContext describing the goal
    :return: tuple (calls derived from the model, combined result, model)
    :raises InfeasibleExploit: if the path is intractable or an ORIGIN in the model is not controlled
    :raises TimeoutException: if solving timed out
    """
    c = CombinedSymbolicResult()
    for r in results[::-1]:
        c.prepend(r)
    c.combine(ctx.initial_storage, ctx.initial_balance)
    c.simplify()
    extra_constraints = get_exploit_constraints(c, ctx)

    # NOTE: per-call ORIGIN/CALLER constraints are currently not added up front;
    # control over ORIGIN is only verified on the resulting model below.
    try:
        model = check_model_and_resolve(c.constraints + extra_constraints, c.sha_constraints)

        # enforce we control all ORIGIN-addresses
        if any(model[v].as_long() not in ctx.controlled_addrs for v in model if v.name().startswith('ORIGIN')):
            raise InfeasibleExploit

        return model_to_calls(model, c.idx_dict), c, model
    except TimeoutException:
        raise TimeoutException("Timed out!")
    except IntractablePath:
        raise InfeasibleExploit
int('0x1234', 16) 178 | shellcode_addr= int('0x1000', 16), +1000 179 | target_amount= +1000 180 | amount_check='+' 181 | initial_storage=dict() 182 | initial_balance=None 183 | max_calls=3 184 | controlled_addrs=set() 185 | 186 | fun_sig_bb = cinfo.get_function_sig(p.cfg, path,'bb') 187 | code_path = path[path.index(fun_sig_bb)+2:-1] 188 | if fun_sig_bb==0: #most probably payable() 189 | c=0 190 | while (fun_sig_bb==0 and c+1 '.join('%x' % p for p in path)) 97 | try: 98 | ins = imap[path[-1]] 99 | yield ins, path, self.run_symbolic(path, inclusive) 100 | except IntractablePath as e: 101 | bad_path = [i for i in e.trace if i in self.cfg._bb_at] + [e.remainingpath[0]] 102 | dd = self.cfg.data_dependence(self.cfg._ins_at[e.trace[-1]]) 103 | if not any(i.name in ('MLOAD', 'SLOAD') for i in dd): 104 | ddbbs = set(i.bb.start for i in dd) 105 | bad_path_start = next((j for j, i in enumerate(bad_path) if i in ddbbs), 0) 106 | bad_path = bad_path[bad_path_start:] 107 | logging.info("Bad path: %s" % (', '.join('%x' % i for i in bad_path))) 108 | exp.add_to_blacklist(bad_path) 109 | continue 110 | except ExternalData: 111 | continue 112 | except TimeoutException: 113 | raise TimeoutException("Timed out!") 114 | except Exception as e: 115 | logging.exception('Failed path due to %s', e) 116 | continue 117 | def _analyze_writes(self): 118 | sstore_ins = self.filter_ins('SSTORE') 119 | self._writes = defaultdict(set) 120 | for store in sstore_ins: 121 | for bs in interesting_slices(store): 122 | bs.append(store) 123 | prg = slice_to_program(bs) 124 | path = sorted(prg.keys()) 125 | try: 126 | r = run_symbolic(prg, path, self.code, inclusive=True) 127 | except IntractablePath: 128 | logging.exception('Intractable Path while analyzing writes') 129 | continue 130 | addr = r.state.stack[-1] 131 | if concrete(addr): 132 | self._writes[addr].add(store) 133 | else: 134 | self._writes[None].add(store) 135 | self._writes = dict(self._writes) 136 | 137 | def get_writes_to (self, addr): 138 
| concrete_writes = set() 139 | if concrete(addr) and addr in self.writes: 140 | concrete_writes = self.writes[addr] 141 | return concrete_writes, self.symbolic_writes 142 | 143 | def reolve_struct_offset(self, ssa, slice, sload=False, sload_ins=None, sstore=False, sstore_ins=None): 144 | function = [f for f in ssa.functions][0] 145 | if sload: 146 | ssa_block=[ins for block in function if block.offset == sload_ins.bb.start for ins in block.insns] 147 | ssa_ins= [s for s in ssa_block if s.offset == sload_ins.addr][0] 148 | elif sstore: 149 | ssa_block=[ins for block in function if block.offset == sstore_ins.bb.start for ins in block.insns] 150 | ssa_ins= [s for s in ssa_block if s.offset == sstore_ins.addr][0] 151 | struct_offset = None 152 | if ssa_ins.arguments[0]._writer is not None: 153 | if ssa_ins.arguments[0]._writer.insn.name =='ADD': 154 | if ssa_ins.arguments[0]._writer.arguments[0]._writer is not None and \ 155 | ssa_ins.arguments[0]._writer.arguments[1]._writer is None: 156 | if ssa_ins.arguments[0]._writer.arguments[0]._writer.insn.name=='SHA3': 157 | struct_offset =ssa_ins.arguments[0]._writer.arguments[1].concrete_value 158 | elif ssa_ins.arguments[0]._writer.arguments[0]._writer is None and \ 159 | ssa_ins.arguments[0]._writer.arguments[1]._writer is not None: 160 | if ssa_ins.arguments[0]._writer.arguments[1]._writer.insn.name=='SHA3': 161 | struct_offset =ssa_ins.arguments[0]._writer.arguments[0].concrete_value 162 | 163 | return struct_offset 164 | 165 | def resolve_slot_offset(self, ssa, slice, sload=False, sload_ins=None, sstore=False, sstore_ins=None): 166 | function = [f for f in ssa.functions][0] 167 | if sload: 168 | if [ins.name for ins in slice if ins.name in set(['SLOAD','EXP','DIV','SUB'])] ==['SLOAD','EXP','DIV']: 169 | exp_ins = [ins for ins in slice if ins.name in set(['EXP'])] 170 | ssa_block=[ins for block in function if block.offset == exp_ins[0].bb.start for ins in block.insns] 171 | ssa_ins =[s for s in ssa_block if s.offset == 
exp_ins[0].addr] 172 | if (ssa_ins[0].arguments[0].concrete_value==256): 173 | start_byte=ssa_ins[0].arguments[1].concrete_value+1 174 | elif (ssa_ins[0].arguments[0].concrete_value==2): #in binary 175 | start_byte=ssa_ins[0].arguments[1].concrete_value/8+1 176 | elif [ins.name for ins in slice if ins.name in set(['SLOAD','EXP','DIV','SUB'])] ==['SLOAD','EXP','SUB']: 177 | #load starting first bye 178 | start_byte=1 179 | elif [ins.name for ins in slice if ins.name in set(['SLOAD','EXP','SUB','DIV'])] ==['SLOAD','EXP','SUB','DIV']: 180 | div_ins = [ins for ins in slice if ins.name in set(['DIV'])] 181 | ssa_block=[ins for block in function if block.offset == div_ins[0].bb.start for ins in block.insns] 182 | ssa_ins =[s for s in ssa_block if s.offset == div_ins[0].addr] 183 | if ssa_ins[0].arguments[1]._writer is None: 184 | pos_str=str('%x' %ssa_ins[0].arguments[1].concrete_value) 185 | start_byte=len(pos_str)//2+1-pos_str.find('1') 186 | else: 187 | exp_ins=ssa_ins[0].arguments[1]._writer 188 | if exp_ins.insn.name =='EXP': 189 | if (exp_ins.arguments[0].concrete_value==256): 190 | start_byte= exp_ins.arguments[1].concrete_value+1 191 | elif (exp_ins.arguments[0].concrete_value==2): #in binary 192 | start_byte=exp_ins.arguments[1].concrete_value/8+1 193 | else: 194 | start_byte='whole' 195 | print('error, check resolve_slot_offset') 196 | else: 197 | start_byte='whole' 198 | elif sstore: 199 | start_byte=None 200 | masking_pattern = [ins.name for ins in slice if ins.name in set(['SLOAD','EXP','SUB','NOT'])] 201 | if len(masking_pattern)==0: 202 | start_byte='whole' # overapproximate the whole slot 203 | 204 | elif 'NOT' not in masking_pattern or 'SLOAD' not in masking_pattern: #Cannot decide what is overwritten without SLOAD 205 | start_byte='whole' #overapproximate the whole slot 206 | 207 | elif len([i for i in masking_pattern if i== 'NOT'])>1:# we may need to overapproximate as we do not know which not is for masking 208 | start_byte='whole' #overapproximate 
the whole slot 209 | 210 | elif len([i for i in masking_pattern if i == 'NOT'])==1 and 'SLOAD' in masking_pattern: 211 | not_ins =[ins for ins in slice if ins.name =='NOT'] 212 | ssa_block=[ins for block in function if block.offset ==not_ins[0].bb.start for ins in block.insns] 213 | ssa_not_ins =[s for s in ssa_block if s.offset in [ins.addr for ins in slice if ins.name in set(['NOT'])]] 214 | if ssa_not_ins[0].arguments[0]._writer is None: 215 | pos_str=str('%x' %ssa_not_ins[0].arguments[0].concrete_value) 216 | start_byte=(len(pos_str)-pos_str.rfind('f'))//2+1 217 | else: 218 | ssa_ins=ssa_not_ins[0].arguments[0]._writer.arguments[1]._writer 219 | if ssa_ins is not None and ssa_ins.insn.name =='EXP' and ssa_ins.arguments[0]._writer is None and ssa_ins.arguments[1]._writer is None: 220 | exp = pow(ssa_ins.arguments[0].concrete_value, ssa_ins.arguments[1].concrete_value, src.util.utils.TT256) 221 | mul = None 222 | if ssa_not_ins[0].arguments[0]._writer.arguments[0]._writer is None: 223 | mul = ssa_not_ins[0].arguments[0]._writer.arguments[0].concrete_value * exp 224 | else: 225 | sub_ins=ssa_not_ins[0].arguments[0]._writer.arguments[0]._writer 226 | if sub_ins.insn.name =='SUB': 227 | if sub_ins.arguments[0]._writer.insn.name=='EXP': 228 | exp1 = pow(sub_ins.arguments[0]._writer.arguments[0].concrete_value, sub_ins.arguments[0]._writer.arguments[1].concrete_value, src.util.utils.TT256) 229 | sub = exp1 - sub_ins.arguments[1].concrete_value 230 | mul = sub * exp 231 | if mul is not None: 232 | bit_mask = '%x' % (src.util.utils.TT256M1 - mul) 233 | start_byte= (len(bit_mask)-bit_mask.rfind('0'))//2+1 234 | elif ssa_not_ins[0].arguments[0]._writer is not None and ssa_not_ins[0].arguments[0]._writer.insn.name=='SUB': 235 | sub_ins=ssa_not_ins[0].arguments[0]._writer 236 | if sub_ins.arguments[0]._writer is not None and sub_ins.arguments[0]._writer.insn.name=='EXP': 237 | exp_ins = sub_ins.arguments[0]._writer 238 | if exp_ins.arguments[0]._writer is None and 
exp_ins.arguments[1]._writer is None: 239 | exp = pow(exp_ins.arguments[0].concrete_value, exp_ins.arguments[1].concrete_value, src.util.utils.TT256) 240 | sub = exp - sub_ins.arguments[1].concrete_value 241 | bit_mask = '%x' % (src.util.utils.TT256M1 - sub) 242 | start_byte= (len(bit_mask)-bit_mask.rfind('0'))//2+1 243 | elif sub_ins.arguments[0]._writer is not None and sub_ins.arguments[0]._writer.insn.name=='SHL': 244 | shl_ins = sub_ins.arguments[0]._writer 245 | if shl_ins.arguments[0]._writer is None and shl_ins.arguments[1]._writer is None: 246 | shl= (shl_ins.arguments[1].concrete_value << shl_ins.arguments[0].concrete_value) 247 | sub = shl- sub_ins.arguments[1].concrete_value 248 | bit_mask = '%x' % (src.util.utils.TT256M1 - sub) 249 | start_byte= (len(bit_mask)-bit_mask.rfind('0'))//2+1 250 | else: 251 | start_byte='whole' #overapproximate the whole slot 252 | 253 | if not start_byte: 254 | print(sstore_ins) 255 | print(masking_pattern) 256 | return start_byte 257 | 258 | def resolve_access_control_slots(self, ssa, instructions, ac_check_ins, args=None, memory_info=None, restricted=True): 259 | slices = [] 260 | other_ac_checks = [] 261 | # only check instructions that have a chance to reach root 262 | instructions = [ins for ins in instructions if 0 in ins.bb.ancestors | {ins.bb.start}] 263 | if not instructions: 264 | return 265 | imap = {ins.addr: ins for ins in instructions} 266 | access_sloads = defaultdict(list) 267 | if args: 268 | for jump_ins in instructions: 269 | for bs in interesting_slices(jump_ins, args, reachable=True, restricted=False): 270 | if('%x' %jump_ins.addr) == '2f9': 271 | print(jump_ins) 272 | print(bs) 273 | cur_jump_sloads= [v['sload'] for k in access_sloads if k==jump_ins for v in access_sloads[k]] 274 | if len(cur_jump_sloads)!=0 and any(ins in cur_jump_sloads for ins in bs if ins.name in frozenset(['SLOAD'])): 275 | slices.append(bs+(jump_ins,)) 276 | elif len(set(ac_check_ins)&set([ins.name for ins in 
bs]))==len(ac_check_ins) and not any(ins.name in frozenset(['CALL']) for ins in bs): 277 | slices.append(bs+(jump_ins,)) 278 | sload= [i for i in bs if i.name in frozenset(['SLOAD'])] 279 | slot_byte= self.resolve_slot_offset(ssa, bs+(jump_ins,), sload=True, sload_ins=sload[0]) 280 | struct_offset= self.reolve_struct_offset(ssa, bs+(jump_ins,), sload=True, sload_ins=sload[0] ) 281 | access_sloads[jump_ins].append({'sload':sload[0],'sbyte':slot_byte,'structOffset':struct_offset}) 282 | elif any(ins.name in frozenset(['SLOAD']) for ins in bs) and (any(ins.arg==b'\xff' for ins in bs if ins.name in frozenset(['PUSH1'])) or \ 283 | any(ins.name in frozenset(['CALLDATALOAD','CALLDATACOPY']) for bb in jump_ins.bb.pred for ins in bb.ins)) and not any(ins.name in frozenset(['CALL']) for ins in bs): 284 | if any(ins.arg==b'\xff' for ins in bs if ins.name in frozenset(['PUSH1'])): 285 | sload_ins=[ins for ins in bs if ins.name in frozenset(['SLOAD']) if any( 286 | ss.arg==b'\xff' and ins.addr < ss.addr 0: 332 | soffset= self.reolve_struct_offset(ssa, s+(ins,),sload=True, sload_ins=sload_ins[0]) 333 | struct_offset[ins]=soffset 334 | 335 | checked_ins=[] 336 | c=0 337 | start_time=time.time() 338 | for path in exp.find(slices, avoid=[]): 339 | logging.debug('Path %s', ' -> '.join('%x' % p for p in path)) 340 | c+=1 341 | try: 342 | ins = imap[path[-1]] 343 | if sinks: 344 | result = run_static(self.prg, ssa, path, sinks, self.code, inclusive,defect_type=defect_type, storage_slots=storage_slots, storage_sha3_bases=storage_sha3_bases) 345 | if result._tainted and ins.name in set(['SSTORE']): 346 | sstore_slices = [s+(ins,) for s in interesting_slices(ins, [0], memory_info, reachable=True, taintedBy=None, restricted=False)] 347 | soffset= self.reolve_struct_offset(ssa, sstore_slices[0],sstore=True, sstore_ins=ins) 348 | struct_offset[ins]=soffset 349 | yield ins,slot_sbyte,struct_offset, path, result 350 | else: 351 | yield ins, slot_sbyte, struct_offset, path, None 352 | except 
class SlicingState(object):
    """Immutable-ish snapshot of a backward-slicing traversal.

    The constructor normalises its collection arguments so that instances can
    be hashed and compared: ``taintmap`` becomes a ``frozenset`` and
    ``backward_slice`` / ``instructions`` become tuples.

    :param stacksize: current (virtual) stack size while walking backwards
    :param stack_underflow: lowest stack position touched so far
    :param stack_delta: accumulated stack delta of skipped instructions
    :param taintmap: iterable of tainted stack positions
    :param memory_taint: tainted memory range (stored as given)
    :param backward_slice: instructions collected for the slice so far
    :param instructions: instructions of the current block still to process
    """

    def __init__(self, stacksize, stack_underflow, stack_delta, taintmap, memory_taint, backward_slice, instructions):
        self.stacksize = stacksize
        self.stack_underflow = stack_underflow
        self.stack_delta = stack_delta
        self.taintmap = frozenset(taintmap)
        self.memory_taint = memory_taint
        # The actual slice doesn't matter that much. What matters more is the resulting EXPRESSION of the return-address
        self.backward_slice = tuple(backward_slice)
        self.instructions = tuple(instructions)

    def __hash__(self):
        # Weighted sum over a subset of the fields compared in __eq__
        # (memory_taint is left out of the hash; equal objects still hash equal).
        return sum(
            a * b for a, b in zip((23, 29, 31, 37, 41), (
                self.stacksize, self.stack_delta, hash(self.taintmap), hash(self.instructions),
                hash(self.backward_slice))))

    def __eq__(self, other):
        # Comparing against a foreign type must not blow up with AttributeError;
        # returning NotImplemented lets Python fall back to identity comparison.
        if not isinstance(other, SlicingState):
            return NotImplemented
        return (
            self.stacksize == other.stacksize and
            self.stack_delta == other.stack_delta and
            self.taintmap == other.taintmap and
            self.memory_taint == other.memory_taint and
            self.backward_slice == other.backward_slice and
            self.instructions == other.instructions)

    def __str__(self):
        return 'Stacksize: %d, Underflow: %d, Delta: %d, Map: %s, Slice: %s, Instructions: %s' % (
            self.stacksize, self.stack_underflow, self.stack_delta, self.taintmap,
            ','.join('%x' % i.addr for i in self.backward_slice),
            ','.join('%x' % i.addr for i in self.instructions))
ins.op <= 0x9f: # Special handling for SWAP 80 | in_idx = ins.op - 0x8f 81 | if stacksize - 1 in taintmap or (stacksize - 1) - in_idx in taintmap: 82 | add_to_slice = True 83 | if stacksize - 1 in taintmap and (stacksize - 1) - in_idx in taintmap: 84 | # both tainted => taint does not change 85 | pass 86 | elif stacksize - 1 in taintmap: 87 | taintmap.remove(stacksize - 1) 88 | taintmap.add((stacksize - 1) - in_idx) 89 | elif (stacksize - 1) - in_idx in taintmap: 90 | taintmap.remove((stacksize - 1) - in_idx) 91 | taintmap.add(stacksize - 1) 92 | else: # assume entire stack is affected otherwise 93 | add_to_slice = True 94 | taintmap -= set(range(stacksize - ins.outs, stacksize)) 95 | taintmap |= set(range(stacksize - ins.outs, stacksize - ins.delta)) 96 | 97 | if add_to_slice: 98 | adjust_stack(backward_slice, stack_delta) 99 | stack_delta = -ins.delta 100 | backward_slice.append(ins) 101 | stack_underflow = min(stack_underflow, stacksize - ins.outs) 102 | if memory_info and ins in memory_info: 103 | ins_info = memory_info[ins] 104 | memory_taint = memory_taint - ins_info.writes + ins_info.reads 105 | 106 | stacksize -= ins.delta 107 | # no taint left? then our job here is done 108 | if not taintmap and not memory_taint: 109 | stack_adjust = stacksize - stack_underflow 110 | if stack_adjust > 0: 111 | adjust_stack(backward_slice, stack_adjust) 112 | return SlicingState(stacksize, stack_underflow, stack_delta, set(taintmap), memory_taint, 113 | list(backward_slice), 114 | []) 115 | 116 | stack_delta += ins.delta 117 | 118 | # still taint left? 
def backward_slice(ins, taint_args=None, memory_info=None, initial_gas=10, must_visits=None, reachable=False):
    """Compute backward slices for the stack (and memory) inputs of ``ins``.

    :param ins: instruction to slice from; ``ins.ins`` is used as the number
        of stack slots to taint and ``ins.bb`` as its basic block
    :param taint_args: indices of the inputs to track; when falsy, all
        ``ins.ins`` inputs are tainted
    :param memory_info: optional mapping instruction -> memory info; if it
        contains ``ins`` its ``reads`` range seeds the memory taint
    :param initial_gas: budget handed to ``traverse_back``
    :param must_visits: forwarded to ``traverse_back`` (defaults to an empty list)
    :param reachable: if true, only keep slices whose first instruction that
        belongs to a basic block is block 0 or has block 0 among its ancestors
    :return: list of slices, each in program order (reversed collection order)
    """
    # logging.debug('backward_slice called')
    if ins.ins == 0:
        return []
    # A fresh list per call avoids sharing one mutable default between calls.
    if must_visits is None:
        must_visits = []
    if taint_args:
        taintmap = set((ins.ins - 1) - i for i in taint_args)
    else:
        taintmap = set(range(ins.ins))
    if memory_info and ins in memory_info:
        memory_taint = memory_info[ins].reads
    else:
        memory_taint = Range()

    def initial_data(ins):
        # Start with an empty slice and the instructions preceding `ins` in its block.
        idx = ins.bb.ins.index(ins)
        return SlicingState(ins.ins, 0, 0, taintmap, memory_taint, [], ins.bb.ins[:idx])

    def advance_data(slicing_state):
        return advance_slice(slicing_state, memory_info)

    def update_data(slicing_state, new_bb):
        # Same state, but continue with the instructions of the predecessor block.
        return SlicingState(slicing_state.stacksize, slicing_state.stack_underflow, slicing_state.stack_delta,
                            set(slicing_state.taintmap), slicing_state.memory_taint,
                            list(slicing_state.backward_slice), new_bb.ins)

    def finish_path(slicing_state):
        return not slicing_state.taintmap and not slicing_state.memory_taint

    # logging.debug('Before loop')
    slices = [r.backward_slice[::-1] for r in
              traverse_back([ins], initial_gas, initial_data, advance_data, update_data, finish_path, must_visits)]
    if not reachable:
        return slices

    filtered_slices = []
    for candidate in slices:
        # Padding instructions may carry no basic block; a slice made only of
        # those cannot be shown reachable, so it is skipped instead of letting
        # a bare next() leak StopIteration.
        first_bb = next((i.bb for i in candidate if i.bb), None)
        if first_bb is None:
            continue
        if 0 in first_bb.ancestors | {first_bb.start}:
            filtered_slices.append(candidate)
    return filtered_slices
class UninitializedRead(Exception):
    """Raised for a read of a location that has not been written yet.

    ``index`` is either a single position or a ``slice``; it is normalised
    into the half-open interval ``[start, end)``. For a slice without an upper
    bound ``end`` stays ``None``.
    """

    def __init__(self, index, *args):
        super(UninitializedRead, self).__init__(*args)
        if isinstance(index, slice):
            self.start = index.start or 0
            self.end = index.stop
        else:
            self.start = index
            self.end = index + 1

    def _describe(self):
        # An open-ended slice has end=None, which '%d' cannot format; render it
        # symbolically so that printing the exception never raises itself.
        end = 'end' if self.end is None else '%d' % self.end
        return '%s from: %d to %s' % (super(UninitializedRead, self).__repr__(), self.start, end)

    def __repr__(self):
        return self._describe()

    def __str__(self):
        return self._describe()
[] 42 | 43 | read = False 44 | write = False 45 | 46 | if ins.name in storage_reads: 47 | read = True 48 | read_slot_info = storage_reads[ins.name] 49 | if read_slot_info < 0: 50 | targets.append(-1 - read_slot_info) 51 | 52 | if ins.name in storage_writes: 53 | write = True 54 | write_slot_info = storage_writes[ins.name] 55 | if write_slot_info < 0: 56 | targets.append(-1 - write_slot_info) 57 | 58 | if not read and not write: 59 | return None 60 | bs = backward_slice(ins, targets, memory_info) 61 | read_slot = set() 62 | read_slot_sha3_base= dict() 63 | write_slot = set() 64 | write_slot_sha3_base= dict() 65 | for b in bs: 66 | try: 67 | state = run(slice_to_program(b), EVMState(code=code), check_initialized=False) 68 | except UninitializedRead as e: 69 | raise e 70 | except ExternalData as e: 71 | raise e 72 | if read: 73 | new_slot = state.stack[read_slot_info] if read_slot_info < 0 else read_slot_info 74 | if new_slot not in read_slot: 75 | read_slot.add(new_slot) 76 | sha3_ins=[ins for ins in b if ins.name=='SHA3'] 77 | mstore_ins=[ins for ins in b if ins.name=='MSTORE'] 78 | if len(sha3_ins)==1 and len(mstore_ins)==1: 79 | read_slot_sha3_base[new_slot]=src.util.utils.bytearray_to_int(state.memory[0:32]) 80 | elif len(sha3_ins)>=1 and len(mstore_ins)>=2: 81 | read_slot_sha3_base[new_slot]=src.util.utils.bytearray_to_int(state.memory[32:64]) 82 | if write: 83 | new_slot = state.stack[write_slot_info] if write_slot_info < 0 else write_slot_info 84 | if new_slot not in write_slot: 85 | write_slot.add(new_slot) 86 | sha3_ins=[ins for ins in b if ins.name=='SHA3'] 87 | mstore_ins=[ins for ins in b if ins.name=='MSTORE'] 88 | if len(sha3_ins)==1 and len(mstore_ins)==1: 89 | write_slot_sha3_base[new_slot]=src.util.utils.bytearray_to_int(state.memory[0:32]) 90 | elif len(sha3_ins)>=1 and len(mstore_ins)>=2: 91 | write_slot_sha3_base[new_slot]=src.util.utils.bytearray_to_int(state.memory[32:64]) 92 | return StorageInfo(read_slot, 
def resolve_all_storage(cfg, code, memory_info=None):
    """Resolve storage information for every storage-touching instruction.

    Instructions whose name is in ``storage_reads`` or ``storage_writes`` are
    processed round by round: an instruction whose resolution raises is put
    aside and retried in the next round, and rounds continue as long as the
    previous one resolved at least one instruction.

    :param cfg: control-flow graph; ``cfg.bbs`` is iterated for instructions
    :param code: passed through to ``get_storage_info``
    :param memory_info: passed through to ``get_storage_info``
    :return: dict mapping instruction -> result of ``get_storage_info``
    :raises TimeoutException: re-raised (as a fresh instance) on timeout
    """
    resolved = {}
    pending = deque()
    for block in cfg.bbs:
        for candidate in block.ins:
            if candidate.name in storage_reads or candidate.name in storage_writes:
                pending.append(candidate)

    worklist = deque()
    made_progress = True
    while worklist or (made_progress and pending):
        if not worklist:
            # Start the next round with everything that failed so far.
            worklist, pending = pending, deque()
            made_progress = False
        current = worklist.popleft()
        try:
            info = get_storage_info(current, code, memory_info)
            if info:
                made_progress = True
                resolved[current] = info
        except TimeoutException:
            raise TimeoutException("Timed out!")
        except Exception:
            # Could not be resolved in this round; retry it later.
            pending.append(current)
    return resolved
22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. 
You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. 
(Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /src/util/__init__.py: -------------------------------------------------------------------------------- 1 | from . import frontierset 2 | from . import intrange 3 | from . import utils 4 | from . 
class FrontierSet(object):
    """
    A set that also maintains a partial topological ordering
    The current set of "non-blocked" items can be obtained as
    .frontier
    """

    def __init__(self, data=None):
        # item -> prerequisites that still block it
        self._inhibiting_set = defaultdict(set)
        # item -> items it is a prerequisite for
        self._blocking_set = defaultdict(set)
        # (prerequisite, dependent) pairs
        self._edges = set()
        self._frontier = set()
        # Lazily built frozen views, dropped on every mutation
        self._frozenedges = None
        self._frozenfrontier = None
        self._frozenall = None
        if data:
            for d in data:
                self.add(d)

    def _invalidate(self):
        self._frozenedges = None
        self._frozenfrontier = None
        self._frozenall = None

    @property
    def edges(self):
        if self._frozenedges is None:
            self._frozenedges = frozenset(self._edges)
        return self._frozenedges

    @property
    def frontier(self):
        if self._frozenfrontier is None:
            self._frozenfrontier = frozenset(self._frontier)
        return self._frozenfrontier

    @property
    def all(self):
        if self._frozenall is None:
            self._frozenall = frozenset(
                set(self._blocking_set.keys()) | set(self._inhibiting_set.keys()) | self._frontier)
        return self._frozenall

    def add(self, a, b=None):
        """
        Add a to the set.
        If b is given, require that a is a necessary prerequisite for b
        :param a: item to add
        :param b: optional item that must wait for a
        :return: None
        """
        self._invalidate()
        # `is not None` rather than truthiness: 0 is a legitimate item.
        if b is not None:
            self._edges.add((a, b))
            self._inhibiting_set[b].add(a)
            self._blocking_set[a].add(b)
            if not self._inhibiting_set[a]:
                self._frontier.add(a)
            self._frontier.discard(b)
        else:
            self._frontier.add(a)

    def remove(self, a):
        """Remove a, unblocking every item for which a was the last prerequisite."""
        self._invalidate()
        for b in self._blocking_set[a]:
            # Edges are stored as (prerequisite, dependent), i.e. (a, b) here.
            self._edges.discard((a, b))
            self._inhibiting_set[b].discard(a)
            if not self._inhibiting_set[b]:
                self._frontier.add(b)
        for c in self._inhibiting_set[a]:
            self._edges.discard((c, a))
            self._blocking_set[c].discard(a)
        del self._blocking_set[a]
        del self._inhibiting_set[a]
        self._frontier.discard(a)

    def copy(self):
        """Return an independent copy (the inner sets are copied as well)."""
        new = FrontierSet()
        new._inhibiting_set = defaultdict(set, ((k, set(v)) for k, v in self._inhibiting_set.items()))
        new._blocking_set = defaultdict(set, ((k, set(v)) for k, v in self._blocking_set.items()))
        new._edges = self._edges.copy()
        new._frontier = self._frontier.copy()
        new._invalidate()
        return new

    def issubset(self, other):
        return self.all.issubset(other.all) and self.edges.issubset(other.edges)

    def __len__(self):
        return len(self.all)

    def __eq__(self, other):
        if not isinstance(other, FrontierSet):
            return NotImplemented
        return self.edges == other.edges and self.all == other.all

    def __hash__(self):
        return 3 * hash(self.edges) + 7 * hash(self.all)

    def __iter__(self):
        return iter(self.all)

    def __repr__(self):
        return '{%s|%s}' % (
            ','.join('%x' % i for i in self.frontier), ','.join('%x' % i for i in self.all - self.frontier))
None and not end is None and start < end: 7 | self.points = ((start, Range.START), (end, Range.END)) 8 | elif points: 9 | self.points = tuple(points) 10 | else: 11 | self.points = tuple() 12 | 13 | def __munch__(self, other, min_depth): 14 | depth = 0 15 | points = [] 16 | for i, t in sorted(self.points + other.points): 17 | if depth == min_depth - 1 and t == Range.START: 18 | if points and i == points[-1][0]: 19 | del points[-1] 20 | else: 21 | points.append((i, Range.START)) 22 | elif depth == min_depth and t == Range.END: 23 | if points and i == points[-1][0]: 24 | del points[-1] 25 | else: 26 | points.append((i, Range.END)) 27 | depth += 1 if t == Range.START else -1 28 | return Range(points=points) 29 | 30 | def __add__(self, other): 31 | return self.__munch__(other, 1) 32 | 33 | def __and__(self, other): 34 | return self.__munch__(other, 2) 35 | 36 | def __sub__(self, other): 37 | return self + Range(points=[(i, 1 - t) for i, t in other.points]) 38 | 39 | def __contains__(self, other): 40 | if not isinstance(other, Range): 41 | other = Range(other, other + 1) 42 | return not (other - self).points 43 | 44 | def __or__(self, other): 45 | return self + other 46 | 47 | def __xor__(self, other): 48 | return (self - other) + (other - self) 49 | 50 | def __eq__(self, other): 51 | return not self ^ other 52 | 53 | def __hash__(self): 54 | return hash(self.points) 55 | 56 | def __cmp__(self, other): 57 | for (a, _), (b, _) in zip(self.points, other.points): 58 | if a != b: 59 | return a - b 60 | else: 61 | l1, l2 = len(self), len(other) 62 | return l1 - l2 63 | 64 | def __len__(self): 65 | return sum(b - a for (a, _), (b, _) in zip(self.points[::2], self.points[1::2])) 66 | 67 | def __repr__(self): 68 | return 'Range(' + str(self) + ')' 69 | 70 | def __str__(self): 71 | return ','.join('[%d, %d)' % (a, b) for (a, _), (b, _) in zip(self.points[::2], self.points[1::2])) 72 | -------------------------------------------------------------------------------- 
def unique(l):
    """Yield the items of ``l`` with consecutive duplicates collapsed.

    Only adjacent repetitions are removed (like ``uniq``); an item that
    reappears later is yielded again.

    :param l: any iterable
    :return: generator over the de-duplicated items
    """
    # A private sentinel instead of None, so that a leading None (or anything
    # comparing equal to None) in the input is not silently dropped.
    unset = object()
    last = unset
    for i in l:
        if last is unset or i != last:
            yield i
        last = i
of b 93 | p = 0 94 | for x in a: 95 | try: 96 | p = b.index(x, p) + 1 97 | except ValueError: 98 | return False 99 | return True 100 | 101 | 102 | def is_substr(a, b): 103 | a = tuple(a) 104 | b = tuple(b) 105 | # True iff a is a substring of b 106 | p = 0 107 | l = len(a) 108 | while True: 109 | try: 110 | p = b.index(a[0], p) 111 | if b[p:p + l] == a: 112 | return True 113 | p += 1 114 | except ValueError: 115 | break 116 | return False 117 | -------------------------------------------------------------------------------- /src/util/z3_extra_util.py: -------------------------------------------------------------------------------- 1 | import numbers 2 | 3 | import z3 4 | 5 | 6 | def to_bytes(v): 7 | return v.as_long().to_bytes(length=(v.size()+7)//8, byteorder='big') 8 | 9 | 10 | def get_vars(f, rs=set()): 11 | """ 12 | shameless copy of z3util.get_vars, 13 | but returning select-operations as well. 14 | E.g. 15 | >>> x = z3.Array('x', z3.IntSort(), z3.IntSort()) 16 | >>> get_vars(x[5]) 17 | [x[5]] 18 | whereas 19 | >>> x = z3.Array('x', z3.IntSort(), z3.IntSort()) 20 | >>> z3util.get_vars(x[5]) 21 | [x] 22 | """ 23 | if not rs: 24 | f = z3.simplify(f) 25 | 26 | if f.decl().kind() == z3.Z3_OP_SELECT: 27 | arr, idx = f.children() 28 | if z3.is_const(arr): 29 | if z3.z3util.is_expr_val(idx): 30 | return rs | {f} 31 | else: 32 | return rs | {f, idx} 33 | if z3.is_const(f): 34 | if z3.z3util.is_expr_val(f): 35 | return rs 36 | else: # variable 37 | return rs | {f} 38 | 39 | else: 40 | for f_ in f.children(): 41 | rs = get_vars(f_, rs) 42 | 43 | return set(rs) 44 | 45 | 46 | def get_vars_non_recursive(f, include_select=False, include_indices=True): 47 | todo = [f] 48 | rs = set() 49 | seen = set() 50 | while todo: 51 | expr = todo.pop() 52 | if expr.get_id() in seen: 53 | continue 54 | seen.add(expr.get_id()) 55 | if include_select and expr.decl().kind() == z3.Z3_OP_SELECT: 56 | arr, idx = expr.children() 57 | if z3.is_const(arr): 58 | if not include_indices or 
z3.z3util.is_expr_val(idx): 59 | rs.add(expr) 60 | else: 61 | rs.add(expr) 62 | todo.append(idx) 63 | else: 64 | todo.extend(expr.children()) 65 | elif z3.is_const(expr): 66 | if not z3.z3util.is_expr_val(expr): 67 | rs.add(expr) 68 | else: 69 | todo.extend(expr.children()) 70 | 71 | return rs 72 | 73 | 74 | def concrete(v): 75 | return isinstance(v, numbers.Number) 76 | 77 | 78 | def is_false(cond): 79 | s = z3.SolverFor("QF_ABV") 80 | s.add(cond) 81 | return s.check() == z3.unsat 82 | 83 | 84 | def is_true(cond): 85 | # NOTE: This differs from `not is_false(cond)`, which corresponds to "may be true" 86 | return is_false(z3.Not(cond)) 87 | 88 | 89 | def simplify_non_const_hashes(expr, sha_ids): 90 | while True: 91 | expr = z3.simplify(expr, expand_select_store=True) 92 | sha_subst = get_sha_subst_non_recursive(expr, sha_ids) 93 | if not sha_subst: 94 | break 95 | expr = z3.substitute(expr, [(s, z3.BoolVal(False)) for s in sha_subst]) 96 | return expr 97 | 98 | 99 | def is_simple_expr(expr): 100 | """ 101 | True if expr does not contain an If, Store, or Select statement 102 | :param expr: the expression to check 103 | :return: True, iff expr does not contain If, Store, or Select 104 | """ 105 | 106 | if expr.decl().kind() in {z3.Z3_OP_ITE, z3.Z3_OP_SELECT, z3.Z3_OP_STORE}: 107 | return False 108 | else: 109 | return all(is_simple_expr(c) for c in expr.children()) 110 | 111 | 112 | def ast_eq(e1, e2, simplified=False): 113 | if not simplified: 114 | e1 = z3.simplify(e1) 115 | e2 = z3.simplify(e2) 116 | if e1.sort() != e2.sort(): 117 | return False 118 | if e1.decl().kind() != e2.decl().kind(): 119 | return False 120 | if z3.z3util.is_expr_val(e1) and z3.z3util.is_expr_val(e2): 121 | return e1.as_long() == e2.as_long() 122 | return all(ast_eq(c1, c2, True) for c1, c2 in zip(e1.children(), e2.children())) 123 | 124 | 125 | def get_sha_subst_non_recursive(f, sha_ids): 126 | import timeit 127 | start = timeit.default_timer() 128 | todo = [z3.simplify(f, 
expand_select_store=True)] 129 | rs = set() 130 | seen = set() 131 | subexprcount = 0 132 | while todo: 133 | expr = todo.pop() 134 | subexprcount += 1 135 | if expr.get_id() in seen: 136 | continue 137 | seen.add(expr.get_id()) 138 | if expr.decl().kind() == z3.Z3_OP_EQ and all(is_simple_expr(c) for c in expr.children()): 139 | l, r = expr.children() 140 | lvars, rvars = [{v.get_id() for v in get_vars_non_recursive(e, True)} for e in (l, r)] 141 | 142 | sha_left = bool(lvars & sha_ids) 143 | sha_right = bool(rvars & sha_ids) 144 | 145 | if sha_left and sha_right: 146 | # both sides use a sha-expression 147 | # => can be equal only if ASTs are equal 148 | if not ast_eq(l, r): 149 | rs.add(expr) 150 | 151 | elif sha_left ^ sha_right: 152 | # only one side uses a sha-expression 153 | # => assume not equal (e.g. SHA == 5 seems unlikely) 154 | rs.add(expr) 155 | 156 | else: 157 | todo.extend(expr.children()) 158 | 159 | end = timeit.default_timer() 160 | # logging.info("get_sha_subst_non_recursive took %d microseconds (%d subexpressions)", (end-start)*1000000.0, subexprcount) 161 | return rs 162 | --------------------------------------------------------------------------------