├── CVE-2021-34273.code
├── LICENSE
├── README.md
├── bin
    └── achecker.py
├── requirements.txt
├── setup.py
└── src
    ├── __init__.py
    ├── cfg
        ├── __init__.py
        ├── bb.py
        ├── cfg.py
        ├── disassembly.py
        ├── instruction.py
        ├── opcodes.py
        └── rattle
        │   ├── LICENSE
        │   ├── __init__.py
        │   ├── analyze.py
        │   ├── evmasm.py
        │   ├── hashes.py
        │   ├── recover.py
        │   └── ssa.py
    ├── constraints.py
    ├── evm
        ├── __init__.py
        ├── evm.py
        ├── exceptions.py
        ├── results.py
        └── state.py
    ├── exploit.py
    ├── explorer
        ├── __init__.py
        ├── backward.py
        └── forward.py
    ├── flow
        ├── FSignatures.txt
        ├── __init__.py
        ├── analysis_results.py
        ├── code_info.py
        ├── symbolic.py
        └── tainting.py
    ├── memory.py
    ├── project.py
    ├── slicing.py
    ├── storage.py
    ├── teEther_LICENSE
    └── util
        ├── __init__.py
        ├── frontierset.py
        ├── intrange.py
        ├── utils.py
        └── z3_extra_util.py


/CVE-2021-34273.code:
--------------------------------------------------------------------------------
1 | 606060405236156100c3576000357c0100000000000000000000000000000000000000000000000000000000900463ffffffff16806306fdde03146100d3578063095ea7b31461016157806318160ddd146101bb57806323b872dd146101e4578063313ce5671461025d57806370a082311461028c5780638da5cb5b146102d957806395d89b411461032e578063a9059cbb146103bc578063a9c7648f14610416578063dd62ed3e14610479578063df32754b146104e5578063f2fde38b146104fa575b34156100ce57600080fd5b600080fd5b34156100de57600080fd5b6100e6610533565b6040518080602001828103825283818151815260200191508051906020019080838360005b8381101561012657808201518184015260208101905061010b565b50505050905090810190601f1680156101535780820380516001836020036101000a031916815260200191505b509250505060405180910390f35b341561016c57600080fd5b6101a1600480803573ffffffffffffffffffffffffffffffffffffffff169060200190919080359060200190919050506105d1565b604051808215151515815260200191505060405180910390f35b34156101c657600080fd5b6101ce6106c3565b6040518082815260200191505060405180910390f35b34156101ef57600080fd5b610243600480803573ffffffffffffffffffffffffffffffffffffffff1690602001909190803573ffffffffffffffffffffffffffffffffffffffff169060200190919080359060200190919050506106c9565b604051808215151515815260200191505060405180910390f35b341561026857600080fd5b610270610945565b604051808260ff1660ff16815260200191505060405180910390f35b341561029757600080fd5b6102c3600480803573ffffffffffffffffffffffffffffffffffffffff16906020019091905050610958565b6040518082815260200191505060405180910390f35b34156102e457600080fd5b6102ec6109a1565b604051808273ffffffffffffffffffffffffffffffffffffffff1673ffffffffffffffffffffffffffffffffffffffff16815260200191505060405180910390f35b341561033957600080fd5b6103416109c6565b6040518080602001828103825283818151815260200191508051906020019080838360005b83811015610381578082015181840152602081019050610366565b50505050905090810190601f1680156103ae5780820380516001836020036101000a031916815260200191505b509250505060405180910390f35b34156103c757600080fd5b6103fc600480803573ffffffffffffffffffffffffffffffffffffffff16
906020019091908035906020019091905050610a64565b604051808215151515815260200191505060405180910390f35b341561042157600080fd5b610477600480803590602001908201803590602001908080602002602001604051908101604052809392919081815260200183836020028082843782019150505050505091908035906020019091905050610bcd565b005b341561048457600080fd5b6104cf600480803573ffffffffffffffffffffffffffffffffffffffff1690602001909190803573ffffffffffffffffffffffffffffffffffffffff16906020019091905050610d1b565b6040518082815260200191505060405180910390f35b34156104f057600080fd5b6104f8610da2565b005b341561050557600080fd5b610531600480803573ffffffffffffffffffffffffffffffffffffffff16906020019091905050610de4565b005b60048054600181600116156101000203166002900480601f0160208091040260200160405190810160405280929190818152602001828054600181600116156101000203166002900480156105c95780601f1061059e576101008083540402835291602001916105c9565b820191906000526020600020905b8154815290600101906020018083116105ac57829003601f168201915b505050505081565b600081600260003373ffffffffffffffffffffffffffffffffffffffff1673ffffffffffffffffffffffffffffffffffffffff16815260200190815260200160002060008573ffffffffffffffffffffffffffffffffffffffff1673ffffffffffffffffffffffffffffffffffffffff168152602001908152602001600020819055508273ffffffffffffffffffffffffffffffffffffffff163373ffffffffffffffffffffffffffffffffffffffff167f8c5be1e5ebec7d5bd14f71427d1e84f3dd0314c0f7b2291e5b200ac8c7c3b925846040518082815260200191505060405180910390a36001905092915050565b60035481565b600081600160008673ffffffffffffffffffffffffffffffffffffffff1673ffffffffffffffffffffffffffffffffffffffff1681526020019081526020016000205410158015610796575081600260008673ffffffffffffffffffffffffffffffffffffffff1673ffffffffffffffffffffffffffffffffffffffff16815260200190815260200160002060003373ffffffffffffffffffffffffffffffffffffffff1673ffffffffffffffffffffffffffffffffffffffff1681526020019081526020016000205410155b80156107a25750600082115b156109395781600160008573ffffffffffffffffffffffffffffffffffffffff1673ffffffffffffffffff
ffffffffffffffffffffff1681526020019081526020016000206000828254019250508190555081600160008673ffffffffffffffffffffffffffffffffffffffff1673ffffffffffffffffffffffffffffffffffffffff1681526020019081526020016000206000828254039250508190555081600260008673ffffffffffffffffffffffffffffffffffffffff1673ffffffffffffffffffffffffffffffffffffffff16815260200190815260200160002060003373ffffffffffffffffffffffffffffffffffffffff1673ffffffffffffffffffffffffffffffffffffffff168152602001908152602001600020600082825403925050819055508273ffffffffffffffffffffffffffffffffffffffff168473ffffffffffffffffffffffffffffffffffffffff167fddf252ad1be2c89b69c2b068fc378daa952ba7f163c4a11628f55a4df523b3ef846040518082815260200191505060405180910390a36001905061093e565b600090505b9392505050565b600560009054906101000a900460ff1681565b6000600160008373ffffffffffffffffffffffffffffffffffffffff1673ffffffffffffffffffffffffffffffffffffffff168152602001908152602001600020549050919050565b6000809054906101000a900473ffffffffffffffffffffffffffffffffffffffff1681565b60068054600181600116156101000203166002900480601f016020809104026020016040519081016040528092919081815260200182805460018160011615610100020316600290048015610a5c5780601f10610a3157610100808354040283529160200191610a5c565b820191906000526020600020905b815481529060010190602001808311610a3f57829003601f168201915b505050505081565b600081600160003373ffffffffffffffffffffffffffffffffffffffff1673ffffffffffffffffffffffffffffffffffffffff1681526020019081526020016000205410158015610ab55750600082115b15610bc25781600160003373ffffffffffffffffffffffffffffffffffffffff1673ffffffffffffffffffffffffffffffffffffffff1681526020019081526020016000206000828254039250508190555081600160008573ffffffffffffffffffffffffffffffffffffffff1673ffffffffffffffffffffffffffffffffffffffff168152602001908152602001600020600082825401925050819055508273ffffffffffffffffffffffffffffffffffffffff163373ffffffffffffffffffffffffffffffffffffffff167fddf252ad1be2c89b69c2b068fc378daa952ba7f163c4a11628f55a4df523b3ef846040518082815260200191505060405180
910390a360019050610bc7565b600090505b92915050565b60008090505b8251811015610d165781600160003373ffffffffffffffffffffffffffffffffffffffff1673ffffffffffffffffffffffffffffffffffffffff1681526020019081526020016000206000828254039250508190555081600160008584815181101515610c3c57fe5b9060200190602002015173ffffffffffffffffffffffffffffffffffffffff1673ffffffffffffffffffffffffffffffffffffffff168152602001908152602001600020600082825401925050819055508281815181101515610c9b57fe5b9060200190602002015173ffffffffffffffffffffffffffffffffffffffff163373ffffffffffffffffffffffffffffffffffffffff167fddf252ad1be2c89b69c2b068fc378daa952ba7f163c4a11628f55a4df523b3ef846040518082815260200191505060405180910390a38080600101915050610bd3565b505050565b6000600260008473ffffffffffffffffffffffffffffffffffffffff1673ffffffffffffffffffffffffffffffffffffffff16815260200190815260200160002060008373ffffffffffffffffffffffffffffffffffffffff1673ffffffffffffffffffffffffffffffffffffffff16815260200190815260200160002054905092915050565b336000806101000a81548173ffffffffffffffffffffffffffffffffffffffff021916908373ffffffffffffffffffffffffffffffffffffffff160217905550565b6000809054906101000a900473ffffffffffffffffffffffffffffffffffffffff1673ffffffffffffffffffffffffffffffffffffffff163373ffffffffffffffffffffffffffffffffffffffff16141515610e3f57600080fd5b806000806101000a81548173ffffffffffffffffffffffffffffffffffffffff021916908373ffffffffffffffffffffffffffffffffffffffff160217905550505600a165627a7a723058200a2bf4fa374a52ee391d2be9ef116c0929697a1a3ee37acebf0cc5d85c6597ff0029
2 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2022 Univ of British Columbia (UBC)
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # AChecker
 2 | AChecker (Access Control Checker) is an automated static analysis tool for detecting access control vulnerabilities in Ethereum smart contracts.
 3 | 
 4 | For more details about AChecker, please refer to our ICSE 2023 paper, [AChecker: Statically Detecting Smart Contract
 5 | Access Control Vulnerabilities](https://blogs.ubc.ca/dependablesystemslab/2022/12/08/achecker-statically-detecting-smart-contract-access-control-vulnerabilities).
 6 | 
 7 | 
 8 | If you use AChecker, please cite this paper.
 9 | 
10 |  ```
11 | @inproceedings{ghaleb2023achecker,
12 |   title={AChecker: Statically Detecting Smart Contract Access Control Vulnerabilities},
13 |   author={Ghaleb, Asem and Rubin, Julia and Pattabiraman, Karthik},
14 |   booktitle={Proceedings of the 45th IEEE/ACM International Conference on Software Engineering},
15 |   year={2023}
16 | }
17 |   ```
18 | 
19 | ## Getting Started
20 | **Note:** We tested all scripts provided in this package on an Ubuntu 20.04 LTS machine.
21 | 
22 | ### Requirements
23 | * Python 3.8+
24 | 
25 | ### Building AChecker 
26 | 
27 | To build the tool manually, we provide a `requirements.txt` file and a `setup.py` script that install the dependencies AChecker requires and build everything.
28 | 
29 | Run the following commands. Please make sure you are using Python 3.8 or higher.
30 |   
31 | ```
32 | cd AChecker
33 | python -m pip install -r requirements.txt
34 | ```
35 |  
36 |  ### Analyzing a smart contract
37 | Use the following command to run AChecker on a contract's bytecode.
38 |  ```
39 | python bin/achecker.py -f [path_of_the_contract_bytecode_file] -b
40 | ```      
41 | As an example, the following command runs AChecker on the contract with CVE ID 'CVE-2021-34273', whose bytecode is in the file '*CVE-2021-34273.code*':
42 | ```
43 | python bin/achecker.py -f CVE-2021-34273.code -b -m 8
44 | ```
45 | 
46 | The `-m` option sets the memory limit for the analysis (in gigabytes). In this example, the limit is set to 8 GB. When `-m` is omitted, the default is 6 GB.
47 | 
48 | ## Contact
49 | For questions about our paper or this code, please get in touch with Asem Ghaleb (aghaleb@alumni.ubc.ca).
50 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | pysha3>=1.0.2
 2 | z3-solver>=4.8.5.0
 3 | ijson
 4 | requests
 5 | lxml
 6 | bs4
 7 | Cython
 8 | configparser
 9 | pyevmasm
10 | cbor2
11 | networkx
12 | solc-select
13 | pandas
14 | tabulate
15 | -e .
16 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | from setuptools import setup, find_packages
 2 | 
 3 | setup(
 4 |     name='AChecker',
 5 |     version='0.1.0',
 6 |     packages=find_packages(),
 7 |     install_requires=[],
 8 |     scripts=[       
 9 |         'bin/achecker.py'
10 |     ],      
11 |     python_requires='>=3.8',    
12 |     
13 | )
14 | 


--------------------------------------------------------------------------------
/src/__init__.py:
--------------------------------------------------------------------------------
 1 | from . import cfg
 2 | from . import constraints
 3 | from . import evm
 4 | from . import exploit
 5 | from . import explorer
 6 | from . import flow
 7 | from . import memory
 8 | from . import project
 9 | from . import slicing
10 | from . import storage
11 | from . import util
12 | 


--------------------------------------------------------------------------------
/src/cfg/__init__.py:
--------------------------------------------------------------------------------
1 | from . import bb
2 | from . import cfg
3 | from . import disassembly
4 | from . import instruction
5 | from . import opcodes
6 | #from . import evm_cfg
7 | #from . import tac_cfg
8 | 


--------------------------------------------------------------------------------
/src/cfg/bb.py:
--------------------------------------------------------------------------------
  1 | import logging
  2 | from collections import defaultdict, deque
  3 | 
  4 | from src.util.utils import unique
  5 | 
  6 | 
  7 | class BB(object):
  8 |     def __init__(self, ins):
  9 |         self.ins = ins
 10 |         self.streads = set()  # indices of stack-items that will be read by this BB (0 is the topmost item on stack)
 11 |         self.stwrites = set()  # indices of stack-items that will be written by this BB (0 is the topmost item on stack)
 12 |         self.stdelta = 0
 13 |         for i in ins:
 14 |             i.bb = self
 15 |             if 0x80 <= i.op <= 0x8f:  # Special handling for DUP
 16 |                 ridx = i.op - 0x80 - self.stdelta
 17 |                 widx = -1 - self.stdelta
 18 |                 if ridx not in self.stwrites:
 19 |                     self.streads.add(ridx)
 20 |                 self.stwrites.add(widx)
 21 |             elif 0x90 <= i.op <= 0x9f:  # Special handling for SWAP
 22 |                 idx1 = i.op - 0x8f - self.stdelta
 23 |                 idx2 = - self.stdelta
 24 |                 if idx1 not in self.stwrites:
 25 |                     self.streads.add(idx1)
 26 |                 if idx2 not in self.stwrites:
 27 |                     self.streads.add(idx2)
 28 |                 self.stwrites.add(idx1)
 29 |                 self.stwrites.add(idx2)
 30 |             else:  # assume entire stack is affected otherwise
 31 |                 for j in range(i.ins):
 32 |                     idx = j - self.stdelta
 33 |                     if idx not in self.stwrites:
 34 |                         self.streads.add(idx)
 35 |                 for j in range(i.outs):
 36 |                     idx = i.ins - 1 - j - self.stdelta
 37 |                     self.stwrites.add(idx)
 38 |             self.stdelta += i.delta
 39 |         self.streads = {x for x in self.streads if x >= 0}
 40 |         self.stwrites = {x for x in self.stwrites if x >= 0}
 41 |         self.start = self.ins[0].addr
 42 |         self.pred = set()
 43 |         self.succ = set()
 44 |         self.succ_addrs = set()
 45 |         self.pred_paths = defaultdict(set)
 46 |         self.branch = self.ins[-1].op == 0x57
 47 |         self.indirect_jump = self.ins[-1].op in (0x56, 0x57)
 48 |         self.ancestors = set()
 49 |         self.descendants = set()
 50 |         # maintain a set of 'must_visit' constraints to limit
 51 |         # backward-slices to only new slices after new edges are added
 52 |         # initially, no constraint is given (= empty set)
 53 |         self.must_visit = [set()]
 54 |         # also maintain an estimate of how fast we can get from here
 55 |         # to the root of the cfg
 56 |         # how fast meaning, how many JUMPI-branches we have to take
 57 |         self.estimate_constraints = (1 if self.branch else 0) if self.start == 0 else None
 58 |         # and another estimate fo many backwards branches
 59 |         # we will encounter to the root
 60 |         self.estimate_back_branches = 0 if self.start == 0 else None
 61 | 
 62 |     @property
 63 |     def jump_resolved(self):
 64 |         return not self.indirect_jump or len(self.must_visit) == 0
 65 | 
 66 |     def update_ancestors(self, new_ancestors):
 67 |         new_ancestors = new_ancestors - self.ancestors
 68 |         if new_ancestors:
 69 |             self.ancestors.update(new_ancestors)
 70 |             for s in self.succ:
 71 |                 s.update_ancestors(new_ancestors)
 72 | 
 73 |     def update_descendants(self, new_descendants):
 74 |         new_descendants = new_descendants - self.descendants
 75 |         if new_descendants:
 76 |             self.descendants.update(new_descendants)
 77 |             for p in self.pred:
 78 |                 p.update_descendants(new_descendants)
 79 | 
 80 |     def update_estimate_constraints(self):
 81 |         if all(p.estimate_constraints is None for p in self.pred):
 82 |             return
 83 |         best_estimate = min(p.estimate_constraints for p in self.pred if p.estimate_constraints is not None)
 84 |         if self.branch:
 85 |             best_estimate += 1
 86 |         if self.estimate_constraints is None or best_estimate < self.estimate_constraints:
 87 |             self.estimate_constraints = best_estimate
 88 |             for s in self.succ:
 89 |                 s.update_estimate_constraints()
 90 | 
 91 |     def update_estimate_back_branches(self):
 92 |         if all(p.estimate_back_branches is None for p in self.pred):
 93 |             return
 94 |         best_estimate = min(p.estimate_back_branches for p in self.pred if p.estimate_back_branches is not None)
 95 |         if len(self.pred) > 1:
 96 |             best_estimate += 1
 97 |         if self.estimate_back_branches is None or best_estimate != self.estimate_back_branches:
 98 |             self.estimate_back_branches = best_estimate
 99 |             for s in self.succ:
100 |                 s.update_estimate_back_branches()
101 | 
102 |     def add_succ(self, other, path):
103 |         self.succ.add(other)
104 |         other.pred.add(self)
105 |         self.update_descendants(other.descendants | {other.start})
106 |         other.update_ancestors(self.ancestors | {self.start})
107 |         other.update_estimate_constraints()
108 |         other.update_estimate_back_branches()
109 |         other.pred_paths[self].add(tuple(path))
110 |         seen = set()
111 |         todo = deque()
112 |         todo.append(other)
113 |         while todo:            
114 |             bb = todo.popleft()
115 |             if bb not in seen:
116 |                 seen.add(bb)
117 |                 if bb.indirect_jump:
118 |                     bb.must_visit.append({self.start})
119 |                 # logging.debug('BB@%x, must_visit: %s', bb.start, bb.must_visit)
120 |                 todo.extend(s for s in bb.succ if s not in seen)        
121 | 
122 |     def _find_jump_target(self):
123 |         if len(self.ins) >= 2 and 0x60 <= self.ins[-2].op <= 0x71:
124 |             self.must_visit = []
125 |             return int.from_bytes(self.ins[-2].arg, byteorder='big')
126 |         else:
127 |             return None
128 | 
129 |     def get_succ_addrs_full(self, valid_jump_targets):
130 |         from src.slicing import slice_to_program, backward_slice
131 |         from src.evm.exceptions import ExternalData
132 |         from src.memory import UninitializedRead
133 |         from src.evm.evm import run
134 |         new_succ_addrs = set()
135 |         if self.indirect_jump and not self.jump_resolved:            
136 |             bs = backward_slice(self.ins[-1], [0], must_visits=self.must_visit)            
137 |             for b in bs:
138 |                 if 0x60 <= b[-1].op <= 0x7f:
139 |                     succ_addr = int.from_bytes(b[-1].arg, byteorder='big')
140 |                 else:
141 |                     p = slice_to_program(b)
142 |                     try:
143 |                         succ_addr = run(p, check_initialized=True).stack.pop()
144 |                     except (ExternalData, UninitializedRead):
145 |                         logging.debug('Failed to compute jump target for BB@{}, slice: \n{}'.format(self.start, '\n'.join('\t{}'.format(ins) for ins in b)))
146 |                         continue
147 |                 if succ_addr not in valid_jump_targets:
148 |                     logging.debug('Jump to invalid address')
149 |                     continue
150 |                 path = tuple(unique(ins.bb.start for ins in b if ins.bb))
151 |                 if succ_addr not in self.succ_addrs:
152 |                     self.succ_addrs.add(succ_addr)
153 |                 if (path, succ_addr) not in new_succ_addrs:
154 |                     new_succ_addrs.add((path, succ_addr))
155 |         # We did our best,
156 |         # if someone finds a new edge, jump_resolved will be set to False by the BFS in add_succ
157 |         self.must_visit = []
158 |         return self.succ_addrs, new_succ_addrs
159 | 
160 |     def get_succ_addrs(self, valid_jump_targets):
161 |         if self.ins[-1].op in (0x56, 0x57):
162 |             jump_target = self._find_jump_target()
163 |             if jump_target is not None:
164 |                 self.indirect_jump = False
165 |                 if jump_target in valid_jump_targets:
166 |                     self.succ_addrs.add(jump_target)
167 |             else:
168 |                 self.indirect_jump = True
169 |         else:
170 |             self.must_visit = []
171 |         if self.ins[-1].op not in (0x00, 0x56, 0xf3, 0xfd, 0xfe, 0xff):
172 |             fallthrough = self.ins[-1].next_addr
173 |             if fallthrough:
174 |                 self.succ_addrs.add(fallthrough)
175 |         return self.succ_addrs
176 | 
177 |     def __str__(self):
178 |         s = 'BB @ %x\tStack %d' % (self.start, self.stdelta)
179 |         s += '\n'
180 |         s += 'Stackreads: {%s}' % (', '.join(map(str, sorted(self.streads))))
181 |         s += '\n'
182 |         s += 'Stackwrites: {%s}' % (', '.join(map(str, sorted(self.stwrites))))
183 |         if self.pred:
184 |             s += '\n'
185 |             s += '\n'.join('%x ->' % pred.start for pred in self.pred)
186 |         s += '\n'
187 |         s += '\n'.join(str(ins) for ins in self.ins)
188 |         if self.succ:
189 |             s += '\n'
190 |             s += '\n'.join(' -> %x' % succ.start for succ in self.succ)
191 |         return s
192 | 
193 |     def __repr__(self):
194 |         return str(self)
195 | 
196 |     def __lt__(self, other):
197 |         return self.start < other.start
198 | 


--------------------------------------------------------------------------------
/src/cfg/cfg.py:
--------------------------------------------------------------------------------
  1 | import logging
  2 | from collections import deque
  3 | from src.cfg.bb import BB
  4 | import src.cfg.rattle as rattle
  5 | import tempfile
  6 | import subprocess
  7 | import os,sys
  8 | import time
  9 | from collections import defaultdict
 10 | from src.evm.exceptions import TimeoutException
 11 | 
 12 | 
 13 | class CFG(object):
 14 |     def __init__(self, bbs, fix_xrefs=True, fix_only_easy_xrefs=False):
 15 |         self.bbs = sorted(bbs)
 16 |         self._bb_at = {bb.start: bb for bb in self.bbs}
 17 |         self._ins_at = {i.addr: i for bb in self.bbs for i in bb.ins}
 18 |         self.root = self._bb_at[0]
 19 |         self.valid_jump_targets = frozenset({bb.start for bb in self.bbs if bb.ins[0].name == 'JUMPDEST'})
 20 |         if fix_xrefs or fix_only_easy_xrefs:
 21 |             try:                
 22 |                 self._xrefs(fix_only_easy_xrefs)
 23 |             except TimeoutException:                
 24 |                 raise TimeoutException("Timed out!")
 25 |         self._dominators = None
 26 |         self._dd = dict()
 27 | 
 28 |     @property
 29 |     def bb_addrs(self):
 30 |         return frozenset(self._bb_at.keys())
 31 | 
 32 |     def filter_ins(self, names, reachable=False):
 33 |         if isinstance(names, str):
 34 |             names = [names]
 35 |         
 36 |         if not reachable:
 37 |             return [ins for bb in self.bbs for ins in bb.ins if ins.name in names]
 38 |         else:
 39 |             return [ins for bb in self.bbs for ins in bb.ins if ins.name in names and 0 in bb.ancestors | {bb.start}]
 40 | 
 41 |     def _xrefs(self, fix_only_easy_xrefs=False):
 42 |         # logging.debug('Fixing Xrefs')
 43 |         self._easy_xrefs()        
 44 |         # logging.debug('Easy Xrefs fixed, turning to hard ones now')
 45 |         if not fix_only_easy_xrefs:
 46 |             self._hard_xrefs()            
 47 |             # logging.debug('Hard Xrefs also fixed, good to go')
 48 | 
 49 |     def _easy_xrefs(self):
 50 |         for pred in self.bbs:
 51 |             for succ_addr in pred.get_succ_addrs(self.valid_jump_targets):
 52 |                 if succ_addr and succ_addr in self._bb_at:
 53 |                     succ = self._bb_at[succ_addr]
 54 |                     pred.add_succ(succ, {pred.start})
 55 | 
 56 |     def _hard_xrefs(self):
 57 |         new_link = True
 58 |         links = set()
 59 |         stime=time.time()
 60 |         while new_link:
 61 |             new_link = False
 62 |             for pred in self.bbs:                
 63 |                 if not pred.jump_resolved:                                
 64 |                     succ_addrs, new_succ_addrs = pred.get_succ_addrs_full(self.valid_jump_targets)
 65 |                     for new_succ_path, succ_addr in new_succ_addrs:
 66 |                         if succ_addr not in self._bb_at:
 67 |                             logging.warning(
 68 |                                 'WARNING, NO BB @ %x (possible successor of BB @ %x)' % (succ_addr, pred.start))
 69 |                             continue
 70 |                         succ = self._bb_at[succ_addr]
 71 |                         pred.add_succ(succ, new_succ_path)
 72 |                         if not (pred.start, succ.start) in links:
 73 |                             # logging.debug('found new link from %x to %x', pred.start, succ.start)
 74 |                             # with open('cfg-tmp%d.dot' % len(links), 'w') as outfile:
 75 |                             #    outfile.write(self.to_dot())
 76 |                             new_link = True
 77 |                             links.add((pred.start, succ.start))
 78 |     def data_dependence(self, ins):
 79 |         if not ins in self._dd:
 80 |             from src.slicing import backward_slice
 81 |             self._dd[ins] = set(i for s in backward_slice(ins) for i in s if i.bb)
 82 |         return self._dd[ins]
 83 | 
 84 |     @property
 85 |     def dominators(self):
 86 |         if not self._dominators:
 87 |             self._compute_dominators()
 88 |         return self._dominators
 89 | 
 90 |     def _compute_dominators(self):
 91 |         import networkx
 92 |         g = networkx.DiGraph()
 93 |         for bb in self.bbs:
 94 |             for succ in bb.succ:
 95 |                 g.add_edge(bb.start, succ.start)
 96 |         self._dominators = {self._bb_at[k]: self._bb_at[v] for k, v in networkx.immediate_dominators(g, 0).items()}
 97 | 
 98 |     def __str__(self):
 99 |         return '\n\n'.join(str(bb) for bb in self.bbs)
100 | 
101 |     def to_dot(self, minimal=False):
102 |         s = 'digraph g {\n'
103 |         s += '\tsplines=ortho;\n'
104 |         s += '\tnode[fontname="courier"];\n'
105 |         for bb in sorted(self.bbs):
106 |             from_block = ''
107 |             if self._dominators:
108 |                 from_block = 'Dominated by: %x<br align="left"/>' % self.dominators[bb].start
109 |             from_block += 'From: ' + ', '.join('%x' % pred.start for pred in sorted(bb.pred))
110 |             if bb.estimate_constraints is not None:
111 |                 from_block += '<br align="left"/>Min constraints from root: %d' % bb.estimate_constraints
112 |             if bb.estimate_back_branches is not None:
113 |                 from_block += '<br align="left"/>Min back branches to root: %d' % bb.estimate_back_branches
114 |             to_block = 'To: ' + ', '.join('%x' % succ.start for succ in sorted(bb.succ))
115 |             ins_block = '<br align="left"/>'.join(
116 |                 '%4x: %02x %s %s' % (ins.addr, ins.op, ins.name, ins.arg.hex() if ins.arg else '') for ins in bb.ins)
117 |             # ancestors = 'Ancestors: %s'%(', '.join('%x'%addr for addr in sorted(a for a in bb.ancestors)))
118 |             # descendants = 'Descendants: %s' % (', '.join('%x' % addr for addr in sorted(a for a in bb.descendants)))
119 |             # s += '\t%d [shape=box,label=<<b>%x</b>:<br align="left"/>%s<br align="left"/>%s<br align="left"/>%s<br align="left"/>>];\n' % (
120 |             #    bb.start, bb.start, ins_block, ancestors, descendants)
121 |             if not minimal:
122 |                 s += '\t%d [shape=box,label=<%s<br align="left"/><b>%x</b>:<br align="left"/>%s<br align="left"/>%s<br align="left"/>>];\n' % (
123 |                     bb.start, from_block, bb.start, ins_block, to_block)
124 |             else:
125 |                 s += '\t%d [shape=box,label=<%s<br align="left"/>>];\n' % (
126 |                     bb.start, ins_block)
127 |         s += '\n'
128 |         for bb in sorted(self.bbs):
129 |             for succ in sorted(bb.succ):
130 |                 pths = succ.pred_paths[bb]
131 |                 if not minimal:
132 |                     s += '\t%d -> %d [xlabel="%s"];\n' % (
133 |                         bb.start, succ.start, '|'.join(' -> '.join('%x' % a for a in p) for p in pths))
134 |                 else:
135 |                     s += '\t%d -> %d;\n' % (bb.start, succ.start)
136 |         if self._dd:
137 |             inter_bb = {}
138 |             for k, v in self._dd.items():
139 |                 jbb = k.bb.start
140 |                 vbbs = {i.bb.start for i in v if i.bb.start != k.bb.start}
141 |                 if vbbs:
142 |                     inter_bb[jbb] = vbbs
143 |             l = len(inter_bb)
144 |             for i, (k, v) in enumerate(inter_bb.items()):
145 |                 for j in v:
146 |                     s += '\t%d -> %d[color="%.3f 1.0 1.0", weight=10];\n' % (j, k, (1.0 * i) / l)
147 |                 s += '\n'
148 |         s += '}'
149 |         return s
150 | 
151 |     def trim(self):
152 |         keep = set(self.root.descendants)
153 |         self.bbs = [bb for bb in self.bbs if bb.start in keep]
154 |         delete = set(self._bb_at.keys()) - keep
155 |         for addr in delete:
156 |             del self._bb_at[addr]
157 | 
158 |     def to_json(self):
159 |         return {'bbs': [{'start': bb.start,
160 |                          'succs': [{'start': succ.start, 'paths': list(succ.pred_paths[bb])} for succ in
161 |                                    sorted(bb.succ)]} for bb in sorted(self.bbs)]}
162 | 
163 |     @staticmethod
164 |     def from_json(json_dict, code):
165 |         from .disassembly import disass
166 |         bbs = list()
167 |         for bb_dict in json_dict['bbs']:
168 |             bbs.append(BB(list(disass(code, bb_dict['start']))))
169 |         cfg = CFG(bbs, fix_xrefs=False)
170 |         for bb_dict in json_dict['bbs']:
171 |             bb = cfg._bb_at[bb_dict['start']]
172 |             for succ_dict in bb_dict['succs']:
173 |                 succ = cfg._bb_at[succ_dict['start']]
174 |                 for path in succ_dict['paths']:
175 |                     bb.add_succ(succ, path)
176 |         return cfg
177 | 
178 |     @staticmethod
179 |     def distance_map(ins):
180 |         dm = dict()
181 |         todo = deque()
182 |         todo.append((ins.bb, 0))
183 |         while todo:
184 |             bb, d = todo.pop()
185 |             if not bb in dm or dm[bb] > d:
186 |                 dm[bb] = d
187 |                 for p in bb.pred:
188 |                     todo.append((p, d + 1 if len(p.succ) > 1 else d))
189 |         return dm
190 |     
191 |     """ Added code start here"""
192 | 
193 |     def to_ssa(self, code:bytes, minimal=False):          
194 |         sys.setrecursionlimit(10000)
195 | 
196 |         edges = []
197 |         ssa = rattle.Recover(code, edges=edges, split_functions=False)        
198 |         
199 |         for function in ssa.functions:
200 |             g = rattle.ControlFlowGraph(function)
201 |             t = tempfile.NamedTemporaryFile(suffix='.dot', mode='w')
202 |             t.write(g.dot())
203 |             t.flush()
204 |             
205 |             try:
206 |                 os.makedirs('output')
207 |             except:
208 |                 pass
209 | 
210 |             out_file = f'output/{function.desc()}.svg'
211 | 
212 |             subprocess.call(['dot', '-Tsvg', f'-o{out_file}', t.name])
213 |             print(f'[+] Wrote {function.desc()} to {out_file}')           
214 | 
215 |         
216 |     def edges(self):
217 |         edges=[]        
218 |         for bb in sorted(self.bbs):
219 |             for succ in sorted(bb.succ):         
220 |                 edges.append([bb.start, succ.start])
221 |         return edges
222 |                 
223 |     def assert_sinks(self):
224 |         instructions = {}
225 |         assert_bbs=[bb for bb in self.bbs if len(bb.ins)==1 and hex(bb.ins[0].op)=='0xfe']
226 |         for bb in assert_bbs:
227 |             for pred in bb.pred:                                
228 |                 if 'SLOAD' in[ins.name for ins in pred.ins]:
229 |                     continue
230 |                 #Avoid cases when validating 
231 |                 if 'CALLDATALOAD' in[ins.name for ins in pred.ins]:
232 |                     continue
233 |                 
234 |                 instructions[pred.ins[-1]]=bb.ins[0]
235 |         return instructions
236 | 
237 |     def call_sinks(self):
238 |         instructions = []          
239 |         call_insn= [ins for bb in self.bbs for ins in bb.ins if ins.name in set(['CALL']) and 0 in bb.ancestors | {bb.start}]
240 |         for call_ins in call_insn:           
241 |             call_succ=[succ.start for succ in self._bb_at[call_ins.bb.start].succ]
242 |             if len(call_succ)==0:
243 |                 instructions.append(call_ins)             
244 |                 continue
245 |             call_bb_ins = self._bb_at[call_ins.bb.start].ins
246 |             if len(call_succ)==1 and [ins.name for ins in call_bb_ins[-3:]]==['ISZERO','PUSH2','JUMPI']: 
247 |                 continue
248 |             if len(call_succ)==2 and [ins.name for ins in call_bb_ins[call_bb_ins.index(call_ins)+1:] if ins.name in set(['ADD','AND','ISZERO','JUMPI'])] ==['ADD','AND','ISZERO','JUMPI']: #propagate throw                
249 |                 continue
250 | 
251 |             call_ins_index=call_bb_ins.index(call_ins)           
252 |             if  [ins.name for ins in call_bb_ins[call_ins_index+1:call_ins_index+22] if ins.name in set(['ADD','MSTORE','MLOAD','SUB','SHA3'])]==['ADD','MSTORE','ADD','MLOAD','SUB','SHA3']:                
253 |                 continue
254 |             if  [ins.name for ins in call_bb_ins[call_ins_index+1:call_ins_index+17] if ins.name in set(['ADD','MLOAD','SUB','SHA3'])]==['ADD','MLOAD','SUB','SHA3']:                
255 |                 continue
256 |             if  [ins.name for ins in call_bb_ins[call_ins_index+1:call_ins_index+12] if ins.name in set(['ADD','MSTORE','SUB'])]==['ADD','SUB','MSTORE']:
257 |                 continue
258 | 
259 |             if len([succ.start for succ in self._bb_at[call_ins.bb.start].succ for ins in succ.ins if ins.name in set(['REVERT','INVALID'])])!=0:
260 |                 continue
261 |             if len([succ.start for succ in self._bb_at[call_ins.bb.start].succ if [ins.name for ins in succ.ins]==['PUSH2','JUMP']])!=0:
262 |                 continue                       
263 | 
264 |             min_succ_bb= self._bb_at[min(call_succ)]                  
265 | 
266 |             succ_with_call_bb=[succ for succ in self._bb_at[call_ins.bb.start].succ for ins in succ.ins if ins.name in set(['CALL'])]
267 |             if (['%x' %ins.op for ins in min_succ_bb.ins][-1] in set(['fd','fe']) or  [ins.name for ins in min_succ_bb.ins]==['PUSH2','JUMP']):
268 |                 continue
269 |             if (['%x' %ins.op for ins in min_succ_bb.ins][-1] not in set(['fd','fe']) and [*['0x','0x'],*['%x' %ins.op for ins in min_succ_bb.ins]][-3] not in set(['3e']) and [ins.name for ins in min_succ_bb.ins]!=['PUSH2','JUMP'] and len(succ_with_call_bb)==0):
270 |                 instructions.append(call_ins)         
271 |             elif ([*['0x'],*['%x' %ins.op for ins in min_succ_bb.ins]][-3] in set(['3e'])):   
272 |                 call_ret_succ=[succ.start for succ in self._bb_at[min([succ.start for succ in min_succ_bb.succ])].succ]            
273 |                 ret_min_succ_bb= self._bb_at[min(call_ret_succ)]               
274 |                 if (['%x' %ins.op for ins in ret_min_succ_bb.ins][-1] not in set(['fd','fe'])):
275 |                     instructions.append(call_ins)           
276 |             elif (len(succ_with_call_bb)>0):
277 |                 sec_call_succ=[succ.start for succ in succ_with_call_bb[0].succ]
278 |                 min_sec_call_succ_bb= self._bb_at[min(sec_call_succ)]
279 |                 if( [ins.name for ins in min_sec_call_succ_bb.ins]!=['PUSH2','JUMP']):
280 |                     instructions.append(call_ins)
281 | 
282 |         return instructions
283 | 
    def find_loops(self, with_calls=False):
        """Detect loops in the CFG using simple-cycle enumeration plus heuristics.

        A directed graph of block start addresses is built with networkx and
        every simple cycle is examined. Cycles are discarded or kept based on
        instruction-name patterns of the blocks involved.

        :param with_calls: when False, return the ``loops`` mapping
            (loop head start -> list whose first element is the length of the
            most recently processed cycle for that head, followed by the block
            starts appended for it), minus heads flagged as gas-sanitized.
            When True, return ``calls_in_loops`` instead (block start -> list
            of CALL instructions found in loop blocks whose lower-address
            successor ends in opcode 0xfd/0xfe).
        :return: a ``defaultdict(list)`` as described above
        """
        import networkx
        g = networkx.DiGraph()
        for bb in self.bbs:
            for succ in bb.succ:
                g.add_edge(bb.start,succ.start)
        l= list(networkx.simple_cycles(g))
        loops=defaultdict(list)
        calls_in_loops=defaultdict(list)
        loops_with_calls=[]
        loops_with_gas_sanitizers =[]
        for i in l:
            # Skip cycles containing a block with more than two predecessors.
            if len([h for h in i if(len(self._bb_at[h].pred))>2])>0:
                continue

            loop_bbs=[bb for j in i for bb in self.bbs if bb.start==j]
            # Self-loops (single-block cycles) are ignored.
            if len(i) ==1:
                continue
            # Candidate heads: targets of a backward edge (source start >
            # target start) inside the cycle that also have a predecessor
            # outside the cycle and at most two successors, where the source
            # block contains ADD or ends with PUSH2, JUMP.
            head =[succ.start for bb in loop_bbs for succ in bb.succ if succ.start in i if bb.start>succ.start and len([p.start for p in succ.pred if p.start not in i])!=0 and len(succ.succ)<=2 and ('ADD' in [ins.name for ins in bb.ins] or  [ins.name for ins in bb.ins[-2:]]==['PUSH2','JUMP'])]

            if len(head)==0:
                continue
            # Slot 0 of the per-head list always holds the length of the
            # cycle currently being processed.
            if len(loops[head[0]]):
                loops[head[0]].pop(0)
            loops[head[0]].insert(0,len(i))

            # Backward edges of the cycle as [target start, source start].
            back_edge =[[succ.start,bb.start] for bb in loop_bbs for succ in bb.succ if succ.start in i if bb.start>succ.start]

            # Two-block cycles: only one specific body/predecessor instruction
            # shape is kept; everything else is skipped.
            if len(i)==2:
                body_ins=[ins.name for bb in loop_bbs for ins in bb.ins if bb.start!=head[0] and ins.name in ['ADD','SUB','MLOAD','MSTORE','JUMP','SSTORE','EXP','NOT','MUL','PUSH1','POP','SWAP1','DUP2']]
                body_start=[bb.start for bb in loop_bbs if bb.start!=head[0]]
                if  'MLOAD' in body_ins:
                    continue
                elif body_ins  == ['PUSH1','DUP2','PUSH1','SWAP1','SSTORE','POP','PUSH1','ADD','JUMP']:
                    # Walk up to three predecessor levels above the head.
                    head_pred=[pred.start for bb in loop_bbs for pred in bb.pred if bb.start==head[0] and pred.start !=body_start[0]]
                    head_pred1=[pred.start for pred in self._bb_at[head_pred[0]].pred]
                    head_pred2=[pred.start for pred in self._bb_at[head_pred1[0]].pred]
                    head_pred_ins=[ins.name for bb in self._bb_at[head[0]].pred for ins in bb.ins if bb.start !=body_start[0] and ins.name in ['ADD','MSTORE','JUMP','SSTORE','SHA3','SLOAD']]
                    head_pred1_ins=[ins.name for bb in self._bb_at[head_pred[0]].pred for ins in bb.ins if ins.name in ['ADD','MSTORE','JUMP','SSTORE','SHA3','SLOAD']]
                    if len(head_pred2)==3:
                        continue
                    if head_pred1_ins !=['SLOAD','SSTORE','MSTORE','SHA3','ADD','JUMP'] and  head_pred_ins !=['SLOAD','SSTORE','MSTORE','SHA3','ADD','JUMP']:
                        continue
                else:
                    continue

            # Three-block cycles whose body only moves memory are skipped.
            if len(i)==3:
                body_ins=[ins.name for bb in loop_bbs for ins in bb.ins if bb.start not in back_edge[0] and ins.name in ['ADD','SUB','MLOAD','MSTORE','JUMP','SSTORE','EXP','NOT','MUL']]
                if body_ins  == ['MLOAD','MSTORE'] or body_ins==['ADD','MLOAD','ADD','MSTORE']:
                    continue

            # Walk the cycle once, starting at the head, and record blocks.
            # head_cnt counts how many entries were appended for this cycle.
            head_cnt=0
            for k in range(i.index(head[0]),len(i)+i.index(head[0])):
                indx=k%len(i)
                # Case 1: all successors stay inside the loop.
                if len([bb for bb in loop_bbs if bb.start==i[indx] and len(bb.succ)!=0  and len([succ for succ in bb.succ if succ in loop_bbs]) ==len(bb.succ)])!=0:
                    loops[head[0]].append(i[indx])
                    head_cnt+=1
                # Case 2: two successors, one inside the loop and one whose
                # first instruction has opcode 254 (0xfe).
                elif len([bb for bb in loop_bbs if bb.start==i[indx] and len(bb.succ)!=0 and len([succ for succ in bb.succ if succ in loop_bbs]) ==1 and len(bb.succ)==2 and len([succ for succ in bb.succ if self._ins_at[succ.start].op==254])==1])!=0:
                    loops[head[0]].append(i[indx])
                    head_cnt+=1
                # Case 3: two successors with exactly one inside the loop;
                # the block is recorded and the walk stops here.
                elif len([bb for bb in loop_bbs if bb.start==i[indx] and len(bb.succ)!=0 and len([succ for succ in bb.succ if succ in loop_bbs]) ==1 and len(bb.succ)==2])!=0:
                    loops[head[0]].append(i[indx])
                    head_cnt+=1
                    break

            # Without calls: flag heads whose loop contains a GAS comparison
            # (PUSH2/PUSH3, GAS, GT) so they can be removed below.
            if not with_calls:
                for bb in loop_bbs:
                    if bb.start in loops[head[0]] and 'GAS' in [ins.name for ins in bb.ins]:
                        block_ins =[ins.name for ins in bb.ins if ins.name in ['PUSH2','PUSH3','GAS','GT']]
                        if block_ins ==['PUSH3','GAS','GT'] or  block_ins ==['PUSH2','GAS','GT']:
                            loops_with_gas_sanitizers.append(head[0])
                            break
                    elif bb.start not in loops[head[0]] and 'GAS' in [ins.name for ins in bb.ins]:
                        block_ins =[ins.name for ins in bb.ins if ins.name in ['PUSH2','PUSH3','GAS','GT','JUMPI']]
                        if block_ins[-5:] == ['PUSH3','GAS','GT','PUSH2','JUMPI'] or block_ins[-5:] == ['PUSH2','GAS','GT','PUSH2','JUMPI']:
                            loops_with_gas_sanitizers.append(head[0])
                            break

            # With calls: look for a CALL in a cycle block that was not
            # recorded above, with exactly two successors, where the
            # lower-address successor ends in opcode 0xfd or 0xfe.
            if with_calls:
                calls_in_loop= False
                for bb in loop_bbs:
                    if bb.start not in loops[head[0]] and 'CALL' in [ins.name for ins in bb.ins]:
                        call_succ=[succ.start for succ in self._bb_at[bb.start].succ]
                        if len(call_succ)==2:
                            if (['%x' %ins.op for ins in self._bb_at[min(call_succ)].ins][-1] in set(['fd','fe'])):
                                calls_in_loop=True
                                loops_with_calls.append(head[0])
                                calls_in_loops[bb.start].append([ins for ins in bb.ins if 'CALL'==ins.name][0])
                                break
            # Undo the bookkeeping for cycles without a qualifying CALL:
            # drop the head entirely, or only the entries added by this cycle
            # when an earlier cycle already qualified the same head.
            if with_calls and not calls_in_loop and head[0] not in loops_with_calls:
                del loops[head[0]]
            elif with_calls and not calls_in_loop:
                # NOTE: this rebinds ``i``; the cycle list is not used again
                # in the remainder of this iteration.
                for i in range(0,head_cnt):
                    loops[head[0]].pop()

            # Remove every head that was flagged as gas-sanitized so far.
            for san in set(loops_with_gas_sanitizers):
                if loops.get(san,'nokey')!='nokey':
                    del loops[san]

        if not with_calls:
            return loops
        else:
            return calls_in_loops
388 | 
389 | 
390 | 
391 | 
392 | 


--------------------------------------------------------------------------------
/src/cfg/disassembly.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | from collections import deque
 3 | 
 4 | from src.cfg.bb import BB
 5 | from src.cfg.instruction import Instruction
 6 | from src.cfg.opcodes import opcodes
 7 | 
 8 | 
 9 | class ArgumentTooShort(Exception):
10 |     pass
11 | 
12 | 
def disass(code, i=0):
    """Lazily disassemble one basic block of ``code`` starting at offset ``i``.

    Instructions are yielded until the block ends: after STOP, JUMP, JUMPI,
    RETURN, REVERT, INVALID or SELFDESTRUCT, right before a JUMPDEST, at a
    byte that is not a known opcode, or at the end of ``code``.

    :param code: the bytecode; must be ``bytes`` (checked with ``assert``)
    :param i: offset of the first instruction
    :return: generator of ``Instruction`` objects
    :raises ArgumentTooShort: when a PUSH immediate runs past the end of
        ``code``
    """
    assert isinstance(code, bytes)
    while i < len(code):
        loc = i
        op = code[i]
        if op not in opcodes:
            # Unknown byte: end the block silently instead of raising.
            break
        arg = None
        if 0x60 <= op <= 0x7f:
            # PUSH1..PUSH32 carry op - 0x5f immediate bytes.
            arglen = op - 0x5f
            arg = code[i + 1:i + 1 + arglen]
            if len(arg) < arglen:
                raise ArgumentTooShort
            i += arglen
        i += 1
        yield Instruction(loc, op, arg)
        # End the basic block on STOP, JUMP, JUMPI, RETURN, REVERT, INVALID,
        # SELFDESTRUCT, or if the following instruction is a JUMPDEST.
        if op in (0x00, 0x56, 0x57, 0xf3, 0xfd, 0xfe, 0xff) or (i < len(code) and code[i] == 0x5b):
            break
35 | 
36 | 
def generate_BBs(code):
    """Yield a ``BB`` for every candidate block leader found in ``code``.

    Leader candidates are offset 0, every offset following a 0x57 (JUMPI)
    byte and every offset holding a 0x5b (JUMPDEST) byte. The scan looks at
    raw bytes and does not skip PUSH immediates, so some candidates may lie
    inside push data. Candidates that fail to disassemble, or that yield no
    instructions, are skipped.

    :param code: the bytecode to split into basic blocks
    :return: generator of ``BB`` objects in ascending address order
    """
    fallthrough_locs = [i + 1 for i, c in enumerate(code) if c == 0x57]
    jumpdest_locs = [i for i, c in enumerate(code) if c == 0x5b]
    leader_candidates = {0} | set(fallthrough_locs) | set(jumpdest_locs)
    for leader in sorted(leader_candidates):
        try:
            instructions = list(disass(code, leader))
            block = BB(instructions) if instructions else None
        except Exception:
            # Best effort: a candidate that cannot be decoded is not a block.
            continue
        # The yield stays outside the try block. A bare ``except`` around it
        # would swallow GeneratorExit, so closing the generator early would
        # fail with "generator ignored GeneratorExit".
        if block is not None:
            yield block


--------------------------------------------------------------------------------
/src/cfg/instruction.py:
--------------------------------------------------------------------------------
 1 | from src.cfg.opcodes import opcodes
 2 | 
 3 | 
 4 | class Instruction(object):
 5 |     def __init__(self, addr, op, arg=None):
 6 |         if not (arg is None or isinstance(arg, bytes)):
 7 |             raise ValueError('Instruction arg must be bytes or None')
 8 |         assert arg is None or isinstance(arg, bytes)
 9 |         opinfo = opcodes[op]
10 |         inslen = (op - 0x5f) + 1 if 0x60 <= op <= 0x7f else 1
11 |         self.addr = addr
12 |         self.next_addr = self.addr + inslen
13 |         self.op = op
14 |         self.name = opinfo[0]
15 |         self.arg = arg
16 |         self.ins = opinfo[1]
17 |         self.outs = opinfo[2]
18 |         self.gas = opinfo[3]
19 |         self.delta = self.outs - self.ins
20 |         self.bb = None
21 | 
22 |     def __str__(self):
23 |         return '(%5d) %4x:\t%02x\t-%d +%d = %d\t%s%s' % (
24 |             self.addr, self.addr, self.op, self.ins, self.outs, self.delta, self.name,
25 |             '(%d) %s' % (int.from_bytes(self.arg, byteorder='big'), '\t%s' % self.arg.hex()) if self.arg else '')
26 | 
27 |     def __repr__(self):
28 |         return str(self)
29 | 
30 |     def __hash__(self):
31 |         return 17 * self.addr + 19 * self.op + 23 * hash(self.arg)
32 | 
33 |     def __eq__(self, other):
34 |         return (self.addr == other.addr and
35 |                 self.op == other.op and
36 |                 self.arg == other.arg)
37 | 


--------------------------------------------------------------------------------
/src/cfg/opcodes.py:
--------------------------------------------------------------------------------
# Opcode table, keyed by the numeric opcode byte.
# schema of each value: [mnemonic, ins, outs, gas]
#   mnemonic -- instruction name
#   ins      -- number of stack items consumed
#   outs     -- number of stack items produced
#   gas      -- gas value recorded for the instruction (see inline notes for
#               entries whose cost is marked as changed)
opcodes = {
    0x00: ['STOP', 0, 0, 0],
    0x01: ['ADD', 2, 1, 3],
    0x02: ['MUL', 2, 1, 5],
    0x03: ['SUB', 2, 1, 3],
    0x04: ['DIV', 2, 1, 5],
    0x05: ['SDIV', 2, 1, 5],
    0x06: ['MOD', 2, 1, 5],
    0x07: ['SMOD', 2, 1, 5],
    0x08: ['ADDMOD', 3, 1, 8],
    0x09: ['MULMOD', 3, 1, 8],
    0x0a: ['EXP', 2, 1, 10],
    0x0b: ['SIGNEXTEND', 2, 1, 5],
    0x10: ['LT', 2, 1, 3],
    0x11: ['GT', 2, 1, 3],
    0x12: ['SLT', 2, 1, 3],
    0x13: ['SGT', 2, 1, 3],
    0x14: ['EQ', 2, 1, 3],
    0x15: ['ISZERO', 1, 1, 3],
    0x16: ['AND', 2, 1, 3],
    0x17: ['OR', 2, 1, 3],
    0x18: ['XOR', 2, 1, 3],
    0x19: ['NOT', 1, 1, 3],
    0x1a: ['BYTE', 2, 1, 3],
    0x1b: ['SHL', 2, 1, 3],
    0x1c: ['SHR', 2, 1, 3],
    0x1d: ['SAR', 2, 1, 3],
    0x20: ['SHA3', 2, 1, 30],
    0x30: ['ADDRESS', 0, 1, 2],
    0x31: ['BALANCE', 1, 1, 20],  # now 400
    0x32: ['ORIGIN', 0, 1, 2],
    0x33: ['CALLER', 0, 1, 2],
    0x34: ['CALLVALUE', 0, 1, 2],
    0x35: ['CALLDATALOAD', 1, 1, 3],
    0x36: ['CALLDATASIZE', 0, 1, 2],
    0x37: ['CALLDATACOPY', 3, 0, 3],
    0x38: ['CODESIZE', 0, 1, 2],
    0x39: ['CODECOPY', 3, 0, 3],
    0x3a: ['GASPRICE', 0, 1, 2],
    0x3b: ['EXTCODESIZE', 1, 1, 20],  # now 700
    0x3c: ['EXTCODECOPY', 4, 0, 20],  # now 700
    0x3d: ['RETURNDATASIZE', 0, 1, 2],
    0x3e: ['RETURNDATACOPY', 3, 0, 3],
    0x40: ['BLOCKHASH', 1, 1, 20],
    0x41: ['COINBASE', 0, 1, 2],
    0x42: ['TIMESTAMP', 0, 1, 2],
    0x43: ['NUMBER', 0, 1, 2],
    0x44: ['DIFFICULTY', 0, 1, 2],
    0x45: ['GASLIMIT', 0, 1, 2],
    0x50: ['POP', 1, 0, 2],
    0x51: ['MLOAD', 1, 1, 3],
    0x52: ['MSTORE', 2, 0, 3],
    0x53: ['MSTORE8', 2, 0, 3],
    0x54: ['SLOAD', 1, 1, 50],  # 200 now
    0x55: ['SSTORE', 2, 0, 0],  # actual cost 5000-20000 depending on circumstance
    0x56: ['JUMP', 1, 0, 8],
    0x57: ['JUMPI', 2, 0, 10],
    0x58: ['PC', 0, 1, 2],
    0x59: ['MSIZE', 0, 1, 2],
    0x5a: ['GAS', 0, 1, 2],
    0x5b: ['JUMPDEST', 0, 0, 1],
    0xa0: ['LOG0', 2, 0, 375],
    0xa1: ['LOG1', 3, 0, 750],
    0xa2: ['LOG2', 4, 0, 1125],
    0xa3: ['LOG3', 5, 0, 1500],
    0xa4: ['LOG4', 6, 0, 1875],
    0xe1: ['SLOADBYTES', 3, 0, 50],  # to be discontinued
    0xe2: ['SSTOREBYTES', 3, 0, 0],  # to be discontinued
    0xe3: ['SSIZE', 1, 1, 50],  # to be discontinued
    0xf0: ['CREATE', 3, 1, 32000],
    0xf1: ['CALL', 7, 1, 40],  # 700 now
    0xf2: ['CALLCODE', 7, 1, 40],  # 700 now
    0xf3: ['RETURN', 2, 0, 0],
    0xf4: ['DELEGATECALL', 6, 1, 40],  # 700 now
    0xf5: ['CALLBLACKBOX', 7, 1, 40],
    0xfa: ['STATICCALL', 6, 1, 40],
    0xfd: ['REVERT', 2, 0, 0],
    0xfe: ['INVALID', 0, 0, 1],
    0xff: ['SELFDESTRUCT', 1, 0, 0],  # 5000 now
}
 82 | 
# PUSH1..PUSH32 occupy 0x60..0x7f.
for i in range(1, 33):
    opcodes[0x5f + i] = ['PUSH' + str(i), 0, 1, 3]

# DUP1..DUP16 occupy 0x80..0x8f and SWAP1..SWAP16 occupy 0x90..0x9f.
for i in range(1, 17):
    opcodes[0x7f + i] = ['DUP' + str(i), i, i + 1, 3]
    opcodes[0x8f + i] = ['SWAP' + str(i), i + 1, i + 1, 3]

# reverse_opcodes maps mnemonic -> numeric opcode. In addition, every
# mnemonic becomes a module-level name bound to its table entry, because
# vars() at module scope is the module namespace. The loop variables ``i``
# and ``o`` remain module-level names as well.
reverse_opcodes = {}
for o in opcodes:
    vars()[opcodes[o][0]] = opcodes[o]
    reverse_opcodes[opcodes[o][0]] = o
 94 | 
# Non-opcode gas prices
GDEFAULT = 1
GMEMORY = 3
GQUADRATICMEMDENOM = 512  # 1 gas per 512 quadwords
GEXPONENTBYTE = 10  # cost of EXP exponent per byte
GCOPY = 3  # cost to copy one 32 byte word
GCONTRACTBYTE = 200  # one byte of code in contract creation
GCALLVALUETRANSFER = 9000  # non-zero-valued call
GLOGBYTE = 8  # cost of a byte of logdata

GTXCOST = 21000  # TX BASE GAS COST
GTXDATAZERO = 4  # TX DATA ZERO BYTE GAS COST
GTXDATANONZERO = 68  # TX DATA NON ZERO BYTE GAS COST
GSHA3WORD = 6  # Cost of SHA3 per word
GSHA256BASE = 60  # Base cost of SHA256
GSHA256WORD = 12  # Cost of SHA256 per word
GRIPEMD160BASE = 600  # Base cost of RIPEMD160
GRIPEMD160WORD = 120  # Cost of RIPEMD160 per word
GIDENTITYBASE = 15  # Base cost of identity
GIDENTITYWORD = 3  # Cost of identity per word
GECRECOVER = 3000  # Cost of ecrecover op

GSTIPEND = 2300

GCALLNEWACCOUNT = 25000
GSELFDESTRUCTREFUND = 24000

# Storage-related gas values
GSTORAGEBASE = 2500
GSTORAGEBYTESTORAGE = 250
GSTORAGEBYTECHANGE = 40
GSTORAGEMIN = 2500
GSSIZE = 50
GSLOADBYTES = 50

GSTORAGEREFUND = 15000
GSTORAGEKILL = 5000
GSTORAGEMOD = 5000
GSTORAGEADD = 20000

GMODEXPQUADDIVISOR = 100
GECADD = 500
GECMUL = 2000

GPAIRINGBASE = 100000
GPAIRINGPERPOINT = 80000

EXP_SUPPLEMENTAL_GAS = 40

# Anti-DoS HF changes
SLOAD_SUPPLEMENTAL_GAS = 150
CALL_SUPPLEMENTAL_GAS = 660
EXTCODELOAD_SUPPLEMENTAL_GAS = 680
BALANCE_SUPPLEMENTAL_GAS = 380
CALL_CHILD_LIMIT_NUM = 63
CALL_CHILD_LIMIT_DENOM = 64
SELFDESTRUCT_SUPPLEMENTAL_GAS = 5000
151 | 
# Memory access descriptors, keyed by mnemonic.
# NOTE(review): the tuples look like (offset operand, size), where negative
# numbers are stack positions and positive numbers are literal sizes --
# confirm against the code that consumes these tables. MSTORE8 uses 8 where
# MSTORE/MLOAD use 32; verify the intended unit.
memory_writes = {'CALLDATACOPY': (-1, -3), 'CODECOPY': (-1, -3), 'EXTCODECOPY': (-2, -4), 'MSTORE': (-1, 32),
                 'MSTORE8': (-1, 8), 'CALL': (-6, -7), 'CALLCODE': (-6, -7), 'DELEGATECALL': (-5, -6)}
memory_reads = {'SHA3': (-1, -2), 'MLOAD': (-1, 32), 'CREATE': (-2, -3), 'CALL': (-4, -5), 'CALLCODE': (-4, -5),
                'RETURN': (-1, -2), 'DELEGATECALL': (-3, -4)}
# Storage access descriptors, keyed by mnemonic.
# NOTE(review): presumably the stack position of the storage key -- confirm.
storage_writes = {'SSTORE': -1}
storage_reads = {'SLOAD': -1}

# Mnemonics whose result may be influenced by a transaction sender.
potentially_user_controlled = ['ORIGIN', 'CALLER', 'CALLVALUE', 'CALLDATALOAD', 'CALLDATASIZE', 'CALLDATACOPY',
                               'EXTCODESIZE', 'EXTCODECOPY', 'MLOAD', 'SLOAD']

# Currently holds exactly the same entries as potentially_user_controlled.
potentially_direct_user_controlled = ['ORIGIN', 'CALLER', 'CALLVALUE', 'CALLDATALOAD', 'CALLDATASIZE', 'CALLDATACOPY',
                               'EXTCODESIZE', 'EXTCODECOPY', 'MLOAD' ,'SLOAD']

# NOTE(review): judging by the name, instructions expected inside an
# access-control check -- confirm against its users.
ins_in_ac_check = ['CALLER','SLOAD']

# Mnemonics that read data supplied by another contract.
external_data = ['RETURNDATACOPY', 'RETURNDATASIZE', 'EXTCODESIZE', 'EXTCODECOPY']

# Mnemonics treated as critical instructions.
CRITICAL = ['CALL', 'DELEGATECALL', 'CALLCODE', 'SELFDESTRUCT']

# map denoting attacker controlled stack arguments
CRITICAL_ARGS = {
    'CALL': [1],
    'DELEGATECALL': [1],
    'CALLCODE': [1],
    'SELFDESTRUCT': [0],
    'JUMPI': [1],
    'ISZERO': [0],
    'GT':[0,1],
    'LT':[0,1]
}
182 | 


--------------------------------------------------------------------------------
/src/cfg/rattle/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | 
4 | from .analyze import *
5 | from .evmasm import *
6 | from .recover import Recover
7 | 


--------------------------------------------------------------------------------
/src/cfg/rattle/analyze.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | 
  4 | from .recover import *
  5 | 
  6 | logger = logging.getLogger(__name__)
  7 | 
  8 | 
  9 | class UseDefGraph(object):
 10 |     value: StackValue
 11 | 
 12 |     def __init__(self, value: StackValue) -> None:
 13 |         self.value = value
 14 | 
 15 |     def dot(self) -> str:
 16 |         rv = ''
 17 |         rv += 'digraph G {\n'
 18 | 
 19 |         es = self.edges(self.value)
 20 | 
 21 |         for reader in self.value.readers():
 22 |             reader_s = str(reader).replace('%', '\\%')
 23 |             value_s = str(self.value).replace('%', '\\%')
 24 |             es.append(f"\"{value_s}\" -> \"{reader_s}\"")
 25 | 
 26 |         rv += '\n'.join(list(set(es)))
 27 |         rv += '\n}'
 28 | 
 29 |     def edges(self, value) -> List[str]:
 30 |         rv = []
 31 |         writer = value.writer
 32 |         if writer is None:
 33 |             return []
 34 | 
 35 |         value_s = str(value).replace('%', '\\%')
 36 |         writer_s = str(writer).replace('%', '\\%')
 37 |         rv.append(f"\"{writer_s}\" -> \"{value_s}\"")
 38 | 
 39 |         for arg in writer:
 40 |             arg_s = str(arg).replace('%', '\\%')
 41 |             writer_s = str(writer).replace('%', '\\%')
 42 |             rv.append(f"\"{arg_s}\" -> \"{writer_s}\"")
 43 |             rv.extend(self.edges(arg))
 44 | 
 45 |         for reader in writer.return_value.readers():
 46 |             reader_s = str(reader).replace('%', '\\%')
 47 |             value_s = str(value).replace('%', '\\%')
 48 |             rv.append(f"\"{value_s}\" -> \"{reader_s}\"")
 49 | 
 50 |         return rv
 51 | 
 52 | 
 53 | class DefUseGraph(object):
 54 |     value: StackValue
 55 | 
 56 |     def __init__(self, value: StackValue) -> None:
 57 |         self.value = value
 58 | 
 59 |     def dot(self, filt=None) -> str:
 60 |         if filt is None:
 61 |             filt = lambda x: True
 62 | 
 63 |         rv = ''
 64 |         rv += 'digraph G {\n'
 65 | 
 66 |         es = self.edges(self.value, filt)
 67 | 
 68 |         for reader in self.value.readers():
 69 |             reader_s = str(reader).replace('%', '\\%')
 70 |             value_s = str(self.value).replace('%', '\\%')
 71 |             es.append(f"\"{value_s}\" -> \"{reader_s}\"")
 72 | 
 73 |         rv += '\n'.join(list(set(es)))
 74 |         rv += '\n}'
 75 | 
 76 |         return rv
 77 | 
 78 |     def edges(self, value, filt) -> List[str]:
 79 |         rv = []
 80 |         writer = value.writer
 81 |         if writer is None:
 82 |             return []
 83 | 
 84 |         value_s = str(value).replace('%', '\\%')
 85 |         writer_s = str(writer).replace('%', '\\%')
 86 |         rv.append(f"\"{writer_s}\" -> \"{value_s}\"")
 87 | 
 88 |         for reader in writer.return_value.readers():
 89 |             reader_s = str(reader).replace('%', '\\%')
 90 |             value_s = str(value).replace('%', '\\%')
 91 |             rv.append(f"\"{value_s}\" -> \"{reader_s}\"")
 92 | 
 93 |             if filt(reader):
 94 |                 rv.extend(self.edges(reader.return_value, filt))
 95 | 
 96 |         return rv
 97 | 
 98 | 
 99 | class ControlFlowGraph(object):
100 |     def __init__(self, function: SSAFunction) -> None:
101 |         self.function = function
102 | 
103 |     def dot(self) -> str:
104 |         rv = ''
105 |         rv += 'digraph G {\n'
106 |         rv += 'graph [fontname = "consolas"];\n'
107 |         rv += 'node [fontname = "consolas"];\n'
108 |         rv += 'edge [fontname = "consolas"];\n'
109 | 
110 |         name = self.function.desc()
111 |         hash = f'Hash: {self.function.hash:#x}'
112 |         offset = f'Start: {self.function.offset:#x}'
113 |         arguments = f'Arguments: {self.function.arguments()}'
114 |         storage = f'Storage: {self.function.storage}'
115 |         # memory = f'Memory: {self.function.memory}'
116 | 
117 |         function_desc = [name, hash, offset, arguments, storage]
118 | 
119 |         rv += f'ff [label="{{' + '\\l'.join(function_desc) + '\\l}}", shape="record" ];'
120 | 
121 |         edges = []
122 | 
123 |         for block in self.function:
124 |             block_id = f'block_{block.offset}'            
125 |             block_body = '\\l'.join([f'{insn.offset:#x}: {insn}' for insn in block])
126 |             block_body = block_body.replace('<', '\\<').replace('>', '\\>')
127 |             block_dot = f'{block_id} [label="{block_body}\\l", shape="record"];'
128 | 
129 |             fallthrough_label = ''
130 |             jump_label = ''
131 |             if len(block.jump_edges) > 0 and block.fallthrough_edge:
132 |                 fallthrough_label = ' [label=" f", color="red"]'
133 |                 jump_label = ' [label=" t", color="darkgreen"]'
134 | 
135 |             if block.fallthrough_edge:
136 |                 target_block_id = f'block_{block.fallthrough_edge.offset}'
137 |                 edges.append(f'{block_id} -> {target_block_id}{fallthrough_label};')
138 | 
139 |             for edge in block.jump_edges:
140 |                 target_block_id = f'block_{edge.offset}'
141 |                 edges.append(f'{block_id} -> {target_block_id}{jump_label};')
142 | 
143 |             rv += block_dot + '\n'
144 | 
145 |         for edge in edges:
146 |             rv += edge + '\n'
147 | 
148 |         rv += '}\n'
149 | 
150 |         return rv
151 | 


--------------------------------------------------------------------------------
/src/cfg/rattle/evmasm.py:
--------------------------------------------------------------------------------
  1 | from typing import Optional, Iterable
  2 | 
  3 | import pyevmasm
  4 | 
  5 | 
  6 | class EVMAsm(object):
  7 |     '''
  8 |         EVM Instruction factory
  9 | 
 10 |         Example use::
 11 | 
 12 |             >>> from manticore.platforms.evm import EVMAsm
 13 |             >>> EVMAsm.disassemble_one('\\x60\\x10')
 14 |             Instruction(0x60, 'PUSH', 1, 0, 1, 0, 'Place 1 byte item on stack.', 16, 0)
 15 |             >>> EVMAsm.assemble_one('PUSH1 0x10')
 16 |             Instruction(0x60, 'PUSH', 1, 0, 1, 0, 'Place 1 byte item on stack.', 16, 0)
 17 |             >>> tuple(EVMAsm.disassemble_all('\\x30\\x31'))
 18 |             (Instruction(0x30, 'ADDRESS', 0, 0, 1, 2, 'Get address of currently executing account.', None, 0),
 19 |              Instruction(0x31, 'BALANCE', 0, 1, 1, 20, 'Get balance of the given account.', None, 1))
 20 |             >>> tuple(EVMAsm.assemble_all('ADDRESS\\nBALANCE'))
 21 |             (Instruction(0x30, 'ADDRESS', 0, 0, 1, 2, 'Get address of currently executing account.', None, 0),
 22 |              Instruction(0x31, 'BALANCE', 0, 1, 1, 20, 'Get balance of the given account.', None, 1))
 23 |             >>> EVMAsm.assemble_hex(
 24 |             ...                         """PUSH1 0x60
 25 |             ...                            BLOCKHASH
 26 |             ...                            MSTORE
 27 |             ...                            PUSH1 0x2
 28 |             ...                            PUSH2 0x100
 29 |             ...                         """
 30 |             ...                      )
 31 |             '0x606040526002610100'
 32 |             >>> EVMAsm.disassemble_hex('0x606040526002610100')
 33 |             'PUSH1 0x60\\nBLOCKHASH\\nMSTORE\\nPUSH1 0x2\\nPUSH2 0x100'
 34 |     '''
 35 | 
 36 |     class EVMInstruction(pyevmasm.Instruction):
 37 |         def __init__(self, opcode: int, name: str, operand_size: int, pops: int, pushes: int, fee: int,
 38 |                      description: str, operand: Optional[int] = None, pc: Optional[int] = 0) -> None:
 39 |             '''
 40 |             This represents an EVM instruction.
 41 |             EVMAsm will create this for you.
 42 | 
 43 |             :param opcode: the opcode value
 44 |             :param name: instruction name
 45 |             :param operand_size: immediate operand size in bytes
 46 |             :param pops: number of items popped from the stack
 47 |             :param pushes: number of items pushed into the stack
 48 |             :param fee: gas fee for the instruction
 49 |             :param description: textual description of the instruction
 50 |             :param operand: optional immediate operand
 51 |             :param pc: optional program counter of this instruction in the program
 52 | 
 53 |             Example use::
 54 | 
 55 |                 instruction = EVMAsm.assemble_one('PUSH1 0x10')
 56 |                 print 'Instruction: %s'% instruction
 57 |                 print '\tdescription:', instruction.description
 58 |                 print '\tgroup:', instruction.group
 59 |                 print '\tpc:', instruction.pc
 60 |                 print '\tsize:', instruction.size
 61 |                 print '\thas_operand:', instruction.has_operand
 62 |                 print '\toperand_size:', instruction.operand_size
 63 |                 print '\toperand:', instruction.operand
 64 |                 print '\tsemantics:', instruction.semantics
 65 |                 print '\tpops:', instruction.pops
 66 |                 print '\tpushes:', instruction.pushes
 67 |                 print '\tbytes:', '0x'+instruction.bytes.encode('hex')
 68 |                 print '\twrites to stack:', instruction.writes_to_stack
 69 |                 print '\treads from stack:', instruction.reads_from_stack
 70 |                 print '\twrites to memory:', instruction.writes_to_memory
 71 |                 print '\treads from memory:', instruction.reads_from_memory
 72 |                 print '\twrites to storage:', instruction.writes_to_storage
 73 |                 print '\treads from storage:', instruction.reads_from_storage
 74 |                 print '\tis terminator', instruction.is_terminator
 75 | 
 76 | 
 77 |             '''
 78 |             super().__init__(opcode, name, operand_size, pops, pushes, fee, description, operand, pc)
 79 |             if operand_size != 0 and operand is not None:
 80 |                 mask = (1 << operand_size * 8) - 1
 81 |                 if ~mask & operand:
 82 |                     raise ValueError("operand should be %d bits long" % (operand_size * 8))
 83 | 
 84 |         def __repr__(self) -> str:
 85 |             output = 'EVMInstruction(0x{:x}, {}, {:d}, {:d}, {:d}, {:d}, {}, {}, {})'.format(
 86 |                 self._opcode, self._name, self._operand_size,
 87 |                 self._pops, self._pushes, self._fee, self._description, self._operand, self._pc)
 88 |             return output
 89 | 
 90 |         def __hash__(self) -> int:
 91 |             return hash((self._opcode, self._pops, self._pushes, self._pc))
 92 | 
 93 |         @property
 94 |         def is_push(self) -> bool:
 95 |             return self.semantics == 'PUSH'
 96 | 
 97 |         @property
 98 |         def is_pop(self) -> bool:
 99 |             return self.semantics == 'POP'
100 | 
101 |         @property
102 |         def is_dup(self) -> bool:
103 |             return self.semantics == 'DUP'
104 | 
105 |         @property
106 |         def is_swap(self) -> bool:
107 |             return self.semantics == 'SWAP'
108 | 
109 |         @property
110 |         def is_comparison(self) -> bool:
111 |             return self.semantics in ('LT', 'GT', 'SLT', 'SGT', 'EQ', 'ISZERO')
112 | 
113 |         @property
114 |         def is_boolean_logic(self) -> bool:
115 |             return self.semantics in ('AND', 'OR', 'XOR', 'NOT')
116 | 
117 |     @staticmethod
118 |     def convert_instruction_to_evminstruction(instruction):
119 |         return EVMAsm.EVMInstruction(instruction._opcode, instruction._name, instruction._operand_size,
120 |                                      instruction._pops, instruction._pushes, instruction._fee,
121 |                                      instruction._description, instruction._operand, instruction._pc)
122 | 
123 |     @staticmethod
124 |     def assemble_one(assembler: str, pc: int = 0, fork=pyevmasm.DEFAULT_FORK) -> EVMInstruction:
125 |         ''' Assemble one EVM instruction from its textual representation.
126 | 
127 |             :param assembler: assembler code for one instruction
128 |             :param pc: program counter of the instruction in the bytecode (optional)
129 |             :return: An Instruction object
130 | 
131 |             Example use::
132 | 
133 |                 >>> print evm.EVMAsm.assemble_one('LT')
134 | 
135 | 
136 |         '''
137 |         instruction = pyevmasm.assemble_one(assembler, pc, fork)
138 |         return EVMAsm.convert_instruction_to_evminstruction(instruction)
139 | 
140 |     @staticmethod
141 |     def convert_multiple_instructions_to_evminstructions(instructions):
142 |         for i in instructions:
143 |             yield EVMAsm.convert_instruction_to_evminstruction(i)
144 | 
145 |     @staticmethod
146 |     def assemble_all(assembler: str, pc: int = 0, fork=pyevmasm.DEFAULT_FORK) -> Iterable[EVMInstruction]:
147 |         ''' Assemble a sequence of textual representation of EVM instructions
148 | 
149 |             :param assembler: assembler code for any number of instructions
150 |             :param pc: program counter of the first instruction in the bytecode(optional)
151 |             :return: An generator of Instruction objects
152 | 
153 |             Example use::
154 | 
155 |                 >>> evm.EVMAsm.assemble_one("""PUSH1 0x60\n \
156 |                             PUSH1 0x40\n \
157 |                             MSTORE\n \
158 |                             PUSH1 0x2\n \
159 |                             PUSH2 0x108\n \
160 |                             PUSH1 0x0\n \
161 |                             POP\n \
162 |                             SSTORE\n \
163 |                             PUSH1 0x40\n \
164 |                             MLOAD\n \
165 |                             """)
166 | 
167 |         '''
168 |         instructions = pyevmasm.assemble_all(assembler, pc, fork)
169 |         return EVMAsm.convert_multiple_instructions_to_evminstructions(instructions)
170 | 
171 |     @staticmethod
172 |     def disassemble_one(bytecode: Iterable, pc: int = 0, fork=pyevmasm.DEFAULT_FORK) -> EVMInstruction:
173 |         ''' Decode a single instruction from a bytecode
174 | 
175 |             :param bytecode: the bytecode stream
176 |             :param pc: program counter of the instruction in the bytecode(optional)
177 |             :type bytecode: iterator/sequence/str
178 |             :return: an Instruction object
179 | 
180 |             Example use::
181 | 
182 |                 >>> print EVMAsm.assemble_one('PUSH1 0x10')
183 | 
184 |         '''
185 |         instruction = pyevmasm.disassemble_one(bytecode, pc, fork)
186 |         return EVMAsm.convert_instruction_to_evminstruction(instruction)
187 | 
188 |     @staticmethod
189 |     def disassemble_all(bytecode: Iterable, pc: int = 0, fork=pyevmasm.DEFAULT_FORK) -> Iterable[EVMInstruction]:
190 |         ''' Decode all instructions in bytecode
191 | 
192 |             :param bytecode: an evm bytecode (binary)
193 |             :param pc: program counter of the first instruction in the bytecode(optional)
194 |             :type bytecode: iterator/sequence/str
195 |             :return: An generator of Instruction objects
196 | 
197 |             Example use::
198 | 
199 |                 >>> for inst in EVMAsm.decode_all(bytecode):
200 |                 ...    print inst
201 | 
202 |                 ...
203 |                 PUSH1 0x60
204 |                 PUSH1 0x40
205 |                 MSTORE
206 |                 PUSH1 0x2
207 |                 PUSH2 0x108
208 |                 PUSH1 0x0
209 |                 POP
210 |                 SSTORE
211 |                 PUSH1 0x40
212 |                 MLOAD
213 | 
214 | 
215 |         '''
216 |         instructions = pyevmasm.disassemble_all(bytecode, pc, fork)
217 |         return EVMAsm.convert_multiple_instructions_to_evminstructions(instructions)
218 | 
219 |     @staticmethod
220 |     def disassemble(bytecode: Iterable, pc: int = 0, fork=pyevmasm.DEFAULT_FORK) -> str:
221 |         ''' Disassemble an EVM bytecode
222 | 
223 |             :param bytecode: binary representation of an evm bytecode (hexadecimal)
224 |             :param pc: program counter of the first instruction in the bytecode(optional)
225 |             :type bytecode: str
226 |             :return: the text representation of the assembler code
227 | 
228 |             Example use::
229 | 
230 |                 >>> EVMAsm.disassemble("\x60\x60\x60\x40\x52\x60\x02\x61\x01\x00")
231 |                 ...
232 |                 PUSH1 0x60
233 |                 BLOCKHASH
234 |                 MSTORE
235 |                 PUSH1 0x2
236 |                 PUSH2 0x100
237 | 
238 |         '''
239 |         return pyevmasm.disassemble(bytecode, pc, fork)
240 | 
241 |     @staticmethod
242 |     def assemble(asmcode, pc=0, fork=pyevmasm.DEFAULT_FORK):
243 |         return pyevmasm.assemble(asmcode, pc, fork)
244 | 
245 |     @staticmethod
246 |     def disassemble_hex(bytecode: str, pc: int = 0, fork=pyevmasm.DEFAULT_FORK) -> str:
247 |         ''' Disassemble an EVM bytecode
248 | 
249 |             :param bytecode: canonical representation of an evm bytecode (hexadecimal)
250 |             :param int pc: program counter of the first instruction in the bytecode(optional)
251 |             :type bytecode: str
252 |             :return: the text representation of the assembler code
253 | 
254 |             Example use::
255 | 
256 |                 >>> EVMAsm.disassemble_hex("0x6060604052600261010")
257 |                 ...
258 |                 PUSH1 0x60
259 |                 BLOCKHASH
260 |                 MSTORE
261 |                 PUSH1 0x2
262 |                 PUSH2 0x100
263 | 
264 |         '''
265 |         return pyevmasm.disassemble_hex(bytecode, pc, fork)
266 | 
267 |     @staticmethod
268 |     def assemble_hex(asmcode, pc=0, fork=pyevmasm.DEFAULT_FORK):
269 |         return pyevmasm.assemble_hex(asmcode, pc, fork)
270 | 


--------------------------------------------------------------------------------
/src/constraints.py:
--------------------------------------------------------------------------------
  1 | import copy
  2 | import itertools
  3 | import logging
  4 | from collections import defaultdict
  5 | 
  6 | from z3 import z3, z3util
  7 | 
  8 | from src.evm.exceptions import IntractablePath
  9 | from src.evm.state import SymRead, concrete
 10 | from src.util.utils import big_endian_to_int, sha3
 11 | from src.util.z3_extra_util import get_vars_non_recursive, to_bytes, simplify_non_const_hashes
 12 | 
 13 | 
 14 | class UnresolvedConstraints(Exception):
 15 |     def __init__(self, unresolved):
 16 |         super(UnresolvedConstraints, self).__init__(unresolved)
 17 |         self.unresolved = unresolved
 18 | 
 19 | 
 20 | def array_to_array(model, array, length):
 21 |     return bytes([model.eval(array[i]).as_long() for i in range(length)])
 22 | 
 23 | 
 24 | def get_level(name):
 25 |     try:
 26 |         return int(name[name.rfind('_') + 1:])
 27 |     except:
 28 |         return 0
 29 | 
 30 | 
 31 | def model_to_calls(model, idx_dict):
 32 |     calls = defaultdict(dict)
 33 |     for vref in model:
 34 |         name = vref.name()
 35 |         v = model[vref]
 36 |         if name.split('_')[0] not in ('CALLDATASIZE', 'CALLDATA', 'CALLVALUE', 'CALLER', 'ORIGIN'):
 37 |             continue
 38 |         call_index = idx_dict[get_level(name)]
 39 |         call = calls[call_index]
 40 |         if name.startswith('CALLDATASIZE'):
 41 |             payload_size = model.eval(v).as_long()
 42 |             call['payload_size'] = payload_size
 43 |         elif name.startswith('CALLDATA'):
 44 |             call['payload_model'] = v
 45 |         elif name.startswith('CALLVALUE'):
 46 |             call['value'] = model.eval(v).as_long()
 47 |         elif name.startswith('CALLER'):
 48 |             call['caller'] = model.eval(v).as_long()
 49 |         elif name.startswith('ORIGIN'):
 50 |             call['origin'] = model.eval(v).as_long()
 51 |         else:
 52 |             logging.warning('CANNOT CONVERT %s', name)
 53 | 
 54 |     for call in calls.values():
 55 |         if 'payload_model' not in call:
 56 |             call['payload'] = bytes()
 57 |         else:
 58 |             assert 'payload_size' in call
 59 |             call['payload'] = array_to_array(model, call['payload_model'], call['payload_size'])
 60 |         call.pop('payload_size', None)
 61 |         call.pop('payload_model', None)
 62 | 
 63 |     return [v for k, v in sorted(calls.items())]
 64 | 
 65 | 
# Default for the ``size`` parameter of symread_eq() / symread_neq(): the
# number of byte offsets compared when both operands are SymReads.
# MAX_SYM_READ_SIZE = 512
MAX_SYM_READ_SIZE = 256
 68 | 
 69 | 
 70 | def symread_eq(a, b, size=MAX_SYM_READ_SIZE):
 71 |     if not isinstance(a, SymRead) and not isinstance(b, SymRead):
 72 |         if a.size() != b.size():
 73 |             return z3.BoolVal(False)
 74 |         else:
 75 |             return a == b
 76 |     elif isinstance(a, SymRead) and isinstance(b, SymRead):
 77 |         # both have symbolic size
 78 |         return z3.And(a.size == b.size,
 79 |                       *(z3.If(z3.ULT(i, a.size), a.memory[a.start + i] == b.memory[b.start + i], True) for i in
 80 |                         range(size)))
 81 |     else:
 82 |         if isinstance(b, SymRead):
 83 |             # ensure that a is the one with symbolic size
 84 |             a, b = b, a
 85 |         return z3.And(a.size == (b.size() // 8), z3.Concat(*a.memory.read(a.start, b.size() // 8)) == b)
 86 | 
 87 | 
 88 | def symread_neq(a, b, size=MAX_SYM_READ_SIZE):
 89 |     return z3.Not(symread_eq(a, b, size))
 90 | 
 91 | 
 92 | def symread_substitute(x, subst):
 93 |     if not isinstance(x, SymRead):
 94 |         return z3.simplify(z3.substitute(x, subst))
 95 |     else:
 96 |         new_symread = copy.copy(x)
 97 |         new_symread.memory.memory = z3.simplify(z3.substitute(new_symread.memory.memory, subst))
 98 |         if not concrete(new_symread.start):
 99 |             new_symread.start = z3.simplify(z3.substitute(new_symread.start, subst))
100 |         if not concrete(new_symread.size):
101 |             new_symread.size = z3.simplify(z3.substitute(new_symread.size, subst))
102 |         return new_symread
103 | 
104 | 
def check_model_and_resolve(constraints, sha_constraints):
    '''Solve ``constraints``, retrying once if hash constraints stay unresolved.

    The first attempt calls ``check_model_and_resolve_inner`` directly.  If
    that raises ``UnresolvedConstraints``, every constraint is rewritten with
    ``simplify_non_const_hashes`` (given the ids of all hash keys) and the
    inner routine is run again with ``second_try=True``.
    '''
    try:
        first_attempt = check_model_and_resolve_inner(constraints, sha_constraints)
    except UnresolvedConstraints:
        hash_ids = {hash_key.get_id() for hash_key in sha_constraints.keys()}
        rewritten = [simplify_non_const_hashes(constraint, hash_ids) for constraint in constraints]
        return check_model_and_resolve_inner(rewritten, sha_constraints, second_try=True)
    else:
        return first_attempt
112 | 
def check_model_and_resolve_inner(constraints, sha_constraints, second_try=False):
    '''Check ``constraints`` and merge hash symbols that cannot differ.

    :param constraints: list of z3 constraints
    :param sha_constraints: dict mapping each hash symbol to its preimage
        (a z3 expression or a ``SymRead``)
    :param second_try: forwarded unchanged to ``check_and_model``
    :return: ``s.model()`` when there are no hash constraints, otherwise the
        result of ``check_and_model``
    :raises IntractablePath: if ``constraints`` alone are not satisfiable
    '''
    # logging.debug('-' * 32)
    extra_constraints = []
    s = z3.SolverFor("QF_ABV")
    s.add(constraints)
    if s.check() != z3.sat:
        # NOTE(review): IntractablePath stores tuple(trace), so the string
        # "CHECK" ends up as a tuple of single characters - confirm intent.
        raise IntractablePath("CHECK", "MODEL")
    else:
        if not sha_constraints:
            return s.model()
    # Each pass of the outer loop enumerates all pairs of hash symbols again;
    # it is restarted whenever two hashes get merged (inner ``break``) and
    # ends once a full pass completes without a merge (``for ... else``).
    while True:
        ne_constraints = []
        for a, b in itertools.combinations(sha_constraints.keys(), 2):
            # Two non-SymRead preimages of different size: record a != b and
            # skip the solver query for this pair.
            if (not isinstance(sha_constraints[a], SymRead) and not isinstance(sha_constraints[b], SymRead) and
                    sha_constraints[a].size() != sha_constraints[b].size()):
                ne_constraints.append(a != b)
                continue
            # Ask whether the hashes AND their preimages can differ at the same time.
            s = z3.SolverFor("QF_ABV")
            s.add(constraints + ne_constraints + extra_constraints + [a != b, symread_neq(sha_constraints[a],
                                                                                          sha_constraints[b])])
            check_result = s.check()
            # logging.debug("Checking hashes %s and %s: %s", a, b, check_result)
            if check_result == z3.unsat:
                # logging.debug("Hashes MUST be equal: %s and %s", a, b)
                # Replace a by b everywhere and require equal preimages.
                subst = [(a, b)]
                extra_constraints = [z3.simplify(z3.substitute(c, subst)) for c in extra_constraints]
                extra_constraints.append(symread_eq(symread_substitute(sha_constraints[a], subst),
                                                    symread_substitute(sha_constraints[b], subst)))
                constraints = [z3.simplify(z3.substitute(c, subst)) for c in constraints]
                b_val = symread_substitute(sha_constraints[b], subst)
                # NOTE(review): ``not sha is a or sha is b`` parses as
                # ``(sha is not a) or (sha is b)``, so only ``a`` is dropped
                # here; the entry for ``b`` is overwritten right below.
                sha_constraints = {z3.substitute(sha, subst): symread_substitute(sha_value, subst) for
                                   sha, sha_value in
                                   sha_constraints.items() if not sha is a or sha is b}
                sha_constraints[b] = b_val
                break
            else:
                # logging.debug("Hashes COULD be equal: %s and %s", a, b)
                pass
        else:
            break

    return check_and_model(constraints + extra_constraints, sha_constraints, ne_constraints, second_try=second_try)
155 | 
156 | 
def check_and_model(constraints, sha_constraints, ne_constraints, second_try=False):
    '''Build a model while resolving hash symbols to concrete sha3 values.

    Constraints that mention no unresolved hash symbol are added to the
    solver.  Preimages whose variables are all resolved are evaluated in the
    current model, hashed with ``sha3`` and pinned in the solver.  This
    repeats for as long as a pass makes progress.

    :param constraints: list of z3 constraints
    :param sha_constraints: dict mapping each hash symbol to its preimage
        (a z3 expression or a ``SymRead``)
    :param ne_constraints: constraints added to the solver up front
    :param second_try: when true, leftover constraints raise
        ``IntractablePath`` instead of ``UnresolvedConstraints``
    :return: ``sol.model()`` once no constraint is left over
    :raises IntractablePath: if the solver is not sat, if a ``SymRead``
        yields no data, or if constraints remain on a second try
    :raises UnresolvedConstraints: if constraints remain on the first try
    '''
    # logging.debug(' ' * 16 + '-' * 16)

    unresolved = set(sha_constraints.keys())
    sol = z3.SolverFor("QF_ABV")
    sol.add(ne_constraints)
    todo = constraints
    progress = True
    all_vars = dict()
    while progress:
        new_todo = []
        progress = False
        # Add every constraint that does not mention an unresolved hash;
        # keep the rest for a later pass.
        for c in todo:
            all_vars[c] = get_vars_non_recursive(c, include_select=True, include_indices=False)
            if any(x in unresolved for x in all_vars[c]):
                new_todo.append(c)
            else:
                progress = True
                sol.add(c)
        # ids of all variables in postponed constraints plus the unresolved hashes themselves
        unresolved_vars = set(v.get_id() for c in new_todo for v in all_vars[c]) | set(v.get_id() for v in unresolved)
        # logging.debug("Unresolved vars: %s", ','.join(map(str, unresolved_vars)))
        if sol.check() != z3.sat:
            raise IntractablePath()
        m = sol.model()
        unresolved_todo = list(set(unresolved))
        while unresolved_todo:
            u = unresolved_todo.pop()
            c = sha_constraints[u]
            if isinstance(c, SymRead):
                # NOTE: ``vars`` shadows the builtin of the same name inside this function.
                vars = set()
                if not concrete(c.start):
                    vars |= get_vars_non_recursive(c.start, include_select=True)
                if not concrete(c.size):
                    vars |= get_vars_non_recursive(c.size, include_select=True)
                # logging.debug("Trying to resolve %s, start and size vars: %s", u, ','.join(map(str, vars)))
                if any(x.get_id() in unresolved_vars for x in vars):
                    continue
                # Pin symbolic start/size to the values of the current model.
                start = c.start
                if not concrete(c.start):
                    tmp = m.eval(c.start)
                    if not z3util.is_expr_val(tmp):
                        continue
                    start = tmp.as_long()
                    sol.add(c.start == start)
                size = c.size
                if not concrete(c.size):
                    tmp = m.eval(c.size)
                    if not z3util.is_expr_val(tmp):
                        continue
                    size = tmp.as_long()
                    sol.add(c.size == size)

                data = c.memory.read(start, size)
                if isinstance(data, list):
                    if len(data) > 1:
                        data = z3.Concat(*data)
                    elif len(data) == 1:
                        data = data[0]
                    else:
                        raise IntractablePath()
                # Work on a copy so the dict passed in by the caller is not modified,
                # then re-queue ``u`` so the branch below handles the read data.
                sha_constraints = dict(sha_constraints)
                sha_constraints[u] = data
                unresolved_todo.append(u)
            else:
                vars = get_vars_non_recursive(c, include_select=True)
                # logging.debug("Trying to resolve %s, vars: %s", u, ','.join(map(str, vars)))
                if any(x.get_id() in unresolved_vars for x in vars):
                    continue
                v = m.eval(c)
                if z3util.is_expr_val(v):
                    # Preimage has a concrete value: pin it and its sha3 hash.
                    sha = big_endian_to_int(sha3(to_bytes(v)))
                    sol.add(c == v)
                    sol.add(u == sha)
                    unresolved.remove(u)
                    progress = True
        todo = new_todo
    if sol.check() != z3.sat:
        raise IntractablePath()
    if todo:
        if second_try:
            raise IntractablePath()
        raise UnresolvedConstraints(unresolved)
    return sol.model()
240 | 
241 | 
def dependency_summary(constraints, sha_constraints, detailed=False):
    '''Collect the variables that ``constraints`` depend on.

    Variables are gathered from every z3 expression in ``constraints``.  Any
    variable that is a key of ``sha_constraints`` (a hash symbol) is replaced
    by the variables of its preimage, repeatedly, until no hash symbol is
    left in the result.

    :param constraints: iterable of constraints; non-z3 items are ignored
    :param sha_constraints: dict mapping hash symbols to their preimages
    :param detailed: passed as ``include_select`` to ``get_vars_non_recursive``
    :return: set of variables
    '''
    all_dependencies = set(x for c in constraints if z3.is_expr(c) for x in
                           get_vars_non_recursive(z3.simplify(c), include_select=detailed))
    changed = True
    while changed:
        changed = False
        for x in set(all_dependencies):
            if x in sha_constraints:
                changed = True
                all_dependencies.discard(x)
                # ``include_select`` belongs to get_vars_non_recursive, not to
                # z3.simplify (it was previously passed to the latter).
                all_dependencies.update(
                    get_vars_non_recursive(z3.simplify(sha_constraints[x]), include_select=detailed))
    return all_dependencies
255 | 


--------------------------------------------------------------------------------
/src/evm/__init__.py:
--------------------------------------------------------------------------------
1 | from . import evm
2 | from . import exceptions
3 | from . import results
4 | from . import state
5 | 


--------------------------------------------------------------------------------
/src/evm/exceptions.py:
--------------------------------------------------------------------------------
 1 | class ExternalData(Exception):
 2 |     pass
 3 | 
 4 | 
 5 | class SymbolicError(Exception):
 6 |     pass
 7 | 
 8 | 
 9 | class IntractablePath(Exception):
10 |     def __init__(self, trace=[], remainingpath=[]):
11 |         self.trace = tuple(trace)
12 |         self.remainingpath = tuple(remainingpath)
13 | 
14 | 
class VMException(Exception):
    '''Plain ``Exception`` subclass with no extra state.

    NOTE(review): raise sites are outside this module; presumably raised by
    the EVM emulation - confirm.
    '''
17 | 
class TimeoutException(Exception):
    '''Plain ``Exception`` subclass with no extra state.

    NOTE(review): raise sites are outside this module; presumably raised
    when an analysis step exceeds its time budget - confirm.
    '''
20 | 


--------------------------------------------------------------------------------
/src/evm/results.py:
--------------------------------------------------------------------------------
  1 | import itertools
  2 | 
  3 | from z3 import z3
  4 | 
  5 | from src.evm.state import SymRead, LazySubstituteState, translate
  6 | from src.util.z3_extra_util import get_vars_non_recursive, concrete, ast_eq
  7 | 
  8 | 
  9 | class SymbolicResult(object):
 10 |     def __init__(self, xid, state, constraints, sha_constraints, target_op, pib):
 11 |         self.xid = xid
 12 |         self.state = state
 13 |         self.constraints = constraints
 14 |         self.sha_constraints = sha_constraints
 15 |         self.target_op = target_op
 16 |         self.calls = 1
 17 |         self._simplified = False
 18 |         self.storage_info = StorageInfo(self)
 19 |         self.possible_intended_behavior=pib
 20 | 
 21 |     def simplify(self):
 22 |         if self._simplified:
 23 |             return
 24 |         self.constraints = [z3.simplify(c) for c in self.constraints]
 25 |         self.sha_constraints = {sha: z3.simplify(sha_value) if not isinstance(sha_value, SymRead) else sha_value for
 26 |                                 sha, sha_value in self.sha_constraints.items()}
 27 |         self._simplified = True
 28 | 
 29 |     def copy(self):
 30 |         new_xid = gen_exec_id()
 31 | 
 32 |         self.simplify()
 33 | 
 34 |         new_constraints = [translate(c, new_xid) for c in self.constraints]
 35 |         new_sha_constraints = {translate(sha, new_xid): translate(sha_value, new_xid) if not isinstance(sha_value,
 36 |                                                                                                         SymRead) else sha_value.translate(
 37 |             new_xid) for sha, sha_value in
 38 |                                self.sha_constraints.items()}
 39 |         new_state = self.state.copy(new_xid)
 40 | 
 41 |         return SymbolicResult(new_xid, new_state, new_constraints, new_sha_constraints, self.target_op)
 42 | 
 43 |     def may_read_from(self, other):
 44 |         return self.storage_info.may_read_from(other.storage_info)
 45 | 
 46 | 
 47 | class CombinedSymbolicResult(object):
 48 |     def __init__(self):
 49 |         self.results = []
 50 |         self._constraints = None
 51 |         self._sha_constraints = None
 52 |         self._states = None
 53 |         self._idx_dict = None
 54 |         self.calls = 0
 55 | 
 56 |     def _reset(self):
 57 |         self._constraints = None
 58 |         self._sha_constraints = None
 59 |         self._states = None
 60 | 
 61 |     def combine(self, storage=dict(), initial_balance=None):
 62 |         extra_subst = []
 63 | 
 64 |         storage_base = z3.K(z3.BitVecSort(256), z3.BitVecVal(0, 256))
 65 |         for k, v in storage.items():
 66 |             storage_base = z3.Store(storage_base, k, v)
 67 |         for result in self.results:
 68 |             extra_subst.append((result.state.storage.base, storage_base))
 69 |             storage_base = z3.substitute(result.state.storage.storage, extra_subst)
 70 | 
 71 |         extra_constraints = []
 72 |         if initial_balance is not None:
 73 |             balance_base = z3.BitVecVal(initial_balance, 256)
 74 |         else:
 75 |             balance_base = None
 76 |         for result in self.results:
 77 |             if balance_base is not None:
 78 |                 extra_subst.append((result.state.start_balance, balance_base))
 79 |                 balance_base = z3.substitute(result.state.balance, extra_subst)
 80 |             else:
 81 |                 balance_base = result.state.balance
 82 | 
 83 |         self._states = [LazySubstituteState(r.state, extra_subst) for r in self.results]
 84 |         self._constraints = [z3.substitute(c, extra_subst) for r in self.results for c in
 85 |                              r.constraints] + extra_constraints
 86 |         self._sha_constraints = {
 87 |             sha: z3.substitute(sha_value, extra_subst) if not isinstance(sha_value, SymRead) else sha_value for r in
 88 |             self.results for sha, sha_value in r.sha_constraints.items()}
 89 | 
 90 |         self._idx_dict = {r.xid: i for i, r in enumerate(self.results)}
 91 | 
 92 |     def prepend(self, result):
 93 |         self.calls += 1
 94 |         self.results = [result] + self.results
 95 |         self._reset()
 96 | 
 97 |     @property
 98 |     def idx_dict(self):
 99 |         if self._idx_dict is None:
100 |             self.combine()
101 |         return self._idx_dict
102 | 
103 |     @property
104 |     def constraints(self):
105 |         if self._constraints is None:
106 |             self.combine()
107 |         return self._constraints
108 | 
109 |     @property
110 |     def sha_constraints(self):
111 |         if self._sha_constraints is None:
112 |             self.combine()
113 |         return self._sha_constraints
114 | 
115 |     @property
116 |     def states(self):
117 |         if not self._states:
118 |             self.combine()
119 |         return self._states
120 | 
121 |     @property
122 |     def state(self):
123 |         return self.states[-1]
124 | 
125 |     def simplify(self):
126 |         self._constraints = [z3.simplify(c) for c in self.constraints]
127 |         self._sha_constraints = {sha: (z3.simplify(sha_value) if not isinstance(sha_value, SymRead) else sha_value) for
128 |                                  sha, sha_value in self.sha_constraints.items()}
129 | 
130 | 
class StorageInfo(object):
    """Classification of the storage addresses accessed by one result.

    Each address found in ``result.state.storage.reads`` / ``.writes`` is put
    into one of three buckets per direction:

    * concrete addresses,
    * symbolic addresses that mention at least one key of
      ``result.sha_constraints`` ("hash-based" accesses),
    * all remaining symbolic addresses.
    """

    def __init__(self, result):
        self.result = result
        # symbolic address -> variables returned by get_vars_non_recursive
        self._vars = dict()
        self.concrete_reads = set()
        self.concrete_writes = set()
        self.symbolic_reads = set()
        self.symbolic_writes = set()
        self.symbolic_hash_reads = set()
        self.symbolic_hash_writes = set()
        self._classify(result.state.storage.reads,
                       self.concrete_reads, self.symbolic_reads, self.symbolic_hash_reads)
        self._classify(result.state.storage.writes,
                       self.concrete_writes, self.symbolic_writes, self.symbolic_hash_writes)

    def _classify(self, addresses, concrete_bucket, symbolic_bucket, hash_bucket):
        """Sort every distinct address into exactly one of the given buckets."""
        for addr in set(addresses):
            if concrete(addr):
                concrete_bucket.add(addr)
                continue
            addr_vars = get_vars_non_recursive(addr, True)
            self._vars[addr] = addr_vars
            mentions_hash = set(addr_vars) & set(self.result.sha_constraints.keys())
            target = hash_bucket if mentions_hash else symbolic_bucket
            target.add(addr)

    def may_read_from(self, other):
        """Tell whether a read of ``self`` could hit an address written by ``other``.

        :param other: the ``StorageInfo`` whose writes are considered
        :return: ``True`` if an overlap cannot be ruled out, ``False`` otherwise
        """
        if self.symbolic_reads or other.symbolic_writes:
            # at least one side has a non-hash-based symbolic access
            # => any access on the reading side combined with any access on
            # the writing side could refer to the same slot
            reads_anything = self.symbolic_reads or self.concrete_reads or self.symbolic_hash_reads
            writes_anything = other.symbolic_writes or other.concrete_writes or other.symbolic_hash_writes
            if reads_anything and writes_anything:
                return True
        elif self.concrete_reads & other.concrete_writes:
            # neither side has a non-hash-based symbolic access
            # => only identical concrete addresses can intersect
            # (hash-based accesses are examined below)
            return True

        if not (self.symbolic_hash_reads and other.symbolic_hash_writes):
            return False

        for read_addr, write_addr in itertools.product(self.symbolic_hash_reads, other.symbolic_hash_writes):
            if not ast_eq(read_addr, write_addr):
                continue
            read_hashes = list(self._vars[read_addr] & set(self.result.sha_constraints.keys()))
            write_hashes = list(other._vars[write_addr] & set(other.result.sha_constraints.keys()))
            if not (len(read_hashes) == 1 and len(write_hashes) == 1):
                # multiple hashes on either side
                # => assume they could be equal
                return True
            # exactly one hash on either side
            # => check whether the hashed data could actually be equal
            read_data = self.result.sha_constraints[read_hashes[0]]
            write_data = other.result.sha_constraints[write_hashes[0]]
            if isinstance(read_data, SymRead) or isinstance(write_data, SymRead):
                return True
            if read_data.size() == write_data.size():
                return True

        # every combination has been checked without finding an overlap
        return False
200 | 
201 | 
def gen_exec_id():
    """Return the next execution id: 0 on the first call, then 1, 2, ...

    The counter is kept as the ``xid`` attribute of this function object.
    """
    next_xid = getattr(gen_exec_id, "xid", -1) + 1
    gen_exec_id.xid = next_xid
    return next_xid
208 | 


--------------------------------------------------------------------------------
/src/evm/state.py:
--------------------------------------------------------------------------------
  1 | from z3 import z3
  2 | 
  3 | from src.evm.exceptions import SymbolicError
  4 | from src.memory import UninitializedRead
  5 | from src.util.z3_extra_util import concrete, get_vars_non_recursive
  6 | 
  7 | 
  8 | class Stack(list):
  9 |     def __init__(self, *args):
 10 |         super(Stack, self).__init__(*args)
 11 | 
 12 |     def push(self, v):
 13 |         self.append(v)
 14 | 
 15 |     def append(self, v):
 16 |         if concrete(v):
 17 |             v %= 2 ** 256
 18 |         super(Stack, self).append(v)
 19 | 
 20 | 
 21 | class Memory(object):
 22 |     def __init__(self, *args):
 23 |         self.memory = bytearray(*args)
 24 |         self._check_initialized = False
 25 |         self.initialized = set()
 26 | 
 27 |     def __getitem__(self, index):
 28 |         if isinstance(index, slice):
 29 |             initialized = all(i in self.initialized for i in range(index.start or 0, index.stop, index.step or 1))
 30 |         else:
 31 |             initialized = index in self.initialized
 32 |         if not self._check_initialized or initialized:
 33 |             return self.memory[index]
 34 |         else:
 35 |             raise UninitializedRead(index)
 36 | 
 37 |     def __setitem__(self, index, v):
 38 |         if isinstance(index, slice):
 39 |             for i in range(index.start or 0, index.stop, index.step or 1):
 40 |                 self.initialized.add(i)
 41 |         else:
 42 |             self.initialized.add(index)
 43 |         self.memory[index] = v
 44 | 
 45 |     def set_enforcing(self, enforcing=True):
 46 |         self._check_initialized = enforcing
 47 | 
 48 |     def extend(self, start, size):
 49 |         if len(self.memory) < start + size:
 50 |             self.memory += bytearray(start + size - len(self.memory))
 51 | 
 52 |     def __len__(self):
 53 |         return len(self.memory)
 54 | 
 55 | 
class SymbolicMemory(object):
    """Byte-addressed memory backed by a z3 array.

    ``self.memory`` is a z3 array from 256-bit addresses to 8-bit values that
    starts out as the constant-zero array.  Addresses, sizes and stored values
    may be concrete or symbolic, as decided by ``concrete``.
    """
    # Number of bytes written by copy() when the output length is symbolic.
    MAX_SYMBOLIC_WRITE_SIZE = 256

    def __init__(self):
        self.memory = z3.K(z3.BitVecSort(256), z3.BitVecVal(0, 8))
        # Counters of single-cell writes/reads done through item access.
        self.write_count = 0
        self.read_count = 0

    def __getitem__(self, index):
        """Read one cell, or a list of cells for a slice with concrete bounds.

        A cell whose simplified value is a z3 bit-vector literal is returned
        as a Python int; any other cell is returned as the simplified z3
        expression.

        :raises ValueError: for a slice without an upper bound
        :raises SymbolicError: for a slice with a non-concrete bound
        """
        if isinstance(index, slice):
            if index.stop is None:
                raise ValueError("Need upper memory address!")
            if (index.start is not None and not concrete(index.start)) or not concrete(index.stop):
                raise SymbolicError("Use mem.read for symbolic range reads")
            r = []
            # Each element goes through the single-cell branch below.
            for i in range(index.start or 0, index.stop, index.step or 1):
                r.append(self[i])
            return r
        else:
            self.read_count += 1
            v = z3.simplify(self.memory[index])
            if z3.is_bv_value(v):
                return v.as_long()
            else:
                return v

    def __setitem__(self, index, v):
        """Write one cell, or consecutive elements of ``v`` for a concrete slice.

        :raises ValueError: for a slice without an upper bound
        :raises SymbolicError: for a slice with a non-concrete bound
        """
        if isinstance(index, slice):
            if index.stop is None:
                raise ValueError("Need upper memory address!")
            if (index.start is not None and not concrete(index.start)) or not concrete(index.stop):
                raise SymbolicError("Use mem.write for symbolic range writes")
            # v[j] is stored at the j-th address of the slice.
            for j, i in enumerate(range(index.start or 0, index.stop, index.step or 1)):
                self[i] = v[j]
        else:
            self.write_count += 1
            # One-character strings are stored as their code point.
            if isinstance(v, str):
                v = ord(v)

            if concrete(v):
                # Skip the Store when the cell already holds this concrete
                # value, so the array expression does not grow needlessly.
                # Note that the lookup goes through __getitem__ and therefore
                # also increments read_count.
                old_v = self[index]
                if not concrete(old_v) or old_v != v:
                    self.memory = z3.Store(self.memory, index, v)
            else:
                self.memory = z3.Store(self.memory, index, v)

    def read(self, start, size):
        """Read ``size`` cells beginning at ``start``.

        :return: a list of cells when ``size`` is concrete; otherwise a
            ``SymRead`` that refers to a new ``SymbolicMemory`` sharing the
            current array expression
        """
        if concrete(start) and concrete(size):
            return self[start:start + size]
        elif concrete(size):
            # Symbolic start, concrete size: read cell by cell.
            return [self[start + i] for i in range(size)]
        else:
            sym_mem = SymbolicMemory()
            sym_mem.memory = self.memory
            return SymRead(sym_mem, start, size)
            # raise SymbolicError("Read of symbolic length")

    def copy(self, istart, ilen, ostart, olen):
        """Copy ``ilen`` cells from ``istart`` into the ``olen`` cells at ``ostart``.

        Output cells beyond the input length are set to 0.  Three cases are
        distinguished by which lengths are concrete.
        """
        if concrete(ilen) and concrete(olen):
            # Both lengths known: copy min(ilen, olen) cells and zero-pad up to olen.
            self.write(ostart, olen, self.read(istart, min(ilen, olen)) + [0] * max(olen - ilen, 0))
        elif concrete(olen):
            # Symbolic input length: choose per cell between input and 0.
            self.write(ostart, olen, [z3.If(i < ilen, self[istart + i], 0) for i in range(olen)])
        else:
            # Symbolic output length: rewrite a fixed window of
            # MAX_SYMBOLIC_WRITE_SIZE cells; cells at offsets >= olen keep
            # their current content.
            self.write(ostart, SymbolicMemory.MAX_SYMBOLIC_WRITE_SIZE,
                       [z3.If(i < olen, z3.If(i < ilen, self[istart + i], 0), self[ostart + i]) for i in
                        range(SymbolicMemory.MAX_SYMBOLIC_WRITE_SIZE)])

    def write(self, start, size, val):
        """Write the ``size`` elements of ``val`` beginning at ``start``.

        :raises SymbolicError: if ``size`` is not concrete
        :raises ValueError: if ``len(val)`` differs from ``size``
        """
        if not concrete(size):
            raise SymbolicError("Write of symbolic length")
        if len(val) != size:
            raise ValueError("value does not match length")
        if concrete(start) and concrete(size):
            self[start:start + size] = val
        else:  # by now we know that size is concrete
            for i in range(size):
                self[start + i] = val[i]

    def set_enforcing(self, enforcing=True):
        """No-op; accepts the same argument as ``Memory.set_enforcing``."""
        pass

    def extend(self, start, size):
        """No-op; accepts the same arguments as ``Memory.extend``."""
        pass
139 | 
140 | 
class SymRead(object):
    """Description of a memory read: a memory object plus start and size.

    Non-concrete ``start`` / ``size`` values are stored in z3-simplified
    form; concrete ones are stored as given.
    """

    def __init__(self, memory, start, size):
        self.memory = memory
        self.start = start if concrete(start) else z3.simplify(start)
        self.size = size if concrete(size) else z3.simplify(size)

    def translate(self, new_xid):
        """Return a copy of this read with its variables renamed for ``new_xid``.

        The memory array and any non-concrete bound are passed through the
        module-level ``translate`` function.
        """
        renamed_memory = SymbolicMemory()
        renamed_memory.memory = translate(self.memory.memory, new_xid)

        if concrete(self.start):
            renamed_start = self.start
        else:
            renamed_start = translate(self.start, new_xid)
        if concrete(self.size):
            renamed_size = self.size
        else:
            renamed_size = translate(self.size, new_xid)

        # Build with placeholder bounds, then set the real ones directly so
        # that the constructor does not simplify the translated expressions.
        renamed_read = SymRead(renamed_memory, 0, 0)
        renamed_read.start = renamed_start
        renamed_read.size = renamed_size
        return renamed_read
159 | 
160 | 
class SymbolicStorage(object):
    """Storage modelled as a z3 array that logs every read and write.

    ``base`` is the array variable ``STORAGE_<xid>``; ``storage`` is the
    current array expression (``base`` plus all stores so far); ``accesses``
    is the chronological list of ``(kind, address)`` tuples with ``kind``
    being ``'read'`` or ``'write'``.
    """

    def __init__(self, xid):
        key_sort = z3.BitVecSort(256)
        value_sort = z3.BitVecSort(256)
        self.base = z3.Array('STORAGE_%d' % xid, key_sort, value_sort)
        self.storage = self.base
        self.accesses = list()

    def _log(self, kind, index):
        """Record an access; symbolic addresses are logged in simplified form."""
        logged = index if concrete(index) else z3.simplify(index)
        self.accesses.append((kind, logged))

    def _addresses(self, kind):
        """Return the logged addresses of the given access kind, in order."""
        selected = []
        for access_kind, address in self.accesses:
            if access_kind == kind:
                selected.append(address)
        return selected

    def __getitem__(self, index):
        self._log('read', index)
        return self.storage[index]

    def __setitem__(self, index, v):
        self._log('write', index)
        self.storage = z3.Store(self.storage, index, v)

    @property
    def reads(self):
        """Addresses that were read, in access order."""
        return self._addresses('read')

    @property
    def writes(self):
        """Addresses that were written, in access order."""
        return self._addresses('write')

    @property
    def all(self):
        """All accessed addresses, in access order."""
        return [address for _, address in self.accesses]

    def copy(self, new_xid):
        """Return a copy whose z3 variables are renamed for ``new_xid``."""
        duplicate = SymbolicStorage(new_xid)
        duplicate.base = translate(self.base, new_xid)
        duplicate.storage = translate(self.storage, new_xid)
        renamed_accesses = []
        for kind, address in self.accesses:
            if not concrete(address):
                address = translate(address, new_xid)
            renamed_accesses.append((kind, address))
        duplicate.accesses = renamed_accesses
        return duplicate
193 | 
194 | 
class AbstractEVMState(object):
    """Fields shared by the concrete and symbolic state classes below.

    ``memory`` and ``gas`` are left as ``None`` here; the subclasses in this
    file assign them.
    """

    def __init__(self, code=None):
        # A falsy ``code`` (None or empty) is replaced by a new empty bytearray.
        self.code = code or bytearray()
        self.pc = 0
        self.stack = Stack()
        self.memory = None
        self.trace = list()
        self.gas = None
203 | 
class EVMState(AbstractEVMState):
    """State with a concrete ``Memory`` and the given ``gas`` value."""

    def __init__(self, code=None, gas=0):
        super(EVMState, self).__init__(code)
        self.memory = Memory()
        self.gas = gas
209 | 
210 | 
class SymbolicEVMState(AbstractEVMState):
    """State with symbolic memory, storage, gas and balance.

    The z3 variables created here carry the execution id in their names:
    ``STORAGE_<xid>`` (via ``SymbolicStorage``), ``GAS_<xid>`` and
    ``BALANCE_<xid>``.
    """

    def __init__(self, xid, code=None):
        super(SymbolicEVMState, self).__init__(code)
        self.memory = SymbolicMemory()
        self.storage = SymbolicStorage(xid)
        self.gas = z3.BitVec('GAS_%d' % xid, 256)
        self.start_balance = z3.BitVec('BALANCE_%d' % xid, 256)
        # The balance starts out as the start balance variable.
        self.balance = self.start_balance

    def copy(self, new_xid):
        """Return a superficial copy of this state renamed for ``new_xid``.

        Only storage, pc, trace and the two balance expressions are carried
        over; code, stack, memory and gas are those of a freshly constructed
        ``SymbolicEVMState(new_xid)``.
        """
        # Make a superficial copy of this state.
        # Effectively, only the storage is copied,
        # as this is sufficient to prepend a
        # result with this state to another call
        new_storage = self.storage.copy(new_xid)
        new_state = SymbolicEVMState(new_xid)
        new_state.storage = new_storage
        new_state.pc = self.pc
        new_state.trace = list(self.trace)
        new_state.start_balance = translate(self.start_balance, new_xid)
        new_state.balance = translate(self.balance, new_xid)
        return new_state
234 | 
235 | 
class LazySubstituteState(object):
    """View of a state with z3 substitutions applied to its contents.

    :param state: the wrapped state; its ``memory``, ``stack``, ``code``,
        ``pc``, ``trace`` and ``balance`` attributes are used
    :param substitutions: iterable of ``(old, new)`` z3 expression pairs
    """

    def __init__(self, state, substitutions):
        self._state = state
        # Materialise once and hand the same list to every consumer below.
        # Passing the raw argument on would give the wrappers an exhausted
        # iterator whenever ``substitutions`` is a one-shot iterable.
        self._substitutions = list(substitutions)
        self.memory = LazySubstituteMemory(self._state.memory, self._substitutions)
        self.stack = LazySubstituteStack(self._state.stack, self._substitutions)
        self.code = self._state.code
        self.pc = self._state.pc
        self.trace = self._state.trace
        # The balance is substituted eagerly, at construction time.
        self.balance = z3.substitute(state.balance, self._substitutions)
246 | 
247 | 
class LazySubstituteMemory(object):
    """Placeholder wrapper around a memory and a list of substitutions.

    Item access is not implemented.
    """

    def __init__(self, memory, substitutions):
        self._memory = memory
        self._substitutions = substitutions

    def __getitem__(self, index):
        """Always raise ``NotImplementedError``.

        ``NotImplemented`` is a non-callable constant meant for binary
        operator methods; calling it produced an unrelated ``TypeError``.
        """
        raise NotImplementedError()
255 | 
256 | 
class LazySubstituteStack(object):
    """Read-only stack view that applies substitutions on item access."""

    def __init__(self, stack, substitutions):
        self._stack = stack
        self._substitutions = substitutions

    def __getitem__(self, index):
        """Return the entry (or list of entries for a slice) at ``index``.

        Concrete entries are returned unchanged; all others are passed
        through ``z3.substitute`` with the stored substitutions.
        """
        def resolve(entry):
            if concrete(entry):
                return entry
            return z3.substitute(entry, self._substitutions)

        selected = self._stack[index]
        if not isinstance(index, slice):
            return resolve(selected)
        return [resolve(entry) for entry in selected]
268 | 
269 | 
def translate(expr, xid):
    """Rename the variables of ``expr`` so that their names end in ``_<xid>``.

    For every variable reported by ``get_vars_non_recursive`` the part of the
    name after the last underscore is replaced by ``xid`` and a new variable
    of the same sort (Int, Bool, BitVec or Array) is substituted for it.

    :raises Exception: if a variable has any other sort
    """
    def raw(name):
        # Everything before the last underscore ('' if there is none).
        return name.rpartition('_')[0]

    renamed = dict()
    for var in get_vars_non_recursive(expr):
        if var in renamed:
            continue
        base_name = raw(var.decl().name())
        kind = var.sort_kind()
        if kind == z3.Z3_INT_SORT:
            constructor, sort_args = z3.Int, ()
        elif kind == z3.Z3_BOOL_SORT:
            constructor, sort_args = z3.Bool, ()
        elif kind == z3.Z3_BV_SORT:
            constructor, sort_args = z3.BitVec, (var.size(),)
        elif kind == z3.Z3_ARRAY_SORT:
            constructor, sort_args = z3.Array, (var.domain(), var.range())
        else:
            raise Exception('CANNOT CONVERT %s (%d)' % (var, kind))
        renamed[var] = constructor('%s_%d' % (base_name, xid), *sort_args)

    return z3.substitute(expr, list(renamed.items()))
291 | 


--------------------------------------------------------------------------------
/src/exploit.py:
--------------------------------------------------------------------------------
  1 | import itertools
  2 | import logging
  3 | from collections import defaultdict
  4 | 
  5 | from z3 import z3
  6 | 
  7 | from src.cfg import opcodes
  8 | from src.constraints import check_model_and_resolve, model_to_calls
  9 | from src.evm.exceptions import IntractablePath
 10 | from src.evm.results import CombinedSymbolicResult
 11 | from src.util.z3_extra_util import concrete
 12 | 
 13 | 
class InfeasibleExploit(Exception):
    """Raised in this module when a candidate path cannot be turned into an exploit."""
    pass
 16 | 
 17 | 
 18 | class ExploitContext(object):
 19 |     def __init__(self, target_addr, shellcode_addr, target_amount, amount_check, initial_balance, initial_storage,
 20 |                  controlled_addrs=set()):
 21 |         self.target_addr = target_addr
 22 |         self.shellcode_addr = shellcode_addr
 23 |         self.target_amount = target_amount
 24 |         self.amount_check = amount_check
 25 |         self.initial_balance = initial_balance
 26 |         self.initial_storage = initial_storage
 27 | 
 28 |         # assume we control the target address
 29 |         self.controlled_addrs = controlled_addrs | {target_addr}
 30 | 
 31 | 
def exploit_constraints_call(r, ctx):
    """Build the extra constraints for a path ending in a CALL.

    The address is taken from ``r.state.stack[-2]`` and the amount from
    ``r.state.stack[-3]``.

    :param r: combined result; ``r.state`` and ``r.results`` are used
    :param ctx: the ``ExploitContext`` of the current search
    :return: list of z3 constraints
    :raises InfeasibleExploit: if the address is concrete and differs from
        ``ctx.target_addr``
    """
    # z3.simplify is only applied to non-concrete values.
    addr = r.state.stack[-2]
    if not concrete(addr):
        addr = z3.simplify(addr)

    amount = r.state.stack[-3]
    if not concrete(amount):
        amount = z3.simplify(amount)

    extra_constraints = []

    if not concrete(addr):
        # Only the low 160 bits of the stack word are compared.
        extra_constraints.append(z3.Extract(159, 0, addr) == ctx.target_addr)
    else:
        if addr != ctx.target_addr:
            raise InfeasibleExploit

    if not concrete(amount):
        # '+': at least the target amount; '-': non-zero and at most the
        # target amount; anything else: exactly the target amount.
        if ctx.amount_check == '+':
            extra_constraints.append(z3.UGE(amount, ctx.target_amount))
        elif ctx.amount_check == '-':
            extra_constraints.append(z3.UGT(amount, 0))
            extra_constraints.append(z3.ULE(amount, ctx.target_amount))
        else:
            extra_constraints.append(amount == ctx.target_amount)
        # The amount must not exceed the balance of the final state.
        final_balance = r.state.balance
        extra_constraints.append(z3.ULE(amount, final_balance))

    # ensure we're not spending more for this exploit than we gain
    total_spent = None
    for res in r.results:
        callvalue = z3.BitVec('CALLVALUE_%d' % res.xid, 256)
        extra_constraints.append(z3.ULE(callvalue, 10 * (10 ** 18)))  # keep it semi-reasonable: at most 10 Eth per call
        if total_spent is None:
            total_spent = callvalue
        else:
            total_spent += callvalue

    # NOTE(review): total_spent is still None here if r.results is empty.
    extra_constraints.append(z3.ULT(total_spent, amount))

    # also, ensure the contract does not require a unreasonable start-balance (>100 Eth)
    if not ctx.initial_balance:
        start_balance = z3.BitVec('BALANCE_%d' % r.results[0].xid, 256)
        extra_constraints.append(z3.ULE(start_balance, 100 * (10 ** 18)))

    return extra_constraints
 78 | 
 79 | 
 80 | def exploit_constraints_callcode(r, ctx):
 81 |     addr = z3.simplify(r.state.stack[-2])
 82 | 
 83 |     extra_constraints = []
 84 | 
 85 |     if not concrete(addr):
 86 |         extra_constraints.append(z3.Extract(159, 0, addr) == ctx.shellcode_addr)
 87 |     else:
 88 |         if addr != ctx.shellcode_addr:
 89 |             raise InfeasibleExploit
 90 | 
 91 |     return extra_constraints
 92 | 
 93 | 
 94 | def exploit_constraints_delegatecall(r, ctx):
 95 |     addr = z3.simplify(r.state.stack[-2])
 96 | 
 97 |     extra_constraints = []
 98 | 
 99 |     if not concrete(addr):
100 |         extra_constraints.append(z3.Extract(159, 0, addr) == ctx.shellcode_addr)
101 |     else:
102 |         if addr != ctx.shellcode_addr:
103 |             raise InfeasibleExploit
104 | 
105 |     return extra_constraints
106 | 
107 | 
def exploit_constraints_selfdestruct(r, ctx):
    """Build the extra constraints for a path ending in a SELFDESTRUCT.

    The address is taken from the top of the stack (``r.state.stack[-1]``).

    :param r: combined result; ``r.state`` is used
    :param ctx: the ``ExploitContext`` of the current search
    :return: list of z3 constraints (empty for a matching concrete address)
    :raises InfeasibleExploit: if the address is concrete and differs from
        ``ctx.target_addr``
    """
    addr = r.state.stack[-1]
    # z3.simplify only accepts z3 expressions, so a concrete address must
    # skip it; otherwise the concrete branch below could never be reached.
    if not concrete(addr):
        addr = z3.simplify(addr)

    extra_constraints = []

    if not concrete(addr):
        # Only the low 160 bits of the stack word are compared.
        extra_constraints.append(z3.Extract(159, 0, addr) == ctx.target_addr)
    else:
        if addr != ctx.target_addr:
            raise InfeasibleExploit

    return extra_constraints
120 | 
121 | 
# Maps the name of a target opcode to the function that builds the extra
# exploit constraints for it (looked up by get_exploit_constraints).
# NOTE(review): 'DELEGATECALL' is routed to exploit_constraints_callcode;
# exploit_constraints_delegatecall is not referenced by this table.
EXPLOIT_CONSTRAINTS = {
    'CALL': exploit_constraints_call,
    'CALLCODE': exploit_constraints_callcode,
    'DELEGATECALL': exploit_constraints_callcode,
    'SELFDESTRUCT': exploit_constraints_selfdestruct
}
128 | 
129 | 
def get_exploit_constraints(r, ctx):
    """Return the extra constraints for the target opcode of the last result.

    The builder is looked up in ``EXPLOIT_CONSTRAINTS`` by
    ``r.results[-1].target_op``; opcodes without an entry yield ``[]``.
    """
    builder = EXPLOIT_CONSTRAINTS.get(r.results[-1].target_op)
    if builder is None:
        return []
    return builder(r, ctx)
136 | 
def control_address_constraints(sym_addr, controlled_addrs):
    """Return "``sym_addr`` equals one of ``controlled_addrs``" as one expression.

    A single candidate yields the plain equality; further candidates are
    folded in left to right with ``z3.Or``.  An empty ``controlled_addrs``
    raises ``IndexError``.
    """
    equalities = []
    for candidate in controlled_addrs:
        equalities.append(sym_addr == candidate)

    disjunction, remaining = equalities[0], equalities[1:]
    for equality in remaining:
        disjunction = z3.Or(disjunction, equality)
    return disjunction
143 | 
def attempt_exploit(results, ctx):
    """Try to solve the combination of ``results`` under the exploit constraints.

    :param results: list of results; they are prepended to a new
        ``CombinedSymbolicResult`` from last to first
    :param ctx: the ``ExploitContext`` of the current search
    :return: tuple ``(model_to_calls(model, c.idx_dict), c, model)``
    :raises InfeasibleExploit: if ``check_model_and_resolve`` raises
        ``IntractablePath`` or the model assigns an ORIGIN variable a value
        outside ``ctx.controlled_addrs``
    """
    c = CombinedSymbolicResult()
    for r in results[::-1]:
        c.prepend(r)
    c.combine(ctx.initial_storage, ctx.initial_balance)
    c.simplify()
    extra_constraints = get_exploit_constraints(c, ctx)

    # One ORIGIN/CALLER variable pair per combined result, named by its xid.
    for res in c.results:
        origin = z3.BitVec('ORIGIN_%d' % res.xid, 256)
        caller = z3.BitVec('CALLER_%d' % res.xid, 256)
        # ensure we control the origin
        extra_constraints.append(control_address_constraints(origin, ctx.controlled_addrs))
        # and ensure the caller is either the origin or the shellcode address
        extra_constraints.append(control_address_constraints(caller, {origin, ctx.shellcode_addr}))

    try:
        model = check_model_and_resolve(c.constraints + extra_constraints, c.sha_constraints)

        # enforce we control all ORIGIN-addresses
        if any(model[v].as_long() not in ctx.controlled_addrs for v in model if v.name().startswith('ORIGIN')):
            raise InfeasibleExploit

        return model_to_calls(model, c.idx_dict), c, model
    except IntractablePath:
        raise InfeasibleExploit
170 | 
171 | 
def combined_exploit(p, target_addr, shellcode_addr, target_amount, amount_check='+', initial_storage=dict(),
                     initial_balance=None,
                     max_calls=3, controlled_addrs=set(), flags=None):
    """Search for an exploit consisting of up to ``max_calls`` calls.

    The search proceeds in three stages and returns the first success:

    1. every path to a critical instruction on its own,
    2. one state-changing path followed by a critical path,
    3. longer chains of state-changing paths followed by a critical path.

    :param p: project object; ``p.cfg.filter_ins`` and ``p.get_constraints``
        are used
    :param target_addr: passed on to ``ExploitContext``
    :param shellcode_addr: passed on to ``ExploitContext``
    :param target_amount: passed on to ``ExploitContext``
    :param amount_check: passed on to ``ExploitContext``
    :param initial_storage: passed on to ``ExploitContext``
    :param initial_balance: passed on to ``ExploitContext``
    :param max_calls: upper bound on the number of chained calls
    :param controlled_addrs: passed on to ``ExploitContext``
    :param flags: opcodes to consider; a falsy value selects all of
        ``opcodes.CRITICAL``
    :return: the result of the first successful ``attempt_exploit`` call, or
        ``None`` if no exploit was found
    """
    # NOTE(review): initial_storage and controlled_addrs have mutable default
    # values; this function does not modify them itself.

    flags = flags or set(opcodes.CRITICAL)

    ctx = ExploitContext(target_addr, shellcode_addr, target_amount, amount_check, initial_balance, initial_storage,
                         controlled_addrs)

    # Start addresses of all basic blocks that contain an SLOAD.
    sload_bbs = {ins.bb.start for ins in p.cfg.filter_ins('SLOAD')}
    critical_paths = []

    # Stage 1: try every critical path on its own.
    for op in opcodes.CRITICAL:
        if op not in flags:
            continue
        ins = p.cfg.filter_ins(op)
        if not ins:
            logging.info('No %s instructions', op)
            continue
        logging.info('Found %d %s instructions', len(ins), op)
        for i, i_path, i_r in p.get_constraints(ins, opcodes.CRITICAL_ARGS[op]):
            logging.info("%s: %s", op, i)
            # The generator variable ``p`` below is local to the expression
            # and does not rebind the parameter ``p``.
            logging.info("Path: %s", '->'.join('%x' % p for p in i_path))
            if set(i_path) & sload_bbs:
                # if there is a SLOAD on this path,
                # it might benefit from prepending a state-changing path later
                critical_paths.append(i_r)
            try:
                return attempt_exploit([i_r], ctx)
            except InfeasibleExploit:
                continue
    if not critical_paths:
        logging.warning("No state-dependent critical path found, aborting")
        return

    end_ins = p.cfg.filter_ins('RETURN') + p.cfg.filter_ins('STOP')
    if not end_ins:
        logging.info('No RETURN or STOP instructions')
        return
    logging.info('Found %d RETURN and STOP instructions', len(end_ins))
    # compatible[j] is a list of lists: entry k holds the indices of the
    # state-changing paths usable k+1 calls before critical path j.
    compatible = defaultdict(lambda: [[]])  # list of lists
    state_changing_paths = []
    # Stage 2: one state-changing path directly followed by a critical path.
    for i, (end, end_path, state_changing_r) in enumerate(p.get_constraints(end_ins, find_sstore=True)):
        logging.info("End: %s", end)
        logging.info("Path: %s", '->'.join('%x' % p for p in end_path))
        state_changing_paths.append(state_changing_r)
        for j, critical_r in enumerate(critical_paths):
            if not critical_r.may_read_from(state_changing_r):
                continue
            compatible[j][0].append(i)
            try:
                return attempt_exploit([state_changing_r, critical_r], ctx)
            except InfeasibleExploit:
                continue

    logging.info('All ends: %s', state_changing_paths)

    # storage_compatible[i] lists every j such that path i may read what
    # path j writes.
    storage_compatible = defaultdict(list)
    for (i, a_r), (j, b_r) in itertools.product(enumerate(state_changing_paths), enumerate(state_changing_paths)):
        if a_r.may_read_from(b_r):
            storage_compatible[i].append(j)

    # Stage 3: add one more layer of state-changing calls per iteration.
    calls = [state_changing_paths]
    while len(calls) < max_calls - 1:
        new_ends = [r.copy() for r in state_changing_paths]
        calls.append(new_ends)
        for k, v in compatible.items():
            new_compat = set()
            for c in v[-1]:
                new_compat.update(storage_compatible[c])
            v.append(sorted(new_compat))
        for i, critical_r in enumerate(critical_paths):
            for combo_ids in itertools.product(*compatible[i]):
                # combo is ordered critical path first; it is reversed for
                # attempt_exploit so that the critical path comes last.
                combo = [critical_r] + [c[j] for c, j in zip(calls, combo_ids)]
                try:
                    return attempt_exploit(combo[::-1], ctx)
                except InfeasibleExploit:
                    continue

    logging.info('Could not exploit any RETURN+CALL')
252 | 


--------------------------------------------------------------------------------
/src/explorer/__init__.py:
--------------------------------------------------------------------------------
1 | from . import backward
2 | from . import forward


--------------------------------------------------------------------------------
/src/explorer/backward.py:
--------------------------------------------------------------------------------
  1 | import logging
  2 | from collections import defaultdict
  3 | from queue import PriorityQueue
  4 | from src.util.frontierset import FrontierSet
  5 | 
  6 | 
  7 | class BackwardExplorerState(object):
  8 |     def __init__(self, bb, gas, must_visit, cost, data):
  9 |         self.bb = bb
 10 |         self.gas = gas
 11 |         self.must_visit = must_visit.copy()
 12 |         self.data = data
 13 |         self.cost = cost
 14 | 
 15 |     def estimate(self):
 16 |         """
 17 |         Return an estimate of how quickly we can reach the root of the tree
 18 |         This estimate is the sum of the number of branches taken so far (self.cost) and the
 19 |         estimate given by the next BB to visit (self.bb.estimate)
 20 |         :return: estimated distance to root
 21 |         """
 22 |         if self.bb.estimate_constraints is None:
 23 |             return self.cost
 24 |         else:
 25 |             return self.cost + self.bb.estimate_constraints
 26 | 
 27 |     def rank(self):
 28 |         """
 29 |         Compute a rank for this state. Order by estimated root-distance first, solve ties by favoring less restricted states
 30 |         for caching efficiency
 31 |         :return:
 32 |         """
 33 |         return self.estimate(), len(self.must_visit)
 34 | 
 35 |     def __lt__(self, other):
 36 |         return self.rank() < other.rank()
 37 | 
 38 |     def __hash__(self):
 39 |         return sum(a * b for a, b in zip((23, 29, 31), (hash(self.bb), hash(self.must_visit), hash(self.data))))
 40 | 
 41 |     def __eq__(self, other):
 42 |         return self.bb == other.bb and self.must_visit == other.must_visit and self.data == other.data
 43 | 
 44 |     def __str__(self):
 45 |         return 'At: %x, Gas: %s, Must-Visit: %s, Data: %s, Hash: %x' % (
 46 |         self.bb.start, self.gas, self.must_visit, self.data, hash(self))
 47 | 
 48 | 
def generate_sucessors(state, new_data, update_data, predicate=lambda st, pred: True):
    """Create the follow-up states for all admissible predecessors of ``state.bb``.

    :param state: the ``BackwardExplorerState`` to expand
    :param new_data: data handed to ``update_data`` for every new state
    :param update_data: callable ``(new_data, predecessor) -> data``
    :param predicate: callable ``(state.data, predecessor) -> bool``;
        predecessors for which it returns a falsy value are skipped
    :return: list of new ``BackwardExplorerState`` objects (empty when the
        gas of ``state`` is used up)
    """
    new_todo = []
    # gas None means unlimited; otherwise there must be some gas left.
    if state.gas is None or state.gas > 0:
        # logging.debug('[tr] [gs] passed first if')
        new_gas = state.gas
        # Gas is only consumed at BBs with more than one predecessor.
        if state.gas and len(state.bb.pred) > 1:
            new_gas = state.gas - 1
        # logging.debug('[tr] [gs] Preds: %s', state.bb.pred)

        for p in state.bb.pred:
            if not predicate(state.data, p):
                continue

            new_must_visits = []
            for path in state.bb.pred_paths[p]:
                new_must_visit = state.must_visit.copy()
                # Register every consecutive pair of the path as (later, earlier).
                for a, b in zip(path[:-1], path[1:]):
                    new_must_visit.add(b, a)
                # The predecessor itself is about to be visited.
                if p.start in new_must_visit.frontier:
                    new_must_visit.remove(p.start)
                # Drop this alternative if something that must still be
                # visited is not among the ancestors of p.
                if not new_must_visit.all.issubset(p.ancestors):
                    # logging.debug('[tr] [gs] Cannot reach any necessary states, aborting! Needed: %s, reachable: %s', new_must_visit, p.ancestors)
                    continue
                new_must_visits.append(new_must_visit)

            # Taking a branching predecessor costs one unit.
            new_cost = state.cost + (1 if p.branch else 0)

            for new_must_visit in minimize(new_must_visits):
                new_todo.append(BackwardExplorerState(p, new_gas, new_must_visit, new_cost, update_data(new_data, p)))
    return new_todo
 79 | 
 80 | 
def traverse_back(start_ins, initial_gas, initial_data, advance_data, update_data, finish_path, must_visits=[],
                  predicate=lambda st, p: True):
    """
    Explore paths backwards from the given instructions, best-ranked state first.

    :param start_ins: Starting instructions
    :param initial_gas: Starting "gas". Can be None, in which case it is unlimited
    :param initial_data: callable producing the starting data for an instruction
    :param advance_data: method to advance data
    :param update_data: method to update data
    :param finish_path: callable deciding whether advanced data completes a path
    :param must_visits: FrontierSet describing the next nodes that *must* be visited
    :param predicate: A function (state, BB) -> Bool describing whether an edge should be taken or not
    :return: yields paths as they are explored one-by-one
    """
    todo = PriorityQueue()

    for ins in start_ins:
        # logging.debug('[tr] Starting traversal at %x', ins.addr)
        data = initial_data(ins)
        bb = ins.bb
        gas = initial_gas
        # keep tuples of (len(must_visit), state)
        # this way, the least restricted state are preferred
        # which should maximize caching efficiency
        if not must_visits:
            must_visits = [FrontierSet()]
        # NOTE(review): ``mv is not FrontierSet`` compares each element with
        # the class object itself, so it is true for every instance and each
        # element is wrapped in FrontierSet(...). An isinstance check may
        # have been intended -- confirm against FrontierSet's constructor.
        for must_visit in minimize(FrontierSet(mv) if mv is not FrontierSet else mv for mv in must_visits):
            ts = BackwardExplorerState(bb, gas, must_visit, 0, data)
            todo.put(ts)
    cache = set()
    # BB start address -> number of paths dropped there for lack of gas
    ended_prematurely = defaultdict(int)
    while not todo.empty():
        state = todo.get()
        # if this BB can be reached via multiple paths, check if we want to cache it
        # or whether another path already reached it with the same state
        if len(state.bb.succ) > 1:
            if state in cache:
                # logging.debug('[tr] CACHE HIT')
                continue
            cache.add(state)
        # logging.debug('[tr] Cachesize: %d\t(slicing %x, currently at %x)', len(cache), ins.addr, state.bb.start)
        # logging.debug('[tr] Current state: %s', state)
        new_data = advance_data(state.data)
        if finish_path(new_data):
            # logging.debug('[tr] finished path (%s)', new_data)
            yield new_data
        else:
            # Give up on this state when it has a gas limit that is exhausted
            # or smaller than the BB's estimate of remaining back branches.
            if state.gas is not None and state.bb.estimate_back_branches is not None and (state.gas == 0 or state.gas < state.bb.estimate_back_branches):
                ended_prematurely[state.bb.start] += 1
            else:
                logging.debug('[tr] continuing path (%s)', new_data)
                new_todo = generate_sucessors(state, new_data, update_data, predicate=predicate)
                for nt in new_todo:
                    todo.put(nt)
    total_ended = sum(ended_prematurely.values())
    if total_ended:
        logging.debug("%d paths that ended prematurely due to branches: %s", total_ended,
                     ', '.join('%x: %d' % (k, v) for k, v in ended_prematurely.items()))
    else:
        logging.debug("Finished all paths")
139 | 
140 | 
def minimize(must_visits):
    """Yield the must-visit sets that are not supersets of a smaller yielded set.

    Candidates are ordered by ``len`` (smallest first).  After a set has been
    yielded, every remaining candidate that contains it (including an equal
    set) is dropped.
    """
    remaining = list(must_visits)
    remaining.sort(key=len)
    while remaining:
        smallest, *rest = remaining
        yield smallest
        remaining = [candidate for candidate in rest if not smallest.issubset(candidate)]
147 | 


--------------------------------------------------------------------------------
/src/explorer/forward.py:
--------------------------------------------------------------------------------
  1 | import logging
  2 | from queue import PriorityQueue
  3 | 
  4 | from src.util.utils import is_subseq, is_substr
  5 | 
  6 | 
  7 | class ForwardExplorerState(object):
  8 |     def __init__(self, bb, path=None, branches=None, slices=None):
  9 |         self.bb = bb
 10 |         self.path = list(path) + [bb.start] or []
 11 |         self.seen = set(self.path)
 12 |         self.branches = branches or 0
 13 |         self.slices = []
 14 |         self.finished = set()    
 15 |         #logging.info('Path %s', ' -> '.join('%x' % p for p in self.path))
 16 |         for slice in slices:
 17 |             last_pc = None            
 18 |             #print('%x' % self.bb.start)
 19 |             while slice and slice[0].bb.start == self.bb.start:
 20 |                 if last_pc is None or slice[0].addr > last_pc:
 21 |                     last_pc = slice[0].addr
 22 |                     if len(slice) == 1:
 23 |                         self.finished.add(last_pc)
 24 |                     slice = slice[1:]
 25 |                 else:
 26 |                     break
 27 |             self.slices.append(slice)
 28 |         
 29 | 
 30 |     def next_states(self):
 31 |         possible_succs = []
 32 |         for succ in self.bb.succ:          
 33 |             pths = succ.pred_paths[self.bb]           
 34 |             for pth in pths:                            
 35 |                 if not set(pth).issubset(self.seen):
 36 |                     continue
 37 |                 if not is_subseq(pth, self.path):
 38 |                     continue
 39 |                 break
 40 |             else:
 41 |                 continue
 42 |             possible_succs.append(succ)        
 43 |         next_states = []        
 44 |         branches = self.branches
 45 |         if len(possible_succs) > 1:
 46 |             branches += 1
 47 |         for succ in possible_succs:
 48 |             next_slices = tuple(
 49 |                 s for s in self.slices if set(i.bb.start for i in s).issubset(succ.descendants | {succ.start}))
 50 |             if next_slices:        
 51 |                 next_states.append(ForwardExplorerState(succ, self.path, branches, next_slices))
 52 |         return next_states
 53 | 
 54 |     def __lt__(self, other):
 55 |         return self.weight < other.weight
 56 | 
 57 | class ForwardExplorer(object):
 58 |     def __init__(self, cfg, avoid=frozenset()):
 59 |         self.dist_map = dict()
 60 |         self.cfg = cfg
 61 |         self.blacklist = set()
 62 | 
 63 |     def add_to_blacklist(self, path):
 64 |         self.blacklist.add(tuple(path))
 65 | 
 66 |     def weight(self, state):
 67 |         if state.finished:
 68 |             return state.branches
 69 |         else:
 70 |             return state.branches + min(self.dist_map[s[0].bb.start][state.bb] for s in state.slices)
 71 | 
 72 |     def find(self, slices, looplimit=2, avoid=frozenset(), prefix=None):    
 73 |         avoid = frozenset(avoid)
 74 |         slices = tuple(tuple(i for i in s if i.bb) for s in slices)       
 75 |         
 76 |         if not slices:            
 77 |             #raise StopIteration
 78 |             return 
 79 |         # distance from a BB to instruction
 80 |         for slice in slices:                        
 81 |             for i in slice:               
 82 |                 if i.bb.start not in self.dist_map:
 83 |                     self.dist_map[i.bb.start] = self.cfg.distance_map(i)
 84 |                 #print('%x' %i.bb.start)
 85 |                 #print(['%x' %d.start for d in self.dist_map[i.bb.start]])
 86 |        
 87 |         if prefix is None:
 88 |             state = ForwardExplorerState(self.cfg.root, [], 0, slices)                        
 89 |         else:
 90 |             state = ForwardExplorerState(self.cfg._ins_at[prefix].bb, prefix, 0, slices)
 91 |         state.weight = self.weight(state)
 92 |         
 93 |         todo = PriorityQueue()
 94 |         todo.put(state)
 95 |           
 96 |         while not todo.empty():                                                
 97 |             state = todo.get()            
 98 |             if any(is_substr(pth, state.path) for pth in self.blacklist):
 99 |                 logging.info("BLACKLIST hit for %s" % (', '.join('%x' % i for i in state.path)))
100 |                 continue            
101 |             if set(i.name for i in state.bb.ins) & avoid:                            
102 |                 continue
103 |             if state.finished:                
104 |                 for last_pc in state.finished:                                                        
105 |                     yield state.path + [last_pc]
106 |                 state.finished = set()
107 |                 state.slices = tuple(s for s in state.slices if s)
108 |                 if not state.slices:
109 |                     continue
110 |             if state.path.count(state.bb.start) > looplimit:                
111 |                 continue
112 |             for next_state in state.next_states():
113 |                 next_state.weight = self.weight(next_state)
114 |                 todo.put(next_state)
115 | 


--------------------------------------------------------------------------------
/src/flow/__init__.py:
--------------------------------------------------------------------------------
1 | from . import tainting
2 | from . import symbolic
3 | from . import analysis_results
4 | from . import code_info
5 | 


--------------------------------------------------------------------------------
/src/flow/analysis_results.py:
--------------------------------------------------------------------------------
 1 | 
 2 | class TainitAnalysisResult(object):
 3 |     def __init__(self, state, defect_type, target_sink, tainted, sources, sload_sha3_bases, sstore_sha3_bases, sstore_slots, slot_live_access, slot_access_trace, storage_slot_type):
 4 |         self.state = state    
 5 |         self.defect_type=defect_type       
 6 |         self.target_sink = target_sink            
 7 |         self._tainted = tainted
 8 |         self.sources = sources
 9 |         self.sload_sha3_bases = sload_sha3_bases
10 |         self.sstore_sha3_bases = sstore_sha3_bases
11 |         self.sstore_slots = sstore_slots
12 |         self.slot_live_access = slot_live_access
13 |         self.slot_access_trace =slot_access_trace
14 |         self.storage_slot_type = storage_slot_type
15 |  
class TainitAnalysisBugDetails(object):
    """Plain record of the findings collected by the taint analysis.

    Each constructor argument is stored unchanged under the same name.
    """

    def __init__(self, unbounded_loops, fun_call_restr, loops_with_calls, gas_griefing,
                 hardcoded_gas, asserts, slot_live_access, temp_slots):
        self.unbounded_loops = unbounded_loops
        self.fun_call_restr = fun_call_restr
        self.loops_with_calls = loops_with_calls
        self.gas_griefing = gas_griefing
        self.hardcoded_gas = hardcoded_gas
        self.asserts = asserts
        self.slot_live_access = slot_live_access
        self.temp_slots = temp_slots
26 | 
class AnalysisBugDetails(object):
    """Plain record of access-control check findings.

    Each constructor argument is stored unchanged under the same name.
    """

    def __init__(self, violated_ac_checks, missing_ac_checks, violated_ac_checks_ib):
        self.violated_ac_checks = violated_ac_checks
        self.missing_ac_checks = missing_ac_checks
        self.violated_ac_checks_ib = violated_ac_checks_ib
32 |         
33 |  


--------------------------------------------------------------------------------
/src/flow/code_info.py:
--------------------------------------------------------------------------------
 1 | import os, sys
 2 | import logging
 3 | 
 4 | def get_function_sig(cfg,path, type='name'):    
 5 |     #bbs= {bb.start:ins.arg.hex() for bb in p.cfg.bbs for ins in bb.ins if bb.start in path[:-1] and ins.name =='PUSH4' and p.cfg._ins_at[ins.addr+ins.op-0x5f+1].name=='EQ'}
 6 |     logging.debug("Path: %s", '->'.join('%x' % p for p in path))                                                    
 7 |     bbs= {bb:ins.arg.hex() for bb in path[:-1] for ins in cfg._bb_at[bb].ins  if ins.name =='PUSH4' and ins.arg.hex() !='ffffffff' and cfg._ins_at[ins.addr+ins.op-0x5f+1].name=='EQ' and int.from_bytes(cfg._ins_at[ins.addr+ins.op-0x5f+2].arg,'big')==path[path.index(bb)+1]}    
 8 |     other_bbs= {bb:ins.arg.hex() for bb in path[:-1] for ins in cfg._bb_at[bb].ins  if ins.name =='PUSH4' and ins.arg.hex() !='ffffffff' and ins.addr+ins.op-0x5f+2 in cfg._ins_at and cfg._ins_at[ins.addr+ins.op-0x5f+2].name=='EQ' and int.from_bytes(cfg._ins_at[ins.addr+ins.op-0x5f+3].arg,'big')==path[path.index(bb)+1]}    
 9 |     bbs.update(other_bbs)
10 |     #print(bbs)
11 |     bbs_indices=[path.index(bb) for bb in bbs.keys()]    
12 |     if len(bbs_indices)!=0 and type=='name':           
13 |         with open(os.path.join(os.path.join(os.getcwd(),"src/flow"),"FSignatures.txt"), 'r') as f:      
14 |             fsig=dict(x.rstrip().split(None,1) for x in f)  
15 |         return fsig.get('0x'+str(bbs[path[max(bbs_indices)]]),bbs[path[max(bbs_indices)]])
16 |     elif len(bbs_indices)==0 and type=='name':
17 |         return '() payable'
18 |     elif len(bbs_indices)!=0 and type=='id':
19 |         return str(bbs[path[max(bbs_indices)]])
20 |     elif len(bbs_indices)==0 and type=='id':
21 |         return '0'
22 |     elif len(bbs_indices)!=0 and type=='bb':
23 |         return path[max(bbs_indices)]
24 |     elif len(bbs_indices)==0 and type=='bb':
25 |         return 0
26 | 
def function_restricted_caller(p, path):
    """Tell whether ``path`` passes a block that compares the caller/origin.

    A block qualifies when its start is in ``path[:-2]``, it has at least two
    successor addresses, and it contains a ``CALLER`` or ``ORIGIN``
    instruction for which an ``EQ`` is found three or one instruction later,
    or as the third-last instruction of the block.

    Returns:
        ``True`` if such a block exists, ``False`` otherwise.
    """
    candidate_starts = set(path[:-2])

    def followed_by_eq(ins_list, pos):
        # Bounds are checked explicitly: short blocks used to raise
        # IndexError before the remaining alternatives were even tried.
        for idx in (pos + 3, pos + 1):
            if idx < len(ins_list) and ins_list[idx].name == 'EQ':
                return True
        return len(ins_list) >= 3 and ins_list[-3].name == 'EQ'

    for bb in p.cfg.bbs:
        if bb.start not in candidate_starts or len(bb.succ_addrs) < 2:
            continue
        for pos, ins in enumerate(bb.ins):
            if ins.name in ('CALLER', 'ORIGIN') and followed_by_eq(bb.ins, pos):
                return True
    return False
32 | 


--------------------------------------------------------------------------------
/src/flow/symbolic.py:
--------------------------------------------------------------------------------
  1 | import itertools
  2 | import logging
  3 | from collections import defaultdict
  4 | #from py import code
  5 | 
  6 | from z3 import z3
  7 | from src import cfg
  8 | 
  9 | from src.cfg import opcodes
 10 | from src.constraints import check_model_and_resolve, model_to_calls
 11 | from src.evm.exceptions import IntractablePath, TimeoutException
 12 | from src.evm.results import CombinedSymbolicResult
 13 | from src.util.z3_extra_util import concrete
 14 | from src.flow import code_info as cinfo
 15 | 
 16 | class InfeasibleExploit(Exception):
 17 |     pass
 18 | 
 19 | 
 20 | class ExploitContext(object):
 21 |     def __init__(self, target_addr, shellcode_addr, target_amount, amount_check, initial_balance, initial_storage,
 22 |                  controlled_addrs=set()):
 23 |         self.target_addr = target_addr
 24 |         self.shellcode_addr = shellcode_addr
 25 |         self.target_amount = target_amount
 26 |         self.amount_check = amount_check
 27 |         self.initial_balance = initial_balance
 28 |         self.initial_storage = initial_storage
 29 | 
 30 |         # assume we control the target address
 31 |         self.controlled_addrs = controlled_addrs | {target_addr}
 32 | 
 33 | 
 34 | def exploit_constraints_call(r, ctx):
 35 |     addr = r.state.stack[-2]
 36 |     if not concrete(addr):
 37 |         addr = z3.simplify(addr)
 38 | 
 39 |     amount = r.state.stack[-3]
 40 |     if not concrete(amount):
 41 |         amount = z3.simplify(amount)
 42 | 
 43 |     extra_constraints = []
 44 | 
 45 |     if not concrete(addr):
 46 |         extra_constraints.append(z3.Extract(159, 0, addr) == ctx.target_addr)
 47 |     else:
 48 |         if addr != ctx.target_addr:
 49 |             raise InfeasibleExploit
 50 | 
 51 |     if not concrete(amount):
 52 |         if ctx.amount_check == '+':
 53 |             extra_constraints.append(z3.UGE(amount, ctx.target_amount))
 54 |         elif ctx.amount_check == '-':
 55 |             extra_constraints.append(z3.UGT(amount, 0))
 56 |             extra_constraints.append(z3.ULE(amount, ctx.target_amount))
 57 |         else:
 58 |             extra_constraints.append(amount == ctx.target_amount)
 59 |         final_balance = r.state.balance
 60 |         extra_constraints.append(z3.ULE(amount, final_balance))
 61 | 
 62 |     # ensure we're not spending more for this exploit than we gain
 63 |     total_spent = None
 64 |     for res in r.results:
 65 |         callvalue = z3.BitVec('CALLVALUE_%d' % res.xid, 256)
 66 |         extra_constraints.append(z3.ULE(callvalue, 10 * (10 ** 18)))  # keep it semi-reasonable: at most 10 Eth per call
 67 |         if total_spent is None:
 68 |             total_spent = callvalue
 69 |         else:
 70 |             total_spent += callvalue
 71 | 
 72 |     extra_constraints.append(z3.ULT(total_spent, amount))
 73 | 
 74 |     # also, ensure the contract does not require a unreasonable start-balance (>100 Eth)
 75 |     if not ctx.initial_balance:
 76 |         start_balance = z3.BitVec('BALANCE_%d' % r.results[0].xid, 256)
 77 |         extra_constraints.append(z3.ULE(start_balance, 100 * (10 ** 18)))
 78 | 
 79 |     return extra_constraints
 80 | 
 81 | 
 82 | def exploit_constraints_callcode(r, ctx):
 83 |     addr = z3.simplify(r.state.stack[-2])
 84 | 
 85 |     extra_constraints = []
 86 | 
 87 |     if not concrete(addr):
 88 |         extra_constraints.append(z3.Extract(159, 0, addr) == ctx.shellcode_addr)
 89 |     else:
 90 |         if addr != ctx.shellcode_addr:
 91 |             raise InfeasibleExploit
 92 | 
 93 |     return extra_constraints
 94 | 
 95 | 
 96 | def exploit_constraints_delegatecall(r, ctx):
 97 |     addr = z3.simplify(r.state.stack[-2])
 98 | 
 99 |     extra_constraints = []
100 | 
101 |     if not concrete(addr):
102 |         extra_constraints.append(z3.Extract(159, 0, addr) == ctx.shellcode_addr)
103 |     else:
104 |         if addr != ctx.shellcode_addr:
105 |             raise InfeasibleExploit
106 | 
107 |     return extra_constraints
108 | 
109 | 
def exploit_constraints_selfdestruct(r, ctx):
    """Constrain the ``SELFDESTRUCT`` beneficiary (``r.state.stack[-1]``) to
    the target address.

    Returns:
        An empty list when the simplified beneficiary is concrete and
        matches, otherwise a single constraint on its low 160 bits.

    Raises:
        InfeasibleExploit: the beneficiary is concrete and differs from
            ``ctx.target_addr``.
    """
    beneficiary = z3.simplify(r.state.stack[-1])

    if concrete(beneficiary):
        if beneficiary != ctx.target_addr:
            raise InfeasibleExploit
        return []

    return [z3.Extract(159, 0, beneficiary) == ctx.target_addr]
122 | 
123 | 
# Maps the opcode a path ends in to the function that builds the
# exploit-specific constraints for it.  Each opcode uses its own handler
# (DELEGATECALL previously reused the CALLCODE one, which has the same body).
EXPLOIT_CONSTRAINTS = {
    'CALL': exploit_constraints_call,
    'CALLCODE': exploit_constraints_callcode,
    'DELEGATECALL': exploit_constraints_delegatecall,
    'SELFDESTRUCT': exploit_constraints_selfdestruct,
}
130 | 
131 | 
def get_exploit_constraints(r, ctx):
    """Return the exploit constraints for the opcode the last result targets.

    The opcode is ``r.results[-1].target_op``; opcodes without an entry in
    ``EXPLOIT_CONSTRAINTS`` yield an empty list.
    """
    handler = EXPLOIT_CONSTRAINTS.get(r.results[-1].target_op)
    if handler is None:
        return []
    return handler(r, ctx)
138 | 
139 | 
def control_address_constraints(sym_addr, controlled_addrs):
    """Express that ``sym_addr`` equals one of ``controlled_addrs``.

    The individual equalities are chained pairwise with ``z3.Or``; a single
    address yields just its equality.  ``controlled_addrs`` must not be
    empty (an ``IndexError`` is raised otherwise).
    """
    comparisons = [sym_addr == candidate for candidate in controlled_addrs]
    combined = comparisons[0]
    for comparison in itertools.islice(comparisons, 1, None):
        combined = z3.Or(combined, comparison)
    return combined
146 | 
def attempt_exploit(results, ctx):
    """Combine symbolic results and try to solve them as an exploit.

    Args:
        results: symbolic results, in call order.
        ctx: the ``ExploitContext`` supplying initial storage/balance, the
            addresses to aim for and the controlled addresses.

    Returns:
        A tuple ``(calls, combined_result, model)`` where ``calls`` is the
        output of ``model_to_calls``.

    Raises:
        InfeasibleExploit: the path is intractable or the model needs an
            ``ORIGIN`` address that is not in ``ctx.controlled_addrs``.
        TimeoutException: the solver timed out.
    """
    c = CombinedSymbolicResult()
    for r in results[::-1]:
        c.prepend(r)
    c.combine(ctx.initial_storage, ctx.initial_balance)
    c.simplify()
    extra_constraints = get_exploit_constraints(c, ctx)

    # NOTE: no per-transaction constraints on ORIGIN/CALLER are added here;
    # control over ORIGIN is only verified on the model below.  (The loop
    # that created unused ORIGIN_/CALLER_ symbols was removed.)
    try:
        model = check_model_and_resolve(c.constraints + extra_constraints, c.sha_constraints)

        # enforce we control all ORIGIN-addresses
        if any(model[v].as_long() not in ctx.controlled_addrs for v in model if v.name().startswith('ORIGIN')):
            raise InfeasibleExploit

        return model_to_calls(model, c.idx_dict), c, model
    except TimeoutException:
        raise TimeoutException("Timed out!")
    except IntractablePath:
        raise InfeasibleExploit
174 | 
175 | 
def validate_path(p, path, mode=None, ac_jumpi=None):
    """Symbolically execute ``path`` and check whether it is satisfiable.

    Args:
        p: project exposing ``cfg`` and ``run_symbolic``.
        path: sequence of block start addresses (plus the final instruction
            address, as produced by the explorer).
        mode, ac_jumpi: forwarded unchanged to ``p.run_symbolic``.

    Returns:
        ``(model, possible_intended_behavior)`` when a model was found,
        ``('error', None)`` when symbolic execution failed with a
        non-timeout error, and ``None`` when the path is infeasible.

    Raises:
        TimeoutException: symbolic execution or solving timed out.
    """
    target_addr = 0x1234
    # Was ``int('0x1000', 16), +1000`` - a stray comma made this a tuple,
    # which can never equal (or be compared against) an address.
    shellcode_addr = 0x1000
    target_amount = 1000
    amount_check = '+'
    initial_storage = dict()
    initial_balance = None
    controlled_addrs = set()

    fun_sig_bb = cinfo.get_function_sig(p.cfg, path, 'bb')
    code_path = path[path.index(fun_sig_bb)+2:-1]
    if fun_sig_bb == 0:  # most probably payable()
        # No dispatcher block found: take the first non-root block that does
        # not touch calldata and is not a plain selector comparison.
        for bb in path[:-1]:
            if bb == 0:
                continue
            bb_ins = [ins.name for ins in p.cfg._bb_at[bb].ins]
            if not set(bb_ins) & {'CALLDATALOAD', 'CALLDATACOPY', 'CALLDATASIZE'} and \
                    bb_ins != ['DUP1', 'PUSH4', 'EQ', 'PUSH2', 'JUMPI']:
                fun_sig_bb = bb
                code_path = path[path.index(fun_sig_bb):-1]
                break

    ctx = ExploitContext(target_addr, shellcode_addr, target_amount, amount_check, initial_balance, initial_storage,
                         controlled_addrs)
    try:
        symbolic_constr = p.run_symbolic(path, mode=mode, code_path=code_path, ac_jumpi=ac_jumpi)
    except TimeoutException:
        raise TimeoutException("Timed out!")
    except Exception:
        # Best effort: report the failure to the caller, but leave a trace.
        logging.debug('Symbolic execution of path failed', exc_info=True)
        return 'error', None

    try:
        _calls, _combined, model = attempt_exploit([symbolic_constr], ctx)
        return model, symbolic_constr.possible_intended_behavior
    except TimeoutException:
        raise TimeoutException("Timed out!")
    except InfeasibleExploit:
        return None
222 | 
223 | 


--------------------------------------------------------------------------------
/src/memory.py:
--------------------------------------------------------------------------------
  1 | from collections import deque
  2 | 
  3 | from src.cfg.opcodes import memory_reads, memory_writes
  4 | from src.evm.exceptions import TimeoutException
  5 | 
  6 | 
  7 | class InconsistentRange(Exception):
  8 |     pass
  9 | 
 10 | 
 11 | class UninitializedRead(Exception):
 12 |     def __init__(self, index, *args):
 13 |         super(UninitializedRead, self).__init__(*args)
 14 |         if isinstance(index, slice):
 15 |             self.start = index.start or 0
 16 |             self.end = index.stop
 17 |         else:
 18 |             self.start = index
 19 |             self.end = index + 1
 20 | 
 21 |     def __repr__(self):
 22 |         return '%s from: %d to %d' % (super(UninitializedRead, self).__repr__(), self.start, self.end)
 23 | 
 24 |     def __str__(self):
 25 |         return '%s from: %d to %d' % (super(UninitializedRead, self).__repr__(), self.start, self.end)
 26 | 
 27 | 
 28 | class MemoryInfo(object):
 29 |     def __init__(self, reads, writes):
 30 |         self.reads = reads
 31 |         self.writes = writes
 32 | 
 33 | 
 34 | def get_memory_info(ins, code, memory_infos=None):
 35 |     from .slicing import backward_slice, slice_to_program
 36 |     from .evm.evm import run
 37 |     from .evm.state import EVMState
 38 |     from .evm.exceptions import ExternalData
 39 |     from .util.intrange import Range
 40 |     targets = []
 41 | 
 42 |     read = False
 43 |     write = False
 44 | 
 45 |     if ins.name in memory_reads:
 46 |         read = True
 47 |         read_offset_info, read_size_info = memory_reads[ins.name]
 48 |         if read_offset_info < 0:
 49 |             targets.append(-1 - read_offset_info)
 50 |         if read_size_info < 0:
 51 |             targets.append(-1 - read_size_info)
 52 |     if ins.name in memory_writes:
 53 |         write = True
 54 |         write_offset_info, write_size_info = memory_writes[ins.name]
 55 |         if write_offset_info < 0:
 56 |             targets.append(-1 - write_offset_info)
 57 |         if write_size_info < 0:
 58 |             targets.append(-1 - write_size_info)
 59 | 
 60 |     if not read and not write:
 61 |         return None
 62 | 
 63 |     bs = backward_slice(ins, targets, memory_infos)
 64 | 
 65 |     read_range = None
 66 |     write_range = None
 67 |     for b in bs:
 68 |         try:
 69 |             state = run(slice_to_program(b), EVMState(code=code), check_initialized=True)
 70 |         except UninitializedRead as e:
 71 |             raise e
 72 |         except ExternalData as e:
 73 |             raise e
 74 |         if read:
 75 |             read_offset = state.stack[read_offset_info] if read_offset_info < 0 else read_offset_info
 76 |             read_size = state.stack[read_size_info] if read_size_info < 0 else read_size_info
 77 |             new_range = Range(read_offset, read_offset + read_size)
 78 |             if read_range is None:
 79 |                 read_range = new_range
 80 |             elif read_range != new_range:
 81 |                 raise InconsistentRange()
 82 |         if write:
 83 |             write_offset = state.stack[write_offset_info] if write_offset_info < 0 else write_offset_info
 84 |             write_size = state.stack[write_size_info] if write_size_info < 0 else write_size_info
 85 |             new_range = Range(write_offset, write_offset + write_size)
 86 |             if write_range is None:
 87 |                 write_range = new_range
 88 |             elif write_range != new_range:
 89 |                 raise InconsistentRange()
 90 |     return MemoryInfo(read_range or Range(), write_range or Range())
 91 | 
 92 | 
 93 | def resolve_all_memory(cfg, code):
 94 |     memory_infos = dict()    
 95 |     resolve_later = deque(
 96 |         ins for bb in cfg.bbs for ins in bb.ins if ins.name in memory_reads or ins.name in memory_writes)
 97 |     todo = deque()
 98 |     progress = True    
 99 |     while todo or (progress and resolve_later):
100 |         if not todo:
101 |             todo = resolve_later
102 |             resolve_later = deque()
103 |             progress = False
104 |         ins = todo.popleft()
105 |         try:
106 |             mi = get_memory_info(ins, code, memory_infos)                                            
107 |             if mi:              
108 |                 progress = True
109 |                 memory_infos[ins] = mi
110 |         except TimeoutException:                
111 |                 raise TimeoutException("Timed out!")
112 |         except Exception as e:            
113 |             resolve_later.append(ins)
114 |     return memory_infos
115 | 


--------------------------------------------------------------------------------
/src/project.py:
--------------------------------------------------------------------------------
  1 | import logging
  2 | from collections import defaultdict
  3 | #from msilib.schema import MsiAssembly
  4 | import time
  5 | from tracemalloc import start
  6 | 
  7 | from requests import post
  8 | from src.cfg.cfg import CFG
  9 | from src.cfg.disassembly import generate_BBs
 10 | from src.cfg.opcodes import external_data
 11 | from src.evm.evm import run, run_symbolic
 12 | from src.evm.exceptions import IntractablePath, ExternalData, TimeoutException
 13 | from src.explorer.forward import ForwardExplorer
 14 | from src.slicing import interesting_slices, slice_to_program
 15 | from src.util.z3_extra_util import concrete
 16 | import src.util.utils
 17 | from src.flow.tainting import run_static
 18 | from src.flow import code_info as cinfo
 19 | 
 20 | def load(path):
 21 |     with open(path) as infile:
 22 |         return Project(bytes.fromhex(infile.read().strip()))
 23 | 
 24 | def load_json(path):
 25 |     import json
 26 |     with open(path) as infile:
 27 |         return Project.from_json(json.load(infile))
 28 | 
 29 | class Project(object):
 30 |     def __init__(self, code, cfg=None):
 31 |         self.code = code
 32 |         self._prg = None
 33 |         self._cfg = cfg
 34 |         self._writes = None
 35 | 
 36 |     @property
 37 |     def writes(self):
 38 |         if not self._writes:
 39 |             self._analyze_writes()
 40 |         return self._writes
 41 | 
 42 |     @property
 43 |     def symbolic_writes(self):
 44 |         return self.writes[None]
 45 | 
 46 |     @property
 47 |     def cfg(self):
 48 |         if not self._cfg:
 49 |             self._cfg = CFG(generate_BBs(self.code))
 50 |         return self._cfg
 51 | 
 52 |     @property
 53 |     def prg(self):
 54 |         if not self._prg:
 55 |             self._prg = {ins.addr: ins for bb in self.cfg.bbs for ins in bb.ins}
 56 |         return self._prg
 57 | 
 58 |     def to_json(self):
 59 |         return {'code': self.code.hex(), 'cfg': self.cfg.to_json()}
 60 | 
 61 |     @staticmethod
 62 |     def from_json(json_dict):
 63 |         code = bytes.fromhex(json_dict['code'])
 64 |         cfg = CFG.from_json(json_dict['cfg'], code)
 65 |         return Project(code, cfg)
 66 | 
 67 |     def run(self, program):
 68 |         return run(program, code=self.code)
 69 | 
 70 |     def run_symbolic(self, path, inclusive=False, mode=None, code_path=None, ac_jumpi=None):        
 71 |         #return run_symbolic(self.prg, path, self.code, inclusive=inclusive)
 72 |         return run_symbolic(self.prg, path, self.code, inclusive=inclusive, code_path=code_path, mode=mode, ac_jumpi=ac_jumpi)
 73 | 
 74 |     
    def get_constraints(self, instructions, args=None, inclusive=False, find_sstore=False):
        """Yield ``(instruction, path, symbolic_result)`` for paths that reach
        one of ``instructions``.

        Args:
            instructions: candidate target instructions.
            args: if truthy, passed to ``interesting_slices`` to build the
                slices each path has to cover before the target.
            inclusive: forwarded to ``run_symbolic``.
            find_sstore: without ``args``, require each path to pass a
                reachable ``SSTORE`` before the target.

        Paths that turn out intractable are blacklisted in the explorer;
        paths failing with ``ExternalData`` or any other exception are
        skipped.

        Raises:
            TimeoutException: symbolic execution timed out.
        """
        # only check instructions that have a chance to reach root
        # (``|`` binds tighter than ``in``: 0 must be an ancestor or the
        # instruction's own block start)
        instructions = [ins for ins in instructions if 0 in ins.bb.ancestors | {ins.bb.start}]
        
        if not instructions:
            return
        # target address -> instruction; the last path element is looked up here
        imap = {ins.addr: ins for ins in instructions}

        exp = ForwardExplorer(self.cfg)
        
        if args:
            slices = [s + (ins,) for ins in instructions for s in interesting_slices(ins, args, reachable=True)]                        
        else:
            # Are we looking for a state-changing path?
            if find_sstore:                
                sstores = self.cfg.filter_ins('SSTORE', reachable=True)
                slices = [(sstore, ins) for sstore in sstores for ins in instructions]                                
            else:
                slices = [(ins,) for ins in instructions]
            
        for path in exp.find(slices, avoid=external_data):                        
            logging.debug('Path %s', ' -> '.join('%x' % p for p in path))
            try:
                ins = imap[path[-1]]
                yield ins, path, self.run_symbolic(path, inclusive)
            except IntractablePath as e:
                # Blacklist the executed blocks plus the first block that
                # could not be reached.
                bad_path = [i for i in e.trace if i in self.cfg._bb_at] + [e.remainingpath[0]]
                dd = self.cfg.data_dependence(self.cfg._ins_at[e.trace[-1]])
                # If the failing instruction does not depend on MLOAD/SLOAD,
                # shorten the blacklisted path to start at the first block
                # that contributes to its data dependence.
                if not any(i.name in ('MLOAD', 'SLOAD')     for i in dd):
                    ddbbs = set(i.bb.start for i in dd)
                    bad_path_start = next((j for j, i in enumerate(bad_path) if i in ddbbs), 0)
                    bad_path = bad_path[bad_path_start:]
                logging.info("Bad path: %s" % (', '.join('%x' % i for i in bad_path)))
                exp.add_to_blacklist(bad_path)
                continue
            except ExternalData:
                continue
            except TimeoutException:
                raise TimeoutException("Timed out!")
            except Exception as e:
                logging.exception('Failed path due to %s', e)
                continue
    def _analyze_writes(self):        
        """Populate ``self._writes`` with the ``SSTORE`` instructions grouped
        by the value on top of the stack after running their slice.

        Concrete values are used as keys directly; everything else is
        collected under the key ``None``.  Slices that are intractable are
        logged and skipped.
        """
        # NOTE(review): relies on ``self.filter_ins``, which is not defined in
        # the part of the class visible here - confirm it exists.
        sstore_ins = self.filter_ins('SSTORE')
        self._writes = defaultdict(set)
        for store in sstore_ins:
            for bs in interesting_slices(store):
                # NOTE(review): assumes the slice supports ``append`` (i.e. is
                # a list); get_constraints treats slices as tuples - verify.
                bs.append(store)
                prg = slice_to_program(bs)
                path = sorted(prg.keys())
                try:
                    r = run_symbolic(prg, path, self.code, inclusive=True)
                except IntractablePath:
                    logging.exception('Intractable Path while analyzing writes')
                    continue
                addr = r.state.stack[-1]
                if concrete(addr):
                    self._writes[addr].add(store)
                else:
                    self._writes[None].add(store)
        # Freeze into a plain dict so later lookups do not create entries.
        self._writes = dict(self._writes)
136 | 
137 |     def get_writes_to (self, addr):
138 |         concrete_writes = set()
139 |         if concrete(addr) and addr in self.writes:
140 |             concrete_writes = self.writes[addr]
141 |         return concrete_writes, self.symbolic_writes
142 | 
143 |     def reolve_struct_offset(self, ssa, slice, sload=False, sload_ins=None, sstore=False, sstore_ins=None):
144 |         function = [f for f in ssa.functions][0]          
145 |         if sload:                                            
146 |             ssa_block=[ins for block in function if block.offset == sload_ins.bb.start for ins in block.insns]   
147 |             ssa_ins= [s for s in ssa_block if s.offset == sload_ins.addr][0]
148 |         elif sstore:
149 |             ssa_block=[ins for block in function if block.offset == sstore_ins.bb.start for ins in block.insns]   
150 |             ssa_ins= [s for s in ssa_block if s.offset == sstore_ins.addr][0]            
151 |         struct_offset = None
152 |         if ssa_ins.arguments[0]._writer is not None:
153 |             if ssa_ins.arguments[0]._writer.insn.name =='ADD':
154 |                 if ssa_ins.arguments[0]._writer.arguments[0]._writer is not None and \
155 |                     ssa_ins.arguments[0]._writer.arguments[1]._writer is None:
156 |                     if  ssa_ins.arguments[0]._writer.arguments[0]._writer.insn.name=='SHA3':                        
157 |                         struct_offset =ssa_ins.arguments[0]._writer.arguments[1].concrete_value
158 |                 elif  ssa_ins.arguments[0]._writer.arguments[0]._writer is None and \
159 |                     ssa_ins.arguments[0]._writer.arguments[1]._writer is not None:
160 |                     if ssa_ins.arguments[0]._writer.arguments[1]._writer.insn.name=='SHA3':
161 |                         struct_offset =ssa_ins.arguments[0]._writer.arguments[0].concrete_value
162 |             
163 |         return struct_offset
164 |         
165 |     def resolve_slot_offset(self, ssa, slice, sload=False, sload_ins=None, sstore=False, sstore_ins=None):        
166 |         function = [f for f in ssa.functions][0]                                                                                 
167 |         if sload:            
168 |             if [ins.name for ins in slice if ins.name in set(['SLOAD','EXP','DIV','SUB'])] ==['SLOAD','EXP','DIV']:            
169 |                 exp_ins = [ins for ins in slice if ins.name in set(['EXP'])]
170 |                 ssa_block=[ins for block in function if block.offset == exp_ins[0].bb.start for ins in block.insns]   
171 |                 ssa_ins =[s for s in ssa_block if s.offset == exp_ins[0].addr]
172 |                 if (ssa_ins[0].arguments[0].concrete_value==256):
173 |                     start_byte=ssa_ins[0].arguments[1].concrete_value+1                                    
174 |                 elif (ssa_ins[0].arguments[0].concrete_value==2): #in binary
175 |                     start_byte=ssa_ins[0].arguments[1].concrete_value/8+1                                
176 |             elif [ins.name for ins in slice if ins.name in set(['SLOAD','EXP','DIV','SUB'])] ==['SLOAD','EXP','SUB']:
177 |                 #load starting first bye
178 |                 start_byte=1                      
179 |             elif [ins.name for ins in slice if ins.name in set(['SLOAD','EXP','SUB','DIV'])] ==['SLOAD','EXP','SUB','DIV']:            
180 |                 div_ins = [ins for ins in slice if ins.name in set(['DIV'])]
181 |                 ssa_block=[ins for block in function if block.offset == div_ins[0].bb.start for ins in block.insns]   
182 |                 ssa_ins =[s for s in ssa_block if s.offset == div_ins[0].addr]
183 |                 if ssa_ins[0].arguments[1]._writer is None:
184 |                     pos_str=str('%x' %ssa_ins[0].arguments[1].concrete_value)
185 |                     start_byte=len(pos_str)//2+1-pos_str.find('1')            
186 |                 else:                    
187 |                     exp_ins=ssa_ins[0].arguments[1]._writer
188 |                     if exp_ins.insn.name =='EXP':
189 |                         if (exp_ins.arguments[0].concrete_value==256):
190 |                             start_byte= exp_ins.arguments[1].concrete_value+1                
191 |                         elif (exp_ins.arguments[0].concrete_value==2): #in binary
192 |                             start_byte=exp_ins.arguments[1].concrete_value/8+1                                                    
193 |                     else:
194 |                         start_byte='whole'
195 |                         print('error, check resolve_slot_offset')                    
196 |             else:
197 |                 start_byte='whole'                                            
198 |         elif sstore:                        
199 |             start_byte=None
200 |             masking_pattern = [ins.name for ins in slice if ins.name in set(['SLOAD','EXP','SUB','NOT'])]                                    
201 |             if len(masking_pattern)==0:
202 |                 start_byte='whole' # overapproximate the whole slot 
203 | 
204 |             elif 'NOT' not in masking_pattern or 'SLOAD' not in masking_pattern: #Cannot decide what is overwritten without SLOAD   
205 |                 start_byte='whole' #overapproximate  the whole slot 
206 |             
207 |             elif len([i for i in masking_pattern if i== 'NOT'])>1:# we may need to overapproximate as we do not know which not is for masking
208 |                 start_byte='whole' #overapproximate the whole slot                 
209 |             
210 |             elif len([i for i in masking_pattern if i == 'NOT'])==1 and 'SLOAD' in masking_pattern:              
211 |                 not_ins =[ins for ins in slice if ins.name =='NOT']
212 |                 ssa_block=[ins for block in function if block.offset ==not_ins[0].bb.start for ins in block.insns]   
213 |                 ssa_not_ins =[s for s in ssa_block if s.offset in [ins.addr for ins in slice if ins.name in set(['NOT'])]]                
214 |                 if ssa_not_ins[0].arguments[0]._writer is None:
215 |                     pos_str=str('%x' %ssa_not_ins[0].arguments[0].concrete_value)
216 |                     start_byte=(len(pos_str)-pos_str.rfind('f'))//2+1
217 |                 else:                                
218 |                     ssa_ins=ssa_not_ins[0].arguments[0]._writer.arguments[1]._writer                    
219 |                     if ssa_ins is not None and ssa_ins.insn.name =='EXP' and ssa_ins.arguments[0]._writer is None and ssa_ins.arguments[1]._writer is None:
220 |                         exp = pow(ssa_ins.arguments[0].concrete_value, ssa_ins.arguments[1].concrete_value, src.util.utils.TT256)
221 |                         mul = None
222 |                         if ssa_not_ins[0].arguments[0]._writer.arguments[0]._writer is None:
223 |                             mul = ssa_not_ins[0].arguments[0]._writer.arguments[0].concrete_value * exp
224 |                         else:
225 |                             sub_ins=ssa_not_ins[0].arguments[0]._writer.arguments[0]._writer
226 |                             if sub_ins.insn.name =='SUB':
227 |                                 if sub_ins.arguments[0]._writer.insn.name=='EXP':
228 |                                     exp1 = pow(sub_ins.arguments[0]._writer.arguments[0].concrete_value, sub_ins.arguments[0]._writer.arguments[1].concrete_value, src.util.utils.TT256)                                    
229 |                                     sub = exp1 - sub_ins.arguments[1].concrete_value
230 |                                     mul = sub * exp
231 |                         if mul is not None:           
232 |                             bit_mask = '%x' % (src.util.utils.TT256M1 - mul)                         
233 |                             start_byte= (len(bit_mask)-bit_mask.rfind('0'))//2+1                                                                        
234 |                     elif ssa_not_ins[0].arguments[0]._writer is not None and ssa_not_ins[0].arguments[0]._writer.insn.name=='SUB':                        
235 |                         sub_ins=ssa_not_ins[0].arguments[0]._writer
236 |                         if sub_ins.arguments[0]._writer is not None and sub_ins.arguments[0]._writer.insn.name=='EXP':
237 |                             exp_ins = sub_ins.arguments[0]._writer
238 |                             if exp_ins.arguments[0]._writer is None and exp_ins.arguments[1]._writer is None:
239 |                                 exp = pow(exp_ins.arguments[0].concrete_value, exp_ins.arguments[1].concrete_value, src.util.utils.TT256)                                    
240 |                                 sub = exp - sub_ins.arguments[1].concrete_value                                
241 |                                 bit_mask = '%x' % (src.util.utils.TT256M1 - sub)                         
242 |                                 start_byte= (len(bit_mask)-bit_mask.rfind('0'))//2+1                                  
243 |                         elif sub_ins.arguments[0]._writer is not None and sub_ins.arguments[0]._writer.insn.name=='SHL':                                   
244 |                             shl_ins = sub_ins.arguments[0]._writer                      
245 |                             if shl_ins.arguments[0]._writer is None and shl_ins.arguments[1]._writer is None:
246 |                                 shl= (shl_ins.arguments[1].concrete_value << shl_ins.arguments[0].concrete_value)
247 |                                 sub = shl- sub_ins.arguments[1].concrete_value                 
248 |                                 bit_mask = '%x' % (src.util.utils.TT256M1 - sub)                         
249 |                                 start_byte= (len(bit_mask)-bit_mask.rfind('0'))//2+1
250 |                     else:                    
251 |                         start_byte='whole' #overapproximate the whole slot                                     
252 |             
253 |             if not start_byte:
254 |                 print(sstore_ins)                
255 |                 print(masking_pattern)            
256 |         return start_byte
257 | 
    def resolve_access_control_slots(self, ssa, instructions, ac_check_ins, args=None, memory_info=None, restricted=True):
        """Collect backward slices of ``instructions`` that look like access-control checks.

        Each instruction whose basic block is the root (0) or has the root
        among its ancestors is sliced backwards on ``args``; every slice is
        classified by the opcode names it contains (see the branch comments).

        :param ssa: SSA program, forwarded to ``resolve_slot_offset`` and
            ``reolve_struct_offset``.
        :param instructions: candidate instructions (named ``jump_ins`` below).
        :param ac_check_ins: opcode names that must all occur in a slice for
            it to count as an access-control check.
        :param args: stack arguments to slice on; when falsy no slicing is
            done and the returned containers are empty.
        :param memory_info: forwarded only to the nested slicing of SLOADs.
        :param restricted: NOTE(review): not used by this method.
        :return: ``None`` when no instruction can reach the root, otherwise
            the tuple ``(access_sloads, slices, imap, other_ac_checks)``:
            ``access_sloads`` maps an instruction to a list of dicts with keys
            ``'sload'``, ``'sbyte'`` and ``'structOffset'``; ``slices`` holds
            each accepted slice extended with its instruction; ``imap`` maps
            address to instruction; ``other_ac_checks`` lists instructions
            whose slice contains CALLER, PUSH20 and EQ.
        """
        slices = []
        other_ac_checks = []
        # only check instructions that have a chance to reach root                        
        instructions = [ins for ins in instructions if 0 in ins.bb.ancestors | {ins.bb.start}] 
        if not instructions:
            return
        imap = {ins.addr: ins for ins in instructions}
        access_sloads = defaultdict(list)        
        if args:                            
            for jump_ins in instructions:                
                for bs in interesting_slices(jump_ins, args, reachable=True, restricted=False):
                    # NOTE(review): looks like leftover debugging output for one
                    # specific address (0x2f9) -- confirm and remove.
                    if('%x' %jump_ins.addr) == '2f9':                    
                        print(jump_ins)                        
                        print(bs)
                    # SLOADs already recorded for this instruction by earlier slices.
                    cur_jump_sloads= [v['sload'] for k in access_sloads if k==jump_ins for v in access_sloads[k]]
                    # Case 1: the slice reuses an SLOAD already recorded for this instruction.
                    if len(cur_jump_sloads)!=0 and any(ins in cur_jump_sloads for ins in bs if ins.name in frozenset(['SLOAD'])): 
                        slices.append(bs+(jump_ins,))                                                                        
                    # Case 2: every name in ac_check_ins occurs in the slice and it has no CALL.
                    # NOTE(review): sload[0] assumes the slice contains an SLOAD,
                    # presumably because ac_check_ins includes it -- confirm.
                    elif len(set(ac_check_ins)&set([ins.name for ins in bs]))==len(ac_check_ins) and not any(ins.name in frozenset(['CALL']) for ins in bs):                          
                        slices.append(bs+(jump_ins,))  
                        sload= [i for i in bs if i.name in frozenset(['SLOAD'])]                                                              
                        slot_byte= self.resolve_slot_offset(ssa, bs+(jump_ins,), sload=True, sload_ins=sload[0])                        
                        struct_offset= self.reolve_struct_offset(ssa, bs+(jump_ins,), sload=True, sload_ins=sload[0] )
                        access_sloads[jump_ins].append({'sload':sload[0],'sbyte':slot_byte,'structOffset':struct_offset}) 
                    # Case 3: the slice has an SLOAD, no CALL, and either a PUSH1 0xff
                    # or a predecessor block that reads call data. It is accepted only
                    # if the chosen SLOAD's own slice (argument 0) contains CALLER.
                    elif any(ins.name in frozenset(['SLOAD']) for ins in bs) and (any(ins.arg==b'\xff' for ins in bs if ins.name in frozenset(['PUSH1'])) or \
                        any(ins.name in frozenset(['CALLDATALOAD','CALLDATACOPY']) for bb in jump_ins.bb.pred for ins in bb.ins)) and not any(ins.name in frozenset(['CALL']) for ins in bs):                                                                                                
                        if any(ins.arg==b'\xff' for ins in bs if ins.name in frozenset(['PUSH1'])):
                            # SLOADs followed by a PUSH1 0xff located before jump_ins.
                            sload_ins=[ins for ins in bs if ins.name in frozenset(['SLOAD']) if any(
                            ss.arg==b'\xff' and ins.addr < ss.addr <jump_ins.addr for ss in bs if ss.name in frozenset(['PUSH1']))]
                        else:
                            # All SLOADs, unless jump_ins's own block has a GT/LT in the slice.
                            sload_ins=[ins for ins in bs if ins.name in frozenset(['SLOAD']) if not any(
                            ss.name in frozenset(['GT','LT']) and ss.bb.start == jump_ins.bb.start for ss in bs)]                                                        
                        if not sload_ins: 
                            continue                         
                        for s in interesting_slices(sload_ins[0],[0], memory_info, reachable=False, restricted=False):
                            if any(ins.name in frozenset(['CALLER']) for ins in s):                                                                                                
                                slices.append(bs+(jump_ins,)) 
                                sload= [i for i in bs if i.name in frozenset(['SLOAD'])]                                                                                                             
                                slot_byte= self.resolve_slot_offset(ssa, bs+(jump_ins,), sload=True, sload_ins=sload[0])                                
                                struct_offset= self.reolve_struct_offset(ssa, bs+(jump_ins,), sload=True, sload_ins=sload[0] )

                                access_sloads[jump_ins].append({'sload':sload[0],'sbyte':slot_byte,'structOffset':struct_offset})                                                                                                                                        
                    # Case 4: the slice contains CALLER, PUSH20 and EQ.
                    elif set([ins.name for ins in bs])&set(['CALLER','PUSH20','EQ']) == set(['CALLER','PUSH20','EQ']):
                       other_ac_checks.append(jump_ins)
                                                            
        return access_sloads, slices, imap, other_ac_checks
304 | 
305 |     def extract_control_paths(self, slices, imap):
306 |         exp = ForwardExplorer(self.cfg)        
307 |         for path in exp.find(slices, avoid=[]):           
308 |             ins = imap[path[-1]]                                                                                                                
309 |             yield ins, path
310 | 
311 |     def extract_paths(self,ssa, instructions, sinks, taintedBy, defect_type, args=None, storage_slots=None, storage_sha3_bases=None, inclusive=False, find_sstore=False, restricted=True, memory_info=None):        
312 |         # only check instructions that have a chance to reach root                        
313 |         instructions = [ins for ins in instructions if 0 in ins.bb.ancestors | {ins.bb.start}] 
314 |         if not instructions:
315 |             return
316 |         imap = {ins.addr: ins for ins in instructions}        
317 |         exp = ForwardExplorer(self.cfg)                    
318 |         slices= []
319 |         slot_sbyte={}
320 |         struct_offset={}
321 |         for ins in instructions:
322 |              for s in interesting_slices(ins, args, memory_info, reachable=True, taintedBy=taintedBy, restricted=restricted):
323 |                 slices.append(s+(ins,)) 
324 |                 if ins.name =='SSTORE':                    
325 |                     sbyte = self.resolve_slot_offset(ssa, s+(ins,), sstore=True, sstore_ins=ins)
326 |                     slot_sbyte[ins] = sbyte                                      
327 |                 elif ins.name  in set(['SELFDESTRUCT','DELEGATECALL']): 
328 |                     sbyte = self.resolve_slot_offset(ssa, s+(ins,), sload=True)                    
329 |                     slot_sbyte[ins]=sbyte
330 |                     sload_ins =[ins for ins in s if ins.name in set(['SLOAD'])]               
331 |                     if len(sload_ins)>0:                                        
332 |                         soffset= self.reolve_struct_offset(ssa, s+(ins,),sload=True, sload_ins=sload_ins[0]) 
333 |                         struct_offset[ins]=soffset
334 |         
335 |         checked_ins=[] 
336 |         c=0      
337 |         start_time=time.time()            
338 |         for path in exp.find(slices, avoid=[]):            
339 |             logging.debug('Path %s', ' -> '.join('%x' % p for p in path))                                                                 
340 |             c+=1            
341 |             try:                    
342 |                 ins = imap[path[-1]]                                                                                                                                                
343 |                 if sinks:        
344 |                     result = run_static(self.prg, ssa, path, sinks, self.code, inclusive,defect_type=defect_type, storage_slots=storage_slots, storage_sha3_bases=storage_sha3_bases)                                                                
345 |                     if result._tainted and ins.name in set(['SSTORE']):                                                    
346 |                         sstore_slices = [s+(ins,) for s in interesting_slices(ins, [0], memory_info, reachable=True, taintedBy=None, restricted=False)]                                                                                                                                                                                
347 |                         soffset= self.reolve_struct_offset(ssa, sstore_slices[0],sstore=True, sstore_ins=ins)
348 |                         struct_offset[ins]=soffset                                                            
349 |                     yield ins,slot_sbyte,struct_offset, path, result
350 |                 else:
351 |                     yield ins, slot_sbyte, struct_offset, path, None                        
352 |             except IntractablePath as e:                
353 |                 bad_path = [i for i in e.trace if i in self.cfg._bb_at] + [e.remainingpath[0]]
354 |                 dd = self.cfg.data_dependence(self.cfg._ins_at[e.trace[-1]])
355 |                 if not any(i.name in ('MLOAD', 'SLOAD')     for i in dd):
356 |                     ddbbs = set(i.bb.start for i in dd)
357 |                     bad_path_start = next((j for j, i in enumerate(bad_path) if i in ddbbs), 0)
358 |                     bad_path = bad_path[bad_path_start:]
359 |                 logging.info("Bad path: %s" % (', '.join('%x' % i for i in bad_path)))
360 |                 exp.add_to_blacklist(bad_path)
361 |                 continue
362 |             except ExternalData:
363 |                 continue
364 |             except TimeoutException:
365 |                 raise TimeoutException("Timed out!")
366 |             except Exception as e:
367 |                 logging.exception('Failed path due to %s', e)                     
368 |                 continue            
369 | 
370 | 


--------------------------------------------------------------------------------
/src/slicing.py:
--------------------------------------------------------------------------------
  1 | from src.cfg.instruction import Instruction
  2 | from src.cfg.opcodes import potentially_user_controlled
  3 | from src.explorer.backward import traverse_back
  4 | from src.util.intrange import Range
  5 | 
  6 | 
  7 | def slice_to_program(s):
  8 |     pc = 0
  9 |     program = {}
 10 |     for ins in s:
 11 |         program[pc] = ins
 12 |         pc += ins.next_addr - ins.addr
 13 |     return program
 14 | 
 15 | 
 16 | def adjust_stack(backward_slice, stack_delta):
 17 |     if stack_delta > 0:
 18 |         backward_slice.extend(Instruction(0x0, 0x63, b'\xde\xad\xc0\xde') for _ in range(abs(stack_delta)))
 19 |     elif stack_delta < 0:
 20 |         backward_slice.extend(Instruction(0x0, 0x50) for _ in range(abs(stack_delta)))
 21 | 
 22 | 
 23 | class SlicingState(object):
 24 |     def __init__(self, stacksize, stack_underflow, stack_delta, taintmap, memory_taint, backward_slice, instructions):
 25 |         self.stacksize = stacksize
 26 |         self.stack_underflow = stack_underflow
 27 |         self.stack_delta = stack_delta
 28 |         self.taintmap = frozenset(taintmap)
 29 |         self.memory_taint = memory_taint
 30 |         # The actual slice doesn't matter that much. What matters more is the resulting EXPRESSION of the return-address
 31 |         self.backward_slice = tuple(backward_slice)
 32 |         self.instructions = tuple(instructions)
 33 | 
 34 |     def __hash__(self):
 35 |         return sum(
 36 |             a * b for a, b in zip((23, 29, 31, 37, 41), (
 37 |                 self.stacksize, self.stack_delta, hash(self.taintmap), hash(self.instructions),
 38 |                 hash(self.backward_slice))))
 39 | 
 40 |     def __eq__(self, other):
 41 |         return (
 42 |                 self.stacksize == other.stacksize and
 43 |                 self.stack_delta == other.stack_delta and
 44 |                 self.taintmap == other.taintmap and
 45 |                 self.memory_taint == other.memory_taint and
 46 |                 self.backward_slice == other.backward_slice and
 47 |                 self.instructions == other.instructions)
 48 | 
 49 |     def __str__(self):
 50 |         return 'Stacksize: %d, Underflow: %d, Delta: %d, Map: %s, Slice: %s, Instructions: %s' % (
 51 |             self.stacksize, self.stack_underflow, self.stack_delta, self.taintmap,
 52 |             ','.join('%x' % i.addr for i in self.backward_slice),
 53 |             ','.join('%x' % i.addr for i in self.instructions))
 54 | 
 55 | 
def advance_slice(slicing_state, memory_info):
    """Walk the state's pending instructions backwards and extend the slice.

    The instructions in ``slicing_state.instructions`` are visited last to
    first. An instruction joins the slice when it produces a tainted stack
    position or writes tainted memory; the taint is then moved onto what the
    instruction consumed.

    :param slicing_state: state to advance; it is not modified (its fields
        are copied into locals first).
    :param memory_info: optional mapping from instruction to an object with
        ``reads``/``writes``; may be falsy.
    :return: a new ``SlicingState`` with an empty instruction list. It is
        returned early as soon as neither stack nor memory taint remains.
    """
    stacksize = slicing_state.stacksize
    stack_underflow = slicing_state.stack_underflow
    stack_delta = slicing_state.stack_delta
    taintmap = set(slicing_state.taintmap)
    memory_taint = slicing_state.memory_taint
    backward_slice = list(slicing_state.backward_slice)
    instructions = slicing_state.instructions    

    for ins in instructions[::-1]:      
        slice_candidate = False
        # Candidate if any tainted position is at or above the lowest slot this instruction writes.
        if taintmap and stacksize - ins.outs <= max(taintmap):
            slice_candidate = True
        # ...or if it writes memory that is currently tainted.
        if memory_info and ins in memory_info and memory_info[ins].writes & memory_taint:
            slice_candidate = True
        if slice_candidate:
            add_to_slice = False
            if 0x80 <= ins.op <= 0x8f:  # Special handling for DUPa
                # Only relevant when the duplicated copy (top of stack) is tainted;
                # the taint then moves to the duplicated source slot.
                if stacksize - 1 in taintmap:
                    add_to_slice = True
                    in_idx = ins.op - 0x7f
                    taintmap.remove(stacksize - 1)
                    taintmap.add((stacksize - 1) - in_idx)
            elif 0x90 <= ins.op <= 0x9f:  # Special handling for SWAP
                in_idx = ins.op - 0x8f
                if stacksize - 1 in taintmap or (stacksize - 1) - in_idx in taintmap:
                    add_to_slice = True
                    if stacksize - 1 in taintmap and (stacksize - 1) - in_idx in taintmap:
                        # both tainted => taint does not change
                        pass
                    elif stacksize - 1 in taintmap:
                        taintmap.remove(stacksize - 1)
                        taintmap.add((stacksize - 1) - in_idx)
                    elif (stacksize - 1) - in_idx in taintmap:
                        taintmap.remove((stacksize - 1) - in_idx)
                        taintmap.add(stacksize - 1)            
            else:  # assume entire stack is affected otherwise
                add_to_slice = True
                # Drop taint from the produced slots, then taint the range up to
                # stacksize - delta (presumably the consumed inputs, if
                # delta == outs - ins -- TODO confirm against Instruction).
                taintmap -= set(range(stacksize - ins.outs, stacksize))
                taintmap |= set(range(stacksize - ins.outs, stacksize - ins.delta))

            if add_to_slice:
                # Pad for the stack effect of the instructions skipped since
                # the last sliced one, then restart the delta from this one.
                adjust_stack(backward_slice, stack_delta)
                stack_delta = -ins.delta
                backward_slice.append(ins)
                stack_underflow = min(stack_underflow, stacksize - ins.outs)
                if memory_info and ins in memory_info:
                    ins_info = memory_info[ins]
                    memory_taint = memory_taint - ins_info.writes + ins_info.reads

        stacksize -= ins.delta
        # no taint left? then our job here is done
        if not taintmap and not memory_taint:
            stack_adjust = stacksize - stack_underflow
            if stack_adjust > 0:
                adjust_stack(backward_slice, stack_adjust)
            return SlicingState(stacksize, stack_underflow, stack_delta, set(taintmap), memory_taint,
                                list(backward_slice),
                                [])

        stack_delta += ins.delta

    # still taint left? trace further if gas is still sufficient
    return SlicingState(stacksize, stack_underflow, stack_delta, set(taintmap), memory_taint, list(backward_slice),
                        [])
121 | 
122 | 
def backward_slice(ins, taint_args=None, memory_info=None, initial_gas=10, must_visits=[], reachable=False):
    """Compute backward slices for the stack arguments of ``ins``.

    :param ins: instruction to slice from; ``ins.ins`` is its number of
        stack inputs.
    :param taint_args: indices of the arguments to track; when falsy, all
        ``ins.ins`` arguments are tracked.
    :param memory_info: optional mapping from instruction to memory
        reads/writes; when it contains ``ins``, its reads seed the memory taint.
    :param initial_gas: forwarded to ``traverse_back``.
    :param must_visits: forwarded to ``traverse_back``; never modified here.
    :param reachable: when true, keep only slices whose first instruction
        with a basic block sits in the root block (0) or has it as ancestor.
    :return: list of slices, each in forward order; ``[]`` when ``ins`` takes
        no stack inputs.
    """
    # logging.debug('backward_slice called')
    if ins.ins == 0:
        return []
    if taint_args:
        taintmap = set((ins.ins - 1) - i for i in taint_args)
    else:
        taintmap = set(range(ins.ins))
    if memory_info and ins in memory_info:
        memory_taint = memory_info[ins].reads
    else:
        memory_taint = Range()

    def initial_data(ins):
        # Start with the instructions preceding `ins` in its own basic block.
        idx = ins.bb.ins.index(ins)
        return SlicingState(ins.ins, 0, 0, taintmap, memory_taint, [], ins.bb.ins[:idx])

    def advance_data(slicing_state):
        return advance_slice(slicing_state, memory_info)

    def update_data(slicing_state, new_bb):
        # Same state, but now pending on the instructions of the next block.
        return SlicingState(slicing_state.stacksize, slicing_state.stack_underflow, slicing_state.stack_delta,
                            set(slicing_state.taintmap), slicing_state.memory_taint, list(slicing_state.backward_slice),
                            new_bb.ins)

    def finish_path(slicing_state):
        return not slicing_state.taintmap and not slicing_state.memory_taint

    # logging.debug('Before loop')
    slices = [r.backward_slice[::-1] for r in
              traverse_back([ins], initial_gas, initial_data, advance_data, update_data, finish_path, must_visits)]
    if not reachable:
        return slices

    filtered_slices = []
    for candidate in slices:
        # Instructions with a falsy `bb` are skipped when looking for the
        # slice's first block. A slice without any block cannot be shown
        # reachable, so it is dropped rather than letting next() raise
        # StopIteration.
        first_bb = next((i.bb for i in candidate if i.bb), None)
        if first_bb is None:
            continue
        if 0 in first_bb.ancestors | {first_bb.start}:
            filtered_slices.append(candidate)
    return filtered_slices
168 | 
169 | 
def interesting_slices(instruction, args=None, memory_info=None, reachable=False, taintedBy=potentially_user_controlled, restricted=True):
    """Return the backward slices of ``instruction``, optionally filtered.

    With ``restricted`` true only slices containing at least one instruction
    whose name is in ``taintedBy`` are kept; otherwise every slice is
    returned.
    """
    candidates = backward_slice(instruction, args, memory_info, reachable=reachable)
    if not restricted:
        return list(candidates)
    return [bs for bs in candidates if any(ins.name in taintedBy for ins in bs)]
176 | 
def change_slices(instruction, args=None, reachable=False):
    """Return all backward slices of ``instruction`` as a new list (no memory info, no filtering)."""
    return list(backward_slice(instruction, args, reachable=reachable))
179 | 


--------------------------------------------------------------------------------
/src/storage.py:
--------------------------------------------------------------------------------
  1 | from collections import deque
  2 | 
  3 | from src.cfg.opcodes import storage_reads, storage_writes
  4 | import src.util.utils
  5 | from src.evm.exceptions import TimeoutException
  6 | 
  7 | 
  8 | class InconsistentSlot(Exception):
  9 |     pass
 10 | 
 11 | class UninitializedRead(Exception):
 12 |     def __init__(self, index, *args):
 13 |         super(UninitializedRead, self).__init__(*args)
 14 |         if isinstance(index, slice):
 15 |             self.start = index.start or 0
 16 |             self.end = index.stop
 17 |         else:
 18 |             self.start = index
 19 |             self.end = index + 1
 20 | 
 21 |     def __repr__(self):
 22 |         return '%s from: %d to %d' % (super(UninitializedRead, self).__repr__(), self.start, self.end)
 23 | 
 24 |     def __str__(self):
 25 |         return '%s from: %d to %d' % (super(UninitializedRead, self).__repr__(), self.start, self.end)
 26 | 
 27 | 
 28 | class StorageInfo(object):
 29 |     def __init__(self, reads, writes, read_sha3_bases, write_sha3_bases):
 30 |         self.reads = reads
 31 |         self.writes = writes
 32 |         self.read_sha3_bases= read_sha3_bases
 33 |         self.write_sha3_bases= write_sha3_bases
 34 | 
 35 | def get_storage_info(ins, code, memory_info=None):
 36 |     from .slicing import backward_slice, slice_to_program
 37 |     from .evm.evm import run
 38 |     from .evm.state import EVMState
 39 |     from .evm.exceptions import ExternalData
 40 |     from .util.intrange import Range
 41 |     targets = []
 42 | 
 43 |     read = False
 44 |     write = False
 45 | 
 46 |     if ins.name in storage_reads:
 47 |         read = True
 48 |         read_slot_info = storage_reads[ins.name]        
 49 |         if read_slot_info < 0:
 50 |             targets.append(-1 - read_slot_info)
 51 |        
 52 |     if ins.name in storage_writes:
 53 |         write = True
 54 |         write_slot_info = storage_writes[ins.name]        
 55 |         if write_slot_info < 0:
 56 |             targets.append(-1 - write_slot_info)
 57 |        
 58 |     if not read and not write:
 59 |         return None
 60 |     bs = backward_slice(ins, targets, memory_info)
 61 |     read_slot = set()
 62 |     read_slot_sha3_base= dict()
 63 |     write_slot = set()
 64 |     write_slot_sha3_base= dict()
 65 |     for b in bs:
 66 |         try:
 67 |             state = run(slice_to_program(b), EVMState(code=code), check_initialized=False)                                                
 68 |         except UninitializedRead as e:
 69 |             raise e
 70 |         except ExternalData as e:
 71 |             raise e
 72 |         if read:
 73 |             new_slot = state.stack[read_slot_info] if read_slot_info < 0 else read_slot_info                                                           
 74 |             if new_slot not in read_slot:
 75 |                 read_slot.add(new_slot)
 76 |                 sha3_ins=[ins for ins in b if ins.name=='SHA3']                
 77 |                 mstore_ins=[ins for ins in b if ins.name=='MSTORE']                
 78 |                 if len(sha3_ins)==1 and len(mstore_ins)==1: 
 79 |                     read_slot_sha3_base[new_slot]=src.util.utils.bytearray_to_int(state.memory[0:32])
 80 |                 elif len(sha3_ins)>=1 and len(mstore_ins)>=2: 
 81 |                     read_slot_sha3_base[new_slot]=src.util.utils.bytearray_to_int(state.memory[32:64])                                                                            
 82 |         if write:
 83 |             new_slot = state.stack[write_slot_info] if write_slot_info < 0 else write_slot_info                        
 84 |             if new_slot not in write_slot:
 85 |                 write_slot.add(new_slot)     
 86 |                 sha3_ins=[ins for ins in b if ins.name=='SHA3']                
 87 |                 mstore_ins=[ins for ins in b if ins.name=='MSTORE']
 88 |                 if len(sha3_ins)==1 and len(mstore_ins)==1: 
 89 |                     write_slot_sha3_base[new_slot]=src.util.utils.bytearray_to_int(state.memory[0:32])
 90 |                 elif len(sha3_ins)>=1 and len(mstore_ins)>=2: 
 91 |                     write_slot_sha3_base[new_slot]=src.util.utils.bytearray_to_int(state.memory[32:64])                                                            
 92 |     return StorageInfo(read_slot, write_slot,read_slot_sha3_base,write_slot_sha3_base)
 93 | 
 94 | 
 95 | def resolve_all_storage(cfg, code, memory_info=None):
 96 |     storage_infos = dict()
 97 |     resolve_later = deque(
 98 |         ins for bb in cfg.bbs for ins in bb.ins if ins.name in storage_reads or ins.name in storage_writes)
 99 |     todo = deque()    
100 |     progress = True    
101 |     while todo or (progress and resolve_later):
102 |         if not todo:
103 |             todo = resolve_later
104 |             resolve_later = deque()
105 |             progress = False
106 |         ins = todo.popleft()        
107 |         try:            
108 |             mi = get_storage_info(ins, code, memory_info)                                                        
109 |             if mi:                             
110 |                 progress = True
111 |                 storage_infos[ins] = mi
112 |         except TimeoutException:                
113 |             raise TimeoutException("Timed out!")
114 |         except Exception as e:
115 |             resolve_later.append(ins)
116 |     return storage_infos
117 | 


--------------------------------------------------------------------------------
/src/teEther_LICENSE:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "[]"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright [yyyy] [name of copyright owner]
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 


--------------------------------------------------------------------------------
/src/util/__init__.py:
--------------------------------------------------------------------------------
1 | from . import frontierset
2 | from . import intrange
3 | from . import utils
4 | from . import z3_extra_util


--------------------------------------------------------------------------------
/src/util/frontierset.py:
--------------------------------------------------------------------------------
  1 | from collections import defaultdict
  2 | 
  3 | 
  4 | class FrontierSet(object):
  5 |     """
  6 |     A set that also maintains a partial topological ordering
  7 |     The current set of "non-blocked" items can be obtained as
  8 |     .frontier
  9 |     """
 10 | 
 11 |     def __init__(self, data=None):
 12 |         self._inhibiting_set = defaultdict(set)
 13 |         self._blocking_set = defaultdict(set)
 14 |         self._edges = set()
 15 |         self._frontier = set()
 16 |         self._frozenedges = None
 17 |         self._frozenfrontier = None
 18 |         self._frozenall = None
 19 |         if data:
 20 |             for d in data:
 21 |                 self.add(d)
 22 | 
 23 |     def _invalidate(self):
 24 |         self._frozenedges = None
 25 |         self._frozenfrontier = None
 26 |         self._frozenall = None
 27 | 
 28 |     @property
 29 |     def edges(self):
 30 |         if self._frozenedges is None:
 31 |             self._frozenedges = frozenset(self._edges)
 32 |         return self._frozenedges
 33 | 
 34 |     @property
 35 |     def frontier(self):
 36 |         if self._frozenfrontier is None:
 37 |             self._frozenfrontier = frozenset(self._frontier)
 38 |         return self._frozenfrontier
 39 | 
 40 |     @property
 41 |     def all(self):
 42 |         if self._frozenall is None:
 43 |             self._frozenall = frozenset(set(self._blocking_set.keys()) | set(self._inhibiting_set.keys()) | self._frontier)
 44 |         return self._frozenall
 45 | 
 46 |     def add(self, a, b=None):
 47 |         """
 48 |         Add a to the set.
 49 |         If b is given, require that a is a necessary prerequisite for b
 50 |         :param a:
 51 |         :param b:
 52 |         :return:
 53 |         """
 54 |         self._invalidate()
 55 |         if b:
 56 |             self._edges.add((a, b))
 57 |             self._inhibiting_set[b].add(a)
 58 |             self._blocking_set[a].add(b)
 59 |             if not self._inhibiting_set[a]:
 60 |                 self._frontier.add(a)
 61 |             self._frontier.discard(b)
 62 |         else:
 63 |             self._frontier.add(a)
 64 | 
 65 |     def remove(self, a):
 66 |         self._invalidate()
 67 |         for b in self._blocking_set[a]:
 68 |             self._edges.discard((b, a))
 69 |             self._inhibiting_set[b].discard(a)
 70 |             if not self._inhibiting_set[b]:
 71 |                 self._frontier.add(b)
 72 |         for c in self._inhibiting_set[a]:
 73 |             self._edges.discard((a, c))
 74 |             self._blocking_set[c].discard(a)
 75 |         del self._blocking_set[a]
 76 |         del self._inhibiting_set[a]
 77 |         self._frontier.discard(a)
 78 | 
 79 |     def copy(self):
 80 |         new = FrontierSet()
 81 |         new._inhibiting_set = self._inhibiting_set.copy()
 82 |         new._blocking_set = self._blocking_set.copy()
 83 |         new._edges = self._edges.copy()
 84 |         new._frontier = self._frontier.copy()
 85 |         new._invalidate()
 86 |         return new
 87 | 
 88 |     def issubset(self, other):
 89 |         return self.all.issubset(other.all) and self.edges.issubset(other.edges)
 90 | 
 91 |     def __len__(self):
 92 |         return len(self.all)
 93 | 
 94 |     def __eq__(self, other):
 95 |         return self.edges == other.edges and self.all == other.all
 96 | 
 97 |     def __hash__(self):
 98 |         return 3 * hash(self.edges) + 7 * hash(self.all)
 99 | 
100 |     def __iter__(self):
101 |         return iter(self.all)
102 | 
103 |     def __repr__(self):
104 |         return '{%s|%s}' % (
105 |         ','.join('%x' % i for i in self.frontier), ','.join('%x' % i for i in self.all - self.frontier))
106 | 


--------------------------------------------------------------------------------
/src/util/intrange.py:
--------------------------------------------------------------------------------
 1 | class Range(object):
 2 |     START = 0
 3 |     END = 1
 4 | 
 5 |     def __init__(self, start=None, end=None, points=None):
 6 |         if not start is None and not end is None and start < end:
 7 |             self.points = ((start, Range.START), (end, Range.END))
 8 |         elif points:
 9 |             self.points = tuple(points)
10 |         else:
11 |             self.points = tuple()
12 | 
13 |     def __munch__(self, other, min_depth):
14 |         depth = 0
15 |         points = []
16 |         for i, t in sorted(self.points + other.points):
17 |             if depth == min_depth - 1 and t == Range.START:
18 |                 if points and i == points[-1][0]:
19 |                     del points[-1]
20 |                 else:
21 |                     points.append((i, Range.START))
22 |             elif depth == min_depth and t == Range.END:
23 |                 if points and i == points[-1][0]:
24 |                     del points[-1]
25 |                 else:
26 |                     points.append((i, Range.END))
27 |             depth += 1 if t == Range.START else -1
28 |         return Range(points=points)
29 | 
30 |     def __add__(self, other):
31 |         return self.__munch__(other, 1)
32 | 
33 |     def __and__(self, other):
34 |         return self.__munch__(other, 2)
35 | 
36 |     def __sub__(self, other):
37 |         return self + Range(points=[(i, 1 - t) for i, t in other.points])
38 | 
39 |     def __contains__(self, other):
40 |         if not isinstance(other, Range):
41 |             other = Range(other, other + 1)
42 |         return not (other - self).points
43 | 
44 |     def __or__(self, other):
45 |         return self + other
46 | 
47 |     def __xor__(self, other):
48 |         return (self - other) + (other - self)
49 | 
50 |     def __eq__(self, other):
51 |         return not self ^ other
52 | 
53 |     def __hash__(self):
54 |         return hash(self.points)
55 | 
56 |     def __cmp__(self, other):
57 |         for (a, _), (b, _) in zip(self.points, other.points):
58 |             if a != b:
59 |                 return a - b
60 |         else:
61 |             l1, l2 = len(self), len(other)
62 |             return l1 - l2
63 | 
64 |     def __len__(self):
65 |         return sum(b - a for (a, _), (b, _) in zip(self.points[::2], self.points[1::2]))
66 | 
67 |     def __repr__(self):
68 |         return 'Range(' + str(self) + ')'
69 | 
70 |     def __str__(self):
71 |         return ','.join('[%d, %d)' % (a, b) for (a, _), (b, _) in zip(self.points[::2], self.points[1::2]))
72 | 


--------------------------------------------------------------------------------
/src/util/utils.py:
--------------------------------------------------------------------------------
  1 | from sha3 import keccak_256
  2 | 
  3 | 
  4 | def sha3(data):
  5 |     return keccak_256(data).digest()
  6 | 
  7 | 
  8 | TT256 = 2 ** 256
  9 | TT256M1 = 2 ** 256 - 1
 10 | TT255 = 2 ** 255
 11 | SECP256K1P = 2 ** 256 - 4294968273
 12 | 
 13 | 
 14 | def big_endian_to_int(x):
 15 |     return int.from_bytes(x, byteorder='big')
 16 | 
 17 | 
 18 | def int_to_big_endian(v):
 19 |     return v.to_bytes(length=(v.bit_length()+7)//8, byteorder='big')
 20 | 
 21 | 
 22 | def to_string(value):
 23 |     return str(value)
 24 | 
 25 | 
 26 | def bytearray_to_bytestr(value):
 27 |     return bytes(value)
 28 | 
 29 | 
 30 | def encode_int32(v):
 31 |     return int_to_big_endian(v).rjust(32, b'\x00')
 32 | 
 33 | 
 34 | def bytes_to_int(value):
 35 |     return big_endian_to_int(bytes(value))
 36 | 
 37 | 
 38 | def bytearray_to_int(value):
 39 |     return bytes_to_int(bytearray_to_bytestr(value))
 40 | 
 41 | 
 42 | def is_pow2(x):
 43 |     return x and not x & (x - 1)
 44 | 
 45 | 
 46 | def log2(x):
 47 |     if not is_pow2(x):
 48 |         raise ValueError("%d is not a power of 2!" % x)
 49 |     i = -1
 50 |     while x:
 51 |         x >>= 1
 52 |         i += 1
 53 |     return i
 54 | 
 55 | 
 56 | def to_signed(i):
 57 |     return i if i < TT255 else i - TT256
 58 | 
 59 | 
 60 | 
 61 | class Denoms:
 62 |     def __init__(self):
 63 |         self.wei = 1
 64 |         self.babbage = 10 ** 3
 65 |         self.ada = 10 ** 3
 66 |         self.kwei = 10 ** 6
 67 |         self.lovelace = 10 ** 6
 68 |         self.mwei = 10 ** 6
 69 |         self.shannon = 10 ** 9
 70 |         self.gwei = 10 ** 9
 71 |         self.szabo = 10 ** 12
 72 |         self.finney = 10 ** 15
 73 |         self.mether = 10 ** 15
 74 |         self.ether = 10 ** 18
 75 |         self.turing = 2 ** 256 - 1
 76 | 
 77 | 
 78 | denoms = Denoms()
 79 | 
 80 | 
 81 | def unique(l):
 82 |     last = None
 83 |     for i in l:
 84 |         if i != last:
 85 |             yield i
 86 |         last = i
 87 | 
 88 | 
 89 | def is_subseq(a, b):
 90 |     a = tuple(a)
 91 |     b = tuple(b)
 92 |     # True iff a is a subsequence (not substring!) of b
 93 |     p = 0
 94 |     for x in a:
 95 |         try:
 96 |             p = b.index(x, p) + 1
 97 |         except ValueError:
 98 |             return False
 99 |     return True
100 | 
101 | 
def is_substr(a, b):
    """
    True iff a is a substring of b, i.e. the items of a occur in b as one
    contiguous run.  The empty sequence is a substring of every sequence.

    :param a: iterable to look for
    :param b: iterable to search in
    :return: bool
    """
    a = tuple(a)
    b = tuple(b)
    if not a:
        # Nothing to look for; indexing a[0] would raise on empty input.
        return True
    l = len(a)
    return any(b[p:p + l] == a for p in range(len(b) - l + 1))
117 | 


--------------------------------------------------------------------------------
/src/util/z3_extra_util.py:
--------------------------------------------------------------------------------
  1 | import numbers
  2 | 
  3 | import z3
  4 | 
  5 | 
  6 | def to_bytes(v):
  7 |     return v.as_long().to_bytes(length=(v.size()+7)//8, byteorder='big')
  8 | 
  9 | 
 10 | def get_vars(f, rs=set()):
 11 |     """
 12 |     shameless copy of z3util.get_vars,
 13 |     but returning select-operations as well.
 14 |     E.g.
 15 |     >>> x = z3.Array('x', z3.IntSort(), z3.IntSort())
 16 |     >>> get_vars(x[5])
 17 |     [x[5]]
 18 |     whereas
 19 |     >>> x = z3.Array('x', z3.IntSort(), z3.IntSort())
 20 |     >>> z3util.get_vars(x[5])
 21 |     [x]
 22 |     """
 23 |     if not rs:
 24 |         f = z3.simplify(f)
 25 | 
 26 |     if f.decl().kind() == z3.Z3_OP_SELECT:
 27 |         arr, idx = f.children()
 28 |         if z3.is_const(arr):
 29 |             if z3.z3util.is_expr_val(idx):
 30 |                 return rs | {f}
 31 |             else:
 32 |                 return rs | {f, idx}
 33 |     if z3.is_const(f):
 34 |         if z3.z3util.is_expr_val(f):
 35 |             return rs
 36 |         else:  # variable
 37 |             return rs | {f}
 38 | 
 39 |     else:
 40 |         for f_ in f.children():
 41 |             rs = get_vars(f_, rs)
 42 | 
 43 |         return set(rs)
 44 | 
 45 | 
 46 | def get_vars_non_recursive(f, include_select=False, include_indices=True):
 47 |     todo = [f]
 48 |     rs = set()
 49 |     seen = set()
 50 |     while todo:
 51 |         expr = todo.pop()
 52 |         if expr.get_id() in seen:
 53 |             continue
 54 |         seen.add(expr.get_id())
 55 |         if include_select and expr.decl().kind() == z3.Z3_OP_SELECT:
 56 |             arr, idx = expr.children()
 57 |             if z3.is_const(arr):
 58 |                 if not include_indices or z3.z3util.is_expr_val(idx):
 59 |                     rs.add(expr)
 60 |                 else:
 61 |                     rs.add(expr)
 62 |                     todo.append(idx)
 63 |             else:
 64 |                 todo.extend(expr.children())
 65 |         elif z3.is_const(expr):
 66 |             if not z3.z3util.is_expr_val(expr):
 67 |                 rs.add(expr)
 68 |         else:
 69 |             todo.extend(expr.children())
 70 | 
 71 |     return rs
 72 | 
 73 | 
 74 | def concrete(v):
 75 |     return isinstance(v, numbers.Number)
 76 | 
 77 | 
 78 | def is_false(cond):
 79 |     s = z3.SolverFor("QF_ABV")
 80 |     s.add(cond)
 81 |     return s.check() == z3.unsat
 82 | 
 83 | 
 84 | def is_true(cond):
 85 |     # NOTE: This differs from `not is_false(cond)`, which corresponds to "may be true"
 86 |     return is_false(z3.Not(cond))
 87 | 
 88 | 
 89 | def simplify_non_const_hashes(expr, sha_ids):
 90 |     while True:
 91 |         expr = z3.simplify(expr, expand_select_store=True)
 92 |         sha_subst = get_sha_subst_non_recursive(expr, sha_ids)
 93 |         if not sha_subst:
 94 |             break
 95 |         expr = z3.substitute(expr, [(s, z3.BoolVal(False)) for s in sha_subst])
 96 |     return expr
 97 | 
 98 | 
 99 | def is_simple_expr(expr):
100 |     """
101 |         True if expr does not contain an If, Store, or Select statement
102 |     :param expr: the expression to check
103 |     :return: True, iff expr does not contain If, Store, or Select
104 |     """
105 | 
106 |     if expr.decl().kind() in {z3.Z3_OP_ITE, z3.Z3_OP_SELECT, z3.Z3_OP_STORE}:
107 |         return False
108 |     else:
109 |         return all(is_simple_expr(c) for c in expr.children())
110 | 
111 | 
def ast_eq(e1, e2, simplified=False):
    """
    Structurally compare two expressions.

    :param e1: first expression
    :param e2: second expression
    :param simplified: if False, both expressions are simplified first
    :return: True iff sort, operator kind, literal values and (recursively)
             all children agree
    """
    if not simplified:
        e1 = z3.simplify(e1)
        e2 = z3.simplify(e2)
    if e1.sort() != e2.sort():
        return False
    if e1.decl().kind() != e2.decl().kind():
        return False
    if z3.z3util.is_expr_val(e1) and z3.z3util.is_expr_val(e2):
        return e1.as_long() == e2.as_long()
    children1 = e1.children()
    children2 = e2.children()
    # zip() would silently ignore surplus children, so n-ary nodes with a
    # different number of arguments must be rejected explicitly.
    if len(children1) != len(children2):
        return False
    # NOTE(review): childless non-value nodes are compared by sort and kind
    # only, so two different variables of the same sort presumably compare
    # equal here -- confirm whether that is intended.
    return all(ast_eq(c1, c2, True) for c1, c2 in zip(children1, children2))
123 | 
124 | 
def get_sha_subst_non_recursive(f, sha_ids):
    """
    Find equalities in ``f`` that involve sha-expressions and are assumed
    to be false.

    :param f: the expression to inspect
    :param sha_ids: set of expression ids (``get_id()``) of sha-expressions
    :return: set of equality sub-expressions to be replaced
    """
    todo = [z3.simplify(f, expand_select_store=True)]
    rs = set()
    seen = set()
    while todo:
        expr = todo.pop()
        if expr.get_id() in seen:
            continue
        seen.add(expr.get_id())
        if expr.decl().kind() == z3.Z3_OP_EQ and all(is_simple_expr(c) for c in expr.children()):
            l, r = expr.children()
            lvars, rvars = [{v.get_id() for v in get_vars_non_recursive(e, True)} for e in (l, r)]

            sha_left = bool(lvars & sha_ids)
            sha_right = bool(rvars & sha_ids)

            if sha_left and sha_right:
                # both sides use a sha-expression
                # => can be equal only if ASTs are equal
                if not ast_eq(l, r):
                    rs.add(expr)

            elif sha_left ^ sha_right:
                # only one side uses a sha-expression
                # => assume not equal (e.g. SHA == 5 seems unlikely)
                rs.add(expr)

        else:
            todo.extend(expr.children())

    return rs
162 | 


--------------------------------------------------------------------------------