├── .gitignore
├── README.md
├── defi_path_finder
│   ├── __init__.py
│   ├── collector.py
│   ├── preprocessor.py
│   └── utils.py
├── examples.py
└── requirements.txt
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
dist/
venv/
build/
__pycache__/

defi_path_finder.egg-info/

.idea/
.vscode/

*.pyc
*.zip
*.pkl

.DS_Store
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# DeFi Arbitrage Path Finder

A simple arbitrage path finder for multiple DEXs.

Two-way arbitrage paths are rather simple to find. However, n-way paths get complicated very quickly.

This project is an attempt to map triangular paths using 2 DEXs.
The concepts used here extend naturally to 4-way, 5-way, and n-way paths.

The sample exchanges used here are:
- Polygon Sushiswap V2
- Polygon Meshswap

Sushiswap V2 data is retrieved through a subgraph endpoint, while Meshswap data is retrieved through Meshswap's own API endpoint.

Visit https://solidquant.github.io/blog/post/building-a-defi-arbitrage-path-finder-(1)/ for more information.


### Usage:

Install dependencies:
```bash
pip install -r requirements.txt
```

Run the sample code in examples.py:

```python
import pickle

from defi_path_finder import Preprocessor
from defi_path_finder import make_triangular_paths

if __name__ == '__main__':
    p = Preprocessor()
    pools, reserves = p.load_data(500)

    pairs, paths = make_triangular_paths(pools, [], 6)

    # pickle data to load and use later
    save_data = {
        'EXCHANGES': p.EXCHANGES,
        'TOKENS': p.TOKENS,
        'POOLS': p.POOLS,
        'reserves': reserves,
        'pairs': pairs,
        'paths': paths
    }

    with open('./paths.pkl', 'wb') as f:
        pickle.dump(save_data, f)
```

The code above takes approximately 10 minutes to run, using 6 processes in parallel.
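
The saved results can be loaded back in a later session — a minimal sketch, assuming `paths.pkl` was written by the script above:

```python
import pickle

with open('./paths.pkl', 'rb') as f:
    data = pickle.load(f)

# e.g. the number of triangular paths found
print(len(data['paths']))
```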

### TODO:

- add simulation helpers to find the most profitable arbitrage path
- extend the module to work for n-way paths as well
- add market impact simulation functions for various AMM types to accommodate all variants of Uniswap and others
--------------------------------------------------------------------------------
/defi_path_finder/__init__.py:
--------------------------------------------------------------------------------
from defi_path_finder.collector import Collector
from defi_path_finder.preprocessor import Preprocessor
from defi_path_finder.utils import (
    make_triangular_paths,
    make_3_pairs,
    make_paths_array,
    _make_process_inputs,
    _make_3_pairs_process,
)
--------------------------------------------------------------------------------
/defi_path_finder/collector.py:
--------------------------------------------------------------------------------
import math
import time
import requests
import pandas as pd


class Collector:
    """
    Collects indexed blockchain data from subgraphs and protocol-specific endpoints
    """

    def __init__(self):
        self.URLS = {
            'polygon': {
                'sushiswap_v2': 'https://api.thegraph.com/subgraphs/name/sushiswap/matic-exchange',
                'meshswap': 'https://ss.meshswap.fi/stat/recentPoolInfo.min.json'
            }
        }

    def get_polygon_sushiswap_v2_pools(self, pool_cnt: int = 500):
        return self._get_pools('polygon', 'sushiswap_v2', pool_cnt)

    def get_polygon_meshswap_pools(self):
        return self._get_pools('polygon', 'meshswap', 0)

    def get_polygon_meshswap_tokens(self):
        tokens = requests.get('https://ss.meshswap.fi/stat/tokenInfo.min.json?t=1686709443064').json()
        tokens = pd.DataFrame(tokens)
        tokens.rename(columns=tokens.iloc[0], inplace=True)
        tokens.drop(tokens.index[0], inplace=True)
        return list(tokens.T.to_dict().values())

    def _get_pools(self, chain: str, protocol: str, pool_cnt: int):
        url = self.URLS[chain.lower()][protocol.lower()]

        if 'v2' in protocol:
            return self._request_v2_pools(url, pool_cnt)
        else:
            return self._request_special_pools(url)

    def _request_v2_pools(self, url: str, pool_cnt: int):
        query = """
        query pairs($skip: Int, $first: Int) {
            pairs(skip: $skip, first: $first, orderBy: reserveUSD, orderDirection: desc) {
                id
                token0 {
                    id
                    symbol
                    decimals
                }
                token1 {
                    id
                    symbol
                    decimals
                }
                reserve0
                reserve1
            }
        }
        """
        return self._request(url, query, pool_cnt, 2)

    def _request(self, url: str, query: str, pool_cnt: int, version: int):
        max_req = 500  # max results per request (get 500 pools per call)
        pools = []
        loop_cnt = math.ceil(pool_cnt / max_req)

        for i in range(loop_cnt):
            variables = {
                'skip': max_req * i,
                'first': max_req
            }
            res = requests.post(url, json={'query': query, 'variables': variables})
            data = res.json()
            if version == 2:
                pools.extend(data['data']['pairs'])
            print(f'{url} - skip: {variables["skip"]} / first: {variables["first"]}')
            time.sleep(1)

        return pools

    def _request_special_pools(self, url: str):
        res = requests.get(url)
        data = res.json()
        pools = pd.DataFrame(data['recentPool'])
        pools.rename(columns=pools.iloc[0], inplace=True)
        pools.drop(pools.index[0], inplace=True)
        return list(pools.T.to_dict().values())

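
# quick demo: fetch pools from both exchanges (requires network access);
# note that the subgraph call is paginated in batches of 500 via skip/first,
# so e.g. pool_cnt=1200 would issue ceil(1200 / 500) = 3 requests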
if __name__ == '__main__':
    c = Collector()

    sushi_pools = c.get_polygon_sushiswap_v2_pools()
    mesh_pools = c.get_polygon_meshswap_pools()

    print(sushi_pools)
    print(mesh_pools)
--------------------------------------------------------------------------------
/defi_path_finder/preprocessor.py:
--------------------------------------------------------------------------------
import numpy as np
import pandas as pd

from defi_path_finder.collector import Collector


class Preprocessor:
    """
    Preprocesses the raw indexed data collected by Collector
    """

    # encodes exchanges into integer values
    EXCHANGES = {
        'sushiswap_v2': 0,
        'meshswap': 1
    }

    # keeps basic information about tokens (address, symbol, decimals)
    TOKENS = {}

    # keeps contract addresses of pools
    POOLS = {}

    def __init__(self):
        self.collector = Collector()

        self._token_id = 0

    def polygon_sushiswap_v2_pools(self, pool_cnt: int = 500):
        pools = self.collector.get_polygon_sushiswap_v2_pools(pool_cnt)

        for p in pools:
            token0 = p['token0'].copy()
            token1 = p['token1'].copy()

            token0_id = token0.pop('id')
            token1_id = token1.pop('id')

            if token0_id not in self.TOKENS:
                self.TOKENS[token0_id] = {'id': self._token_id, **token0}
                self._token_id += 1

            if token1_id not in self.TOKENS:
                self.TOKENS[token1_id] = {'id': self._token_id, **token1}
                self._token_id += 1

        mapped = []

        for p in pools:
            t0 = self.TOKENS[p['token0']['id']]['id']
            t1 = self.TOKENS[p['token1']['id']]['id']
            pool_id = f'{t0}_{t1}_{self.EXCHANGES["sushiswap_v2"]}'
            self.POOLS[pool_id] = p['id']

            mapped.append({
                'token0': t0,
                'token1': t1,
                'exchange': self.EXCHANGES['sushiswap_v2'],
                'reserve0': float(p['reserve0']),
                'reserve1': float(p['reserve1'])
            })

        pools_df = pd.DataFrame(mapped)
        return pools_df

    def polygon_meshswap_v2_pools(self):
        tokens = self.collector.get_polygon_meshswap_tokens()
        tokens = {d['address']: {'symbol': d['symbol'], 'decimals': d['decimal']} for d in tokens}

        pools = self.collector.get_polygon_meshswap_pools()

        mapped = []

        for p in pools:
            token0 = p['token0']
            token1 = p['token1']

            if token0 not in self.TOKENS:
                token_info = tokens[token0]
                self.TOKENS[token0] = {'id': self._token_id, **token_info}
                self._token_id += 1

            if token1 not in self.TOKENS:
                token_info = tokens[token1]
                self.TOKENS[token1] = {'id': self._token_id, **token_info}
                self._token_id += 1

            t0 = self.TOKENS[token0]['id']
            t1 = self.TOKENS[token1]['id']
            pool_id = f'{t0}_{t1}_{self.EXCHANGES["meshswap"]}'
            self.POOLS[pool_id] = p['exchange_address']

            mapped.append({
                'token0': t0,
                'token1': t1,
                'exchange': self.EXCHANGES['meshswap'],
                'reserve0': float(p['amount0']),
                'reserve1': float(p['amount1'])
            })

        pools_df = pd.DataFrame(mapped)
        return pools_df

    def load_data(self, pool_cnt: int = 500):
        # 1. Collect raw indexed data
        sushi = self.polygon_sushiswap_v2_pools(pool_cnt)
        mesh = self.polygon_meshswap_v2_pools()

        # 2. Create a numpy array by mapping token, exchange data to integers
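        # each row of pools_array will be [token0_id, token1_id, exchange_id];
        # e.g. a row of [3, 7, 0] is the token-3/token-7 pool on sushiswap_v2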
        _arr = []

        for pools_df in [sushi, mesh]:
            pools = np.zeros((len(pools_df), 3), dtype=int)
            pools[:] = pools_df[['token0', 'token1', 'exchange']].values
            _arr.append(pools)

        pools_array = np.concatenate(_arr, axis=0)

        # 3. Create a numpy array containing reserves data for all existing pools
        reserves_array = np.zeros((
            len(self.TOKENS),
            len(self.TOKENS),
            len(self.EXCHANGES),
            2
        ), dtype=float)

        for pools_df in [sushi, mesh]:
            for _, row in pools_df.iterrows():
                t0 = int(row['token0'])
                t1 = int(row['token1'])
                e = int(row['exchange'])
                r0 = row['reserve0']
                r1 = row['reserve1']

                reserves_array[t0, t1, e, :] = [r0, r1]

        return pools_array, reserves_array


if __name__ == '__main__':
    p = Preprocessor()
    pools, reserves = p.load_data()

    print(pools)
--------------------------------------------------------------------------------
/defi_path_finder/utils.py:
--------------------------------------------------------------------------------
import numpy as np
from tqdm import tqdm
import multiprocessing
from itertools import combinations, permutations, product

multiprocessing.freeze_support()


def make_triangular_paths(data: np.ndarray,
                          include_tokens: list = [],
                          process_cnt: int = 5) -> (dict, np.ndarray):

    pairs = make_3_pairs(data, include_tokens, process_cnt)
    paths = make_paths_array(pairs)
    return pairs, paths

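
# a triangular path chains three swaps that return to the starting token,
# e.g. WMATIC -> USDC -> WETH -> WMATIC, where each hop can use a pool
# from either exchange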

def make_3_pairs(data: np.ndarray,
                 include_tokens: list,
                 process_cnt: int = 5) -> dict:

    """
    :param data: array from Preprocessor.load_data() (pools_array)
    :param include_tokens: restricts results to combinations containing the given tokens.
                           e.g. if you input [0] and 0 is WMATIC, every 3-token combination
                           is guaranteed to include WMATIC
    :param process_cnt: the number of processes to use for multiprocessing
    :return: dict used by make_paths_array
    """
    if len(include_tokens) > 0:
        # filter the data beforehand to reduce running time
        data = data[np.any(np.isin(data[:, :2], include_tokens), axis=1)]

    tokens = np.unique(data[:, :2])
    combinations_list = list(combinations(tokens, 3))

    process_inputs = _make_process_inputs(data,
                                          include_tokens,
                                          combinations_list,
                                          process_cnt)

    pairs = {}

    with multiprocessing.Pool(processes=process_cnt) as mp:
        for result in mp.starmap(_make_3_pairs_process, process_inputs):
            pairs = {**result, **pairs}

    return pairs


def make_paths_array(pairs: dict) -> np.ndarray:
    """
    :param pairs: should be the dict from running make_3_pairs
    :return: np.ndarray of all possible triangular paths
    """
    all_paths = []

    i = 0

    for key, filtered in pairs.items():
        tokens = np.array(key)
        path_combos = np.array(list(permutations(tokens)))

        pathify = lambda x: np.vstack((x, np.roll(x, -1))).T
        possible_paths = [pathify(c) for c in path_combos]

        match_pools = lambda x, match: x[(x[:, -2:] == match).all(axis=1)]

        for p in possible_paths:
            matched_pools = [match_pools(filtered, token_in_out) for token_in_out in p]
            path = [np.array(path) for path in product(*matched_pools)]
            all_paths.extend(path)
            # print(f'({i} / {len(pairs)}) {p.reshape(-1)}: {len(path)} added')

        i += 1

    return np.array(all_paths)


def _make_process_inputs(data: np.ndarray,
                         include_tokens: list,
                         combinations_list: list,
                         process_cnt: int):

    n = len(combinations_list)
    part_size = n // process_cnt
    extra = n % process_cnt

    inputs = []
    start = 0

    for i in range(process_cnt):
        if i < extra:
            end = start + part_size + 1
        else:
            end = start + part_size

        inputs.append((data, include_tokens, combinations_list[start:end]))
        start = end

    return inputs

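
# e.g. splitting 10 combinations across 3 processes yields chunks of
# sizes 4, 3, 3 (part_size = 3, extra = 1)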

def _make_3_pairs_process(data: np.ndarray,
                          include_tokens: list,
                          combinations_list: list):

    pairs = {}

    for c in tqdm(combinations_list):
        if not set(include_tokens).issubset(set(c)):
            continue

        # 1. filter data where token0, token1 are both in the combo set
        # a combo set would look like: (0, 1, 2)
        filtered_array = data[np.all(np.isin(data[:, :2], c), axis=1)]
        _f = filtered_array[:, :2]

        # filtered_array should contain 3 unique tokens and at least 3 unique pools for triangular arbitrage
        if np.unique(_f).shape[0] != 3 or np.unique(_f, axis=0).shape[0] < 3:
            continue

        # 2. create a new array with token_in, token_out info
        # the columns of full_array will be: token0, token1, exchange, token_in, token_out
        full_array = np.zeros((filtered_array.shape[0] * 2, 5), dtype=int)
        full_array[:, :3] = np.concatenate([filtered_array, filtered_array])
        full_array[:, 3:] = np.concatenate([_f, np.flip(_f, axis=1)])

        pairs[c] = full_array

    return pairs
--------------------------------------------------------------------------------
/examples.py:
--------------------------------------------------------------------------------
import pickle

from defi_path_finder import Preprocessor
from defi_path_finder import make_triangular_paths


if __name__ == '__main__':
    p = Preprocessor()
    pools, reserves = p.load_data(500)

    pairs, paths = make_triangular_paths(pools, [], 6)

    # pickle data to load and use later
    save_data = {
        'EXCHANGES': p.EXCHANGES,
        'TOKENS': p.TOKENS,
        'POOLS': p.POOLS,
        'reserves': reserves,
        'pairs': pairs,
        'paths': paths
    }

    with open('./paths.pkl', 'wb') as f:
        pickle.dump(save_data, f)

    print(paths)
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
argcomplete==2.0.0
certifi==2023.5.7
charset-normalizer==3.1.0
click==8.1.3
idna==3.4
numpy==1.24.3
pandas==2.0.2
pipx==1.0.0
python-dateutil==2.8.2
pytz==2023.3
requests==2.31.0
six==1.16.0
tqdm==4.65.0
tzdata==2023.3
urllib3==2.0.3
userpath==1.8.0
--------------------------------------------------------------------------------