├── .gitignore
├── README.md
├── defi_path_finder
│   ├── __init__.py
│   ├── collector.py
│   ├── preprocessor.py
│   └── utils.py
├── examples.py
└── requirements.txt
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
dist/
venv/
build/
__pycache__/

defi_path_finder.egg-info/

.idea/
.vscode/

*.pyc
*.zip
*.pkl

.DS_Store
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# DeFi Arbitrage Path Finder

A simple arbitrage path finder for multiple DEXs.

Two-way arbitrage paths are rather simple to find. However, n-way paths get complicated very quickly.

This project is an attempt to map triangular paths using 2 DEXs.
The concepts used here extend naturally to 4-way, 5-way, and n-way paths.

The sample exchanges used here are:
- Polygon Sushiswap V2
- Polygon Meshswap

Sushiswap V2 data is retrieved through a subgraph endpoint, while Meshswap data is retrieved through Meshswap's own API endpoint.

Visit https://solidquant.github.io/blog/post/building-a-defi-arbitrage-path-finder-(1)/ for more information.


### Usage:

Install dependencies:
```bash
pip install -r requirements.txt
```

Run the sample code in examples.py:

```python
import pickle

from defi_path_finder import Preprocessor
from defi_path_finder import make_triangular_paths

if __name__ == '__main__':
    p = Preprocessor()
    pools, reserves = p.load_data(500)

    pairs, paths = make_triangular_paths(pools, [], 6)

    # pickle data to load and use later
    save_data = {
        'EXCHANGES': p.EXCHANGES,
        'TOKENS': p.TOKENS,
        'POOLS': p.POOLS,
        'reserves': reserves,
        'pairs': pairs,
        'paths': paths
    }

    with open('./paths.pkl', 'wb') as f:
        pickle.dump(save_data, f)
```

The code above takes approximately 10 minutes to run, using 6 processes in parallel.
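
The saved results can be loaded back in a later session — a minimal sketch, assuming `paths.pkl` was written by the script above:

```python
import pickle

with open('./paths.pkl', 'rb') as f:
    data = pickle.load(f)

# e.g. the number of triangular paths found
print(len(data['paths']))
```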

### TODO:

- add simulation helpers to find the most profitable arbitrage path
- extend the module to work for n-way paths as well
- add market impact simulation functions for various AMM types to accommodate all variants of Uniswap and others
--------------------------------------------------------------------------------
/defi_path_finder/__init__.py:
--------------------------------------------------------------------------------
from defi_path_finder.collector import Collector
from defi_path_finder.preprocessor import Preprocessor
from defi_path_finder.utils import (
    make_triangular_paths,
    make_3_pairs,
    make_paths_array,
    _make_process_inputs,
    _make_3_pairs_process,
)
--------------------------------------------------------------------------------
/defi_path_finder/collector.py:
--------------------------------------------------------------------------------
import math
import time
import requests
import pandas as pd


class Collector:
    """
    Collects indexed blockchain data from subgraphs and protocol-specific endpoints
    """

    def __init__(self):
        self.URLS = {
            'polygon': {
                'sushiswap_v2': 'https://api.thegraph.com/subgraphs/name/sushiswap/matic-exchange',
                'meshswap': 'https://ss.meshswap.fi/stat/recentPoolInfo.min.json'
            }
        }

    def get_polygon_sushiswap_v2_pools(self, pool_cnt: int = 500):
        return self._get_pools('polygon', 'sushiswap_v2', pool_cnt)

    def get_polygon_meshswap_pools(self):
        return self._get_pools('polygon', 'meshswap', 0)

    def get_polygon_meshswap_tokens(self):
        tokens = requests.get('https://ss.meshswap.fi/stat/tokenInfo.min.json?t=1686709443064').json()
        tokens = pd.DataFrame(tokens)
        tokens.rename(columns=tokens.iloc[0], inplace=True)
        tokens.drop(tokens.index[0], inplace=True)
        return list(tokens.T.to_dict().values())

    def _get_pools(self, chain: str, protocol: str, pool_cnt: int):
        url = self.URLS[chain.lower()][protocol.lower()]

        if 'v2' in protocol:
            return self._request_v2_pools(url, pool_cnt)
        else:
            return self._request_special_pools(url)

    def _request_v2_pools(self, url: str, pool_cnt: int):
        query = """
        query pairs($skip: Int, $first: Int) {
            pairs(skip: $skip, first: $first, orderBy: reserveUSD, orderDirection: desc) {
                id
                token0 {
                    id
                    symbol
                    decimals
                }
                token1 {
                    id
                    symbol
                    decimals
                }
                reserve0
                reserve1
            }
        }
        """
        return self._request(url, query, pool_cnt, 2)

    def _request(self, url: str, query: str, pool_cnt: int, version: int):
        max_req = 500  # max results per request (get 500 pools per call)
        pools = []
        loop_cnt = math.ceil(pool_cnt / max_req)

        for i in range(loop_cnt):
            variables = {
                'skip': max_req * i,
                'first': max_req
            }
            res = requests.post(url, json={'query': query, 'variables': variables})
            data = res.json()
            if version == 2:
                pools.extend(data['data']['pairs'])
            print(f'{url} - skip: {variables["skip"]} / first: {variables["first"]}')
            time.sleep(1)

        return pools

    def _request_special_pools(self, url: str):
        res = requests.get(url)
        data = res.json()
        pools = pd.DataFrame(data['recentPool'])
        pools.rename(columns=pools.iloc[0], inplace=True)
        pools.drop(pools.index[0], inplace=True)
        return list(pools.T.to_dict().values())

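
# quick demo: fetch pools from both exchanges (requires network access);
# note that the subgraph call is paginated in batches of 500 via skip/first,
# so e.g. pool_cnt=1200 would issue ceil(1200 / 500) = 3 requests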
if __name__ == '__main__':
    c = Collector()

    sushi_pools = c.get_polygon_sushiswap_v2_pools()
    mesh_pools = c.get_polygon_meshswap_pools()

    print(sushi_pools)
    print(mesh_pools)
--------------------------------------------------------------------------------
/defi_path_finder/preprocessor.py:
--------------------------------------------------------------------------------
import numpy as np
import pandas as pd

from defi_path_finder.collector import Collector


class Preprocessor:
    """
    Preprocesses the raw indexed data collected by Collector
    """

    # encodes exchanges into integer values
    EXCHANGES = {
        'sushiswap_v2': 0,
        'meshswap': 1
    }

    # keeps basic information about tokens (address, symbol, decimals)
    TOKENS = {}

    # keeps contract addresses of pools
    POOLS = {}

    def __init__(self):
        self.collector = Collector()

        self._token_id = 0

    def polygon_sushiswap_v2_pools(self, pool_cnt: int = 500):
        pools = self.collector.get_polygon_sushiswap_v2_pools(pool_cnt)

        for p in pools:
            token0 = p['token0'].copy()
            token1 = p['token1'].copy()

            token0_id = token0.pop('id')
            token1_id = token1.pop('id')

            if token0_id not in self.TOKENS:
                self.TOKENS[token0_id] = {'id': self._token_id, **token0}
                self._token_id += 1

            if token1_id not in self.TOKENS:
                self.TOKENS[token1_id] = {'id': self._token_id, **token1}
                self._token_id += 1

        mapped = []

        for p in pools:
            t0 = self.TOKENS[p['token0']['id']]['id']
            t1 = self.TOKENS[p['token1']['id']]['id']
            pool_id = f'{t0}_{t1}_{self.EXCHANGES["sushiswap_v2"]}'
            self.POOLS[pool_id] = p['id']

            mapped.append({
                'token0': t0,
                'token1': t1,
                'exchange': self.EXCHANGES['sushiswap_v2'],
                'reserve0': float(p['reserve0']),
                'reserve1': float(p['reserve1'])
            })

        pools_df = pd.DataFrame(mapped)
        return pools_df

    def polygon_meshswap_v2_pools(self):
        tokens = self.collector.get_polygon_meshswap_tokens()
        tokens = {d['address']: {'symbol': d['symbol'], 'decimals': d['decimal']} for d in tokens}

        pools = self.collector.get_polygon_meshswap_pools()

        mapped = []

        for p in pools:
            token0 = p['token0']
            token1 = p['token1']

            if token0 not in self.TOKENS:
                token_info = tokens[token0]
                self.TOKENS[token0] = {'id': self._token_id, **token_info}
                self._token_id += 1

            if token1 not in self.TOKENS:
                token_info = tokens[token1]
                self.TOKENS[token1] = {'id': self._token_id, **token_info}
                self._token_id += 1

            t0 = self.TOKENS[token0]['id']
            t1 = self.TOKENS[token1]['id']
            pool_id = f'{t0}_{t1}_{self.EXCHANGES["meshswap"]}'
            self.POOLS[pool_id] = p['exchange_address']

            mapped.append({
                'token0': t0,
                'token1': t1,
                'exchange': self.EXCHANGES['meshswap'],
                'reserve0': float(p['amount0']),
                'reserve1': float(p['amount1'])
            })

        pools_df = pd.DataFrame(mapped)
        return pools_df

    def load_data(self, pool_cnt: int = 500):
        # 1. Collect raw indexed data
        sushi = self.polygon_sushiswap_v2_pools(pool_cnt)
        mesh = self.polygon_meshswap_v2_pools()

        # 2. Create a numpy array by mapping token, exchange data to integers
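        # each row of pools_array will be [token0_id, token1_id, exchange_id];
        # e.g. a row of [3, 7, 0] is the token-3/token-7 pool on sushiswap_v2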
        _arr = []

        for pools_df in [sushi, mesh]:
            pools = np.zeros((len(pools_df), 3), dtype=int)
            pools[:] = pools_df[['token0', 'token1', 'exchange']].values
            _arr.append(pools)

        pools_array = np.concatenate(_arr, axis=0)

        # 3. Create a numpy array containing reserves data for all existing pools
        reserves_array = np.zeros((
            len(self.TOKENS),
            len(self.TOKENS),
            len(self.EXCHANGES),
            2
        ), dtype=float)

        for pools_df in [sushi, mesh]:
            for _, row in pools_df.iterrows():
                t0 = int(row['token0'])
                t1 = int(row['token1'])
                e = int(row['exchange'])
                r0 = row['reserve0']
                r1 = row['reserve1']

                reserves_array[t0, t1, e, :] = [r0, r1]

        return pools_array, reserves_array


if __name__ == '__main__':
    p = Preprocessor()
    pools, reserves = p.load_data()

    print(pools)
--------------------------------------------------------------------------------
/defi_path_finder/utils.py:
--------------------------------------------------------------------------------
import numpy as np
from tqdm import tqdm
import multiprocessing
from itertools import combinations, permutations, product

multiprocessing.freeze_support()


def make_triangular_paths(data: np.ndarray,
                          include_tokens: list = [],
                          process_cnt: int = 5) -> (dict, np.ndarray):

    pairs = make_3_pairs(data, include_tokens, process_cnt)
    paths = make_paths_array(pairs)
    return pairs, paths

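
# a triangular path chains three swaps that return to the starting token,
# e.g. WMATIC -> USDC -> WETH -> WMATIC, where each hop can use a pool
# from either exchange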

def make_3_pairs(data: np.ndarray,
                 include_tokens: list,
                 process_cnt: int = 5) -> dict:

    """
    :param data: array from Preprocessor.load_data() (pools_array)
    :param include_tokens: restricts results to combinations containing the given tokens.
                           e.g. if you input [0] and 0 is WMATIC, every 3-token combination
                           is guaranteed to include WMATIC
    :param process_cnt: the number of processes to use for multiprocessing
    :return: dict used by make_paths_array
    """
    if len(include_tokens) > 0:
        # filter the data beforehand to reduce running time
        data = data[np.any(np.isin(data[:, :2], include_tokens), axis=1)]

    tokens = np.unique(data[:, :2])
    combinations_list = list(combinations(tokens, 3))

    process_inputs = _make_process_inputs(data,
                                          include_tokens,
                                          combinations_list,
                                          process_cnt)

    pairs = {}

    with multiprocessing.Pool(processes=process_cnt) as mp:
        for result in mp.starmap(_make_3_pairs_process, process_inputs):
            pairs = {**result, **pairs}

    return pairs


def make_paths_array(pairs: dict) -> np.ndarray:
    """
    :param pairs: should be the dict from running make_3_pairs
    :return: np.ndarray of all possible triangular paths
    """
    all_paths = []

    i = 0

    for key, filtered in pairs.items():
        tokens = np.array(key)
        path_combos = np.array(list(permutations(tokens)))

        pathify = lambda x: np.vstack((x, np.roll(x, -1))).T
        possible_paths = [pathify(c) for c in path_combos]

        match_pools = lambda x, match: x[(x[:, -2:] == match).all(axis=1)]

        for p in possible_paths:
            matched_pools = [match_pools(filtered, token_in_out) for token_in_out in p]
            path = [np.array(path) for path in product(*matched_pools)]
            all_paths.extend(path)
            # print(f'({i} / {len(pairs)}) {p.reshape(-1)}: {len(path)} added')

        i += 1

    return np.array(all_paths)


def _make_process_inputs(data: np.ndarray,
                         include_tokens: list,
                         combinations_list: list,
                         process_cnt: int):

    n = len(combinations_list)
    part_size = n // process_cnt
    extra = n % process_cnt

    inputs = []
    start = 0

    for i in range(process_cnt):
        if i < extra:
            end = start + part_size + 1
        else:
            end = start + part_size

        inputs.append((data, include_tokens, combinations_list[start:end]))
        start = end

    return inputs

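
# e.g. splitting 10 combinations across 3 processes yields chunks of
# sizes 4, 3, 3 (part_size = 3, extra = 1)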

def _make_3_pairs_process(data: np.ndarray,
                          include_tokens: list,
                          combinations_list: list):

    pairs = {}

    for c in tqdm(combinations_list):
        if not set(include_tokens).issubset(set(c)):
            continue

        # 1. filter data where token0, token1 are both in the combo set
        # a combo set would look like: (0, 1, 2)
        filtered_array = data[np.all(np.isin(data[:, :2], c), axis=1)]
        _f = filtered_array[:, :2]

        # filtered_array should contain 3 unique tokens and at least 3 unique pools for triangular arbitrage
        if np.unique(_f).shape[0] != 3 or np.unique(_f, axis=0).shape[0] < 3:
            continue

        # 2. create a new array with token_in, token_out info
        # the columns of full_array will be: token0, token1, exchange, token_in, token_out
        full_array = np.zeros((filtered_array.shape[0] * 2, 5), dtype=int)
        full_array[:, :3] = np.concatenate([filtered_array, filtered_array])
        full_array[:, 3:] = np.concatenate([_f, np.flip(_f, axis=1)])

        pairs[c] = full_array

    return pairs
--------------------------------------------------------------------------------
/examples.py:
--------------------------------------------------------------------------------
import pickle

from defi_path_finder import Preprocessor
from defi_path_finder import make_triangular_paths


if __name__ == '__main__':
    p = Preprocessor()
    pools, reserves = p.load_data(500)

    pairs, paths = make_triangular_paths(pools, [], 6)

    # pickle data to load and use later
    save_data = {
        'EXCHANGES': p.EXCHANGES,
        'TOKENS': p.TOKENS,
        'POOLS': p.POOLS,
        'reserves': reserves,
        'pairs': pairs,
        'paths': paths
    }

    with open('./paths.pkl', 'wb') as f:
        pickle.dump(save_data, f)

    print(paths)
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
argcomplete==2.0.0
certifi==2023.5.7
charset-normalizer==3.1.0
click==8.1.3
idna==3.4
numpy==1.24.3
pandas==2.0.2
pipx==1.0.0
python-dateutil==2.8.2
pytz==2023.3
requests==2.31.0
six==1.16.0
tqdm==4.65.0
tzdata==2023.3
urllib3==2.0.3
userpath==1.8.0
--------------------------------------------------------------------------------