├── .gitignore ├── LICENSE ├── README.md ├── cryptoinscriber └── requirements.txt /.gitignore: -------------------------------------------------------------------------------- 1 | env/ 2 | __pycache__/ 3 | *.pyc 4 | 5 | out/ 6 | bin/ 7 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Shawn Pang 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # CryptoInscriber 2 | 3 | CryptoInscriber - a live cryptocurrency historical trade data poller. Download live historical trade data from any cryptoexchange, be it for machine learning, trading bots, trading strategies, or perhaps minute-level data is just way too expensive. 4 | 5 | Outputs to `out/` by default in CSV format. Title will be `{EXCHANGE}_{MARKET}_{ASCTIME}.csv` with columns `Transaction ID, Timestamp (milli), Price, Amount, Side` 6 | 7 | Tested on Debian Stretch, and Raspberry Pi 3 Stretch. 8 | 9 | ## Installation using Virtualenv 10 | 11 | ```sh 12 | sudo apt install python3 python3-pip 13 | pip3 install virtualenv 14 | python3 -m virtualenv -p python3 env 15 | source env/bin/activate 16 | pip install -r requirements.txt 17 | ./cryptoinscriber 18 | ``` 19 | 20 | ## Usage 21 | 22 | Poll for trade data from Bitstamp on market BTC/USD 23 | 24 | `./cryptoinscriber -e bitstamp -m btc/usd` 25 | 26 | Poll for trade data from Bitfinex on market BTC/USD, limit to 20 executions per poll 27 | 28 | `./cryptoinscriber -e bitfinex -m btc/usd -c '{"limit_trades": 20}'` 29 | 30 | Poll for trade data from Quoinex on market BTC/JPY, iterate twice only, limit to 100 executions per poll 31 | 32 | `./cryptoinscriber -e bitfinex -m btc/usd -i 2 -c '{"limit": 100}'` 33 | 34 | -------------------------------------------------------------------------------- /cryptoinscriber: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import argparse 4 | import ccxt 5 | import csv 6 | import json 7 | import os 8 | import sys 9 | import time 10 | from typing import Type, List 11 | 12 | banner = r""" 13 | ____ ____ _ _ ___ ___ ____ _ __ _ ____ ____ ____ _ ___ ____ ____ 14 | |___ |--< Y |--' | [__] | | \| ==== |___ |--< | |==] |=== |--< 15 | """ 16 | 17 | 18 | def dedup(tdCurr: dict, tdPrev: dict) -> dict: 19 | if tdCurr == tdPrev: 20 | return {} 21 | if tdPrev == None: 22 | return tdCurr 23 | tdCurrIDs = [execution['id'] for execution in tdCurr] 24 | tdPrevLatestID = tdPrev[0]['id'] 25 | if tdPrevLatestID not in tdCurrIDs: 26 | print('[-] Possibly missing executions between {} and {}...'.format( 27 | time.asctime(time.localtime(int(str(tdPrev[0]['timestamp'])[:-3]))), 28 | time.asctime(time.localtime(int(str(tdCurr[-1]['timestamp'])[:-3]))) 29 | )) 30 | return tdCurr 31 | return tdCurr[:tdCurrIDs.index(tdPrev[0]['id'])] 32 | 33 | 34 | def convert(tradeData: dict, lite: bool) -> List[List]: 35 | converted = [] 36 | for execution in tradeData: 37 | # ID, Timestamp, Price, Amount, Side 38 | if lite: 39 | converted.insert(0, [ 40 | execution['timestamp'], 41 | round(float(execution['price'])), 42 | execution['amount'], 43 | ]) 44 | else: 45 | converted.insert(0, [ 46 | execution['id'], 47 | execution['timestamp'], 48 | execution['price'], 49 | execution['amount'], 50 | execution['side'] 51 | ]) 52 | return converted 53 | 54 | 55 | def buildParser() -> argparse.ArgumentParser: 56 | 57 | # Define Parser 58 | parser = argparse.ArgumentParser( 59 | formatter_class=argparse.ArgumentDefaultsHelpFormatter, 60 | description='CryptoInscriber | Cryptocurrency historical trade data poller' 61 | ) 62 | 63 | # Exchange 64 | parser.add_argument('-e', '--exchange', metavar='bitstamp', required=True, help='cryptocurrency exchange name, see https://github.com/ccxt/ccxt#supported-cryptocurrency-exchange-markets for available exchanges') 65 | 66 | # Market's Symbol/Pair 67 | parser.add_argument('-m', '--market', metavar='BTC/USD', required=True, help='market symbol/pair to poll from') 68 | 69 | # Rate Limit Per Minute 70 | parser.add_argument('-r', '--rate', metavar='60', type=int, default=60, help='rate limit, max number or requests per minute') 71 | 72 | # Iterations 73 | parser.add_argument('-i', '--iterations', metavar='0', type=int, default=0, help='number of times to poll executions before quitting, 0 to run indefinitely') 74 | 75 | # Output Directory 76 | parser.add_argument('-o', '--output', metavar='out', default='out', help='output directory to store CSV files') 77 | 78 | # Custom Params 79 | parser.add_argument('-c', '--custom', metavar='\'{"limit": 20}\'', default='{}', type=json.loads, help='custom exchange-specific parameters to use while polling for executions') 80 | 81 | # Mininmal Output 82 | parser.add_argument('-l', '--lite', action='store_true', help='minimizes output rows; removes ID (first column), rounds price to nearest whole number (assuming decimal precision is not important), removes side (5th column)') 83 | 84 | return parser 85 | 86 | 87 | def parseArgs(parser: argparse.ArgumentParser) -> argparse.Namespace: 88 | 89 | # Validate Args Length 90 | if len(sys.argv) < 2: 91 | print(banner) 92 | parser.print_help() 93 | exit(1) 94 | 95 | # Parse 96 | args = parser.parse_args() 97 | return args 98 | 99 | 100 | def getExchange(exchangeName: str) -> ccxt.Exchange: 101 | try: 102 | exchange = getattr(ccxt, exchangeName)() 103 | except AttributeError: 104 | print('[-] Error: "{}" exchange is not available. Refer to https://github.com/ccxt/ccxt#supported-cryptocurrency-exchange-markets for available markets.'.format(exchangeName)) 105 | exit(1) 106 | exchange.load_markets() 107 | return exchange 108 | 109 | 110 | def createOutputDir(directory: str) -> None: 111 | if os.path.isdir(directory): 112 | return 113 | try: 114 | os.mkdir(directory) 115 | except FileExistsError: 116 | print('[-] A non-directory item "{}" already exists. Please use another output name.'.format(directory)) 117 | exit(1) 118 | 119 | 120 | def getTradeData(exchange: ccxt.Exchange, market: str, customParams: dict = {}) -> List[dict]: 121 | return exchange.fetch_trades(market, params=customParams) 122 | 123 | 124 | def writeCSV(data: List[List], filename: str) -> None: 125 | with open(filename, 'a') as f: 126 | fcsv = csv.writer(f) 127 | fcsv.writerows(data) 128 | 129 | 130 | def main(): 131 | 132 | try: 133 | parser = buildParser() 134 | args = parseArgs(parser) 135 | exchange = getExchange(args.exchange) 136 | market = args.market.upper() 137 | delay = 60 / args.rate 138 | iterations = args.iterations 139 | 140 | createOutputDir(args.output) 141 | outputFile = os.path.join(args.output, '{}_{}_{}.csv'.format( 142 | exchange.name, 143 | market.replace('/', ''), 144 | time.strftime('%Y-%m-%d_%H-%M-%S') 145 | )) 146 | 147 | tradeDataPrev = None 148 | 149 | while args.iterations == 0 or iterations > 0: 150 | 151 | try: 152 | tradeData = getTradeData(exchange, market, customParams=args.custom) 153 | except ccxt.NetworkError: 154 | print('[-] Facing network issues, retrying in {} seconds...'.format(delay)) 155 | time.sleep(delay) 156 | continue 157 | 158 | tradeDataClean = dedup(tradeData, tradeDataPrev) 159 | tradeDataClean = convert(tradeDataClean, args.lite) 160 | writeCSV(tradeDataClean, outputFile) 161 | tradeDataPrev = tradeData 162 | 163 | print('[{}] Recorded {} trade executions. Sleeping for {} seconds...'.format( 164 | time.asctime(), 165 | len(tradeDataClean), 166 | delay 167 | )) 168 | 169 | if args.iterations: 170 | iterations -= 1 171 | time.sleep(delay) 172 | 173 | except KeyboardInterrupt: 174 | pass 175 | 176 | 177 | if __name__ == '__main__': 178 | main() 179 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | aiodns==1.1.1 2 | aiohttp==2.3.2 3 | async-timeout==2.0.0 4 | cchardet==2.1.1 5 | ccxt==1.10.63 6 | chardet==3.0.4 7 | multidict==3.3.2 8 | pycares==2.3.0 9 | yarl==0.14.0 10 | --------------------------------------------------------------------------------