├── src ├── __init__.py ├── common.py ├── constants.py ├── models.py ├── tradebot.py ├── api.py └── scraper.py ├── analysis ├── __init__.py ├── stockdata.py └── financials.py ├── backends ├── __init__.py └── signinja │ ├── __init__.py │ ├── config.yaml │ ├── predefined │ ├── eoddata.yaml │ └── robinhood.yaml │ └── utils.py ├── requirements.txt ├── .gitignore ├── config.yaml ├── LICENSE ├── redtide.py └── README.md /src/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /analysis/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /backends/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /backends/signinja/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /backends/signinja/config.yaml: -------------------------------------------------------------------------------- 1 | predefined_auths: 'backends/signinja/predefined' 2 | loadup_wait_sec: 3 3 | login_wait_sec: 3 4 | wait_2fa_timeout: 600 5 | wait_2fa_interval: 2 -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | DateTime==4.3 2 | lxml==4.5.0 3 | numpy==1.18.4 4 | pandas==1.0.3 5 | python-dateutil==2.8.1 6 | PyYAML==5.3.1 7 | requests==2.23.0 8 | scipy==1.4.1 9 | selenium==3.141.0 10 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea/* 2 | 
src/__pycache__/* 3 | data/live_quotes/* 4 | data/full_history/* 5 | data/analysis/* 6 | data/archive/* 7 | data/* 8 | *slurm-* 9 | *.zip 10 | *.pyc 11 | data/.DS_Store 12 | .DS_Store 13 | *.csv -------------------------------------------------------------------------------- /backends/signinja/predefined/eoddata.yaml: -------------------------------------------------------------------------------- 1 | url: 'http://eoddata.com/symbols.aspx' 2 | username: '//input[@id="ctl00_cph1_ls1_txtEmail"]' 3 | password: '//input[@id="ctl00_cph1_ls1_txtPassword"]' 4 | submit: '//input[@id="ctl00_cph1_ls1_btnLogin"]' 5 | fail_catch: 6 | in_page_source: 'Invalid email or password' 7 | loadup_wait_sec: 0 8 | login_wait_sec: 1 9 | login_success_catch: 10 | in_page_source: 'Account Details' 11 | -------------------------------------------------------------------------------- /backends/signinja/predefined/robinhood.yaml: -------------------------------------------------------------------------------- 1 | url: 'https://robinhood.com/login' 2 | username: '//input[@name="username"]' 3 | password: '//input[@name="password"]' 4 | submit: '//button[@type="submit"]' 5 | fail_catch: 6 | in_page_source: 'Unable to log' 7 | has_2fa_page: true 8 | wait_2fa_timeout: 3600 9 | return_webdriver: true 10 | loadup_wait_sec: 4 11 | login_wait_sec: 5 12 | login_success_catch: 13 | in_page_source: 'Cash' 14 | -------------------------------------------------------------------------------- /config.yaml: -------------------------------------------------------------------------------- 1 | file_paths: 2 | dir_data: 'data' 3 | dir_listings: 'listings' 4 | dir_full_history: 'full_history' 5 | dir_financials: 'financials' 6 | dir_options: 'options' 7 | all_symbols: 'all_symbols.txt' 8 | excluded_symbols: 'excluded_symbols.txt' 9 | 10 | selenium: 11 | path_geckodriver: 'geckodriver' 12 | 13 | alpha_vantage: 14 | api_key_file: '/home/john/auths/alphavantage_api_key' 15 | calls_per_minute: 5 16 | calls_per_day: 
500 17 | 18 | eoddata: 19 | auth_file: '/home/john/auths/eoddata' 20 | exchanges: ['AMEX', 'NYSE', 'NASDAQ'] 21 | 22 | robinhood: 23 | auth_file: '/home/john/auths/robinhood' 24 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Jiun Yen 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /src/common.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import re 4 | import requests 5 | from numpy import ceil 6 | from random import choices 7 | from string import ascii_letters 8 | from datetime import datetime, timedelta 9 | from dateutil import parser 10 | 11 | 12 | regxs = { 13 | 'currency': re.compile(r'[$,]'), 14 | 'rh_shares': re.compile(r'Shares* ([0-9,]*)'), 15 | 'rh_port_stocks': re.compile(r'([A-Z]+)\n([0-9,]+)\s+Share'), 16 | } 17 | 18 | 19 | def gen_rand_name(k=6): 20 | return ''.join(choices(ascii_letters, k=k)) 21 | 22 | 23 | def float_money(v): 24 | return float(regxs['currency'].sub('', v)) 25 | 26 | 27 | def str_money(v, decimal=2, comma=False): 28 | if comma: 29 | return ("{:,.%df}" % decimal).format(v) 30 | return ("{:.%df}" % decimal).format(v) 31 | 32 | 33 | def gen_symbol_batches(symbs, n_batches=None, batch_size=None): 34 | """ 35 | For multiprocessing. 
36 | :param symbs: 37 | :param n_batches: 38 | :param batch_size: 39 | :return: 40 | """ 41 | 42 | if not batch_size: 43 | if not n_batches: 44 | n_batches = 20 45 | n_symbs = len(symbs) 46 | batch_size = ceil(n_symbs / n_batches) 47 | 48 | symb_batches = [] 49 | batch = [] 50 | for i, s in enumerate(symbs): 51 | 52 | if i % batch_size == 0: 53 | if batch: 54 | symb_batches.append(batch) 55 | batch = [] 56 | 57 | batch.append(s) 58 | 59 | if batch: 60 | symb_batches.append(batch) 61 | 62 | return symb_batches 63 | 64 | 65 | def get_wallstreet_time(open_time=(9,30), close_time=(16,0), offset_close=(0, 0)): 66 | res = { 67 | 'is_market_open': False, 68 | 'datetime_str': requests.get('http://worldtimeapi.org/api/timezone/' 69 | 'America/New_York').json()['datetime'] 70 | } 71 | dt_ny = parser.parse(res['datetime_str']) 72 | dt_start = datetime(year=dt_ny.year, month=dt_ny.month, day=dt_ny.day, 73 | hour=open_time[0], minute=open_time[1]) 74 | dt_end = datetime(year=dt_ny.year, month=dt_ny.month, day=dt_ny.day, 75 | hour=close_time[0], minute=close_time[1]) \ 76 | - timedelta(hours=offset_close[0], minutes=offset_close[1]) 77 | dt_ny = datetime(year=dt_ny.year, month=dt_ny.month, day=dt_ny.day, 78 | hour=dt_ny.hour, minute=dt_ny.minute, second=dt_ny.second) 79 | res['datetime'] = dt_ny 80 | weekday = dt_ny.weekday() 81 | if weekday < 5 and dt_start <= dt_ny < dt_end: 82 | res['is_market_open'] = True 83 | res['open_in'] = 0 84 | res['close_in'] = (dt_end - dt_ny).total_seconds() 85 | else: 86 | if dt_ny < dt_start: 87 | add_days = 0 if weekday < 5 else (7 - weekday) 88 | else: 89 | add_days = 1 if weekday < 4 else (7 - weekday) 90 | dt_start = dt_start + timedelta(days=add_days) 91 | res['open_in'] = (dt_start - dt_ny).total_seconds() 92 | dt_end = dt_end + timedelta(days=add_days) 93 | res['close_in'] = (dt_end - dt_ny).total_seconds() 94 | return res 95 | -------------------------------------------------------------------------------- /redtide.py: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # Import 4 | import argparse 5 | from src.scraper import Stock 6 | from src.tradebot import TradeBot 7 | 8 | # Run 9 | if __name__ == '__main__': 10 | 11 | parser = argparse.ArgumentParser(description='Redtide is coming') 12 | parser.add_argument('-d', dest='daily_history', action='store_true', help='Retrieve historic data (daily resolution)') 13 | parser.add_argument('--symb', dest='symbol', default='', help='Specific ticker symbol') 14 | parser.add_argument('-c', dest='compile', action='store_true', help='Compile symbols') 15 | parser.add_argument('-v', dest='verbose', action='store_true', help='Verbose') 16 | parser.add_argument('--concat', dest='concat', default=None, help='Concatenate data into one file, give path') 17 | parser.add_argument('--file', dest='list', default=None, help='Symbol file') 18 | parser.add_argument('--from', dest='from_date', default=None, help='Analyze data from this date') 19 | parser.add_argument('--to', dest='to_date', default=None, help='Analyze data until this date') 20 | parser.add_argument('--bot', dest='bot', action='store_true', help='Run trade bot') 21 | parser.add_argument('--budget', dest='budget', default=500, type=int, help='Max budget for trade bot') 22 | parser.add_argument('--stocks', dest='stocks', default=5, type=int, help='Max stocks to hold by trade bot') 23 | parser.add_argument('--maxloss', dest='maxloss', default=0.9, type=float, help='Max loss by trade bot') 24 | 25 | args = parser.parse_args() 26 | 27 | # Compile stock symbols 28 | # Basically build a list of symbols that's acceptable with Yahoo Finance's URL 29 | if args.compile: 30 | s = Stock(verbose=args.verbose) 31 | if args.list: 32 | s.compile_symbols(p_symbs=args.list, append=True, batch_size=40) 33 | else: 34 | s.compile_symbols() 35 | 36 | # Scrape either daily history or live quotes on Yahoo Finance 37 | # Live quote is so slow, so bad, 
I hate it, I build it, I apologize for it 38 | # But daily history is what Redtide is really for 39 | if args.daily_history: 40 | s = Stock(mode='full_history', verbose=args.verbose) 41 | if args.symbol: 42 | symb,success = s.pull_daily_and_write(args.symbol) 43 | print('Symb: %s (%d)' % (symb, success)) 44 | elif args.list: 45 | s.pull_daily_and_write_batch(p_symbs=args.list) 46 | else: 47 | s.pull_daily_and_write_batch() 48 | 49 | # To concatenate individual stock historic data files into one 50 | # It'd take a long long time to concatenate everything, and not recommended 51 | # use --from and --to to select range (no --to means anything between --from to now) 52 | # i.e. $ python redtide -v --concat out_file.csv --from 2019-01-10 53 | if args.concat: 54 | s = Stock(verbose=args.verbose) 55 | p_out = args.concat + '.csv' if not args.concat.endswith('.csv') else args.concat 56 | _ = s.concat(from_date=args.from_date, to_date=args.to_date, p_out=p_out) 57 | print('Saved to: %s' % p_out) 58 | 59 | # Run Robinhood tradebot 60 | if args.bot: 61 | bot = TradeBot('robinhood', n_splits=args.stocks, 62 | allowance=args.budget, max_loss=args.maxloss) 63 | bot.run() 64 | -------------------------------------------------------------------------------- /src/constants.py: -------------------------------------------------------------------------------- 1 | import os 2 | import yaml 3 | 4 | 5 | # Redtide folder 6 | DIR_MAIN = os.path.realpath(os.path.join(os.path.split(__file__)[0], '..')) 7 | 8 | 9 | # Configuration file 10 | FILE_CONFIG = os.path.join(DIR_MAIN, 'config.yaml') 11 | 12 | 13 | # Load config 14 | redtide_configs = yaml.load(open(FILE_CONFIG), Loader=yaml.FullLoader) 15 | 16 | 17 | # Necessary directories and files 18 | file_path_configs = redtide_configs.get('file_paths', {}) 19 | DIR_DATA = os.path.join(DIR_MAIN, file_path_configs.get('dir_data', 'data')) 20 | DIR_LISTINGS = os.path.join(DIR_DATA, file_path_configs.get('dir_listings', 'listings')) 21 | 
DIR_HISTORY = os.path.join(DIR_DATA, file_path_configs.get('dir_full_history', 'full_history')) 22 | DIR_FINANCIALS = os.path.join(DIR_DATA, file_path_configs.get('dir_financials', 'financials')) 23 | DIR_OPTIONS = os.path.join(DIR_DATA, file_path_configs.get('dir_options', 'options')) 24 | FILE_ALL_SYMBOLS = os.path.join(DIR_DATA, file_path_configs.get('all_symbols', 'all_symbols.txt')) 25 | FILE_EXCLUDED_SYMBOLS = os.path.join(DIR_DATA, file_path_configs.get('excluded_symbols', 'excluded_symbols.txt')) 26 | 27 | # Selenium 28 | GECKODRIVER_PATH = redtide_configs.get('selenium', {}).get('path_geckodriver', 'geckodriver') 29 | SERVICE_LOG = 'NUL' if os.name == 'nt' else '/dev/null' 30 | 31 | 32 | # Create data folder if it does not exist 33 | if not os.path.isdir(DIR_DATA): 34 | os.makedirs(DIR_DATA) 35 | print('Created:', DIR_DATA) 36 | 37 | 38 | # Compile listing files of exchanges 39 | # These files should be under data/listings/ 40 | # and should have name .txt 41 | # i.e. NYSE.txt 42 | def compile_listings(): 43 | listing_files = {} 44 | if os.path.isdir(DIR_LISTINGS): 45 | for f in os.listdir(DIR_LISTINGS): 46 | ex_name = os.path.splitext(f)[0] 47 | f_path = os.path.join(DIR_LISTINGS, f) 48 | if os.path.isfile(f_path): 49 | listing_files[ex_name] = f_path 50 | else: 51 | print('Invalid path for {} at {}'.format(ex_name, f_path)) 52 | 53 | if listing_files: 54 | print('Found listing files for {} exchanges: {}'.format( 55 | len(listing_files), ', '.join(list(listing_files.keys())))) 56 | return listing_files 57 | FILES_LISTINGS = compile_listings() 58 | 59 | 60 | # Create the folder to store all basic financial data 61 | if not os.path.isdir(DIR_FINANCIALS): 62 | os.makedirs(DIR_FINANCIALS) 63 | 64 | # Create the folder to store all options data 65 | if not os.path.isdir(DIR_OPTIONS): 66 | os.makedirs(DIR_OPTIONS) 67 | 68 | 69 | # URLs to format later 70 | URL_YAHOO = 'https://finance.yahoo.com/quote/%s' 71 | URL_YAHOO_FINANCIALS = URL_YAHOO + '/financials?' 
72 | URL_YAHOO_PROFILE = URL_YAHOO + '/profile?' 73 | URL_YAHOO_PERFORMANCE = URL_YAHOO + '/performance?' 74 | URL_YAHOO_OPTIONS = URL_YAHOO + '/options?' 75 | URL_YAHOO_DAILY = URL_YAHOO + '/history?period1=%d&period2=%d&interval=1d&filter=history&frequency=1d' 76 | URL_ALPHA_VANTAGE_INTRADAY = 'https://www.alphavantage.co/query?function=TIME_SERIES_INTRADAY&' \ 77 | 'symbol={symbol}&interval={interval}&outputsize=full&apikey={apikey}' 78 | 79 | 80 | # Alpha Vantage user API key 81 | if 'alpha_vantage' in redtide_configs and 'api_key_file' in redtide_configs['alpha_vantage'] \ 82 | and os.path.isfile(redtide_configs['alpha_vantage'].get('api_key_file', '')): 83 | ALPHA_VANTAGE_API_KEY = open(redtide_configs['alpha_vantage']['api_key_file'], 'r').read().strip() 84 | else: 85 | ALPHA_VANTAGE_API_KEY = None 86 | 87 | 88 | # eoddata.com authentication file 89 | # username (line 1), pw (line 2) 90 | URL_EODDATA = 'http://eoddata.com/symbols.aspx' 91 | URL_EODDATA_GET_SYMBOLS = 'http://eoddata.com/Data/symbollist.aspx?e=%s' 92 | EODDATA_EXCHANGES = [] 93 | FILE_EODDATA_AUTH = None 94 | if 'eoddata' in redtide_configs: 95 | eoddata_configs = redtide_configs['eoddata'] 96 | if 'auth_file' in eoddata_configs and os.path.isfile(eoddata_configs['auth_file']): 97 | FILE_EODDATA_AUTH = eoddata_configs['auth_file'] 98 | if 'exchanges' in eoddata_configs and eoddata_configs['exchanges']: 99 | EODDATA_EXCHANGES = eoddata_configs['exchanges'] 100 | 101 | # Robinhood authentication file 102 | # username (line 1), pw (line 2) 103 | FILE_ROBINHOOD_AUTH = None 104 | if 'robinhood' in redtide_configs \ 105 | and 'auth_file' in redtide_configs['robinhood'] \ 106 | and os.path.isfile(redtide_configs['robinhood']['auth_file']): 107 | FILE_ROBINHOOD_AUTH = redtide_configs['robinhood']['auth_file'] 108 | -------------------------------------------------------------------------------- /analysis/stockdata.py: -------------------------------------------------------------------------------- 1 | 
import os 2 | import pandas as pd 3 | 4 | from src.constants import DIR_HISTORY, DIR_FINANCIALS 5 | 6 | 7 | class StockData(object): 8 | 9 | def __init__(self, symbols=None, date0='2010-01-01'): 10 | if symbols: 11 | if not isinstance(symbols, list) or not isinstance(symbols[0], str): 12 | raise ValueError('symbols arg must be a list of strings') 13 | self.symbols = symbols 14 | print('Given %d symbols' % len(symbols)) 15 | else: 16 | self.symbols = [f.replace('.csv', '') for f in os.listdir(DIR_HISTORY) 17 | if f.endswith('.csv')] 18 | print('Found %d symbols' % len(self.symbols)) 19 | 20 | # entities with fiancial reports are considered "companies" 21 | self.companies = [f.replace('.pkl', '') for f in os.listdir(DIR_FINANCIALS) 22 | if f.endswith('.pkl')] 23 | print('Found %d companies' % len(self.companies)) 24 | 25 | self.df_history = None 26 | self.date0 = date0 27 | self.load_histories() 28 | 29 | self.df_corr = None 30 | self.corr_date0 = None 31 | self.df_interest = self.build_col_ratio() 32 | self.correlate(df=self.df_interest) 33 | 34 | @property 35 | def symbols(self): 36 | return self._symbols 37 | 38 | @symbols.setter 39 | def symbols(self, value): 40 | if not isinstance(value, list): 41 | raise ValueError('symbols must be list') 42 | self._symbols = value 43 | 44 | @property 45 | def companies(self): 46 | return self._companies 47 | 48 | @companies.setter 49 | def companies(self, value): 50 | if not isinstance(value, list): 51 | raise ValueError('companies must be list') 52 | self._companies = value 53 | 54 | @property 55 | def df_history(self): 56 | return self._df_history 57 | 58 | @df_history.setter 59 | def df_history(self, value): 60 | if not isinstance(value, pd.DataFrame) and value is not None: 61 | raise ValueError('df_history must be Pandas DataFrame') 62 | self._df_history = value 63 | 64 | @property 65 | def df_interest(self): 66 | return self._df_interest 67 | 68 | @df_interest.setter 69 | def df_interest(self, value): 70 | if not 
isinstance(value, pd.DataFrame) and value is not None: 71 | raise ValueError('df_interest must be Pandas DataFrame') 72 | self._df_interest = value 73 | 74 | @property 75 | def df_corr(self): 76 | return self._df_corr 77 | 78 | @df_corr.setter 79 | def df_corr(self, value): 80 | if not isinstance(value, pd.DataFrame) and value is not None: 81 | raise ValueError('df_corr must be Pandas DataFrame') 82 | self._df_corr = value 83 | 84 | def load_histories(self): 85 | print('Loading price histories...') 86 | history = [] 87 | for s in self.symbols: 88 | p_history = os.path.join(DIR_HISTORY, '%s.csv' % s) 89 | history.append(pd.read_csv(p_history)\ 90 | .set_index('date')\ 91 | .sort_index()\ 92 | .loc[self.date0:].reset_index()\ 93 | .assign(symbol=s)) 94 | 95 | print(' |- Building dataframe...') 96 | self.df_history = pd.concat(history)\ 97 | .set_index(['date', 'symbol']).sort_index() 98 | 99 | print('Loaded price histories') 100 | return self 101 | 102 | def get_history_of_stock(self, symbol): 103 | return self.df_history.loc[pd.IndexSlice[:, symbol], :] 104 | 105 | def build_col_ratio(self, col1='high', col2='open', days=30): 106 | print('Building dataframe of {} / {} ratio over past {}' 107 | ' days...'.format(col1, col2, days)) 108 | idx = self.df_history[col2] != 0 109 | df = (self.df_history[col1][idx] / self.df_history[col2][idx])\ 110 | .reset_index().rename(columns={0:'ratio'})\ 111 | .pivot(index='date', columns='symbol', values='ratio')\ 112 | .iloc[-days:].dropna(axis=1) 113 | return df 114 | 115 | def correlate(self, df=None, col='adjclose', days=30, companies_only=True): 116 | if df is None and col: 117 | print('Correlating {} over past {} trade-days...'.format(col, days)) 118 | df = self.df_history[col].reset_index()\ 119 | .pivot(index='date', columns='symbol', values=col)\ 120 | .iloc[-days:].dropna(axis=1) 121 | df = df.pct_change().dropna() 122 | else: 123 | print('Correlating with provided dataframe...') 124 | 125 | self.corr_date0 = df.index[0] 
126 | 127 | if companies_only: 128 | symbs = list(set(self.companies) & set(df.columns)) 129 | self.df_corr = df[symbs].corr() 130 | else: 131 | self.df_corr = df.corr() 132 | return self 133 | -------------------------------------------------------------------------------- /backends/signinja/utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import yaml 4 | import os 5 | from time import sleep, time 6 | from selenium import webdriver 7 | from selenium.webdriver.firefox.options import Options 8 | from requests import Session 9 | from src.constants import DIR_MAIN 10 | 11 | 12 | config_path = os.path.join(DIR_MAIN, 'backends/signinja/config.yaml') 13 | configs = yaml.load(open(config_path), Loader=yaml.FullLoader) 14 | 15 | 16 | def start_webdrive_instance(url, headless=True, exe=None, log_path='/dev/null'): 17 | o = Options() 18 | o.headless = headless 19 | if exe is None: 20 | driver = webdriver.Firefox(service_log_path=log_path, options=o) 21 | else: 22 | driver = webdriver.Firefox(executable_path=exe, 23 | service_log_path=log_path, options=o) 24 | driver.get(url) 25 | 26 | return driver 27 | 28 | 29 | def headless_login(auth, url='', return_driver=False, headless=True, exe=None, log_path='/dev/null'): 30 | """ 31 | To use Selenium to login and get a requests session 32 | that's logged in. 33 | 34 | :param auth: dict of list, key:[xpath, value] 35 | i.e. 'username': ['//input[@name="username"]', 'johndoe'] 36 | 'password': ['//input[@name="password"]', 'abcde1234'] 37 | 'submit': ['//input[@name="submit"]'] 38 | also contain other params as key:value 39 | i.e. 
'has_2fa_page': True 40 | :param url: login page URL 41 | :param return_driver: Bool, optional, whether to return 42 | Selenium Webdriver obj also; if not, return 43 | only requests Session obj 44 | :param headless: Bool, whether to be headless 45 | :param exe: 46 | :param log_path: 47 | :return: a requests session or Selenium Webdriver 48 | """ 49 | 50 | if not url: 51 | url = auth.get('url') 52 | if url is None: 53 | raise ValueError('Need either url arg or "url" in auth') 54 | 55 | # navigate to login page 56 | if headless: 57 | # any site config that would require head 58 | if auth.get('has_2fa_page', False): 59 | headless = False 60 | 61 | driver = start_webdrive_instance(url, headless=headless, exe=exe, log_path=log_path) 62 | loadup_wait = int(auth.get('loadup_wait_sec', False) 63 | or configs.get('loadup_wait_sec', 3)) 64 | driver.implicitly_wait(int(loadup_wait)) 65 | 66 | # submit authentication data 67 | # username 68 | un = auth.get('username') 69 | if un is not None: 70 | try: 71 | driver.find_element_by_xpath(un[0]).send_keys(un[1]) 72 | except: 73 | driver.quit() 74 | raise 75 | else: 76 | driver.quit() 77 | raise ValueError('username not in auth') 78 | 79 | # password 80 | pw = auth.get('password') 81 | if pw is not None: 82 | try: 83 | driver.find_element_by_xpath(pw[0]).send_keys(pw[1]) 84 | except: 85 | driver.quit() 86 | raise 87 | else: 88 | driver.quit() 89 | raise ValueError('password not in auth') 90 | 91 | # submit 92 | submit = auth.get('submit') 93 | if submit is not None: 94 | try: 95 | driver.find_element_by_xpath(submit[0]).click() 96 | 97 | # enforced explicit wait 98 | login_wait = int(auth.get('login_wait_sec', False) 99 | or configs.get('login_wait_sec', 3)) 100 | sleep(login_wait) 101 | except: 102 | driver.quit() 103 | raise 104 | else: 105 | driver.quit() 106 | raise ValueError('submit not in auth') 107 | 108 | if auth.get('fail_catch') is not None: 109 | fail_catch = auth['fail_catch'] 110 | if 'in_page_source' in fail_catch: 
111 | if fail_catch.get('in_page_source') in driver.page_source: 112 | driver.quit() 113 | raise ValueError('Authentication failed') 114 | 115 | def _login_success(): 116 | return False 117 | if auth.get('login_success_catch') is not None: 118 | if 'in_page_source' in auth.get('login_success_catch'): 119 | def _login_success(): 120 | return auth['login_success_catch']['in_page_source']\ 121 | in driver.page_source 122 | 123 | if auth.get('has_2fa_page', False): 124 | n0 = len(driver.page_source) 125 | timeout = int(auth.get('wait_2fa_timeout', False) 126 | or configs.get('wait_2fa_timeout', 600)) 127 | t0 = time() 128 | wait_interval = int(configs.get('wait_2fa_interval', 2)) 129 | sleep(wait_interval) 130 | while (time() - t0) < timeout and \ 131 | (not _login_success() or len(driver.page_source) == n0): 132 | sleep(wait_interval) 133 | 134 | if auth.get('login_success_catch') is not None: 135 | if not _login_success(): 136 | driver.quit() 137 | raise ValueError('Login failed based on login_success_catch' 138 | ' definition') 139 | 140 | # copy cookies to requests session 141 | session = Session() 142 | _ = [session.cookies.set( 143 | c['name'], c['value']) for c in driver.get_cookies()] 144 | 145 | if return_driver or auth.get('return_webdriver', False): 146 | return session, driver 147 | 148 | driver.quit() 149 | return session 150 | 151 | 152 | def build_auth(xpaths, username, password): 153 | auth = {} 154 | 155 | try: 156 | auth['username'] = [xpaths.pop('username'), username] 157 | except: 158 | raise ValueError('username not in predefined') 159 | 160 | try: 161 | auth['password'] = [xpaths.pop('password'), password] 162 | except: 163 | raise ValueError('password not in predefined') 164 | 165 | try: 166 | auth['submit'] = [xpaths.pop('submit')] 167 | except: 168 | raise ValueError('submit not in predefined') 169 | 170 | return auth 171 | 172 | 173 | def auth_from_yaml(file_path, username=None, password=None): 174 | auth_configs = yaml.load(open(file_path), 
Loader=yaml.FullLoader) 175 | if username is None or password is None: 176 | auth_file = auth_configs.get('auth_file', False) 177 | if auth_file and os.path.isfile(auth_file): 178 | username, password = open(auth_file, 'r')\ 179 | .read().strip().split('\n') 180 | else: 181 | raise ValueError('Need either username and password' 182 | ' args or valid auth_file, cannot' 183 | ' find: %s' % auth_file) 184 | auth = build_auth(auth_configs, username, password) 185 | auth.update(auth_configs) 186 | return auth 187 | 188 | 189 | def predefined_auth(name, username=None, password=None): 190 | if configs is not None and 'predefined_auths' in configs: 191 | d_auths = os.path.join(DIR_MAIN, configs.get('predefined_auths')) 192 | if not os.path.isdir(d_auths): 193 | raise ValueError('Predefine_auths dir does not exists') 194 | else: 195 | raise ValueError('Config missing predefine_auths dir path') 196 | 197 | p_auth = os.path.join(d_auths, '%s.yaml' % name) 198 | if not os.path.isfile(p_auth): 199 | raise ValueError('No such file: %s' % p_auth) 200 | 201 | return auth_from_yaml(p_auth, username, password) 202 | -------------------------------------------------------------------------------- /analysis/financials.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pickle 3 | import numpy as np 4 | import pandas as pd 5 | from collections import defaultdict 6 | 7 | from src.constants import DIR_FINANCIALS 8 | 9 | 10 | class FinancialAnalysis(object): 11 | 12 | def __init__(self): 13 | symbols = [p[:-4] for p in os.listdir(DIR_FINANCIALS) 14 | if os.path.isfile(os.path.join(DIR_FINANCIALS, p))] 15 | self.financials = self.load_financial_data(symbols) 16 | 17 | # compile balancesheets 18 | print('Compiling balancesheets ...') 19 | self.req_cols = {'stock', 'T', 'netIncome'} 20 | self.df_yr = None 21 | self.symbols_yr = None 22 | self.df_qt = None 23 | self.symbols_qt = None 24 | self.symbols_both = None 25 | 
self.compile_all_balancesheets(req_cols=self.req_cols) 26 | self.prices = {} 27 | 28 | # evaluate 29 | print('Evaluating performances ...') 30 | self.greats = None 31 | self.bads = None 32 | self.sorted_greats = None 33 | self.great_corrs = None 34 | self.sorted_bads = None 35 | self.bad_corrs = None 36 | self.evaluate() 37 | 38 | @property 39 | def symbols(self): 40 | return list(self.financials.keys()) 41 | 42 | @property 43 | def n_symbols(self): 44 | return len(self.financials.keys()) 45 | 46 | @property 47 | def n_symbols_yr(self): 48 | return len(self.symbols_yr) 49 | 50 | @property 51 | def n_symbols_qt(self): 52 | return len(self.symbols_qt) 53 | 54 | @property 55 | def n_symbols_both(self): 56 | return len(self.symbols_both) 57 | 58 | @property 59 | def n_symbols_greats(self): 60 | return len(self.greats) 61 | 62 | @property 63 | def n_symbols_bads(self): 64 | return len(self.bads) 65 | 66 | def price(self, symbol): 67 | if symbol in self.financials: 68 | return self.financials[symbol].get('price', {})\ 69 | .get('regularMarketPrice', {}).get('raw') 70 | 71 | @staticmethod 72 | def load_financial_data(symbols, currency='USD'): 73 | data = defaultdict(dict) 74 | for s in symbols: 75 | try: 76 | d = pickle.load(open(os.path.join( 77 | DIR_FINANCIALS, s + '.pkl'), 'rb')) 78 | except: 79 | continue 80 | if currency: 81 | try: 82 | if d['earnings']['financialCurrency'] != currency: 83 | continue 84 | except: 85 | continue 86 | data[s] = d 87 | print('Loaded quote summary for %d stocks' % len(data)) 88 | return data 89 | 90 | @staticmethod 91 | def _stock_filter(data, min_market_cap=2e9, min_daily_vol=1e6): 92 | try: 93 | if data['price']['marketCap']['raw'] >= min_market_cap \ 94 | and data['price']['averageDailyVolume10Day']\ 95 | ['raw'] >= min_daily_vol: 96 | return True 97 | except: 98 | pass 99 | return False 100 | 101 | def compile_metric(self, metric='regularMarketOpen'): 102 | x = [] 103 | for v in self.financials.values(): 104 | if 
self._stock_filter(v): 105 | x.append(v['price'][metric]['raw']) 106 | return np.array(x) 107 | 108 | def _get_history(self, symb, quarterly=False, key='cashflowStatementHistory', fmt={'endDate'}, ignore={'maxAge'}): 109 | data = self.financials[symb] 110 | key = key + ('Quarterly' if quarterly else '') 111 | bals = data.get(key) 112 | if bals is None: 113 | return None 114 | subkeys = list(bals.keys()) 115 | subkeys.remove('maxAge') 116 | if subkeys: 117 | bals = bals.get(subkeys[0]) 118 | else: 119 | return None 120 | if not bals: 121 | return None 122 | 123 | x = defaultdict(list) 124 | n = len(bals) 125 | for i, b in enumerate(bals): 126 | x['T'].append(n - i) 127 | for k, v in b.items(): 128 | if k in ignore: 129 | continue 130 | if k in fmt: 131 | x[k].append(v['fmt']) 132 | elif v: 133 | x[k].append(v['raw'] if 'raw' in v else v) 134 | d = {} 135 | while x: 136 | k, v = x.popitem() 137 | if len(v) != n: 138 | continue 139 | d[k] = v 140 | return pd.DataFrame(d).assign(stock=symb) 141 | 142 | def full_compile(self, quarterly=False, key='cashflowStatementHistory'): 143 | df = pd.concat( 144 | [self._get_history(s, quarterly=quarterly, key=key) for s in self.financials], 145 | ignore_index=True) 146 | cols = list(df.columns) 147 | cols.remove('stock') 148 | cols.remove('T') 149 | cols.remove('endDate') 150 | cols = ['stock', 'endDate'] + cols 151 | return df[cols] 152 | 153 | def _compile_balancesheets(self, quarterly=False, req_cols={'netIncome'}, col_standard=None): 154 | print('Compiling from %d loaded stocks' % self.n_symbols) 155 | 156 | df_list = [] 157 | if not req_cols: 158 | if col_standard is None: 159 | col_standard = ['AMD', 'AAPL', 'INTC'] 160 | 161 | req_cols = set.intersection( 162 | *[set(self._get_history(s, quarterly=quarterly).columns) for s in col_standard]) 163 | 164 | print('Only with columns: {}'.format(req_cols)) 165 | for s, d in self.financials.items(): 166 | if not self._stock_filter(d): 167 | continue 168 | df = self._get_history(s, 
quarterly=quarterly) 169 | if df is not None: 170 | cols = set(df.columns) 171 | if not (req_cols - cols): 172 | df_list.append(df[req_cols]) 173 | 174 | print('Building dataframe - %d stocks' % len(df_list)) 175 | return pd.concat(df_list) 176 | 177 | def compile_all_balancesheets(self, req_cols={'netIncome'}, col_standard=None): 178 | self.df_yr = self._compile_balancesheets(False, req_cols=req_cols, col_standard=col_standard) 179 | self.symbols_yr = list(self.df_yr['stock'].unique()) 180 | self.df_qt = self._compile_balancesheets(True, req_cols=req_cols, col_standard=col_standard) 181 | self.symbols_qt = list(self.df_qt['stock'].unique()) 182 | self.symbols_both = list(set(self.symbols_yr) & set(self.symbols_qt)) 183 | return self 184 | 185 | def sort_history_symbs(self, df, symbs, by='netIncome', ascending=True): 186 | grouped = df.loc[np.any([df.stock == s for s in symbs], axis=0), ['T', 'stock', by]].groupby('stock') 187 | ser_corr = grouped.corr().loc[pd.IndexSlice[:, 'T'], by].droplevel(level=1).sort_values(ascending=ascending) 188 | return list(ser_corr.index), ser_corr 189 | 190 | @staticmethod 191 | def rank_stocks(lists, w=None): 192 | """ 193 | Sort based on ranks in each list of sorted symbols in lists 194 | 195 | :param lists: list of list of symbols 196 | :param w: weight of each list 197 | :return: 198 | """ 199 | if w is None: 200 | w = [1.] 
* len(lists) 201 | pts = defaultdict(list) 202 | for i, l in enumerate(lists): 203 | for j, s in enumerate(l): 204 | pts[s].append(j * w[i]) 205 | return [k[0] for k in sorted(pts.items(), key=lambda kv: np.prod(kv[1]))] 206 | 207 | def find_bads(self, df): 208 | df_income = df[['T', 'stock', 'netIncome']].pivot(index='T', columns='stock', values='netIncome') 209 | df_noreturn = (df_income < 0).sum(axis=0) 210 | return set(df_noreturn[df_noreturn == 4].index) 211 | 212 | def find_greats(self, df): 213 | df_income = df[['T', 'stock', 'netIncome']].pivot(index='T', columns='stock', values='netIncome') 214 | df_allreturn = (df_income > 0).sum(axis=0) 215 | return set(df_allreturn[df_allreturn == 4].index) 216 | 217 | def is_bad(self, symb): 218 | if symb in self.symbols_both: 219 | if symb in self.bads: 220 | print('bad, rank %d / %d' % (self.sorted_bads.index(symb) + 1, len(self.sorted_bads))) 221 | return 1 222 | else: 223 | # not shit 224 | return 0 225 | else: 226 | # Don't know 227 | return -1 228 | 229 | def is_great(self, symb): 230 | if symb in self.symbols_both: 231 | if symb in self.greats: 232 | print('great, rank %d / %d' % (self.sorted_greats.index(symb) + 1, len(self.sorted_greats))) 233 | return 1 234 | else: 235 | # not great 236 | return 0 237 | else: 238 | # Don't know 239 | return -1 240 | 241 | def eval_bads(self, recent=True): 242 | if recent: 243 | self.bads = list(self.find_bads(self.df_qt)) 244 | else: 245 | bad_yr = self.find_bads(self.df_yr) 246 | bad_qt = self.find_bads(self.df_qt) 247 | self.bads = list(bad_yr & bad_qt) 248 | 249 | def eval_greats(self, recent=True): 250 | if recent: 251 | self.greats = list(self.find_greats(self.df_qt)) 252 | else: 253 | great_yr = self.find_greats(self.df_yr) 254 | great_qt = self.find_greats(self.df_qt) 255 | self.greats = list(great_yr & great_qt) 256 | 257 | def evaluate(self, recent=True): 258 | self.eval_bads(recent=recent) 259 | symbs, self.bad_corrs = self.sort_history_symbs(self.df_qt, self.bads) 
260 | self.sorted_bads = self.rank_stocks([symbs]) 261 | 262 | self.eval_greats(recent=recent) 263 | symbs, self.great_corrs = self.sort_history_symbs(self.df_qt, self.greats, ascending=False) 264 | self.sorted_greats = self.rank_stocks([symbs]) 265 | 266 | def random_greats(self, n=10, trend_up=True): 267 | if trend_up: 268 | return np.random.choice(self.great_corrs[self.great_corrs > 0].index, n, replace=False) 269 | else: 270 | return np.random.choice(self.greats, n, replace=False) 271 | 272 | def random_bads(self, n=10, trend_down=True): 273 | if trend_down: 274 | return np.random.choice(self.bad_corrs[self.bad_corrs < 0].index, n, replace=False) 275 | else: 276 | return np.random.choice(self.bads, n, replace=False) 277 | -------------------------------------------------------------------------------- /src/models.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import requests 4 | from datetime import datetime 5 | from time import sleep 6 | from lxml import html 7 | from collections import defaultdict 8 | from scipy.stats import linregress, ttest_ind 9 | from multiprocessing import Pool 10 | 11 | from src.constants import URL_YAHOO_PROFILE, DIR_DATA 12 | from src.common import str_money, gen_symbol_batches 13 | 14 | 15 | class Order(object): 16 | def __init__(self, symbol, method, shares, price_atm=None, status=None): 17 | self.date = datetime.now() 18 | self.symbol = symbol 19 | self.method = method 20 | self.shares = shares 21 | self.price_atm = price_atm # price at the moment of transaction 22 | self.status = status 23 | 24 | @property 25 | def price(self): 26 | if self.status is None: 27 | return None 28 | return self.status.get('price') 29 | 30 | @property 31 | def order_status(self): 32 | if self.status is None: 33 | return None 34 | return self.status.get('status') 35 | 36 | @property 37 | def value(self): 38 | if self.status is None: 39 | return None 40 | return 
self.status.get('value') 41 | 42 | 43 | class Orders(object): 44 | def __init__(self): 45 | self.history = [] 46 | self.stock_history_idx = defaultdict(list) 47 | 48 | def has_stock(self, symbol): 49 | return symbol in self.stock_history_idx 50 | 51 | def append(self, order): 52 | self.history.append(order) 53 | idx = len(self.history) - 1 54 | self.stock_history_idx[order.symbol].append(idx) 55 | 56 | def stock_recent_order(self, symbol): 57 | if symbol in self.stock_history_idx: 58 | return self.history[self.stock_history_idx[symbol][-1]] 59 | return None 60 | 61 | 62 | class Stock(object): 63 | 64 | def __init__(self, symbol='', auto_update=True, cache_stale_sec=15): 65 | self.symbol = symbol 66 | self.url = URL_YAHOO_PROFILE % symbol if symbol else '' 67 | self.cached_response = None 68 | self.cache_datetime = None 69 | self.cache_stale_sec = cache_stale_sec 70 | self.metrics = {} 71 | self._trend_data = {'dt': [], 'price': [], 'vol': []} 72 | 73 | # write any data collected in the day to file 74 | if self.symbol: 75 | d_live = os.path.join(DIR_DATA, 'live_quotes') 76 | if not os.path.isdir(d_live): 77 | os.makedirs(d_live) 78 | self.p_quotes = os.path.join(d_live, self.symbol) 79 | else: 80 | self.p_quotes = None 81 | 82 | self.__cache_datetime = None 83 | self.__price = None 84 | self.__volume = None 85 | 86 | self.auto_update = auto_update 87 | if auto_update: 88 | self.update() 89 | 90 | def _msg(self, msg): 91 | return msg + ' for %s' % self.symbol 92 | 93 | @staticmethod 94 | def to_str(v): 95 | return str_money(v, decimal=2, comma=False) 96 | 97 | def update(self): 98 | if self.url: 99 | self.cached_response = requests.get(self.url) 100 | self.cache_datetime = datetime.now() 101 | self.load_data() 102 | self.write_quote() 103 | price = self.metrics.get('price') 104 | volume = self.metrics.get('volume') 105 | if price and volume: 106 | if self.__cache_datetime and self.__price and self.__volume: 107 | self._trend_data['dt'].append((self.cache_datetime - 
self.__cache_datetime).total_seconds()) 108 | self._trend_data['price'].append(price / self.__price - 1) 109 | self._trend_data['vol'].append(volume / self.__volume - 1) 110 | else: 111 | # delta-time is referenced to start 112 | # this is risky, hoping price and volume are always populated in metrics upon update 113 | self.__cache_datetime = self.cache_datetime 114 | self.__price = price 115 | self.__volume = volume 116 | else: 117 | print(self._msg('! trend update skipped (time={} price={} volume={})'.format( 118 | self.cache_datetime, price, volume))) 119 | else: 120 | print(self._msg('No URL, stock not updated')) 121 | return self 122 | 123 | def write_quote(self): 124 | if self.p_quotes is not None: 125 | _ = open(self.p_quotes, 'a+').write(json.dumps(self.metrics) + '\n') 126 | 127 | def _parse_json_data(self): 128 | """ 129 | Parse real-time data JSON payload Yahoo uses 130 | to update the webpage. 131 | 132 | :return: dict 133 | """ 134 | nodes = html.fromstring(self.cached_response.text).xpath( 135 | "//script[contains(text(), '{\"context')]") 136 | if nodes: 137 | try: 138 | i0 = nodes[0].text.find('{"context') 139 | i1 = nodes[0].text.rfind('};') + 1 140 | return json.loads(nodes[0].text[i0:i1]) 141 | except: 142 | return None 143 | return None 144 | 145 | @staticmethod 146 | def _load_data(data): 147 | x = {} 148 | for k, v in data.items(): 149 | if v: 150 | if isinstance(v, dict) and 'raw' in v: 151 | x[k] = v['raw'] 152 | else: 153 | x[k] = v 154 | return x 155 | 156 | def _populate_metrics(self, data): 157 | self.metrics['datetime'] = self.cache_datetime.isoformat() 158 | price_data = self._load_data(data.get('price')) 159 | self.metrics['price'] = price_data.get('regularMarketPrice') 160 | self.metrics['change'] = price_data.get('regularMarketChange') 161 | self.metrics['volume'] = price_data.get('regularMarketVolume') 162 | self.metrics['high'] = price_data.get('regularMarketDayHigh') 163 | self.metrics['low'] = 
price_data.get('regularMarketDayLow') 164 | self.metrics['shares'] = price_data.get('sharesOutstanding') 165 | self.metrics['market_cap'] = price_data.get('marketCap') 166 | self.metrics['currency'] = price_data.get('currency') 167 | self.metrics['previous_close'] = price_data.get('regularMarketPreviousClose') 168 | self.metrics['open'] = price_data.get('regularMarketOpen') 169 | summary_data = self._load_data(data.get('summaryDetail')) 170 | self.metrics['volumne10days'] = summary_data.get('averageVolume10days') 171 | self.metrics['bid'] = summary_data.get('bid') 172 | self.metrics['bid_size'] = summary_data.get('bidSize') 173 | self.metrics['ask'] = summary_data.get('ask') 174 | self.metrics['ask_size'] = summary_data.get('askSize') 175 | 176 | def load_data(self): 177 | data = self._parse_json_data() 178 | if data is None: 179 | print('Failed to parse data for %s' % self.symbol) 180 | else: 181 | try: 182 | self._populate_metrics(data['context']['dispatcher']['stores']['QuoteSummaryStore']) 183 | except: 184 | print(self._msg('Could not populate metrics')) 185 | return self 186 | 187 | def update_if_stale(self): 188 | if self.auto_update and (self.cache_datetime is None 189 | or (datetime.now() - self.cache_datetime).total_seconds() > self.cache_stale_sec): 190 | self.update() 191 | return True 192 | return False 193 | 194 | @property 195 | def currency(self): 196 | return self.metrics.get('currency') 197 | 198 | @property 199 | def price(self): 200 | self.update_if_stale() 201 | return self.metrics.get('price') 202 | 203 | @property 204 | def open_price(self): 205 | return self.metrics.get('open') 206 | 207 | @property 208 | def bid(self): 209 | self.update_if_stale() 210 | return self.metrics.get('bid') 211 | 212 | @property 213 | def ask(self): 214 | self.update_if_stale() 215 | return self.metrics.get('ask') 216 | 217 | @property 218 | def bid_size(self): 219 | self.update_if_stale() 220 | return self.metrics.get('bid_size') 221 | 222 | @property 223 | def 
ask_size(self): 224 | self.update_if_stale() 225 | return self.metrics.get('ask_size') 226 | 227 | @property 228 | def volume(self): 229 | self.update_if_stale() 230 | return self.metrics.get('volume') 231 | 232 | @property 233 | def open_close_change(self): 234 | if self.metrics.get('open') and self.metrics.get('previous_close'): 235 | return self.metrics['open'] / self.metrics['previous_close'] - 1 236 | return None 237 | 238 | @property 239 | def ask_bid_ratio(self): 240 | self.update_if_stale() 241 | if self.metrics.get('ask_size') and self.metrics.get('bid_size'): 242 | return self.metrics['ask_size'] / self.metrics['bid_size'] 243 | return None 244 | 245 | def price_trend(self, k=5): 246 | """ 247 | Return linregress result 248 | res.slope 249 | res.rvalue 250 | res.pvalue (null: slope = 0) 251 | res.stderrs 252 | :param k: 253 | :return: 254 | """ 255 | self.update_if_stale() 256 | if len(self._trend_data['dt']) >= k: 257 | x = self._trend_data['dt'][-k:] 258 | y = self._trend_data['price'][-k:] 259 | return linregress(x, y) 260 | return None 261 | 262 | def volume_trend(self, k=5): 263 | self.update_if_stale() 264 | if len(self._trend_data['dt']) >= k: 265 | x = self._trend_data['dt'][-k:] 266 | y = self._trend_data['vol'][-k:] 267 | return linregress(x, y) 268 | return None 269 | 270 | 271 | class StatRes(object): 272 | def __init__(self): 273 | self.__attributes = defaultdict(list) 274 | self.__appended = False 275 | self.__built = False 276 | 277 | @property 278 | def can_build(self): 279 | return self.__appended 280 | 281 | def __repr__(self): 282 | if self.__built: 283 | msg = 'StatRes({})'.format( 284 | ', '.join(['{}={}'.format(k, getattr(self, k)) 285 | for k in self.__attributes])) 286 | return msg 287 | else: 288 | return 'StatRes()' 289 | 290 | def append(self, k, v): 291 | if v is not None: 292 | self.__attributes[k].append(v) 293 | self.__appended = True 294 | 295 | def build(self): 296 | for k, v in self.__attributes.items(): 297 | if v: 298 | 
setattr(self, k, sum(v)/len(v)) 299 | else: 300 | setattr(self, k, None) 301 | self.__built = True 302 | return self 303 | 304 | 305 | class Stocks(object): 306 | 307 | def __init__(self, symbols=None): 308 | self.stocks = defaultdict(Stock) 309 | 310 | self.add_symbols(symbols) 311 | 312 | self.__n_cpus = os.cpu_count() 313 | 314 | @property 315 | def n_stocks(self): 316 | return len(self.stocks) 317 | 318 | @property 319 | def symbols(self): 320 | return list(self.stocks.keys()) 321 | 322 | def has_stock(self, symbol): 323 | return symbol in self.stocks 324 | 325 | def get_stock(self, symbol): 326 | if self.has_stock(symbol): 327 | return self.stocks[symbol] 328 | return None 329 | 330 | def add_symbols(self, symbols): 331 | if isinstance(symbols, list): 332 | for s in symbols: 333 | if self.has_stock(s): 334 | continue 335 | try: 336 | self.add_stock(Stock(s, auto_update=False)) 337 | except: 338 | print('Could not add {}'.format(s)) 339 | elif isinstance(symbols, str) and not self.has_stock(symbols): 340 | try: 341 | self.add_stock(Stock(symbols, auto_update=False)) 342 | except: 343 | print('Count not add {}'.format(symbols)) 344 | 345 | def add_stock(self, stock): 346 | if not isinstance(stock, Stock): 347 | raise ValueError('stock arg must be a Stock object') 348 | 349 | if stock.symbol not in self.stocks: 350 | self.stocks[stock.symbol] = stock 351 | return True 352 | return False 353 | 354 | def remove(self, symbol): 355 | if self.has_stock(symbol): 356 | self.stocks.pop(symbol) 357 | return True 358 | return False 359 | 360 | def _update(self, symbol): 361 | return self.stocks[symbol].update() 362 | 363 | def update(self, symbol=None, batch_scale=10): 364 | if symbol is None: 365 | batches = gen_symbol_batches(self.symbols, batch_size=int(self.__n_cpus * batch_scale)) 366 | for i, batch in enumerate(batches): 367 | with Pool(processes=self.__n_cpus) as pool: 368 | res = pool.map(self._update, batch) 369 | for s in res: 370 | self.stocks[s.symbol] = s 371 
| sleep(1) 372 | return True 373 | elif self.has_stock(symbol): 374 | self.stocks[symbol].update() 375 | return True 376 | return False 377 | 378 | def price_trend(self, symbol=None, k=5, metric='rvalue', baseval=0): 379 | if k < 3: 380 | raise ValueError('Stocks.price_trend: k >= 3 is a must') 381 | if metric not in ['slope', 'rvalue', 'pvalue', 'stderr']: 382 | raise ValueError('Stocks.price_trend: invalid metric: {}'.format(metric)) 383 | if symbol is None: 384 | values = [] 385 | for s in self.stocks.values(): 386 | r = s.price_trend(k) 387 | if r: 388 | values.append(getattr(r, metric)) 389 | n = len(values) 390 | if n >= 2: 391 | # n depends on stocks watching, meanless if few 392 | return ttest_ind(values, [baseval]*n, equal_var=False) 393 | elif self.has_stock(symbol): 394 | return self.stocks[symbol].price_trend(k) 395 | return None 396 | 397 | def volume_trend(self, symbol=None, k=5, metric='rvalue', baseval=0): 398 | if k < 3: 399 | raise ValueError('Stocks.volume_trend: k >= 3 is a must') 400 | if metric not in ['slope', 'rvalue', 'pvalue', 'stderr']: 401 | raise ValueError('Stocks.volume_trend: invalid metric: {}'.format(metric)) 402 | if symbol is None: 403 | values = [] 404 | for s in self.stocks.values(): 405 | r = s.volume_trend(k) 406 | if r: 407 | values.append(getattr(r, metric)) 408 | n = len(values) 409 | if n >= 2: 410 | return ttest_ind(values, [baseval]*n, equal_var=False) 411 | elif self.has_stock(symbol): 412 | return self.stocks[symbol].volume_trend(k) 413 | return None 414 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Redtide v0.2 2 | My attempt to scrape stock data, analyze the market, ... and tradebot 3 | 4 | **Python 3.5+** 5 | 6 | **( 2020.06.07 ) For those who already used Redtide before.** 7 | 1. Thank you! 8 | 2. I pretty much rebuilt it from the ground up and SQUASHED ALL the 9 | commits. 
**Pull with caution!** 10 | 11 | #### Main changes 12 | 1. **Live-quote option is deprecated!** It sucked, pretty sure no one 13 | used it. Pulling live-quote still exists as a functionality of the new 14 | `Stock` data model. More on this below. 15 | 2. Streamlined the stock symbol compilation step by automatically 16 | logging into **eoddata.com** (with your auth using Selenium) 17 | 3. Scrape **company's financial data** during the symbol compilation 18 | step 19 | 4. Introducing HoodAPI that uses Selenium to automate trades on 20 | Robinhood, example: 21 | ``` 22 | from src.api import HoodAPI 23 | hood = HoodAPI() 24 | hood.make_order('buy', 'AMD', shares=5, order_type='limit', price=52) 25 | hood.order_status('AMD') 26 | ``` 27 | 5. An experimental **trade bot** option (`--bot`), but go through it 28 | carefully first (at least read the brief description below). Run this 29 | **after compiling symbols and financial data:** 30 | ``` 31 | $ python3 redtide.py --bot --budget 500 --stocks 5 --maxloss 0.9 32 | ``` 33 | 6. The `tzset` issue on Windows is resolved. 34 | Everything **should** work on Windows. I tested on Windows 10. 35 | 7. All that ML stuff is removed because they are utterly useless! I 36 | honestly believe that everyone is modeling it better than I am. T_T 37 | 38 | ## Intro 39 | If you are looking at this, chances are you were on Reddit. This is 40 | currently documented very very poorly. You have been warned. Hopefully, 41 | you just want **a simple way to grab historic data** (end-of-day data) 42 | from Yahoo Finance to do your own awesome analysis. 43 | Or if you want to **venture into automated day-trade**. Then Redtide may 44 | be a solution for you. 45 | For now, I'm a bit too busy and stressed out by work 46 | to document anything thoroughly, so... have fun! 47 | 48 | ## Install / Setup 49 | #### A. Download code base 50 | 1. Just clone this repository. 
You'll want to do everything 51 | in the **redtide/** folder (I'll make a wheel at some point) 52 | - I highly recommend creating and doing everything in a virtual 53 | environment, i.e. 54 | ``` 55 | $ cd redtide/ 56 | $ python3 -m venv . 57 | $ source bin/activate 58 | ``` 59 | 2. Use the **requirements.txt** file to make sure you have 60 | everything you'll need. Run this: 61 | ``` 62 | $ pip3 install -r requirements.txt 63 | ``` 64 | 65 | #### B. Setup Selenium 66 | If you are not familiar with Selenium, it is basically a we browser 67 | instance that you can control in Python. Generally used in website 68 | test automation, but I'm using it for difficult logins and navigating 69 | complex websites. 70 | 71 | Skip this if you already have Firefox (gecko webdriver) in PATH. As in, 72 | you know running `webdriver.Firefox()` without any arguments will work. 73 | 74 | **Steps (assume you already installed the Python reqs):** 75 | 1. Download gecko webdriver from here (scroll down to Assets): 76 | https://github.com/mozilla/geckodriver/releases 77 | 2. Unpack and put the executable anywhere you want it 78 | 3. In `config.yaml`, point `"path_geckodriver"` to the path of the 79 | gecko executable (the file you just unpacked), example: 80 | ``` 81 | selenium: 82 | path_geckodriver: "where/you/put/geckodriver" 83 | ``` 84 | 85 | #### C. Setup authentications 86 | 1. 
**[eoddata.com](http://eoddata.com/symbols.aspx) auth** 87 | - This is so Redtide knows what stock symbols are out there 88 | - Register a free account on 89 | **[eoddata.com](http://eoddata.com/symbols.aspx)** 90 | - Create a text file (whatever name you want, anywhere you want) 91 | - In the text file, line 1: username, line 2: password; example: 92 | ``` 93 | admin 94 | password 95 | ``` 96 | - In **config.yaml** point the `auth_file` path under `eoddata` to 97 | the path of the login file you just created 98 | - **Alternatively,** if you are concerned with security, you can 99 | just log in and download the symbol list of each exchange manually 100 | and put them in a folder named "**listings**" under the folder 101 | **data/**. You'll have to create the **listings/** folder but 102 | Redtide will look in there first. But note that you'll want to 103 | repeat this everytime you pull data since stocks can get delisted 104 | and enlisted, and at the moment I don't know if there's a source for 105 | "diffs" on stock listings. 106 | - **Note: I don't work for eoddata.com, just find it to be 107 | a good source** 108 | 2. **Robinhood auth** (Optional, if you want to use **HoodAPI**) 109 | - Same steps as eoddata.com auth, create a username-password file 110 | but path pointed for `auth_file` under `robinhood` in 111 | **config.yaml** 112 | 113 | #### You are now ready to go! 114 | 115 | ## Pull stock histories 116 | 117 | ### How this works 118 | 1. **"stock exchange files,"** which are just lists of stock 119 | symbols tell Redtide which symbols to look for data 120 | on Yahoo Fiance. Redtide check with Yahoo to make sure 121 | these symbols can be found and verify spelling (i.e. "-" or "."). 122 | 2. While checking the symbols, Redtide also grabs the **financial data** 123 | of each company as it compiles the symbols. 124 | 3. 
User gets a chance to examine the compiled list of stock
symbols to make sure they are good to go, or go straight to pulling
historical data by stacking command line options.
4. Redtide pulls the **End-of-Day** movements of all these stocks
as far back as Yahoo provides.

### Steps to run

#### 1. Auto-update and compile stock exchange listings
- Navigate to the **redtide/** folder and run the following command
in a terminal.
```
$ python3 redtide.py -v -c
```

- Compilation can take an hour or so depending on the **number of
processors** and your **internet speed**.
- When it's done, you'll see the new **all_symbols.txt** and
**excluded_symbols.txt** under the **redtide/data/** folder

**Pro tip** If you have a list of stocks that you care about, and don't
care about any other tickers, then create your own **all_symbols.txt**
and keep it in **redtide/data/** and skip this step (`-c`) entirely
until you want to update their financial data.

#### 2. Pull data

- Navigate to **redtide/** folder, and run:
```
$ python3 redtide.py -v -d
```
- This will take about an hour or so. How long depends on the **number of
processors** you have and your **internet connection speed**.
- When this is done, you will see a **full_history/** folder under
**redtide/data/** that contains all the goodies (e.g. AMD.csv).

**Pro tip** You can chain both steps by doing:
```
$ python3 redtide.py -v -c -d
```

## Extra stuff

If you want to see some other options:
```
$ python3 redtide.py -h
```

### HoodAPI (Robinhood trade automation)
At the moment, Robinhood does not have an official API.
174 | There are a few unofficial Robhinhood APIs but it seems that they are 175 | either not maintained anymore or "working with Robinhood LLC" to stay 176 | online. Meaning there's a dependency on Robinhood. This can also mean 177 | that they are more stable but also mean Robinhood can have control 178 | over it's access (i.e. daily call limits, is it profitable, etc.) 179 | So... **HoodAPI** is built using Selenium with the intention of having 180 | full control but not as reliable (i.e. if Robinhood changes layout, 181 | HoodAPI will need to be updated with how to navigate). 182 | 183 | **Note: will need to setup Robinhood auth in the setup step. 184 | 185 | There are 2 basic concepts: 186 | 1. **Action** Making buy/sell order is a type of action. Cancel order is 187 | another type of action. **Only market order and limit order for now**, 188 | but the code to switch any order type is done, just not the form-filling 189 | part for the other types. 190 | 2. **Verify Status** Check the status of the action you requested to 191 | make sure it was successful. 192 | 193 | ``` 194 | from src.api import HoodAPI 195 | hood = HoodAPI() 196 | hood.make_order('buy', 'AMD', shares=5) 197 | status = hood.order_status('AMD') 198 | ``` 199 | 200 | If order went through then `status['status']` would be `"Done"`. 201 | Otherwise, it can be another Robinhood status message, like 202 | `"Pending"`. 203 | It is **important** to understand that `order_status(s)` checks your 204 | most recent order of that stock (i.e. the first element in the order 205 | history for that stock). 206 | If status is not `"Done"`, then you can cancel it with 207 | 208 | ``` 209 | hood.cancel_order('AMD') 210 | ``` 211 | 212 | Here also, `cancel_order(s)` only tries to cancel your most recent order 213 | of that stock. 214 | 215 | When you are done, call `quit()` to exit safely. This is to prevent a 216 | lingering Firefox session in the background. 
217 | ``` 218 | hood.quit() 219 | ``` 220 | 221 | **Must be very mindful of the current caveat of `check_status(s)` and 222 | `cancel_order(s)` as you implement, which, again, is that they only 223 | act on your most recent order of the specified stock** 224 | 225 | ### Stock and Stocks classes 226 | The goal here is to track stock prices, volumes, bids, asks, etc. of 227 | each stock. A `Stock('AMD')` object would retrieve and store the latest 228 | data on AMD from Yahoo when you call `.update()`. A `Stocks([...])` 229 | object lets you track multiple tickers via multiprocessing. 230 | 231 | #### Stock 232 | A **Stock()** object can be instantiated for any symbol found on 233 | Yahoo. 234 | When `auto_update=True` (default), then update is automatically 235 | done as the Stock object is instantiated. Thus, you can check the 236 | fields like price and volume. `auto_update=True` would also enforce 237 | update when certain fields are called, but to prevent update being 238 | too frequence, a staleness limit on the data (i.e. 15 seconds default) 239 | is used so that update is only performed if data is stale. 240 | 241 | ``` 242 | from src.models import Stock 243 | from time import sleep 244 | 245 | s = Stock('AMD', cache_stale_sec=15) 246 | print(s.price) 247 | print(s.volume) 248 | print(s.bid) 249 | print(s.bid_size) 250 | 251 | sleep(15) 252 | print(s.price) # update before returning price 253 | ``` 254 | 255 | A bunch of metrics are track during each `.update()` call, but not 256 | all of them are as useful as other, therefore, don't have a dedicated 257 | field for them. But all data can be access under `s.metrics` dict. Any 258 | of the following metrics that also have a dedicated field would also 259 | perform auto-update if `auto_update=True`. 
260 | 261 | | Yahoo metrics | Stock obj metrics | has field | 262 | |---|---|---| 263 | | regularMarketPrice | price | yes | 264 | | regularMarketVolume | volume | yes | 265 | | bid | bid | yes | 266 | | bidSize | bid_size | yes | 267 | | ask | ask | yes | 268 | | askSize | ask_size | yes | 269 | | currency | currency | yes | 270 | | regularMarketChange | change | no | 271 | | regularMarketDayHigh | high | no | 272 | | regularMarketDayLow | low | no | 273 | | sharesOutstanding | shares | no | 274 | | marketCap | market_cap | no | 275 | | regularMarketPreviousClose | previous_close | no | 276 | | regularMarketOpen | open | no | 277 | | averageVolume10days | volumne10days | no | 278 | 279 | During each update(), these metrics are also written to 280 | `data/live_quotes/ (default path) as lines of JSON (note: not 281 | a JSON file!) Each line in the file can be interpreted with a JSON 282 | interpreter. The sole reason that this is so weird is because all this 283 | is still quite experimental, and I want a structure that's as easily 284 | accessible as it is dynamic. 285 | 286 | **s.price_trend(k=5)** returns a Scipy linregress result object on the 287 | linear regression fit of the last 5 price data points (5 `update()` 288 | calls are required) 289 | 290 | **s.volume_trend(k=5)** returns a Scipy linregress result object on the 291 | linear regression fit of the last 5 volume data points (5 `update()` 292 | calls are required) 293 | 294 | #### Stocks 295 | **Stocks()** object manages multiple `Stock()` objects. Example: 296 | ``` 297 | from src.models import Stocks 298 | from time import sleep 299 | 300 | ss = Stocks(['AMD', 'AAPL', 'TSLA']).update() 301 | ss.add_symbols(['UAL', 'DAL', 'AAL']) 302 | ss.remove('TSLA') 303 | 304 | # update with 5 sec intervals 305 | for _ in range(5): 306 | sleep(5) 307 | ss.update() 308 | 309 | # ask "what is the price trend of all the stocks I'm tracking?" 
310 | # this does a T-test with the R^2 values of all the stocks against 311 | # zeros. Not the best way to do this statistics, but less data 312 | # required to get a rough estimate. 313 | ss.price_trend(k=5, metric='rvalue') 314 | ``` 315 | 316 | 317 | ### Trade bot (very very experimental) 318 | It's so experimental, I haven't made a single cent with it yet! Trading 319 | works, but I'm new to day-trade and I'm not sure what's a good strategy 320 | here. The trade bot basically utilizes the **HoodAPI** to automate 321 | trades. Feel free to look into to the code **src/tradebot.py** and I 322 | sincerely hope you are more successful than I am. 323 | 324 | To run with default ($500 budget, at at most 5 stocks, 90% budget max 325 | loss) 326 | ``` 327 | $ python3 redtide.py --bot 328 | ``` 329 | 330 | Or more controls 331 | ``` 332 | $ python3 redtide.py --bot --budget 1000 --stocks 10 --maxloss 0.95 333 | ``` 334 | 335 | May the Force be with you... 336 | s 337 | ## AlphaVantage 338 | **I want to exploit this great resource via crowd sourcing.** 339 | They limit free users to 5 requests per minute and 500 340 | requests per day. There are about 8600 tickers in AMEX, NYSE, 341 | and NASDAQ. Their API lets user get intraday data up to a week 342 | so it'll take **18 users less than 2 hours** to collect intraday 343 | data each EOD. Code for this is in progress. There are 344 | much more we can do by joining force. Let me know if this 345 | is interesting to you. :) 346 | 347 | ## Issues and workarounds 348 | 349 | - **u/siem** found a problem with how Macs handles forking. His full comment: 350 | 351 | "I had problems running the code at first on my Mac 352 | ("python +\[__NSPlaceholderDate initialize] may have been in progress 353 | in another thread when fork() was called.") - 354 | supposedly Apple has changed their fork implementation to disallow forking with active threads. 
355 | When I ran this before running the code I didn't get errors: 356 | 357 | ```export OBJC_DISABLE_INITIALIZE_FORK_SAFETY=YES```" 358 | 359 | - **u/siem** also found that after 2000-3000 pulls, 360 | connection to Yahoo Finance could fail. 361 | Possibly due to a tempoary IP ban. 362 | After that, I also noticed that around 120 fast page crawls, there's a temporary IP ban. 363 | To circumvent this issue, I implemented pauses around 10 - 20 seconds for every 100 page loads. 364 | Also, did the same for compiling the symbols, except the pauses are 5 - 10 seconds for every 200 page loads. 365 | - **New:** If connection fails or you get banned temporary, it will try 366 | to fetch for the failed ones again (**maximum of 5 passes**). If after 5 passes 367 | , there are still failed symbols left, they 368 | will be written to a **failed_symbs-<5 random characters>.txt** file. 369 | And you'll see a suggestion to run something like the following to retry. 370 | ``` 371 | # For failed daily history fetches 372 | $ python3 redtide.py -v -d --file failed_symbs-abe93.txt 373 | 374 | # For failed symbols during symbol compile 375 | $ python3 redtide.py -v -c --file data/excluded_symbols.txt 376 | ``` 377 | 378 | ## Shout-outs 379 | - Helpful Redditor **u/siem** discovered ways to resolve forking issue on Mac and IP ban issue with Yahoo. 380 | - Big thanks to **John Ecker** (JECKER@rollins.edu) and **Lukáš Vokráčko** (vokracko) for help make Redtide better with debugging and better documentations! Much appreciated! 381 | 382 | ## TODO 383 | Gosh... Where do I even begin... email me if you are interested. 384 | 385 | ## Contact 386 | Let me know what you think, and if you want to help out on my odd 387 | endeavor to be less poor. 
388 | I don't respond very quickly, but I always try to respond: 389 | jiunyyen@gmail.com 390 | -------------------------------------------------------------------------------- /src/tradebot.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from time import sleep, time 3 | from datetime import datetime 4 | from collections import defaultdict 5 | 6 | from analysis.financials import FinancialAnalysis 7 | from src.common import get_wallstreet_time 8 | from src.models import Stocks 9 | from src.api import HoodAPI 10 | 11 | 12 | class TradeBot(object): 13 | """ 14 | Trade automatically on Robinhood 15 | Main strategy is modified scalping: 16 | no upper bound, but moving lower bound as price increase. 17 | 1. Identify companies with great earning histories 18 | and meet all following criteria 19 | - Positive net profit past 4 years 20 | - Positive net profit past 4 quarters 21 | - high market cap, 2 billion 22 | - high daily volume 1 million 23 | 2. Identify ones that opened high and remained high 24 | with no down slope during first 10 minutes since 25 | market open, and sort them by this criteria 26 | (note: of the stocks opened higher than previous close, 27 | the ones with smallest high/close ratio are selected 28 | - this is based on personal observation, my guess is 29 | if it opens too high, there's less room to go up more??) 30 | Also, only 3 x n_splits will be selected, so we don't 31 | have to track too many stocks. 32 | 3. Of these, select top N stocks and evenly distribute 33 | allowance among them. N >= 10 and shares >= 1 34 | Use (allowance / N) > cost_per_share to scan the list 35 | of stocks from top to bottom and retrieve ones that 36 | qualifies. 
If reached the end and still partitions 37 | left unassigned, assign them to already assigned stock 38 | by simply scanning the list again 39 | The goal is to maximize diversity, chances are diversity 40 | is more important than any other metrics 41 | 4. Onces the stocks are selected and their shares to buy 42 | are calculated, make these order immediately with 43 | market price. 44 | 5. Make sell order only if price drops below lower bound 45 | (i.e. -0.5%) of the previously polled value, which include 46 | buy order price 47 | 6. Once a buy-sell is complete, this partition is free to 48 | be assigned to any stock (not currently assigned). Take 49 | the top 10 qualifying stocks (price < partition) and 50 | monitor their movement for 3 minutes, then reassign this 51 | partition if trend is good as evaluated before. 52 | 7. Repeat step 6. until market close. 53 | 54 | S1. Watch the overall market trend. If all the assigned stocks 55 | are all going down, then sell everything and stop. Watch 56 | the market using the top N (in step 2) as representatives. 57 | If market goes back up, then do step 6. 
58 | """ 59 | def __init__(self, trader=None, allowance=1000, n_splits=10, max_loss=0.95): 60 | print('Initializing TradeBotAPI ...') 61 | 62 | # initialize trader 63 | if trader == 'robinhood': 64 | self.trader = HoodAPI() 65 | elif trader == 'paper': 66 | # TODO - paper trade system 67 | self.trader = None 68 | else: 69 | self.trader = None 70 | 71 | self.trade_end_offset = (0, 10) # offset by 10 min 72 | self.allowance = allowance 73 | self.net_worth = allowance 74 | self.max_loss = allowance * max_loss 75 | self.n_splits = n_splits 76 | self.partition_size = np.floor(self.allowance / self.n_splits) 77 | self.fa = FinancialAnalysis() 78 | self.symbols_qualified = [s for s in self.fa.greats if self.fa.price(s) and self.fa.price(s) < self.partition_size] 79 | self.stocks = Stocks(self.symbols_qualified) 80 | self.holding = {} # dict of buy price 81 | self.pending_buy = [] 82 | self.pending_sell = [] 83 | self.pending_cancel = [] 84 | 85 | self.__watch_interval_sec = 45 86 | self.__watch_iters = 10 87 | self.__trend_window = 5 88 | self.__paritions_used = 0 # increment when buy order made, decrement only when sold or buy order canceled 89 | self.lb_ratio = -0.005 90 | self.cached_price = {} 91 | print('TradeBotAPI initialized.') 92 | 93 | def run(self): 94 | wst = get_wallstreet_time() 95 | if not wst['is_market_open']: 96 | print("Waiting {:.2f} hours for market to open".format(wst['open_in']/3600)) 97 | sleep(wst['open_in']) 98 | print('Market now open') 99 | if self.trader is not None: 100 | self.begin_trade() 101 | else: 102 | print('No trader') 103 | 104 | def begin_trade(self): 105 | """ 106 | Call this at market open to find stocks to scalp. 107 | Watch the first 10 iterations before calculating trend. 108 | :return: 109 | """ 110 | print('\nBegin trading ...\n') 111 | 112 | # 1. scan for open-high stocks 113 | # Take 60 - 75 sec for 342 stocks 114 | # About 5 sec for 30 115 | print('Scanning %d stocks for open-high ...' 
% self.stocks.n_stocks) 116 | t0 = time() 117 | self.stocks.update() 118 | remove_stocks = [] 119 | for symb, stock in self.stocks.stocks.items(): 120 | if stock.open_close_change is None or stock.open_close_change <= 0: 121 | remove_stocks.append(symb) 122 | for s in remove_stocks: 123 | self.stocks.remove(s) 124 | print('|- scan took {:.2f} sec'.format(time() - t0)) 125 | 126 | # 2. Sort open-close ratio from low to high 127 | # take the first N-split X 3 to watch for 128 | # It seems like the ones that open too high do not growth much 129 | # but the ones the open slighly high are more likely to grow 130 | symbs = np.array(self.stocks.symbols) 131 | changes = np.array([self.stocks.get_stock(s).open_close_change for s in symbs]) 132 | idx = np.argsort(changes) 133 | n_track = 3 * self.n_splits 134 | if len(symbs) > n_track: 135 | remove_stocks = symbs[idx][n_track:] 136 | for s in remove_stocks: 137 | self.stocks.remove(s) 138 | self.symbols_qualified = self.stocks.symbols 139 | print('Tracking %d qualifying stocks' % self.stocks.n_stocks) 140 | 141 | # 3. Conitnue to monitor the qualifying stocks for 142 | # more iterations 143 | for i_iter in range(self.__watch_iters): 144 | sleep(self.__watch_interval_sec) 145 | self.stocks.update() 146 | print('|- watch iter {} / {}'.format(i_iter+1, self.__watch_iters)) 147 | 148 | # 4. run sequence until trade end or if there are pendings 149 | wst = get_wallstreet_time(offset_close=self.trade_end_offset) 150 | while wst['is_market_open'] or self.has_pending: 151 | self.trade_sequence() 152 | if self.net_worth <= self.max_loss: 153 | print('! Reach max loss, selling/cancelling everything.') 154 | if self.pending_buy: 155 | self.cancel_all_pending(method='buy') 156 | if self.holding: 157 | self.batch_order(list(self.holding.keys()), 'sell') 158 | break 159 | sleep(self.__watch_interval_sec) 160 | 161 | # 5. 
close trader 162 | self.trader.quit() 163 | print('\nHappy trade day!') 164 | print('${:,.2f} ===> ${:,.2f}'.format(self.allowance, self.net_worth)) 165 | 166 | def sort_buyables(self): 167 | wst = get_wallstreet_time(offset_close=self.trade_end_offset) 168 | if not wst['is_market_open']: 169 | # prevent buy when near end of day 170 | return None 171 | 172 | rvalues = [self.stocks.get_stock(s).price_trend(k=self.__trend_window).rvalue 173 | for s in self.stocks.symbols] 174 | idx = np.argsort(rvalues)[::-1] 175 | symbs = np.array(self.stocks.symbols)[idx] 176 | buy_symbs = [] 177 | for s in symbs: 178 | if s not in self.holding \ 179 | and s not in self.pending_buy \ 180 | and s not in self.pending_sell \ 181 | and s not in self.pending_cancel: 182 | buy_symbs.append(s) 183 | return buy_symbs 184 | 185 | def sell_criteria(self, symbol): 186 | wst = get_wallstreet_time(offset_close=self.trade_end_offset) 187 | if wst['is_market_open']: 188 | # sell everything by end of day 189 | return True 190 | 191 | if symbol not in self.holding or symbol in self.pending_sell: 192 | return False 193 | 194 | stat = self.stocks.price_trend(symbol, k=self.__trend_window) 195 | stock = self.stocks.get_stock(symbol) 196 | # diff is relative to previous cached price 197 | diff = stock.price - self.cached_price[symbol] 198 | lb = self.lb_ratio * stock.open_price 199 | print(': {} rval={:.5f} pval={:.5f} diff={:.2f} lb={:.2f}'.format( 200 | symbol, stat.rvalue, stat.pvalue, diff, lb)) 201 | if diff <= lb: 202 | print('sell criteria ({}): below lower bound'.format(symbol)) 203 | return True 204 | # elif stat.pvalue <= 0.1 and stat.rvalue < 0: 205 | # # Too sensitive at the moment 206 | # print('sell criteria ({}): trending down'.format(symbol)) 207 | # return True 208 | return False 209 | 210 | @property 211 | def partitions_remain(self): 212 | return self.n_splits - self.__paritions_used 213 | 214 | @property 215 | def has_pending(self): 216 | if self.pending_buy: 217 | return True 218 | 
elif self.pending_sell: 219 | return True 220 | elif self.pending_cancel: 221 | return True 222 | return False 223 | 224 | def trade_sequence(self): 225 | print('\n___ sequence {} ______'.format(datetime.now())) 226 | # check pending status 227 | self.get_all_pending_status() 228 | 229 | # get updates on qualified stocks 230 | # don't update cached_price yet, 231 | # need the previous cached_price 232 | # to determine sell criteria 233 | self.stocks.update() 234 | 235 | # check if there are stocks that should be 236 | # sold off 237 | for s in self.stocks.symbols: 238 | # sell stocks that should be dumped off 239 | if self.sell_criteria(s) and self.sell(s): 240 | self.pending_sell.append(s) 241 | 242 | # check global trend and make sure still in trade period 243 | # TODO - sell/buy criteria are bad! Need to figure out 244 | # a decent strategy 245 | stat = self.stocks.price_trend(k=self.__trend_window) 246 | global_statistic = stat.statistic 247 | global_pvalue = stat.pvalue 248 | print(': Global stat={} pval={}'.format(global_statistic, global_pvalue)) 249 | wst = get_wallstreet_time(offset_close=self.trade_end_offset) 250 | if wst['is_market_open'] and global_statistic > 4 and global_pvalue < 1e-4: 251 | # see if there are partitions available 252 | # to buy stocks that are worth it 253 | if self.partitions_remain > 0: 254 | # get all the symbols worth investing 255 | buyable_symbols = self.sort_buyables() 256 | 257 | # this tracks N x parition for each symbol 258 | # so if there are more partitions left than 259 | # buyable_symbols, same symbol can be assigned 260 | # more than 1 partition 261 | stock_partitions = defaultdict(int) 262 | 263 | if buyable_symbols: 264 | for i in range(self.partitions_remain): 265 | symb = buyable_symbols[i % len(buyable_symbols)] 266 | stock_partitions[symb] += 1 267 | for symb, p in stock_partitions.items(): 268 | if self.buy(symb, p * self.partition_size): 269 | self.pending_buy.append(symb) 270 | elif not wst['is_market_open'] 
or (global_statistic < -4 and global_pvalue < 1e-4): 271 | if not wst['is_market_open']: 272 | print('! End of day soon, selling everything...') 273 | # sell all and cancel all buy orders 274 | if self.pending_buy: 275 | self.cancel_all_pending('buy') 276 | if self.holding: 277 | self.batch_order(list(self.holding.keys()), 'sell') 278 | else: 279 | print('! Does not meet buy or sell criteria, continue watching market ...') 280 | 281 | # update cached_price 282 | for s in self.cached_price: 283 | if self.stocks.get_stock(s).price: 284 | self.cached_price[s] = self.stocks.get_stock(s).price 285 | 286 | 287 | def _check_order_complete_status(self, symbol, target='Done', max_try=10): 288 | if target not in ['Done', 'Canceled']: 289 | raise ValueError('target arg must be either Done or Canceled') 290 | status = self.trader.order_status(symbol) 291 | if status is None: 292 | print('>>> lost track of order for {} <<<'.format(symbol)) 293 | return False 294 | i_try = 0 295 | while status['status'] != target and i_try < max_try: 296 | sleep(1) 297 | status = self.trader.order_status(symbol) 298 | i_try += 1 299 | if status['status'] == target: 300 | recent_order = self.trader.orders.stock_recent_order(symbol) 301 | # update cached order status 302 | recent_order.status = status 303 | if target == 'Canceled': 304 | msg_head = '[x] Canceled {} order'.format(symbol) 305 | elif status.get('type'): 306 | if 'Buy' in status.get('type'): 307 | msg_head = '[+] Bought {} shared of {} at {}'.format(status.get('shares'), symbol, status.get('price')) 308 | elif 'Sell' in status.get('type'): 309 | msg_head = '[-] Sold {} shares of {} at {}'.format(status.get('shares'), symbol, status.get('price')) 310 | else: 311 | print('[?] Unknown status type for {}: {}'.format(symbol, status.get('type'))) 312 | return False 313 | else: 314 | print('[?] 
Status for {} is missing type'.format(symbol)) 315 | return False 316 | 317 | print(msg_head, 'succesfully!') 318 | return True 319 | else: 320 | print('[?] Cannot confirm if status of {} order is {}'.format(symbol, target)) 321 | return False 322 | 323 | def buy(self, symbol, partition_size=None): 324 | if partition_size is None: 325 | partition_size = self.partition_size 326 | shares = int(np.floor(partition_size / self.stocks.get_stock(symbol).update().price)) 327 | try: 328 | print('Buying {} shares of {} ...'.format(shares, symbol)) 329 | if self.trader.make_order('buy', symbol, shares=shares, order_type='market'): 330 | print('|- sent buy order') 331 | else: 332 | print('|- failed to send buy order') 333 | return False 334 | except Exception as e: 335 | print('|- failed to buy {} ({} shares), exception: {}'.format(symbol, shares, e)) 336 | return False 337 | self.__paritions_used += 1 338 | return True 339 | 340 | def sell(self, symbol): 341 | if not self.trader.orders.has_stock(symbol): 342 | print('No {} shares to sell'.format(symbol)) 343 | return False 344 | shares = self.trader.orders.stock_recent_order(symbol).shares 345 | try: 346 | print('Selling {} shares of {} ...'.format(shares, symbol)) 347 | if self.trader.make_order('sell', symbol, order_type='market'): 348 | print('|- sent sell order') 349 | else: 350 | print('|- failed to send sell order') 351 | return False 352 | except Exception as e: 353 | print('|- failed to sell {}, exception: {}'.format(symbol, e)) 354 | return False 355 | return True 356 | 357 | def cancel(self, symbol): 358 | if symbol in self.pending_buy: 359 | if self.trader.cancel_order(symbol): 360 | self.pending_buy.remove(symbol) 361 | self.pending_cancel.append(symbol) 362 | return True 363 | elif symbol in self.pending_sell: 364 | if self.trader.cancel_order(symbol): 365 | self.pending_sell.remove(symbol) 366 | self.pending_cancel.append(symbol) 367 | return True 368 | else: 369 | print('! 
No pending buy or sell for {}'.format(symbol)) 370 | return False 371 | 372 | def cancel_all_pending(self, method): 373 | if method not in ['buy', 'sell']: 374 | raise ValueError('method must be either buy or sell') 375 | if method == 'buy': 376 | for symbol in self.pending_buy: 377 | if self.trader.cancel_order(symbol): 378 | self.pending_buy.remove(symbol) 379 | self.pending_cancel.append(symbol) 380 | else: 381 | for symbol in self.pending_sell: 382 | if self.trader.cancel_order(symbol): 383 | self.pending_sell.remove(symbol) 384 | self.pending_cancel.append(symbol) 385 | self.get_all_pending_status() 386 | 387 | def batch_order(self, symbols, method, gap_sec=1): 388 | """ 389 | This make batch orders, but does not check status 390 | 391 | :param symbols: 392 | :param method: 393 | :param gap_sec: 394 | :return: 395 | """ 396 | if method not in ['buy', 'sell']: 397 | raise ValueError('method must be either buy or sell') 398 | for symbol in symbols: 399 | if method == 'buy': 400 | if symbol not in self.holding \ 401 | and symbol not in self.pending_buy \ 402 | and symbol not in self.pending_sell \ 403 | and symbol not in self.pending_cancel: 404 | if self.buy(symbol): 405 | self.pending_buy.append(symbol) 406 | sleep(gap_sec) 407 | else: 408 | print('! Cannot buy: already pending or holding {}'.format(symbol)) 409 | else: 410 | if symbol in self.holding and symbol not in self.pending_sell: 411 | if self.sell(symbol): 412 | self.pending_sell.append(symbol) 413 | sleep(gap_sec) 414 | else: 415 | print('! 
Cannot sell: not holding {}'.format(symbol)) 416 | 417 | def update_net_worth(self, buy_value, sell_value): 418 | trade_value = sell_value - buy_value 419 | self.net_worth += trade_value 420 | if trade_value > 0: 421 | msg = '[ GAIN ]' 422 | elif trade_value < 0: 423 | msg = '[ LOSS ]' 424 | else: 425 | msg = '[ EVEN ]' 426 | msg += ' ${:,.2f} ==> net ${:,.2f}'.format(trade_value, self.net_worth) 427 | print(msg) 428 | 429 | def get_all_pending_status(self): 430 | for symbol in self.pending_buy: 431 | if self._check_order_complete_status(symbol, 'Done'): 432 | self.pending_buy.remove(symbol) 433 | order = self.trader.orders.stock_recent_order(symbol) 434 | self.holding[symbol] = order.value 435 | self.cached_price[symbol] = order.price 436 | for symbol in self.pending_sell: 437 | if self._check_order_complete_status(symbol, 'Done'): 438 | self.__paritions_used -= 1 439 | self.pending_sell.remove(symbol) 440 | order = self.trader.orders.stock_recent_order(symbol) 441 | self.update_net_worth(self.holding[symbol], order.value) 442 | self.holding.pop(symbol) 443 | self.cached_price.pop(symbol) 444 | self.trader.close_tab_by_stock(symbol) 445 | for symbol in self.pending_cancel: 446 | if self._check_order_complete_status(symbol, 'Canceled'): 447 | self.pending_cancel.remove(symbol) 448 | order = self.trader.orders.stock_recent_order(symbol) 449 | if 'Buy' in order.method: 450 | self.__paritions_used -= 1 451 | self.trader.close_tab_by_stock(symbol) 452 | -------------------------------------------------------------------------------- /src/api.py: -------------------------------------------------------------------------------- 1 | from time import sleep 2 | from selenium.common import exceptions as selenium_exceptions 3 | from selenium.webdriver.common.keys import Keys 4 | 5 | from backends.signinja.utils import predefined_auth, headless_login 6 | from src.common import float_money, regxs 7 | from src.models import Order, Orders 8 | from src.constants import 
FILE_ROBINHOOD_AUTH, GECKODRIVER_PATH, SERVICE_LOG 9 | 10 | 11 | class HoodAPI(object): 12 | 13 | """ 14 | The API for automating Robinhood trade orders 15 | """ 16 | 17 | __urls = { 18 | 'portfolio': 'https://robinhood.com', 19 | 'stocks': 'https://robinhood.com/stocks/{}' 20 | } 21 | 22 | @classmethod 23 | def stock_url(cls, symbol='AMD'): 24 | return cls.__urls['stocks'].format(symbol.upper()) 25 | 26 | def __init__(self): 27 | print('Initializing HoodAPI...') 28 | 29 | # Start headless driver 30 | if FILE_ROBINHOOD_AUTH is None: 31 | raise IOError('Need Robinhood auth file for login') 32 | auth = predefined_auth('robinhood', username='foo', password='bar') 33 | auth['username'][1], auth['password'][1] = open(FILE_ROBINHOOD_AUTH, 'r')\ 34 | .read().strip().split('\n') 35 | try: 36 | _, d = headless_login(auth, exe=GECKODRIVER_PATH, log_path=SERVICE_LOG) 37 | self.driver = d 38 | except: 39 | print('Failed to sign into Robinhood') 40 | raise 41 | 42 | self.worth = -1 43 | self.buy_power = -1 44 | 45 | # These keeps track of stock's tab 46 | # do not directly reference them. 
47 | # Access data from methods 48 | self.__stock_tabs = {} 49 | self.__tab_stocks = {} 50 | 51 | # Ensure the first tab is always the portfolio page 52 | self.to_url(self.__urls['portfolio']) 53 | tab_id = self.current_tab_id 54 | self.add_stock_tab('portfolio', tab_id) 55 | self.get_portfolio_values() 56 | 57 | # portfolio value at initialization 58 | self.__worth_initial = self.worth 59 | self.__buy_power_initial = self.buy_power 60 | 61 | # track orders since initializatoin 62 | self.orders = Orders() 63 | 64 | print('HoodAPI initialized.') 65 | 66 | @property 67 | def driver(self): 68 | return self.__driver 69 | 70 | @driver.setter 71 | def driver(self, value): 72 | if not hasattr(self, 'driver'): 73 | self.__driver = value 74 | else: 75 | print('webdriver override is forbidden') 76 | 77 | @property 78 | def tabs(self): 79 | return self.driver.window_handles 80 | 81 | @property 82 | def current_tab_id(self): 83 | try: 84 | return self.driver.current_window_handle 85 | except selenium_exceptions.NoSuchWindowException: 86 | print('Failed to get current tab_id, likely close. 
Switch to tab 0') 87 | try: 88 | self.to_tab_by_index(0) 89 | return self.driver.current_window_handle 90 | except: 91 | print('Still failed to get current tab_id') 92 | raise 93 | except: 94 | print('Failed to get current tab_id for unexpected reason') 95 | raise 96 | 97 | @property 98 | def net_change(self): 99 | self.get_portfolio_values() 100 | return self.worth - self.__worth_initial 101 | 102 | def quit(self): 103 | self.driver.quit() 104 | 105 | def get_tab_id_from_stock(self, symbol): 106 | if symbol in self.__stock_tabs: 107 | return self.__stock_tabs[symbol] 108 | return None 109 | 110 | def get_stock_from_tab_id(self, tab_id): 111 | if tab_id in self.__tab_stocks: 112 | return self.__tab_stocks[tab_id] 113 | return None 114 | 115 | def stock_has_tab(self, symbol): 116 | return symbol in self.__stock_tabs 117 | 118 | def add_stock_tab(self, symbol, tab_id): 119 | if symbol in self.__stock_tabs: 120 | print('{} already has a tab {}'.format( 121 | symbol, self.__stock_tabs[symbol])) 122 | return False 123 | 124 | self.__stock_tabs[symbol] = tab_id 125 | self.__tab_stocks[tab_id] = symbol 126 | return True 127 | 128 | def remove_stock_tab(self, symbol=None, tab_id=None): 129 | if symbol is None and tab_id is None: 130 | raise ValueError('Must pass either symbol or tab_id arg') 131 | 132 | if symbol is not None: 133 | tab_id = self.get_tab_id_from_stock(symbol) 134 | if tab_id is not None: 135 | self.__stock_tabs.pop(symbol) 136 | if tab_id in self.__tab_stocks: 137 | self.__tab_stocks.pop(tab_id) 138 | else: 139 | print('Stock {} does not exist'.format(symbol)) 140 | else: 141 | symbol = self.get_stock_from_tab_id(tab_id) 142 | if symbol is not None: 143 | self.__tab_stocks.pop(tab_id) 144 | if symbol in self.__stock_tabs: 145 | self.__stock_tabs.pop(symbol) 146 | else: 147 | print('Tab {} does not exist'.format(tab_id)) 148 | 149 | def new_tab(self): 150 | tabs0 = self.tabs[:] 151 | self.driver.execute_script("window.open()") 152 | if self.tabs != tabs0: 
153 | tab_id = (set(self.tabs) - set(tabs0)).pop() 154 | self.to_tab_by_id(tab_id) 155 | return tab_id 156 | else: 157 | raise RuntimeError('Could not create new tab') 158 | 159 | def close_tab(self, tab_id=None): 160 | if tab_id is None: 161 | tab_id = self.current_tab_id 162 | if tab_id == self.get_tab_id_from_stock('portfolio'): 163 | print('Closing the first portfolio page is not allowed') 164 | return False 165 | 166 | # if manually closed portfolio tab 167 | if len(self.tabs) <= 1: 168 | print('No close: must have at least 1 tab open; otherwise .quit()') 169 | return False 170 | 171 | def _close_tab(): 172 | tab0 = self.current_tab_id 173 | try: 174 | self.driver.execute_script("window.close()") 175 | except: 176 | print('Failed to execute window.close()') 177 | raise 178 | if tab0 not in self.tabs: 179 | if self.get_stock_from_tab_id(tab0) is not None: 180 | self.remove_stock_tab(tab_id=tab0) 181 | else: 182 | raise RuntimeError('Could not close tab_id {}'.format(tab_id)) 183 | 184 | if tab_id not in self.tabs: 185 | raise IndexError('tab_id {} not in {}'.format(tab_id, self.tabs)) 186 | 187 | if self.current_tab_id != tab_id: 188 | return_tab_id = self.current_tab_id 189 | self.to_tab_by_id(tab_id) 190 | else: 191 | return_tab_id = None 192 | for i in self.tabs: 193 | if i != tab_id: 194 | return_tab_id = self.tabs[0] 195 | break 196 | if return_tab_id is None: 197 | msg = 'Could not find a return_tab_id from {}' \ 198 | ' that not {}'.format(self.tabs, tab_id) 199 | raise IndexError(msg) 200 | _close_tab() 201 | self.to_tab_by_id(return_tab_id) 202 | 203 | print('Tab_id {} closed'.format(tab_id)) 204 | return True 205 | 206 | def close_tab_by_stock(self, symbol): 207 | tab_id = self.get_tab_id_from_stock(symbol) 208 | if tab_id is not None: 209 | return self.close_tab(tab_id) 210 | else: 211 | print('No stock tab to close for stock', symbol) 212 | return False 213 | 214 | def to_tab_by_id(self, tab_id): 215 | try: 216 | self.driver.switch_to_window(tab_id) 
217 | except: 218 | print('Failed to switch to tab_id {}'.format(tab_id)) 219 | raise 220 | 221 | def to_tab_by_index(self, idx): 222 | try: 223 | self.to_tab_by_id(self.tabs[idx]) 224 | except: 225 | print('Failed to switch to tab {}'.format(idx)) 226 | raise 227 | 228 | def to_url(self, url): 229 | tab_id = self.current_tab_id 230 | if self.get_stock_from_tab_id(tab_id) is not None: 231 | print('Going to another url from a stock tab is not allowed') 232 | return False 233 | else: 234 | self.driver.get(url) 235 | sleep(4) 236 | return True 237 | 238 | def to_portfolio_tab(self): 239 | try: 240 | self.to_tab_by_id(self.get_tab_id_from_stock('portfolio')) 241 | return True 242 | except Exception as e: 243 | print('Failed to switch to portfolio tab: {}'.format(e)) 244 | return False 245 | 246 | def new_tab_url(self, url=''): 247 | if not url: 248 | url = self.__urls['portfolio'] 249 | 250 | new_tab_id = None 251 | try: 252 | new_tab_id = self.new_tab() 253 | self.to_url(url) 254 | except: 255 | print('Failed to open url in new tab: {}'.format(url)) 256 | if new_tab_id is not None: 257 | try: 258 | print('Try closing new tab') 259 | self.close_tab(tab_id=new_tab_id) 260 | except Exception as e: 261 | print('Could not close new tab because: {}'.format(e)) 262 | raise 263 | return new_tab_id 264 | 265 | def new_tab_stock(self, symbol): 266 | if not symbol: 267 | raise ValueError('Need symbol') 268 | 269 | if not self.stock_has_tab(symbol): 270 | tab_id = self.new_tab_url(self.stock_url(symbol)) 271 | self.add_stock_tab(symbol, tab_id) 272 | sleep(1) 273 | else: 274 | tab_id = self.get_tab_id_from_stock(symbol) 275 | 276 | # navigate to the stock tab 277 | self.to_tab_by_id(tab_id) 278 | 279 | return tab_id 280 | 281 | def get_portfolio_values(self): 282 | return_tab_id = None 283 | try: 284 | portfolio_id = self.get_tab_id_from_stock('portfolio') 285 | if portfolio_id != self.current_tab_id: 286 | return_tab_id = self.current_tab_id 287 | 
self.to_tab_by_id(self.get_tab_id_from_stock('portfolio')) 288 | 289 | # Worth 290 | res = self.driver.find_element_by_xpath( 291 | '//main[@class="main-container"]//header').text 292 | self.worth = float_money(res.split('\n')[0]) 293 | print('Worth:', self.worth) 294 | 295 | # Buying power 296 | res = self.driver.find_element_by_xpath( 297 | '//div[@class="sidebar-content"]//button').text 298 | self.buy_power = float_money(res.split('\n')[-1]) 299 | print('Buying power:', self.buy_power) 300 | except: 301 | print('Failed to get protfolio values') 302 | raise 303 | finally: 304 | if return_tab_id is not None: 305 | self.to_tab_by_id(return_tab_id) 306 | 307 | def get_portfolio_stocks(self): 308 | self.to_portfolio_tab() 309 | portfolio_stocks = {} 310 | if 'class="sidebar-content"' in self.driver.page_source: 311 | text = self.driver.find_element_by_xpath('//div[@class="sidebar-content"]').text 312 | if text: 313 | res = regxs['rh_port_stocks'].findall(text) 314 | if res: 315 | for symb, shares in res: 316 | portfolio_stocks[symb] = shares 317 | return portfolio_stocks 318 | 319 | def _order_method(self, action='buy'): 320 | if action not in ['buy', 'sell']: 321 | raise ValueError('Invalid order method, action={}'.format(action)) 322 | 323 | if self._available_shares() > 0: 324 | if action == 'buy': 325 | self.driver.find_elements_by_xpath('//div[@role="button"]')[0].click() 326 | else: 327 | self.driver.find_elements_by_xpath('//div[@role="button"]')[1].click() 328 | elif action == 'sell': 329 | return False 330 | return True 331 | 332 | def _send_shares(self, n): 333 | self.driver.find_element_by_name("quantity").send_keys(str(n)) 334 | 335 | def _review_order(self): 336 | self.driver.find_element_by_xpath( 337 | '//button[@data-testid="OrderFormControls-Review"]').click() 338 | 339 | def _edit_order(self): 340 | # for when in review, to go back to edit 341 | self.driver.find_element_by_xpath('//button[contains(.,"Edit")]').click() 342 | 343 | def 
_change_order_type(self, order_type='market'): 344 | if order_type not in ['market', 'limit', 'stopLoss', 'stopLimit', 'trailingStop']: 345 | raise ValueError('Invalid order type, order_type={}'.format(order_type)) 346 | 347 | def _select(v): 348 | self.driver.find_element_by_xpath( 349 | '//span[contains(., "{} Order")]/parent::span' 350 | '/parent::div/parent::div'.format(v)).click() 351 | 352 | # click the drop-down 353 | self.driver.find_elements_by_xpath( 354 | '//form[@data-testid="OrderForm"]/div[1]/div[1]/div')[-1].click() 355 | 356 | if order_type == 'market': 357 | _select('Market') 358 | elif order_type == 'limit': 359 | _select('Limit') 360 | elif order_type == 'stopLoss': 361 | _select('Stop Loss') 362 | elif order_type == 'stopLimit': 363 | _select('Stop Limit') 364 | elif order_type == 'trailingStop': 365 | _select('Trailing Stop') 366 | else: 367 | self.driver.find_element_by_tag_name('body').send_keys(Keys.ESCAPE) 368 | raise ValueError('No such order type: {}'.format(order_type)) 369 | 370 | def _set_limit_price(self, price): 371 | self.driver.find_element_by_xpath('//input[@name="limitPrice"]').send_keys(str(price)) 372 | 373 | def _current_market_price(self): 374 | self._change_order_type('market') 375 | res = self.driver.find_element_by_xpath( 376 | '//span[contains(., "Market Price")]/parent::a/parent::div/parent::div').text 377 | return float_money(res.split('\n')[-1]) 378 | 379 | def _estimate_cost(self): 380 | res = self.driver.find_element_by_xpath( 381 | '//span[contains(., "Estimated Cost")]/parent::div/parent::div').text 382 | return float_money(res.split('\n')[-1]) 383 | 384 | def _make_order(self): 385 | self.driver.find_element_by_xpath('//button[@data-testid="OrderFormControls-Submit"]').click() 386 | 387 | def _done_after_order(self): 388 | self.driver.find_element_by_xpath('//button[@data-testid="OrderFormDone"]').click() 389 | 390 | def _order_status(self): 391 | """ 392 | pending buy order: {'type': 'Limit Buy', 'date': 'May 21, 
# --- src/tradebot.py (class interior; enclosing class begins above this chunk) ---

    def _order_status(self):
        """
        Parse the most recent order's status panel on the current stock page.

        Example panel contents (split on newline):
          queued/placed (3 lines):  type, date, status ('Pending'/'Placed'/...)
          completed (4 lines):      type, elapsed time, cost, '<shares> shares at <price>'
            e.g. {'type': 'Limit Buy', 'date': '15m', 'cost': 1767.0,
                  'shares': 5, 'price': 353.4, 'status': 'Done'}
        :return: dict describing the order; {'status': None, 'raw': <text>}
                 when the panel text has an unrecognized shape
        """
        res = self.driver.find_element_by_xpath('//header[@data-testid="rh-ExpandableItem-buttonContent"]').text
        tmp = res.split('\n')
        if len(tmp) == 3:
            status = {
                'type': tmp[0],
                'time': tmp[1],  # date when queued but elapse time since cancel when caceled
                'status': tmp[2]
            }
        elif len(tmp) == 4:
            # completed order: last line looks like "<shares> shares at <price>"
            tmp2 = tmp[-1].split()
            status = {
                'type': tmp[0],
                'time': tmp[1],
                'value': float_money(tmp[2]),
                'shares': int(tmp2[0]),
                'price': float_money(tmp2[-1]),
                'status': 'Done'
            }
        else:
            # unrecognized layout; keep raw text so the caller can debug
            status = {
                'status': None,
                'raw': res
            }
        return status

    def _cancel_order(self):
        """Expand the recent-order panel and click 'Cancel Order', retrying up
        to 10 times (the link may not be rendered immediately)."""
        self.driver.find_element_by_xpath('//button[@data-testid="rh-ExpandableItem-button"]').click()
        i_try = 0
        while i_try < 10:
            try:
                self.driver.find_element_by_xpath('//a[text()[contains(., "Cancel Order")]]').click()
                break
            except Exception as e:
                if i_try < 9:
                    i_try += 1
                    sleep(1)
                else:
                    raise e

    def _available_shares(self):
        """
        Return the number of shares owned of the current stock, parsed from
        the page's "grid-2" panel; 0 when absent or unparsable.
        """
        if 'class="grid-2"' in self.driver.page_source:
            text = self.driver.find_element_by_xpath('//div[@class="grid-2"]').text
            if text:
                res = regxs['rh_shares'].findall(text)
                if res and res[0]:
                    if len(res) > 1:
                        print('! _available_shares: more than 1 regex match: {}'.format(res))
                    try:
                        return int(res[0].replace(',', ''))
                    except Exception as e:
                        print('! failed to parse shares: {}'.format(e))
        return 0

    def _can_trade(self):
        """True when the current page is tradable (no 'Page not found' /
        'not supported' marker in the page source)."""
        if 'Page not found' not in self.driver.page_source and 'not supported' not in self.driver.page_source:
            return True
        else:
            return False

    def make_order(self, method, symbol, shares=None, order_type='market', price=None):
        """
        Place a buy or sell order for symbol through the web UI.

        :param method: 'buy' or 'sell'
        :param symbol: stock ticker
        :param shares: int; required for buy, defaults to all owned shares for sell
        :param order_type: 'market' or 'limit'
        :param price: limit price; required when order_type == 'limit'
        :return: True when the order was submitted, False otherwise
        :raises ValueError: on invalid argument combinations
        """
        if method not in ['buy', 'sell']:
            raise ValueError('Order method can only be buy or sell, not: {}'.format(method))

        if method == 'buy' and shares is None:
            raise ValueError('Need shares to make buy order')

        if order_type not in ['market', 'limit']:
            raise ValueError('"{}" is not a valid order type'.format(order_type))

        if order_type == 'limit' and price is None:
            raise ValueError('Need price for Limit Order')

        if shares is not None and not isinstance(shares, int):
            raise ValueError('shares must be an integer')

        if price is not None and not isinstance(price, float) and not isinstance(price, int):
            raise ValueError('price must be int or float')

        # 1. navigate to or make new tab for stock
        self.new_tab_stock(symbol)

        if method == 'buy':
            # check if tradable
            if not self._can_trade():
                print('Cannot trade this stock:', symbol)
                return False

            # 2. set to Buy
            self._order_method('buy')

            # 3. check portfolio buy power to make sure this purchase is possible
            self.get_portfolio_values()
            if order_type == 'limit':
                cost = shares * price
            else:
                stock_price = self._current_market_price()
                cost = shares * stock_price
                print('{} is trading at {}, {} shares is {}'.format(
                    symbol, stock_price, shares, cost))
            if cost >= self.buy_power:
                print('Not enough buying power ({}) for this'
                      ' order, cost: {}'.format(self.buy_power, cost))
                return False
        else:
            # 2. set to Sell
            self._order_method('sell')

            # 3. check if there are enough shares
            shares_avail = self._available_shares()
            if shares_avail == 0:
                print('No shares of {} to sell'.format(symbol))
                return False

            if shares is None:
                shares = shares_avail
            elif shares > shares_avail:
                print('Not enough shares of {}, own {}, trying to'
                      ' sell {}'.format(symbol, shares_avail, shares))
                return False

        # 4. set order type
        self._change_order_type(order_type)

        # 5. enter shares
        self._send_shares(shares)

        # 6. if not "Market Order", add addition requirements
        if order_type == 'limit':
            self._set_limit_price(price)

        # 7. go to Review
        if price is None:
            price = self._current_market_price()
        self._review_order()

        # 8. buy
        self._make_order()
        self._done_after_order()

        # add to order history
        order = Order(symbol, method, shares, price)
        self.orders.append(order)
        return True

    def cancel_order(self, symbol):
        """
        Cancel the most recent order for symbol.

        :param symbol: stock ticker
        :return: True on successful cancel, False otherwise
        """
        if not self.orders.has_stock(symbol):
            print('No orders for', symbol)
            return False

        # either to go stock tab or create new
        self.new_tab_stock(symbol)

        recent_order = self.orders.stock_recent_order(symbol)
        if recent_order.order_status is None:
            i_try = 0
            while recent_order.order_status is None and i_try < 10:
                sleep(1)
                # NOTE(review): the loop condition tests .order_status but this
                # assigns .status — confirm Order exposes order_status as a
                # property backed by .status; otherwise this loop always runs
                # the full 10 iterations.
                recent_order.status = self._order_status()
                i_try += 1
            if recent_order.order_status is None:
                print('Cannot cancel ({}): could not get status of most recent order'.format(symbol))
                return False

        if recent_order.order_status == 'Done':
            print('Cannot cancel ({}): order already went through'.format(symbol))
            return False
        elif recent_order.order_status == 'Canceled':
            print('Recent order for {} is already canceled'.format(symbol))
            return False

        try:
            self._cancel_order()
        except Exception as e:
            print('Cannot cancel ({}): cancel failed:\n{}'.format(symbol, e))
            return False
        return True

    def order_status(self, symbol):
        """
        Return the parsed status dict of the most recent order for symbol
        (see _order_status), or None when no order for it exists.
        """
        if not self.orders.has_stock(symbol):
            print('No orders for', symbol)
            return None

        # either to go stock tab or create new
        self.new_tab_stock(symbol)

        return self._order_status()

# -------------------------------- /src/scraper.py --------------------------------
#!/usr/bin/env python3

# Import
import requests
import re
import os
import json
import datetime
import pandas as pd
import numpy as np
import pickle
import subprocess
from time import sleep, time
from lxml import html
from src.common import gen_rand_name, gen_symbol_batches, get_wallstreet_time
from backends.signinja.utils import headless_login, predefined_auth
from multiprocessing import Pool
from platform import mac_ver
from src.constants import (DIR_DATA, FILES_LISTINGS, FILE_ALL_SYMBOLS,
                           FILE_EXCLUDED_SYMBOLS, URL_YAHOO, URL_YAHOO_DAILY,
                           URL_ALPHA_VANTAGE_INTRADAY, ALPHA_VANTAGE_API_KEY,
                           FILE_EODDATA_AUTH, URL_EODDATA_GET_SYMBOLS,
                           EODDATA_EXCHANGES, URL_YAHOO_FINANCIALS, DIR_FINANCIALS,
                           URL_YAHOO_OPTIONS, DIR_OPTIONS, GECKODRIVER_PATH, SERVICE_LOG)


# To by-pass Mac's new security things that causes multiprocessing to crash
v = None
try:
    v = mac_ver()
except:
    print('Did not detect MAC')

if v and v[0] and int(v[0].split('.')[0]) >= 10:
    print('Detected Mac > High Sierra, deploy multiprocessing fix')
    try:
        # NOTE(review): `export` in a child shell cannot change this process's
        # environment — presumably os.environ was intended; confirm.
        _ = subprocess.Popen('export OBJC_DISABLE_INITIALIZE_FORK_SAFETY=YES', shell=True)
    except:
        print('\tFailed to send fix: export OBJC_DISABLE_INITIALIZE_FORK_SAFETY=YES')
# Classes
# consider segmenting this into separate classes for getting stock data and processing for ML
class Stock:
    """
    A class for scraping stock data.

    mode is either full_history, live_quotes, or intraday
    """
    def __init__(self, mode='full_history', verbose=False):

        # os.environ['TZ'] = 'America/New_York'
        # tzset()

        # offset between the local clock and Wall Street (NY) time
        self._dt_wallstreet_ = get_wallstreet_time()['datetime'] - datetime.datetime.now()
        self._dir_out_ = os.path.join(DIR_DATA, mode)
        self._date_format_ = '%Y-%m-%d'
        self._date_time_format_ = '%Y-%m-%d-%H-%M-%S'
        self._max_connection_attempts_ = 20

        self.rexp_live_dollar = re.compile(r'starQuote.*?<')
        self.rexp_live_volume = re.compile(r'olume<.+?')
        self.rexp_yahoo_prices_list = re.compile(r'"prices":\[.*?\]')

        self.n_cpu = os.cpu_count()
        self.verbose = verbose
        self.symbs = list()
        self.live_now = False
        self.dfs = None
        self.alpha_vantage_state = 'ok'

        if mode and not os.path.isdir(self._dir_out_):
            os.makedirs(self._dir_out_)
            print('Created: %s' % self._dir_out_)

    def pull_daily(self, symb, period1=0, period2=0):
        """
        Grabs historical data for symbol from dates period1 to period2.

        :param symb: symbol to grab
        :param period1: starting time period in seconds
        :param period2: ending time period in seconds (defaults to now)
        :return: dict of lists keyed date/volume/open/high/low/close/adjclose,
                 or -1 on failure
        """
        # grab prices up to the current day if period2 not set
        if not period2:
            period2 = datetime.datetime.now().timestamp()

        # url to grab prices
        url = URL_YAHOO_DAILY % (symb, period1, period2)

        # try requesting for daily history
        r, msg = self._try_request(url)

        # if request failed
        if r is None or r.status_code != requests.codes.ok:
            if self.verbose:
                print('Page load failed for {}: {}'.format(symb, msg))
            return -1

        # this parse the list of prices under "HistoricalPriceStore"
        res = self.rexp_yahoo_prices_list.search(r.text)
        if res:
            # bug fix: reuse the match we already have instead of running an
            # identical re.search over the whole page a second time
            price_data = json.loads('{%s}' % res.group(0))['prices']
        else:
            print('Cannot find data for %s' % symb)
            return -1

        data = {
            'date': [],
            'volume': [],
            'open': [],
            'high': [],
            'low': [],
            'close': [],
            'adjclose': [],
        }
        for d in price_data:
            # TODO - rows without volume (or any of these metrics)
            # are likely Splits. Need to store this data somehow
            # currently, ignoring them
            if d.get('volume', None):
                timestamp = datetime.datetime.fromtimestamp(int(d['date'])) \
                    + self._dt_wallstreet_
                data['date'].append(timestamp.strftime(self._date_format_))
                data['volume'].append(d.get('volume', None))
                data['open'].append(d.get('open', None))
                data['high'].append(d.get('high', None))
                data['low'].append(d.get('low', None))
                data['close'].append(d.get('close', None))
                data['adjclose'].append(d.get('adjclose', None))

        if self.verbose:
            print('Pulled %d days of %s' % (len(data['date']), symb))

        return data

    def write_data(self, data, symb, dir_out='', columns=None, fmt='csv'):
        """
        Writes pricing data out to csv file as dir_out/symb.csv.

        :param data: dict of lists, pricing data
        :param symb: string, stock symbol for pricing data
        :param dir_out: string, the directory to write to (defaults to self._dir_out_)
        :param columns: list or None, the order of columns (defaults to data's keys)
        :param fmt: string, output format/extension, default csv
        :return: p_out, string of 'dir_out/symb.csv'
        """
        if not dir_out:
            dir_out = self._dir_out_
        p_out = os.path.join(dir_out, '%s.%s' % (symb, fmt))

        if not columns:
            columns = list(data.keys())

        with open(p_out, 'w+') as f:
            _ = f.write(','.join(columns) + '\n')
            for i in range(len(data[columns[0]])):
                out_str = ','.join(['{}'.format(data[c][i]) for c in columns]) + '\n'
                _ = f.write(out_str)

        if self.verbose:
            print('Wrote %s data to %s' % (symb, p_out))

        return p_out

    def get_all_symbols(self, try_compiled=True):
        """
        Returns a sorted list of all symbols from the all_symbols.txt file if
        present, else from the listing files in DIR_LISTINGS, else by scraping
        eoddata.com.

        :param try_compiled: bool, whether to attempt getting symbols from all_symbols.txt
        :return: sorted_symbs, a sorted list of stock symbols
        """
        symbs = []

        if try_compiled and os.path.isfile(FILE_ALL_SYMBOLS):

            if self.verbose:
                print('Using %s for symbols ...' % FILE_ALL_SYMBOLS)

            with open(FILE_ALL_SYMBOLS, 'r') as f:
                symbs = list(set(f.read().strip().split('\n')))

        elif FILES_LISTINGS:

            # Remove subgroup extensions of tickers,
            # i.e. ABCD-A -> ABCD
            # Some stocks do need it, but this is not
            # compatible with Yahoo Finance at the moment
            # So currently, just removed, and hope for
            # the best. Need improvement!
            rexp = re.compile(r'-[a-zA-Z]$')

            for ex_name, f_listing in FILES_LISTINGS.items():
                symbs += [rexp.sub('', s) for s in
                          pd.read_table(f_listing)['Symbol'].values]

            # remove redundant symbols
            symbs = list(set(symbs))

        elif FILE_EODDATA_AUTH and EODDATA_EXCHANGES:
            if self.verbose:
                print('Attempting to get symbols from eoddata.com ...')
            auth = predefined_auth('eoddata', username='foo', password='bar')
            # bug fix: close the credentials file (was opened without closing)
            with open(FILE_EODDATA_AUTH, 'r') as f:
                auth['username'][1], auth['password'][1] = f.read().strip().split('\n')
            if self.verbose:
                print('Try signing into eoddata.com ...')
            session = headless_login(auth, exe=GECKODRIVER_PATH, log_path=SERVICE_LOG)
            if self.verbose:
                print('Signed into eoddata.com')

            def _verify_response(parsed_symbols, maxlen=10, cutoff=0.95):
                # if parsed_symbols do actually contains symbols
                # almost all of the values should be less than 10 chars
                if not parsed_symbols:
                    # bug fix: an empty response used to raise ZeroDivisionError
                    return False
                ratio = len([1 for s in parsed_symbols if len(s) <= maxlen]) \
                    / len(parsed_symbols)
                return ratio >= cutoff

            for exchange in EODDATA_EXCHANGES:
                r = session.get(URL_EODDATA_GET_SYMBOLS % exchange)
                # first column of the tab-separated body, skipping the header row
                s = [i.split('\t')[0] for i in r.text.strip().split('\n')][1:]
                if _verify_response(s):
                    symbs += s
                    if self.verbose:
                        print('Got %d symbols for %s from eoddata.com'
                              % (len(s), exchange))

        elif self.verbose:
            print('Missing symbol file.')

        if symbs and self.verbose:
            print('\tFound %d symbols' % len(symbs))

        sorted_symbs = sorted(symbs)
        return sorted_symbs
def pull_daily_and_write_batch(self, symbs=None, p_symbs='', i_pass=1, max_pass=2):
    """
    Grabs pricing history for all stock symbols in symbs and writes out data
    to csv's by calling pull_daily_and_write(symbol) for each symbol.
    Uses multiple CPUs if available.

    :param symbs: list of stock symbols to retrieve from yahoo
    :param p_symbs: file containing list of symbols for symbs
    :param i_pass: current attempt at grabbing failed symbols
    :param max_pass: max attempts to grab failed symbols
    :return: None; csv files are written as a side effect
    """
    t0 = time()

    if p_symbs:
        if os.path.isfile(p_symbs):
            with open(p_symbs, 'r') as f:
                symbs = f.read().strip().split('\n')
        else:
            print('No such file: %s' % p_symbs)
            if symbs is None:
                # bug fix: with no symbol list there is nothing to pull;
                # previously this fell through and crashed on len(None)
                return

    elif symbs is None:
        symbs = self.get_all_symbols()
        updated_symbs = self.get_updated_symbs()
        if updated_symbs:
            # skip symbols whose files were already refreshed today
            symbs = list(set(symbs) - set(updated_symbs))
            if self.verbose:
                print('Skip pull for %d symbols, only pulling for %d symbols ...' % (len(updated_symbs), len(symbs)))

    n_symbs = len(symbs)
    n_success = 0
    failed_symbs = []
    print('Pulling for %d symbols ...' % n_symbs)

    if self.n_cpu > 1:

        print('\tUsing %d CPUs' % self.n_cpu)

        # To avoid getting blocked by Yahoo, pause for 10 - 20 seconds after 100 symbols
        symb_batches = gen_symbol_batches(symbs, batch_size=100)
        n_symb_completed = 0

        for batch in symb_batches:
            with Pool(processes=self.n_cpu) as pool:
                res = pool.map(self.pull_daily_and_write, batch)

            for symb, success in res:
                if success:
                    n_success += 1
                else:
                    failed_symbs.append(symb)

            n_symb_completed += len(batch)
            if self.verbose:
                print('{0:.1f}% completed - {1:.0f} / {2:.0f}'.format(
                    n_symb_completed / n_symbs * 100, n_symb_completed, n_symbs))

            # pause to avoid Yahoo block
            if n_symb_completed != n_symbs:
                tpause = np.random.randint(10, 21)
                print('Pause for %d seconds' % tpause)
                sleep(tpause)

    else:

        for i, symb in enumerate(symbs):

            if self.verbose:
                print('Pulling %d / %d - %s ...' % (i + 1, n_symbs, symb))

            data = self.pull_daily(symb)
            if data != -1:
                _ = self.write_data(data, symb)
                n_success += 1
            else:
                failed_symbs.append(symb)

    if self.verbose:
        print('\nRetrieved full histories for %d / %d symbols' % (n_success, n_symbs))

    if failed_symbs:
        print('Failed for:')
        for symb in failed_symbs:
            print('\t%s' % symb)

        if i_pass < max_pass:
            i_pass += 1
            print('\n|--- Pass %d (try to fetch %d failed ones, maximum %d passes ---|' % (i_pass, len(failed_symbs), max_pass))
            # bug fix: pass p_symbs='' so the retry actually uses failed_symbs
            # instead of re-reading the original symbol file
            self.pull_daily_and_write_batch(symbs=failed_symbs, p_symbs='', i_pass=i_pass, max_pass=max_pass)
        else:
            p_symbs_fail = 'failed_symbs-%s.txt' % (''.join(np.random.choice(list('abcdefgh12345678'), 5)))
            with open(p_symbs_fail, 'w+') as f:
                _ = f.write('\n'.join(failed_symbs))
            print('Failed symbols written to: %s' % p_symbs_fail)
            print('Run this to try fetching the missed symbols again:\npython3 redtide.py -v -d --file %s' % p_symbs_fail)

    print('\tTime elapsed: %.2f hours\n' % ((time() - t0) / 3600))

    return
def get_updated_symbs(self):
    """Return symbols whose data files were modified after today's market close.

    A file under self._dir_out_ whose mtime is later than 4 PM New York time
    today is assumed to already contain today's end-of-day data.
    """
    now = datetime.datetime.today()
    close_ts = (datetime.datetime(year=now.year, month=now.month, day=now.day, hour=16)
                - self._dt_wallstreet_).timestamp()

    updated = [fname.replace('.csv', '')
               for fname in os.listdir(self._dir_out_)
               if os.path.getmtime(os.path.join(self._dir_out_, fname)) > close_ts]

    if self.verbose:
        print('Full history up-to-date for %d symbols' % len(updated))

    return updated


def pull_daily_and_write(self, symb):
    """Pull the full daily history for symb and write it to csv.

    :param symb: stock symbol
    :return: (symb, success) tuple
    """
    if self.verbose:
        print('Pulling %s ...' % symb)

    data = self.pull_daily(symb)
    if data == -1:
        # nothing fetched; report failure for this symbol
        return symb, False

    _ = self.write_data(data, symb)
    return symb, True
def get_full_histories_from_file(self, symbs=None):
    """Read per-symbol price-history csv files into pandas DataFrames.

    Populates self.dfs as {symbol: DataFrame indexed by date}; symbols
    without a data file are skipped.

    :param symbs: symbols to load; defaults to get_all_symbols()
    :return: self (for chaining)
    """
    self.dfs = {}

    if not symbs:
        symbs = self.get_all_symbols()

    if self.verbose:
        print('Reading %d full histories to dataframes ...' % len(symbs))

    for symb in symbs:

        p_data = os.path.join(self._dir_out_, '%s.csv' % symb)

        if not os.path.isfile(p_data):
            if self.verbose:
                print('No full history available: %s' % symb)
            continue

        df = pd.read_csv(p_data, index_col='date', parse_dates=True)
        self.dfs[symb] = df

    if self.verbose:
        print('Read %d / %d full histories to dataframes' % (len(self.dfs), len(symbs)))

    return self


def _save_yahoo_store(self, url, symb, store, out_dir, label, as_json):
    """Fetch a Yahoo Finance page, extract one store from its embedded
    '{"context"...}' JSON state blob, and persist it as <out_dir>/<symb>.json
    (as_json=True) or .pkl.

    Shared implementation for save_financial_data / save_options_data.
    """
    r, _ = self._try_request(url)
    if r is None or r.status_code != requests.codes.ok \
            or 'Symbol Lookup' in r.text[:300]:
        print('%s does not have %s' % (symb, label))
        return

    try:
        root = html.fromstring(r.text)
        node = root.xpath("//script[contains(text(), '{\"context')]")[0]
        i0 = node.text.find('{"context')
        i1 = node.text.rfind('};') + 1
        json_data = json.loads(node.text[i0:i1])['context']['dispatcher']['stores'][store]
        if as_json:
            # bug fix: use a context manager so the handle is closed
            # (was json.dump(..., open(...)) which leaked the file object)
            with open(os.path.join(out_dir, symb + '.json'), 'w+') as f:
                json.dump(json_data, f)
        else:
            with open(os.path.join(out_dir, symb + '.pkl'), 'wb') as f:
                pickle.dump(json_data, f)
    except Exception:
        if self.verbose:
            # bug fix: original did "'... {}' % symb" which raises TypeError
            # and masked the real parsing error
            print('Could not parse {} for {}'.format(label, symb))
        raise

    return


def save_financial_data(self, symb, as_json=False):
    """Save Yahoo's QuoteSummaryStore (company financials) for symb into
    DIR_FINANCIALS as json or pickle."""
    _save_yahoo_store(self, URL_YAHOO_FINANCIALS % symb, symb,
                      'QuoteSummaryStore', DIR_FINANCIALS, 'financials', as_json)
    return


def save_options_data(self, symb, as_json=False):
    """Save Yahoo's OptionContractsStore (options chain) for symb into
    DIR_OPTIONS as json or pickle."""
    _save_yahoo_store(self, URL_YAHOO_OPTIONS % symb, symb,
                      'OptionContractsStore', DIR_OPTIONS, 'options', as_json)
    return


def compile_symbols(self, p_symbs=None, append=False, batch_size=-1):
    """Verify candidate symbols against Yahoo Finance; write valid ones to
    FILE_ALL_SYMBOLS and invalid ones to FILE_EXCLUDED_SYMBOLS.

    :param p_symbs: optional newline-separated symbol file; defaults to
                    get_all_symbols(try_compiled=False)
    :param append: append to (then de-duplicate) the output files instead of
                   overwriting
    :param batch_size: symbols per multiprocessing batch; -1 picks a random
                       size in [30, 100)
    """
    print('Compiling symbols ...')

    if p_symbs:
        with open(p_symbs, 'r') as f:
            self.symbs = f.read().strip().split('\n')
    else:
        self.symbs = self.get_all_symbols(try_compiled=False)
    n_symbs = len(self.symbs)
    n_compiled = 0
    n_excluded = 0

    # Initialize/clear write files
    if not append:
        with open(FILE_ALL_SYMBOLS, 'w+') as f, open(FILE_EXCLUDED_SYMBOLS, 'w+') as fx:
            _ = f.write('')
            _ = fx.write('')

    if self.verbose:
        print('Looking up symbols on Yahoo Finance ...')

    if batch_size == -1:
        batch_size = np.random.randint(30, 100)

    symb_batches = gen_symbol_batches(self.symbs, batch_size=batch_size)
    n_symb_completed = 0
    for batch in symb_batches:
        with Pool(processes=self.n_cpu, maxtasksperchild=1) as pool:
            res = pool.map(self._compile_symb, batch)

        with open(FILE_ALL_SYMBOLS, 'a+') as f, open(FILE_EXCLUDED_SYMBOLS, 'a+') as fx:
            for symb, success in res:
                if success:
                    _ = f.write('%s\n' % symb)
                    n_compiled += 1
                else:
                    _ = fx.write('%s\n' % symb)
                    n_excluded += 1

        n_symb_completed += len(batch)
        if self.verbose:
            print('{0:.1f}% completed - {1:.0f} / {2:.0f}'.format(n_symb_completed / n_symbs * 100, n_symb_completed, n_symbs))

        # random pause so the batched lookups look less bot-like
        tpause = np.random.randint(5, 11)
        print('Pause for %s seconds' % tpause)
        sleep(tpause)

    # Remove duplicates when using append
    if append:
        with open(FILE_ALL_SYMBOLS, 'r') as f:
            symbs = set(f.read().strip().split('\n'))
        with open(FILE_ALL_SYMBOLS, 'w+') as f:
            _ = f.write('\n'.join(symbs))

    print('Started with: %d\nCompiled: %d\nExcluded: %d\nCompiled to: %s' % (n_symbs, n_compiled, n_excluded, FILE_ALL_SYMBOLS))

    return
505 | for symb, success in res: 506 | if success: 507 | _ = f.write('%s\n' % symb) 508 | n_compiled += 1 509 | else: 510 | _ = fx.write('%s\n' % symb) 511 | n_excluded += 1 512 | 513 | n_symb_completed += len(batch) 514 | if self.verbose: 515 | print('{0:.1f}% completed - {1:.0f} / {2:.0f}'.format(n_symb_completed / n_symbs * 100, n_symb_completed, n_symbs)) 516 | 517 | tpause = np.random.randint(5, 11) 518 | print('Pause for %s seconds' % tpause) 519 | sleep(tpause) 520 | 521 | # Remove duplicates when using append 522 | if append: 523 | with open(FILE_ALL_SYMBOLS, 'r') as f: 524 | symbs = set(f.read().strip().split('\n')) 525 | with open(FILE_ALL_SYMBOLS, 'w+') as f: 526 | _ = f.write('\n'.join(symbs)) 527 | 528 | print('Started with: %d\nCompiled: %d\nExcluded: %d\nCompiled to: %s' % (n_symbs, n_compiled, n_excluded, FILE_ALL_SYMBOLS)) 529 | 530 | return 531 | 532 | def _try_request(self, url): 533 | 534 | r = None 535 | n_tries = 0 536 | msg = '' 537 | while n_tries < self._max_connection_attempts_: 538 | try: 539 | r = requests.get(url) 540 | if r.status_code == 200: 541 | break 542 | else: 543 | n_tries += 1 544 | if n_tries >= self._max_connection_attempts_: 545 | msg = 'Try exceeded, response code {}'.format(r.status_code) 546 | except(KeyboardInterrupt, SystemExit): 547 | raise 548 | except Exception as e: 549 | n_tries += 1 550 | if n_tries >= self._max_connection_attempts_: 551 | msg = e 552 | 553 | return r, msg 554 | 555 | def _check_symbol_(self, symb, parse_financials=True, parse_options=False): 556 | 557 | """ 558 | Check whether a symbol exists on Yahoo Finance, where historical data will be retrieved 559 | 560 | :param symb: String 561 | :param parse_financials: get all financial data on the company 562 | :return: 1 - found, 0 - not found, -1 - connection error, -x - request message code x 563 | """ 564 | 565 | msg = 0 566 | 567 | # Try original symbol string 568 | url = URL_YAHOO % symb 569 | r, _ = self._try_request(url) 570 | if r is None: 571 
| msg = -1 572 | else: 573 | if r.status_code != requests.codes.ok: 574 | msg = -r.status_code 575 | elif 'Symbol Lookup' not in r.text[:300]: 576 | msg = 1 577 | 578 | # Try replace . with - if do-able 579 | if msg != 1 and '.' in symb: 580 | url = URL_YAHOO % symb.replace('.', '-') 581 | r, _ = self._try_request(url) 582 | if r is None: 583 | msg = -1 584 | else: 585 | if r.status_code != requests.codes.ok: 586 | msg = -r.status_code 587 | elif 'Symbol not found' not in r.text[:250]: 588 | msg = 1 589 | symb = symb.replace('.', '-') 590 | 591 | # parse financial data 592 | if parse_financials and msg == 1 and 'financials?' in r.text: 593 | self.save_financial_data(symb) 594 | 595 | # parse options data 596 | if parse_options and msg == 1 and 'options?' in r.text: 597 | self.save_options_data(symb) 598 | 599 | return msg, symb 600 | 601 | def _compile_symb(self, symb): 602 | 603 | msg, symb = self._check_symbol_(symb) 604 | if msg < 1: 605 | 606 | if self.verbose: 607 | if msg == 0: 608 | print('%s excluded - not found' % symb) 609 | elif msg == -1: 610 | print('%s excluded - max connection attempt reached' % symb) 611 | else: 612 | print('%s excluded - request code: %d' % (symb, -msg)) 613 | 614 | return symb, False 615 | 616 | return symb, True 617 | 618 | def concat(self, from_date=None, to_date=None, p_out=None, return_df=False): 619 | 620 | if not to_date: 621 | to_date = datetime.datetime.now().strftime('%Y-%m-%d') 622 | 623 | if not from_date: 624 | # Default 60 days 625 | from_date = (datetime.datetime.strptime(to_date, '%Y-%m-%d') - datetime.timedelta(days=60)).strftime('%Y-%m-%d') 626 | 627 | if not p_out: 628 | p_out = os.path.join(DIR_DATA, gen_rand_name()) 629 | 630 | symbs = self.get_all_symbols() 631 | n_symbs = len(symbs) 632 | 633 | print('Concatenating %d symbols between %s - %s ...' 
% (n_symbs, from_date, to_date)) 634 | 635 | new_file = True 636 | for i, symb in enumerate(symbs): 637 | 638 | p_data = os.path.join(self._dir_out_, '%s.csv' % symb) 639 | 640 | if not os.path.isfile(p_data): 641 | if self.verbose: 642 | print('No full history available: %s' % symb) 643 | continue 644 | 645 | dtmp = pd.read_csv(p_data, index_col='date', parse_dates=True)[to_date:from_date].assign(symbol=symb) 646 | if new_file: 647 | dtmp.to_csv(p_out, mode='w+', header=True) 648 | new_file = False 649 | else: 650 | dtmp.to_csv(p_out, mode='a', header=False) 651 | 652 | if (i + 1) % 400 == 0: 653 | print('{0:.1f}% completed'.format((i+1)/n_symbs*100)) 654 | 655 | print('Concatenated to', p_out) 656 | 657 | if return_df: 658 | print('Building dataframe...') 659 | df = pd.read_csv(p_out, index_col='date', parse_dates=True) 660 | return df 661 | else: 662 | return None 663 | 664 | def pull_intraday(self, symb, interval='1min'): 665 | 666 | api_params = { 667 | 'symbol': symb, 668 | 'interval': interval, 669 | 'apikey': ALPHA_VANTAGE_API_KEY, 670 | } 671 | data = None 672 | 673 | url = URL_ALPHA_VANTAGE_INTRADAY.format(**api_params) 674 | try: 675 | r = requests.get(url) 676 | if r.status_code == 200: 677 | time_series = r.json().get('Time Series ({})'.format(interval), None) 678 | data = { 679 | 'datetime': [], 680 | 'open': [], 681 | 'high': [], 682 | 'low': [], 683 | 'close': [], 684 | 'volume': [], 685 | } 686 | if time_series: 687 | def to_numeric(val, dtype=float): 688 | if val is None: 689 | return None 690 | try: 691 | return dtype(val) 692 | except: 693 | if self.verbose: 694 | print('Unknown Alpha Vantage value: {}'.format(val)) 695 | return val 696 | 697 | for timestamp, values in time_series.items(): 698 | data['datetime'].append(timestamp) 699 | data['open'].append(to_numeric(values.get('1. open', None))) 700 | data['high'].append(to_numeric(values.get('2. high', None))) 701 | data['low'].append(to_numeric(values.get('3. 
low', None))) 702 | data['close'].append(to_numeric(values.get('4. close', None))) 703 | data['volume'].append(to_numeric(values.get('5. volume', None), dtype=int)) 704 | # else: 705 | # with self.lock: 706 | # self.alpha_vantage_state = 'max_per_min_reached' 707 | # TODO - complete max limit exceed, break-out logic 708 | print('Response from Alpha Vantage is not ok, response {}'.format(r.status_code)) 709 | except Exception as e: 710 | print('Failed to pull intraday for {}: {}'.format(symb, e)) 711 | 712 | return data 713 | 714 | def pull_intraday_batch_and_write(self, symbs=None, p_symbs='', i_pass=1, max_pass=5, interval='1min'): 715 | """ 716 | Using Alpha Vantage's API to get intraday data for all symbols in symbs. 717 | :param symbs: 718 | :param p_symbs: 719 | :param i_pass: 720 | :param max_pass: 721 | :param interval: 722 | :return: 723 | """ 724 | 725 | t0 = time() 726 | 727 | if p_symbs: 728 | if os.path.isfile(p_symbs): 729 | with open(p_symbs, 'r') as f: 730 | symbs = f.read().strip().split('\n') 731 | else: 732 | print('No such file: %s' % p_symbs) 733 | 734 | elif symbs is None: 735 | symbs = self.get_all_symbols() 736 | updated_symbs = self.get_updated_symbs() 737 | if updated_symbs: 738 | symbs = list(set(symbs) - set(updated_symbs)) 739 | if self.verbose: 740 | print( 741 | 'Skip pull for %d symbols, only pulling for %d symbols ...' % (len(updated_symbs), len(symbs))) 742 | 743 | n_symbs = len(symbs) 744 | n_success = 0 745 | failed_symbs = [] 746 | print('Pulling for %d symbols ...' 
% n_symbs) 747 | 748 | if self.n_cpu > 1: 749 | 750 | print('\tUsing %d CPUs' % self.n_cpu) 751 | 752 | symb_batches = gen_symbol_batches(symbs, batch_size=5) 753 | n_symb_completed = 0 754 | 755 | # for each batch of symbols have a worker thread execute self.retrieve_symb(batch) 756 | # which then calls write_history to write our pricing csv 757 | for batch in symb_batches: 758 | with Pool(processes=self.n_cpu) as pool: 759 | res = pool.map(self.pull_daily_and_write, batch) 760 | 761 | for symb, success in res: 762 | if success: 763 | n_success += 1 764 | else: 765 | failed_symbs.append(symb) 766 | 767 | n_symb_completed += len(batch) 768 | if self.verbose: 769 | print('{0:.1f}% completed - {1:.0f} / {2:.0f}'.format(n_symb_completed / n_symbs * 100, 770 | n_symb_completed, n_symbs)) 771 | 772 | else: 773 | 774 | for i, symb in enumerate(symbs): 775 | 776 | if self.verbose: 777 | print('Pulling %d / %d - %s ...' % (i + 1, n_symbs, symb)) 778 | 779 | data = self.pull_daily(symb) 780 | if data != -1: 781 | _ = self.write_data(data, symb) 782 | n_success += 1 783 | else: 784 | failed_symbs.append(symb) 785 | 786 | if self.verbose: 787 | print('\nRetrieved intraday for %d / %d symbols' % (n_success, n_symbs)) 788 | 789 | # does this suggest failed_symbs should be it's own keyword arg? 
790 | if failed_symbs: 791 | print('Failed for:') 792 | for symb in failed_symbs: 793 | print('\t%s' % symb) 794 | 795 | if i_pass < max_pass: 796 | i_pass += 1 797 | print('\n|--- Pass %d (try to fetch %d failed ones, maximum %d passes ---|' % ( 798 | i_pass, len(failed_symbs), max_pass)) 799 | self.pull_intraday_batch_and_write(symbs=failed_symbs, p_symbs=p_symbs, i_pass=i_pass, max_pass=max_pass, interval=interval) 800 | else: 801 | p_symbs_failed = 'failed_symbs-%s.txt' % (''.join(np.random.choice(list('abcdefgh12345678'), 5))) 802 | with open(p_symbs_failed, 'w+') as f: 803 | _ = f.write('\n'.join(failed_symbs)) 804 | print('Failed symbols written to: %s' % p_symbs_failed) 805 | print('Run this to try fetching the missed symbols again:\npython3 redtide.py -v -d --file %s' % p_symbs_failed) 806 | 807 | print('\tTime elapsed: %.2f hours\n' % ((time() - t0) / 3600)) 808 | 809 | return 810 | --------------------------------------------------------------------------------