├── __init__.py
├── .vscode
│   ├── last.sql
│   └── temp.sql
├── backtest_strategies
│   ├── __init__.py
│   ├── utilities
│   │   ├── __init__.py
│   │   ├── momentum_crashes
│   │   │   ├── __init__.py
│   │   │   └── momentum_strategy_helpers.py
│   │   ├── rays_long_short_strategy
│   │   │   ├── __init__.py
│   │   │   └── rays_long_short_strategy_helpers.py
│   │   └── helper_functions.py
│   ├── aapl_backtest.py
│   ├── rays_long_short_strategy.py
│   └── momentum_crashes_backtest.py
├── .gitignore
├── .idea
│   ├── vcs.xml
│   ├── modules.xml
│   ├── misc.xml
│   ├── springbok-shared.iml
│   └── workspace.xml
├── mkdirs.py
├── .zipline
│   └── extension.py
├── requirements.txt
├── readme.md
└── process_data.py

/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/.vscode/last.sql:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/.vscode/temp.sql:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/backtest_strategies/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/backtest_strategies/utilities/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/backtest_strategies/utilities/momentum_crashes/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/backtest_strategies/utilities/rays_long_short_strategy/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # ignoring csv files
2 | *.csv
3 | *.pages
4 | **/__pycache__
5 | .python-version
6 | 
7 | settings.json
--------------------------------------------------------------------------------
/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 | 
2 | 
3 | 
4 | 
5 | 
6 | 
--------------------------------------------------------------------------------
/mkdirs.py:
--------------------------------------------------------------------------------
1 | import os
2 | 
3 | # run this file first to make directories for everything
4 | 
5 | os.mkdir('data_downloads')
6 | os.mkdir('backtest_outputs')
7 | os.mkdir('processed_data')
8 | os.mkdir('processed_data/fundamentals')
9 | os.mkdir('processed_data/pricing')
10 | os.mkdir('processed_data/pricing/daily')
--------------------------------------------------------------------------------
/.idea/modules.xml:
--------------------------------------------------------------------------------
1 | 
2 | 
3 | 
4 | 
5 | 
6 | 
7 | 
8 | 
--------------------------------------------------------------------------------
/.idea/misc.xml:
--------------------------------------------------------------------------------
1 | 
2 | 
3 | 
4 | 
5 | 
7 | 
--------------------------------------------------------------------------------
/.zipline/extension.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | 3 | from zipline.data.bundles import register 4 | from zipline.data.bundles.csvdir import csvdir_equities 5 | 6 | start_session = pd.Timestamp('2013-01-02', tz='UTC') 7 | end_session = pd.Timestamp('2018-07-03', tz='UTC') 8 | 9 | register( 10 | 'sharadar-pricing', 11 | csvdir_equities( 12 | ['daily'], 13 | '/Users/calmitchell/s/Springbok-filled/processed_data/pricing', 14 | ), 15 | calendar_name='NYSE', # US equities 16 | start_session=start_session, 17 | end_session=end_session 18 | ) 19 | 20 | -------------------------------------------------------------------------------- /.idea/springbok-shared.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 16 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | alembic==0.9.10 2 | astroid==1.6.4 3 | autopep8==1.3.5 4 | bcolz==0.12.1 5 | Bottleneck==1.2.1 6 | certifi==2018.4.16 7 | chardet==3.0.4 8 | click==6.7 9 | contextlib2==0.5.5 10 | cycler==0.10.0 11 | cyordereddict==1.0.0 12 | Cython==0.28.3 13 | decorator==4.3.0 14 | empyrical==0.5.0 15 | idna==2.7 16 | intervaltree==2.1.0 17 | isort==4.3.4 18 | kiwisolver==1.0.1 19 | lazy-object-proxy==1.3.1 20 | Logbook==1.4.0 21 | lru-dict==1.1.6 22 | Mako==1.0.7 23 | MarkupSafe==1.0 24 | matplotlib==2.2.2 25 | mccabe==0.6.1 26 | multipledispatch==0.5.0 27 | networkx==1.11 28 | numexpr==2.6.5 29 | numpy==1.14.5 30 | pandas==0.18.1 31 | pandas-datareader==0.5.0 32 | patsy==0.5.0 33 | pycodestyle==2.4.0 34 | pylint==1.9.1 35 | pyparsing==2.2.0 36 | python-dateutil==2.7.3 37 | python-editor==1.0.3 38 | pytz==2018.5 39 | requests==2.19.1 40 | requests-file==1.4.3 41 | requests-ftp==0.3.1 42 | scipy==1.1.0 43 | six==1.11.0 44 | sortedcontainers==2.0.4 45 | SQLAlchemy==1.2.9 46 | statsmodels==0.9.0 47 | tables==3.4.4 48 | toolz==0.9.0 49 | urllib3==1.23 50 | wrapt==1.10.11 51 | zipline==1.2.0+67.gebcf7474 52 | -------------------------------------------------------------------------------- /backtest_strategies/utilities/momentum_crashes/momentum_strategy_helpers.py: -------------------------------------------------------------------------------- 1 | from zipline.api import order_target_percent, order_target 2 | 3 | def get_longs(context): 4 | 5 | if context.market_type == 'bull': 6 | return context.output.sort_values(['lagged_returns'])[-100:] 7 | else: 8 | return context.output.sort_values(['lagged_returns'])[:100] 9 | 10 | def get_shorts(context): 11 | if context.market_type == 'bear': 12 | return context.output.sort_values(['lagged_returns'])[:100] 13 | else: 14 | return context.output.sort_values(['lagged_returns'])[-100:] 15 | 16 | def portfolio_logic(context): 17 | longs_to_remove = [] 18 | 19 | for asset in context.longs_portfolio: # search portfolio for positions to close out 20 | if asset not in context.longs.index: 21 | longs_to_remove.append(asset) 22 | order_target(asset, 0) 23 | 24 | for asset in context.longs.index: # search context.longs for stocks to add to portfolio 25 | order_target_percent(asset, .00025) 26 | if asset not in context.longs_portfolio: 27 | context.longs_portfolio[asset] = True 28 | 29 | 30 | for key in longs_to_remove: 31 | context.longs_portfolio.pop(key) 32 | 33 | shorts_to_remove = [] 34 | 35 | for asset in context.shorts_portfolio: # search portfolio for positions to close out 36 | if asset not in context.shorts.index: 37 | 
shorts_to_remove.append(asset) 38 | order_target(asset, 0) 39 | 40 | for asset in context.shorts.index: # search context.shorts for stocks to add to portfolio 41 | if asset not in context.shorts_portfolio: 42 | context.shorts_portfolio[asset] = True 43 | order_target_percent(asset, -0.00025) 44 | 45 | for key in shorts_to_remove: 46 | context.shorts_portfolio.pop(key) -------------------------------------------------------------------------------- /backtest_strategies/utilities/rays_long_short_strategy/rays_long_short_strategy_helpers.py: -------------------------------------------------------------------------------- 1 | from zipline.api import order_target_percent, order_target 2 | 3 | def get_longs(filtered_by_cap): 4 | pe1_longs = filtered_by_cap.sort_values(['pe1'])[:1000] # filter 1000 stocks with lowest pe ratios 5 | eg_longs = pe1_longs.sort_values(['earnings_growth'])[-500:] # filter 500 stocks with highest earning growth 6 | return eg_longs.sort_values(['de'])[:100] # filter top 100 stocks by lowest debt equity ratio 7 | 8 | 9 | def get_shorts(filtered_by_cap): 10 | pe1_shorts = filtered_by_cap.sort_values(['pe1'])[-1000:] # same thing but backwards for shorts 11 | eg_shorts = pe1_shorts.sort_values(['earnings_growth'])[:500] 12 | return eg_shorts.sort_values(['de'])[-100:] 13 | 14 | def portfolio_logic(context): 15 | longs_to_remove = [] 16 | 17 | for asset in context.longs_portfolio: # search portfolio for positions to close out 18 | if asset not in context.longs.index: 19 | longs_to_remove.append(asset) 20 | order_target(asset, 0) 21 | 22 | for asset in context.longs.index: # search context.longs for stocks to add to portfolio 23 | if asset not in context.longs_portfolio: 24 | context.longs_portfolio[asset] = True 25 | order_target_percent(asset, .005) 26 | 27 | for key in longs_to_remove: 28 | context.longs_portfolio.pop(key) 29 | 30 | shorts_to_remove = [] 31 | 32 | for asset in context.shorts_portfolio: # search portfolio for positions to close out 33 | if asset not in context.shorts.index: 34 | shorts_to_remove.append(asset) 35 | order_target(asset, 0) 36 | 37 | for asset in context.shorts.index: # search context.shorts for stocks to add to portfolio 38 | if asset not in context.shorts_portfolio: 39 | context.shorts_portfolio[asset] = True 40 | order_target_percent(asset, -0.005) 41 | 42 | for key in shorts_to_remove: 43 | context.shorts_portfolio.pop(key) 44 | -------------------------------------------------------------------------------- /backtest_strategies/utilities/helper_functions.py: -------------------------------------------------------------------------------- 1 | from zipline.pipeline.data import Column, BoundColumn 2 | from collections import OrderedDict 3 | import os 4 | import pandas as pd 5 | 6 | 7 | def get_pricing_securities(pricing_directory): 8 | """ 9 | Builds a list of all securities, represented by a csv file, in the pricing_directory folder 10 | :param pricing_directory: 11 | :return: return list, an ordered dict of security names 12 | """ 13 | 14 | return_dict = OrderedDict() 15 | 16 | for root, dirs, files in os.walk(pricing_directory): 17 | for file in files: 18 | if file.endswith('.csv'): 19 | return_dict[file[:-4]] = True 20 | 21 | return return_dict 22 | 23 | 24 | def get_dates(fundamentals_directory): 25 | """ 26 | Looks at first csv file in fundamentals_directory, build list of securities and dates 27 | :param fundamentals_directory: 28 | :return: return_dict: an ordered dict with the name of every security as a key, and True as the value 29 
| dates: a list of all dates, as datestamps, that are in the csv index. 30 | """ 31 | return_dict = OrderedDict() 32 | for root, dirs, files in os.walk(fundamentals_directory): 33 | for file in files: 34 | if file.endswith('.csv'): 35 | fundamental_tickers_df = pd.read_csv('{}{}'.format(fundamentals_directory, file), index_col=0) 36 | 37 | for ticker in fundamental_tickers_df.columns: 38 | return_dict[ticker] = True 39 | 40 | dates = fundamental_tickers_df.index.tolist() 41 | 42 | return return_dict, dates 43 | 44 | 45 | def get_tickers_in_both(pricing_assets, fundamental_assets): 46 | """ 47 | Compares tickers from pricing assets, and fundamental assets, and filter out tickers not in both 48 | :param pricing_assets: 49 | :param fundamental_assets: 50 | :return: list 51 | """ 52 | tickers_in_both = [] 53 | 54 | for ticker in pricing_assets: 55 | if ticker in fundamental_assets: 56 | tickers_in_both.append(ticker) 57 | 58 | return tickers_in_both 59 | 60 | 61 | def convert_to_date_stamps(dates): 62 | """ 63 | Given a list of dates, convert to tz aware datestamps, returns as list. 64 | :param dates: 65 | :return: list 66 | """ 67 | datestamps = [] 68 | 69 | for date in dates: 70 | tz_aware_date = pd.Timestamp(date, tz='utc') 71 | datestamps.append(tz_aware_date) 72 | 73 | return datestamps 74 | 75 | 76 | def make_frame(data_name, fundamentals_directory, tickers): 77 | return pd.read_csv('{}{}.csv'.format(fundamentals_directory, data_name), usecols=tickers) 78 | 79 | def reformat_frame(df, date_stamps, sids): 80 | df.index, df.columns = date_stamps, sids 81 | 82 | def set_dataset_columns(data_points, cls): 83 | for point in data_points: 84 | setattr(cls, point, Column(dtype=float)) 85 | return cls 86 | -------------------------------------------------------------------------------- /backtest_strategies/aapl_backtest.py: -------------------------------------------------------------------------------- 1 | from utilities import helper_functions 2 | 3 | from zipline.data import bundles 4 | from zipline.pipeline import Pipeline, CustomFactor 5 | from zipline.pipeline.data import USEquityPricing, Column, DataSet 6 | from zipline.pipeline.factors import Returns 7 | from zipline.pipeline.loaders.frame import DataFrameLoader 8 | from zipline.utils.run_algo import load_extensions 9 | from zipline import run_algorithm 10 | from zipline.api import ( 11 | attach_pipeline, 12 | pipeline_output, 13 | get_open_orders, 14 | symbol, 15 | set_max_leverage, 16 | order_target_percent, 17 | record 18 | ) 19 | 20 | import os 21 | import datetime as dt 22 | 23 | import pandas as pd 24 | import matplotlib 25 | matplotlib.use('TkAgg') # This forces MatPlotLib to use TkAgg as a backend 26 | import matplotlib.pyplot as plt 27 | 28 | def prepare_data(bundle_data): 29 | """ 30 | This function takes a data bundle and matches fundamental data points to the correct asset objects. 31 | :param bundle_data: The data bundle that you ingested from SEP 32 | :return: A dictionary of loaders to be used within a data pipeline, and a DataSet class with the correct columns 33 | """ 34 | 35 | """ 36 | Enter the name of the data points you wish to use in the backtest here. 
The names need to match the name of the 37 | appropriate CSV file found in processed_data/fundamentals 38 | """ 39 | data_points = ['marketcap'] 40 | 41 | # Specify where our CSV files live 42 | fundamentals_directory = '../processed_data/fundamentals/' 43 | pricing_directory = '../processed_data/pricing/daily/' 44 | 45 | # pricing_assets is an ordered dict that contains the name of every security in the pricing directory 46 | pricing_assets = helper_functions.get_pricing_securities(pricing_directory) 47 | 48 | """ 49 | fundamental_assets is an ordered dict that contains the name of every security in the fundamentals directory 50 | dates is a list of dates that the fundamentals directory is indexed by 51 | """ 52 | fundamental_assets, dates = helper_functions.get_dates(fundamentals_directory) 53 | 54 | # Securities that are in both pricing_assets, and fundamental_assets 55 | tickers = helper_functions.get_tickers_in_both(pricing_assets, fundamental_assets) 56 | 57 | date_stamps = helper_functions.convert_to_date_stamps(dates) 58 | 59 | data_frames = {} 60 | 61 | for data in data_points: 62 | # creates a dataframe for each data point, puts it in the data_frames dict 63 | data_frames[data] = helper_functions.make_frame(data, fundamentals_directory, tickers) 64 | 65 | for data_frame in data_frames: 66 | """ 67 | assets variable becomes a list of Asset objects, sids becomes a list of SID objects corresponding to the correct 68 | assets. 69 | """ 70 | assets = bundle_data.asset_finder.lookup_symbols([ticker for ticker in data_frames[data_frame].columns], 71 | as_of_date=None) 72 | sids = pd.Int64Index([asset.sid for asset in assets]) 73 | break 74 | 75 | 76 | class MyDataSet(DataSet): 77 | """ 78 | We need to create an attribute for each needed data point within MyDataSet, before __new__() runs... 79 | This is so MyDataSet converts the Column types into BoundColumn types. 80 | """ 81 | for point in data_points: 82 | locals()[point] = Column(dtype=float) 83 | 84 | """ 85 | We are finally ready to create a dictionary of data frame loaders, with corresponding BoundColumn attributes 86 | within our MyDataSet class. 87 | """ 88 | data_frame_loaders = {} 89 | 90 | for data_frame in data_frames: 91 | """ 92 | Reindexes the dataframe indexes with date_stamps instead of dates, and replaces the column names (which are 93 | currently strings) with SIDS. 94 | """ 95 | data_frames[data_frame].index, data_frames[data_frame].columns = date_stamps, sids 96 | 97 | for attr in data_frames: 98 | """ 99 | Fills data_frame_loaders with key value pairs of: MyDataSet.attribute_name: DataFrameLoader(attribute_name 100 | """ 101 | data_frame_loaders[getattr(MyDataSet, attr)] = DataFrameLoader(getattr(MyDataSet, attr), data_frames[attr]) 102 | 103 | return data_frame_loaders, MyDataSet 104 | 105 | def make_pipeline(): 106 | 107 | yearly_returns = Returns(window_length=252) 108 | 109 | monthly_returns = Returns(window_length=21) 110 | 111 | lagged_returns = yearly_returns - monthly_returns 112 | 113 | return Pipeline( 114 | columns={ 115 | 'lagged_returns': lagged_returns, 116 | 'marketcap': MyDataSet.marketcap.latest, 117 | }, 118 | screen=lagged_returns.notnull() & 119 | MyDataSet.marketcap.latest.notnull() & 120 | MyDataSet.marketcap.latest.top(500) 121 | ) 122 | 123 | def initialize(context): 124 | """ 125 | Function runs once, at the start of the backtest. You must attach_pipeline() here. 
126 | :param context: A common namespace to keep variables in 127 | :return: 128 | """ 129 | 130 | 131 | attach_pipeline( 132 | make_pipeline(), 133 | 'data_pipe' 134 | ) 135 | 136 | def before_trading_start(context, data): 137 | """ 138 | Runs once a day, before trading start 139 | :param context: The common namespace 140 | :param data: 141 | :return: 142 | """ 143 | 144 | 145 | def handle_data(context, data): 146 | """ 147 | Runs every day, at market open 148 | :param context: Common namespace 149 | :param data: 150 | :return: 151 | """ 152 | 153 | order_target_percent(symbol('AAPL'), 1) 154 | 155 | def analyze(context, perf): 156 | """ 157 | Helper function that runs at the end of backtest for analysis 158 | :param context: Common namespace 159 | :param perf: The data which shows how the backtest performed 160 | :return: 161 | """ 162 | perf.to_csv('../backtest_outputs/backtest_on_{}.csv'.format(str(dt.datetime.now()))) 163 | 164 | fig = plt.figure() 165 | ax1 = fig.add_subplot(211) 166 | perf.portfolio_value.plot(ax=ax1) 167 | ax1.set_ylabel('portfolio value in $') 168 | plt.legend(loc=0) 169 | plt.show() 170 | 171 | if __name__ == "__main__": 172 | 173 | load_extensions( 174 | default=True, 175 | extensions=[], 176 | strict=True, 177 | environ=os.environ, 178 | ) 179 | 180 | bundle = bundles.load('sharadar-pricing') # This is a bundle made from Sharadar SEP data 181 | 182 | data_frame_loaders, MyDataSet = prepare_data(bundle) 183 | 184 | print('Made it to run_algorithm') 185 | 186 | run_algorithm( 187 | bundle='sharadar-pricing', 188 | before_trading_start=before_trading_start, 189 | start=pd.Timestamp('2017-01-02', tz='utc'), 190 | end=pd.Timestamp('2018-04-20', tz='utc'), 191 | initialize=initialize, 192 | analyze=analyze, 193 | capital_base=10000, 194 | handle_data=handle_data, 195 | data_frame_loaders=data_frame_loaders 196 | ) 197 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | # Using Fundamental Data For Backtests Within Zipline 2 | 3 | Follow along Youtube video: https://www.youtube.com/watch?v=vh42tQDDC1U 4 | 5 | It is common to see questions related to using external, fundamental data for backtests within Zipline. 6 | I have developed an easy way of implementing data from Sharadar’s SF1 (fundamentals) and SEP (pricing) datasets. 7 | These are very popular datasets because of their relative robustness, in relation to their low price. 8 | 9 | Others have written about this topic, such as Jonathan Larkin on [Zipline Github issue #911](https://github.com/quantopian/zipline/issues/911) 10 | and Peter Harrington on his [AlphaCompiler blog](http://alphacompiler.com/blog/6/). 11 | 12 | I tried both of these methods, and found that neither of them worked for my particular use case. 13 | It is worth noting that my code borrows heavily from Jonathan Larkin’s implementation of dispatching custom loaders when creating a data pipeline, with a special shout out to Scott Sanderson for helping me alter Zipline to make the run_algorithm() function accept dataframe loaders. 14 | 15 | Because this method uses Pandas Data Frames to load all data into the data pipeline using the [DataFrameLoader class](https://github.com/quantopian/zipline/blob/master/zipline/pipeline/loaders/frame.py), you can only load data which will fit in your computer’s RAM. 
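To make that limitation concrete: every fundamental field becomes one wide dates-by-sids DataFrame that is handed to a DataFrameLoader and kept in memory for the entire run. Here is a minimal sketch of the pattern (the Fundamentals class name, the sids, and the values are made up for illustration; the backtest scripts in this repo build an equivalent MyDataSet class dynamically):

```
import pandas as pd
from zipline.pipeline.data import Column, DataSet
from zipline.pipeline.loaders.frame import DataFrameLoader

class Fundamentals(DataSet):
    # one Column per fundamental data point; DataSet turns these into BoundColumns
    marketcap = Column(dtype=float)

# a dates x sids frame of values -- this whole frame lives in RAM
dates = pd.date_range('2017-01-03', periods=3, tz='UTC')
frame = pd.DataFrame({0: [1.0e9, 1.1e9, 1.2e9], 1: [2.0e9, 2.1e9, 2.2e9]}, index=dates)
frame.columns = pd.Int64Index(frame.columns)

# the pipeline engine is pointed at this loader whenever the marketcap column is requested
loaders = {Fundamentals.marketcap: DataFrameLoader(Fundamentals.marketcap, frame)}
```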
My next improvement will be implementing the [Blaze loader](https://github.com/quantopian/zipline/blob/master/zipline/pipeline/loaders/blaze/core.py) in order to increase loading speed and do away with this annoying limitation.
16 | 
17 | **OK, let's get started.**
18 | 
19 | The official Zipline docs recommend using Conda to set up this environment, but it seems that Conda sometimes doesn't install the very latest version of Zipline.
20 | I use pip and Pyenv to set up my environment; here are my step-by-step instructions for setting up shop on macOS.
21 | 
22 | **PS: these instructions are for macOS. I will try to accommodate Windows and Linux users, but make no guarantees.**
23 | 
24 | #### Step 0: Clone this repo into the directory of your choice
25 | ```
26 | git clone https://github.com/calmitchell617/Springbok.git
27 | ```
28 | 
29 | #### Step 1: Install Python 3.5.5 using Pyenv
30 | 
31 | I followed [this tutorial](https://medium.com/@pimterry/setting-up-pyenv-on-os-x-with-homebrew-56c7541fd331) to set up Python 3.5.5 using Pyenv. Assuming you already have Homebrew installed, here are the essential steps:
32 | 
33 | ```
34 | CFLAGS="-I$(xcrun --show-sdk-path)/usr/include"
35 | brew install pyenv
36 | brew install readline
37 | pyenv install 3.5.5
38 | pyenv versions
39 | ```
40 | 
41 | The command that starts with CFLAGS works around an issue that is known to the Pyenv folks, and they have created a page for common build problems.
42 | If that command fails, or any of the subsequent commands fail, check out [this page](https://github.com/pyenv/pyenv/wiki/Common-build-problems) to see if your problem can be solved there.
43 | 
44 | Look to make sure Python 3.5.5 is listed, then run:
45 | ```
46 | pyenv local 3.5.5
47 | eval "$(pyenv init -)"
48 | python --version
49 | ```
50 | Check to make sure you are running Python 3.5.5. If so, great!
51 | 
52 | #### Step 2: Install Zipline's dependencies using pip
53 | ```
54 | pip install --upgrade pip
55 | ```
56 | This makes sure pip is up to date.
57 | ```
58 | pip install numpy
59 | pip install pandas
60 | pip install cython
61 | pip install -U setuptools
62 | pip install matplotlib
63 | ```
64 | 
65 | #### Step 3: Install my modified version of Zipline
66 | 
67 | Navigate to the directory that you want my branch of Zipline to download to, and run on the command line:
68 | ```
69 | git clone https://github.com/calmitchell617/zipline.git
70 | pip install (copy and paste the path of where you installed my zipline repo here)
71 | ```
72 | Zipline should now install, and we can use this environment to do all kinds of fun stuff.
73 | 
74 | #### Step 4: Download and process data from Quandl / Sharadar
75 | 
76 | Run mkdirs.py to set up the proper folder structure.
77 | 
78 | Download pricing and fundamental data from Quandl.
79 | Unzip them, and put the two resulting files in the data_downloads folder.
80 | **Do not put the zip files in the data_downloads folder.**
81 | Pricing: https://www.quandl.com/databases/SEP/documentation/batch-download
82 | Fundamentals: https://www.quandl.com/databases/SF1/documentation/batch-download
83 | 
84 | Run process_data.py. **It will take an hour or more to process all of the data.** It will print "Done!" when it's done.
85 | 
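Before moving on, it's worth sanity-checking a couple of the processed files. A quick sketch ('AAPL' is just an example ticker -- use any file that process_data.py produced):

```
import pandas as pd

df = pd.read_csv('processed_data/pricing/daily/AAPL.csv', index_col='date')
print(df.columns.tolist())  # should be ['open', 'high', 'low', 'close', 'volume']
print(df.head())
```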
86 | #### Step 5: Ingest the data bundle
87 | 
88 | We need to ingest all pricing data into what's known as a data bundle.
89 | To do this, we will take our CSV files, which have been processed into the correct OHLCV format, and run the "zipline ingest" command from the command line.
90 | To successfully run this command, however, we will first have to configure Zipline's extension.py file.
91 | 
92 | The .zipline folder is hidden by default, so you need to alter your computer to reveal hidden files and directories.
93 | **In macOS Sierra and later, this is quite easy. While in Finder, press "Command + Shift + .", and hidden files and folders will be revealed.**
94 | 
95 | Important: If you don't see the .zipline directory, I put an empty version of it in the repository. Copy and paste it into your home directory.
96 | 
97 | For Windows or Linux, do a Google search for "(your OS and version here) reveal hidden files". There will definitely be a tutorial to help you.
98 | 
99 | Now find your .zipline folder, which is under your user directory.
100 | For example, on Mac, this is under Macintosh HD -> Users -> the username you are currently using.
101 | 
102 | Open "extension.py" to modify it.
103 | 
104 | Change the start_session variable to match the date on which your pricing data starts.
105 | 
106 | Change the end_session variable to match the date on which your pricing data ends.
107 | 
108 | Change the first parameter in the register() function to: sharadar-pricing
109 | 
110 | Make sure the first parameter of the csvdir_equities() function is: ['daily']
111 | 
112 | Make the second parameter of the csvdir_equities() function the full path of your pricing folder...
113 | 
114 | For example, my directory is '/Users/calmitchell/s/springbok-shared/processed_data/pricing/'
115 | 
116 | This folder should contain one other folder named: daily
117 | 
118 | Make sure you are running Python 3.5.5 with Zipline installed properly (Step 1 above), then run:
119 | ```
120 | zipline ingest -b 'sharadar-pricing'
121 | ```
122 | This will take a few minutes. Check whether any fatal errors occurred, and
123 | also check the .zipline directory for a new folder called sharadar-pricing.
124 | 
125 | Open the folder within that folder, and if it's not empty, that means you are in business!
126 | 
127 | #### Step 6: Run a backtest
128 | 
129 | Run aapl_backtest.py (in the backtest_strategies folder) to ensure everything is working.
130 | 
131 | At this point, you have ingested pricing data, processed fundamental data into a known directory, and run a backtest using the data.
132 | Check out the comments and structure of aapl_backtest.py to further your understanding of how to work with Zipline.
133 | 
134 | #### Optional step 7:
135 | 
136 | Download historical "SPY" pricing data from Yahoo Finance. Delete the adjusted close column, and change the date column to match the pricing files in processed_data/pricing/daily (a rough pandas sketch is at the end of this readme). Then re-ingest the data bundle.
137 | 
138 | ### Happy alpha hunting!
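For the optional SPY step above, here is a rough pandas sketch of the reformatting. It assumes Yahoo's CSV column layout at the time of writing, so verify it against your actual download, and adjust the paths to wherever you saved the file:

```
import pandas as pd

spy = pd.read_csv('SPY.csv')                      # wherever you saved the Yahoo download
spy = spy.drop('Adj Close', axis=1)               # delete the adjusted close column
spy.columns = ['date', 'open', 'high', 'low', 'close', 'volume']
# depending on your download, you may also need to rewrite the date column so its
# format matches the other files in processed_data/pricing/daily
spy.to_csv('processed_data/pricing/daily/SPY.csv', index=False)
```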
139 | -------------------------------------------------------------------------------- /backtest_strategies/rays_long_short_strategy.py: -------------------------------------------------------------------------------- 1 | from utilities import helper_functions 2 | from utilities.rays_long_short_strategy import rays_long_short_strategy_helpers 3 | 4 | 5 | from zipline.data import bundles 6 | from zipline.pipeline import Pipeline 7 | from zipline.pipeline.data import USEquityPricing, Column, DataSet 8 | from zipline.pipeline.loaders.frame import DataFrameLoader 9 | from zipline.utils.run_algo import load_extensions 10 | from zipline import run_algorithm 11 | from zipline.api import ( 12 | attach_pipeline, 13 | pipeline_output, 14 | get_open_orders, 15 | record 16 | ) 17 | 18 | import os 19 | import datetime as dt 20 | 21 | import pandas as pd 22 | import matplotlib 23 | matplotlib.use('TkAgg') # This forces MatPlotLib to use TkAgg as a backend 24 | import matplotlib.pyplot as plt 25 | 26 | 27 | def prepare_data(bundle_data): 28 | """ 29 | This function takes a data bundle and matches fundamental data points to the correct asset objects. 30 | :param bundle_data: The data bundle that you ingested from SEP 31 | :return: A dictionary of loaders to be used within a data pipeline, and a DataSet class with the correct columns 32 | """ 33 | 34 | """ 35 | Enter the name of the data points you wish to use in the backtest here. The names need to match the name of the 36 | appropriate CSV file found in processed_data/fundamentals 37 | """ 38 | data_points = ['pe1', 'de', 'earnings_growth', 'marketcap'] 39 | 40 | # Specify where our CSV files live 41 | fundamentals_directory = '../processed_data/fundamentals/' 42 | pricing_directory = '../processed_data/pricing/daily/' 43 | 44 | # pricing_assets is an ordered dict that contains the name of every security in the pricing directory 45 | pricing_assets = helper_functions.get_pricing_securities(pricing_directory) 46 | 47 | """ 48 | fundamental_assets is an ordered dict that contains the name of every security in the fundamentals directory 49 | dates is a list of dates that the fundamentals directory is indexed by 50 | """ 51 | fundamental_assets, dates = helper_functions.get_dates(fundamentals_directory) 52 | 53 | # Securities that are in both pricing_assets, and fundamental_assets 54 | tickers = helper_functions.get_tickers_in_both(pricing_assets, fundamental_assets) 55 | 56 | date_stamps = helper_functions.convert_to_date_stamps(dates) 57 | 58 | data_frames = {} 59 | 60 | for data in data_points: 61 | # creates a dataframe for each data point, puts it in the data_frames dict 62 | data_frames[data] = helper_functions.make_frame(data, fundamentals_directory, tickers) 63 | 64 | for data_frame in data_frames: 65 | """ 66 | assets variable becomes a list of Asset objects, sids becomes a list of SID objects corresponding to the correct 67 | assets. 68 | """ 69 | assets = bundle_data.asset_finder.lookup_symbols([ticker for ticker in data_frames[data_frame].columns], 70 | as_of_date=None) 71 | sids = pd.Int64Index([asset.sid for asset in assets]) 72 | break 73 | 74 | 75 | class MyDataSet(DataSet): 76 | """ 77 | We need to create an attribute for each needed data point within MyDataSet, before __new__() runs... 78 | This is so MyDataSet converts the Column types into BoundColumn types. 
79 | """ 80 | for point in data_points: 81 | locals()[point] = Column(dtype=float) 82 | 83 | """ 84 | We are finally ready to create a dictionary of data frame loaders, with corresponding BoundColumn attributes 85 | within our MyDataSet class. 86 | """ 87 | data_frame_loaders = {} 88 | 89 | for data_frame in data_frames: 90 | """ 91 | Reindexes the dataframe indexes with date_stamps instead of dates, and replaces the column names (which are 92 | currently strings) with SIDS. 93 | """ 94 | data_frames[data_frame].index, data_frames[data_frame].columns = date_stamps, sids 95 | 96 | for attr in data_frames: 97 | """ 98 | Filles data_frame_loaders with key value pairs of: MyDataSet.attribute_name: DataFrameLoader(attribute_name 99 | """ 100 | data_frame_loaders[getattr(MyDataSet, attr)] = DataFrameLoader(getattr(MyDataSet, attr), data_frames[attr]) 101 | 102 | return data_frame_loaders, MyDataSet 103 | 104 | def make_pipeline(): 105 | 106 | return Pipeline( 107 | columns={ 108 | 'price': USEquityPricing.close.latest, 109 | 'pe1': MyDataSet.pe1.latest, 110 | 'de': MyDataSet.de.latest, 111 | 'earnings_growth': MyDataSet.earnings_growth.latest, 112 | 'marketcap': MyDataSet.marketcap.latest, 113 | }, 114 | screen=USEquityPricing.close.latest.notnull() & 115 | MyDataSet.de.latest.notnull() & 116 | MyDataSet.pe1.latest.notnull() & 117 | MyDataSet.earnings_growth.latest.notnull() & 118 | MyDataSet.marketcap.latest.notnull() 119 | ) 120 | 121 | def initialize(context): 122 | """ 123 | Function runs once, at the start of the backtest. You must attach_pipeline() here. 124 | :param context: A common namespace to keep variables in 125 | :return: 126 | """ 127 | 128 | context.longs_portfolio = {} 129 | context.shorts_portfolio = {} 130 | 131 | attach_pipeline( 132 | make_pipeline(), 133 | 'data_pipe' 134 | ) 135 | 136 | def before_trading_start(context, data): 137 | """ 138 | Runs once a day, before trading start 139 | :param context: The common namespace 140 | :param data: 141 | :return: 142 | """ 143 | context.output = pipeline_output('data_pipe') 144 | 145 | context.cap_plays = context.output.sort_values(['marketcap'])[-4000:] # take top 4000 stocks by market cap for liquidity 146 | 147 | context.longs = rays_long_short_strategy_helpers.get_longs(context.cap_plays) 148 | 149 | context.shorts = rays_long_short_strategy_helpers.get_shorts(context.cap_plays) 150 | 151 | record(open_orders=str(get_open_orders())) 152 | 153 | 154 | def handle_data(context, data): 155 | """ 156 | Runs every day, at market open 157 | :param context: Common namepsace 158 | :param data: 159 | :return: 160 | """ 161 | 162 | context = rays_long_short_strategy_helpers.portfolio_logic(context) 163 | 164 | def analyze(context, perf): 165 | """ 166 | Helper function that runs at the end of backtest for analysis 167 | :param context: Common namespace 168 | :param perf: The data which shows how the backtest performed 169 | :return: 170 | """ 171 | perf.to_csv('../backtest_outputs/backtest_on_{}.csv'.format(str(dt.datetime.now()))) 172 | 173 | fig = plt.figure() 174 | ax1 = fig.add_subplot(211) 175 | perf.portfolio_value.plot(ax=ax1) 176 | ax1.set_ylabel('portfolio value in $') 177 | plt.legend(loc=0) 178 | plt.show() 179 | 180 | if __name__ == "__main__": 181 | 182 | load_extensions( 183 | default=True, 184 | extensions=[], 185 | strict=True, 186 | environ=os.environ, 187 | ) 188 | 189 | bundle = bundles.load('sharadar-pricing') # This is a bundle made from Sharadar SEP data 190 | 191 | data_frame_loaders, MyDataSet = 
prepare_data(bundle) 192 | 193 | print('Made it to run_algorithm') 194 | 195 | run_algorithm( 196 | bundle='sharadar-pricing', 197 | before_trading_start=before_trading_start, 198 | start=pd.Timestamp('2018-01-02', tz='utc'), 199 | end=pd.Timestamp('2018-04-20', tz='utc'), 200 | initialize=initialize, 201 | analyze=analyze, 202 | capital_base=1000000, 203 | handle_data=handle_data, 204 | data_frame_loaders=data_frame_loaders 205 | ) 206 | -------------------------------------------------------------------------------- /backtest_strategies/momentum_crashes_backtest.py: -------------------------------------------------------------------------------- 1 | from utilities import helper_functions as helper_functions 2 | from utilities.momentum_crashes import momentum_strategy_helpers as momentum_strategy_helpers 3 | 4 | from zipline.data import bundles 5 | from zipline.pipeline import Pipeline 6 | from zipline.pipeline.data import Column, DataSet 7 | from zipline.pipeline.factors import Returns 8 | from zipline.pipeline.loaders.frame import DataFrameLoader 9 | from zipline.utils.run_algo import load_extensions 10 | from zipline import run_algorithm 11 | from zipline.api import ( 12 | attach_pipeline, 13 | pipeline_output, 14 | symbol, 15 | set_max_leverage, 16 | record 17 | ) 18 | 19 | import os 20 | import datetime as dt 21 | 22 | import pandas as pd 23 | import matplotlib 24 | matplotlib.use('TkAgg') # This forces MatPlotLib to use TkAgg as a backend 25 | import matplotlib.pyplot as plt 26 | 27 | 28 | def prepare_data(bundle_data): 29 | """ 30 | This function takes a data bundle and matches fundamental data points to the correct asset objects. 31 | :param bundle_data: The data bundle that you ingested from SEP 32 | :return: A dictionary of loaders to be used within a data pipeline, and a DataSet class with the correct columns 33 | """ 34 | 35 | """ 36 | Enter the name of the data points you wish to use in the backtest here. The names need to match the name of the 37 | appropriate CSV file found in processed_data/fundamentals 38 | """ 39 | data_points = ['marketcap'] 40 | 41 | # Specify where our CSV files live 42 | fundamentals_directory = '../processed_data/fundamentals/' 43 | pricing_directory = '../processed_data/pricing/daily/' 44 | 45 | # pricing_assets is an ordered dict that contains the name of every security in the pricing directory 46 | pricing_assets = helper_functions.get_pricing_securities(pricing_directory) 47 | 48 | """ 49 | fundamental_assets is an ordered dict that contains the name of every security in the fundamentals directory 50 | dates is a list of dates that the fundamentals directory is indexed by 51 | """ 52 | fundamental_assets, dates = helper_functions.get_dates(fundamentals_directory) 53 | 54 | # Securities that are in both pricing_assets, and fundamental_assets 55 | tickers = helper_functions.get_tickers_in_both(pricing_assets, fundamental_assets) 56 | 57 | date_stamps = helper_functions.convert_to_date_stamps(dates) 58 | 59 | data_frames = {} 60 | 61 | for data in data_points: 62 | # creates a dataframe for each data point, puts it in the data_frames dict 63 | data_frames[data] = helper_functions.make_frame(data, fundamentals_directory, tickers) 64 | 65 | for data_frame in data_frames: 66 | """ 67 | assets variable becomes a list of Asset objects, sids becomes a list of SID objects corresponding to the correct 68 | assets. 
69 | """ 70 | assets = bundle_data.asset_finder.lookup_symbols([ticker for ticker in data_frames[data_frame].columns], 71 | as_of_date=None) 72 | sids = pd.Int64Index([asset.sid for asset in assets]) 73 | break 74 | 75 | 76 | class MyDataSet(DataSet): 77 | """ 78 | We need to create an attribute for each needed data point within MyDataSet, before __new__() runs... 79 | This is so MyDataSet converts the Column types into BoundColumn types. 80 | """ 81 | for point in data_points: 82 | locals()[point] = Column(dtype=float) 83 | 84 | """ 85 | We are finally ready to create a dictionary of data frame loaders, with corresponding BoundColumn attributes 86 | within our MyDataSet class. 87 | """ 88 | data_frame_loaders = {} 89 | 90 | for data_frame in data_frames: 91 | """ 92 | Reindexes the dataframe indexes with date_stamps instead of dates, and replaces the column names (which are 93 | currently strings) with SIDS. 94 | """ 95 | data_frames[data_frame].index, data_frames[data_frame].columns = date_stamps, sids 96 | 97 | for attr in data_frames: 98 | """ 99 | Fills data_frame_loaders with key value pairs of: MyDataSet.attribute_name: DataFrameLoader(attribute_name 100 | """ 101 | data_frame_loaders[getattr(MyDataSet, attr)] = DataFrameLoader(getattr(MyDataSet, attr), data_frames[attr]) 102 | 103 | return data_frame_loaders, MyDataSet 104 | 105 | def make_pipeline(): 106 | 107 | yearly_returns = Returns(window_length=252) 108 | 109 | monthly_returns = Returns(window_length=21) 110 | 111 | lagged_returns = yearly_returns - monthly_returns 112 | 113 | return Pipeline( 114 | columns={ 115 | 'lagged_returns': lagged_returns, 116 | 'marketcap': MyDataSet.marketcap.latest, 117 | }, 118 | screen=lagged_returns.notnull() & 119 | MyDataSet.marketcap.latest.notnull() & 120 | MyDataSet.marketcap.latest.top(500) 121 | ) 122 | 123 | def initialize(context): 124 | """ 125 | Function runs once, at the start of the backtest. You must attach_pipeline() here. 126 | :param context: A common namespace to keep variables in 127 | :return: 128 | """ 129 | 130 | context.longs_portfolio = {} 131 | context.shorts_portfolio = {} 132 | set_max_leverage(1) 133 | 134 | attach_pipeline( 135 | make_pipeline(), 136 | 'data_pipe' 137 | ) 138 | 139 | def before_trading_start(context, data): 140 | """ 141 | Runs once a day, before trading start 142 | :param context: The common namespace 143 | :param data: 144 | :return: 145 | """ 146 | context.output = pipeline_output('data_pipe') 147 | 148 | market_type(context, data) 149 | 150 | context.longs = momentum_strategy_helpers.get_longs(context) 151 | 152 | context.shorts = momentum_strategy_helpers.get_shorts(context) 153 | 154 | record(market_type=str(context.market_type)) 155 | 156 | 157 | def market_type(context, data): 158 | """ 159 | Attempts to quantify if we are in a bull, or bear market, based on whether SPY is higher than it was a year ago. 
160 | """ 161 | 162 | history = data.history(symbol('SPY'), 'close', 252, '1d') 163 | 164 | if history[251] - history[0] >= 0: 165 | context.market_type = 'bull' 166 | 167 | else: 168 | context.market_type = 'bear' 169 | 170 | def handle_data(context, data): 171 | """ 172 | Runs every day, at market open 173 | :param context: Common namespace 174 | :param data: 175 | :return: 176 | """ 177 | 178 | context = momentum_strategy_helpers.portfolio_logic(context) 179 | 180 | def analyze(context, perf): 181 | """ 182 | Helper function that runs at the end of backtest for analysis 183 | :param context: Common namespace 184 | :param perf: The data which shows how the backtest performed 185 | :return: 186 | """ 187 | perf.to_csv('../backtest_outputs/backtest_on_{}.csv'.format(str(dt.datetime.now()))) 188 | 189 | fig = plt.figure() 190 | ax1 = fig.add_subplot(211) 191 | perf.portfolio_value.plot(ax=ax1) 192 | ax1.set_ylabel('portfolio value in $') 193 | plt.legend(loc=0) 194 | plt.show() 195 | 196 | if __name__ == "__main__": 197 | 198 | load_extensions( 199 | default=True, 200 | extensions=[], 201 | strict=True, 202 | environ=os.environ, 203 | ) 204 | 205 | bundle = bundles.load('sharadar-pricing') # This is a bundle made from Sharadar SEP data 206 | 207 | data_frame_loaders, MyDataSet = prepare_data(bundle) 208 | 209 | print('Made it to run_algorithm') 210 | 211 | run_algorithm( 212 | bundle='sharadar-pricing', 213 | before_trading_start=before_trading_start, 214 | start=pd.Timestamp('2018-01-02', tz='utc'), 215 | end=pd.Timestamp('2018-04-20', tz='utc'), 216 | initialize=initialize, 217 | analyze=analyze, 218 | capital_base=1000000, 219 | handle_data=handle_data, 220 | data_frame_loaders=data_frame_loaders 221 | ) 222 | -------------------------------------------------------------------------------- /process_data.py: -------------------------------------------------------------------------------- 1 | def bundle_prep(): 2 | 3 | downloads_directory = 'data_downloads' 4 | directory = 'processed_data/pricing/daily' 5 | 6 | tickers = OrderedDict() 7 | 8 | for root, dirs, files in os.walk(downloads_directory): # Lets get all of our tickers 9 | for file in files: 10 | if file.startswith('SHARADAR_SEP'): 11 | pricing_df = pd.read_csv('{}/{}'.format(downloads_directory, file)) 12 | 13 | for ticker in pricing_df['ticker']: 14 | if ticker not in tickers: 15 | tickers[ticker] = True 16 | 17 | for ticker in tickers: 18 | with open('{}/{}.csv'.format(directory, ticker), 'w') as processed_file: 19 | writer = csv.writer(processed_file) 20 | writer.writerow(['date', 'open', 'high', 'low', 'close', 'volume']) 21 | 22 | iterator = pricing_df.iterrows() 23 | next(iterator) 24 | for i, row in iterator: 25 | with open('{}/{}.csv'.format(directory, row['ticker']), 'a') as ticker_file: 26 | ticker_writer = csv.writer(ticker_file) 27 | ticker_writer.writerow( 28 | [ 29 | row['date'], 30 | row['open'], 31 | row['high'], 32 | row['low'], 33 | row['close'], 34 | row['volume'] 35 | 36 | ] 37 | ) 38 | 39 | for ticker in tickers: # we need to reindex the files to deal with missing data (we will forward fill) 40 | 41 | df = pd.read_csv('{}/{}.csv'.format(directory, ticker), index_col='date') 42 | length = len(df.index) - 1 43 | start_date = df.index[0] 44 | end_date = df.index[length] 45 | 46 | sessions = get_calendar('NYSE').sessions_in_range(start_date, end_date).tolist() 47 | 48 | for i in range(len(sessions)): 49 | sessions[i] = str(sessions[i]) 50 | 51 | try: 52 | df = df.reindex(sessions, method='pad') 53 | 
os.remove('{}/{}.csv'.format(directory, ticker)) 54 | df.to_csv('{}/{}.csv'.format(directory, ticker)) 55 | except ValueError: 56 | print(ticker) 57 | os.remove('{}/{}.csv'.format(directory, ticker)) 58 | continue 59 | 60 | print("Bundle prep is finished.") 61 | 62 | 63 | def fundamentals_prep(): 64 | 65 | downloads_directory = 'data_downloads' 66 | directory = 'processed_data/fundamentals' 67 | 68 | for root, dirs, files in os.walk(downloads_directory): # Lets get all of our tickers 69 | for file in files: 70 | if file.startswith('SHARADAR_SF1'): 71 | with open('{}/{}'.format(downloads_directory, file), 'r') as read_file: 72 | with open('{}/all_arq.csv'.format(directory), 'w') as write_file: 73 | reader = csv.reader(read_file) 74 | writer = csv.writer(write_file) 75 | first_line_gone = False 76 | for row in reader: 77 | if first_line_gone is True: 78 | if row[1] == 'ARQ': 79 | writer.writerow(row) 80 | else: 81 | writer.writerow(row) 82 | first_line_gone = True 83 | 84 | desired_data = [ 85 | 'accoci', 86 | 'assets', 87 | 'assetsc', 88 | 'assetsnc', 89 | 'bvps', 90 | 'capex', 91 | 'cashneq', 92 | 'cashnequsd', 93 | 'cor', 94 | 'consolinc', 95 | 'currentratio', 96 | 'de', 97 | 'debt', 98 | 'debtc', 99 | 'debtnc', 100 | 'debtusd', 101 | 'deferredrev', 102 | 'depamor', 103 | 'deposits', 104 | 'divyield', 105 | 'dps', 106 | 'ebit', 107 | 'ebitda', 108 | 'ebitdamargin', 109 | 'ebitdausd', 110 | 'ebitusd', 111 | 'ebt', 112 | 'eps', 113 | 'epsdil', 114 | 'epsusd', 115 | 'equity', 116 | 'equityusd', 117 | 'ev', 118 | 'evebit', 119 | 'evebitda', 120 | 'fcf', 121 | 'fcfps', 122 | 'fxusd', 123 | 'gp', 124 | 'grossmargin', 125 | 'intangibles', 126 | 'intexp', 127 | 'invcap', 128 | 'invcapavg', 129 | 'inventory', 130 | 'investments', 131 | 'investmentsc', 132 | 'investmentsnc', 133 | 'liabilities', 134 | 'liabilitiesc', 135 | 'liabilitiesnc', 136 | 'marketcap', 137 | 'ncf', 138 | 'ncfbus', 139 | 'ncfcommon', 140 | 'ncfdebt', 141 | 'ncfdiv', 142 | 'ncff', 143 | 'ncfi', 144 | 'ncfinv', 145 | 'ncfo', 146 | 'ncfx', 147 | 'netinc', 148 | 'netinccmn', 149 | 'netinccmnusd', 150 | 'netincdis', 151 | 'netincnci', 152 | 'netmargin', 153 | 'opex', 154 | 'opinc', 155 | 'payables', 156 | 'payoutratio', 157 | 'pb', 158 | 'pe', 159 | 'pe1', 160 | 'ppnenet', 161 | 'prefdivis', 162 | 'price', 163 | 'ps', 164 | 'ps1', 165 | 'receivables', 166 | 'retearn', 167 | 'revenue', 168 | 'revenueusd', 169 | 'rnd', 170 | 'sbcomp', 171 | 'sgna', 172 | 'sharefactor', 173 | 'sharesbas', 174 | 'shareswa', 175 | 'shareswadil', 176 | 'sps', 177 | 'tangibles', 178 | 'taxassets', 179 | 'taxexp', 180 | 'taxliabilities', 181 | 'tbvps', 182 | 'workingcapital' 183 | ] 184 | 185 | # get all dates and tickers 186 | 187 | main_df = pd.read_csv('{}/all_arq.csv'.format(directory)) 188 | date_stamps = sorted(set(main_df['datekey'].tolist())) 189 | for i in range(len(date_stamps)): 190 | date_stamps[i] = pd.Timestamp(date_stamps[i], tz='UTC') 191 | tickers = sorted(set(main_df['ticker'].tolist())) 192 | 193 | for column in desired_data: 194 | read_df = main_df[['datekey', 'ticker', column]] 195 | write_df = pd.DataFrame(index=date_stamps, columns=tickers) 196 | 197 | iterator = read_df.iterrows() 198 | for i, row in iterator: 199 | write_df[row['ticker']].loc[row['datekey']] = row[column] 200 | 201 | write_df.to_csv('{}/{}.csv'.format(directory, column)) 202 | 203 | try: 204 | os.remove('{}/all_arq.csv'.format(directory)) 205 | except OSError: 206 | pass 207 | 208 | fundamentals_directory = 'processed_data/fundamentals' 209 | 210 | earnings_df = 
pd.read_csv('{}/netinc.csv'.format(fundamentals_directory), index_col=0) 211 | revenue_df = pd.read_csv('{}/revenue.csv'.format(fundamentals_directory), index_col=0) 212 | growth_df = earnings_df 213 | new_df = pd.DataFrame(index=earnings_df.index, columns=earnings_df.columns) 214 | 215 | tickers = OrderedDict() 216 | 217 | for ticker in earnings_df.columns: 218 | tickers[ticker] = {'prev_earnings': None, 'cur_earnings': None, 'rev_growth': None} 219 | 220 | iterator = growth_df.iterrows() 221 | 222 | for i, row in iterator: 223 | 224 | for ticker in tickers: 225 | if not pd.isnull(row[ticker]): 226 | if row[ticker] != tickers[ticker]['cur_earnings']: 227 | 228 | if tickers[ticker]['cur_earnings'] is None: # first value 229 | tickers[ticker]['cur_earnings'] = row[ticker] 230 | continue 231 | 232 | tickers[ticker]['prev_earnings'] = tickers[ticker]['cur_earnings'] 233 | tickers[ticker]['cur_earnings'] = row[ticker] 234 | 235 | rev = revenue_df.loc[i, ticker] 236 | 237 | if rev != 0: 238 | tickers[ticker]['rev_growth'] = ( 239 | (tickers[ticker]['cur_earnings'] - tickers[ticker]['prev_earnings']) / rev) 240 | new_df.ix[i, ticker] = tickers[ticker]['rev_growth'] 241 | else: 242 | new_df.ix[i, ticker] = tickers[ticker]['rev_growth'] 243 | 244 | new_df.to_csv('{}/earnings_growth.csv'.format(fundamentals_directory)) 245 | 246 | directory = 'processed_data/fundamentals' 247 | 248 | data_list = [] 249 | 250 | data = OrderedDict() 251 | 252 | for root, dirs, files in os.walk(directory): # Lets get all of our tickers 253 | for file in files: 254 | if file.endswith('csv'): 255 | data_list.append(file[:-4]) 256 | 257 | data_list.sort() 258 | 259 | for point in data_list: 260 | data[point] = True 261 | 262 | for point in data_list: # we need to reindex the files to deal with missing data (we will forward fill) 263 | 264 | df = pd.read_csv('{}/{}.csv'.format(directory, point), index_col=0) 265 | length = len(df.index) - 1 266 | start_date = df.index[0] 267 | end_date = df.index[length] 268 | 269 | actual_sessions = df.index 270 | 271 | stamps = [str(pd.Timestamp(session, tz='UTC', offset='C')) for session in actual_sessions] 272 | 273 | df.index = stamps 274 | 275 | sessions = get_calendar('NYSE').sessions_in_range(start_date, end_date).tolist() 276 | 277 | for i in range(len(sessions)): 278 | sessions[i] = str(sessions[i]) 279 | 280 | try: 281 | df = df.reindex(sessions) 282 | df = df.fillna(method='pad') 283 | os.remove('{}/{}.csv'.format(directory, point)) 284 | except ValueError: 285 | print(point) 286 | continue 287 | 288 | df.to_csv('{}/{}.csv'.format(directory, point)) 289 | 290 | print("Fundamentals prep is finished.") 291 | 292 | 293 | if __name__ == "__main__": 294 | 295 | import multiprocessing 296 | import os 297 | import pandas as pd 298 | import csv 299 | from collections import OrderedDict 300 | from zipline.utils.calendars import get_calendar 301 | 302 | # creating processes 303 | p1 = multiprocessing.Process(target=bundle_prep) 304 | p2 = multiprocessing.Process(target=fundamentals_prep) 305 | 306 | p1.start() 307 | p2.start() 308 | 309 | p1.join() 310 | p2.join() 311 | 312 | # both processes finished 313 | print("Done!") 314 | -------------------------------------------------------------------------------- /.idea/workspace.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 