├── __init__.py
├── .vscode
│   ├── last.sql
│   └── temp.sql
├── backtest_strategies
│   ├── __init__.py
│   ├── utilities
│   │   ├── __init__.py
│   │   ├── momentum_crashes
│   │   │   ├── __init__.py
│   │   │   └── momentum_strategy_helpers.py
│   │   ├── rays_long_short_strategy
│   │   │   ├── __init__.py
│   │   │   └── rays_long_short_strategy_helpers.py
│   │   └── helper_functions.py
│   ├── aapl_backtest.py
│   ├── rays_long_short_strategy.py
│   └── momentum_crashes_backtest.py
├── .gitignore
├── .idea
│   ├── vcs.xml
│   ├── modules.xml
│   ├── misc.xml
│   ├── springbok-shared.iml
│   └── workspace.xml
├── mkdirs.py
├── .zipline
│   └── extension.py
├── requirements.txt
├── readme.md
└── process_data.py
/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/.vscode/last.sql:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/.vscode/temp.sql:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/backtest_strategies/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/backtest_strategies/utilities/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/backtest_strategies/utilities/momentum_crashes/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/backtest_strategies/utilities/rays_long_short_strategy/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # ignoring csv files
2 | *.csv
3 | *.pages
4 | **/__pycache__
5 | .python-version
6 |
7 | settings.json
--------------------------------------------------------------------------------
/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/mkdirs.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | # run this file first to make directories for everything
4 |
5 | os.mkdir('data_downloads')
6 | os.mkdir('backtest_outputs')
7 | os.mkdir('processed_data')
8 | os.mkdir('processed_data/fundamentals')
9 | os.mkdir('processed_data/pricing')
10 | os.mkdir('processed_data/pricing/daily')
--------------------------------------------------------------------------------
/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
--------------------------------------------------------------------------------
/.zipline/extension.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 |
3 | from zipline.data.bundles import register
4 | from zipline.data.bundles.csvdir import csvdir_equities
5 |
6 | start_session = pd.Timestamp('2013-01-02', tz='UTC')
7 | end_session = pd.Timestamp('2018-07-03', tz='UTC')
8 |
9 | register(
10 | 'sharadar-pricing',
11 | csvdir_equities(
12 | ['daily'],
13 | '/Users/calmitchell/s/Springbok-filled/processed_data/pricing',
14 | ),
15 | calendar_name='NYSE', # US equities
16 | start_session=start_session,
17 | end_session=end_session
18 | )
19 |
20 |
--------------------------------------------------------------------------------
/.idea/springbok-shared.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | alembic==0.9.10
2 | astroid==1.6.4
3 | autopep8==1.3.5
4 | bcolz==0.12.1
5 | Bottleneck==1.2.1
6 | certifi==2018.4.16
7 | chardet==3.0.4
8 | click==6.7
9 | contextlib2==0.5.5
10 | cycler==0.10.0
11 | cyordereddict==1.0.0
12 | Cython==0.28.3
13 | decorator==4.3.0
14 | empyrical==0.5.0
15 | idna==2.7
16 | intervaltree==2.1.0
17 | isort==4.3.4
18 | kiwisolver==1.0.1
19 | lazy-object-proxy==1.3.1
20 | Logbook==1.4.0
21 | lru-dict==1.1.6
22 | Mako==1.0.7
23 | MarkupSafe==1.0
24 | matplotlib==2.2.2
25 | mccabe==0.6.1
26 | multipledispatch==0.5.0
27 | networkx==1.11
28 | numexpr==2.6.5
29 | numpy==1.14.5
30 | pandas==0.18.1
31 | pandas-datareader==0.5.0
32 | patsy==0.5.0
33 | pycodestyle==2.4.0
34 | pylint==1.9.1
35 | pyparsing==2.2.0
36 | python-dateutil==2.7.3
37 | python-editor==1.0.3
38 | pytz==2018.5
39 | requests==2.19.1
40 | requests-file==1.4.3
41 | requests-ftp==0.3.1
42 | scipy==1.1.0
43 | six==1.11.0
44 | sortedcontainers==2.0.4
45 | SQLAlchemy==1.2.9
46 | statsmodels==0.9.0
47 | tables==3.4.4
48 | toolz==0.9.0
49 | urllib3==1.23
50 | wrapt==1.10.11
51 | zipline==1.2.0+67.gebcf7474
52 |
--------------------------------------------------------------------------------
/backtest_strategies/utilities/momentum_crashes/momentum_strategy_helpers.py:
--------------------------------------------------------------------------------
1 | from zipline.api import order_target_percent, order_target
2 |
3 | def get_longs(context):
4 |
5 | if context.market_type == 'bull':
6 | return context.output.sort_values(['lagged_returns'])[-100:]
7 | else:
8 | return context.output.sort_values(['lagged_returns'])[:100]
9 |
10 | def get_shorts(context):
11 |     if context.market_type == 'bear':
12 |         return context.output.sort_values(['lagged_returns'])[-100:]  # short recent winners in a bear market
13 |     else:
14 |         return context.output.sort_values(['lagged_returns'])[:100]  # short recent losers in a bull market
15 |
16 | def portfolio_logic(context):
17 | longs_to_remove = []
18 |
19 | for asset in context.longs_portfolio: # search portfolio for positions to close out
20 | if asset not in context.longs.index:
21 | longs_to_remove.append(asset)
22 | order_target(asset, 0)
23 |
24 | for asset in context.longs.index: # search context.longs for stocks to add to portfolio
25 | order_target_percent(asset, .00025)
26 | if asset not in context.longs_portfolio:
27 | context.longs_portfolio[asset] = True
28 |
29 |
30 | for key in longs_to_remove:
31 | context.longs_portfolio.pop(key)
32 |
33 | shorts_to_remove = []
34 |
35 | for asset in context.shorts_portfolio: # search portfolio for positions to close out
36 | if asset not in context.shorts.index:
37 | shorts_to_remove.append(asset)
38 | order_target(asset, 0)
39 |
40 | for asset in context.shorts.index: # search context.shorts for stocks to add to portfolio
41 | if asset not in context.shorts_portfolio:
42 | context.shorts_portfolio[asset] = True
43 | order_target_percent(asset, -0.00025)
44 |
45 | for key in shorts_to_remove:
46 | context.shorts_portfolio.pop(key)
--------------------------------------------------------------------------------
/backtest_strategies/utilities/rays_long_short_strategy/rays_long_short_strategy_helpers.py:
--------------------------------------------------------------------------------
1 | from zipline.api import order_target_percent, order_target
2 |
3 | def get_longs(filtered_by_cap):
4 |     pe1_longs = filtered_by_cap.sort_values(['pe1'])[:1000]  # keep the 1,000 stocks with the lowest P/E ratios
5 |     eg_longs = pe1_longs.sort_values(['earnings_growth'])[-500:]  # keep the 500 stocks with the highest earnings growth
6 |     return eg_longs.sort_values(['de'])[:100]  # keep the 100 stocks with the lowest debt-to-equity ratio
7 |
8 |
9 | def get_shorts(filtered_by_cap):
10 | pe1_shorts = filtered_by_cap.sort_values(['pe1'])[-1000:] # same thing but backwards for shorts
11 | eg_shorts = pe1_shorts.sort_values(['earnings_growth'])[:500]
12 | return eg_shorts.sort_values(['de'])[-100:]
13 |
14 | def portfolio_logic(context):
15 | longs_to_remove = []
16 |
17 | for asset in context.longs_portfolio: # search portfolio for positions to close out
18 | if asset not in context.longs.index:
19 | longs_to_remove.append(asset)
20 | order_target(asset, 0)
21 |
22 | for asset in context.longs.index: # search context.longs for stocks to add to portfolio
23 | if asset not in context.longs_portfolio:
24 | context.longs_portfolio[asset] = True
25 | order_target_percent(asset, .005)
26 |
27 | for key in longs_to_remove:
28 | context.longs_portfolio.pop(key)
29 |
30 | shorts_to_remove = []
31 |
32 | for asset in context.shorts_portfolio: # search portfolio for positions to close out
33 | if asset not in context.shorts.index:
34 | shorts_to_remove.append(asset)
35 | order_target(asset, 0)
36 |
37 | for asset in context.shorts.index: # search context.shorts for stocks to add to portfolio
38 | if asset not in context.shorts_portfolio:
39 | context.shorts_portfolio[asset] = True
40 | order_target_percent(asset, -0.005)
41 |
42 | for key in shorts_to_remove:
43 | context.shorts_portfolio.pop(key)
44 |
--------------------------------------------------------------------------------
/backtest_strategies/utilities/helper_functions.py:
--------------------------------------------------------------------------------
1 | from zipline.pipeline.data import Column, BoundColumn
2 | from collections import OrderedDict
3 | import os
4 | import pandas as pd
5 |
6 |
7 | def get_pricing_securities(pricing_directory):
8 | """
9 |     Builds an OrderedDict of every security (one per CSV file) in the pricing_directory folder
10 |     :param pricing_directory:
11 |     :return: return_dict, an OrderedDict keyed by security name, with True as each value
12 | """
13 |
14 | return_dict = OrderedDict()
15 |
16 | for root, dirs, files in os.walk(pricing_directory):
17 | for file in files:
18 | if file.endswith('.csv'):
19 | return_dict[file[:-4]] = True
20 |
21 | return return_dict
22 |
23 |
24 | def get_dates(fundamentals_directory):
25 | """
26 |     Walks the CSV files in fundamentals_directory and builds the set of securities and the list of dates
27 |     :param fundamentals_directory:
28 |     :return: return_dict: an ordered dict with the name of every security as a key, and True as the value
29 |              dates: a list of all dates that appear in the CSV index
30 | """
31 | return_dict = OrderedDict()
32 | for root, dirs, files in os.walk(fundamentals_directory):
33 | for file in files:
34 | if file.endswith('.csv'):
35 | fundamental_tickers_df = pd.read_csv('{}{}'.format(fundamentals_directory, file), index_col=0)
36 |
37 | for ticker in fundamental_tickers_df.columns:
38 | return_dict[ticker] = True
39 |
40 | dates = fundamental_tickers_df.index.tolist()
41 |
42 | return return_dict, dates
43 |
44 |
45 | def get_tickers_in_both(pricing_assets, fundamental_assets):
46 | """
47 |     Compares tickers from pricing_assets and fundamental_assets, and filters out tickers that are not in both
48 | :param pricing_assets:
49 | :param fundamental_assets:
50 | :return: list
51 | """
52 | tickers_in_both = []
53 |
54 | for ticker in pricing_assets:
55 | if ticker in fundamental_assets:
56 | tickers_in_both.append(ticker)
57 |
58 | return tickers_in_both
59 |
60 |
61 | def convert_to_date_stamps(dates):
62 | """
63 |     Given a list of dates, converts them to tz-aware Timestamps and returns them as a list.
64 | :param dates:
65 | :return: list
66 | """
67 | datestamps = []
68 |
69 | for date in dates:
70 | tz_aware_date = pd.Timestamp(date, tz='utc')
71 | datestamps.append(tz_aware_date)
72 |
73 | return datestamps
74 |
75 |
76 | def make_frame(data_name, fundamentals_directory, tickers):
77 | return pd.read_csv('{}{}.csv'.format(fundamentals_directory, data_name), usecols=tickers)
78 |
79 | def reformat_frame(df, date_stamps, sids):
80 | df.index, df.columns = date_stamps, sids
81 |
82 | def set_dataset_columns(data_points, cls):
83 | for point in data_points:
84 | setattr(cls, point, Column(dtype=float))
85 | return cls
86 |
--------------------------------------------------------------------------------
/backtest_strategies/aapl_backtest.py:
--------------------------------------------------------------------------------
1 | from utilities import helper_functions
2 |
3 | from zipline.data import bundles
4 | from zipline.pipeline import Pipeline, CustomFactor
5 | from zipline.pipeline.data import USEquityPricing, Column, DataSet
6 | from zipline.pipeline.factors import Returns
7 | from zipline.pipeline.loaders.frame import DataFrameLoader
8 | from zipline.utils.run_algo import load_extensions
9 | from zipline import run_algorithm
10 | from zipline.api import (
11 | attach_pipeline,
12 | pipeline_output,
13 | get_open_orders,
14 | symbol,
15 | set_max_leverage,
16 | order_target_percent,
17 | record
18 | )
19 |
20 | import os
21 | import datetime as dt
22 |
23 | import pandas as pd
24 | import matplotlib
25 | matplotlib.use('TkAgg') # This forces MatPlotLib to use TkAgg as a backend
26 | import matplotlib.pyplot as plt
27 |
28 | def prepare_data(bundle_data):
29 | """
30 | This function takes a data bundle and matches fundamental data points to the correct asset objects.
31 | :param bundle_data: The data bundle that you ingested from SEP
32 | :return: A dictionary of loaders to be used within a data pipeline, and a DataSet class with the correct columns
33 | """
34 |
35 | """
36 | Enter the name of the data points you wish to use in the backtest here. The names need to match the name of the
37 | appropriate CSV file found in processed_data/fundamentals
38 | """
39 | data_points = ['marketcap']
40 |
41 | # Specify where our CSV files live
42 | fundamentals_directory = '../processed_data/fundamentals/'
43 | pricing_directory = '../processed_data/pricing/daily/'
44 |
45 | # pricing_assets is an ordered dict that contains the name of every security in the pricing directory
46 | pricing_assets = helper_functions.get_pricing_securities(pricing_directory)
47 |
48 | """
49 | fundamental_assets is an ordered dict that contains the name of every security in the fundamentals directory
50 | dates is a list of dates that the fundamentals directory is indexed by
51 | """
52 | fundamental_assets, dates = helper_functions.get_dates(fundamentals_directory)
53 |
54 | # Securities that are in both pricing_assets, and fundamental_assets
55 | tickers = helper_functions.get_tickers_in_both(pricing_assets, fundamental_assets)
56 |
57 | date_stamps = helper_functions.convert_to_date_stamps(dates)
58 |
59 | data_frames = {}
60 |
61 | for data in data_points:
62 | # creates a dataframe for each data point, puts it in the data_frames dict
63 | data_frames[data] = helper_functions.make_frame(data, fundamentals_directory, tickers)
64 |
65 | for data_frame in data_frames:
66 | """
67 | assets variable becomes a list of Asset objects, sids becomes a list of SID objects corresponding to the correct
68 | assets.
69 | """
70 | assets = bundle_data.asset_finder.lookup_symbols([ticker for ticker in data_frames[data_frame].columns],
71 | as_of_date=None)
72 | sids = pd.Int64Index([asset.sid for asset in assets])
73 | break
74 |
75 |
76 | class MyDataSet(DataSet):
77 | """
78 | We need to create an attribute for each needed data point within MyDataSet, before __new__() runs...
79 | This is so MyDataSet converts the Column types into BoundColumn types.
80 | """
81 | for point in data_points:
82 | locals()[point] = Column(dtype=float)
83 |
84 | """
85 | We are finally ready to create a dictionary of data frame loaders, with corresponding BoundColumn attributes
86 | within our MyDataSet class.
87 | """
88 | data_frame_loaders = {}
89 |
90 | for data_frame in data_frames:
91 | """
92 | Reindexes the dataframe indexes with date_stamps instead of dates, and replaces the column names (which are
93 | currently strings) with SIDS.
94 | """
95 | data_frames[data_frame].index, data_frames[data_frame].columns = date_stamps, sids
96 |
97 | for attr in data_frames:
98 | """
99 |         Fills data_frame_loaders with key/value pairs of MyDataSet.attribute_name: DataFrameLoader(MyDataSet.attribute_name, data_frame)
100 | """
101 | data_frame_loaders[getattr(MyDataSet, attr)] = DataFrameLoader(getattr(MyDataSet, attr), data_frames[attr])
102 |
103 | return data_frame_loaders, MyDataSet
104 |
105 | def make_pipeline():
106 |
107 | yearly_returns = Returns(window_length=252)
108 |
109 | monthly_returns = Returns(window_length=21)
110 |
111 | lagged_returns = yearly_returns - monthly_returns
112 |
113 | return Pipeline(
114 | columns={
115 | 'lagged_returns': lagged_returns,
116 | 'marketcap': MyDataSet.marketcap.latest,
117 | },
118 | screen=lagged_returns.notnull() &
119 | MyDataSet.marketcap.latest.notnull() &
120 | MyDataSet.marketcap.latest.top(500)
121 | )
122 |
123 | def initialize(context):
124 | """
125 | Function runs once, at the start of the backtest. You must attach_pipeline() here.
126 | :param context: A common namespace to keep variables in
127 | :return:
128 | """
129 |
130 |
131 | attach_pipeline(
132 | make_pipeline(),
133 | 'data_pipe'
134 | )
135 |
136 | def before_trading_start(context, data):
137 | """
138 | Runs once a day, before trading start
139 | :param context: The common namespace
140 | :param data:
141 | :return:
142 | """
143 |
144 |
145 | def handle_data(context, data):
146 | """
147 | Runs every day, at market open
148 | :param context: Common namespace
149 | :param data:
150 | :return:
151 | """
152 |
153 | order_target_percent(symbol('AAPL'), 1)
154 |
155 | def analyze(context, perf):
156 | """
157 | Helper function that runs at the end of backtest for analysis
158 | :param context: Common namespace
159 | :param perf: The data which shows how the backtest performed
160 | :return:
161 | """
162 | perf.to_csv('../backtest_outputs/backtest_on_{}.csv'.format(str(dt.datetime.now())))
163 |
164 | fig = plt.figure()
165 | ax1 = fig.add_subplot(211)
166 | perf.portfolio_value.plot(ax=ax1)
167 | ax1.set_ylabel('portfolio value in $')
168 | plt.legend(loc=0)
169 | plt.show()
170 |
171 | if __name__ == "__main__":
172 |
173 | load_extensions(
174 | default=True,
175 | extensions=[],
176 | strict=True,
177 | environ=os.environ,
178 | )
179 |
180 | bundle = bundles.load('sharadar-pricing') # This is a bundle made from Sharadar SEP data
181 |
182 | data_frame_loaders, MyDataSet = prepare_data(bundle)
183 |
184 | print('Made it to run_algorithm')
185 |
186 | run_algorithm(
187 | bundle='sharadar-pricing',
188 | before_trading_start=before_trading_start,
189 | start=pd.Timestamp('2017-01-02', tz='utc'),
190 | end=pd.Timestamp('2018-04-20', tz='utc'),
191 | initialize=initialize,
192 | analyze=analyze,
193 | capital_base=10000,
194 | handle_data=handle_data,
195 | data_frame_loaders=data_frame_loaders
196 | )
197 |
--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
1 | # Using Fundamental Data For Backtests Within Zipline
2 |
3 | Follow-along YouTube video: https://www.youtube.com/watch?v=vh42tQDDC1U
4 |
5 | It is common to see questions related to using external, fundamental data for backtests within Zipline.
6 | I have developed an easy way of implementing data from Sharadar’s SF1 (fundamentals) and SEP (pricing) datasets.
7 | These datasets are very popular because of their robustness relative to their low price.
8 |
9 | Others have written about this topic, such as Jonathan Larkin on [Zipline Github issue #911](https://github.com/quantopian/zipline/issues/911)
10 | and Peter Harrington on his [AlphaCompiler blog](http://alphacompiler.com/blog/6/).
11 |
12 | I tried both of these methods, and found that neither of them worked for my particular use case.
13 | It is worth noting that my code borrows heavily from Jonathan Larkin’s implementation of dispatching custom loaders when creating a data pipeline, with a special shout out to Scott Sanderson for helping me alter Zipline to make the run_algorithm() function accept dataframe loaders.
14 |
15 | Because this method uses Pandas Data Frames to load all data into the data pipeline using the [DataFrameLoader class](https://github.com/quantopian/zipline/blob/master/zipline/pipeline/loaders/frame.py), you can only load data which will fit in your computer’s RAM. My next improvement will be implementing the [Blaze loader](https://github.com/quantopian/zipline/blob/master/zipline/pipeline/loaders/blaze/core.py) in order to increase loading speed, and do away with this annoying limitation.
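
In practice, the pattern boils down to declaring a DataSet with one Column per fundamental field and mapping each resulting BoundColumn to its own DataFrameLoader. Here is a condensed sketch of the pattern used in the backtest scripts below (the toy frame stands in for the fundamentals frames built there):
```
import pandas as pd

from zipline.pipeline.data import Column, DataSet
from zipline.pipeline.loaders.frame import DataFrameLoader

class MyDataSet(DataSet):
    marketcap = Column(dtype=float)

# toy frame: rows are tz-aware session timestamps, columns are SIDs
df = pd.DataFrame(
    {0: [1.0e9, 1.1e9]},
    index=pd.to_datetime(['2018-01-02', '2018-01-03']).tz_localize('UTC'),
)

data_frame_loaders = {MyDataSet.marketcap: DataFrameLoader(MyDataSet.marketcap, df)}
```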
16 |
17 | **OK, let's get started.**
18 |
19 | The official Zipline docs recommend using Conda to set up this environment, but it seems that Conda sometimes doesn't install the very latest version of Zipline.
20 | I use pip and Pyenv to set up my environment; here are my step-by-step instructions for setting up shop on macOS.
21 |
22 | **PS: these instructions are for macOS. I will try to accommodate Windows and Linux users, but make no guarantees.**
23 |
24 | #### Step 0: Clone this repo into the directory of your choice
25 | ```
26 | git clone https://github.com/calmitchell617/Springbok.git
27 | ```
28 |
29 | #### Step 1: Install Python 3.5.5 using Pyenv
30 |
31 | I followed [this tutorial](https://medium.com/@pimterry/setting-up-pyenv-on-os-x-with-homebrew-56c7541fd331) to set up Python 3.5.5 using Pyenv. Assuming you already have Homebrew installed, here are the essential steps:
32 |
33 | ```
34 | CFLAGS="-I$(xcrun --show-sdk-path)/usr/include"
35 | brew install pyenv
36 | brew install readline
37 | pyenv install 3.5.5
38 | pyenv versions
39 | ```
40 |
41 | The command that starts with CFLAGS works around a build issue that is well known to the Pyenv folks, and they have created a page for common build problems.
42 | If that command fails, or any of the subsequent commands fail, check out [this page](https://github.com/pyenv/pyenv/wiki/Common-build-problems) to see if your problem can be solved there.
43 |
44 | Look to make sure Python 3.5.5 is listed, then run:
45 | ```
46 | pyenv local 3.5.5
47 | eval "$(pyenv init -)"
48 | python --version
49 | ```
50 | Check to make sure you are running Python 3.5.5. If so, great!
51 |
52 | #### Step 2: Install Zipline's dependencies using pip
53 | ```
54 | pip install --upgrade pip
55 | ```
56 | This makes sure pip is up to date.
57 | ```
58 | pip install numpy
59 | pip install pandas
60 | pip install cython
61 | pip install -U setuptools
62 | pip install matplotlib
63 | ```
64 |
65 | #### Step 3: Install my modified version of Zipline
66 |
67 | Navigate to the directory where you want my branch of Zipline to be downloaded, and run these commands:
68 | ```
69 | git clone https://github.com/calmitchell617/zipline.git
70 | pip install (copy and paste the path of where you installed my zipline repo here)
71 | ```
72 | Zipline should now install, and we can use this environment to do all kinds of fun stuff.
73 |
74 | #### Step 4: Download and process data from Quandl / Sharadar
75 |
76 | Run mkdirs.py to set up the proper folder structure.
77 |
78 | Download pricing and fundamental data from Quandl.
79 | Unzip, and put these 2 files in the data_downloads folder.
80 | **Do not put the zip files in the data_downloads folder.**
81 | Pricing: https://www.quandl.com/databases/SEP/documentation/batch-download
82 | Fundamentals: https://www.quandl.com/databases/SF1/documentation/batch-download
83 |
84 | Run process_data.py. **It will take an hour or more to process all of the data.** It will print “Done!” when it’s done.
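
Assuming the `python` on your PATH is the Pyenv 3.5.5 interpreter from Step 1, both scripts are run from the repo root:
```
python mkdirs.py
# download and unzip the SEP and SF1 files into data_downloads, then:
python process_data.py
```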
85 |
86 | #### Step 5: Ingest the data bundle
87 |
88 | We need to ingest all pricing data into what’s known as a data bundle.
89 | To do this, we will take our CSV files, which have been processed into the correct OHLCV format, and run the “zipline ingest” command from the command line.
90 | To successfully run this command, however, we will first have to make a few changes to Zipline's configuration.
91 |
92 | The .zipline folder is hidden by default, so you need to set your computer to reveal hidden files and directories.
93 | **In macOS Sierra and later, this is quite easy. While in Finder, press “Command + Shift + . “, and hidden files and folders will be revealed.**
94 |
95 | Important: If you don't see the .zipline directory, I put an empty version of it in the repository. Copy and paste it into your home directory.
96 |
97 | For Windows or Linux, do a Google search for “(your OS and version here) reveal hidden files”. There will definitely be a tutorial to help you.
98 |
99 | Now find your .zipline folder, which is under your user directory.
100 | For example, on Mac, this is under MacintoshHD -> Users -> the username you are currently using.
101 |
102 | Open “extension.py” to modify it.
103 |
104 | Change the start_session variable to match the date on which your pricing data starts.
105 |
106 | Change the end_session variable to match the date that your pricing data ends on.
107 |
108 | Change the first parameter in the register() function to: sharadar-pricing
109 |
110 | Make sure the first parameter of the csvdir_equities() function is: ['daily']
111 |
112 | Make the second parameter of the csvdir_equities() function the full path to your pricing folder...
113 |
114 | For example, my directory is ' /Users/calmitchell/s/springbok-shared/processed_data/pricing/ '
115 |
116 | This folder should contain one other folder named: daily
117 |
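For reference, after those edits the relevant part of extension.py should look something like this (the dates and path are examples — use the ones that match your data; an empty copy of this file also ships in the repo's .zipline folder):
```
import pandas as pd

from zipline.data.bundles import register
from zipline.data.bundles.csvdir import csvdir_equities

start_session = pd.Timestamp('2013-01-02', tz='UTC')  # first date of your pricing data
end_session = pd.Timestamp('2018-07-03', tz='UTC')    # last date of your pricing data

register(
    'sharadar-pricing',
    csvdir_equities(
        ['daily'],
        '/full/path/to/processed_data/pricing',  # the folder that contains "daily"
    ),
    calendar_name='NYSE',
    start_session=start_session,
    end_session=end_session,
)
```
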
118 | Make sure you are running Python 3.5.5 with Zipline installed properly (Step 1 above), then run:
119 | ```
120 | zipline ingest -b 'sharadar-pricing'
121 | ```
122 | This will take a few minutes. Check whether any fatal errors occurred, and
123 | also check the .zipline directory for a new folder called sharadar-pricing.
124 |
125 | Open the folder within that folder, and if it's not empty, that means you are in business!
126 |
127 | #### Step 6:
128 |
129 | Run aapl_backtest.py to ensure everything is working.
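
The backtest scripts use paths relative to the backtest_strategies folder (for example '../processed_data/'), so run them from inside that folder:
```
cd backtest_strategies
python aapl_backtest.py
```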
130 |
131 | At this point, you have ingested pricing data, processed fundamental data into a known directory, and run a backtest using the data.
132 | Check out the comments and structure of aapl_backtest.py to further your understanding of how to work with Zipline.
133 |
134 | #### Optional step 7:
135 |
136 | Download historical "SPY" pricing data from Yahoo Finance. Delete the adjusted close column, and change the date column to match the pricing files in processed_data/pricing/daily. Re-ingest the files.
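
Here is a rough sketch of that reformatting with pandas (the column names assume Yahoo's CSV export and the file names are illustrative; adjust them to what you actually download):
```
import pandas as pd

spy = pd.read_csv('SPY.csv', parse_dates=['Date'])
spy = spy.drop('Adj Close', axis=1)                  # remove the adjusted close column
spy.columns = ['date', 'open', 'high', 'low', 'close', 'volume']
spy['date'] = spy['date'].dt.strftime('%Y-%m-%d')    # match the date format of the other pricing CSVs
spy.to_csv('processed_data/pricing/daily/SPY.csv', index=False)
```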
137 |
138 | ### Happy alpha hunting!
139 |
--------------------------------------------------------------------------------
/backtest_strategies/rays_long_short_strategy.py:
--------------------------------------------------------------------------------
1 | from utilities import helper_functions
2 | from utilities.rays_long_short_strategy import rays_long_short_strategy_helpers
3 |
4 |
5 | from zipline.data import bundles
6 | from zipline.pipeline import Pipeline
7 | from zipline.pipeline.data import USEquityPricing, Column, DataSet
8 | from zipline.pipeline.loaders.frame import DataFrameLoader
9 | from zipline.utils.run_algo import load_extensions
10 | from zipline import run_algorithm
11 | from zipline.api import (
12 | attach_pipeline,
13 | pipeline_output,
14 | get_open_orders,
15 | record
16 | )
17 |
18 | import os
19 | import datetime as dt
20 |
21 | import pandas as pd
22 | import matplotlib
23 | matplotlib.use('TkAgg') # This forces MatPlotLib to use TkAgg as a backend
24 | import matplotlib.pyplot as plt
25 |
26 |
27 | def prepare_data(bundle_data):
28 | """
29 | This function takes a data bundle and matches fundamental data points to the correct asset objects.
30 | :param bundle_data: The data bundle that you ingested from SEP
31 | :return: A dictionary of loaders to be used within a data pipeline, and a DataSet class with the correct columns
32 | """
33 |
34 | """
35 | Enter the name of the data points you wish to use in the backtest here. The names need to match the name of the
36 | appropriate CSV file found in processed_data/fundamentals
37 | """
38 | data_points = ['pe1', 'de', 'earnings_growth', 'marketcap']
39 |
40 | # Specify where our CSV files live
41 | fundamentals_directory = '../processed_data/fundamentals/'
42 | pricing_directory = '../processed_data/pricing/daily/'
43 |
44 | # pricing_assets is an ordered dict that contains the name of every security in the pricing directory
45 | pricing_assets = helper_functions.get_pricing_securities(pricing_directory)
46 |
47 | """
48 | fundamental_assets is an ordered dict that contains the name of every security in the fundamentals directory
49 | dates is a list of dates that the fundamentals directory is indexed by
50 | """
51 | fundamental_assets, dates = helper_functions.get_dates(fundamentals_directory)
52 |
53 | # Securities that are in both pricing_assets, and fundamental_assets
54 | tickers = helper_functions.get_tickers_in_both(pricing_assets, fundamental_assets)
55 |
56 | date_stamps = helper_functions.convert_to_date_stamps(dates)
57 |
58 | data_frames = {}
59 |
60 | for data in data_points:
61 | # creates a dataframe for each data point, puts it in the data_frames dict
62 | data_frames[data] = helper_functions.make_frame(data, fundamentals_directory, tickers)
63 |
64 | for data_frame in data_frames:
65 | """
66 | assets variable becomes a list of Asset objects, sids becomes a list of SID objects corresponding to the correct
67 | assets.
68 | """
69 | assets = bundle_data.asset_finder.lookup_symbols([ticker for ticker in data_frames[data_frame].columns],
70 | as_of_date=None)
71 | sids = pd.Int64Index([asset.sid for asset in assets])
72 | break
73 |
74 |
75 | class MyDataSet(DataSet):
76 | """
77 | We need to create an attribute for each needed data point within MyDataSet, before __new__() runs...
78 | This is so MyDataSet converts the Column types into BoundColumn types.
79 | """
80 | for point in data_points:
81 | locals()[point] = Column(dtype=float)
82 |
83 | """
84 | We are finally ready to create a dictionary of data frame loaders, with corresponding BoundColumn attributes
85 | within our MyDataSet class.
86 | """
87 | data_frame_loaders = {}
88 |
89 | for data_frame in data_frames:
90 | """
91 | Reindexes the dataframe indexes with date_stamps instead of dates, and replaces the column names (which are
92 | currently strings) with SIDS.
93 | """
94 | data_frames[data_frame].index, data_frames[data_frame].columns = date_stamps, sids
95 |
96 | for attr in data_frames:
97 | """
98 |         Fills data_frame_loaders with key/value pairs of MyDataSet.attribute_name: DataFrameLoader(MyDataSet.attribute_name, data_frame)
99 | """
100 | data_frame_loaders[getattr(MyDataSet, attr)] = DataFrameLoader(getattr(MyDataSet, attr), data_frames[attr])
101 |
102 | return data_frame_loaders, MyDataSet
103 |
104 | def make_pipeline():
105 |
106 | return Pipeline(
107 | columns={
108 | 'price': USEquityPricing.close.latest,
109 | 'pe1': MyDataSet.pe1.latest,
110 | 'de': MyDataSet.de.latest,
111 | 'earnings_growth': MyDataSet.earnings_growth.latest,
112 | 'marketcap': MyDataSet.marketcap.latest,
113 | },
114 | screen=USEquityPricing.close.latest.notnull() &
115 | MyDataSet.de.latest.notnull() &
116 | MyDataSet.pe1.latest.notnull() &
117 | MyDataSet.earnings_growth.latest.notnull() &
118 | MyDataSet.marketcap.latest.notnull()
119 | )
120 |
121 | def initialize(context):
122 | """
123 | Function runs once, at the start of the backtest. You must attach_pipeline() here.
124 | :param context: A common namespace to keep variables in
125 | :return:
126 | """
127 |
128 | context.longs_portfolio = {}
129 | context.shorts_portfolio = {}
130 |
131 | attach_pipeline(
132 | make_pipeline(),
133 | 'data_pipe'
134 | )
135 |
136 | def before_trading_start(context, data):
137 | """
138 | Runs once a day, before trading start
139 | :param context: The common namespace
140 | :param data:
141 | :return:
142 | """
143 | context.output = pipeline_output('data_pipe')
144 |
145 | context.cap_plays = context.output.sort_values(['marketcap'])[-4000:] # take top 4000 stocks by market cap for liquidity
146 |
147 | context.longs = rays_long_short_strategy_helpers.get_longs(context.cap_plays)
148 |
149 | context.shorts = rays_long_short_strategy_helpers.get_shorts(context.cap_plays)
150 |
151 | record(open_orders=str(get_open_orders()))
152 |
153 |
154 | def handle_data(context, data):
155 | """
156 | Runs every day, at market open
157 |     :param context: Common namespace
158 | :param data:
159 | :return:
160 | """
161 |
162 |     rays_long_short_strategy_helpers.portfolio_logic(context)  # mutates context in place; no need to reassign
163 |
164 | def analyze(context, perf):
165 | """
166 | Helper function that runs at the end of backtest for analysis
167 | :param context: Common namespace
168 | :param perf: The data which shows how the backtest performed
169 | :return:
170 | """
171 | perf.to_csv('../backtest_outputs/backtest_on_{}.csv'.format(str(dt.datetime.now())))
172 |
173 | fig = plt.figure()
174 | ax1 = fig.add_subplot(211)
175 | perf.portfolio_value.plot(ax=ax1)
176 | ax1.set_ylabel('portfolio value in $')
177 | plt.legend(loc=0)
178 | plt.show()
179 |
180 | if __name__ == "__main__":
181 |
182 | load_extensions(
183 | default=True,
184 | extensions=[],
185 | strict=True,
186 | environ=os.environ,
187 | )
188 |
189 | bundle = bundles.load('sharadar-pricing') # This is a bundle made from Sharadar SEP data
190 |
191 | data_frame_loaders, MyDataSet = prepare_data(bundle)
192 |
193 | print('Made it to run_algorithm')
194 |
195 | run_algorithm(
196 | bundle='sharadar-pricing',
197 | before_trading_start=before_trading_start,
198 | start=pd.Timestamp('2018-01-02', tz='utc'),
199 | end=pd.Timestamp('2018-04-20', tz='utc'),
200 | initialize=initialize,
201 | analyze=analyze,
202 | capital_base=1000000,
203 | handle_data=handle_data,
204 | data_frame_loaders=data_frame_loaders
205 | )
206 |
--------------------------------------------------------------------------------
/backtest_strategies/momentum_crashes_backtest.py:
--------------------------------------------------------------------------------
1 | from utilities import helper_functions as helper_functions
2 | from utilities.momentum_crashes import momentum_strategy_helpers as momentum_strategy_helpers
3 |
4 | from zipline.data import bundles
5 | from zipline.pipeline import Pipeline
6 | from zipline.pipeline.data import Column, DataSet
7 | from zipline.pipeline.factors import Returns
8 | from zipline.pipeline.loaders.frame import DataFrameLoader
9 | from zipline.utils.run_algo import load_extensions
10 | from zipline import run_algorithm
11 | from zipline.api import (
12 | attach_pipeline,
13 | pipeline_output,
14 | symbol,
15 | set_max_leverage,
16 | record
17 | )
18 |
19 | import os
20 | import datetime as dt
21 |
22 | import pandas as pd
23 | import matplotlib
24 | matplotlib.use('TkAgg') # This forces MatPlotLib to use TkAgg as a backend
25 | import matplotlib.pyplot as plt
26 |
27 |
28 | def prepare_data(bundle_data):
29 | """
30 | This function takes a data bundle and matches fundamental data points to the correct asset objects.
31 | :param bundle_data: The data bundle that you ingested from SEP
32 | :return: A dictionary of loaders to be used within a data pipeline, and a DataSet class with the correct columns
33 | """
34 |
35 | """
36 | Enter the name of the data points you wish to use in the backtest here. The names need to match the name of the
37 | appropriate CSV file found in processed_data/fundamentals
38 | """
39 | data_points = ['marketcap']
40 |
41 | # Specify where our CSV files live
42 | fundamentals_directory = '../processed_data/fundamentals/'
43 | pricing_directory = '../processed_data/pricing/daily/'
44 |
45 | # pricing_assets is an ordered dict that contains the name of every security in the pricing directory
46 | pricing_assets = helper_functions.get_pricing_securities(pricing_directory)
47 |
48 | """
49 | fundamental_assets is an ordered dict that contains the name of every security in the fundamentals directory
50 | dates is a list of dates that the fundamentals directory is indexed by
51 | """
52 | fundamental_assets, dates = helper_functions.get_dates(fundamentals_directory)
53 |
54 | # Securities that are in both pricing_assets, and fundamental_assets
55 | tickers = helper_functions.get_tickers_in_both(pricing_assets, fundamental_assets)
56 |
57 | date_stamps = helper_functions.convert_to_date_stamps(dates)
58 |
59 | data_frames = {}
60 |
61 | for data in data_points:
62 | # creates a dataframe for each data point, puts it in the data_frames dict
63 | data_frames[data] = helper_functions.make_frame(data, fundamentals_directory, tickers)
64 |
65 | for data_frame in data_frames:
66 | """
67 | assets variable becomes a list of Asset objects, sids becomes a list of SID objects corresponding to the correct
68 | assets.
69 | """
70 | assets = bundle_data.asset_finder.lookup_symbols([ticker for ticker in data_frames[data_frame].columns],
71 | as_of_date=None)
72 | sids = pd.Int64Index([asset.sid for asset in assets])
73 | break
74 |
75 |
76 | class MyDataSet(DataSet):
77 | """
78 | We need to create an attribute for each needed data point within MyDataSet, before __new__() runs...
79 | This is so MyDataSet converts the Column types into BoundColumn types.
80 | """
81 | for point in data_points:
82 | locals()[point] = Column(dtype=float)
83 |
84 | """
85 | We are finally ready to create a dictionary of data frame loaders, with corresponding BoundColumn attributes
86 | within our MyDataSet class.
87 | """
88 | data_frame_loaders = {}
89 |
90 | for data_frame in data_frames:
91 | """
92 | Reindexes the dataframe indexes with date_stamps instead of dates, and replaces the column names (which are
93 | currently strings) with SIDS.
94 | """
95 | data_frames[data_frame].index, data_frames[data_frame].columns = date_stamps, sids
96 |
97 | for attr in data_frames:
98 | """
99 |         Fills data_frame_loaders with key/value pairs of MyDataSet.attribute_name: DataFrameLoader(MyDataSet.attribute_name, data_frame)
100 | """
101 | data_frame_loaders[getattr(MyDataSet, attr)] = DataFrameLoader(getattr(MyDataSet, attr), data_frames[attr])
102 |
103 | return data_frame_loaders, MyDataSet
104 |
105 | def make_pipeline():
106 |
107 | yearly_returns = Returns(window_length=252)
108 |
109 | monthly_returns = Returns(window_length=21)
110 |
111 | lagged_returns = yearly_returns - monthly_returns
112 |
113 | return Pipeline(
114 | columns={
115 | 'lagged_returns': lagged_returns,
116 | 'marketcap': MyDataSet.marketcap.latest,
117 | },
118 | screen=lagged_returns.notnull() &
119 | MyDataSet.marketcap.latest.notnull() &
120 | MyDataSet.marketcap.latest.top(500)
121 | )
122 |
123 | def initialize(context):
124 | """
125 | Function runs once, at the start of the backtest. You must attach_pipeline() here.
126 | :param context: A common namespace to keep variables in
127 | :return:
128 | """
129 |
130 | context.longs_portfolio = {}
131 | context.shorts_portfolio = {}
132 | set_max_leverage(1)
133 |
134 | attach_pipeline(
135 | make_pipeline(),
136 | 'data_pipe'
137 | )
138 |
139 | def before_trading_start(context, data):
140 | """
141 | Runs once a day, before trading start
142 | :param context: The common namespace
143 | :param data:
144 | :return:
145 | """
146 | context.output = pipeline_output('data_pipe')
147 |
148 | market_type(context, data)
149 |
150 | context.longs = momentum_strategy_helpers.get_longs(context)
151 |
152 | context.shorts = momentum_strategy_helpers.get_shorts(context)
153 |
154 | record(market_type=str(context.market_type))
155 |
156 |
157 | def market_type(context, data):
158 | """
159 |     Attempts to determine whether we are in a bull or bear market, based on whether SPY is higher than it was a year ago.
160 | """
161 |
162 | history = data.history(symbol('SPY'), 'close', 252, '1d')
163 |
164 |     if history.iloc[-1] >= history.iloc[0]:
165 | context.market_type = 'bull'
166 |
167 | else:
168 | context.market_type = 'bear'
169 |
170 | def handle_data(context, data):
171 | """
172 | Runs every day, at market open
173 | :param context: Common namespace
174 | :param data:
175 | :return:
176 | """
177 |
178 |     momentum_strategy_helpers.portfolio_logic(context)  # mutates context in place; no need to reassign
179 |
180 | def analyze(context, perf):
181 | """
182 | Helper function that runs at the end of backtest for analysis
183 | :param context: Common namespace
184 | :param perf: The data which shows how the backtest performed
185 | :return:
186 | """
187 | perf.to_csv('../backtest_outputs/backtest_on_{}.csv'.format(str(dt.datetime.now())))
188 |
189 | fig = plt.figure()
190 | ax1 = fig.add_subplot(211)
191 | perf.portfolio_value.plot(ax=ax1)
192 | ax1.set_ylabel('portfolio value in $')
193 | plt.legend(loc=0)
194 | plt.show()
195 |
196 | if __name__ == "__main__":
197 |
198 | load_extensions(
199 | default=True,
200 | extensions=[],
201 | strict=True,
202 | environ=os.environ,
203 | )
204 |
205 | bundle = bundles.load('sharadar-pricing') # This is a bundle made from Sharadar SEP data
206 |
207 | data_frame_loaders, MyDataSet = prepare_data(bundle)
208 |
209 | print('Made it to run_algorithm')
210 |
211 | run_algorithm(
212 | bundle='sharadar-pricing',
213 | before_trading_start=before_trading_start,
214 | start=pd.Timestamp('2018-01-02', tz='utc'),
215 | end=pd.Timestamp('2018-04-20', tz='utc'),
216 | initialize=initialize,
217 | analyze=analyze,
218 | capital_base=1000000,
219 | handle_data=handle_data,
220 | data_frame_loaders=data_frame_loaders
221 | )
222 |
--------------------------------------------------------------------------------
/process_data.py:
--------------------------------------------------------------------------------
1 | def bundle_prep():
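    """
    Splits the SHARADAR SEP download in data_downloads into one OHLCV CSV per
    ticker under processed_data/pricing/daily, then reindexes each file to the
    NYSE trading calendar, forward-filling missing sessions.
    """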
2 |
3 | downloads_directory = 'data_downloads'
4 | directory = 'processed_data/pricing/daily'
5 |
6 | tickers = OrderedDict()
7 |
8 |     for root, dirs, files in os.walk(downloads_directory):  # let's get all of our tickers
9 | for file in files:
10 | if file.startswith('SHARADAR_SEP'):
11 | pricing_df = pd.read_csv('{}/{}'.format(downloads_directory, file))
12 |
13 | for ticker in pricing_df['ticker']:
14 | if ticker not in tickers:
15 | tickers[ticker] = True
16 |
17 | for ticker in tickers:
18 | with open('{}/{}.csv'.format(directory, ticker), 'w') as processed_file:
19 | writer = csv.writer(processed_file)
20 | writer.writerow(['date', 'open', 'high', 'low', 'close', 'volume'])
21 |
22 | iterator = pricing_df.iterrows()
23 | next(iterator)
24 | for i, row in iterator:
25 | with open('{}/{}.csv'.format(directory, row['ticker']), 'a') as ticker_file:
26 | ticker_writer = csv.writer(ticker_file)
27 | ticker_writer.writerow(
28 | [
29 | row['date'],
30 | row['open'],
31 | row['high'],
32 | row['low'],
33 | row['close'],
34 | row['volume']
35 |
36 | ]
37 | )
38 |
39 | for ticker in tickers: # we need to reindex the files to deal with missing data (we will forward fill)
40 |
41 | df = pd.read_csv('{}/{}.csv'.format(directory, ticker), index_col='date')
42 | length = len(df.index) - 1
43 | start_date = df.index[0]
44 | end_date = df.index[length]
45 |
46 | sessions = get_calendar('NYSE').sessions_in_range(start_date, end_date).tolist()
47 |
48 | for i in range(len(sessions)):
49 | sessions[i] = str(sessions[i])
50 |
51 | try:
52 | df = df.reindex(sessions, method='pad')
53 | os.remove('{}/{}.csv'.format(directory, ticker))
54 | df.to_csv('{}/{}.csv'.format(directory, ticker))
55 | except ValueError:
56 | print(ticker)
57 | os.remove('{}/{}.csv'.format(directory, ticker))
58 | continue
59 |
60 | print("Bundle prep is finished.")
61 |
62 |
63 | def fundamentals_prep():
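    """
    Filters the SHARADAR SF1 download down to ARQ (as-reported quarterly) rows,
    pivots each fundamental field into its own date-by-ticker CSV under
    processed_data/fundamentals, derives an earnings_growth series from netinc
    and revenue, then reindexes every file to the NYSE calendar with forward fill.
    """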
64 |
65 | downloads_directory = 'data_downloads'
66 | directory = 'processed_data/fundamentals'
67 |
68 |     for root, dirs, files in os.walk(downloads_directory):  # find the SF1 download and keep only the ARQ rows
69 | for file in files:
70 | if file.startswith('SHARADAR_SF1'):
71 | with open('{}/{}'.format(downloads_directory, file), 'r') as read_file:
72 | with open('{}/all_arq.csv'.format(directory), 'w') as write_file:
73 | reader = csv.reader(read_file)
74 | writer = csv.writer(write_file)
75 | first_line_gone = False
76 | for row in reader:
77 | if first_line_gone is True:
78 | if row[1] == 'ARQ':
79 | writer.writerow(row)
80 | else:
81 | writer.writerow(row)
82 | first_line_gone = True
83 |
84 | desired_data = [
85 | 'accoci',
86 | 'assets',
87 | 'assetsc',
88 | 'assetsnc',
89 | 'bvps',
90 | 'capex',
91 | 'cashneq',
92 | 'cashnequsd',
93 | 'cor',
94 | 'consolinc',
95 | 'currentratio',
96 | 'de',
97 | 'debt',
98 | 'debtc',
99 | 'debtnc',
100 | 'debtusd',
101 | 'deferredrev',
102 | 'depamor',
103 | 'deposits',
104 | 'divyield',
105 | 'dps',
106 | 'ebit',
107 | 'ebitda',
108 | 'ebitdamargin',
109 | 'ebitdausd',
110 | 'ebitusd',
111 | 'ebt',
112 | 'eps',
113 | 'epsdil',
114 | 'epsusd',
115 | 'equity',
116 | 'equityusd',
117 | 'ev',
118 | 'evebit',
119 | 'evebitda',
120 | 'fcf',
121 | 'fcfps',
122 | 'fxusd',
123 | 'gp',
124 | 'grossmargin',
125 | 'intangibles',
126 | 'intexp',
127 | 'invcap',
128 | 'invcapavg',
129 | 'inventory',
130 | 'investments',
131 | 'investmentsc',
132 | 'investmentsnc',
133 | 'liabilities',
134 | 'liabilitiesc',
135 | 'liabilitiesnc',
136 | 'marketcap',
137 | 'ncf',
138 | 'ncfbus',
139 | 'ncfcommon',
140 | 'ncfdebt',
141 | 'ncfdiv',
142 | 'ncff',
143 | 'ncfi',
144 | 'ncfinv',
145 | 'ncfo',
146 | 'ncfx',
147 | 'netinc',
148 | 'netinccmn',
149 | 'netinccmnusd',
150 | 'netincdis',
151 | 'netincnci',
152 | 'netmargin',
153 | 'opex',
154 | 'opinc',
155 | 'payables',
156 | 'payoutratio',
157 | 'pb',
158 | 'pe',
159 | 'pe1',
160 | 'ppnenet',
161 | 'prefdivis',
162 | 'price',
163 | 'ps',
164 | 'ps1',
165 | 'receivables',
166 | 'retearn',
167 | 'revenue',
168 | 'revenueusd',
169 | 'rnd',
170 | 'sbcomp',
171 | 'sgna',
172 | 'sharefactor',
173 | 'sharesbas',
174 | 'shareswa',
175 | 'shareswadil',
176 | 'sps',
177 | 'tangibles',
178 | 'taxassets',
179 | 'taxexp',
180 | 'taxliabilities',
181 | 'tbvps',
182 | 'workingcapital'
183 | ]
184 |
185 | # get all dates and tickers
186 |
187 | main_df = pd.read_csv('{}/all_arq.csv'.format(directory))
188 | date_stamps = sorted(set(main_df['datekey'].tolist()))
189 | for i in range(len(date_stamps)):
190 | date_stamps[i] = pd.Timestamp(date_stamps[i], tz='UTC')
191 | tickers = sorted(set(main_df['ticker'].tolist()))
192 |
193 | for column in desired_data:
194 | read_df = main_df[['datekey', 'ticker', column]]
195 | write_df = pd.DataFrame(index=date_stamps, columns=tickers)
196 |
197 | iterator = read_df.iterrows()
198 | for i, row in iterator:
199 | write_df[row['ticker']].loc[row['datekey']] = row[column]
200 |
201 | write_df.to_csv('{}/{}.csv'.format(directory, column))
202 |
203 | try:
204 | os.remove('{}/all_arq.csv'.format(directory))
205 | except OSError:
206 | pass
207 |
208 | fundamentals_directory = 'processed_data/fundamentals'
209 |
210 | earnings_df = pd.read_csv('{}/netinc.csv'.format(fundamentals_directory), index_col=0)
211 | revenue_df = pd.read_csv('{}/revenue.csv'.format(fundamentals_directory), index_col=0)
212 | growth_df = earnings_df
213 | new_df = pd.DataFrame(index=earnings_df.index, columns=earnings_df.columns)
214 |
215 | tickers = OrderedDict()
216 |
217 | for ticker in earnings_df.columns:
218 | tickers[ticker] = {'prev_earnings': None, 'cur_earnings': None, 'rev_growth': None}
219 |
220 | iterator = growth_df.iterrows()
221 |
222 | for i, row in iterator:
223 |
224 | for ticker in tickers:
225 | if not pd.isnull(row[ticker]):
226 | if row[ticker] != tickers[ticker]['cur_earnings']:
227 |
228 | if tickers[ticker]['cur_earnings'] is None: # first value
229 | tickers[ticker]['cur_earnings'] = row[ticker]
230 | continue
231 |
232 | tickers[ticker]['prev_earnings'] = tickers[ticker]['cur_earnings']
233 | tickers[ticker]['cur_earnings'] = row[ticker]
234 |
235 | rev = revenue_df.loc[i, ticker]
236 |
237 | if rev != 0:
238 |                     tickers[ticker]['rev_growth'] = (
239 |                         (tickers[ticker]['cur_earnings'] - tickers[ticker]['prev_earnings']) / rev)
240 |                     new_df.loc[i, ticker] = tickers[ticker]['rev_growth']
241 |                 else:
242 |                     new_df.loc[i, ticker] = tickers[ticker]['rev_growth']
243 |
244 | new_df.to_csv('{}/earnings_growth.csv'.format(fundamentals_directory))
245 |
246 | directory = 'processed_data/fundamentals'
247 |
248 | data_list = []
249 |
250 | data = OrderedDict()
251 |
252 |     for root, dirs, files in os.walk(directory):  # collect the names of the processed fundamentals CSV files
253 | for file in files:
254 | if file.endswith('csv'):
255 | data_list.append(file[:-4])
256 |
257 | data_list.sort()
258 |
259 | for point in data_list:
260 | data[point] = True
261 |
262 | for point in data_list: # we need to reindex the files to deal with missing data (we will forward fill)
263 |
264 | df = pd.read_csv('{}/{}.csv'.format(directory, point), index_col=0)
265 | length = len(df.index) - 1
266 | start_date = df.index[0]
267 | end_date = df.index[length]
268 |
269 | actual_sessions = df.index
270 |
271 | stamps = [str(pd.Timestamp(session, tz='UTC', offset='C')) for session in actual_sessions]
272 |
273 | df.index = stamps
274 |
275 | sessions = get_calendar('NYSE').sessions_in_range(start_date, end_date).tolist()
276 |
277 | for i in range(len(sessions)):
278 | sessions[i] = str(sessions[i])
279 |
280 | try:
281 | df = df.reindex(sessions)
282 | df = df.fillna(method='pad')
283 | os.remove('{}/{}.csv'.format(directory, point))
284 | except ValueError:
285 | print(point)
286 | continue
287 |
288 | df.to_csv('{}/{}.csv'.format(directory, point))
289 |
290 | print("Fundamentals prep is finished.")
291 |
292 |
293 | if __name__ == "__main__":
294 |
295 | import multiprocessing
296 | import os
297 | import pandas as pd
298 | import csv
299 | from collections import OrderedDict
300 | from zipline.utils.calendars import get_calendar
301 |
302 | # creating processes
303 | p1 = multiprocessing.Process(target=bundle_prep)
304 | p2 = multiprocessing.Process(target=fundamentals_prep)
305 |
306 | p1.start()
307 | p2.start()
308 |
309 | p1.join()
310 | p2.join()
311 |
312 | # both processes finished
313 | print("Done!")
314 |
--------------------------------------------------------------------------------
/.idea/workspace.xml:
--------------------------------------------------------------------------------